Python chainer.functions 模块,broadcast_to() 实例源码


项目:chainer_nmt    作者:odashi    | 项目源码 | 文件源码
def _context(self, p, fb_mat, fbe_mat):
    """Compute the attention context vector for one decoder step.

    Args:
        p: Input that ``self.p_e`` projects into the attention space
            (presumably the previous decoder state -- confirm with caller).
        fb_mat: Three-dimensional encoder states; the result is reshaped
            to ``[batch, 2 * hidden]`` after weighting them.
        fbe_mat: Attention-space encoder projection that is added to the
            broadcast projection of ``p`` (``[batch * srclen, atten]``).

    Returns:
        The context vector ``q`` with shape ``[batch, 2 * hidden]``.
    """
    batch_size, source_length, _ = fb_mat.shape
    # {pe,e}_mat: shape = [batch * srclen, atten]
    # The projected query is repeated along a new source-length axis so it
    # can be added to every source position.
    pe_mat = F.reshape(
        F.broadcast_to(
            F.expand_dims(self.p_e(p), 1),
            [batch_size, source_length, self.atten_size]),
        [batch_size * source_length, self.atten_size])
    e_mat = F.tanh(fbe_mat + pe_mat)
    # a_mat: shape = [batch, srclen]
    a_mat = F.softmax(F.reshape(self.e_a(e_mat), [batch_size, source_length]))
    # q: shape = [batch, 2 * hidden]
    q = F.reshape(
        F.batch_matmul(a_mat, fb_mat, transa=True),
        [batch_size, 2 * self.hidden_size])

    return q
项目:convolutional_seq2seq    作者:soskek    | 项目源码 | 文件源码
def attend(self, query, key, value, mask, minfs=None):
    """Compute masked attention and the attention-weighted sum of ``value``.

    Input shapes:
        q=(b, units, dec_l), k=(b, units, enc_l),
        v=(b, units, dec_l, enc_l), m=(b, dec_l, enc_l)

    ``minfs`` may supply a pre-built array of ``-inf`` shaped like the
    score matrix; it is created on the fly when omitted.

    Returns the weighted sum with shape (b, units, dec_l, 1).
    """
    # Calculate Attention Scores with Mask for Zero-padded Areas
    pre_a = F.batch_matmul(query, key, transa=True)  # (b, dec_l, enc_l)
    if minfs is None:
        minfs = self.xp.full(pre_a.shape, -np.inf, pre_a.dtype)
    pre_a = F.where(mask, pre_a, minfs)
    a = F.softmax(pre_a, axis=2)
    # If the values along axis=2 are all -inf, softmax yields NaN; replace
    # those entries with zeros (re-mask).
    a = F.where(self.xp.isnan(a.data),
                self.xp.zeros(a.shape, dtype=a.dtype), a)
    reshaped_a = a[:, None]  # (b, 1, dec_l, enc_l)

    # Calculate Weighted Sum
    pre_c = F.broadcast_to(reshaped_a, value.shape) * value
    c = F.sum(pre_c, axis=3, keepdims=True)  # (b, units, dec_l, 1)
    return c
项目:vsmlib    作者:undertherain    | 项目源码 | 文件源码
def __call__(self, x, context):
    """Compute and report the loss of predicting ``x`` from ``context``.

    ``x`` is repeated once per context column and flattened so that each
    (context item, target) pair becomes one row of the loss input. The
    context representation depends on the module-level ``args.subword``
    setting: ``'rnn'`` uses ``self.rnn.charRNN`` and ``'none'`` uses
    ``self.embed``.

    Returns:
        The loss, which is also reported under the key ``'loss'``.

    Raises:
        ValueError: If ``args.subword`` is neither ``'rnn'`` nor ``'none'``.
    """
    n_rows, n_cols = context.shape[0], context.shape[1]
    x = F.broadcast_to(x[:, None], (n_rows, n_cols))
    x = F.reshape(x, (n_rows * n_cols,))

    if args.subword == 'rnn':
        context = context.reshape((n_rows * n_cols))
        e = self.rnn.charRNN(context)
    elif args.subword == 'none':
        e = self.embed(context)
        e = F.reshape(e, (e.shape[0] * e.shape[1], e.shape[2]))
    else:
        # Previously this fell through and failed later on an unbound ``e``.
        raise ValueError('unsupported subword mode: %r' % (args.subword,))

    loss = self.loss_func(e, x)
    # NOTE(review): relies on chainer's ``reporter`` module being imported
    # at module level -- confirm against the file's imports.
    reporter.report({'loss': loss}, self)
    return loss
项目:nmtrain    作者:philip30    | 项目源码 | 文件源码
def __call__(self, y, a, ht, y_lex):
    """Add the log of attention-weighted lexicon values to ``y``.

    ``ht`` is accepted for interface compatibility but is not used.
    """
    # Batched product of the transposed lexicon with the attention, then
    # drop the axis of index 2 left over from the product.
    lexicon_term = F.batch_matmul(y_lex, a, transa=True)
    lexicon_term = F.squeeze(lexicon_term, axis=2)
    # ``self.alpha`` is added before taking the logarithm.
    log_lexicon = F.log(lexicon_term + self.alpha)
    return y + log_lexicon

#class LinearInterpolationLexicon(chainer.Chain):
#  def __init__(self, hidden_size):
#    super(LinearInterpolationLexicon, self).__init__(
#      perceptron = chainer.links.Linear(hidden_size, 1)
#    )
#  def __call__(self, y, a, ht, y_lex):
#    y      = F.softmax(y)
#    y_dict = F.squeeze(F.batch_matmul(y_lex, a, transa=True), axis=2)
#    gamma  = F.broadcast_to(F.sigmoid(self.perceptron(ht)),
#    return (gamma * y_dict + (1-gamma) * y)
项目:teras    作者:chantera    | 项目源码 | 文件源码
def __call__(self, x1, x2):
    """Apply the biaffine transformation to two batched sequences.

    ``x1`` is unpacked as (batch, len1, dim1) and ``x2`` contributes
    (len2, dim2) from its trailing axes. A column of ones is appended to
    an input unless the matching ``self.nobias`` flag is set.

    Returns:
        Scores arranged as (batch, len1, len2, out_size).
    """
    xp = self.xp
    n_out = self.out_size

    n_batch, n1, d1 = x1.shape
    if not self.nobias[0]:
        ones1 = xp.ones((n_batch, n1, 1), dtype=xp.float32)
        x1 = F.concat((x1, ones1), axis=2)
        d1 += 1

    n2, d2 = x2.shape[1:]
    if not self.nobias[1]:
        ones2 = xp.ones((n_batch, n2, 1), dtype=xp.float32)
        x2 = F.concat((x2, ones2), axis=2)
        d2 += 1

    # Fold batch and length together so a single matmul applies W.
    flat_x1 = F.reshape(x1, (n_batch * n1, d1))
    flat_w = F.reshape(F.transpose(self.W, (0, 2, 1)), (d1, n_out * d2))
    affine = F.reshape(F.matmul(flat_x1, flat_w),
                       (n_batch, n1 * n_out, d2))

    # NOTE(review): ``batch_matmul`` is called unqualified, unlike the
    # other chainer functions here -- confirm it is imported at module level.
    scores = batch_matmul(affine, x2, transb=True)
    scores = F.reshape(scores, (n_batch, n1, n_out, n2))
    scores = F.transpose(scores, (0, 1, 3, 2))

    if not self.nobias[2]:
        scores += F.broadcast_to(self.b, scores.shape)
    return scores
项目:vfm    作者:cemoody    | 项目源码 | 文件源码
def term_bias(self, bs, train=True):
    """Compute overall bias and broadcast to shape of batchsize.

    Args:
        bs: Batch size; mean and log variance are broadcast to ``(bs, 1)``.
        train: When false, ``self.lv_floor`` is added to the log variance
            before sampling.

    Returns:
        A tuple ``(bias, kld)``: the flattened bias sample and the KL
        divergence of the bias parameters.
    """
    shape = (bs, 1,)
    # Bias is drawn from a Gaussian with given mu and log variance
    bs_mu = F.broadcast_to(self.bias_mu.b, shape)
    bs_lv = F.broadcast_to(self.bias_lv.b, shape)

    # Add a very negative log variance so we're sampling
    # from a very narrow distribution about the mean.
    # Useful for validation dataset when we want to only guess
    # the mean. This must happen before sampling to have any effect,
    # matching how the other term_* methods apply the floor.
    if not train:
        bs_lv = bs_lv + self.lv_floor
    bias = F.flatten(F.gaussian(bs_mu, bs_lv))

    # Compute prior on the bias, so compute the KL div
    # from the KL(N(mu_bias, var_bias) | N(0, 1))
    kld = F.gaussian_kl_divergence(self.bias_mu.b, self.bias_lv.b)
    return bias, kld
项目:vfm    作者:cemoody    | 项目源码 | 文件源码
def term_feat(self, iloc, jloc, ival, jval, bs, nf, train=True):
    """Compute the feature-interaction term and its KL penalties.

    Args:
        iloc, jloc: Feature indices of the two sides of each interaction.
        ival, jval: Feature values matching ``iloc`` and ``jloc``.
        bs: Batch size.
        nf: Number of features; group vectors are broadcast to
            ``(bs, nf * 2, self.n_dim)``.
        train: When false, ``self.lv_floor`` is added to the group log
            variance before sampling.

    Returns:
        A tuple ``(feat, kld)`` where ``kld`` sums the group-level and the
        per-feature KL divergences.
    """
    # Change all of the shapes to form interaction vectors
    shape = (bs, nf * 2, self.n_dim)
    feat_mu_vec = F.broadcast_to(self.feat_mu_vec.b, shape)
    feat_lv_vec = F.broadcast_to(self.feat_lv_vec.b, shape)
    if not train:
        feat_lv_vec += self.lv_floor

    # Construct the interaction mean and variance
    # iloc is (bs, nf), feat(iloc) is (bs, nf, ndim) and
    # dot(feat, feat) is (bs, nf)
    ivec = F.gaussian(feat_mu_vec + self.feat_delta_mu(iloc),
                      feat_lv_vec + self.feat_delta_lv(iloc))
    jvec = F.gaussian(feat_mu_vec + self.feat_delta_mu(jloc),
                      feat_lv_vec + self.feat_delta_lv(jloc))
    # feat is (bs, )
    feat = dot(F.sum(ivec * jvec, axis=2), ival * jval)

    # Compute the KLD for the group mean vector and variance vector
    kld1 = F.gaussian_kl_divergence(self.feat_mu_vec.b, self.feat_lv_vec.b)
    # Compute the KLD for vector deviations from the group mean and var
    kld2 = F.gaussian_kl_divergence(self.feat_delta_mu.W,
                                    self.feat_delta_lv.W)
    return feat, kld1 + kld2
项目:vfm    作者:cemoody    | 项目源码 | 文件源码
def term_bias(self, bs, train=True):
    """Compute overall bias and broadcast to shape of batchsize.

    Args:
        bs: Batch size; mean and log variance are broadcast to ``(bs, 1)``.
        train: When false, ``self.lv_floor`` is added to the log variance
            before sampling.

    Returns:
        A tuple ``(bias, kld)``: the flattened bias sample and the KL
        divergence of the bias parameters.
    """
    shape = (bs, 1,)
    # Bias is drawn from a Gaussian with given mu and log variance
    bs_mu = F.broadcast_to(self.bias_mu.b, shape)
    bs_lv = F.broadcast_to(self.bias_lv.b, shape)

    # Add a very negative log variance so we're sampling
    # from a very narrow distribution about the mean.
    # Useful for validation dataset when we want to only guess
    # the mean. This must happen before sampling to have any effect,
    # matching how the other term_* methods apply the floor.
    if not train:
        bs_lv = bs_lv + self.lv_floor
    bias = F.flatten(F.gaussian(bs_mu, bs_lv))

    # Compute prior on the bias, so compute the KL div
    # from the KL(N(mu_bias, var_bias) | N(0, 1))
    kld = F.gaussian_kl_divergence(self.bias_mu.b, self.bias_lv.b)
    return bias, kld
项目:chainer-gan-improvements    作者:hvy    | 项目源码 | 文件源码
def __call__(self, x):
    """Concatenate minibatch-comparison features onto ``x``.

    The output of ``self.t`` is reshaped to
    (batch, n_kernels, kernel_dim), the absolute differences between all
    pairs of samples are summed over the kernel dimension, and
    ``exp(-distance)`` is summed over the other samples.

    Returns:
        ``x`` with the extra features concatenated along axis 1.
    """
    minibatch_size = x.shape[0]
    activation = F.reshape(self.t(x), (-1, self.n_kernels, self.kernel_dim))
    activation_ex = F.expand_dims(activation, 3)
    activation_ex_t = F.expand_dims(F.transpose(activation, (1, 2, 0)), 0)
    activation_ex, activation_ex_t = F.broadcast(activation_ex, activation_ex_t)
    diff = activation_ex - activation_ex_t

    # Pick numpy or cupy to match where the input lives.
    xp = chainer.cuda.get_array_module(x.data)
    # Identity entries are added on the self-comparison positions.
    eps = F.expand_dims(xp.eye(minibatch_size, dtype=xp.float32), 1)
    eps = F.broadcast_to(eps, (minibatch_size, self.n_kernels, minibatch_size))
    sum_diff = F.sum(abs(diff), axis=2)
    sum_diff = F.broadcast_to(sum_diff, eps.shape)
    abs_diff = sum_diff + eps

    minibatch_features = F.sum(F.exp(-abs_diff), 2)
    return F.concat((x, minibatch_features), axis=1)
项目:unrolled-gan    作者:musyoku    | 项目源码 | 文件源码
def __call__(self, x):
    """Concatenate minibatch-discrimination features onto ``x``.

    When ``self.train_weights`` is False, the weight of ``self.T`` is
    reset to the value captured on the first call, so it stays fixed.

    Returns:
        ``x`` with one feature per kernel concatenated along axis 1.
    """
    xp = chainer.cuda.get_array_module(x.data)
    batchsize = x.shape[0]
    # NOTE(review): assumes ``self.T`` is a link whose weight is ``W``
    # (e.g. a Linear link) -- confirm against the class definition.
    if self.train_weights == False and self.initial_T is not None:
        self.T.W.data = self.initial_T

    M = F.reshape(self.T(x), (-1, self.num_kernels, self.ndim_kernel))
    M = F.expand_dims(M, 3)
    M_T = F.transpose(M, (3, 1, 2, 0))
    M, M_T = F.broadcast(M, M_T)

    # Pairwise L1 distance between samples, summed over the kernel dim.
    norm = F.sum(abs(M - M_T), axis=2)
    # A large value on the self-comparison positions drives their
    # exp(-x) contribution to zero.
    eraser = F.broadcast_to(xp.eye(batchsize, dtype=x.dtype).reshape((batchsize, 1, batchsize)), norm.shape)
    c_b = F.exp(-(norm + 1e6 * eraser))
    o_b = F.sum(c_b, axis=2)

    if self.train_weights == False:
        self.initial_T = self.T.W.data

    return F.concat((x, o_b), axis=1)
项目:wavenet    作者:rampage644    | 项目源码 | 文件源码
def __call__(self, v, h, label):
    """Run one label-conditioned gated block over both stacks.

    Args:
        v: Input of the vertical stack.
        h: Input of the horizontal stack.
        label: Conditioning input passed to ``self.label``.

    Returns:
        A tuple ``(v, h)`` with the updated vertical and horizontal outputs.
    """
    vert_t = self.vertical_conv_t(v)
    vert_s = self.vertical_conv_s(v)
    # The vertical-to-horizontal links see the pre-bias activations.
    from_vert_t = self.v_to_h_conv_t(vert_t)
    from_vert_s = self.v_to_h_conv_s(vert_s)

    # The label bias is shared by the vertical and horizontal paths; only
    # the shape of the vertical activation is needed for broadcasting.
    label_bias = self.label(label)
    label_bias = F.expand_dims(label_bias, -1)
    label_bias = F.expand_dims(label_bias, -1)
    label_bias = F.broadcast_to(label_bias, vert_t.shape)

    vert_t = vert_t + label_bias
    vert_s = vert_s + label_bias
    v_out = F.tanh(vert_t) * F.sigmoid(vert_s)

    horiz_t = self.horizontal_conv_t(h)
    horiz_s = self.horizontal_conv_s(h)
    horiz_t = horiz_t + from_vert_t + label_bias
    horiz_s = horiz_s + from_vert_s + label_bias
    h_out = self.horizontal_output(F.tanh(horiz_t) * F.sigmoid(horiz_s))

    return v_out, h_out
项目:chainer-cf-nade    作者:dsanno    | 项目源码 | 文件源码
def ordinal_loss(y, mask):
    """Compute the masked ordinal loss of the scores ``y``.

    Args:
        y: Variable of scores with shape ``(b, c, n)``; axis 1 is the axis
            the triangular kernels accumulate over.
        mask: Weights multiplied elementwise into the per-entry loss.

    Returns:
        The masked loss summed over all entries and divided by ``b``.
    """
    xp = cuda.get_array_module(y.data)
    volatile = y.volatile
    b, c, n = y.data.shape
    # Shift by the maximum along axis 1 before exponentiating.
    max_y = F.broadcast_to(F.max(y, axis=1, keepdims=True), y.data.shape)
    y = y - max_y
    sum_y = F.broadcast_to(F.expand_dims(F.sum(y, axis=1), 1), y.data.shape)
    # 1x1 convolutions with triangular kernels produce running sums along
    # axis 1 in both directions.
    down_tri = np.tri(c, dtype=np.float32)
    up_tri = down_tri.T
    w1 = Variable(xp.asarray(down_tri.reshape(c, c, 1, 1)), volatile=volatile)
    w2 = Variable(xp.asarray(up_tri.reshape(c, c, 1, 1)), volatile=volatile)
    h = F.exp(F.expand_dims(y, -1))
    h1 = F.convolution_2d(h, w1)
    h1 = F.convolution_2d(F.log(h1), w1)
    h2 = F.convolution_2d(h, w2)
    h2 = F.convolution_2d(F.log(h2), w2)
    h = F.reshape(h1 + h2, (b, c, n))
    return F.sum((h - sum_y - y) * mask) / b
项目:chainer-cf-nade    作者:dsanno    | 项目源码 | 文件源码
def __forward(self, batch_x, batch_t, weight, train=True):
        # Run a forward pass on one batch and return ``(loss, accuracy)``.
        # Inputs are moved to the array module in ``self.xp`` and wrapped as
        # Variables that are volatile when not training.
        xp = self.xp
        x = Variable(xp.asarray(batch_x), volatile=not train)
        t = Variable(xp.asarray(batch_t), volatile=not train)
        # NOTE(review): the callee of this forward call is missing from this
        # copy of the source; restore it from the upstream project.
        y =, train=train)

        # NOTE(review): the right-hand side is missing; presumably the shape
        # of ``y`` -- confirm.
        b, c, n =
        # Per-sample weights are broadcast to (b, c, n) and multiplied by the
        # loss mask. NOTE(review): the trailing arguments of ``loss_mask`` are
        # missing from this copy of the source.
        mask = Variable(xp.asarray(np.broadcast_to(weight.reshape(-1, 1, 1), (b, c, n)) * loss_mask(batch_t,, volatile=not train)
        if self.ordinal_weight == 0:
            # Plain masked negative log-likelihood.
            loss = F.sum(-F.log_softmax(y) * mask) / b
        elif self.ordinal_weight == 1:
            loss = ordinal_loss(y, mask)
            # NOTE(review): the next statement looks like the body of a lost
            # ``else:`` branch (a weighted mix of both losses); as written it
            # overwrites the pure ordinal loss -- confirm against upstream.
            loss = (1 - self.ordinal_weight) * F.sum(-F.log_softmax(y) * mask) / b + self.ordinal_weight * ordinal_loss(y, mask)

        acc = self.__accuracy(y, t)
        return loss, acc
项目:SeRanet    作者:corochann    | 项目源码 | 文件源码
def propup(self, vis):
    """Propagate the visible units activation upwards to the hidden units.

    :param vis: Variable Matrix(batch_size, in_channels, image_height, image_width)
                - given v_sample
    :return: Variable Matrix(batch_size, out_channels, image_height_out, image_width_out)
             - probability for each hidden units to be h_i=1
    """
    # conv.W: Matrix(out_channels, in_channels, filter height=ksize, filter width=ksize)
    # conv.b: Vec   (out_channels, )
    if self.real == 0:
        pre_sigmoid_activation = self.conv(vis)
    else:
        # Real-valued visible units are divided by ``self.std_ch`` first.
        pre_sigmoid_activation = self.conv(vis / self.std_ch)
    return F.sigmoid(pre_sigmoid_activation)
项目:SeRanet    作者:corochann    | 项目源码 | 文件源码
def propdown(self, hid):
    """Propagate the hidden units activation downwards to the visible units.

    :param hid: Variable Matrix(batch_size, out_channels, image_height_out, image_width_out)  - given h_sample
    :return: Variable Matrix(batch_size, in_channels, image_height, image_width) - probability for each visible units to be v_j = 1
             (for real-valued visible units the un-squashed mean is returned)
    """
    # Both branches convolve with the spatially flipped kernel whose
    # in/out channel axes are swapped, using the visible bias ``conv.a``.
    W_flipped = F.swapaxes(CF.flip(self.conv.W, axes=(2, 3)), axis1=0, axis2=1)
    pre_sigmoid_activation = F.convolution_2d(
        hid, W_flipped, self.conv.a, pad=self.ksize-1)
    if self.real == 0:
        v_mean = F.sigmoid(pre_sigmoid_activation)
    else:
        # TODO: check
        v_mean = pre_sigmoid_activation
    return v_mean
项目:SeRanet    作者:corochann    | 项目源码 | 文件源码
def reconstruct(self, v):
    """Reconstruct the visible units from their hidden activation.

    :param v: Variable Matrix(batch_size, in_channels, image_height, image_width)
    :return: reconstructed_v, Variable Matrix(batch_size, in_channels, image_height, image_width)
    """
    if self.real == 0:
        h = F.sigmoid(self.conv(v))
    else:
        # Real-valued input is divided by the per-channel std, reshaped so
        # it broadcasts over batch and spatial axes.
        xp = cuda.get_array_module(v.data)
        std_ch = xp.reshape(self.std, (1, self.in_channels, 1, 1))
        h = F.sigmoid(self.conv(v / std_ch))
    # Go back down with the flipped kernel and the visible bias ``conv.a``.
    W_flipped = F.swapaxes(CF.flip(self.conv.W, axes=(2, 3)), axis1=0, axis2=1)
    reconstructed_v = F.sigmoid(F.convolution_2d(h, W_flipped, self.conv.a, pad=self.ksize-1))
    return reconstructed_v
项目:LSGAN    作者:musyoku    | 项目源码 | 文件源码
def __call__(self, x):
    """Concatenate minibatch-discrimination features onto ``x``.

    When ``self.train_weights`` is False, the weight of ``self.T`` is
    reset to the value captured on the first call, so it stays fixed.

    Returns:
        ``x`` with one feature per kernel concatenated along axis 1.
    """
    xp = chainer.cuda.get_array_module(x.data)
    batchsize = x.shape[0]
    # NOTE(review): assumes ``self.T`` is a link whose weight is ``W``
    # (e.g. a Linear link) -- confirm against the class definition.
    if self.train_weights == False and self.initial_T is not None:
        self.T.W.data = self.initial_T

    M = F.reshape(self.T(x), (-1, self.num_kernels, self.ndim_kernel))
    M = F.expand_dims(M, 3)
    M_T = F.transpose(M, (3, 1, 2, 0))
    M, M_T = F.broadcast(M, M_T)

    # Pairwise L1 distance between samples, summed over the kernel dim.
    norm = F.sum(abs(M - M_T), axis=2)
    # A large value on the self-comparison positions drives their
    # exp(-x) contribution to zero.
    eraser = F.broadcast_to(xp.eye(batchsize, dtype=x.dtype).reshape((batchsize, 1, batchsize)), norm.shape)
    c_b = F.exp(-(norm + 1e6 * eraser))
    o_b = F.sum(c_b, axis=2)

    if self.train_weights == False:
        self.initial_T = self.T.W.data

    return F.concat((x, o_b), axis=1)
项目:adgm    作者:musyoku    | 项目源码 | 文件源码
def __call__(self, x):
    """Concatenate minibatch-discrimination features onto ``x``.

    When ``self.train_weights`` is False, the weight of ``self.T`` is
    reset to the value captured on the first call, so it stays fixed.

    Returns:
        ``x`` with one feature per kernel concatenated along axis 1.
    """
    xp = chainer.cuda.get_array_module(x.data)
    batchsize = x.shape[0]
    # NOTE(review): assumes ``self.T`` is a link whose weight is ``W``
    # (e.g. a Linear link) -- confirm against the class definition.
    if self.train_weights == False and self.initial_T is not None:
        self.T.W.data = self.initial_T

    M = F.reshape(self.T(x), (-1, self.num_kernels, self.ndim_kernel))
    M = F.expand_dims(M, 3)
    M_T = F.transpose(M, (3, 1, 2, 0))
    M, M_T = F.broadcast(M, M_T)

    # Pairwise L1 distance between samples, summed over the kernel dim.
    norm = F.sum(abs(M - M_T), axis=2)
    # A large value on the self-comparison positions drives their
    # exp(-x) contribution to zero.
    eraser = F.broadcast_to(xp.eye(batchsize, dtype=x.dtype).reshape((batchsize, 1, batchsize)), norm.shape)
    c_b = F.exp(-(norm + 1e6 * eraser))
    o_b = F.sum(c_b, axis=2)

    if self.train_weights == False:
        self.initial_T = self.T.W.data

    return F.concat((x, o_b), axis=1)
项目:chainerrl    作者:chainer    | 项目源码 | 文件源码
def clip_actions(actions, min_action, max_action):
    """Clamp ``actions`` elementwise between ``min_action`` and ``max_action``.

    Both bounds are broadcast to the shape of ``actions`` first.
    """
    target_shape = actions.shape
    lower = F.broadcast_to(min_action, target_shape)
    upper = F.broadcast_to(max_action, target_shape)
    # Cap from above first, then lift anything below the lower bound.
    capped = F.minimum(actions, upper)
    return F.maximum(capped, lower)
项目:chainerrl    作者:chainer    | 项目源码 | 文件源码
def compute_mean_and_var(self, x):
    """Compute the mean and variance of the policy for input ``x``.

    ``x`` is passed through every hidden layer followed by
    ``self.nonlinearity``; the mean is optionally bounded by tanh and the
    variance is a softplus of ``self.var_layer`` broadcast to the mean's
    shape.

    Returns:
        A tuple ``(mean, var)``.
    """
    h = x
    for layer in self.hidden_layers:
        h = self.nonlinearity(layer(h))
    mean = self.mean_layer(h)
    if self.bound_mean:
        mean = bound_by_tanh(mean, self.min_action, self.max_action)
    # NOTE(review): the addend after the line continuation was missing from
    # this copy of the source; ``self.min_var`` is restored from the
    # upstream chainerrl implementation -- confirm the attribute exists.
    var = F.broadcast_to(F.softplus(self.var_layer(h)), mean.shape) + \
        self.min_var
    return mean, var
项目:chainerrl    作者:chainer    | 项目源码 | 文件源码
def __call__(self, x):
        # Build a Gaussian distribution whose mean is the output of the
        # hidden layers applied to ``x``.
        mean = self.hidden_layers(x)
        # NOTE(review): the arguments of this broadcast_to call are missing
        # from this copy of the source, so the statement is incomplete;
        # presumably a variance parameter is broadcast to ``mean.shape`` --
        # restore it from the upstream project before use.
        var = F.broadcast_to(
        return distribution.GaussianDistribution(mean, var)
项目:instance_normalization_chainer    作者:crcrpar    | 项目源码 | 文件源码
def instance_norm(self, x, gamma=None, beta=None):
    """Normalize ``x`` over its last two axes, per sample and channel.

    Args:
        x: Input whose last two axes are reduced (assumed to be a
            (batch, channel, height, width) array -- confirm with callers).
        gamma: Optional per-channel scale; when given, ``beta`` must be
            given as well.
        beta: Optional per-channel shift, used together with ``gamma``.

    Returns:
        The normalized input, scaled and shifted when ``gamma`` is given.
    """
    mean = F.mean(x, axis=-1)
    mean = F.mean(mean, axis=-1)
    mean = F.broadcast_to(mean[Ellipsis, None, None], x.shape)
    # The variance is the squared deviation averaged over the same two
    # axes as the mean; without this reduction each element would be
    # divided by its own absolute deviation.
    var = F.squared_difference(x, mean)
    var = F.mean(var, axis=-1)
    var = F.mean(var, axis=-1)
    var = F.broadcast_to(var[Ellipsis, None, None], x.shape)
    std = F.sqrt(var + 1e-5)
    x_hat = (x - mean) / std
    if gamma is None:
        return x_hat
    gamma = F.broadcast_to(gamma[None, Ellipsis, None, None], x.shape)
    beta = F.broadcast_to(beta[None, Ellipsis, None, None], x.shape)
    return gamma * x_hat + beta
项目:lencon    作者:kiyukuta    | 项目源码 | 文件源码
def prepare_decoding(self, state, lengths, train=True):
    """Build the initial decoder state with a length-scaled cell.

    The parent's ``prepare_decoding`` supplies ``x`` and ``h``; the cell
    ``c`` is ``self.encoder.c0`` broadcast over the batch and multiplied
    by each sample's length.

    Returns:
        A dict with the keys ``'x'``, ``'c'`` and ``'h'``.
    """
    base_state = super().prepare_decoding(state, lengths, train=train)
    first_input = base_state['x']
    hidden = base_state['h']

    cell = F.broadcast_to(self.encoder.c0, (self.batchsize, self.dim_hid))
    # One float32 length per row, as a column that broadcasts over dim_hid.
    length_column = lengths.astype(np.float32).reshape((self.batchsize, 1))
    cell = cell * length_column
    return {'x': first_input, 'c': cell, 'h': hidden}
项目:lencon    作者:kiyukuta    | 项目源码 | 文件源码
def prepare_decoding(self, state, lengths, train=True):
    """Build the initial decoder state with a length-scaled cell.

    The parent's ``prepare_decoding`` supplies ``x`` and ``h``; the cell
    ``c`` is ``self.encoder.c0`` broadcast over the batch and multiplied
    by each sample's length.

    Returns:
        A dict with the keys ``'x'``, ``'c'`` and ``'h'``.
    """
    base_state = super().prepare_decoding(state, lengths, train=train)
    first_input = base_state['x']
    hidden = base_state['h']

    cell = F.broadcast_to(self.encoder.c0, (self.batchsize, self.dim_hid))
    # One float32 length per row, as a column that broadcasts over dim_hid.
    length_column = lengths.astype(np.float32).reshape((self.batchsize, 1))
    cell = cell * length_column
    return {'x': first_input, 'c': cell, 'h': hidden}
项目:lencon    作者:kiyukuta    | 项目源码 | 文件源码
def _attend(self, p):
    """Return softmax attention weights for the query ``p``.

    The query is projected by ``self.xh``, broadcast to ``self.shape2``
    and combined with ``self.h``; positions where ``self.mask`` is false
    take their score from ``self.minf`` instead.

    Returns:
        Attention weights of shape (batchsize, src_len, 1).
    """
    query = F.expand_dims(self.xh(p), 1)
    query = F.broadcast_to(query, self.shape2)

    combined = F.tanh(self.h + query)
    # Flatten batch and source length so ``self.hw`` scores every position.
    flat_shape = (self.batchsize * self.src_len, self.dim_hid)
    flat_scores = self.hw(F.reshape(combined, flat_shape))
    scores = F.reshape(flat_scores, (self.batchsize, self.src_len, 1))
    scores = F.where(self.mask, scores, self.minf)
    return F.softmax(scores)
项目:chainer-speech-recognition    作者:musyoku    | 项目源码 | 文件源码
def __call__(self, x):
    """Broadcast ``x`` to the shape stored in ``self.shape``."""
    target_shape = self.shape
    return functions.broadcast_to(x, target_shape)
项目:chainer-deconv    作者:germanRos    | 项目源码 | 文件源码
def setUp(self):
    """Create inputs, an upstream gradient and the expected forward result.

    ``x2`` carries a singleton batch axis that ``self.op`` broadcasts to
    ``batch_size`` before the batched matrix product.
    """
    self.x1 = numpy.random.uniform(
        .5, 1, (batch_size, m, k)).astype(numpy.float32)
    self.x2 = numpy.random.uniform(
        .5, 1, (1, k, n)).astype(numpy.float32)
    # NOTE(review): the attribute name ``gy`` was missing from this copy of
    # the source and is restored from context (its shape matches the
    # output of ``self.op``) -- confirm against the test class.
    self.gy = numpy.random.uniform(
        -1, 1, (batch_size, m, n)).astype(numpy.float32)
    self.op = lambda x, y: F.batch_matmul(
        x, F.broadcast_to(y, (batch_size, k, n)))
    # Reference result: one plain matrix product per batch item.
    self.forward_answer = numpy.array([
        numpy.dot(self.x1[i], self.x2[0])
        for i in six.moves.range(batch_size)])
项目:chainer-deconv    作者:germanRos    | 项目源码 | 文件源码
def setUp(self):
    """Create inputs, an upstream gradient and the expected forward result.

    ``x2`` is a single (k, n) matrix; ``self.op`` adds a batch axis and
    broadcasts it to ``batch_size`` before the batched matrix product.
    """
    self.x1 = numpy.random.uniform(
        .5, 1, (batch_size, m, k)).astype(numpy.float32)
    self.x2 = numpy.random.uniform(
        .5, 1, (k, n)).astype(numpy.float32)
    # NOTE(review): the attribute name ``gy`` was missing from this copy of
    # the source and is restored from context (its shape matches the
    # output of ``self.op``) -- confirm against the test class.
    self.gy = numpy.random.uniform(
        -1, 1, (batch_size, m, n)).astype(numpy.float32)
    self.op = lambda x, y: F.batch_matmul(
        x, F.broadcast_to(F.expand_dims(y, 0), (batch_size, k, n)))
    # Reference result: one plain matrix product per batch item.
    self.forward_answer = numpy.array([
        numpy.dot(self.x1[i], self.x2)
        for i in six.moves.range(batch_size)])
项目:chainer-deconv    作者:germanRos    | 项目源码 | 文件源码
def check_forward(self, data):
    """Check that broadcasting ``data`` yields ``self.out_shape``."""
    x = chainer.Variable(data)
    bx = functions.broadcast_to(x, self.out_shape)

    self.assertEqual(bx.data.shape, self.out_shape)
项目:chainer-deconv    作者:germanRos    | 项目源码 | 文件源码
def test_type_check(self):
    """Check that an invalid broadcast raises ``InvalidType``."""
    # NOTE(review): the argument was missing from this copy of the source;
    # ``self.data`` is assumed to be the fixture input -- confirm against
    # the test class.
    x = chainer.Variable(self.data)
    with self.assertRaises(type_check.InvalidType):
        functions.broadcast_to(x, self.out_shape)
项目:deep_metric_learning    作者:ronekko    | 项目源码 | 文件源码
def squared_distance_matrix(X):
    """Return the (n, n) matrix of squared distances between rows of ``X``.

    Built as ``-2 * <x_i, x_j> + |x_j|^2 + |x_i|^2``.
    """
    n = X.shape[0]
    pair_shape = (n, n)
    squared_norms = F.sum(X ** 2.0, axis=1)

    cross_term = -2.0 * F.linear(X, X)
    # Norms laid out along columns, then along rows.
    column_norms = F.broadcast_to(squared_norms, pair_shape)
    row_norms = F.broadcast_to(F.expand_dims(squared_norms, 1), pair_shape)
    return cross_term + column_norms + row_norms
项目:deep_metric_learning    作者:ronekko    | 项目源码 | 文件源码
def angular_mc_loss(f, f_p, alpha=45, in_degree=True):
    """Compute the angular multi-class loss for anchor/positive pairs.

    Args:
        f (chainer.Variable or xp.ndarray):
            Anchor vectors. Each vector in f must be l2 normalized.
        f_p (chainer.Variable or xp.ndarray):
            Positive vectors. Each vector in f_p must be l2 normalized.
        alpha (float): Angle parameter of the loss.
        in_degree (bool): Whether ``alpha`` is given in degrees; it is
            converted to radians when true.

    Returns:
        The log-sum-exp over each row of f_{a,p,n}, averaged over pairs.
    """
    xp = cuda.get_array_module(f)

    if in_degree:
        alpha = np.deg2rad(alpha)
    sq_tan_alpha = np.tan(alpha) ** 2
    n_pairs = len(f)

    # first and second term of f_{a,p,n}
    term1 = 4 * sq_tan_alpha + matmul(f + f_p, transpose(f_p))
    term2 = 2 * (1 + sq_tan_alpha) * F.sum(f * f_p, axis=1, keepdims=True)

    f_apn = term1 - F.broadcast_to(term2, (n_pairs, n_pairs))
    # multiply zero to diagonal components of f_apn
    mask = xp.ones_like(f_apn.data) - xp.eye(n_pairs, dtype=f.dtype)
    f_apn = f_apn * mask

    return F.average(F.logsumexp(f_apn, axis=1))
项目:vsmlib    作者:undertherain    | 项目源码 | 文件源码
def __call__(self, x, context):
    """Compute and report the loss of predicting ``x`` from ``context``.

    The context is embedded with ``self.embed``; ``x`` is repeated once
    per context column and both are flattened so that each
    (context item, target) pair becomes one row of the loss input.

    Returns:
        The loss, which is also reported under the key ``'loss'``.
    """
    e = self.embed(context)
    shape = e.shape
    x = F.broadcast_to(x[:, None], (shape[0], shape[1]))
    e = F.reshape(e, (shape[0] * shape[1], shape[2]))
    x = F.reshape(x, (shape[0] * shape[1],))
    loss = self.loss_func(e, x)
    # NOTE(review): relies on chainer's ``reporter`` module being imported
    # at module level -- confirm against the file's imports.
    reporter.report({'loss': loss}, self)
    return loss
项目:vsmlib    作者:undertherain    | 项目源码 | 文件源码
def __call__(self, x, context):
    """Compute and report the loss of predicting ``x`` from ``context``.

    ``x`` is repeated once per context column and flattened; the context
    is flattened as well and encoded with ``self.rnn.charRNN``.

    Returns:
        The loss, which is also reported under the key ``'loss'``.
    """
    n_rows, n_cols = context.shape[0], context.shape[1]
    x = F.broadcast_to(x[:, None], (n_rows, n_cols))
    x = F.reshape(x, (n_rows * n_cols,))

    context = context.reshape((n_rows * n_cols))
    e = self.rnn.charRNN(context)

    loss = self.loss_func(e, x)
    # NOTE(review): relies on chainer's ``reporter`` module being imported
    # at module level -- confirm against the file's imports.
    reporter.report({'loss': loss}, self)
    return loss
项目:adversarial-autoencoder    作者:musyoku    | 项目源码 | 文件源码
def __call__(self, x):
    """Broadcast ``x`` to the shape stored in ``self.shape``."""
    target_shape = self.shape
    return functions.broadcast_to(x, target_shape)
项目:chainercv    作者:chainer    | 项目源码 | 文件源码
def __call__(self, x):
    """Normalize input and scale it.

    Args:
        x (chainer.Variable): A variable holding 4-dimensional array.
            Its :obj:`dtype` is :obj:`numpy.float32`.

    Returns:
        chainer.Variable:
        The shape and :obj:`dtype` are same as those of input.

    """
    # Normalize along axis 1, then apply ``self.scale`` with two trailing
    # axes added so it broadcasts over the remaining axes.
    x = F.normalize(x, eps=self.eps, axis=1)
    scale = F.broadcast_to(self.scale[:, np.newaxis, np.newaxis], x.shape)
    return x * scale
项目:nmtrain    作者:philip30    | 项目源码 | 文件源码
def __call__(self, S, h):
    """Compute attention weights over source positions.

    Args:
        S: Source states with shape (batch_size, src_len, hidden_size).
        h: State that is repeated for every source position; it is
            broadcast to (batch_size, hidden_size, src_len).

    Returns:
        Softmax attention weights with shape (batch_size, src_len).
    """
    batch_size, src_len, hidden_size = S.shape
    # Repeat h along a new source axis, then move that axis into place so
    # it lines up with S for concatenation.
    h = F.broadcast_to(F.expand_dims(h, axis=2), (batch_size, hidden_size, src_len))
    h = F.swapaxes(h, 1, 2)
    S = F.reshape(F.concat((S, h), axis=2), (batch_size * src_len, 2 * hidden_size))
    a = F.softmax(F.reshape(self.second_layer(F.tanh(self.first_layer(S))), (batch_size, src_len)))
    return a
项目:vfm    作者:cemoody    | 项目源码 | 文件源码
def term_slop(self, loc, val, bs, nf, train=True):
    """Compute the slope for each active feature.

    Args:
        loc: Feature indices passed to the delta embeddings.
        val: Feature values multiplied with the sampled coefficients.
        bs: Batch size.
        nf: Number of features per sample.
        train: When false, ``self.lv_floor`` is added to the group log
            variance before sampling.

    Returns:
        A tuple ``(slop, kld)``: the summed slope term per sample and the
        sum of the group-level and per-feature KL divergences.
    """
    shape = (bs, nf)

    # Reshape all of our constants
    pr_mu = F.broadcast_to(self.slop_mu.b, shape)
    pr_lv = F.broadcast_to(self.slop_lv.b, shape)
    # This is either zero or a very negative number
    # indicating to sample N(mean, logvar) or just draw
    # the mean precisely
    if not train:
        pr_lv += self.lv_floor

    # The feature slopes are grouped together so that they
    # all share a common mean. Then individual features slop_delta_lv
    # are shrunk towards zero, which effectively sets features to fall
    # back on the group mean.
    sl_mu = F.reshape(self.slop_delta_mu(loc), shape) + pr_mu
    sl_lv = F.reshape(self.slop_delta_lv(loc), shape) + pr_lv
    coef = F.gaussian(sl_mu, sl_lv)
    slop = F.sum(coef * val, axis=1)

    # Calculate divergence between group mean and N(0, 1)
    kld1 = F.gaussian_kl_divergence(self.slop_mu.b, self.slop_lv.b)
    # Calculate divergence of individual delta means and delta vars
    args = (self.slop_delta_mu.W, self.slop_delta_lv.W)
    kld2 = F.gaussian_kl_divergence(*args)

    return slop, kld1 + kld2
项目:vfm    作者:cemoody    | 项目源码 | 文件源码
def kl_div(mu1, lv1, lv2):
    """Elementwise KL divergence from N(mu1, .) to a zero-mean prior.

    ``lv1`` and ``lv2`` are exponentiated and squared, so the formula is
    ``lv2 - lv1 + (s1^2 + mu1^2) / (2 * s2^2) - 0.5`` with
    ``s = exp(lv)``. Two-dimensional inputs get a leading axis added
    before ``lv2`` is broadcast to their shape.
    """
    if len(lv1.shape) == 2:
        lv1 = F.expand_dims(lv1, 0)
        mu1 = F.expand_dims(mu1, 0)
    prior_lv = F.broadcast_to(lv2, lv1.shape)

    posterior_sq = F.exp(lv1)**2.0
    prior_sq = F.exp(prior_lv)**2.0

    log_ratio = prior_lv - lv1
    spread_term = .5 * posterior_sq / prior_sq
    mean_term = .5 * mu1**2. / prior_sq
    return log_ratio + spread_term + mean_term - .5
项目:vfm    作者:cemoody    | 项目源码 | 文件源码
def term_feat(self, iloc, jloc, ival, jval, bs, nf, train=True):
    """Compute the feature-interaction term and its hyperprior penalties.

    Args:
        iloc, jloc: Feature indices of the two sides of each interaction.
        ival, jval: Feature values matching ``iloc`` and ``jloc``.
        bs: Batch size.
        nf: Number of features; group vectors are broadcast to
            ``(bs, nf * 2, self.n_dim)``.
        train: When false, ``self.lv_floor`` is added to the group log
            variance before sampling.

    Returns:
        A tuple ``(feat, kldg, kldi, hyperg, hyperi)``.
    """
    # Change all of the shapes to form interaction vectors
    shape = (bs, nf * 2, self.n_dim)
    feat_mu_vec = F.broadcast_to(self.feat_mu_vec.b, shape)
    feat_lv_vec = F.broadcast_to(self.feat_lv_vec.b, shape)
    if not train:
        feat_lv_vec += self.lv_floor

    # Construct the interaction mean and variance
    # iloc is (bs, nf), feat(iloc) is (bs, nf, ndim) and
    # dot(feat, feat) is (bs, nf)
    ivec = F.gaussian(feat_mu_vec + self.feat_delta_mu(iloc),
                      feat_lv_vec + self.feat_delta_lv(iloc))
    jvec = F.gaussian(feat_mu_vec + self.feat_delta_mu(jloc),
                      feat_lv_vec + self.feat_delta_lv(jloc))
    # feat is (bs, )
    feat = dot(F.sum(ivec * jvec, axis=2), ival * jval)

    # Compute the KLD for the group mean vector and variance vector
    # KL(N(group mu, group lv) || N(0, hyper_lv))
    # hyper_lv ~ gamma(1, 1)
    kldg = F.sum(kl_div(self.feat_mu_vec.b, self.feat_lv_vec.b,
                        self.hyper_feat_lv_vec.b))
    # Compute deviations from hyperprior
    # KL(N(delta_i, delta_i lv) || N(0, hyper_delta_lv))
    # hyper_delta_lv ~ gamma(1, 1)
    kldi = F.sum(kl_div(self.feat_delta_mu.W, self.feat_delta_lv.W,
                        self.hyper_feat_delta_lv.b))
    # Hyperprior penalty for log(var) ~ Gamma(alpha=1, beta=1)
    # Gamma(log(var) | alpha=1, beta=1) = -log(var)
    # The loss function will attempt to make log(var) as negative as
    # possible which will in turn make the variance as small as possible
    # The sum just casts a 1D vector to a scalar
    hyperg = -F.sum(self.hyper_feat_lv_vec.b)
    hyperi = -F.sum(self.hyper_feat_delta_lv.b)
    return feat, kldg, kldi, hyperg, hyperi
项目:Semantic-Segmentation-using-Adversarial-Networks    作者:oyam    | 项目源码 | 文件源码
def _make_dis_input(self, input_img, label_map):
    """Multiply the label map by each of the first three image channels.

    Args:
        input_img: Image batch indexed as (batch, channel, height, width);
            channels 0, 1 and 2 are used (named b, g, r in the original).
        label_map: Label map with shape (batch, classes, height, width).

    Returns:
        The three per-channel products concatenated along axis 1.
    """
    products = []
    for channel in range(3):
        # Slice with a range so the channel axis is kept: (N, 1, H, W)
        # broadcasts to (N, C, H, W) for any batch size, whereas an
        # integer index yields (N, H, W), which only lines up when N == 1.
        plane = F.broadcast_to(
            input_img[:, channel:channel + 1, :, :], label_map.shape)
        products.append(label_map * plane)
    dis_input = F.concat(products, axis=1)
    return dis_input
项目:SeRanet    作者:corochann    | 项目源码 | 文件源码
def free_energy(self, v):
    """Return the free energy of ``v``, summed over the batch.

    :param v: Variable (batch_size, in_channels, image_height, image_width) - input data (training data)
    :return: scalar
    """
    in_channels = self.in_channels
    real = self.real
    if real == 0:
        # visible layer is 0, 1 (bit)
        # vbias_term = 1 * SUM(a(i) * v(i))
        v_sum = F.sum(v, axis=(2, 3))  # sum over image_height & image_width
        # Originally, it should return sum for each batch.
        # but it returns scalar, which is sum over batches, since sum is used at the end anyway.
        vbias_term = F.sum(F.matmul(v_sum, self.conv.a))
        wx_b = self.conv(v)
    else:
        # visible layer takes real value
        # vbias_term = 0.5 * SUM((v(i)-a(i)) * (v(i) - a(i)))
        # TODO: check
        n = F.reshape(self.conv.a, (1, in_channels, 1, 1))
        xp = cuda.get_array_module(v.data)
        std_ch = xp.reshape(self.std, (1, in_channels, 1, 1))

        v_ = (v - F.broadcast_to(n, v.data.shape)) / std_ch
        vbias_term = F.sum(0.5 * v_ * v_)
        wx_b = self.conv(v / std_ch)

    hidden_term = F.sum(F.log(1 + F.exp(wx_b)))
    return - vbias_term - hidden_term
项目:chainerrl    作者:chainer    | 项目源码 | 文件源码
def maximum_entropy_mellowmax(values, omega=1., beta_min=-10, beta_max=10):
    """Maximum entropy mellowmax policy function.

    This function provides a categorical distribution whose expectation matches
    the one of mellowmax function while maximizing its entropy.

    Args:
        values (Variable or ndarray):
            Input values. Mellowmax is taken along the second axis.
        omega (float):
            Parameter of mellowmax.
        beta_min (float):
            Minimum value of beta, used in Brent's algorithm.
        beta_max (float):
            Maximum value of beta, used in Brent's algorithm.

    Returns:
        outputs (Variable)
    """
    xp = chainer.cuda.get_array_module(values)
    # NOTE(review): ``omega`` used to be ignored; it is now forwarded,
    # assuming the sibling ``mellowmax`` accepts an ``omega`` keyword.
    mm = mellowmax(values, axis=1, omega=omega)

    # Advantage: Q - mellowmax(Q)
    batch_adv = values - F.broadcast_to(F.expand_dims(mm, 1), values.shape)
    # Move data to CPU because we use Brent's algorithm in scipy
    batch_adv = chainer.cuda.to_cpu(batch_adv.data)
    batch_beta = np.empty(mm.shape, dtype=np.float32)

    # Beta is computed as the root of this function
    def f(y, adv):
        return np.sum(np.exp(y * adv) * adv)

    for idx in np.ndindex(mm.shape):
        # Re-insert a full slice at axis 1 to pick this entry's advantages.
        idx_full = idx[:1] + (slice(None),) + idx[1:]
        adv = batch_adv[idx_full]
        try:
            beta = scipy.optimize.brentq(
                f, a=beta_min, b=beta_max, args=(adv,))
        except ValueError:
            # brentq raises when f has the same sign at both ends of the
            # bracket; fall back to beta = 0 in that case.
            beta = 0
        batch_beta[idx] = beta

    return F.softmax(xp.expand_dims(xp.asarray(batch_beta), 1) * values)
项目:chainerrl    作者:chainer    | 项目源码 | 文件源码
def __init__(self, n_input_channels, action_size, var,
                 n_hidden_layers=0, n_hidden_channels=None,
                 min_action=None, max_action=None, bound_mean=False,
                 nonlinearity=F.relu, mean_wscale=1):
        # Build the layer list of a Gaussian policy whose variance is the
        # given ``var`` rather than a network output.
        # NOTE(review): several lines of this block are missing from this
        # copy of the source (flagged below); restore them from the
        # upstream project before use.

        self.n_input_channels = n_input_channels
        self.action_size = action_size
        self.n_hidden_layers = n_hidden_layers
        self.n_hidden_channels = n_hidden_channels
        self.min_action = min_action
        self.max_action = max_action
        self.bound_mean = bound_mean
        self.nonlinearity = nonlinearity
        if np.isscalar(var):
            # A scalar variance is expanded to one entry per action dimension.
            self.var = np.full(action_size, var, dtype=np.float32)
            # NOTE(review): an ``else:`` line appears to be missing here; as
            # written the next statement overwrites the expanded array.
            self.var = var
        layers = []
        if n_hidden_layers > 0:
            # Input to hidden
            layers.append(L.Linear(n_input_channels, n_hidden_channels))
            for _ in range(n_hidden_layers - 1):
                # Hidden to hidden
                layers.append(L.Linear(n_hidden_channels, n_hidden_channels))
            # The last layer is used to compute the mean
            # NOTE(review): the enclosing ``layers.append(`` call and the rest
            # of the argument list are missing from the next statement.
                L.Linear(n_hidden_channels, action_size,
            # There's only one layer for computing the mean
            # NOTE(review): presumably the ``else:`` branch for
            # ``n_hidden_layers == 0``; same missing pieces as above.
                L.Linear(n_input_channels, action_size,

        if self.bound_mean:
            # Squash the mean into [min_action, max_action].
            layers.append(lambda x: bound_by_tanh(
                x, self.min_action, self.max_action))

        def get_var_array(shape):
            # Convert the variance with ``self.xp.asarray`` on each call and
            # broadcast it to ``shape``.
            self.var = self.xp.asarray(self.var)
            return self.xp.broadcast_to(self.var, shape)

        # Final stage: wrap the mean and the broadcast variance in a
        # Gaussian distribution.
        layers.append(lambda x: distribution.GaussianDistribution(
            x, get_var_array(x.shape)))
项目:chainer-qrnn    作者:musyoku    | 项目源码 | 文件源码
def __call__(self, X, ht_enc, H_enc, skip_mask=None):
        """Run the attention decoder layer over every time step of ``X``.

        Args:
            X: Input passed to ``self.W``; the last axis of the result is
                iterated as time.
            ht_enc: Encoder state passed to ``self.V``; the projection is
                broadcast along the time axis of ``self.W(X)``.
            H_enc: Encoder hidden states. Axes 1 and 2 are swapped before
                attention (presumably (batch, hidden, enc_len) on entry --
                TODO confirm).
            skip_mask: Optional mask whose axis 1 must match axis 2 of
                ``H_enc``. Entries equal to 0 get a -1e6 bias before the
                softmax and are zeroed afterwards.

        Returns:
            ``self.H``, the per-step outputs concatenated along axis 2.
        """
        pad = self._kernel_size - 1
        WX = self.W(X)
        if pad > 0:
            # Drop the trailing ``pad`` steps of the projection.
            WX = WX[:, :, :-pad]
        Vh = self.V(ht_enc)
        Vh, WX = functions.broadcast(functions.expand_dims(Vh, axis=2), WX)

        # f-pooling
        Z, F, O = functions.split_axis(WX + Vh, 3, axis=1)
        Z = functions.tanh(Z)
        F = self.zoneout(F)
        O = functions.sigmoid(O)
        T = Z.shape[2]

        # compute ungated hidden states
        self.contexts = []
        for t in xrange(T):
            z = Z[..., t]
            f = F[..., t]
            if t == 0:
                # No previous context exists at the first step.
                ct = (1 - f) * z
            else:
                ct = f * self.contexts[-1] + (1 - f) * z
            # Every step must be recorded: the attention loop below reads
            # self.contexts[t] for each t.
            self.contexts.append(ct)

        # The PAD bias and mask do not depend on t, so build them once.
        if skip_mask is None:
            bias = 0
            mask = 1
        else:
            assert skip_mask.shape[1] == H_enc.shape[2]
            softmax_bias = (skip_mask == 0) * -1e6
            bias = softmax_bias[..., None]  # to skip PAD
            mask = skip_mask[..., None]     # to skip PAD

        # compute attention weights (eq.8)
        H_enc = functions.swapaxes(H_enc, 1, 2)
        for t in xrange(T):
            ct = self.contexts[t]
            alpha = functions.batch_matmul(H_enc, ct) + bias
            alpha = functions.softmax(alpha) * mask
            alpha = functions.broadcast_to(alpha, H_enc.shape)  # copy
            kt = functions.sum(alpha * H_enc, axis=1)
            ot = O[..., t]
            # NOTE(review): the assignment target and the matching
            # expand_dims argument were lost in extraction; ``self.ht`` is a
            # reconstruction -- confirm the attribute name upstream.
            self.ht = ot * self.o(functions.concat((kt, ct), axis=1))

            step = functions.expand_dims(self.ht, 2)
            if t == 0:
                self.H = step
            else:
                self.H = functions.concat((self.H, step), axis=2)

        return self.H
项目:chainer-qrnn    作者:musyoku    | 项目源码 | 文件源码
def forward_one_step(self, X, ht_enc, H_enc, skip_mask):
        """Advance the attention decoder layer incrementally.

        Unlike ``__call__``, ``self.contexts`` and ``self.H`` are not reset:
        new steps are appended to whatever earlier calls accumulated (both
        are expected to be ``None`` before the first call).

        Args:
            X: Input passed to ``self.W``; only the step at index
                ``-pad - 1`` of the result's last axis is used, where
                ``pad = self._kernel_size - 1``.
            ht_enc: Encoder state passed to ``self.V``.
            H_enc: Encoder hidden states. Axes 1 and 2 are swapped before
                attention (presumably (batch, hidden, enc_len) on entry --
                TODO confirm).
            skip_mask: Mask whose axis 1 must match axis 2 of ``H_enc``, or
                ``None``. Entries equal to 0 get a -1e6 bias before the
                softmax and are zeroed afterwards.

        Returns:
            ``self.H``, all outputs so far concatenated along axis 2.
        """
        pad = self._kernel_size - 1
        # Keep a single time step, re-adding the time axis with ``None``.
        WX = self.W(X)[:, :, -pad-1, None]
        Vh = self.V(ht_enc)

        Vh, WX = functions.broadcast(functions.expand_dims(Vh, axis=2), WX)

        # f-pooling
        Z, F, O = functions.split_axis(WX + Vh, 3, axis=1)
        Z = functions.tanh(Z)
        F = self.zoneout(F)
        O = functions.sigmoid(O)
        T = Z.shape[2]

        # compute ungated hidden states
        for t in xrange(T):
            z = Z[..., t]
            f = F[..., t]
            if self.contexts is None:
                # Very first step: there is no previous context yet.
                ct = (1 - f) * z
                self.contexts = [ct]
            else:
                ct = f * self.contexts[-1] + (1 - f) * z
                # Record the step; the attention loop reads the last T
                # entries of self.contexts.
                self.contexts.append(ct)

        # The PAD bias and mask do not depend on t, so build them once.
        if skip_mask is None:
            bias = 0
            mask = 1
        else:
            assert skip_mask.shape[1] == H_enc.shape[2]
            softmax_bias = (skip_mask == 0) * -1e6
            bias = softmax_bias[..., None]  # to skip PAD
            mask = skip_mask[..., None]     # to skip PAD

        # compute attention weights (eq.8)
        H_enc = functions.swapaxes(H_enc, 1, 2)
        for t in xrange(T):
            # Negative index: pick the contexts produced by this call.
            ct = self.contexts[t - T]
            alpha = functions.batch_matmul(H_enc, ct) + bias
            alpha = functions.softmax(alpha) * mask
            alpha = functions.broadcast_to(alpha, H_enc.shape)  # copy
            kt = functions.sum(alpha * H_enc, axis=1)
            ot = O[..., t]
            # NOTE(review): the assignment target and the matching
            # expand_dims argument were lost in extraction; ``self.ht`` is a
            # reconstruction -- confirm the attribute name upstream.
            self.ht = ot * self.o(functions.concat((kt, ct), axis=1))

            step = functions.expand_dims(self.ht, 2)
            if self.H is None:
                self.H = step
            else:
                self.H = functions.concat((self.H, step), axis=2)

        return self.H
项目:mlpnlp-nmt    作者:mlpnlp    | 项目源码 | 文件源码
def calcAttention(self, h1, hList, aList, encLen, cMBSize, args):
        """Apply attention over the encoder states to a decoder output.

        Args:
            h1: Decoder output, shape (cMBSize, self.hDim).
            hList: Encoder states that are averaged with the attention
                weights via ``batch_matmul`` (presumably
                (cMBSize, encLen, self.hDim) -- TODO confirm).
            aList: Encoder-side attention features. Multiplied with a
                (cMBSize, encLen, self.hDim) tensor in bilinear mode and
                added to a (cMBSize * encLen, self.hDim) tensor in MLP
                mode, so its expected shape depends on ``self.attn_mode``.
            encLen: Encoder sequence length.
            cMBSize: Current mini-batch size.
            args: Unused here; kept for interface compatibility.

        Returns:
            ``h1`` unchanged when ``self.attn_mode == 0``; otherwise
            ``tanh(attnOut_L2(concat(h1, context)))``.

        Raises:
            AssertionError: If ``self.attn_mode`` is not 0, 1 or 2.
        """
        # Attention disabled: pass the decoder output straight through.
        if self.attn_mode == 0:
            return h1
        # Fail fast on an unknown mode. Previously the ``assert`` sat inside
        # the MLP branch, so mode 2 always failed and any other mode fell
        # through to an unbound ``aval``.
        if self.attn_mode not in (1, 2):
            raise AssertionError("ERROR")

        # 1. Project the decoder output and repeat it for every encoder
        #    position.
        target1 = self.model.attnIn_L1(h1)
        # (cMBSize, self.hDim) => (cMBSize, 1, self.hDim)
        target2 = chaFunc.expand_dims(target1, axis=1)
        # (cMBSize, 1, self.hDim) => (cMBSize, encLen, self.hDim)
        target3 = chaFunc.broadcast_to(target2, (cMBSize, encLen, self.hDim))

        # 2. Score every encoder position.
        if self.attn_mode == 1:  # bilinear
            # shape: (cMBSize, encLen)
            aval = chaFunc.sum(target3 * aList, axis=2)
        else:  # MLP (attn_mode == 2)
            t1 = chaFunc.reshape(target3, (cMBSize * encLen, self.hDim))
            # (cMBSize*encLen, self.hDim) => (cMBSize*encLen, 1)
            t2 = self.model.attnSum(chaFunc.tanh(t1 + aList))
            # shape: (cMBSize, encLen)
            aval = chaFunc.reshape(t2, (cMBSize, encLen))

        # 3. Normalise the scores with a softmax.
        cAttn1 = chaFunc.softmax(aval)   # (cMBSize, encLen)

        # 4. Build the context vector as the attention-weighted sum of the
        #    encoder states.
        # (cMBSize, encLen) => (cMBSize, 1, encLen)
        cAttn2 = chaFunc.expand_dims(cAttn1, axis=1)
        # Batched (1, encLen) x (encLen, hDim) matmul
        #     => (cMBSize, 1, hDim)
        cAttn3 = chaFunc.batch_matmul(cAttn2, hList)
        # Drop the singleton axis=1.
        context = chaFunc.reshape(cAttn3, (cMBSize, self.hDim))

        # 5. Combine the decoder output with the context vector.
        c1 = chaFunc.concat((h1, context))
        c2 = self.model.attnOut_L2(c1)
        finalH = chaFunc.tanh(c2)
        return finalH  # context

    # ??????