Python chainer.functions 模块,sum() 实例源码

我们从Python开源项目中,提取了以下50个代码示例,用于说明如何使用chainer.functions.sum()

项目:shoelace    作者:rjagerman    | 项目源码 | 文件源码
def listmle(x, t):
    """
    The ListMLE loss as in Xia et al (2008), Listwise Approach to Learning to
    Rank - Theory and Algorithm.

    :param x: The activation of the previous layer 
    :param t: The target labels
    :return: The loss
    """

    # Get the ground truth by sorting activations by the relevance labels
    xp = cuda.get_array_module(t)
    t_hat = t[:, 0]
    x_hat = x[xp.flip(xp.argsort(t_hat), axis=0)]

    # Compute MLE loss
    final = logcumsumexp(x_hat)
    return F.sum(final - x_hat)
项目:shoelace    作者:rjagerman    | 项目源码 | 文件源码
def listpl(x, t, ?=15.0):
    """
    The ListPL loss, a stochastic variant of ListMLE that in expectation
    approximates the true ListNet loss.

    :param x: The activation of the previous layer 
    :param t: The target labels
    :param ?: The smoothing factor
    :return: The loss
    """

    # Sample permutation from PL(t)
    index = _pl_sample(t, ?)
    x = x[index]

    # Compute MLE loss
    final = logcumsumexp(x)
    return F.sum(final - x)
项目:chainerrl    作者:chainer    | 项目源码 | 文件源码
def __call__(self, x):
        h = x
        for l in self.conv_layers:
            h = self.activation(l(h))

        # Advantage
        batch_size = x.shape[0]
        ya = self.a_stream(h)
        mean = F.reshape(
            F.sum(ya, axis=1) / self.n_actions, (batch_size, 1))
        ya, mean = F.broadcast(ya, mean)
        ya -= mean

        # State value
        ys = self.v_stream(h)

        ya, ys = F.broadcast(ya, ys)
        q = ya + ys
        return action_value.DiscreteActionValue(q)
项目:chainerrl    作者:chainer    | 项目源码 | 文件源码
def compute_policy_gradient_full_correction(
        action_distrib, action_distrib_mu, action_value, v,
        truncation_threshold):
    """Compute off-policy bias correction term wrt all actions."""
    assert truncation_threshold is not None
    assert np.isscalar(v)
    with chainer.no_backprop_mode():
        rho_all_inv = compute_full_importance(action_distrib_mu,
                                              action_distrib)
        correction_weight = (
            np.maximum(1 - truncation_threshold * rho_all_inv,
                       np.zeros_like(rho_all_inv)) *
            action_distrib.all_prob.data[0])
        correction_advantage = action_value.q_values.data[0] - v
    return -F.sum(correction_weight *
                  action_distrib.all_log_prob *
                  correction_advantage, axis=1)
项目:chainerrl    作者:chainer    | 项目源码 | 文件源码
def setUp(self):

        def evaluator(actions):
            # negative square norm of actions
            return -F.sum(actions ** 2, axis=1)

        self.evaluator = evaluator

        if self.has_maximizer:
            def maximizer():
                return chainer.Variable(np.zeros(
                    (self.batch_size, self.action_size), dtype=np.float32))
        else:
            maximizer = None
        self.maximizer = maximizer
        self.av = action_value.SingleActionValue(
            evaluator=evaluator, maximizer=maximizer)
项目:gconv_experiments    作者:tscohen    | 项目源码 | 文件源码
def __call__(self, x, t, train=True, finetune=False):

        h = x
        h = F.dropout(h, ratio=0.2, train=train)
        h = self.l1(h, train, finetune)
        h = self.l2(h, train, finetune)
        h = self.l3(h, train, finetune)
        h = F.dropout(h, ratio=0.5, train=train)
        h = self.l4(h, train, finetune)
        h = self.l5(h, train, finetune)
        h = self.l6(h, train, finetune)
        h = F.dropout(h, ratio=0.5, train=train)
        h = self.l7(h, train, finetune)
        h = self.l8(h, train, finetune)
        h = self.l9(h, train, finetune)

        h = F.sum(h, axis=-1)
        h = F.sum(h, axis=-1)
        h = F.sum(h, axis=-1)
        h /= 8 * 8 * 8

        return F.softmax_cross_entropy(h, t), F.accuracy(h, t)
项目:gconv_experiments    作者:tscohen    | 项目源码 | 文件源码
def __call__(self, x, t, train=True, finetune=False):

        h = x
        h = F.dropout(h, ratio=0.2, train=train)
        h = self.l1(h, train, finetune)
        h = self.l2(h, train, finetune)
        h = self.l3(h, train, finetune)
        h = F.dropout(h, ratio=0.5, train=train)
        h = self.l4(h, train, finetune)
        h = self.l5(h, train, finetune)
        h = self.l6(h, train, finetune)
        h = F.dropout(h, ratio=0.5, train=train)
        h = self.l7(h, train, finetune)
        h = self.l8(h, train, finetune)
        h = self.l9(h, train, finetune)

        h = F.sum(h, axis=-1)
        h = F.sum(h, axis=-1)
        h = F.sum(h, axis=-1)
        h /= 8 * 8 * 4

        return F.softmax_cross_entropy(h, t), F.accuracy(h, t)
项目:gconv_experiments    作者:tscohen    | 项目源码 | 文件源码
def __call__(self, x, t, train=True, finetune=False):

        h = x
        h = F.dropout(h, ratio=0.2, train=train)
        h = self.l1(h, train, finetune)
        h = self.l2(h, train, finetune)
        h = self.l3(h, train, finetune)
        h = F.dropout(h, ratio=0.5, train=train)
        h = self.l4(h, train, finetune)
        h = self.l5(h, train, finetune)
        h = self.l6(h, train, finetune)
        h = F.dropout(h, ratio=0.5, train=train)
        h = self.l7(h, train, finetune)
        h = self.l8(h, train, finetune)
        h = self.l9(h, train, finetune)

        h = F.sum(h, axis=-1)
        h = F.sum(h, axis=-1)
        h /= 8 * 8

        return F.softmax_cross_entropy(h, t), F.accuracy(h, t)
项目:NANHM-for-GEC    作者:shinochin    | 项目源码 | 文件源码
def __call__(self, ht, xs, d_bar_s_1):
        #ht:encoder?????????????????
        #batch_size * n_words * in_size
        #xs:??????
        if d_bar_s_1 == None:
            d_bar_s_1 = np.zeros(self.in_size)

        ht_T = list(map(F.transpose, ht))
        phi_ht = list(map(W1, ht_T))

        d_s = rnn(d_bar_s_1, y_s_1)

        phi_d = F.transpose_sequence(W2(F.transpose_sequence(d_s)))
        u_st = list(map(lambda x: phi_d*x, phi_ht))   #(4)

        sum_u = F.sum(u_st)
        alpha_st = list(map(lambda x:x/sum_u, u_st))   #(3)
        z_s = F.argmax(alpha_st, axis=0)

        c_s = F.sum(list(map(lambda x,y:x*y , alpha_st, ht)))   #(2)

        d_bar_s = F.relu(W3(F.concat([c_s, d_s])))

        return d_bar_s, d_s, c_s, z_s
项目:chainer-neural-style    作者:dsanno    | 项目源码 | 文件源码
def nearest_neighbor_patch(x, patch, patch_norm):
    assert patch.data.shape[0] == 1, 'mini batch size of patch must be 1'
    assert patch_norm.data.shape[0] == 1, 'mini batch size of patch_norm must be 1'

    xp = cuda.get_array_module(x.data)
    z = x.data
    b, ch, h, w = z.shape
    z = z.transpose((1, 0, 2, 3)).reshape((ch, -1))
    norm = xp.expand_dims(xp.sum(z ** 2, axis=0) ** 0.5, 0)
    z = z / xp.broadcast_to(norm, z.shape)
    p = patch.data
    p_norm = patch_norm.data
    p = p.reshape((ch, -1))
    p_norm = p_norm.reshape((1, -1))
    p_normalized = p / xp.broadcast_to(p_norm, p.shape)
    correlation = z.T.dot(p_normalized)
    min_index = xp.argmax(correlation, axis=1)
    nearest_neighbor = p.take(min_index, axis=1).reshape((ch, b, h, w)).transpose((1, 0, 2, 3))
    return Variable(nearest_neighbor)
项目:async-rl    作者:muupan    | 项目源码 | 文件源码
def _sample_discrete_actions(batch_probs):
    """Sample a batch of actions from a batch of action probabilities.

    Args:
      batch_probs (ndarray): batch of action probabilities BxA
    Returns:
      List consisting of sampled actions
    """
    action_indices = []

    # Subtract a tiny value from probabilities in order to avoid
    # "ValueError: sum(pvals[:-1]) > 1.0" in numpy.multinomial
    batch_probs = batch_probs - np.finfo(np.float32).epsneg

    for i in range(batch_probs.shape[0]):
        histogram = np.random.multinomial(1, batch_probs[i])
        action_indices.append(int(np.nonzero(histogram)[0]))
    return action_indices
项目:depccg    作者:masashi-y    | 项目源码 | 文件源码
def __call__(self, ws, cs, ls, ts):
        h_w = self.emb_word(ws) #_(batchsize, windowsize, word_dim)
        h_c = self.emb_char(cs) # (batchsize, windowsize, max_char_len, char_dim)
        batchsize, windowsize, _, _ = h_c.data.shape
        # (batchsize, windowsize, char_dim)
        h_c = F.sum(h_c, 2)
        h_c, ls = F.broadcast(h_c, F.reshape(ls, (batchsize, windowsize, 1)))
        h_c = h_c / ls
        h = F.concat([h_w, h_c], 2)
        h = F.reshape(h, (batchsize, -1))
        # ys = self.linear1(h)
        h = F.relu(self.linear1(h))
        h = F.dropout(h, ratio=.5, train=self.train)
        ys = self.linear2(h)

        loss = F.softmax_cross_entropy(ys, ts)
        acc = F.accuracy(ys, ts)
        chainer.report({
            "loss": loss,
            "accuracy": acc
            }, self)
        return loss
项目:DeepLearning    作者:Wanwannodao    | 项目源码 | 文件源码
def meanQvalue(Q, samples): 
    xp = Q.xp
    s = np.ndarray(shape=(minibatch_size, STATE_LENGTH, FRAME_WIDTH, FRAME_HEIGHT), dtype=np.float32)
    a = np.asarray([sample[1] for sample in samples], dtype=np.int32)

    for i in xrange(minibatch_size):
        s[i] = samples[i][0]

    # to gpu if available
    s = xp.asarray(s)
    a = xp.asarray(a)

    # Prediction: Q(s,a)
    y = F.select_item(Q(s), a)
    mean_Q = (F.sum(y)/minibatch_size).data
    return mean_Q
项目:DeepLearning    作者:Wanwannodao    | 项目源码 | 文件源码
def meanQvalue(Q, samples): 
    xp = Q.xp
    s = np.ndarray(shape=(minibatch_size, STATE_LENGTH, FRAME_WIDTH, FRAME_HEIGHT), dtype=np.float32)
    a = np.asarray([sample[1] for sample in samples], dtype=np.int32)

    for i in xrange(minibatch_size):
        s[i] = samples[i][0]

    # to gpu if available
    s = xp.asarray(s)
    a = xp.asarray(a)

    # Prediction: Q(s,a)
    y = F.select_item(Q(s), a)
    mean_Q = (F.sum(y)/minibatch_size).data
    return mean_Q
项目:chainer-dfi    作者:dsanno    | 项目源码 | 文件源码
def mean_feature(net, paths, image_size, base_feature, top_num, batch_size, clip_rect=None):
    xp = net.xp
    image_num = len(paths)
    features = []
    for i in six.moves.range(0, image_num, batch_size):
        x = [preprocess_image(Image.open(path).convert('RGB'), image_size, clip_rect) for path in paths[i:i + batch_size]]
        x = xp.asarray(np.concatenate(x, axis=0))
        y = feature(net, x)
        features.append([cuda.to_cpu(layer.data) for layer in y])
    if image_num > top_num:
        last_features = np.concatenate([f[-1] for f in features], axis=0)
        last_features = last_features.reshape((last_features.shape[0], -1))
        base_feature = cuda.to_cpu(base_feature).reshape((1, -1,))
        diff = np.sum((last_features - base_feature) ** 2, axis=1)

        nearest_indices = np.argsort(diff)[:top_num]
        nearests = [np.concatenate(xs, axis=0)[nearest_indices] for xs in zip(*features)]
    else:
        nearests = [np.concatenate(xs, axis=0) for xs in zip(*features)]

    return [xp.asarray(np.mean(f, axis=0, keepdims=True)) for f in nearests]
项目:deep_metric_learning    作者:ronekko    | 项目源码 | 文件源码
def normalized_mutual_info_score(x, y):
    xp = chainer.cuda.get_array_module(x)

    contingency = contingency_matrix(x, y)
    nonzero_mask = contingency != 0
    nonzero_val = contingency[nonzero_mask]

    pi = contingency.sum(axis=1, keepdims=True)
    pj = contingency.sum(axis=0, keepdims=True)
    total_mass = pj.sum()
    pi /= total_mass
    pj /= total_mass
    pi_pj = (pj * pi)[nonzero_mask]

    pij = nonzero_val / total_mass
    log_pij = xp.log(pij)
    log_pi_pj = xp.log(pi_pj)
    mi = xp.sum(pij * (log_pij - log_pi_pj))
    nmi = mi / max(xp.sqrt(entropy(pi) * entropy(pj)), 1e-10)
    return xp.clip(nmi, 0, 1)
项目:convolutional_seq2seq    作者:soskek    | 项目源码 | 文件源码
def __call__(self, x, z, ze, mask, conv_mask):
        att_scale = self.xp.sum(
            mask, axis=2, keepdims=True)[:, None, :, :] ** 0.5
        pad = self.xp.zeros(
            (x.shape[0], x.shape[1], self.width - 1, 1), dtype=x.dtype)
        base_x = x
        z = F.squeeze(z, axis=3)
        # Note: these behaviors of input, output, and attention result
        # may refer to the code by authors, which looks little different
        # from the paper's saying.
        for conv_name, preatt_name in zip(self.conv_names, self.preatt_names):
            # Calculate Output of GLU
            out = getattr(self, conv_name)(
                F.concat([pad, x], axis=2), conv_mask)
            # Calcualte Output of Attention using Output of GLU
            preatt = seq_linear(getattr(self, preatt_name), out)
            query = base_x + preatt
            query = F.squeeze(query, axis=3)
            c = self.attend(query, z, ze, mask) * att_scale
            # Merge Them in Redidual Calculation and Scaling
            x = (x + (c + out) * scale05) * scale05

        return x
项目:convolutional_seq2seq    作者:soskek    | 项目源码 | 文件源码
def attend(self, query, key, value, mask, minfs=None):
        """
        Input shapes:
            q=(b, units, dec_l), k=(b, units, enc_l),
            v=(b, units, dec_l, enc_l), m=(b, dec_l, enc_l)
        """

        # Calculate Attention Scores with Mask for Zero-padded Areas
        pre_a = F.batch_matmul(query, key, transa=True)  # (b, dec_l, enc_l)
        minfs = self.xp.full(pre_a.shape, -np.inf, pre_a.dtype) \
            if minfs is None else minfs
        pre_a = F.where(mask, pre_a, minfs)
        a = F.softmax(pre_a, axis=2)
        # if values in axis=2 are all -inf, they become nan. thus do re-mask.
        a = F.where(self.xp.isnan(a.data),
                    self.xp.zeros(a.shape, dtype=a.dtype), a)
        reshaped_a = a[:, None]  # (b, 1, dec_xl, enc_l)

        # Calculate Weighted Sum
        pre_c = F.broadcast_to(reshaped_a, value.shape) * value
        c = F.sum(pre_c, axis=3, keepdims=True)  # (b, units, dec_xl, 1)
        return c
项目:ddnn    作者:kunglab    | 项目源码 | 文件源码
def avg_pool_max_pool(self, hs):
        num_output = len(hs[0]) 
        houts = []
        i = 0
        shape = hs[0][i].shape
        h = F.dstack([F.reshape(h[i],(shape[0], -1)) for h in hs])
        x = 1.0*F.sum(h,2)/h.shape[2]
        x = F.reshape(x, shape)
        houts.append(x)

        for i in range(1,num_output):
            shape = hs[0][i].shape
            h = F.dstack([F.reshape(h[i],(shape[0], -1)) for h in hs])
            x = 1.0*F.max(h,2)
            x = F.reshape(x, shape)
            houts.append(x)
        return houts
项目:ddnn    作者:kunglab    | 项目源码 | 文件源码
def max_pool_avg_pool(self, hs):
        num_output = len(hs[0]) 
        houts = []
        i = 0
        shape = hs[0][i].shape
        h = F.dstack([F.reshape(h[i],(shape[0], -1)) for h in hs])
        x = 1.0*F.max(h,2)
        x = F.reshape(x, shape)
        houts.append(x)

        for i in range(1,num_output):
            shape = hs[0][i].shape
            h = F.dstack([F.reshape(h[i],(shape[0], -1)) for h in hs])
            x = 1.0*F.sum(h,2)/h.shape[2]
            x = F.reshape(x, shape)
            houts.append(x)
        return houts
项目:vfm    作者:cemoody    | 项目源码 | 文件源码
def term_feat(self, iloc, jloc, ival, jval, bs, nf, train=True):
        # Change all of the shapes to form interaction vectors
        shape = (bs, nf * 2, self.n_dim)
        feat_mu_vec = F.broadcast_to(self.feat_mu_vec.b, shape)
        feat_lv_vec = F.broadcast_to(self.feat_lv_vec.b, shape)
        if not train:
            feat_lv_vec += self.lv_floor

        # Construct the interaction mean and variance
        # iloc is (bs, nf), feat(iloc) is (bs, nf, ndim) and
        # dot(feat, feat) is (bs, nf)
        ivec = F.gaussian(feat_mu_vec + self.feat_delta_mu(iloc),
                          feat_lv_vec + self.feat_delta_lv(iloc))
        jvec = F.gaussian(feat_mu_vec + self.feat_delta_mu(jloc),
                          feat_lv_vec + self.feat_delta_lv(jloc))
        # feat is (bs, )
        feat = dot(F.sum(ivec * jvec, axis=2), ival * jval)

        # Compute the KLD for the group mean vector and variance vector
        kld1 = F.gaussian_kl_divergence(self.feat_mu_vec.b, self.feat_lv_vec.b)
        # Compute the KLD for vector deviations from the group mean and var
        kld2 = F.gaussian_kl_divergence(self.feat_delta_mu.W,
                                        self.feat_delta_lv.W)
        return feat, kld1 + kld2
项目:vfm    作者:cemoody    | 项目源码 | 文件源码
def __call__(self, loc, val, y, train=True):
        bs = val.data.shape[0]
        pred, kld0, kld1, kld2 = self.forward(loc, val, y, train=train)

        # Compute MSE loss
        mse = F.mean_squared_error(pred, y)
        rmse = F.sqrt(mse)  # Only used for reporting

        # Now compute the total KLD loss
        kldt = kld0 * self.lambda0 + kld1 * self.lambda1 + kld2 * self.lambda2

        # Total loss is MSE plus regularization losses
        loss = mse + kldt * (1.0 / self.total_nobs)

        # Log the errors
        logs = {'loss': loss, 'rmse': rmse, 'kld0': kld0, 'kld1': kld1,
                'kld2': kld2, 'kldt': kldt, 'bias': F.sum(self.bias_mu.b)}
        reporter.report(logs, self)
        return loss
项目:vfm    作者:cemoody    | 项目源码 | 文件源码
def __call__(self, loc, val, y, train=True):
        bs = val.data.shape[0]
        ret = self.forward(loc, val, y, train=train)
        pred, kld0, kld1, kldg, kldi, hypg, hypi = ret

        # Compute MSE loss
        mse = F.mean_squared_error(pred, y)
        rmse = F.sqrt(mse)  # Only used for reporting

        # Now compute the total KLD loss
        kldt = kld0 * self.lambda0 + kld1 * self.lambda1
        kldt += kldg + kldi + hypg + hypi

        # Total loss is MSE plus regularization losses
        loss = mse + kldt * (1.0 / self.total_nobs)

        # Log the errors
        logs = {'loss': loss, 'rmse': rmse, 'kld0': kld0, 'kld1': kld1,
                'kldg': kldg, 'kldi': kldi, 'hypg': hypg, 'hypi': hypi,
                'hypglv': F.sum(self.hyper_feat_lv_vec.b),
                'hypilv': F.sum(self.hyper_feat_delta_lv.b),
                'kldt': kldt, 'bias': F.sum(self.bias_mu.b)}
        reporter.report(logs, self)
        return loss
项目:gan-rl    作者:iaroslav-ai    | 项目源码 | 文件源码
def evaluate_on_diff_env(env, n_sample_traj, agent):
    # this function is used to sample n traj using GAN version of environment
    R = 0.0

    # reset environment
    env.reset_state()
    agent.reset_state()

    # get initial observation
    observations = env(tv(np.zeros((n_sample_traj, env.act_size))))[:, :-2]

    for i in range(env.spec.timestep_limit):
        act = agent(observations)
        obs_rew = env(act)
        rewards = obs_rew[:, -2]
        ends = obs_rew[:, -1]
        observations = obs_rew[:, :-2]
        R += F.sum(rewards * (1.0 - ends)) / (-len(rewards) * env.spec.timestep_limit)

    return R
项目:sg-mcmc-survey    作者:delta2323    | 项目源码 | 文件源码
def calc_log_posterior(theta, x, n=None):
    """Calculate unnormalized log posterior, ``log p(theta | x) + C``

    Args:
        theta(chainer.Variable): model parameters
        x(numpy.ndarray): sample data
        n(int): total data size
    Returns:
        chainer.Variable: Variable that holding unnormalized log posterior,
        ``log p(theta | x) + C`` of shape ``()``
    """

    theta1, theta2 = F.split_axis(theta, 2, 0)
    log_prior1 = F.sum(F.log(gaussian.gaussian_likelihood(theta1, 0, VAR1)))
    log_prior2 = F.sum(F.log(gaussian.gaussian_likelihood(theta2, 0, VAR2)))
    prob1 = gaussian.gaussian_likelihood(x, theta1, VAR_X)
    prob2 = gaussian.gaussian_likelihood(x, theta1 + theta2, VAR_X)
    log_likelihood = F.sum(F.log(prob1 / 2 + prob2 / 2))
    if n is not None:
        log_likelihood *= n / len(x)
    return log_prior1 + log_prior2 + log_likelihood
项目:chainer_frmqn    作者:okdshin    | 项目源码 | 文件源码
def calc_loss(self, state, state_dash, actions, rewards, done_list):
        assert(state.shape == state_dash.shape)
        s = state.reshape((state.shape[0], reduce(lambda x, y: x*y, state.shape[1:]))).astype(np.float32)
        s_dash = state_dash.reshape((state.shape[0], reduce(lambda x, y: x*y, state.shape[1:]))).astype(np.float32)
        q = self.model.q_function(s)

        q_dash = self.model_target.q_function(s_dash)  # Q(s',*)
        max_q_dash = np.asarray(list(map(np.max, q_dash.data)), dtype=np.float32) # max_a Q(s',a)

        target = q.data.copy()
        for i in range(self.replay_batch_size):
            assert(self.replay_batch_size == len(done_list))
            r = np.sign(rewards[i]) if self.clipping else rewards[i]
            if done_list[i]:
                discounted_sum = r
            else:
                discounted_sum = r + self.gamma * max_q_dash[i]
            assert(self.replay_batch_size == len(actions))
            target[i, actions[i]] = discounted_sum

        loss = F.sum(F.huber_loss(Variable(target), q, delta=1.0)) #/ self.replay_batch_size
        return loss, q
项目:chainer-gan-improvements    作者:hvy    | 项目源码 | 文件源码
def __call__(self, x):
        minibatch_size = x.shape[0]
        activation = F.reshape(self.t(x), (-1, self.n_kernels, self.kernel_dim))
        activation_ex = F.expand_dims(activation, 3)
        activation_ex_t = F.expand_dims(F.transpose(activation, (1, 2, 0)), 0)
        activation_ex, activation_ex_t = F.broadcast(activation_ex, activation_ex_t)
        diff = activation_ex - activation_ex_t

        xp = chainer.cuda.get_array_module(x.data)
        eps = F.expand_dims(xp.eye(minibatch_size, dtype=xp.float32), 1)
        eps = F.broadcast_to(eps, (minibatch_size, self.n_kernels, minibatch_size))
        sum_diff = F.sum(abs(diff), axis=2)
        sum_diff = F.broadcast_to(sum_diff, eps.shape)
        abs_diff = sum_diff + eps

        minibatch_features = F.sum(F.exp(-abs_diff), 2)
        return F.concat((x, minibatch_features), axis=1)
项目:unrolled-gan    作者:musyoku    | 项目源码 | 文件源码
def __call__(self, x):
        xp = chainer.cuda.get_array_module(x.data)
        batchsize = x.shape[0]
        if self.train_weights == False and self.initial_T is not None:
            self.T.W.data = self.initial_T

        M = F.reshape(self.T(x), (-1, self.num_kernels, self.ndim_kernel))
        M = F.expand_dims(M, 3)
        M_T = F.transpose(M, (3, 1, 2, 0))
        M, M_T = F.broadcast(M, M_T)

        norm = F.sum(abs(M - M_T), axis=2)
        eraser = F.broadcast_to(xp.eye(batchsize, dtype=x.dtype).reshape((batchsize, 1, batchsize)), norm.shape)
        c_b = F.exp(-(norm + 1e6 * eraser))
        o_b = F.sum(c_b, axis=2)

        if self.train_weights == False:
            self.initial_T = self.T.W.data

        return F.concat((x, o_b), axis=1)
项目:chainer-cf-nade    作者:dsanno    | 项目源码 | 文件源码
def ordinal_loss(y, mask):
    xp = cuda.get_array_module(y.data)
    volatile = y.volatile
    b, c, n = y.data.shape
    max_y = F.broadcast_to(F.max(y, axis=1, keepdims=True), y.data.shape)
    y = y - max_y
    sum_y = F.broadcast_to(F.expand_dims(F.sum(y, axis=1), 1), y.data.shape)
    down_tri = np.tri(c, dtype=np.float32)
    up_tri = down_tri.T
    w1 = Variable(xp.asarray(down_tri.reshape(c, c, 1, 1)), volatile=volatile)
    w2 = Variable(xp.asarray(up_tri.reshape(c, c, 1, 1)), volatile=volatile)
    h = F.exp(F.expand_dims(y, -1))
    h1 = F.convolution_2d(h, w1)
    h1 = F.convolution_2d(F.log(h1), w1)
    h2 = F.convolution_2d(h, w2)
    h2 = F.convolution_2d(F.log(h2), w2)
    h = F.reshape(h1 + h2, (b, c, n))
    return F.sum((h - sum_y - y) * mask) / b
项目:chainer-cf-nade    作者:dsanno    | 项目源码 | 文件源码
def __forward(self, batch_x, batch_t, weight, train=True):
        xp = self.xp
        x = Variable(xp.asarray(batch_x), volatile=not train)
        t = Variable(xp.asarray(batch_t), volatile=not train)
        y = self.net(x, train=train)

        b, c, n = y.data.shape
        mask = Variable(xp.asarray(np.broadcast_to(weight.reshape(-1, 1, 1), (b, c, n)) * loss_mask(batch_t, self.net.rating_num)), volatile=not train)
        if self.ordinal_weight == 0:
            loss = F.sum(-F.log_softmax(y) * mask) / b
        elif self.ordinal_weight == 1:
            loss = ordinal_loss(y, mask)
        else:
            loss = (1 - self.ordinal_weight) * F.sum(-F.log_softmax(y) * mask) / b + self.ordinal_weight * ordinal_loss(y, mask)

        acc = self.__accuracy(y, t)
        return loss, acc
项目:SeRanet    作者:corochann    | 项目源码 | 文件源码
def propdown(self, hid):
        """ This function propagates the hidden units activation downwords to the visible units
        :param hid: Variable Matrix(batch_size, out_channels, image_height_out, image_width_out)  - given h_sample
        :return: Variable Matrix(batch_size, in_channels, image_height, image_width) - probability for each visible units to be v_j = 1
        """
        batch_size = hid.data.shape[0]
        if self.real == 0:
            W_flipped = F.swapaxes(CF.flip(self.conv.W, axes=(2, 3)), axis1=0, axis2=1)
            pre_sigmoid_activation = F.convolution_2d(hid, W_flipped, self.conv.a, pad=self.ksize-1)
                # F.matmul(hid, self.l.W) + F.broadcast_to(self.l.a, (batch_size, self.n_visible))
            v_mean = F.sigmoid(pre_sigmoid_activation)
            #print('W info ', self.conv.W.data.shape, 'W_flipped info ', W_flipped.data.shape)
            #print('W info ', self.conv.W.data[3, 0, 2, 3], 'W_flipped info ', W_flipped.data[0, 3, 8, 7])
            #print('W info ', self.conv.W.data[3, 0, 8, 7], 'W_flipped info ', W_flipped.data[0, 3, 2, 3])
            #print('W info ', self.conv.W.data[19, 0, 4, 0], 'W_flipped info ', W_flipped.data[0, 19, 6, 10])
            #print('pre_sigmoidactivation', F.sum(pre_sigmoid_activation).data)
            #print('v_mean', v_mean.data.shape)
            #print('v_mean sum', F.sum(v_mean).data)
            #print('hid', hid.data.shape)

        else:
            # TODO: check
            W_flipped = F.swapaxes(CF.flip(self.conv.W, axes=(2, 3)), axis1=0, axis2=1)
            v_mean = F.convolution_2d(hid, W_flipped, self.conv.a, pad=self.ksize-1)
        return v_mean
项目:LSGAN    作者:musyoku    | 项目源码 | 文件源码
def __call__(self, x):
        xp = chainer.cuda.get_array_module(x.data)
        batchsize = x.shape[0]
        if self.train_weights == False and self.initial_T is not None:
            self.T.W.data = self.initial_T

        M = F.reshape(self.T(x), (-1, self.num_kernels, self.ndim_kernel))
        M = F.expand_dims(M, 3)
        M_T = F.transpose(M, (3, 1, 2, 0))
        M, M_T = F.broadcast(M, M_T)

        norm = F.sum(abs(M - M_T), axis=2)
        eraser = F.broadcast_to(xp.eye(batchsize, dtype=x.dtype).reshape((batchsize, 1, batchsize)), norm.shape)
        c_b = F.exp(-(norm + 1e6 * eraser))
        o_b = F.sum(c_b, axis=2)

        if self.train_weights == False:
            self.initial_T = self.T.W.data

        return F.concat((x, o_b), axis=1)
项目:chainer-EWC    作者:okdshin    | 项目源码 | 文件源码
def __call__(self, *args):
        x = args[:-1]
        t = args[-1]
        self.y = None
        self.loss = None
        self.accuracy = None
        self.y = self.predictor(*x)
        self.loss = F.softmax_cross_entropy(self.y, t)

        if self.stored_variable_list is not None and \
                self.fisher_list is not None:  # i.e. Stored
            for i in range(len(self.variable_list)):
                self.loss += self.lam/2. * F.sum(
                        self.fisher_list[i] *
                        F.square(self.variable_list[i][1] -
                                 self.stored_variable_list[i]))
        reporter.report({'loss': self.loss}, self)
        if self.compute_accuracy:
            self.accuracy = F.accuracy(self.y, t)
            reporter.report({'accuracy': self.accuracy}, self)
        return self.loss
项目:chainer-gan-experiments    作者:Aixile    | 项目源码 | 文件源码
def loss_l1(h, t):
    return F.sum(F.absolute(h-t)) / np.prod(h.data.shape)
项目:chainer-gan-experiments    作者:Aixile    | 项目源码 | 文件源码
def loss_l2(h, t):
    return F.sum((h-t)**2) / np.prod(h.data.shape)
项目:chainer-gan-experiments    作者:Aixile    | 项目源码 | 文件源码
def loss_l2_norm(h, t, axis=(1)):
    return F.sum(F.sqrt(F.sum((h-t)**2, axis=axis))) / h.data.shape[0]
项目:chainer-gan-experiments    作者:Aixile    | 项目源码 | 文件源码
def loss_func_dcgan_dis_real(y_real):
    return F.sum(F.softplus(-y_real)) / np.prod(y_real.data.shape)
项目:chainer-gan-experiments    作者:Aixile    | 项目源码 | 文件源码
def loss_func_dcgan_dis_fake(y_fake):
    return F.sum(F.softplus(y_fake)) / np.prod(y_fake.data.shape)
项目:chainer-gan-experiments    作者:Aixile    | 项目源码 | 文件源码
def loss_func_tv_l2(x_out):
    xp = cuda.get_array_module(x_out.data)
    b, ch, h, w = x_out.data.shape
    Wx = xp.zeros((ch, ch, 2, 2), dtype="f")
    Wy = xp.zeros((ch, ch, 2, 2), dtype="f")
    for i in range(ch):
        Wx[i,i,0,0] = -1
        Wx[i,i,0,1] = 1
        Wy[i,i,0,0] = -1
        Wy[i,i,1,0] = 1
    return F.sum(F.convolution_2d(x_out, W=Wx) ** 2) + F.sum(F.convolution_2d(x_out, W=Wy) ** 2)
项目:vaelm    作者:TatsuyaShirakawa    | 项目源码 | 文件源码
def train(model, batch, num_samples, word_keep_rate, UNK, alpha):

    xp = model.xp
    use_gpu = (xp == cuda.cupy)

    if use_gpu:
        batch = cuda.to_gpu(batch)

    KL, xents = forward(model, batch, num_samples=num_samples, word_keep_rate=word_keep_rate, UNK=UNK, train=True)
    loss = alpha * KL + sum(xents) / num_samples
    loss.backward()
    optimizer.update()
    loss.unchain_backward()
    if alpha == 0: KL.unchain_backward()
项目:ROCStory_skipthought_baseline    作者:soskek    | 项目源码 | 文件源码
def solve(self, x_seq, pos, neg, train=True, variablize=False, onebyone=True):
        if variablize:# If arguments are just arrays (not variables), make them variables
            x_seq = [chainer.Variable(x, volatile=not train) for x in x_seq]
            x_seq = [F.dropout(x, ratio=self.dropout_ratio, train=train) for x in x_seq]
            pos = self.act1(self.W_candidate(
                F.dropout(chainer.Variable(pos, volatile=not train),
                          ratio=self.dropout_ratio, train=train)))
            neg = self.act1(self.W_candidate(
                F.dropout(chainer.Variable(neg, volatile=not train),
                          ratio=self.dropout_ratio, train=train)))
        if onebyone and train:
            target_x_seq = [self.act1(self.W_candidate(x)) for x in x_seq[:4]]# 1,2,3,4,5-th targets
            onebyone_loss = 0.

        self.LSTM.reset_state()
        for i, x in enumerate(x_seq):
            h = self.LSTM( F.dropout(x, ratio=self.dropout_ratio, train=train) )
            if onebyone and train and target_x_seq[i+1:]:
                pos_score, neg_score = self.calculate_score(h, target_x_seq[i+1:], neg,
                                                            multipos=True)
                onebyone_loss += F.relu( self.margin - pos_score + neg_score )

        pos_score, neg_score = self.calculate_score(h, pos, neg)
        accum_loss = F.relu( self.margin - pos_score + neg_score )
        TorFs = sum(accum_loss.data < self.margin)

        if onebyone and train:
            return F.sum(accum_loss) + F.sum(onebyone_loss), TorFs
        else:
            return F.sum(accum_loss), TorFs
项目:shoelace    作者:rjagerman    | 项目源码 | 文件源码
def _pl_sample(t, ?):
    """
    Sample from the plackett luce distribution directly

    :param t: The target labels 
    :return: A random permutation from the plackett-luce distribution
             parameterized by the target labels
    """
    xp = cuda.get_array_module(t)
    t = t[:, 0]

    probs = xp.exp(t * ?)
    probs /= xp.sum(probs)
    return xp.random.choice(probs.shape[0], probs.shape[0], replace=False,
                            p=probs)
项目:chainerrl    作者:chainer    | 项目源码 | 文件源码
def compute_expectation(self, beta):
        return F.sum(F.softmax(beta * self.q_values) * self.q_values, axis=1)
项目:chainerrl    作者:chainer    | 项目源码 | 文件源码
def compute_actor_loss(self, batch):
        """Compute loss for actor.

        Preconditions:
          q_function must have seen up to s_{t-1} and s_{t-1}.
          policy must have seen up to s_{t-1}.
        Preconditions:
          q_function must have seen up to s_t and s_t.
          policy must have seen up to s_t.
        """

        batch_state = batch['state']
        batch_action = batch['action']
        batch_size = len(batch_action)

        # Estimated policy observes s_t
        onpolicy_actions = self.policy(batch_state).sample()

        # Q(s_t, mu(s_t)) is evaluated.
        # This should not affect the internal state of Q.
        with state_kept(self.q_function):
            q = self.q_function(batch_state, onpolicy_actions)

        # Estimated Q-function observes s_t and a_t
        if isinstance(self.q_function, Recurrent):
            self.q_function.update_state(batch_state, batch_action)

        # Avoid the numpy #9165 bug (see also: chainer #2744)
        q = q[:, :]

        # Since we want to maximize Q, loss is negation of Q
        loss = - F.sum(q) / batch_size

        # Update stats
        self.average_actor_loss *= self.average_loss_decay
        self.average_actor_loss += ((1 - self.average_loss_decay) *
                                    float(loss.data))
        return loss
项目:chainerrl    作者:chainer    | 项目源码 | 文件源码
def compute_value_loss(y, t, clip_delta=True, batch_accumulator='mean'):
    """Compute a loss for value prediction problem.

    Args:
        y (Variable or ndarray): Predicted values.
        t (Variable or ndarray): Target values.
        clip_delta (bool): Use the Huber loss function if set True.
        batch_accumulator (str): 'mean' or 'sum'. 'mean' will use the mean of
            the loss values in a batch. 'sum' will use the sum.
    Returns:
        (Variable) scalar loss
    """
    assert batch_accumulator in ('mean', 'sum')
    y = F.reshape(y, (-1, 1))
    t = F.reshape(t, (-1, 1))
    if clip_delta:
        loss_sum = F.sum(F.huber_loss(y, t, delta=1.0))
        if batch_accumulator == 'mean':
            loss = loss_sum / y.shape[0]
        elif batch_accumulator == 'sum':
            loss = loss_sum
    else:
        loss_mean = F.mean_squared_error(y, t) / 2
        if batch_accumulator == 'mean':
            loss = loss_mean
        elif batch_accumulator == 'sum':
            loss = loss_mean * y.shape[0]
    return loss
项目:chainerrl    作者:chainer    | 项目源码 | 文件源码
def _compute_loss(self, exp_batch, gamma, errors_out=None):
        """Compute the Q-learning loss for a batch of experiences


        Args:
          experiences (list): see update()'s docstring
          gamma (float): discount factor
        Returns:
          loss
        """

        y, t = self._compute_y_and_t(exp_batch, gamma)

        if errors_out is not None:
            del errors_out[:]
            delta = F.sum(abs(y - t), axis=1)
            delta = cuda.to_cpu(delta.data)
            for e in delta:
                errors_out.append(e)

        if 'weights' in exp_batch:
            return compute_weighted_value_loss(
                y, t, exp_batch['weights'],
                clip_delta=self.clip_delta,
                batch_accumulator=self.batch_accumulator)
        else:
            return compute_value_loss(y, t, clip_delta=self.clip_delta,
                                      batch_accumulator=self.batch_accumulator)
项目:chainerrl    作者:chainer    | 项目源码 | 文件源码
def __call__(self, obs):
        action_distrib = self.pi(obs)
        action_value = self.q(obs)
        v = F.sum(action_distrib.all_prob *
                  action_value.q_values, axis=1)
        return action_distrib, action_value, v
项目:chainerrl    作者:chainer    | 项目源码 | 文件源码
def __call__(self, obs):
        action_distrib = self.pi(obs)
        v = self.v(obs)

        def evaluator(action):
            adv_mean = sum(self.adv(obs, action_distrib.sample().data)
                           for _ in range(self.n)) / self.n
            return v + self.adv(obs, action) - adv_mean

        action_value = SingleActionValue(evaluator)

        return action_distrib, action_value, v
项目:chainerrl    作者:chainer    | 项目源码 | 文件源码
def update(self, t_start, t_stop, R, states, actions, rewards, values,
               action_values, action_distribs, action_distribs_mu,
               avg_action_distribs):

        assert np.isscalar(R)

        total_loss = self.compute_loss(
            t_start=t_start,
            t_stop=t_stop,
            R=R,
            states=states,
            actions=actions,
            rewards=rewards,
            values=values,
            action_values=action_values,
            action_distribs=action_distribs,
            action_distribs_mu=action_distribs_mu,
            avg_action_distribs=avg_action_distribs)

        # Compute gradients using thread-specific model
        self.model.zerograds()
        total_loss.backward()
        # Copy the gradients to the globally shared model
        self.shared_model.zerograds()
        copy_param.copy_grad(
            target_link=self.shared_model, source_link=self.model)
        # Update the globally shared model
        if self.process_idx == 0:
            norm = sum(np.sum(np.square(param.grad))
                       for param in self.optimizer.target.params())
            self.logger.debug('grad norm:%s', norm)
        self.optimizer.update()

        self.sync_parameters()
        if isinstance(self.model, Recurrent):
            self.model.unchain_backward()
项目:chainerrl    作者:chainer    | 项目源码 | 文件源码
def entropy(self):
        with chainer.force_backprop_mode():
            return - F.sum(self.all_prob * self.all_log_prob, axis=1)