Python tensorflow 模块,reduce_sum() 实例源码

我们从Python开源项目中,提取了以下50个代码示例,用于说明如何使用tensorflow.reduce_sum()

项目:almond-nnparser    作者:Stanford-Mobisocial-IoT-Lab    | 项目源码 | 文件源码
def finalize(self, outputs : BeamSearchOptimizationDecoderOutput, final_state : BeamSearchOptimizationDecoderState, sequence_lengths):
        # all output fields are [max_time, batch_size, ...]
        predicted_ids = tf.contrib.seq2seq.gather_tree(
            outputs.predicted_ids, outputs.parent_ids,
            sequence_length=sequence_lengths, name='predicted_ids')
        total_loss = tf.reduce_sum(outputs.loss, axis=0, name='violation_loss')

        predicted_time = tf.shape(predicted_ids)[0]
        last_score = predicted_time-1
        with tf.name_scope('gold_score'):
            gold_score = outputs.gold_score[last_score]
        with tf.name_scope('sequence_scores'):
            sequence_scores = outputs.scores[last_score]

        return FinalBeamSearchOptimizationDecoderOutput(beam_search_decoder_output=outputs,
                                                        predicted_ids=predicted_ids,
                                                        scores=sequence_scores,
                                                        gold_score=gold_score,
                                                        gold_beam_id=final_state.gold_beam_id,
                                                        num_available_beams=final_state.num_available_beams,
                                                        total_violation_loss=total_loss), final_state
项目:almond-nnparser    作者:Stanford-Mobisocial-IoT-Lab    | 项目源码 | 文件源码
def bag_of_tokens(config, labels, label_lengths):
    if config.train_output_embeddings:
        with tf.variable_scope('embed', reuse=True):
            output_embeddings = tf.get_variable('output_embedding')
    else:
        output_embeddings = tf.constant(config.output_embedding_matrix)

    #everything_label_placeholder = tf.placeholder(shape=(None, config.max_length,), dtype=tf.int32)
    #everything_label_length_placeholder = tf.placeholder(shape=(None,), dtype=tf.int32)

    labels = tf.constant(np.array(labels))
    embedded_output = tf.gather(output_embeddings, labels)
    print('embedded_output before', embedded_output)
    #mask = tf.sequence_mask(label_lengths, maxlen=config.max_length, dtype=tf.float32)
    # note: this multiplication will broadcast the mask along all elements of the depth dimension
    # (which is why we run the expand_dims to choose how to broadcast)
    #embedded_output = embedded_output * tf.expand_dims(mask, axis=2)
    #print('embedded_output after', embedded_output)

    return tf.reduce_sum(embedded_output, axis=1)
项目:facerecognition    作者:guoxiaolu    | 项目源码 | 文件源码
def triplet_loss(anchor, positive, negative, alpha):
    """Calculate the triplet loss according to the FaceNet paper

    Args:
      anchor: the embeddings for the anchor images.
      positive: the embeddings for the positive images.
      negative: the embeddings for the negative images.

    Returns:
      the triplet loss according to the FaceNet paper as a float tensor.
    """
    with tf.variable_scope('triplet_loss'):
        pos_dist = tf.reduce_sum(tf.square(tf.subtract(anchor, positive)), 1)
        neg_dist = tf.reduce_sum(tf.square(tf.subtract(anchor, negative)), 1)

        basic_loss = tf.add(tf.subtract(pos_dist,neg_dist), alpha)
        loss = tf.reduce_mean(tf.maximum(basic_loss, 0.0), 0)

    return loss
项目:variational-text-tensorflow    作者:carpedm20    | 项目源码 | 文件源码
def build_model(self):
    self.q = tf.placeholder(tf.float32, [self.reader.vocab_size], name="question")
    self.a = tf.placeholder(tf.float32, [self.reader.vocab_size], name="answer")

    self.build_encoder()
    self.build_decoder()

    # Kullback Leibler divergence
    self.e_loss = -0.5 * tf.reduce_sum(1 + self.log_sigma_sq - tf.square(self.mu) - tf.exp(self.log_sigma_sq))

    # Log likelihood
    self.g_loss = tf.reduce_sum(tf.log(self.p_x_i))

    self.loss = tf.reduce_mean(self.e_loss + self.g_loss)
    self.optim = tf.train.AdamOptimizer(learning_rate=self.learning_rate).minimize(-self.loss)

    _ = tf.scalar_summary("encoder loss", self.e_loss)
    _ = tf.scalar_summary("decoder loss", self.g_loss)
    _ = tf.scalar_summary("loss", self.loss)
项目:youtube-8m    作者:wangheda    | 项目源码 | 文件源码
def calculate_loss_distill_boost(self, predictions, labels_distill, labels, **unused_params):
    with tf.name_scope("loss_distill_boost"):
      print("loss_distill_boost")
      epsilon = 10e-6
      float_labels = tf.cast(labels, tf.float32)
      batch_size = tf.shape(float_labels)[0]
      float_labels_distill = tf.cast(labels_distill, tf.float32)
      error = tf.negative(float_labels * tf.log(float_labels_distill + epsilon) + (
          1 - float_labels) * tf.log(1 - float_labels_distill + epsilon))
      error = tf.reduce_sum(error,axis=1,keep_dims=True)
      alpha = error / tf.reduce_sum(error) * tf.cast(batch_size,dtype=tf.float32)
      alpha = tf.clip_by_value(alpha, 0.5, 5)
      alpha = alpha / tf.reduce_sum(alpha) * tf.cast(batch_size,dtype=tf.float32)
      cross_entropy_loss = float_labels * tf.log(predictions + epsilon) + (
          1 - float_labels) * tf.log(1 - predictions + epsilon)
      cross_entropy_loss = tf.negative(cross_entropy_loss * alpha)

      return tf.reduce_mean(tf.reduce_sum(cross_entropy_loss, 1))
项目:youtube-8m    作者:wangheda    | 项目源码 | 文件源码
def calculate_loss(self, predictions, labels, **unused_params):
    with tf.name_scope("loss_xent"):
      epsilon = 10e-6
      vocab_size = predictions.get_shape().as_list()[1]
      float_labels = tf.cast(labels, tf.float32)
      cross_entropy_loss = float_labels * tf.log(predictions + epsilon) + (
          1 - float_labels) * tf.log(1 - predictions + epsilon)
      cross_entropy_loss = tf.negative(cross_entropy_loss)
      neg_labels = 1 - float_labels
      predictions_pos = predictions*float_labels+10*neg_labels
      predictions_minpos = tf.reduce_min(predictions_pos,axis=1,keep_dims=True)
      predictions_neg = predictions*neg_labels-10*float_labels
      predictions_maxneg = tf.reduce_max(predictions_neg,axis=1,keep_dims=True)
      mask_1 = tf.cast(tf.greater_equal(predictions_neg, predictions_minpos),dtype=tf.float32)
      mask_2 = tf.cast(tf.less_equal(predictions_pos, predictions_maxneg),dtype=tf.float32)
      cross_entropy_loss = cross_entropy_loss*(mask_1+mask_2)*10 + cross_entropy_loss
      return tf.reduce_mean(tf.reduce_sum(cross_entropy_loss, 1))
项目:youtube-8m    作者:wangheda    | 项目源码 | 文件源码
def calculate_loss(self, predictions, labels, **unused_params):
    bound = FLAGS.softmax_bound
    vocab_size_1 = bound
    with tf.name_scope("loss_softmax"):
      epsilon = 10e-8
      float_labels = tf.cast(labels, tf.float32)
      labels_1 = float_labels[:,:vocab_size_1]
      predictions_1 = predictions[:,:vocab_size_1]
      cross_entropy_loss = CrossEntropyLoss().calculate_loss(predictions_1,labels_1)
      lables_2 = float_labels[:,vocab_size_1:]
      predictions_2 = predictions[:,vocab_size_1:]
      # l1 normalization (labels are no less than 0)
      label_rowsum = tf.maximum(
          tf.reduce_sum(lables_2, 1, keep_dims=True),
          epsilon)
      label_append = 1.0-tf.reduce_max(lables_2, 1, keep_dims=True)
      norm_float_labels = tf.concat((tf.div(lables_2, label_rowsum),label_append),axis=1)
      predictions_append = 1.0-tf.reduce_sum(predictions_2, 1, keep_dims=True)
      softmax_outputs = tf.concat((predictions_2,predictions_append),axis=1)
      softmax_loss = norm_float_labels * tf.log(softmax_outputs + epsilon) + (
          1 - norm_float_labels) * tf.log(1 - softmax_outputs + epsilon)
      softmax_loss = tf.negative(tf.reduce_sum(softmax_loss, 1))
    return tf.reduce_mean(softmax_loss) + cross_entropy_loss
项目:youtube-8m    作者:wangheda    | 项目源码 | 文件源码
def calculate_loss(self, predictions, labels, **unused_params):
        bound = FLAGS.softmax_bound
        vocab_size_1 = bound
        with tf.name_scope("loss_softmax"):
            epsilon = 10e-8
            float_labels = tf.cast(labels, tf.float32)
            labels_1 = float_labels[:,:vocab_size_1]
            predictions_1 = predictions[:,:vocab_size_1]
            cross_entropy_loss = CrossEntropyLoss().calculate_loss(predictions_1,labels_1)
            lables_2 = float_labels[:,vocab_size_1:]
            predictions_2 = predictions[:,vocab_size_1:]
            # l1 normalization (labels are no less than 0)
            label_rowsum = tf.maximum(
                tf.reduce_sum(lables_2, 1, keep_dims=True),
                epsilon)
            label_append = 1.0-tf.reduce_max(lables_2, 1, keep_dims=True)
            norm_float_labels = tf.concat((tf.div(lables_2, label_rowsum),label_append),axis=1)
            predictions_append = 1.0-tf.reduce_sum(predictions_2, 1, keep_dims=True)
            softmax_outputs = tf.concat((predictions_2,predictions_append),axis=1)
            softmax_loss = norm_float_labels * tf.log(softmax_outputs + epsilon) + (
                                                                                       1 - norm_float_labels) * tf.log(1 - softmax_outputs + epsilon)
            softmax_loss = tf.negative(tf.reduce_sum(softmax_loss, 1))
        return tf.reduce_mean(softmax_loss) + cross_entropy_loss
项目:youtube-8m    作者:wangheda    | 项目源码 | 文件源码
def calculate_loss(self, predictions, labels, weights=None, **unused_params):
    with tf.name_scope("loss_xent"):
      epsilon = 10e-6
      if FLAGS.label_smoothing:
        float_labels = smoothing(labels)
      else:
        float_labels = tf.cast(labels, tf.float32)
      cross_entropy_loss = float_labels * tf.log(predictions + epsilon) + (
          1 - float_labels) * tf.log(1 - predictions + epsilon)
      cross_entropy_loss = tf.negative(cross_entropy_loss)
      if weights is not None:
        print cross_entropy_loss, weights
        weighted_loss = tf.einsum("ij,i->ij", cross_entropy_loss, weights)
        print "create weighted_loss", weighted_loss
        return tf.reduce_mean(tf.reduce_sum(weighted_loss, 1))
      else:
        return tf.reduce_mean(tf.reduce_sum(cross_entropy_loss, 1))
项目:seq2seq    作者:google    | 项目源码 | 文件源码
def compute_loss(self, decoder_output, _features, labels):
    """Computes the loss for this model.

    Returns a tuple `(losses, loss)`, where `losses` are the per-batch
    losses and loss is a single scalar tensor to minimize.
    """
    #pylint: disable=R0201
    # Calculate loss per example-timestep of shape [B, T]
    losses = seq2seq_losses.cross_entropy_sequence_loss(
        logits=decoder_output.logits[:, :, :],
        targets=tf.transpose(labels["target_ids"][:, 1:], [1, 0]),
        sequence_length=labels["target_len"] - 1)

    # Calculate the average log perplexity
    loss = tf.reduce_sum(losses) / tf.to_float(
        tf.reduce_sum(labels["target_len"] - 1))

    return losses, loss
项目:dcan-tensorflow    作者:lisjin    | 项目源码 | 文件源码
def get_dice_coef(logits, labels):
    """Compute dice coefficient.
    Args:
        logits: Softmax probability applied to fuse layers.
        labels: Correct annotations (0 or 1).
    Returns:
        Mean dice coefficient over full tensor.

    Source:
        https://github.com/zsdonghao/tensorlayer/blob/master/tensorlayer/cost.py#L125
    """
    smooth = 1e-5
    inter = tf.reduce_sum(tf.multiply(logits, labels))
    l = tf.reduce_sum(logits)
    r = tf.reduce_sum(labels)
    return tf.reduce_mean((2.0 * inter + smooth) / (l + r + smooth))
项目:deep-learning    作者:ljanyst    | 项目源码 | 文件源码
def __init__(self, embedding):
        self.sess         = tf.Session()
        self.inputs       = tf.placeholder(tf.float32,
                                           [None, embedding.shape[1]],
                                           name='inputs')
        self.test_vec     = tf.placeholder(tf.float32, [1, embedding.shape[1]],
                                           name='test_vec')
        self.cos_distance = tf.matmul(self.inputs, tf.transpose(self.test_vec))

        #-----------------------------------------------------------------------
        # Compute normalized embedding matrix
        #-----------------------------------------------------------------------
        row_sum    = tf.reduce_sum(tf.square(self.inputs), axis=1,
                                   keep_dims=True)
        norm       = tf.sqrt(row_sum)
        self.normalized = self.inputs / norm
        self.embedding = self.sess.run(self.normalized,
                                       feed_dict={self.inputs: embedding})

    #---------------------------------------------------------------------------
项目:cxflow-tensorflow    作者:Cognexa    | 项目源码 | 文件源码
def bin_stats(predictions: tf.Tensor, labels: tf.Tensor) -> Tuple[tf.Tensor, tf.Tensor, tf.Tensor]:
    """
    Calculate f1, precision and recall from binary classification expected and predicted values.

    :param predictions: 2-d tensor (batch, predictions) of predicted 0/1 classes
    :param labels: 2-d tensor (batch, labels) of expected 0/1 classes
    :return: a tuple of batched (f1, precision and recall) values
    """
    predictions = tf.cast(predictions, tf.int32)
    labels = tf.cast(labels, tf.int32)

    true_positives = tf.reduce_sum((predictions * labels), axis=1)
    false_positives = tf.reduce_sum(tf.cast(tf.greater(predictions, labels), tf.int32), axis=1)
    false_negatives = tf.reduce_sum(tf.cast(tf.greater(labels, predictions), tf.int32), axis=1)

    recall = true_positives / (true_positives + false_negatives)
    precision = true_positives / (true_positives + false_positives)
    f1_score = 2 / (1 / precision + 1 / recall)

    return f1_score, precision, recall
项目:cxflow-tensorflow    作者:Cognexa    | 项目源码 | 文件源码
def bin_dice(predictions: tf.Tensor, labels: tf.Tensor) -> tf.Tensor:
    """
    Calculate Sorensen–Dice coefficient from the given binary classification expected and predicted values.

    The coefficient is defined as :math:`2*|X \cup Y| / (|X| + |Y|)`.

    :param predictions: 2-d tensor (batch, predictions) of predicted 0/1 classes
    :param labels: 2-d tensor (batch, labels) of expected 0/1 classes
    :return: batched Sørensen–Dice coefficients
    """
    predictions = tf.cast(predictions, tf.int32)
    labels = tf.cast(labels, tf.int32)

    true_positives = tf.reduce_sum((predictions * labels), axis=1)
    pred_positives = tf.reduce_sum(predictions, axis=1)
    label_positives = tf.reduce_sum(labels, axis=1)

    return 2 * true_positives / (pred_positives + label_positives)
项目:a-nice-mc    作者:ermongroup    | 项目源码 | 文件源码
def simulate_dynamics(initial_pos, initial_vel, stepsize, n_steps, energy_fn):
    def leapfrog(pos, vel, step, i):
        de_dp_ = tf.gradients(tf.reduce_sum(energy_fn(pos)), pos)[0]
        new_vel_ = vel - step * de_dp_
        new_pos_ = pos + step * new_vel_
        return [new_pos_, new_vel_, step, tf.add(i, 1)]

    def condition(pos, vel, step, i):
        return tf.less(i, n_steps)

    de_dp = tf.gradients(tf.reduce_sum(energy_fn(initial_pos)), initial_pos)[0]
    vel_half_step = initial_vel - 0.5 * stepsize * de_dp
    pos_full_step = initial_pos + stepsize * vel_half_step

    i = tf.constant(0)
    final_pos, new_vel, _, _ = tf.while_loop(condition, leapfrog, [pos_full_step, vel_half_step, stepsize, i])
    de_dp = tf.gradients(tf.reduce_sum(energy_fn(final_pos)), final_pos)[0]
    final_vel = new_vel - 0.5 * stepsize * de_dp
    return final_pos, final_vel
项目:Renewables_Scenario_Gen_GAN    作者:chennnnnyize    | 项目源码 | 文件源码
def build_model(self):

        Z = tf.placeholder(tf.float32, [self.batch_size, self.dim_z])
        Y = tf.placeholder(tf.float32, [self.batch_size, self.dim_y])

        image_real = tf.placeholder(tf.float32, [self.batch_size]+self.image_shape)
        h4 = self.generate(Z,Y)
        #image_gen comes from sigmoid output of generator
        image_gen = tf.nn.sigmoid(h4)

        raw_real2 = self.discriminate(image_real, Y)
        #p_real = tf.nn.sigmoid(raw_real)
        p_real=tf.reduce_mean(raw_real2)

        raw_gen2 = self.discriminate(image_gen, Y)
        #p_gen = tf.nn.sigmoid(raw_gen)
        p_gen = tf.reduce_mean(raw_gen2)

        discrim_cost = tf.reduce_sum(raw_real2) - tf.reduce_sum(raw_gen2)
        gen_cost = -tf.reduce_mean(raw_gen2)

        return Z, Y, image_real, discrim_cost, gen_cost, p_real, p_gen
项目:segmentation_DLMI    作者:imatge-upc    | 项目源码 | 文件源码
def categorical_crossentropy_3d(y_true, y_predicted):
    """
    Computes categorical cross-entropy loss for a softmax distribution in a hot-encoded 3D array
    with shape (num_samples, num_classes, dim1, dim2, dim3)

    Parameters
    ----------
    y_true : keras.placeholder [batches, dim0,dim1,dim2]
        Placeholder for data holding the ground-truth labels encoded in a one-hot representation
    y_predicted : keras.placeholder [batches,channels,dim0,dim1,dim2]
        Placeholder for data holding the softmax distribution over classes

    Returns
    -------
    scalar
        Categorical cross-entropy loss value
    """
    y_true_flatten = K.flatten(y_true)
    y_pred_flatten = K.flatten(y_predicted)
    y_pred_flatten_log = -K.log(y_pred_flatten + K.epsilon())
    num_total_elements = K.sum(y_true_flatten)
    # cross_entropy = K.dot(y_true_flatten, K.transpose(y_pred_flatten_log))
    cross_entropy = tf.reduce_sum(tf.multiply(y_true_flatten, y_pred_flatten_log))
    mean_cross_entropy = cross_entropy / (num_total_elements + K.epsilon())
    return mean_cross_entropy
项目:yoctol-keras-layer-zoo    作者:Yoctol    | 项目源码 | 文件源码
def step_with_training(self, training=None):

        def step(inputs, states):
            input_shape = K.int_shape(inputs)
            y_tm1 = self.layer.preprocess_input(
                K.expand_dims(states[0], axis=1),
                training
            )
            y_tm1 = K.reshape(y_tm1, (-1, input_shape[-1]))

            inputs_sum = tf.reduce_sum(inputs)

            def inputs_f(): return inputs
            def output_f(): return y_tm1
            current_inputs = tf.case(
                [(tf.equal(inputs_sum, 0.0), output_f)],
                default=inputs_f
            )

            return self.layer.step(
                current_inputs,
                states
            )

        return step
项目:Tensormodels    作者:asheshjain399    | 项目源码 | 文件源码
def l1_regularizer(weight=1.0, scope=None):
  """Define a L1 regularizer.

  Args:
    weight: scale the loss by this factor.
    scope: Optional scope for op_scope.

  Returns:
    a regularizer function.
  """
  def regularizer(tensor):
    with tf.op_scope([tensor], scope, 'L1Regularizer'):
      l1_weight = tf.convert_to_tensor(weight,
                                       dtype=tensor.dtype.base_dtype,
                                       name='weight')
      return tf.mul(l1_weight, tf.reduce_sum(tf.abs(tensor)), name='value')
  return regularizer
项目:Tensormodels    作者:asheshjain399    | 项目源码 | 文件源码
def l1_l2_regularizer(weight_l1=1.0, weight_l2=1.0, scope=None):
  """Define a L1L2 regularizer.

  Args:
    weight_l1: scale the L1 loss by this factor.
    weight_l2: scale the L2 loss by this factor.
    scope: Optional scope for op_scope.

  Returns:
    a regularizer function.
  """
  def regularizer(tensor):
    with tf.op_scope([tensor], scope, 'L1L2Regularizer'):
      weight_l1_t = tf.convert_to_tensor(weight_l1,
                                         dtype=tensor.dtype.base_dtype,
                                         name='weight_l1')
      weight_l2_t = tf.convert_to_tensor(weight_l2,
                                         dtype=tensor.dtype.base_dtype,
                                         name='weight_l2')
      reg_l1 = tf.mul(weight_l1_t, tf.reduce_sum(tf.abs(tensor)),
                      name='value_l1')
      reg_l2 = tf.mul(weight_l2_t, tf.nn.l2_loss(tensor),
                      name='value_l2')
      return tf.add(reg_l1, reg_l2, name='value')
  return regularizer
项目:Tensormodels    作者:asheshjain399    | 项目源码 | 文件源码
def l1_loss(tensor, weight=1.0, scope=None):
  """Define a L1Loss, useful for regularize, i.e. lasso.

  Args:
    tensor: tensor to regularize.
    weight: scale the loss by this factor.
    scope: Optional scope for op_scope.

  Returns:
    the L1 loss op.
  """
  with tf.op_scope([tensor], scope, 'L1Loss'):
    weight = tf.convert_to_tensor(weight,
                                  dtype=tensor.dtype.base_dtype,
                                  name='loss_weight')
    loss = tf.mul(weight, tf.reduce_sum(tf.abs(tensor)), name='value')
    tf.add_to_collection(LOSSES_COLLECTION, loss)
    return loss
项目:comprehend    作者:Fenugreek    | 项目源码 | 文件源码
def recode_cost(self, inputs, variation, eps=1e-5, **kwargs):
        """
        Cost for given input batch of samples, under current params.
        """
        h = self.get_h_inputs(inputs)
        z_mu = tf.matmul(h, self.params['Mhz']) + self.params['bMhz']
        z_sig = tf.matmul(h, self.params['Shz']) + self.params['bShz']

        # KL divergence between latent space induced by encoder and ...
        lat_loss = -tf.reduce_sum(1 + z_sig - z_mu**2 - tf.exp(z_sig), 1)

        z = z_mu + tf.sqrt(tf.exp(z_sig)) * variation
        h = self.get_h_latents(z)
        x_mu = self.decoding(tf.matmul(h, self.params['Mhx']) + self.params['bMhx'])
        x_sig = self.decoding(tf.matmul(h, self.params['Shx']) + self.params['bShx'])
#        x_sig = tf.clip_by_value(x_mu * (1 - x_mu), .05, 1)

        # decoding likelihood term
        like_loss = tf.reduce_sum(tf.log(x_sig + eps) +
                                  (inputs - x_mu)**2 / x_sig, 1)

#        # Mean cross entropy between input and encode-decoded input.
#        like_loss = 2 * tf.reduce_sum(functions.cross_entropy(inputs, x_mu), 1)

        return .5 * tf.reduce_mean(like_loss + lat_loss)
项目:deligan    作者:val-iisc    | 项目源码 | 文件源码
def Minibatch_Discriminator(input, num_kernels=100, dim_per_kernel=5, init=False, name='MD'):
    num_inputs=df_dim*4
    theta = tf.get_variable(name+"/theta",[num_inputs, num_kernels, dim_per_kernel], initializer=tf.random_normal_initializer(stddev=0.05))
    log_weight_scale = tf.get_variable(name+"/lws",[num_kernels, dim_per_kernel], initializer=tf.constant_initializer(0.0))
    W = tf.mul(theta, tf.expand_dims(tf.exp(log_weight_scale)/tf.sqrt(tf.reduce_sum(tf.square(theta),0)),0))
    W = tf.reshape(W,[-1,num_kernels*dim_per_kernel])
    x = input
    x=tf.reshape(x, [batchsize,num_inputs])
    activation = tf.matmul(x, W)
    activation = tf.reshape(activation,[-1,num_kernels,dim_per_kernel])
    abs_dif = tf.mul(tf.reduce_sum(tf.abs(tf.sub(tf.expand_dims(activation,3),tf.expand_dims(tf.transpose(activation,[1,2,0]),0))),2),
                                                1-tf.expand_dims(tf.constant(np.eye(batchsize),dtype=np.float32),1))
    f = tf.reduce_sum(tf.exp(-abs_dif),2)/tf.reduce_sum(tf.exp(-abs_dif))
    print(f.get_shape())
    print(input.get_shape())
    return tf.concat(1,[x, f])
项目:text_classification    作者:brightmart    | 项目源码 | 文件源码
def output_module(self):
        """
        1.use attention mechanism between query and hidden states, to get weighted sum of hidden state. 2.non-linearity of query and hidden state to get label.
        input: query_embedding:[batch_size,embed_size], hidden state:[batch_size,block_size,hidden_size] of memory
        :return:y: predicted label.[]
        """
        # 1.use attention mechanism between query and hidden states, to get weighted sum of hidden state.
        # 1.1 get possibility distribution (of similiarity)
        p=tf.nn.softmax(tf.multiply(tf.expand_dims(self.query_embedding,axis=1),self.hidden_state)) #shape:[batch_size,block_size,hidden_size]<---query_embedding_expand:[batch_size,1,hidden_size]; hidden_state:[batch_size,block_size,hidden_size]
        # 1.2 get weighted sum of hidden state
        u=tf.reduce_sum(tf.multiply(p,self.hidden_state),axis=1) #shape:[batch_size,hidden_size]<----------([batch_size,block_size,hidden_size],[batch_size,block_size,hidden_size])

        # 2.non-linearity of query and hidden state to get label
        H_u_matmul=tf.matmul(u,self.H)+self.h_u_bias #shape:[batch_size,hidden_size]<----([batch_size,hidden_size],[hidden_size,hidden_size])
        activation=self.activation(self.query_embedding + H_u_matmul,scope="query_add_hidden")           #shape:[batch_size,hidden_size]
        activation = tf.nn.dropout(activation,keep_prob=self.dropout_keep_prob) #shape:[batch_size,hidden_size]
        y=tf.matmul(activation,self.R)+self.y_bias #shape:[batch_size,vocab_size]<-----([batch_size,hidden_size],[hidden_size,vocab_size])
        return y #shape:[batch_size,vocab_size]
项目:text_classification    作者:brightmart    | 项目源码 | 文件源码
def loss(self, l2_lambda=0.0001):  # 0.001
        with tf.name_scope("loss"):
            # input: `logits`:[batch_size, num_classes], and `labels`:[batch_size]
            # output: A 1-D `Tensor` of length `batch_size` of the same type as `logits` with the softmax cross entropy loss.
            losses = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=self.input_y_label,logits=self.logits);  # sigmoid_cross_entropy_with_logits.#losses=tf.nn.softmax_cross_entropy_with_logits(labels=self.input_y,logits=self.logits)
            # print("1.sparse_softmax_cross_entropy_with_logits.losses:",losses) # shape=(?,)
            loss = tf.reduce_mean(losses)  # print("2.loss.loss:", loss) #shape=()
            l2_losses = tf.add_n([tf.nn.l2_loss(v) for v in tf.trainable_variables() if ('bias' not in v.name ) and ('alpha' not in v.name)]) * l2_lambda
            loss = loss + l2_losses
        return loss

    #def loss_seq2seq(self):
    #    with tf.variable_scope("loss"):
    #        losses = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=self.input_y_label, logits=self.logits);#losses:[batch_size,self.decoder_sent_length]
    #        loss_batch=tf.reduce_sum(losses,axis=1)/self.decoder_sent_length #loss_batch:[batch_size]
    #        loss=tf.reduce_mean(loss_batch)
    #        l2_losses = tf.add_n([tf.nn.l2_loss(v) for v in tf.trainable_variables() if 'bias' not in v.name]) * self.l2_lambda
    #        loss = loss + l2_losses
    #        return loss
项目:RickLiuGC    作者:liuruijin17    | 项目源码 | 文件源码
def SoftArgmin(outputLeft, outputRight, D=192):

    left_result_D = outputLeft
    right_result_D = outputRight
    left_result_D_squeeze = tf.squeeze(left_result_D, axis=[0, 4])
    right_result_D_squeeze = tf.squeeze(right_result_D, axis=[0, 4])  # 192 256 512
    left_result_softmax = tf.nn.softmax(left_result_D_squeeze, dim=0)
    right_result_softmax = tf.nn.softmax(right_result_D_squeeze, dim=0)  # 192 256 512

    d_grid = tf.cast(tf.range(D), tf.float32)
    d_grid = tf.reshape(d_grid, (-1, 1, 1))
    d_grid = tf.tile(d_grid, [1, 256, 512])

    left_softargmin = tf.reduce_sum(tf.multiply(left_result_softmax, d_grid), axis=0, keep_dims=True)
    right_softargmin = tf.reduce_sum(tf.multiply(right_result_softmax, d_grid), axis=0, keep_dims=True)

    return left_softargmin, right_softargmin
项目:DmsMsgRcg    作者:bshao001    | 项目源码 | 文件源码
def custom_loss(y_true, y_pred):
        # Get prediction
        pred_box_xy = tf.sigmoid(y_pred[..., :2])
        pred_box_wh = y_pred[..., 2:4]
        pred_box_conf = tf.sigmoid(y_pred[..., 4])

        # Get ground truth
        true_box_xy = y_true[..., :2]
        true_box_wh = y_true[..., 2:4]
        true_box_conf = y_true[..., 4]

        # Determine the mask: simply the position of the ground truth boxes (the predictors)
        true_mask = tf.expand_dims(y_true[..., 4], axis=-1)

        # Calculate the loss. A scale can be associated with each loss, indicating how important
        # the loss is. The bigger the scale, more important the loss is.
        loss_xy = tf.reduce_sum(tf.square(true_box_xy - pred_box_xy) * true_mask) * 1.0
        loss_wh = tf.reduce_sum(tf.square(true_box_wh - pred_box_wh) * true_mask) * 1.0
        loss_conf = tf.reduce_sum(tf.square(true_box_conf - pred_box_conf)) * 1.2

        loss = loss_xy + loss_wh + loss_conf
        return loss
项目:tfplus    作者:renmengye    | 项目源码 | 文件源码
def build_loss(self, inp, output):
        y_gt = inp['y_gt']
        y_out = output['y_out']
        ce = tfplus.nn.CE()({'y_gt': y_gt, 'y_out': y_out})
        num_ex_f = tf.to_float(tf.shape(inp['x'])[0])
        ce = tf.reduce_sum(ce) / num_ex_f
        self.add_loss(ce)
        total_loss = self.get_loss()
        self.register_var('loss', total_loss)

        ans = tf.argmax(y_gt, 1)
        correct = tf.equal(ans, tf.argmax(y_out, 1))
        top5_acc = tf.reduce_sum(tf.to_float(
            tf.nn.in_top_k(y_out, ans, 5))) / num_ex_f
        self.register_var('top5_acc', top5_acc)
        acc = tf.reduce_sum(tf.to_float(correct)) / num_ex_f
        self.register_var('acc', acc)
        return total_loss
项目:tfplus    作者:renmengye    | 项目源码 | 文件源码
def build_loss_grad(self, inp, output):
        y_gt = inp['y_gt']
        y_out = output['y_out']
        ce = tfplus.nn.CE()({'y_gt': y_gt, 'y_out': y_out})
        num_ex_f = tf.to_float(tf.shape(inp['x'])[0])
        ce = tf.reduce_sum(ce) / num_ex_f
        self.add_loss(ce)
        learn_rate = self.get_option('learn_rate')
        total_loss = self.get_loss()
        self.register_var('loss', total_loss)
        eps = self.get_option('adam_eps')
        optimizer = tf.train.AdamOptimizer(learn_rate, epsilon=eps)
        global_step = tf.Variable(0.0)
        self.register_var('step', global_step)
        train_step = optimizer.minimize(
            total_loss, global_step=global_step)
        self.register_var('train_step', train_step)
        correct = tf.equal(tf.argmax(y_gt, 1), tf.argmax(y_out, 1))
        acc = tf.reduce_sum(tf.to_float(correct)) / num_ex_f
        self.register_var('acc', acc)
        pass
项目:querysum    作者:helmertz    | 项目源码 | 文件源码
def _score(self, prev_decoder_state, prev_embedding):
        # Returns scores in a tensor of shape [batch_size, input_sequence_length]

        if self.mode == 'decode':
            query_part = self.query_attention_partial_score_placeholder
            encoder_part = self.encoder_state_attention_partial_scores_placeholder
        else:
            query_part = self.query_attention_partial_score
            encoder_part = self.encoder_state_attention_partial_scores

        embedding_part = tf.matmul(prev_embedding, self.attention_w_e)

        output = tf.matmul(prev_decoder_state,
                           self.attention_w) + embedding_part + query_part + encoder_part + self.attention_b
        output = tf.tanh(output)
        output = tf.reduce_sum(self.attention_v * output, axis=2)
        output = tf.transpose(output, [1, 0])

        # Handle input document padding by giving a large penalty, eliminating it from the weighted average
        padding_penalty = -1e20 * tf.to_float(1 - tf.sign(self.documents_placeholder))
        masked = output + padding_penalty

        return masked
项目:querysum    作者:helmertz    | 项目源码 | 文件源码
def _attention(self, prev_decoder_state, prev_embedding):
        with tf.variable_scope('attention') as scope:
            # e = score of shape [batch_size, output_seq_length, input_seq_length], e_{ij} = score(s_{i-1}, h_j)
            # e_i = score of shape [batch_size, input_seq_length], e_ij = score(prev_decoder_state, h_j)
            e_i = self._score(prev_decoder_state, prev_embedding)

            # alpha_i = softmax(e_i) of shape [batch_size, input_seq_length]
            alpha_i = tf.nn.softmax(e_i)

            resized_alpha_i = tf.reshape(tf.tile(alpha_i, [1, self.encoder_output_size]),
                                         [self.batch_size, -1, self.encoder_output_size])

            if self.mode == 'decode':
                c_i = tf.reduce_sum(tf.multiply(resized_alpha_i, self.pre_computed_encoder_states_placeholder), axis=1)
            else:
                c_i = tf.reduce_sum(tf.multiply(resized_alpha_i, self.encoder_outputs), axis=1)
            return c_i, e_i
项目:A3C    作者:go2sea    | 项目源码 | 文件源码
def actor_loss(self):
        if self.config.mode == 'discrete':
            log_prob = tf.reduce_sum(tf.log(self.a_prob) * tf.one_hot(self.action_input, self.action_dim, dtype=tf.float32),
                                     axis=1, keep_dims=True)
            # use entropy to encourage exploration
            exp_v = log_prob * self.TD_loss
            entropy = -tf.reduce_sum(self.a_prob * tf.log(self.a_prob), axis=1, keep_dims=True)  # encourage exploration
            exp_v = self.config.ENTROPY_BETA * entropy + exp_v
            return tf.reduce_mean(-exp_v)  # ????????log_prb????????????????????TD_loss
        elif self.config.mode == 'continuous':
            log_prob = self.action_normal_dist.log_prob(self.action_input)
            exp_v = log_prob * self.TD_loss
            # use entropy to encourage exploration
            exp_v = self.config.ENTROPY_BETA * self.action_normal_dist.entropy() + exp_v
            return tf.reduce_mean(-exp_v)
项目:almond-nnparser    作者:Stanford-Mobisocial-IoT-Lab    | 项目源码 | 文件源码
def encode(self, inputs, _input_length, _parses):
        with tf.variable_scope('BagOfWordsEncoder'):
            W = tf.get_variable('W', (self.embed_size, self.output_size))
            b = tf.get_variable('b', shape=(self.output_size,), initializer=tf.constant_initializer(0, tf.float32))

            enc_hidden_states = tf.tanh(tf.tensordot(inputs, W, [[2], [0]]) + b)
            enc_final_state = tf.reduce_sum(enc_hidden_states, axis=1)

            #assert enc_hidden_states.get_shape()[1:] == (self.config.max_length, self.config.hidden_size)
            if self._cell_type == 'lstm':
                enc_final_state = (tf.contrib.rnn.LSTMStateTuple(enc_final_state, enc_final_state),)

            enc_output = tf.nn.dropout(enc_hidden_states, keep_prob=self._dropout, seed=12345)

            return enc_output, enc_final_state
项目:facerecognition    作者:guoxiaolu    | 项目源码 | 文件源码
def decov_loss(xs):
    """Decov loss as described in https://arxiv.org/pdf/1511.06068.pdf
    'Reducing Overfitting In Deep Networks by Decorrelating Representation'
    """
    x = tf.reshape(xs, [int(xs.get_shape()[0]), -1])
    m = tf.reduce_mean(x, 0, True)
    z = tf.expand_dims(x-m, 2)
    corr = tf.reduce_mean(tf.matmul(z, tf.transpose(z, perm=[0,2,1])), 0)
    corr_frob_sqr = tf.reduce_sum(tf.square(corr))
    corr_diag_sqr = tf.reduce_sum(tf.square(tf.diag_part(corr)))
    loss = 0.5*(corr_frob_sqr - corr_diag_sqr)
    return loss
项目:variational-text-tensorflow    作者:carpedm20    | 项目源码 | 文件源码
def build_model(self):
    self.x = tf.placeholder(tf.float32, [self.reader.vocab_size], name="input")
    self.x_idx = tf.placeholder(tf.int32, [None], name="x_idx")

    self.build_encoder()
    self.build_generator()

    # Kullback Leibler divergence
    self.e_loss = -0.5 * tf.reduce_sum(1 + self.log_sigma_sq - tf.square(self.mu) - tf.exp(self.log_sigma_sq))

    # Log likelihood
    self.g_loss = -tf.reduce_sum(tf.log(tf.gather(self.p_x_i, self.x_idx) + 1e-10))

    self.loss = self.e_loss + self.g_loss

    self.encoder_var_list, self.generator_var_list = [], []
    for var in tf.trainable_variables():
      if "encoder" in var.name:
        self.encoder_var_list.append(var)
      elif "generator" in var.name:
        self.generator_var_list.append(var)

    # optimizer for alternative update
    self.optim_e = tf.train.AdamOptimizer(learning_rate=self.lr) \
                         .minimize(self.e_loss, global_step=self.step, var_list=self.encoder_var_list)
    self.optim_g = tf.train.AdamOptimizer(learning_rate=self.lr) \
                         .minimize(self.g_loss, global_step=self.step, var_list=self.generator_var_list)

    # optimizer for one shot update
    self.optim = tf.train.AdamOptimizer(learning_rate=self.lr) \
                         .minimize(self.loss, global_step=self.step)

    _ = tf.scalar_summary("encoder loss", self.e_loss)
    _ = tf.scalar_summary("generator loss", self.g_loss)
    _ = tf.scalar_summary("total loss", self.loss)
项目:youtube-8m    作者:wangheda    | 项目源码 | 文件源码
def create_model(self, model_input, vocab_size, num_frames, **unused_params):
    """Creates a model which uses a logistic classifier over the average of the
    frame-level features.

    This class is intended to be an example for implementors of frame level
    models. If you want to train a model over averaged features it is more
    efficient to average them beforehand rather than on the fly.

    Args:
      model_input: A 'batch_size' x 'max_frames' x 'num_features' matrix of
                   input features.
      vocab_size: The number of classes in the dataset.
      num_frames: A vector of length 'batch' which indicates the number of
           frames for each video (before padding).

    Returns:
      A dictionary with a tensor containing the probability predictions of the
      model in the 'predictions' key. The dimensions of the tensor are
      'batch_size' x 'num_classes'.
    """
    num_frames = tf.cast(tf.expand_dims(num_frames, 1), tf.float32)
    feature_size = model_input.get_shape().as_list()[2]
    max_frames = model_input.get_shape().as_list()[1]


    denominators = tf.reshape(
        tf.tile(num_frames, [1, feature_size]), [-1, feature_size])
    avg_pooled = tf.reduce_sum(model_input,
                               axis=[1]) / denominators
    output = slim.fully_connected(
        avg_pooled, vocab_size, activation_fn=tf.nn.sigmoid,
        weights_regularizer=slim.l2_regularizer(1e-8))
    return {"predictions": output}
项目:youtube-8m    作者:wangheda    | 项目源码 | 文件源码
def sub_moe(self,
                model_input,
                vocab_size,
                num_mixtures = None,
                l2_penalty=1e-8,
                scopename="",
                **unused_params):

        num_mixtures = num_mixtures or FLAGS.moe_num_mixtures

        gate_activations = slim.fully_connected(
            model_input,
            vocab_size * (num_mixtures + 1),
            activation_fn=None,
            biases_initializer=None,
            weights_regularizer=slim.l2_regularizer(l2_penalty),
            scope="gates"+scopename)
        expert_activations = slim.fully_connected(
            model_input,
            vocab_size * num_mixtures,
            activation_fn=None,
            weights_regularizer=slim.l2_regularizer(l2_penalty),
            scope="experts"+scopename)

        gating_distribution = tf.nn.softmax(tf.reshape(
            gate_activations,
            [-1, num_mixtures + 1]))  # (Batch * #Labels) x (num_mixtures + 1)
        expert_distribution = tf.nn.sigmoid(tf.reshape(
            expert_activations,
            [-1, num_mixtures]))  # (Batch * #Labels) x num_mixtures


        final_probabilities_by_class_and_batch = tf.reduce_sum(
            gating_distribution[:, :num_mixtures] * expert_distribution, 1)

        final_probabilities = tf.reshape(final_probabilities_by_class_and_batch,
                                         [-1, vocab_size])
        return model_input, final_probabilities
项目:youtube-8m    作者:wangheda    | 项目源码 | 文件源码
def create_model(self,
                     model_input,
                     vocab_size,
                     num_frames,
                     **unused_params):

        shape = model_input.get_shape().as_list()
        frames_sum = tf.reduce_sum(tf.abs(model_input),axis=2)
        frames_true = tf.ones(tf.shape(frames_sum))
        frames_false = tf.zeros(tf.shape(frames_sum))
        frames_bool = tf.reshape(tf.where(tf.greater(frames_sum, frames_false), frames_true, frames_false),[-1,shape[1],1])

        activation_1 = tf.reduce_max(model_input, axis=1)
        activation_2 = tf.reduce_sum(model_input*frames_bool, axis=1)/(tf.reduce_sum(frames_bool, axis=1)+1e-6)
        activation_3 = tf.reduce_min(model_input, axis=1)

        model_input_1, final_probilities_1 = self.sub_moe(activation_1,vocab_size,scopename="_max")
        model_input_2, final_probilities_2 = self.sub_moe(activation_2,vocab_size,scopename="_mean")
        model_input_3, final_probilities_3 = self.sub_moe(activation_3,vocab_size,scopename="_min")
        final_probilities = tf.stack((final_probilities_1,final_probilities_2,final_probilities_3),axis=1)
        weight2d = tf.get_variable("ensemble_weight2d",
                                   shape=[shape[2], 3, vocab_size],
                                   regularizer=slim.l2_regularizer(1.0e-8))
        activations = tf.stack((model_input_1, model_input_2, model_input_3), axis=2)
        weight = tf.nn.softmax(tf.einsum("aij,ijk->ajk", activations, weight2d), dim=1)
        result = {}
        result["prediction_frames"] = tf.reshape(final_probilities,[-1,vocab_size])
        result["predictions"] = tf.reduce_sum(final_probilities*weight,axis=1)
        return result
项目:youtube-8m    作者:wangheda    | 项目源码 | 文件源码
def calculate_loss(self, predictions, labels, **unused_params):
        with tf.name_scope("loss_frames"):
            epsilon = 10e-6
            float_labels = tf.cast(labels, tf.float32)
            cross_entropy_loss = float_labels * tf.log(predictions + epsilon) + (
                1 - float_labels) * tf.log(1 - predictions + epsilon)

        return tf.reduce_sum(cross_entropy_loss, 2)
项目:youtube-8m    作者:wangheda    | 项目源码 | 文件源码
def calculate_loss(self, predictions, labels, **unused_params):
        with tf.name_scope("loss_frames"):
            epsilon = 10e-6
            float_labels = tf.cast(labels, tf.float32)
            cross_entropy_loss = float_labels * tf.log(predictions + epsilon) + (
                1 - float_labels) * tf.log(1 - predictions + epsilon)

        return tf.reduce_sum(cross_entropy_loss, axis=2)
项目:youtube-8m    作者:wangheda    | 项目源码 | 文件源码
def create_model(self, model_input, vocab_size, num_frames, l2_penalty=1e-8, **unused_params):

        num_extend = FLAGS.moe_num_extend
        num_layers = num_extend
        lstm_size = FLAGS.lstm_cells
        pool_size=2
        cnn_input = model_input
        num_filters=[256,256,512]
        filter_sizes=[1,2,3]
        features_size = sum(num_filters)
        final_probilities = []
        moe_inputs = []
        for layer in range(num_layers):
            cnn_output, num_t = self.cnn(cnn_input, num_filters=num_filters, filter_sizes=filter_sizes, sub_scope="cnn%d"%(layer+1))
            cnn_output = tf.nn.relu(cnn_output)
            cnn_multiscale = self.rnn(cnn_output,lstm_size, num_frames,sub_scope="rnn%d"%(layer+1))
            moe_inputs.append(cnn_multiscale)
            final_probility = self.sub_moe(cnn_multiscale,vocab_size,scopename="moe%d"%(layer+1))
            final_probilities.append(final_probility)
            num_t = pool_size*(num_t//pool_size)
            cnn_output = tf.reshape(cnn_output[:,:num_t,:],[-1,num_t//pool_size,pool_size,features_size])
            cnn_input = tf.reduce_max(cnn_output, axis=2)
            num_frames = tf.maximum(num_frames//pool_size,1)

        final_probilities = tf.stack(final_probilities,axis=1)
        moe_inputs = tf.stack(moe_inputs,axis=1)
        weight2d = tf.get_variable("ensemble_weight2d",
                                   shape=[num_extend, features_size, vocab_size],
                                   regularizer=slim.l2_regularizer(1.0e-8))
        weight = tf.nn.softmax(tf.einsum("aij,ijk->aik", moe_inputs, weight2d), dim=1)
        result = {}
        result["prediction_frames"] = tf.reshape(final_probilities,[-1,vocab_size])
        result["predictions"] = tf.reduce_sum(final_probilities*weight,axis=1)
        return result
项目:youtube-8m    作者:wangheda    | 项目源码 | 文件源码
def create_model(self, model_input, vocab_size, num_frames, distill_labels=None, l2_penalty=1e-8, **unused_params):

        num_extend = FLAGS.moe_num_extend
        num_layers = num_extend
        lstm_size = FLAGS.lstm_cells
        pool_size = 2
        cnn_input = model_input
        cnn_size = FLAGS.cnn_cells
        num_filters = [cnn_size, cnn_size, cnn_size*2]
        filter_sizes = [1, 2, 3]
        features_size = sum(num_filters)
        final_probilities = []
        moe_inputs = []

        for layer in range(num_layers):
            cnn_output, num_t = self.cnn(cnn_input, num_filters=num_filters, filter_sizes=filter_sizes, sub_scope="cnn%d"%(layer+1))
            cnn_output = tf.nn.relu(cnn_output)
            cnn_multiscale = self.rnn(cnn_output,lstm_size, num_frames,sub_scope="rnn%d"%(layer+1))
            moe_inputs.append(cnn_multiscale)
            final_probility = self.sub_moe(cnn_multiscale,vocab_size,distill_labels=distill_labels, scopename="moe%d"%(layer+1))
            final_probilities.append(final_probility)
            num_t = pool_size*(num_t//pool_size)
            cnn_output = tf.reshape(cnn_output[:,:num_t,:],[-1,num_t//pool_size,pool_size,features_size])
            cnn_input = tf.reduce_max(cnn_output, axis=2)
            num_frames = tf.maximum(num_frames//pool_size,1)

        final_probilities = tf.stack(final_probilities,axis=1)
        moe_inputs = tf.stack(moe_inputs,axis=1)
        weight2d = tf.get_variable("ensemble_weight2d",
                                   shape=[num_extend, lstm_size, vocab_size],
                                   regularizer=slim.l2_regularizer(1.0e-8))
        weight = tf.nn.softmax(tf.einsum("aij,ijk->aik", moe_inputs, weight2d), dim=1)
        result = {}
        result["prediction_frames"] = tf.reshape(final_probilities,[-1,vocab_size])
        result["predictions"] = tf.reduce_sum(final_probilities*weight,axis=1)
        return result
项目:youtube-8m    作者:wangheda    | 项目源码 | 文件源码
def create_model(self, model_input, vocab_size, num_frames, l2_penalty=1e-8, **unused_params):

        num_extend = FLAGS.moe_num_extend
        num_layers = 10
        pool_size=2
        cnn_input = model_input
        num_filters=[256,256,512]
        filter_sizes=[1,2,3]
        features_size = sum(num_filters)

        for layer in range(num_layers):
            cnn_output, num_t = self.cnn(cnn_input, num_filters=num_filters, filter_sizes=filter_sizes, sub_scope="cnn%d"%(layer+1))
            if layer < 3:
                num_t = pool_size*(num_t//pool_size)
                cnn_output = tf.reshape(cnn_output[:,:num_t,:],[-1,num_t//pool_size,pool_size,features_size])
                cnn_input = tf.reduce_max(cnn_output, axis=2)
            else:
                cnn_input = cnn_output

        cnn_output, num_t = self.kmax(cnn_input, num_filters=features_size, filter_sizes=num_extend, sub_scope="kmax")
        cnn_input = tf.reshape(cnn_output,[-1,features_size])
        final_probilities = self.sub_moe(cnn_input,vocab_size)
        final_probilities = tf.reshape(final_probilities,[-1,num_extend,vocab_size])
        weight2d = tf.get_variable("ensemble_weight2d",
                                   shape=[num_extend, features_size, vocab_size],
                                   regularizer=slim.l2_regularizer(1.0e-8))
        weight = tf.nn.softmax(tf.einsum("aij,ijk->aik", cnn_output, weight2d), dim=1)
        result = {}
        result["predictions"] = tf.reduce_sum(final_probilities*weight,axis=1)
        return result
项目:youtube-8m    作者:wangheda    | 项目源码 | 文件源码
def create_model(self, model_input, vocab_size, num_frames, l2_penalty=1e-8, **unused_params):
        num_extend = FLAGS.moe_num_extend
        num_layers = num_extend
        pool_size=2
        cnn_input = model_input
        num_filters=[256,256,512]
        filter_sizes=[1,2,3]
        features_size = sum(num_filters)
        final_probilities = []
        moe_inputs = []
        for layer in range(num_layers):
            cnn_output, num_t = CnnKmaxModel().cnn(cnn_input, num_filters=num_filters, filter_sizes=filter_sizes, sub_scope="cnn%d"%(layer+1), l2_penalty=0.0)
            cnn_output = tf.nn.relu(cnn_output)
            cnn_multiscale = tf.reduce_max(cnn_output,axis=1)
            moe_inputs.append(cnn_multiscale)
            final_probility = CnnKmaxModel().sub_moe(cnn_multiscale,vocab_size,scopename="moe%d"%(layer+1), l2_penalty=0.0)
            final_probilities.append(final_probility)
            num_t = pool_size*(num_t//pool_size)
            cnn_output = tf.reshape(cnn_output[:,:num_t,:],[-1,num_t//pool_size,pool_size,features_size])
            cnn_input = tf.reduce_max(cnn_output, axis=2)

        final_probilities = tf.stack(final_probilities,axis=1)
        moe_inputs = tf.stack(moe_inputs,axis=1)
        weight2d = tf.get_variable("ensemble_weight2d",
                                   shape=[num_extend, features_size, vocab_size],
                                   regularizer=slim.l2_regularizer(1.0e-8))
        weight = tf.nn.softmax(tf.einsum("aij,ijk->aik", tf.stop_gradient(moe_inputs), weight2d), dim=1)
        result = {}
        result["predictions"] = tf.reduce_sum(tf.stop_gradient(final_probilities)*weight, axis=1)
        return result
项目:youtube-8m    作者:wangheda    | 项目源码 | 文件源码
def create_model(self, model_input, vocab_size, num_frames, l2_penalty=1e-8, **unused_params):

        num_extend = FLAGS.moe_num_extend
        num_layers = num_extend
        pool_size=2
        cnn_input = model_input
        num_filters=[256,256,512]
        filter_sizes=[1,2,3]
        features_size = sum(num_filters)
        final_probilities = []
        moe_inputs = []
        for layer in range(num_layers):
            cnn_output, num_t = self.cnn(cnn_input, num_filters=num_filters, filter_sizes=filter_sizes, sub_scope="cnn%d"%(layer+1))
            cnn_output = tf.nn.relu(cnn_output)
            cnn_multiscale = tf.reduce_max(cnn_output,axis=1)
            moe_inputs.append(cnn_multiscale)
            final_probility = self.sub_moe(cnn_multiscale,vocab_size,scopename="moe%d"%(layer+1))
            final_probilities.append(final_probility)
            num_t = pool_size*(num_t//pool_size)
            cnn_output = tf.reshape(cnn_output[:,:num_t,:],[-1,num_t//pool_size,pool_size,features_size])
            cnn_input = tf.reduce_max(cnn_output, axis=2)

        final_probilities = tf.stack(final_probilities,axis=1)
        moe_inputs = tf.stack(moe_inputs,axis=1)
        weight2d = tf.get_variable("ensemble_weight2d",
                                   shape=[num_extend, features_size, vocab_size],
                                   regularizer=slim.l2_regularizer(1.0e-8))
        weight = tf.nn.softmax(tf.einsum("aij,ijk->aik", moe_inputs, weight2d), dim=1)
        result = {}
        result["prediction_frames"] = tf.reshape(final_probilities,[-1,vocab_size])
        result["predictions"] = tf.reduce_sum(final_probilities*weight,axis=1)
        return result
项目:youtube-8m    作者:wangheda    | 项目源码 | 文件源码
def sub_model(self, model_input, vocab_size, num_mixtures=None,
                  l2_penalty=1e-8, sub_scope="", distill_labels=None,**unused_params):
        num_mixtures = num_mixtures or FLAGS.moe_num_mixtures
        class_size = 256
        if distill_labels is not None:
            class_input = slim.fully_connected(
                distill_labels,
                class_size,
                activation_fn=tf.nn.relu,
                weights_regularizer=slim.l2_regularizer(l2_penalty),
                scope="class_inputs")
            class_input = tf.nn.l2_normalize(class_input, dim=1)
            model_input = tf.concat((model_input, class_input),axis=1)
        gate_activations = slim.fully_connected(
            model_input,
            vocab_size * (num_mixtures + 1),
            activation_fn=None,
            biases_initializer=None,
            weights_regularizer=slim.l2_regularizer(l2_penalty),
            scope="gates-"+sub_scope)
        expert_activations = slim.fully_connected(
            model_input,
            vocab_size * num_mixtures,
            activation_fn=None,
            weights_regularizer=slim.l2_regularizer(l2_penalty),
            scope="experts-"+sub_scope)

        gating_distribution = tf.nn.softmax(tf.reshape(
            gate_activations,
            [-1, num_mixtures + 1]))  # (Batch * #Labels) x (num_mixtures + 1)
        expert_distribution = tf.nn.sigmoid(tf.reshape(
            expert_activations,
            [-1, num_mixtures]))  # (Batch * #Labels) x num_mixtures

        final_probabilities_by_class_and_batch = tf.reduce_sum(
            gating_distribution[:, :num_mixtures] * expert_distribution, 1)
        final_probabilities = tf.reshape(final_probabilities_by_class_and_batch,
                                         [-1, vocab_size])
        return final_probabilities
项目:youtube-8m    作者:wangheda    | 项目源码 | 文件源码
def calculate_loss_distill(self, predictions, labels_distill, labels, **unused_params):
    with tf.name_scope("loss_distill"):
      print("loss_distill")
      epsilon = 10e-6
      float_labels = tf.cast(labels, tf.float32)
      float_labels_distill = tf.cast(labels_distill, tf.float32)
      embedding_mat = np.loadtxt("./resources/embedding_matrix.model")
      vocab_size = embedding_mat.shape[1]
      labels_size = float_labels.get_shape().as_list()[1]
      embedding_mat = tf.cast(embedding_mat,dtype=tf.float32)
      cross_entropy_loss_1 = float_labels * tf.log(predictions + epsilon) + (
          1 - float_labels) * tf.log(1 - predictions + epsilon)
      float_labels_1 = float_labels[:,:vocab_size]
      labels_smooth = tf.matmul(float_labels_1,embedding_mat)/tf.reduce_sum(float_labels_1,axis=1,keep_dims=True)
      float_classes = labels_smooth
      for i in range(labels_size//vocab_size-1):
        float_classes = tf.concat((float_classes,labels_smooth),axis=1)
      cross_entropy_loss_2 = float_classes * tf.log(predictions + epsilon) + (
          1 - float_classes) * tf.log(1 - predictions + epsilon)
      cross_entropy_loss_3 = float_labels_distill * tf.log(predictions + epsilon) + (
          1 - float_labels_distill) * tf.log(1 - predictions + epsilon)

      cross_entropy_loss = cross_entropy_loss_1*0.5 + cross_entropy_loss_2*0.5 + cross_entropy_loss_3*0.5
      cross_entropy_loss = tf.negative(cross_entropy_loss)

      return tf.reduce_mean(tf.reduce_sum(cross_entropy_loss, 1))
项目:youtube-8m    作者:wangheda    | 项目源码 | 文件源码
def calculate_loss_negative(self, predictions_pos, predictions_neg, labels, **unused_params):
    with tf.name_scope("loss_negative"):
      epsilon = 10e-6
      float_labels = tf.cast(labels, tf.float32)
      weight_pos = np.loadtxt(FLAGS.autoencoder_dir+"labels_uni.out")
      weight_pos = tf.reshape(tf.cast(weight_pos,dtype=tf.float32),[1,-1])
      weight_pos = tf.log(tf.reduce_max(weight_pos)/weight_pos)+1
      cross_entropy_loss_1 = float_labels * tf.log(predictions_pos + epsilon)*weight_pos + (
          1 - float_labels) * tf.log(1 - predictions_pos + epsilon)
      cross_entropy_loss_2 = (1-float_labels) * tf.log(predictions_neg + epsilon) + \
                             float_labels * tf.log(1 - predictions_neg + epsilon)
      cross_entropy_loss = tf.negative(cross_entropy_loss_1+cross_entropy_loss_2)
      return tf.reduce_mean(tf.reduce_sum(cross_entropy_loss, 1))
项目:youtube-8m    作者:wangheda    | 项目源码 | 文件源码
def calculate_mseloss(self, predictions, labels, **unused_params):
    with tf.name_scope("loss_mse"):
      float_labels = tf.cast(labels, tf.float32)
      mse_loss = tf.square(predictions-float_labels)
      return tf.reduce_mean(tf.reduce_sum(mse_loss, 1))
项目:youtube-8m    作者:wangheda    | 项目源码 | 文件源码
def calculate_loss_postprocess(self, predictions, labels, **unused_params):
    with tf.name_scope("loss_postprocess"):
      float_labels = tf.cast(labels, tf.float32)
      predictions_pos = predictions*float_labels + (1-float_labels)
      predictions_neg = predictions*(1-float_labels)
      min_pos = tf.stop_gradient(tf.reduce_min(predictions_pos))
      max_neg = tf.stop_gradient(tf.reduce_max(predictions_neg))
      predictions_pos_mistake = tf.nn.relu(max_neg-predictions_pos)-0.01*tf.nn.relu(predictions_pos-max_neg)
      predictions_neg_mistake = tf.nn.relu(predictions_neg-min_pos)-0.01*tf.nn.relu(min_pos-predictions_neg)
      postprocess_loss = predictions_pos_mistake + predictions_neg_mistake
      return tf.reduce_mean(tf.reduce_sum(postprocess_loss, 1))