def GMM_sample(mus, sigmas, mix_weights):
    """
    Draw one sample per row from a Gaussian mixture.

    First, sample according to the prior mixing probabilities
    to choose the component density.
    Second, draw sample from that density.

    Inspired by implementation in `cle`.

    ``mus`` and ``sigmas`` are indexed as ``[row, :, component]``;
    ``mix_weights`` holds the mixing probabilities (assumed to be laid out
    as ``(row, component)`` -- TODO confirm).  Uses the module-level random
    stream ``srng``.

    Returns ``(sample, selected_mus, selected_sigmas, chosen_component)``.
    """
    # NOTE(review): the right-hand side of this assignment and the trailing
    # arguments of the ``srng.normal`` call below were missing from the
    # source; both were rebuilt from the docstring -- confirm against upstream.
    chosen_component = T.argmax(srng.multinomial(pvals=mix_weights), axis=1)
    # Keep, for every row, only the parameters of the component drawn above.
    selected_mus = mus[T.arange(mus.shape[0]), :, chosen_component]
    selected_sigmas = sigmas[T.arange(sigmas.shape[0]), :, chosen_component]
    # Standard-normal noise, then scale and shift into the chosen density.
    sample = srng.normal(size=selected_mus.shape, avg=0., std=1.)
    sample *= selected_sigmas
    sample += selected_mus
    return sample, selected_mus, selected_sigmas, chosen_component
项目:structured-output-ae    作者:sbelharbi    | 项目源码 | 文件源码
def negative_log_likelihood(self, y):
        r"""
        Return the mean of the negative log-likelihood of the prediction
        of this model under a given target distribution.

        .. math::

            \frac{1}{|\mathcal{D}|} \mathcal{L} (\theta=\{W,b\}, \mathcal{D}) =
            \frac{1}{|\mathcal{D}|} \sum_{i=0}^{|\mathcal{D}|}
            \log(P(Y=y^{(i)}|x^{(i)}, W,b)) \\
                    \ell (\theta=\{W,b\}, \mathcal{D})

        :type y: theano.tensor.TensorType
        :param y: corresponds to a vector that gives for each example
            the correct label.

        Note: We use the mean instead of the sum so that the learning rate
            is less dependent of the batch size.
        """
        if self.is_binary:
            # Binary case: every entry of p_y_given_x is averaged; y is not used.
            return -T.mean(T.log(self.p_y_given_x))
        # Otherwise pick, for each row i, the log-probability of label y[i].
        return -T.mean(T.log(self.p_y_given_x)[T.arange(y.shape[0]), y])
项目:Neural-Photo-Editor    作者:ajbrock    | 项目源码 | 文件源码
def reset(self):
        """Reset the ordering, the layer connectivity vectors and the MRG random stream state."""
        # Set Original ordering
        self.ordering.set_value(np.arange(self._input_size, dtype=theano.config.floatX))

        # Reset RandomStreams
        # NOTE(review): no statement follows this heading in the visible source;
        # a line may have been lost in extraction -- confirm against upstream.

        # Initial layer connectivity
        self.layers_connectivity[0].set_value((self.ordering + 1).eval())
        # Every connectivity vector except the first and the last is zeroed.
        for i in range(1, len(self.layers_connectivity)-1):
            self.layers_connectivity[i].set_value(np.zeros((self._hidden_sizes[i-1]), dtype=theano.config.floatX))

        # Reset MRG_RandomStreams (GPU)
        self._mrng.rstate = self._initial_mrng_rstate
        # NOTE(review): the body of this loop is missing from the visible source,
        # so the block does not parse as-is; presumably each state is restored to
        # its saved value -- restore the statement from upstream.
        for state, value in zip(self._mrng.state_updates, self._initial_mrng_state_updates):

项目:LiviaNET    作者:josedolz    | 项目源码 | 文件源码
def negativeLogLikelihoodWeighted(self, y, weightPerClass):
        """
        Class-weighted mean negative log-likelihood of the training predictions.

        ``self.p_y_given_x_train`` is indexed as a 5-D tensor with the class
        on axis 1; ``weightPerClass`` is a vector broadcast along that axis.
        ``y`` supplies, for every position of the remaining four axes, the
        class whose weighted log-probability is kept.
        """
        #Weighting the cost of the different classes in the cost-function, in order to counter class imbalance.
        e1 = np.finfo(np.float32).tiny
        # NOTE(review): the left part of this expression was garbled in the
        # source (``=, 4*e1) * e1``); rebuilt as a mask of near-zero
        # probabilities so that log() below never receives 0 -- confirm
        # against upstream.
        addTinyProbMatrix = T.lt(self.p_y_given_x_train, 4*e1) * e1

        weights = weightPerClass.dimshuffle('x', 0, 'x', 'x', 'x')
        log_p_y_given_x_train = T.log(self.p_y_given_x_train + addTinyProbMatrix)
        weighted_log_probs = log_p_y_given_x_train * weights

        wShape = weighted_log_probs.shape

        # Re-arrange: broadcastable index grids for every axis except the
        # class axis, which is indexed by y.
        idx0 = T.arange( wShape[0] ).dimshuffle( 0, 'x','x','x')
        idx2 = T.arange( wShape[2] ).dimshuffle('x', 0, 'x','x')
        idx3 = T.arange( wShape[3] ).dimshuffle('x','x', 0, 'x')
        idx4 = T.arange( wShape[4] ).dimshuffle('x','x','x', 0)

        return -T.mean( weighted_log_probs[ idx0, y, idx2, idx3, idx4] )
项目:cortex    作者:rdevon    | 项目源码 | 文件源码
def sample(self, n_samples):
        """
        Inspired by jbornschein's implementation.
        """

        # Running pre-activations start from the bias, one row per sample.
        z0 = T.zeros((n_samples, self.dim,)).astype(floatX) + T.shape_padleft(self.b)
        # One uniform draw per (dimension, sample) pair.
        rs = self.trng.uniform((self.dim, n_samples), dtype=floatX)

        def _step_sample(i, W_i, r_i, z):
            # Probability for unit i, kept strictly inside (0, 1).
            p_i = T.nnet.sigmoid(z[:, i]) * 0.9999 + 0.000005
            # The unit is 1 wherever the uniform draw falls at or below p_i.
            x_i = (r_i <= p_i).astype(floatX)
            # Add unit i's contribution (through W_i) to the pre-activations.
            z   = z + T.outer(x_i, W_i)
            return z, x_i

        seqs = [T.arange(self.dim), self.W, rs]
        outputs_info = [z0, None]
        non_seqs = []

        # NOTE(review): the remaining arguments and the closing parenthesis of
        # this call are missing from the visible source, so the block does not
        # parse as-is; `scan` is a project helper whose signature is not shown
        # here -- restore the call from upstream.
        (zs, x), updates = scan(_step_sample, seqs, outputs_info, non_seqs,

        return x.T, updates
项目:keras    作者:GeekLiB    | 项目源码 | 文件源码
def ctc_path_probs(predict, Y, alpha=1e-4):
    """Scan the label columns of `predict` in both directions and combine them.

    The columns of `predict` selected by `Y` are mixed with a uniform term
    (weight `alpha`) before taking the log.  One `theano.scan` then walks the
    log values and their doubly reversed copy in lockstep, updating an
    (active count, log-probability) pair for each direction through
    `ctc_update_log_p`.

    Returns `(log_probs, mask)`: the sum of both passes minus the
    log-emissions, and a boolean mask that is true only at positions inside
    the active range of both passes.
    """
    uniform_term = alpha * np.float32(1.) / Y.shape[0]
    log_emit = T.log((1 - alpha) * predict[:, Y] + uniform_term)
    zero_row = T.zeros_like(log_emit[0])

    fwd_skips = ctc_create_skip_idxs(Y)
    # there should be a shortcut to calculating this
    bwd_skips = ctc_create_skip_idxs(Y[::-1])

    def advance(fwd_now, bwd_now, fwd_active, fwd_prev, bwd_active, bwd_prev):
        fwd_active_new, fwd_new = ctc_update_log_p(
            fwd_skips, zero_row, fwd_active, fwd_now, fwd_prev)
        bwd_active_new, bwd_new = ctc_update_log_p(
            bwd_skips, zero_row, bwd_active, bwd_now, bwd_prev)
        return fwd_active_new, fwd_new, bwd_active_new, bwd_new

    initial_state = [np.int32(1), zero_row, np.int32(1), zero_row]
    scan_outputs, _ = theano.scan(
        advance,
        sequences=[log_emit, log_emit[::-1, ::-1]],
        outputs_info=initial_state)
    f_active, log_f_probs, b_active, log_b_probs = scan_outputs

    columns = T.arange(log_emit.shape[1]).dimshuffle('x', 0)
    fwd_mask = columns < f_active.dimshuffle(0, 'x')
    # The backward pass ran on reversed data, so flip its mask back.
    bwd_mask = (columns < b_active.dimshuffle(0, 'x'))[::-1, ::-1]
    mask = fwd_mask & bwd_mask

    log_probs = log_f_probs + log_b_probs[::-1, ::-1] - log_emit
    return log_probs, mask
项目:emnlp2017-bilstm-cnn-crf    作者:UKPLab    | 项目源码 | 文件源码
def batch_gather(reference, indices):
        '''Batchwise gathering of row indices.

        The numpy equivalent is reference[np.arange(batch_size), indices].

        # Arguments
            reference: tensor with ndim >= 2 of shape
              (batch_size, dim1, dim2, ..., dimN)
            indices: 1d integer tensor of shape (batch_size) satisfying
              0 <= i < dim1 for each element i.

        # Returns
            A tensor with shape (batch_size, dim2, ..., dimN)
            equal to reference[np.arange(batch_size), indices]
        '''
        batch_size = K.shape(reference)[0]
        # Pair every batch position with its requested index -> (batch_size, 2).
        # NOTE(review): `tf.pack` was renamed `tf.stack` in TensorFlow 1.0;
        # kept as-is for the TensorFlow version this file targets.
        indices = tf.pack([tf.range(batch_size), indices], axis=1)
        return tf.gather_nd(reference, indices)
项目:emnlp2017-bilstm-cnn-crf    作者:UKPLab    | 项目源码 | 文件源码
def batch_gather(reference, indices):
        '''Batchwise gathering of row indices.

        The numpy equivalent is reference[np.arange(batch_size), indices].

        # Arguments
            reference: tensor with ndim >= 2 of shape
              (batch_size, dim1, dim2, ..., dimN)
            indices: 1d integer tensor of shape (batch_size) satisfying
              0 <= i < dim1 for each element i.

        # Returns
            A tensor with shape (batch_size, dim2, ..., dimN)
            equal to reference[np.arange(batch_size), indices]
        '''
        batch_size = K.shape(reference)[0]
        # Advanced indexing: row b of the result is reference[b, indices[b]].
        return reference[T.arange(batch_size), indices]
项目:pl-cnn    作者:oval-group    | 项目源码 | 文件源码
def max_oracle(scores, y_truth):
        """
        Loss-augmented arg-max over classes.

        NOTE(review): the end of the signature was missing from the source;
        ``y_truth`` is the only other name the body needs -- confirm the
        original parameter list against upstream.

        :param scores: matrix of scores, one row per sample and one column
            per class.
        :param y_truth: vector with the true class of every sample.
        :return: ``(y_star, delta)`` -- the class maximising score plus 0/1
            loss for every sample, and the summed 0/1 loss of those classes.
        """
        n_classes = scores.shape[1]
        t_range = T.arange(y_truth.shape[0])

        # classification loss for any combination
        # (0 on the true class, 1 everywhere else)
        losses = 1. - T.extra_ops.to_one_hot(y_truth, n_classes)

        # get max score for each sample
        y_star = T.argmax(scores + losses, axis=1)

        # compute classification loss for batch
        delta = losses[t_range, y_star].sum()

        return y_star, delta
项目:KGP-ASR    作者:KGPML    | 项目源码 | 文件源码
def sequence_log_likelihood(y, y_hat, y_mask, y_hat_mask, blank_symbol):
    """
    Based on code from Shawn Tan.
    Credits to Kyle Kastner as well.

    Combines, with ``_log_add``, the entries of the log path probabilities
    at the last valid frame of every batch item for its last and
    second-to-last label position.
    """
    # Number of unmasked entries along axis 0 for every batch item.
    y_hat_mask_len = tensor.sum(y_hat_mask, axis=0, dtype='int32')
    y_mask_len = tensor.sum(y_mask, axis=0, dtype='int32')
    log_probabs = _log_path_probabs(
        y, T.log(y_hat), y_mask, y_hat_mask, blank_symbol)
    batch_size = log_probabs.shape[1]
    # NOTE(review): the batch index was missing from both subscripts in the
    # source (``batch_size`` was computed but never used, and axis 1 is the
    # batch axis); restored here -- confirm against upstream.
    batch_index = tensor.arange(batch_size)
    log_labels_probab = _log_add(
        log_probabs[y_hat_mask_len - 1,
                    batch_index,
                    y_mask_len - 1],
        log_probabs[y_hat_mask_len - 1,
                    batch_index,
                    y_mask_len - 2])
    return log_labels_probab
项目:KGP-ASR    作者:KGPML    | 项目源码 | 文件源码
def _labeling_batch_to_class_batch(y, y_labeling, num_classes,
                                   y_hat_mask=None):
    """
    Accumulate per-label values into per-class values.

    For every label position, the values in ``y_labeling`` are added to the
    slot of the class that ``y`` names at that position, so repeated labels
    are summed.

    NOTE(review): the tail of the signature, the second subscript of the
    ``inc_subtensor`` target and the ``sequences`` / ``outputs_info``
    arguments of ``theano.scan`` were missing from the source.  They were
    rebuilt from the FIXME below and from the otherwise unused locals
    ``n_labels`` and ``out`` -- confirm against upstream.
    """
    # FIXME: y_hat_mask is currently not used
    batch_size = y.shape[1]
    N = y_labeling.shape[0]
    n_labels = y.shape[0]
    # sum over all repeated labels
    # from (T, B, L) to (T, C, B)
    out = T.zeros((num_classes, batch_size, N))
    y_labeling = y_labeling.dimshuffle((2, 1, 0))  # L, B, T
    y_ = y

    def scan_step(index, prev_res, y_labeling, y_):
        # Add label position `index` of every batch item to its class row.
        res_t = T.inc_subtensor(prev_res[y_[index, T.arange(batch_size)],
                                         T.arange(batch_size)],
                                y_labeling[index, T.arange(batch_size)])
        return res_t

    result, updates = theano.scan(scan_step,
                                  sequences=[T.arange(n_labels)],
                                  non_sequences=[y_labeling, y_],
                                  outputs_info=[out])
    # result will be (C, B, T) so we make it (T, B, C)
    return result[-1].dimshuffle(2, 1, 0)
项目:Theano-MPI    作者:uoguelph-mlrg    | 项目源码 | 文件源码
def get_minibatches_idx(n, minibatch_size, shuffle=False):
    """
    Used to shuffle the dataset at each iteration.

    Split the indices ``0 .. n-1`` into consecutive minibatches of
    ``minibatch_size`` entries.  When ``n`` is not a multiple of
    ``minibatch_size`` the leftover indices form one final, shorter batch.

    :param n: number of examples.
    :param minibatch_size: number of indices in every full minibatch.
    :param shuffle: if True, permute the indices in place (global NumPy
        random state) before splitting.
    :return: ``zip`` of ``(minibatch_number, index_array)`` pairs.
    """
    idx_list = numpy.arange(n, dtype="int32")

    if shuffle:
        numpy.random.shuffle(idx_list)

    minibatches = []
    minibatch_start = 0
    for i in range(n // minibatch_size):
        minibatches.append(idx_list[minibatch_start:
                                    minibatch_start + minibatch_size])
        minibatch_start += minibatch_size

    if (minibatch_start != n):
        # Make a minibatch out of what is left
        minibatches.append(idx_list[minibatch_start:])

    return zip(range(len(minibatches)), minibatches)
项目:deep-learning-keras-projects    作者:jasmeetsb    | 项目源码 | 文件源码
def ctc_path_probs(predict, Y, alpha=1e-4):
    """Scan the label columns of `predict` in both directions and combine them.

    The columns of `predict` selected by `Y` are mixed with a uniform term
    (weight `alpha`) before taking the log.  One `theano.scan` then walks the
    log values and their doubly reversed copy in lockstep, updating an
    (active count, log-probability) pair for each direction through
    `ctc_update_log_p`.

    Returns `(log_probs, mask)`: the sum of both passes minus the
    log-emissions, and a boolean mask that is true only at positions inside
    the active range of both passes.
    """
    uniform_term = alpha * np.float32(1.) / Y.shape[0]
    log_emit = T.log((1 - alpha) * predict[:, Y] + uniform_term)
    zero_row = T.zeros_like(log_emit[0])

    fwd_skips = ctc_create_skip_idxs(Y)
    # there should be a shortcut to calculating this
    bwd_skips = ctc_create_skip_idxs(Y[::-1])

    def advance(fwd_now, bwd_now, fwd_active, fwd_prev, bwd_active, bwd_prev):
        fwd_active_new, fwd_new = ctc_update_log_p(
            fwd_skips, zero_row, fwd_active, fwd_now, fwd_prev)
        bwd_active_new, bwd_new = ctc_update_log_p(
            bwd_skips, zero_row, bwd_active, bwd_now, bwd_prev)
        return fwd_active_new, fwd_new, bwd_active_new, bwd_new

    initial_state = [np.int32(1), zero_row, np.int32(1), zero_row]
    scan_outputs, _ = theano.scan(
        advance,
        sequences=[log_emit, log_emit[::-1, ::-1]],
        outputs_info=initial_state)
    f_active, log_f_probs, b_active, log_b_probs = scan_outputs

    columns = T.arange(log_emit.shape[1]).dimshuffle('x', 0)
    fwd_mask = columns < f_active.dimshuffle(0, 'x')
    # The backward pass ran on reversed data, so flip its mask back.
    bwd_mask = (columns < b_active.dimshuffle(0, 'x'))[::-1, ::-1]
    mask = fwd_mask & bwd_mask

    log_probs = log_f_probs + log_b_probs[::-1, ::-1] - log_emit
    return log_probs, mask
项目:DeepEnhancer    作者:minxueric    | 项目源码 | 文件源码
def __init__(self, name, x, y, n_in, n_out):
        """
        Build a softmax (multinomial logistic regression) output layer.

        NOTE(review): several statements were garbled in the source
        (``self.x= x = name``, unclosed ``theano.shared(`` calls and
        ``softmax(, self.W)``).  They were rebuilt as below; any extra
        keyword arguments of ``theano.shared`` (e.g. a name) are not
        recoverable from here -- confirm against upstream.

        :param name: label stored on the layer.
        :param x: symbolic input, multiplied with W of shape (n_in, n_out).
        :param y: symbolic vector of integer class labels.
        :param n_in: number of input features.
        :param n_out: number of classes.
        """
        self.x = x
        self.name = name
        # weight matrix W (n_in, n_out)
        self.W = theano.shared(
                value=np.zeros((n_in, n_out), dtype=theano.config.floatX))
        # bias vector b (n_out, )
        self.b = theano.shared(
                value=np.zeros((n_out,), dtype=theano.config.floatX))
        # p(y|x, w, b)
        self.p_y_given_x = T.nnet.softmax(T.dot(self.x, self.W) + self.b)
        self.y_pred = T.argmax(self.p_y_given_x, axis=1)
        # Mean negative log-probability of the true label of each row.
        self.negative_log_likelihood = -T.mean(T.log(self.p_y_given_x)[T.arange(y.shape[0]), y])
        # Fraction of rows whose predicted label differs from y.
        self.errors = T.mean(T.neq(self.y_pred, y))
        # params
        self.params = [self.W, self.b]
项目:NADE    作者:MarcCote    | 项目源码 | 文件源码
def nll_of_x_given_o(self, input, ordering):
        r""" Returns the theano graph that computes $-ln p(\bx|o)$.

        input: 1D vector
            One image with shape (nb_channels * images_height * images_width).

        ordering: 1D vector of int
            List of pixel indices representing the input ordering.
        """

        # NOTE(review): this statement is truncated in the visible source (the
        # argument of int( is missing), so the block does not parse as-is;
        # D is used below as the side of the square masks -- restore the
        # expression from upstream.
        D = int(
        # Row d of mask_o_d is one-hot at position ordering[d].
        mask_o_d = T.zeros((D, D), dtype=theano.config.floatX)
        mask_o_d = T.set_subtensor(mask_o_d[T.arange(D), ordering], 1.)

        # Cumulative sum, shifted down by one row with the first row cleared:
        # row d marks the positions that come strictly before step d.
        mask_o_lt_d = T.cumsum(mask_o_d, axis=0)
        mask_o_lt_d = T.set_subtensor(mask_o_lt_d[1:], mask_o_lt_d[:-1])
        mask_o_lt_d = T.set_subtensor(mask_o_lt_d[0, :], 0.)

        # One copy of the input per row of the masks.
        input = T.tile(input[None, :], (D, 1))
        nll = -T.sum(self.lnp_x_o_d_given_x_o_lt_d(input, mask_o_d, mask_o_lt_d))
        return nll
项目:DynamicMemoryNetworks    作者:swstarlab    | 项目源码 | 文件源码
def get_output_for(self, inputs, **kwargs):
        """Embed the input, run the GRU and keep one GRU output per row.

        `inputs` is read as [input, input_word, word_dropout].  For each row
        of the GRU output, the state at position `input_word - 1` is kept,
        e.g. "John went to the hallway nil nil nil" -> [GRU1, ..., GRU8] -> GRU5.
        The selection is reshaped to (-1, input.shape[1], hid_state_size).
        """
        sentences = inputs[0]
        word_dropout = inputs[2]
        word_positions = T.flatten(inputs[1])

        # Word embedding followed by the recurrent layer.
        embedded = self.SemMem.get_output_for([sentences, word_dropout])
        hidden_states = self.GRU.get_output_for([embedded])

        # Pair every row with its own (1-based) position.
        row_index = T.arange(hidden_states.shape[0], dtype='int32')
        selected = hidden_states[row_index, word_positions - 1]

        target_shape = (-1, sentences.shape[1], self.hid_state_size)
        return T.reshape(selected, target_shape)
项目:senti    作者:stevenxxiu    | 项目源码 | 文件源码
def get_output_for(self, input_, **kwargs):
        """Keep the `self.k` largest entries along the last axis, in their original order.

        `input_` is indexed with three index arrays, i.e. it is treated as a
        3-D tensor.  `argsort` ranks the last axis, the last `k` ranks are the
        largest values, and sorting those positions preserves the order in
        which the kept values appear in the input.
        """
        # The closing bracket of this subscript was missing from the source.
        return input_[
            T.arange(input_.shape[0]).dimshuffle(0, 'x', 'x'),
            T.arange(input_.shape[1]).dimshuffle('x', 0, 'x'),
            T.sort(T.argsort(input_, axis=-1)[:, :, -self.k:], axis=-1),
        ]
项目:neural-semantic-role-labeler    作者:hiroki13    | 项目源码 | 文件源码
def get_y_prob(self, h, y):
        """
        :param h: 1D: n_words, 2D: Batch, 3D: n_y
        :param y: 1D: n_words, 2D: Batch
        :return: 1D: Batch; final score of the label sequence y minus the
            log-sum-exp of the final z scores
        """
        batch_index = T.arange(h.shape[1])
        z_score0 = self.BOS + h[0]  # 1D: batch, 2D: n_y
        y_score0 = z_score0[batch_index, y[0]]  # 1D: batch

        # Walk the remaining words, carrying the previous label, the running
        # score of y and the running scores over all labels.
        [_, y_scores, z_scores], _ = theano.scan(fn=self._forward_step,
                                                 sequences=[h[1:], y[1:]],
                                                 outputs_info=[y[0], y_score0, z_score0],
                                                 non_sequences=[self.W_t, batch_index])

        # Only the values after the last word are needed.
        y_score = y_scores[-1]
        z_score = logsumexp(z_scores[-1], axis=1).flatten()

        return y_score - z_score
项目:deeplearning    作者:wangzhics    | 项目源码 | 文件源码
def __init__(self, x, y, n_x, n_y):
        """
        Build a softmax (multinomial logistic regression) classifier.

        NOTE(review): the ``theano.shared(`` calls were left unclosed and the
        first argument of the product inside ``softmax`` was missing in the
        source.  They were rebuilt as below; any extra keyword arguments of
        ``theano.shared`` are not recoverable from here -- confirm against
        upstream.

        :param x: symbolic input, multiplied with w of shape (n_x, n_y).
        :param y: symbolic vector of integer class labels.
        :param n_x: number of input features.
        :param n_y: number of classes.
        """
        # initialize with 0 the weights as a matrix of shape (n_in, n_out)
        self.w = theano.shared(
            value=numpy.zeros((n_x, n_y), dtype=theano.config.floatX))
        # initialize the biases b as a vector of n_out 0s
        self.b = theano.shared(
            value=numpy.zeros((n_y,), dtype=theano.config.floatX))
        self.params = [self.w, self.b]
        # save x, y
        self.x = x
        self.y = y
        # calculate
        p_y_given_x = T.nnet.softmax(T.dot(self.x, self.w) + self.b)
        # probability is maximal
        y_pred = T.argmax(p_y_given_x, axis=1)
        # error: fraction of rows whose prediction differs from the label
        self.error = T.mean(T.neq(y_pred, self.y))
        # cost: mean negative log-probability of the true label of each row
        self.cost = -T.mean(T.log(p_y_given_x)[T.arange(self.y.shape[0]), self.y])
项目:keras-customized    作者:ambrite    | 项目源码 | 文件源码
def ctc_path_probs(predict, Y, alpha=1e-4):
    """Scan the label columns of `predict` in both directions and combine them.

    The columns of `predict` selected by `Y` are mixed with a uniform term
    (weight `alpha`) before taking the log.  One `theano.scan` then walks the
    log values and their doubly reversed copy in lockstep, updating an
    (active count, log-probability) pair for each direction through
    `ctc_update_log_p`.

    Returns `(log_probs, mask)`: the sum of both passes minus the
    log-emissions, and a boolean mask that is true only at positions inside
    the active range of both passes.
    """
    uniform_term = alpha * np.float32(1.) / Y.shape[0]
    log_emit = T.log((1 - alpha) * predict[:, Y] + uniform_term)
    zero_row = T.zeros_like(log_emit[0])

    fwd_skips = ctc_create_skip_idxs(Y)
    # there should be a shortcut to calculating this
    bwd_skips = ctc_create_skip_idxs(Y[::-1])

    def advance(fwd_now, bwd_now, fwd_active, fwd_prev, bwd_active, bwd_prev):
        fwd_active_new, fwd_new = ctc_update_log_p(
            fwd_skips, zero_row, fwd_active, fwd_now, fwd_prev)
        bwd_active_new, bwd_new = ctc_update_log_p(
            bwd_skips, zero_row, bwd_active, bwd_now, bwd_prev)
        return fwd_active_new, fwd_new, bwd_active_new, bwd_new

    initial_state = [np.int32(1), zero_row, np.int32(1), zero_row]
    scan_outputs, _ = theano.scan(
        advance,
        sequences=[log_emit, log_emit[::-1, ::-1]],
        outputs_info=initial_state)
    f_active, log_f_probs, b_active, log_b_probs = scan_outputs

    columns = T.arange(log_emit.shape[1]).dimshuffle('x', 0)
    fwd_mask = columns < f_active.dimshuffle(0, 'x')
    # The backward pass ran on reversed data, so flip its mask back.
    bwd_mask = (columns < b_active.dimshuffle(0, 'x'))[::-1, ::-1]
    mask = fwd_mask & bwd_mask

    log_probs = log_f_probs + log_b_probs[::-1, ::-1] - log_emit
    return log_probs, mask
项目:deeptravel    作者:keon    | 项目源码 | 文件源码
def get_minibatches_idx(n, minibatch_size, shuffle=False):
    """
    Used to shuffle the dataset at each iteration.

    Split the indices ``0 .. n-1`` into consecutive minibatches of
    ``minibatch_size`` entries.  When ``n`` is not a multiple of
    ``minibatch_size`` the leftover indices form one final, shorter batch.

    :param n: number of examples.
    :param minibatch_size: number of indices in every full minibatch.
    :param shuffle: if True, permute the indices in place (global NumPy
        random state) before splitting.
    :return: ``zip`` of ``(minibatch_number, index_array)`` pairs.
    """
    idx_list = numpy.arange(n, dtype="int32")

    if shuffle:
        numpy.random.shuffle(idx_list)

    minibatches = []
    minibatch_start = 0
    for i in range(n // minibatch_size):
        minibatches.append(idx_list[minibatch_start:minibatch_start + minibatch_size])
        minibatch_start += minibatch_size

    if minibatch_start != n:
        # Make a minibatch out of what is left
        minibatches.append(idx_list[minibatch_start:])

    return zip(range(len(minibatches)), minibatches)
项目    作者:fizerkhan    | 项目源码 | 文件源码
def sample_gmm(mu, sigma, weight, theano_rng):
    """Draw one Gaussian-mixture sample per row of `weight`.

    The last axis of `weight` holds the k mixing weights; the last axis of
    `mu` and `sigma` is split into (dim, k).  A component is drawn per row
    with `theano_rng.multinomial` and reduced to an index by the project
    helper `predict`; the result is `mu + sigma * noise` for that component,
    reshaped to the shape of `weight` with its last axis replaced by dim.
    """
    n_components = weight.shape[-1]
    feature_dim = mu.shape[-1] / n_components

    # Remember the output layout before the inputs are flattened.
    out_ndim = weight.ndim
    out_shape = tensor.set_subtensor(weight.shape[-1], feature_dim)

    flat_mu = mu.reshape((-1, feature_dim, n_components))
    flat_sigma = sigma.reshape((-1, feature_dim, n_components))
    flat_weight = weight.reshape((-1, n_components))

    component_draw = theano_rng.multinomial(pvals=flat_weight,
                                            dtype=flat_weight.dtype)
    component = predict(component_draw, axis=-1)

    # Parameters of the drawn component for every row.
    chosen_mu = flat_mu[tensor.arange(flat_mu.shape[0]), :, component]
    chosen_sigma = flat_sigma[tensor.arange(flat_sigma.shape[0]), :, component]

    noise = theano_rng.normal(
        size=chosen_mu.shape, avg=0., std=1., dtype=chosen_mu.dtype)

    draws = chosen_mu + chosen_sigma * noise
    return draws.reshape(out_shape, ndim=out_ndim)
项目    作者:fizerkhan    | 项目源码 | 文件源码
def GMM_sample(mus, sigmas, mix_weights):
    """
    Draw one sample per row from a Gaussian mixture.

    First, sample according to the prior mixing probabilities
    to choose the component density.
    Second, draw sample from that density.

    Inspired by implementation in `cle`.

    ``mus`` and ``sigmas`` are indexed as ``[row, :, component]``;
    ``mix_weights`` holds the mixing probabilities (assumed to be laid out
    as ``(row, component)`` -- TODO confirm).  Uses the module-level random
    stream ``srng``.

    Returns ``(sample, selected_mus, selected_sigmas, chosen_component)``.
    """
    # NOTE(review): the right-hand side of this assignment and the trailing
    # arguments of the ``srng.normal`` call below were missing from the
    # source; both were rebuilt from the docstring -- confirm against upstream.
    chosen_component = T.argmax(srng.multinomial(pvals=mix_weights), axis=1)
    # Keep, for every row, only the parameters of the component drawn above.
    selected_mus = mus[T.arange(mus.shape[0]), :, chosen_component]
    selected_sigmas = sigmas[T.arange(sigmas.shape[0]), :, chosen_component]
    # Standard-normal noise, then scale and shift into the chosen density.
    sample = srng.normal(size=selected_mus.shape, avg=0., std=1.)
    sample *= selected_sigmas
    sample += selected_mus
    return sample, selected_mus, selected_sigmas, chosen_component
项目:lstm_tensorflow_imdb    作者:AaronZhouQian    | 项目源码 | 文件源码
def get_minibatches_idx(n, minibatch_size, shuffle=False):
    """
    Used to shuffle the dataset at each iteration.

    Split the indices ``0 .. n-1`` into consecutive minibatches of
    ``minibatch_size`` entries.  When ``n`` is not a multiple of
    ``minibatch_size`` the leftover indices form one final, shorter batch.

    :param n: number of examples.
    :param minibatch_size: number of indices in every full minibatch.
    :param shuffle: if True, permute the indices in place (global NumPy
        random state) before splitting.
    :return: ``zip`` of ``(minibatch_number, index_array)`` pairs.
    """
    idx_list = numpy.arange(n, dtype="int32")

    if shuffle:
        numpy.random.shuffle(idx_list)

    minibatches = []
    minibatch_start = 0
    for i in range(n // minibatch_size):
        minibatches.append(idx_list[minibatch_start:
                                    minibatch_start + minibatch_size])
        minibatch_start += minibatch_size

    if (minibatch_start != n):
        # Make a minibatch out of what is left
        minibatches.append(idx_list[minibatch_start:])

    return zip(range(len(minibatches)), minibatches)
项目:StockRecommendSystem    作者:doncat99    | 项目源码 | 文件源码
def __init__(self, seq_len, n_feature):
        """Set up the input layer, an output function and a training function.

        NOTE(review): three statements below are truncated in the visible
        source, so the block does not parse as-is; the network definition
        cannot be recovered from here -- restore it from upstream.
        """
        import theano.tensor as T
        # Input batches have shape (batch, seq_len, n_feature).
        self.Input = lasagne.layers.InputLayer(shape=(None, seq_len, n_feature))
        # NOTE(review): the argument(s) of get_output( are missing.
        self.output = lasagne.layers.get_output(
        # NOTE(review): the first argument of get_all_params( is missing.
        self.params = lasagne.layers.get_all_params(, trainable=True)
        self.output_fn = theano.function([self.Input.input_var], self.output)

        fx = T.fvector().astype("float64")
        choices = T.ivector()
        # Entry of the output selected by `choices` for every row.
        px = self.output[T.arange(self.output.shape[0]), choices]
        log_px = T.log(px)
        # NOTE(review): the right-hand side of the cost is missing.
        cost =
        # Adagrad with learning rate 0.0008.
        updates = lasagne.updates.adagrad(cost, self.params, 0.0008)
        # NOTE(review): this local `Input` layer is not used afterwards in the visible code.
        Input = lasagne.layers.InputLayer(shape=(None, seq_len, n_feature))
        self.train_fn = theano.function([self.Input.input_var, choices, fx], [cost, px, log_px], updates=updates)
项目:StockRecommendSystem    作者:doncat99    | 项目源码 | 文件源码
def update_critic(self, random_sample):
        """Train the critic on one batch of state windows.

        For each of the first `batch_size` entries of `random_sample`, a
        window of `lookback_size` consecutive states starting at that index
        is the input, and the window shifted by one step is the "next" input.
        The target is `gamma` times the target model's prediction for the
        next window plus the reward stored at the sampled index.
        """
        window = self.lookback_size
        batch_shape = (self.batch_size, window, self.n_feature)
        current_windows = np.zeros(batch_shape, dtype="float32")
        next_windows = np.zeros(batch_shape, dtype="float32")

        for row in range(self.batch_size):
            start = random_sample[row]
            stop = start + window
            current_windows[row, :, :] = np.array(self.states[start:stop]).astype("float32")
            next_windows[row, :, :] = np.array(self.states[start + 1:stop + 1]).astype("float32")

        rewards = np.array([self.rewards[idx] for idx in random_sample]).astype("float32")

        # using target model to predict
        predicted_next = self.target_model.predict(next_windows).flatten()
        targets = predicted_next * self.gamma + rewards

        self.critic_model.train(current_windows, targets.reshape(self.batch_size, 1))
项目:LSTM-and-maxlayer-for-SNV-based-phenotype-prediction    作者:widmi    | 项目源码 | 文件源码
def get_output_for(self, input, timesteps=None, *args, **kwargs):
        """
        Only forward outputs at certain/last sequence positions

        input : tensor
            Input layer with shape: [samples, sequence positions, features]
        timesteps : array of integers or None
            None: Take output at last sequence position
            Array of integers: take outputs at sequence positions specified
                in array; values serve as indices and must not exceed
                sequence length; length of array must be number of samples
        """
        # `is not None` instead of `!= None`: comparing an array with `!=`
        # is elementwise and cannot be used as a condition.
        if timesteps is not None:
            return input[T.arange(start=0,stop=self.input_shape[0]),timesteps,:]
        # Default branch (was unreachable in the source, where the `else`
        # line had been lost).
        return input[:,-1,:]
项目:Theano-NN_Starter    作者:nightinwhite    | 项目源码 | 文件源码
def get_minibatches_idx(n, minibatch_size, shuffle=False):
    """
    Used to shuffle the dataset at each iteration.

    Split the indices ``0 .. n-1`` into consecutive minibatches of
    ``minibatch_size`` entries.  When ``n`` is not a multiple of
    ``minibatch_size`` the leftover indices form one final, shorter batch.

    :param n: number of examples.
    :param minibatch_size: number of indices in every full minibatch.
    :param shuffle: if True, permute the indices in place (global NumPy
        random state) before splitting.
    :return: ``zip`` of ``(minibatch_number, index_array)`` pairs.
    """
    idx_list = numpy.arange(n, dtype="int32")

    if shuffle:
        numpy.random.shuffle(idx_list)

    minibatches = []
    minibatch_start = 0
    for i in range(n // minibatch_size):
        minibatches.append(idx_list[minibatch_start:
                                    minibatch_start + minibatch_size])
        minibatch_start += minibatch_size

    if (minibatch_start != n):
        # Make a minibatch out of what is left
        minibatches.append(idx_list[minibatch_start:])

    return zip(range(len(minibatches)), minibatches)
项目:Attentive_reader    作者:caglar    | 项目源码 | 文件源码
def _grab_probs(class_probs, target, use_fast_ver=False):
    """Pick, for every row of `class_probs`, the probability of its target class.

    A 3-D `class_probs` is first flattened to 2-D (rows x classes).

    :param class_probs: symbolic tensor of class probabilities.
    :param target: symbolic integer tensor of class indices.
    :param use_fast_ver: with a 2-D `target`, take the diagonal of the
        target-selected columns instead of using flat indexing.
    :return: vector with one probability per row.
    """
    if class_probs.ndim == 3:
        class_probs = class_probs.reshape((-1, class_probs.shape[-1]))

    shape0 = class_probs.shape[0]
    shape1 = class_probs.shape[1]

    p = None
    if target.ndim == 2 and use_fast_ver:
        target = target.flatten()
        cp = class_probs.reshape((target.shape[0], -1))
        # Row i of cp.T[target] is column target[i]; its i-th entry is the
        # probability wanted for row i, i.e. the diagonal.
        p = TT.diag(cp.T[target])
    else:
        # The `else` line was lost in the source, which made the result of
        # the branch above always be overwritten.
        if target.ndim > 1:
            target = target.flatten()
        assert target.ndim == 1, 'make sure target is a vector of ints'
        assert 'int' in target.dtype
        # Offset of every row in the flattened matrix, plus the class index.
        pos = TT.arange(shape0)*shape1
        new_targ = target + pos
        p = class_probs.reshape((shape0*shape1, 1))[new_targ].reshape((shape0,))
    return p
项目:GELUs    作者:hendrycks    | 项目源码 | 文件源码
def softmax_loss(p_true, output_before_softmax):
    """Mean softmax cross-entropy computed directly from the logits.

    :param p_true: either a matrix of target probabilities (ndim == 2) or a
        vector of integer class labels.
    :param output_before_softmax: matrix of logits, one row per example.
    :return: scalar mean of ``log(sum(exp(logits))) - logit_of_target``.
    """
    # Subtract the row-wise maximum so that exp() cannot overflow.
    output_before_softmax -= T.max(output_before_softmax, axis=1, keepdims=True)
    if p_true.ndim==2:
        return T.mean(T.log(T.sum(T.exp(output_before_softmax),axis=1)) - T.sum(p_true*output_before_softmax, axis=1))
    else:
        # Integer labels (the `else` line was lost in the source, leaving
        # this return unreachable and the function returning None here).
        return T.mean(T.log(T.sum(T.exp(output_before_softmax),axis=1)) - output_before_softmax[T.arange(p_true.shape[0]),p_true])
项目:dl4mt-multi    作者:nyu-dl    | 项目源码 | 文件源码
def cost(self, probs, y, y_mask):
        """Masked negative log-likelihood, summed over axis 0 and averaged.

        `probs` is flattened and read at `row * vocab_size + label` for every
        flattened entry of `y`; the negative logs are reshaped back to the
        2-D shape of `y`, multiplied by `y_mask`, summed over axis 0 and then
        averaged.
        """
        labels = y.flatten()
        # Position of each label's probability inside the flattened `probs`.
        flat_positions = tensor.arange(labels.shape[0]) * self.vocab_size + labels
        picked = probs.flatten()[flat_positions]
        nll = (-tensor.log(picked)).reshape([y.shape[0], y.shape[1]])
        per_column = (nll * y_mask).sum(0)
        return per_column.mean()
项目:dl4mt-multi    作者:nyu-dl    | 项目源码 | 文件源码
def f_log_probs(self, probs, x, x_mask, y, y_mask,
                    src_selector, trg_selector, cg=None):
        """Compile a function returning the masked negative log-likelihood per column.

        `probs` is flattened and read at `row * vocab_size + label` for every
        flattened entry of `y`; the negative logs are reshaped to the 2-D
        shape of `y`, multiplied by `y_mask` and summed over axis 0.

        :return: compiled ``theano.function`` taking
            ``[x, x_mask, y, y_mask, src_selector, trg_selector]``.
            ``cg`` is accepted but not used here.
        """
        y_flat = y.flatten()
        y_flat_idx = tensor.arange(y_flat.shape[0]) * self.vocab_size + y_flat
        cost = -tensor.log(probs.flatten()[y_flat_idx])
        cost = cost.reshape([y.shape[0], y.shape[1]])
        cost = (cost * y_mask).sum(0)
        func_inps = [x, x_mask, y, y_mask, src_selector, trg_selector]
        # `inputs=` was missing from the call in the source, although
        # `func_inps` is built for it and `theano.function` requires it.
        return theano.function(
            inputs=func_inps,
            outputs=cost, on_unused_input='warn')
项目:sampleRNN_ICLR2017    作者:soroushmehr    | 项目源码 | 文件源码
def T_one_hot(inp_tensor, n_classes):
    """
    TODO:
        - Implement other methods from here:
        - Compare them speed-wise for different sizes
        - Implement N_one_hot for Numpy version, with speed tests.

    Theano one-hot (1-of-k) from an input tensor of indices.
    If the indices are of the shape (a0, a1, ..., an) the output
    shape would be (a0, a1, ..., an, n_classes).

    Arguments:
        - inp_tensor: any theano tensor with dtype int* as indices and all of
                      them between [0, n_classes-1].
        - n_classes: number of classes which determines the output size.

    Example:
        >>> idx = T.itensor3()
        >>> idx_val = numpy.array([[[0,1,2,3],[4,5,6,7]]], dtype='int32')
        >>> one_hot = T_one_hot(idx, 8)
        >>> out = one_hot.eval({idx:idx_val})
        >>> print(out)
        array([[[[ 1.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
         [ 0.,  1.,  0.,  0.,  0.,  0.,  0.,  0.],
         [ 0.,  0.,  1.,  0.,  0.,  0.,  0.,  0.],
         [ 0.,  0.,  0.,  1.,  0.,  0.,  0.,  0.]],
        [[ 0.,  0.,  0.,  0.,  1.,  0.,  0.,  0.],
         [ 0.,  0.,  0.,  0.,  0.,  1.,  0.,  0.],
         [ 0.,  0.,  0.,  0.,  0.,  0.,  1.,  0.],
         [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  1.]]]])
        >>> print(idx_val.shape, out.shape)
        (1, 2, 4) (1, 2, 4, 8)
    """
    flattened = inp_tensor.flatten()
    z = T.zeros((flattened.shape[0], n_classes), dtype=theano.config.floatX)
    # Set, in row i, the column named by the i-th index.
    one_hot = T.set_subtensor(z[T.arange(flattened.shape[0]), flattened], 1)
    # `range` works on both Python 2 and 3 (the source used `xrange`).
    out_shape = [inp_tensor.shape[i] for i in range(inp_tensor.ndim)] + [n_classes]
    one_hot = one_hot.reshape(out_shape)
    return one_hot
项目:deep_srl    作者:luheng    | 项目源码 | 文件源码
def connect(self, inputs, weights, labels):
    """ Cross-entropy summed over axis 0 of `labels` and averaged over axis 1.

    - inputs: flattened log scores from the softmax layer.
    - weights: optional tensor multiplied elementwise with the per-position
      cross-entropy; pass None to skip the weighting.
    - labels: 2-D integer tensor; its flattened entries select one column
      of `inputs` per row.
    """
    y_flat = labels.flatten()
    x_flat_idx = tensor.arange(y_flat.shape[0])
    cross_ent = - inputs[x_flat_idx, y_flat].reshape([labels.shape[0], labels.shape[1]])
    if weights is not None:
      cross_ent = cross_ent * weights
    # Summed over timesteps. Averaged across samples in the batch.
    return cross_ent.sum(axis=0).mean()
项目:dsb3    作者:EliasVansteenkiste    | 项目源码 | 文件源码
def build_objective(model, deterministic=False, epsilon=1e-12):
    """Mean negative log of the value predicted for each row's target index.

    The output of `model.l_out` is read at column `target` for every row,
    clipped to [epsilon, 1] so the log stays finite, and the negated mean of
    the logs is returned.  `deterministic` is forwarded to `get_output`.
    """
    class_probs = nn.layers.get_output(model.l_out, deterministic=deterministic)
    raw_targets = nn.layers.get_output(model.l_target)
    labels = T.cast(T.flatten(raw_targets), 'int32')
    rows = T.arange(class_probs.shape[0])
    target_probs = T.clip(class_probs[rows, labels], epsilon, 1.)
    return -T.mean(T.log(target_probs))
项目:dsb3    作者:EliasVansteenkiste    | 项目源码 | 文件源码
def heaviside(x, size):
    """Step function on the grid 0..size-1: true where the grid value is >= x.

    `x` is repeated `size` times along axis 1 and compared against a row
    vector holding 0..size-1.
    """
    grid = T.arange(0, size).dimshuffle('x', 0)
    thresholds = T.repeat(x, size, axis=1)
    return grid - thresholds >= 0.
项目:dsb3    作者:EliasVansteenkiste    | 项目源码 | 文件源码
def get_output_for(self, input, **kwargs):
        """Evaluate a Gaussian CDF on the grid 0..max_support-1.

        `input[0]` is the mean and `input[1]` the standard deviation; both
        are repeated `max_support` times along axis 1.  The result is
        (erf((x - mu) / (sigma * sqrt(2) + 1e-16)) + 1) / 2.
        """
        support = self.max_support
        mean, std = input[0], input[1]

        grid = T.arange(0, support).dimshuffle('x', 0)
        mean_tiled = T.repeat(mean, support, axis=1)
        std_tiled = T.repeat(std, support, axis=1)

        # The small constant keeps the division finite when sigma is 0.
        standardized = (grid - mean_tiled) / (std_tiled * T.sqrt(2.) + 1e-16)
        return (T.erf(standardized) + 1.) / 2.
项目:dsb3    作者:EliasVansteenkiste    | 项目源码 | 文件源码
def build_objective(model, deterministic=False, epsilon=1e-12):
    """Mean negative log of the value predicted for each row's target index.

    The output of `model.l_out` is read at column `target` for every row,
    clipped to [epsilon, 1] so the log stays finite, and the negated mean of
    the logs is returned.  `deterministic` is forwarded to `get_output`.
    """
    class_probs = nn.layers.get_output(model.l_out, deterministic=deterministic)
    raw_targets = nn.layers.get_output(model.l_target)
    labels = T.cast(T.flatten(raw_targets), 'int32')
    rows = T.arange(class_probs.shape[0])
    target_probs = T.clip(class_probs[rows, labels], epsilon, 1.)
    return -T.mean(T.log(target_probs))
项目:dsb3    作者:EliasVansteenkiste    | 项目源码 | 文件源码
def build_objective(model, deterministic=False, epsilon=1e-12):
    """Mean negative log of the value predicted for each row's target index.

    The output of `model.l_out` is read at column `target` for every row,
    clipped to [epsilon, 1] so the log stays finite, and the negated mean of
    the logs is returned.  `deterministic` is forwarded to `get_output`.
    """
    class_probs = nn.layers.get_output(model.l_out, deterministic=deterministic)
    raw_targets = nn.layers.get_output(model.l_target)
    labels = T.cast(T.flatten(raw_targets), 'int32')
    rows = T.arange(class_probs.shape[0])
    target_probs = T.clip(class_probs[rows, labels], epsilon, 1.)
    return -T.mean(T.log(target_probs))
项目:dsb3    作者:EliasVansteenkiste    | 项目源码 | 文件源码
def build_objective(model, deterministic=False, epsilon=1e-12):
    """Mean of the aggregated loss entry selected by each row's target index.

    The output of `model.l_agg_mil_loss` is read at column `target` for
    every row and averaged.  `epsilon` is accepted but not used here.
    """
    per_class_loss = nn.layers.get_output(model.l_agg_mil_loss,
                                          deterministic=deterministic)
    raw_targets = nn.layers.get_output(model.l_target)
    labels = T.cast(T.flatten(raw_targets), 'int32')
    rows = T.arange(per_class_loss.shape[0])
    return T.mean(per_class_loss[rows, labels])
项目:dsb3    作者:EliasVansteenkiste    | 项目源码 | 文件源码
def build_objective(model, deterministic=False, epsilon=1e-12):
    """Mean negative log of the value predicted for each row's target index.

    The output of `model.l_out` is read at column `target` for every row,
    clipped to [epsilon, 1] so the log stays finite, and the negated mean of
    the logs is returned.  `deterministic` is forwarded to `get_output`.
    """
    class_probs = nn.layers.get_output(model.l_out, deterministic=deterministic)
    raw_targets = nn.layers.get_output(model.l_target)
    labels = T.cast(T.flatten(raw_targets), 'int32')
    rows = T.arange(class_probs.shape[0])
    target_probs = T.clip(class_probs[rows, labels], epsilon, 1.)
    return -T.mean(T.log(target_probs))
项目:dsb3    作者:EliasVansteenkiste    | 项目源码 | 文件源码
def build_objective(model, deterministic=False, epsilon=1e-12):
    """Mean negative log of the value predicted for each row's target index.

    The output of `model.l_out` is read at column `target` for every row,
    clipped to [epsilon, 1] so the log stays finite, and the negated mean of
    the logs is returned.  `deterministic` is forwarded to `get_output`.
    """
    class_probs = nn.layers.get_output(model.l_out, deterministic=deterministic)
    raw_targets = nn.layers.get_output(model.l_target)
    labels = T.cast(T.flatten(raw_targets), 'int32')
    rows = T.arange(class_probs.shape[0])
    target_probs = T.clip(class_probs[rows, labels], epsilon, 1.)
    return -T.mean(T.log(target_probs))
项目:dsb3    作者:EliasVansteenkiste    | 项目源码 | 文件源码
def build_objective(model, deterministic=False, epsilon=1e-12):
    """Mean negative log of the value predicted for each row's target index.

    The output of `model.l_out` is read at column `target` for every row,
    clipped to [epsilon, 1] so the log stays finite, and the negated mean of
    the logs is returned.  `deterministic` is forwarded to `get_output`.
    """
    class_probs = nn.layers.get_output(model.l_out, deterministic=deterministic)
    raw_targets = nn.layers.get_output(model.l_target)
    labels = T.cast(T.flatten(raw_targets), 'int32')
    rows = T.arange(class_probs.shape[0])
    target_probs = T.clip(class_probs[rows, labels], epsilon, 1.)
    return -T.mean(T.log(target_probs))
项目:dsb3    作者:EliasVansteenkiste    | 项目源码 | 文件源码
def build_objective(model, deterministic=False, epsilon=1e-12):
    """Mean negative log of the value predicted for each row's target index.

    The output of `model.l_out` is read at column `target` for every row,
    clipped to [epsilon, 1] so the log stays finite, and the negated mean of
    the logs is returned.  `deterministic` is forwarded to `get_output`.
    """
    class_probs = nn.layers.get_output(model.l_out, deterministic=deterministic)
    raw_targets = nn.layers.get_output(model.l_target)
    labels = T.cast(T.flatten(raw_targets), 'int32')
    rows = T.arange(class_probs.shape[0])
    target_probs = T.clip(class_probs[rows, labels], epsilon, 1.)
    return -T.mean(T.log(target_probs))
项目:dsb3    作者:EliasVansteenkiste    | 项目源码 | 文件源码
def build_objective(model, deterministic=False, epsilon=1e-12):
    """Mean negative log of the value predicted for each row's target index.

    The output of `model.l_out` is read at column `target` for every row,
    clipped to [epsilon, 1] so the log stays finite, and the negated mean of
    the logs is returned.  `deterministic` is forwarded to `get_output`.
    """
    class_probs = nn.layers.get_output(model.l_out, deterministic=deterministic)
    raw_targets = nn.layers.get_output(model.l_target)
    labels = T.cast(T.flatten(raw_targets), 'int32')
    rows = T.arange(class_probs.shape[0])
    target_probs = T.clip(class_probs[rows, labels], epsilon, 1.)
    return -T.mean(T.log(target_probs))
项目:dsb3    作者:EliasVansteenkiste    | 项目源码 | 文件源码
def build_objective(model, deterministic=False, epsilon=1e-12):
    """Mean negative log of the value predicted for each row's target index.

    The output of `model.l_out` is read at column `target` for every row,
    clipped to [epsilon, 1] so the log stays finite, and the negated mean of
    the logs is returned.  `deterministic` is forwarded to `get_output`.
    """
    class_probs = nn.layers.get_output(model.l_out, deterministic=deterministic)
    raw_targets = nn.layers.get_output(model.l_target)
    labels = T.cast(T.flatten(raw_targets), 'int32')
    rows = T.arange(class_probs.shape[0])
    target_probs = T.clip(class_probs[rows, labels], epsilon, 1.)
    return -T.mean(T.log(target_probs))
项目:dsb3    作者:EliasVansteenkiste    | 项目源码 | 文件源码
def build_objective(model, deterministic=False, epsilon=1e-12):
    """Mean negative log of the value predicted for each row's target index.

    The output of `model.l_out` is read at column `target` for every row,
    clipped to [epsilon, 1] so the log stays finite, and the negated mean of
    the logs is returned.  `deterministic` is forwarded to `get_output`.
    """
    class_probs = nn.layers.get_output(model.l_out, deterministic=deterministic)
    raw_targets = nn.layers.get_output(model.l_target)
    labels = T.cast(T.flatten(raw_targets), 'int32')
    rows = T.arange(class_probs.shape[0])
    target_probs = T.clip(class_probs[rows, labels], epsilon, 1.)
    return -T.mean(T.log(target_probs))
项目:dsb3    作者:EliasVansteenkiste    | 项目源码 | 文件源码
def build_objective(model, deterministic=False, epsilon=1e-12):
    """Mean negative log of the value predicted for each row's target index.

    The output of `model.l_out` is read at column `target` for every row,
    clipped to [epsilon, 1] so the log stays finite, and the negated mean of
    the logs is returned.  `deterministic` is forwarded to `get_output`.
    """
    class_probs = nn.layers.get_output(model.l_out, deterministic=deterministic)
    raw_targets = nn.layers.get_output(model.l_target)
    labels = T.cast(T.flatten(raw_targets), 'int32')
    rows = T.arange(class_probs.shape[0])
    target_probs = T.clip(class_probs[rows, labels], epsilon, 1.)
    return -T.mean(T.log(target_probs))
项目:dsb3    作者:EliasVansteenkiste    | 项目源码 | 文件源码
def build_objective(model, deterministic=False, epsilon=1e-12):
    """Mean negative log of the value predicted for each row's target index.

    The output of `model.l_out` is read at column `target` for every row,
    clipped to [epsilon, 1] so the log stays finite, and the negated mean of
    the logs is returned.

    :param model: object exposing the layers `l_out` and `l_target`.
    :param deterministic: forwarded to `get_output` for `l_out`.  The source
        accepted this argument but never passed it on, unlike the other
        `build_objective` variants; the default (False) keeps the previous
        behaviour.
    :param epsilon: lower clipping bound applied before the log.
    """
    predictions = nn.layers.get_output(model.l_out, deterministic=deterministic)
    targets = T.cast(T.flatten(nn.layers.get_output(model.l_target)), 'int32')
    p = predictions[T.arange(predictions.shape[0]), targets]
    p = T.clip(p, epsilon, 1.)

    loss = T.mean(T.log(p))
    return -loss
项目:dsb3    作者:EliasVansteenkiste    | 项目源码 | 文件源码
def build_objective(model, deterministic=False, epsilon=1e-12):
    """Mean negative log of the value predicted for each row's target index.

    The output of `model.l_out` is read at column `target` for every row,
    clipped to [epsilon, 1] so the log stays finite, and the negated mean of
    the logs is returned.

    :param model: object exposing the layers `l_out` and `l_target`.
    :param deterministic: forwarded to `get_output` for `l_out`.  The source
        accepted this argument but never passed it on, unlike the other
        `build_objective` variants; the default (False) keeps the previous
        behaviour.
    :param epsilon: lower clipping bound applied before the log.
    """
    predictions = nn.layers.get_output(model.l_out, deterministic=deterministic)
    targets = T.cast(T.flatten(nn.layers.get_output(model.l_target)), 'int32')
    p = predictions[T.arange(predictions.shape[0]), targets]
    p = T.clip(p, epsilon, 1.)

    loss = T.mean(T.log(p))
    return -loss