Python theano module: scan() code examples

We have extracted the following 50 code examples from open-source Python projects to illustrate how to use theano.scan().
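
A minimal, self-contained sketch of the basic theano.scan() pattern (a running sum over a vector) may help before diving into the project examples below; the names X, step, and cumsum are purely illustrative:

import numpy as np
import theano
import theano.tensor as T

X = T.vector("X")

# scan() calls step() once per element of `sequences`; the second
# argument receives the previous output, seeded by `outputs_info`.
def step(x_t, acc_tm1):
    return acc_tm1 + x_t

results, updates = theano.scan(fn=step,
                               sequences=[X],
                               outputs_info=[T.zeros_like(X[0])])

cumsum = theano.function(inputs=[X], outputs=results, updates=updates)
print(cumsum(np.arange(5, dtype=theano.config.floatX)))  # [ 0.  1.  3.  6. 10.]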

Project: deep_srl    Author: luheng    | project source | file source
def connect(self, inputs, mask, is_train):
    """ is_train: A boolean tensor.
    """
    max_length = inputs.shape[0]
    batch_size = inputs.shape[1]
    outputs_info = [tensor.alloc(numpy_floatX(0.), batch_size, self.hidden_dim),
            tensor.alloc(numpy_floatX(0.), batch_size, self.hidden_dim)]
    # Dropout mask sharing for variational dropout.
    self.is_train = is_train
    if self.recurrent_dropout_layer is not None:
      self.recurrent_dropout_layer.generate_mask([batch_size, self.hidden_dim], is_train)

    inputs = tensor.dot(inputs, self.W) + self.b
    rval, _ = theano.scan(self._step, # Scan function
                sequences=[inputs, mask], # Input sequence
                outputs_info=outputs_info,
                name=_p(self.prefix, '_layers'),
                n_steps=max_length) # scan steps
    return rval[0]
Project: deep_srl    Author: luheng    | project source | file source
def connect(self, inputs, mask, is_train):
    max_length = inputs.shape[0]
    batch_size = inputs.shape[1]
    outputs_info = [tensor.alloc(numpy_floatX(0.), batch_size, self.hidden_dim),
            tensor.alloc(numpy_floatX(0.), batch_size, self.hidden_dim)]

    # Dropout layers
    self.is_train = is_train
    if self.recurrent_dropout_layer is not None:
      self.recurrent_dropout_layer.generate_mask([batch_size, self.hidden_dim], is_train)

    proj_inputs = tensor.dot(inputs, self.W) + self.b
    rval, _ = theano.scan(self._step, # Scan function
                sequences=[inputs, proj_inputs, mask], # Input sequence
                outputs_info=outputs_info,
                name=_p(self.prefix, '_layers'),
                n_steps=max_length) # scan steps
    return rval[0]
Project: structured-output-ae    Author: sbelharbi    | project source | file source
def sample_scan(self, x, sigma, n_steps, samples):
        # Enable on-the-fly graph computations
        #  theano.config.compute_test_value = "raise"
        in_val = T.fmatrix("input_values")
        # in_val.tag.test_value = np.asarray(
        #    np.random.rand(1, 784), dtype=theano.config.floatX)
    s_sigma = T.fscalar("sigma_values")
        # s_sigma = np.asarray(
        #    np.random.rand(1), dtype=theano.config.floatX)
        mode = "FAST_RUN"
        values, updates = theano.scan(fn=self.sample_one_step,
                                      outputs_info=in_val,
                                      non_sequences=s_sigma,
                                      n_steps=n_steps,
                                      mode=mode)
        ae_sampler = theano.function(inputs=[in_val, s_sigma],
                                     outputs=values[-1],
                                     updates=updates)
        samples = ae_sampler(x, sigma)
        return samples
Project: gram    Author: mp2893    | project source | file source
def gru_layer(tparams, emb, options):
    hiddenDimSize = options['hiddenDimSize']
    timesteps = emb.shape[0]
    if emb.ndim == 3: n_samples = emb.shape[1]
    else: n_samples = 1

    def stepFn(wx, h, U_gru):
        uh = T.dot(h, U_gru)
        r = T.nnet.sigmoid(_slice(wx, 0, hiddenDimSize) + _slice(uh, 0, hiddenDimSize))
        z = T.nnet.sigmoid(_slice(wx, 1, hiddenDimSize) + _slice(uh, 1, hiddenDimSize))
        h_tilde = T.tanh(_slice(wx, 2, hiddenDimSize) + r * _slice(uh, 2, hiddenDimSize))
        h_new = z * h + ((1. - z) * h_tilde)
        return h_new

    Wx = T.dot(emb, tparams['W_gru']) + tparams['b_gru']
    results, updates = theano.scan(fn=stepFn,
                                   sequences=[Wx],
                                   outputs_info=T.alloc(numpy_floatX(0.0), n_samples, hiddenDimSize),
                                   non_sequences=[tparams['U_gru']],
                                   name='gru_layer',
                                   n_steps=timesteps)

    return results
Project: crfrnn_layer    Author: HapeMask    | project source | file source
def get_output_for(self, inputs, **kwargs):
        vals, ref = inputs

        def filt(V, R):
            if self.norm_type is not None:
                o = tt.ones((1, V.shape[1], V.shape[2]), np.float32)
                norm = gaussian_filter(R, o, self.kern_std, self.ref_dim)
                norm = tt.sqrt(norm) if self.norm_type == "sym" else norm
                norm += 1e-8

            V = V / norm if self.norm_type in ["pre", "sym"] else V
            F = gaussian_filter(R, V, self.kern_std)
            return F / norm if self.norm_type in ["post", "sym"] else F

        filtered = theano.scan(fn=filt, sequences=[vals, ref],
                               outputs_info=None)[0]
        return filtered
Project: cortex    Author: rdevon    | project source | file source
def set_net_params(self):
        '''Returns MLP parameters for scan.'''
        super(GRU, self).set_net_params()

        if self.input_net_aux is None:
            self.input_net_aux = MLP(
                self.dim_in, 2 * self.dim_h, 2 * self.dim_hs[0], 1,
                rng=self.rng, trng=self.trng,
                h_act='T.nnet.sigmoid', out_act='T.tanh',
                name='input_net_aux')
        else:
            assert self.input_net_aux.dim_in == self.dim_in
            assert self.input_net_aux.dim_out == 2 * self.dim_hs[0]
        self.input_net_aux.name = self.name + '_input_net_aux'

        self.nets.append(self.input_net_aux)

        for i in xrange(self.n_layers - 1):
            n = MLP(self.dim_hs[i], 2 * self.dim_hs[i+1],
                    rng=self.rng, trng=self.trng,
                    distribution='centered_binomial',
                    name='rnn_net_aux%d' % i)
            self.inter_nets.append(n) #insert(2 * i + 1, n)
Project: cortex    Author: rdevon    | project source | file source
def step_call(self, x, h0, c0, condition_on, *params):
        n_steps = x.shape[0]
        n_samples = x.shape[1]

        seqs         = self.call_seqs(x, condition_on, *params)
        outputs_info = [h0, c0]
        non_seqs     = self.get_recurrent_args(*params)

        (h, c), updates = theano.scan(
            self._step,
            sequences=seqs,
            outputs_info=outputs_info,
            non_sequences=non_seqs,
            name=self.name + '_recurrent_steps',
            n_steps=n_steps,
            strict=True)

        o_params    = self.get_output_args(*params)
        out_net_out = self.output_net.step_call(h, *o_params)
        preact      = out_net_out['z']
        p           = out_net_out['p']
        #y           = self.output_net.sample(p=p)

        return OrderedDict(h=h, p=p, z=preact), updates
Project: cortex    Author: rdevon    | project source | file source
def call_seqs(self, x, condition_on, level, *params):
        '''Prepares the input for __call__

        Args:
            x (T.tensor): input
            condition_on (T.tensor or None): tensor to condition the recurrence on.
            level (int): recurrent level.
            *params: list of theano.shared.

        Returns:
            list: list of scan inputs.

        '''
        if level == 0:
            i_params = self.get_input_args(*params)
            a        = self.input_net.step_preact(x, *i_params)
        else:
            i_params = self.get_inter_args(level - 1, *params)
            a        = self.inter_nets[level - 1].step_preact(x, *i_params)

        if condition_on is not None:
            a += condition_on

        return [a]
Project: cortex    Author: rdevon    | project source | file source
def shuffle_columns(x, srng):
    '''Shuffles a tensor along the second index.

    Args:
        x (T.tensor).
        srng (shared RandomStream).

    '''
    def step_shuffle(m, perm):
        return m[perm]

    perm_mat = srng.permutation(n=x.shape[0], size=(x.shape[1],))
    y, _ = scan(
        step_shuffle, [x.transpose(1, 0, 2), perm_mat], [None], [], x.shape[1],
        name='shuffle', strict=False)
    return y.transpose(1, 0, 2)
Project: keras    Author: GeekLiB    | project source | file source
def ctc_path_probs(predict, Y, alpha=1e-4):
    smoothed_predict = (1 - alpha) * predict[:, Y] + alpha * np.float32(1.) / Y.shape[0]
    L = T.log(smoothed_predict)
    zeros = T.zeros_like(L[0])
    log_first = zeros

    f_skip_idxs = ctc_create_skip_idxs(Y)
    b_skip_idxs = ctc_create_skip_idxs(Y[::-1])  # there should be a shortcut to calculating this

    def step(log_f_curr, log_b_curr, f_active, log_f_prev, b_active, log_b_prev):
        f_active_next, log_f_next = ctc_update_log_p(f_skip_idxs, zeros, f_active, log_f_curr, log_f_prev)
        b_active_next, log_b_next = ctc_update_log_p(b_skip_idxs, zeros, b_active, log_b_curr, log_b_prev)
        return f_active_next, log_f_next, b_active_next, log_b_next

    [f_active, log_f_probs, b_active, log_b_probs], _ = theano.scan(
        step, sequences=[L, L[::-1, ::-1]], outputs_info=[np.int32(1), log_first, np.int32(1), log_first])

    idxs = T.arange(L.shape[1]).dimshuffle('x', 0)
    mask = (idxs < f_active.dimshuffle(0, 'x')) & (idxs < b_active.dimshuffle(0, 'x'))[::-1, ::-1]
    log_probs = log_f_probs + log_b_probs[::-1, ::-1] - L
    return log_probs, mask
Project: doctorai    Author: mp2893    | project source | file source
def gru_layer(tparams, emb, layerIndex, hiddenDimSize, mask=None):
    timesteps = emb.shape[0]
    if emb.ndim == 3: n_samples = emb.shape[1]
    else: n_samples = 1

    W_rx = T.dot(emb, tparams['W_r_'+layerIndex])
    W_zx = T.dot(emb, tparams['W_z_'+layerIndex])
    Wx = T.dot(emb, tparams['W_'+layerIndex])

    def stepFn(stepMask, wrx, wzx, wx, h):
        r = T.nnet.sigmoid(wrx + T.dot(h, tparams['U_r_'+layerIndex]) + tparams['b_r_'+layerIndex])
        z = T.nnet.sigmoid(wzx + T.dot(h, tparams['U_z_'+layerIndex]) + tparams['b_z_'+layerIndex])
        h_tilde = T.tanh(wx + T.dot(r*h, tparams['U_'+layerIndex]) + tparams['b_'+layerIndex])
        h_new = z * h + ((1. - z) * h_tilde)
        h_new = stepMask[:, None] * h_new + (1. - stepMask)[:, None] * h
        return h_new#, output, time

    results, updates = theano.scan(fn=stepFn,
                                   sequences=[mask, W_rx, W_zx, Wx],
                                   outputs_info=T.alloc(numpy_floatX(0.0), n_samples, hiddenDimSize),
                                   name='gru_layer'+layerIndex,
                                   n_steps=timesteps)

    return results
Project: doctorai    Author: mp2893    | project source | file source
def gru_layer(tparams, emb, layerIndex, hiddenDimSize, mask=None):
    timesteps = emb.shape[0]
    if emb.ndim == 3: n_samples = emb.shape[1]
    else: n_samples = 1

    W_rx = T.dot(emb, tparams['W_r_'+layerIndex])
    W_zx = T.dot(emb, tparams['W_z_'+layerIndex])
    Wx = T.dot(emb, tparams['W_'+layerIndex])

    def stepFn(stepMask, wrx, wzx, wx, h):
        r = T.nnet.sigmoid(wrx + T.dot(h, tparams['U_r_'+layerIndex]) + tparams['b_r_'+layerIndex])
        z = T.nnet.sigmoid(wzx + T.dot(h, tparams['U_z_'+layerIndex]) + tparams['b_z_'+layerIndex])
        h_tilde = T.tanh(wx + T.dot(r*h, tparams['U_'+layerIndex]) + tparams['b_'+layerIndex])
        h_new = z * h + ((1. - z) * h_tilde)
        h_new = stepMask[:, None] * h_new + (1. - stepMask)[:, None] * h
        return h_new

    results, updates = theano.scan(fn=stepFn,
                                   sequences=[mask, W_rx, W_zx, Wx],
                                   outputs_info=T.alloc(numpy_floatX(0.0), n_samples, hiddenDimSize),
                                   name='gru_layer'+layerIndex,
                                   n_steps=timesteps)

    return results
Project: Deep-Learning-with-Theano    Author: PacktPublishing    | project source | file source
def model(inputs, _is_training, params, batch_size, hidden_size, drop_i, drop_s, init_scale, init_H_bias, _theano_rng):
    noise_i_for_H = get_dropout_noise((batch_size, hidden_size), drop_i, _theano_rng)
    i_for_H = ifelse(_is_training, inputs * noise_i_for_H, inputs)
    i_for_H = linear.model(i_for_H, params, hidden_size, hidden_size, init_scale, bias_init=init_H_bias)

    # Dropout noise for recurrent hidden state.
    noise_s = get_dropout_noise((batch_size, hidden_size), drop_s, _theano_rng)

    def step(i_for_H_t, y_tm1, noise_s):
        s_lm1_for_H = ifelse(_is_training, y_tm1 * noise_s, y_tm1)
        return T.tanh(i_for_H_t + linear.model(s_lm1_for_H, params, hidden_size, hidden_size, init_scale))

    y_0 = shared_zeros((batch_size, hidden_size), name='h0')
    y, _ = theano.scan(step, sequences=i_for_H, outputs_info=[y_0], non_sequences = [noise_s])

    y_last = y[-1]
    sticky_state_updates = [(y_0, y_last)]

    return y, y_0, sticky_state_updates
Project: theano-recurrence    Author: uyaseen    | project source | file source
def generative_sampling(self, seed, emb_data, sample_length):
        fruit = theano.shared(value=seed)

        def step(h_tm, y_tm):
            h_t = self.activation(T.dot(emb_data[y_tm], self.W) +
                                  T.dot(h_tm, self.U) + self.bh)
            y_t = T.nnet.softmax(T.dot(h_t, self.V) + self.by)
            y = T.argmax(y_t, axis=1)

            return h_t, y[0]

        [_, samples], _ = theano.scan(fn=step,
                                      outputs_info=[self.h0, fruit],
                                      n_steps=sample_length)

        get_samples = theano.function(inputs=[],
                                      outputs=samples)

        return get_samples()
Project: KGP-ASR    Author: KGPML    | project source | file source
def _labeling_batch_to_class_batch(y, y_labeling, num_classes,
                                   y_hat_mask=None):
    # FIXME: y_hat_mask is currently not used
    batch_size = y.shape[1]
    N = y_labeling.shape[0]
    n_labels = y.shape[0]
    # sum over all repeated labels
    # from (T, B, L) to (T, C, B)
    out = T.zeros((num_classes, batch_size, N))
    y_labeling = y_labeling.dimshuffle((2, 1, 0))  # L, B, T
    y_ = y

    def scan_step(index, prev_res, y_labeling, y_):
        res_t = T.inc_subtensor(prev_res[y_[index, T.arange(batch_size)],
                                T.arange(batch_size)],
                                y_labeling[index, T.arange(batch_size)])
        return res_t

    result, updates = theano.scan(scan_step,
                                  sequences=[T.arange(n_labels)],
                                  non_sequences=[y_labeling, y_],
                                  outputs_info=[out])
    # result will be (C, B, T) so we make it (T, B, C)
    return result[-1].dimshuffle(2, 1, 0)
Project: deep-learning-keras-projects    Author: jasmeetsb    | project source | file source
def ctc_path_probs(predict, Y, alpha=1e-4):
    smoothed_predict = (1 - alpha) * predict[:, Y] + alpha * np.float32(1.) / Y.shape[0]
    L = T.log(smoothed_predict)
    zeros = T.zeros_like(L[0])
    log_first = zeros

    f_skip_idxs = ctc_create_skip_idxs(Y)
    b_skip_idxs = ctc_create_skip_idxs(Y[::-1])  # there should be a shortcut to calculating this

    def step(log_f_curr, log_b_curr, f_active, log_f_prev, b_active, log_b_prev):
        f_active_next, log_f_next = ctc_update_log_p(f_skip_idxs, zeros, f_active, log_f_curr, log_f_prev)
        b_active_next, log_b_next = ctc_update_log_p(b_skip_idxs, zeros, b_active, log_b_curr, log_b_prev)
        return f_active_next, log_f_next, b_active_next, log_b_next

    [f_active, log_f_probs, b_active, log_b_probs], _ = theano.scan(
        step, sequences=[L, L[::-1, ::-1]], outputs_info=[np.int32(1), log_first, np.int32(1), log_first])

    idxs = T.arange(L.shape[1]).dimshuffle('x', 0)
    mask = (idxs < f_active.dimshuffle(0, 'x')) & (idxs < b_active.dimshuffle(0, 'x'))[::-1, ::-1]
    log_probs = log_f_probs + log_b_probs[::-1, ::-1] - L
    return log_probs, mask
Project: Recurrent-Convolutional-Neural-Network    Author: monisjaved    | project source | file source
def get_cost(self, X, Y, X_sizes):
        """
        Calculates cost for each values in mini batch, also
        regularizes all the input parameters and then returns
        final cost function as theano variable

        """
        cost_fn, _ = theano.scan(
            fn=self.get_likelihood,
            sequences=[X, Y, X_sizes]
        )

        cost_fn = cost_fn.mean()
        cost_fn += self.reg_lambda * T.sqr(self.W_c_r).sum() / 2.
        cost_fn += self.reg_lambda * T.sqr(self.W_c_l).sum() / 2.
        cost_fn += self.reg_lambda * T.sqr(self.W_conv).sum() / 2.
        cost_fn += self.reg_lambda * T.sqr(self.W_output).sum() / 2.
        cost_fn += self.reg_lambda * T.sqr(self.b_output).sum() / 2.

        # Regularizing word embedding
        cost_fn += self.reg_lambda * T.sqr(self.vector_dict).sum() / 2

        return cost_fn
Project: sciDT    Author: edvisees    | project source | file source
def get_output(self, train=False):
    input = self.get_input(train)
    proj_input = self.activation(T.tensordot(input, self.att_proj, axes=(3,0)))
    if self.context == 'word':
      att_scores = T.tensordot(proj_input, self.att_scorer, axes=(3, 0))
    elif self.context == 'clause':
      def step(a_t, h_tm1, W_in, W, sc):
        h_t = T.tanh(T.tensordot(a_t, W_in, axes=(2,0)) + T.tensordot(h_tm1, W, axes=(2,0)))
        s_t = T.tensordot(h_t, sc, axes=(2,0))
        return h_t, s_t
      [_, scores], _ = theano.scan(
          step,
          sequences=[proj_input.dimshuffle(2, 0, 1, 3)],
          outputs_info=[T.zeros((proj_input.shape[0], self.td1, self.rec_hid_dim)), None],
          non_sequences=[self.rec_in_weights, self.rec_hid_weights, self.att_scorer])
      att_scores = scores.dimshuffle(1,2,0)
    elif self.context == 'para':
      att_scores = T.tensordot(proj_input, self.att_scorer, axes=(3, 2)).sum(axis=(1, 2))
    # Nested scans. For shame!
    def get_sample_att(sample_input, sample_att):
      sample_att_inp, _ = theano.scan(fn=lambda s_att_i, s_input_i: T.dot(s_att_i, s_input_i), sequences=[T.nnet.softmax(sample_att), sample_input])
      return sample_att_inp

    att_input, _ = theano.scan(fn=get_sample_att, sequences=[input, att_scores])
    return att_input
Project: recnet    Author: joergfranke    | project source | file source
def _ctc_normal(self, predict,labels):

        n = labels.shape[0]

        labels2 = T.concatenate((labels, [self.tpo["CTC_blank"], self.tpo["CTC_blank"]]))
        sec_diag = T.neq(labels2[:-2], labels2[2:]) * \
                   T.eq(labels2[1:-1], self.tpo["CTC_blank"])

        recurrence_relation = \
            T.eye(n) + \
            T.eye(n, k=1) + \
            T.eye(n, k=2) * sec_diag.dimshuffle((0, 'x'))

        pred_y = predict[:, labels]

        probabilities, _ = theano.scan(
            lambda curr, accum: curr * T.dot(accum, recurrence_relation),
            sequences=[pred_y],
            outputs_info=[T.eye(n)[0]]
        )

        labels_probab = T.sum(probabilities[-1, -2:])
        return -T.log(labels_probab)
Project: recnet    Author: joergfranke    | project source | file source
def sequence_iteration(self, in_seq, mask, use_dropout,dropout_value=1):

        in_seq_d = T.switch(use_dropout,
                             (in_seq *
                              self.trng.binomial(in_seq.shape,
                                            p=dropout_value, n=1,
                                            dtype=in_seq.dtype)),
                             in_seq)

        rz_in_seq =  T.add( T.dot(in_seq_d, self.weights[0]) , self.weights[1] )

        out_seq, updates = theano.scan(
                                        fn=self.t_forward_step,
                                        sequences=[mask, rz_in_seq],  # in_seq_d],
                                        outputs_info=[self.t_ol_t00],
                                        non_sequences=[i for i in self.weights][2:] + [self.t_n_out],
                                        go_backwards = self.go_backwards,
                                        truncate_gradient=-1,
                                        #n_steps=50,
                                        strict=True,
                                        allow_gc=False,
                                        )
        return out_seq
Project: lddmm-ot    Author: jeanfeydy    | project source | file source
def _semi_lagrangian_displacement(self, v_sampled, grid_points, dt) :
        """
        Semi-Lagrangian scheme.
        Given a downsampled velocity field v (which  will be linearly interpolated),
        we find "where the information came from", i.e. numerically invert its
        flow during a time-step dt on the 'grid_points'.
        To do so, we simply solve the fixed point equation
        a(y)/2 = (dt/2) * v( y - a(y)/2 )
        by an "Picard-like" iterative scheme,
        where y is a grid point, and -a(y) the corresponding "backward" vector.
        """
        def f(r) :
            return .5 * dt * self._linear_interp_downsampledfield(v_sampled, grid_points - r)
        # Theano on GPU requires float32, i.e. explicit downcast from numpy float64 type :
        r_0 = np.zeros((np.prod(self.image_shape), self.image_dimension), dtype = config.floatX) 
        result, updates = theano.scan(fn            = f,          # Iterated routine
                                      outputs_info  = [r_0],      # Starting estimate for r
                                      n_steps       = 5)          # Number of iterations, sufficient in practice
        r_inf = result[-1]  # We only keep the end result
        return 2. * r_inf   # displacement "alpha"
Project: lddmm-ot    Author: jeanfeydy    | project source | file source
def _HamiltonianShootingCarrying(self, q, p, i0) :
        """
        Given initial control points/momentums q0 and p0 given as n-by-d matrices,
        and a "template" image i0, outputs the trajectories q_t, p_t, I_t = I0 \circ phi_{t->0}.
        """
        # We encode the identity as a null displacement field.
        identity = T.as_tensor_variable(0. * self.dense_grid())

        # Here, we use the "scan" theano routine, which can be understood as a "for" loop.
        result, updates = theano.scan(fn            = lambda x,y,z : self._hamiltonian_step_carrying2(x,y,z),
                                      outputs_info  = [q,p, identity],
                                      n_steps       = int(np.round(1/self.dt) ))

        phi_inv_1 = result[2][-1]  # We do not store the intermediate results
        I1 = self._image_circ_diffeo(i0, self.dense_grid() + phi_inv_1)  # instead of interpolating the images It at all timesteps, we only do it in the end.
        return [result[0][-1], result[1][-1], I1]                       # and only return the final state + momentum + image
Project: NADE    Author: MarcCote    | project source | file source
def sym_logdensity(self, x):
        """ x is a matrix of column datapoints (VxB) V = n_visible, B = batch size """
        def density_given_previous_a_and_x(x, w, V_alpha, b_alpha, V_mu, b_mu, V_sigma, b_sigma, activations_factor, p_prev, a_prev, x_prev):
            a = a_prev + T.dot(T.shape_padright(x_prev, 1), T.shape_padleft(w, 1))
            h = self.nonlinearity(a * activations_factor)  # BxH

            Alpha = T.nnet.softmax(T.dot(h, V_alpha) + T.shape_padleft(b_alpha))  # BxC
            Mu = T.dot(h, V_mu) + T.shape_padleft(b_mu)  # BxC
            Sigma = T.exp((T.dot(h, V_sigma) + T.shape_padleft(b_sigma)))  # BxC
            p = p_prev + log_sum_exp(-constantX(0.5) * T.sqr((Mu - T.shape_padright(x, 1)) / Sigma) - T.log(Sigma) - constantX(0.5 * np.log(2 * np.pi)) + T.log(Alpha))
            return (p, a, x)
        # First element is different (it is predicted from the bias only)
        a0 = T.zeros_like(T.dot(x.T, self.W))  # BxH
        p0 = T.zeros_like(x[0])
        x0 = T.ones_like(x[0])
        ([ps, _as, _xs], updates) = theano.scan(density_given_previous_a_and_x,
                                                sequences=[x, self.W, self.V_alpha, self.b_alpha, self.V_mu, self.b_mu, self.V_sigma, self.b_sigma, self.activation_rescaling],
                                                outputs_info=[p0, a0, x0])
        return (ps[-1], updates)
Project: NADE    Author: MarcCote    | project source | file source
def sym_logdensity(self, x):
        """ x is a matrix of column datapoints (VxB) V = n_visible, B = batch size """
        def density_given_previous_a_and_x(x, w, v, b, activations_factor, p_prev, a_prev, x_prev):
            a = a_prev + T.dot(T.shape_padright(x_prev, 1), T.shape_padleft(w, 1))
            h = self.nonlinearity(a * activations_factor)  # BxH
            t = T.dot(h, v) + b
            p_xi_is_one = T.nnet.sigmoid(t) * constantX(0.9999) + constantX(0.0001 * 0.5)  # Make logistic regression more robust by having the sigmoid saturate at 0.00005 and 0.99995
            p = p_prev + x * T.log(p_xi_is_one) + (1 - x) * T.log(1 - p_xi_is_one)
            return (p, a, x)
        # First element is different (it is predicted from the bias only)
        a0 = T.zeros_like(T.dot(x.T, self.W))  # BxH
        p0 = T.zeros_like(x[0])
        x0 = T.ones_like(x[0])
        ([ps, _, _], updates) = theano.scan(density_given_previous_a_and_x,
                                            sequences=[x, self.W, self.V, self.b, self.activation_rescaling],
                                            outputs_info=[p0, a0, x0])
        return (ps[-1], updates)
Project: neural-semantic-role-labeler    Author: hiroki13    | project source | file source
def get_y_prob(self, h, y):
        """
        :param h: 1D: n_words, 2D: Batch, 3D: n_y
        :param y: 1D: n_words, 2D: Batch
        :return: gradient of cross entropy: 1D: Batch
        """
        batch_index = T.arange(h.shape[1])
        z_score0 = self.BOS + h[0]  # 1D: batch, 2D: n_y
        y_score0 = z_score0[batch_index, y[0]]  # 1D: batch

        [_, y_scores, z_scores], _ = theano.scan(fn=self._forward_step,
                                                 sequences=[h[1:], y[1:]],
                                                 outputs_info=[y[0], y_score0, z_score0],
                                                 non_sequences=[self.W_t, batch_index])

        y_score = y_scores[-1]
        z_score = logsumexp(z_scores[-1], axis=1).flatten()

        return y_score - z_score
Project: neural_wfst    Author: se4u    | project source | file source
def get_layer(self, x_in):
        assert x_in.ndim == 2
        n_steps = x_in.shape[0]

        def __slice(x_, n, dim):
            return x_[n * dim: (n + 1) * dim]

        def __step(x_, h_, c_):
            preact = T.dot(h_, self._params['U']) + x_ + self._params['b']
            i = T.nnet.sigmoid(__slice(preact, 0, self._ydim))
            f = T.nnet.sigmoid(__slice(preact, 1, self._ydim))
            o = T.nnet.sigmoid(__slice(preact, 2, self._ydim))
            c = T.tanh(__slice(preact, 3, self._ydim))
            c = f * c_ + i * c
            h = o * T.tanh(c)
            return h, c

        x_in = T.dot(x_in, self._params['W']) + self._params['b']
        rval, updates = theano.scan(__step, sequences=x_in, go_backwards=self.go_backwards,
                                    outputs_info=[T.alloc(np_floatX(0.), self._ydim),
                                                  T.alloc(np_floatX(0.), self._ydim)],
                                    name='lstm_layers', n_steps=n_steps)
        return reverse(rval[0]) if self.go_backwards else rval[0]
Project: neural_wfst    Author: se4u    | project source | file source
def get_layer(self, x_in, C_in, ty_i):  # op,

        n_steps = C_in.shape[0]

        def __logsumexp(x, axis=None):
            xmax = x.max(axis=axis, keepdims=True)
            xmax_ = x.max(axis=axis)
            return xmax_ + T.log(T.exp(x - xmax).sum(axis=axis))

        def __step(_C, _x):
            #scores = T.dot( T.dot(_x, self._params['U']) + self._params['b'], self._params['v0'])
            scores = T.dot(T.nnet.sigmoid(T.dot(_x, self._params[
                           'U1']) + T.dot(_C, self._params['U2']) + self._params['b']), self._params['v0'])
            return scores.flatten()

        y_out, _ = theano.scan(
            __step, sequences=C_in, non_sequences=x_in, name='classification_layer', n_steps=n_steps)
        norm_y = y_out.flatten() - __logsumexp(y_out)
        f_lc_debug = theano.function(
            [x_in, C_in, ty_i], [y_out, norm_y, norm_y[ty_i]])
        return norm_y[ty_i], T.argmax(norm_y), f_lc_debug
Project: neural_wfst    Author: se4u    | project source | file source
def tagged_sequence_unnormalized_score_in_order_one_crf(input_tv, y, l):
    '''
    Simply sum the log-scores along the path suggested by `y` in the `input_tv`
    tensor.

    Params
    ------
    input_tv : A 3D tensor of (token, prev_pos, cur_pos) log scores.
      the input_tv also contains scores of
    y    : The true sequence that was actually followed.
    l    : The score of (EOS | tag)
    '''
    def _score_step(o, y, p_, y_):
        return ((p_ + o[y_, y]), y)
    [rval, _], _ = theano.scan(_score_step,
                               sequences=[input_tv[1:, :-1], y[1:]],
                               #sequences=[input_tv, y],
                               outputs_info=[input_tv[0, -1, y[0]], y[0]],
                               #outputs_info=[0.0, numpy.int32(-1)],
                               name='OrderOnePathMax_scan_score_step',
                               strict=True)
    return rval[-1] + l[y[-1]]
Project: neural_wfst    Author: se4u    | project source | file source
def retrieve_path_from_backpointers(bp, starting_point):
    '''
    Theano scan loop to follow backpointers, starting from a given spot.
    Params
    ------
    bp             : The trail of backpointers. Think of this is as a list of
        lists where we start from the back `bp = list[N][starting_point]` and
        then go to list[N-1][bp] and so on.
    starting_point :
    '''
    vp_prefix = th_reverse(
        theano.scan(
            lambda p, y: p[y],
            sequences=bp,
            outputs_info=starting_point,
            go_backwards=True,
            name='OrderOnePathMax_scan__bkpntr',
            strict=True)[0])
    return theano.tensor.concatenate([vp_prefix, starting_point.dimshuffle('x')])
Project: deepAI    Author: kaiu85    | project source | file source
def inner_fn_sample_actions_given(oat_given, stm1):

    st0_condition = theano.shared(name = 'st0_condition', value = numpy.random.randn( n_s,n_samples ).astype( dtype = theano.config.floatX ), borrow = True )
    ot0_condition = theano.shared(name = 'ot0_condition', value = numpy.random.randn( n_o,n_samples ).astype( dtype = theano.config.floatX ), borrow = True )
    oht0_condition = theano.shared(name = 'oht0_condition', value = numpy.random.randn( n_oh,n_samples ).astype( dtype = theano.config.floatX ), borrow = True )
    oat0_condition = theano.shared(name = 'oat0_condition', value = numpy.random.randn( n_oa,n_samples ).astype( dtype = theano.config.floatX ), borrow = True )

    # Iterate MCMC sampler to approximate constrained probabilities
    # p(o,oh|oa) of observations, given a sequence of proprioceptive
    # inputs oa
    # c.f. https://arxiv.org/abs/1401.4082, Appendix F.
    ((st, ot, oht, oat), _) = theano.scan(fn=inner_fn_condition, 
                                           outputs_info=[st0_condition, ot0_condition, oht0_condition, oat0_condition],
                                           non_sequences=[oat_given, stm1],
                                           n_steps=n_iterations_ag)

    st = st[-1]
    ot = ot[-1]
    oht = oht[-1]
    oat = oat[-1]    

    return st, ot, oht, oat

# Define initial state and action
Project: lazyprogrammer    Author: inhwane    | project source | file source
def renet_layer_lr_allscan(X, rnn1, rnn2, w, h, wp, hp):
    # list_of_images = []
    C = X.shape[0]
    X = X.dimshuffle((1, 0, 2)).reshape((h/hp, hp*C*w)) # split the rows for the first scan
    def rnn_pass(x):
        x = x.reshape((hp, C, w)).dimshuffle((2, 1, 0)).reshape((w/wp, C*wp*hp))
        h1 = rnn1.output(x)
        h2 = rnn2.output(x, go_backwards=True)
        img = T.concatenate([h1.T, h2.T])
        # list_of_images.append(img)
        return img

    results, _ = theano.scan(
        fn=rnn_pass,
        sequences=X,
        outputs_info=None,
        n_steps=h/hp,
    )
    return results.dimshuffle((1, 0, 2))
    # return T.stacklists(list_of_images).dimshuffle((1, 0, 2))
Project: SocializedWordEmbeddings    Author: HKUST-KnowComp    | project source | file source
def forward_all(self, x, masks = None, h0=None, return_c=False, direction = None):
        if h0 is None:
            if x.ndim > 1:
                h0 = T.zeros((x.shape[1], self.n_out*(self.order+1)), dtype=theano.config.floatX)
            else:
                h0 = T.zeros((self.n_out*(self.order+1),), dtype=theano.config.floatX)

        if masks is None:
            masks = T.ones((x.shape[0], x.shape[1]), dtype = theano.config.floatX)
        h, _ = theano.scan(
                    fn = self.forward,
                    sequences = [x, masks],
                    outputs_info = [ h0 ]
                )
        if return_c:
            return h
        elif x.ndim > 1:
            return h[:,:,self.n_out*self.order:]
        else:
            return h[:,self.n_out*self.order:]
Project: deeplift    Author: kundajelab    | project source | file source
def for_loop(step_function, inputs, initial_hidden_states, go_backwards):
    """
        inputs: time axis must be first
    """ 
    results = theano.scan(
        step_function,
        sequences=inputs,
        outputs_info=initial_hidden_states,
        go_backwards=go_backwards)[0] #screw the updates
    #when results has length 1, it is not returned as a list. wrap it
    if (isinstance(results, list)==False):
        results = [results]
    #put the batch axis back in front
    results = [dimshuffle(tensor, [1,0]+[x for x in xrange(2, tensor.ndim)])
               for tensor in results]
    return results
Project: keras-recommendation    Author: sonyisme    | project source | file source
def get_output(self, train=False):
        X = self.get_input(train) # shape: (nb_samples, time (padded with zeros), input_dim)
        # new shape: (time, nb_samples, input_dim) -> because theano.scan iterates over main dimension
        padded_mask = self.get_padded_shuffled_mask(train, X, pad=1)
        X = X.dimshuffle((1, 0, 2)) 
        x = T.dot(X, self.W) + self.b

        # scan = theano symbolic loop.
        # See: http://deeplearning.net/software/theano/library/scan.html
        # Iterate over the first dimension of the x array (=time).
        outputs, updates = theano.scan(
            self._step, # this will be called with arguments (sequences[i], outputs[i-1], non_sequences[i])
            sequences=[x, dict(input=padded_mask, taps=[-1])], # tensors to iterate over, inputs to _step
            # initialization of the output. Input to _step with default tap=-1.
            outputs_info=T.unbroadcast(alloc_zeros_matrix(X.shape[1], self.output_dim), 1),
            non_sequences=self.U, # static inputs to _step
            truncate_gradient=self.truncate_gradient
        )

        if self.return_sequences:
            return outputs.dimshuffle((1, 0, 2))
        return outputs[-1]
Project: keras-recommendation    Author: sonyisme    | project source | file source
def get_output(self, train=False):
        X = self.get_input(train) 
        padded_mask = self.get_padded_shuffled_mask(train, X, pad=1)
        X = X.dimshuffle((1, 0, 2)) 

        x_z = T.dot(X, self.W_z) + self.b_z
        x_r = T.dot(X, self.W_r) + self.b_r
        x_h = T.dot(X, self.W_h) + self.b_h
        outputs, updates = theano.scan(
            self._step, 
            sequences=[x_z, x_r, x_h, padded_mask], 
            outputs_info=T.unbroadcast(alloc_zeros_matrix(X.shape[1], self.output_dim), 1),
            non_sequences=[self.U_z, self.U_r, self.U_h],
            truncate_gradient=self.truncate_gradient
        )

        if self.return_sequences:
            return outputs.dimshuffle((1, 0, 2))
        return outputs[-1]
Project: keras-recommendation    Author: sonyisme    | project source | file source
def get_output(self, train=False):
        X = self.get_input(train) 
        padded_mask = self.get_padded_shuffled_mask(train, X, pad=1)
        X = X.dimshuffle((1, 0, 2))

        xi = T.dot(X, self.W_i) + self.b_i
        xf = T.dot(X, self.W_f) + self.b_f
        xc = T.dot(X, self.W_c) + self.b_c
        xo = T.dot(X, self.W_o) + self.b_o

        [outputs, memories], updates = theano.scan(
            self._step, 
            sequences=[xi, xf, xo, xc, padded_mask],
            outputs_info=[
                T.unbroadcast(alloc_zeros_matrix(X.shape[1], self.output_dim), 1),
                T.unbroadcast(alloc_zeros_matrix(X.shape[1], self.output_dim), 1)
            ], 
            non_sequences=[self.U_i, self.U_f, self.U_o, self.U_c], 
            truncate_gradient=self.truncate_gradient 
        )

        if self.return_sequences:
            return outputs.dimshuffle((1, 0, 2))
        return outputs[-1]
Project: keras-recommendation    Author: sonyisme    | project source | file source
def get_output(self, train=False):
        X = self.get_input(train)
        padded_mask = self.get_padded_shuffled_mask(train, X, pad=1)
        X = X.dimshuffle((1, 0, 2)) 

        x_z = T.dot(X, self.W_z) + self.b_z
        x_r = T.dot(X, self.Pmat) + self.b_r
        x_h = T.dot(X, self.W_h) + self.b_h
        outputs, updates = theano.scan(
            self._step, 
            sequences=[x_z, x_r, x_h, padded_mask],
            outputs_info=T.unbroadcast(alloc_zeros_matrix(X.shape[1], self.output_dim), 1),
            non_sequences=[self.U_z, self.U_r, self.U_h],
            truncate_gradient=self.truncate_gradient
        )
        if self.return_sequences:
            return outputs.dimshuffle((1, 0, 2))
        return outputs[-1]
Project: keras-recommendation    Author: sonyisme    | project source | file source
def get_output(self, train=False):
        X = self.get_input(train)
        padded_mask = self.get_padded_shuffled_mask(train, X, pad=1)
        X = X.dimshuffle((1, 0, 2)) 

        x_z = T.dot(X, self.W_z) + self.b_z
        x_r = T.dot(X, self.W_r) + self.b_r
        x_h = T.dot(X, self.W_h) + self.b_h
        outputs, updates = theano.scan(
            self._step, 
            sequences=[x_z, x_r, x_h, padded_mask],
            outputs_info=T.unbroadcast(alloc_zeros_matrix(X.shape[1], self.output_dim), 1),
            non_sequences=[self.U_z, self.U_r, self.U_h],
            truncate_gradient=self.truncate_gradient
        )
        if self.return_sequences:
            return outputs.dimshuffle((1, 0, 2))
        return outputs[-1]
Project: keras-customized    Author: ambrite    | project source | file source
def ctc_path_probs(predict, Y, alpha=1e-4):
    smoothed_predict = (1 - alpha) * predict[:, Y] + alpha * np.float32(1.) / Y.shape[0]
    L = T.log(smoothed_predict)
    zeros = T.zeros_like(L[0])
    log_first = zeros

    f_skip_idxs = ctc_create_skip_idxs(Y)
    b_skip_idxs = ctc_create_skip_idxs(Y[::-1])  # there should be a shortcut to calculating this

    def step(log_f_curr, log_b_curr, f_active, log_f_prev, b_active, log_b_prev):
        f_active_next, log_f_next = ctc_update_log_p(f_skip_idxs, zeros, f_active, log_f_curr, log_f_prev)
        b_active_next, log_b_next = ctc_update_log_p(b_skip_idxs, zeros, b_active, log_b_curr, log_b_prev)
        return f_active_next, log_f_next, b_active_next, log_b_next

    [f_active, log_f_probs, b_active, log_b_probs], _ = theano.scan(
        step, sequences=[L, L[::-1, ::-1]], outputs_info=[np.int32(1), log_first, np.int32(1), log_first])

    idxs = T.arange(L.shape[1]).dimshuffle('x', 0)
    mask = (idxs < f_active.dimshuffle(0, 'x')) & (idxs < b_active.dimshuffle(0, 'x'))[::-1, ::-1]
    log_probs = log_f_probs + log_b_probs[::-1, ::-1] - L
    return log_probs, mask
Project: sampleRNN_ICLR2017    Author: soroushmehr    | project source | file source
def __Recurrent(name, hidden_dims, step_fn, inputs, non_sequences=[], h0s=None):
    if not isinstance(inputs, list):
        inputs = [inputs]

    if not isinstance(hidden_dims, list):
        hidden_dims = [hidden_dims]

    if h0s is None:
        h0s = [None]*len(hidden_dims)

    for i in xrange(len(hidden_dims)):
        if h0s[i] is None:
            h0_unbatched = lib.param(
                name + '.h0_' + str(i),
                numpy.zeros((hidden_dims[i],), dtype=theano.config.floatX)
            )
            num_batches = inputs[0].shape[1]
            h0s[i] = T.alloc(h0_unbatched, num_batches, hidden_dims[i])

        h0s[i] = T.patternbroadcast(h0s[i], [False] * h0s[i].ndim)

    outputs, _ = theano.scan(
        step_fn,
        sequences=inputs,
        outputs_info=h0s,
        non_sequences=non_sequences
    )

    return outputs
Project: deep_srl    Author: luheng    | project source | file source
def __init__(self, dropout_prob, fix_mask=False, fast_predict=False, prefix="dropout"):
    self.dropout_prob = dropout_prob
    self.fix_mask = fix_mask
    self.prefix = prefix
    self.fast_predict = fast_predict
    print (self.prefix, self.dropout_prob, self.fix_mask)
    assert (dropout_prob > 0)
    """ This one works for the scan function.
        (instead of theano.tensor.shared.randomstreams.RandomStreams)
        See discussion: https://groups.google.com/forum/#!topic/theano-users/DbvTgTqkT8o
    """
    self.rng = MRG_RandomStreams(seed=RANDOM_SEED, use_cuda=True)
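
As the docstring above notes, MRG_RandomStreams (rather than theano.tensor.shared_randomstreams.RandomStreams) is the random stream that works inside scan. Below is a minimal sketch of drawing a fresh dropout mask at every scan step, assuming an illustrative seed, input shape, and keep probability:

import theano
import theano.tensor as T
from theano.sandbox.rng_mrg import MRG_RandomStreams

srng = MRG_RandomStreams(seed=1234)
X = T.matrix("X")  # (time, dim)

def step(x_t):
    # A fresh binary keep-mask is drawn at each time step; the sampling
    # op lives inside the scan graph, which MRG_RandomStreams supports.
    mask = srng.binomial(size=x_t.shape, p=0.5, dtype=x_t.dtype)
    return x_t * mask

dropped, updates = theano.scan(step, sequences=[X])
# Passing the scan updates to the function carries the stream state forward.
f = theano.function([X], dropped, updates=updates)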
Project: RBM-DBN-theano-DL4J    Author: lzhbrian    | project source | file source
def get_reconstruction_cost(self, updates, pre_sigmoid_nv):
        """Approximation to the reconstruction error

        Note that this function requires the pre-sigmoid activation as
        input.  To understand why this is so you need to understand a
        bit about how Theano works. Whenever you compile a Theano
        function, the computational graph that you pass as input gets
        optimized for speed and stability.  This is done by changing
        several parts of the subgraphs with others.  One such
        optimization expresses terms of the form log(sigmoid(x)) in
        terms of softplus.  We need this optimization for the
        cross-entropy, since the sigmoid of numbers larger than 30 (or
        even less than that) turns to 1, and the sigmoid of numbers
        smaller than -30 turns to 0, which in turn forces Theano to
        compute log(0), and therefore we will get either -inf or NaN as
        cost. If the value is expressed in terms of softplus we do not
        get this undesirable behaviour. This optimization usually
        works fine, but here we have a special case. The sigmoid is
        applied inside the scan op, while the log is
        outside. Therefore Theano will only see log(scan(..)) instead
        of log(sigmoid(..)) and will not apply the wanted
        optimization. We cannot simply replace the sigmoid inside scan
        with something else either, because this only needs to be done
        on the last step. Therefore the easiest and most efficient way
        is to also return the pre-sigmoid activation as an output of
        scan, and apply both the log and sigmoid outside scan such
        that Theano can catch and optimize the expression.

        """

        cross_entropy = T.mean(
            T.sum(
                self.input * T.log(T.nnet.sigmoid(pre_sigmoid_nv)) +
                (1 - self.input) * T.log(1 - T.nnet.sigmoid(pre_sigmoid_nv)),
                axis=1
            )
        )

        return cross_entropy
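
The docstring describes returning the pre-sigmoid activation from scan and applying sigmoid and log outside it, so Theano can rewrite log(sigmoid(x)) into a numerically stable softplus. A rough sketch of that pattern, in which W, v0, and the chain length are illustrative stand-ins:

import theano
import theano.tensor as T

W = T.matrix("W")
v0 = T.vector("v0")

def gibbs_step(v):
    pre_sigmoid = T.dot(v, W)  # keep the pre-activation around
    return pre_sigmoid, T.nnet.sigmoid(pre_sigmoid)

# The first output is collected but not fed back (outputs_info=None);
# only the second output recurses through the chain.
(pre_sigmoid_chain, chain), updates = theano.scan(
    gibbs_step, outputs_info=[None, v0], n_steps=3)

# sigmoid and log are applied *outside* scan, on the final step only,
# where Theano can catch log(sigmoid(.)) and optimize it.
log_p = T.log(T.nnet.sigmoid(pre_sigmoid_chain[-1]))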
Project: lightML    Author: jfzhang95    | project source | file source
def output(self, train=True):

        outputs_info = [self.h0, self.s0]

        ([outputs, states], updates) = theano.scan(
            fn=self.one_step,   #function
            sequences=self.X,
            # n_steps=600,
            outputs_info = outputs_info,
            go_backwards=self.go_backwards
        )

        return outputs
Project: lightML    Author: jfzhang95    | project source | file source
def output(self, train=True):

        outputs_info = [self.s0]

        (outputs, updates) = theano.scan(
            fn=self.one_step,
            sequences=self.X,
            outputs_info=outputs_info,
            go_backwards=self.go_backwards
        )

        return outputs
Project: lightML    Author: jfzhang95    | project source | file source
def output(self, train=True):

        outputs_info = [self.s0]

        (outputs, updates) = theano.scan(
            fn=self.one_step,
            sequences=self.X,
            outputs_info=outputs_info,
            go_backwards=self.go_backwards
        )

        return outputs
Project: transfer    Author: kimiyoung    | project source | file source
def get_output_for(self, input, **kwargs):
        def norm_fn(f, mask, label, previous, W_sim):
            # f: inst * class, mask: inst, previous: inst * class, W_sim: class * class
            next = previous.dimshuffle(0, 1, 'x') + f.dimshuffle(0, 'x', 1) + W_sim.dimshuffle('x', 0, 1)
            if COST:
                next = next + COST_CONST * (1.0 - T.extra_ops.to_one_hot(label, self.num_classes).dimshuffle(0, 'x', 1))
            # next: inst * prev * cur
            next = theano_logsumexp(next, axis = 1)
            # next: inst * class
            mask = mask.dimshuffle(0, 'x')
            next = previous * (1.0 - mask) + next * mask
            return next

        f = T.dot(input, self.W)
        # f: inst * time * class

        initial = f[:, 0, :]
        if CRF_INIT:
            initial = initial + self.W_init[0].dimshuffle('x', 0)
        if COST:
            initial = initial + COST_CONST * (1.0 - T.extra_ops.to_one_hot(self.label_input[:, 0], self.num_classes))
        outputs, _ = theano.scan(fn = norm_fn, \
         sequences = [f.dimshuffle(1, 0, 2)[1: ], self.mask_input.dimshuffle(1, 0)[1: ], self.label_input.dimshuffle(1, 0)[1:]], \
         outputs_info = initial, non_sequences = [self.W_sim], strict = True)
        norm = T.sum(theano_logsumexp(outputs[-1], axis = 1))

        f_pot = (f.reshape((-1, f.shape[-1]))[T.arange(f.shape[0] * f.shape[1]), self.label_input.flatten()] * self.mask_input.flatten()).sum()
        if CRF_INIT:
            f_pot += self.W_init[0][self.label_input[:, 0]].sum()

        labels = self.label_input
        # labels: inst * time
        shift_labels = T.roll(labels, -1, axis = 1)
        mask = self.mask_input
        # mask : inst * time
        shift_mask = T.roll(mask, -1, axis = 1)

        g_pot = (self.W_sim[labels.flatten(), shift_labels.flatten()] * mask.flatten() * shift_mask.flatten()).sum()

        return - (f_pot + g_pot - norm) / f.shape[0]
Project: transfer    Author: kimiyoung    | project source | file source
def get_output_for(self, input, **kwargs):
        def max_fn(f, mask, prev_score, prev_back, W_sim):
            next_score = prev_score.dimshuffle(0, 1, 'x') + f.dimshuffle(0, 'x', 1) + W_sim.dimshuffle('x', 0, 1)
            next_back = T.argmax(next_score, axis = 1)
            next_score = T.max(next_score, axis = 1)
            mask = mask.dimshuffle(0, 'x')
            next_score = next_score * mask + prev_score * (1.0 - mask)
            next_back = next_back * mask + prev_back * (1.0 - mask)
            next_back = T.cast(next_back, 'int32')
            return [next_score, next_back]

        def produce_fn(back, mask, prev_py):
            # back: inst * class, prev_py: inst, mask: inst
            next_py = back[T.arange(prev_py.shape[0]), prev_py]
            next_py = mask * next_py + (1.0 - mask) * prev_py
            next_py = T.cast(next_py, 'int32')
            return next_py

        f = T.dot(input, self.W)

        init_score, init_back = f[:, 0, :], T.zeros_like(f[:, 0, :], dtype = 'int32')
        if CRF_INIT:
            init_score = init_score + self.W_init[0].dimshuffle('x', 0)
        ([scores, backs], _) = theano.scan(fn = max_fn, \
            sequences = [f.dimshuffle(1, 0, 2)[1: ], self.mask_input.dimshuffle(1, 0)[1: ]], \
            outputs_info = [init_score, init_back], non_sequences = [self.W_sim], strict = True)

        init_py = T.argmax(scores[-1], axis = 1)
        init_py = T.cast(init_py, 'int32')
        # init_py: inst, backs: time * inst * class
        pys, _ = theano.scan(fn = produce_fn, \
            sequences = [backs, self.mask_input.dimshuffle(1, 0)[1:]], outputs_info = [init_py], go_backwards = True)
        # pys: (rev_time - 1) * inst
        pys = pys.dimshuffle(1, 0)[:, :: -1]
        # pys : inst * (time - 1)
        return T.concatenate([pys, init_py.dimshuffle(0, 'x')], axis = 1)
Project: punctuator2    Author: ottokart    | project source | file source
def __init__(self, rng, x, minibatch_size, n_hidden, x_vocabulary, y_vocabulary, stage1_model_file_name, p=None):

        y_vocabulary_size = len(y_vocabulary)

        self.stage1_model_file_name = stage1_model_file_name
        self.stage1, _ = load(stage1_model_file_name, minibatch_size, x)

        self.n_hidden = n_hidden
        self.x_vocabulary = x_vocabulary
        self.y_vocabulary = y_vocabulary

        # output model
        self.GRU = GRULayer(rng=rng, n_in=self.stage1.n_hidden + 1, n_out=n_hidden, minibatch_size=minibatch_size)
        self.Wy = weights_const(n_hidden, y_vocabulary_size, 'Wy', 0)
        self.by = weights_const(1, y_vocabulary_size, 'by', 0)

        self.params = [self.Wy, self.by]
        self.params += self.GRU.params

        def recurrence(x_t, p_t, h_tm1, Wy, by):

            h_t = self.GRU.step(x_t=T.concatenate((x_t, p_t.dimshuffle((0, 'x'))), axis=1), h_tm1=h_tm1)

            z = T.dot(h_t, Wy) + by
            y_t = T.nnet.softmax(z)

            return [h_t, y_t]

        [_, self.y], _ = theano.scan(fn=recurrence,
            sequences=[self.stage1.last_hidden_states, p],
            non_sequences=[self.Wy, self.by],
            outputs_info=[self.GRU.h0, None])

        print "Number of parameters is %d" % sum(np.prod(p.shape.eval()) for p in self.params)
        print "Number of parameters with stage1 params is %d" % sum(np.prod(p.shape.eval()) for p in self.params + self.stage1.params)

        self.L1 = sum(abs(p).sum() for p in self.params)
        self.L2_sqr = sum((p**2).sum() for p in self.params)
Project: crfrnn_layer    Author: HapeMask    | project source | file source
def get_output_for(self, inputs, **kwargs):
        unary, ref = inputs

        N, _, H, W = ref.shape
        yx = tt.cast(tt.stack(tt.mgrid[0:H, 0:W]), "float32")
        grid = tt.alloc(yx[np.newaxis, :, :, :], N, 2, H, W)
        stacked = tt.concatenate([grid, ref], axis=1)

        def _bilateral(V, R):
            o = tt.ones((1, V.shape[1], V.shape[2]), "float32")
            norm = tt.sqrt(gaussian_filter(R, o, self.kstd_bf,
                                           self.ref_dim)) + 1e-8
            return gaussian_filter(R, V/norm, self.kstd_bf, self.ref_dim,
                                   self.val_dim) / norm

        def _step(prev_q, U, ref, normalize=True):
            qbf = _bilateral(prev_q, ref)
            qsf = tt.nnet.conv2d(prev_q[np.newaxis, :, :, :],
                                 self.W_spatial, border_mode="half")[0]

            q_hat = -self.compat_bf * qbf + -self.compat_spatial * qsf
            q_hat = U - q_hat

            return softmax(q_hat, axis=0) if normalize else q_hat

        def _inference(unary_i, ref_i):
            U = tt.log(tt.clip(unary_i, 1e-5, 1))
            prev_q = softmax(U, axis=0)

            # This is faster than using scan.
            for i in range(self.num_iter):
                normalize = self.normalize_final_iter or i < self.num_iter-1
                prev_q = _step(prev_q, U, ref_i, normalize)
            return prev_q

        return theano.scan(fn=_inference, sequences=[unary, stacked],
                           outputs_info=None)[0]
Project: crfrnn_layer    Author: HapeMask    | project source | file source
def grad(self, inputs, ograds):
        ref, values, ref_dim, val_dim = inputs[:4]
        hash_struct = inputs[4:]
        ograd = ograds[0]

        ref_dim = get_scalar_constant_value(ref_dim)
        val_dim = get_scalar_constant_value(val_dim)

        def _conv(x):
            return GaussianFilter()(ref, x, ref_dim, val_dim, *hash_struct)

        # Since the kernels are separable and symmetric, the gradient w.r.t.
        # input is just the same filtering applied to the output grads.
        grad_i = _conv(ograd)

        def _gradr(r_i, vals, og, *args):
            return (og * (_conv(vals*r_i) - r_i*_conv(vals)) +
                    vals * (_conv(og*r_i) - r_i*_conv(og)))

        grad_r, _ = theano.scan(fn=_gradr, sequences=[ref],
                                non_sequences=[values, ograd] + hash_struct,
                                outputs_info=None)

        grad_r = grad_r.sum(axis=1, acc_dtype="float32")

        grads = [DisconnectedType()() for i in range(len(inputs))]
        grads[0] = grad_r
        grads[1] = grad_i
        return grads