Python theano.tensor 模块,concatenate() 实例源码

我们从Python开源项目中,提取了以下50个代码示例,用于说明如何使用theano.tensor.concatenate()

项目:monogreedy    作者:jinjunqi    | 项目源码 | 文件源码
def compute(self, state, w_idx, feat, scene):
    """Run one step and return ``(new_state, p, alpha)``.

    ``state`` is unpacked with ``split_state`` into three pieces, the
    attention module is queried, the LSTM is advanced, and the pieces are
    packed again into ``new_state``.  ``p`` is the output of
    ``self.pred_mlp`` and ``alpha`` is the second value returned by
    ``self.attention.compute``.
    """
    embedded = self.embedding.compute(w_idx)

    # Unpack the previous state according to the configured sizes.
    layout = [(1, self.config['na']), (2, self.config['nh'])]
    prev_ctx, prev_cell, prev_hidden = split_state(state, scheme=layout)

    # Attention is conditioned on the previous context/hidden parts and the word.
    attn_query = T.concatenate([prev_ctx, prev_hidden, embedded], axis=1)
    ctx, alpha = self.attention.compute(feat, attn_query)

    # LSTM step on [context, word, scene].
    lstm_in = T.concatenate([ctx, embedded, scene], axis=-1)
    cell, hidden = self.lstm.compute(lstm_in, prev_cell, prev_hidden)

    # Pack the new state, then predict from [context, hidden, word, scene].
    new_state = T.concatenate([ctx, cell, hidden], axis=-1)
    pred_in = T.concatenate([ctx, hidden, embedded, scene], axis=-1)
    p = self.pred_mlp.compute(pred_in)
    return new_state, p, alpha
项目:TAC-GAN    作者:dashayushman    | 项目源码 | 文件源码
def build_encoder_bi(tparams, options):
    """
    Build a bidirectional encoder over pre-computed word embeddings.

    Returns ``(embedding, x_mask, ctx)``: the two symbolic inputs and the
    axis-1 concatenation of the last entry of each direction's first output.
    """
    # symbolic inputs
    embedding = tensor.tensor3('embedding', dtype='float32')
    x_mask = tensor.matrix('x_mask', dtype='float32')

    def run_direction(prefix, emb, mask):
        # Look the layer up on every call, exactly as many times as needed.
        layer_fn = get_layer(options['encoder'])[1]
        return layer_fn(tparams, emb, options, prefix=prefix, mask=mask)

    # forward pass, then the same encoder type on the reversed sequence
    fwd = run_direction('encoder', embedding, x_mask)
    bwd = run_direction('encoder_r', embedding[::-1], x_mask[::-1])

    ctx = tensor.concatenate([fwd[0][-1], bwd[0][-1]], axis=1)
    return embedding, x_mask, ctx


# some utilities
项目:TAC-GAN    作者:dashayushman    | 项目源码 | 文件源码
def param_init_gru(options, params, prefix='gru', nin=None, dim=None):
    """
    Initialise the GRU parameters and store them in ``params``.

    :param options: mapping that provides ``'dim_proj'``, used as the
        default for both ``nin`` and ``dim``
    :param params: mapping that receives the new entries (mutated in place)
    :param prefix: name prefix passed to ``_p`` for every parameter key
    :param nin: input size; defaults to ``options['dim_proj']``
    :param dim: hidden size; defaults to ``options['dim_proj']``
    :return: the same ``params`` mapping
    """
    # Identity test: ``== None`` would dispatch to ``__eq__`` of the argument.
    if nin is None:
        nin = options['dim_proj']
    if dim is None:
        dim = options['dim_proj']

    # Two input and two recurrent blocks are stored side by side.
    # The order of the initialiser calls is kept unchanged.
    W = numpy.concatenate([norm_weight(nin, dim),
                           norm_weight(nin, dim)], axis=1)
    params[_p(prefix, 'W')] = W
    params[_p(prefix, 'b')] = numpy.zeros((2 * dim,)).astype('float32')
    U = numpy.concatenate([ortho_weight(dim),
                           ortho_weight(dim)], axis=1)
    params[_p(prefix, 'U')] = U

    # Separate single-width weights and bias stored under the 'x' suffix.
    Wx = norm_weight(nin, dim)
    params[_p(prefix, 'Wx')] = Wx
    Ux = ortho_weight(dim)
    params[_p(prefix, 'Ux')] = Ux
    params[_p(prefix, 'bx')] = numpy.zeros((dim,)).astype('float32')

    return params
项目:how_to_convert_text_to_images    作者:llSourcell    | 项目源码 | 文件源码
def build_encoder_bi(tparams, options):
    """
    Build a bidirectional encoder over pre-computed word embeddings.

    Returns ``(embedding, x_mask, ctx)``: the two symbolic inputs and the
    axis-1 concatenation of the last entry of each direction's first output.
    """
    # symbolic inputs
    embedding = tensor.tensor3('embedding', dtype='float32')
    x_mask = tensor.matrix('x_mask', dtype='float32')

    def run_direction(prefix, emb, mask):
        # Look the layer up on every call, exactly as many times as needed.
        layer_fn = get_layer(options['encoder'])[1]
        return layer_fn(tparams, emb, options, prefix=prefix, mask=mask)

    # forward pass, then the same encoder type on the reversed sequence
    fwd = run_direction('encoder', embedding, x_mask)
    bwd = run_direction('encoder_r', embedding[::-1], x_mask[::-1])

    ctx = tensor.concatenate([fwd[0][-1], bwd[0][-1]], axis=1)
    return embedding, x_mask, ctx


# some utilities
项目:how_to_convert_text_to_images    作者:llSourcell    | 项目源码 | 文件源码
def param_init_gru(options, params, prefix='gru', nin=None, dim=None):
    """
    Initialise the GRU parameters and store them in ``params``.

    :param options: mapping that provides ``'dim_proj'``, used as the
        default for both ``nin`` and ``dim``
    :param params: mapping that receives the new entries (mutated in place)
    :param prefix: name prefix passed to ``_p`` for every parameter key
    :param nin: input size; defaults to ``options['dim_proj']``
    :param dim: hidden size; defaults to ``options['dim_proj']``
    :return: the same ``params`` mapping
    """
    # Identity test: ``== None`` would dispatch to ``__eq__`` of the argument.
    if nin is None:
        nin = options['dim_proj']
    if dim is None:
        dim = options['dim_proj']

    # Two input and two recurrent blocks are stored side by side.
    # The order of the initialiser calls is kept unchanged.
    W = numpy.concatenate([norm_weight(nin, dim),
                           norm_weight(nin, dim)], axis=1)
    params[_p(prefix, 'W')] = W
    params[_p(prefix, 'b')] = numpy.zeros((2 * dim,)).astype('float32')
    U = numpy.concatenate([ortho_weight(dim),
                           ortho_weight(dim)], axis=1)
    params[_p(prefix, 'U')] = U

    # Separate single-width weights and bias stored under the 'x' suffix.
    Wx = norm_weight(nin, dim)
    params[_p(prefix, 'Wx')] = Wx
    Ux = ortho_weight(dim)
    params[_p(prefix, 'Ux')] = Ux
    params[_p(prefix, 'bx')] = numpy.zeros((dim,)).astype('float32')

    return params
项目:Neural-Photo-Editor    作者:ajbrock    | 项目源码 | 文件源码
def get_output_for(self, input, deterministic=False, **kwargs):
        """Rearrange each channel group of ``input`` spatially and stack the results.

        ``input`` is split along axis 1 into ``self.c`` equally sized groups,
        every group goes through ``_phase_shift`` with factor ``self.r``, and
        the single-channel results are concatenated along axis 1.
        ``deterministic`` and ``kwargs`` are accepted but not used here.
        """
        def _phase_shift(input,r):
            # a, b: output height/width (self.output_shape[2:4]) divided by r.
            # NOTE(review): `c` is bound to 1 and never read afterwards.
            bsize,c,a,b = input.shape[0],1,self.output_shape[2]//r,self.output_shape[3]//r
            X = T.reshape(input, (bsize,r,r,a,b))
            X = T.transpose(X, (0, 3,4,1,2))  # bsize, a, b, r2,r1
            # Peel off the `a` rows one by one, then glue them back along axis 2.
            X = T.split(x=X,splits_size=[1]*a,n_splits=a,axis=1)  # a, [bsize, b, r, r]
            X = [T.reshape(x,(bsize,b,r,r))for x in X]
            X = T.concatenate(X,axis=2)  # bsize, b, a*r, r
            # Same again for the `b` columns.
            X = T.split(x=X,splits_size =[1]*b,n_splits=b,axis=1)  # b, [bsize, a*r, r]
            X = [T.reshape(x,(bsize,a*r,r))for x in X]
            X = T.concatenate(X,axis=2) # bsize, a*r, b*r
            # Re-insert a broadcastable channel axis at position 1.
            return X.dimshuffle(0,'x',1,2)
        # One group of input.shape[1]//self.c channels per output channel.
        Xc = T.split(x=input,splits_size =[input.shape[1]//self.c]*self.c,n_splits=self.c,axis=1)
        return T.concatenate([_phase_shift(xc,self.r) for xc in Xc],axis=1)

# Multiscale Dilated Convolution Block
# This function (not a layer in and of itself, though you could make it one) returns a set of concatenated conv2d and dilatedconv2d layers.
# Each layer uses the same basic filter W, operating at a different dilation factor (or taken as the mean of W for the 1x1 conv).
# The channel-wise output of each layer is weighted by a set of coefficients, which are initialized to 1 / the total number of dilation scales,
# meaning that we're starting by taking an elementwise mean. These should be learnable parameters.

# NOTES: - I'm considering changing the variable names to be more descriptive, and look less like ridiculous academic code. It's on the to-do list.
#        - I keep the bias and nonlinearity out of the default definition for this layer, as I expect it to be batchnormed and nonlinearized in the model config.
项目:snn4hrl    作者:florensacc    | 项目源码 | 文件源码
def dist_info_sym(self, obs_var, latent_var=None):
    """Return symbolic ``mean`` and ``log_std`` for observations plus latents.

    Meant to be used for a single path.  Original author's caveat: the
    ``latent_var is None`` fallback is a workaround and does not work for
    computing the dist_info_vars of npo_snn_rewardMI.

    :param obs_var: symbolic observations, one row per sample
    :param latent_var: symbolic latents; when omitted, ``self.latent_fix``
        is repeated once per observation row
    :return: dict with keys ``mean`` and ``log_std``
    """
    if latent_var is None:
        # Avoid making the latent an input: tile the single fixed latent.
        fixed_row = theano.shared(np.expand_dims(self.latent_fix, axis=0))
        latent_var = TT.tile(fixed_row, [obs_var.shape[0], 1])

    # Generalized input: observations with the latents appended ...
    pieces = [obs_var, latent_var]
    if self.bilinear_integration:
        # ... plus the flattened per-sample outer product of the two.
        outer = obs_var[:, :, np.newaxis] * latent_var[:, np.newaxis, :]
        pieces.append(TT.flatten(outer, outdim=2))
    extended_obs_var = TT.concatenate(pieces, axis=1)

    mean_var, log_std_var = L.get_output(
        [self._l_mean, self._l_log_std], extended_obs_var)
    if self.min_std is not None:
        # Clamp the log-std from below.
        log_std_var = TT.maximum(log_std_var, np.log(self.min_std))
    return {'mean': mean_var, 'log_std': log_std_var}
项目:keras    作者:GeekLiB    | 项目源码 | 文件源码
def ctc_update_log_p(skip_idxs, zeros, active, log_p_curr, log_p_prev):
    """Advance the log-probability vector by one step.

    Works on the first ``active`` entries of ``log_p_prev`` and returns
    ``(active_next, log_p_next)``: the new active length (int32, capped at
    ``log_p_curr.shape[0]``) and ``zeros[:active_next]`` overwritten with the
    updated log-probabilities.
    """
    # Skip indices that lie inside the currently active prefix.
    active_skip_idxs = skip_idxs[(skip_idxs < active).nonzero()]
    # The appended -1 keeps T.max defined when no skip index is active.
    active_next = T.cast(T.minimum(
        T.maximum(
            active + 1,
            T.max(T.concatenate([active_skip_idxs, [-1]])) + 2 + 1
        ), log_p_curr.shape[0]), 'int32')

    # Subtract the maximum before exponentiating; it is added back after the log.
    common_factor = T.max(log_p_prev[:active])
    p_prev = T.exp(log_p_prev[:active] - common_factor)
    _p_prev = zeros[:active_next]
    # copy over
    _p_prev = T.set_subtensor(_p_prev[:active], p_prev)
    # previous transitions
    _p_prev = T.inc_subtensor(_p_prev[1:], _p_prev[:-1])
    # skip transitions
    _p_prev = T.inc_subtensor(_p_prev[active_skip_idxs + 2], p_prev[active_skip_idxs])
    updated_log_p_prev = T.log(_p_prev) + common_factor

    # Combine with the current step's log-probabilities.
    log_p_next = T.set_subtensor(
        zeros[:active_next],
        log_p_curr[:active_next] + updated_log_p_prev
    )
    return active_next, log_p_next
项目:third_person_im    作者:bstadie    | 项目源码 | 文件源码
def update_opt(self, f, target, inputs, reg_coeff):
    """Prepare a lazily compiled function under the key ``f_Hx_plain``.

    Stores ``target`` and ``reg_coeff`` on the instance, then registers in
    ``self.opt_fun`` a function that takes ``inputs + xs`` (one extra tensor
    per trainable parameter) and outputs the flattened gradient of
    ``sum_i sum(grad_i(f) * x_i)`` with respect to the parameters.
    """
    self.target = target
    self.reg_coeff = reg_coeff

    params = target.get_params(trainable=True)
    constraint_grads = theano.grad(f, wrt=params, disconnected_inputs='warn')
    # One placeholder per parameter, shaped like that parameter.
    xs = tuple(ext.new_tensor_like("%s x" % p.name, p) for p in params)

    def Hx_plain():
        # Differentiate the gradient/vector inner product once more.
        inner = TT.sum([TT.sum(g * x) for g, x in zip(constraint_grads, xs)])
        per_param = TT.grad(inner, wrt=params, disconnected_inputs='warn')
        return TT.concatenate([TT.flatten(piece) for piece in per_param])

    def compile_f_Hx_plain():
        return ext.compile_function(
            inputs=inputs + xs,
            outputs=Hx_plain(),
            log_name="f_Hx_plain",
        )

    # Compilation is deferred until the entry is first requested.
    self.opt_fun = ext.lazydict(f_Hx_plain=compile_f_Hx_plain)
项目:third_person_im    作者:bstadie    | 项目源码 | 文件源码
def get_action(self, observation):
    """Sample one action from the step function's probabilities.

    Returns ``(action, agent_info)``.  ``agent_info`` always has ``prob``
    and, when ``self.state_include_action`` is set, also ``prev_action``
    (the flattened previous action that was fed to the step function).
    Updates ``self.prev_action`` and ``self.prev_hidden``.
    """
    if not self.state_include_action:
        all_input = self.observation_space.flatten(observation)
        # should not be used
        prev_action = np.nan
    else:
        # The very first step has no previous action: feed zeros instead.
        prev_action = (
            np.zeros((self.action_space.flat_dim,))
            if self.prev_action is None
            else self.action_space.flatten(self.prev_action)
        )
        all_input = np.concatenate([
            self.observation_space.flatten(observation),
            prev_action,
        ])

    # The step function works on batches; take element 0 of each output.
    step_out = self.f_step_prob([all_input], [self.prev_hidden])
    probs, hidden_vec = [batch[0] for batch in step_out]
    action = special.weighted_sample(probs, range(self.action_space.n))

    self.prev_action = action
    self.prev_hidden = hidden_vec

    agent_info = {"prob": probs}
    if self.state_include_action:
        agent_info["prev_action"] = prev_action
    return action, agent_info
项目:third_person_im    作者:bstadie    | 项目源码 | 文件源码
def get_action(self, observation):
    """Sample one action as ``mean + noise * exp(log_std)``.

    Returns ``(action, agent_info)``.  ``agent_info`` always has ``mean``
    and ``log_std`` and, when ``self._state_include_action`` is set, also
    ``prev_action`` (the flattened previous action that was fed to the step
    function).  Updates ``self._prev_action`` and ``self._prev_hidden``.
    """
    if not self._state_include_action:
        all_input = self.observation_space.flatten(observation)
        # should not be used
        prev_action = np.nan
    else:
        # The very first step has no previous action: feed zeros instead.
        prev_action = (
            np.zeros((self.action_space.flat_dim,))
            if self._prev_action is None
            else self.action_space.flatten(self._prev_action)
        )
        all_input = np.concatenate([
            self.observation_space.flatten(observation),
            prev_action,
        ])

    # The step function works on batches; take element 0 of each output.
    step_out = self._f_step_mean_std([all_input], [self._prev_hidden])
    mean, log_std, hidden_vec = [batch[0] for batch in step_out]

    noise = np.random.normal(size=mean.shape)
    action = noise * np.exp(log_std) + mean

    self._prev_action = action
    self._prev_hidden = hidden_vec

    agent_info = {"mean": mean, "log_std": log_std}
    if self._state_include_action:
        agent_info["prev_action"] = prev_action
    return action, agent_info
项目:MachineComprehension    作者:sa-j    | 项目源码 | 文件源码
def __init__(self, incoming, unchanged_W, unchanged_W_shape,
             oov_in_train_W, oov_in_train_W_shape,
             p=0.5, rescale=True, dropout_mask=None,
             **kwargs):
    """Set up an embedding made of a frozen table and a trainable table.

    ``unchanged_W`` is registered as non-trainable and non-regularizable,
    ``oov_in_train_W`` with the default flags; ``self.W`` is their
    concatenation.  If no ``dropout_mask`` is given, a binomial mask with
    keep probability ``1 - p`` and the shape and dtype of ``self.W`` is
    created.
    """
    super(CustomEmbedding, self).__init__(incoming, **kwargs)
    self.output_size = unchanged_W_shape[1]

    # Registration order of the two parameters is kept as in the original.
    self.unchanged_W = self.add_param(
        unchanged_W, unchanged_W_shape,
        name="unchanged_W", trainable=False, regularizable=False)
    self.oov_in_train_W = self.add_param(
        oov_in_train_W, oov_in_train_W_shape, name='oov_in_train_W')
    self.W = T.concatenate([self.unchanged_W, self.oov_in_train_W])

    self.p = p
    self.rescale = rescale

    if dropout_mask is None:
        seed = _rng.randint(1, 2147462579)
        keep_prob = 1 - self.p
        dropout_mask = RandomStreams(seed).binomial(
            self.W.shape, p=keep_prob, dtype=self.W.dtype)
    self.dropout_mask = dropout_mask
项目:sesame-paste-noodle    作者:aissehust    | 项目源码 | 文件源码
def forward(self, inputtensor):
    """Apply conv1-bn1-relu1-conv2-bn2, add the shortcut, and apply relu2.

    ``inputtensor`` is a one-element sequence; element 0 is what gets added
    back.  When ``self.increaseDim`` is set, the shortcut is the 2x2-pooled
    input concatenated along axis 1 with zeros of the same shape.
    """
    branch = inputtensor
    for layer in (self.conv1, self.bn1, self.relu1, self.conv2, self.bn2):
        branch = layer.forward(branch)

    if self.increaseDim:
        pooled = T.signal.pool.pool_2d(inputtensor[0], (2, 2), ignore_border=True)
        shortcut = T.concatenate([pooled, T.zeros_like(pooled)], axis=1)
    else:
        shortcut = inputtensor[0]

    return self.relu2.forward([branch[0] + shortcut, ])
项目:StackGAN    作者:hanzhanggit    | 项目源码 | 文件源码
def build_encoder_bi(tparams, options):
    """
    Build a bidirectional encoder over pre-computed word embeddings.

    Returns ``(embedding, x_mask, ctx)``: the two symbolic inputs and the
    axis-1 concatenation of the last entry of each direction's first output.
    """
    # symbolic inputs
    embedding = tensor.tensor3('embedding', dtype='float32')
    x_mask = tensor.matrix('x_mask', dtype='float32')

    def run_direction(prefix, emb, mask):
        # Look the layer up on every call, exactly as many times as needed.
        layer_fn = get_layer(options['encoder'])[1]
        return layer_fn(tparams, emb, options, prefix=prefix, mask=mask)

    # forward pass, then the same encoder type on the reversed sequence
    fwd = run_direction('encoder', embedding, x_mask)
    bwd = run_direction('encoder_r', embedding[::-1], x_mask[::-1])

    ctx = tensor.concatenate([fwd[0][-1], bwd[0][-1]], axis=1)
    return embedding, x_mask, ctx


# some utilities
项目:StackGAN    作者:hanzhanggit    | 项目源码 | 文件源码
def param_init_gru(options, params, prefix='gru', nin=None, dim=None):
    """
    Initialise the GRU parameters and store them in ``params``.

    :param options: mapping that provides ``'dim_proj'``, used as the
        default for both ``nin`` and ``dim``
    :param params: mapping that receives the new entries (mutated in place)
    :param prefix: name prefix passed to ``_p`` for every parameter key
    :param nin: input size; defaults to ``options['dim_proj']``
    :param dim: hidden size; defaults to ``options['dim_proj']``
    :return: the same ``params`` mapping
    """
    # Identity test: ``== None`` would dispatch to ``__eq__`` of the argument.
    if nin is None:
        nin = options['dim_proj']
    if dim is None:
        dim = options['dim_proj']

    # Two input and two recurrent blocks are stored side by side.
    # The order of the initialiser calls is kept unchanged.
    W = numpy.concatenate([norm_weight(nin, dim),
                           norm_weight(nin, dim)], axis=1)
    params[_p(prefix, 'W')] = W
    params[_p(prefix, 'b')] = numpy.zeros((2 * dim,)).astype('float32')
    U = numpy.concatenate([ortho_weight(dim),
                           ortho_weight(dim)], axis=1)
    params[_p(prefix, 'U')] = U

    # Separate single-width weights and bias stored under the 'x' suffix.
    Wx = norm_weight(nin, dim)
    params[_p(prefix, 'Wx')] = Wx
    Ux = ortho_weight(dim)
    params[_p(prefix, 'Ux')] = Ux
    params[_p(prefix, 'bx')] = numpy.zeros((dim,)).astype('float32')

    return params
项目:Deep-Learning-with-Theano    作者:PacktPublishing    | 项目源码 | 文件源码
def gen_samples(n, nbatch=128):
    """Generate ``n`` samples with random labels, ``nbatch`` at a time.

    Full batches are drawn first; one final batch covers whatever is left
    (it is drawn even when nothing is left, as before).

    :param n: total number of samples to generate
    :param nbatch: batch size used for the full batches
    :return: ``(samples, labels)`` concatenated along axis 0
    """
    def draw(batch_size):
        # Random labels in [0, 10) and uniform noise in [-1, 1).
        ymb = floatX(OneHot(np_rng.randint(0, 10, batch_size), ny))
        zmb = floatX(np_rng.uniform(-1., 1., size=(batch_size, nz)))
        return _gen(zmb, ymb), np.argmax(ymb, axis=1)

    samples = []
    labels = []
    n_gen = 0
    # Floor division: ``n / nbatch`` is a float under true division and
    # ``range`` rejects it.
    for _ in range(n // nbatch):
        xmb, lab = draw(nbatch)
        samples.append(xmb)
        labels.append(lab)
        n_gen += len(xmb)

    # Remainder batch.
    xmb, lab = draw(n - n_gen)
    samples.append(xmb)
    labels.append(lab)
    return np.concatenate(samples, axis=0), np.concatenate(labels, axis=0)
项目:rllabplusplus    作者:shaneshixiang    | 项目源码 | 文件源码
def update_opt(self, f, target, inputs, reg_coeff):
    """Prepare a lazily compiled function under the key ``f_Hx_plain``.

    Stores ``target`` and ``reg_coeff`` on the instance, then registers in
    ``self.opt_fun`` a function that takes ``inputs + xs`` (one extra tensor
    per trainable parameter) and outputs the flattened gradient of
    ``sum_i sum(grad_i(f) * x_i)`` with respect to the parameters.
    """
    self.target = target
    self.reg_coeff = reg_coeff

    params = target.get_params(trainable=True)
    constraint_grads = theano.grad(f, wrt=params, disconnected_inputs='warn')
    # One placeholder per parameter, shaped like that parameter.
    xs = tuple(ext.new_tensor_like("%s x" % p.name, p) for p in params)

    def Hx_plain():
        # Differentiate the gradient/vector inner product once more.
        inner = TT.sum([TT.sum(g * x) for g, x in zip(constraint_grads, xs)])
        per_param = TT.grad(inner, wrt=params, disconnected_inputs='warn')
        return TT.concatenate([TT.flatten(piece) for piece in per_param])

    def compile_f_Hx_plain():
        return ext.compile_function(
            inputs=inputs + xs,
            outputs=Hx_plain(),
            log_name="f_Hx_plain",
        )

    # Compilation is deferred until the entry is first requested.
    self.opt_fun = ext.lazydict(f_Hx_plain=compile_f_Hx_plain)
项目:rllabplusplus    作者:shaneshixiang    | 项目源码 | 文件源码
def get_action(self, observation):
    """Sample one action from the step function's probabilities.

    Returns ``(action, agent_info)``.  ``agent_info`` always has ``prob``
    and, when ``self.state_include_action`` is set, also ``prev_action``
    (the flattened previous action that was fed to the step function).
    Updates ``self.prev_action`` and ``self.prev_hidden``.
    """
    if not self.state_include_action:
        all_input = self.observation_space.flatten(observation)
        # should not be used
        prev_action = np.nan
    else:
        # The very first step has no previous action: feed zeros instead.
        prev_action = (
            np.zeros((self.action_space.flat_dim,))
            if self.prev_action is None
            else self.action_space.flatten(self.prev_action)
        )
        all_input = np.concatenate([
            self.observation_space.flatten(observation),
            prev_action,
        ])

    # The step function works on batches; take element 0 of each output.
    step_out = self.f_step_prob([all_input], [self.prev_hidden])
    probs, hidden_vec = [batch[0] for batch in step_out]
    action = special.weighted_sample(probs, range(self.action_space.n))

    self.prev_action = action
    self.prev_hidden = hidden_vec

    agent_info = {"prob": probs}
    if self.state_include_action:
        agent_info["prev_action"] = prev_action
    return action, agent_info
项目:rllabplusplus    作者:shaneshixiang    | 项目源码 | 文件源码
def get_action(self, observation):
    """Sample one action as ``mean + noise * exp(log_std)``.

    Returns ``(action, agent_info)``.  ``agent_info`` always has ``mean``
    and ``log_std`` and, when ``self._state_include_action`` is set, also
    ``prev_action`` (the flattened previous action that was fed to the step
    function).  Updates ``self._prev_action`` and ``self._prev_hidden``.
    """
    if not self._state_include_action:
        all_input = self.observation_space.flatten(observation)
        # should not be used
        prev_action = np.nan
    else:
        # The very first step has no previous action: feed zeros instead.
        prev_action = (
            np.zeros((self.action_space.flat_dim,))
            if self._prev_action is None
            else self.action_space.flatten(self._prev_action)
        )
        all_input = np.concatenate([
            self.observation_space.flatten(observation),
            prev_action,
        ])

    # The step function works on batches; take element 0 of each output.
    step_out = self._f_step_mean_std([all_input], [self._prev_hidden])
    mean, log_std, hidden_vec = [batch[0] for batch in step_out]

    noise = np.random.normal(size=mean.shape)
    action = noise * np.exp(log_std) + mean

    self._prev_action = action
    self._prev_hidden = hidden_vec

    agent_info = {"mean": mean, "log_std": log_std}
    if self._state_include_action:
        agent_info["prev_action"] = prev_action
    return action, agent_info
项目:3D-R2N2    作者:chrischoy    | 项目源码 | 文件源码
def __init__(self, prev_layers, axis=1):
    """
    Concatenate the outputs of several previous layers along ``axis``.

    prev_layers: list of at least two layers; the first one is handed to
    the parent constructor
    axis: axis to concatenate along

    For tensor5, channel dimension is axis=2 (due to theano conv3d
    convention). For image, axis=1
    """
    assert (len(prev_layers) > 1)
    super().__init__(prev_layers[0])
    self._axis = axis
    self._prev_layers = prev_layers

    # Start from the input shape and grow the concatenation axis by the
    # size each remaining layer contributes.
    self._output_shape = self._input_shape.copy()
    extra = sum(layer._output_shape[axis] for layer in prev_layers[1:])
    self._output_shape[axis] += extra

    shape_text = ','.join(map(str, self._output_shape))
    print('Concat the prev layer to [%s]' % shape_text)
项目:semi-auto-anno    作者:moberweger    | 项目源码 | 文件源码
def project3Dto2D(self, Li, idxs):
    """
    Project 3D point to 2D
    :param Li: joints in normalized 3D
    :param idxs: frames specified by subset
    :return: 2D points, in normalized 2D coordinates
    """
    # A single frame index is promoted to a one-element array.
    if not isinstance(idxs, numpy.ndarray):
        idxs = numpy.asarray([idxs])

    n_frames = len(idxs)
    n_joints = self.numJoints
    n_points = n_frames * n_joints

    # Undo the per-frame normalization: scale, then add the 3D offset.
    joints = numpy.reshape(Li, (n_frames, n_joints, 3))
    scale = self.Di_scale[idxs][:, None, None]
    offset = self.Di_off3D[idxs][:, None, :]
    glob3D = (joints * scale + offset).reshape((n_points, 3))

    # Homogeneous projection with the camera matrix.
    ones = numpy.ones((n_points, 1), dtype='float32')
    glob3D_hom = numpy.concatenate([glob3D, ones], axis=1)
    glob2D_hom = numpy.dot(glob3D_hom, self.cam_proj.T)
    glob2D = glob2D_hom[:, 0:3] / glob2D_hom[:, 3][:, None]
    glob2D = glob2D.reshape((n_frames, n_joints, 3))

    # Per-frame 2D transform (shift into the cropped window), then dehomogenize.
    img2D_hom = numpy.einsum('ijk,ikl->ijl', glob2D, self.Di_trans2D[idxs])
    img2D = img2D_hom[:, :, 0:2] / img2D_hom[:, :, 2][:, :, None]
    img2D = img2D.reshape((n_frames, n_joints * 2))

    # Normalize around the crop centre using half the size of axis 3 of Di.
    half = self.Di.shape[3] / 2.
    return (img2D - half) / half
项目:KGP-ASR    作者:KGPML    | 项目源码 | 文件源码
def _add_blanks(y, blank_symbol, y_mask=None):
    """Interleave blanks into a label matrix and extend the mask to match.
    Input shape: output_seq_len x num_batch
    Output shape: 2*output_seq_len+1 x num_batch

    Returns ``(blanked_y, blanked_y_mask)``; the labels are cast to int32
    and the mask is ``None`` when no ``y_mask`` was given.
    """
    # Labels: pair every label with a blank, flatten the pairs, transpose back.
    labels = y.T.dimshuffle(0, 1, 'x')
    blank_fill = tensor.zeros_like(labels) + blank_symbol
    paired = tensor.concatenate([labels, blank_fill], axis=2)
    interleaved = paired.reshape(
        (paired.shape[0], paired.shape[1] * paired.shape[2])).T
    # One extra blank row goes in front.
    leading_blank = tensor.zeros((1, interleaved.shape[1])) + blank_symbol
    blanked_y = tensor.concatenate([leading_blank, interleaved], axis=0)

    # Mask: every entry is duplicated so each inserted blank shares the mask
    # value of the label before it; the leading blank is always unmasked.
    blanked_y_mask = None
    if y_mask is not None:
        mask = y_mask.T.dimshuffle(0, 1, 'x')
        doubled = tensor.concatenate([mask, mask], axis=2)
        stretched = doubled.reshape(
            (doubled.shape[0], doubled.shape[1] * doubled.shape[2])).T
        leading_one = tensor.ones((1, stretched.shape[1]), dtype=floatX)
        blanked_y_mask = tensor.concatenate([leading_one, stretched], axis=0)

    return blanked_y.astype('int32'), blanked_y_mask
项目:deep-learning-keras-projects    作者:jasmeetsb    | 项目源码 | 文件源码
def ctc_update_log_p(skip_idxs, zeros, active, log_p_curr, log_p_prev):
    """Advance the log-probability vector by one step.

    Works on the first ``active`` entries of ``log_p_prev`` and returns
    ``(active_next, log_p_next)``: the new active length (int32, capped at
    ``log_p_curr.shape[0]``) and ``zeros[:active_next]`` overwritten with the
    updated log-probabilities.
    """
    # Skip indices that lie inside the currently active prefix.
    active_skip_idxs = skip_idxs[(skip_idxs < active).nonzero()]
    # The appended -1 keeps T.max defined when no skip index is active.
    active_next = T.cast(T.minimum(
        T.maximum(
            active + 1,
            T.max(T.concatenate([active_skip_idxs, [-1]])) + 2 + 1
        ), log_p_curr.shape[0]), 'int32')

    # Subtract the maximum before exponentiating; it is added back after the log.
    common_factor = T.max(log_p_prev[:active])
    p_prev = T.exp(log_p_prev[:active] - common_factor)
    _p_prev = zeros[:active_next]
    # copy over
    _p_prev = T.set_subtensor(_p_prev[:active], p_prev)
    # previous transitions
    _p_prev = T.inc_subtensor(_p_prev[1:], _p_prev[:-1])
    # skip transitions
    _p_prev = T.inc_subtensor(_p_prev[active_skip_idxs + 2], p_prev[active_skip_idxs])
    updated_log_p_prev = T.log(_p_prev) + common_factor

    # Combine with the current step's log-probabilities.
    log_p_next = T.set_subtensor(
        zeros[:active_next],
        log_p_curr[:active_next] + updated_log_p_prev
    )
    return active_next, log_p_next
项目:ismir2015    作者:f0k    | 项目源码 | 文件源码
def get_output_for(self, input, **kwargs):
        """Apply the parent layer's pooling with a dilation along axis 2.

        When ``self.dilation[0] > 1`` the input is zero-padded along axis 2,
        the dilation phases are folded into the leading (minibatch) axis, the
        parent ``get_output_for`` is applied, and the result is unfolded and
        un-padded again.  Otherwise the parent is called directly.
        """
        # Symbolic shape of the original input; reused after `input` is rebound.
        input_shape = input.shape
        if self.dilation[0] > 1:
            # pad such that the time axis length is divisible by the dilation factor
            pad_w = (self.dilation[0] - input_shape[2] % self.dilation[0]) % self.dilation[0]
            input = T.concatenate((input, T.zeros((input_shape[0], input_shape[1], pad_w, input_shape[3]), input.dtype)), axis=2)
            # rearrange data to fold the time axis into the minibatch dimension
            input = input.reshape((input_shape[0], input_shape[1], -1, self.dilation[0], input_shape[3]))
            input = input.transpose(0, 3, 1, 2, 4)
            input = input.reshape((-1,) + tuple(input.shape[2:]))
        output = super(TimeDilatedMaxPool2DLayer, self).get_output_for(input, **kwargs)
        if self.dilation[0] > 1:
            # restore the time axis from the minibatch dimension
            output = output.reshape((input_shape[0], self.dilation[0]) + tuple(output.shape[1:]))
            output = output.transpose(0, 2, 3, 1, 4)
            output = output.reshape((input_shape[0], output.shape[1], -1, output.shape[4]))
            # remove the padding
            output = output[:, :, :output.shape[2] - pad_w]
        return output
项目:recnet    作者:joergfranke    | 项目源码 | 文件源码
def _ctc_normal(self, predict,labels):
        """Return ``-log`` of the label-sequence probability from a forward recursion.

        ``predict`` is indexed as ``predict[:, labels]`` and scanned along its
        first axis; ``labels`` is a 1-D index vector.  The blank symbol is
        read from ``self.tpo["CTC_blank"]``.
        """

        n = labels.shape[0]

        # Append two blanks so the shifted slices below all have length n.
        labels2 = T.concatenate((labels, [self.tpo["CTC_blank"], self.tpo["CTC_blank"]]))
        # 1 where position i may jump to i+2: the labels differ and the one
        # in between is a blank.
        sec_diag = T.neq(labels2[:-2], labels2[2:]) * \
                   T.eq(labels2[1:-1], self.tpo["CTC_blank"])

        # Transitions: stay, advance by one, or (where allowed) advance by two.
        recurrence_relation = \
            T.eye(n) + \
            T.eye(n, k=1) + \
            T.eye(n, k=2) * sec_diag.dimshuffle((0, 'x'))

        # Select the prediction columns that belong to the labels.
        pred_y = predict[:, labels]

        # Start with all mass on position 0 and propagate one row at a time.
        probabilities, _ = theano.scan(
            lambda curr, accum: curr * T.dot(accum, recurrence_relation),
            sequences=[pred_y],
            outputs_info=[T.eye(n)[0]]
        )

        # Sum the last two positions of the final step.
        labels_probab = T.sum(probabilities[-1, -2:])
        return -T.log(labels_probab)
项目:epsilon_free_inference    作者:gpapamak    | 项目源码 | 文件源码
def visualize_weights(self, layer, imsize, layout):
    """
    Displays the weights of a specified layer as images.
    :param layer: the layer whose weights to display; indices below
        ``self.net.n_layers`` are delegated to ``self.net``, the index equal
        to it shows this object's own weights
    :param imsize: the image size
    :param layout: number of rows and columns for each page
    :return: none
    """
    if layer < self.net.n_layers:
        self.net.visualize_weights(layer, imsize, layout)
        return

    if layer != self.net.n_layers:
        raise ValueError('Layer {} doesn\'t exist.'.format(layer))

    # Own weights: Wa first, then every Wms, then every WUs, side by side.
    own_weights = [self.Wa] + self.Wms + self.WUs
    stacked = np.concatenate([W.get_value() for W in own_weights], axis=1)
    helper.disp_imdata(stacked.T, imsize, layout)
    plt.show(block=False)
项目:epsilon_free_inference    作者:gpapamak    | 项目源码 | 文件源码
def visualize_activations(self, x):
        """
        Visualizes the activations in the mdn caused by a given data minibatch.
        :param x: a minibatch of data
        :return: none
        """

        # Let the wrapped network show its own activations first.
        self.net.visualize_activations(x)

        # NOTE(review): the outputs list has two entries (self.a and the SUM of
        # the concatenated means and flattened Us), while three titles are
        # zipped below, so the third title is never used.  Possibly the `+`
        # was meant to be a `,` - confirm before changing.
        forwprop = theano.function(
            inputs=[self.input],
            outputs=[self.a, tt.concatenate(self.ms, axis=1) + tt.concatenate([tt.reshape(U, [U.shape[0], -1]) for U in self.Us], axis=1)]
        )
        activations = forwprop(x.astype(dtype))

        # One gray-scale image per returned activation array.
        for a, title in izip(activations, ['mixing coefficients', 'means', 'scale matrices']):

            fig = plt.figure()
            ax = fig.add_subplot(1, 1, 1)
            ax.imshow(a, cmap='gray', interpolation='none')
            ax.set_title(title)
            ax.set_xlabel('layer units')
            ax.set_ylabel('data points')

        plt.show(block=False)
项目:deepAI    作者:kaiu85    | 项目源码 | 文件源码
def randomize_parameters(params, sigmas, sig_min_perturbations):
    """Create randomly perturbed versions of the parameters.

    For every parameter, ``n_perturbations // 2`` noise tensors are drawn
    and mirrored (``eps`` and ``-eps``), scaled by
    ``softplus(sigma) + sig_min_perturbations`` and added to the parameter.

    :param params: sequence of symbolic parameters, indexed with
        ``shape[1]`` and ``shape[2]``
    :param sigmas: sequence of the same length with the pre-softplus scales
    :param sig_min_perturbations: lower bound added to every scale
    :return: ``(r_params, r_epsilons)``, two lists aligned with ``params``
    """
    r_params = []
    r_epsilons = []

    # Floor division keeps the shape entry an integer under true division.
    n_half = n_perturbations // 2

    for i, param in enumerate(params):
        epsilon_half = theano_rng.normal(
            (n_half, param.shape[1], param.shape[2]),
            dtype=theano.config.floatX)
        # Antithetic pairs: every noise draw is used with both signs.
        r_epsilon = T.concatenate([epsilon_half, -1.0 * epsilon_half], axis=0)
        scale = T.nnet.softplus(sigmas[i]) + sig_min_perturbations
        r_params.append(param + r_epsilon * scale)
        r_epsilons.append(r_epsilon)

    return r_params, r_epsilons

####################################################################
#
# Create randomly perturbed version of parameters
#
####################################################################
项目:lazyprogrammer    作者:inhwane    | 项目源码 | 文件源码
def renet_layer_lr_noscan(X, rnn1, rnn2, w, h, wp, hp):
    """Sweep two RNNs over horizontal patch rows without ``theano.scan``.

    The image is cut into ``hp`` x ``wp`` patches.  For every patch row,
    ``rnn1`` reads the patches left to right and ``rnn2`` reads them right
    to left; the hidden states of both are concatenated per row.

    :param X: symbolic image, indexed as ``X[:, rows, cols]``
    :param rnn1: left-to-right RNN providing ``H0`` and ``recurrence``
    :param rnn2: right-to-left RNN providing ``H0`` and ``recurrence``
    :param w: image width
    :param h: image height
    :param wp: patch width
    :param hp: patch height
    :return: stacked per-row results with the first two axes swapped
    """
    n_rows = h // hp   # floor division: plain `/` is a float under true division
    n_cols = w // wp

    list_of_images = []
    for i in range(n_rows):
        rows = slice(i * hp, i * hp + hp)
        h_tm1 = rnn1.H0
        hr_tm1 = rnn2.H0
        h1 = []
        h2 = []
        for j in range(n_cols):
            x = X[:, rows, j * wp:(j * wp + wp)].flatten()
            h_t = rnn1.recurrence(x, h_tm1)
            h1.append(h_t)
            h_tm1 = h_t

            # Mirror column for the reverse sweep.  The original computed
            # this patch but then fed `x` to rnn2, so both RNNs read the
            # same direction.
            jr = n_cols - j - 1
            xr = X[:, rows, jr * wp:(jr * wp + wp)].flatten()
            hr_t = rnn2.recurrence(xr, hr_tm1)
            h2.append(hr_t)
            hr_tm1 = hr_t
        img = T.concatenate([h1, h2])
        list_of_images.append(img)
    return T.stacklists(list_of_images).dimshuffle((1, 0, 2))
项目:lazyprogrammer    作者:inhwane    | 项目源码 | 文件源码
def renet_layer_lr_allscan(X, rnn1, rnn2, w, h, wp, hp):
    """Sweep two RNNs over horizontal patch rows using ``theano.scan``.

    ``X`` is regrouped so that every scan step receives one band of ``hp``
    image rows.  Within a step the band is cut into ``w // wp`` patches,
    ``rnn1`` processes them in order and ``rnn2`` with
    ``go_backwards=True``; both outputs are transposed and concatenated.

    :param X: symbolic image whose first axis has length ``C``
    :param rnn1: RNN providing ``output(x)``
    :param rnn2: RNN providing ``output(x, go_backwards=True)``
    :param w: image width
    :param h: image height
    :param wp: patch width
    :param hp: patch height
    :return: scan results with the first two axes swapped
    """
    # Floor division: plain `/` yields floats under true division, which
    # are not valid shape entries or step counts.
    n_rows = h // hp
    n_cols = w // wp

    C = X.shape[0]
    # split the rows for the first scan
    X = X.dimshuffle((1, 0, 2)).reshape((n_rows, hp * C * w))

    def rnn_pass(x):
        # One band of rows -> sequence of flattened patches.
        x = x.reshape((hp, C, w)).dimshuffle((2, 1, 0)).reshape((n_cols, C * wp * hp))
        h1 = rnn1.output(x)
        h2 = rnn2.output(x, go_backwards=True)
        return T.concatenate([h1.T, h2.T])

    results, _ = theano.scan(
        fn=rnn_pass,
        sequences=X,
        outputs_info=None,
        n_steps=n_rows,
    )
    return results.dimshuffle((1, 0, 2))
项目:opt-mmd    作者:dougalsutherland    | 项目源码 | 文件源码
def get_output_for(self, input, init=False, **kwargs):
        """Append features based on pairwise distances within the batch.

        The input is projected with ``self.W``, L1 distances between all
        pairs of samples are turned into ``exp(-distance)`` and summed over
        the batch; the result is concatenated to the (flattened) input along
        axis 1.  With ``init=True`` the distances and features are rescaled /
        centred from the current batch and the matching parameter updates
        are stored in ``self.init_updates``.
        """
        if input.ndim > 2:
            # if the input has more than two dimensions, flatten it into a
            # batch of feature vectors.
            input = input.flatten(2)

        # Contract the feature axis of the input with the first axis of W.
        activation = T.tensordot(input, self.W, [[1], [0]])
        # Absolute differences between every pair of samples, summed over
        # axis 2.  The 1e6 * eye term makes each sample's distance to itself
        # huge, so it contributes almost nothing after exp(-.).
        abs_dif = (T.sum(abs(activation.dimshuffle(0,1,2,'x') - activation.dimshuffle('x',1,2,0)),axis=2)
                    + 1e6 * T.eye(input.shape[0]).dimshuffle(0,'x',1))

        if init:
            # Data-dependent scaling: normalize by half the mean nearest distance.
            mean_min_abs_dif = 0.5 * T.mean(T.min(abs_dif, axis=2),axis=0)
            abs_dif /= mean_min_abs_dif.dimshuffle('x',0,'x')
            self.init_updates = [(self.log_weight_scale, self.log_weight_scale-T.log(mean_min_abs_dif).dimshuffle(0,'x'))]

        f = T.sum(T.exp(-abs_dif),axis=2)

        if init:
            # Centre the features on this batch and store the bias that does the same.
            mf = T.mean(f,axis=0)
            f -= mf.dimshuffle('x',0)
            self.init_updates.append((self.b, -mf))
        else:
            f += self.b.dimshuffle('x',0)

        return T.concatenate([input, f], axis=1)
项目:EUNN-theano    作者:iguanaus    | 项目源码 | 文件源码
def initialize_matrix(n_in, n_out, name, rng, init='rand'):
    """Create an ``(n_in, n_out)`` shared matrix with the requested initialisation.

    :param n_in: number of rows
    :param n_out: number of columns
    :param name: name given to the shared variable
    :param rng: random generator providing ``uniform(low, high, size)``
    :param init: ``'rand'`` (uniform in +/- sqrt(6 / (n_in + n_out))),
        ``'randSmall'`` (the same, scaled by 0.01) or ``'identity'``
        (ones on the main diagonal, zeros elsewhere)
    :raises ValueError: for any other ``init``
    """
    if init in ('rand', 'randSmall'):
        limit = np.sqrt(6. / (n_in + n_out))
        draws = rng.uniform(low=-limit, high=limit, size=(n_in, n_out))
        values = np.asarray(draws, dtype=theano.config.floatX)
        if init == 'randSmall':
            values = np.float32(0.01) * values
    elif init == 'identity':
        # Rectangular identity: same as padding a square eye with zeros.
        values = np.eye(n_in, n_out, dtype=theano.config.floatX)
    else:
        raise ValueError("Unknown initialization method ["+init+"]")
    return theano.shared(value=values, name=name)
项目:EUNN-theano    作者:iguanaus    | 项目源码 | 文件源码
def times_diag(input, n_hidden, diag, swap_re_im):
    """Multiply ``input`` by the complex diagonal matrix ``exp(j * diag)``.

    ``input`` is an ``I x 2*n_hidden`` matrix (I = number of examples).
    ``diag`` is an ``n_hidden``-dimensional real vector; the diagonal entries
    are ``cos(diag) + j * sin(diag)``. ``swap_re_im`` is used as a column
    index on the sine-scaled term. ``n_hidden`` itself is not read here.
    """
    # Phase vector of length 2*n_hidden: diag followed by its negation.
    phases = T.concatenate([diag, -diag])

    # Row vectors that broadcast over the example axis.
    cos_row = T.cos(phases).dimshuffle('x', 0)
    sin_row = T.sin(phases).dimshuffle('x', 0)

    cos_term = input * cos_row
    sin_term = (input * sin_row)[:, swap_re_im]
    return cos_term + sin_term
项目:SocializedWordEmbeddings    作者:HKUST-KnowComp    | 项目源码 | 文件源码
def forward(self, x, mask, hc):
        """Run one masked LSTM step and return the packed ``[c_t, h_t]`` state.

        ``hc`` packs the previous cell state (first ``self.n_out_t`` entries
        along the last axis) followed by the previous hidden state. It may be
        a matrix or a vector; the result has the same layout.

        ``mask`` is turned into a column with ``dimshuffle(0, 'x')`` and
        multiplied into both the new cell and hidden state. Both are cast to
        float32 before being concatenated.
        """
        n_out = self.n_out_t

        # Unpack previous cell / hidden state from the packed representation.
        if hc.ndim > 1:
            c_tm1 = hc[:, :n_out]
            h_tm1 = hc[:, n_out:]
        else:
            c_tm1 = hc[:n_out]
            h_tm1 = hc[n_out:]

        in_t = self.in_gate.forward(x, h_tm1)
        forget_t = self.forget_gate.forward(x, h_tm1)
        out_t = self.out_gate.forward(x, h_tm1)

        # New cell state, masked and forced to float32.
        c_t = forget_t * c_tm1 + in_t * self.input_layer.forward(x, h_tm1)
        c_t = c_t * mask.dimshuffle(0, 'x')
        c_t = T.cast(c_t, 'float32')

        # New hidden state, masked and forced to float32.
        h_t = out_t * T.tanh(c_t)
        h_t = h_t * mask.dimshuffle(0, 'x')
        h_t = T.cast(h_t, 'float32')

        if hc.ndim > 1:
            return T.concatenate([c_t, h_t], axis=1)
        else:
            return T.concatenate([c_t, h_t])
项目:SocializedWordEmbeddings    作者:HKUST-KnowComp    | 项目源码 | 文件源码
def backward(self, x, mask, hc):
        """Run one masked LSTM step with the ``*_b`` gates and return ``[c_t, h_t]``.

        Same computation as a regular step, but using ``self.in_gate_b``,
        ``self.forget_gate_b``, ``self.out_gate_b`` and ``self.input_layer_b``.

        ``hc`` packs the previous cell state (first ``self.n_out_t`` entries
        along the last axis) followed by the previous hidden state. It may be
        a matrix or a vector; the result has the same layout.

        ``mask`` is turned into a column with ``dimshuffle(0, 'x')`` and
        multiplied into both the new cell and hidden state. Both are cast to
        float32 before being concatenated.
        """
        n_out = self.n_out_t

        # Unpack previous cell / hidden state from the packed representation.
        if hc.ndim > 1:
            c_tm1 = hc[:, :n_out]
            h_tm1 = hc[:, n_out:]
        else:
            c_tm1 = hc[:n_out]
            h_tm1 = hc[n_out:]

        in_t = self.in_gate_b.forward(x, h_tm1)
        forget_t = self.forget_gate_b.forward(x, h_tm1)
        out_t = self.out_gate_b.forward(x, h_tm1)

        # New cell state, masked and forced to float32.
        c_t = forget_t * c_tm1 + in_t * self.input_layer_b.forward(x, h_tm1)
        c_t = c_t * mask.dimshuffle(0, 'x')
        c_t = T.cast(c_t, 'float32')

        # New hidden state, masked and forced to float32.
        h_t = out_t * T.tanh(c_t)
        h_t = h_t * mask.dimshuffle(0, 'x')
        h_t = T.cast(h_t, 'float32')

        if hc.ndim > 1:
            return T.concatenate([c_t, h_t], axis=1)
        else:
            return T.concatenate([c_t, h_t])
项目:keras-customized    作者:ambrite    | 项目源码 | 文件源码
def ctc_update_log_p(skip_idxs, zeros, active, log_p_curr, log_p_prev):
    """Advance the active-prefix log-probabilities by one step.

    Returns ``(active_next, log_p_next)``: the new number of active entries
    (int32, capped at ``log_p_curr.shape[0]``) and the updated log
    probabilities written into the first ``active_next`` entries of ``zeros``.
    """
    # Skip indices that fall inside the currently active prefix.
    live_skips = skip_idxs[(skip_idxs < active).nonzero()]

    # The appended -1 keeps the max well defined when no skip index is live.
    furthest_skip = T.max(T.concatenate([live_skips, [-1]]))
    grown = T.maximum(active + 1, furthest_skip + 2 + 1)
    active_next = T.cast(T.minimum(grown, log_p_curr.shape[0]), 'int32')

    # Work in probability space, shifted by the max for numerical stability.
    shift = T.max(log_p_prev[:active])
    prev_probs = T.exp(log_p_prev[:active] - shift)

    acc = zeros[:active_next]
    # Copy the previous probabilities over.
    acc = T.set_subtensor(acc[:active], prev_probs)
    # Transitions from the previous position.
    acc = T.inc_subtensor(acc[1:], acc[:-1])
    # Skip transitions (two positions ahead).
    acc = T.inc_subtensor(acc[live_skips + 2], prev_probs[live_skips])
    shifted_back = T.log(acc) + shift

    log_p_next = T.set_subtensor(
        zeros[:active_next],
        log_p_curr[:active_next] + shifted_back
    )
    return active_next, log_p_next
项目:theanomodels    作者:clinicalml    | 项目源码 | 文件源码
def _getLSTMWeight(self, shape):
        """
        Sample initial values for an LSTM parameter of the given ``shape``.

        Background on the initialisation idea:
        http://yyue.blogspot.com/2015/01/a-brief-overview-of-deep-learning.html

        * 1-D ``shape`` (bias): values are drawn from ``np.random.laplace``.
          No special treatment is applied to the forget-gate slice.
        * 2-D ``shape`` ``(nin, nout)`` (weight): requires
          ``int(nout / 4) == nin``; four blocks of shape ``(nin, nout / 4)``
          obtained from ``self._getOrthogonalWeight`` are concatenated along
          axis 1.

        The result is cast to ``config.floatX``. Any other rank trips an
        assertion.
        """
        if len(shape) == 1:  # bias
            # NOTE(review): the message says "exponential" but the draw below
            # is Laplace; the message text is kept unchanged.
            self._p('Sampling biases for LSTM from exponential distribution')
            return np.random.laplace(size=shape).astype(config.floatX)
        elif len(shape) == 2:  # weight
            nin = shape[0]
            nout = shape[1]
            assert int(nout/4)==nin,'Not LSTM weight.'
            block_shape = (nin, int(nout / 4))
            # One block per gate, sampled in order.
            blocks = [self._getOrthogonalWeight(block_shape) for _ in range(4)]
            return np.concatenate(blocks, axis=1).astype(config.floatX)
        else:
            assert False,'Should not get here'
项目:WEARING    作者:nlkim0817    | 项目源码 | 文件源码
def MakeVisual(X_src, X_tar):
    """Return ``X_tar`` resized to ``(N, nc, npx, npx // 2)`` for display.

    ``N`` is ``X_tar.shape[0]``; ``nc`` and ``npx`` are module-level settings.
    ``np.resize`` is used, so the data is repeated or truncated as needed to
    fill the requested shape.

    ``X_src`` is accepted for interface compatibility but does not contribute
    to the returned array.
    """
    # Floor division keeps the dimension an integer on Python 3 as well;
    # np.resize rejects a float dimension.
    half_width = npx // 2
    return np.resize(X_tar, (X_tar.shape[0], nc, npx, half_width))


# SET PARAMETERS.
项目:WEARING    作者:nlkim0817    | 项目源码 | 文件源码
def gen_samples(n, nbatch=128):
    """Generate ``n`` samples with ``_gen`` in batches of ``nbatch``.

    For every batch, integer labels are drawn with ``np_rng.randint(0, 10, ...)``
    and one-hot encoded to width ``ny``, and a noise matrix of width ``nz`` is
    drawn uniformly from [-1, 1]. Both are passed to ``_gen``.

    Returns ``(samples, labels)``: the generated batches and the integer
    labels, each concatenated along axis 0. A final batch covering the
    remainder ``n - generated_so_far`` is always produced.
    """
    def _draw_batch(size):
        # Labels first, then noise: keeps the RNG consumption order stable.
        ymb = floatX(OneHot(np_rng.randint(0, 10, size), ny))
        zmb = floatX(np_rng.uniform(-1., 1., size=(size, nz)))
        return _gen(zmb, ymb), np.argmax(ymb, axis=1)

    samples = []
    labels = []
    n_gen = 0
    # Floor division: range() needs an int, and n / nbatch is a float on
    # Python 3.
    for _ in range(n // nbatch):
        xmb, batch_labels = _draw_batch(nbatch)
        samples.append(xmb)
        labels.append(batch_labels)
        n_gen += len(xmb)

    # Remainder batch.
    xmb, batch_labels = _draw_batch(n - n_gen)
    samples.append(xmb)
    labels.append(batch_labels)
    return np.concatenate(samples, axis=0), np.concatenate(labels, axis=0)
项目:deligan    作者:val-iisc    | 项目源码 | 文件源码
def get_output_for(self, input, init=False, **kwargs):
        """Append per-sample batch-similarity features to ``input``.

        ``input`` is flattened to 2-D if needed and projected through
        ``self.W`` into a 3-D tensor. For every pair of samples the absolute
        differences are summed over the projection's last axis, and the
        per-sample feature is the sum of ``exp(-distance)`` over the other
        samples.

        With ``init=True`` the distances and features are normalised by batch
        statistics and ``self.init_updates`` is populated with the matching
        updates for ``self.log_weight_scale`` and ``self.b``. Otherwise
        ``self.b`` is added to the features.

        Returns ``input`` concatenated with the features along axis 1.
        """
        # Collapse trailing dimensions so each sample is one feature vector.
        if input.ndim > 2:
            input = input.flatten(2)

        projected = T.tensordot(input, self.W, [[1], [0]])
        pair_diff = (projected.dimshuffle(0, 1, 2, 'x')
                     - projected.dimshuffle('x', 1, 2, 0))
        # Large constant where a sample is paired with itself, so that its
        # own exp(-distance) contribution vanishes.
        self_penalty = 1e6 * T.eye(input.shape[0]).dimshuffle(0, 'x', 1)
        distances = T.sum(abs(pair_diff), axis=2) + self_penalty

        if init:
            scale = 0.5 * T.mean(T.min(distances, axis=2), axis=0)
            distances = distances / scale.dimshuffle('x', 0, 'x')
            scale_update = self.log_weight_scale - T.log(scale).dimshuffle(0, 'x')
            self.init_updates = [(self.log_weight_scale, scale_update)]

        features = T.sum(T.exp(-distances), axis=2)

        if init:
            feature_mean = T.mean(features, axis=0)
            features = features - feature_mean.dimshuffle('x', 0)
            self.init_updates.append((self.b, -feature_mean))
        else:
            features = features + self.b.dimshuffle('x', 0)

        return T.concatenate([input, features], axis=1)

# Input Mixture of Gaussian Layer
项目:deligan    作者:val-iisc    | 项目源码 | 文件源码
def get_output_for(self, input, init=False, **kwargs):
        """Append per-sample batch-similarity features to ``input``.

        ``input`` is flattened to 2-D if needed and projected through
        ``self.W`` into a 3-D tensor. For every pair of samples the absolute
        differences are summed over the projection's last axis, and the
        per-sample feature is the sum of ``exp(-distance)`` over the other
        samples.

        With ``init=True`` the distances and features are normalised by batch
        statistics and ``self.init_updates`` is populated with the matching
        updates for ``self.log_weight_scale`` and ``self.b``. Otherwise
        ``self.b`` is added to the features.

        Returns ``input`` concatenated with the features along axis 1.
        """
        # Collapse trailing dimensions so each sample is one feature vector.
        if input.ndim > 2:
            input = input.flatten(2)

        projected = T.tensordot(input, self.W, [[1], [0]])
        pair_diff = (projected.dimshuffle(0, 1, 2, 'x')
                     - projected.dimshuffle('x', 1, 2, 0))
        # Large constant where a sample is paired with itself, so that its
        # own exp(-distance) contribution vanishes.
        self_penalty = 1e6 * T.eye(input.shape[0]).dimshuffle(0, 'x', 1)
        distances = T.sum(abs(pair_diff), axis=2) + self_penalty

        if init:
            scale = 0.5 * T.mean(T.min(distances, axis=2), axis=0)
            distances = distances / scale.dimshuffle('x', 0, 'x')
            scale_update = self.log_weight_scale - T.log(scale).dimshuffle(0, 'x')
            self.init_updates = [(self.log_weight_scale, scale_update)]

        features = T.sum(T.exp(-distances), axis=2)

        if init:
            feature_mean = T.mean(features, axis=0)
            features = features - feature_mean.dimshuffle('x', 0)
            self.init_updates.append((self.b, -feature_mean))
        else:
            features = features + self.b.dimshuffle('x', 0)

        return T.concatenate([input, features], axis=1)

# Input Mixture of Gaussian Layer
项目:deep_srl    作者:luheng    | 项目源码 | 文件源码
def connect(self, inputs):
  """Look up one embedding per feature type and join them on the last axis.

  ``inputs`` is indexed as ``inputs[:, :, i]`` for each feature type ``i``;
  those indices select rows of ``self.embeddings[i]``, and the result is
  reshaped to ``(inputs.shape[0], inputs.shape[1], self.embedding_shapes[i][1])``.

  With a single feature type its embedding is returned directly; otherwise
  all embeddings are concatenated along axis 2.
  """
  def embed(i):
    flat_ids = inputs[:, :, i].flatten()
    out_shape = [inputs.shape[0], inputs.shape[1], self.embedding_shapes[i][1]]
    return self.embeddings[i][flat_ids].reshape(out_shape)

  features = [embed(i) for i in range(self.num_feature_types)]

  if self.num_feature_types == 1:
    return features[0]
  return tensor.concatenate(features, axis=2)
项目:dsb3    作者:EliasVansteenkiste    | 项目源码 | 文件源码
def get_output_for(self, input, **kwargs):
        """Reduce sigmoid probabilities along axis 1 and stack two summaries.

        Applies a sigmoid to ``input`` and returns the concatenation of
        ``sum(log(1 - p + 1e-12), axis=1)`` followed by ``sum(p, axis=1)``.
        """
        probs = nonlinearities.sigmoid(input)
        # The small constant guards the log against p == 1.
        log_complement_total = T.sum(T.log(1 - probs + 1.e-12), axis=1)
        prob_total = T.sum(probs, axis=1)
        return T.concatenate([log_complement_total, prob_total])
项目:monogreedy    作者:jinjunqi    | 项目源码 | 文件源码
def compute(self, x_t, c_tm1, h_tm1):
        """Perform one LSTM step and return ``(c_t, h_t)``.

        ``x_t`` is (mb, dim_x) and ``h_tm1`` is (mb, dim_h). They are joined
        along axis 1 and passed through a single affine map
        (``self.w``, ``self.b``), whose output is sliced into four
        ``self.dim_h``-wide parts: candidate, input gate, output gate and
        forget gate, in that order.
        """
        nh = self.dim_h
        joint_input = T.concatenate([x_t, h_tm1], axis=1)
        pre_act = T.dot(joint_input, self.w) + self.b

        # Slice layout: (candidate, input, output, forget).
        candidate = T.tanh(pre_act[:, 0:nh])
        input_gate = T.nnet.sigmoid(pre_act[:, nh:2*nh])
        output_gate = T.nnet.sigmoid(pre_act[:, 2*nh:3*nh])
        forget_gate = T.nnet.sigmoid(pre_act[:, 3*nh:4*nh])

        c_t = input_gate * candidate + forget_gate * c_tm1
        h_t = output_gate * T.tanh(c_t)
        return c_t, h_t
项目:monogreedy    作者:jinjunqi    | 项目源码 | 文件源码
def compute(self, state, w_idx, scene):
        """Run one decoding step conditioned on the previous word and ``scene``.

        Returns ``(new_state, p)``: the packed ``[c_t, h_t]`` state and the
        output of ``self.pred_mlp`` for the ``[h_t, word_vec]`` features.
        """
        # Embed the previous word.
        embedded = self.embedding.compute(w_idx)
        # Unpack the recurrent state (split_state is defined elsewhere;
        # presumably this yields two blocks of width config['nh']).
        prev_cell, prev_hidden = split_state(state, scheme=[(2, self.config['nh'])])

        # LSTM step on the word embedding joined with the scene vector.
        lstm_input = T.concatenate([embedded, scene], axis=1)
        cell, hidden = self.lstm.compute(lstm_input, prev_cell, prev_hidden)

        # Re-pack the state for the next step.
        new_state = T.concatenate([cell, hidden], axis=-1)

        # The previous word is fed to the predictor alongside the hidden state.
        predictor_input = T.concatenate([hidden, embedded], axis=-1)
        p = self.pred_mlp.compute(predictor_input)
        return new_state, p
项目:monogreedy    作者:jinjunqi    | 项目源码 | 文件源码
def compute(self, state, w_idx):
        """Run one decoding step conditioned on the previous word only.

        Returns ``(new_state, p)``: the packed ``[c_t, h_t]`` state and the
        output of ``self.pred_mlp`` for the ``[h_t, word_vec]`` features.
        """
        # Embed the previous word.
        embedded = self.embedding.compute(w_idx)
        # Unpack the recurrent state (split_state is defined elsewhere;
        # presumably this yields two blocks of width config['nh']).
        prev_cell, prev_hidden = split_state(state, scheme=[(2, self.config['nh'])])

        # LSTM step driven by the word embedding alone.
        cell, hidden = self.lstm.compute(embedded, prev_cell, prev_hidden)

        # Re-pack the state for the next step.
        new_state = T.concatenate([cell, hidden], axis=-1)

        # The previous word is fed to the predictor alongside the hidden state.
        predictor_input = T.concatenate([hidden, embedded], axis=-1)
        p = self.pred_mlp.compute(predictor_input)
        return new_state, p
项目:monogreedy    作者:jinjunqi    | 项目源码 | 文件源码
def compute(self, state, w_idx, feat):
        """Run one attention-based decoding step.

        Returns ``(new_state, p, alpha)``: the packed ``[e_t, c_t, h_t]``
        state, the output of ``self.pred_mlp`` for ``[e_t, h_t, word_vec]``,
        and the second value returned by ``self.attention.compute``.
        """
        # Embed the previous word.
        embedded = self.embedding.compute(w_idx)
        # Unpack the recurrent state (split_state is defined elsewhere;
        # presumably one block of width config['na'] and two of config['nh']).
        prev_ctx, prev_cell, prev_hidden = split_state(
            state, scheme=[(1, self.config['na']), (2, self.config['nh'])])

        # Attention over feat, queried with previous context, hidden state
        # and the word embedding.
        query = T.concatenate([prev_ctx, prev_hidden, embedded], axis=1)
        ctx, alpha = self.attention.compute(feat, query)

        # LSTM step on the attended context joined with the word embedding.
        lstm_input = T.concatenate([ctx, embedded], axis=-1)
        cell, hidden = self.lstm.compute(lstm_input, prev_cell, prev_hidden)  # (mb,nh)

        # Re-pack the state for the next step.
        new_state = T.concatenate([ctx, cell, hidden], axis=-1)

        # Predict the word probability.
        predictor_input = T.concatenate([ctx, hidden, embedded], axis=-1)
        p = self.pred_mlp.compute(predictor_input)
        return new_state, p, alpha
项目:monogreedy    作者:jinjunqi    | 项目源码 | 文件源码
def init_func(self, img_value):
        """Project ``img_value``, store it, and return the initial state.

        Two Theano functions are compiled on first use and cached on the
        instance:

        * ``self._proj_func`` maps a 3-D image tensor through ``self.proj_mlp``.
        * ``self._init_func`` takes no inputs; it concatenates the mean over
          axis 1 of ``self._feat_shared`` with ``self.init_mlp`` applied to
          that mean.

        The projected ``img_value`` is written into ``self._feat_shared``
        before ``self._init_func`` is called.
        """
        if self._proj_func is None:
            image_var = T.tensor3()
            projected = self.proj_mlp.compute(image_var)
            self._proj_func = theano.function([image_var], projected)

        if self._init_func is None:
            mean_feat = self._feat_shared.mean(axis=1)
            mlp_out = self.init_mlp.compute(mean_feat)
            initial_state = T.concatenate([mean_feat, mlp_out], axis=-1)
            self._init_func = theano.function([], initial_state)

        # Refresh the shared features so the cached function sees this image.
        features = self._proj_func(img_value)
        self._feat_shared.set_value(features)
        return self._init_func()
项目:gram    作者:mp2893    | 项目源码 | 文件源码
def generate_attention(tparams, leaves, ancestors):
    """Compute softmax attention weights for (leaf, ancestor) pairs.

    Rows of ``tparams['W_emb']`` selected by ``leaves`` and ``ancestors`` are
    concatenated along axis 2, passed through a tanh layer
    (``W_attention``, ``b_attention``), projected with ``v_attention`` and
    normalised with ``T.nnet.softmax``.
    """
    emb_table = tparams['W_emb']
    pair_features = T.concatenate([emb_table[leaves], emb_table[ancestors]], axis=2)
    hidden = T.tanh(T.dot(pair_features, tparams['W_attention']) + tparams['b_attention'])
    scores = T.dot(hidden, tparams['v_attention'])
    return T.nnet.softmax(scores)
项目:gram    作者:mp2893    | 项目源码 | 文件源码
def build_model(tparams, leavesList, ancestorsList, options):
    """Build the symbolic graph of the attention-embedding GRU model.

    Parameters
    ----------
    tparams : mapping of parameter name -> shared variable; ``W_emb``,
        ``W_output``, ``W_attention`` and ``v_attention`` are read here.
    leavesList, ancestorsList : parallel sequences of index arrays; each pair
        is passed to ``generate_attention``.
    options : dict; ``dropoutRate`` and ``L2`` are read here.

    Returns
    -------
    tuple
        ``(use_noise, x, y, mask, lengths, cost, cost_noreg, y_hat)``.
        ``cost`` equals ``cost_noreg`` when ``options['L2']`` is not positive;
        otherwise an L2 penalty on ``W_output``, ``W_attention`` and
        ``v_attention`` is added.
    """
    dropoutRate = options['dropoutRate']
    trng = RandomStreams(123)
    use_noise = theano.shared(numpy_floatX(0.))

    x = T.tensor3('x', dtype=config.floatX)
    y = T.tensor3('y', dtype=config.floatX)
    mask = T.matrix('mask', dtype=config.floatX)
    lengths = T.vector('lengths', dtype=config.floatX)

    # One attention-weighted sum of ancestor embeddings per group.
    embList = []
    for leaves, ancestors in zip(leavesList, ancestorsList):
        tempAttention = generate_attention(tparams, leaves, ancestors)
        tempEmb = (tparams['W_emb'][ancestors] * tempAttention[:,:,None]).sum(axis=1)
        embList.append(tempEmb)

    emb = T.concatenate(embList, axis=0)

    x_emb = T.tanh(T.dot(x, emb))
    hidden = gru_layer(tparams, x_emb, options)
    hidden = dropout_layer(hidden, use_noise, trng, dropoutRate)
    y_hat = softmax_layer(tparams, hidden) * mask[:,:,None]

    # Small constant keeps the logs finite.
    logEps = 1e-8
    cross_entropy = -(y * T.log(y_hat + logEps) + (1. - y) * T.log(1. - y_hat + logEps))
    output_loglikelihood = cross_entropy.sum(axis=2).sum(axis=0) / lengths
    cost_noreg = T.mean(output_loglikelihood)

    # Default to the unregularised cost so `cost` is always defined, even
    # when L2 regularisation is switched off.
    cost = cost_noreg
    if options['L2'] > 0.:
        l2_penalty = ((tparams['W_output']**2).sum()
                      + (tparams['W_attention']**2).sum()
                      + (tparams['v_attention']**2).sum())
        cost = cost_noreg + options['L2'] * l2_penalty

    return use_noise, x, y, mask, lengths, cost, cost_noreg, y_hat