Python lasagne.nonlinearities 模块，tanh() 实例源码

我们从Python开源项目中，提取了以下50个代码示例，用于说明如何使用lasagne.nonlinearities.tanh()。

项目：NeuroNLP 作者：XuezheMax | 项目源码 | 文件源码

def exe_rnn(use_embedd, length, num_units, position, binominal):
    batch_size = BATCH_SIZE

    input_var = T.tensor3(name='inputs', dtype=theano.config.floatX)
    target_var = T.ivector(name='targets')

    layer_input = lasagne.layers.InputLayer(shape=(None, length, 1), input_var=input_var, name='input')
    if use_embedd:
        layer_position = construct_position_input(batch_size, length, num_units)
        layer_input = lasagne.layers.concat([layer_input, layer_position], axis=2)

    layer_rnn = RecurrentLayer(layer_input, num_units, nonlinearity=nonlinearities.tanh, only_return_final=True,
                               W_in_to_hid=lasagne.init.GlorotUniform(), W_hid_to_hid=lasagne.init.GlorotUniform(),
                               b=lasagne.init.Constant(0.), name='RNN')
    # W = layer_rnn.W_hid_to_hid.sum()
    # U = layer_rnn.W_in_to_hid.sum()
    # b = layer_rnn.b.sum()

    layer_output = DenseLayer(layer_rnn, num_units=1, nonlinearity=nonlinearities.sigmoid, name='output')

    return train(layer_output, layer_rnn, input_var, target_var, batch_size, length, position, binominal)

项目：LasagneNLP 作者：XuezheMax | 项目源码 | 文件源码

def build_BiRNN_CNN(incoming1, incoming2, num_units, mask=None, grad_clipping=0, nonlinearity=nonlinearities.tanh,
                    precompute_input=True, num_filters=20, dropout=True, in_to_out=False):
    # first get some necessary dimensions or parameters
    conv_window = 3
    _, sent_length, _ = incoming2.output_shape

    # dropout before cnn?
    if dropout:
        incoming1 = lasagne.layers.DropoutLayer(incoming1, p=0.5)

    # construct convolution layer
    cnn_layer = lasagne.layers.Conv1DLayer(incoming1, num_filters=num_filters, filter_size=conv_window, pad='full',
                                           nonlinearity=lasagne.nonlinearities.tanh, name='cnn')
    # infer the pool size for pooling (pool size should go through all time step of cnn)
    _, _, pool_size = cnn_layer.output_shape
    # construct max pool layer
    pool_layer = lasagne.layers.MaxPool1DLayer(cnn_layer, pool_size=pool_size)
    # reshape the layer to match rnn incoming layer [batch * sent_length, num_filters, 1] --> [batch, sent_length, num_filters]
    output_cnn_layer = lasagne.layers.reshape(pool_layer, (-1, sent_length, [1]))

    # finally, concatenate the two incoming layers together.
    incoming = lasagne.layers.concat([output_cnn_layer, incoming2], axis=2)

    return build_BiRNN(incoming, num_units, mask=mask, grad_clipping=grad_clipping, nonlinearity=nonlinearity,
                       precompute_input=precompute_input, dropout=dropout, in_to_out=in_to_out)

项目：LasagneNLP 作者：XuezheMax | 项目源码 | 文件源码

def build_BiLSTM_CNN(incoming1, incoming2, num_units, mask=None, grad_clipping=0, precompute_input=True,
                     peepholes=False, num_filters=20, dropout=True, in_to_out=False):
    # first get some necessary dimensions or parameters
    conv_window = 3
    _, sent_length, _ = incoming2.output_shape

    # dropout before cnn?
    if dropout:
        incoming1 = lasagne.layers.DropoutLayer(incoming1, p=0.5)

    # construct convolution layer
    cnn_layer = lasagne.layers.Conv1DLayer(incoming1, num_filters=num_filters, filter_size=conv_window, pad='full',
                                           nonlinearity=lasagne.nonlinearities.tanh, name='cnn')
    # infer the pool size for pooling (pool size should go through all time step of cnn)
    _, _, pool_size = cnn_layer.output_shape
    # construct max pool layer
    pool_layer = lasagne.layers.MaxPool1DLayer(cnn_layer, pool_size=pool_size)
    # reshape the layer to match lstm incoming layer [batch * sent_length, num_filters, 1] --> [batch, sent_length, num_filters]
    output_cnn_layer = lasagne.layers.reshape(pool_layer, (-1, sent_length, [1]))

    # finally, concatenate the two incoming layers together.
    incoming = lasagne.layers.concat([output_cnn_layer, incoming2], axis=2)

    return build_BiLSTM(incoming, num_units, mask=mask, grad_clipping=grad_clipping, peepholes=peepholes,
                        precompute_input=precompute_input, dropout=dropout, in_to_out=in_to_out)

项目：BiDNN 作者：v-v | 项目源码 | 文件源码

def __init__(self):
        self.hdn = None
        self.rep = None
        self.fname_in = None
        self.mod2size = None
        self.mod1size = None
        self.fname_out = None

        self.lr = 0.1
        self.act = "tanh"
        self.epochs = 1000
        self.verbosity = 3
        self.dropout = 0.2
        self.momentum = 0.9
        self.untied = False
        self.l2_norm = False
        self.batch_size = 128
        self.load_model = None
        self.save_model = None
        self.write_after = None
        self.crossmodal = False
        self.exec_command = None
        self.ignore_zeroes = False

项目：gogh-figure 作者：joelmoniz | 项目源码 | 文件源码

def setup_transform_net(self, input_var=None):
        transform_net = InputLayer(shape=self.shape, input_var=input_var)
        transform_net = style_conv_block(transform_net, self.num_styles, 32, 9, 1)
        transform_net = style_conv_block(transform_net, self.num_styles, 64, 3, 2)
        transform_net = style_conv_block(transform_net, self.num_styles, 128, 3, 2)
        for _ in range(5):
            transform_net = residual_block(transform_net, self.num_styles)
        transform_net = nn_upsample(transform_net, self.num_styles)
        transform_net = nn_upsample(transform_net, self.num_styles)

        if self.net_type == 0:
            transform_net = style_conv_block(transform_net, self.num_styles, 3, 9, 1, tanh)
            transform_net = ExpressionLayer(transform_net, lambda X: 150.*X, output_shape=None)
        elif self.net_type == 1:
            transform_net = style_conv_block(transform_net, self.num_styles, 3, 9, 1, sigmoid)

        self.network['transform_net'] = transform_net

项目：RL4Data 作者：fyabc | 项目源码 | 文件源码

def build_cnn(self):
        # Building the network
        layer_in = InputLayer(shape=(None, 784), input_var=self.input_var)

        # Hidden layer
        layer = DenseLayer(
            layer_in,
            num_units=self.hidden_size,
            W=lasagne.init.Uniform(
                range=(-np.sqrt(6. / (784 + self.hidden_size)),
                       np.sqrt(6. / (784 + self.hidden_size)))),
            nonlinearity=tanh,
        )

        # LR layer
        layer = DenseLayer(
            layer,
            num_units=self.output_size,
            W=lasagne.init.Constant(0.),
            nonlinearity=softmax,
        )

        return layer

项目：recom-system 作者：tizot | 项目源码 | 文件源码

def build_multi_dssm(input_var=None, num_samples=None, num_entries=6, num_ngrams=42**3, num_hid1=300, num_hid2=300, num_out=128):
    """Builds a DSSM structure in a Lasagne/Theano way.

    The built DSSM is the neural network that computes the projection of only one paper.
    The input ``input_var`` should have two dimensions: (``num_samples * num_entries``, ``num_ngrams``).
    The output is then computed in a batch way: one paper at a time, but all papers from the same sample in the dataset are grouped
    (cited papers, citing papers and ``num_entries - 2`` irrelevant papers).

    Args:
        input_var (:class:`theano.tensor.TensorType` or None): symbolic input variable of the DSSM
        num_samples (int): the number of samples in the batch input dataset (number of rows)
        num_entries (int): the number of compared papers in the DSSM structure
        num_ngrams (int): the size of the vocabulary
        num_hid1 (int): the number of units in the first hidden layer
        num_hid2 (int): the number of units in the second hidden layer
        num_out (int): the number of units in the output layer

    Returns:
        :class:`lasagne.layers.Layer`: the output layer of the DSSM
    """

    assert (num_entries > 2)

    # Initialise input layer
    if num_samples is None:
        num_rows = None
    else:
        num_rows = num_samples * num_entries

    l_in = layers.InputLayer(shape=(num_rows, num_ngrams), input_var=input_var)

    # Initialise the hidden and output layers or the DSSM
    l_hid1 = layers.DenseLayer(l_in, num_units=num_hid1, nonlinearity=nonlinearities.tanh, W=init.GlorotUniform())
    l_hid2 = layers.DenseLayer(l_hid1, num_units=num_hid2, nonlinearity=nonlinearities.tanh, W=init.GlorotUniform())
    l_out = layers.DenseLayer(l_hid2, num_units=num_out, nonlinearity=nonlinearities.tanh, W=init.GlorotUniform())

    l_out = layers.ExpressionLayer(l_out, lambda X: X / X.norm(2), output_shape='auto')

    return l_out

项目：third_person_im 作者：bstadie | 项目源码 | 文件源码

def __init__(
            self,
            env_spec,
            hidden_sizes=(32, 32),
            hidden_nonlinearity=NL.tanh,
            num_seq_inputs=1,
            prob_network=None,
    ):
        """
        :param env_spec: A spec for the mdp.
        :param hidden_sizes: list of sizes for the fully connected hidden layers
        :param hidden_nonlinearity: nonlinearity used for each hidden layer
        :param prob_network: manually specified network for this policy, other network params
        are ignored
        :return:
        """
        Serializable.quick_init(self, locals())

        assert isinstance(env_spec.action_space, Discrete)

        if prob_network is None:
            prob_network = MLP(
                input_shape=(env_spec.observation_space.flat_dim * num_seq_inputs,),
                output_dim=env_spec.action_space.n,
                hidden_sizes=hidden_sizes,
                hidden_nonlinearity=hidden_nonlinearity,
                output_nonlinearity=NL.softmax,
            )

        self._l_prob = prob_network.output_layer
        self._l_obs = prob_network.input_layer
        self._f_prob = ext.compile_function([prob_network.input_layer.input_var], L.get_output(
            prob_network.output_layer))

        self._dist = Categorical(env_spec.action_space.n)

        super(CategoricalMLPPolicy, self).__init__(env_spec)
        LasagnePowered.__init__(self, [prob_network.output_layer])

项目：NeuroNLP 作者：XuezheMax | 项目源码 | 文件源码

def exe_maxru(length, num_units, position, binominal):
    batch_size = BATCH_SIZE

    input_var = T.tensor3(name='inputs', dtype=theano.config.floatX)
    target_var = T.ivector(name='targets')

    layer_input = lasagne.layers.InputLayer(shape=(None, length, 1), input_var=input_var, name='input')

    time_updategate = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None)

    time_update = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None,
                       b=lasagne.init.Constant(0.), nonlinearity=nonlinearities.tanh)

    resetgate = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(),
                     W_cell=lasagne.init.GlorotUniform())

    updategate = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(),
                      W_cell=lasagne.init.GlorotUniform())

    hiden_update = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None,
                        b=lasagne.init.Constant(0.), nonlinearity=nonlinearities.tanh)

    layer_taru = MAXRULayer(layer_input, num_units, max_length=length,
                            P_time=lasagne.init.GlorotUniform(), nonlinearity=nonlinearities.tanh,
                            resetgate=resetgate, updategate=updategate, hidden_update=hiden_update,
                            time_updategate=time_updategate, time_update=time_update,
                            only_return_final=True, name='MAXRU', p=0.)

    # W = layer_taru.W_hid_to_hidden_update.sum()
    # U = layer_taru.W_in_to_hidden_update.sum()
    # b = layer_taru.b_hidden_update.sum()

    layer_output = DenseLayer(layer_taru, num_units=1, nonlinearity=nonlinearities.sigmoid, name='output')

    return train(layer_output, input_var, target_var, batch_size, length, position, binominal)

项目：NeuroNLP 作者：XuezheMax | 项目源码 | 文件源码

def exe_lstm(use_embedd, length, num_units, position, binominal):
    batch_size = BATCH_SIZE

    input_var = T.tensor3(name='inputs', dtype=theano.config.floatX)
    target_var = T.ivector(name='targets')

    layer_input = lasagne.layers.InputLayer(shape=(None, length, 1), input_var=input_var, name='input')
    if use_embedd:
        layer_position = construct_position_input(batch_size, length, num_units)
        layer_input = lasagne.layers.concat([layer_input, layer_position], axis=2)

    ingate = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(),
                  W_cell=lasagne.init.Uniform(range=0.1))

    outgate = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(),
                   W_cell=lasagne.init.Uniform(range=0.1))
    # according to Jozefowicz et al.(2015), init bias of forget gate to 1.
    forgetgate = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(),
                      W_cell=lasagne.init.Uniform(range=0.1), b=lasagne.init.Constant(1.))
    # now use tanh for nonlinear function of cell, need to try pure linear cell
    cell = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None,
                b=lasagne.init.Constant(0.), nonlinearity=nonlinearities.tanh)

    layer_lstm = LSTMLayer(layer_input, num_units, ingate=ingate, forgetgate=forgetgate, cell=cell, outgate=outgate,
                           peepholes=False, nonlinearity=nonlinearities.tanh, only_return_final=True, name='LSTM')

    # W = layer_lstm.W_hid_to_cell.sum()
    # U = layer_lstm.W_in_to_cell.sum()
    # b = layer_lstm.b_cell.sum()

    layer_output = DenseLayer(layer_lstm, num_units=1, nonlinearity=nonlinearities.sigmoid, name='output')

    return train(layer_output, layer_lstm, input_var, target_var, batch_size, length, position, binominal)

项目：NeuroNLP 作者：XuezheMax | 项目源码 | 文件源码

def exe_sgru(use_embedd, length, num_units, position, binominal):
    batch_size = BATCH_SIZE

    input_var = T.tensor3(name='inputs', dtype=theano.config.floatX)
    target_var = T.ivector(name='targets')

    layer_input = lasagne.layers.InputLayer(shape=(None, length, 1), input_var=input_var, name='input')
    if use_embedd:
        layer_position = construct_position_input(batch_size, length, num_units)
        layer_input = lasagne.layers.concat([layer_input, layer_position], axis=2)

    resetgate_input = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None)

    resetgate_hidden = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None)

    updategate = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None)

    hiden_update = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None,
                        b=lasagne.init.Constant(0.), nonlinearity=nonlinearities.tanh)

    layer_sgru = SGRULayer(layer_input, num_units, resetgate_input=resetgate_input, resetgate_hidden=resetgate_hidden,
                           updategate=updategate, hidden_update=hiden_update, only_return_final=True, name='SGRU')

    # W = layer_gru.W_hid_to_hidden_update.sum()
    # U = layer_gru.W_in_to_hidden_update.sum()
    # b = layer_gru.b_hidden_update.sum()

    layer_output = DenseLayer(layer_sgru, num_units=1, nonlinearity=nonlinearities.sigmoid, name='output')

    return train(layer_output, layer_sgru, input_var, target_var, batch_size, length, position, binominal)

项目：MachineComprehension 作者：sa-j | 项目源码 | 文件源码

def __init__(self, incoming, num_units, ingate=Gate(), forgetgate=Gate(),
                 cell=Gate(W_cell=None, nonlinearity=nonlinearities.tanh), outgate=Gate(),
                 nonlinearity=nonlinearities.tanh, cell_init=init.Constant(0.), hid_init=init.Constant(0.),
                 backwards=False, learn_init=False, peepholes=True, gradient_steps=-1, grad_clipping=0,
                 unroll_scan=False, precompute_input=True, mask_input=None, **kwargs):
        super(CustomLSTMEncoder, self).__init__(incoming, num_units, ingate, forgetgate, cell, outgate, nonlinearity,
                                                cell_init, hid_init, backwards, learn_init, peepholes, gradient_steps,
                                                grad_clipping, unroll_scan, precompute_input, mask_input, False,
                                                **kwargs)

项目：rllabplusplus 作者：shaneshixiang | 项目源码 | 文件源码

def __init__(
            self,
            env_spec,
            hidden_sizes=(32, 32),
            hidden_nonlinearity=NL.tanh,
            num_seq_inputs=1,
            prob_network=None,
    ):
        """
        :param env_spec: A spec for the mdp.
        :param hidden_sizes: list of sizes for the fully connected hidden layers
        :param hidden_nonlinearity: nonlinearity used for each hidden layer
        :param prob_network: manually specified network for this policy, other network params
        are ignored
        :return:
        """
        Serializable.quick_init(self, locals())

        assert isinstance(env_spec.action_space, Discrete)

        if prob_network is None:
            prob_network = MLP(
                input_shape=(env_spec.observation_space.flat_dim * num_seq_inputs,),
                output_dim=env_spec.action_space.n,
                hidden_sizes=hidden_sizes,
                hidden_nonlinearity=hidden_nonlinearity,
                output_nonlinearity=NL.softmax,
            )

        self._l_prob = prob_network.output_layer
        self._l_obs = prob_network.input_layer
        self._f_prob = ext.compile_function([prob_network.input_layer.input_var], L.get_output(
            prob_network.output_layer))

        self._dist = Categorical(env_spec.action_space.n)

        super(CategoricalMLPPolicy, self).__init__(env_spec)
        LasagnePowered.__init__(self, [prob_network.output_layer])

项目：LasagneNLP 作者：XuezheMax | 项目源码 | 文件源码

def build_BiRNN(incoming, num_units, mask=None, grad_clipping=0, nonlinearity=nonlinearities.tanh,
                precompute_input=True, dropout=True, in_to_out=False):
    # construct the forward and backward rnns. Now, Ws are initialized by He initializer with default arguments.
    # Need to try other initializers for specific tasks.

    # dropout for incoming
    if dropout:
        incoming = lasagne.layers.DropoutLayer(incoming, p=0.5)

    rnn_forward = lasagne.layers.RecurrentLayer(incoming, num_units,
                                                mask_input=mask, grad_clipping=grad_clipping,
                                                nonlinearity=nonlinearity, precompute_input=precompute_input,
                                                W_in_to_hid=lasagne.init.GlorotUniform(),
                                                W_hid_to_hid=lasagne.init.GlorotUniform(), name='forward')
    rnn_backward = lasagne.layers.RecurrentLayer(incoming, num_units,
                                                 mask_input=mask, grad_clipping=grad_clipping,
                                                 nonlinearity=nonlinearity, precompute_input=precompute_input,
                                                 W_in_to_hid=lasagne.init.GlorotUniform(),
                                                 W_hid_to_hid=lasagne.init.GlorotUniform(), backwards=True,
                                                 name='backward')

    # concatenate the outputs of forward and backward RNNs to combine them.
    concat = lasagne.layers.concat([rnn_forward, rnn_backward], axis=2, name="bi-rnn")

    # dropout for output
    if dropout:
        concat = lasagne.layers.DropoutLayer(concat, p=0.5)

    if in_to_out:
        concat = lasagne.layers.concat([concat, incoming], axis=2)

    # the shape of BiRNN output (concat) is (batch_size, input_length, 2 * num_hidden_units)
    return concat

项目：LasagneNLP 作者：XuezheMax | 项目源码 | 文件源码

def build_BiLSTM_HighCNN(incoming1, incoming2, num_units, mask=None, grad_clipping=0, precompute_input=True,
                         peepholes=False, num_filters=20, dropout=True, in_to_out=False):
    # first get some necessary dimensions or parameters
    conv_window = 3
    _, sent_length, _ = incoming2.output_shape

    # dropout before cnn
    if dropout:
        incoming1 = lasagne.layers.DropoutLayer(incoming1, p=0.5)

    # construct convolution layer
    cnn_layer = lasagne.layers.Conv1DLayer(incoming1, num_filters=num_filters, filter_size=conv_window, pad='full',
                                           nonlinearity=lasagne.nonlinearities.tanh, name='cnn')
    # infer the pool size for pooling (pool size should go through all time step of cnn)
    _, _, pool_size = cnn_layer.output_shape
    # construct max pool layer
    pool_layer = lasagne.layers.MaxPool1DLayer(cnn_layer, pool_size=pool_size)
    # reshape the layer to match highway incoming layer [batch * sent_length, num_filters, 1] --> [batch * sent_length, num_filters]
    output_cnn_layer = lasagne.layers.reshape(pool_layer, ([0], -1))

    # dropout after cnn?
    # if dropout:
    # output_cnn_layer = lasagne.layers.DropoutLayer(output_cnn_layer, p=0.5)

    # construct highway layer
    highway_layer = HighwayDenseLayer(output_cnn_layer, nonlinearity=nonlinearities.rectify)

    # reshape the layer to match lstm incoming layer [batch * sent_length, num_filters] --> [batch, sent_length, number_filters]
    output_highway_layer = lasagne.layers.reshape(highway_layer, (-1, sent_length, [1]))

    # finally, concatenate the two incoming layers together.
    incoming = lasagne.layers.concat([output_highway_layer, incoming2], axis=2)

    return build_BiLSTM(incoming, num_units, mask=mask, grad_clipping=grad_clipping, peepholes=peepholes,
                        precompute_input=precompute_input, dropout=dropout, in_to_out=in_to_out)

项目：DynamicMemoryNetworks 作者：swstarlab | 项目源码 | 文件源码

def __init__(self, incomings, hid_state_size, voc_size,
                 resetgate  = GRU_Gate(), updategate = GRU_Gate(),
                 hid_update = GRU_Gate(nonlinearity=nonlin.tanh),
                 W=Normal(), max_answer_word=1, **kwargs):
        super(AnswerModule, self).__init__(incomings, **kwargs)

        self.hid_state_size = hid_state_size

        #FOR GRU
        input_shape = self.input_shapes[0]
        num_inputs = np.prod(input_shape[1]) + voc_size # concatenation of previous prediction

        def add_gate(gate, gate_name):
            return (self.add_param(gate.W_in, (num_inputs, hid_state_size),
                        name="W_in_to_{}".format(gate_name)),
                    self.add_param(gate.W_hid, (hid_state_size, hid_state_size),
                        name="W_hid_to_{}".format(gate_name)),
                    self.add_param(gate.b, (hid_state_size,),
                        name="b_{}".format(gate_name), regularizable=False), 
                    gate.nonlinearity)

        # Add in all parameters from gates
        (self.W_in_to_updategate,
         self.W_hid_to_updategate,
         self.b_updategate,
         self.nonlinearity_updategate)= add_gate(updategate, 'updategate')
        (self.W_in_to_resetgate,
         self.W_hid_to_resetgate,
         self.b_resetgate,
         self.nonlinearity_resetgate) = add_gate(resetgate, 'resetgate')
        (self.W_in_to_hid_update,
         self.W_hid_to_hid_update,
         self.b_hid_update,
         self.nonlinearity_hid)       = add_gate(hid_update, 'hid_update')

        self.W = self.add_param(W, (hid_state_size, voc_size), name="W")
        self.max_answer_word = max_answer_word

        self.rand_stream = RandomStreams(np.random.randint(1, 2147462579))

项目：cpo 作者：jachiam | 项目源码 | 文件源码

def run_task(*_):
        trpo_stepsize = 0.01
        trpo_subsample_factor = 0.2

        env = PointGatherEnv(apple_reward=10,bomb_cost=1,n_apples=2, activity_range=6)

        policy = GaussianMLPPolicy(env.spec,
                    hidden_sizes=(64,32)
                 )

        baseline = GaussianMLPBaseline(
            env_spec=env.spec,
            regressor_args={
                    'hidden_sizes': (64,32),
                    'hidden_nonlinearity': NL.tanh,
                    'learn_std':False,
                    'step_size':trpo_stepsize,
                    'optimizer':ConjugateGradientOptimizer(subsample_factor=trpo_subsample_factor)
                    }
        )

        safety_constraint = GatherSafetyConstraint(max_value=0.1)



        algo = PDO(
            env=env,
            policy=policy,
            baseline=baseline,
            safety_constraint=safety_constraint,
            batch_size=50000,
            max_path_length=15,
            n_itr=100,
            gae_lambda=0.95,
            discount=0.995,
            step_size=trpo_stepsize,
            optimizer_args={'subsample_factor':trpo_subsample_factor},
            #plot=True,
        )

        algo.train()

项目：gail-driver 作者：sisl | 项目源码 | 文件源码

def __init__(
            self,
            env_spec,
            hidden_sizes=(32, 32),
            hidden_nonlinearity=NL.tanh,
            num_seq_inputs=1,
            prob_network=None,
    ):
        """
        :param env_spec: A spec for the mdp.
        :param hidden_sizes: list of sizes for the fully connected hidden layers
        :param hidden_nonlinearity: nonlinearity used for each hidden layer
        :param prob_network: manually specified network for this policy, other network params
        are ignored
        :return:
        """
        Serializable.quick_init(self, locals())

        assert isinstance(env_spec.action_space, Discrete)

        if prob_network is None:
            prob_network = MLP(
                input_shape=(
                    env_spec.observation_space.flat_dim * num_seq_inputs,),
                output_dim=env_spec.action_space.n,
                hidden_sizes=hidden_sizes,
                hidden_nonlinearity=hidden_nonlinearity,
                output_nonlinearity=NL.softmax,
            )

        self._l_prob = prob_network.output_layer
        self._l_obs = prob_network.input_layer
        self._f_prob = ext.compile_function([prob_network.input_layer.input_var], L.get_output(
            prob_network.output_layer))

        self._dist = Categorical(env_spec.action_space.n)

        super(CategoricalMLPPolicy, self).__init__(env_spec)
        LasagnePowered.__init__(self, [prob_network.output_layer])

项目：AttentionNet 作者：sayvazov | 项目源码 | 文件源码

def get_output_for(self, arguments, **kwargs):
        input, hprev, Cprev = arguments
        i = nl.sigmoid(self.Wi * input + self.Ui* hprev + self.bi)
        cand = nl.tanh(self.Wc *input + self.Uc * hprev + self.bc)
        f = nl.sigmoid(self.Wf*input + self.Uf*hprev + self.bf)
        C = i*cand + f * Cprev
        o = nl.sigmoid(self.Wo*input + self.Uo*hprev + self.Vo*C + self.bo)
        h = o*nl.tanh(C)
        return h, C

项目：StockPredictor 作者：wallsbreaker | 项目源码 | 文件源码

def build_combination(input_var, output_nodes, input_size, stocks, period, feature_types):
    # Input layer
    input_layer = InputLayer(shape=(None, 1, input_size), input_var=input_var)
    assert input_size == stocks * period * feature_types
    input_layer = ReshapeLayer(input_layer, (([0], stocks, period, feature_types)))

    #slice for partition
    stock_feature_type_layers = []
    for ix in range(stocks):
        stock_layer = SliceLayer(input_layer, indices=ix, axis=1)
        this_stock_feature_type_layers = []
        for rx in range(feature_types):
            this_stock_feature_type_layers.append(SliceLayer(stock_layer, indices=rx, axis=1))
        stock_feature_type_layers.append(this_stock_feature_type_layers)

    stock_networks = []
    for this_stock_feature_type_layers in stock_feature_type_layers:
        this_stock_networks = []
        for feature_type_layer in this_stock_feature_type_layers:
            tmp = DenseLayer(dropout(feature_type_layer, p=.2),
                num_units=10, nonlinearity=tanh)
            tmp = DenseLayer(dropout(tmp, p=.5), num_units=1, nonlinearity=tanh)
            this_stock_networks.append(tmp)

        this_stock_network = ConcatLayer(this_stock_networks)

        stock_network = DenseLayer(dropout(this_stock_network, p=.5),
                num_units=1, nonlinearity=tanh)

        stock_networks.append(stock_network)

    network = ConcatLayer(stock_networks)
    network = DenseLayer(dropout(network, p=.5),
                num_units=output_nodes, nonlinearity=sigmoid)

    return network, stock_networks

项目：kaggle-quora-solution-8th 作者：qqgeogor | 项目源码 | 文件源码

def __init__(self, incoming, num_units,
                 ingate=Gate(),
                 forgetgate=Gate(),
                 cell=Gate(W_cell=None, nonlinearity=nonlinearities.tanh),
                 outgate=Gate(),
                 nonlinearity=nonlinearities.tanh,
                 cell_init=init.Constant(0.),
                 hid_init=init.Constant(0.),
                 backwards=False,
                 learn_init=False,
                 peepholes=True,
                 gradient_steps=-1,
                 grad_clipping=0,
                 unroll_scan=False,
                 precompute_input=True,
                 mask_input=None,
                 only_return_final=False,
                 inter_drop=0.05,
                 **kwargs):
        super(DropLSTMLayer, self).__init__(incoming, num_units,
                                            ingate, forgetgate, cell, outgate,
                                            nonlinearity, cell_init, hid_init,
                                            backwards, learn_init, peepholes,
                                            gradient_steps, grad_clipping, unroll_scan,
                                            precompute_input, mask_input, only_return_final, **kwargs)
        self.inter_retain_prob = 1 - inter_drop
        self._srng = RandomStreams(
            lasagne.random.get_rng().randint(1, 2147462579))

项目：ip-avsr 作者：lzuwei | 项目源码 | 文件源码

def generate_lstm_parameters():
    gate_parameters = Gate(
        W_in=las.init.Orthogonal(), W_hid=las.init.Orthogonal(),
        b=las.init.Constant(0.))
    cell_parameters = Gate(
        W_in=las.init.Orthogonal(), W_hid=las.init.Orthogonal(),
        # Setting W_cell to None denotes that no cell connection will be used.
        W_cell=None, b=las.init.Constant(0.),
        # By convention, the cell nonlinearity is tanh in an LSTM.
        nonlinearity=tanh)
    return gate_parameters, cell_parameters

项目：ip-avsr 作者：lzuwei | 项目源码 | 文件源码

def create_model(input_shape, input_var, mask_shape, mask_var, lstm_size=250, output_classes=26,
                 w_init=las.init.Orthogonal()):
    gate_parameters = Gate(
        W_in=w_init, W_hid=w_init,
        b=las.init.Constant(0.))
    cell_parameters = Gate(
        W_in=w_init, W_hid=w_init,
        # Setting W_cell to None denotes that no cell connection will be used.
        W_cell=None, b=las.init.Constant(0.),
        # By convention, the cell nonlinearity is tanh in an LSTM.
        nonlinearity=tanh)

    l_in = InputLayer(input_shape, input_var, 'input')
    l_mask = InputLayer(mask_shape, mask_var, 'mask')

    f_lstm, b_lstm = create_blstm(l_in, l_mask, lstm_size, cell_parameters, gate_parameters, 'lstm')

    l_sum = ElemwiseSumLayer([f_lstm, b_lstm], name='sum')
    l_forward_slice1 = SliceLayer(l_sum, -1, 1, name='slice1')

    # Now, we can apply feed-forward layers as usual.
    # We want the network to predict a classification for the sequence,
    # so we'll use a the number of classes.
    l_out = DenseLayer(
        l_forward_slice1, num_units=output_classes, nonlinearity=las.nonlinearities.softmax, name='output')

    return l_out

项目：ip-avsr 作者：lzuwei | 项目源码 | 文件源码

def create_model(input_shape, input_var, mask_shape, mask_var, window, lstm_size=250, output_classes=26,
                 w_init=las.init.GlorotUniform(), use_peepholes=False, use_blstm=True):
    gate_parameters = Gate(
        W_in=w_init, W_hid=w_init,
        b=las.init.Constant(0.))
    cell_parameters = Gate(
        W_in=w_init, W_hid=w_init,
        # Setting W_cell to None denotes that no cell connection will be used.
        W_cell=None, b=las.init.Constant(0.),
        # By convention, the cell nonlinearity is tanh in an LSTM.
        nonlinearity=tanh)

    l_in = InputLayer(input_shape, input_var, 'input')
    l_mask = InputLayer(mask_shape, mask_var, name='mask')

    symbolic_seqlen = l_in.input_var.shape[1]
    l_delta = DeltaLayer(l_in, window, name='delta')

    if use_blstm:
        f_lstm, b_lstm = create_blstm(l_delta, l_mask, lstm_size, cell_parameters, gate_parameters, 'lstm', use_peepholes)
        l_sum = ElemwiseSumLayer([f_lstm, b_lstm], name='sum')
        # reshape to (num_examples * seq_len, lstm_size)
        l_reshape = ReshapeLayer(l_sum, (-1, lstm_size), name='reshape')
    else:
        l_lstm = create_lstm(l_delta, l_mask, lstm_size, cell_parameters, gate_parameters, 'lstm', use_peepholes)
        l_reshape = ReshapeLayer(l_lstm, (-1, lstm_size), name='reshape')

    # Now, we can apply feed-forward layers as usual.
    # We want the network to predict a classification for the sequence,
    # so we'll use a the number of classes.
    l_softmax = DenseLayer(
        l_reshape, num_units=output_classes, nonlinearity=las.nonlinearities.softmax, name='softmax')

    l_out = ReshapeLayer(l_softmax, (-1, symbolic_seqlen, output_classes), name='output')
    return l_out

项目：ip-avsr 作者：lzuwei | 项目源码 | 文件源码

def create_model(substreams, mask_shape, mask_var, lstm_size=250, output_classes=26,
                 fusiontype='concat', w_init_fn=las.init.Orthogonal(), use_peepholes=True):

    gate_parameters = Gate(
        W_in=w_init_fn, W_hid=w_init_fn,
        b=las.init.Constant(0.))
    cell_parameters = Gate(
        W_in=w_init_fn, W_hid=w_init_fn,
        # Setting W_cell to None denotes that no cell connection will be used.
        W_cell=None, b=las.init.Constant(0.),
        # By convention, the cell nonlinearity is tanh in an LSTM.
        nonlinearity=tanh)

    l_mask = InputLayer(mask_shape, mask_var, 'mask')
    symbolic_seqlen_raw = l_mask.input_var.shape[1]

    # We'll combine the forward and backward layer output by summing.
    # Merge layers take in lists of layers to merge as input.
    if fusiontype == 'adasum':
        l_fuse = AdaptiveElemwiseSumLayer(substreams, name='adasum1')
    elif fusiontype == 'sum':
        l_fuse = ElemwiseSumLayer(substreams, name='sum1')
    elif fusiontype == 'concat':
        l_fuse = ConcatLayer(substreams, axis=-1, name='concat')

    f_lstm_agg, b_lstm_agg = create_blstm(l_fuse, l_mask, lstm_size, cell_parameters, gate_parameters, 'lstm_agg')
    l_sum2 = ElemwiseSumLayer([f_lstm_agg, b_lstm_agg], name='sum2')

    # reshape to (num_examples * seq_len, lstm_size)
    l_reshape3 = ReshapeLayer(l_sum2, (-1, lstm_size), name='reshape3')

    # Now, we can apply feed-forward layers as usual.
    # We want the network to predict a classification for the sequence,
    # so we'll use a the number of classes.
    l_softmax = DenseLayer(
        l_reshape3, num_units=output_classes,
        nonlinearity=las.nonlinearities.softmax, name='softmax')

    l_out = ReshapeLayer(l_softmax, (-1, symbolic_seqlen_raw, output_classes), name='output')

    return l_out, l_fuse

项目：ip-avsr 作者：lzuwei | 项目源码 | 文件源码

def create_model(input_shape, input_var, mask_shape, mask_var, lstm_size=250, output_classes=26,
                 w_init=las.init.GlorotUniform(), use_peepholes=False, use_blstm=True):
    gate_parameters = Gate(
        W_in=w_init, W_hid=w_init,
        b=las.init.Constant(0.))
    cell_parameters = Gate(
        W_in=w_init, W_hid=w_init,
        # Setting W_cell to None denotes that no cell connection will be used.
        W_cell=None, b=las.init.Constant(0.),
        # By convention, the cell nonlinearity is tanh in an LSTM.
        nonlinearity=tanh)

    l_in = InputLayer(input_shape, input_var, 'input')
    l_mask = InputLayer(mask_shape, mask_var, 'mask')

    symbolic_seqlen = l_in.input_var.shape[1]
    if use_blstm:
        f_lstm, b_lstm = create_blstm(l_in, l_mask, lstm_size, cell_parameters, gate_parameters, 'lstm', use_peepholes)
        l_sum = ElemwiseSumLayer([f_lstm, b_lstm], name='sum')

        # reshape to (num_examples * seq_len, lstm_size)
        l_reshape = ReshapeLayer(l_sum, (-1, lstm_size), name='reshape')
    else:
        l_lstm = create_lstm(l_in, l_mask, lstm_size, cell_parameters, gate_parameters, 'lstm', use_peepholes)
        l_reshape = ReshapeLayer(l_lstm, (-1, lstm_size), name='reshape')

    # Now, we can apply feed-forward layers as usual.
    # We want the network to predict a classification for the sequence,
    # so we'll use a the number of classes.
    l_softmax = DenseLayer(
        l_reshape, num_units=output_classes, nonlinearity=las.nonlinearities.softmax, name='softmax')

    l_out = ReshapeLayer(l_softmax, (-1, symbolic_seqlen, output_classes), name='output')
    return l_out

项目：rllab 作者：rll | 项目源码 | 文件源码

def __init__(
            self,
            env_spec,
            hidden_sizes=(32, 32),
            hidden_nonlinearity=NL.tanh,
            num_seq_inputs=1,
            prob_network=None,
    ):
        """
        :param env_spec: A spec for the mdp.
        :param hidden_sizes: list of sizes for the fully connected hidden layers
        :param hidden_nonlinearity: nonlinearity used for each hidden layer
        :param prob_network: manually specified network for this policy, other network params
        are ignored
        :return:
        """
        Serializable.quick_init(self, locals())

        assert isinstance(env_spec.action_space, Discrete)

        if prob_network is None:
            prob_network = MLP(
                input_shape=(env_spec.observation_space.flat_dim * num_seq_inputs,),
                output_dim=env_spec.action_space.n,
                hidden_sizes=hidden_sizes,
                hidden_nonlinearity=hidden_nonlinearity,
                output_nonlinearity=NL.softmax,
            )

        self._l_prob = prob_network.output_layer
        self._l_obs = prob_network.input_layer
        self._f_prob = ext.compile_function([prob_network.input_layer.input_var], L.get_output(
            prob_network.output_layer))

        self._dist = Categorical(env_spec.action_space.n)

        super(CategoricalMLPPolicy, self).__init__(env_spec)
        LasagnePowered.__init__(self, [prob_network.output_layer])

项目：time_lstm 作者：DarryO | 项目源码 | 文件源码

def __init__(self, W_t=init.Normal(0.1), W_x=init.Normal(0.1),
            b=init.Constant(0.),
            nonlinearity_inside=nonlinearities.tanh,
            nonlinearity_outside=nonlinearities.sigmoid):
        self.W_t = W_t
        self.W_x = W_x
        self.b = b
        self.nonlinearity_inside = nonlinearity_inside
        self.nonlinearity_outside = nonlinearity_outside

项目：maml_rl 作者：cbfinn | 项目源码 | 文件源码

def __init__(
            self,
            env_spec,
            hidden_sizes=(32, 32),
            hidden_nonlinearity=NL.tanh,
            num_seq_inputs=1,
            prob_network=None,
    ):
        """
        :param env_spec: A spec for the mdp.
        :param hidden_sizes: list of sizes for the fully connected hidden layers
        :param hidden_nonlinearity: nonlinearity used for each hidden layer
        :param prob_network: manually specified network for this policy, other network params
        are ignored
        :return:
        """
        Serializable.quick_init(self, locals())

        assert isinstance(env_spec.action_space, Discrete)

        if prob_network is None:
            prob_network = MLP(
                input_shape=(env_spec.observation_space.flat_dim * num_seq_inputs,),
                output_dim=env_spec.action_space.n,
                hidden_sizes=hidden_sizes,
                hidden_nonlinearity=hidden_nonlinearity,
                output_nonlinearity=NL.softmax,
            )

        self._l_prob = prob_network.output_layer
        self._l_obs = prob_network.input_layer
        self._f_prob = ext.compile_function([prob_network.input_layer.input_var], L.get_output(
            prob_network.output_layer))

        self._dist = Categorical(env_spec.action_space.n)

        super(CategoricalMLPPolicy, self).__init__(env_spec)
        LasagnePowered.__init__(self, [prob_network.output_layer])

项目：snn4hrl 作者：florensacc | 项目源码 | 文件源码

def __init__(
            self,
            env_spec,
            latent_dim=0,    # all this is fake
            latent_name='categorical',
            bilinear_integration=False,
            resample=False,  # until here
            hidden_sizes=(32, 32),
            hidden_nonlinearity=NL.tanh,
            prob_network=None,
    ):
        """
        :param env_spec: A spec for the mdp.
        :param hidden_sizes: list of sizes for the fully connected hidden layers
        :param hidden_nonlinearity: nonlinearity used for each hidden layer
        :param prob_network: manually specified network for this policy, other network params
        are ignored
        :return:
        """
        #bullshit
        self.latent_dim = latent_dim  ##could I avoid needing this self for the get_action?
        self.latent_name = latent_name
        self.bilinear_integration = bilinear_integration
        self.resample = resample
        self._set_std_to_0 = False

        Serializable.quick_init(self, locals())

        assert isinstance(env_spec.action_space, Discrete)

        if prob_network is None:
            prob_network = MLP(
                input_shape=(env_spec.observation_space.flat_dim,),
                output_dim=env_spec.action_space.n,
                hidden_sizes=hidden_sizes,
                hidden_nonlinearity=hidden_nonlinearity,
                output_nonlinearity=NL.softmax,
            )

        self._l_prob = prob_network.output_layer
        self._l_obs = prob_network.input_layer
        self._f_prob = ext.compile_function([prob_network.input_layer.input_var], L.get_output(
            prob_network.output_layer))

        self._dist = Categorical(env_spec.action_space.n)

        super(CategoricalMLPPolicy, self).__init__(env_spec)
        LasagnePowered.__init__(self, [prob_network.output_layer])

项目：third_person_im 作者：bstadie | 项目源码 | 文件源码

def __init__(
            self,
            env_spec,
            hidden_sizes=(32, 32),
            hidden_nonlinearity=NL.rectify,
            hidden_W_init=LI.HeUniform(),
            hidden_b_init=LI.Constant(0.),
            output_nonlinearity=NL.tanh,
            output_W_init=LI.Uniform(-3e-3, 3e-3),
            output_b_init=LI.Uniform(-3e-3, 3e-3),
            bn=False):
        Serializable.quick_init(self, locals())

        l_obs = L.InputLayer(shape=(None, env_spec.observation_space.flat_dim))

        l_hidden = l_obs
        if bn:
            l_hidden = batch_norm(l_hidden)

        for idx, size in enumerate(hidden_sizes):
            l_hidden = L.DenseLayer(
                l_hidden,
                num_units=size,
                W=hidden_W_init,
                b=hidden_b_init,
                nonlinearity=hidden_nonlinearity,
                name="h%d" % idx
            )
            if bn:
                l_hidden = batch_norm(l_hidden)

        l_output = L.DenseLayer(
            l_hidden,
            num_units=env_spec.action_space.flat_dim,
            W=output_W_init,
            b=output_b_init,
            nonlinearity=output_nonlinearity,
            name="output"
        )

        # Note the deterministic=True argument. It makes sure that when getting
        # actions from single observations, we do not update params in the
        # batch normalization layers

        action_var = L.get_output(l_output, deterministic=True)
        self._output_layer = l_output

        self._f_actions = ext.compile_function([l_obs.input_var], action_var)

        super(DeterministicMLPPolicy, self).__init__(env_spec)
        LasagnePowered.__init__(self, [l_output])

项目：NeuroNLP 作者：XuezheMax | 项目源码 | 文件源码

def build_recur_dropout(incoming1, incoming2, num_units, num_labels, mask, grad_clipping, num_filters, p):
    # Construct Bi-directional LSTM-CNNs-CRF with recurrent dropout.
    # first get some necessary dimensions or parameters
    conv_window = 3
    # shape = [batch, n-step, c_dim, char_length]
    # construct convolution layer
    # shape = [batch, n-step, c_filters, output_length]
    cnn_layer = ConvTimeStep1DLayer(incoming1, num_filters=num_filters, filter_size=conv_window, pad='full',
                                    nonlinearity=lasagne.nonlinearities.tanh, name='cnn')
    # infer the pool size for pooling (pool size should go through all time step of cnn)
    _, _, _, pool_size = cnn_layer.output_shape
    # construct max pool layer
    # shape = [batch, n-step, c_filters, 1]
    pool_layer = PoolTimeStep1DLayer(cnn_layer, pool_size=pool_size)
    # reshape: [batch, n-step, c_filters, 1] --> [batch, n-step, c_filters]
    output_cnn_layer = lasagne.layers.reshape(pool_layer, ([0], [1], [2]))

    # finally, concatenate the two incoming layers together.
    # shape = [batch, n-step, c_filter&w_dim]
    incoming = lasagne.layers.concat([output_cnn_layer, incoming2], axis=2)

    # dropout for incoming
    incoming = lasagne.layers.DropoutLayer(incoming, p=p, shared_axes=(1,))

    ingate_forward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(),
                          W_cell=lasagne.init.Uniform(range=0.1))
    outgate_forward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(),
                           W_cell=lasagne.init.Uniform(range=0.1))
    # according to Jozefowicz et al.(2015), init bias of forget gate to 1.
    forgetgate_forward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(),
                              W_cell=lasagne.init.Uniform(range=0.1), b=lasagne.init.Constant(1.))
    # now use tanh for nonlinear function of cell, need to try pure linear cell
    cell_forward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None,
                        nonlinearity=nonlinearities.tanh)
    lstm_forward = LSTMLayer(incoming, num_units, mask_input=mask, grad_clipping=grad_clipping,
                             nonlinearity=nonlinearities.tanh, peepholes=False,
                             ingate=ingate_forward, outgate=outgate_forward,
                             forgetgate=forgetgate_forward, cell=cell_forward, p=p, name='forward')

    ingate_backward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(),
                           W_cell=lasagne.init.Uniform(range=0.1))
    outgate_backward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(),
                            W_cell=lasagne.init.Uniform(range=0.1))
    # according to Jozefowicz et al.(2015), init bias of forget gate to 1.
    forgetgate_backward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(),
                               W_cell=lasagne.init.Uniform(range=0.1), b=lasagne.init.Constant(1.))
    # now use tanh for nonlinear function of cell, need to try pure linear cell
    cell_backward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None,
                         nonlinearity=nonlinearities.tanh)
    lstm_backward = LSTMLayer(incoming, num_units, mask_input=mask, grad_clipping=grad_clipping,
                              nonlinearity=nonlinearities.tanh, peepholes=False, backwards=True,
                              ingate=ingate_backward, outgate=outgate_backward,
                              forgetgate=forgetgate_backward, cell=cell_backward, p=p, name='backward')

    # concatenate the outputs of forward and backward LSTMs to combine them.
    bi_lstm_cnn = lasagne.layers.concat([lstm_forward, lstm_backward], axis=2, name="bi-lstm")
    # shape = [batch, n-step, num_units]
    bi_lstm_cnn = lasagne.layers.DropoutLayer(bi_lstm_cnn, p=p, shared_axes=(1,))

    return ChainCRFLayer(bi_lstm_cnn, num_labels, mask_input=mask)

项目：NeuroNLP 作者：XuezheMax | 项目源码 | 文件源码

def build_RNN(architec, layer_input, layer_mask, num_units, grad_clipping):
    def build_GRU(reset_input):
        resetgate = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None)

        updategate = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None)

        hiden_update = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None,
                            b=lasagne.init.Constant(0.), nonlinearity=nonlinearities.tanh)

        return GRULayer(layer_input, num_units, mask_input=layer_mask, grad_clipping=grad_clipping,
                        resetgate=resetgate, updategate=updategate, hidden_update=hiden_update,
                        reset_input=reset_input, only_return_final=True, p=0.5, name='GRU')

    def build_LSTM():
        ingate = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(),
                      W_cell=lasagne.init.Uniform(range=0.1))

        outgate = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(),
                       W_cell=lasagne.init.Uniform(range=0.1))
        # according to Jozefowicz et al.(2015), init bias of forget gate to 1.
        forgetgate = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(),
                          W_cell=lasagne.init.Uniform(range=0.1), b=lasagne.init.Constant(1.))
        # now use tanh for nonlinear function of cell, need to try pure linear cell
        cell = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None,
                    b=lasagne.init.Constant(0.), nonlinearity=nonlinearities.tanh)

        return LSTMLayer(layer_input, num_units, mask_input=layer_mask, grad_clipping=grad_clipping,
                         ingate=ingate, forgetgate=forgetgate, cell=cell, outgate=outgate,
                         peepholes=False, nonlinearity=nonlinearities.tanh,
                         only_return_final=True, p=0.5, name='LSTM')

    def build_SGRU():
        resetgate_hidden = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(),
                         W_cell=lasagne.init.GlorotUniform())

        resetgate_input = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(),
                                W_cell=lasagne.init.GlorotUniform())

        updategate = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(),
                          W_cell=lasagne.init.GlorotUniform())

        hidden_update = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None,
                            b=lasagne.init.Constant(0.), nonlinearity=nonlinearities.tanh)

        return SGRULayer(layer_input, num_units, mask_input=layer_mask, grad_clipping=grad_clipping,
                         resetgate_input=resetgate_input, resetgate_hidden=resetgate_hidden,
                         updategate=updategate, hidden_update=hidden_update,
                         only_return_final=True, p=0.5, name='SGRU')

    if architec == 'gru0':
        return build_GRU(False)
    elif architec == 'gru1':
        return build_GRU(True)
    elif architec == 'lstm':
        return build_LSTM()
    elif architec == 'sgru':
        return build_SGRU()
    else:
        raise ValueError('unkown architecture: %s' % architec)

项目：NeuroNLP 作者：XuezheMax | 项目源码 | 文件源码

def build_std_dropout_gru(incoming1, incoming2, num_units, num_labels, mask, grad_clipping, num_filters, p,
                          reset_input):
    # Construct Bi-directional LSTM-CNNs-CRF with standard dropout.
    # first get some necessary dimensions or parameters
    conv_window = 3
    # shape = [batch, n-step, c_dim, char_length]
    incoming1 = lasagne.layers.DropoutLayer(incoming1, p=p)

    # construct convolution layer
    # shape = [batch, n-step, c_filters, output_length]
    cnn_layer = ConvTimeStep1DLayer(incoming1, num_filters=num_filters, filter_size=conv_window, pad='full',
                                    nonlinearity=lasagne.nonlinearities.tanh, name='cnn')
    # infer the pool size for pooling (pool size should go through all time step of cnn)
    _, _, _, pool_size = cnn_layer.output_shape
    # construct max pool layer
    # shape = [batch, n-step, c_filters, 1]
    pool_layer = PoolTimeStep1DLayer(cnn_layer, pool_size=pool_size)
    # reshape: [batch, n-step, c_filters, 1] --> [batch, n-step, c_filters]
    output_cnn_layer = lasagne.layers.reshape(pool_layer, ([0], [1], [2]))

    # finally, concatenate the two incoming layers together.
    # shape = [batch, n-step, c_filter&w_dim]
    incoming = lasagne.layers.concat([output_cnn_layer, incoming2], axis=2)

    # dropout for incoming
    incoming = lasagne.layers.DropoutLayer(incoming, p=0.2)

    resetgate_forward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None)
    updategate_forward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None)
    hidden_update_forward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(),
                                 W_cell=None, nonlinearity=nonlinearities.tanh)
    gru_forward = GRULayer(incoming, num_units, mask_input=mask, resetgate=resetgate_forward,
                           updategate=updategate_forward, hidden_update=hidden_update_forward,
                           grad_clipping=grad_clipping, reset_input=reset_input, name='forward')

    resetgate_backward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None)
    updategate_backward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None)
    hidden_update_backward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(),
                                  W_cell=None, nonlinearity=nonlinearities.tanh)
    gru_backward = GRULayer(incoming, num_units, mask_input=mask, backwards=True, resetgate=resetgate_backward,
                            updategate=updategate_backward, hidden_update=hidden_update_backward,
                            grad_clipping=grad_clipping, reset_input=reset_input, name='backward')

    # concatenate the outputs of forward and backward LSTMs to combine them.
    bi_gru_cnn = lasagne.layers.concat([gru_forward, gru_backward], axis=2, name="bi-gru")

    bi_gru_cnn = lasagne.layers.DropoutLayer(bi_gru_cnn, p=p)

    # reshape bi-rnn-cnn to [batch * max_length, num_units]
    bi_gru_cnn = lasagne.layers.reshape(bi_gru_cnn, (-1, [2]))

    # construct output layer (dense layer with softmax)
    layer_output = lasagne.layers.DenseLayer(bi_gru_cnn, num_units=num_labels, nonlinearity=nonlinearities.softmax,
                                             name='softmax')

    return layer_output

项目：NeuroNLP 作者：XuezheMax | 项目源码 | 文件源码

def build_std_dropout_sgru(incoming1, incoming2, num_units, num_labels, mask, grad_clipping, num_filters, p):
    # Construct Bi-directional LSTM-CNNs-CRF with standard dropout.
    # first get some necessary dimensions or parameters
    conv_window = 3
    # shape = [batch, n-step, c_dim, char_length]
    incoming1 = lasagne.layers.DropoutLayer(incoming1, p=p)

    # construct convolution layer
    # shape = [batch, n-step, c_filters, output_length]
    cnn_layer = ConvTimeStep1DLayer(incoming1, num_filters=num_filters, filter_size=conv_window, pad='full',
                                    nonlinearity=lasagne.nonlinearities.tanh, name='cnn')
    # infer the pool size for pooling (pool size should go through all time step of cnn)
    _, _, _, pool_size = cnn_layer.output_shape
    # construct max pool layer
    # shape = [batch, n-step, c_filters, 1]
    pool_layer = PoolTimeStep1DLayer(cnn_layer, pool_size=pool_size)
    # reshape: [batch, n-step, c_filters, 1] --> [batch, n-step, c_filters]
    output_cnn_layer = lasagne.layers.reshape(pool_layer, ([0], [1], [2]))

    # finally, concatenate the two incoming layers together.
    # shape = [batch, n-step, c_filter&w_dim]
    incoming = lasagne.layers.concat([output_cnn_layer, incoming2], axis=2)

    # dropout for incoming
    incoming = lasagne.layers.DropoutLayer(incoming, p=0.2)

    resetgate_input_forward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None)
    resetgate_hidden_forward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None)
    updategate_forward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None)
    hidden_update_forward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(),
                                 W_cell=None, nonlinearity=nonlinearities.tanh)
    sgru_forward = SGRULayer(incoming, num_units, mask_input=mask,
                             resetgate_input=resetgate_input_forward, resetgate_hidden=resetgate_hidden_forward,
                             updategate=updategate_forward, hidden_update=hidden_update_forward,
                             grad_clipping=grad_clipping, name='forward')

    resetgate_input_backward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None)
    resetgate_hidden_backward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None)
    updategate_backward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None)
    hidden_update_backward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(),
                                  W_cell=None, nonlinearity=nonlinearities.tanh)
    sgru_backward = SGRULayer(incoming, num_units, mask_input=mask, backwards=True,
                              resetgate_input=resetgate_input_backward, resetgate_hidden=resetgate_hidden_backward,
                              updategate=updategate_backward, hidden_update=hidden_update_backward,
                              grad_clipping=grad_clipping, name='backward')

    # concatenate the outputs of forward and backward LSTMs to combine them.
    bi_sgru_cnn = lasagne.layers.concat([sgru_forward, sgru_backward], axis=2, name="bi-sgru")

    bi_sgru_cnn = lasagne.layers.DropoutLayer(bi_sgru_cnn, p=p)

    # reshape bi-rnn-cnn to [batch * max_length, num_units]
    bi_sgru_cnn = lasagne.layers.reshape(bi_sgru_cnn, (-1, [2]))

    # construct output layer (dense layer with softmax)
    layer_output = lasagne.layers.DenseLayer(bi_sgru_cnn, num_units=num_labels, nonlinearity=nonlinearities.softmax,
                                             name='softmax')

    return layer_output

项目：NeuroNLP 作者：XuezheMax | 项目源码 | 文件源码

def build_recur_dropout_gru(incoming1, incoming2, num_units, num_labels, mask, grad_clipping, num_filters, p,
                            reset_input):
    # Construct Bi-directional LSTM-CNNs-CRF with recurrent dropout.
    # first get some necessary dimensions or parameters
    conv_window = 3
    # shape = [batch, n-step, c_dim, char_length]
    # construct convolution layer
    # shape = [batch, n-step, c_filters, output_length]
    cnn_layer = ConvTimeStep1DLayer(incoming1, num_filters=num_filters, filter_size=conv_window, pad='full',
                                    nonlinearity=lasagne.nonlinearities.tanh, name='cnn')
    # infer the pool size for pooling (pool size should go through all time step of cnn)
    _, _, _, pool_size = cnn_layer.output_shape
    # construct max pool layer
    # shape = [batch, n-step, c_filters, 1]
    pool_layer = PoolTimeStep1DLayer(cnn_layer, pool_size=pool_size)
    # reshape: [batch, n-step, c_filters, 1] --> [batch, n-step, c_filters]
    output_cnn_layer = lasagne.layers.reshape(pool_layer, ([0], [1], [2]))

    # finally, concatenate the two incoming layers together.
    # shape = [batch, n-step, c_filter&w_dim]
    incoming = lasagne.layers.concat([output_cnn_layer, incoming2], axis=2)

    # dropout for incoming
    incoming = lasagne.layers.DropoutLayer(incoming, p=0.2, shared_axes=(1,))

    resetgate_forward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None)
    updategate_forward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None)
    hidden_update_forward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(),
                                 W_cell=None, nonlinearity=nonlinearities.tanh)
    gru_forward = GRULayer(incoming, num_units, mask_input=mask, resetgate=resetgate_forward,
                           updategate=updategate_forward, hidden_update=hidden_update_forward,
                           grad_clipping=grad_clipping, reset_input=reset_input, p=p, name='forward')

    resetgate_backward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None)
    updategate_backward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None)
    hidden_update_backward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(),
                                  W_cell=None, nonlinearity=nonlinearities.tanh)
    gru_backward = GRULayer(incoming, num_units, mask_input=mask, backwards=True, resetgate=resetgate_backward,
                            updategate=updategate_backward, hidden_update=hidden_update_backward,
                            grad_clipping=grad_clipping, reset_input=reset_input, p=p, name='backward')

    # concatenate the outputs of forward and backward LSTMs to combine them.
    bi_gru_cnn = lasagne.layers.concat([gru_forward, gru_backward], axis=2, name="bi-gru")
    # shape = [batch, n-step, num_units]
    bi_gru_cnn = lasagne.layers.DropoutLayer(bi_gru_cnn, p=p, shared_axes=(1,))

    # reshape bi-rnn-cnn to [batch * max_length, num_units]
    bi_gru_cnn = lasagne.layers.reshape(bi_gru_cnn, (-1, [2]))

    # construct output layer (dense layer with softmax)
    layer_output = lasagne.layers.DenseLayer(bi_gru_cnn, num_units=num_labels, nonlinearity=nonlinearities.softmax,
                                             name='softmax')

    return layer_output

项目：NeuroNLP 作者：XuezheMax | 项目源码 | 文件源码

def build_recur_dropout_sgru(incoming1, incoming2, num_units, num_labels, mask, grad_clipping, num_filters, p):
    # Construct Bi-directional LSTM-CNNs-CRF with recurrent dropout.
    # first get some necessary dimensions or parameters
    conv_window = 3
    # shape = [batch, n-step, c_dim, char_length]
    # construct convolution layer
    # shape = [batch, n-step, c_filters, output_length]
    cnn_layer = ConvTimeStep1DLayer(incoming1, num_filters=num_filters, filter_size=conv_window, pad='full',
                                    nonlinearity=lasagne.nonlinearities.tanh, name='cnn')
    # infer the pool size for pooling (pool size should go through all time step of cnn)
    _, _, _, pool_size = cnn_layer.output_shape
    # construct max pool layer
    # shape = [batch, n-step, c_filters, 1]
    pool_layer = PoolTimeStep1DLayer(cnn_layer, pool_size=pool_size)
    # reshape: [batch, n-step, c_filters, 1] --> [batch, n-step, c_filters]
    output_cnn_layer = lasagne.layers.reshape(pool_layer, ([0], [1], [2]))

    # finally, concatenate the two incoming layers together.
    # shape = [batch, n-step, c_filter&w_dim]
    incoming = lasagne.layers.concat([output_cnn_layer, incoming2], axis=2)

    # dropout for incoming
    incoming = lasagne.layers.DropoutLayer(incoming, p=0.2, shared_axes=(1,))

    resetgate_input_forward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None)
    resetgate_hidden_forward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None)
    updategate_forward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None)
    hidden_update_forward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(),
                                 W_cell=None, nonlinearity=nonlinearities.tanh)
    sgru_forward = SGRULayer(incoming, num_units, mask_input=mask,
                             resetgate_input=resetgate_input_forward, resetgate_hidden=resetgate_hidden_forward,
                             updategate=updategate_forward, hidden_update=hidden_update_forward,
                             grad_clipping=grad_clipping, p=p, name='forward')

    resetgate_input_backward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None)
    resetgate_hidden_backward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None)
    updategate_backward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None)
    hidden_update_backward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(),
                                  W_cell=None, nonlinearity=nonlinearities.tanh)
    sgru_backward = SGRULayer(incoming, num_units, mask_input=mask, backwards=True,
                              resetgate_input=resetgate_input_backward, resetgate_hidden=resetgate_hidden_backward,
                              updategate=updategate_backward, hidden_update=hidden_update_backward,
                              grad_clipping=grad_clipping, p=p, name='backward')

    # concatenate the outputs of forward and backward LSTMs to combine them.
    bi_sgru_cnn = lasagne.layers.concat([sgru_forward, sgru_backward], axis=2, name="bi-sgru")
    # shape = [batch, n-step, num_units]
    bi_sgru_cnn = lasagne.layers.DropoutLayer(bi_sgru_cnn, p=p, shared_axes=(1,))

    # reshape bi-rnn-cnn to [batch * max_length, num_units]
    bi_sgru_cnn = lasagne.layers.reshape(bi_sgru_cnn, (-1, [2]))

    # construct output layer (dense layer with softmax)
    layer_output = lasagne.layers.DenseLayer(bi_sgru_cnn, num_units=num_labels, nonlinearity=nonlinearities.softmax,
                                             name='softmax')

    return layer_output

项目：MachineComprehension 作者：sa-j | 项目源码 | 文件源码

def __init__(self, incoming, num_units, ingate=Gate(), forgetgate=Gate(),
                 cell=Gate(W_cell=None, nonlinearity=nonlinearities.tanh), outgate=Gate(),
                 nonlinearity=nonlinearities.tanh, cell_init=init.Constant(0.), hid_init=init.Constant(0.),
                 backwards=False, learn_init=False, peepholes=True, gradient_steps=-1, grad_clipping=0,
                 precompute_input=True, mask_input=None,
                 encoder_mask_input=None, attention=False, word_by_word=False, **kwargs):
        super(CustomLSTMDecoder, self).__init__(incoming, num_units, ingate, forgetgate, cell, outgate, nonlinearity,
                                                cell_init, hid_init, backwards, learn_init, peepholes, gradient_steps,
                                                grad_clipping, False, precompute_input, mask_input, True,
                                                **kwargs)
        self.attention = attention
        self.word_by_word = word_by_word
        # encoder mask
        self.encoder_mask_incoming_index = -1
        if encoder_mask_input is not None:
            self.input_layers.append(encoder_mask_input)
            self.input_shapes.append(encoder_mask_input.output_shape)
            self.encoder_mask_incoming_index = len(self.input_layers) - 1
        # check encoder
        if not isinstance(self.cell_init, CustomLSTMEncoder) \
                or self.num_units != self.cell_init.num_units:
            raise ValueError('cell_init must be CustomLSTMEncoder'
                             ' and num_units should equal')
        self.r_init = None
        self.r_init = self.add_param(init.Constant(0.),
                                     (1, num_units), name="r_init",
                                     trainable=False, regularizable=False)
        if self.word_by_word:
            # rewrites
            self.attention = True
        if self.attention:
            if not isinstance(encoder_mask_input, lasagne.layers.Layer):
                raise ValueError('Attention mechnism needs encoder mask layer')
            # initializes attention weights
            self.W_y_attend = self.add_param(init.Normal(0.1), (num_units, num_units), 'V_pointer')
            self.W_h_attend = self.add_param(init.Normal(0.1), (num_units, num_units), 'W_h_attend')
            # doesn't need transpose
            self.w_attend = self.add_param(init.Normal(0.1), (num_units, 1), 'v_pointer')
            self.W_p_attend = self.add_param(init.Normal(0.1), (num_units, num_units), 'W_p_attend')
            self.W_x_attend = self.add_param(init.Normal(0.1), (num_units, num_units), 'W_x_attend')
            if self.word_by_word:
                self.W_r_attend = self.add_param(init.Normal(0.1), (num_units, num_units), 'W_r_attend')
                self.W_t_attend = self.add_param(init.Normal(0.1), (num_units, num_units), 'W_t_attend')

项目：began 作者：davidtellez | 项目源码 | 文件源码

def network_generator(self, input_var, network_weights=None):

        # Input layer
        layers = []
        n_blocks = int(np.log2(self.input_size / 8)) + 1  # end up with 8x8 output
        layers.append(InputLayer(shape=(None, self.hidden_size), input_var=input_var, name='generator/input'))

        # Dense layer up (from h to n*8*8)
        layers.append(dense_layer(layers[-1], n_units=(8 * 8 * self.n_filters), name='generator/dense%d' % len(layers), network_weights=network_weights))
        layers.append(ReshapeLayer(layers[-1], (-1, self.n_filters, 8, 8), name='generator/reshape%d' % len(layers)))

        # Convolutional blocks (decoder)
        for i_block in range(1, n_blocks+1):
            layers.append(conv_layer(layers[-1], n_filters=self.n_filters, stride=1, name='generator/conv%d' % len(layers), network_weights=network_weights))
            layers.append(conv_layer(layers[-1], n_filters=self.n_filters, stride=1, name='generator/conv%d' % len(layers), network_weights=network_weights))
            if i_block != n_blocks:
                layers.append(Upscale2DLayer(layers[-1], scale_factor=2, name='generator/upsample%d' % len(layers)))

        # Final layer (make sure input images are in the range [-1, 1] if tanh used)
        layers.append(conv_layer(layers[-1], n_filters=3, stride=1, name='generator/output', network_weights=network_weights, nonlinearity=sigmoid))

        # Network in dictionary form
        network = {layer.name: layer for layer in layers}

        return network

    # def network_generator_alt(self, input_var, network_weights=None):
    #
    #     # Input layer
    #     layers = []
    #     n_blocks = int(np.log2(self.input_size / 8)) + 1  # end up with 8x8 output
    #     layers.append(InputLayer(shape=(None, self.hidden_size), input_var=input_var, name='generator/input'))
    #
    #     # Dense layer up (from h to n*8*8)
    #     layers.append(dense_layer(layers[-1], n_units=(8 * 8 * self.n_filters*n_blocks), name='generator/dense%d' % len(layers), network_weights=network_weights, nonlinearity=elu, bn=True))
    #     layers.append(ReshapeLayer(layers[-1], (-1, self.n_filters*n_blocks, 8, 8), name='generator/reshape%d' % len(layers)))
    #
    #     # Convolutional blocks (decoder)
    #     for i_block in range(1, n_blocks+1)[::-1]:
    #         # layers.append(conv_layer(layers[-1], n_filters=self.n_filters*(i_block), stride=1, name='generator/conv%d' % len(layers), network_weights=network_weights, bn=True))
    #         # layers.append(conv_layer(layers[-1], n_filters=self.n_filters*(i_block), stride=1, name='generator/conv%d' % len(layers), network_weights=network_weights, bn=True))
    #         if i_block != 1:
    #             layers.append(transposed_conv_layer(layers[-1], n_filters=self.n_filters*(i_block-1), stride=2, name='generator/upsample%d' % len(layers),
    #                                                 output_size=8*2**(n_blocks-i_block+1), network_weights=network_weights, nonlinearity=elu, bn=True))
    #
    #     # Final layer (make sure input images are in the range [-1, 1]
    #     layers.append(conv_layer(layers[-1], n_filters=3, stride=1, name='generator/output', network_weights=network_weights, nonlinearity=tanh, bn=False))
    #
    #     # Network in dictionary form
    #     network = {layer.name: layer for layer in layers}
    #
    #     return network

项目：rllabplusplus 作者：shaneshixiang | 项目源码 | 文件源码

def __init__(
            self,
            env_spec,
            hidden_sizes=(32, 32),
            hidden_nonlinearity=NL.rectify,
            hidden_W_init=LI.HeUniform(),
            hidden_b_init=LI.Constant(0.),
            output_nonlinearity=NL.tanh,
            output_W_init=LI.Uniform(-3e-3, 3e-3),
            output_b_init=LI.Uniform(-3e-3, 3e-3),
            bn=False):
        Serializable.quick_init(self, locals())

        l_obs = L.InputLayer(shape=(None, env_spec.observation_space.flat_dim))

        l_hidden = l_obs
        if bn:
            l_hidden = batch_norm(l_hidden)

        for idx, size in enumerate(hidden_sizes):
            l_hidden = L.DenseLayer(
                l_hidden,
                num_units=size,
                W=hidden_W_init,
                b=hidden_b_init,
                nonlinearity=hidden_nonlinearity,
                name="h%d" % idx
            )
            if bn:
                l_hidden = batch_norm(l_hidden)

        l_output = L.DenseLayer(
            l_hidden,
            num_units=env_spec.action_space.flat_dim,
            W=output_W_init,
            b=output_b_init,
            nonlinearity=output_nonlinearity,
            name="output"
        )

        # Note the deterministic=True argument. It makes sure that when getting
        # actions from single observations, we do not update params in the
        # batch normalization layers

        action_var = L.get_output(l_output, deterministic=True)
        self._output_layer = l_output

        self._f_actions = ext.compile_function([l_obs.input_var], action_var)

        super(DeterministicMLPPolicy, self).__init__(env_spec)
        LasagnePowered.__init__(self, [l_output])

项目：LasagneNLP 作者：XuezheMax | 项目源码 | 文件源码

def build_BiLSTM(incoming, num_units, mask=None, grad_clipping=0, precompute_input=True, peepholes=False, dropout=True,
                 in_to_out=False):
    # construct the forward and backward rnns. Now, Ws are initialized by Glorot initializer with default arguments.
    # Need to try other initializers for specific tasks.

    # dropout for incoming
    if dropout:
        incoming = lasagne.layers.DropoutLayer(incoming, p=0.5)

    ingate_forward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(),
                          W_cell=lasagne.init.Uniform(range=0.1))
    outgate_forward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(),
                           W_cell=lasagne.init.Uniform(range=0.1))
    # according to Jozefowicz et al.(2015), init bias of forget gate to 1.
    forgetgate_forward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(),
                              W_cell=lasagne.init.Uniform(range=0.1), b=lasagne.init.Constant(1.))
    # now use tanh for nonlinear function of cell, need to try pure linear cell
    cell_forward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None,
                        nonlinearity=nonlinearities.tanh)
    lstm_forward = lasagne.layers.LSTMLayer(incoming, num_units, mask_input=mask, grad_clipping=grad_clipping,
                                            nonlinearity=nonlinearities.tanh, peepholes=peepholes,
                                            precompute_input=precompute_input,
                                            ingate=ingate_forward, outgate=outgate_forward,
                                            forgetgate=forgetgate_forward, cell=cell_forward, name='forward')

    ingate_backward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(),
                           W_cell=lasagne.init.Uniform(range=0.1))
    outgate_backward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(),
                            W_cell=lasagne.init.Uniform(range=0.1))
    # according to Jozefowicz et al.(2015), init bias of forget gate to 1.
    forgetgate_backward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(),
                               W_cell=lasagne.init.Uniform(range=0.1), b=lasagne.init.Constant(1.))
    # now use tanh for nonlinear function of cell, need to try pure linear cell
    cell_backward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None,
                         nonlinearity=nonlinearities.tanh)
    lstm_backward = lasagne.layers.LSTMLayer(incoming, num_units, mask_input=mask, grad_clipping=grad_clipping,
                                             nonlinearity=nonlinearities.tanh, peepholes=peepholes,
                                             precompute_input=precompute_input, backwards=True,
                                             ingate=ingate_backward, outgate=outgate_backward,
                                             forgetgate=forgetgate_backward, cell=cell_backward, name='backward')

    # concatenate the outputs of forward and backward RNNs to combine them.
    concat = lasagne.layers.concat([lstm_forward, lstm_backward], axis=2, name="bi-lstm")

    # dropout for output
    if dropout:
        concat = lasagne.layers.DropoutLayer(concat, p=0.5)

    if in_to_out:
        concat = lasagne.layers.concat([concat, incoming], axis=2)

    # the shape of BiRNN output (concat) is (batch_size, input_length, 2 * num_hidden_units)
    return concat

项目：LasagneNLP 作者：XuezheMax | 项目源码 | 文件源码

def build_BiGRU(incoming, num_units, mask=None, grad_clipping=0, precompute_input=True, dropout=True, in_to_out=False):
    # construct the forward and backward grus. Now, Ws are initialized by Glorot initializer with default arguments.
    # Need to try other initializers for specific tasks.

    # dropout for incoming
    if dropout:
        incoming = lasagne.layers.DropoutLayer(incoming, p=0.5)

    # according to Jozefowicz et al.(2015), init bias of forget gate to 1.
    resetgate_forward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(),
                             W_cell=lasagne.init.Uniform(range=0.1), b=lasagne.init.Constant(1.))
    updategate_forward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(),
                              W_cell=lasagne.init.Uniform(range=0.1))
    # now use tanh for nonlinear function of hidden gate
    hidden_forward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None,
                          nonlinearity=nonlinearities.tanh)
    gru_forward = lasagne.layers.GRULayer(incoming, num_units, mask_input=mask, grad_clipping=grad_clipping,
                                          precompute_input=precompute_input,
                                          resetgate=resetgate_forward, updategate=updategate_forward,
                                          hidden_update=hidden_forward, name='forward')

    # according to Jozefowicz et al.(2015), init bias of forget gate to 1.
    resetgate_backward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(),
                              W_cell=lasagne.init.Uniform(range=0.1), b=lasagne.init.Constant(1.))
    updategate_backward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(),
                               W_cell=lasagne.init.Uniform(range=0.1))
    # now use tanh for nonlinear function of hidden gate
    hidden_backward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None,
                           nonlinearity=nonlinearities.tanh)
    gru_backward = lasagne.layers.GRULayer(incoming, num_units, mask_input=mask, grad_clipping=grad_clipping,
                                           precompute_input=precompute_input, backwards=True,
                                           resetgate=resetgate_backward, updategate=updategate_backward,
                                           hidden_update=hidden_backward, name='backward')

    # concatenate the outputs of forward and backward GRUs to combine them.
    concat = lasagne.layers.concat([gru_forward, gru_backward], axis=2, name="bi-gru")

    # dropout for output
    if dropout:
        concat = lasagne.layers.DropoutLayer(concat, p=0.5)

    if in_to_out:
        concat = lasagne.layers.concat([concat, incoming], axis=2)

    # the shape of BiRNN output (concat) is (batch_size, input_length, 2 * num_hidden_units)
    return concat

项目：CIKM2017 作者：MovieFIB | 项目源码 | 文件源码

def __init__(
        self, incomings, num_units,
        W_g=init.Normal(0.1),
        W_h=init.Normal(0.1),
        W_v=init.Normal(0.1),
        W_s=init.Normal(0.1),
        W_p=init.Normal(0.1),
        nonlinearity=nonlinearities.tanh,
        nonlinearity_atten=nonlinearities.softmax,
        **kwargs
    ):
        super(AttenLayer, self).__init__(incomings, **kwargs)
        self.batch_size = self.input_shapes[0][0]  # None
        num_inputs = self.input_shapes[2][1]  # k
        feature_dim = self.input_shapes[0][1]  # d
        self.num_units = num_units
        self.nonlinearity = nonlinearity
        self.nonlinearity_atten = nonlinearity_atten
        self.W_h_to_attenGate = self.add_param(
            W_h, (num_inputs, 1),
            name='W_h_to_atten'
        )
        self.W_g_to_attenGate = self.add_param(
            W_g,
            (feature_dim, num_inputs),
            name='W_g_to_atten'
        )
        self.W_v_to_attenGate = self.add_param(
            W_v,
            (feature_dim, num_inputs),
            name='W_v_to_atten'
        )
        self.W_s_to_attenGate = self.add_param(
            W_s,
            (feature_dim, num_inputs),
            name='W_s_to_atten'
        )
        self.W_p = self.add_param(
            W_p,
            (feature_dim, num_units),
            name='W_p_to_atten'
        )
        self.num_inputs = num_inputs

项目：BiDNN 作者：v-v | 项目源码 | 文件源码

def __init__(self, conf):
        self.conf = conf

        if self.conf.act == "linear":
            self.conf.act = linear
        elif self.conf.act == "sigmoid":
            self.conf.act = sigmoid
        elif self.conf.act == "relu":
            self.conf.act = rectify
        elif self.conf.act == "tanh":
            self.conf.act = tanh
        else:
            raise ValueError("Unknown activation function", self.conf.act)

        input_var_first   = T.matrix('inputs1')
        input_var_second  = T.matrix('inputs2')
        target_var        = T.matrix('targets')

        # create network        
        self.autoencoder, encoder_first, encoder_second = self.__create_toplogy__(input_var_first, input_var_second)

        self.out = get_output(self.autoencoder)

        loss = squared_error(self.out, target_var)
        loss = loss.mean()

        params = get_all_params(self.autoencoder, trainable=True)
        updates = nesterov_momentum(loss, params, learning_rate=self.conf.lr, momentum=self.conf.momentum)

        # training function
        self.train_fn = theano.function([input_var_first, input_var_second, target_var], loss, updates=updates)

        # fuction to reconstruct
        test_reconstruction = get_output(self.autoencoder, deterministic=True)
        self.reconstruction_fn = theano.function([input_var_first, input_var_second], test_reconstruction)

        # encoding function
        test_encode = get_output([encoder_first, encoder_second], deterministic=True)
        self.encoding_fn = theano.function([input_var_first, input_var_second], test_encode)

        # utils
        blas = lambda name, ndarray: scipy.linalg.get_blas_funcs((name,), (ndarray,))[0]
        self.blas_nrm2 = blas('nrm2', np.array([], dtype=float))
        self.blas_scal = blas('scal', np.array([], dtype=float))

        # load weights if necessary
        if self.conf.load_model is not None:
            self.load_model()

项目：cpo 作者：jachiam | 项目源码 | 文件源码

def run_task(*_):
        trpo_stepsize = 0.01
        trpo_subsample_factor = 0.2

        env = PointGatherEnv(apple_reward=10,bomb_cost=1,n_apples=2, activity_range=6)

        policy = GaussianMLPPolicy(env.spec,
                    hidden_sizes=(64,32)
                 )

        baseline = GaussianMLPBaseline(
            env_spec=env.spec,
            regressor_args={
                    'hidden_sizes': (64,32),
                    'hidden_nonlinearity': NL.tanh,
                    'learn_std':False,
                    'step_size':trpo_stepsize,
                    'optimizer':ConjugateGradientOptimizer(subsample_factor=trpo_subsample_factor)
                    }
        )

        safety_baseline = GaussianMLPBaseline(
            env_spec=env.spec,
            regressor_args={
                    'hidden_sizes': (64,32),
                    'hidden_nonlinearity': NL.tanh,
                    'learn_std':False,
                    'step_size':trpo_stepsize,
                    'optimizer':ConjugateGradientOptimizer(subsample_factor=trpo_subsample_factor)
                    },
            target_key='safety_returns',
            )

        safety_constraint = GatherSafetyConstraint(max_value=0.1, baseline=safety_baseline)



        algo = CPO(
            env=env,
            policy=policy,
            baseline=baseline,
            safety_constraint=safety_constraint,
            safety_gae_lambda=1,
            batch_size=50000,
            max_path_length=15,
            n_itr=100,
            gae_lambda=0.95,
            discount=0.995,
            step_size=trpo_stepsize,
            optimizer_args={'subsample_factor':trpo_subsample_factor},
            #plot=True,
        )

        algo.train()

项目：gail-driver 作者：sisl | 项目源码 | 文件源码

def __init__(
            self,
            env_spec,
            hidden_sizes=(32, 32),
            hidden_nonlinearity=NL.rectify,
            hidden_W_init=LI.HeUniform(),
            hidden_b_init=LI.Constant(0.),
            output_nonlinearity=NL.tanh,
            output_W_init=LI.Uniform(-3e-3, 3e-3),
            output_b_init=LI.Uniform(-3e-3, 3e-3),
            bn=False):
        Serializable.quick_init(self, locals())

        l_obs = L.InputLayer(shape=(None, env_spec.observation_space.flat_dim))

        l_hidden = l_obs
        if bn:
            l_hidden = batch_norm(l_hidden)

        for idx, size in enumerate(hidden_sizes):
            l_hidden = L.DenseLayer(
                l_hidden,
                num_units=size,
                W=hidden_W_init,
                b=hidden_b_init,
                nonlinearity=hidden_nonlinearity,
                name="h%d" % idx
            )
            if bn:
                l_hidden = batch_norm(l_hidden)

        l_output = L.DenseLayer(
            l_hidden,
            num_units=env_spec.action_space.flat_dim,
            W=output_W_init,
            b=output_b_init,
            nonlinearity=output_nonlinearity,
            name="output"
        )

        # Note the deterministic=True argument. It makes sure that when getting
        # actions from single observations, we do not update params in the
        # batch normalization layers

        action_var = L.get_output(l_output, deterministic=True)
        self._output_layer = l_output

        self._f_actions = ext.compile_function([l_obs.input_var], action_var)

        super(DeterministicMLPPolicy, self).__init__(env_spec)
        LasagnePowered.__init__(self, [l_output])

项目：saliency-salgan-2017 作者：imatge-upc | 项目源码 | 文件源码

def build(input_height, input_width, concat_var):
    """
    Build the discriminator, all weights initialized from scratch
    :param input_width:
    :param input_height: 
    :param concat_var: Theano symbolic tensor variable
    :return: Dictionary that contains the discriminator
    """

    net = {'input': InputLayer((None, 4, input_height, input_width), input_var=concat_var)}
    print "Input: {}".format(net['input'].output_shape[1:])

    net['merge'] = ConvLayer(net['input'], 3, 1, pad=0, flip_filters=False)
    print "merge: {}".format(net['merge'].output_shape[1:])

    net['conv1'] = ConvLayer(net['merge'], 32, 3, pad=1)
    print "conv1: {}".format(net['conv1'].output_shape[1:])

    net['pool1'] = PoolLayer(net['conv1'], 4)
    print "pool1: {}".format(net['pool1'].output_shape[1:])

    net['conv2_1'] = ConvLayer(net['pool1'], 64, 3, pad=1)
    print "conv2_1: {}".format(net['conv2_1'].output_shape[1:])

    net['conv2_2'] = ConvLayer(net['conv2_1'], 64, 3, pad=1)
    print "conv2_2: {}".format(net['conv2_2'].output_shape[1:])

    net['pool2'] = PoolLayer(net['conv2_2'], 2)
    print "pool2: {}".format(net['pool2'].output_shape[1:])

    net['conv3_1'] = nn.weight_norm(ConvLayer(net['pool2'], 64, 3, pad=1))
    print "conv3_1: {}".format(net['conv3_1'].output_shape[1:])

    net['conv3_2'] = nn.weight_norm(ConvLayer(net['conv3_1'], 64, 3, pad=1))
    print "conv3_2: {}".format(net['conv3_2'].output_shape[1:])

    net['pool3'] = PoolLayer(net['conv3_2'], 2)
    print "pool3: {}".format(net['pool3'].output_shape[1:])

    net['fc4'] = DenseLayer(net['pool3'], num_units=100, nonlinearity=tanh)
    print "fc4: {}".format(net['fc4'].output_shape[1:])

    net['fc5'] = DenseLayer(net['fc4'], num_units=2, nonlinearity=tanh)
    print "fc5: {}".format(net['fc5'].output_shape[1:])

    net['prob'] = DenseLayer(net['fc5'], num_units=1, nonlinearity=sigmoid)
    print "prob: {}".format(net['prob'].output_shape[1:])

    return net

项目：ip-avsr 作者：lzuwei | 项目源码 | 文件源码

def create_model(dbn, input_shape, input_var, mask_shape, mask_var,
                 lstm_size=250, win=T.iscalar('theta)'),
                 output_classes=26, w_init_fn=GlorotUniform, use_peepholes=False, use_blstm=True):

    weights, biases, shapes, nonlinearities = dbn

    gate_parameters = Gate(
        W_in=w_init_fn, W_hid=w_init_fn,
        b=las.init.Constant(0.))
    cell_parameters = Gate(
        W_in=w_init_fn, W_hid=w_init_fn,
        # Setting W_cell to None denotes that no cell connection will be used.
        W_cell=None, b=las.init.Constant(0.),
        # By convention, the cell nonlinearity is tanh in an LSTM.
        nonlinearity=tanh)

    l_in = InputLayer(input_shape, input_var, 'input')
    l_mask = InputLayer(mask_shape, mask_var, 'mask')

    symbolic_batchsize = l_in.input_var.shape[0]
    symbolic_seqlen = l_in.input_var.shape[1]

    l_reshape1 = ReshapeLayer(l_in, (-1, input_shape[-1]), name='reshape1')
    l_encoder = create_pretrained_encoder(l_reshape1, weights, biases,
                                          shapes,
                                          nonlinearities,
                                          ['fc1', 'fc2', 'fc3', 'bottleneck'])
    encoder_len = las.layers.get_output_shape(l_encoder)[-1]
    l_reshape2 = ReshapeLayer(l_encoder, (symbolic_batchsize, symbolic_seqlen, encoder_len), name='reshape2')
    l_delta = DeltaLayer(l_reshape2, win, name='delta')

    if use_blstm:
        l_lstm, l_lstm_back = create_blstm(l_delta, l_mask, lstm_size, cell_parameters, gate_parameters, 'blstm1',
                                           use_peepholes)

        # We'll combine the forward and backward layer output by summing.
        # Merge layers take in lists of layers to merge as input.
        l_sum1 = ElemwiseSumLayer([l_lstm, l_lstm_back], name='sum1')
        # reshape, flatten to 2 dimensions to run softmax on all timesteps
        l_reshape3 = ReshapeLayer(l_sum1, (-1, lstm_size), name='reshape3')
    else:
        l_lstm = create_lstm(l_delta, l_mask, lstm_size, cell_parameters, gate_parameters, 'lstm', use_peepholes)
        l_reshape3 = ReshapeLayer(l_lstm, (-1, lstm_size), name='reshape3')

    # Now, we can apply feed-forward layers as usual.
    # We want the network to predict a classification for the sequence,
    # so we'll use a the number of classes.
    l_softmax = DenseLayer(
        l_reshape3, num_units=output_classes, nonlinearity=las.nonlinearities.softmax, name='softmax')

    l_out = ReshapeLayer(l_softmax, (-1, symbolic_seqlen, output_classes), name='output')

    return l_out

项目：ip-avsr 作者：lzuwei | 项目源码 | 文件源码

def create_model(dbn, input_shape, input_var, mask_shape, mask_var,
                 lstm_size=250, output_classes=26):

    dbn_layers = dbn.get_all_layers()
    weights = []
    biases = []
    weights.append(dbn_layers[1].W.astype('float32'))
    weights.append(dbn_layers[2].W.astype('float32'))
    weights.append(dbn_layers[3].W.astype('float32'))
    weights.append(dbn_layers[4].W.astype('float32'))
    biases.append(dbn_layers[1].b.astype('float32'))
    biases.append(dbn_layers[2].b.astype('float32'))
    biases.append(dbn_layers[3].b.astype('float32'))
    biases.append(dbn_layers[4].b.astype('float32'))

    gate_parameters = Gate(
        W_in=las.init.Orthogonal(), W_hid=las.init.Orthogonal(),
        b=las.init.Constant(0.))
    cell_parameters = Gate(
        W_in=las.init.Orthogonal(), W_hid=las.init.Orthogonal(),
        # Setting W_cell to None denotes that no cell connection will be used.
        W_cell=None, b=las.init.Constant(0.),
        # By convention, the cell nonlinearity is tanh in an LSTM.
        nonlinearity=tanh)

    l_in = InputLayer(input_shape, input_var, 'input')
    l_mask = InputLayer(mask_shape, mask_var, 'mask')

    symbolic_batchsize = l_in.input_var.shape[0]
    symbolic_seqlen = l_in.input_var.shape[1]

    l_reshape1 = ReshapeLayer(l_in, (-1, input_shape[-1]), name='reshape1')
    l_encoder = create_pretrained_encoder(weights, biases, l_reshape1)
    encoder_len = las.layers.get_output_shape(l_encoder)[-1]
    l_reshape2 = ReshapeLayer(l_encoder, (symbolic_batchsize, symbolic_seqlen, encoder_len), name='reshape2')
    # l_delta = DeltaLayer(l_reshape2, win, name='delta')

    # l_lstm = create_lstm(l_reshape2, l_mask, lstm_size, cell_parameters, gate_parameters, 'lstm1')
    l_lstm, l_lstm_back = create_blstm(l_reshape2, l_mask, lstm_size, cell_parameters, gate_parameters, 'lstm1')

    # We'll combine the forward and backward layer output by summing.
    # Merge layers take in lists of layers to merge as input.
    l_sum1 = ElemwiseSumLayer([l_lstm, l_lstm_back], name='sum1')

    l_forward_slice1 = SliceLayer(l_sum1, -1, 1, name='slice1')

    # Now, we can apply feed-forward layers as usual.
    # We want the network to predict a classification for the sequence,
    # so we'll use a the number of classes.
    l_out = DenseLayer(
        l_forward_slice1, num_units=output_classes, nonlinearity=las.nonlinearities.softmax, name='output')

    return l_out

项目：ip-avsr 作者：lzuwei | 项目源码 | 文件源码

def create_pretrained_substream(weights, biases, input_shape, input_var, mask_shape, mask_var, name,
                                lstm_size=250, win=T.iscalar('theta'), nonlinearity=rectify,
                                w_init_fn=las.init.Orthogonal(), use_peepholes=True):
    gate_parameters = Gate(
        W_in=w_init_fn, W_hid=w_init_fn,
        b=las.init.Constant(0.))
    cell_parameters = Gate(
        W_in=w_init_fn, W_hid=w_init_fn,
        # Setting W_cell to None denotes that no cell connection will be used.
        W_cell=None, b=las.init.Constant(0.),
        # By convention, the cell nonlinearity is tanh in an LSTM.
        nonlinearity=tanh)

    l_input = InputLayer(input_shape, input_var, 'input_'+name)
    l_mask = InputLayer(mask_shape, mask_var, 'mask')

    symbolic_batchsize_raw = l_input.input_var.shape[0]
    symbolic_seqlen_raw = l_input.input_var.shape[1]

    l_reshape1_raw = ReshapeLayer(l_input, (-1, input_shape[-1]), name='reshape1_'+name)
    l_encoder_raw = create_pretrained_encoder(l_reshape1_raw, weights, biases,
                                              [2000, 1000, 500, 50],
                                              [nonlinearity, nonlinearity, nonlinearity, linear],
                                              ['fc1_'+name, 'fc2_'+name, 'fc3_'+name, 'bottleneck_'+name])
    input_len = las.layers.get_output_shape(l_encoder_raw)[-1]

    l_reshape2 = ReshapeLayer(l_encoder_raw,
                                  (symbolic_batchsize_raw, symbolic_seqlen_raw, input_len),
                                  name='reshape2_'+name)
    l_delta = DeltaLayer(l_reshape2, win, name='delta_'+name)

    l_lstm = LSTMLayer(
        l_delta, int(lstm_size), peepholes=use_peepholes,
        # We need to specify a separate input for masks
        mask_input=l_mask,
        # Here, we supply the gate parameters for each gate
        ingate=gate_parameters, forgetgate=gate_parameters,
        cell=cell_parameters, outgate=gate_parameters,
        # We'll learn the initialization and use gradient clipping
        learn_init=True, grad_clipping=5., name='lstm_'+name)

    return l_lstm