Python keras.layers.recurrent module: GRU code examples

The following 46 code examples, extracted from open-source Python projects, show how to use keras.layers.recurrent.GRU.
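
For orientation, a minimal sketch of the GRU layer in the Keras 1.x-era API that most of these excerpts target (the input_dim/output_dim-style arguments shown here were renamed in Keras 2, where the keras.layers.recurrent import path also changed):

import numpy as np
from keras.models import Sequential
from keras.layers.recurrent import GRU

model = Sequential()
# Maps (batch, timesteps, 8) inputs to a single (batch, 32) output vector.
model.add(GRU(output_dim=32, input_dim=8, return_sequences=False))
model.compile(loss='mse', optimizer='rmsprop')
model.fit(np.random.randn(16, 10, 8), np.random.randn(16, 32), verbose=0)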

Project: LSTM-GRU-CNN-MLP    Author: ansleliu
def build_model(layers):
    model = Sequential()

    model.add(GRU(input_dim=layers[0], output_dim=layers[1], activation='tanh', return_sequences=True))
    model.add(Dropout(0.15))  # dropout to reduce overfitting

    # model.add(GRU(layers[2], activation='tanh', return_sequences=True))
    # model.add(Dropout(0.2))  # dropout to reduce overfitting

    model.add(GRU(layers[2], activation='tanh', return_sequences=False))
    model.add(Dropout(0.15))  # dropout to reduce overfitting

    model.add(Dense(output_dim=layers[3]))
    model.add(Activation("linear"))

    start = time.time()
    # sgd = SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
    # model.compile(loss="mse", optimizer=sgd)
    model.compile(loss="mse", optimizer="rmsprop") # Nadam rmsprop
    print "Compilation Time : ", time.time() - start
    return model
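
A usage sketch for the function above; the layers list is an assumed format, [input_dim, hidden_1, hidden_2, output_dim]:

# Hypothetical sizes: 4 input features per timestep, two 64-unit GRU
# layers, and a single regression output.
model = build_model([4, 64, 64, 1])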
Project: soph    Author: Linusp
def understand_variable_length_handle():
    """????????? recurrent layer ??????"""
    model = Sequential()
    model.add(GRU(input_dim=256, output_dim=256, return_sequences=True))
    model.compile(loss='mean_squared_error', optimizer='sgd')
    train_x = np.random.randn(100, 78, 256)
    train_y = np.random.randn(100, 78, 256)
    model.fit(train_x, train_y, verbose=0)

    inz_1 = np.random.randn(1, 78, 256)
    rez_1 = model.predict_proba(inz_1, verbose=0)

    inz_2 = np.random.randn(1, 87, 256)
    rez_2 = model.predict_proba(inz_2, verbose=0)

    print()
    print('=========== understand variable length =================')
    print('With `return_sequences=True`')
    print('Input shape is: {}, output shape is {}'.format(inz_1.shape, rez_1.shape))
    print('Input shape is: {}, output shape is {}'.format(inz_2.shape, rez_2.shape))
    print('====================== end =============================')
Project: soph    Author: Linusp
def try_variable_length_train():
    """????????

    ?????????? train_x ? train_y ? dtype ? object ???
    ?? shape ???? (100,) ?????????
    """
    model = Sequential()
    model.add(GRU(input_dim=256, output_dim=256, return_sequences=True))
    model.compile(loss='mean_squared_error', optimizer='sgd')

    train_x = []
    train_y = []
    for i in range(100):
        seq_length = np.random.randint(78, 87 + 1)
        sequence = []
        for _ in range(seq_length):
            sequence.append([np.random.randn() for _ in range(256)])

        train_x.append(np.array(sequence))
        train_y.append(np.array(sequence))

    train_x = np.array(train_x)
    train_y = np.array(train_y)

    model.fit(np.array(train_x), np.array(train_y))
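
The fit call above receives object-dtype arrays and fails. One standard workaround (a sketch, not from this project) is to zero-pad every sequence to the longest length and mask the padded steps:

import numpy as np
from keras.layers.core import Masking

max_len = 87
padded_x = np.zeros((len(train_x), max_len, 256), dtype='float32')
padded_y = np.zeros((len(train_y), max_len, 256), dtype='float32')
for i, (x, y) in enumerate(zip(train_x, train_y)):
    padded_x[i, :len(x)] = x  # steps past the true length stay zero
    padded_y[i, :len(y)] = y

model = Sequential()
model.add(Masking(mask_value=0.0, input_shape=(max_len, 256)))
model.add(GRU(input_dim=256, output_dim=256, return_sequences=True))
model.compile(loss='mean_squared_error', optimizer='sgd')
model.fit(padded_x, padded_y)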
Project: keras-recommendation    Author: sonyisme
def test_temporal_clf(self):
        print('temporal classification data:')
        (X_train, y_train), (X_test, y_test) = get_test_data(nb_train=1000, nb_test=200, input_shape=(5,10), 
            classification=True, nb_class=2)
        print('X_train:', X_train.shape)
        print('X_test:', X_test.shape)
        print('y_train:', y_train.shape)
        print('y_test:', y_test.shape)

        y_train = to_categorical(y_train)
        y_test = to_categorical(y_test)

        model = Sequential()
        model.add(GRU(X_train.shape[-1], y_train.shape[-1]))
        model.add(Activation('softmax'))
        model.compile(loss='categorical_crossentropy', optimizer='rmsprop')
        history = model.fit(X_train, y_train, nb_epoch=12, batch_size=16, validation_data=(X_test, y_test), show_accuracy=True, verbose=2)
        self.assertTrue(history.validation_accuracy[-1] > 0.9)
Project: keras-customized    Author: ambrite
def test_regularizer(layer_class):
    layer = layer_class(output_dim, return_sequences=False, weights=None,
                        batch_input_shape=(nb_samples, timesteps, embedding_dim),
                        W_regularizer=regularizers.WeightRegularizer(l1=0.01),
                        U_regularizer=regularizers.WeightRegularizer(l1=0.01),
                        b_regularizer='l2')
    shape = (nb_samples, timesteps, embedding_dim)
    layer.build(shape)
    output = layer(K.variable(np.ones(shape)))
    K.eval(output)
    if layer_class == recurrent.SimpleRNN:
        assert len(layer.losses) == 3
    if layer_class == recurrent.GRU:
        assert len(layer.losses) == 9
    if layer_class == recurrent.LSTM:
        assert len(layer.losses) == 12
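
The expected counts follow from the gate structure: each of the three regularizers (W, U, b) contributes one loss per gate block, and SimpleRNN, GRU, and LSTM have 1, 3, and 4 gate blocks respectively:

gate_blocks = {'SimpleRNN': 1, 'GRU': 3, 'LSTM': 4}
expected = {name: n * 3 for name, n in gate_blocks.items()}  # 3 regularized tensors per block
assert expected == {'SimpleRNN': 3, 'GRU': 9, 'LSTM': 12}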
Project: motion-classification    Author: matthiasplappert
def fit(self, X, y):
        assert isinstance(X, list)  #TODO: this should not be an assert
        assert len(y) > 0
        assert len(X) == len(y)

        X = pad_sequences(X)
        print(X.shape, y.shape)

        n_features = X.shape[2]
        self.n_labels_ = y.shape[1]
        print(n_features, self.n_labels_)

        model = Sequential()
        model.add(GRU(n_features, 128))
        model.add(Dropout(0.1))
        model.add(BatchNormalization(128))
        model.add(Dense(128, self.n_labels_))
        model.add(Activation('sigmoid'))

        sgd = opt.SGD(lr=0.005, decay=1e-6, momentum=0., nesterov=True)
        model.compile(loss='categorical_crossentropy', optimizer=sgd, class_mode='categorical')

        model.fit(X, y, batch_size=self.n_batch_size, nb_epoch=self.n_epochs, show_accuracy=True)
        self.model_ = model
Project: keras    Author: NVIDIA
def test_regularizer(layer_class):
    layer = layer_class(output_dim, return_sequences=False, weights=None,
                        batch_input_shape=(nb_samples, timesteps, embedding_dim),
                        W_regularizer=regularizers.WeightRegularizer(l1=0.01),
                        U_regularizer=regularizers.WeightRegularizer(l1=0.01),
                        b_regularizer='l2')
    shape = (nb_samples, timesteps, embedding_dim)
    layer.build(shape)
    output = layer(K.variable(np.ones(shape)))
    K.eval(output)
    if layer_class == recurrent.SimpleRNN:
        assert len(layer.losses) == 3
    if layer_class == recurrent.GRU:
        assert len(layer.losses) == 9
    if layer_class == recurrent.LSTM:
        assert len(layer.losses) == 12
Project: stratosphere-lstm    Author: mendozawow
def build_lstm(input_shape):
    model = Sequential()
    # model.add(Masking(input_shape=input_shape, mask_value=-1.))
    model.add(Embedding(input_shape[0], 128, input_length=input_shape[1]))

    model.add(Convolution1D(nb_filter=64,
                            filter_length=5,
                            border_mode='valid',
                            activation='relu',
                            subsample_length=1))
    model.add(MaxPooling1D(pool_length=4))

    model.add(GRU(128))

    # model.add(GRU(128, return_sequences=False))
    # Add dropout if overfitting
    # model.add(Dropout(0.5))
    model.add(Dense(1))
    model.add(Activation('sigmoid'))
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model
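
A usage sketch with hypothetical sizes; the Embedding layer above reads input_shape as (vocabulary_size, sequence_length):

model = build_lstm((5000, 100))  # 5000-word vocabulary, 100-token sequences
model.summary()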
Project: stratosphere-lstm    Author: mendozawow
def build_lstm(input_shape):
    model = Sequential()
    # model.add(Masking(input_shape=input_shape, mask_value=-1.))
    model.add(Embedding(input_shape[0], 128, input_length=input_shape[1]))

    model.add(Convolution1D(nb_filter=64,
                            filter_length=5,
                            border_mode='valid',
                            activation='relu',
                            subsample_length=1))
    model.add(MaxPooling1D(pool_length=model.output_shape[1]))

    model.add(Flatten())

    model.add(Dense(128))

    # model.add(GRU(128, return_sequences=False))
    # Add dropout if overfitting
    # model.add(Dropout(0.5))
    model.add(Dense(1))
    model.add(Activation('sigmoid'))
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model
Project: deep-coref    Author: clarkkev
def test_temporal_clf(self):
        print('temporal classification data:')
        (X_train, y_train), (X_test, y_test) = get_test_data(nb_train=1000, nb_test=200, input_shape=(3, 5),
                                                             classification=True, nb_class=2)
        print('X_train:', X_train.shape)
        print('X_test:', X_test.shape)
        print('y_train:', y_train.shape)
        print('y_test:', y_test.shape)

        y_train = to_categorical(y_train)
        y_test = to_categorical(y_test)

        model = Sequential()
        model.add(GRU(y_train.shape[-1], input_shape=(None, X_train.shape[-1])))
        model.add(Activation('softmax'))
        model.compile(loss='categorical_crossentropy', optimizer='adadelta')
        history = model.fit(X_train, y_train, nb_epoch=12, batch_size=16, validation_data=(X_test, y_test), show_accuracy=True, verbose=2)
        self.assertTrue(history.history['val_acc'][-1] > 0.9)
Project: narrative-prediction    Author: roemmele
def create_model(self):
        model = Sequential()
        model.add(Embedding(output_dim=self.n_embedding_nodes, input_dim=self.lexicon_size + 1,
                            input_length=self.n_timesteps, mask_zero=True, name='embedding_layer'))
        for layer_num in range(self.n_hidden_layers):
            if layer_num == self.n_hidden_layers - 1:
                return_sequences = False
            else: #add extra hidden layers
                return_sequences = True
            model.add(GRU(output_dim=self.n_hidden_nodes, return_sequences=return_sequences, name='hidden_layer' + str(layer_num + 1)))
        model.add(Dense(output_dim=self.n_output_classes, activation='softmax', name='output_layer'))
        # if emb_weights is not None:
        #     #initialize weights with lm weights
        #     model.layers[0].set_weights(emb_weights) #set embeddings
        # if layer1_weights is not None:
        #     model.layers[1].set_weights(layer1_weights) #set recurrent layer 1         
        model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
        return model
Project: RecommendationSystem    Author: TURuibo
def test_temporal_clf(self):
        print('temporal classification data:')
        (X_train, y_train), (X_test, y_test) = get_test_data(nb_train=1000, nb_test=200, input_shape=(5,10), 
            classification=True, nb_class=2)
        print('X_train:', X_train.shape)
        print('X_test:', X_test.shape)
        print('y_train:', y_train.shape)
        print('y_test:', y_test.shape)

        y_train = to_categorical(y_train)
        y_test = to_categorical(y_test)

        model = Sequential()
        model.add(GRU(X_train.shape[-1], y_train.shape[-1]))
        model.add(Activation('softmax'))
        model.compile(loss='categorical_crossentropy', optimizer='rmsprop')
        history = model.fit(X_train, y_train, nb_epoch=12, batch_size=16, validation_data=(X_test, y_test), show_accuracy=True, verbose=2)
        self.assertTrue(history.validation_accuracy[-1] > 0.9)
Project: keras-molecules    Author: maxhodak
def _buildDecoder(self, z, latent_rep_size, max_length, charset_length):
        h = Dense(latent_rep_size, name='latent_input', activation='relu')(z)
        h = RepeatVector(max_length, name='repeat_vector')(h)
        h = GRU(501, return_sequences=True, name='gru_1')(h)
        h = GRU(501, return_sequences=True, name='gru_2')(h)
        h = GRU(501, return_sequences=True, name='gru_3')(h)
        return TimeDistributed(Dense(charset_length, activation='softmax'), name='decoded_mean')(h)
Project: keras    Author: GeekLiB
def rnn_test(f):
    """
    All the recurrent layers share the same interface,
    so we can run through them with a single function.
    """
    f = keras_test(f)
    return pytest.mark.parametrize("layer_class", [
        recurrent.SimpleRNN,
        recurrent.GRU,
        recurrent.LSTM
    ])(f)
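
A hypothetical test showing the decorator in use: pytest runs the body once for each of the three layer classes.

@rnn_test
def test_return_sequences_flag(layer_class):
    layer = layer_class(16, return_sequences=True)
    assert layer.return_sequences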
Project: soph    Author: Linusp
def build_model(input_size, seq_len, hidden_size):
    """???? seq2seq ??"""
    model = Sequential()
    model.add(GRU(input_dim=input_size, output_dim=hidden_size, return_sequences=False))
    model.add(Dense(hidden_size, activation="relu"))
    model.add(RepeatVector(seq_len))
    model.add(GRU(hidden_size, return_sequences=True))
    model.add(TimeDistributed(Dense(output_dim=input_size, activation="softmax")))
    model.compile(loss="categorical_crossentropy", optimizer='adam')

    return model
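
A usage sketch with assumed sizes: one-hot sequences of 20 steps over a 50-symbol alphabet, compressed to a 128-dim encoder vector and decoded back:

model = build_model(input_size=50, seq_len=20, hidden_size=128)
# input (batch, 20, 50) -> encoding (batch, 128) -> output (batch, 20, 50)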
Project: soph    Author: Linusp
def build_model(input_size, seq_len, hidden_size):
    """???? sequence to sequence ??"""
    model = Sequential()
    model.add(GRU(input_dim=input_size, output_dim=hidden_size, return_sequences=False))
    model.add(Dense(hidden_size, activation="relu"))
    model.add(RepeatVector(seq_len))
    model.add(GRU(hidden_size, return_sequences=True))
    model.add(TimeDistributed(Dense(output_dim=input_size, activation="linear")))
    model.compile(loss="mse", optimizer='adam')

    return model
Project: soph    Author: Linusp
def understand_return_sequence():
    """?????? recurrent layer ?? return_sequences ??"""
    model_1 = Sequential()
    model_1.add(GRU(input_dim=256, output_dim=256, return_sequences=True))
    model_1.compile(loss='mean_squared_error', optimizer='sgd')
    train_x = np.random.randn(100, 78, 256)
    train_y = np.random.randn(100, 78, 256)
    model_1.fit(train_x, train_y, verbose=0)

    model_2 = Sequential()
    model_2.add(GRU(input_dim=256, output_dim=256, return_sequences=False))
    model_2.compile(loss='mean_squared_error', optimizer='sgd')
    train_x = np.random.randn(100, 78, 256)
    train_y = np.random.randn(100, 256)
    model_2.fit(train_x, train_y, verbose=0)

    inz = np.random.randn(100, 78, 256)
    rez_1 = model_1.predict_proba(inz, verbose=0)
    rez_2 = model_2.predict_proba(inz, verbose=0)

    print()
    print('=========== understand return_sequence =================')
    print('Input shape is: {}'.format(inz.shape))
    print('Output shape of model with `return_sequences=True`: {}'.format(rez_1.shape))
    print('Output shape of model with `return_sequences=False`: {}'.format(rez_2.shape))
    print('====================== end =============================')
Project: Hotpot    Author: Liang-Qiu
def config(c):
    c['dropout'] = 4/5
    c['dropoutfix_inp'] = 0
    c['dropoutfix_rec'] = 0
    c['l2reg'] = 1e-4

    c['rnnbidi'] = True
    c['rnn'] = GRU
    c['rnnbidi_mode'] = 'sum'
    c['rnnact'] = 'tanh'
    c['rnninit'] = 'glorot_uniform'
    c['sdim'] = 2
    c['rnnlevels'] = 1

    c['project'] = True
    c['pdim'] = 2
    c['pact'] = 'tanh'
    c['pinit'] = 'glorot_uniform'

    # model-external:
    c['inp_e_dropout'] = 4/5
    c['inp_w_dropout'] = 0
    # anssel-specific:
    c['ptscorer'] = B.mlp_ptscorer
    c['mlpsum'] = 'sum'
    c['Ddim'] = 2
Project: Hotpot    Author: Liang-Qiu
def config(c):
    c['dropout'] = 4/5
    c['dropoutfix_inp'] = 0
    c['dropoutfix_rec'] = 0
    c['l2reg'] = 1e-4

    c['rnnbidi'] = True
    c['rnn'] = GRU
    c['rnnbidi_mode'] = 'sum'
    c['rnnact'] = 'tanh'
    c['rnninit'] = 'glorot_uniform'
    c['sdim'] = 2
    c['rnnlevels'] = 1

    c['project'] = True
    c['pdim'] = 2
    c['pact'] = 'tanh'
    c['pinit'] = 'glorot_uniform'

    # model-external:
    c['inp_e_dropout'] = 4/5
    c['inp_w_dropout'] = 0
    # anssel-specific:
    c['ptscorer'] = B.mlp_ptscorer
    c['mlpsum'] = 'sum'
    c['Ddim'] = 2
Project: Youtube8mdataset_kagglechallenge    Author: jasonlee27
def load_model(self, frm_modelweights=''):
        frm_model = Sequential()
        frm_model.add(GRU(2048,
                          input_shape=(None, self.feature_size),
                          return_sequences=True,
                          activation='relu',
                          name='fc1'))
        frm_model.add(Dropout(0.5))
        frm_model.add(GRU(2048,
                          return_sequences=True,
                          activation='relu',
                          name='fc2'))
        frm_model.add(Dropout(0.5))
        frm_model.add(GRU(2048,
                          return_sequences=False,
                          activation='relu',
                          name='fc3'))
        frm_model.add(Dropout(0.5))
        frm_model.add(Dense(self.numclasses, activation='softmax', name='prediction'))
        if frm_modelweights:
            frm_model.load_weights(frm_modelweights, by_name=True)
            print("Frame model loaded with weights from %s." % frm_modelweights)
        else:
            print "Empty frame model loaded."

        return frm_model
Project: Youtube8mdataset_kagglechallenge    Author: jasonlee27
def load_model(self, frm_modelweights='', frmdiff_modelweights=''):
        frm_model = Sequential()
        frm_model.add(GRU(4096,
                          return_sequences=True,
                          input_dim=self.feature_size,
                          input_length=MAX_FRAMES,
                          activation='relu',
                          name='fc1'))
        frm_model.add(Dropout(0.3))
        frm_model.add(GRU(4096,
                          return_sequences=False,
                          activation='relu',
                          name='fc2'))
        frm_model.add(Dropout(0.3))
        frm_model.add(Dense(self.numclasses, activation='softmax', name='frm_prediction'))
        if frm_modelweights:
            frm_model.load_weights(frm_modelweights, by_name=True)
            print("Frame model loaded with weights from %s." % frm_modelweights)
        else:
            print "Empty frame model loaded."

        '''
        frmdiff_model = Sequential()
        frmdiff_model.add(GRU(4096, input_dim=self.feature_size, activation='relu', name='fc1'))
        frmdiff_model.add(Dropout(0.3))
        frmdiff_model.add(GRU(4096, activation='relu', name='fc2'))
        frmdiff_model.add(Dropout(0.3))
        frmdiff_model.add(Dense(self.numclasses, activation='softmax', name='frmdiff_feature'))

        if frmdiff_modelweights:
            frmdiff_model.load_weights(frmdiff_modelweights, by_name=True)
            print('Frame model loaded with weights from %s.' % frmdiff_modelweights)
        else:
            print "Empty frame model loaded."

        model = Sequential()
        model.add(Merge([frm_model, frmdiff_model], mode='concat'))
        model.add(Dense(self.numclasses, activation='softmax', name='predictions'))
        '''

        return frm_model
Project: keras-recommendation    Author: sonyisme
def test_temporal_reg(self):
        print('temporal regression data:')
        (X_train, y_train), (X_test, y_test) = get_test_data(nb_train=1000, nb_test=200, input_shape=(5, 10), output_shape=(2,),
            classification=False)
        print('X_train:', X_train.shape)
        print('X_test:', X_test.shape)
        print('y_train:', y_train.shape)
        print('y_test:', y_test.shape)

        model = Sequential()
        model.add(GRU(X_train.shape[-1], y_train.shape[-1]))
        model.compile(loss='hinge', optimizer='rmsprop')
        history = model.fit(X_train, y_train, nb_epoch=12, batch_size=16, validation_data=(X_test, y_test), verbose=2)
        self.assertTrue(history.validation_loss[-1] < 0.75)
Project: keras-customized    Author: ambrite
def rnn_test(f):
    """
    All the recurrent layers share the same interface,
    so we can run through them with a single function.
    """
    f = keras_test(f)
    return pytest.mark.parametrize("layer_class", [
        recurrent.SimpleRNN,
        recurrent.GRU,
        recurrent.LSTM
    ])(f)
Project: ntm_keras    Author: flomlo
def build(self, input_shape):
        bs, input_length, input_dim = input_shape

        self.controller_input_dim, self.controller_output_dim = controller_input_output_shape(
                input_dim, self.units, self.m_depth, self.n_slots, self.shift_range, self.read_heads,
                self.write_heads)

        # Now that we've calculated the shape of the controller, we have to add it to the layer/model.
        if self.controller is None:
            self.controller = Dense(
                name = "controller",
                activation = 'linear',
                bias_initializer = 'zeros',
                units = self.controller_output_dim,
                input_shape = (bs, input_length, self.controller_input_dim))
            self.controller.build(input_shape=(self.batch_size, input_length, self.controller_input_dim))
            self.controller_with_state = False


        # This is a fixed shift matrix
        self.C = _circulant(self.n_slots, self.shift_range)

        self.trainable_weights = self.controller.trainable_weights 

        # We need to declare the number of states we want to carry around.
        # In our case the dimension seems to be 6 (LSTM) or 5 (GRU) or 4 (FF),
        # see self.get_initial_states; those correspond to:
        # [old_ntm_output] + [init_M, init_wr, init_ww] + [init_h] (LSTM and GRU) + [init_c] (LSTM only)
        # old_ntm_output does not make sense in our world, but is required by the definition of the step function we
        # intend to use.
        # WARNING: What self.state_spec does is only poorly understood;
        # I only copied it from keras/recurrent.py.
        self.states = [None, None, None, None]
        self.state_spec = [InputSpec(shape=(None, self.output_dim)),                            # old_ntm_output
                            InputSpec(shape=(None, self.n_slots, self.m_depth)),                # Memory
                            InputSpec(shape=(None, self.read_heads, self.n_slots)),   # weights_read
                            InputSpec(shape=(None, self.write_heads, self.n_slots))]  # weights_write

        super(NeuralTuringMachine, self).build(input_shape)
Project: keyphrase-extraction    Author: sagarchaturvedi1
def get_nets(name):
    if name=='LSTM':
        return recurrent.LSTM
    elif name=='GRU':
        return recurrent.GRU
    else:
        return recurrent.SimpleRNN
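
Hypothetical usage: resolve the layer class by name, then instantiate it as usual.

rnn_class = get_nets('GRU')  # -> keras.layers.recurrent.GRU
layer = rnn_class(64, return_sequences=True)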
Project: keyphrase-extraction    Author: sagarchaturvedi1
def get_nets(name):
    if name=='LSTM':
        return recurrent.LSTM
    elif name=='GRU':
        return recurrent.GRU
    else:
        return recurrent.SimpleRNN
Project: mars_express    Author: wsteitz
def fit(self, x, y):
        input_dim = x.shape[1]
        output_dim = y.shape[1]
        self.x_train = x

        start = len(x) % (self.batch_size * self.sequence_length)

        x_seq = self.sliding_window(x.iloc[start:])
        y_seq = self.sliding_window(y.iloc[start:])

        model = Sequential()
        model.add(GRU(1024, batch_input_shape=(self.batch_size, self.sequence_length, input_dim), return_sequences=True, stateful=True))
        model.add(Activation("tanh"))
        model.add(GRU(1024, return_sequences=True))
        model.add(Activation("tanh"))
        model.add(GRU(512, return_sequences=True))
        model.add(Activation("tanh"))
        #model.add(Dropout(0.5))
        model.add(TimeDistributed(Dense(output_dim)))
        model.add(Activation("linear"))

        # optimizer = keras.optimizers.RMSprop(lr=0.002)  # unused: overridden by Nadam below
        optimizer = keras.optimizers.Nadam(lr=0.002)
        model.compile(loss='mse', optimizer=optimizer)

        model.fit(x_seq, y_seq, batch_size=self.batch_size, verbose=1, nb_epoch=self.n_epochs, shuffle=False)
        self.model = model
        return self
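
Because the first GRU is stateful with a fixed batch_input_shape, inference must use the same batch size, and carried-over state should be cleared between independent passes. A sketch of an assumed companion predict step (not from the source):

def predict(self, x_seq):
    # x_seq must be shaped (n, sequence_length, input_dim) with n a
    # multiple of the training batch size.
    self.model.reset_states()  # drop state carried over from fitting
    return self.model.predict(x_seq, batch_size=self.batch_size)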
Project: headline-generation    Author: sallamander
def make_model(embedding_weights, input_length=50):
    """Build an recurrent net based off the input parameters and return it compiled.

    Args: 
    ----
        embedding_weights: 2d np.ndarray
        input_length (optional): int
            Holds how many words each article body will hold

    Return: 
    ------
        model: keras.model.Sequential compiled model
    """

    dict_size = embedding_weights.shape[0] # Num words in corpus
    embedding_dim = embedding_weights.shape[1] # Num dims in vec representation

    bodies = Input(shape=(input_length,), dtype='int32') 
    embeddings = Embedding(input_dim=dict_size, output_dim=embedding_dim,
                           weights=[embedding_weights], dropout=0.5)(bodies)
    layer = GRU(1024, return_sequences=True, dropout_W=0.5, dropout_U=0.5)(embeddings)
    layer = GRU(1024, return_sequences=False, dropout_W=0.5, dropout_U=0.5)(layer)
    layer = Dense(dict_size, activation='softmax')(layer)

    model = Model(input=bodies, output=layer)

    model.compile(loss='categorical_crossentropy', optimizer='adagrad')

    return model
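
A usage sketch with assumed shapes: random embedding weights standing in for a 5000-word vocabulary of 100-dim vectors.

import numpy as np

embedding_weights = np.random.randn(5000, 100).astype('float32')
model = make_model(embedding_weights, input_length=50)
model.summary()  # (batch, 50) word indices -> (batch, 5000) softmax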
Project: Conceptors    Author: CogSciUOS
def buildModel(self):
        '''
        :Description:
            Build neural network model
        '''

        self.model = Sequential()
        self.model.add(Embedding(self.embedding, 16, input_length=self.max_len))
        for l in range(self.layers-1):
            self.model.add(GRU(self.neurons, activation=self.activation, return_sequences=True, dropout_W=self.dropout, dropout_U=self.dropout))

        self.model.add(GRU(self.neurons, activation=self.activation, return_sequences=False, dropout_W=self.dropout, dropout_U=self.dropout))    

        self.model.add(Dense(self.n_songs))
        self.model.add(Activation('softmax'))
Project: cervantes    Author: textclf
def _generate_model(self, lembedding, num_classes=2, unit='gru', rnn_size=128, train_vectors=True):

        model = Sequential()
        if lembedding.vector_box.W is None:
            emb = Embedding(lembedding.vector_box.size,
                            lembedding.vector_box.vector_dim,
                            W_constraint=None)
        else:
            emb = Embedding(lembedding.vector_box.size,
                            lembedding.vector_box.vector_dim,
                            weights=[lembedding.vector_box.W], W_constraint=None)
        emb.trainable = train_vectors
        model.add(emb)
        if unit == 'gru':
            model.add(GRU(rnn_size))
        else:
            model.add(LSTM(rnn_size))
        model.add(Dropout(0.2))
        if num_classes == 2:
            model.add(Dense(1, activation='sigmoid'))
            if self.optimizer is None:
                self.optimizer = 'rmsprop'
            model.compile(loss='binary_crossentropy', optimizer=self.optimizer, metrics=["accuracy"])
        else:
            if self.optimizer is None:
                self.optimizer = 'adam'
            model.add(Dense(num_classes, activation='softmax'))
            model.compile(loss='categorical_crossentropy', optimizer=self.optimizer, metrics=["accuracy"])

        return model
Project: cervantes    Author: textclf
def _generate_model(self, lembedding, num_classes=2, unit='gru', rnn_size=128, train_vectors=True):

        doc_input = Input(shape=(lembedding.size,), dtype='int32')
        if lembedding.vector_box.W is None:
            emb = Embedding(lembedding.vector_box.size,
                            lembedding.vector_box.vector_dim,
                            W_constraint=None)(doc_input)
        else:
            emb = Embedding(lembedding.vector_box.size,
                            lembedding.vector_box.vector_dim,
                            weights=[lembedding.vector_box.W], W_constraint=None)(doc_input)
        emb.trainable = train_vectors
        if unit == 'gru':
            forward = GRU(rnn_size)(emb)
            backward = GRU(rnn_size, go_backwards=True)(emb)
        else:
            forward = LSTM(rnn_size)(emb)
            backward = LSTM(rnn_size, go_backwards=True)(emb)

        merged_rnn = merge([forward, backward], mode='concat')
        dropped = Dropout(0.5)(merged_rnn)
        if num_classes == 2:
            out = Dense(1, activation='sigmoid')(dropped)
            model = Model(input=doc_input, output=out)
            if self.optimizer is None:
                self.optimizer = 'rmsprop'
            model.compile(loss='binary_crossentropy', optimizer=self.optimizer, metrics=["accuracy"])
        else:
            out = Dense(num_classes, activation='softmax')(dropped)
            model = Model(input=doc_input, output=out)
            if self.optimizer is None:
                self.optimizer = 'adam'
            model.compile(loss='categorical_crossentropy', optimizer=self.optimizer, metrics=["accuracy"])

        return model
Project: cervantes    Author: textclf
def __init__(self, lembedding, num_classes=2, ngrams=[1, 2, 3, 4, 5],
                 nfilters=64, rnn_type=GRU, rnn_dim=80, train_vectors=True,
                 optimizer=None):

        if not isinstance(lembedding, TwoLevelsEmbedding):
            raise LanguageClassifierException(
                "The model only accepts two-level language embeddings")
        if num_classes < 2:
            raise LanguageClassifierException("Classes must be 2 or more")

        self.optimizer = optimizer
        model = self._generate_model(lembedding, num_classes, ngrams,
                                     nfilters, rnn_type, rnn_dim, train_vectors)
        super(RCNNClassifier, self).__init__(model, self.optimizer)
Project: keras    Author: NVIDIA
def rnn_test(f):
    """
    All the recurrent layers share the same interface,
    so we can run through them with a single function.
    """
    f = keras_test(f)
    return pytest.mark.parametrize("layer_class", [
        recurrent.SimpleRNN,
        recurrent.GRU,
        recurrent.LSTM
    ])(f)
Project: stratosphere-lstm    Author: mendozawow
def build_lstm(input_shape):
    model = Sequential()
    model.add(Masking(input_shape=input_shape, mask_value=-1.))
    # model.add(GRU(128, return_sequences=True))

    model.add(GRU(128, return_sequences=False))
    # Add dropout if overfitting
    # model.add(Dropout(0.5))
    model.add(Dense(1))
    model.add(Activation('sigmoid'))
    model.compile(loss='binary_crossentropy', optimizer='rmsprop', metrics=['accuracy'])
    return model
Project: deep-coref    Author: clarkkev
def test_gru(self):
        _runner(recurrent.GRU)
Project: deep-coref    Author: clarkkev
def test_temporal_reg(self):
        print('temporal regression data:')
        (X_train, y_train), (X_test, y_test) = get_test_data(nb_train=1000, nb_test=200, input_shape=(3, 5), output_shape=(2,),
                                                             classification=False)
        print('X_train:', X_train.shape)
        print('X_test:', X_test.shape)
        print('y_train:', y_train.shape)
        print('y_test:', y_test.shape)

        model = Sequential()
        model.add(GRU(y_train.shape[-1], input_shape=(None, X_train.shape[-1])))
        model.compile(loss='hinge', optimizer='adam')
        history = model.fit(X_train, y_train, nb_epoch=12, batch_size=16, validation_data=(X_test, y_test), verbose=2)
        self.assertTrue(history.history['val_loss'][-1] < 0.8)
Project: RecommendationSystem    Author: TURuibo
def test_temporal_reg(self):
        print('temporal regression data:')
        (X_train, y_train), (X_test, y_test) = get_test_data(nb_train=1000, nb_test=200, input_shape=(5, 10), output_shape=(2,),
            classification=False)
        print('X_train:', X_train.shape)
        print('X_test:', X_test.shape)
        print('y_train:', y_train.shape)
        print('y_test:', y_test.shape)

        model = Sequential()
        model.add(GRU(X_train.shape[-1], y_train.shape[-1]))
        model.compile(loss='hinge', optimizer='rmsprop')
        history = model.fit(X_train, y_train, nb_epoch=12, batch_size=16, validation_data=(X_test, y_test), verbose=2)
        self.assertTrue(history.validation_loss[-1] < 0.75)
Project: music_rnn    Author: chengjunwen
def LSTMModel(self, nHidden=150, lr=0.01):
        # print('nHidden: %i\tlr: %.3f' % (nHidden, lr))
        self.rnnModel.add(GRU(nHidden, activation='sigmoid', input_shape=(None, self.maxFeatures), return_sequences=True))
        # self.rnnModel.add(LSTM(nHidden, activation='sigmoid', input_shape=(None, nHidden), return_sequences=True))
        self.rnnModel.add(TimeDistributedDense(nHidden))
        self.rnnModel.add(Activation('relu'))
        self.rnnModel.add(TimeDistributedDense(self.maxFeatures))
        self.rnnModel.add(Activation('softmax'))
        rmsprop = RMSprop(lr=lr, rho=0.9, epsilon=1e-06)
        self.rnnModel.compile(loss='categorical_crossentropy', optimizer=rmsprop)
Project: RNNIPTag    Author: ml-slac
def BuildModel():

    #global dataset_storage,model_storage,history_storage

    dataset = makeData(Variables=o.Variables)
    #dataset_storage = dataset

    model = None
    history = None
    modelname = "" 
    print(o.Model)
    if "LSTM" in o.Model or "GRU" in o.Model:
        model, history = buildModel_1hidden(dataset,True)
    if o.Model == "RNNSV1":
        model, history = buildModel_RNNSV1(dataset, True)
    if o.Model == "DenseIP3D":
        model, history = buildModel_SimpleDense(dataset, False)
    print(' ------------------------------------------')
    print(o.Model)
    if o.Model == "RNNPlusMV2" or o.Model == "RNNPlusSV1":
        model, history = buildModel_RNNPlus(dataset, useAdam=True)

    modelname = o.Version +"_" + o.Model + "_"+ o.Variables + "_" + o.nEpoch + "epoch_" + str( n_events/1000) + 'kEvts_' + str( o.nTrackCut) + 'nTrackCut_' +  o.nMaxTrack + "nMaxTrack_" + o.nLSTMClass +"nLSTMClass_" + o.nLSTMNodes +"nLSTMNodes_"+ o.nLayers + "nLayers"

    model = evalModel(dataset, model, o.Model)

    if o.TrackOrder == 'pT':
        modelname += "_SortpT"
    if o.TrackOrder == 'Reverse':
        modelname += "_ReverseOrder"
    if o.TrackOrder == 'SL0':
        modelname += "_SL0"
    if o.doTrainC == 'y':
        modelname += "_CMix"
    if o.AddJetpT == 'y':
        modelname += '_AddJetpT'
    if int(o.EmbedSize) != 2:
        modelname += "_" + o.EmbedSize+"EmbedSize"

    if o.Mode == "R":
        modelname = o.filebase+"_Retrain_"+o.nEpoch
    if o.doLessC == "y":
        modelname += "_LessC"

    if o.doJetpTReweight == "y":
        modelname += "_JetpTReweight"

    #modelname = "test"
    saveModel(modelname, model, history)
Project: learning_rnn    Author: qiangsiwei
def train_breaker(datafilename, sentence_num=1000, puncs=u',,.。!!??', \
            RNN=recurrent.GRU, HIDDEN_SIZE=128, EPOCH_SIZE=10, validate=True):
    wordtable = WordTable()
    wordtable.parse(datafilename, sentence_num)

    X, Y = [], []
    for line in open(datafilename, encoding='utf-8').readlines()[:sentence_num]:
        line = line.strip()
        line = re.sub(r'(^[{0}]+)|([{0}]+$)'.format(puncs), '', line)
        words = wordtable.encode(re.sub(r'[{0}]'.format(puncs), '', line))
        breaks = re.sub(r'0[{0}]+'.format(puncs), '1', re.sub(r'[^{0}]'.format(puncs), '0', line))
        if 30 <= len(words) <= 50 and breaks.count('1') >= 4:
            x = np.zeros((len(words), wordtable.capacity), dtype=bool)
            y = np.zeros((len(breaks), 2), dtype=bool)
            for idx in range(len(words)):
                x[idx][words[idx]] = True
                y[idx][int(breaks[idx])] = True
            X.append(x)
            Y.append(y)
    print('total sentences:', len(X))

    if validate:
        # Set apart 10% for validation
        split_at = len(X) - len(X) // 10
        X_train, X_val = X[:split_at], X[split_at:]
        y_train, y_val = Y[:split_at], Y[split_at:]
    else:
        X_train, y_train = X, Y

    model = Graph()
    model.add_input(name='input', input_shape=(None, wordtable.capacity))
    model.add_node(RNN(HIDDEN_SIZE, return_sequences=True), name='forward', input='input')
    model.add_node(TimeDistributedDense(2, activation='softmax'), name='softmax', input='forward')
    model.add_output(name='output', input='softmax')
    model.compile('adam', {'output': 'categorical_crossentropy'})

    for epoch in range(EPOCH_SIZE):
        print("epoch:", epoch)
        for idx, (seq, label) in enumerate(zip(X_train, y_train)):
            loss, accuracy = model.train_on_batch({'input': np.array([seq]), 'output': np.array([label])}, accuracy=True)
            if idx % 20 == 0:
                print("\tidx={0}, loss={1}, accuracy={2}".format(idx, loss, accuracy))

    if validate:
        _Y, _P = [], []
        for (seq, label) in zip(X_val, y_val):
            y = label.argmax(axis=-1)
            p = model.predict({'input':np.array([seq])})['output'][0].argmax(axis=-1)
            _Y.extend(list(y))
            _P.extend(list(p))
        _Y, _P = np.array(_Y), np.array(_P)
        print "should break right: ", ((_P == 1)*(_Y == 1)).sum()
        print "should break wrong: ", ((_P == 0)*(_Y == 1)).sum()
        print "should not break right: ", ((_P == 0)*(_Y == 0)).sum()
        print "should not break wrong: ", ((_P == 1)*(_Y == 0)).sum()

    with open('wordtable_json.txt','w') as wordtable_file:
        wordtable_file.write(wordtable.to_json())
    with open('model_json.txt','w') as model_file:
        model_file.write(model.to_json())
    model.save_weights('model_weights.h5', overwrite=True)
Project: albemarle    Author: SeanTater
def get_state_transfer_rnn(RNN):
    '''Converts a given Recurrent sub class (e.g, LSTM, GRU) to its state transferable version.
    A state transfer RNN can transfer its hidden state to another one of the same type and compatible dimensions.
    '''

    class StateTransferRNN(RNN):

        def __init__(self, state_input=True, **kwargs):
            self.state_outputs = []
            self.state_input = state_input
            super(StateTransferRNN, self).__init__(**kwargs)

        def reset_states(self):
            stateful = self.stateful
            self.stateful = stateful or self.state_input or len(self.state_outputs) > 0
            if self.stateful:
                super(StateTransferRNN, self).reset_states()
            self.stateful = stateful

        def build(self,input_shape):
            stateful = self.stateful
            self.stateful = stateful or self.state_input or len(self.state_outputs) > 0
            super(StateTransferRNN, self).build(input_shape)
            self.stateful = stateful

        def broadcast_state(self, rnns):
            rnns = (set if type(rnns) in [list, tuple] else lambda a: {a})(rnns)
            rnns -= set(self.state_outputs)
            self.state_outputs.extend(rnns)
            for rnn in rnns:
                rnn.state_input = self
                rnn.updates = getattr(rnn, 'updates', [])
                rnn.updates.extend(zip(rnn.states, self.states_to_transfer))

        def call(self, x, mask=None):
            last_output, outputs, states = K.rnn(
                self.step,
                self.preprocess_input(x),
                self.states or self.get_initial_states(x),
                go_backwards=self.go_backwards,
                mask=mask,
                constants=self.get_constants(x),
                unroll=self.unroll,
                input_length=self.input_spec[0].shape[1])
            self.updates = zip(self.states, states)
            self.states_to_transfer = states
            return outputs if self.return_sequences else last_output
    return StateTransferRNN
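
Hypothetical usage: derive a state-transferable GRU and wire an encoder's final hidden state into a decoder of the same size.

StateGRU = get_state_transfer_rnn(GRU)
encoder = StateGRU(output_dim=128, input_dim=64, state_input=False)
decoder = StateGRU(output_dim=128, input_dim=64)
encoder.broadcast_state(decoder)  # decoder starts from the encoder's final state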
Project: NN_sentiment    Author: hx364
def __init__(self, embedding_mat=None, maxlen_doc=7, maxlen_sent=50, filter_length=[3, 4, 5, 6],
                 nb_filters=200, n_vocab=10000, embedding_dims=300, hidden_gru=64, n_classes=5):
        if embedding_mat is not None:
            self.embedding_mat = embedding_mat
            self.n_vocab, self.embedding_dims = embedding_mat.shape
        else:
            self.n_vocab = n_vocab
            self.embedding_dims = embedding_dims
        self.maxlen_doc = maxlen_doc
        self.maxlen_sent = maxlen_sent
        self.filter_length = filter_length
        self.nb_filters = nb_filters
        self.hidden_gru = hidden_gru

        print "Building the model"
        #graph model
        model=Graph()
        model.add_input(name='input', input_shape=(self.maxlen_doc*self.maxlen_sent,), dtype='int')

        #Model embedding layer, for word index-> word embedding transformation
        model.add_node(Embedding(self.n_vocab, self.embedding_dims, weights=[self.embedding_mat],
                                 input_length=self.maxlen_sent*self.maxlen_doc),
                       name='embedding', input='input')
        model.add_node(Reshape((self.maxlen_doc, 1, self.maxlen_sent, self.embedding_dims)),
                      name='reshape_5d', input='embedding')
        #define the different filters
        conv_layer = []
        for each_length in filter_length:
            model.add_node(TimeDistributedConvolution2D(self.nb_filters // len(filter_length),
                                                        each_length, self.embedding_dims, border_mode='valid',
                                               input_shape=(self.maxlen_doc,1,self.maxlen_sent, self.embedding_dims)),
                          name='conv_{}'.format(each_length), input='reshape_5d')
            model.add_node(Activation('relu'),
                          name='relu_conv_{}'.format(each_length), input='conv_{}'.format(each_length))

            model.add_node(TimeDistributedMaxPooling2D(pool_size=(int(self.maxlen_sent - each_length+1), 1),
                          border_mode='valid'),
                          name='pool_conv_{}'.format(each_length), input='relu_conv_{}'.format(each_length))
            model.add_node(TimeDistributedFlatten(),
                          name='flatten_conv_{}'.format(each_length), input='pool_conv_{}'.format(each_length))
            conv_layer.append('flatten_conv_{}'.format(each_length))
        # model.add_node(Activation('relu'), name='relu', inputs=conv_layer)
        print(conv_layer)
        model.add_node(GRU(self.hidden_gru), name='gru_forward', inputs=conv_layer)
        model.add_node(GRU(self.hidden_gru, go_backwards=True), name='gru_backward', inputs=conv_layer)
        model.add_node(Dropout(0.5), name='gru_outputs', inputs=['gru_forward', 'gru_backward'])
        model.add_node(Dense(n_classes), name='full_con', input='gru_outputs')
        model.add_node(Activation('softmax'), name='prob', input='full_con')
        model.add_output(name='pred', input='prob')

        model.compile('rmsprop', loss = {'pred': 'categorical_crossentropy'})
Project: cervantes    Author: textclf
def _generate_model(self, lembedding, num_classes=2, rnn_dim=32):

        WORD_PER_SENTENCES = lembedding.size_level1
        SENTENCES_PER_DOCUMENT = lembedding.size_level2
        EMBEDDING_DIM = lembedding.vector_box.vector_dim

        INPUT_SHAPE = (WORD_PER_SENTENCES * SENTENCES_PER_DOCUMENT, )
        EMBEDDING_SHAPE = (SENTENCES_PER_DOCUMENT, WORD_PER_SENTENCES, EMBEDDING_DIM)

        doc = Input(shape=(INPUT_SHAPE[0], ), dtype='int32')

        embedded = Sequential([
            Embedding(
                input_dim=lembedding.vector_box.size,
                output_dim=EMBEDDING_DIM,
                input_length=INPUT_SHAPE[0]
            ),
            Reshape(EMBEDDING_SHAPE)
        ])(doc)

        out = TimeDistributed(GRU(rnn_dim))(embedded)
        out = Dropout(0.5)(out)
        out = GRU(rnn_dim)(out)
        out = Dropout(0.5)(out)

        mapping = [
            Dense(64, activation='relu'),  # Maybe add more layers
        ]

        for f in mapping:
            out = f(out)

        if num_classes == 2:
            out = Dense(1, activation='sigmoid')(out)
            model = Model(input=doc, output=out)
            if self.optimizer is None:
                self.optimizer = 'rmsprop'
            model.compile(loss='binary_crossentropy', optimizer=self.optimizer, metrics=["accuracy"])
        else:
            out = Dense(num_classes, activation='softmax')(out)
            model = Model(input=doc, output=out)
            if self.optimizer is None:
                self.optimizer = 'adam'
            model.compile(loss='categorical_crossentropy', optimizer=self.optimizer, metrics=["accuracy"])

        return model
Project: event_chain    Author: wangzq870305
def lstm_memory_train(X_train_list,y_train,vocab_size):
    N=len(X_train_list)

    X_train_list = [sequence.pad_sequences(x_train, maxlen=MAX_LEN) for x_train in X_train_list]

    input_list=[]
    out_list=[]
    for i in range(N):
        input,out=get_embedding_input_output('f%d' %i,vocab_size)
        input_list.append(input)
        out_list.append(out)

    x = merge(out_list,mode='concat')

    lstm_out = LSTM(HIDDEN_SIZE, return_sequences=True)(x)

    lstm_share=GRU(HIDDEN_SIZE, return_sequences=True)

    x = lstm_out
    for i in range(2):
        att = TimeDistributed(Dense(1))(x)
        att = Flatten()(att)
        att = Activation(activation="softmax")(att)
        att = RepeatVector(HIDDEN_SIZE)(att)
        att = Permute((2,1))(att)

        mer = merge([att, lstm_out], "mul")
        mer = merge([mer, out_list[-1]], 'mul')

        z = merge([lstm_out,mer],'sum')
        z = lstm_share(z)
        x = z

    hid = AveragePooling1D(pool_length=2)(x)
    hid = Flatten()(hid)

    #hid = merge([hid,out_list[-1]], mode='concat')

    main_loss = Dense(1, activation='sigmoid', name='main_output')(hid)

    model = Model(input=input_list, output=main_loss)

    model.compile(loss='binary_crossentropy', optimizer='rmsprop')
    model.fit(X_train_list, y_train, batch_size=BATCH_SIZE, nb_epoch=EPOCHS)

    return model
Project: luvina    Author: oarriaga
def SiameseLSTM(max_token_length, hidden_size, embedding_size=300):
    text_input_1 = Input(shape=(max_token_length, embedding_size),
                         name='text_1')
    text_mask_1 = Masking(mask_value=0.0, name='text_mask_1')(text_input_1)
    # text_dropout_1 = Dropout(.5, name='text_dropout_1')(text_mask_1)

    text_input_2 = Input(shape=(max_token_length, embedding_size),
                         name='text_2')
    text_mask_2 = Masking(mask_value=0.0, name='text_mask_2')(text_input_2)
    # text_dropout_2 = Dropout(.5, name='text_dropout_2')(text_mask_2)

    lstm_1_a = Bidirectional(GRU(units=hidden_size,
                                 return_sequences=True,
                                 name='RNN_1_a'))(text_mask_1)

    lstm_1_b = Bidirectional(GRU(units=hidden_size,
                                 return_sequences=False,
                                 name='RNN_1_b'))(lstm_1_a)

    """
    lstm_1_c = Bidirectional(GRU(units=hidden_size,
                                 return_sequences=False,
                                 name='RNN_1_c'))(lstm_1_b)
    """

    lstm_2_a = Bidirectional(GRU(units=hidden_size,
                                 return_sequences=True,
                                 name='RNN_2_a'))(text_mask_2)

    lstm_2_b = Bidirectional(GRU(units=hidden_size,
                                 return_sequences=False,
                                 name='RNN_2_b'))(lstm_2_a)

    """
    lstm_2_c = Bidirectional(GRU(units=hidden_size,
                                 return_sequences=False,
                                 name='RNN_2_c'))(lstm_2_b)
    """

    cosine_similarity = Dot(axes=1, normalize=True,
                            name='cosine_similarity')([lstm_1_b, lstm_2_b])

    model = Model(inputs=[text_input_1, text_input_2],
                  outputs=cosine_similarity)

    return model
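
A usage sketch with assumed sizes: two batches of ten 20-token texts with 300-dim embeddings; the output is their pairwise cosine similarity.

import numpy as np

model = SiameseLSTM(max_token_length=20, hidden_size=64)
text_1 = np.random.rand(10, 20, 300)
text_2 = np.random.rand(10, 20, 300)
print(model.predict([text_1, text_2]).shape)  # -> (10, 1)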
Project: narrative-prediction    Author: roemmele
def create_model(self, n_timesteps=None, batch_size=1, include_pred_layer=True):

        input_layers = []

        seq_input_layer = Input(batch_shape=(batch_size, n_timesteps), name="seq_input_layer")
        input_layers.append(seq_input_layer)

        seq_embedding_layer = Embedding(input_dim=self.lexicon_size + 1, 
                                        output_dim=self.n_embedding_nodes, mask_zero=True, name='seq_embedding_layer')(seq_input_layer)

        for layer_num in range(self.n_hidden_layers):
            if layer_num == 0:
                seq_hidden_layer = GRU(output_dim=self.n_hidden_nodes, return_sequences=True, stateful=True, name='seq_hidden_layer1')(seq_embedding_layer)
            else: #add extra hidden layers
                seq_hidden_layer = GRU(output_dim=self.n_hidden_nodes, return_sequences=True, stateful=True, name='seq_hidden_layer' + str(layer_num + 1))(seq_hidden_layer)

        if self.use_pos:
            pos_input_layer = Input(batch_shape=(batch_size, n_timesteps), name="pos_input_layer")
            input_layers.append(pos_input_layer)

            pos_embedding_layer = Embedding(input_dim=self.n_pos_tags + 1,
                                            output_dim=self.n_pos_embedding_nodes, mask_zero=True, name='pos_embedding_layer')(pos_input_layer)

            pos_hidden_layer = GRU(output_dim=self.n_pos_nodes, return_sequences=True, stateful=True, name='pos_hidden_layer')(pos_embedding_layer)

            seq_hidden_layer = merge([seq_hidden_layer, pos_hidden_layer], mode='concat', concat_axis=-1, name='pos_merge_hidden_layer')

        if self.use_features:
            feature_input_layer = Input(batch_shape=(batch_size, self.lexicon_size + 1), name="feature_input_layer")
            input_layers.append(feature_input_layer)
            feature_hidden_layer = Dense(output_dim=self.n_feature_nodes, activation='sigmoid', name='feature_hidden_layer')(feature_input_layer)
            feature_hidden_layer = RepeatVector(n_timesteps)(feature_hidden_layer)

            seq_hidden_layer = merge([seq_hidden_layer, feature_hidden_layer], mode='concat', concat_axis=-1, name='feature_merge_hidden_layer')

        output_layers = []
        if include_pred_layer:
            pred_layer = TimeDistributed(Dense(self.lexicon_size + 1, activation="softmax", name='pred_layer'))(seq_hidden_layer)
            output_layers.append(pred_layer)
            if self.use_pos:
                pred_pos_layer = TimeDistributed(Dense(self.n_pos_tags + 1, activation="softmax", name='pred_pos_layer'))(seq_hidden_layer)
                output_layers.append(pred_pos_layer)

        model = Model(input=input_layers, output=output_layers)

        #select optimizer and compile
        model.compile(loss="sparse_categorical_crossentropy", 
                      optimizer=eval(self.optimizer)(clipvalue=self.clipvalue, lr=self.lr, decay=self.decay))

        return model