Python lasagne module: regularization() examples

We extracted the following 10 code examples from open-source Python projects to show how to use the lasagne.regularization module.
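
Before the project snippets, here is a minimal, self-contained sketch of the module's main entry points; the toy two-layer network and the coefficients are purely illustrative and not taken from any of the projects below:

import lasagne
from lasagne.regularization import (l1, l2, regularize_layer_params,
                                    regularize_layer_params_weighted,
                                    regularize_network_params)

# A toy network, for illustration only.
l_in = lasagne.layers.InputLayer(shape=(None, 100))
l_hid = lasagne.layers.DenseLayer(l_in, num_units=200,
                                  nonlinearity=lasagne.nonlinearities.tanh)
l_out = lasagne.layers.DenseLayer(l_hid, num_units=10,
                                  nonlinearity=lasagne.nonlinearities.softmax)

# L2 penalty on a single layer's regularizable parameters (the weights).
penalty = regularize_layer_params(l_hid, l2) * 1e-4
# Per-layer coefficients via a {layer: coefficient} dict.
penalty += regularize_layer_params_weighted({l_hid: 1e-4, l_out: 1e-5}, l2)
# L1 penalty over all parameters reachable from the output layer.
penalty += regularize_network_params(l_out, l1) * 1e-6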

Project: geomdn | Author: afshinrahimi
import argparse


def parse_args(argv):
    """
    Parse commandline arguments.
    Arguments:
        argv -- An argument list without the program name.
    """

    parser = argparse.ArgumentParser()
    parser.add_argument('-i', '--dataset', metavar='str', help='dataset for dialectology', type=str, default='na')
    parser.add_argument('-bucket', '--bucket', metavar='int', help='discretisation bucket size', type=int, default=300)
    parser.add_argument('-batch', '--batch', metavar='int', help='SGD batch size', type=int, default=0)
    parser.add_argument('-hid', '--hidden', metavar='int', help='Hidden layer size', type=int, default=500)
    parser.add_argument('-mindf', '--mindf', metavar='int', help='minimum document frequency in BoW', type=int, default=10)
    parser.add_argument('-d', '--dir', metavar='str', help='home directory', type=str, default='./data')
    parser.add_argument('-enc', '--encoding', metavar='str', help='Data Encoding (e.g. latin1, utf-8)', type=str, default='utf-8')
    parser.add_argument('-reg', '--regularization', metavar='float', help='regularization coefficient', type=float, default=1e-6)
    parser.add_argument('-drop', '--dropout', metavar='float', help='dropout coef default 0.5', type=float, default=0.5)
    parser.add_argument('-cel', '--celebrity', metavar='int', help='celebrity threshold', type=int, default=10)
    parser.add_argument('-conv', '--convolution', action='store_true', help='if true do convolution')
    parser.add_argument('-map', '--map', action='store_true', help='if true just draw maps from pre-trained model') 
    parser.add_argument('-sqerror', '--sqerror', action='store_true', help='if exists use squared error regression instead of gaussian mixture model') 
    parser.add_argument('-autoencoder', '--autoencoder', type=int, help='if not zero pre-trains the model with input lat/lon and output lat/lon for n steps', default=0) 
    parser.add_argument('-grid', '--grid', action='store_true', help='if exists transforms the input from lat/lon to distance from grids on map') 
    parser.add_argument('-rbf', '--rbf', action='store_true', help='if exists transforms the input from lat/lon to rbf probabilities and learns centers and sigmas as well.') 
    parser.add_argument('-ncomp', '--ncomp', type=int, help='the number of bivariate gaussians whose parameters are going to be learned.', default=100) 
    parser.add_argument('-toy', action='store_true', help='if exists use the toy dataset instead of geolocation datasets.')
    parser.add_argument('-tune', action='store_true', help='if exists tune hyperparameters')
    parser.add_argument('-m', '--message', type=str) 

    args = parser.parse_args(argv)
    return args
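
A hypothetical invocation, shown only to make the argv contract explicit; the function expects the argument list without the program name:

import sys

args = parse_args(sys.argv[1:])
print(args.regularization, args.dropout)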
Project: geomdn | Author: afshinrahimi
import logging
import random
from collections import OrderedDict

import numpy as np


def tune(data, dataset_name, args, num_iter=100):
    logging.info('tuning over %s' % dataset_name)
    param_scores = []
    random.seed()
    for i in range(num_iter):
        logging.info('tuning iter %d' % i)
        # reset numpy's RNG each trial so runs differ only in the sampled hyperparameters
        np.random.seed(77)
        hidden_size = random.choice([300, 600, 900])
        ncomp = random.choice([250, 500, 1000])
        if args.nomdn:
            ncomp = 0
        logging.info('hidden %d ncomp %d' % (hidden_size, ncomp))
        try:
            perplexity_test, perplexity_dev = train(data, regul_coef=args.regularization, dropout_coef=args.dropout, 
                  hidden_size=hidden_size, autoencoder=args.autoencoder, ncomp=ncomp, dataset_name=dataset_name, tune=True, nomdn=args.nomdn)

        except Exception:
            logging.info('exception occurred')
            continue

        scores = OrderedDict()
        scores['perplexity_test'], scores['perplexity_dev'] = perplexity_test, perplexity_dev
        params = OrderedDict()
        params['hidden'], params['ncomp'] = hidden_size, ncomp
        param_scores.append([params, scores])
        logging.info(params)
        logging.info(scores)
    for param_score in param_scores:
        logging.info(param_score)
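
A hedged driver sketch, assuming data has already been loaded elsewhere and that args comes from the parse_args variant in the next snippet, which defines the -nomdn flag that tune reads:

import sys

args = parse_args(sys.argv[1:])
data = load_data(args)  # hypothetical loader; not part of the snippets shown
tune(data, args.dataset, args, num_iter=100)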
Project: geomdn | Author: afshinrahimi
import argparse


def parse_args(argv):
    """
    Parse commandline arguments.
    Arguments:
        argv -- An argument list without the program name.
    """

    parser = argparse.ArgumentParser()
    parser.add_argument('-i', '--dataset', metavar='str', help='dataset for dialectology', type=str, default='na')
    parser.add_argument('-bucket', '--bucket', metavar='int', help='discretisation bucket size', type=int, default=300)
    parser.add_argument('-batch', '--batch', metavar='int', help='SGD batch size', type=int, default=1000)
    parser.add_argument('-hid', '--hidden', metavar='int', help='Hidden layer size after bigaus layer', type=int, default=500)
    parser.add_argument('-mindf', '--mindf', metavar='int', help='minimum document frequency in BoW', type=int, default=10)
    parser.add_argument('-d', '--dir', metavar='str', help='home directory', type=str, default='./data')
    parser.add_argument('-enc', '--encoding', metavar='str', help='Data Encoding (e.g. latin1, utf-8)', type=str, default='utf-8')
    parser.add_argument('-reg', '--regularization', metavar='float', help='regularization coefficient', type=float, default=1e-6)
    parser.add_argument('-drop', '--dropout', metavar='float', help='dropout coef default 0.5', type=float, default=0.5)
    parser.add_argument('-cel', '--celebrity', metavar='int', help='celebrity threshold', type=int, default=10)
    parser.add_argument('-conv', '--convolution', action='store_true', help='if true do convolution')
    parser.add_argument('-map', '--map', action='store_true', help='if true just draw maps from pre-trained model')
    parser.add_argument('-tune', '--tune', action='store_true', help='if true tune the hyper-parameters')
    parser.add_argument('-tf', '--tensorflow', action='store_true', help='if exists run with tensorflow')
    parser.add_argument('-autoencoder', '--autoencoder', type=int, help='the number of autoencoder steps before training', default=0)
    parser.add_argument('-grid', '--grid', action='store_true', help='if exists transforms the input from lat/lon to distance from grids on map')
    parser.add_argument('-ncomp', type=int, help='the number of bivariate gaussians after the input layer', default=500)
    parser.add_argument('-m', '--message', type=str)
    parser.add_argument('-vbi', '--vbi', type=str, help='if exists load params from vbi file and visualize bivariate gaussians on a map', default=None)
    parser.add_argument('-nomdn', '--nomdn', action='store_true', help='if true use tanh layer instead of MDN')
    args = parser.parse_args(argv)
    return args
Project: ml_defense | Author: arjunbhagoji
import lasagne
from lasagne.regularization import l1, l2


def loss_fn(model_predict, target_var, reg=None, network=None, layers=None):
    """
    Create a loss expression for training, i.e., a scalar objective we want
    to minimize (for our multi-class problem, it is the cross-entropy loss)
    """

    loss_temp = lasagne.objectives.categorical_crossentropy(model_predict,
                                                            target_var)
    loss_temp = loss_temp.mean()
    # Optional regularization, e.g.:
    # layers = {layer_1: 1e-7, layer_2: 1e-7, network: 1e-7}
    # l2_penalty = lasagne.regularization.regularize_layer_params_weighted(layers, l2)
    if reg == 'l2':
        if layers is not None:
            layer_1 = layers[0]
            layer_2 = layers[1]
            # weight both layers, as in the commented example above
            layer_dict = {layer_1: 1e-7, layer_2: 1e-7}
            l2_penalty = lasagne.regularization \
                .regularize_layer_params_weighted(layer_dict, l2)
        else:
            l2_penalty = (1e-7 *
                lasagne.regularization.regularize_network_params(network, l2))
        loss_temp = loss_temp + l2_penalty
    elif reg == 'l1':
        l1_penalty = lasagne.regularization.regularize_network_params(
            network, l1)
        loss_temp = loss_temp + 1e-7 * l1_penalty
    return loss_temp
#------------------------------------------------------------------------------#
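
A hedged usage sketch for loss_fn; the small network, the symbolic variables, and the layer list are illustrative only:

import theano.tensor as T
import lasagne

input_var = T.matrix('inputs')
target_var = T.ivector('targets')

l_in = lasagne.layers.InputLayer(shape=(None, 784), input_var=input_var)
l_hid = lasagne.layers.DenseLayer(l_in, num_units=256)
network = lasagne.layers.DenseLayer(l_hid, num_units=10,
                                    nonlinearity=lasagne.nonlinearities.softmax)

prediction = lasagne.layers.get_output(network)
# Cross-entropy plus a weighted L2 penalty on the two listed layers.
loss = loss_fn(prediction, target_var, reg='l2', network=network,
               layers=[l_hid, network])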
Project: hierarchical_rl | Author: wulfebw
def __init__(self, input_shape, batch_size, num_actions, num_hidden, discount, learning_rate, regularization, update_rule, freeze_interval, rng):
        self.input_shape = input_shape
        self.batch_size = batch_size
        self.num_actions = num_actions
        self.num_hidden = num_hidden
        self.discount = discount
        self.learning_rate = learning_rate
        self.regularization = regularization
        self.update_rule = update_rule
        self.freeze_interval = freeze_interval
        self.rng = rng if rng else np.random.RandomState()
        self.initialize_network()
        self.update_counter = 0
Project: iterative_inference_segm | Author: adri-romsor
import argparse


def main():
    parser = argparse.ArgumentParser(description='Unet model training')
    parser.add_argument('-dataset',
                        default='camvid',
                        help='Dataset.')
    parser.add_argument('-learning_rate',
                        default=0.0001,
                        help='Learning Rate')
    parser.add_argument('-penal_cst',
                        default=0.0,
                        help='regularization constant')
    parser.add_argument('--num_epochs',
                        '-ne',
                        type=int,
                        default=750,
                        help='Optional. Int to indicate the max '
                        'number of epochs.')
    parser.add_argument('-max_patience',
                        type=int,
                        default=100,
                        help='Max patience')
    # NOTE: argparse applies type() to the raw command-line string, so the
    # list (-batch_size), dict (-data_augmentation) and bool (-train_from_0_255)
    # arguments below only behave as intended when their defaults are used.
    parser.add_argument('-batch_size',
                        type=int,
                        default=[10, 1, 1],
                        help='Batch size [train, val, test]')
    parser.add_argument('-data_augmentation',
                        type=dict,
                        default={'crop_size': (224, 224), 'horizontal_flip': True, 'fill_mode': 'constant'},
                        help='use data augmentation')
    parser.add_argument('-early_stop_class',
                        type=int,
                        default=None,
                        help='class to early stop on')
    parser.add_argument('-train_from_0_255',
                        type=bool,
                        default=False,
                        help='Whether to train from images within 0-255 range')
    args = parser.parse_args()

    # SAVEPATH and LOADPATH are module-level constants defined elsewhere in the project
    train(args.dataset, float(args.learning_rate),
          float(args.penal_cst), int(args.num_epochs), int(args.max_patience),
          data_augmentation=args.data_augmentation, batch_size=args.batch_size,
          early_stop_class=args.early_stop_class, savepath=SAVEPATH,
          train_from_0_255=args.train_from_0_255, loadpath=LOADPATH)
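
Because argparse applies type() to the raw string, type=bool maps any non-empty value (including "False") to True. A hedged alternative for that flag is a plain switch; this is a sketch, not the project's code:

parser.add_argument('-train_from_0_255', action='store_true',
                    help='train from images within the 0-255 range')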
Project: geomdn | Author: afshinrahimi
def build(self):
        """
        build the MDN network with shared Gaussian parameters
        Input is sparse text and output is the parameters of the mixture of Gaussian
        """
        self.X_sym = S.csr_matrix(name='inputs', dtype=self.dtype)
        self.Y_sym = T.matrix(name='y_true', dtype=self.dtype)

        l_in_text = lasagne.layers.InputLayer(shape=(None, self.input_size),
                                         input_var=self.X_sym)

        if self.drop_out and self.dropout_coef > 0:
            l_in_text = lasagne_layers.SparseInputDropoutLayer(l_in_text, p=self.dropout_coef)

        l_hid_text = SparseInputDenseLayer(l_in_text, num_units=self.hid_size,
                                      nonlinearity=lasagne.nonlinearities.tanh,
                                      W=lasagne.init.GlorotUniform())

        # if self.drop_out and self.dropout_coef > 0:
        #    l_hid_text = lasagne.layers.dropout(l_hid_text, p=self.dropout_coef)

        self.l_pi_out = lasagne_layers.MDNSharedParams(l_hid_text, num_units=self.n_bigaus_comp,
                                                       mus=self.mus, sigmas=self.sigmas, corxy=self.corxy,
                                                       nonlinearity=lasagne.nonlinearities.softmax,
                                                       W=lasagne.init.GlorotUniform())

        pis = lasagne.layers.get_output(self.l_pi_out, self.X_sym)
        # use the shared Gaussian parameters of the layer
        mus, sigmas, corxy = self.l_pi_out.mus, self.l_pi_out.sigmas, self.l_pi_out.corxy
        sigmas = T.nnet.softplus(sigmas)
        corxy = T.nnet.nnet.softsign(corxy)
        loss = self.nll_loss_sharedparams(mus, sigmas, corxy, pis, self.Y_sym)
        # an autoencoder loss could be added here if desired:
        # sq_error_coef = 0.01
        # predicted_mu = self.get_symb_mus(mus, sigmas, corxy, pis, prediction_method="pi")
        # loss += lasagne.objectives.squared_error(predicted_mu, self.Y_sym).mean() * sq_error_coef

        # if regul_coef is greater than 0, apply regularization
        if self.regul_coef:
            l1_share_out = 0.5
            l1_share_hid = 0.5
            regul_coef_out, regul_coef_hid = self.regul_coef, self.regul_coef
            logging.info('regularization coefficient for output and hidden layers is ' + str(self.regul_coef))
            l1_penalty = lasagne.regularization.regularize_layer_params(self.l_pi_out, l1) * regul_coef_out * l1_share_out
            l2_penalty = lasagne.regularization.regularize_layer_params(self.l_pi_out, l2) * regul_coef_out * (1 - l1_share_out)
            l1_penalty += lasagne.regularization.regularize_layer_params(l_hid_text, l1) * regul_coef_hid * l1_share_hid
            l2_penalty += lasagne.regularization.regularize_layer_params(l_hid_text, l2) * regul_coef_hid * (1 - l1_share_hid)

            loss += l1_penalty + l2_penalty

        parameters = lasagne.layers.get_all_params(self.l_pi_out, trainable=True)
        updates = lasagne.updates.adam(loss, parameters, learning_rate=1e-3, beta1=0.9, beta2=0.999, epsilon=1e-8)
        self.f_train = theano.function([self.X_sym, self.Y_sym], loss, updates=updates, on_unused_input='warn')  # ,  mode=theano.compile.MonitorMode(pre_func=inspect_inputs, post_func=inspect_outputs))
        self.f_val = theano.function([self.X_sym, self.Y_sym], loss, on_unused_input='warn')
        self.f_predict = theano.function([self.X_sym], [mus, sigmas, corxy, pis], on_unused_input='warn')
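
The l1_share variables above split a single regularization budget between L1 and L2 penalties per layer, elastic-net style. A standalone sketch of the same pattern, with a hypothetical layer and coefficients:

import lasagne
from lasagne.regularization import l1, l2, regularize_layer_params

layer = lasagne.layers.DenseLayer(
    lasagne.layers.InputLayer(shape=(None, 50)), num_units=10)

regul_coef = 1e-6  # hypothetical overall strength
l1_share = 0.5     # fraction of the budget given to L1

penalty = (regularize_layer_params(layer, l1) * regul_coef * l1_share +
           regularize_layer_params(layer, l2) * regul_coef * (1 - l1_share))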
Project: geomdn | Author: afshinrahimi
def build_squarederror_regression(self):
        """
        This is only used if we want to build a regression model
        """
        self.X_sym = S.csr_matrix(name='inputs', dtype=self.dtype)
        self.Y_sym = T.matrix(name='y_true', dtype=self.dtype)
        self.X_autoencoder_sym = T.matrix(name='x_autoencoder', dtype=self.dtype)
        self.Y_autoencoder_sym = T.matrix(name='y_autoencoder', dtype=self.dtype)

        l_in_text = lasagne.layers.InputLayer(shape=(None, self.input_size),
                                         input_var=self.X_sym)

        if self.drop_out and self.dropout_coef > 0:
            l_in_text = lasagne_layers.SparseInputDropoutLayer(l_in_text, p=self.dropout_coef)

        l_hid_text = SparseInputDenseLayer(l_in_text, num_units=self.hid_size,
                                      nonlinearity=lasagne.nonlinearities.tanh,
                                      W=lasagne.init.GlorotUniform())

        # if self.drop_out and self.dropout_coef > 0:
        #     l_hid_text = lasagne.layers.dropout(l_hid_text, p=self.dropout_coef)

        self.l_out = lasagne.layers.DenseLayer(l_hid_text, num_units=2,
                                               nonlinearity=lasagne.nonlinearities.linear,
                                               W=lasagne.init.GlorotUniform())

        output = lasagne.layers.get_output(self.l_out, self.X_sym)
        loss = lasagne.objectives.squared_error(output, self.Y_sym).mean() 
        output_eval = lasagne.layers.get_output(self.l_out, self.X_sym, deterministic=True)

        if self.regul_coef:
            l1_share_out = 0.5
            l1_share_hid = 0.5
            regul_coef_out, regul_coef_hid = self.regul_coef, self.regul_coef
            logging.info('regularization coefficient for output and hidden layers is ' + str(self.regul_coef))
            l1_penalty = lasagne.regularization.regularize_layer_params(self.l_out, l1) * regul_coef_out * l1_share_out
            l2_penalty = lasagne.regularization.regularize_layer_params(self.l_out, l2) * regul_coef_out * (1 - l1_share_out)
            l1_penalty += lasagne.regularization.regularize_layer_params(l_hid_text, l1) * regul_coef_hid * l1_share_hid
            l2_penalty += lasagne.regularization.regularize_layer_params(l_hid_text, l2) * regul_coef_hid * (1 - l1_share_hid)

            loss = loss + l1_penalty + l2_penalty

        parameters = lasagne.layers.get_all_params(self.l_out, trainable=True)
        updates = lasagne.updates.adam(loss, parameters, learning_rate=1e-3, beta1=0.9, beta2=0.999, epsilon=1e-8)
        self.f_train = theano.function([self.X_sym, self.Y_sym], loss, updates=updates, on_unused_input='warn')
        self.f_val = theano.function([self.X_sym, self.Y_sym], loss, on_unused_input='warn')
        self.f_predict = theano.function([self.X_sym], output_eval, on_unused_input='warn')
Project: hierarchical_rl | Author: wulfebw
def __init__(self, input_shape, batch_size, num_hidden_layers, num_actions, num_hidden, discount, learning_rate, regularization, update_rule, freeze_interval, rng):
        """
        :type input_shape: int
        :param input_shape: the dimension of the input representation of the state

        :type batch_size: int
        :param batch_size: number of samples to use in computing the loss / updates

        :type num_hidden_layers: int
        :param num_hidden_layers: number of hidden layers to use in the network

        :type num_actions: int
        :param num_actions: the output dimension of the network measured in number of possible actions

        :type num_hidden: int
        :param num_hidden: number of hidden nodes to use in each layer (const across layers)

        :type discount: float
        :param discount: discount factor to use in computing Q-learning target values

        :type learning_rate: float
        :param learning_rate: the learning rate to use (no decay schedule since ADAM update assumed) 

        :type regularization: float
        :param regularization: l2 regularization constant applied to weights

        :type update_rule: string
        :param update_rule: the type of update rule to use, suggest using 'adam'

        :type freeze_interval: int
        :param freeze_interval: the number of updates between updating the target network weights

        :type rng: rng
        :param rng: rng for running deterministically, o/w just leave as None

        :example call: 
        network = qnetwork.QNetwork(input_shape=20, batch_size=64, num_hidden_layers=2, num_actions=4, 
            num_hidden=4, discount=1, learning_rate=1e-3, regularization=1e-4, 
            update_rule='adam', freeze_interval=1e5, rng=None)

        """
        self.input_shape = input_shape
        self.batch_size = batch_size
        self.num_hidden_layers = num_hidden_layers
        self.num_actions = num_actions
        self.num_hidden = num_hidden
        self.discount = discount
        self.learning_rate = learning_rate
        self.regularization = regularization
        self.update_rule = update_rule
        self.freeze_interval = freeze_interval
        self.rng = rng if rng else np.random.RandomState()
        self.initialize_network()
        self.update_counter = 0
Project: hierarchical_rl | Author: wulfebw
def __init__(self, input_shape, sequence_length, batch_size, num_actions, num_hidden, discount, learning_rate, regularization, update_rule, freeze_interval, network_type, rng):
        """
        :type input_shape: int
        :param input_shape: the dimension of the input representation of the state

        :type sequence_length: int
        :param sequence_length: the length to back propagate through time

        :type batch_size: int
        :param batch_size: number of samples to use in computing the loss / updates

        :type num_actions: int
        :param num_actions: the output dimension of the network measured in number of possible actions

        :type num_hidden: int
        :param num_hidden: number of hidden nodes to use in each layer (const across layers)

        :type discount: float
        :param discount: discount factor to use in computing Q-learning target values

        :type learning_rate: float
        :param learning_rate: the learning rate to use (no decay schedule since ADAM update assumed) 

        :type regularization: float
        :param regularization: l2 regularization constant applied to weights

        :type update_rule: string
        :param update_rule: the type of update rule to use, suggest using 'adam'

        :type freeze_interval: int
        :param freeze_interval: the number of updates between updating the target network weights

        :type network_type: string
        :param network_type: selects which network architecture initialize_network builds

        :type rng: rng
        :param rng: rng for running deterministically, o/w just leave as None

        :example call:
        network = qnetwork.QNetwork(input_shape=20, sequence_length=4, batch_size=64,
            num_actions=4, num_hidden=4, discount=1, learning_rate=1e-3,
            regularization=1e-4, update_rule='adam', freeze_interval=1e5,
            network_type='recurrent', rng=None)  # network_type value is illustrative

        """
        self.input_shape = input_shape
        self.sequence_length = sequence_length
        self.batch_size = batch_size
        self.num_actions = num_actions
        self.num_hidden = num_hidden
        self.discount = discount
        self.learning_rate = learning_rate
        self.regularization = regularization
        self.update_rule = update_rule
        self.freeze_interval = freeze_interval
        self.network_type = network_type
        self.rng = rng if rng else np.random.RandomState()
        self.initialize_network()
        self.update_counter = 0