Python theano module: shared() code examples

We have extracted the following 50 code examples from open-source Python projects to illustrate how to use theano.shared().
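
Before the project snippets, here is a minimal, self-contained sketch of the theano.shared() pattern they all build on (variable names are illustrative; most snippets below additionally assume their original files' aliases such as import theano as th, import numpy as np and import theano.tensor as T):

import numpy as np
import theano
import theano.tensor as T

# A shared variable keeps a value (typically on the GPU) across function calls.
W = theano.shared(np.zeros((3, 4), dtype=theano.config.floatX), name='W', borrow=True)

x = T.matrix('x')
y = T.dot(x, W)

# It can be read and written from Python ...
print(W.get_value().shape)                        # (3, 4)
W.set_value(np.ones((3, 4), dtype=theano.config.floatX))

# ... and updated in place by a compiled function via `updates`.
step = theano.function([x], y, updates=[(W, 0.9 * W)])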

Project: GELUs | Author: hendrycks
def adamax_updates(params, cost, lr=0.001, mom1=0.9, mom2=0.999):
    updates = []
    grads = T.grad(cost, params)
    for p, g in zip(params, grads):
        mg = th.shared(np.cast[th.config.floatX](p.get_value() * 0.))
        v = th.shared(np.cast[th.config.floatX](p.get_value() * 0.))
        if mom1>0:
            v_t = mom1*v + (1. - mom1)*g
            updates.append((v,v_t))
        else:
            v_t = g
        mg_t = T.maximum(mom2*mg, abs(g))
        g_t = v_t / (mg_t + 1e-6)
        p_t = p - lr * g_t
        updates.append((mg, mg_t))
        updates.append((p, p_t))
    return updates
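
The update list returned above is meant to be handed to theano.function; a hedged sketch of that wiring (the toy parameter and cost are placeholders, assuming the usual th/np/T aliases):

x = T.matrix('x')
W = th.shared(np.zeros((5, 1), dtype=th.config.floatX), name='W')
cost = T.mean((T.dot(x, W) - 1.) ** 2)
train = th.function([x], cost, updates=adamax_updates([W], cost, lr=1e-3))
# each call train(batch) now performs one AdaMax step on W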
Project: GELUs | Author: hendrycks
def adam_updates(params, cost, lr=0.001, mom1=0.9, mom2=0.999):
    updates = []
    grads = T.grad(cost, params)
    t = th.shared(np.cast[th.config.floatX](1.))
    for p, g in zip(params, grads):
        v = th.shared(np.cast[th.config.floatX](p.get_value() * 0.))
        mg = th.shared(np.cast[th.config.floatX](p.get_value() * 0.))
        v_t = mom1*v + (1. - mom1)*g
        mg_t = mom2*mg + (1. - mom2)*T.square(g)
        v_hat = v_t / (1. - mom1 ** t)
        mg_hat = mg_t / (1. - mom2 ** t)
        g_t = v_hat / T.sqrt(mg_hat + 1e-8)
        p_t = p - lr * g_t
        updates.append((v, v_t))
        updates.append((mg, mg_t))
        updates.append((p, p_t))
    updates.append((t, t+1))
    return updates
Project: fxnn | Author: khaotik
def compile(self,s_inputs_, s_loss_, v_params_, s_grads_=None, s_reg_=0, fetches_=None, updates_=None, givens_=None, trunc_grad_=None, profile_=False):
        def get_shared_shape(v):
            return v.get_value(borrow=True, return_internal_type=True).shape
        if type(s_inputs_) not in (list, tuple):
            s_inputs_ = [s_inputs_]
        if isinstance(updates_, dict):
            updates_= list(updates_.items())
        super(AdamSGD,self).compile(
            s_inputs_, s_loss_, v_params_, s_reg_=s_reg_, s_grads_=s_grads_, trunc_grad_=trunc_grad_)
        self.v_m = [th.shared(value=np.zeros(get_shared_shape(p), th.config.floatX), name='adam_m_'+p.name if p.name is not None else None) for p in v_params_]
        self.v_v = [th.shared(value=np.zeros(get_shared_shape(p), th.config.floatX), name='adam_v_'+p.name if p.name is not None else None) for p in v_params_]
        s_b1 = T.scalar('adam_b1'); s_b2 = T.scalar('adam_b2')
        s_b1s = T.scalar('adam_b1s'); s_b2s = T.scalar('adam_b2s')
        update_m = [(m, (m*s_b1 + (1.-s_b1)*g)) for m,g in zip(self.v_m,self.s_grads)]
        update_v = [(v, (v*s_b2 + (1.-s_b2)*g*g)) for v,g in zip(self.v_v,self.s_grads)]
        apply_grad = [(p, p-(s_b1s*m*self.s_lr)/(T.sqrt(s_b2s*v)+self.eps)) for p,m,v in zip(v_params_,self.v_m,self.v_v)]
        self.fn_train = th.function(
            inputs=[self.s_lr]+s_inputs_+[s_b1,s_b2,s_b1s,s_b2s],
            outputs=fetches_,
            updates=update_m+update_v+apply_grad+(updates_ if updates_ else []),
            on_unused_input='warn',
            givens=givens_, profile=profile_)
        self.fn_rst = th.function(inputs=[], updates=[(v, T.zeros_like(v)) for v in self.v_m+self.v_v], profile=profile_)
        return self.fn_train
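
The compiled fn_train expects the learning rate, the model inputs, the two Adam decay rates and their bias-correction factors, in that order. A hedged sketch of a call at step t (batch_inputs and t are placeholders, and the correction formulas 1/(1-b1**t) and 1/(1-b2**t) are an assumption consistent with standard Adam, not taken from this file):

lr, b1, b2 = 1e-3, 0.9, 0.999
args = [lr] + list(batch_inputs) + [b1, b2, 1. / (1. - b1 ** t), 1. / (1. - b2 ** t)]
fetches = fn_train(*args)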
Project: dl4mt-multi | Author: nyu-dl
def get_costs(self, probs, y, y_mask,
                  decay_cs=None, opt_rets=None):
        """
        probs : dict, mapping cg_name to probabilities
        y : theano tensor variable
        y_mask : theano tensor variable
        decay_cs : list of l2 regularization weights
        opt_rets : dict, mapping cg_name to optional returned variables
        """
        costs = self.decoder.costs(probs, y, y_mask)

        if decay_cs is not None:
            for name, cost in costs.iteritems():
                if decay_cs[name] > 0.:
                    decay_c = theano.shared(numpy.float32(decay_cs[name]),
                                            name='decay_c')
                    weight_decay = 0.
                    for pp in ComputationGraph(cost).parameters:
                        weight_decay += (pp ** 2).sum()
                    weight_decay *= decay_c
                    costs[name] += weight_decay
                    costs[name].name = name

        return costs
Project: sampleRNN_ICLR2017 | Author: soroushmehr
def param(name, *args, **kwargs):
    """
    A wrapper for `theano.shared` which enables parameter sharing in models.

    Creates and returns theano shared variables similarly to `theano.shared`,
    except if you try to create a param with the same name as a
    previously-created one, `param(...)` will just return the old one instead of
    making a new one.

    This constructor also adds a `param` attribute to the shared variables it
    creates, so that you can easily search a graph for all params.
    """

    if name not in _params:
        kwargs['name'] = name
        param = theano.shared(*args, **kwargs)
        param.param = True
        _params[name] = param
    return _params[name]
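
A short sketch of the name-based sharing this wrapper provides (assuming the module-level _params dict it relies on, plus numpy/theano imports):

W1 = param('dense.W', np.zeros((128, 64), dtype=theano.config.floatX))
W2 = param('dense.W')                  # same name: the existing shared variable is returned untouched
assert W1 is W2 and W1.param is True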
Project: deligan | Author: val-iisc
def adam_updates(params, cost, lr=0.001, mom1=0.9, mom2=0.999):
    updates = []
    grads = T.grad(cost, params)
    t = th.shared(np.cast[th.config.floatX](1.))
    for p, g in zip(params, grads):
        v = th.shared(np.cast[th.config.floatX](p.get_value() * 0.))
        mg = th.shared(np.cast[th.config.floatX](p.get_value() * 0.))
        v_t = mom1*v + (1. - mom1)*g
        mg_t = mom2*mg + (1. - mom2)*T.square(g)
        v_hat = v_t / (1. - mom1 ** t)
        mg_hat = mg_t / (1. - mom2 ** t)
        g_t = v_hat / T.sqrt(mg_hat + 1e-8)
        p_t = p - lr * g_t
        updates.append((v, v_t))
        updates.append((mg, mg_t))
        updates.append((p, p_t))
    updates.append((t, t+1))
    return updates
Project: deligan | Author: val-iisc
def adam_updates(params, cost, lr=0.001, mom1=0.9, mom2=0.999):
    updates = []
    grads = T.grad(cost, params)
    t = th.shared(np.cast[th.config.floatX](1.))
    for p, g in zip(params, grads):
        v = th.shared(np.cast[th.config.floatX](p.get_value() * 0.))
        mg = th.shared(np.cast[th.config.floatX](p.get_value() * 0.))
        v_t = mom1*v + (1. - mom1)*g
        mg_t = mom2*mg + (1. - mom2)*T.square(g)
        v_hat = v_t / (1. - mom1 ** t)
        mg_hat = mg_t / (1. - mom2 ** t)
        g_t = v_hat / T.sqrt(mg_hat + 1e-8)
        p_t = p - lr * g_t
        updates.append((v, v_t))
        updates.append((mg, mg_t))
        updates.append((p, p_t))
    updates.append((t, t+1))
    return updates
Project: deep_srl | Author: luheng
def adadelta(parameters, gradients, rho=0.95, eps=1e-6):
  """ Reference: ADADELTA: An Adaptive Learning Rate Method,
        Zeiler 2012. https://arxiv.org/abs/1212.5701
      Adapted from the Adadelta implementation from Tensorflow.
  """
  accum = [theano.shared(numpy.zeros(p.get_value().shape, floatX)) for p in parameters]
  accum_updates = [theano.shared(numpy.zeros(p.get_value().shape, floatX)) for p in parameters]

  new_accum = [rho * g0 + (1.0 - rho) * (g**2) for g0, g in izip(accum, gradients)]
  updates = [tensor.sqrt(d0 + eps) / tensor.sqrt(g0 + eps) * g for d0, g0, g in izip(accum_updates,
                                             new_accum,
                                             gradients)]

  new_accum_updates = [rho * d0 + (1.0 - rho) * (d**2) for d0, d in izip(accum_updates,
                                       updates)]

  accum_ = zip(accum, new_accum)
  accum_updates_ = zip(accum_updates, new_accum_updates)  
  parameters_ = [ (p, (p - d)) for p,d in izip(parameters, updates)]
  return accum_ + accum_updates_ + parameters_
Project: deep-prior | Author: moberweger
def addData(self, data):
        """
        Set data of the network that is not managed within training iterations, e.g. data used for validation or other small sets
        :param data: training data and labels specified as dictionary
        :return: None
        """

        if not isinstance(data, dict):
            raise ValueError("Error: expected dictionary for data!")

        for key in data:
            # no need to cache validation data
            setattr(self, key+'DB', self.alignData(data[key]))

            # shared variable already exists?
            if hasattr(self, key):
                print("Reusing shared variables!")
                getattr(self, key).set_value(getattr(self, key+'DB'), borrow=True)
            else:
                # create shared data
                setattr(self, key, theano.shared(getattr(self, key+'DB'), name=key, borrow=True))
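
A hedged usage sketch (the trainer instance and key names are hypothetical; each dictionary key becomes an attribute name):

trainer.addData({'val_data_x': val_x, 'val_data_y': val_y})
# creates the aligned numpy copies trainer.val_data_xDB / trainer.val_data_yDB and the
# shared variables trainer.val_data_x / trainer.val_data_y, reusing them via set_value()
# on subsequent calls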
Project: deep-prior | Author: moberweger
def addStaticData(self, data):
        """
        Set data of the network that is not managed within training iterations, e.g. data used for validation or other small sets
        :param data: training data and labels specified as dictionary
        :return: None
        """

        if not isinstance(data, dict):
            raise ValueError("Error: expected dictionary for data!")

        for key in data:
            # no need to cache validation data
            setattr(self, key+'DB', data[key])

            # shared variable already exists?
            if hasattr(self, key):
                print("Reusing shared variables!")
                getattr(self, key).set_value(getattr(self, key+'DB'), borrow=True)
            else:
                # create shared data
                setattr(self, key, theano.shared(getattr(self, key+'DB'), name=key, borrow=True))
Project: deep-prior | Author: moberweger
def replaceTrainingData(self, start_idx, end_idx, last=False):
        """
        Replace the shared data of the training data
        :param start_idx: start index of data
        :param end_idx: end index of data
        :param last: specify if it is last macro-batch
        :return: None
        """

        for var in self.managedVar:
            if not hasattr(self, var):
                raise ValueError("Variable " + var + " not defined!")
            if last is True:
                getattr(self, var).set_value(getattr(self, var+'DBlast')[start_idx:end_idx], borrow=True)
            else:
                getattr(self, var).set_value(getattr(self, var+'DB')[start_idx:end_idx], borrow=True)
Project: deep-prior | Author: moberweger
def loadMacroBatch(self, macro_idx):
        """
        Make sure that macro batch is loaded in the shared variable
        :param macro_idx: macro batch index
        :return: None
        """
        if macro_idx != self.currentMacroBatch:
                # last macro batch is handled separately, as it is padded
                if self.isLastMacroBatch(macro_idx):
                    start_idx = 0
                    end_idx = self.getNumSamplesPerMacroBatch()
                    print("Loading last macro batch {}, start idx {}, end idx {}".format(macro_idx, start_idx, end_idx))
                    self.replaceTrainingData(start_idx, end_idx, last=True)
                    # remember current macro batch index
                    self.currentMacroBatch = macro_idx
                else:
                    start_idx = macro_idx * self.getNumSamplesPerMacroBatch()
                    end_idx = min((macro_idx + 1) * self.getNumSamplesPerMacroBatch(), self.train_data_xDB.shape[0])
                    print("Loading macro batch {}, start idx {}, end idx {}".format(macro_idx, start_idx, end_idx))
                    self.replaceTrainingData(start_idx, end_idx)
                    # remember current macro batch index
                    self.currentMacroBatch = macro_idx
Project: dnc-theano | Author: khaotik
def query_variable(self, query_):
        '''
        Return an iterable which yields shared variables found by query_, from current group.

        query_:
            Can take several forms, as shown below.

            All: return all variables under current group.
            string: treat as regex, return variables whose name fully match the regex.

        '''
        if query_ is All:
            return self._current_group_di.values()
        elif isinstance(query_, str):
            regex = re.compile(query_)
            return {k:v for k,v in self._current_group_di.items() if regex.fullmatch(k)}
        else:
            raise TypeError('Unknown query type "%s"' % type(query_))


    # TODO add / delete group does not consider non-group object by now
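
A hedged usage sketch of query_variable (assuming mgr is an instance of the surrounding variable-group class and All is the sentinel from the same module):

adam_state = mgr.query_variable(r'adam_.*')   # dict: name -> shared variable, names fully matching the regex
everything = mgr.query_variable(All)          # all shared variables in the current group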
Project: monogreedy | Author: jinjunqi
def sgd_optimizer(model, lr=0.001, momentum=0.9):
    lr = theano.shared(np.array(lr).astype(theano.config.floatX))
    # Make sure momentum is a sane value
    assert momentum < 1 and momentum >= 0
    # the updates of SGD with momentum
    updates = []
    grads = T.grad(model.costs[0], model.params)
    for param, grad in zip(model.params, grads):
        param_update = theano.shared(param.get_value()*0.)
        updates.append((param, param - lr * param_update))
        updates.append((param_update, momentum*param_update + (1. - momentum)*grad))

    train_func = theano.function(model.inputs, model.costs, updates=updates)
    valid_func = theano.function(model.inputs, model.costs)

    return train_func, valid_func
Project: structured-output-ae | Author: sbelharbi
def get_cost(aes, l, eye=True):
    """Get the sum of all the reconstruction costs of the AEs.
    Input:
        aes: list. List of all the AEs.
        l: shared variable or a list of shared variables for the importance
            weights.
    """
    costs = []
    for ae, i in zip(aes, range(len(aes))):
        if isinstance(ae, ConvolutionalAutoencoder):
            costs.append(l[i] * ae.get_train_cost()[0])
        else:
            costs.append(l[i] * ae.get_train_cost(face=eye)[0])
    cost = None
    if costs not in [[], None]:
        cost = reduce(lambda x, y: x + y, costs)
    return cost
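
A hedged sketch of the importance weights l (the aes list of autoencoders is a placeholder from the surrounding project, and numpy is assumed imported as np):

l = [theano.shared(np.float32(1.0), name='imp_w%d' % i) for i in range(len(aes))]
total_cost = get_cost(aes, l, eye=True)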
Project: structured-output-ae | Author: sbelharbi
def evaluate_model(list_minibatchs_vl, eval_fn):
    """Evalute the model over a set."""
    error, output = None, None
    for mn_vl in list_minibatchs_vl:
        x = theano.shared(
            mn_vl['x'], borrow=True).get_value(borrow=True)
        y = theano.shared(
            mn_vl['y'], borrow=True).get_value(borrow=True)

        [error_mn, output_mn] = eval_fn(x, y)
        if error is None:
            error = error_mn
            output = output_mn
        else:
            error = np.vstack((error, error_mn))
            output = np.vstack((output, output_mn))
    return error, output
Project: structured-output-ae | Author: sbelharbi
def evaluate_model_3D_unsup(list_minibatchs_vl, eval_fn):
    """Evalute the model over a set."""
    error, output, code = None, None, None
    for mn_vl in list_minibatchs_vl:
        x = theano.shared(
            mn_vl['x'], borrow=True).get_value(borrow=True)

        [error_mn, output_mn, code_mn] = eval_fn(x)
        if error is None:
            error = error_mn
            output = output_mn
            code = code_mn
        else:
            error = np.vstack((error, error_mn))
            output = np.vstack((output, output_mn))
            code = np.vstack((code, code_mn))

    return error, output, code
Project: structured-output-ae | Author: sbelharbi
def shared_dataset(self, data_xy, train=False, borrow=True):
        """Load the data to the shared variables of Theano.

        Copy the data to the shared memory on the GPU once.
        """

        data_x, data_y = data_xy
        if train:
            dim_output = 10 # case of MNIST
            data_y = np.int32(self.labels(data_y, dim_output))

        shared_x = theano.shared(
                np.asarray(data_x, dtype = theano.config.floatX),
                borrow=borrow)
        shared_y = theano.shared (
                np.asarray(data_y, dtype = theano.config.floatX),
                borrow=borrow)
        return shared_x, T.cast(shared_y, 'int32')
Project: structured-output-ae | Author: sbelharbi
def load_data(self, dataset_path, share = False):
        """Load the data set.


        """
        f = gzip.open(dataset_path, 'rb')
        train_set, valid_set, test_set = pickle.load(f)
        f.close()

        # share the data
        train_set_x, train_set_y = self.shared_dataset(train_set, train=True)
        valid_set_x, valid_set_y = self.shared_dataset(valid_set)
        test_set_x, test_set_y   = self.shared_dataset(test_set)
        if share:
            reval = [(train_set_x, train_set_y), (valid_set_x, valid_set_y), (test_set_x, test_set_y)]
        else:
            reval = [train_set, valid_set, test_set] # NON-shared data (they didn't share the data in the code Crino!!!!!)
        return reval
Project: structured-output-ae | Author: sbelharbi
def shared_dataset_xy(self, data_xy, nlabels = 10, train = False, task="cls", borrow=True):
        """Load the data to the shared variables of Theano.

        Copy the data to the shared memory on the GPU once.
        """

        data_x, data_y = data_xy
        if (train) and (task=='cls'):
            data_y = np.int32(self.labels(data_y, nlabels))

        shared_x = theano.shared(
                np.asarray(data_x, dtype = theano.config.floatX),
                borrow=borrow)
        shared_y = theano.shared (
                np.asarray(data_y, dtype = theano.config.floatX),
                borrow=borrow)
        return shared_x, T.cast(shared_y, 'int32')
Project: gram | Author: mp2893
def adadelta(tparams, grads, x, y, mask, lengths, cost):
    zipped_grads = [theano.shared(p.get_value() * numpy_floatX(0.), name='%s_grad' % k) for k, p in tparams.iteritems()]
    running_up2 = [theano.shared(p.get_value() * numpy_floatX(0.), name='%s_rup2' % k) for k, p in tparams.iteritems()]
    running_grads2 = [theano.shared(p.get_value() * numpy_floatX(0.), name='%s_rgrad2' % k) for k, p in tparams.iteritems()]

    zgup = [(zg, g) for zg, g in zip(zipped_grads, grads)]
    rg2up = [(rg2, 0.95 * rg2 + 0.05 * (g ** 2)) for rg2, g in zip(running_grads2, grads)]

    f_grad_shared = theano.function([x, y, mask, lengths], cost, updates=zgup + rg2up, name='adadelta_f_grad_shared')

    updir = [-T.sqrt(ru2 + 1e-6) / T.sqrt(rg2 + 1e-6) * zg for zg, ru2, rg2 in zip(zipped_grads, running_up2, running_grads2)]
    ru2up = [(ru2, 0.95 * ru2 + 0.05 * (ud ** 2)) for ru2, ud in zip(running_up2, updir)]
    param_up = [(p, p + ud) for p, ud in zip(tparams.values(), updir)]

    f_update = theano.function([], [], updates=ru2up + param_up, on_unused_input='ignore', name='adadelta_f_update')

    return f_grad_shared, f_update
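
A hedged sketch of how this pair of compiled functions is typically driven in a training loop (the batch arrays are placeholders):

cost = f_grad_shared(x_batch, y_batch, mask_batch, lengths_batch)  # forward/backward pass, caches grads in shared vars
f_update()                                                         # applies the adadelta step to tparams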
Project: gram | Author: mp2893
def adadelta(tparams, grads, weightVector, iVector, jVector, cost):
    zipped_grads = [theano.shared(p.get_value() * numpy_floatX(0.), name='%s_grad' % k) for k, p in tparams.iteritems()]
    running_up2 = [theano.shared(p.get_value() * numpy_floatX(0.), name='%s_rup2' % k) for k, p in tparams.iteritems()]
    running_grads2 = [theano.shared(p.get_value() * numpy_floatX(0.), name='%s_rgrad2' % k) for k, p in tparams.iteritems()]

    zgup = [(zg, g) for zg, g in zip(zipped_grads, grads)]
    rg2up = [(rg2, 0.95 * rg2 + 0.05 * (g ** 2)) for rg2, g in zip(running_grads2, grads)]

    f_grad_shared = theano.function([weightVector, iVector, jVector], cost, updates=zgup + rg2up, name='adadelta_f_grad_shared')

    updir = [-T.sqrt(ru2 + 1e-6) / T.sqrt(rg2 + 1e-6) * zg for zg, ru2, rg2 in zip(zipped_grads, running_up2, running_grads2)]
    ru2up = [(ru2, 0.95 * ru2 + 0.05 * (ud ** 2)) for ru2, ud in zip(running_up2, updir)]
    param_up = [(p, p + ud) for p, ud in zip(tparams.values(), updir)]

    f_update = theano.function([], [], updates=ru2up + param_up, on_unused_input='ignore', name='adadelta_f_update')

    return f_grad_shared, f_update
Project: DL4MT | Author: thompsonb
def shared_dropout_layer(shape, use_noise, trng, value, scaled=True):
    #re-scale dropout at training time, so we don't need to at test time
    if scaled:
        proj = tensor.switch(
            use_noise,
            trng.binomial(shape, p=value, n=1,
                                        dtype='float32')/value,
            theano.shared(numpy.float32(1.)))
    else:
        proj = tensor.switch(
            use_noise,
            trng.binomial(shape, p=value, n=1,
                                        dtype='float32'),
            theano.shared(numpy.float32(value)))
    return proj
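
A hedged sketch of how this dropout helper is typically wired up (the MRG random stream and the use_noise switch are assumptions consistent with how such helpers are usually driven, not taken from this file):

from theano.sandbox.rng_mrg import MRG_RandomStreams

trng = MRG_RandomStreams(1234)
use_noise = theano.shared(numpy.float32(1.))   # 1. during training, set to 0. for evaluation
h = tensor.matrix('h')                         # e.g. a (64, 512) batch of activations
mask = shared_dropout_layer((64, 512), use_noise, trng, 0.8)   # retain probability 0.8
h_dropped = h * mask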


# feedforward layer: affine transformation + point-wise nonlinearity
Project: Neural-Photo-Editor | Author: ajbrock
def mdclW(num_filters,num_channels,filter_size,winit,name,scales):
    # Coefficient Initializer
    sinit = lasagne.init.Constant(1.0/(1+len(scales)))
    # Total filter size
    size = filter_size + (filter_size-1)*(scales[-1]-1)
    # Multiscale Dilated Filter 
    W = T.zeros((num_filters,num_channels,size,size))
    # Undilated Base Filter
    baseW = theano.shared(lasagne.utils.floatX(winit.sample((num_filters,num_channels,filter_size,filter_size))),name=name+'.W')
    for scale in scales[::-1]: # iterate the scales backwards so that we place the main filter on top
            W = T.set_subtensor(W[:,:,scales[-1]-scale:size-scales[-1]+scale:scale,scales[-1]-scale:size-scales[-1]+scale:scale],
                                  baseW*theano.shared(lasagne.utils.floatX(sinit.sample(num_filters)), name+'.coeff_'+str(scale)).dimshuffle(0,'x','x','x'))
    return W

# Subpixel Upsample Layer from (https://arxiv.org/abs/1609.05158)
# This layer uses a set of r^2 set_subtensor calls to reorganize the tensor in a subpixel-layer upscaling style
# as done in the ESPCN (Magic Pony) paper for super-resolution.
# r is the upscale factor.
# c is the number of output channels.
Project: text2image | Author: emansim
def load_weights(params, path, num_conv):
    print 'Loading gan weights from ' + path
    with h5py.File(path, 'r') as hdf5:
        params['skipthought2image'] = theano.shared(np.copy(hdf5['skipthought2image']))
        params['skipthought2image-bias'] = theano.shared(np.copy(hdf5['skipthought2image-bias']))

        for i in xrange(num_conv):
            params['W_conv{}'.format(i)] = theano.shared(np.copy(hdf5['W_conv{}'.format(i)]))
            params['b_conv{}'.format(i)] = theano.shared(np.copy(hdf5['b_conv{}'.format(i)]))

            # Flip w,h axes
            params['W_conv{}'.format(i)] = params['W_conv{}'.format(i)][:,:,::-1,::-1]

            w = np.abs(np.copy(hdf5['W_conv{}'.format(i)]))
            print 'W_conv{}'.format(i), np.min(w), np.mean(w), np.max(w)
            b = np.abs(np.copy(hdf5['b_conv{}'.format(i)]))
            print 'b_conv{}'.format(i), np.min(b), np.mean(b), np.max(b)

    return params
Project: text2image | Author: emansim
def __init__(self, dimX, dimReadAttent, dimWriteAttent, dimRNNEnc, dimRNNDec, dimZ, runSteps, inputData, valData=None, testData=None, pathToWeights=None):
        self.dimX = dimX
        self.dimReadAttent = dimReadAttent
        self.dimWriteAttent = dimWriteAttent
        self.dimRNNEnc = dimRNNEnc
        self.dimRNNDec = dimRNNDec
        self.dimZ = dimZ
        self.runSteps = runSteps
        self.pathToWeights = pathToWeights

        self.n_batches = inputData.shape[0] / batch_size
        self.train_data = theano.shared(inputData)
        del inputData

        if valData != None:
            self.n_val_batches = valData.shape[0] / batch_size
            self.val_data = theano.shared(valData)
            del valData

        if testData != None:
            self.n_test_batches = testData.shape[0] / batch_size
            self.test_data = theano.shared(testData)
            del testData

        self._kl_final, self._logpxz, self._log_likelihood, self._c_ts, self._c_ts_gener, self._x, self._run_steps, self._updates_train, self._updates_gener, self._read_attent_params, self._write_attent_params, self._write_attent_params_gener, self._params = build_lstm_attention_vae(self.dimX, self.dimReadAttent, self.dimWriteAttent, self.dimRNNEnc, self.dimRNNDec, self.dimZ, self.runSteps, self.pathToWeights)
Project: DeepMirTar_SdA | Author: Bjoux2
def shared_dataset_x(data_x, borrow=True):
    """ Function that loads the dataset into shared variables

    The reason we store our dataset in shared variables is to allow
    Theano to copy it into the GPU memory (when code is run on GPU).
    Since copying data into the GPU is slow, copying a minibatch everytime
    is needed (the default behaviour if the data is not in a shared
    variable) would lead to a large decrease in performance.
    """
    shared_x = theano.shared(numpy.asarray(data_x,
                                           dtype=theano.config.floatX),
                             borrow=borrow)

    # When storing data on the GPU it has to be stored as floats
    # therefore we will store the labels as ``floatX`` as well
    # (``shared_y`` does exactly that). But during our computations
    # we need them as ints (we use labels as index, and if they are
    # floats it doesn't make sense) therefore instead of returning
    # ``shared_y`` we will have to cast it to int. This little hack
    # lets ous get around this issue
    return shared_x
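
The docstring's point about avoiding per-minibatch host-to-GPU copies is usually exploited with index-based givens; a self-contained sketch of that pattern (the toy cost stands in for a real model):

import numpy
import theano
import theano.tensor as T

batch_size = 64
x = T.matrix('x')
y = T.ivector('y')
cost = T.mean(x.sum(axis=1) * y)               # placeholder for a real model cost

data_x = numpy.random.rand(1000, 784).astype(theano.config.floatX)
data_y = numpy.random.randint(0, 10, size=1000)
shared_x = shared_dataset_x(data_x)
shared_y = T.cast(theano.shared(numpy.asarray(data_y, dtype=theano.config.floatX), borrow=True), 'int32')

index = T.lscalar('index')
evaluate = theano.function(
    [index], cost,
    givens={x: shared_x[index * batch_size:(index + 1) * batch_size],
            y: shared_y[index * batch_size:(index + 1) * batch_size]})
print(evaluate(0))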
Project: punctuator2 | Author: ottokart
def __init__(self, rng, n_in, n_out, minibatch_size):
        super(GRULayer, self).__init__()
        # Notation from: An Empirical Exploration of Recurrent Network Architectures

        self.n_in = n_in
        self.n_out = n_out

        # Initial hidden state
        self.h0 = theano.shared(value=np.zeros((minibatch_size, n_out)).astype(theano.config.floatX), name='h0', borrow=True)

        # Gate parameters:
        self.W_x = weights_Glorot(n_in, n_out*2, 'W_x', rng)
        self.W_h = weights_Glorot(n_out, n_out*2, 'W_h', rng)
        self.b = weights_const(1, n_out*2, 'b', 0)
        # Input parameters
        self.W_x_h = weights_Glorot(n_in, n_out, 'W_x_h', rng)
        self.W_h_h = weights_Glorot(n_out, n_out, 'W_h_h', rng)
        self.b_h = weights_const(1, n_out, 'b_h', 0)

        self.params = [self.W_x, self.W_h, self.b, self.W_x_h, self.W_h_h, self.b_h]
Project: nrc | Author: IcarPA-TBlab
def setParams(self, W_IN, b_IN):
    # check the matrix dimensions
        if (    W_IN.shape[0] == self.W.shape.eval()[0] and
            W_IN.shape[1] == self.W.shape.eval()[1] and
            len(b_IN) == self.b.shape.eval()[0] ):

            self.W.set_value(W_IN)
            self.b.set_value(b_IN)
            #self.W = theano.shared(value=W_IN, name='W', borrow=True)

            # initialize the baises b as a vector of n_out 0s           
            #self.b = theano.shared(value=b_IN, name='b', borrow=True)
        else : 
            print "NEW_logistic_sgd:Errore nelle dimensioni delle matrici passate"
            print "W(input) shape", W_IN.shape, "W shape", self.W.shape.eval()
            print "b(input) shape", len(b_IN), "b shape", self.b.shape.eval()
Project: nrc | Author: IcarPA-TBlab
def shared_dataset(data_x, data_y, borrow=True):
    """ Function that loads the dataset into shared variables

    The reason we store our dataset in shared variables is to allow
    Theano to copy it into the GPU memory (when code is run on GPU).
    Since copying data into the GPU is slow, copying a minibatch everytime
    is needed (the default behaviour if the data is not in a shared
    variable) would lead to a large decrease in performance.
    """

    shared_x = theano.shared(numpy.asarray(data_x,
                                dtype=theano.config.floatX),
                                borrow=borrow)

    shared_y = theano.shared(numpy.asarray(data_y,
                                dtype=theano.config.floatX),
                                borrow=borrow)

    return shared_x, T.cast(shared_y, 'int32')
Project: snn4hrl | Author: florensacc
def dist_info_sym(self, obs_var, latent_var=None):  # this is meant to be for one path!
        # now this is not doing anything! And for computing the dist_info_vars of npo_snn_rewardMI it doesn't work
        if latent_var is None:
            latent_var1 = theano.shared(np.expand_dims(self.latent_fix, axis=0))  # new fix to avoid putting the latent as an input: just take the one fixed!
            latent_var = TT.tile(latent_var1, [obs_var.shape[0], 1])

        # generate the generalized input (append latents to obs.)
        if self.bilinear_integration:
            extended_obs_var = TT.concatenate([obs_var, latent_var,
                                               TT.flatten(obs_var[:, :, np.newaxis] * latent_var[:, np.newaxis, :],
                                                          outdim=2)]
                                              , axis=1)
        else:
            extended_obs_var = TT.concatenate([obs_var, latent_var], axis=1)
        mean_var, log_std_var = L.get_output([self._l_mean, self._l_log_std], extended_obs_var)
        if self.min_std is not None:
            log_std_var = TT.maximum(log_std_var, np.log(self.min_std))
        return dict(mean=mean_var, log_std=log_std_var)
Project: cortex | Author: rdevon
def step_gibbs(self, r_h, r_v, h, *params):
        '''Step Gibbs sample.

        Args:
            r_h (theano.randomstream): random variables for hiddens.
            r_v (theano.randomstream): random variables for visibles.
            h (T.tensor): hidden state.
            *params: theano shared variables.

        Returns:
            T.tensor: hidden samples.
            T.tensor: visible samples.
            T.tensor: conditional hidden probability.
            T.tensor: conditional visible probability.

        '''
        v, pv = self.step_sv_h(r_v, h, *params)
        h, ph = self.step_sh_v(r_h, v, *params)
        return h, v, ph, pv
Project: cortex | Author: rdevon
def step_free_energy(self, x, beta, *params):
        '''Step free energy function.

        Args:
            x (T.tensor): data sample.
            beta (float): beta value for annealing.
            *params: theano shared variables.

        Returns:
            T.tensor: free energy.

        '''
        W, v_params, h_params = self.split_params(*params)

        vis_term = beta * self.v_dist.get_energy_bias(x, *v_params)
        x = self.v_dist.scale_for_energy_model(x, *v_params)
        hid_act = beta * (T.dot(x, W) + self.h_dist.get_center(*h_params))
        fe = -vis_term - T.log(1. + T.exp(hid_act)).sum(axis=1)
        return fe
Project: cortex | Author: rdevon
def step_free_energy_h(self, h, beta, *params):
        '''Step free energy function for hidden states.

        Args:
            h (T.tensor): hidden sample.
            beta (float): beta value for annealing.
            *params: theano shared variables.

        Returns:
            T.tensor: free energy.

        '''
        W, v_params, h_params = self.split_params(*params)

        hid_term = beta * self.h_dist.get_energy_bias(h, *h_params)
        h = self.h_dist.scale_for_energy_model(h, *h_params)
        vis_act = beta * (T.dot(h, W.T) + self.v_dist.get_center(*v_params))
        fe = -hid_term - T.log(1. + T.exp(vis_act)).sum(axis=1)
        return fe
Project: cortex | Author: rdevon
def _step(self, m, y, h_, Ur):
        '''Step function for RNN call.

        Args:
            m (T.tensor): masks.
            y (T.tensor): inputs.
            h_ (T.tensor): recurrent state.
            Ur (theano.shared): recurrent connection.

        Returns:
            T.tensor: next recurrent state.

        '''
        preact = T.dot(h_, Ur) + y
        h      = T.tanh(preact)
        h      = m * h + (1 - m) * h_
        return h
Project: cortex | Author: rdevon
def sgd(lr, tparams, grads, inp, cost, extra_ups=[], extra_outs=[],
        exclude_params=set([])):
    '''Stochastic gradient descent'''
    gshared = [theano.shared(p.get_value() * 0., name='%s_grad'%k)
               for k, p in tparams.iteritems()]

    gsup = [(gs, g) for gs, g in zip(gshared, grads)]

    f_grad_shared = theano.function(
        inp, [cost]+extra_outs, updates=gsup+extra_ups, profile=profile)

    pup = [(p, p - lr * g) for p, g in zip(tools.itemlist(tparams), gshared)
        if p.name not in exclude_params]

    if not isinstance(lr, list): lr = [lr]
    f_update = theano.function(lr, [], updates=pup, profile=profile)

    return f_grad_shared, f_update
Project: top-k-rec | Author: domainxz
def _generate_train_model_function(self, scores):
       u = T.lvector('u')
       i = T.lvector('i')
       j = T.lvector('j')
       self.W = theano.shared(numpy.zeros((self._dim)).astype('float32'), name='W');
       self.S = theano.shared(scores, name='S');
       x_ui  = T.dot(self.W, self.S[u,i,:].T);
       x_uj  = T.dot(self.W, self.S[u,j,:].T);
       x_uij = x_ui - x_uj;
       obj = T.sum(
               T.log(T.nnet.sigmoid(x_uij)).sum() - \
               self._lambda_w * 0.5 * (self.W ** 2).sum()
               )
       cost = -obj
       g_cost_W = T.grad(cost=cost, wrt=self.W)
       updates = [
               (self.W, self.W - self._learning_rate * g_cost_W)
               ]
       self.train_model = theano.function(inputs=[u,i,j], outputs=cost, updates=updates);
Project: SteinGAN | Author: DartML
def __call__(self, params, cost):
        updates = []
        grads = T.grad(cost, params)
        grads = clip_norms(grads, self.clipnorm)  
        t = theano.shared(floatX(1.))
        b1_t = self.b1*self.l**(t-1)

        for p, g in zip(params, grads):
            g = self.regularizer.gradient_regularize(p, g)
            m = theano.shared(p.get_value() * 0.)
            v = theano.shared(p.get_value() * 0.)

            m_t = b1_t*m + (1 - b1_t)*g
            v_t = self.b2*v + (1 - self.b2)*g**2
            m_c = m_t / (1-self.b1**t)
            v_c = v_t / (1-self.b2**t)
            p_t = p - (self.lr * m_c) / (T.sqrt(v_c) + self.e)
            p_t = self.regularizer.weight_regularize(p_t)
            updates.append((m, m_t))
            updates.append((v, v_t))
            updates.append((p, p_t) )
        updates.append((t, t + 1.))
        return updates
Project: SteinGAN | Author: DartML
def metropolis_hastings_accept(energy_prev, energy_next, s_rng):
    """
    Performs a Metropolis-Hastings accept-reject move.

    Parameters
    ----------
    energy_prev: theano vector
        Symbolic theano tensor which contains the energy associated with the
        configuration at time-step t.
    energy_next: theano vector
        Symbolic theano tensor which contains the energy associated with the
        proposed configuration at time-step t+1.
    s_rng: theano.tensor.shared_randomstreams.RandomStreams
        Theano shared random stream object used to generate the random number
        used in proposal.

    Returns
    -------
    return: theano boolean tensor
        elementwise True where the proposed move is accepted, False otherwise
    """
    ediff = energy_prev - energy_next
    return (TT.exp(ediff) - s_rng.uniform(size=energy_prev.shape)) >= 0
Project: SteinGAN | Author: DartML
def draw(self, **kwargs):
        """
        Returns a new position obtained after `n_steps` of HMC simulation.

        Parameters
        ----------
        kwargs: dictionary
            The `kwargs` dictionary is passed to the shared variable
            (self.positions) `get_value()` function.  For example, to avoid
            copying the shared variable value, consider passing `borrow=True`.

        Returns
        -------
        rval: numpy matrix
        Numpy matrix whose dimensions are similar to `initial_position`.
        """
        self.simulate()
        return self.positions.get_value(borrow=False)
Project: SteinGAN | Author: DartML
def metropolis_hastings_accept(energy_prev, energy_next, s_rng):
    """
    Performs a Metropolis-Hastings accept-reject move.

    Parameters
    ----------
    energy_prev: theano vector
        Symbolic theano tensor which contains the energy associated with the
        configuration at time-step t.
    energy_next: theano vector
        Symbolic theano tensor which contains the energy associated with the
        proposed configuration at time-step t+1.
    s_rng: theano.tensor.shared_randomstreams.RandomStreams
        Theano shared random stream object used to generate the random number
        used in proposal.

    Returns
    -------
    return: theano boolean tensor
        elementwise True where the proposed move is accepted, False otherwise
    """
    ediff = energy_prev - energy_next
    return (TT.exp(ediff) - s_rng.uniform(size=energy_prev.shape)) >= 0
Project: SteinGAN | Author: DartML
def draw(self, **kwargs):
        """
        Returns a new position obtained after `n_steps` of HMC simulation.

        Parameters
        ----------
        kwargs: dictionary
            The `kwargs` dictionary is passed to the shared variable
            (self.positions) `get_value()` function.  For example, to avoid
            copying the shared variable value, consider passing `borrow=True`.

        Returns
        -------
        rval: numpy matrix
        Numpy matrix whose dimensions are similar to `initial_position`.
        """
        self.simulate()
        return self.positions.get_value(borrow=False)
Project: iGAN | Author: junyanz
def __call__(self, params, cost):
        updates = []
        grads = T.grad(cost, params)
        grads = clip_norms(grads, self.clipnorm)
        t = theano.shared(floatX(1.))
        b1_t = self.b1*self.l**(t-1)

        for p, g in zip(params, grads):
            g = self.regularizer.gradient_regularize(p, g)
            m = theano.shared(p.get_value() * 0.)
            v = theano.shared(p.get_value() * 0.)

            m_t = b1_t*m + (1 - b1_t)*g
            v_t = self.b2*v + (1 - self.b2)*g**2
            m_c = m_t / (1-self.b1**t)
            v_c = v_t / (1-self.b2**t)
            p_t = p - (self.lr * m_c) / (T.sqrt(v_c) + self.e)
            p_t = self.regularizer.weight_regularize(p_t)
            updates.append((m, m_t))
            updates.append((v, v_t))
            updates.append((p, p_t))
        updates.append((t, t + 1.))
        return updates
Project: NMT | Author: tuzhaopeng
def _init_params(self):
        self.W_hhs = []
        self.b_hhs = []
        for dx in xrange(self.n_layers):
            W_hh = self.init_fn[dx](self.n_hids[(dx-1)%self.n_layers],
                                        self.n_hids[dx],
                                        self.sparsity[dx],
                                        self.scale[dx],
                                        rng=self.rng)
            self.W_hhs.append(theano.shared(value=W_hh, name="W%d_%s" %
                                       (dx,self.name)))
            if dx > 0:
                self.b_hhs.append(theano.shared(
                    self.bias_fn[dx-1](self.n_hids[dx],
                                       self.bias_scale[dx-1],
                                       self.rng),
                    name='b%d_%s' %(dx, self.name)))
        self.params = [x for x in self.W_hhs] + [x for x in self.b_hhs]
        self.params_grad_scale = [self.grad_scale for x in self.params]
        if self.weight_noise:
            self.nW_hhs = [theano.shared(x.get_value()*0, name='noise_'+x.name) for x in self.W_hhs]
            self.nb_hhs = [theano.shared(x.get_value()*0, name='noise_'+x.name) for x in self.b_hhs]
            self.noise_params = [x for x in self.nW_hhs] + [x for x in self.nb_hhs]
            self.noise_params_shape_fn = [constant_shape(x.get_value().shape)
                            for x in self.noise_params]
Project: NMT | Author: tuzhaopeng
def _init_params(self):
        self.iBlocks = 1  # number of blocks in the input (from lower layer)

        W_em = self.init_fn(self.n_in,
                            self.n_class,
                            self.sparsity,
                            self.scale,
                            self.rng)
        self.W_em = theano.shared(W_em,
                                  name='W_%s' % self.name)
        self.b_em = theano.shared(
            self.bias_fn(self.n_class, self.bias_scale, self.rng),
            name='b_%s' % self.name)

        U_em = theano.shared(((self.rng.rand(self.iBlocks, self.n_class, 
            self.n_in, self.n_words_class)-0.5)/(self.n_words_class*self.n_in)
            ).astype(theano.config.floatX), name='U_%s'%self.name)
        self.U_em = U_em
        c_em = numpy.zeros((self.n_class, self.n_words_class), dtype='float32')
        n_words_last_class = self.n_out % self.n_words_class
        #c_em[-1, n_words_last_class:] = -numpy.inf
        self.c_em = theano.shared(c_em, name='c_%s' % self.name)

        self.params = [self.W_em, self.b_em, self.U_em, self.c_em]
        self.params_grad_scale = [self.grad_scale for x in self.params]
Project: NMT | Author: tuzhaopeng
def __init__(self, rng, std = 0.1, ndim=0, avg =0, shape_fn=None):
        """
        """
        assert rng is not None, "random number generator should not be empty!"
        super(GaussianNoise, self).__init__(0, 0, rng)

        self.std = std
        self.avg = avg
        self.ndim = ndim
        self.shape_fn = shape_fn
        if self.shape_fn:
            # Name is not important as it is not a parameter of the model
            self.noise_term = theano.shared(numpy.zeros((2,)*ndim,
                                                    dtype=theano.config.floatX),
                                        name='ndata')
            self.noise_params += [self.noise_term]
            self.noise_params_shape_fn += [shape_fn]
        self.trng = RandomStreams(rng.randint(1e5))
Project: NMT | Author: tuzhaopeng
def _init_params(self):
        self.W_hhs = []
        self.b_hhs = []
        for dx in xrange(self.n_layers):
            W_hh = self.init_fn[dx](self.n_hids[(dx-1)%self.n_layers],
                                        self.n_hids[dx],
                                        self.sparsity[dx],
                                        self.scale[dx],
                                        rng=self.rng)
            self.W_hhs.append(theano.shared(value=W_hh, name="W%d_%s" %
                                       (dx,self.name)))
            if dx > 0:
                self.b_hhs.append(theano.shared(
                    self.bias_fn[dx-1](self.n_hids[dx],
                                       self.bias_scale[dx-1],
                                       self.rng),
                    name='b%d_%s' %(dx, self.name)))
        self.params = [x for x in self.W_hhs] + [x for x in self.b_hhs]
        self.params_grad_scale = [self.grad_scale for x in self.params]
        if self.weight_noise:
            self.nW_hhs = [theano.shared(x.get_value()*0, name='noise_'+x.name) for x in self.W_hhs]
            self.nb_hhs = [theano.shared(x.get_value()*0, name='noise_'+x.name) for x in self.b_hhs]
            self.noise_params = [x for x in self.nW_hhs] + [x for x in self.nb_hhs]
            self.noise_params_shape_fn = [constant_shape(x.get_value().shape)
                            for x in self.noise_params]
Project: GELUs | Author: hendrycks
def fit(self, x):
        s = x.shape
        x = x.copy().reshape((s[0],np.prod(s[1:])))
        m = np.mean(x, axis=0)
        x -= m
        sigma = np.dot(x.T,x) / x.shape[0]
        U, S, V = linalg.svd(sigma)
        tmp = np.dot(U, np.diag(1./np.sqrt(S+self.regularization)))
        tmp2 = np.dot(U, np.diag(np.sqrt(S+self.regularization)))
        self.ZCA_mat = th.shared(np.dot(tmp, U.T).astype(th.config.floatX))
        self.inv_ZCA_mat = th.shared(np.dot(tmp2, U.T).astype(th.config.floatX))
        self.mean = th.shared(m.astype(th.config.floatX))
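
Hedged note: with ZCA_mat = U diag(1/sqrt(S + eps)) U^T as computed above, whitening is applied as a centred dot product; the whiten helper below is illustrative, not part of the original class, and assumes import theano.tensor as T:

def whiten(self, x_flat):
    # x_flat: symbolic (n, d) batch, already flattened as in fit()
    return T.dot(x_flat - self.mean.dimshuffle('x', 0), self.ZCA_mat)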
Project: fxnn | Author: khaotik
def compile(
        self,s_inputs_, s_loss_, v_params_, s_grads_=None, s_reg_=0,
        fetches_=None, updates_=None, givens_=None,
        trunc_grad_=None, profile_=False
    ):
        '''
        compile optimizer against specific model

        Args:
            s_inputs_: list of symbolic input tensors, including label
            s_loss_: optimization loss, symbolic scalar
            v_params_: list of shared parameters to optimize
            s_grads_: list of gradients to apply, must be in the same order as v_params_; default is None (use autodiff).
            s_reg_: symbolic regularization term, default 0 (no regularization)
            updates_: update operations for shared values after a step of optimization,
                usually RNN states. Takes the form [(v_var, s_new_var), ...]

        Returns: None
        '''
        self.s_loss = s_loss_
        self.s_reg = s_reg_
        if s_grads_ is None:
            s_grads_ = T.grad(
                self.s_loss + self.s_reg, list(v_params_), disconnected_inputs='warn')

        if type(trunc_grad_)==float:
            self.s_grads = [T.clip(g,-trunc_grad_,trunc_grad_) for g in s_grads_]
        else:
            self.s_grads = s_grads_
Project: dl4mt-multi | Author: nyu-dl
def init_tparams(params):
    tparams = OrderedDict()
    for kk, pp in params.iteritems():
        tparams[kk] = theano.shared(params[kk], name=kk, borrow=True)
        add_role(tparams[kk], PARAMETER)
    return tparams
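
A hedged sketch of the numpy-dict to shared-dict convention this helper serves (names and shapes are illustrative):

params = OrderedDict()
params['Wemb'] = numpy.random.randn(10000, 512).astype('float32')
params['b'] = numpy.zeros((512,), dtype='float32')
tparams = init_tparams(params)   # same keys, values are now theano shared variables tagged with the PARAMETER role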


# make prefix-appended name