def adamax_updates(params, cost, lr=0.001, mom1=0.9, mom2=0.999):
    """Build Theano update pairs implementing the Adamax optimizer.

    Args:
        params: list of Theano shared variables to optimise.
        cost: scalar symbolic expression differentiated w.r.t. ``params``.
        lr: step size.
        mom1: decay rate of the running mean of the gradient. When it is
            not positive, the raw gradient is used instead of the average.
        mom2: decay rate of the running infinity-norm of the gradient.

    Returns:
        A list of ``(shared_variable, new_value)`` pairs usable as the
        ``updates`` argument of ``theano.function``.
    """
    updates = []
    grads = T.grad(cost, params)
    for p, g in zip(params, grads):
        mg = th.shared(np.cast[th.config.floatX](p.get_value() * 0.))
        v = th.shared(np.cast[th.config.floatX](p.get_value() * 0.))
        if mom1 > 0:
            v_t = mom1*v + (1. - mom1)*g
            # Persist the running mean; without this pair ``v`` would stay
            # at zero and the momentum term would never accumulate.
            updates.append((v, v_t))
        else:
            # No momentum requested: step along the raw gradient.
            v_t = g
        mg_t = T.maximum(mom2*mg, abs(g))
        # The small constant keeps the division finite while mg_t is zero.
        g_t = v_t / (mg_t + 1e-6)
        p_t = p - lr * g_t
        updates.append((mg, mg_t))
        updates.append((p, p_t))
    return updates
def adam_updates(params, cost, lr=0.001, mom1=0.9, mom2=0.999):
    """Return the Theano update list for one Adam optimisation step.

    A shared step counter (starting at 1) drives the bias correction of
    both moment estimates and is incremented by the final update pair.
    """
    def _zero_state(shared_var):
        # Fresh accumulator with the same shape as the parameter.
        return th.shared(np.cast[th.config.floatX](shared_var.get_value() * 0.))

    update_pairs = []
    gradients = T.grad(cost, params)
    step = th.shared(np.cast[th.config.floatX](1.))
    for param, grad in zip(params, gradients):
        first_moment = _zero_state(param)
        second_moment = _zero_state(param)
        new_first = mom1*first_moment + (1. - mom1)*grad
        new_second = mom2*second_moment + (1. - mom2)*T.square(grad)
        # Bias-corrected estimates.
        first_hat = new_first / (1. - mom1 ** step)
        second_hat = new_second / (1. - mom2 ** step)
        direction = first_hat / T.sqrt(second_hat + 1e-8)
        update_pairs.extend([
            (first_moment, new_first),
            (second_moment, new_second),
            (param, param - lr * direction),
        ])
    update_pairs.append((step, step+1))
    return update_pairs
def compile(self,s_inputs_, s_loss_, v_params_, s_grads_=None, s_reg_=0, fetches_=None, updates_=None, givens_=None, trunc_grad_=None, profile_=False):
        # Builds and returns self.fn_train, a Theano function whose updates
        # maintain per-parameter moment accumulators (self.v_m, self.v_v) and
        # apply an Adam-style step to v_params_. Also builds self.fn_rst,
        # which resets both accumulator lists to zero.
        # NOTE(review): several lines of this method are missing or garbled in
        # this listing (flagged below); the block does not parse as-is.
        def get_shared_shape(v):
            # Shape of the value held by a shared variable, read without copying.
            return v.get_value(borrow=True, return_internal_type=True).shape
        # Accept a single input variable as well as a list/tuple of them.
        if type(s_inputs_) not in (list, tuple):
            s_inputs_ = [s_inputs_]
        # Normalise dict-style updates to a list of pairs so they can be
        # concatenated with the optimizer updates below.
        if isinstance(updates_, dict):
            updates_= list(updates_.items())
            # NOTE(review): the next line is the tail of a call whose opening
            # line is missing; presumably it sets self.s_grads (used below) --
            # confirm against the original source.
            s_inputs_, s_loss_, v_params_, s_reg_=s_reg_, s_grads_=s_grads_, trunc_grad_=trunc_grad_)
        # NOTE(review): `name=... if is not None` lost an operand on both lines
        # below; presumably the parameter name was appended -- confirm.
        self.v_m = [th.shared(value=np.zeros(get_shared_shape(p), th.config.floatX), name='adam_m_' if is not None else None) for p in v_params_]
        self.v_v = [th.shared(value=np.zeros(get_shared_shape(p), th.config.floatX), name='adam_v_' if is not None else None) for p in v_params_]
        # Symbolic scalars for the decay rates and two scale factors that the
        # caller must supply when invoking fn_train.
        s_b1 = T.scalar('adam_b1'); s_b2 = T.scalar('adam_b2')
        s_b1s = T.scalar('adam_b1s'); s_b2s = T.scalar('adam_b2s')
        # Exponential moving averages of the gradient and of its square.
        update_m = [(m, (m*s_b1 + (1.-s_b1)*g)) for m,g in zip(self.v_m,self.s_grads)]
        update_v = [(v, (v*s_b2 + (1.-s_b2)*g*g)) for v,g in zip(self.v_v,self.s_grads)]
        # Parameter step: scaled first moment over root of scaled second moment.
        apply_grad = [(p, p-(s_b1s*m*self.s_lr)/(T.sqrt(s_b2s*v)+self.eps)) for p,m,v in zip(v_params_,self.v_m,self.v_v)]
        # NOTE(review): the inputs/outputs arguments of this call are missing
        # from the listing.
        self.fn_train = th.function(
            updates=update_m+update_v+apply_grad+(updates_ if updates_ else []),
            givens=givens_, profile=profile_)
        # Zeroes every accumulator in self.v_m and self.v_v.
        self.fn_rst = th.function(inputs=[], updates=[(v, T.zeros_like(v)) for v in self.v_m+self.v_v], profile=profile_)
        return self.fn_train
def get_costs(self, probs, y, y_mask,
                  decay_cs=None, opt_rets=None):
        """Compute the decoder costs, optionally adding L2 weight decay.

        probs : dict, mapping cg_name to probabilities
        y : theano tensor variable
        y_mask : theano tensor variable
        decay_cs : l2 regularization weights, indexed by cg_name
            (``decay_cs[name]``); ``None`` disables weight decay
        opt_rets : dict, mapping cg_name to optional returned variables
            (not used by the code visible here)

        Returns the dict of costs produced by ``self.decoder.costs``, with
        the weight-decay term added where the corresponding weight is > 0.
        """
        costs = self.decoder.costs(probs, y, y_mask)

        if decay_cs is not None:
            # Iterate over a snapshot of the keys: entries are reassigned
            # inside the loop.
            for name in list(costs):
                cost = costs[name]
                if decay_cs[name] > 0.:
                    # NOTE(review): the tail of this call was cut off in the
                    # listing; the variable name is a reconstruction.
                    decay_c = theano.shared(numpy.float32(decay_cs[name]),
                                            name='decay_c')
                    weight_decay = 0.
                    # Sum of squares over every parameter reachable from
                    # this cost's computation graph.
                    for pp in ComputationGraph(cost).parameters:
                        weight_decay += (pp ** 2).sum()
                    weight_decay *= decay_c
                    costs[name] += weight_decay
                    # The addition creates a new variable; restore its name.
                    costs[name].name = name

        return costs
def param(name, *args, **kwargs):
    """
    A wrapper for `theano.shared` which enables parameter sharing in models.

    Creates and returns theano shared variables similarly to `theano.shared`,
    except if you try to create a param with the same name as a
    previously-created one, `param(...)` will just return the old one instead of
    making a new one.

    This constructor also adds a `param` attribute to the shared variables it
    creates, so that you can easily search a graph for all params.
    """
    if name not in _params:
        kwargs['name'] = name
        # Named `shared_var` so the function's own name is not shadowed.
        shared_var = theano.shared(*args, **kwargs)
        shared_var.param = True
        _params[name] = shared_var
    return _params[name]
def adam_updates(params, cost, lr=0.001, mom1=0.9, mom2=0.999):
    """Return the Theano update list for one Adam optimisation step.

    A shared step counter (starting at 1) drives the bias correction of
    both moment estimates and is incremented by the final update pair.
    """
    def _zero_state(shared_var):
        # Fresh accumulator with the same shape as the parameter.
        return th.shared(np.cast[th.config.floatX](shared_var.get_value() * 0.))

    update_pairs = []
    gradients = T.grad(cost, params)
    step = th.shared(np.cast[th.config.floatX](1.))
    for param, grad in zip(params, gradients):
        first_moment = _zero_state(param)
        second_moment = _zero_state(param)
        new_first = mom1*first_moment + (1. - mom1)*grad
        new_second = mom2*second_moment + (1. - mom2)*T.square(grad)
        # Bias-corrected estimates.
        first_hat = new_first / (1. - mom1 ** step)
        second_hat = new_second / (1. - mom2 ** step)
        direction = first_hat / T.sqrt(second_hat + 1e-8)
        update_pairs.extend([
            (first_moment, new_first),
            (second_moment, new_second),
            (param, param - lr * direction),
        ])
    update_pairs.append((step, step+1))
    return update_pairs
def adadelta(parameters, gradients, rho=0.95, eps=1e-6):
  """ Reference: ADADELTA: An Adaptive Learning Rate Method,
        Zeiler 2012.
      Adapted from the Adadelta implementation from Tensorflow.

  Args:
    parameters: list of Theano shared variables to update.
    gradients: list of symbolic gradients, aligned with ``parameters``.
    rho: decay rate of both running averages.
    eps: constant added under the square roots.

  Returns:
    A list of ``(shared_variable, new_value)`` pairs covering the squared
    gradient accumulators, the squared update accumulators and the
    parameters, in that order.
  """
  accum = [theano.shared(numpy.zeros(p.get_value().shape, floatX)) for p in parameters]
  accum_updates = [theano.shared(numpy.zeros(p.get_value().shape, floatX)) for p in parameters]

  # Running average of squared gradients.
  new_accum = [rho * g0 + (1.0 - rho) * (g**2) for g0, g in izip(accum, gradients)]
  # Step: RMS of past updates over RMS of gradients, times the gradient.
  updates = [tensor.sqrt(d0 + eps) / tensor.sqrt(g0 + eps) * g
             for d0, g0, g in izip(accum_updates, new_accum, gradients)]

  # Running average of squared updates.
  new_accum_updates = [rho * d0 + (1.0 - rho) * (d**2)
                       for d0, d in izip(accum_updates, updates)]

  # list(...) keeps the concatenation valid where zip returns an iterator.
  accum_ = list(zip(accum, new_accum))
  accum_updates_ = list(zip(accum_updates, new_accum_updates))
  parameters_ = [(p, (p - d)) for p, d in izip(parameters, updates)]
  return accum_ + accum_updates_ + parameters_
def addData(self, data):
        """
        Set the data of the network, not managed within training iterations, e.g. used for validation or other small data
        :param data: training data and labels specified as dictionary
        :return: None
        :raises ValueError: if data is not a dictionary
        """

        if not isinstance(data, dict):
            raise ValueError("Error: expected dictionary for data!")

        for key in data:
            # no need to cache validation data
            setattr(self, key+'DB', self.alignData(data[key]))

            # shared variable already exists?
            if hasattr(self, key):
                print("Reusing shared variables!")
                getattr(self, key).set_value(getattr(self, key+'DB'), borrow=True)
            else:
                # create shared data
                setattr(self, key, theano.shared(getattr(self, key+'DB'), name=key, borrow=True))
def addStaticData(self, data):
        """
        Set the data of the network, not managed within training iterations, e.g. used for validation or other small data
        :param data: training data and labels specified as dictionary
        :return: None
        :raises ValueError: if data is not a dictionary
        """

        if not isinstance(data, dict):
            raise ValueError("Error: expected dictionary for data!")

        for key in data:
            # no need to cache validation data; the value is stored unchanged
            setattr(self, key+'DB', data[key])

            # shared variable already exists?
            if hasattr(self, key):
                print("Reusing shared variables!")
                getattr(self, key).set_value(getattr(self, key+'DB'), borrow=True)
            else:
                # create shared data
                setattr(self, key, theano.shared(getattr(self, key+'DB'), name=key, borrow=True))
def replaceTrainingData(self, start_idx, end_idx, last=False):
        """
        Replace the shared data of the training data
        :param start_idx: start index of data
        :param end_idx: end index of data
        :param last: specify if it is last macro-batch
        :return: None
        :raises ValueError: if a managed variable is not defined on self
        """

        for var in self.managedVar:
            if not hasattr(self, var):
                raise ValueError("Variable " + var + " not defined!")
            if last is True:
                # the last macro batch is read from its own '<var>DBlast' store
                getattr(self, var).set_value(getattr(self, var+'DBlast')[start_idx:end_idx], borrow=True)
            else:
                getattr(self, var).set_value(getattr(self, var+'DB')[start_idx:end_idx], borrow=True)
def loadMacroBatch(self, macro_idx):
        """
        Make sure that macro batch is loaded in the shared variable
        :param macro_idx: macro batch index
        :return: None
        """
        if macro_idx == self.currentMacroBatch:
            # requested macro batch is already loaded, nothing to do
            return

        # NOTE(review): the listing this was taken from indented the code
        # below one level deeper than required, so an enclosing statement may
        # have been lost -- confirm against the original source.
        # last macro batch is handled separately, as it is padded
        if self.isLastMacroBatch(macro_idx):
            start_idx = 0
            end_idx = self.getNumSamplesPerMacroBatch()
            print("Loading last macro batch {}, start idx {}, end idx {}".format(macro_idx, start_idx, end_idx))
            self.replaceTrainingData(start_idx, end_idx, last=True)
        else:
            start_idx = macro_idx * self.getNumSamplesPerMacroBatch()
            # clamp to the number of available training samples
            end_idx = min((macro_idx + 1) * self.getNumSamplesPerMacroBatch(), self.train_data_xDB.shape[0])
            print("Loading macro batch {}, start idx {}, end idx {}".format(macro_idx, start_idx, end_idx))
            self.replaceTrainingData(start_idx, end_idx)
        # remember current macro batch index
        self.currentMacroBatch = macro_idx
def query_variable(self, query_):
        """
        Return an iterable which yields shared variables found by query_, from current group.

            Can take several forms, as shown below.

            All: return all variables under current group.
            string: treat as regex, return variables whose name fully match the regex.

        Raises TypeError for any other query type.
        """
        if query_ is All:
            return self._current_group_di.values()
        elif isinstance(query_, str):
            regex = re.compile(query_)
            # NOTE(review): this branch returns a name -> variable dict, while
            # the `All` branch returns only the values; iterating the dict
            # yields names. Confirm which form callers expect.
            return {k:v for k,v in self._current_group_di.items() if regex.fullmatch(k)}
        else:
            raise TypeError('Unknown query type "%s"' % type(query_))

# Project: monogreedy | Author: jinjunqi | (source-listing header left over from the code-search page)
def get_cost(aes, l, eye=True):
    """Get the sum of all the reconstruction costs of the AEs.

    aes: list. List of all the aes.
    l: shared variable or a list of shared variables for the importance
        of each AE; ``l[i]`` multiplies the cost of ``aes[i]``.
    eye: forwarded as ``face=eye`` to ``get_train_cost`` of every AE that
        is not a ConvolutionalAutoencoder.

    Returns the weighted sum of the costs, or None when ``aes`` is empty.
    """
    costs = []
    for i, ae in enumerate(aes):
        if isinstance(ae, ConvolutionalAutoencoder):
            costs.append(l[i] * ae.get_train_cost()[0])
        else:
            costs.append(l[i] * ae.get_train_cost(face=eye)[0])
    if not costs:
        return None
    # Left-to-right sum, done by hand so no `reduce` import is needed.
    cost = costs[0]
    for term in costs[1:]:
        cost = cost + term
    return cost
def evaluate_model(list_minibatchs_vl, eval_fn):
    """Evaluate the model over a set.

    list_minibatchs_vl: iterable of dicts with keys 'x' and 'y'.
    eval_fn: callable taking (x, y) and returning [error, output].

    Returns (error, output): the per-minibatch results stacked with
    ``np.vstack`` in iteration order, or (None, None) for an empty set.
    """
    error, output = None, None
    for mn_vl in list_minibatchs_vl:
        x = theano.shared(
            mn_vl['x'], borrow=True).get_value(borrow=True)
        y = theano.shared(
            mn_vl['y'], borrow=True).get_value(borrow=True)

        [error_mn, output_mn] = eval_fn(x, y)
        if error is None:
            # first minibatch: start the accumulators
            error = error_mn
            output = output_mn
        else:
            error = np.vstack((error, error_mn))
            output = np.vstack((output, output_mn))
    return error, output
def evaluate_model_3D_unsup(list_minibatchs_vl, eval_fn):
    """Evaluate the model over a set (inputs only, no targets).

    list_minibatchs_vl: iterable of dicts with key 'x'.
    eval_fn: callable taking x and returning [error, output, code].

    Returns (error, output, code): the per-minibatch results stacked with
    ``np.vstack`` in iteration order, or three Nones for an empty set.
    """
    error, output, code = None, None, None
    for mn_vl in list_minibatchs_vl:
        x = theano.shared(
            mn_vl['x'], borrow=True).get_value(borrow=True)

        [error_mn, output_mn, code_mn] = eval_fn(x)
        if error is None:
            # first minibatch: start the accumulators
            error = error_mn
            output = output_mn
            code = code_mn
        else:
            error = np.vstack((error, error_mn))
            output = np.vstack((output, output_mn))
            code = np.vstack((code, code_mn))

    return error, output, code
def shared_dataset(self, data_xy, train=False, borrow=True):
        """Load the data to the shared variables of Theano.

        Copy for once the data to the shared memory on the GPU.

        data_xy: pair (data_x, data_y).
        train: when True, the labels are first converted with
            ``self.labels(data_y, 10)``.
        borrow: forwarded to ``theano.shared``.

        Returns (shared_x, shared_y cast to 'int32').
        """
        data_x, data_y = data_xy
        if train:
            dim_output = 10 # case of MNIST
            data_y = np.int32(self.labels(data_y, dim_output))

        # Both arrays are stored as floatX; the labels are cast back to int
        # symbolically on return.
        shared_x = theano.shared(
                np.asarray(data_x, dtype = theano.config.floatX),
                borrow=borrow)
        shared_y = theano.shared (
                np.asarray(data_y, dtype = theano.config.floatX),
                borrow=borrow)
        return shared_x, T.cast(shared_y, 'int32')
def load_data(self, dataset_path, share = False):
        """Load the data set.

        dataset_path: path of a pickle file holding the triple
            (train_set, valid_set, test_set); it may be gzip-compressed.
        share: when True return the Theano shared (x, y) pairs, otherwise
            return the raw sets as loaded.

        Returns a list of three items: train, validation and test data.
        """
        import gzip

        # NOTE(review): the original open call was garbled in the listing
        # (only the "'rb')" tail survived). The gzip magic number decides
        # whether to decompress, so both plain and .gz pickles load.
        with open(dataset_path, 'rb') as probe:
            is_gzip = probe.read(2) == b'\x1f\x8b'
        opener = gzip.open if is_gzip else open
        # SECURITY: pickle runs arbitrary code on load; only open trusted files.
        with opener(dataset_path, 'rb') as f:
            train_set, valid_set, test_set = pickle.load(f)

        # share the data
        train_set_x, train_set_y = self.shared_dataset(train_set, train=True)
        valid_set_x, valid_set_y = self.shared_dataset(valid_set)
        test_set_x, test_set_y   = self.shared_dataset(test_set)
        if share:
            reval = [(train_set_x, train_set_y), (valid_set_x, valid_set_y), (test_set_x, test_set_y)]
        else:
            reval = [train_set, valid_set, test_set] # NON-shared data (they didn't share the data in the code Crino!!!!!)
        return reval
def shared_dataset_xy(self, data_xy, nlabels = 10, train = False, task="cls", borrow=True):
        """Load the data to the shared variables of Theano.

        Copy for once the data to the shared memory on the GPU.

        data_xy: pair (data_x, data_y).
        nlabels: second argument passed to ``self.labels``.
        train, task: the labels are converted with ``self.labels`` only
            when ``train`` is true and ``task == 'cls'``.
        borrow: forwarded to ``theano.shared``.

        Returns (shared_x, shared_y cast to 'int32').
        """
        data_x, data_y = data_xy
        if (train) and (task=='cls'):
            data_y = np.int32(self.labels(data_y, nlabels))

        # Both arrays are stored as floatX; the labels are cast back to int
        # symbolically on return.
        shared_x = theano.shared(
                np.asarray(data_x, dtype = theano.config.floatX),
                borrow=borrow)
        shared_y = theano.shared (
                np.asarray(data_y, dtype = theano.config.floatX),
                borrow=borrow)
        return shared_x, T.cast(shared_y, 'int32')
def adadelta(tparams, grads, x, y, mask, lengths, cost):
    """Compile the two-stage Adadelta functions.

    Returns ``(f_grad_shared, f_update)``: the first evaluates ``cost`` on
    ``(x, y, mask, lengths)`` while storing the gradients and refreshing
    the running average of their squares; the second takes no inputs and
    applies the parameter step computed from those stored values.
    """
    def _zeros(name_template):
        # One zero-filled shared variable per parameter, named after it.
        return [theano.shared(value.get_value() * numpy_floatX(0.), name=name_template % key)
                for key, value in tparams.iteritems()]

    grad_store = _zeros('%s_grad')
    sq_step_avg = _zeros('%s_rup2')
    sq_grad_avg = _zeros('%s_rgrad2')

    store_grads = [(slot, grad) for slot, grad in zip(grad_store, grads)]
    decay_sq_grads = [(avg, 0.95 * avg + 0.05 * (grad ** 2))
                      for avg, grad in zip(sq_grad_avg, grads)]

    f_grad_shared = theano.function(
        [x, y, mask, lengths], cost,
        updates=store_grads + decay_sq_grads,
        name='adadelta_f_grad_shared')

    # Step direction: RMS of past steps over RMS of gradients, negated.
    steps = [-T.sqrt(step_avg + 1e-6) / T.sqrt(grad_avg + 1e-6) * slot
             for slot, step_avg, grad_avg in zip(grad_store, sq_step_avg, sq_grad_avg)]
    decay_sq_steps = [(avg, 0.95 * avg + 0.05 * (step ** 2))
                      for avg, step in zip(sq_step_avg, steps)]
    move_params = [(param, param + step)
                   for param, step in zip(tparams.values(), steps)]

    f_update = theano.function(
        [], [],
        updates=decay_sq_steps + move_params,
        on_unused_input='ignore',
        name='adadelta_f_update')

    return f_grad_shared, f_update
def adadelta(tparams, grads, weightVector, iVector, jVector, cost):
    """Compile the two-stage Adadelta functions.

    Returns ``(f_grad_shared, f_update)``: the first evaluates ``cost`` on
    ``(weightVector, iVector, jVector)`` while storing the gradients and
    refreshing the running average of their squares; the second takes no
    inputs and applies the parameter step computed from those values.
    """
    def _zeros(name_template):
        # One zero-filled shared variable per parameter, named after it.
        return [theano.shared(value.get_value() * numpy_floatX(0.), name=name_template % key)
                for key, value in tparams.iteritems()]

    grad_store = _zeros('%s_grad')
    sq_step_avg = _zeros('%s_rup2')
    sq_grad_avg = _zeros('%s_rgrad2')

    store_grads = [(slot, grad) for slot, grad in zip(grad_store, grads)]
    decay_sq_grads = [(avg, 0.95 * avg + 0.05 * (grad ** 2))
                      for avg, grad in zip(sq_grad_avg, grads)]

    f_grad_shared = theano.function(
        [weightVector, iVector, jVector], cost,
        updates=store_grads + decay_sq_grads,
        name='adadelta_f_grad_shared')

    # Step direction: RMS of past steps over RMS of gradients, negated.
    steps = [-T.sqrt(step_avg + 1e-6) / T.sqrt(grad_avg + 1e-6) * slot
             for slot, step_avg, grad_avg in zip(grad_store, sq_step_avg, sq_grad_avg)]
    decay_sq_steps = [(avg, 0.95 * avg + 0.05 * (step ** 2))
                      for avg, step in zip(sq_step_avg, steps)]
    move_params = [(param, param + step)
                   for param, step in zip(tparams.values(), steps)]

    f_update = theano.function(
        [], [],
        updates=decay_sq_steps + move_params,
        on_unused_input='ignore',
        name='adadelta_f_update')

    return f_grad_shared, f_update
def shared_dropout_layer(shape, use_noise, trng, value, scaled=True):
    """Return a symbolic dropout mask of the given shape.

    shape: shape of the mask to sample.
    use_noise: symbolic switch selecting the sampled mask (training) or
        the constant fallback (testing).
    trng: Theano random stream providing ``binomial``.
    value: probability passed as ``p`` to ``trng.binomial``.
    scaled: when True the sampled mask is divided by ``value`` at training
        time, so no rescaling is needed at test time.

    NOTE(review): the arguments of both ``tensor.switch`` calls and the
    ``else`` were cut off in the listing this came from. The condition, the
    dtype and the constant test-time branches are reconstructed -- confirm
    against the original source.
    """
    dtype = theano.config.floatX
    #re-scale dropout at training time, so we don't need to at test time
    if scaled:
        proj = tensor.switch(
            use_noise,
            trng.binomial(shape, p=value, n=1,
                          dtype=dtype) / value,
            theano.shared(numpy.asarray(1., dtype=dtype)))
    else:
        proj = tensor.switch(
            use_noise,
            trng.binomial(shape, p=value, n=1,
                          dtype=dtype),
            theano.shared(numpy.asarray(value, dtype=dtype)))
    return proj

# feedforward layer: affine transformation + point-wise nonlinearity
def mdclW(num_filters,num_channels,filter_size,winit,name,scales):
    """Build the symbolic weight tensor of a multiscale dilated filter.

    A single base filter is written into a larger zero tensor once per
    entry of ``scales``, each time strided by that scale and multiplied by
    its own per-filter coefficient.

    num_filters, num_channels, filter_size: dimensions of the base filter.
    winit: initializer exposing ``sample(shape)`` for the base filter.
    name: prefix for the names of the shared variables created here.
    scales: sequence of integer dilation factors; the last entry fixes the
        total filter size.

    Returns the symbolic tensor of shape
    (num_filters, num_channels, size, size).
    """
    # Coefficient Initializer
    sinit = lasagne.init.Constant(1.0/(1+len(scales)))
    # Total filter size
    size = filter_size + (filter_size-1)*(scales[-1]-1)
    # Multiscale Dilated Filter
    W = T.zeros((num_filters,num_channels,size,size))
    # Undilated Base Filter
    baseW = theano.shared(lasagne.utils.floatX(winit.sample((num_filters,num_channels,filter_size,filter_size))),name=name+'.W')
    # Iterate the scales themselves (not enumerate() pairs, which cannot be
    # used in the slice arithmetic), backwards so that the main filter is
    # placed on top.
    for scale in scales[::-1]:
        offset = scales[-1] - scale
        coeff = theano.shared(lasagne.utils.floatX(sinit.sample(num_filters)), name+'.coeff_'+str(scale))
        W = T.set_subtensor(W[:,:,offset:size-offset:scale,offset:size-offset:scale],
                            baseW*coeff.dimshuffle(0,'x','x','x'))
    return W

# Subpixel Upsample Layer from (https://arxiv.org/abs/1609.05158)
# This layer uses a set of r^2 set_subtensor calls to reorganize the tensor in a subpixel-layer upscaling style
# as done in the ESPCN Magic Pony paper for super-resolution.
# r is the upscale factor.
# Project: text2image | Author: emansim | (source-listing header left over from the code-search page)
def load_weights(params, path, num_conv):
    print 'Loading gan weights from ' + path
    with h5py.File(path, 'r') as hdf5:
        params['skipthought2image'] = theano.shared(np.copy(hdf5['skipthought2image']))
        params['skipthought2image-bias'] = theano.shared(np.copy(hdf5['skipthought2image-bias']))

        for i in xrange(num_conv):
            params['W_conv{}'.format(i)] = theano.shared(np.copy(hdf5['W_conv{}'.format(i)]))
            params['b_conv{}'.format(i)] = theano.shared(np.copy(hdf5['b_conv{}'.format(i)]))

            # Flip w,h axes
            params['W_conv{}'.format(i)] = params['W_conv{}'.format(i)][:,:,::-1,::-1]

            w = np.abs(np.copy(hdf5['W_conv{}'.format(i)]))
            print 'W_conv{}'.format(i), np.min(w), np.mean(w), np.max(w)
            b = np.abs(np.copy(hdf5['b_conv{}'.format(i)]))
            print 'b_conv{}'.format(i), np.min(b), np.mean(b), np.max(b)

    return params
def __init__(self, dimX, dimReadAttent, dimWriteAttent, dimRNNEnc, dimRNNDec, dimZ, runSteps, inputData, valData=None, testData=None, pathToWeights=None):
        """Store the model dimensions, upload the data and build the graph.

        The dim* arguments, runSteps and pathToWeights are stored on self
        and forwarded to ``build_lstm_attention_vae``. inputData (and the
        optional valData / testData) are wrapped in Theano shared
        variables; the number of batches of each set is its first
        dimension divided by the module-level ``batch_size``.
        """
        self.dimX = dimX
        self.dimReadAttent = dimReadAttent
        self.dimWriteAttent = dimWriteAttent
        self.dimRNNEnc = dimRNNEnc
        self.dimRNNDec = dimRNNDec
        self.dimZ = dimZ
        self.runSteps = runSteps
        self.pathToWeights = pathToWeights

        # Floor division keeps the batch counts integral on every Python
        # version.
        self.n_batches = inputData.shape[0] // batch_size
        self.train_data = theano.shared(inputData)
        # drop the local reference to the raw array
        del inputData

        # `is not None` rather than `!= None`: comparing an array with
        # `!=` is element-wise and has no single truth value.
        if valData is not None:
            self.n_val_batches = valData.shape[0] // batch_size
            self.val_data = theano.shared(valData)
            del valData

        if testData is not None:
            self.n_test_batches = testData.shape[0] // batch_size
            self.test_data = theano.shared(testData)
            del testData

        self._kl_final, self._logpxz, self._log_likelihood, self._c_ts, self._c_ts_gener, self._x, self._run_steps, self._updates_train, self._updates_gener, self._read_attent_params, self._write_attent_params, self._write_attent_params_gener, self._params = build_lstm_attention_vae(self.dimX, self.dimReadAttent, self.dimWriteAttent, self.dimRNNEnc, self.dimRNNDec, self.dimZ, self.runSteps, self.pathToWeights)
def shared_dataset_x(data_x, borrow=True):
    """ Function that loads the dataset into shared variables

    The reason we store our dataset in shared variables is to allow
    Theano to copy it into the GPU memory (when code is run on GPU).
    Since copying data into the GPU is slow, copying a minibatch everytime
    is needed (the default behaviour if the data is not in a shared
    variable) would lead to a large decrease in performance.

    data_x: array-like inputs; converted to ``floatX``.
    borrow: forwarded to ``theano.shared``.

    Returns the shared variable holding ``data_x``.
    """
    # When storing data on the GPU it has to be stored as floats, hence
    # the conversion to ``floatX``.
    shared_x = theano.shared(numpy.asarray(data_x,
                                           dtype=theano.config.floatX),
                             borrow=borrow)
    return shared_x
def __init__(self, rng, n_in, n_out, minibatch_size):
        """Create the parameters of a GRU layer.

        Notation follows "An Empirical Exploration of Recurrent Network
        Architectures". The two gates share one set of matrices of width
        ``2 * n_out``; the candidate state has its own set of width
        ``n_out``. ``self.params`` lists all six trainable variables.
        """
        super(GRULayer, self).__init__()

        self.n_in, self.n_out = n_in, n_out
        gate_width = n_out*2

        # Initial hidden state: one zero row per minibatch element
        zero_state = np.zeros((minibatch_size, n_out)).astype(theano.config.floatX)
        self.h0 = theano.shared(value=zero_state, name='h0', borrow=True)

        # Gate parameters
        self.W_x = weights_Glorot(n_in, gate_width, 'W_x', rng)
        self.W_h = weights_Glorot(n_out, gate_width, 'W_h', rng)
        self.b = weights_const(1, gate_width, 'b', 0)

        # Input (candidate state) parameters
        self.W_x_h = weights_Glorot(n_in, n_out, 'W_x_h', rng)
        self.W_h_h = weights_Glorot(n_out, n_out, 'W_h_h', rng)
        self.b_h = weights_const(1, n_out, 'b_h', 0)

        self.params = [
            self.W_x, self.W_h, self.b,
            self.W_x_h, self.W_h_h, self.b_h,
        ]
def setParams(self, W_IN, b_IN):
        """Overwrite the weights and biases with the given arrays.

        W_IN: 2-D array whose shape must equal that of ``self.W``.
        b_IN: sequence whose length must equal that of ``self.b``.

        On a shape mismatch nothing is changed and a diagnostic is printed.

        NOTE(review): in the listing this came from, the accepting branch
        held only commented-out code. Updating the existing shared
        variables in place is a reconstruction from the method name and the
        shape check -- confirm against the original source.
        """
        # dimension check
        if (W_IN.shape[0] == self.W.shape.eval()[0] and
                W_IN.shape[1] == self.W.shape.eval()[1] and
                len(b_IN) == self.b.shape.eval()[0]):
            # set_value keeps the same shared variables, so graphs already
            # compiled against them see the new values.
            self.W.set_value(W_IN)
            self.b.set_value(b_IN)
        else:
            # Single-argument print calls give identical output whether
            # print is a statement or a function.
            print("NEW_logistic_sgd:Errore nelle dimensioni delle matrici passate")
            print("W(input) shape %s W shape %s" % (W_IN.shape, self.W.shape.eval()))
            print("b(input) shape %s b shape %s" % (len(b_IN), self.b.shape.eval()))
def shared_dataset(data_x, data_y, borrow=True):
    """ Function that loads the dataset into shared variables

    The reason we store our dataset in shared variables is to allow
    Theano to copy it into the GPU memory (when code is run on GPU).
    Since copying data into the GPU is slow, copying a minibatch everytime
    is needed (the default behaviour if the data is not in a shared
    variable) would lead to a large decrease in performance.

    data_x, data_y: array-like inputs and labels; both stored as ``floatX``.
    borrow: forwarded to ``theano.shared``.

    Returns (shared_x, shared_y cast to 'int32').
    """
    shared_x = theano.shared(numpy.asarray(data_x,
                                           dtype=theano.config.floatX),
                             borrow=borrow)

    shared_y = theano.shared(numpy.asarray(data_y,
                                           dtype=theano.config.floatX),
                             borrow=borrow)

    # The labels are stored as floats but returned through an integer cast.
    return shared_x, T.cast(shared_y, 'int32')
def dist_info_sym(self, obs_var, latent_var=None):  # this is meant to be for one path!
        """Return the symbolic mean and log-std for the given observations.

        obs_var: symbolic batch of observations, one row per sample.
        latent_var: symbolic batch of latents aligned with ``obs_var``;
            when None, ``self.latent_fix`` is repeated for every row.

        Returns ``dict(mean=..., log_std=...)``.
        """
        # now this is not doing anything! And for computing the dist_info_vars of npo_snn_rewardMI it doesn't work
        if latent_var is None:
            latent_var1 = theano.shared(np.expand_dims(self.latent_fix, axis=0))  # new fix to avoid putting the latent as an input: just take the one fixed!
            latent_var = TT.tile(latent_var1, [obs_var.shape[0], 1])

        # generate the generalized input (append latents to obs.)
        if self.bilinear_integration:
            # Per-sample outer product of observation and latent, flattened
            # to two dimensions so it can be concatenated along axis 1.
            # NOTE(review): the `outdim=2` argument was cut off in the
            # listing and is reconstructed -- confirm.
            bilinear = TT.flatten(obs_var[:, :, np.newaxis] * latent_var[:, np.newaxis, :],
                                  outdim=2)
            extended_obs_var = TT.concatenate([obs_var, latent_var, bilinear], axis=1)
        else:
            extended_obs_var = TT.concatenate([obs_var, latent_var], axis=1)
        mean_var, log_std_var = L.get_output([self._l_mean, self._l_log_std], extended_obs_var)
        if self.min_std is not None:
            # clamp the log-std from below
            log_std_var = TT.maximum(log_std_var, np.log(self.min_std))
        return dict(mean=mean_var, log_std=log_std_var)
def step_gibbs(self, r_h, r_v, h, *params):
        '''Step Gibbs sample.

        Args:
            r_h (theano.randomstream): random variables for hiddens.
            r_v (theano.randomstream): random variables for visibles.
            h (T.tensor): hidden state.
            *params: theano shared variables.

        Returns:
            T.tensor: hidden samples.
            T.tensor: visible samples.
            T.tensor: conditional hidden probability.
            T.tensor: conditional visible probability.

        '''
        # Sample the visibles from the current hiddens, then the hiddens
        # from those visibles.
        v, pv = self.step_sv_h(r_v, h, *params)
        h, ph = self.step_sh_v(r_h, v, *params)
        return h, v, ph, pv
def step_free_energy(self, x, beta, *params):
        '''Step free energy function.

        Args:
            x (T.tensor): data sample.
            beta (float): beta value for annealing.
            *params: theano shared variables.

        Returns:
            T.tensor: free energy.

        '''
        W, v_params, h_params = self.split_params(*params)

        vis_term = beta * self.v_dist.get_energy_bias(x, *v_params)
        x = self.v_dist.scale_for_energy_model(x, *v_params)
        # NOTE(review): the left operand of the product was lost in the
        # listing; T.dot(x, W) is reconstructed from the surviving ", W)".
        hid_act = beta * (T.dot(x, W) + self.h_dist.get_center(*h_params))
        fe = -vis_term - T.log(1. + T.exp(hid_act)).sum(axis=1)
        return fe
def step_free_energy_h(self, h, beta, *params):
        '''Step free energy function for hidden states.

        Args:
            h (T.tensor): hidden sample.
            beta (float): beta value for annealing.
            *params: theano shared variables.

        Returns:
            T.tensor: free energy.

        '''
        W, v_params, h_params = self.split_params(*params)

        hid_term = beta * self.h_dist.get_energy_bias(h, *h_params)
        h = self.h_dist.scale_for_energy_model(h, *h_params)
        # NOTE(review): the left operand of the product was lost in the
        # listing; T.dot(h, W.T) is reconstructed from the surviving
        # ", W.T)".
        vis_act = beta * (T.dot(h, W.T) + self.v_dist.get_center(*v_params))
        fe = -hid_term - T.log(1. + T.exp(vis_act)).sum(axis=1)
        return fe
def _step(self, m, y, h_, Ur):
        '''Step function for RNN call.

        Args:
            m (T.tensor): masks.
            y (T.tensor): inputs.
            h_ (T.tensor): recurrent state.
            Ur (theano.shared): recurrent connection.

        Returns:
            T.tensor: next recurrent state.

        '''
        # NOTE(review): the left operand of the product was lost in the
        # listing; T.dot(h_, Ur) is reconstructed from the surviving ", Ur)".
        preact = T.dot(h_, Ur) + y
        h      = T.tanh(preact)
        # Where the mask is 0 the previous state is carried over unchanged.
        h      = m * h + (1 - m) * h_
        return h
def sgd(lr, tparams, grads, inp, cost, extra_ups=[], extra_outs=[],
    '''Stochastic gradient descent'''
    # NOTE(review): the end of the signature is missing from this listing.
    # The body reads `exclude_params` and `profile`, which presumably were
    # declared there -- confirm against the original source.
    # NOTE(review): extra_ups / extra_outs use mutable list defaults; they
    # are only read here, not mutated.

    # One shared buffer per parameter, used to hold its gradient.
    gshared = [theano.shared(p.get_value() * 0., name='%s_grad'%k)
               for k, p in tparams.iteritems()]

    # Updates copying each gradient into its buffer.
    gsup = [(gs, g) for gs, g in zip(gshared, grads)]

    # Evaluates the cost (plus extra outputs) and stores the gradients.
    f_grad_shared = theano.function(
        inp, [cost]+extra_outs, updates=gsup+extra_ups, profile=profile)

    # Plain gradient step from the stored gradients.
    # NOTE(review): the filter below lost its operand in the listing
    # (`if not in exclude_params`); presumably it tested the parameter's
    # name -- confirm.
    pup = [(p, p - lr * g) for p, g in zip(tools.itemlist(tparams), gshared)
        if not in exclude_params]

    # theano.function expects a list of inputs.
    if not isinstance(lr, list): lr = [lr]
    # Takes the learning rate as its input and applies the parameter step.
    f_update = theano.function(lr, [], updates=pup, profile=profile)

    return f_grad_shared, f_update
def _generate_train_model_function(self, scores):
        """Compile ``self.train_model``, one gradient step on a pairwise loss.

        scores: array stored in the shared variable ``self.S`` and indexed
            as ``S[u, i, :]``.

        The compiled function takes three integer vectors (u, i, j),
        returns the cost and updates ``self.W`` by plain gradient descent
        with step ``self._learning_rate``.
        """
        u = T.lvector('u')
        i = T.lvector('i')
        j = T.lvector('j')
        self.W = theano.shared(numpy.zeros((self._dim)).astype('float32'), name='W')
        self.S = theano.shared(scores, name='S')
        # NOTE(review): the left operand of both products was lost in the
        # listing; `self.W` is reconstructed as the only weight in scope.
        x_ui = T.dot(self.W, self.S[u, i, :].T)
        x_uj = T.dot(self.W, self.S[u, j, :].T)
        x_uij = x_ui - x_uj
        # Log-sigmoid of the score differences minus an L2 penalty on W.
        obj = T.sum(
            T.log(T.nnet.sigmoid(x_uij)).sum() -
            self._lambda_w * 0.5 * (self.W ** 2).sum()
        )
        cost = -obj
        g_cost_W = T.grad(cost=cost, wrt=self.W)
        updates = [
            (self.W, self.W - self._learning_rate * g_cost_W)
        ]
        self.train_model = theano.function(inputs=[u, i, j], outputs=cost, updates=updates)
def __call__(self, params, cost):
        """Return the Adam update pairs for ``params`` given ``cost``.

        Gradients are norm-clipped with ``self.clipnorm`` and passed
        through ``self.regularizer`` before use; the new parameter values
        are passed through ``self.regularizer.weight_regularize``. A shared
        step counter starting at 1 drives the bias correction and the decay
        of the first-moment rate (``self.b1 * self.l ** (t - 1)``).
        """
        updates = []
        grads = T.grad(cost, params)
        grads = clip_norms(grads, self.clipnorm)
        t = theano.shared(floatX(1.))
        b1_t = self.b1*self.l**(t-1)

        for p, g in zip(params, grads):
            g = self.regularizer.gradient_regularize(p, g)
            m = theano.shared(p.get_value() * 0.)
            v = theano.shared(p.get_value() * 0.)

            m_t = b1_t*m + (1 - b1_t)*g
            v_t = self.b2*v + (1 - self.b2)*g**2
            # bias-corrected moment estimates
            m_c = m_t / (1-self.b1**t)
            v_c = v_t / (1-self.b2**t)
            # NOTE(review): the step-size operand was lost in the listing;
            # `self.lr` is reconstructed -- confirm the attribute name.
            p_t = p - (self.lr * m_c) / (T.sqrt(v_c) + self.e)
            p_t = self.regularizer.weight_regularize(p_t)
            updates.append((m, m_t))
            updates.append((v, v_t))
            updates.append((p, p_t))
        updates.append((t, t + 1.))
        return updates
def metropolis_hastings_accept(energy_prev, energy_next, s_rng):
    """
    Performs a Metropolis-Hastings accept-reject move.

    Parameters
    ----------
    energy_prev: theano vector
        Symbolic theano tensor which contains the energy associated with the
        configuration at time-step t.
    energy_next: theano vector
        Symbolic theano tensor which contains the energy associated with the
        proposed configuration at time-step t+1.
    s_rng: theano.tensor.shared_randomstreams.RandomStreams
        Theano shared random stream object used to generate the random number
        used in proposal.

    Returns
    -------
    return: boolean
        True if move is accepted, False otherwise
    """
    ediff = energy_prev - energy_next
    # Accept where exp(energy difference) is at least a uniform draw taken
    # with the same shape as energy_prev.
    return (TT.exp(ediff) - s_rng.uniform(size=energy_prev.shape)) >= 0
def draw(self, **kwargs):
        """
        Returns a new position obtained after `n_steps` of HMC simulation.

        Parameters
        ----------
        kwargs: dictionary
            The `kwargs` dictionary is passed to the shared variable
            (self.positions) `get_value()` function.  For example, to avoid
            copying the shared variable value, consider passing `borrow=True`.

        Returns
        -------
        rval: numpy matrix
            Numpy matrix whose of dimensions similar to `initial_position`.

        NOTE(review): the code visible here neither runs a simulation step
        nor forwards `kwargs`; it always returns a copy (`borrow=False`) of
        the current positions. A preceding simulation call may have been
        lost from this listing -- confirm against the original source.
        """
        return self.positions.get_value(borrow=False)
def __call__(self, params, cost):
        """Return the Adam update pairs for ``params`` given ``cost``.

        Gradients are norm-clipped with ``self.clipnorm`` and passed
        through ``self.regularizer`` before use; the new parameter values
        are passed through ``self.regularizer.weight_regularize``. A shared
        step counter starting at 1 drives the bias correction and the decay
        of the first-moment rate (``self.b1 * self.l ** (t - 1)``).
        """
        updates = []
        grads = T.grad(cost, params)
        grads = clip_norms(grads, self.clipnorm)
        t = theano.shared(floatX(1.))
        b1_t = self.b1*self.l**(t-1)

        for p, g in zip(params, grads):
            g = self.regularizer.gradient_regularize(p, g)
            m = theano.shared(p.get_value() * 0.)
            v = theano.shared(p.get_value() * 0.)

            m_t = b1_t*m + (1 - b1_t)*g
            v_t = self.b2*v + (1 - self.b2)*g**2
            # bias-corrected moment estimates
            m_c = m_t / (1-self.b1**t)
            v_c = v_t / (1-self.b2**t)
            # NOTE(review): the step-size operand was lost in the listing;
            # `self.lr` is reconstructed -- confirm the attribute name.
            p_t = p - (self.lr * m_c) / (T.sqrt(v_c) + self.e)
            p_t = self.regularizer.weight_regularize(p_t)
            updates.append((m, m_t))
            updates.append((v, v_t))
            updates.append((p, p_t))
        updates.append((t, t + 1.))
        return updates
def _init_params(self):
        # Creates one hidden-to-hidden weight matrix per layer (self.W_hhs),
        # biases for the layers after the first (self.b_hhs) and, when weight
        # noise is enabled, matching zero-initialised noise variables.
        # NOTE(review): several continuation lines of this method are missing
        # from the listing (flagged below); the block does not parse as-is.
        self.W_hhs = []
        self.b_hhs = []
        for dx in xrange(self.n_layers):
            # NOTE(review): the remaining arguments of init_fn are missing.
            W_hh = self.init_fn[dx](self.n_hids[(dx-1)%self.n_layers],
            # NOTE(review): the right-hand side of the name format is missing.
            self.W_hhs.append(theano.shared(value=W_hh, name="W%d_%s" %
            if dx > 0:
                    # NOTE(review): only the tail of the bias creation
                    # survives; presumably it was appended to self.b_hhs.
                    name='b%d_%s' %(dx,
        # All weights first, then all biases.
        self.params = [x for x in self.W_hhs] + [x for x in self.b_hhs]
        # The same gradient scale for every parameter.
        self.params_grad_scale = [self.grad_scale for x in self.params]
        if self.weight_noise:
            # NOTE(review): the `name=` expressions below lost their suffix
            # and closing parenthesis.
            self.nW_hhs = [theano.shared(x.get_value()*0, name='noise_' for x in self.W_hhs]
            self.nb_hhs = [theano.shared(x.get_value()*0, name='noise_' for x in self.b_hhs]
            self.noise_params = [x for x in self.nW_hhs] + [x for x in self.nb_hhs]
            # One shape function per noise variable, built from its shape.
            self.noise_params_shape_fn = [constant_shape(x.get_value().shape)
                            for x in self.noise_params]
def _init_params(self):
        # Create the shared variables W_em, b_em, U_em and c_em and register
        # them in self.params with a common gradient scale.
        #
        # NOTE(review): this method does not parse as written. The init_fn
        # call and the name= arguments of W_em, b_em, U_em and c_em are cut
        # off mid-expression; restore the missing text from the original
        # source before using this code.
        self.iBlocks = 1  # number of blocks in the input (from lower layer)

        W_em = self.init_fn(self.n_in,
        self.W_em = theano.shared(W_em,
                                  name='W_%s' %
        self.b_em = theano.shared(
            self.bias_fn(self.n_class, self.bias_scale, self.rng),
            name='b_%s' %

        # U_em has shape (iBlocks, n_class, n_in, n_words_class); its values
        # are rng.rand(...) - 0.5 divided by (n_words_class * n_in), cast to
        # theano.config.floatX.
        U_em = theano.shared(((self.rng.rand(self.iBlocks, self.n_class, 
            self.n_in, self.n_words_class)-0.5)/(self.n_words_class*self.n_in)
            ).astype(theano.config.floatX), name='U_%s'
        self.U_em = U_em
        # NOTE(review): c_em is hard-coded to float32 while U_em uses
        # theano.config.floatX -- confirm this is intentional.
        c_em = numpy.zeros((self.n_class, self.n_words_class), dtype='float32')
        # Only referenced by the commented-out line below; otherwise unused.
        n_words_last_class = self.n_out % self.n_words_class
        #c_em[-1, n_words_last_class:] = -numpy.inf
        self.c_em = theano.shared(c_em, name='c_%s' %

        self.params = [self.W_em, self.b_em, self.U_em, self.c_em]
        # Every parameter gets the same gradient scale.
        self.params_grad_scale = [self.grad_scale for x in self.params]
def __init__(self, rng, std=0.1, ndim=0, avg=0, shape_fn=None):
        """Set up a Gaussian-noise layer.

        Parameters
        ----------
        rng : random number generator
            Must not be None. It is passed to the parent constructor and its
            ``randint`` seeds the ``RandomStreams`` instance in ``self.trng``.
        std : float
            Stored as ``self.std``.
        ndim : int
            Number of dimensions of the noise buffer allocated when
            ``shape_fn`` is given.
        avg : float
            Stored as ``self.avg``.
        shape_fn : callable or None
            When truthy, a shared noise buffer is allocated and registered,
            together with ``shape_fn``, in the layer's noise-parameter lists.
        """
        assert rng is not None, "random number generator should not be empty!"
        super(GaussianNoise, self).__init__(0, 0, rng)

        # These used to read `scale` (an undefined name) and `self.avg`
        # (not yet set); both now take the constructor arguments.
        self.std = std
        self.avg = avg
        self.ndim = ndim
        self.shape_fn = shape_fn
        if self.shape_fn:
            # Name is not important as it is not a parameter of the model.
            # Placeholder buffer with `ndim` axes of length 2.
            # NOTE(review): the original call was truncated after
            # `(2,)*ndim,`; dtype=theano.config.floatX is assumed -- confirm.
            self.noise_term = theano.shared(
                numpy.zeros((2,)*ndim, dtype=theano.config.floatX))
            # Presumably both lists are initialised by the parent
            # constructor -- verify against the base class.
            self.noise_params += [self.noise_term]
            self.noise_params_shape_fn += [shape_fn]
        self.trng = RandomStreams(rng.randint(1e5))
def _init_params(self):
        # Allocate one weight per index in range(self.n_layers) into
        # self.W_hhs, collect weights and biases in self.params, and -- when
        # self.weight_noise is truthy -- create zero-valued noise variables
        # for every weight and bias.
        #
        # NOTE(review): this method does not parse as written. The init_fn
        # call, the "W%d_%s" name formatting, the whole statement under
        # `if dx > 0:` and both 'noise_' name arguments are cut off
        # mid-expression. Restore the missing text from the original source
        # before using this code; nothing below appends to self.b_hhs in the
        # visible text.
        self.W_hhs = []
        self.b_hhs = []
        for dx in xrange(self.n_layers):
            # The first size argument wraps around: for dx == 0 the index
            # (dx-1) % n_layers selects the last entry of self.n_hids.
            W_hh = self.init_fn[dx](self.n_hids[(dx-1)%self.n_layers],
            self.W_hhs.append(theano.shared(value=W_hh, name="W%d_%s" %
            if dx > 0:
                    name='b%d_%s' %(dx,
        self.params = [x for x in self.W_hhs] + [x for x in self.b_hhs]
        # Every parameter gets the same gradient scale.
        self.params_grad_scale = [self.grad_scale for x in self.params]
        if self.weight_noise:
            # x.get_value()*0 yields an all-zero array shaped like x.
            self.nW_hhs = [theano.shared(x.get_value()*0, name='noise_' for x in self.W_hhs]
            self.nb_hhs = [theano.shared(x.get_value()*0, name='noise_' for x in self.b_hhs]
            self.noise_params = [x for x in self.nW_hhs] + [x for x in self.nb_hhs]
            # One constant_shape(...) entry per noise variable, built from
            # that variable's current shape.
            self.noise_params_shape_fn = [constant_shape(x.get_value().shape)
                            for x in self.noise_params]
def fit(self, x):
        """Fit the ZCA whitening transform to ``x``.

        ``x`` is copied and flattened to two dimensions (first axis kept,
        remaining axes merged), centred by its per-column mean, and the
        covariance of the centred data is decomposed with an SVD.

        Parameters
        ----------
        x : numpy array
            Data with samples along the first axis. The input is not
            modified; a copy is centred in place, so an integer array will
            fail on the in-place subtraction of the float mean.

        Sets
        ----
        self.ZCA_mat : shared variable
            ``U diag(1 / sqrt(S + regularization)) U^T``.
        self.inv_ZCA_mat : shared variable
            ``U diag(sqrt(S + regularization)) U^T``, the inverse transform.
        self.mean : shared variable
            Per-column mean of the flattened data.

        NOTE(review): the ``np.prod`` / ``np.dot`` calls were reconstructed
        from truncated expressions (only their trailing operands were left);
        confirm against the original source.
        """
        s = x.shape
        # int(...) keeps the reshape valid even when s[1:] is empty, where
        # np.prod returns the float 1.0.
        x = x.copy().reshape((s[0], int(np.prod(s[1:]))))
        m = np.mean(x, axis=0)
        x -= m
        sigma = np.dot(x.T, x) / x.shape[0]
        U, S, _ = linalg.svd(sigma)
        # self.regularization keeps the division finite for (near-)zero
        # singular values.
        tmp = np.dot(U, np.diag(1./np.sqrt(S+self.regularization)))
        tmp2 = np.dot(U, np.diag(np.sqrt(S+self.regularization)))
        self.ZCA_mat = th.shared(np.dot(tmp, U.T).astype(th.config.floatX))
        self.inv_ZCA_mat = th.shared(np.dot(tmp2, U.T).astype(th.config.floatX))
        self.mean = th.shared(m.astype(th.config.floatX))
def compile(
        self,s_inputs_, s_loss_, v_params_, s_grads_=None, s_reg_=0,
        fetches_=None, updates_=None, givens_=None,
        trunc_grad_=None, profile_=False):
        """Compile the optimizer against a specific model.

        Args:
            s_inputs_: list of symbolic input tensors, including label.
            s_loss_: optimization loss, symbolic scalar.
            v_params_: list of shared parameters to optimize.
            s_grads_: list of gradients to apply, in the same order as
                v_params_. Default None (use autodiff on loss + reg).
            s_reg_: symbolic regularization term, default 0 (no
                regularization).
            fetches_: not used in this method body.
            updates_: update operations for shared values after a step of
                optimization, usually RNN states. Takes the form
                [(v_var, s_new_var), ...]. Not used in this method body.
            givens_: not used in this method body.
            trunc_grad_: when a float, every gradient is clipped
                element-wise to [-trunc_grad_, trunc_grad_]; any other value
                (including None and ints) leaves the gradients unclipped.
            profile_: not used in this method body.

        Returns: None

        Sets self.s_loss, self.s_reg and self.s_grads.
        """
        self.s_loss = s_loss_
        self.s_reg = s_reg_
        if s_grads_ is None:
            s_grads_ = T.grad(
                self.s_loss + self.s_reg, list(v_params_), disconnected_inputs='warn')

        # isinstance (rather than an exact type comparison) also honours
        # float subclasses such as numpy.float64.
        if isinstance(trunc_grad_, float):
            self.s_grads = [T.clip(g, -trunc_grad_, trunc_grad_) for g in s_grads_]
        else:
            # Without this branch the clipped gradients would always be
            # overwritten by the raw ones.
            self.s_grads = s_grads_
def init_tparams(params):
    """Wrap every entry of ``params`` in a Theano shared variable.

    Parameters
    ----------
    params : mapping of name -> value
        Each value is passed to ``theano.shared`` with ``borrow=True`` and
        the key as the variable name.

    Returns
    -------
    OrderedDict
        Maps each key to its shared variable, in the iteration order of
        ``params``. Every variable is tagged via
        ``add_role(..., PARAMETER)``.
    """
    tparams = OrderedDict()
    # .items() works on both Python 2 and 3 (iteritems() is Python 2 only).
    for kk, pp in params.items():
        tparams[kk] = theano.shared(pp, name=kk, borrow=True)
        add_role(tparams[kk], PARAMETER)
    # Previously the mapping was built but never returned.
    return tparams
# make prefix-appended name