Python theano.tensor module, grad() example source code

The following 50 code examples, extracted from open-source Python projects, illustrate how to use theano.tensor.grad().
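
These snippets are excerpted from larger files and generally assume imports along the lines of import numpy as np, import theano, import theano as th, and import theano.tensor as T. Before the project examples, here is a minimal standalone sketch of the basic call (the variable names are illustrative, not taken from any of the projects below):

import theano
import theano.tensor as T

x = T.dscalar('x')               # symbolic scalar input
y = x ** 2 + 3 * x               # symbolic expression built from x
dy_dx = T.grad(y, x)             # symbolic gradient: 2*x + 3

f = theano.function([x], dy_dx)  # compile the gradient into a callable
print(f(2.0))                    # prints 7.0

T.grad() also accepts a list of variables for wrt and then returns a list of gradients, which is the pattern most of the optimizer helpers below rely on.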

Project: GELUs    Author: hendrycks    | project source | file source
def adamax_updates(params, cost, lr=0.001, mom1=0.9, mom2=0.999):
    updates = []
    grads = T.grad(cost, params)
    for p, g in zip(params, grads):
        mg = th.shared(np.cast[th.config.floatX](p.get_value() * 0.))
        v = th.shared(np.cast[th.config.floatX](p.get_value() * 0.))
        if mom1>0:
            v_t = mom1*v + (1. - mom1)*g
            updates.append((v,v_t))
        else:
            v_t = g
        mg_t = T.maximum(mom2*mg, abs(g))
        g_t = v_t / (mg_t + 1e-6)
        p_t = p - lr * g_t
        updates.append((mg, mg_t))
        updates.append((p, p_t))
    return updates
Project: GELUs    Author: hendrycks    | project source | file source
def adam_updates(params, cost, lr=0.001, mom1=0.9, mom2=0.999):
    updates = []
    grads = T.grad(cost, params)
    t = th.shared(np.cast[th.config.floatX](1.))
    for p, g in zip(params, grads):
        v = th.shared(np.cast[th.config.floatX](p.get_value() * 0.))
        mg = th.shared(np.cast[th.config.floatX](p.get_value() * 0.))
        v_t = mom1*v + (1. - mom1)*g
        mg_t = mom2*mg + (1. - mom2)*T.square(g)
        v_hat = v_t / (1. - mom1 ** t)
        mg_hat = mg_t / (1. - mom2 ** t)
        g_t = v_hat / T.sqrt(mg_hat + 1e-8)
        p_t = p - lr * g_t
        updates.append((v, v_t))
        updates.append((mg, mg_t))
        updates.append((p, p_t))
    updates.append((t, t+1))
    return updates
Project: pdnn    Author: petered    | project source | file source
def train_one(self, x, target):
        x, target = tt.unbroadcast(x, 0), tt.unbroadcast(target, 0)  # F'ing scan
        states = {}
        for layer in self.layers:
            x, layer_state = layer.forward_pass_and_state(x, count_ops=True)
            states[layer]=layer_state
        loss = self.loss(x, target)
        param_grad_pairs = []
        grad = None
        for layer in self.layers[::-1]:
            grad, param_grads = layer.backward_pass(state=states[layer], grad=grad, cost = loss, count_ops=True)
            loss = None
            param_grad_pairs += list(izip_equal(layer.parameters, param_grads))
        all_params, all_param_grads = zip(*param_grad_pairs)
        self.optimizer.update_from_gradients(parameters=all_params, gradients=all_param_grads)
        return create_constant(0.)  # scan demands some return
Project: deligan    Author: val-iisc    | project source | file source
def adam_updates(params, cost, lr=0.001, mom1=0.9, mom2=0.999):
    updates = []
    grads = T.grad(cost, params)
    t = th.shared(np.cast[th.config.floatX](1.))
    for p, g in zip(params, grads):
        v = th.shared(np.cast[th.config.floatX](p.get_value() * 0.))
        mg = th.shared(np.cast[th.config.floatX](p.get_value() * 0.))
        v_t = mom1*v + (1. - mom1)*g
        mg_t = mom2*mg + (1. - mom2)*T.square(g)
        v_hat = v_t / (1. - mom1 ** t)
        mg_hat = mg_t / (1. - mom2 ** t)
        g_t = v_hat / T.sqrt(mg_hat + 1e-8)
        p_t = p - lr * g_t
        updates.append((v, v_t))
        updates.append((mg, mg_t))
        updates.append((p, p_t))
    updates.append((t, t+1))
    return updates
Project: deligan    Author: val-iisc    | project source | file source
def adam_updates(params, cost, lr=0.001, mom1=0.9, mom2=0.999):
    updates = []
    grads = T.grad(cost, params)
    t = th.shared(np.cast[th.config.floatX](1.))
    for p, g in zip(params, grads):
        v = th.shared(np.cast[th.config.floatX](p.get_value() * 0.))
        mg = th.shared(np.cast[th.config.floatX](p.get_value() * 0.))
        v_t = mom1*v + (1. - mom1)*g
        mg_t = mom2*mg + (1. - mom2)*T.square(g)
        v_hat = v_t / (1. - mom1 ** t)
        mg_hat = mg_t / (1. - mom2 ** t)
        g_t = v_hat / T.sqrt(mg_hat + 1e-8)
        p_t = p - lr * g_t
        updates.append((v, v_t))
        updates.append((mg, mg_t))
        updates.append((p, p_t))
    updates.append((t, t+1))
    return updates
Project: monogreedy    Author: jinjunqi    | project source | file source
def sgd_optimizer(model, lr=0.001, momentum=0.9):
    lr = theano.shared(np.array(lr).astype(theano.config.floatX))
    # Make sure momentum is a sane value
    assert momentum < 1 and momentum >= 0
    # the updates of SGD with momentum
    updates = []
    grads = T.grad(model.costs[0], model.params)
    for param, grad in zip(model.params, grads):
        param_update = theano.shared(param.get_value()*0.)
        updates.append((param, param - lr * param_update))
        updates.append((param_update, momentum*param_update + (1. - momentum)*grad))

    train_func = theano.function(model.inputs, model.costs, updates=updates)
    valid_func = theano.function(model.inputs, model.costs)

    return train_func, valid_func
Project: structured-output-ae    Author: sbelharbi    | project source | file source
def get_sgd_updates(self, learning_rate, lr_scaler=1.0, batch_size=1,
                        sparsity_level=-1, sparse_reg=-1, x_in=None):
        h = self.encode(x_in)
        x_rec = self.decode(h)
        cost = self.get_rec_cost(x_rec)

        if self.L1_reg != -1 and self.L1_reg is not None:
            cost += self.L1_reg * self.L1

        if self.L2_reg != -1 and self.L2_reg is not None:
            cost += self.L2_reg * self.L2

        if sparsity_level != -1 and sparse_reg != -1:
            sparsity_penal = self.sparsity_penality(
                h, sparsity_level, sparse_reg, batch_size)
            cost += sparsity_penal

        self.gparams = T.grad(cost, self.params)
        updates = OrderedDict({})
        for param, gparam in zip(self.params, self.gparams):
            updates[param] = self.momentum * param - lr_scaler * \
                learning_rate * gparam
        return (cost, updates, h, x_rec)
Project: lightML    Author: jfzhang95    | project source | file source
def momentum(loss, params, caches, learning_rate=0.1, rho=0.1, clip_at=0.0, scale_norm=0.0, lambda2=0.0):
    updates = OrderedDict()
    grads = T.grad(cost=loss, wrt=params)

    for p, c, g in zip(params, caches, grads):
        if clip_at > 0.0:
            grad = clip(g, clip_at)    
        else:
            grad = g

        if scale_norm > 0.0:
            grad = scale(grad, scale_norm)

        delta = rho * grad + (1-rho) * c
        updates[p] = p - learning_rate * (delta + lambda2 * p)

    return updates, grads
Project: top-k-rec    Author: domainxz    | project source | file source
def _generate_train_model_function(self, scores):
       u = T.lvector('u')
       i = T.lvector('i')
       j = T.lvector('j')
       self.W = theano.shared(numpy.zeros((self._dim)).astype('float32'), name='W');
       self.S = theano.shared(scores, name='S');
       x_ui  = T.dot(self.W, self.S[u,i,:].T);
       x_uj  = T.dot(self.W, self.S[u,j,:].T);
       x_uij = x_ui - x_uj;
       obj = T.sum(
               T.log(T.nnet.sigmoid(x_uij)).sum() - \
               self._lambda_w * 0.5 * (self.W ** 2).sum()
               )
       cost = -obj
       g_cost_W = T.grad(cost=cost, wrt=self.W)
       updates = [
               (self.W, self.W - self._learning_rate * g_cost_W)
               ]
       self.train_model = theano.function(inputs=[u,i,j], outputs=cost, updates=updates);
Project: third_person_im    Author: bstadie    | project source | file source
def update_opt(self, f, target, inputs, reg_coeff):
        self.target = target
        self.reg_coeff = reg_coeff
        params = target.get_params(trainable=True)

        constraint_grads = theano.grad(
            f, wrt=params, disconnected_inputs='warn')
        xs = tuple([ext.new_tensor_like("%s x" % p.name, p) for p in params])

        def Hx_plain():
            Hx_plain_splits = TT.grad(
                TT.sum([TT.sum(g * x)
                        for g, x in zip(constraint_grads, xs)]),
                wrt=params,
                disconnected_inputs='warn'
            )
            return TT.concatenate([TT.flatten(s) for s in Hx_plain_splits])

        self.opt_fun = ext.lazydict(
            f_Hx_plain=lambda: ext.compile_function(
                inputs=inputs + xs,
                outputs=Hx_plain(),
                log_name="f_Hx_plain",
            ),
        )
Project: SteinGAN    Author: DartML    | project source | file source
def __call__(self, params, cost):
        updates = []
        grads = T.grad(cost, params)
        grads = clip_norms(grads, self.clipnorm)  
        t = theano.shared(floatX(1.))
        b1_t = self.b1*self.l**(t-1)

        for p, g in zip(params, grads):
            g = self.regularizer.gradient_regularize(p, g)
            m = theano.shared(p.get_value() * 0.)
            v = theano.shared(p.get_value() * 0.)

            m_t = b1_t*m + (1 - b1_t)*g
            v_t = self.b2*v + (1 - self.b2)*g**2
            m_c = m_t / (1-self.b1**t)
            v_c = v_t / (1-self.b2**t)
            p_t = p - (self.lr * m_c) / (T.sqrt(v_c) + self.e)
            p_t = self.regularizer.weight_regularize(p_t)
            updates.append((m, m_t))
            updates.append((v, v_t))
            updates.append((p, p_t) )
        updates.append((t, t + 1.))
        return updates
Project: SteinGAN    Author: DartML    | project source | file source
def __call__(self, params, cost):
        updates = []
        grads = T.grad(cost, params)
        grads = clip_norms(grads, self.clipnorm)
        for p,g in zip(params,grads):
            g = self.regularizer.gradient_regularize(p, g)

            acc = theano.shared(p.get_value() * 0.)
            acc_delta = theano.shared(p.get_value() * 0.)
            acc_new = self.rho * acc + (1 - self.rho) * g ** 2
            updates.append((acc,acc_new))

            update = g * T.sqrt(acc_delta + self.epsilon) / T.sqrt(acc_new + self.epsilon)
            updated_p = p - self.lr * update
            updated_p = self.regularizer.weight_regularize(updated_p)
            updates.append((p, updated_p))

            acc_delta_new = self.rho * acc_delta + (1 - self.rho) * update ** 2
            updates.append((acc_delta,acc_delta_new))
        return updates
Project: SteinGAN    Author: DartML    | project source | file source
def svgd_gradient(X0):

    hidden, _, mse = discrim(X0)
    grad = -1.0 * T.grad( mse.sum(), X0)

    kxy, neighbors, h = rbf_kernel(hidden)  #TODO

    coff = T.exp( - T.sum((hidden[neighbors] - hidden)**2, axis=1) / h**2 / 2.0 )
    v = coff.dimshuffle(0, 'x') * (-hidden[neighbors] + hidden) / h**2

    X1 = X0[neighbors]
    hidden1, _, _ = discrim(X1)
    dxkxy = T.Lop(hidden1, X1, v)

    #svgd_grad = (T.dot(kxy, T.flatten(grad, 2)).reshape(dxkxy.shape) + dxkxy) / T.sum(kxy, axis=1).dimshuffle(0, 'x', 'x', 'x')
    svgd_grad = grad + dxkxy / 2.
    return grad, svgd_grad, dxkxy
Project: SteinGAN    Author: DartML    | project source | file source
def svgd_gradient(X0):

    hidden, _, mse = discrim(X0)
    grad = -1.0 * T.grad( mse.sum(), X0)

    kxy, neighbors, h = rbf_kernel(hidden)  #TODO

    coff = T.exp( - T.sum((hidden[neighbors] - hidden)**2, axis=1) / h**2 / 2.0 )
    v = coff.dimshuffle(0, 'x') * (-hidden[neighbors] + hidden) / h**2

    X1 = X0[neighbors]
    hidden1, _, _ = discrim(X1)
    dxkxy = T.Lop(hidden1, X1, v)

    #svgd_grad = (T.dot(kxy, T.flatten(grad, 2)).reshape(dxkxy.shape) + dxkxy) / T.sum(kxy, axis=1).dimshuffle(0, 'x', 'x', 'x')
    svgd_grad = grad + dxkxy / 2.
    return grad, svgd_grad, dxkxy
Project: iGAN    Author: junyanz    | project source | file source
def __call__(self, params, cost):
        updates = []
        grads = T.grad(cost, params)
        grads = clip_norms(grads, self.clipnorm)
        for p,g in zip(params,grads):
            g = self.regularizer.gradient_regularize(p, g)

            acc = theano.shared(p.get_value() * 0.)
            acc_delta = theano.shared(p.get_value() * 0.)
            acc_new = self.rho * acc + (1 - self.rho) * g ** 2
            updates.append((acc,acc_new))

            update = g * T.sqrt(acc_delta + self.epsilon) / T.sqrt(acc_new + self.epsilon)
            updated_p = p - self.lr * update
            updated_p = self.regularizer.weight_regularize(updated_p)
            updates.append((p, updated_p))

            acc_delta_new = self.rho * acc_delta + (1 - self.rho) * update ** 2
            updates.append((acc_delta,acc_delta_new))
        return updates
Project: NMT    Author: tuzhaopeng    | project source | file source
def __add__(self, other):
        assert hasattr(self, 'out'), 'all layers need a default output'
        new_obj = utils.copy(self)
        other_var = new_obj.tensor_from_layer(other)
        new_obj.out = new_obj.out + other_var
        # Summing cost layers:
        if hasattr(new_obj, 'grads') and hasattr(other, 'grads'):
            for param, grad_param in zip(other.params, other.grads):
                pos = new_obj.params.index(param)
                new_obj.grads[pos] += grad_param
        elif hasattr(new_obj, 'grads') and \
                isinstance(other, theano.gof.Variable) and \
                other.ndim == 0:
            other_grads = TT.grad(other, new_obj.params,
                                  disconnected_inputs='ignore')
            new_obj.grads = [x + y for x,y in zip(new_obj.grads,
                                                  other_grads)]
        elif hasattr(new_obj, 'grads'):
            raise ValueError('I do not know how to compute the gradients'
                             ' of the added term' + str(other) + '. Call'
                             ' train on it if it is an output layer')
        return new_obj
Project: NMT    Author: tuzhaopeng    | project source | file source
def __sub__(self, other):
        assert hasattr(self, 'out'), 'all layers need a default output'
        new_obj = utils.copy(self)
        other_var = new_obj.tensor_from_layer(other)
        new_obj.out = new_obj.out - other_var
        if hasattr(new_obj, 'grads') and hasattr(other, 'grads'):
            for param, grad_param in zip(other.params, other.grads):
                pos = new_obj.params.index(param)
                new_obj.grads[pos] -= grad_param
        elif hasattr(new_obj, 'grads') and \
                isinstance(other, theano.gof.Variable) and \
                other.ndim == 0:
            other_grads = TT.grad(other, new_obj.params,
                                  disconnected_inputs='ignore')
            new_obj.grads = [x - y for x,y in zip(new_obj.grads,
                                                  other_grads)]
        elif hasattr(new_obj, 'grads'):
            raise ValueError('I do not know how to compute the gradients'
                             ' of the subtracted term' + str(other) + '. Call'
                             ' train on it if it is an output layer')
        return new_obj
Project: NMT    Author: tuzhaopeng    | project source | file source
def __add__(self, other):
        assert hasattr(self, 'out'), 'all layers need a default output'
        new_obj = utils.copy(self)
        other_var = new_obj.tensor_from_layer(other)
        new_obj.out = new_obj.out + other_var
        # Summing cost layers:
        if hasattr(new_obj, 'grads') and hasattr(other, 'grads'):
            for param, grad_param in zip(other.params, other.grads):
                pos = new_obj.params.index(param)
                new_obj.grads[pos] += grad_param
        elif hasattr(new_obj, 'grads') and \
                isinstance(other, theano.gof.Variable) and \
                other.ndim == 0:
            other_grads = TT.grad(other, new_obj.params,
                                  disconnected_inputs='ignore')
            new_obj.grads = [x + y for x,y in zip(new_obj.grads,
                                                  other_grads)]
        elif hasattr(new_obj, 'grads'):
            raise ValueError('I do not know how to compute the gradients'
                             ' of the added term' + str(other) + '. Call'
                             ' train on it if it is an output layer')
        return new_obj
Project: NMT    Author: tuzhaopeng    | project source | file source
def __sub__(self, other):
        assert hasattr(self, 'out'), 'all layers need a default output'
        new_obj = utils.copy(self)
        other_var = new_obj.tensor_from_layer(other)
        new_obj.out = new_obj.out - other_var
        if hasattr(new_obj, 'grads') and hasattr(other, 'grads'):
            for param, grad_param in zip(other.params, other.grads):
                pos = new_obj.params.index(param)
                new_obj.grads[pos] -= grad_param
        elif hasattr(new_obj, 'grads') and \
                isinstance(other, theano.gof.Variable) and \
                other.ndim == 0:
            other_grads = TT.grad(other, new_obj.params,
                                  disconnected_inputs='ignore')
            new_obj.grads = [x - y for x,y in zip(new_obj.grads,
                                                  other_grads)]
        elif hasattr(new_obj, 'grads'):
            raise ValueError('I do not know how to compute the gradients'
                             ' of the subtracted term' + str(other) + '. Call'
                             ' train on it if it is an output layer')
        return new_obj
Project: sesame-paste-noodle    Author: aissehust    | project source | file source
def __init__(self):
        super(UpPooling, self).__init__()

        #X = self.get_input(train)
        #if self.dim_ordering == 'th':
        #    output = K.repeat_elements(X, self.size[0], axis=2)
        #    output = K.repeat_elements(output, self.size[1], axis=3)
        #elif self.dim_ordering == 'tf':
        #    output = K.repeat_elements(X, self.size[0], axis=1)
        #    output = K.repeat_elements(output, self.size[1], axis=2)
        #else:
        #    raise Exception('Invalid dim_ordering: ' + self.dim_ordering)
        #
        #f = T.grad(T.sum(self._pool2d_layer.get_output(train)), wrt=self._pool2d_layer.get_input(train)) * output

        #return f
Project: sesame-paste-noodle    Author: aissehust    | project source | file source
def __call__(self, cost, params):
        grads = T.grad(cost=cost, wrt=params)
        updates = []
        exp = theano.shared(np.float32(1.0),name='exp',borrow=True)
        updates.append((exp, exp+1))
        for p, g in zip(params, grads):
            m = theano.shared(p.get_value() * 0.)
            v = theano.shared(p.get_value() * 0.)
            m_new = self.beta1 * m + (1 - self.beta1) * g
            v_new = self.beta2 * v + (1 - self.beta2) * g**2
            mt = m_new / (1 - self.beta1**exp)
            vt = v_new / (1 - self.beta2**exp)
            updates.append((m, m_new))
            updates.append((v, v_new))
            updates.append((p, p - self.lr * mt / (T.sqrt(vt) + self.epsilon)))

        return updates
Project: seq2seq-keyphrase    Author: memray    | project source | file source
def get_gradients(self, loss, params):
        """
        Handle the case where the gradient is weighted (a list-valued loss: the first element is the loss, the rest are treated as constants).
        """
        if isinstance(loss, list):
            grads = T.grad(loss[0], params, consider_constant=loss[1:])  # gradient of loss
        else:
            grads = T.grad(loss, params)

        if hasattr(self, 'clipnorm') and self.clipnorm > 0:
            print('using gradient clipping!!')
            print('clipnorm = %f' % self.clipnorm)
            norm = T.sqrt(sum([T.sum(g ** 2) for g in grads]))
            grads = [clip_norm(g, self.clipnorm, norm) for g in grads]
        else:
            print('not using gradient clipping!!')

        return grads
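
For reference, the consider_constant argument used above tells T.grad to treat the listed variables as constants, so no gradient flows through them. A minimal standalone sketch, with expressions that are purely illustrative and not taken from the project:

import theano.tensor as T

x = T.dscalar('x')
w = x ** 2                                  # weighting term we do not want to differentiate through
loss = w * T.sin(x)

# d(loss)/dx with w held fixed gives w * cos(x),
# instead of the full 2*x*sin(x) + w*cos(x)
g = T.grad(loss, x, consider_constant=[w])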
Project: merlin    Author: CSTR-Edinburgh    | project source | file source
def get_cost_updates(self, corruption_level, learning_rate):
        #if corruption_level == 0:
        #    tilde_x = self.x
        #else:
        #    tilde_x = self.get_corrupted_input(self.x, corruption_level)
        tilde_x = self.x

        y = self.get_hidden_values(tilde_x)
        z = self.get_reconstructed_input(y)

        L = T.sum ( (self.x-z) * (self.x-z), axis=1 )
        cost = T.mean(L) / 2

        gparams = T.grad(cost, self.params)
        updates = {}
        for param, gparam in zip(self.params, gparams):
            updates[param] = param -  learning_rate*gparam

        return (cost, updates)
Project: Deep-Learning-with-Theano    Author: PacktPublishing    | project source | file source
def __call__(self, params, cost):
        updates = []
        grads = T.grad(cost, params)
        grads = clip_norms(grads, self.clipnorm)
        t = theano.shared(floatX(1.))
        b1_t = self.b1*self.l**(t-1)

        for p, g in zip(params, grads):
            g = self.regularizer.gradient_regularize(p, g)
            m = theano.shared(p.get_value() * 0.)
            v = theano.shared(p.get_value() * 0.)

            m_t = b1_t*m + (1 - b1_t)*g
            v_t = self.b2*v + (1 - self.b2)*g**2
            m_c = m_t / (1-self.b1**t)
            v_c = v_t / (1-self.b2**t)
            p_t = p - (self.lr * m_c) / (T.sqrt(v_c) + self.e)
            p_t = self.regularizer.weight_regularize(p_t)
            updates.append((m, m_t))
            updates.append((v, v_t))
            updates.append((p, p_t) )
        updates.append((t, t + 1.))
        return updates
Project: Deep-Learning-with-Theano    Author: PacktPublishing    | project source | file source
def __call__(self, params, cost):
        updates = []
        grads = T.grad(cost, params)
        grads = clip_norms(grads, self.clipnorm)
        for p,g in zip(params,grads):
            g = self.regularizer.gradient_regularize(p, g)

            acc = theano.shared(p.get_value() * 0.)
            acc_delta = theano.shared(p.get_value() * 0.)
            acc_new = self.rho * acc + (1 - self.rho) * g ** 2
            updates.append((acc,acc_new))

            update = g * T.sqrt(acc_delta + self.epsilon) / T.sqrt(acc_new + self.epsilon)
            updated_p = p - self.lr * update
            updated_p = self.regularizer.weight_regularize(updated_p)
            updates.append((p, updated_p))

            acc_delta_new = self.rho * acc_delta + (1 - self.rho) * update ** 2
            updates.append((acc_delta,acc_delta_new))
        return updates
Project: NCRF-AE    Author: cosmozhang    | project source | file source
def Adam(cost, params, learning_rate=0.0002, b1=0.1, b2=0.001, e=1e-8):
    updates = OrderedDict()
    grads = T.grad(cost, params)
    i = theano.shared(np.asarray(0., dtype=theano.config.floatX))
    i_t = i + 1.
    fix1 = 1. - (1. - b1)**i_t
    fix2 = 1. - (1. - b2)**i_t
    lr_t = learning_rate * (T.sqrt(fix2) / fix1)
    for p, g in zip(params, grads):
        m = theano.shared(p.get_value() * 0.)
        v = theano.shared(p.get_value() * 0.)
        m_t = (b1 * g) + ((1. - b1) * m)
        v_t = (b2 * T.sqr(g)) + ((1. - b2) * v)
        g_t = m_t / (T.sqrt(v_t) + e)
        p_t = p - (lr_t * g_t)

        updates[m] = m_t
        updates[v] = v_t
        updates[p] = p_t
    updates[i] = i_t

    return updates
Project: NCRF-AE    Author: cosmozhang    | project source | file source
def RmsProp(cost, params, learning_rate=1.0, rho=0.9, epsilon=1e-6):
    updates = OrderedDict()
    grads = T.grad(cost, params)
    # Using theano constant to prevent upcasting of float32
    one = T.constant(1)

    for param, grad in zip(params, grads):
        value = param.get_value(borrow=True)
        accu = theano.shared(np.zeros(value.shape, dtype=value.dtype),
                             broadcastable=param.broadcastable)
        accu_new = rho * accu + (one - rho) * grad ** 2
        updates[accu] = accu_new
        updates[param] = param - (learning_rate * grad /
                                  T.sqrt(accu_new + epsilon))

    return updates
Project: NCRF-AE    Author: cosmozhang    | project source | file source
def EGD(cost, params, learning_rate = 0.33, constraint = 1.0):

    updates = OrderedDict()

    grads = T.grad(cost, params)
    U = T.constant(constraint)

    #first half of params
    rw_pos = T.exp(-learning_rate * U * grads[0])
    rb_pos = T.exp(-learning_rate * U * grads[1])

    #second half
    rw_neg = 1/rw_pos
    rb_neg = 1/rb_pos

    rs = [rw_pos, rb_pos, rw_neg, rb_neg]

    partition = T.sum(params[0]*rs[0]) + T.sum(params[1]*rs[1]) + T.sum(params[2]*rs[2]) + T.sum(params[3]*rs[3])

    for param, r in zip(params, rs):
        updates[param] = U*param*r/partition

    return updates
Project: rllabplusplus    Author: shaneshixiang    | project source | file source
def update_opt(self, f, target, inputs, reg_coeff):
        self.target = target
        self.reg_coeff = reg_coeff
        params = target.get_params(trainable=True)

        constraint_grads = theano.grad(
            f, wrt=params, disconnected_inputs='warn')
        xs = tuple([ext.new_tensor_like("%s x" % p.name, p) for p in params])

        def Hx_plain():
            Hx_plain_splits = TT.grad(
                TT.sum([TT.sum(g * x)
                        for g, x in zip(constraint_grads, xs)]),
                wrt=params,
                disconnected_inputs='warn'
            )
            return TT.concatenate([TT.flatten(s) for s in Hx_plain_splits])

        self.opt_fun = ext.lazydict(
            f_Hx_plain=lambda: ext.compile_function(
                inputs=inputs + xs,
                outputs=Hx_plain(),
                log_name="f_Hx_plain",
            ),
        )
Project: pl-cnn    Author: oval-group    | project source | file source
def compile_maxpool(output_shape, pool_size):

    X = T.tensor4()

    # compute output with both methods
    out1 = T.signal.pool.pool_2d(X, pool_size,
                                 ignore_border=True, st=None,
                                 padding=(0, 0), mode='max')

    out2 = my_pool_2d(X, pool_size,
                      ignore_border=True, st=None,
                      padding=(0, 0), mode='max')

    # compute gradient with random incoming gradient for both cases
    incoming_grad = T.as_tensor_variable(np.random.random(size=output_shape)
                                         .astype(np.float32))
    grad1 = T.grad(None, wrt=X, known_grads={out1: incoming_grad})
    grad2 = T.grad(None, wrt=X, known_grads={out2: incoming_grad})

    return theano.function([X], [out1, out2, grad1, grad2])
Project: pl-cnn    Author: oval-group    | project source | file source
def test_maxpool_edge_case(self):
        """
        Test MaxPooling on an edge case: inputs have same values in a patch
        Check one and only one gradient is back-propagated in each patch
        """

        X = np.zeros(shape=self.input_shape, dtype=np.float32)
        out1, out2, _, grad = self.maxpool(X)

        assert np.all(np.isclose(out1, out2))

        for i in range(self.output_shape[0]):
            for j in range(self.output_shape[1]):
                for k in range(self.output_shape[2]):
                    for l in range(self.output_shape[3]):
                        count = 0
                        for m in range(self.pool_size[0]):
                            for n in range(self.pool_size[1]):
                                kk = self.pool_size[0] * k + m
                                ll = self.pool_size[1] * l + n
                                if grad[i, j, kk, ll] != 0.:
                                    count += 1
                        assert count == 1
Project: theano-recurrence    Author: uyaseen    | project source | file source
def adam(cost, params, lr=0.001, b1=0.9, b2=0.999, e=1e-8):
    updates = []
    grads = T.grad(cost, params)
    i = theano.shared(np.dtype(theano.config.floatX).type(1))
    i_t = i + 1.
    fix1 = 1. - (1. - b1)**i_t
    fix2 = 1. - (1. - b2)**i_t
    lr_t = lr * (T.sqrt(fix2) / fix1)
    for p, g in zip(params, grads):
        g = T.clip(g, -grad_clip, grad_clip)  # grad_clip: clipping bound defined at module level in the original source
        m = theano.shared(p.get_value() * 0.)
        v = theano.shared(p.get_value() * 0.)
        m_t = (b1 * g) + ((1. - b1) * m)
        v_t = (b2 * T.sqr(g)) + ((1. - b2) * v)
        g_t = m_t / (T.sqrt(v_t) + e)
        p_t = p - (lr_t * g_t)
        updates.append((m, m_t))
        updates.append((v, v_t))
        updates.append((p, p_t))
    updates.append((i, i_t))
    return updates
Project: yadll    Author: pchavanne    | project source | file source
def adagrad(cost, params, learning_rate=0.1, epsilon=1e-6, **kwargs):
    """Adaptive Gradient Descent
    Scale learning rates by dividing with the square root of accumulated
    squared gradients

    References
    ----------
    .. [1] http://www.jmlr.org/papers/volume12/duchi11a/duchi11a.pdf
    """
    gparams = T.grad(cost, params)
    updates = OrderedDict()
    for param, gparam in zip(params, gparams):
        accu = shared_variable(np.zeros(param.get_value(borrow=True).shape), broadcastable=param.broadcastable)
        accu_new = accu + gparam ** 2
        updates[accu] = accu_new
        updates[param] = param - learning_rate * gparam / T.sqrt(accu_new + epsilon)
    return updates
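
As a usage note, update dictionaries like the one returned here are typically passed to theano.function. Below is a minimal sketch of wiring this adagrad helper into a training step; the tiny linear model is an assumption for illustration, and it presumes the shared_variable wrapper used above (or an equivalent around theano.shared) is in scope:

import numpy as np
import theano
import theano.tensor as T

x = T.matrix('x')
t = T.vector('t')
W = theano.shared(np.zeros((3, 1), dtype=theano.config.floatX), name='W')

y = T.dot(x, W).flatten()                 # simple linear predictor
cost = T.mean((y - t) ** 2)               # mean squared error

updates = adagrad(cost, [W], learning_rate=0.1)              # helper defined above
train_step = theano.function([x, t], cost, updates=updates)  # one call = one update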
Project: yadll    Author: pchavanne    | project source | file source
def adam(cost, params, learning_rate=0.001, beta1=0.9, beta2=0.999, epsilon=1e-6, **kwargs):
    """Adam Gradient Descent
    Scale learning rates by Adaptive moment estimation

    References
    ----------
    .. [1] https://arxiv.org/pdf/1412.6980v8.pdf
    """
    gparams = T.grad(cost, params)
    updates = OrderedDict()
    t = shared_variable(to_float_X(0.))
    t_t = 1. + t
    l_r_t = learning_rate * T.sqrt(1. - beta2 ** t_t) / (1. - beta1 ** t_t)
    for param, gparam in zip(params, gparams):
        m = shared_variable(np.zeros(param.get_value(borrow=True).shape), broadcastable=param.broadcastable)
        v = shared_variable(np.zeros(param.get_value(borrow=True).shape), broadcastable=param.broadcastable)
        m_t = beta1 * m + (1. - beta1) * gparam
        v_t = beta2 * v + (1. - beta2) * T.sqr(gparam)
        updates[m] = m_t
        updates[v] = v_t
        updates[param] = param - l_r_t * m_t / (T.sqrt(v_t) + epsilon)
    updates[t] = t_t
    return updates
Project: Theano-MPI    Author: uoguelph-mlrg    | project source | file source
def compile_train(self, *args):

        # args is a list of dictionaries

        if self.verbose: print('compiling training function...')

        import theano

        for arg_list in args:
            self.compiled_train_fn_list.append(theano.function(**arg_list))

        if self.monitor_grad:

            norms = [grad.norm(L=2) for grad in self.grads]
            import theano.tensor as T
            norms = T.log10(norms)

            self.get_norm = theano.function([self.subb_ind], [T.sum(norms), T.max(norms)],
                                              givens=[(self.x, self.shared_x_slice), 
                                                      (self.y, self.shared_y_slice)]
                                                                          )
Project: Theano-MPI    Author: uoguelph-mlrg    | project source | file source
def compile_train(self, *args):

        # args is a list of dictionaries

        if self.verbose: print('compiling training function...')

        import theano

        for arg_list in args:
            self.compiled_train_fn_list.append(theano.function(**arg_list))

        if self.monitor_grad:

            norms = [grad.norm(L=2) for grad in self.grads]

            self.get_norm = theano.function([self.subb_ind], norms,
                                              givens=[(self.x, self.shared_x_slice), 
                                                      (self.y, self.shared_y_slice)]
                                                                          )
Project: Theano-MPI    Author: uoguelph-mlrg    | project source | file source
def compile_train(self, *args):

        # args is a list of dictionaries

        if self.verbose: print('compiling training function...')

        import theano

        for arg_list in args:
            self.compiled_train_fn_list.append(theano.function(**arg_list))

        if self.monitor_grad:

            norms = [grad.norm(L=2) for grad in self.grads]

            self.get_norm = theano.function([self.subb_ind], norms,
                                              givens=[(self.x, self.shared_x_slice), 
                                                      (self.y, self.shared_y_slice)]
                                                                          )
Project: Theano-MPI    Author: uoguelph-mlrg    | project source | file source
def compile_train(self, *args):

        # args is a list of dictionaries

        if self.verbose: print('compiling training function...')

        import theano

        for arg_list in args:
            self.compiled_train_fn_list.append(theano.function(**arg_list))

        if self.monitor_grad:

            norms = [grad.norm(L=2) for grad in self.grads]

            self.get_norm = theano.function([self.subb_ind], norms,
                                              givens=[(self.x, self.shared_x_slice), 
                                                      (self.y, self.shared_y_slice)]
                                                                          )
Project: Theano-MPI    Author: uoguelph-mlrg    | project source | file source
def compile_train(self, *args):

        # args is a list of dictionaries

        if self.verbose: print('compiling training function...')

        import theano

        for arg_list in args:
            self.compiled_train_fn_list.append(theano.function(**arg_list))

        if self.monitor_grad:

            norms = [grad.norm(L=2) for grad in self.grads]

            self.get_norm = theano.function([self.subb_ind], norms,
                                              givens=[(self.x, self.shared_x_slice), 
                                                      (self.y, self.shared_y_slice)]
                                                                          )
Project: Theano-MPI    Author: uoguelph-mlrg    | project source | file source
def compile_iter_fns(self, *args, **kwargs):

        import theano

        import time
        start=time.time()

        # f_pred_prob = theano.function([x, mask], pred, name='f_pred_prob')
        self.f_pred = theano.function([self.x, self.mask], self.pred.argmax(axis=1), name='f_pred')

        # f_cost = theano.function([x, mask, y], cost, name='f_cost')
        import theano.tensor as tensor
        grads = tensor.grad(self.cost, wrt=list(self.tparams.values()))
        # f_grad = theano.function([x, mask, y], grads, name='f_grad')

        lr = tensor.scalar(name='lr')

        from theanompi.models.lstm import adadelta
        self.f_grad_shared, self.f_update = adadelta(lr, self.tparams, grads,
                                         self.x, self.mask, self.y, self.cost)

        if self.rank==0: print('compile time %.3f' % (time.time()-start))
Project: Theano-MPI    Author: uoguelph-mlrg    | project source | file source
def compile_train(self, *args):

        # args is a list of dictionaries

        if self.verbose: print('compiling training function...')

        import theano

        for arg_list in args:
            self.compiled_train_fn_list.append(theano.function(**arg_list))

        if self.monitor_grad:

            norms = [grad.norm(L=2) for grad in self.grads]

            self.get_norm = theano.function([self.subb_ind], norms,
                                              givens=[(self.x, self.shared_x_slice), 
                                                      (self.y, self.shared_y_slice)]
                                                                          )
Project: Theano-MPI    Author: uoguelph-mlrg    | project source | file source
def compile_train(self, *args):

        # args is a list of dictionaries

        if self.verbose: print('compiling training function...')

        import theano

        for arg_list in args:
            self.compiled_train_fn_list.append(theano.function(**arg_list))

        if self.monitor_grad:

            norms = [grad.norm(L=2) for grad in self.grads]

            self.get_norm = theano.function([self.subb_ind], norms,
                                              givens=[(self.x, self.shared_x_slice), 
                                                      (self.y, self.shared_y_slice)]
                                                                          )
Project: recnet    Author: joergfranke    | project source | file source
def fit(self, weights, o_error, tpo ):

        gradients = T.grad(o_error ,weights)
        updates = []
        for c, v, w, g in zip(self.t_cache, self.t_velocity, weights,gradients):
            new_velocity = T.sub( T.mul(tpo["momentum_rate"], v) , T.mul(tpo["learn_rate"], g) )
            new_cache = T.add( T.mul(tpo["decay_rate"] , c) , T.mul(T.sub( 1, tpo["decay_rate"]) , T.sqr(g)))
            new_weights = T.sub(T.add(w , new_velocity) , T.true_div( T.mul(g,tpo["learn_rate"]) , T.sqrt(T.add(new_cache,0.1**8))))
            updates.append((w, new_weights))
            updates.append((v, new_velocity))
            updates.append((c, new_cache))

        return updates


######                 Nesterov momentum
########################################
Project: recnet    Author: joergfranke    | project source | file source
def fit(self, weights, o_error, tpo):
        updates = []
        gradients = theano.grad(o_error, weights)


        for c, w, g in zip(self.t_cache, weights, gradients):
            new_cache = tpo["decay_rate"] * c + ( 1- tpo["decay_rate"]) * T.sqr(g)
            new_weights = w - (g * tpo["learn_rate"]) / T.sqrt(new_cache + 0.1**8)
            updates.append((w, new_weights))
            updates.append((c, new_cache))

        return updates


######                          ADADELTA
########################################
Project: recnet    Author: joergfranke    | project source | file source
def fit(self, weights, o_error, tpo):

        gradients = theano.grad(o_error, weights)

        updates = []
        for v, w, g in zip(self.t_velocity, weights, gradients):
            #gradient = T.grad(o_error ,w)
            new_velocity = tpo["momentum_rate"] * v - tpo["learn_rate"] * g
            new_weights = w + new_velocity
            updates.append((w, new_weights))
            updates.append((v, new_velocity))
        return updates



######                       Vanilla SGD
########################################
Project: DeepEnhancer    Author: minxueric    | project source | file source
def adadelta(loss, params, learning_rate, rho=.95, epsilon=1e-6):
    grads = T.grad(loss, params)
    updates = OrderedDict()
    for param, grad in zip(params, grads):
        value = param.get_value(borrow=True)
        accu = theano.shared(
                np.zeros(value.shape, dtype=value.dtype),
                broadcastable=param.broadcastable)
        delta_accu = theano.shared(
                np.zeros(value.shape, dtype=value.dtype),
                broadcastable=param.broadcastable)
        accu_new = rho * accu + (1 - rho) * grad ** 2
        updates[accu] = accu_new
        update = (grad * T.sqrt(delta_accu + epsilon) /
                T.sqrt(accu_new + epsilon))
        updates[param] = param - learning_rate * update
        delta_accu_new = rho * delta_accu + (1 - rho) * update ** 2
        updates[delta_accu] = delta_accu_new
    return updates
Project: DeepEnhancer    Author: minxueric    | project source | file source
def adam(loss, params, learning_rate, beta1=0.9, beta2=0.999, epsilon=1e-8):
    grads = T.grad(loss, params)
    updates = OrderedDict()
    t_prev = theano.shared(np.cast[theano.config.floatX](0))
    t = t_prev + 1
    a_t = learning_rate * T.sqrt(1-beta2**t)/(1-beta1**t)
    for param, grad in zip(params, grads):
        value = param.get_value(borrow=True)
        m_prev = theano.shared(
                np.zeros(value.shape, dtype=value.dtype),
                broadcastable=param.broadcastable)
        v_prev = theano.shared(
                np.zeros(value.shape, dtype=value.dtype),
                broadcastable=param.broadcastable)
        m_t = beta1 * m_prev + (1 - beta1) * grad
        v_t = beta2 * v_prev + (1 - beta2) * grad ** 2
        step = a_t * m_t / (T.sqrt(v_t) + epsilon)

        updates[m_prev] = m_t
        updates[v_prev] = v_t
        updates[param] = param - step
    updates[t_prev] = t
    return updates
Project: vaegan    Author: anitan0925    | project source | file source
def __init__( self, model, eta=1e-2, rho=0.9, epsilon=1e-6, minibatch_size=10 ):
        """
        Initialize RMSPROP.

        Arguments
        ---------
        model          : model instance; it should provide params, grad(), [and updates].
        eta            : float.
                         Learning rate.
        rho            : float.
        epsilon        : float.
                         Constant for numerical stability.
        minibatch_size : integer.
                         Minibatch size to calculate the stochastic gradient.
        """
        self.model            = model
        self.__eta            = eta
        self.__rho            = rho  
        self.__eps            = epsilon
        self.minibatch_size = minibatch_size

        self.__compile()
Project: vaegan    Author: anitan0925    | project source | file source
def __compile( self ):
        self.update_funcs = []
        for params, inputs, cost in self.model.get_opt_infos():
            # Shared variables for acc.
            accs = [ theano.shared( 
                np.zeros( p.get_value().shape, dtype=theano.config.floatX ) ) 
                     for p in params ]

            sgrad = tensor.grad( cost, params )

            new_accs = [ self.__rho * acc + (1 - self.__rho) * sg ** 2 
                         for (acc, sg) in zip( accs, sgrad ) ]

            updates = OrderedDict()
            updates.update( zip( accs, new_accs ) )
            updates.update( 
                [ (p, p - ( self.__eta * sg / tensor.sqrt( acc_new + self.__eps ) ) ) 
                  for (p, sg, acc_new) 
                  in zip( params, sgrad, new_accs ) ] )

            self.update_funcs.append( theano.function( inputs  = inputs,
                                                       updates = updates ) )
Project: vaegan    Author: anitan0925    | project source | file source
def __init__( self, model, eta=1e-3, beta1=0.9, beta2=0.999, 
                  epsilon=1e-8, minibatch_size=10 ):

        """
        Initialize ADAM.

        Arguments
        ---------
        model          : model instance; it should provide params, grad(), [and updates].
        eta            : float.
                         Learning rate.
        beta1, beta2   : float.
        epsilon        : float.
                         Constant for numerical stability.
        minibatch_size : integer.
                         Minibatch size to calculate the stochastic gradient.
        """
        self.model            = model
        self.__eta            = eta
        self.__beta1          = beta1  
        self.__beta2          = beta2 
        self.__eps            = epsilon
        self.minibatch_size  = minibatch_size

        self.__compile()