Python theano.tensor module: isinf() example source code

We have extracted the following 19 code examples from open-source Python projects to illustrate how to use theano.tensor.isinf().
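Before the project examples, here is a minimal, self-contained sketch (not taken from any of the listed projects) of what theano.tensor.isinf() does: it returns an elementwise 0/1 tensor flagging infinite entries, and it is typically paired with theano.tensor.isnan() to detect any non-finite value.

import numpy as np
import theano
import theano.tensor as tensor

# Illustrative sketch only; 'x' and 'check' are hypothetical names.
x = tensor.vector('x')
# 1 where x is +inf/-inf or nan, 0 elsewhere.
not_finite = tensor.or_(tensor.isnan(x), tensor.isinf(x))
check = theano.function([x], not_finite)

print(check(np.asarray([1.0, np.inf, -np.inf, np.nan], dtype=theano.config.floatX)))
# -> [0 1 1 1]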

Project: DL4NMT_Theano    Author: fyabc    | project source | file source
def clip_grad_remove_nan(grads, clip_c_shared, mt_tparams):
    g2 = 0.
    for g in grads:
        g2 += (g*g).sum()
    not_finite = tensor.or_(tensor.isnan(g2), tensor.isinf(g2))
    if clip_c_shared.get_value() > 0.:
        new_grads = []
        for g, p in zip(grads, itemlist(mt_tparams)):
            tmpg = tensor.switch(g2 > (clip_c_shared*clip_c_shared),
                                 g / tensor.sqrt(g2) * clip_c_shared,
                                 g)
            new_grads.append(tensor.switch(not_finite, np.float32(.1)*p, tmpg))

        return new_grads, tensor.sqrt(g2)
    else:
        return grads, tensor.sqrt(g2)
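Most examples on this page follow the same pattern as the function above: square-and-sum the gradients, test the result with isnan/isinf, rescale when the norm exceeds a threshold, and fall back to a small multiple of each parameter when the norm is not finite. A condensed sketch of that pattern, using a hypothetical helper with plain grads/params lists rather than code from DL4NMT_Theano, is:

import numpy as np
import theano.tensor as tensor

def clip_and_sanitize_grads(grads, params, clip_c=1.0):
    # Hypothetical helper for illustration only.
    # Squared L2 norm over all gradients.
    g2 = sum((g * g).sum() for g in grads)
    not_finite = tensor.or_(tensor.isnan(g2), tensor.isinf(g2))
    new_grads = []
    for g, p in zip(grads, params):
        # Rescale to norm clip_c when the squared norm exceeds clip_c**2.
        clipped = tensor.switch(g2 > clip_c ** 2, g / tensor.sqrt(g2) * clip_c, g)
        # If the norm is nan/inf, replace the gradient with 0.1 * parameter.
        new_grads.append(tensor.switch(not_finite, np.float32(0.1) * p, clipped))
    return new_grads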
Project: tree_rnn    Author: ofirnachum    | project source | file source
def gradient_descent(self, loss):
        """Momentum GD with gradient clipping."""
        grad = T.grad(loss, self.params)
        self.momentum_velocity_ = [0.] * len(grad)
        grad_norm = T.sqrt(sum(map(lambda x: T.sqr(x).sum(), grad)))
        updates = OrderedDict()
        not_finite = T.or_(T.isnan(grad_norm), T.isinf(grad_norm))
        scaling_den = T.maximum(5.0, grad_norm)
        for n, (param, grad) in enumerate(zip(self.params, grad)):
            grad = T.switch(not_finite, 0.1 * param,
                            grad * (5.0 / scaling_den))
            velocity = self.momentum_velocity_[n]
            update_step = self.momentum * velocity - self.learning_rate * grad
            self.momentum_velocity_[n] = update_step
            updates[param] = param + update_step
        return updates
Project: GRU-or-CNN    Author: hit-computer    | project source | file source
def compute_updates(training_cost, params, config):
    updates = []

    grads = T.grad(training_cost, params)
    grads = OrderedDict(zip(params, grads))

    # Clip stuff
    c = np.float32(1.)
    clip_grads = []

    norm_gs = T.sqrt(sum(T.sum(g ** 2) for p, g in grads.items()))
    normalization = T.switch(T.ge(norm_gs, c), c / norm_gs, np.float32(1.))
    notfinite = T.or_(T.isnan(norm_gs), T.isinf(norm_gs))

    for p, g in grads.items():
        clip_grads.append((p, T.switch(notfinite, np.float32(.1) * p, g * normalization)))

    grads = OrderedDict(clip_grads)

    updates = Adam(grads, config.learning_rate)  # update parameters with Adam

    return updates
Project: drmad    Author: bigaidream-projects    | project source | file source
def remove_nans(x):
    return T.switch(T.isnan(x) + T.isinf(x), 0, x)
Project: pyrl    Author: frsong    | project source | file source
def get_updates(self, loss, lr, max_norm=1, beta1=0.9, beta2=0.999,
                    epsilon=1e-8, grads=None):
        # Gradients
        if grads is None:
            grads = tensor.grad(loss, self.trainables)

        # Clipping
        norm  = tensor.sqrt(sum([tensor.sqr(g).sum() for g in grads]))
        m     = theanotools.clipping_multiplier(norm, max_norm)
        grads = [m*g for g in grads]

        # Safeguard against numerical instability
        new_cond = tensor.or_(tensor.or_(tensor.isnan(norm), tensor.isinf(norm)),
                              tensor.or_(norm < 0, norm > 1e10))
        grads = [tensor.switch(new_cond, np.float32(0), g) for g in grads]

        # Safeguard against numerical instability
        #cond  = tensor.or_(norm < 0, tensor.or_(tensor.isnan(norm), tensor.isinf(norm)))
        #grads = [tensor.switch(cond, np.float32(0), g) for g in grads]

        # New values
        t       = self.time + 1
        lr_t    = lr*tensor.sqrt(1. - beta2**t)/(1. - beta1**t)
        means_t = [beta1*m + (1. - beta1)*g for g, m in zip(grads, self.means)]
        vars_t  = [beta2*v + (1. - beta2)*tensor.sqr(g) for g, v in zip(grads, self.vars)]
        steps   = [lr_t*m_t/(tensor.sqrt(v_t) + epsilon)
                   for m_t, v_t in zip(means_t, vars_t)]

        # Updates
        updates  = [(x, x - step) for x, step in zip(self.trainables, steps)]
        updates += [(m, m_t) for m, m_t in zip(self.means, means_t)]
        updates += [(v, v_t) for v, v_t in zip(self.vars, vars_t)]
        updates += [(self.time, t)]

        return norm, grads, updates
Project: policy_search_bb-alpha    Author: siemens    | project source | file source
def adam(self,cost, params, learning_rate=0.001, beta1=0.9,
             beta2=0.999, epsilon=1e-8):

        all_grads = T.grad(cost=cost, wrt=params)
        all_grads = total_norm_constraint(all_grads,10)

        grad_norm = T.sqrt(sum(map(lambda x: T.sqr(x).sum(), all_grads)))
        not_finite = T.or_(T.isnan(grad_norm), T.isinf(grad_norm))

        t_prev = theano.shared(utils.floatX(0.))
        updates = OrderedDict()

        t = t_prev + 1
        a_t = learning_rate*T.sqrt(1-beta2**t)/(1-beta1**t)

        for param, g_t in zip(params, all_grads):
            g_t = T.switch(not_finite, 0.1 * param,g_t)
            value = param.get_value(borrow=True)
            m_prev = theano.shared(np.zeros(value.shape, dtype=value.dtype),
                                   broadcastable=param.broadcastable)
            v_prev = theano.shared(np.zeros(value.shape, dtype=value.dtype),
                                   broadcastable=param.broadcastable)

            m_t = beta1*m_prev + (1-beta1)*g_t
            v_t = beta2*v_prev + (1-beta2)*g_t**2
            step = a_t*m_t/(T.sqrt(v_t) + epsilon)

            updates[m_prev] = m_t
            updates[v_prev] = v_t
            updates[param] = param - step

        updates[t_prev] = t
        return updates
Project: denet    Author: lachlants    | project source | file source
def replace_inf_nan(x, v):
    return tensor.switch(tensor.or_(tensor.isnan(x), tensor.isinf(x)), v, x)

#apply r = x + delta if r is not inf / nan, else return x
Project: denet    Author: lachlants    | project source | file source
def update_inf_nan(x, delta, v):
    r = x + delta
    return tensor.switch(tensor.or_(tensor.isnan(r), tensor.isinf(r)), x, r)

#will check if shuffle is needed
Project: dl4mt-cdec    Author: nyu-dl    | project source | file source
def gradient_clipping(grads, tparams, clip_c=10):
    g2 = 0.
    for g in grads:
        g2 += (g**2).sum()

    g2 = tensor.sqrt(g2)
    not_finite = tensor.or_(tensor.isnan(g2), tensor.isinf(g2))
    new_grads = []

    for p, g in zip(tparams.values(), grads):
        new_grads.append(tensor.switch(g2 > clip_c,
                                       g * (clip_c / g2),
                                       g))

    return new_grads, not_finite, tensor.lt(clip_c, g2)
Project: online_action    Author: zhenyangli    | project source | file source
def get_grad_param(self):
        self.grad_norm = TT.sqrt(sum(TT.sqr(g).sum() for g in self.model.grad)) / TT.cast(
            self.model.interface_layer.input.shape[1], 'float32')
        # self.has_numeric_error = TT.or_(TT.isnan(self.grad_norm), TT.isinf(self.grad_norm))
        # self.grad = [TT.switch(self.has_numeric_error, numpy_floatX(0.1) * p, g)
        # for g, p in zip(self.model.grad, self.model.param)]
        self.grad = [g / TT.cast(
            self.model.interface_layer.input.shape[1], 'float32') for g in self.model.grad]
        if self.clip_threshold is not None:
            self.grad = [TT.switch(TT.ge(self.grad_norm, self.clip_threshold),
                                   g * self.clip_threshold / self.grad_norm, g) for g in self.grad]
Project: crayimage    Author: yandexdataschool    | project source | file source
def pseudograd(loss, params, srng=None, temperature = 1.0e-1,
               learning_rate=1.0e-2, rho2=0.95):


  one = T.constant(1.0)
  zero = T.constant(0.0)

  deltas = [ make_normal(param, srng=srng) for param in params ]
  momentum = [ make_copy(param) for param in params ]

  new_params = [
    param + learning_rate * delta
    for param, delta, m in zip(params, deltas, momentum)
  ]

  new_loss = theano.clone(
    loss, replace=dict(zip(params, new_params))
  )

  accepting_p = T.exp((loss - new_loss) / temperature)
  u = srng.uniform(size=(), dtype=loss.dtype)

  cond = T.or_(T.or_(u > accepting_p, T.isnan(new_loss)), T.isinf(new_loss))
  step = T.switch(cond, zero, one)

  updates = OrderedDict()

  for m, delta in zip(momentum, deltas):
    updates[m] = m * rho2 + (one - rho2) * delta * step

  for param, m in zip(params, momentum):
    updates[param] = param + learning_rate * m

  return updates
Project: dl4mt-c2c    Author: nyu-dl    | project source | file source
def gradient_clipping(grads, tparams, clip_c=10):
    g2 = 0.
    for g in grads:
        g2 += (g**2).sum()

    g2 = tensor.sqrt(g2)
    not_finite = tensor.or_(tensor.isnan(g2), tensor.isinf(g2))
    new_grads = []

    for p, g in zip(tparams.values(), grads):
        new_grads.append(tensor.switch(g2 > clip_c,
                                       g * (clip_c / g2),
                                       g))

    return new_grads, not_finite, tensor.lt(clip_c, g2)
Project: dl4mt-c2c    Author: nyu-dl    | project source | file source
def gradient_clipping(grads, tparams, clip_c=10):
    g2 = 0.
    for g in grads:
        g2 += (g**2).sum()

    g2 = tensor.sqrt(g2)
    not_finite = tensor.or_(tensor.isnan(g2), tensor.isinf(g2))
    new_grads = []

    for p, g in zip(tparams.values(), grads):
        new_grads.append(tensor.switch(g2 > clip_c,
                                       g * (clip_c / g2),
                                       g))

    return new_grads, not_finite, tensor.lt(clip_c, g2)
Project: Theano-MPI    Author: uoguelph-mlrg    | project source | file source
def rmsprop(cost, params, learning_rate, momentum=0.5, rescale=5.):

    grads = T.grad(cost=cost, wrt=params)

    running_square_ = [theano.shared(np.zeros_like(p.get_value(),dtype=p.dtype), broadcastable=p.broadcastable)
                      for p in params]
    running_avg_ = [theano.shared(np.zeros_like(p.get_value(),dtype=p.dtype), broadcastable=p.broadcastable)
                   for p in params]
    memory_ = [theano.shared(np.zeros_like(p.get_value(),dtype=p.dtype), broadcastable=p.broadcastable)
                       for p in params]

    grad_norm = T.sqrt(sum(map(lambda x: T.sqr(x).sum(), grads)))
    not_finite = T.or_(T.isnan(grad_norm), T.isinf(grad_norm))
    grad_norm = T.sqrt(grad_norm)
    scaling_num = rescale
    scaling_den = T.maximum(rescale, grad_norm)
    # Magic constants
    combination_coeff = 0.9
    minimum_grad = 1E-4
    updates = []
    for n, (param, grad) in enumerate(zip(params, grads)):
       grad = T.switch(not_finite, 0.1 * param,
                       grad * (scaling_num / scaling_den))
       old_square = running_square_[n]
       new_square = combination_coeff * old_square + (
           1. - combination_coeff) * T.sqr(grad)
       old_avg = running_avg_[n]
       new_avg = combination_coeff * old_avg + (
           1. - combination_coeff) * grad
       rms_grad = T.sqrt(new_square - new_avg ** 2)
       rms_grad = T.maximum(rms_grad, minimum_grad)
       memory = memory_[n]
       update = momentum * memory - learning_rate * grad / rms_grad

       update2 = momentum * momentum * memory - (
           1 + momentum) * learning_rate * grad / rms_grad

       updates.append((old_square, new_square))
       updates.append((old_avg, new_avg))
       updates.append((memory, update))
       updates.append((param, param + update2))
    return updates
Project: Theano-MPI    Author: uoguelph-mlrg    | project source | file source
def rmsprop(cost, params, learning_rate, momentum=0.5, rescale=5.):

    grads = T.grad(cost=cost, wrt=params)

    running_square_ = [theano.shared(np.zeros_like(p.get_value(),dtype=p.dtype), broadcastable=p.broadcastable)
                      for p in params]
    running_avg_ = [theano.shared(np.zeros_like(p.get_value(),dtype=p.dtype), broadcastable=p.broadcastable)
                   for p in params]
    memory_ = [theano.shared(np.zeros_like(p.get_value(),dtype=p.dtype), broadcastable=p.broadcastable)
                       for p in params]

    grad_norm = T.sqrt(sum(map(lambda x: T.sqr(x).sum(), grads)))
    not_finite = T.or_(T.isnan(grad_norm), T.isinf(grad_norm))
    grad_norm = T.sqrt(grad_norm)
    scaling_num = rescale
    scaling_den = T.maximum(rescale, grad_norm)
    # Magic constants
    combination_coeff = 0.9
    minimum_grad = 1E-4
    updates = []
    for n, (param, grad) in enumerate(zip(params, grads)):
       grad = T.switch(not_finite, 0.1 * param,
                       grad * (scaling_num / scaling_den))
       old_square = running_square_[n]
       new_square = combination_coeff * old_square + (
           1. - combination_coeff) * T.sqr(grad)
       old_avg = running_avg_[n]
       new_avg = combination_coeff * old_avg + (
           1. - combination_coeff) * grad
       rms_grad = T.sqrt(new_square - new_avg ** 2)
       rms_grad = T.maximum(rms_grad, minimum_grad)
       memory = memory_[n]
       update = momentum * memory - learning_rate * grad / rms_grad

       update2 = momentum * momentum * memory - (
           1 + momentum) * learning_rate * grad / rms_grad

       updates.append((old_square, new_square))
       updates.append((old_avg, new_avg))
       updates.append((memory, update))
       updates.append((param, param + update2))
    return updates
Project: hred-latent-piecewise    Author: julianser    | project source | file source
def compute_updates(self, training_cost, params):
        updates = []

        grads = T.grad(training_cost, params)
        grads = OrderedDict(zip(params, grads))

        # Gradient clipping
        c = numpy.float32(self.cutoff)
        clip_grads = []

        norm_gs = T.sqrt(sum(T.sum(g ** 2) for p, g in grads.items()))
        normalization = T.switch(T.ge(norm_gs, c), c / norm_gs, np.float32(1.))
        notfinite = T.or_(T.isnan(norm_gs), T.isinf(norm_gs))

        for p, g in grads.items():
            clip_grads.append((p, T.switch(notfinite, numpy.float32(.1) * p, g * normalization)))

        grads = OrderedDict(clip_grads)

        if self.W_emb in grads:
            if self.initialize_from_pretrained_word_embeddings and self.fix_pretrained_word_embeddings:
                assert not self.fix_encoder_parameters
                # Keep pretrained word embeddings fixed
                logger.debug("Will use mask to fix pretrained word embeddings")
                grads[self.W_emb] = grads[self.W_emb] * self.W_emb_pretrained_mask
            elif self.fix_encoder_parameters:
                # If 'fix_encoder_parameters' is on, the word embeddings will be excluded from parameter training set
                logger.debug("Will fix word embeddings to initial embeddings or embeddings from resumed model")
            else:
                logger.debug("Will train all word embeddings")

        optimizer_variables = []
        if self.updater == 'adagrad':
            updates = Adagrad(grads, self.lr)
        elif self.updater == 'sgd':
            raise Exception("Sgd not implemented!")
        elif self.updater == 'adadelta':
            updates = Adadelta(grads)
        elif self.updater == 'rmsprop':
            updates = RMSProp(grads, self.lr)
        elif self.updater == 'adam':
            updates, optimizer_variables = Adam(grads, self.lr)
        else:
            raise Exception("Updater not understood!") 

        return updates, optimizer_variables

    # Batch training function.
Project: ADEM    Author: mike-n-7    | project source | file source
def compute_updates(self, training_cost, params):
        updates = []

        grads = T.grad(training_cost, params)
        grads = OrderedDict(zip(params, grads))

        # Gradient clipping
        c = numpy.float32(self.cutoff)
        clip_grads = []

        norm_gs = T.sqrt(sum(T.sum(g ** 2) for p, g in grads.items()))
        normalization = T.switch(T.ge(norm_gs, c), c / norm_gs, np.float32(1.))
        notfinite = T.or_(T.isnan(norm_gs), T.isinf(norm_gs))

        for p, g in grads.items():
            clip_grads.append((p, T.switch(notfinite, numpy.float32(.1) * p, g * normalization)))

        grads = OrderedDict(clip_grads)

        if self.initialize_from_pretrained_word_embeddings and self.fix_pretrained_word_embeddings:
            assert not self.fix_encoder_parameters
            # Keep pretrained word embeddings fixed
            logger.debug("Will use mask to fix pretrained word embeddings")
            grads[self.W_emb] = grads[self.W_emb] * self.W_emb_pretrained_mask
        elif self.fix_encoder_parameters:
            # If 'fix_encoder_parameters' is on, the word embeddings will be excluded from parameter training set
            logger.debug("Will fix word embeddings to initial embeddings or embeddings from resumed model")
        else:
            logger.debug("Will train all word embeddings")

        if self.updater == 'adagrad':
            updates = Adagrad(grads, self.lr)  
        elif self.updater == 'sgd':
            raise Exception("Sgd not implemented!")
        elif self.updater == 'adadelta':
            updates = Adadelta(grads)
        elif self.updater == 'rmsprop':
            updates = RMSProp(grads, self.lr)
        elif self.updater == 'adam':
            updates = Adam(grads, self.lr)
        else:
            raise Exception("Updater not understood!") 

        return updates

    # Batch training function.
Project: ADEM    Author: mike-n-7    | project source | file source
def compute_updates(self, training_cost, params):
        updates = []

        grads = T.grad(training_cost, params)
        grads = OrderedDict(zip(params, grads))

        # Gradient clipping
        c = numpy.float32(self.cutoff)
        clip_grads = []

        norm_gs = T.sqrt(sum(T.sum(g ** 2) for p, g in grads.items()))
        normalization = T.switch(T.ge(norm_gs, c), c / norm_gs, np.float32(1.))
        notfinite = T.or_(T.isnan(norm_gs), T.isinf(norm_gs))

        for p, g in grads.items():
            clip_grads.append((p, T.switch(notfinite, numpy.float32(.1) * p, g * normalization)))

        grads = OrderedDict(clip_grads)

        if self.W_emb in grads:
            if self.initialize_from_pretrained_word_embeddings and self.fix_pretrained_word_embeddings:
                assert not self.fix_encoder_parameters
                # Keep pretrained word embeddings fixed
                logger.debug("Will use mask to fix pretrained word embeddings")
                grads[self.W_emb] = grads[self.W_emb] * self.W_emb_pretrained_mask
            elif self.fix_encoder_parameters:
                # If 'fix_encoder_parameters' is on, the word embeddings will be excluded from parameter training set
                logger.debug("Will fix word embeddings to initial embeddings or embeddings from resumed model")
            else:
                logger.debug("Will train all word embeddings")

        optimizer_variables = []
        if self.updater == 'adagrad':
            updates = Adagrad(grads, self.lr)
        elif self.updater == 'sgd':
            raise Exception("Sgd not implemented!")
        elif self.updater == 'adadelta':
            updates = Adadelta(grads)
        elif self.updater == 'rmsprop':
            updates = RMSProp(grads, self.lr)
        elif self.updater == 'adam':
            updates, optimizer_variables = Adam(grads, self.lr)
        else:
            raise Exception("Updater not understood!") 

        return updates, optimizer_variables

    # Batch training function.
Project: MACA    Author: ppartha03    | project source | file source
def compute_updates(self, training_cost, params):
        updates = []

        grads = T.grad(training_cost, params)
        grads = OrderedDict(zip(params, grads))

        # Gradient clipping
        c = numpy.float32(self.cutoff)
        clip_grads = []

        norm_gs = T.sqrt(sum(T.sum(g ** 2) for p, g in grads.items()))
        normalization = T.switch(T.ge(norm_gs, c), c / norm_gs, np.float32(1.))
        notfinite = T.or_(T.isnan(norm_gs), T.isinf(norm_gs))

        for p, g in grads.items():
            clip_grads.append((p, T.switch(notfinite, numpy.float32(.1) * p, g * normalization)))

        grads = OrderedDict(clip_grads)

        if self.initialize_from_pretrained_word_embeddings and self.fix_pretrained_word_embeddings:
            assert not self.fix_encoder_parameters
            # Keep pretrained word embeddings fixed
            logger.debug("Will use mask to fix pretrained word embeddings")
            grads[self.W_emb] = grads[self.W_emb] * self.W_emb_pretrained_mask
        elif self.fix_encoder_parameters:
            # If 'fix_encoder_parameters' is on, the word embeddings will be excluded from parameter training set
            logger.debug("Will fix word embeddings to initial embeddings or embeddings from resumed model")
        else:
            logger.debug("Will train all word embeddings")

        if self.updater == 'adagrad':
            updates = Adagrad(grads, self.lr)
        elif self.updater == 'sgd':
            raise Exception("Sgd not implemented!")
        elif self.updater == 'adadelta':
            updates = Adadelta(grads)
        elif self.updater == 'rmsprop':
            updates = RMSProp(grads, self.lr)
        elif self.updater == 'adam':
            updates = Adam(grads, self.lr)
        else:
            raise Exception("Updater not understood!")

        return updates

    # Batch training function.