Python theano.tensor module: isnan() code examples

We extracted the following 11 code examples from open-source Python projects to illustrate how to use theano.tensor.isnan().
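
Before the project snippets, here is a minimal, self-contained sketch of what theano.tensor.isnan() gives you: an element-wise NaN mask that can be combined with switch/sum, exactly as the examples below do. The variable names are illustrative.

import numpy as np
import theano
import theano.tensor as T

x = T.vector("x")                     # symbolic input that may contain NaNs
nan_mask = T.isnan(x)                 # element-wise NaN mask
cleaned = T.switch(nan_mask, 0.0, x)  # replace NaNs with 0.0
f = theano.function([x], [nan_mask, cleaned])

mask, values = f(np.array([1.0, np.nan, 3.0], dtype=theano.config.floatX))
# mask is nonzero exactly at the NaN position; values == [1.0, 0.0, 3.0]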

Project: pepnet    Author: hammerlab
def masked_mse(y_true, y_pred):
    mask = T.isnan(y_true)
    diff = y_pred - y_true
    squared = K.square(diff)
    sum_squared_error = K.sum(
        K.switch(mask, 0.0, squared),
        axis=-1)
    n_valid_per_sample = K.sum(~mask, axis=-1)
    return sum_squared_error / n_valid_per_sample
Project: pepnet    Author: hammerlab
def masked_binary_crossentropy(y_true, y_pred):
    mask = T.isnan(y_true)
    cross_entropy_values = K.binary_crossentropy(
        output=y_pred,
        target=y_true)
    sum_cross_entropy_values = K.sum(
        K.switch(mask, 0.0, cross_entropy_values), axis=-1)
    n_valid_per_sample = K.sum(~mask, axis=-1)
    return sum_cross_entropy_values / n_valid_per_sample
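
Both pepnet losses above assume that T is theano.tensor, that K is the Keras backend running on Theano, and that missing labels are encoded as NaN in y_true. A hedged sketch of how such a masked loss might be plugged into a Keras model follows; the architecture and dimensions are placeholders, not taken from pepnet.

# Hypothetical usage: model shape and optimizer are illustrative.
from keras.models import Sequential
from keras.layers import Dense

model = Sequential([
    Dense(32, activation="relu", input_dim=20),
    Dense(5, activation="sigmoid"),   # 5 binary labels, some of which may be NaN
])
# NaN entries in y_true contribute neither to the loss sum nor to the valid count.
model.compile(optimizer="adam", loss=masked_binary_crossentropy)

Note that a sample whose labels are all NaN makes n_valid_per_sample zero and the loss divide by zero, so such rows should be filtered out of the training data beforehand.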
Project: pyrl    Author: frsong
def get_updates(self, loss, lr, max_norm=1, beta1=0.9, beta2=0.999,
                    epsilon=1e-8, grads=None):
        # Gradients
        if grads is None:
            grads = tensor.grad(loss, self.trainables)

        # Clipping
        norm  = tensor.sqrt(sum([tensor.sqr(g).sum() for g in grads]))
        m     = theanotools.clipping_multiplier(norm, max_norm)
        grads = [m*g for g in grads]

        # Safeguard against numerical instability
        new_cond = tensor.or_(tensor.or_(tensor.isnan(norm), tensor.isinf(norm)),
                              tensor.or_(norm < 0, norm > 1e10))
        grads = [tensor.switch(new_cond, np.float32(0), g) for g in grads]

        # Safeguard against numerical instability
        #cond  = tensor.or_(norm < 0, tensor.or_(tensor.isnan(norm), tensor.isinf(norm)))
        #grads = [tensor.switch(cond, np.float32(0), g) for g in grads]

        # New values
        t       = self.time + 1
        lr_t    = lr*tensor.sqrt(1. - beta2**t)/(1. - beta1**t)
        means_t = [beta1*m + (1. - beta1)*g for g, m in zip(grads, self.means)]
        vars_t  = [beta2*v + (1. - beta2)*tensor.sqr(g) for g, v in zip(grads, self.vars)]
        steps   = [lr_t*m_t/(tensor.sqrt(v_t) + epsilon)
                   for m_t, v_t in zip(means_t, vars_t)]

        # Updates
        updates  = [(x, x - step) for x, step in zip(self.trainables, steps)]
        updates += [(m, m_t) for m, m_t in zip(self.means, means_t)]
        updates += [(v, v_t) for v, v_t in zip(self.vars, vars_t)]
        updates += [(self.time, t)]

        return norm, grads, updates
Project: denet    Author: lachlants
def replace_inf_nan(x, v):
    return tensor.switch(tensor.or_(tensor.isnan(x), tensor.isinf(x)), v, x)

#apply r = x + delta if r is not inf / nan, else return x
Project: denet    Author: lachlants
def update_inf_nan(x, delta, v):
    r = x + delta
    return tensor.switch(tensor.or_(tensor.isnan(r), tensor.isinf(r)), x, r)
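
A hedged usage sketch for the two denet helpers above; the gradient-step expression and variable names are illustrative, not taken from denet.

# Hypothetical example: guard a parameter update against inf/nan values.
import numpy as np
import theano
from theano import tensor

w = theano.shared(np.ones(5, dtype=theano.config.floatX), name="w")
grad = tensor.vector("grad")

safe_grad = replace_inf_nan(grad, 0.0)             # zero out inf/nan gradient entries
new_w = update_inf_nan(w, -0.01 * safe_grad, 0.0)  # keep the old w wherever the step misbehaves
train_step = theano.function([grad], new_w, updates=[(w, new_w)])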

#will check if shuffle is needed
Project: nature_methods_multicut_pipeline    Author: ilastik
def sgd(params, cost=None, gradients=None, learningrate=1e-4):
    """
    Computes the updates for Stochastic Gradient Descent (without momentum)

    :type params: list
    :param params: Network parameters.

    :type cost: theano.tensor.var.TensorVariable
    :param cost: Cost variable (scalar). Optional if the gradient is provided.

    :type gradients: list
    :param gradients: Gradient of a cost w.r.t. parameters. Optional if the cost is provided.

    :type learningrate: theano.tensor.var.TensorVariable or float
    :param learningrate: Learning rate of SGD. Can be a float (static) or a dynamic theano variable.

    :return: List of updates
    """

    # Validate input
    assert not (cost is None and gradients is None), "Update function sgd requires either a cost scalar or a list of " \
                                                     "gradients."

    # Compute gradients if requested
    if gradients is None and cost is not None:
        pdC = T.grad(cost, wrt=params)
        # Kill gradients if cost is nan
        dC = [th.ifelse.ifelse(T.isnan(cost), T.zeros_like(dparam), dparam) for dparam in pdC]
    else:
        dC = gradients

    # Compute updates
    upd = [(param, param - learningrate * dparam) for param, dparam in zip(params, dC)]

    # Return
    return upd
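
The update list returned here (and by the other helpers from this project) is meant to be handed to theano.function. A hedged usage sketch with a placeholder least-squares cost; x, y and w are illustrative, not from the project.

# Hypothetical usage: compile a training step from the returned update list.
import numpy as np
import theano as th
import theano.tensor as T

x = T.matrix("x")
y = T.vector("y")
w = th.shared(np.zeros(3, dtype=th.config.floatX), name="w")
cost = T.mean(T.sqr(T.dot(x, w) - y))

updates = sgd([w], cost=cost, learningrate=1e-3)
train_step = th.function([x, y], cost, updates=updates)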


# ADAM
Project: nature_methods_multicut_pipeline    Author: ilastik
def momsgd(params, cost=None, gradients=None, learningrate=0.01, momentum=0.9, nesterov=True):
    # TODO: Docstring
    # Validate input
    assert not (cost is None and gradients is None), "Update function momsgd requires either a cost scalar or a " \
                                                     "list of gradients."

    # Compute gradients if requested
    if gradients is None and cost is not None:
        pdC = T.grad(cost, wrt=params)
        # Kill gradients if cost is nan
        dC = [th.ifelse.ifelse(T.isnan(cost), T.zeros_like(dparam), dparam) for dparam in pdC]
    else:
        dC = gradients

    # Init update list
    updates = []

    for param, dparam in zip(params, dC):
        # Fetch parameter shape
        paramshape = param.get_value().shape
        # ... and init initial momentum
        mom = th.shared(np.zeros(paramshape, dtype=th.config.floatX))
        # Compute velocity
        vel = momentum * mom - learningrate * dparam

        # Compute new parameters
        if nesterov:
            newparam = param + momentum * vel - learningrate * dparam
        else:
            newparam = param + vel

        # update update list
        updates.append((param, newparam))
        updates.append((mom, vel))

    # Return
    return updates
Project: nature_methods_multicut_pipeline    Author: ilastik
def rmsprop(params, cost=None, gradients=None, learningrate=0.0005, rho=0.9, epsilon=1e-6):

    # Validate input
    assert not (cost is None and gradients is None), "Update function rmsprop requires either a cost scalar or a " \
                                                     "list of gradients."

    # Compute gradients if requested
    if gradients is None and cost is not None:
        pdC = T.grad(cost, wrt=params)
        # Kill gradients if cost is nan
        dC = [th.ifelse.ifelse(T.isnan(cost), T.zeros_like(dparam), dparam) for dparam in pdC]
    else:
        dC = gradients

    # Init update list
    updates = []

    for p, g in zip(params, dC):
        acc = th.shared(p.get_value() * 0.)
        newacc = rho * acc + (1 - rho) * g ** 2
        gradscale = T.sqrt(newacc + epsilon)
        g = g / gradscale
        updates.append((acc, newacc))
        updates.append((p, p - learningrate * g))

    return updates

# Aliases
Project: nature_methods_multicut_pipeline    Author: ilastik
def nadam(params, cost=None, gradients=None, learningrate=0.002, beta1=0.9, beta2=0.999, epsilon=1e-8,
          scheduledecay=0.004, iterstart=0):
    """See also: https://github.com/fchollet/keras/blob/master/keras/optimizers.py#L441"""

    # Validate input
    assert not (cost is None and gradients is None), "Update function nadam requires either a cost scalar or a " \
                                                     "list of gradients."

    # Compute gradients if requested
    if gradients is None and cost is not None:
        pdC = T.grad(cost, wrt=params)
        # Kill gradients if cost is nan
        dC = [th.ifelse.ifelse(T.isnan(cost), T.zeros_like(dparam), dparam) for dparam in pdC]
    else:
        dC = gradients

    # Init update list
    updates = []

    tm1 = th.shared(np.asarray(iterstart, dtype=th.config.floatX))
    t = tm1 + 1
    momcachet = beta1 * (1. - 0.5 * 0.96**(t * scheduledecay))

    pass
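
The excerpt above stops at pass. Below is a hedged, self-contained sketch of what a complete Nadam update in the same style might look like, following the Keras implementation referenced in the docstring; it is not the project's actual code, and every name in it (nadam_sketch, scheduleprod, momcachetp1, m, v, ...) is an assumption.

import numpy as np
import theano as th
import theano.tensor as T

def nadam_sketch(params, gradients, learningrate=0.002, beta1=0.9, beta2=0.999,
                 epsilon=1e-8, scheduledecay=0.004, iterstart=0):
    updates = []

    # Iteration counter and momentum-cache schedule (Keras' momentum_cache_t).
    tm1 = th.shared(np.asarray(iterstart, dtype=th.config.floatX))
    t = tm1 + 1
    momcachet = beta1 * (1. - 0.5 * 0.96**(t * scheduledecay))
    momcachetp1 = beta1 * (1. - 0.5 * 0.96**((t + 1) * scheduledecay))

    # Running product of the schedule (Keras' m_schedule).
    scheduleprod = th.shared(np.asarray(1., dtype=th.config.floatX))
    scheduleprod_t = scheduleprod * momcachet
    scheduleprod_tp1 = scheduleprod_t * momcachetp1
    updates.append((scheduleprod, scheduleprod_t))

    for param, dparam in zip(params, gradients):
        paramshape = param.get_value().shape
        m = th.shared(np.zeros(paramshape, dtype=th.config.floatX))  # first moment
        v = th.shared(np.zeros(paramshape, dtype=th.config.floatX))  # second moment

        gprime = dparam / (1. - scheduleprod_t)
        m_t = beta1 * m + (1. - beta1) * dparam
        m_t_prime = m_t / (1. - scheduleprod_tp1)
        v_t = beta2 * v + (1. - beta2) * T.sqr(dparam)
        v_t_prime = v_t / (1. - beta2**t)
        m_t_bar = (1. - momcachet) * gprime + momcachetp1 * m_t_prime

        updates.append((m, m_t))
        updates.append((v, v_t))
        updates.append((param, param - learningrate * m_t_bar / (T.sqrt(v_t_prime) + epsilon)))

    updates.append((tm1, t))
    return updates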


# Momentum SGD
Project: nature_methods_multicut_pipeline    Author: ilastik
def rmsprop(params, cost=None, gradients=None, learningrate=0.0005, rho=0.9, epsilon=1e-6):

    # Validate input
    assert not (cost is None and gradients is None), "Update function rmsprop requires either a cost scalar or a " \
                                                     "list of gradients."

    # Compute gradients if requested
    if gradients is None and cost is not None:
        pdC = T.grad(cost, wrt=params)
        # Kill gradients if cost is nan
        dC = [th.ifelse.ifelse(T.isnan(cost), T.zeros_like(dparam), dparam) for dparam in pdC]
    else:
        dC = gradients

    # Init update list
    updates = []

    for param, dparam in zip(params, dC):
        paramshape = param.get_value().shape
        acc = th.shared(np.zeros(paramshape, dtype=th.config.floatX))
        newacc = rho * acc + (1 - rho) * dparam ** 2
        gradscale = T.sqrt(newacc + epsilon)
        dparam = dparam / gradscale
        updates.append((acc, newacc))
        updates.append((param, param - learningrate * dparam))

    return updates

# Aliases
Project: crayimage    Author: yandexdataschool
def pseudograd(loss, params, srng=None, temperature = 1.0e-1,
               learning_rate=1.0e-2, rho2=0.95):


  one = T.constant(1.0)
  zero = T.constant(0.0)

  deltas = [ make_normal(param, srng=srng) for param in params ]
  momentum = [ make_copy(param) for param in params ]

  new_params = [
    param + learning_rate * delta
    for param, delta, m in zip(params, deltas, momentum)
  ]

  new_loss = theano.clone(
    loss, replace=dict(zip(params, new_params))
  )

  accepting_p = T.exp((loss - new_loss) / temperature)
  u = srng.uniform(size=(), dtype=loss.dtype)

  cond = T.or_(T.or_(u > accepting_p, T.isnan(new_loss)), T.isinf(new_loss))
  step = T.switch(cond, zero, one)

  updates = OrderedDict()

  for m, delta in zip(momentum, deltas):
    updates[m] = m * rho2 + (one - rho2) * delta * step

  for param, m in zip(params, momentum):
    updates[param] = param + learning_rate * m

  return updates