Python theano.tensor module: or_() example source code

We extracted the following 22 code examples from open-source Python projects to illustrate how to use theano.tensor.or_().
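
As a quick reference, here is a minimal, self-contained sketch (not taken from any of the projects below; it assumes a working Theano installation) showing that theano.tensor.or_() computes an elementwise logical/bitwise OR of integer tensors:

import numpy as np
import theano
import theano.tensor as T

# Elementwise OR of two int8 vectors.
a = T.bvector('a')
b = T.bvector('b')
f = theano.function([a, b], T.or_(a, b))

print(f(np.array([0, 0, 1, 1], dtype='int8'),
        np.array([0, 1, 0, 1], dtype='int8')))  # expected: [0 1 1 1]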

Project: DL4NMT_Theano    Author: fyabc    | Project source | File source
def clip_grad_remove_nan(grads, clip_c_shared, mt_tparams):
    g2 = 0.
    for g in grads:
        g2 += (g*g).sum()
    not_finite = tensor.or_(tensor.isnan(g2), tensor.isinf(g2))
    if clip_c_shared.get_value() > 0.:
        new_grads = []
        for g, p in zip(grads, itemlist(mt_tparams)):
            tmpg = tensor.switch(g2 > (clip_c_shared*clip_c_shared),
                                 g / tensor.sqrt(g2) * clip_c_shared,
                                 g)
            new_grads.append(tensor.switch(not_finite, np.float32(.1)*p, tmpg))

        return new_grads, tensor.sqrt(g2)
    else:
        return grads, tensor.sqrt(g2)
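
The pattern in the example above recurs throughout this page: or_() combines isnan and isinf checks on the squared gradient norm into a single "not finite" flag, which switch() then uses to fall back to a safe value. A minimal standalone sketch of that guard (variable names are illustrative, not taken from DL4NMT_Theano) might look like:

import numpy as np
import theano
import theano.tensor as tensor

x = tensor.vector('x')
g2 = (x * x).sum()
# Flag is 1 when the squared norm is NaN or Inf, 0 otherwise.
not_finite = tensor.or_(tensor.isnan(g2), tensor.isinf(g2))
# Use a constant norm when not finite, otherwise the real one.
safe_norm = tensor.switch(not_finite, np.float32(0.), tensor.sqrt(g2))
f = theano.function([x], [not_finite, safe_norm])

print(f(np.array([3., 4.], dtype='float32')))      # roughly [0, 5.0]
print(f(np.array([np.inf, 1.], dtype='float32')))  # roughly [1, 0.0]
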
Project: tree_rnn    Author: ofirnachum    | Project source | File source
def gradient_descent(self, loss):
        """Momentum GD with gradient clipping."""
        grad = T.grad(loss, self.params)
        self.momentum_velocity_ = [0.] * len(grad)
        grad_norm = T.sqrt(sum(map(lambda x: T.sqr(x).sum(), grad)))
        updates = OrderedDict()
        not_finite = T.or_(T.isnan(grad_norm), T.isinf(grad_norm))
        scaling_den = T.maximum(5.0, grad_norm)
        for n, (param, grad) in enumerate(zip(self.params, grad)):
            grad = T.switch(not_finite, 0.1 * param,
                            grad * (5.0 / scaling_den))
            velocity = self.momentum_velocity_[n]
            update_step = self.momentum * velocity - self.learning_rate * grad
            self.momentum_velocity_[n] = update_step
            updates[param] = param + update_step
        return updates
Project: Theano-Deep-learning    Author: GeekLiB    | Project source | File source
def test_or(self):
        mode = theano.compile.get_default_mode().including('canonicalize')
        x = T.scalar('x', dtype='int8')

        for zero, one in [(numpy.int8(0), numpy.int8(1)), (0, 1)]:
            f = theano.function([x], T.or_(x, one), mode=mode)
            self.assert_eqs_const(f, 1)

            f = theano.function([x], T.or_(one, x), mode=mode)
            self.assert_eqs_const(f, 1)

            f = theano.function([x], T.or_(x, zero), mode=mode)
            if f.outputs[0].variable.dtype == x.dtype:
                self.assert_identity(f)

            f = theano.function([x], T.or_(zero, x), mode=mode)
            if f.outputs[0].variable.dtype == x.dtype:
                self.assert_identity(f)
Project: GRU-or-CNN    Author: hit-computer    | Project source | File source
def compute_updates(training_cost, params, config):
    updates = []

    grads = T.grad(training_cost, params)
    grads = OrderedDict(zip(params, grads))

    # Clip stuff
    c = np.float32(1.)
    clip_grads = []

    norm_gs = T.sqrt(sum(T.sum(g ** 2) for p, g in grads.items()))
    normalization = T.switch(T.ge(norm_gs, c), c / norm_gs, np.float32(1.))
    notfinite = T.or_(T.isnan(norm_gs), T.isinf(norm_gs))

    for p, g in grads.items():
        clip_grads.append((p, T.switch(notfinite, np.float32(.1) * p, g * normalization)))

    grads = OrderedDict(clip_grads)

    updates = Adam(grads, config.learning_rate)  # Adam optimizer

    return updates
Project: NNBuilder    Author: aeloyq    | Project source | File source
def or_(self, l, r):
            return T.or_(l, r)
Project: pyrl    Author: frsong    | Project source | File source
def get_updates(self, loss, lr, max_norm=1, beta1=0.9, beta2=0.999,
                    epsilon=1e-8, grads=None):
        # Gradients
        if grads is None:
            grads = tensor.grad(loss, self.trainables)

        # Clipping
        norm  = tensor.sqrt(sum([tensor.sqr(g).sum() for g in grads]))
        m     = theanotools.clipping_multiplier(norm, max_norm)
        grads = [m*g for g in grads]

        # Safeguard against numerical instability
        new_cond = tensor.or_(tensor.or_(tensor.isnan(norm), tensor.isinf(norm)),
                              tensor.or_(norm < 0, norm > 1e10))
        grads = [tensor.switch(new_cond, np.float32(0), g) for g in grads]

        # Safeguard against numerical instability
        #cond  = tensor.or_(norm < 0, tensor.or_(tensor.isnan(norm), tensor.isinf(norm)))
        #grads = [tensor.switch(cond, np.float32(0), g) for g in grads]

        # New values
        t       = self.time + 1
        lr_t    = lr*tensor.sqrt(1. - beta2**t)/(1. - beta1**t)
        means_t = [beta1*m + (1. - beta1)*g for g, m in zip(grads, self.means)]
        vars_t  = [beta2*v + (1. - beta2)*tensor.sqr(g) for g, v in zip(grads, self.vars)]
        steps   = [lr_t*m_t/(tensor.sqrt(v_t) + epsilon)
                   for m_t, v_t in zip(means_t, vars_t)]

        # Updates
        updates  = [(x, x - step) for x, step in zip(self.trainables, steps)]
        updates += [(m, m_t) for m, m_t in zip(self.means, means_t)]
        updates += [(v, v_t) for v, v_t in zip(self.vars, vars_t)]
        updates += [(self.time, t)]

        return norm, grads, updates
Project: policy_search_bb-alpha    Author: siemens    | Project source | File source
def adam(self,cost, params, learning_rate=0.001, beta1=0.9,
             beta2=0.999, epsilon=1e-8):

        all_grads = T.grad(cost=cost, wrt=params)
        all_grads = total_norm_constraint(all_grads,10)

        grad_norm = T.sqrt(sum(map(lambda x: T.sqr(x).sum(), all_grads)))
        not_finite = T.or_(T.isnan(grad_norm), T.isinf(grad_norm))

        t_prev = theano.shared(utils.floatX(0.))
        updates = OrderedDict()

        t = t_prev + 1
        a_t = learning_rate*T.sqrt(1-beta2**t)/(1-beta1**t)

        for param, g_t in zip(params, all_grads):
            g_t = T.switch(not_finite, 0.1 * param, g_t)
            value = param.get_value(borrow=True)
            m_prev = theano.shared(np.zeros(value.shape, dtype=value.dtype),
                                   broadcastable=param.broadcastable)
            v_prev = theano.shared(np.zeros(value.shape, dtype=value.dtype),
                                   broadcastable=param.broadcastable)

            m_t = beta1*m_prev + (1-beta1)*g_t
            v_t = beta2*v_prev + (1-beta2)*g_t**2
            step = a_t*m_t/(T.sqrt(v_t) + epsilon)

            updates[m_prev] = m_t
            updates[v_prev] = v_t
            updates[param] = param - step

        updates[t_prev] = t
        return updates
Project: dl4mt-cdec    Author: nyu-dl    | Project source | File source
def gradient_clipping(grads, tparams, clip_c=10):
    g2 = 0.
    for g in grads:
        g2 += (g**2).sum()

    g2 = tensor.sqrt(g2)
    not_finite = tensor.or_(tensor.isnan(g2), tensor.isinf(g2))
    new_grads = []

    for p, g in zip(tparams.values(), grads):
        new_grads.append(tensor.switch(g2 > clip_c,
                                       g * (clip_c / g2),
                                       g))

    return new_grads, not_finite, tensor.lt(clip_c, g2)
Project: Theano-Deep-learning    Author: GeekLiB    | Project source | File source
def test_elemwise(self):
        # float Ops
        mats = theano.tensor.matrices('cabxy')
        c, a, b, x, y = mats
        s1 = T.switch(c, a, b)
        s2 = T.switch(c, x, y)
        for op in (T.add, T.sub, T.mul, T.true_div, T.int_div, T.floor_div,
                   T.minimum, T.maximum, T.gt, T.lt, T.ge, T.le, T.eq, T.neq,
                   T.pow):
            g = optimize(FunctionGraph(mats, [op(s1, s2)]))
            assert str(g).count('Switch') == 1
        # integer Ops
        mats = theano.tensor.imatrices('cabxy')
        c, a, b, x, y = mats
        s1 = T.switch(c, a, b)
        s2 = T.switch(c, x, y)
        for op in (T.and_, T.or_, T.xor,
                   T.bitwise_and, T.bitwise_or, T.bitwise_xor):
            g = optimize(FunctionGraph(mats, [op(s1, s2)]))
            assert str(g).count('Switch') == 1
        # add/mul with more than two inputs
        u, v = theano.tensor.matrices('uv')
        s3 = T.switch(c, u, v)
        for op in (T.add, T.mul):
            g = optimize(FunctionGraph(mats + [u, v], [op(s1, s2, s3)]))
            assert str(g).count('Switch') == 1
Project: online_action    Author: zhenyangli    | Project source | File source
def get_grad_param(self):
        self.grad_norm = TT.sqrt(sum(TT.sqr(g).sum() for g in self.model.grad)) / TT.cast(
            self.model.interface_layer.input.shape[1], 'float32')
        # self.has_numeric_error = TT.or_(TT.isnan(self.grad_norm), TT.isinf(self.grad_norm))
        # self.grad = [TT.switch(self.has_numeric_error, numpy_floatX(0.1) * p, g)
        # for g, p in zip(self.model.grad, self.model.param)]
        self.grad = [g / TT.cast(
            self.model.interface_layer.input.shape[1], 'float32') for g in self.model.grad]
        if self.clip_threshold is not None:
            self.grad = [TT.switch(TT.ge(self.grad_norm, self.clip_threshold),
                                   g * self.clip_threshold / self.grad_norm, g) for g in self.grad]
Project: crayimage    Author: yandexdataschool    | Project source | File source
def pseudograd(loss, params, srng=None, temperature = 1.0e-1,
               learning_rate=1.0e-2, rho2=0.95):


  one = T.constant(1.0)
  zero = T.constant(0.0)

  deltas = [ make_normal(param, srng=srng) for param in params ]
  momentum = [ make_copy(param) for param in params ]

  new_params = [
    param + learning_rate * delta
    for param, delta, m in zip(params, deltas, momentum)
  ]

  new_loss = theano.clone(
    loss, replace=dict(zip(params, new_params))
  )

  accepting_p = T.exp((loss - new_loss) / temperature)
  u = srng.uniform(size=(), dtype=loss.dtype)

  cond = T.or_(T.or_(u > accepting_p, T.isnan(new_loss)), T.isinf(new_loss))
  step = T.switch(cond, zero, one)

  updates = OrderedDict()

  for m, delta in zip(momentum, deltas):
    updates[m] = m * rho2 + (one - rho2) * delta * step

  for param, m in zip(params, momentum):
    updates[param] = param + learning_rate * m

  return updates
Project: dl4mt-c2c    Author: nyu-dl    | Project source | File source
def gradient_clipping(grads, tparams, clip_c=10):
    g2 = 0.
    for g in grads:
        g2 += (g**2).sum()

    g2 = tensor.sqrt(g2)
    not_finite = tensor.or_(tensor.isnan(g2), tensor.isinf(g2))
    new_grads = []

    for p, g in zip(tparams.values(), grads):
        new_grads.append(tensor.switch(g2 > clip_c,
                                       g * (clip_c / g2),
                                       g))

    return new_grads, not_finite, tensor.lt(clip_c, g2)
Project: dl4mt-c2c    Author: nyu-dl    | Project source | File source
def gradient_clipping(grads, tparams, clip_c=10):
    g2 = 0.
    for g in grads:
        g2 += (g**2).sum()

    g2 = tensor.sqrt(g2)
    not_finite = tensor.or_(tensor.isnan(g2), tensor.isinf(g2))
    new_grads = []

    for p, g in zip(tparams.values(), grads):
        new_grads.append(tensor.switch(g2 > clip_c,
                                       g * (clip_c / g2),
                                       g))

    return new_grads, not_finite, tensor.lt(clip_c, g2)
Project: Theano-MPI    Author: uoguelph-mlrg    | Project source | File source
def rmsprop(cost, params, learning_rate, momentum=0.5, rescale=5.):

    grads = T.grad(cost=cost, wrt=params)

    running_square_ = [theano.shared(np.zeros_like(p.get_value(),dtype=p.dtype), broadcastable=p.broadcastable)
                      for p in params]
    running_avg_ = [theano.shared(np.zeros_like(p.get_value(),dtype=p.dtype), broadcastable=p.broadcastable)
                   for p in params]
    memory_ = [theano.shared(np.zeros_like(p.get_value(),dtype=p.dtype), broadcastable=p.broadcastable)
                       for p in params]

    grad_norm = T.sqrt(sum(map(lambda x: T.sqr(x).sum(), grads)))
    not_finite = T.or_(T.isnan(grad_norm), T.isinf(grad_norm))
    grad_norm = T.sqrt(grad_norm)
    scaling_num = rescale
    scaling_den = T.maximum(rescale, grad_norm)
    # Magic constants
    combination_coeff = 0.9
    minimum_grad = 1E-4
    updates = []
    for n, (param, grad) in enumerate(zip(params, grads)):
       grad = T.switch(not_finite, 0.1 * param,
                       grad * (scaling_num / scaling_den))
       old_square = running_square_[n]
       new_square = combination_coeff * old_square + (
           1. - combination_coeff) * T.sqr(grad)
       old_avg = running_avg_[n]
       new_avg = combination_coeff * old_avg + (
           1. - combination_coeff) * grad
       rms_grad = T.sqrt(new_square - new_avg ** 2)
       rms_grad = T.maximum(rms_grad, minimum_grad)
       memory = memory_[n]
       update = momentum * memory - learning_rate * grad / rms_grad

       update2 = momentum * momentum * memory - (
           1 + momentum) * learning_rate * grad / rms_grad

       updates.append((old_square, new_square))
       updates.append((old_avg, new_avg))
       updates.append((memory, update))
       updates.append((param, param + update2))
    return updates
Project: Theano-MPI    Author: uoguelph-mlrg    | Project source | File source
def rmsprop(cost, params, learning_rate, momentum=0.5, rescale=5.):

    grads = T.grad(cost=cost, wrt=params)

    running_square_ = [theano.shared(np.zeros_like(p.get_value(),dtype=p.dtype), broadcastable=p.broadcastable)
                      for p in params]
    running_avg_ = [theano.shared(np.zeros_like(p.get_value(),dtype=p.dtype), broadcastable=p.broadcastable)
                   for p in params]
    memory_ = [theano.shared(np.zeros_like(p.get_value(),dtype=p.dtype), broadcastable=p.broadcastable)
                       for p in params]

    grad_norm = T.sqrt(sum(map(lambda x: T.sqr(x).sum(), grads)))
    not_finite = T.or_(T.isnan(grad_norm), T.isinf(grad_norm))
    grad_norm = T.sqrt(grad_norm)
    scaling_num = rescale
    scaling_den = T.maximum(rescale, grad_norm)
    # Magic constants
    combination_coeff = 0.9
    minimum_grad = 1E-4
    updates = []
    for n, (param, grad) in enumerate(zip(params, grads)):
       grad = T.switch(not_finite, 0.1 * param,
                       grad * (scaling_num / scaling_den))
       old_square = running_square_[n]
       new_square = combination_coeff * old_square + (
           1. - combination_coeff) * T.sqr(grad)
       old_avg = running_avg_[n]
       new_avg = combination_coeff * old_avg + (
           1. - combination_coeff) * grad
       rms_grad = T.sqrt(new_square - new_avg ** 2)
       rms_grad = T.maximum(rms_grad, minimum_grad)
       memory = memory_[n]
       update = momentum * memory - learning_rate * grad / rms_grad

       update2 = momentum * momentum * memory - (
           1 + momentum) * learning_rate * grad / rms_grad

       updates.append((old_square, new_square))
       updates.append((old_avg, new_avg))
       updates.append((memory, update))
       updates.append((param, param + update2))
    return updates
Project: hred-latent-piecewise    Author: julianser    | Project source | File source
def compute_updates(self, training_cost, params):
        updates = []

        grads = T.grad(training_cost, params)
        grads = OrderedDict(zip(params, grads))

        # Gradient clipping
        c = numpy.float32(self.cutoff)
        clip_grads = []

        norm_gs = T.sqrt(sum(T.sum(g ** 2) for p, g in grads.items()))
        normalization = T.switch(T.ge(norm_gs, c), c / norm_gs, np.float32(1.))
        notfinite = T.or_(T.isnan(norm_gs), T.isinf(norm_gs))

        for p, g in grads.items():
            clip_grads.append((p, T.switch(notfinite, numpy.float32(.1) * p, g * normalization)))

        grads = OrderedDict(clip_grads)

        if self.W_emb in grads:
            if self.initialize_from_pretrained_word_embeddings and self.fix_pretrained_word_embeddings:
                assert not self.fix_encoder_parameters
                # Keep pretrained word embeddings fixed
                logger.debug("Will use mask to fix pretrained word embeddings")
                grads[self.W_emb] = grads[self.W_emb] * self.W_emb_pretrained_mask
            elif self.fix_encoder_parameters:
                # If 'fix_encoder_parameters' is on, the word embeddings will be excluded from parameter training set
                logger.debug("Will fix word embeddings to initial embeddings or embeddings from resumed model")
            else:
                logger.debug("Will train all word embeddings")

        optimizer_variables = []
        if self.updater == 'adagrad':
            updates = Adagrad(grads, self.lr)
        elif self.updater == 'sgd':
            raise Exception("Sgd not implemented!")
        elif self.updater == 'adadelta':
            updates = Adadelta(grads)
        elif self.updater == 'rmsprop':
            updates = RMSProp(grads, self.lr)
        elif self.updater == 'adam':
            updates, optimizer_variables = Adam(grads, self.lr)
        else:
            raise Exception("Updater not understood!") 

        return updates, optimizer_variables

    # Batch training function.
Project: ADEM    Author: mike-n-7    | Project source | File source
def compute_updates(self, training_cost, params):
        updates = []

        grads = T.grad(training_cost, params)
        grads = OrderedDict(zip(params, grads))

        # Gradient clipping
        c = numpy.float32(self.cutoff)
        clip_grads = []

        norm_gs = T.sqrt(sum(T.sum(g ** 2) for p, g in grads.items()))
        normalization = T.switch(T.ge(norm_gs, c), c / norm_gs, np.float32(1.))
        notfinite = T.or_(T.isnan(norm_gs), T.isinf(norm_gs))

        for p, g in grads.items():
            clip_grads.append((p, T.switch(notfinite, numpy.float32(.1) * p, g * normalization)))

        grads = OrderedDict(clip_grads)

        if self.initialize_from_pretrained_word_embeddings and self.fix_pretrained_word_embeddings:
            assert not self.fix_encoder_parameters
            # Keep pretrained word embeddings fixed
            logger.debug("Will use mask to fix pretrained word embeddings")
            grads[self.W_emb] = grads[self.W_emb] * self.W_emb_pretrained_mask
        elif self.fix_encoder_parameters:
            # If 'fix_encoder_parameters' is on, the word embeddings will be excluded from parameter training set
            logger.debug("Will fix word embeddings to initial embeddings or embeddings from resumed model")
        else:
            logger.debug("Will train all word embeddings")

        if self.updater == 'adagrad':
            updates = Adagrad(grads, self.lr)  
        elif self.updater == 'sgd':
            raise Exception("Sgd not implemented!")
        elif self.updater == 'adadelta':
            updates = Adadelta(grads)
        elif self.updater == 'rmsprop':
            updates = RMSProp(grads, self.lr)
        elif self.updater == 'adam':
            updates = Adam(grads, self.lr)
        else:
            raise Exception("Updater not understood!") 

        return updates

    # Batch training function.
Project: ADEM    Author: mike-n-7    | Project source | File source
def compute_updates(self, training_cost, params):
        updates = []

        grads = T.grad(training_cost, params)
        grads = OrderedDict(zip(params, grads))

        # Gradient clipping
        c = numpy.float32(self.cutoff)
        clip_grads = []

        norm_gs = T.sqrt(sum(T.sum(g ** 2) for p, g in grads.items()))
        normalization = T.switch(T.ge(norm_gs, c), c / norm_gs, np.float32(1.))
        notfinite = T.or_(T.isnan(norm_gs), T.isinf(norm_gs))

        for p, g in grads.items():
            clip_grads.append((p, T.switch(notfinite, numpy.float32(.1) * p, g * normalization)))

        grads = OrderedDict(clip_grads)

        if self.W_emb in grads:
            if self.initialize_from_pretrained_word_embeddings and self.fix_pretrained_word_embeddings:
                assert not self.fix_encoder_parameters
                # Keep pretrained word embeddings fixed
                logger.debug("Will use mask to fix pretrained word embeddings")
                grads[self.W_emb] = grads[self.W_emb] * self.W_emb_pretrained_mask
            elif self.fix_encoder_parameters:
                # If 'fix_encoder_parameters' is on, the word embeddings will be excluded from parameter training set
                logger.debug("Will fix word embeddings to initial embeddings or embeddings from resumed model")
            else:
                logger.debug("Will train all word embeddings")

        optimizer_variables = []
        if self.updater == 'adagrad':
            updates = Adagrad(grads, self.lr)
        elif self.updater == 'sgd':
            raise Exception("Sgd not implemented!")
        elif self.updater == 'adadelta':
            updates = Adadelta(grads)
        elif self.updater == 'rmsprop':
            updates = RMSProp(grads, self.lr)
        elif self.updater == 'adam':
            updates, optimizer_variables = Adam(grads, self.lr)
        else:
            raise Exception("Updater not understood!") 

        return updates, optimizer_variables

    # Batch training function.
Project: mcv-m5    Author: david-vazquez    | Project source | File source
def IoU(n_classes, void_labels):
    def IoU_flatt(y_true, y_pred):
        '''Expects a binary class matrix instead of a vector of scalar classes.
        '''
        if dim_ordering == 'th':
            y_pred = K.permute_dimensions(y_pred, (0, 2, 3, 1))
        shp_y_pred = K.shape(y_pred)
        y_pred = K.reshape(y_pred, (shp_y_pred[0]*shp_y_pred[1]*shp_y_pred[2],
                           shp_y_pred[3]))  # go back to b01,c
        # shp_y_true = K.shape(y_true)
        y_true = K.cast(K.flatten(y_true), 'int32')  # b,01 -> b01
        y_pred = K.argmax(y_pred, axis=-1)

        # We use not_void in case the prediction falls in the void class of
        # the groundtruth
        for i in range(len(void_labels)):
            if i == 0:
                not_void = K.not_equal(y_true, void_labels[i])
            else:
                not_void = not_void * K.not_equal(y_true, void_labels[i])

        sum_I = K.zeros((1,), dtype='float32')

        out = {}
        for i in range(n_classes):
            y_true_i = K.equal(y_true, i)
            y_pred_i = K.equal(y_pred, i)

            if dim_ordering == 'th':
                I_i = K.sum(y_true_i * y_pred_i)
                U_i = K.sum(T.or_(y_true_i, y_pred_i) * not_void)
                # I = T.set_subtensor(I[i], I_i)
                # U = T.set_subtensor(U[i], U_i)
                sum_I = sum_I + I_i
            else:
                U_i = K.sum(K.cast(tf.logical_and(tf.logical_or(y_true_i, y_pred_i), not_void), 'float32'))
                y_true_i = K.cast(y_true_i, 'float32')
                y_pred_i = K.cast(y_pred_i, 'float32')
                I_i = K.sum(y_true_i * y_pred_i)
                sum_I = sum_I + I_i
            out['I'+str(i)] = I_i
            out['U'+str(i)] = U_i

        if dim_ordering == 'th':
            accuracy = K.sum(sum_I) / K.sum(not_void)
        else:
            accuracy = K.sum(sum_I) / tf.reduce_sum(tf.cast(not_void, 'float32'))
        out['acc'] = accuracy
        return out
    return IoU_flatt
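
In the IoU example above (and the nearly identical one from keras_zoo below), or_() plays a different role: the union of a ground-truth class mask and a predicted class mask is the elementwise OR of the two boolean masks. A small standalone sketch of that intersection-over-union computation for a single class (hypothetical label vectors, Theano backend only, not taken from either project):

import numpy as np
import theano
import theano.tensor as T

y_true = T.ivector('y_true')  # ground-truth class ids
y_pred = T.ivector('y_pred')  # predicted class ids

class_id = 1
true_i = T.eq(y_true, class_id)
pred_i = T.eq(y_pred, class_id)

intersection = T.sum(true_i * pred_i)
union = T.sum(T.or_(true_i, pred_i))  # elementwise OR, then count

iou = theano.function([y_true, y_pred], T.true_div(intersection, union))
print(iou(np.array([1, 1, 0, 2], dtype='int32'),
          np.array([1, 0, 0, 1], dtype='int32')))  # expected: 1/3
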
Project: keras_zoo    Author: david-vazquez    | Project source | File source
def IoU(n_classes, void_labels):
    def IoU_flatt(y_true, y_pred):
        '''Expects a binary class matrix instead of a vector of scalar classes.
        '''
        if dim_ordering == 'th':
            y_pred = K.permute_dimensions(y_pred, (0, 2, 3, 1))
        shp_y_pred = K.shape(y_pred)
        y_pred = K.reshape(y_pred, (shp_y_pred[0]*shp_y_pred[1]*shp_y_pred[2],
                           shp_y_pred[3]))  # go back to b01,c
        # shp_y_true = K.shape(y_true)
        y_true = K.cast(K.flatten(y_true), 'int32')  # b,01 -> b01
        y_pred = K.argmax(y_pred, axis=-1)

        # We use not_void in case the prediction falls in the void class of
        # the groundtruth
        for i in range(len(void_labels)):
            if i == 0:
                not_void = K.not_equal(y_true, void_labels[i])
            else:
                not_void = not_void * K.not_equal(y_true, void_labels[i])

        sum_I = K.zeros((1,), dtype='float32')

        out = {}
        for i in range(n_classes):
            y_true_i = K.equal(y_true, i)
            y_pred_i = K.equal(y_pred, i)

            if dim_ordering == 'th':
                I_i = K.sum(y_true_i * y_pred_i)
                U_i = K.sum(T.or_(y_true_i, y_pred_i) * not_void)
                # I = T.set_subtensor(I[i], I_i)
                # U = T.set_subtensor(U[i], U_i)
                sum_I = sum_I + I_i
            else:
                U_i = K.sum(K.cast(tf.logical_and(tf.logical_or(y_true_i, y_pred_i), not_void), 'float32'))
                y_true_i = K.cast(y_true_i, 'float32')
                y_pred_i = K.cast(y_pred_i, 'float32')
                I_i = K.sum(y_true_i * y_pred_i)
                sum_I = sum_I + I_i
            out['I'+str(i)] = I_i
            out['U'+str(i)] = U_i

        if dim_ordering == 'th':
            accuracy = K.sum(sum_I) / K.sum(not_void)
        else:
            accuracy = K.sum(sum_I) / tf.reduce_sum(tf.cast(not_void, 'float32'))
        out['acc'] = accuracy
        return out
    return IoU_flatt
Project: MACA    Author: ppartha03    | Project source | File source
def compute_updates(self, training_cost, params):
        updates = []

        grads = T.grad(training_cost, params)
        grads = OrderedDict(zip(params, grads))

        # Gradient clipping
        c = numpy.float32(self.cutoff)
        clip_grads = []

        norm_gs = T.sqrt(sum(T.sum(g ** 2) for p, g in grads.items()))
        normalization = T.switch(T.ge(norm_gs, c), c / norm_gs, np.float32(1.))
        notfinite = T.or_(T.isnan(norm_gs), T.isinf(norm_gs))

        for p, g in grads.items():
            clip_grads.append((p, T.switch(notfinite, numpy.float32(.1) * p, g * normalization)))

        grads = OrderedDict(clip_grads)

        if self.initialize_from_pretrained_word_embeddings and self.fix_pretrained_word_embeddings:
            assert not self.fix_encoder_parameters
            # Keep pretrained word embeddings fixed
            logger.debug("Will use mask to fix pretrained word embeddings")
            grads[self.W_emb] = grads[self.W_emb] * self.W_emb_pretrained_mask
        elif self.fix_encoder_parameters:
            # If 'fix_encoder_parameters' is on, the word embeddings will be excluded from parameter training set
            logger.debug("Will fix word embeddings to initial embeddings or embeddings from resumed model")
        else:
            logger.debug("Will train all word embeddings")

        if self.updater == 'adagrad':
            updates = Adagrad(grads, self.lr)
        elif self.updater == 'sgd':
            raise Exception("Sgd not implemented!")
        elif self.updater == 'adadelta':
            updates = Adadelta(grads)
        elif self.updater == 'rmsprop':
            updates = RMSProp(grads, self.lr)
        elif self.updater == 'adam':
            updates = Adam(grads, self.lr)
        else:
            raise Exception("Updater not understood!")

        return updates

    # Batch training function.
Project: Precise-CTC    Author: Michlong    | Project source | File source
def ctc_path_probability(scorematrix, queryseq, blank):
    """
    Compute path probability based on CTC algorithm, only forward pass is used.
    Batch not supported, for batch version, refer to the CTC class above
    Speed much slower than the numba & cython version (51.5min vs ~3.9min on word_correction_CTC experiment)
    :param scorematrix: (T, C+1)
    :param queryseq:    (L, 1)
    :param blank:       scalar, blank symbol
    :return: (NLL, alphas), NLL > 0 (smaller is better, = -log(p(l|x)); alphas is the forward variable)
    """

    def update_s(s, alphas, scorematrix, queryseq, blank, t):
        l = (s - 1) // 2
        alphas = ifelse(tensor.eq(s % 2, 0),
                        ifelse(tensor.eq(s, 0),
                               tensor.set_subtensor(alphas[s, t], alphas[s, t - 1] * scorematrix[blank, t]),
                               tensor.set_subtensor(alphas[s, t],
                                                    (alphas[s, t - 1] + alphas[s - 1, t - 1]) * scorematrix[blank, t]),
                               name='for_blank_symbol'),
                        ifelse(tensor.or_(tensor.eq(s, 1), tensor.eq(queryseq[l], queryseq[l - 1])),
                               tensor.set_subtensor(alphas[s, t],
                                                    (alphas[s, t - 1] + alphas[s - 1, t - 1]) * scorematrix[
                                                        queryseq[l], t]),
                               tensor.set_subtensor(alphas[s, t],
                                                    (alphas[s, t - 1] + alphas[s - 1, t - 1] + alphas[s - 2, t - 1]) *
                                                    scorematrix[queryseq[l], t]),
                               name='for_same_label_twice'))
        return alphas

    def update_t(t, LLForward, alphas, scorematrix, queryseq, blank, T, L2):
        start = tensor.max([0, L2 - 2 * (T - t)])
        end = tensor.min([2 * t + 2, L2])
        s = tensor.arange(start, end)
        results, _ = theano.scan(fn=update_s, sequences=[s], non_sequences=[scorematrix, queryseq, blank, t],
                                 outputs_info=[alphas], name='scan_along_s')
        alphas = results[-1]
        c = tensor.sum(alphas[start:end, t])
        c = tensor.max([1e-15, c])
        alphas = tensor.set_subtensor(alphas[start:end, t], alphas[start:end, t] / c)
        LLForward += tensor.log(c)
        return LLForward, alphas

    L = queryseq.shape[0]                                                 # Length of label sequence
    L2 = 2 * L + 1                                                        # Length of label sequence padded with blanks
    T = scorematrix.shape[1]                                              # time length
    alphas = tensor.zeros((L2, T))
    # Initialize alphas and forward pass
    alphas = tensor.set_subtensor(alphas[[0, 1], 0], scorematrix[[blank, queryseq[0]], 0])
    c = tensor.sum(alphas[:, 0])
    alphas = tensor.set_subtensor(alphas[:, 0], alphas[:, 0] / c)
    LLForward = tensor.log(c)
    t = tensor.arange(1, T)
    results, _ = theano.scan(fn=update_t, sequences=[t], non_sequences=[scorematrix, queryseq, blank, T, L2],
                             outputs_info=[LLForward, alphas], name='scan_along_t')
    NLL, alphas = ifelse(tensor.gt(T, 1), (-results[0][-1], results[1][-1]), (-LLForward, alphas))
    return NLL, alphas