Python theano.tensor 模块,inv() 实例源码


项目:keras_superpixel_pooling    作者:parag2489    | 项目源码 | 文件源码
def normalize_batch_in_training(x, gamma, beta,
                                reduction_axes, epsilon=1e-3):
    """Computes mean and std for batch then apply batch_normalization on batch.

    # Arguments
        x: input tensor to normalize.
        gamma: scale tensor, or `None` to use ones.
        beta: shift tensor, or `None` to use zeros.
        reduction_axes: axes handed to the Theano batch-normalization op.
        epsilon: fuzz factor handed to the Theano batch-normalization op.

    # Returns
        A tuple `(normed, mean, variance)`, where the variance is rebuilt
        from the inverse standard deviation as `1 / stdinv ** 2`.
    """
    # TODO remove this if statement when Theano without
    # T.nnet.bn.batch_normalization_train is deprecated
    if not hasattr(T.nnet.bn, 'batch_normalization_train'):
        return _old_normalize_batch_in_training(x, gamma, beta, reduction_axes, epsilon)

    if gamma is None:
        # Without a scale, use ones shaped like beta (or like x when beta
        # is missing as well).
        gamma = ones_like(x) if beta is None else ones_like(beta)
    if beta is None:
        # gamma is guaranteed to be set at this point, so it fixes the shape.
        beta = zeros_like(gamma)

    normed, mean, stdinv = T.nnet.bn.batch_normalization_train(
        x, gamma, beta, reduction_axes, epsilon)

    return normed, mean, T.inv(stdinv ** 2)
项目:Theano-Deep-learning    作者:GeekLiB    | 项目源码 | 文件源码
def test_dim1(self):
        """Check inverse_permutation on a single int32 permutation vector."""
        perm = ivector()
        inverse = inverse_permutation(perm)
        # The symbolic inverse keeps the dtype of its input.
        assert inverse.dtype == perm.dtype
        compute_inverse = function([perm], inverse)

        # Draw one random permutation of 0..9 from the seeded test generator.
        generator = numpy.random.RandomState(utt.fetch_seed())
        p_val = generator.permutation(10).astype('int32')
        inv_val = compute_inverse(p_val)
        identity = numpy.arange(10)

        # Inverting twice must give back the original permutation.
        round_trip = compute_inverse(inv_val)
        assert numpy.all(round_trip == p_val)
        # Composing the permutation with its inverse, in either order,
        # must give the identity.
        assert numpy.all(p_val[inv_val] == identity)
        assert numpy.all(inv_val[p_val] == identity)
项目:keras    作者:GeekLiB    | 项目源码 | 文件源码
def normalize_batch_in_training(x, gamma, beta,
                                reduction_axes, epsilon=0.0001):
    '''Computes mean and std for batch then apply batch_normalization on batch.

    The cuDNN kernel is tried when `x` has fewer than 5 dimensions,
    `reduction_axes == [0, 2, 3]` and the configured device name starts with
    'cuda' or 'gpu'; otherwise (or when the cuDNN entry point is missing)
    the statistics are computed symbolically.

    # Returns
        A tuple `(normed, mean, var)`.
    '''
    dev = theano.config.device
    use_cudnn = ndim(x) < 5 and reduction_axes == [0, 2, 3] and (dev.startswith('cuda') or dev.startswith('gpu'))
    if use_cudnn:
        broadcast_beta = beta.dimshuffle('x', 0, 'x', 'x')
        broadcast_gamma = gamma.dimshuffle('x', 0, 'x', 'x')
        try:
            normed, mean, stdinv = theano.sandbox.cuda.dnn.dnn_batch_normalization_train(
                x, broadcast_gamma, broadcast_beta, 'spatial', epsilon)
            # cuDNN hands back 1 / std; convert it to a variance.
            var = T.inv(stdinv ** 2)
            return normed, T.flatten(mean), T.flatten(var)
        except AttributeError:
            # The cuDNN op is not available in this Theano build: fall
            # through to the generic implementation below.
            pass

    var = x.var(reduction_axes)
    mean = x.mean(reduction_axes)

    # Shape with size 1 on every reduced axis so that the statistics and
    # parameters broadcast against x.
    target_shape = []
    for axis in range(ndim(x)):
        if axis in reduction_axes:
            target_shape.append(1)
        else:
            target_shape.append(x.shape[axis])
    target_shape = T.stack(*target_shape)

    broadcast_mean = T.reshape(mean, target_shape)
    broadcast_var = T.reshape(var, target_shape)
    broadcast_beta = T.reshape(beta, target_shape)
    broadcast_gamma = T.reshape(gamma, target_shape)
    normed = batch_normalization(x, broadcast_mean, broadcast_var,
                                 broadcast_beta, broadcast_gamma,
                                 epsilon)
    return normed, mean, var
项目:deep-learning-keras-projects    作者:jasmeetsb    | 项目源码 | 文件源码
def normalize_batch_in_training(x, gamma, beta,
                                reduction_axes, epsilon=1e-3):
    """Computes mean and std for batch then apply batch_normalization on batch.

    # Arguments
        x: input tensor to normalize.
        gamma: scale tensor.
        beta: shift tensor.
        reduction_axes: axes handed to the Theano batch-normalization op.
        epsilon: fuzz factor handed to the Theano batch-normalization op.

    # Returns
        A tuple `(normed, mean, variance)`, where the variance is rebuilt
        from the inverse standard deviation as `1 / stdinv ** 2`.
    """
    # TODO remove this if statement when Theano without
    # T.nnet.bn.batch_normalization_train is deprecated
    if not hasattr(T.nnet.bn, 'batch_normalization_train'):
        return _old_normalize_batch_in_training(x, gamma, beta, reduction_axes, epsilon)

    normed, mean, stdinv = T.nnet.bn.batch_normalization_train(
        x, gamma, beta, reduction_axes, epsilon)

    return normed, mean, T.inv(stdinv ** 2)
项目:theano-mc-cnn    作者:epiception    | 项目源码 | 文件源码
def l2_norm_layer(ip):
    """Scale every sample of `ip` to unit L2 norm.

    The squared entries are summed over axes 1, 2 and 3, so `ip` is expected
    to be a 4-D tensor whose first axis indexes the samples.

    The per-sample factor is broadcast with `dimshuffle` rather than reshaped
    to a module-level `batch_size`, so any batch size works (for example a
    smaller final batch).
    """
    inv_norm = T.inv(T.sqrt((ip ** 2).sum(axis=(1, 2, 3))))
    # One factor per sample -> (n, 1, 1, 1), broadcastable against ip.
    scale = inv_norm.dimshuffle(0, 'x', 'x', 'x')
    return ip * scale
项目:theano-mc-cnn    作者:epiception    | 项目源码 | 文件源码
def l2_norm_layer(ip):
    """Scale every sample of `ip` to unit L2 norm.

    The squared entries are summed over axes 1, 2 and 3, so `ip` is expected
    to be a 4-D tensor whose first axis indexes the samples.

    The per-sample factor is broadcast with `dimshuffle` rather than reshaped
    to a module-level `batch_size`, so any batch size works (for example a
    smaller final batch).
    """
    inv_norm = T.inv(T.sqrt((ip ** 2).sum(axis=(1, 2, 3))))
    # One factor per sample -> (n, 1, 1, 1), broadcastable against ip.
    scale = inv_norm.dimshuffle(0, 'x', 'x', 'x')
    return ip * scale
项目:keras-customized    作者:ambrite    | 项目源码 | 文件源码
def normalize_batch_in_training(x, gamma, beta,
                                reduction_axes, epsilon=1e-3):
    '''Computes mean and std for batch then apply batch_normalization on batch.

    # Arguments
        x: input tensor to normalize.
        gamma: scale tensor.
        beta: shift tensor.
        reduction_axes: axes handed to the Theano batch-normalization op.
        epsilon: fuzz factor handed to the Theano batch-normalization op.

    # Returns
        A tuple `(normed, mean, variance)`, where the variance is rebuilt
        from the inverse standard deviation as `1 / stdinv ** 2`.
    '''
    # TODO remove this if statement when Theano without
    # T.nnet.bn.batch_normalization_train is deprecated
    if not hasattr(T.nnet.bn, 'batch_normalization_train'):
        return _old_normalize_batch_in_training(x, gamma, beta, reduction_axes, epsilon)

    normed, mean, stdinv = T.nnet.bn.batch_normalization_train(
        x, gamma, beta, reduction_axes, epsilon)

    return normed, mean, T.inv(stdinv ** 2)
项目:keras-customized    作者:ambrite    | 项目源码 | 文件源码
def _old_normalize_batch_in_training(x, gamma, beta,
                                     reduction_axes, epsilon=1e-3):
    '''Computes mean and std for batch then apply batch_normalization on batch.

    The cuDNN kernel is tried when `x` has fewer than 5 dimensions,
    `reduction_axes == [0, 2, 3]` and the configured device name starts with
    'cuda' or 'gpu'; otherwise (or when the cuDNN entry point is missing)
    the statistics are computed symbolically.

    # Returns
        A tuple `(normed, mean, var)`.
    '''
    dev = theano.config.device
    use_cudnn = ndim(x) < 5 and reduction_axes == [0, 2, 3] and (dev.startswith('cuda') or dev.startswith('gpu'))
    if use_cudnn:
        broadcast_beta = beta.dimshuffle('x', 0, 'x', 'x')
        broadcast_gamma = gamma.dimshuffle('x', 0, 'x', 'x')
        try:
            normed, mean, stdinv = theano.sandbox.cuda.dnn.dnn_batch_normalization_train(
                x, broadcast_gamma, broadcast_beta, 'spatial', epsilon)
            # cuDNN hands back 1 / std; convert it to a variance.
            var = T.inv(stdinv ** 2)
            return normed, T.flatten(mean), T.flatten(var)
        except AttributeError:
            # The cuDNN op is not available in this Theano build: fall
            # through to the generic implementation below.
            pass

    var = x.var(reduction_axes)
    mean = x.mean(reduction_axes)

    # Shape with size 1 on every reduced axis so that the statistics and
    # parameters broadcast against x.
    target_shape = []
    for axis in range(ndim(x)):
        if axis in reduction_axes:
            target_shape.append(1)
        else:
            target_shape.append(x.shape[axis])
    target_shape = T.stack(*target_shape)

    broadcast_mean = T.reshape(mean, target_shape)
    broadcast_var = T.reshape(var, target_shape)
    broadcast_beta = T.reshape(beta, target_shape)
    broadcast_gamma = T.reshape(gamma, target_shape)
    normed = batch_normalization(x, broadcast_mean, broadcast_var,
                                 broadcast_beta, broadcast_gamma,
                                 epsilon)
    return normed, mean, var

# TODO remove this if statement when Theano without
# is deprecated
项目:gogh-figure    作者:joelmoniz    | 项目源码 | 文件源码
def get_output_for(self, input, style=None, **kwargs):
        """Normalize `input` over `self.axes` and apply scale and shift.

        When `style` is None, `self.beta` / `self.gamma` are used directly
        and broadcast with the pattern ['x', 0, 'x', 'x']. Otherwise they are
        first indexed with `style` and broadcast with [0, 1, 'x', 'x'].
        A `None` beta or gamma is replaced by the neutral value 0 or 1.
        """
        mean = input.mean(self.axes)
        inv_std = T.inv(T.sqrt(input.var(self.axes) + self.epsilon))

        # The statistics are broadcast as [0, 1, 'x', 'x'];
        # NOTE(review): this presumes self.axes leaves two axes - confirm.
        pattern = [0, 1, 'x', 'x']

        if style is None:
            pattern_params = ['x', 0, 'x', 'x']
            beta = 0 if self.beta is None else self.beta.dimshuffle(pattern_params)
            gamma = 1 if self.gamma is None else self.gamma.dimshuffle(pattern_params)
        else:
            # Parameters selected by style are broadcast like the statistics.
            pattern_params = pattern
            beta = 0 if self.beta is None else self.beta[style].dimshuffle(pattern_params)
            gamma = 1 if self.gamma is None else self.gamma[style].dimshuffle(pattern_params)

        mean = mean.dimshuffle(pattern)
        inv_std = inv_std.dimshuffle(pattern)

        # normalize
        normalized = (input - mean) * (gamma * inv_std) + beta
        return normalized
项目:keras    作者:NVIDIA    | 项目源码 | 文件源码
def normalize_batch_in_training(x, gamma, beta,
                                reduction_axes, epsilon=1e-3):
    """Computes mean and std for batch then apply batch_normalization on batch.

    # Arguments
        x: input tensor to normalize.
        gamma: scale tensor.
        beta: shift tensor.
        reduction_axes: axes handed to the Theano batch-normalization op.
        epsilon: fuzz factor handed to the Theano batch-normalization op.

    # Returns
        A tuple `(normed, mean, variance)`, where the variance is rebuilt
        from the inverse standard deviation as `1 / stdinv ** 2`.
    """
    # TODO remove this if statement when Theano without
    # T.nnet.bn.batch_normalization_train is deprecated
    if not hasattr(T.nnet.bn, 'batch_normalization_train'):
        return _old_normalize_batch_in_training(x, gamma, beta, reduction_axes, epsilon)

    normed, mean, stdinv = T.nnet.bn.batch_normalization_train(
        x, gamma, beta, reduction_axes, epsilon)

    return normed, mean, T.inv(stdinv ** 2)
项目:Theano-Deep-learning    作者:GeekLiB    | 项目源码 | 文件源码
def test(self):
        """
        test optimization for consecutive functional inverses
        """

        dx = numpy.random.rand(5, 4).astype("float32")
        self.assert_func_pair_optimized(T.deg2rad, T.rad2deg, dx)
        dx = numpy.random.rand(5, 4).astype("float32")*180
        self.assert_func_pair_optimized(T.rad2deg, T.deg2rad, dx)

        # Test the other functional inverses
        dx = numpy.random.rand(5, 4).astype("float32")
        self.assert_func_pair_optimized(T.cosh, T.arccosh, dx)
        self.assert_func_pair_optimized(T.arcsinh, T.sinh, dx)
        self.assert_func_pair_optimized(T.arctanh, T.tanh, dx)
        self.assert_func_pair_optimized(T.inv, T.inv, dx)
        self.assert_func_pair_optimized(T.neg, T.neg, dx)
        cx = dx + complex(0, 1)*(dx + 0.01)
        self.assert_func_pair_optimized(T.conj, T.conj, cx, is_complex=True)

        # Test that non-inverse functions are run normally
        self.assert_func_pair_optimized(T.conj, T.neg, cx,
                                        should_copy=False, is_complex=True)
        dx = numpy.random.rand(5, 4).astype("float32")+0.01
        self.assert_func_pair_optimized(T.rad2deg, T.rad2deg, dx,
                                        should_copy=False)
        self.assert_func_pair_optimized(T.rad2deg, T.cosh, dx,
                                        should_copy=False)
项目:Theano-Deep-learning    作者:GeekLiB    | 项目源码 | 文件源码
def test_local_log_erfc(self):
        """Check that log(erfc(x)) is rewritten into a form that stays finite.

        Graph sizes are asserted for the plain mode and for the fusion mode,
        and the compiled functions must return finite values on `val`.
        """
        val = [-30, -27, -26, -11, -10, -3, -2, -1, 0, 1, 2, 3, 10,
             11, 26, 27, 28, 30]
        if theano.config.mode in ["DebugMode", "DEBUG_MODE", "FAST_COMPILE"]:
            # python mode doesn't like the inv(0)
            val.remove(0)
        val = numpy.asarray(val, dtype=config.floatX)
        x = T.vector('x')

        # There are some NaNs that will appear in the graph for the log of
        # the negative values, so disable the finiteness check on copies of
        # the modes.
        mode = copy.copy(self.mode)
        mode.check_isfinite = False
        mode_fusion = copy.copy(self.mode_fusion)
        mode_fusion.check_isfinite = False

        f = theano.function([x], T.log(T.erfc(x)), mode=mode)
        assert len(f.maker.fgraph.apply_nodes) == 23, len(f.maker.fgraph.apply_nodes)
        assert f.maker.fgraph.outputs[0].dtype == theano.config.floatX
        assert all(numpy.isfinite(f(val)))

        f = theano.function([x], T.log(T.erfc(-x)), mode=mode)
        assert len(f.maker.fgraph.apply_nodes) == 24, len(f.maker.fgraph.apply_nodes)
        assert f.maker.fgraph.outputs[0].dtype == theano.config.floatX
        assert all(numpy.isfinite(f(-val)))

        f = theano.function([x], T.log(T.erfc(x)), mode=mode_fusion)
        # With fusion the whole expression collapses into a single node.
        assert len(f.maker.fgraph.apply_nodes) == 1, len(f.maker.fgraph.apply_nodes)
        assert f.maker.fgraph.outputs[0].dtype == theano.config.floatX
        assert len(f.maker.fgraph.toposort()[0].fgraph.toposort()[
            0].op.scalar_op.fgraph.apply_nodes) == 22, len(f.maker.fgraph.toposort()[0].fgraph.toposort()[0].op.scalar_op.fgraph.apply_nodes)
        # TODO: fix this problem
        if theano.config.floatX == "float32" and theano.config.mode in ["DebugMode", "DEBUG_MODE"]:
            raise SkipTest('The python code upcast somewhere internally '
                           'some value of float32 to python float for '
                           'part of its computation. That make that the '
                           'c and python code dont generate the same value. '
                           'You can ignore this error.')
        assert all(numpy.isfinite(f(val)))
项目:textvae    作者:stas-semeniuta    | 项目源码 | 文件源码
def __call__(self, x):
        """Batch-normalize `x` and apply the learned scale and shift.

        In training mode the statistics of `x` itself are used. Outside
        training, the stored `self.mean` / `self.inv_std` are used when
        `self.collect` is set, otherwise the statistics of `x` are used.
        `self.updates` is rebuilt on every call and, when both `self.train`
        and `self.collect` are set, holds moving-average updates for the
        stored statistics.
        """
        # NOTE(review): as written every axis of x is reduced, which makes
        # the parameter pattern below all 'x'; a line excluding the feature
        # axis may be missing here - confirm against the upstream project.
        axes = range(x.ndim)
        axes = tuple(axes)
        input_mean = x.mean(axes)
        input_inv_std = T.inv(T.sqrt(x.var(axes) + self.epsilon))

        if self.train:
            mean = input_mean
            inv_std = input_inv_std
        else:
            if self.collect:
                mean = self.mean
                inv_std = self.inv_std
            else:
                mean = input_mean
                inv_std = input_inv_std

        self.updates = {}
        if self.train:
            if self.collect:
                # Exponential moving average with rate self.alpha.
                self.updates[self.mean] = (1 - self.alpha) * self.mean + self.alpha * input_mean
                self.updates[self.inv_std] = (1 - self.alpha) * self.inv_std + self.alpha * input_inv_std

        # prepare dimshuffle pattern inserting broadcastable axes as needed
        param_axes = iter(range(x.ndim - len(axes)))
        pattern = ['x' if input_axis in axes
                   else next(param_axes)
                   for input_axis in range(x.ndim)]

        # apply dimshuffle pattern to all parameters
        beta = self.beta.dimshuffle(pattern)
        gamma = self.gamma.dimshuffle(pattern)
        mean = mean.dimshuffle(pattern)
        inv_std = inv_std.dimshuffle(pattern)

        # normalize
        normalized = (x - mean) * (gamma * inv_std) + beta
        return normalized
项目:textvae    作者:stas-semeniuta    | 项目源码 | 文件源码
def __call__(self, x):
        """Normalize `x` along axis 1, then apply `self.gamma` and `self.beta`.

        Mean and variance are taken over axis 1 with `keepdims=True`, so they
        broadcast against `x` directly. The parameters are broadcast with the
        pattern ('x', 0, 'x', ...), i.e. their single axis is aligned with
        axis 1 of `x`.
        """
        mean = x.mean(1, keepdims=True)
        inv_std = T.inv(T.sqrt(x.var(1, keepdims=True) + self.epsilon))

        # List repetition instead of a loop over xrange keeps this working
        # on both Python 2 and Python 3.
        pattern = ['x', 0] + ['x'] * (x.ndim - 2)
        beta = self.beta.dimshuffle(tuple(pattern))
        gamma = self.gamma.dimshuffle(tuple(pattern))

        # normalize
        normalized = (x - mean) * gamma * inv_std + beta
        return normalized
项目:geomdn    作者:afshinrahimi    | 项目源码 | 文件源码
def nll_loss(self, mus, sigmas, corxy, pis, y_true):
        r"""
        negative log likelihood loss of a 2d y_true coordinate in
        each of the Gaussians with parameters mus, sigmas, corxy, pis.
        Note that the mus, sigmas and corxy are shared between all samples
        and only pis are different for each sample.

        The formula for negative log likelihood is :
        \mathcal{L}(y \vert x) = - \log\bigg\{\sum_{k=1}^K \pi_k(x)  \mathcal{N}\big(y \vert \mu_k(x), \Sigma_k(x)\big)\bigg\}

        The size of pis is n_batch x n_components,
        the size of mus is n_batch x n_components x 2,
        the size of sigmas is n_batch x n_components x 2 and
        the size of corxy is n_batch x n_components.

        The size of y_true is batch_size x 2.

        NOTE(review): the indexing below (axis=-2, sigmainvs[:, 0, :])
        treats the second-to-last axis of mus / sigmas as the coordinate
        axis; confirm this against the sizes stated above.

        Returns the mean over the batch of the negative log likelihood,
        computed with the log-sum-exp trick.
        """
        Y = y_true[:, :, np.newaxis]
        diff = Y - mus
        # Product of the two coordinate differences (cross term).
        diffprod = T.prod(diff, axis=-2)
        sigmainvs = T.inv(sigmas)
        sigmainvprods = sigmainvs[:,0, :] * sigmainvs[:,1, :]
        sigmas2 = sigmas ** 2
        corxy2 = corxy **2
        diff2 = diff ** 2
        diffsigma = diff2 * T.inv(sigmas2)
        diffsigmanorm = T.sum(diffsigma, axis=-2)
        z = diffsigmanorm - 2 * corxy * diffprod * sigmainvprods
        oneminuscorxy2inv = T.inv(1.0 - corxy2)
        # The direct form -log(sum(pis * normalizer * exp(exponent))) was
        # computed here and then overwritten; only the log-sum-exp form
        # below is kept.
        #logsumexp trick
        exponent = -0.5 * z * oneminuscorxy2inv
        #normalizer = (0.5 / np.pi) * sigmainvprods * T.sqrt(oneminuscorxy2inv)
        #when something is a * exp(x) = exp(x + loga)
        new_exponent = exponent + T.log(0.5 / np.pi) + T.log(sigmainvprods) + T.log(T.sqrt(oneminuscorxy2inv)) + T.log(pis)
        # Subtract the per-sample maximum before exponentiating.
        max_exponent = T.max(new_exponent ,axis=1, keepdims=True)
        mod_exponent = new_exponent - max_exponent
        gauss_mix = T.sum(T.exp(mod_exponent),axis=1)
        log_gauss = max_exponent + T.log(gauss_mix)
        loss = -T.mean(log_gauss)

        return loss
项目:third_person_im    作者:bstadie    | 项目源码 | 文件源码
def get_output_for(self, input, deterministic=False, **kwargs):
        """Batch-normalize `input` over `self.axes`.

        `deterministic` supplies the defaults for two keyword overrides:
        'batch_norm_use_averages' (defaults to `deterministic`) selects the
        stored `self.mean` / `self.std` instead of the mini-batch statistics,
        and 'batch_norm_update_averages' (defaults to `not deterministic`)
        attaches moving-average updates to the stored statistics.
        """
        input_mean = input.mean(self.axes)
        input_std = TT.sqrt(input.var(self.axes) + self.epsilon)

        # Decide whether to use the stored averages or mini-batch statistics
        use_averages = kwargs.get('batch_norm_use_averages',
                                  deterministic)
        if use_averages:
            mean = self.mean
            std = self.std
        else:
            mean = input_mean
            std = input_std

        # Decide whether to update the stored averages
        update_averages = kwargs.get('batch_norm_update_averages',
                                     not deterministic)
        if update_averages:
            # Trick: To update the stored statistics, we create memory-aliased
            # clones of the stored statistics:
            running_mean = theano.clone(self.mean, share_inputs=False)
            running_std = theano.clone(self.std, share_inputs=False)
            # set a default update for them:
            running_mean.default_update = ((1 - self.alpha) * running_mean +
                                           self.alpha * input_mean)
            running_std.default_update = ((1 - self.alpha) *
                                              running_std +
                                              self.alpha * input_std)
            # and make sure they end up in the graph without participating in
            # the computation (this way their default_update will be collected
            # and applied, but the computation will be optimized away):
            mean += 0 * running_mean
            std += 0 * running_std

        # prepare dimshuffle pattern inserting broadcastable axes as needed
        param_axes = iter(range(input.ndim - len(self.axes)))
        pattern = ['x' if input_axis in self.axes
                   else next(param_axes)
                   for input_axis in range(input.ndim)]

        # apply dimshuffle pattern to all parameters
        beta = 0 if self.beta is None else self.beta.dimshuffle(pattern)
        gamma = 1 if self.gamma is None else self.gamma.dimshuffle(pattern)
        mean = mean.dimshuffle(pattern)
        std = std.dimshuffle(pattern)

        # normalize
        normalized = (input - mean) * (gamma * TT.inv(std)) + beta
        return normalized
项目:rllabplusplus    作者:shaneshixiang    | 项目源码 | 文件源码
def get_output_for(self, input, deterministic=False, **kwargs):
        """Batch-normalize `input` over `self.axes`.

        `deterministic` supplies the defaults for two keyword overrides:
        'batch_norm_use_averages' (defaults to `deterministic`) selects the
        stored `self.mean` / `self.std` instead of the mini-batch statistics,
        and 'batch_norm_update_averages' (defaults to `not deterministic`)
        attaches moving-average updates to the stored statistics.
        """
        input_mean = input.mean(self.axes)
        input_std = TT.sqrt(input.var(self.axes) + self.epsilon)

        # Decide whether to use the stored averages or mini-batch statistics
        use_averages = kwargs.get('batch_norm_use_averages',
                                  deterministic)
        if use_averages:
            mean = self.mean
            std = self.std
        else:
            mean = input_mean
            std = input_std

        # Decide whether to update the stored averages
        update_averages = kwargs.get('batch_norm_update_averages',
                                     not deterministic)
        if update_averages:
            # Trick: To update the stored statistics, we create memory-aliased
            # clones of the stored statistics:
            running_mean = theano.clone(self.mean, share_inputs=False)
            running_std = theano.clone(self.std, share_inputs=False)
            # set a default update for them:
            running_mean.default_update = ((1 - self.alpha) * running_mean +
                                           self.alpha * input_mean)
            running_std.default_update = ((1 - self.alpha) *
                                              running_std +
                                              self.alpha * input_std)
            # and make sure they end up in the graph without participating in
            # the computation (this way their default_update will be collected
            # and applied, but the computation will be optimized away):
            mean += 0 * running_mean
            std += 0 * running_std

        # prepare dimshuffle pattern inserting broadcastable axes as needed
        param_axes = iter(range(input.ndim - len(self.axes)))
        pattern = ['x' if input_axis in self.axes
                   else next(param_axes)
                   for input_axis in range(input.ndim)]

        # apply dimshuffle pattern to all parameters
        beta = 0 if self.beta is None else self.beta.dimshuffle(pattern)
        gamma = 1 if self.gamma is None else self.gamma.dimshuffle(pattern)
        mean = mean.dimshuffle(pattern)
        std = std.dimshuffle(pattern)

        # normalize
        normalized = (input - mean) * (gamma * TT.inv(std)) + beta
        return normalized
项目:deep-learning-keras-projects    作者:jasmeetsb    | 项目源码 | 文件源码
def _old_normalize_batch_in_training(x, gamma, beta,
                                     reduction_axes, epsilon=1e-3):
    """Computes mean and std for batch then apply batch_normalization on batch.

    The cuDNN kernel is tried when `x` has fewer than 5 dimensions,
    `reduction_axes == [0, 2, 3]` and the configured device name starts with
    'cuda' or 'gpu'; otherwise (or when the cuDNN entry point is missing)
    the statistics are computed symbolically.

    # Returns
        A tuple `(normed, mean, var)`.
    """
    dev = theano.config.device
    use_cudnn = ndim(x) < 5 and reduction_axes == [0, 2, 3] and (dev.startswith('cuda') or dev.startswith('gpu'))
    if use_cudnn:
        broadcast_beta = beta.dimshuffle('x', 0, 'x', 'x')
        broadcast_gamma = gamma.dimshuffle('x', 0, 'x', 'x')
        try:
            normed, mean, stdinv = theano.sandbox.cuda.dnn.dnn_batch_normalization_train(
                x, broadcast_gamma, broadcast_beta, 'spatial', epsilon)
            normed = theano.tensor.as_tensor_variable(normed)
            mean = theano.tensor.as_tensor_variable(mean)
            stdinv = theano.tensor.as_tensor_variable(stdinv)
            # cuDNN hands back 1 / std; convert it to a variance.
            var = T.inv(stdinv ** 2)
            return normed, T.flatten(mean), T.flatten(var)
        except AttributeError:
            # The cuDNN op is not available in this Theano build: fall
            # through to the generic implementation below.
            pass

    var = x.var(reduction_axes)
    mean = x.mean(reduction_axes)

    # Shape with size 1 on every reduced axis so that the statistics and
    # parameters broadcast against x.
    target_shape = []
    for axis in range(ndim(x)):
        if axis in reduction_axes:
            target_shape.append(1)
        else:
            target_shape.append(x.shape[axis])
    target_shape = T.stack(*target_shape)

    broadcast_mean = T.reshape(mean, target_shape)
    broadcast_var = T.reshape(var, target_shape)
    broadcast_beta = T.reshape(beta, target_shape)
    broadcast_gamma = T.reshape(gamma, target_shape)
    normed = batch_normalization(x, broadcast_mean, broadcast_var,
                                 broadcast_beta, broadcast_gamma,
                                 epsilon)
    return normed, mean, var

# TODO remove this if statement when Theano without
# is deprecated
项目:experiments    作者:tencia    | 项目源码 | 文件源码
def get_output_for(self, input, deterministic=False, **kwargs):
        """Batch-normalize `input` over `self.axes`.

        `deterministic` supplies the defaults for two keyword overrides:
        'batch_norm_use_averages' (defaults to `deterministic`) selects the
        stored `self.mean` / `self.inv_std` instead of the mini-batch
        statistics, and 'batch_norm_update_averages' (defaults to
        `not deterministic`) attaches moving-average updates to the stored
        statistics.
        """
        input_mean = input.mean(self.axes)
        input_inv_std = T.inv(T.sqrt(input.var(self.axes) + self.epsilon))

        # Decide whether to use the stored averages or mini-batch statistics
        use_averages = kwargs.get('batch_norm_use_averages',
                                  deterministic)
        if use_averages:
            mean = self.mean
            inv_std = self.inv_std
        else:
            mean = input_mean
            inv_std = input_inv_std

        # Decide whether to update the stored averages
        update_averages = kwargs.get('batch_norm_update_averages',
                                     not deterministic)
        if update_averages:
            # Trick: To update the stored statistics, we create memory-aliased
            # clones of the stored statistics:
            running_mean = theano.clone(self.mean, share_inputs=False)
            running_inv_std = theano.clone(self.inv_std, share_inputs=False)
            # set a default update for them:
            running_mean.default_update = ((1 - self.alpha) * running_mean +
                                           self.alpha * input_mean)
            running_inv_std.default_update = ((1 - self.alpha) *
                                              running_inv_std +
                                              self.alpha * input_inv_std)
            # and make sure they end up in the graph without participating in
            # the computation (this way their default_update will be collected
            # and applied, but the computation will be optimized away):
            mean += 0 * running_mean
            inv_std += 0 * running_inv_std

        # prepare dimshuffle pattern inserting broadcastable axes as needed
        param_axes = iter(range(input.ndim - len(self.axes)))
        pattern = ['x' if input_axis in self.axes
                   else next(param_axes)
                   for input_axis in range(input.ndim)]

        # apply dimshuffle pattern to all parameters
        beta = 0 if self.beta is None else self.beta.dimshuffle(pattern)
        gamma = 1 if self.gamma is None else self.gamma.dimshuffle(pattern)
        mean = mean.dimshuffle(pattern)
        inv_std = inv_std.dimshuffle(pattern)

        # normalize
        normalized = (input - mean) * (gamma * inv_std) + beta
        return normalized
项目:learning-class-invariant-features    作者:sbelharbi    | 项目源码 | 文件源码
def get_output_for(self, input, deterministic=False,
                   batch_norm_use_averages=None,
                   batch_norm_update_averages=None, **kwargs):
        """Batch-normalize `input` over `self.axes`.

        `batch_norm_use_averages` selects the stored `self.mean` /
        `self.inv_std` instead of the mini-batch statistics; when None it
        falls back to `deterministic`. `batch_norm_update_averages` attaches
        moving-average updates to the stored statistics; when None it falls
        back to `not deterministic`.
        """
        input_mean = input.mean(self.axes)
        input_inv_std = T.inv(T.sqrt(input.var(self.axes) + self.epsilon))

        # decide whether to use the stored averages or mini-batch statistics
        if batch_norm_use_averages is None:
            batch_norm_use_averages = deterministic
        use_averages = batch_norm_use_averages

        if use_averages:
            mean = self.mean
            inv_std = self.inv_std
        else:
            mean = input_mean
            inv_std = input_inv_std

        # decide whether to update the stored averages
        if batch_norm_update_averages is None:
            batch_norm_update_averages = not deterministic
        update_averages = batch_norm_update_averages

        if update_averages:
            # Trick: To update the stored statistics, we create memory-aliased
            # clones of the stored statistics.
            running_mean = theano.clone(self.mean, share_inputs=False)
            running_inv_std = theano.clone(self.inv_std, share_inputs=False)
            # set a default update for them
            running_mean.default_update = ((1 - self.alpha) * running_mean +
                                           self.alpha * input_mean)
            running_inv_std.default_update = ((1 - self.alpha) *
                                              running_inv_std +
                                              self.alpha * input_inv_std)
            # and make sure they end up in the graph without participating in
            # the computation (this way their default_update will be collected
            # and applied, but the computation will be optimized away):
            mean += 0 * running_mean
            inv_std += 0 * running_inv_std
        # prepare dimshuffle pattern inserting broadcastable axes as needed
        param_axes = iter(range(input.ndim - len(self.axes)))
        pattern = ['x' if input_axis in self.axes
                   else next(param_axes)
                   for input_axis in range(input.ndim)]

        # apply dimshuffle pattern to all parameters
        beta = 0 if self.beta is None else self.beta.dimshuffle(pattern)
        gamma = 1 if self.gamma is None else self.gamma.dimshuffle(pattern)
        mean = mean.dimshuffle(pattern)
        inv_std = inv_std.dimshuffle(pattern)

        # normalize
        normalized = (input - mean) * (gamma * inv_std) + beta
        return normalized
项目:keras    作者:NVIDIA    | 项目源码 | 文件源码
def _old_normalize_batch_in_training(x, gamma, beta,
                                     reduction_axes, epsilon=1e-3):
    """Computes mean and std for batch then apply batch_normalization on batch.

    The cuDNN kernel is tried when `x` has fewer than 5 dimensions,
    `reduction_axes == [0, 2, 3]` and the configured device name starts with
    'cuda' or 'gpu'; otherwise (or when the cuDNN entry point is missing)
    the statistics are computed symbolically.

    # Returns
        A tuple `(normed, mean, var)`.
    """
    dev = theano.config.device
    use_cudnn = ndim(x) < 5 and reduction_axes == [0, 2, 3] and (dev.startswith('cuda') or dev.startswith('gpu'))
    if use_cudnn:
        broadcast_beta = beta.dimshuffle('x', 0, 'x', 'x')
        broadcast_gamma = gamma.dimshuffle('x', 0, 'x', 'x')
        try:
            normed, mean, stdinv = theano.sandbox.cuda.dnn.dnn_batch_normalization_train(
                x, broadcast_gamma, broadcast_beta, 'spatial', epsilon)
            normed = theano.tensor.as_tensor_variable(normed)
            mean = theano.tensor.as_tensor_variable(mean)
            stdinv = theano.tensor.as_tensor_variable(stdinv)
            # cuDNN hands back 1 / std; convert it to a variance.
            var = T.inv(stdinv ** 2)
            return normed, T.flatten(mean), T.flatten(var)
        except AttributeError:
            # The cuDNN op is not available in this Theano build: fall
            # through to the generic implementation below.
            pass

    var = x.var(reduction_axes)
    mean = x.mean(reduction_axes)

    # Shape with size 1 on every reduced axis so that the statistics and
    # parameters broadcast against x.
    target_shape = []
    for axis in range(ndim(x)):
        if axis in reduction_axes:
            target_shape.append(1)
        else:
            target_shape.append(x.shape[axis])
    target_shape = T.stack(*target_shape)

    broadcast_mean = T.reshape(mean, target_shape)
    broadcast_var = T.reshape(var, target_shape)
    broadcast_beta = T.reshape(beta, target_shape)
    broadcast_gamma = T.reshape(gamma, target_shape)
    normed = batch_normalization(x, broadcast_mean, broadcast_var,
                                 broadcast_beta, broadcast_gamma,
                                 epsilon)
    return normed, mean, var

# TODO remove this if statement when Theano without
# is deprecated
项目:keras_superpixel_pooling    作者:parag2489    | 项目源码 | 文件源码
def _old_normalize_batch_in_training(x, gamma, beta,
                                     reduction_axes, epsilon=1e-3):
    """Computes mean and std for batch then apply batch_normalization on batch.

    A `None` gamma is replaced by ones shaped like `x` and a `None` beta by
    zeros shaped like `x`.

    The cuDNN kernel is tried when `x` has fewer than 5 dimensions,
    `reduction_axes == [0, 2, 3]` and the configured device name starts with
    'cuda' or 'gpu'; otherwise (or when the cuDNN entry point is missing)
    the statistics are computed symbolically.

    # Returns
        A tuple `(normed, mean, var)`.
    """
    if gamma is None:
        gamma = ones_like(x)
    if beta is None:
        beta = zeros_like(x)

    dev = theano.config.device
    use_cudnn = ndim(x) < 5 and reduction_axes == [0, 2, 3] and (dev.startswith('cuda') or dev.startswith('gpu'))
    if use_cudnn:
        broadcast_beta = beta.dimshuffle('x', 0, 'x', 'x')
        broadcast_gamma = gamma.dimshuffle('x', 0, 'x', 'x')
        try:
            normed, mean, stdinv = theano.sandbox.cuda.dnn.dnn_batch_normalization_train(
                x, broadcast_gamma, broadcast_beta, 'spatial', epsilon)
            normed = theano.tensor.as_tensor_variable(normed)
            mean = theano.tensor.as_tensor_variable(mean)
            stdinv = theano.tensor.as_tensor_variable(stdinv)
            # cuDNN hands back 1 / std; convert it to a variance.
            var = T.inv(stdinv ** 2)
            return normed, T.flatten(mean), T.flatten(var)
        except AttributeError:
            # The cuDNN op is not available in this Theano build: fall
            # through to the generic implementation below.
            pass

    var = x.var(reduction_axes)
    mean = x.mean(reduction_axes)

    # Shape with size 1 on every reduced axis so that the statistics and
    # parameters broadcast against x.
    target_shape = []
    for axis in range(ndim(x)):
        if axis in reduction_axes:
            target_shape.append(1)
        else:
            target_shape.append(x.shape[axis])
    target_shape = T.stack(*target_shape)

    broadcast_mean = T.reshape(mean, target_shape)
    broadcast_var = T.reshape(var, target_shape)
    broadcast_beta = T.reshape(beta, target_shape)
    broadcast_gamma = T.reshape(gamma, target_shape)
    normed = batch_normalization(x, broadcast_mean, broadcast_var,
                                 broadcast_beta, broadcast_gamma,
                                 epsilon)
    return normed, mean, var

# TODO remove this if statement when Theano without
# is deprecated
项目:Theano-Deep-learning    作者:GeekLiB    | 项目源码 | 文件源码
def test_dnn_batchnorm_train():
    """Compare dnn.dnn_batch_normalization_train with a symbolic reference.

    For both modes and for inputs of 5 down to 1 dimensions, the outputs
    (normalized tensor, mean, inverse std) and the gradients with respect to
    x, scale and bias are checked against a reference written with plain
    tensor operations. The test is skipped when cuDNN is unavailable or its
    reported version is below 5000.
    """
    if not dnn.dnn_available(test_ctx_name):
        raise SkipTest(dnn.dnn_available.msg)
    if dnn.version(raises=False) < 5000:
        raise SkipTest("batch normalization requires cudnn v5+")

    for mode in ('per-activation', 'spatial'):
        for vartype in (T.ftensor5, T.ftensor4, T.ftensor3, T.fmatrix, T.fvector):
            x, scale, bias = (vartype(n) for n in ('x', 'scale', 'bias'))
            ndim = x.ndim
            eps = 5e-3  # some non-standard value to test if it's used

            # forward pass
            out, x_mean, x_invstd = dnn.dnn_batch_normalization_train(
                x, scale, bias, mode, eps)
            # reference forward pass
            # 'per-activation' reduces over axis 0 only; 'spatial' reduces
            # over every axis except axis 1.
            if mode == 'per-activation':
                axes = (0,)
            elif mode == 'spatial':
                axes = (0,) + tuple(range(2, ndim))
            x_mean2 = x.mean(axis=axes, keepdims=True)
            x_invstd2 = T.inv(T.sqrt(x.var(axis=axes, keepdims=True) + eps))
            scale2 = T.addbroadcast(scale, *axes)
            bias2 = T.addbroadcast(bias, *axes)
            out2 = (x - x_mean2) * (scale2 * x_invstd2) + bias2
            # backward pass
            dy = vartype('dy')
            grads = T.grad(None, wrt=[x, scale, bias], known_grads={out: dy})
            # reference backward pass
            grads2 = T.grad(None, wrt=[x, scale, bias], known_grads={out2: dy})
            # compile
            # Output layout: 0-2 cuDNN forward, 3-5 reference forward,
            # 6-8 cuDNN gradients, 9-11 reference gradients.
            f = theano.function([x, scale, bias, dy],
                                [out, x_mean, x_invstd, out2, x_mean2, x_invstd2] +
                                grads + grads2, mode=mode_with_gpu)
            # run
            for data_shape in ((5, 10, 30, 40, 10), (4, 3, 1, 1, 1), (1, 1, 5, 5, 5)):
                # Truncate the 5-d shape to the rank of the current vartype.
                data_shape = data_shape[:ndim]
                # Parameters have size 1 on every reduced axis.
                param_shape = tuple(1 if d in axes else s
                                    for d, s in enumerate(data_shape))
                X = 4 + 3 * numpy.random.randn(*data_shape).astype('float32')
                Dy = -1 + 2 * numpy.random.randn(*data_shape).astype('float32')
                Scale = numpy.random.randn(*param_shape).astype('float32')
                Bias = numpy.random.randn(*param_shape).astype('float32')
                outputs = f(X, Scale, Bias, Dy)
                # compare outputs
                utt.assert_allclose(outputs[0], outputs[0 + 3])  # out
                utt.assert_allclose(outputs[1], outputs[1 + 3])  # mean
                utt.assert_allclose(outputs[2], outputs[2 + 3])  # invstd
                # compare gradients
                utt.assert_allclose(outputs[6], outputs[6 + 3], atol=1e-4)  # dx
                utt.assert_allclose(outputs[7], outputs[7 + 3], rtol=2e-4, atol=1e-4)  # dscale
                utt.assert_allclose(outputs[8], outputs[8 + 3])  # dbias
项目:Theano-Deep-learning    作者:GeekLiB    | 项目源码 | 文件源码
def test_local_pow_specialize():
    """Check that ``v ** k`` is specialized for a set of constant exponents.

    For k in {0, 1, -1, 2, -2, 0.5, -0.5} the expression is compiled and both
    the ops remaining in the optimized graph and the numerical result are
    verified.
    """
    mode = theano.config.mode
    # The graph checks below need an optimizing mode, so FAST_COMPILE is
    # swapped for FAST_RUN.
    if mode == 'FAST_COMPILE':
        mode = 'FAST_RUN'
    mode = compile.mode.get_mode(mode)
    # Presumably fusion would merge the elemwise ops that are checked
    # individually below -- TODO confirm.
    mode = mode.excluding('fusion')

    v = T.vector()
    val = numpy.arange(10, dtype=theano.config.floatX)
    # Same data without the leading 0, for the negative exponents.
    val_no0 = numpy.arange(1, 10, dtype=theano.config.floatX)

    # v ** 0
    f = function([v], v ** 0, mode=mode)
    nodes = [node.op for node in f.maker.fgraph.toposort()]
    assert nodes == [Shape_i(0), T.alloc]
    utt.assert_allclose(f(val), val ** 0)

    # v ** 1
    f = function([v], v ** 1, mode=mode)
    nodes = [node.op for node in f.maker.fgraph.toposort()]
    # This comparison used to be a bare expression whose result was thrown
    # away; it is now actually asserted.
    assert nodes == [deep_copy_op]
    utt.assert_allclose(f(val), val ** 1)

    # v ** -1
    f = function([v], v ** (-1), mode=mode)
    nodes = [node.op for node in f.maker.fgraph.toposort()]
    assert nodes == [T.inv]
    utt.assert_allclose(f(val_no0), val_no0 ** (-1))

    # v ** 2
    f = function([v], v ** 2, mode=mode)
    nodes = [node.op for node in f.maker.fgraph.toposort()]
    assert nodes == [T.sqr]
    utt.assert_allclose(f(val), val ** 2)

    # v ** -2
    f = function([v], v ** (-2), mode=mode)
    nodes = [node.op for node in f.maker.fgraph.toposort()]
    assert len(nodes) == 2
    assert nodes[0] == T.sqr
    # NOTE(review): comparing the whole list against [T.sqr, T.inv] was
    # reported not to pass (possibly an in-place variant of the op --
    # unconfirmed), so only the scalar op of the second node is checked.
    assert isinstance(nodes[1].scalar_op, theano.scalar.basic.Inv)
    utt.assert_allclose(f(val_no0), val_no0 ** (-2))

    # v ** 0.5
    f = function([v], v ** (.5), mode=mode)
    nodes = [node.op for node in f.maker.fgraph.toposort()]
    assert nodes == [T.sqrt]
    utt.assert_allclose(f(val), val ** (.5))

    # v ** -0.5
    f = function([v], v ** (-.5), mode=mode)
    nodes = [node.op for node in f.maker.fgraph.toposort()]
    assert len(nodes) == 2
    assert nodes[0] == T.sqrt
    # NOTE(review): same remark as for v ** -2 -- comparing against
    # [T.sqrt, T.inv] was reported not to pass.
    assert isinstance(nodes[1].scalar_op, theano.scalar.basic.Inv)
    utt.assert_allclose(f(val_no0), val_no0 ** (-.5))
项目:Theano-Deep-learning    作者:GeekLiB    | 项目源码 | 文件源码
def test_batchnorm_train():
    """Compare ``dnn_batch_normalization_train`` with a reference graph.

    For both normalization modes and tensor ranks 5 down to 1, the forward
    outputs (normalized output, mean, inverse std) and the gradients with
    respect to x, scale and bias are compared against the same quantities
    built from plain tensor operations.
    """
    if not cuda.dnn.dnn_available():
        raise SkipTest(cuda.dnn.dnn_available.msg)
    if cuda.dnn.version() < (5000, 5000):
        raise SkipTest("batch normalization requires cudnn v5+")

    tensor_types = (T.ftensor5, T.ftensor4, T.ftensor3, T.fmatrix, T.fvector)
    full_shapes = ((5, 10, 30, 40, 10), (4, 3, 1, 1, 1), (1, 1, 5, 5, 5))

    for mode in ('per-activation', 'spatial'):
        for vartype in tensor_types:
            x = vartype('x')
            scale = vartype('scale')
            bias = vartype('bias')
            ndim = x.ndim
            eps = 5e-3  # some non-standard value to test if it's used

            # graph under test
            out, x_mean, x_invstd = cuda.dnn.dnn_batch_normalization_train(
                x, scale, bias, mode, eps)

            # reference graph: 'per-activation' reduces over axis 0 only,
            # 'spatial' additionally reduces over every axis from 2 onwards
            axes = ((0,) if mode == 'per-activation'
                    else (0,) + tuple(range(2, ndim)))
            x_mean2 = x.mean(axis=axes, keepdims=True)
            x_var2 = x.var(axis=axes, keepdims=True)
            x_invstd2 = T.inv(T.sqrt(x_var2 + eps))
            scale2 = T.addbroadcast(scale, *axes)
            bias2 = T.addbroadcast(bias, *axes)
            out2 = (x - x_mean2) * (scale2 * x_invstd2) + bias2

            # gradients of both graphs for the same upstream gradient dy
            dy = vartype('dy')
            wrt = [x, scale, bias]
            grads = T.grad(None, wrt=wrt, known_grads={out: dy})
            grads2 = T.grad(None, wrt=wrt, known_grads={out2: dy})

            # a single compiled function returning everything to compare
            forward = [out, x_mean, x_invstd]
            forward_ref = [out2, x_mean2, x_invstd2]
            f = theano.function([x, scale, bias, dy],
                                forward + forward_ref + grads + grads2,
                                mode=mode_with_gpu)

            for full_shape in full_shapes:
                data_shape = full_shape[:ndim]
                # parameters have length 1 along every reduced axis
                param_shape = tuple(
                    1 if axis in axes else size
                    for axis, size in enumerate(data_shape))
                x_val = 4 + 3 * numpy.random.randn(
                    *data_shape).astype('float32')
                dy_val = -1 + 2 * numpy.random.randn(
                    *data_shape).astype('float32')
                scale_val = numpy.random.randn(*param_shape).astype('float32')
                bias_val = numpy.random.randn(*param_shape).astype('float32')
                results = f(x_val, scale_val, bias_val, dy_val)

                # forward values: out, mean, invstd
                for got, expected in zip(results[0:3], results[3:6]):
                    utt.assert_allclose(got, expected)

                # gradients: dx, dscale, dbias
                dx, dscale, dbias = results[6:9]
                dx_ref, dscale_ref, dbias_ref = results[9:12]
                utt.assert_allclose(dx, dx_ref, atol=1e-4)
                utt.assert_allclose(dscale, dscale_ref, rtol=2e-4, atol=1e-4)
                utt.assert_allclose(dbias, dbias_ref)
项目:rllab    作者:rll    | 项目源码 | 文件源码
def get_output_for(self, input, deterministic=False, **kwargs):
        """Batch-normalize ``input`` and return the resulting expression.

        The statistics are either the stored ``self.mean`` / ``self.std`` or
        the mean / std of ``input`` over ``self.axes``.

        :param input: expression to normalize; it must provide ``mean``,
            ``var`` and ``ndim`` (presumably a Theano tensor -- confirm
            against the callers).
        :param deterministic: default value for ``batch_norm_use_averages``;
            its negation is the default for ``batch_norm_update_averages``.
        :param kwargs: may contain ``batch_norm_use_averages`` (use the stored
            statistics instead of the batch statistics) and
            ``batch_norm_update_averages`` (attach running-average updates
            to the stored statistics).
        :return: ``(input - mean) * (gamma * TT.inv(std)) + beta``, with the
            parameters dimshuffled to line up with ``input``.
        """
        input_mean = input.mean(self.axes)
        input_std = TT.sqrt(input.var(self.axes) + self.epsilon)

        # Decide whether to use the stored averages or mini-batch statistics
        use_averages = kwargs.get('batch_norm_use_averages',
                                  deterministic)
        if use_averages:
            mean = self.mean
            std = self.std
        else:
            mean = input_mean
            std = input_std

        # Decide whether to update the stored averages
        update_averages = kwargs.get('batch_norm_update_averages',
                                     not deterministic)
        if update_averages:
            # Trick: To update the stored statistics, we create memory-aliased
            # clones of the stored statistics:
            running_mean = theano.clone(self.mean, share_inputs=False)
            running_std = theano.clone(self.std, share_inputs=False)
            # set a default update for them:
            running_mean.default_update = ((1 - self.alpha) * running_mean +
                                           self.alpha * input_mean)
            running_std.default_update = ((1 - self.alpha) * running_std +
                                          self.alpha * input_std)
            # and make sure they end up in the graph without participating in
            # the computation (this way their default_update will be collected
            # and applied, but the computation will be optimized away):
            mean += 0 * running_mean
            std += 0 * running_std

        # prepare dimshuffle pattern inserting broadcastable axes as needed
        param_axes = iter(list(range(input.ndim - len(self.axes))))
        pattern = ['x' if input_axis in self.axes
                   else next(param_axes)
                   for input_axis in range(input.ndim)]

        # apply dimshuffle pattern to all parameters
        beta = 0 if self.beta is None else self.beta.dimshuffle(pattern)
        gamma = 1 if self.gamma is None else self.gamma.dimshuffle(pattern)
        mean = mean.dimshuffle(pattern)
        std = std.dimshuffle(pattern)

        # normalize
        normalized = (input - mean) * (gamma * TT.inv(std)) + beta
        return normalized
项目:IQA_BIECON_release    作者:jongyookim    | 项目源码 | 文件源码
def get_output(self, input, **kwargs):
        """Batch-normalize ``input`` using a stored or batch inverse std.

        When ``self.deterministic`` is true the stored ``self.mean`` and
        ``self.inv_std`` are used; otherwise the statistics of ``input`` over
        ``self.axes`` are used and, if ``self.update_averages`` is set,
        running-average updates are attached to the stored statistics.

        :param input: expression to normalize; it must provide ``mean``,
            ``var`` and ``ndim`` (presumably a Theano tensor -- confirm
            against the callers).
        :param kwargs: accepted but not read by this method.
        :return: ``(input - mean) * (gamma * inv_std) + beta``, with the
            parameters dimshuffled to line up with ``input``.
        """
        input_mean = input.mean(self.axes)
        input_inv_std = T.inv(T.sqrt(input.var(self.axes) + self.epsilon))

        # Decide whether to use the stored averages or mini-batch statistics
        use_averages = self.deterministic
        if use_averages:
            mean = self.mean
            inv_std = self.inv_std
        else:
            mean = input_mean
            inv_std = input_inv_std

        # Decide whether to update the stored averages
        update_averages = self.update_averages and not use_averages
        if update_averages:
            # Trick: To update the stored statistics, we create memory-aliased
            # clones of the stored statistics:
            running_mean = theano.clone(self.mean, share_inputs=False)
            running_inv_std = theano.clone(self.inv_std, share_inputs=False)
            # set a default update for them:
            running_mean.default_update = ((1 - self.alpha) * running_mean +
                                           self.alpha * input_mean)
            running_inv_std.default_update = ((1 - self.alpha) *
                                              running_inv_std +
                                              self.alpha * input_inv_std)
            # and make sure they end up in the graph without participating in
            # the computation (this way their default_update will be collected
            # and applied, but the computation will be optimized away):
            mean += 0 * running_mean
            inv_std += 0 * running_inv_std

        # prepare dimshuffle pattern inserting broadcastable axes as needed
        param_axes = iter(list(range(input.ndim - len(self.axes))))
        pattern = ['x' if input_axis in self.axes
                   else next(param_axes)
                   for input_axis in range(input.ndim)]

        # apply dimshuffle pattern to all parameters
        beta = 0 if self.beta is None else self.beta.dimshuffle(pattern)
        gamma = 1 if self.gamma is None else self.gamma.dimshuffle(pattern)
        mean = mean.dimshuffle(pattern)
        inv_std = inv_std.dimshuffle(pattern)

        # normalize
        normalized = (input - mean) * (gamma * inv_std) + beta
        return normalized
项目:IQA_BIECON_release    作者:jongyookim    | 项目源码 | 文件源码
def get_output(self, input, **kwargs):
        """Batch-normalize ``input`` and apply the layer activation.

        When ``self.deterministic`` is true the stored ``self.mean`` and
        ``self.std`` are used; otherwise the statistics of ``input`` over
        ``self.axes`` are used and, if ``self.update_averages`` is set,
        running-average updates are attached to the stored statistics.

        :param input: expression to normalize; it must provide ``mean``,
            ``var`` and ``ndim`` (presumably a Theano tensor -- confirm
            against the callers).
        :param kwargs: accepted but not read by this method.
        :return: ``self.activation`` applied to the result of
            ``batch_normalization(input, gamma, beta, mean, std,
            mode='low_mem')``.
        """
        input_mean = input.mean(self.axes)
        # The plain std (not its inverse) is computed here because it is
        # passed as-is to batch_normalization() below.
        input_std = T.sqrt(input.var(self.axes) + self.epsilon)

        # Decide whether to use the stored averages or mini-batch statistics
        use_averages = self.deterministic
        if use_averages:
            mean = self.mean
            std = self.std
        else:
            mean = input_mean
            std = input_std

        # Decide whether to update the stored averages
        update_averages = self.update_averages and not use_averages
        if update_averages:
            # Trick: To update the stored statistics, we create memory-aliased
            # clones of the stored statistics:
            running_mean = theano.clone(self.mean, share_inputs=False)
            running_std = theano.clone(self.std, share_inputs=False)
            # set a default update for them:
            running_mean.default_update = ((1 - self.alpha) * running_mean +
                                           self.alpha * input_mean)
            running_std.default_update = ((1 - self.alpha) * running_std +
                                          self.alpha * input_std)
            # and make sure they end up in the graph without participating in
            # the computation (this way their default_update will be collected
            # and applied, but the computation will be optimized away):
            mean += 0 * running_mean
            std += 0 * running_std

        # prepare dimshuffle pattern inserting broadcastable axes as needed
        param_axes = iter(list(range(input.ndim - len(self.axes))))
        pattern = ['x' if input_axis in self.axes
                   else next(param_axes)
                   for input_axis in range(input.ndim)]

        # apply dimshuffle pattern to all parameters
        beta = 0 if self.beta is None else self.beta.dimshuffle(pattern)
        gamma = 1 if self.gamma is None else self.gamma.dimshuffle(pattern)
        mean = mean.dimshuffle(pattern)
        std = std.dimshuffle(pattern)

        # normalize
        normalized = batch_normalization(
            input, gamma, beta, mean, std, mode='low_mem')
        return self.activation(normalized)
项目:maml_rl    作者:cbfinn    | 项目源码 | 文件源码
def get_output_for(self, input, deterministic=False, **kwargs):
        """Batch-normalize ``input`` and return the resulting expression.

        The statistics are either the stored ``self.mean`` / ``self.std`` or
        the mean / std of ``input`` over ``self.axes``.

        :param input: expression to normalize; it must provide ``mean``,
            ``var`` and ``ndim`` (presumably a Theano tensor -- confirm
            against the callers).
        :param deterministic: default value for ``batch_norm_use_averages``;
            its negation is the default for ``batch_norm_update_averages``.
        :param kwargs: may contain ``batch_norm_use_averages`` (use the stored
            statistics instead of the batch statistics) and
            ``batch_norm_update_averages`` (attach running-average updates
            to the stored statistics).
        :return: ``(input - mean) * (gamma * TT.inv(std)) + beta``, with the
            parameters dimshuffled to line up with ``input``.
        """
        input_mean = input.mean(self.axes)
        input_std = TT.sqrt(input.var(self.axes) + self.epsilon)

        # Decide whether to use the stored averages or mini-batch statistics
        use_averages = kwargs.get('batch_norm_use_averages',
                                  deterministic)
        if use_averages:
            mean = self.mean
            std = self.std
        else:
            mean = input_mean
            std = input_std

        # Decide whether to update the stored averages
        update_averages = kwargs.get('batch_norm_update_averages',
                                     not deterministic)
        if update_averages:
            # Trick: To update the stored statistics, we create memory-aliased
            # clones of the stored statistics:
            running_mean = theano.clone(self.mean, share_inputs=False)
            running_std = theano.clone(self.std, share_inputs=False)
            # set a default update for them:
            running_mean.default_update = ((1 - self.alpha) * running_mean +
                                           self.alpha * input_mean)
            running_std.default_update = ((1 - self.alpha) * running_std +
                                          self.alpha * input_std)
            # and make sure they end up in the graph without participating in
            # the computation (this way their default_update will be collected
            # and applied, but the computation will be optimized away):
            mean += 0 * running_mean
            std += 0 * running_std

        # prepare dimshuffle pattern inserting broadcastable axes as needed
        param_axes = iter(list(range(input.ndim - len(self.axes))))
        pattern = ['x' if input_axis in self.axes
                   else next(param_axes)
                   for input_axis in range(input.ndim)]

        # apply dimshuffle pattern to all parameters
        beta = 0 if self.beta is None else self.beta.dimshuffle(pattern)
        gamma = 1 if self.gamma is None else self.gamma.dimshuffle(pattern)
        mean = mean.dimshuffle(pattern)
        std = std.dimshuffle(pattern)

        # normalize
        normalized = (input - mean) * (gamma * TT.inv(std)) + beta
        return normalized