Python theano.tensor 模块,exp() 实例源码


项目:Stein-Variational-Gradient-Descent    作者:DartML    | 项目源码 | 文件源码
def svgd_kernel(self, h = -1):
        """Return the RBF kernel of ``self.theta`` and the SVGD repulsion term.

        ``h`` is the kernel bandwidth. A negative value selects it with the
        median heuristic ``sqrt(0.5 * median(d^2) / log(n + 1))``, where
        ``d^2`` are the pairwise squared distances and ``n`` the row count.

        Returns ``(Kxy, dxkxy)`` with ``Kxy = exp(-d^2 / (2 h^2))`` and
        ``dxkxy = (theta * rowsum(Kxy) - Kxy @ theta) / h^2``.
        """
        particles = self.theta
        squared = squareform(pdist(particles)) ** 2
        if h < 0:
            # Median trick: derive the bandwidth from the data itself.
            median_sq = np.median(squared)
            h = np.sqrt(0.5 * median_sq / np.log(particles.shape[0] + 1))

        bandwidth_sq = h ** 2
        Kxy = np.exp(-squared / bandwidth_sq / 2)

        row_sums = np.sum(Kxy, axis=1)
        # Broadcasting the row sums over columns replaces a per-column loop.
        dxkxy = -np.matmul(Kxy, particles) + particles * row_sums[:, np.newaxis]
        return (Kxy, dxkxy / bandwidth_sq)
项目:geomdn    作者:afshinrahimi    | 项目源码 | 文件源码
def nll_loss_sharedparams(self, mus, sigmas, corxy, pis, y_true):
        """Mean negative log-likelihood of 2-D targets under a bivariate Gaussian mixture.

        The component means, scales and correlations are shared across the
        batch; only the mixture weights ``pis`` vary per sample.

        NOTE(review): assumes mus/sigmas are (n_components, 2), corxy is
        (n_components,), pis is (batch, n_components) and y_true is
        (batch, 2) -- confirm against the caller.

        Returns:
            Symbolic scalar: minus the batch mean of the mixture log-density.
        """
        mus_ex = mus[np.newaxis, :, :]
        X = y_true[:, np.newaxis, :]
        diff = X - mus_ex
        # Cross term (x - mu_x) * (y - mu_y) of the bivariate normal density.
        diffprod = T.prod(diff, axis=-1)
        corxy2 = corxy **2
        diff2 = diff ** 2
        sigmas2 = sigmas ** 2
        sigmainvs = 1.0 / sigmas
        sigmainvprods = sigmainvs[:, 0] * sigmainvs[:, 1]
        diffsigma = diff2 / sigmas2
        diffsigmanorm = T.sum(diffsigma, axis=-1)
        z = diffsigmanorm - 2 * corxy * diffprod * sigmainvprods
        oneminuscorxy2inv = 1.0 / (1.0 - corxy2)
        expterm = -0.5 * z * oneminuscorxy2inv
        # Per-component log(pi_k * N(x | mu_k, Sigma_k)).
        new_exponent = T.log(0.5/np.pi) + T.log(sigmainvprods) + T.log(np.sqrt(oneminuscorxy2inv)) + expterm + T.log(pis)
        # Log-sum-exp over components, shifted by the maximum for stability.
        max_exponent = T.max(new_exponent ,axis=1, keepdims=True)
        mod_exponent = new_exponent - max_exponent
        gauss_mix = T.sum(T.exp(mod_exponent),axis=1)
        log_gauss = max_exponent + T.log(gauss_mix)
        loss = -T.mean(log_gauss)
        return loss
项目:SteinGAN    作者:DartML    | 项目源码 | 文件源码
def rbf_kernel(X0):
    """Symbolic RBF kernel over the rows of ``X0`` with a median-based bandwidth.

    Returns:
        (Kxy, neighbors, h): the kernel matrix ``exp(-H / (2 h^2))`` where
        ``H`` holds pairwise squared distances; for each row, the column
        index ranked second by distance; and the bandwidth ``h``.
    """
    XY = T.dot(X0, X0.transpose())
    x2 = T.reshape(T.sum(T.square(X0), axis=1), (X0.shape[0], 1))
    X2e = T.repeat(x2, X0.shape[0], axis=1)
    # ||x_i||^2 + ||x_j||^2 - 2 <x_i, x_j>
    H = T.sub(T.add(X2e, X2e.transpose()), 2 * XY)

    V = H.flatten()
    V_sorted = T.sort(V)
    mid = V.shape[0] // 2

    # median distance
    h = T.switch(T.eq((V.shape[0] % 2), 0),
        # even length: mean of the two middle values
        T.mean(V_sorted[(mid - 1):(mid + 1)]),
        # odd length: the middle value
        V_sorted[mid])

    h = T.sqrt(0.5 * h / T.log(X0.shape[0].astype('float32') + 1.0)) / 2.

    Kxy = T.exp(-H / h ** 2 / 2.0)
    # Column 1 of the argsort skips the smallest entry of each row
    # (presumably the point itself, at distance 0).
    neighbors = T.argsort(H, axis=1)[:, 1]

    return Kxy, neighbors, h
项目:SteinGAN    作者:DartML    | 项目源码 | 文件源码
def rbf_kernel(X):
    """Symbolic RBF kernel of the rows of ``X`` plus the SVGD repulsion term.

    The bandwidth ``h`` comes from the median heuristic applied to all
    pairwise squared distances.

    Returns:
        (kxy, dxkxy): ``kxy = exp(-H / (2 h^2))`` with ``H`` the pairwise
        squared distances, and
        ``dxkxy = (X * rowsum(kxy) - kxy . X) / h^2``.
    """
    XY = T.dot(X, X.T)
    x2 = T.sum(X**2, axis=1).dimshuffle(0, 'x')
    X2e = T.repeat(x2, X.shape[0], axis=1)
    H = X2e +  X2e.T - 2. * XY

    V = H.flatten()
    V_sorted = T.sort(V)
    mid = V.shape[0] // 2
    # median distance
    h = T.switch(T.eq((V.shape[0] % 2), 0),
        # even length: mean of the two middle values
        T.mean(V_sorted[(mid - 1):(mid + 1)]),
        # odd length: the middle value
        V_sorted[mid])

    h = T.sqrt(.5 * h / T.log(H.shape[0].astype('float32') + 1.))

    # compute the rbf kernel
    kxy = T.exp(-H / (h ** 2) / 2.0)

    dxkxy = -T.dot(kxy, X)
    sumkxy = T.sum(kxy, axis=1).dimshuffle(0, 'x')
    dxkxy = T.add(dxkxy, T.mul(X, sumkxy)) / (h ** 2)

    return kxy, dxkxy
项目:Stein-Variational-Gradient-Descent    作者:DartML    | 项目源码 | 文件源码
def evaluation(self, X_test, y_test):
        """Score the particle ensemble on a test set.

        Every one of the ``self.M`` particles in ``self.theta`` predicts all
        test targets; predictions are mapped back to the original scale with
        ``std_y_train`` / ``mean_y_train``.

        Returns:
            (svgd_rmse, svgd_ll): RMSE of the particle-averaged prediction and
            the mean log of the particle-averaged Gaussian likelihood.
        """
        # normalization
        X_test = self.normalization(X_test)

        # average over the output
        pred_y_test = np.zeros([self.M, len(y_test)])
        prob = np.zeros([self.M, len(y_test)])

        # Since we have M particles, we use a Bayesian view to calculate
        # rmse and log-likelihood.
        for i in range(self.M):
            w1, b1, w2, b2, loggamma, _ = self.unpack_weights(self.theta[i, :])
            pred_y_test[i, :] = self.nn_predict(X_test, w1, b1, w2, b2) * self.std_y_train + self.mean_y_train
            # Gaussian density with precision exp(loggamma).
            prob[i, :] = np.sqrt(np.exp(loggamma)) /np.sqrt(2*np.pi) * np.exp( -1 * (np.power(pred_y_test[i, :] - y_test, 2) / 2) * np.exp(loggamma) )
        pred = np.mean(pred_y_test, axis=0)

        # evaluation
        svgd_rmse = np.sqrt(np.mean((pred - y_test)**2))
        svgd_ll = np.mean(np.log(np.mean(prob, axis = 0)))

        return (svgd_rmse, svgd_ll)
项目:text2image    作者:emansim    | 项目源码 | 文件源码
def matrix2att(self, matrix):
        '''Turn a (batch_size, 5) theano matrix into attention parameters.

        Columns are read as g_hat_x, g_hat_y, log_delta, log_sigma_sqr and
        log_gamma. Returns (g_y, g_x, delta, sigma, gamma); gamma is reshaped
        to a broadcastable column.
        '''
        g_hat_x, g_hat_y, log_delta, log_sigma_sqr, log_gamma = (
            matrix[:, col] for col in range(5))

        half_a = (self.A + 1.0) / 2.0
        half_b = (self.B + 1.0) / 2.0
        stride_scale = (max(self.A, self.B) - 1.0) / (self.N - 1)

        centre_x = half_a * (g_hat_x + 1.0)
        centre_y = half_b * (g_hat_y + 1.0)
        delta = stride_scale * T.exp(log_delta)
        gamma = T.exp(log_gamma).dimshuffle(0, 'x')
        sigma = T.exp(log_sigma_sqr / 2.0)

        return centre_y, centre_x, delta, sigma, gamma
项目:text2image    作者:emansim    | 项目源码 | 文件源码
def matrix2att_cpu(self, matrix):
        '''Turn a (batch_size, 5) numpy matrix into attention parameters.

        Columns are read as g_hat_x, g_hat_y, log_delta, log_sigma_sqr and
        log_gamma. Returns (g_y, g_x, delta, sigma, gamma).
        '''
        g_hat_x, g_hat_y, log_delta, log_sigma_sqr, log_gamma = (
            matrix[:, col] for col in range(5))

        half_a = (self.A + 1.0) / 2.0
        half_b = (self.B + 1.0) / 2.0
        stride_scale = (max(self.A, self.B) - 1.0) / (self.N - 1)

        centre_x = half_a * (g_hat_x + 1.0)
        centre_y = half_b * (g_hat_y + 1.0)
        delta = stride_scale * np.exp(log_delta)
        gamma = np.exp(log_gamma)
        sigma = np.exp(log_sigma_sqr / 2.0)

        return centre_y, centre_x, delta, sigma, gamma
项目:text2image    作者:emansim    | 项目源码 | 文件源码
def matrix2att(self, matrix):
        '''Map a (batch_size, 5) theano matrix to (g_y, g_x, delta, sigma, gamma).

        Column order: g_hat_x, g_hat_y, log_delta, log_sigma_sqr, log_gamma.
        '''
        # Centres: rescale the raw offsets by (A + 1) / 2 and (B + 1) / 2.
        g_x = (self.A + 1.0) / 2.0 * (matrix[:, 0] + 1.0)
        g_y = (self.B + 1.0) / 2.0 * (matrix[:, 1] + 1.0)

        # The remaining columns are stored in log space.
        largest_side = max(self.A, self.B)
        delta = (largest_side - 1.0) / (self.N - 1) * T.exp(matrix[:, 2])
        sigma = T.exp(matrix[:, 3] / 2.0)
        # gamma becomes a broadcastable column.
        gamma = T.exp(matrix[:, 4]).dimshuffle(0, 'x')

        return g_y, g_x, delta, sigma, gamma
项目:text2image    作者:emansim    | 项目源码 | 文件源码
def matrix2att_cpu(self, matrix):
        '''Map a (batch_size, 5) numpy matrix to (g_y, g_x, delta, sigma, gamma).

        Column order: g_hat_x, g_hat_y, log_delta, log_sigma_sqr, log_gamma.
        '''
        # Centres: rescale the raw offsets by (A + 1) / 2 and (B + 1) / 2.
        g_x = (self.A + 1.0) / 2.0 * (matrix[:, 0] + 1.0)
        g_y = (self.B + 1.0) / 2.0 * (matrix[:, 1] + 1.0)

        # The remaining columns are stored in log space.
        largest_side = max(self.A, self.B)
        delta = (largest_side - 1.0) / (self.N - 1) * np.exp(matrix[:, 2])
        sigma = np.exp(matrix[:, 3] / 2.0)
        gamma = np.exp(matrix[:, 4])

        return g_y, g_x, delta, sigma, gamma
项目:face_detection    作者:chintak    | 项目源码 | 文件源码
def iou_loss(p, t):
    """Mean ``1 - IoU`` between predicted and target boxes (symbolic).

    Each row of ``p`` and ``t`` is reshaped to (2, 2); row 0 is used as the
    lower corner and row 1 as the upper corner of a box.

    Returns:
        Symbolic scalar: the batch mean of ``1 - min(IoU, 1)``.
    """
    tp, tt = p.reshape((p.shape[0], 2, 2)), t.reshape((t.shape[0], 2, 2))
    overlaps_t0 = T.maximum(tp[:, 0, :], tt[:, 0, :])
    overlaps_t1 = T.minimum(tp[:, 1, :], tt[:, 1, :])
    # Clamp each side at zero before multiplying: for boxes disjoint on both
    # axes, two negative sides would otherwise multiply to a positive area.
    sides = T.maximum(overlaps_t1 - overlaps_t0, np.float32(0.))
    intersection = sides[:, 0] * sides[:, 1]
    dims_p = tp[:, 1, :] - tp[:, 0, :]
    areas_p = dims_p[:, 0] * dims_p[:, 1]
    dims_t = tt[:, 1, :] - tt[:, 0, :]
    areas_t = dims_t[:, 0] * dims_t[:, 1]
    union = areas_p + areas_t - intersection
    # A plain ratio equals exp(log|i| - log(|u| + eps)) but never evaluates
    # log(0) when the boxes do not overlap.
    iou = T.abs_(intersection) / (T.abs_(union) + np.float32(1e-5))
    # NOTE(review): the upper clamp at 1 is a reconstruction of the missing
    # second argument of the minimum -- confirm against upstream.
    loss = 1. - T.minimum(iou, np.float32(1.))
    return T.mean(loss)
项目:face_detection    作者:chintak    | 项目源码 | 文件源码
def iou_loss_val(p, t):
    """Mean ``1 - IoU`` between predicted and target boxes (numpy).

    Each row of ``p`` and ``t`` is reshaped to (2, 2); row 0 is used as the
    lower corner and row 1 as the upper corner of a box.

    Returns:
        The batch mean of ``1 - min(IoU, 1)``.
    """
    tp, tt = p.reshape((p.shape[0], 2, 2)), t.reshape((t.shape[0], 2, 2))
    lower = np.maximum(tp[:, 0, :], tt[:, 0, :])
    upper = np.minimum(tp[:, 1, :], tt[:, 1, :])
    # Clamp each side at zero before multiplying: for boxes disjoint on both
    # axes, two negative sides would otherwise multiply to a positive area.
    sides = np.maximum(upper - lower, 0.)
    intersection = sides[:, 0] * sides[:, 1]
    dims_p = tp[:, 1, :] - tp[:, 0, :]
    areas_p = dims_p[:, 0] * dims_p[:, 1]
    dims_t = tt[:, 1, :] - tt[:, 0, :]
    areas_t = dims_t[:, 0] * dims_t[:, 1]
    union = areas_p + areas_t - intersection
    # A plain ratio equals exp(log|i| - log(|u| + eps)) but avoids the
    # log(0) runtime warning when the boxes do not overlap.
    iou = np.abs(intersection) / (np.abs(union) + 1e-5)
    # NOTE(review): the upper clamp at 1 is a reconstruction of the missing
    # second argument of the minimum -- confirm against upstream.
    loss = 1. - np.minimum(iou, 1.)
    return np.mean(loss)
项目:geomdn    作者:afshinrahimi    | 项目源码 | 文件源码
def get_output_for(self, input, **kwargs):
        """Density of every 2-D input under every unit's diagonal Gaussian.

        Computes ``1 / (2 pi sx sy) * exp(-0.5 * sum((x - mu)^2 / s^2))`` for
        each (input, unit) pair, where the scales are ``softplus(self.sigmas)``.

        NOTE(review): assumes ``input`` is (batch, 2) and ``self.mus`` /
        ``self.sigmas`` are (num_units, 2) -- confirm against the layer setup.
        """
        # softplus maps to (0, +inf), keeping sigma positive and nonzero
        sigmas = T.nnet.softplus(self.sigmas)
        sigmainvs = 1.0 / sigmas
        sigmainvprods = sigmainvs[:, 0] * sigmainvs[:, 1]
        sigmas2 = sigmas ** 2
        mus = self.mus[np.newaxis, :, :]
        X = input[:, np.newaxis, :]
        diff = (X - mus) ** 2
        diffsigma = diff / sigmas2
        diffsigmanorm = T.sum(diffsigma, axis=-1)
        expterm = T.exp(-0.5 * diffsigmanorm)
        probs = (0.5 / np.pi) * sigmainvprods * expterm
        return probs
项目:cortex    作者:rdevon    | 项目源码 | 文件源码
def log_marginal(self, y, h, py, q):
        '''Computes the approximate log marginal.

        Uses \\log \\sum p / q - \\log N

        Args:
            y: T.tensor, target values.
            h: T.tensor, latent samples.
            py: T.tensor, conditional density p(y | h)
            q: approximate posterior q(h | y)

        Returns:
            approximate log marginal.
        '''
        log_py_h = -self.conditional.neg_log_prob(y, py)
        log_ph   = -self.prior.neg_log_prob(h)
        log_qh   = -self.posterior.neg_log_prob(h, q)
        assert log_py_h.ndim == log_ph.ndim == log_qh.ndim

        # Log importance weights; subtract the max over axis 0 before
        # exponentiating so the mean is computed stably.
        log_p     = log_py_h + log_ph - log_qh
        log_p_max = T.max(log_p, axis=0, keepdims=True)
        w         = T.exp(log_p - log_p_max)

        return (T.log(w.mean(axis=0, keepdims=True)) + log_p_max).mean()
项目:cortex    作者:rdevon    | 项目源码 | 文件源码
def step_free_energy(self, x, beta, *params):
        '''Step free energy function.

        Args:
            x (T.tensor): data sample.
            beta (float): beta value for annealing.
            *params: theano shared variables.

        Returns:
            T.tensor: free energy.
        '''
        W, v_params, h_params = self.split_params(*params)

        vis_term = beta * self.v_dist.get_energy_bias(x, *v_params)
        x = self.v_dist.scale_for_energy_model(x, *v_params)
        hid_act = beta * (T.dot(x, W) + self.h_dist.get_center(*h_params))
        # log(1 + exp(.)) summed over axis 1 (the hidden units of x . W).
        fe = -vis_term - T.log(1. + T.exp(hid_act)).sum(axis=1)
        return fe
项目:cortex    作者:rdevon    | 项目源码 | 文件源码
def step_free_energy_h(self, h, beta, *params):
        '''Step free energy function for hidden states.

        Args:
            h (T.tensor): hidden sample.
            beta (float): beta value for annealing.
            *params: theano shared variables.

        Returns:
            T.tensor: free energy.
        '''
        W, v_params, h_params = self.split_params(*params)

        hid_term = beta * self.h_dist.get_energy_bias(h, *h_params)
        h = self.h_dist.scale_for_energy_model(h, *h_params)
        vis_act = beta * (T.dot(h, W.T) + self.v_dist.get_center(*v_params))
        # log(1 + exp(.)) summed over axis 1 (the visible units of h . W^T).
        fe = -hid_term - T.log(1. + T.exp(vis_act)).sum(axis=1)
        return fe
项目:keras    作者:GeekLiB    | 项目源码 | 文件源码
def ctc_update_log_p(skip_idxs, zeros, active, log_p_curr, log_p_prev):
    """Advance the log-domain path probabilities by one step.

    The previous log-probabilities of the first ``active`` states are
    exponentiated (shifted by their maximum), propagated by stay, next-state
    and skip transitions, and added in log space to ``log_p_curr``.

    NOTE(review): the ``T.maximum(`` opener and the ``zeros[:active_next]``
    target of the final ``set_subtensor`` are reconstructed -- confirm
    against the upstream Keras Theano backend.

    Returns:
        (active_next, log_p_next): the new number of active states (int32,
        capped at ``log_p_curr.shape[0]``) and the updated log-probabilities.
    """
    active_skip_idxs = skip_idxs[(skip_idxs < active).nonzero()]
    active_next = T.cast(T.minimum(
        T.maximum(
            active + 1,
            T.max(T.concatenate([active_skip_idxs, [-1]])) + 2 + 1
        ), log_p_curr.shape[0]), 'int32')

    # Shift by the maximum so the exponentials cannot overflow.
    common_factor = T.max(log_p_prev[:active])
    p_prev = T.exp(log_p_prev[:active] - common_factor)
    _p_prev = zeros[:active_next]
    # copy over
    _p_prev = T.set_subtensor(_p_prev[:active], p_prev)
    # previous transitions
    _p_prev = T.inc_subtensor(_p_prev[1:], _p_prev[:-1])
    # skip transitions
    _p_prev = T.inc_subtensor(_p_prev[active_skip_idxs + 2], p_prev[active_skip_idxs])
    updated_log_p_prev = T.log(_p_prev) + common_factor

    log_p_next = T.set_subtensor(
        zeros[:active_next],
        log_p_curr[:active_next] + updated_log_p_prev
    )
    return active_next, log_p_next
项目:third_person_im    作者:bstadie    | 项目源码 | 文件源码
def kl_sym(self, old_dist_info_vars, new_dist_info_vars):
        """Symbolic KL(old || new) of two diagonal-covariance Gaussians.

        Both arguments are mappings with "mean" and "log_std" entries. The
        divergence is summed over the last axis.
        """
        old_means = old_dist_info_vars["mean"]
        old_log_stds = old_dist_info_vars["log_std"]
        new_means = new_dist_info_vars["mean"]
        new_log_stds = new_dist_info_vars["log_std"]
        old_std = TT.exp(old_log_stds)
        new_std = TT.exp(new_log_stds)
        # means: (N*A)
        # std: (N*A)
        # formula:
        # { (\mu_1 - \mu_2)^2 + \sigma_1^2 - \sigma_2^2 } / (2\sigma_2^2) +
        # ln(\sigma_2/\sigma_1)
        numerator = TT.square(old_means - new_means) + \
                    TT.square(old_std) - TT.square(new_std)
        # 1e-8 guards against division by zero for a vanishing new std.
        denominator = 2 * TT.square(new_std) + 1e-8
        return TT.sum(
            numerator / denominator + new_log_stds - old_log_stds, axis=-1)
项目:third_person_im    作者:bstadie    | 项目源码 | 文件源码
def kl(self, old_dist_info, new_dist_info):
        """KL(old || new) of two diagonal-covariance Gaussians (numpy).

        Both arguments are mappings with "mean" and "log_std" entries. The
        divergence is summed over the last axis.
        """
        old_means = old_dist_info["mean"]
        old_log_stds = old_dist_info["log_std"]
        new_means = new_dist_info["mean"]
        new_log_stds = new_dist_info["log_std"]
        old_std = np.exp(old_log_stds)
        new_std = np.exp(new_log_stds)
        # means: (N*A)
        # std: (N*A)
        # formula:
        # { (\mu_1 - \mu_2)^2 + \sigma_1^2 - \sigma_2^2 } / (2\sigma_2^2) +
        # ln(\sigma_2/\sigma_1)
        numerator = np.square(old_means - new_means) + \
                    np.square(old_std) - np.square(new_std)
        # 1e-8 guards against division by zero for a vanishing new std.
        denominator = 2 * np.square(new_std) + 1e-8
        return np.sum(
            numerator / denominator + new_log_stds - old_log_stds, axis=-1)
项目:SteinGAN    作者:DartML    | 项目源码 | 文件源码
def rbf_kernel(X0):
    """Symbolic RBF kernel over the rows of ``X0`` with a median-based bandwidth.

    Returns:
        (Kxy, neighbors, h): the kernel matrix ``exp(-H / (2 h^2))`` where
        ``H`` holds pairwise squared distances; for each row, the column
        index ranked second by distance; and the bandwidth ``h``.
    """
    XY = T.dot(X0, X0.transpose())
    x2 = T.reshape(T.sum(T.square(X0), axis=1), (X0.shape[0], 1))
    X2e = T.repeat(x2, X0.shape[0], axis=1)
    # ||x_i||^2 + ||x_j||^2 - 2 <x_i, x_j>
    H = T.sub(T.add(X2e, X2e.transpose()), 2 * XY)

    V = H.flatten()
    V_sorted = T.sort(V)
    mid = V.shape[0] // 2

    # median distance
    h = T.switch(T.eq((V.shape[0] % 2), 0),
        # even length: mean of the two middle values
        T.mean(V_sorted[(mid - 1):(mid + 1)]),
        # odd length: the middle value
        V_sorted[mid])

    h = T.sqrt(0.5 * h / T.log(X0.shape[0].astype('float32') + 1.0)) / 2.

    Kxy = T.exp(-H / h ** 2 / 2.0)
    # Column 1 of the argsort skips the smallest entry of each row
    # (presumably the point itself, at distance 0).
    neighbors = T.argsort(H, axis=1)[:, 1]

    return Kxy, neighbors, h
项目:SteinGAN    作者:DartML    | 项目源码 | 文件源码
def svgd_gradient(X0):
    """Build the symbolic gradient of ``X0`` plus a nearest-neighbour kernel term.

    Returns ``(grad, svgd_grad, dxkxy)`` where ``grad`` is minus the gradient
    of the summed ``mse`` output of ``discrim``, ``dxkxy`` is the kernel
    term pulled back through ``discrim`` at each row's neighbour, and
    ``svgd_grad = grad + dxkxy / 2``.
    """
    hidden, _, mse = discrim(X0)
    grad = -1.0 * T.grad(mse.sum(), X0)

    _, neighbors, h = rbf_kernel(hidden)  #TODO

    nearest_hidden = hidden[neighbors]
    sq_gap = T.sum((nearest_hidden - hidden) ** 2, axis=1)
    coff = T.exp(-sq_gap / h**2 / 2.0)
    v = coff.dimshuffle(0, 'x') * (-nearest_hidden + hidden) / h**2

    # Re-run the discriminator on the neighbours so the L-operator can map
    # v from hidden space back to input space.
    X1 = X0[neighbors]
    hidden1 = discrim(X1)[0]
    dxkxy = T.Lop(hidden1, X1, v)

    svgd_grad = grad + dxkxy / 2.
    return grad, svgd_grad, dxkxy
项目:SteinGAN    作者:DartML    | 项目源码 | 文件源码
def metropolis_hastings_accept(energy_prev, energy_next, s_rng):
    """
    Performs a Metropolis-Hastings accept-reject move.

    energy_prev: theano vector
        Symbolic theano tensor which contains the energy associated with the
        configuration at time-step t.
    energy_next: theano vector
        Symbolic theano tensor which contains the energy associated with the
        proposed configuration at time-step t+1.
    s_rng: theano.tensor.shared_randomstreams.RandomStreams
        Theano shared random stream object used to generate the random number
        used in proposal.

    return: boolean
        True if move is accepted, False otherwise
    """
    ediff = energy_prev - energy_next
    # Accept when exp(E_prev - E_next) is at least a fresh uniform sample.
    return (TT.exp(ediff) - s_rng.uniform(size=energy_prev.shape)) >= 0
项目:SteinGAN    作者:DartML    | 项目源码 | 文件源码
def metropolis_hastings_accept(energy_prev, energy_next, s_rng):
    """
    Performs a Metropolis-Hastings accept-reject move.

    energy_prev: theano vector
        Symbolic theano tensor which contains the energy associated with the
        configuration at time-step t.
    energy_next: theano vector
        Symbolic theano tensor which contains the energy associated with the
        proposed configuration at time-step t+1.
    s_rng: theano.tensor.shared_randomstreams.RandomStreams
        Theano shared random stream object used to generate the random number
        used in proposal.

    return: boolean
        True if move is accepted, False otherwise
    """
    ediff = energy_prev - energy_next
    # Accept when exp(E_prev - E_next) is at least a fresh uniform sample.
    return (TT.exp(ediff) - s_rng.uniform(size=energy_prev.shape)) >= 0
项目:merlin    作者:CSTR-Edinburgh    | 项目源码 | 文件源码
def apply_activation(self, lin_output, activation):
        """Apply the named activation function to ``lin_output``.

        Supported names: 'SIGMOID', 'TANH', 'LINEAR', 'ReLU', 'ReSU'.

        Raises:
            ValueError: if ``activation`` is not one of the supported names
                (the problem is also logged at critical level).
        """
        if activation == 'SIGMOID':
            final_output = T.nnet.sigmoid(lin_output)

        elif activation == 'TANH':
            final_output = T.tanh(lin_output)

        elif activation == 'LINEAR':
            final_output = lin_output

        elif activation == 'ReLU':  ## rectifier linear unit
            final_output = T.maximum(0.0, lin_output)

        elif activation == 'ReSU':  ## rectifier smooth unit
            # NOTE(review): this branch uses numpy ufuncs while the others use
            # theano ops -- confirm it behaves as intended on symbolic input.
            final_output = numpy.log(1.0 + numpy.exp(lin_output))

        else:
            self.logger.critical('the input activation function: %s is not supported right now. Please modify to support' % (activation))
            # Fail here rather than hitting an unbound local at the return.
            raise ValueError('unsupported activation function: %s' % (activation))

        return final_output
项目:NCRF-AE    作者:cosmozhang    | 项目源码 | 文件源码
def EGD(cost, params, learning_rate = 0.33, constraint = 1.0):
    """Build multiplicative (exponentiated-gradient) updates for ``params``.

    The first two parameters are scaled by ``exp(-lr * U * grad)``; the
    third and fourth are scaled by the reciprocals of those two factors.
    Every update is normalised by the shared partition sum and multiplied
    by the constraint ``U``.

    Returns an ``OrderedDict`` mapping each parameter to its new value.
    """
    gradients = T.grad(cost, params)
    U = T.constant(constraint)

    # Growth factors for the first half of the parameters ...
    grow_w = T.exp(-learning_rate * U * gradients[0])
    grow_b = T.exp(-learning_rate * U * gradients[1])
    # ... and their reciprocals for the second half.
    factors = [grow_w, grow_b, 1/grow_w, 1/grow_b]

    terms = [T.sum(params[k]*factors[k]) for k in range(4)]
    partition = terms[0] + terms[1] + terms[2] + terms[3]

    updates = OrderedDict()
    for param, factor in zip(params, factors):
        updates[param] = U*param*factor/partition

    return updates
项目:rllabplusplus    作者:shaneshixiang    | 项目源码 | 文件源码
def kl_sym(self, old_dist_info_vars, new_dist_info_vars):
        """Symbolic KL(old || new) of two diagonal-covariance Gaussians.

        Both arguments are mappings with "mean" and "log_std" entries. The
        divergence is summed over the last axis.
        """
        old_means = old_dist_info_vars["mean"]
        old_log_stds = old_dist_info_vars["log_std"]
        new_means = new_dist_info_vars["mean"]
        new_log_stds = new_dist_info_vars["log_std"]
        old_std = TT.exp(old_log_stds)
        new_std = TT.exp(new_log_stds)
        # means: (N*A)
        # std: (N*A)
        # formula:
        # { (\mu_1 - \mu_2)^2 + \sigma_1^2 - \sigma_2^2 } / (2\sigma_2^2) +
        # ln(\sigma_2/\sigma_1)
        numerator = TT.square(old_means - new_means) + \
                    TT.square(old_std) - TT.square(new_std)
        # 1e-8 guards against division by zero for a vanishing new std.
        denominator = 2 * TT.square(new_std) + 1e-8
        return TT.sum(
            numerator / denominator + new_log_stds - old_log_stds, axis=-1)
项目:rllabplusplus    作者:shaneshixiang    | 项目源码 | 文件源码
def kl(self, old_dist_info, new_dist_info):
        """KL(old || new) of two diagonal-covariance Gaussians (numpy).

        Both arguments are mappings with "mean" and "log_std" entries. The
        divergence is summed over the last axis.
        """
        old_means = old_dist_info["mean"]
        old_log_stds = old_dist_info["log_std"]
        new_means = new_dist_info["mean"]
        new_log_stds = new_dist_info["log_std"]
        old_std = np.exp(old_log_stds)
        new_std = np.exp(new_log_stds)
        # means: (N*A)
        # std: (N*A)
        # formula:
        # { (\mu_1 - \mu_2)^2 + \sigma_1^2 - \sigma_2^2 } / (2\sigma_2^2) +
        # ln(\sigma_2/\sigma_1)
        numerator = np.square(old_means - new_means) + \
                    np.square(old_std) - np.square(new_std)
        # 1e-8 guards against division by zero for a vanishing new std.
        denominator = 2 * np.square(new_std) + 1e-8
        return np.sum(
            numerator / denominator + new_log_stds - old_log_stds, axis=-1)
项目:KGP-ASR    作者:KGPML    | 项目源码 | 文件源码
def stable_softmax(y_hat):
    """Calculate softmax and log softmax in numerically stable way

    Parameters
    ----------
    y_hat : tensor3 (input_seq_len, num_batch, num_classes+1)
        class energies

    Returns
    -------
    softmax values in normal and log domain
    """
    # Subtracting the per-position maximum leaves the softmax unchanged but
    # keeps every exponent <= 0.
    y_hat_safe = y_hat - y_hat.max(axis=2, keepdims=True)
    y_hat_safe_exp = T.exp(y_hat_safe)
    y_hat_safe_normalizer = y_hat_safe_exp.sum(axis=2, keepdims=True)
    log_y_hat_safe_normalizer = T.log(y_hat_safe_normalizer)

    y_hat_softmax = y_hat_safe_exp / y_hat_safe_normalizer
    log_y_hat_softmax = y_hat_safe - log_y_hat_safe_normalizer

    return y_hat_softmax, log_y_hat_softmax
项目:cbof    作者:passalis    | 项目源码 | 文件源码
def get_output_for(self, input, **kwargs):
        """Return a histogram of soft codeword memberships for ``input``.

        Similarities ``exp(-distance / |gamma|)`` to the codewords ``self.V``
        are normalised over axis 1 and averaged over axes 2 and 3. When
        ``self.spatial_level == 1`` the histograms of the four quadrants are
        appended to the global one.
        """
        distances = conv_pairwise_distance(input, self.V)
        similarities = T.exp(-distances / T.abs_(self.gamma))
        norm = T.sum(similarities, 1).reshape((similarities.shape[0], 1, similarities.shape[2], similarities.shape[3]))
        membership = similarities / (norm + self.eps)

        histogram = T.mean(membership, axis=(2, 3))
        if self.spatial_level == 1:
            # Floor division: the pivots are used as slice bounds and must
            # stay integers under true-division semantics as well.
            pivot1, pivot2 = membership.shape[2] // 2, membership.shape[3] // 2
            h1 = T.mean(membership[:, :, :pivot1, :pivot2], axis=(2, 3))
            h2 = T.mean(membership[:, :, :pivot1, pivot2:], axis=(2, 3))
            h3 = T.mean(membership[:, :, pivot1:, :pivot2], axis=(2, 3))
            h4 = T.mean(membership[:, :, pivot1:, pivot2:], axis=(2, 3))
            # Pyramid is not used in the paper
            # histogram = T.horizontal_stack(h1, h2, h3, h4)
            histogram = T.horizontal_stack(histogram, h1, h2, h3, h4)
        return histogram
项目:deep-learning-keras-projects    作者:jasmeetsb    | 项目源码 | 文件源码
def ctc_update_log_p(skip_idxs, zeros, active, log_p_curr, log_p_prev):
    """Advance the log-domain path probabilities by one step.

    The previous log-probabilities of the first ``active`` states are
    exponentiated (shifted by their maximum), propagated by stay, next-state
    and skip transitions, and added in log space to ``log_p_curr``.

    NOTE(review): the ``T.maximum(`` opener and the ``zeros[:active_next]``
    target of the final ``set_subtensor`` are reconstructed -- confirm
    against the upstream Keras Theano backend.

    Returns:
        (active_next, log_p_next): the new number of active states (int32,
        capped at ``log_p_curr.shape[0]``) and the updated log-probabilities.
    """
    active_skip_idxs = skip_idxs[(skip_idxs < active).nonzero()]
    active_next = T.cast(T.minimum(
        T.maximum(
            active + 1,
            T.max(T.concatenate([active_skip_idxs, [-1]])) + 2 + 1
        ), log_p_curr.shape[0]), 'int32')

    # Shift by the maximum so the exponentials cannot overflow.
    common_factor = T.max(log_p_prev[:active])
    p_prev = T.exp(log_p_prev[:active] - common_factor)
    _p_prev = zeros[:active_next]
    # copy over
    _p_prev = T.set_subtensor(_p_prev[:active], p_prev)
    # previous transitions
    _p_prev = T.inc_subtensor(_p_prev[1:], _p_prev[:-1])
    # skip transitions
    _p_prev = T.inc_subtensor(_p_prev[active_skip_idxs + 2], p_prev[active_skip_idxs])
    updated_log_p_prev = T.log(_p_prev) + common_factor

    log_p_next = T.set_subtensor(
        zeros[:active_next],
        log_p_curr[:active_next] + updated_log_p_prev
    )
    return active_next, log_p_next
项目:LasagneNLP    作者:XuezheMax    | 项目源码 | 文件源码
def theano_logsumexp(x, axis=None):
    """
    Compute log(sum(exp(x), axis=axis) in a numerically stable
    fashion.

    Parameters
    ----------
    x : tensor_like
        A Theano tensor (any dimension will do).
    axis : int or symbolic integer scalar, or None
        Axis over which to perform the summation. `None`, the
        default, performs over all axes.

    Returns
    -------
    result : ndarray or scalar
        The result of the log(sum(exp(...))) operation.
    """
    # The keepdims copy broadcasts against x; the reduced copy matches the
    # shape of the summed result.
    xmax = x.max(axis=axis, keepdims=True)
    xmax_ = x.max(axis=axis)
    return xmax_ + T.log(T.exp(x - xmax).sum(axis=axis))
项目:recnet    作者:joergfranke    | 项目源码 | 文件源码
def sequence_iteration(self, output, mask,use_dropout=0,dropout_value=0.5):
        """Apply the softmax output layer to ``output`` and mask the result.

        Positions where ``mask`` is 1 receive the softmax over axis 2;
        positions where it is 0 receive the constant 1e-6. ``use_dropout``
        and ``dropout_value`` are accepted but not used here.
        """
        dot_product = T.dot(output, self.t_w_out)

        net_o = T.add( dot_product , self.t_b_out )

        # Shift by the per-position maximum: the softmax is unchanged but the
        # exponentials can no longer overflow.
        net_o = net_o - T.max(net_o, axis=2, keepdims=True)
        ex_net = T.exp(net_o)
        sum_net = T.sum(ex_net, axis=2, keepdims=True)
        softmax_o = ex_net / sum_net

        mask = T.addbroadcast(mask, 2) # TODO: check whether this is necessary
        output = T.mul(mask, softmax_o)   + T.mul( (1. - mask) , 1e-6 )

        return output #result

######                     Linear Layer
项目:experiments    作者:tencia    | 项目源码 | 文件源码
def build_vae_loss(input_var, l_z_mu, l_z_ls, l_x_mu_list, l_x_ls_list, l_x_list, l_x,
        deterministic, binary, L):
    """Build the VAE loss and prediction expressions.

    All layer outputs are fetched in one ``get_output`` call and split back
    by position. With ``binary`` the reconstruction term is minus the mean
    binary cross-entropy over the ``L`` sampled outputs; otherwise it is the
    mean ``log_likelihood`` over the ``L`` (mu, log-sigma) output pairs.

    Returns:
        (loss, prediction): ``loss = -(logpxz + kl_div)`` and the expression
        to use as the model's reconstruction.
    """
    layer_outputs = nn.layers.get_output([l_z_mu, l_z_ls] + l_x_mu_list + l_x_ls_list
            + l_x_list + [l_x], deterministic=deterministic)
    z_mu =  layer_outputs[0]
    z_ls =  layer_outputs[1]
    x_mu =  [] if binary else layer_outputs[2:2+L]
    x_ls =  [] if binary else layer_outputs[2+L:2+2*L]
    x_list =  layer_outputs[2:2+L] if binary else layer_outputs[2+2*L:2+3*L]
    x = layer_outputs[-1]
    kl_div = 0.5 * T.sum(1 + 2*z_ls - T.sqr(z_mu) - T.exp(2 * z_ls))
    if binary:
        logpxz = sum(nn.objectives.binary_crossentropy(sample, input_var).sum()
                for sample in x_list) * (-1./L)
        prediction = x_list[0] if deterministic else x
    else:
        logpxz = sum(log_likelihood(input_var.flatten(2), mu, ls)
            for mu, ls in zip(x_mu, x_ls))/L
        prediction = x_mu[0] if deterministic else T.sum(x_mu, axis=0)/L
    loss = -1 * (logpxz + kl_div)
    return loss, prediction
项目:NADE    作者:MarcCote    | 项目源码 | 文件源码
def sym_logdensity(self, x):
        """ x is a matrix of column datapoints (VxB) V = n_visible, B = batch size

        Scans over the visible dimensions, accumulating the log-density of a
        mixture of Gaussians for each one. Returns ``(ps[-1], updates)``: the
        accumulated log-density after the last dimension and the scan updates.

        NOTE(review): the ``T.dot`` operands below are reconstructed from the
        shape comments -- confirm against the upstream NADE source.
        """
        def density_given_previous_a_and_x(x, w, V_alpha, b_alpha, V_mu, b_mu, V_sigma, b_sigma, activations_factor, p_prev, a_prev, x_prev):
            # Add the previous dimension's contribution to the activations.
            a = a_prev + T.dot(T.shape_padright(x_prev, 1), T.shape_padleft(w, 1))
            h = self.nonlinearity(a * activations_factor)  # BxH

            Alpha = T.nnet.softmax(T.dot(h, V_alpha) + T.shape_padleft(b_alpha))  # BxC
            Mu = T.dot(h, V_mu) + T.shape_padleft(b_mu)  # BxC
            Sigma = T.exp((T.dot(h, V_sigma) + T.shape_padleft(b_sigma)))  # BxC
            p = p_prev + log_sum_exp(-constantX(0.5) * T.sqr((Mu - T.shape_padright(x, 1)) / Sigma) - T.log(Sigma) - constantX(0.5 * np.log(2 * np.pi)) + T.log(Alpha))
            return (p, a, x)
        # First element is different (it is predicted from the bias only)
        a0 = T.zeros_like(T.dot(x.T, self.W))  # BxH
        p0 = T.zeros_like(x[0])
        x0 = T.ones_like(x[0])
        ([ps, _as, _xs], updates) = theano.scan(density_given_previous_a_and_x,
                                                sequences=[x, self.W, self.V_alpha, self.b_alpha, self.V_mu, self.b_mu, self.V_sigma, self.b_sigma, self.activation_rescaling],
                                                outputs_info=[p0, a0, x0])
        return (ps[-1], updates)
项目:NADE    作者:MarcCote    | 项目源码 | 文件源码
def sample(self, n):
        """Draw ``n`` samples, one visible dimension at a time.

        For each dimension a mixture component is picked from the softmax
        weights and the value is drawn from that component's normal
        distribution; its scale is capped at 1.

        Returns:
            Array of shape (n_visible, n); column ``s`` is one sample.
        """
        W = self.W.get_value()
        V_alpha = self.V_alpha.get_value()
        b_alpha = self.b_alpha.get_value()
        V_mu = self.V_mu.get_value()
        b_mu = self.b_mu.get_value()
        V_sigma = self.V_sigma.get_value()
        b_sigma = self.b_sigma.get_value()
        activation_rescaling = self.activation_rescaling.get_value()
        nonlinearity = self.parameters["nonlinearity"].get_numpy_f()
        samples = np.zeros((self.n_visible, n))
        for s in range(n):
            a = np.zeros((self.n_hidden,))  # H
            for i in range(self.n_visible):
                if i == 0:
                    # The first dimension is predicted from the bias row only.
                    a = W[i, :]
                else:
                    a = a + W[i, :] * samples[i - 1, s]
                h = nonlinearity(a * activation_rescaling[i])
                alpha = Utils.nnet.softmax(np.dot(h, V_alpha[i]) + b_alpha[i])  # C
                Mu = np.dot(h, V_mu[i]) + b_mu[i]  # C
                Sigma = np.minimum(np.exp(np.dot(h, V_sigma[i]) + b_sigma[i]), 1)
                comp = Utils.nnet.random_component(alpha)
                samples[i, s] = np.random.normal(Mu[comp], Sigma[comp])
        return samples
项目:NADE    作者:MarcCote    | 项目源码 | 文件源码
def sample(self, n):
        """Draw ``n`` samples, one visible dimension at a time.

        For each dimension a mixture component is picked from the softmax
        weights and the value is drawn from that component's Laplace
        distribution.

        Returns:
            Array of shape (n_visible, n); column ``s`` is one sample.
        """
        W = self.W.get_value()
        V_alpha = self.V_alpha.get_value()
        b_alpha = self.b_alpha.get_value()
        V_mu = self.V_mu.get_value()
        b_mu = self.b_mu.get_value()
        V_sigma = self.V_sigma.get_value()
        b_sigma = self.b_sigma.get_value()
        activation_rescaling = self.activation_rescaling.get_value()
        nonlinearity = self.parameters["nonlinearity"].get_numpy_f()
        samples = np.zeros((self.n_visible, n))
        for s in range(n):
            a = np.zeros((self.n_hidden,))  # H
            for i in range(self.n_visible):
                if i == 0:
                    # The first dimension is predicted from the bias row only.
                    a = W[i, :]
                else:
                    a = a + W[i, :] * samples[i - 1, s]
                h = nonlinearity(a * activation_rescaling[i])
                alpha = Utils.nnet.softmax(np.dot(h, V_alpha[i]) + b_alpha[i])  # C
                Mu = np.dot(h, V_mu[i]) + b_mu[i]  # C
                # The scale is left uncapped (an earlier np.minimum(..., 1)
                # clamp was disabled).
                Sigma = np.exp(np.dot(h, V_sigma[i]) + b_sigma[i])
                comp = Utils.nnet.random_component(alpha)
                samples[i, s] = np.random.laplace(Mu[comp], Sigma[comp])
        return samples
项目:NADE    作者:MarcCote    | 项目源码 | 文件源码
def conditional_logdensities(self, x_lt_i, range):
        """Log-density of candidate values for the next dimension (disabled).

        The method raises immediately; the code after the raise is the
        unfinished implementation, kept for reference. ``x_lt_i`` holds the
        values of the preceding dimensions and ``range`` the candidate values
        to evaluate (the name shadows the builtin and is kept for callers).

        Raises:
            NotImplementedError: always (a subclass of ``Exception``).
        """
        raise NotImplementedError("Not implemented")
        W = self.W.get_value()
        V_alpha = self.V_alpha.get_value()
        b_alpha = self.b_alpha.get_value()
        V_mu = self.V_mu.get_value()
        b_mu = self.b_mu.get_value()
        V_sigma = self.V_sigma.get_value()
        b_sigma = self.b_sigma.get_value()
        activation_rescaling = self.activation_rescaling.get_value()
        # Calculate
        i = len(x_lt_i)
        a = W[0, :] + np.dot(x_lt_i, W[1:len(x_lt_i) + 1, :])
        h = self.parameters["nonlinearity"].get_numpy_f()(a * activation_rescaling[i])
        alpha = Utils.nnet.softmax(np.tanh(np.dot(h, V_alpha[i]) + b_alpha[i]) * 10.0)  # C
        Mu = np.dot(h, V_mu[i]) + b_mu[i]  # C
        Sigma = np.log(1.0 + np.exp((np.dot(h, V_sigma[i]) + b_sigma[i]) * 10)) / 10  # C

        def ld(x):
            # `range` is shadowed by the parameter, so index the components
            # through numpy instead of the builtin.
            lds = np.array([scipy.stats.norm.logpdf(x, Mu[c], Sigma[c]) for c in np.arange(self.n_components)])
            return Utils.nnet.logsumexp(lds + np.log(alpha))
        return np.array([ld(x) for x in range])
项目:GELUs    作者:hendrycks    | 项目源码 | 文件源码
def log_sum_exp(x, axis=1):
    """Numerically stable ``log(sum(exp(x), axis=axis))``.

    The maximum is taken with ``keepdims=True`` so the shift broadcasts
    correctly for any ``axis``, not just ``axis=1`` on a matrix.
    """
    m_keep = T.max(x, axis=axis, keepdims=True)
    m = T.max(x, axis=axis)
    return m+T.log(T.sum(T.exp(x-m_keep), axis=axis))
项目:GELUs    作者:hendrycks    | 项目源码 | 文件源码
def softmax_loss(p_true, output_before_softmax):
    """Mean softmax cross-entropy computed from pre-softmax outputs.

    ``p_true`` is either a 2-D matrix of target distributions or a vector of
    integer class labels. The row maximum is subtracted first for stability.
    """
    output_before_softmax -= T.max(output_before_softmax, axis=1, keepdims=True)
    log_norm = T.log(T.sum(T.exp(output_before_softmax),axis=1))
    if p_true.ndim==2:
        return T.mean(log_norm - T.sum(p_true*output_before_softmax, axis=1))
    else:
        # Integer labels: pick each row's logit for its true class.
        return T.mean(log_norm - output_before_softmax[T.arange(p_true.shape[0]),p_true])
项目:sampleRNN_ICLR2017    作者:soroushmehr    | 项目源码 | 文件源码
def GMM_nll(x, mus, sigmas, mix_weights):
    """
    D is dimension of each observation (e.g. frame_size) for each component
    (multivariate Normal with diagonal covariance matrix)
    See `gaussian_nll`

    x : (batch_size, D)
    mus : (batch_size, D, num_gaussians)
    sigmas : (batch_size, D, num_gaussians)
    mix_weights : (batch_size, num_gaussians)

    Returns the negative log-likelihood, one value per batch row.
    """
    x = x.dimshuffle(0, 1, 'x')

    # Similar to `gaussian_nll`
    ll_component_wise = lib.floatX(numpy.log(2. * numpy.pi))
    ll_component_wise += 2. * T.log(sigmas)
    ll_component_wise += ((x - mus) / sigmas) ** 2.
    ll_component_wise = ll_component_wise.sum(axis=1)  # on FRAME_SIZE
    ll_component_wise *= lib.floatX(-0.5)  # LL not NLL

    # Now ready to take care of weights of each component
    # Simply applying exp could potentially cause inf/NaN, so use the
    # log-sum-exp trick: shift by the per-row maximum before exponentiating.
    weighted_ll = ll_component_wise + T.log(mix_weights)
    ll_max = T.max(weighted_ll, axis=1, keepdims=True)
    nll = T.log(T.sum(T.exp(weighted_ll - ll_max), axis=1, keepdims=True))
    nll += ll_max
    nll = -nll.sum(axis=1)
    return nll
项目:mimic3-benchmarks    作者:YerevaNN    | 项目源码 | 文件源码
def softmax(x):
    """Return the softmax of ``x`` normalized along axis 0.

    The maximum along axis 0 is subtracted before exponentiating to keep
    the exponentials bounded.
    """
    peak = x.max(axis=0, keepdims=True)
    exps = T.exp(x - peak)
    normalizer = exps.sum(axis=0, keepdims=True)
    return exps / normalizer
项目:deligan    作者:val-iisc    | 项目源码 | 文件源码
def log_sum_exp(x, axis=1):
    """Compute ``log(sum(exp(x)))`` over ``axis`` without overflow.

    Args:
        x: Symbolic tensor to reduce.
        axis: Axis to reduce over (default 1).

    Returns:
        Symbolic tensor holding the reduction, with ``axis`` summed out.
    """
    # The broadcastable copy of the maximum is taken with keepdims so the
    # subtraction is valid for every axis; a fixed dimshuffle(0, 'x') is only
    # correct for 2-D input reduced over axis 1.
    m_keep = T.max(x, axis=axis, keepdims=True)
    m = T.max(x, axis=axis)
    return m + T.log(T.sum(T.exp(x - m_keep), axis=axis))
项目:deligan    作者:val-iisc    | 项目源码 | 文件源码
def __init__(self, incoming, num_kernels, dim_per_kernel=5, theta=lasagne.init.Normal(0.05),
             log_weight_scale=lasagne.init.Constant(0.), b=lasagne.init.Constant(-1.), **kwargs):
        """Register the parameters of the minibatch layer.

        Args:
            incoming: Layer (or shape) feeding this layer; passed to the
                parent constructor.
            num_kernels: Number of kernels; size of axis 1 of ``theta`` and
                length of the bias ``b``.
            dim_per_kernel: Size of the last axis of ``theta``.
            theta: Initializer for the unnormalized weight tensor.
            log_weight_scale: Initializer for the per-(kernel, dim) log scale.
            b: Initializer for the per-kernel bias.
            **kwargs: Forwarded to the parent constructor.
        """
        # Local import keeps this block self-contained.
        import numpy as np

        super(MinibatchLayer, self).__init__(incoming, **kwargs)
        self.num_kernels = num_kernels
        # NOTE(review): the argument of int(...) was lost in this copy. The
        # product of all non-batch input dimensions is restored here, which
        # matches the flatten(2) + tensordot over axis 1 in get_output_for.
        num_inputs = int(np.prod(self.input_shape[1:]))
        self.theta = self.add_param(theta, (num_inputs, num_kernels, dim_per_kernel), name="theta")
        self.log_weight_scale = self.add_param(log_weight_scale, (num_kernels, dim_per_kernel), name="log_weight_scale")
        # Weight normalization: each (kernel, dim) column of theta is scaled
        # to L2 norm exp(log_weight_scale) over the input axis.
        self.W = self.theta * (T.exp(self.log_weight_scale)/T.sqrt(T.sum(T.square(self.theta),axis=0))).dimshuffle('x',0,1)
        self.b = self.add_param(b, (num_kernels,), name="b")
项目:deligan    作者:val-iisc    | 项目源码 | 文件源码
def get_output_for(self, input, init=False, **kwargs):
        """Append per-kernel minibatch similarity features to ``input``.

        Args:
            input: Symbolic batch; inputs with more than two dimensions are
                flattened to ``(batch, features)``.
            init: When true, rescale the distances and center the features
                for this pass, and record the matching parameter updates in
                ``self.init_updates``.
            **kwargs: Ignored.

        Returns:
            ``input`` concatenated along axis 1 with the feature matrix ``f``.
        """
        if input.ndim > 2:
            # if the input has more than two dimensions, flatten it into a
            # batch of feature vectors.
            input = input.flatten(2)

        activation = T.tensordot(input, self.W, [[1], [0]])
        # Pairwise L1 distance between samples for every kernel. The scaled
        # identity adds 1e6 to each sample's distance to itself, so the
        # exp(-abs_dif) term below is ~0 for that pair.
        abs_dif = (T.sum(abs(activation.dimshuffle(0,1,2,'x') - activation.dimshuffle('x',1,2,0)),axis=2)
                    + 1e6 * T.eye(input.shape[0]).dimshuffle(0,'x',1))

        if init:
            mean_min_abs_dif = 0.5 * T.mean(T.min(abs_dif, axis=2),axis=0)
            abs_dif /= mean_min_abs_dif.dimshuffle('x',0,'x')
            self.init_updates = [(self.log_weight_scale, self.log_weight_scale-T.log(mean_min_abs_dif).dimshuffle(0,'x'))]

        f = T.sum(T.exp(-abs_dif),axis=2)

        if init:
            # Center the features for this pass and store the bias that
            # reproduces the centering on later passes.
            mf = T.mean(f,axis=0)
            f -= mf.dimshuffle('x',0)
            self.init_updates.append((self.b, -mf))
        else:
            # Regular pass: apply the learned bias. Without this ``else`` the
            # bias was only added on the init pass and never otherwise.
            f += self.b.dimshuffle('x',0)

        return T.concatenate([input, f], axis=1)

# Input Mixture of Gaussian Layer
项目:deligan    作者:val-iisc    | 项目源码 | 文件源码
def log_sum_exp(x, axis=1):
    """Stable log-sum-exp of ``x`` along ``axis``.

    Args:
        x: Symbolic tensor to reduce.
        axis: Axis to reduce over (default 1).

    Returns:
        Symbolic tensor holding the reduction, with ``axis`` summed out.
    """
    # Subtract the maximum (kept broadcastable via keepdims) before exp();
    # the former dimshuffle(0, 'x') assumed a matrix reduced over axis 1 and
    # mis-broadcast for any other axis.
    m_keep = T.max(x, axis=axis, keepdims=True)
    m = T.max(x, axis=axis)
    return m + T.log(T.sum(T.exp(x - m_keep), axis=axis))
项目:deligan    作者:val-iisc    | 项目源码 | 文件源码
def __init__(self, incoming, num_kernels, dim_per_kernel=5, theta=lasagne.init.Normal(0.05),
             log_weight_scale=lasagne.init.Constant(0.), b=lasagne.init.Constant(-1.), **kwargs):
        """Create the weight, log-scale and bias parameters of the layer.

        Args:
            incoming: Layer (or shape) feeding this layer; passed to the
                parent constructor.
            num_kernels: Number of kernels; size of axis 1 of ``theta`` and
                length of the bias ``b``.
            dim_per_kernel: Size of the last axis of ``theta``.
            theta: Initializer for the unnormalized weight tensor.
            log_weight_scale: Initializer for the per-(kernel, dim) log scale.
            b: Initializer for the per-kernel bias.
            **kwargs: Forwarded to the parent constructor.
        """
        # Local import keeps this block self-contained.
        import numpy as np

        super(MinibatchLayer, self).__init__(incoming, **kwargs)
        self.num_kernels = num_kernels
        # NOTE(review): the original expression inside int(...) was truncated
        # in this copy. It is restored as the flattened size of one input
        # sample, consistent with theta's leading dimension.
        num_inputs = int(np.prod(self.input_shape[1:]))
        self.theta = self.add_param(theta, (num_inputs, num_kernels, dim_per_kernel), name="theta")
        self.log_weight_scale = self.add_param(log_weight_scale, (num_kernels, dim_per_kernel), name="log_weight_scale")
        # Normalize theta over the input axis and rescale by
        # exp(log_weight_scale), broadcast across the input axis.
        self.W = self.theta * (T.exp(self.log_weight_scale)/T.sqrt(T.sum(T.square(self.theta),axis=0))).dimshuffle('x',0,1)
        self.b = self.add_param(b, (num_kernels,), name="b")
项目:CopyNet    作者:MultiPath    | 项目源码 | 文件源码
def gaussian_kl_divergence(mean, ln_var):
    """Computes the KL-divergence of Gaussian variables from the standard one.

    Given two variable ``mean`` representing :math:`\\mu` and ``ln_var``
    representing :math:`\\log(\\sigma^2)`, this function returns a variable
    representing the KL-divergence between the given multi-dimensional Gaussian
    :math:`N(\\mu, S)` and the standard Gaussian :math:`N(0, I)`

    .. math::

       D_{\\mathbf{KL}}(N(\\mu, S) \\| N(0, I)),

    where :math:`S` is a diagonal matrix such that :math:`S_{ii} = \\sigma_i^2`
    and :math:`I` is an identity matrix.

    Args:
        mean: A symbolic tensor representing mean of given gaussian
            distribution, :math:`\\mu`.
        ln_var: A symbolic tensor representing logarithm of variance of
            given gaussian distribution, :math:`\\log(\\sigma^2)`.

    Returns:
        A symbolic tensor representing KL-divergence between given gaussian
        distribution and the standard gaussian, summed over axis 1.

    """
    var = T.exp(ln_var)
    # Closed form per dimension: 0.5 * (mu^2 + sigma^2 - log(sigma^2) - 1).
    return 0.5 * T.sum(mean * mean + var - ln_var - 1, 1)

# aliases
项目:dsb3    作者:EliasVansteenkiste    | 项目源码 | 文件源码
def get_output_for(self, input, **kwargs):
        """Aggregate ``input`` over axes (1, 2) into ``1 - exp(-sum)``.

        The input is passed through softplus, summed over axes 1 and 2, and
        the sum is mapped through ``1 - exp(-s)``.
        """
        total = T.sum(nonlinearities.softplus(input), axis=(1,2))
        return 1 - T.exp(-total)
项目:dsb3    作者:EliasVansteenkiste    | 项目源码 | 文件源码
def __init__(self, incoming, exp=nn.init.Constant(2.),  **kwargs):
        """Set up the layer and register its single-element ``exp`` parameter.

        ``exp`` is the initializer for that parameter; it is registered with
        ``regularizable=False``.
        """
        super(AggSoPP, self).__init__(incoming, **kwargs)
        exponent_shape = (1,)
        self.exp = self.add_param(
            exp, exponent_shape, name='exp', regularizable=False)
项目:dsb3    作者:EliasVansteenkiste    | 项目源码 | 文件源码
def get_output_for(self, input, **kwargs):
        """Return the mean over axes (1, 2) of ``sigmoid(input) ** self.exp``."""
        powered = nonlinearities.sigmoid(input) ** self.exp
        return T.mean(powered, axis=(1,2))
项目:dsb3    作者:EliasVansteenkiste    | 项目源码 | 文件源码
def get_output_for(self, input, **kwargs):
        """Log-mean-exp of ``input`` along ``self.axis`` with sharpness ``self.r``.

        Computes ``log(mean(exp(r * input)) + 1e-7) / r``; the small constant
        keeps the logarithm's argument away from zero.
        """
        scaled = self.r * input
        mean_exp = T.mean(T.exp(scaled), axis=self.axis)
        return T.log(mean_exp + 1e-7) / self.r