Python theano.tensor module: log() code examples

The following code examples, extracted from open-source Python projects, illustrate how to use theano.tensor.log().

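Before the project samples, a minimal usage sketch of the API itself (the variable names are illustrative, not taken from any project below): T.log is the elementwise natural logarithm on a symbolic tensor, and theano.function compiles the graph into a callable.

import numpy as np
import theano
import theano.tensor as T

x = T.dvector('x')                      # symbolic double-precision vector
f = theano.function([x], T.log(x))      # compile log(x) into a callable
print(f(np.array([1.0, np.e, 10.0])))   # -> [0.0, 1.0, ~2.3026]
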
Project: Stein-Variational-Gradient-Descent    Author: DartML
def svgd_kernel(self, h = -1):
        sq_dist = pdist(self.theta)
        pairwise_dists = squareform(sq_dist)**2
        if h < 0:  # use the median trick to choose the bandwidth
            h = np.median(pairwise_dists)
            h = np.sqrt(0.5 * h / np.log(self.theta.shape[0] + 1))

        # compute the RBF kernel
        Kxy = np.exp(-pairwise_dists / h**2 / 2)

        dxkxy = -np.matmul(Kxy, self.theta)
        sumkxy = np.sum(Kxy, axis=1)
        for i in range(self.theta.shape[1]):
            dxkxy[:, i] = dxkxy[:,i] + np.multiply(self.theta[:,i],sumkxy)
        dxkxy = dxkxy / (h**2)
        return (Kxy, dxkxy)
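
The median trick above can be sanity-checked outside the class in plain NumPy; the sketch below (array sizes are arbitrary assumptions) reproduces the bandwidth formula h = sqrt(0.5 * median / log(n + 1)).

import numpy as np
from scipy.spatial.distance import pdist, squareform

theta = np.random.randn(50, 2)             # 50 particles in 2-D
pairwise_dists = squareform(pdist(theta)) ** 2
h = np.median(pairwise_dists)              # median trick
h = np.sqrt(0.5 * h / np.log(theta.shape[0] + 1))
Kxy = np.exp(-pairwise_dists / h**2 / 2)   # RBF kernel matrix, shape (50, 50)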
Project: SteinGAN    Author: DartML
def rbf_kernel(X0):
    XY = T.dot(X0, X0.transpose())
    x2 = T.reshape(T.sum(T.square(X0), axis=1), (X0.shape[0], 1))
    X2e = T.repeat(x2, X0.shape[0], axis=1)
    H = T.sub(T.add(X2e, X2e.transpose()), 2 * XY)

    V = H.flatten()

    # median distance
    h = T.switch(T.eq((V.shape[0] % 2), 0),
        # if even vector
        T.mean(T.sort(V)[ ((V.shape[0] // 2) - 1) : ((V.shape[0] // 2) + 1) ]),
        # if odd vector
        T.sort(V)[V.shape[0] // 2])

    h = T.sqrt(0.5 * h / T.log(X0.shape[0].astype('float32') + 1.0)) / 2.

    Kxy = T.exp(-H / h ** 2 / 2.0)
    neighbors = T.argsort(H, axis=1)[:, 1]

    return Kxy, neighbors, h
Project: SteinGAN    Author: DartML
def rbf_kernel(X):

    XY = T.dot(X, X.T)
    x2 = T.sum(X**2, axis=1).dimshuffle(0, 'x')
    X2e = T.repeat(x2, X.shape[0], axis=1)
    H = X2e +  X2e.T - 2. * XY

    V = H.flatten()
    # median distance
    h = T.switch(T.eq((V.shape[0] % 2), 0),
        # if even vector
        T.mean(T.sort(V)[ ((V.shape[0] // 2) - 1) : ((V.shape[0] // 2) + 1) ]),
        # if odd vector
        T.sort(V)[V.shape[0] // 2])

    h = T.sqrt(.5 * h / T.log(H.shape[0].astype('float32') + 1.)) 

    # compute the rbf kernel
    kxy = T.exp(-H / (h ** 2) / 2.0)

    dxkxy = -T.dot(kxy, X)
    sumkxy = T.sum(kxy, axis=1).dimshuffle(0, 'x')
    dxkxy = T.add(dxkxy, T.mul(X, sumkxy)) / (h ** 2)

    return kxy, dxkxy
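
A usage sketch for the snippet above, assuming rbf_kernel is in scope: the function only builds a symbolic graph, which becomes computable after theano.function is applied.

import numpy as np
import theano
import theano.tensor as T

X = T.matrix('X')
kxy, dxkxy = rbf_kernel(X)               # symbolic kernel and its gradient term
f = theano.function([X], [kxy, dxkxy])
K, dK = f(np.random.randn(20, 3))        # K has shape (20, 20), dK shape (20, 3)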
Project: sampleRNN_ICLR2017    Author: soroushmehr
def gaussian_nll(x, mus, sigmas):
    """
    NLL for Multivariate Normal with diagonal covariance matrix
    See:
        wikipedia.org/wiki/Multivariate_normal_distribution#Likelihood_function
    where \Sigma = diag(s_1^2,..., s_n^2).

    x, mus, sigmas all should have the same shape.
    sigmas (s_1,..., s_n) should be strictly positive.
    The output has the same shape as the inputs, but without the last dimension.
    """
    nll = lib.floatX(numpy.log(2. * numpy.pi))
    nll += 2. * T.log(sigmas)
    nll += ((x - mus) / sigmas) ** 2.
    nll = nll.sum(axis=-1)
    nll *= lib.floatX(0.5)
    return nll
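
A NumPy mirror of the formula above (not from the project; lib.floatX there is just a float cast) makes it easy to sanity-check values:

import numpy as np

def gaussian_nll_np(x, mus, sigmas):
    nll = np.log(2. * np.pi) + 2. * np.log(sigmas) + ((x - mus) / sigmas) ** 2.
    return 0.5 * nll.sum(axis=-1)

x = np.zeros((4, 3)); mus = np.zeros((4, 3)); sigmas = np.ones((4, 3))
print(gaussian_nll_np(x, mus, sigmas))    # each row: 1.5 * log(2*pi) ~ 2.7568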
Project: mimic3-benchmarks    Author: YerevaNN
def step(self, mode):
        if mode == "train" and self.mode == "test":
            raise Exception("Cannot train during test mode")

        if mode == "train":
            theano_fn = self.train_fn
            batch_gen = self.train_batch_gen
        elif mode == "test":    
            theano_fn = self.test_fn
            batch_gen = self.test_batch_gen
        else:
            raise Exception("Invalid mode")

        data = next(batch_gen)
        ret = theano_fn(*data)

        return {"prediction": np.array(ret[0]),
                "answers": data[-1],
                "current_loss": ret[1],
                "log": ""}
Project: Stein-Variational-Gradient-Descent    Author: DartML
def evaluation(self, X_test, y_test):
        # normalization
        X_test = self.normalization(X_test)

        # average over the output
        pred_y_test = np.zeros([self.M, len(y_test)])
        prob = np.zeros([self.M, len(y_test)])

        '''
        Since we have M particles, we average over them (a Bayesian view)
        to compute the RMSE and the test log-likelihood.
        '''
        for i in range(self.M):
            w1, b1, w2, b2, loggamma, loglambda = self.unpack_weights(self.theta[i, :])
            pred_y_test[i, :] = self.nn_predict(X_test, w1, b1, w2, b2) * self.std_y_train + self.mean_y_train
            prob[i, :] = np.sqrt(np.exp(loggamma)) / np.sqrt(2 * np.pi) * np.exp(-(np.power(pred_y_test[i, :] - y_test, 2) / 2) * np.exp(loggamma))
        pred = np.mean(pred_y_test, axis=0)

        # evaluation
        svgd_rmse = np.sqrt(np.mean((pred - y_test)**2))
        svgd_ll = np.mean(np.log(np.mean(prob, axis = 0)))

        return (svgd_rmse, svgd_ll)
Project: gram    Author: mp2893
def parse_arguments(parser):
    parser.add_argument('seq_file', type=str, metavar='<visit_file>', help='The path to the Pickled file containing visit information of patients')
    parser.add_argument('label_file', type=str, metavar='<label_file>', help='The path to the Pickled file containing label information of patients')
    parser.add_argument('tree_file', type=str, metavar='<tree_file>', help='The path to the Pickled files containing the ancestor information of the input medical codes. Only use the prefix and exclude ".level#.pk".')
    parser.add_argument('out_file', metavar='<out_file>', help='The path to the output models. The models will be saved after every epoch')
    parser.add_argument('--embed_file', type=str, default='', help='The path to the Pickled file containing the representation vectors of medical codes. If you are not using medical code representations, do not use this option')
    parser.add_argument('--embed_size', type=int, default=128, help='The dimension size of the visit embedding. If you are providing your own medical code vectors, this value will be automatically decided. (default value: 128)')
    parser.add_argument('--rnn_size', type=int, default=128, help='The dimension size of the hidden layer of the GRU (default value: 128)')
    parser.add_argument('--attention_size', type=int, default=128, help='The dimension size of hidden layer of the MLP that generates the attention weights (default value: 128)')
    parser.add_argument('--batch_size', type=int, default=100, help='The size of a single mini-batch (default value: 100)')
    parser.add_argument('--n_epochs', type=int, default=100, help='The number of training epochs (default value: 100)')
    parser.add_argument('--L2', type=float, default=0.001, help='L2 regularization coefficient for all weights except RNN (default value: 0.001)')
    parser.add_argument('--dropout_rate', type=float, default=0.5, help='Dropout rate used for the hidden layer of RNN (default value: 0.5)')
    parser.add_argument('--log_eps', type=float, default=1e-8, help='A small value to prevent log(0) (default value: 1e-8)')
    parser.add_argument('--verbose', action='store_true', help='Print output after every 100 mini-batches (default false)')
    args = parser.parse_args()
    return args
Project: face_detection    Author: chintak
def iou_loss(p, t):
    # print "pass"
    tp, tt = p.reshape((p.shape[0], 2, 2)), t.reshape((t.shape[0], 2, 2))
    overlaps_t0 = T.maximum(tp[:, 0, :], tt[:, 0, :])
    overlaps_t1 = T.minimum(tp[:, 1, :], tt[:, 1, :])
    intersection = overlaps_t1 - overlaps_t0
    bool_overlap = T.min(intersection, axis=1) > 0
    intersection = intersection[:, 0] * intersection[:, 1]
    intersection = T.maximum(intersection, np.float32(0.))
    dims_p = tp[:, 1, :] - tp[:, 0, :]
    areas_p = dims_p[:, 0] * dims_p[:, 1]
    dims_t = tt[:, 1, :] - tt[:, 0, :]
    areas_t = dims_t[:, 0] * dims_t[:, 1]
    union = areas_p + areas_t - intersection
    loss = 1. - T.minimum(
        T.exp(T.log(T.abs_(intersection)) -
              T.log(T.abs_(union) + np.float32(1e-5))),
        np.float32(1.)
    )
    # return loss
    return T.mean(loss)
Project: face_detection    Author: chintak
def iou_loss_val(p, t):
    tp, tt = p.reshape((p.shape[0], 2, 2)), t.reshape((t.shape[0], 2, 2))
    overlaps = np.zeros_like(tp, dtype=np.float32)
    overlaps[:, 0, :] = np.maximum(tp[:, 0, :], tt[:, 0, :])
    overlaps[:, 1, :] = np.minimum(tp[:, 1, :], tt[:, 1, :])
    intersection = overlaps[:, 1, :] - overlaps[:, 0, :]
    bool_overlap = np.min(intersection, axis=1) > 0
    intersection = intersection[:, 0] * intersection[:, 1]
    intersection = np.maximum(intersection, 0.)
    # print "bool", bool_overlap
    # print "Int", intersection
    dims_p = tp[:, 1, :] - tp[:, 0, :]
    areas_p = dims_p[:, 0] * dims_p[:, 1]
    dims_t = tt[:, 1, :] - tt[:, 0, :]
    areas_t = dims_t[:, 0] * dims_t[:, 1]
    union = areas_p + areas_t - intersection
    # print "un", union
    loss = 1. - np.minimum(
        np.exp(np.log(np.abs(intersection)) - np.log(np.abs(union) + 1e-5)),
        1.
    )
    # print loss
    return np.mean(loss)
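
A quick numeric check of the NumPy version above, with one hand-made box pair (the coordinates are made up for illustration):

import numpy as np

p = np.array([[0., 0., 2., 2.]], dtype=np.float32)   # predicted box (x0, y0, x1, y1)
t = np.array([[1., 0., 3., 2.]], dtype=np.float32)   # target box, half overlapping
print(iou_loss_val(p, t))    # intersection 2, union 6 -> loss ~ 1 - 1/3 ~ 0.667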
Project: LiviaNET    Author: josedolz
def negativeLogLikelihoodWeighted(self, y, weightPerClass):      
        #Weighting the cost of the different classes in the cost-function, in order to counter class imbalance.
        e1 = np.finfo(np.float32).tiny
        addTinyProbMatrix = T.lt(self.p_y_given_x_train, 4*e1) * e1

        weights = weightPerClass.dimshuffle('x', 0, 'x', 'x', 'x')
        log_p_y_given_x_train = T.log(self.p_y_given_x_train + addTinyProbMatrix) 
        weighted_log_probs = log_p_y_given_x_train * weights

        wShape =  weighted_log_probs.shape

        # Re-arrange 
        idx0 = T.arange( wShape[0] ).dimshuffle( 0, 'x','x','x')
        idx2 = T.arange( wShape[2] ).dimshuffle('x', 0, 'x','x')
        idx3 = T.arange( wShape[3] ).dimshuffle('x','x', 0, 'x')
        idx4 = T.arange( wShape[4] ).dimshuffle('x','x','x', 0)

        return -T.mean( weighted_log_probs[ idx0, y, idx2, idx3, idx4] )
Project: cortex    Author: rdevon
def log_marginal(self, y, h, py, q):
        '''Computes the approximate log marginal.

        Uses \log \sum p / q - \log N

        Args:
            y: T.tensor, target values.
            h: T.tensor, latent samples.
            py: T.tensor, conditional density p(y | h)
            q: approximate posterior q(h | y)
        Returns:
            approximate log marginal.
        '''
        log_py_h = -self.conditional.neg_log_prob(y, py)
        log_ph   = -self.prior.neg_log_prob(h)
        log_qh   = -self.posterior.neg_log_prob(h, q)
        assert log_py_h.ndim == log_ph.ndim == log_qh.ndim

        log_p     = log_py_h + log_ph - log_qh
        log_p_max = T.max(log_p, axis=0, keepdims=True)
        w         = T.exp(log_p - log_p_max)

        return (T.log(w.mean(axis=0, keepdims=True)) + log_p_max).mean()
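
The last line is a log-mean-exp stabilized by the maximum over samples. A standalone NumPy sketch of that trick (names are illustrative):

import numpy as np

def log_mean_exp(log_p, axis=0):
    m = np.max(log_p, axis=axis, keepdims=True)
    return m + np.log(np.mean(np.exp(log_p - m), axis=axis, keepdims=True))

log_p = np.array([-1000., -1001., -1002.])
print(log_mean_exp(log_p))   # ~ -1000.69; the naive form would underflow to -inf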
Project: cortex    Author: rdevon
def step_free_energy(self, x, beta, *params):
        '''Step free energy function.

        Args:
            x (T.tensor): data sample.
            beta (float): beta value for annealing.
            *params: theano shared variables.

        Returns:
            T.tensor: free energy.

        '''
        W, v_params, h_params = self.split_params(*params)

        vis_term = beta * self.v_dist.get_energy_bias(x, *v_params)
        x = self.v_dist.scale_for_energy_model(x, *v_params)
        hid_act = beta * (T.dot(x, W) + self.h_dist.get_center(*h_params))
        fe = -vis_term - T.log(1. + T.exp(hid_act)).sum(axis=1)
        return fe
Project: cortex    Author: rdevon
def step_free_energy_h(self, h, beta, *params):
        '''Step free energy function for hidden states.

        Args:
            h (T.tensor): hidden sample.
            beta (float): beta value for annealing.
            *params: theano shared variables.

        Returns:
            T.tensor: free energy.

        '''
        W, v_params, h_params = self.split_params(*params)

        hid_term = beta * self.h_dist.get_energy_bias(h, *h_params)
        h = self.h_dist.scale_for_energy_model(h, *h_params)
        vis_act = beta * (T.dot(h, W.T) + self.v_dist.get_center(*v_params))
        fe = -hid_term - T.log(1. + T.exp(vis_act)).sum(axis=1)
        return fe
Project: top-k-rec    Author: domainxz
def _generate_train_model_function(self, scores):
        u = T.lvector('u')
        i = T.lvector('i')
        j = T.lvector('j')
        self.W = theano.shared(numpy.zeros(self._dim).astype('float32'), name='W')
        self.S = theano.shared(scores, name='S')
        x_ui = T.dot(self.W, self.S[u, i, :].T)
        x_uj = T.dot(self.W, self.S[u, j, :].T)
        x_uij = x_ui - x_uj
        obj = T.sum(
                T.log(T.nnet.sigmoid(x_uij)).sum() -
                self._lambda_w * 0.5 * (self.W ** 2).sum()
                )
        cost = -obj
        g_cost_W = T.grad(cost=cost, wrt=self.W)
        updates = [
                (self.W, self.W - self._learning_rate * g_cost_W)
                ]
        self.train_model = theano.function(inputs=[u, i, j], outputs=cost, updates=updates)
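
The objective above is BPR-style pairwise ranking: it rewards a positive score margin x_ui - x_uj through log sigmoid. A tiny NumPy illustration of how the margin drives the per-triplet objective (margin values made up):

import numpy as np

x_uij = np.array([2.0, 0.0, -2.0])            # score margins for three triplets
print(np.log(1.0 / (1.0 + np.exp(-x_uij))))   # [-0.127, -0.693, -2.127]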
Project: keras    Author: GeekLiB
def ctc_update_log_p(skip_idxs, zeros, active, log_p_curr, log_p_prev):
    active_skip_idxs = skip_idxs[(skip_idxs < active).nonzero()]
    active_next = T.cast(T.minimum(
        T.maximum(
            active + 1,
            T.max(T.concatenate([active_skip_idxs, [-1]])) + 2 + 1
        ), log_p_curr.shape[0]), 'int32')

    common_factor = T.max(log_p_prev[:active])
    p_prev = T.exp(log_p_prev[:active] - common_factor)
    _p_prev = zeros[:active_next]
    # copy over
    _p_prev = T.set_subtensor(_p_prev[:active], p_prev)
    # previous transitions
    _p_prev = T.inc_subtensor(_p_prev[1:], _p_prev[:-1])
    # skip transitions
    _p_prev = T.inc_subtensor(_p_prev[active_skip_idxs + 2], p_prev[active_skip_idxs])
    updated_log_p_prev = T.log(_p_prev) + common_factor

    log_p_next = T.set_subtensor(
        zeros[:active_next],
        log_p_curr[:active_next] + updated_log_p_prev
    )
    return active_next, log_p_next
Project: keras    Author: GeekLiB
def ctc_path_probs(predict, Y, alpha=1e-4):
    smoothed_predict = (1 - alpha) * predict[:, Y] + alpha * np.float32(1.) / Y.shape[0]
    L = T.log(smoothed_predict)
    zeros = T.zeros_like(L[0])
    log_first = zeros

    f_skip_idxs = ctc_create_skip_idxs(Y)
    b_skip_idxs = ctc_create_skip_idxs(Y[::-1])  # there should be a shortcut to calculating this

    def step(log_f_curr, log_b_curr, f_active, log_f_prev, b_active, log_b_prev):
        f_active_next, log_f_next = ctc_update_log_p(f_skip_idxs, zeros, f_active, log_f_curr, log_f_prev)
        b_active_next, log_b_next = ctc_update_log_p(b_skip_idxs, zeros, b_active, log_b_curr, log_b_prev)
        return f_active_next, log_f_next, b_active_next, log_b_next

    [f_active, log_f_probs, b_active, log_b_probs], _ = theano.scan(
        step, sequences=[L, L[::-1, ::-1]], outputs_info=[np.int32(1), log_first, np.int32(1), log_first])

    idxs = T.arange(L.shape[1]).dimshuffle('x', 0)
    mask = (idxs < f_active.dimshuffle(0, 'x')) & (idxs < b_active.dimshuffle(0, 'x'))[::-1, ::-1]
    log_probs = log_f_probs + log_b_probs[::-1, ::-1] - L
    return log_probs, mask
Project: NeuroNLP    Author: XuezheMax
def theano_logsumexp(x, axis=None):
    """
    Compute log(sum(exp(x), axis=axis)) in a numerically stable
    fashion.
    Parameters
    ----------
    x : tensor_like
        A Theano tensor (any dimension will do).
    axis : int or symbolic integer scalar, or None
        Axis over which to perform the summation. `None`, the
        default, performs over all axes.
    Returns
    -------
    result : ndarray or scalar
        The result of the log(sum(exp(...))) operation.
    """

    xmax = x.max(axis=axis, keepdims=True)
    xmax_ = x.max(axis=axis)
    return xmax_ + T.log(T.exp(x - xmax).sum(axis=axis))
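
A usage sketch for theano_logsumexp, with inputs chosen to show why the max-shift matters (the naive form overflows):

import numpy as np
import theano
import theano.tensor as T

x = T.matrix('x')
f = theano.function([x], theano_logsumexp(x, axis=1))
a = np.array([[1000., 1000.], [0., 0.]])
print(f(a))                            # [1000.6931, 0.6931]
print(np.log(np.exp(a).sum(axis=1)))   # naive: [inf, 0.6931] -- overflows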
Project: doctorai    Author: mp2893
def padMatrixWithTime(seqs, labels, times, options):
    lengths = np.array([len(seq) for seq in seqs]) - 1
    n_samples = len(seqs)
    maxlen = np.max(lengths)
    inputDimSize = options['inputDimSize']
    numClass = options['numClass']

    x = np.zeros((maxlen, n_samples, inputDimSize)).astype(config.floatX)
    y = np.zeros((maxlen, n_samples, numClass)).astype(config.floatX)
    t = np.zeros((maxlen, n_samples)).astype(config.floatX)
    mask = np.zeros((maxlen, n_samples)).astype(config.floatX)
    for idx, (seq,time,label) in enumerate(zip(seqs,times,labels)):
        for xvec, subseq in zip(x[:,idx,:], seq[:-1]):
            xvec[subseq] = 1.
        for yvec, subseq in zip(y[:,idx,:], label[1:]):
            yvec[subseq] = 1.
        mask[:lengths[idx], idx] = 1.
        t[:lengths[idx], idx] = time[:-1]

    lengths = np.array(lengths, dtype=config.floatX)
    if options['useLogTime']:
        t = np.log(t + options['logEps'])

    return x, y, t, mask, lengths
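
The useLogTime branch compresses heavy-tailed durations; a one-liner shows the effect (durations made up):

import numpy as np

t = np.array([1., 10., 1000.])     # raw inter-visit durations
print(np.log(t + 1e-8))            # [~0.0, ~2.30, ~6.91] -- outliers are damped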
Project: doctorai    Author: mp2893
def parse_arguments(parser):
    parser.add_argument('seq_file', type=str, metavar='<visit_file>', help='The path to the Pickled file containing visit information of patients')
    parser.add_argument('n_input_codes', type=int, metavar='<n_input_codes>', help='The number of unique input medical codes')
    parser.add_argument('label_file', type=str, metavar='<label_file>', help='The path to the Pickled file containing label information of patients')
    parser.add_argument('n_output_codes', type=int, metavar='<n_output_codes>', help='The number of unique label medical codes')
    parser.add_argument('out_file', metavar='out_file', help='The path to the output models. The models will be saved after every epoch')
    parser.add_argument('--time_file', type=str, default='', help='The path to the Pickled file containing durations between visits of patients. If you are not using duration information, do not use this option')
    parser.add_argument('--predict_time', type=int, default=0, choices=[0,1], help='Use this option if you want the GRU to also predict the time duration until the next visit (0 for false, 1 for true) (default value: 0)')
    parser.add_argument('--tradeoff', type=float, default=1.0, help='Tradeoff variable for balancing the two loss functions: code prediction function and duration prediction function (default value: 1.0)')
    parser.add_argument('--use_log_time', type=int, default=1, choices=[0,1], help='Use logarithm of time duration to dampen the impact of the outliers (0 for false, 1 for true) (default value: 1)')
    parser.add_argument('--embed_file', type=str, default='', help='The path to the Pickled file containing the representation vectors of medical codes. If you are not using medical code representations, do not use this option')
    parser.add_argument('--embed_size', type=int, default=200, help='The size of the visit embedding before passing it to the GRU layers. If you are not providing your own medical code vectors, you must specify this value (default value: 200)')
    parser.add_argument('--embed_finetune', type=int, default=1, choices=[0,1], help='If you are using randomly initialized code representations, always use this option. If you are using an external medical code representations, and you want to fine-tune them as you train the GRU, use this option as well. (0 for false, 1 for true) (default value: 1)')
    parser.add_argument('--hidden_dim_size', type=str, default='[200,200]', help='The size of the hidden layers of the GRU. This is a string argument. For example, [500,400] means you are using a two-layer GRU where the lower layer uses a 500-dimensional hidden layer, and the upper layer uses a 400-dimensional hidden layer. (default value: [200,200])')
    parser.add_argument('--batch_size', type=int, default=100, help='The size of a single mini-batch (default value: 100)')
    parser.add_argument('--n_epochs', type=int, default=10, help='The number of training epochs (default value: 10)')
    parser.add_argument('--L2_softmax', type=float, default=0.001, help='L2 regularization for the softmax function (default value: 0.001)')
    parser.add_argument('--L2_time', type=float, default=0.001, help='L2 regularization for the linear regression (default value: 0.001)')
    parser.add_argument('--dropout_rate', type=float, default=0.5, help='Dropout rate between GRU hidden layers, and between the final hidden layer and the softmax layer (default value: 0.5)')
    parser.add_argument('--log_eps', type=float, default=1e-8, help='A small value to prevent log(0) (default value: 1e-8)')
    parser.add_argument('--verbose', action='store_true', help='Print output after every 10 mini-batches (default false)')
    args = parser.parse_args()
    return args
Project: NCRF-AE    Author: cosmozhang
def theano_logsumexp(x, axis=None):
    """
    Compute log(sum(exp(x), axis=axis)) in a numerically stable
    fashion.
    Parameters
    ----------
    x : tensor_like
        A Theano tensor (any dimension will do).
    axis : int or symbolic integer scalar, or None
        Axis over which to perform the summation. `None`, the
        default, performs over all axes.
    Returns
    -------
    result : ndarray or scalar
        The result of the log(sum(exp(...))) operation.
    """
    xmax = T.max(x, axis = axis, keepdims = True)
    xmax_ = T.max(x, axis = axis)
    return xmax_ + T.log(T.exp(x - xmax).sum(axis = axis))
Project: hnmt    Author: robertostling
def xent(self, inputs, inputs_mask, chars, chars_mask,
             outputs, outputs_mask, attention):
        pred_outputs, pred_attention = self(
                inputs, inputs_mask, chars, chars_mask, outputs, outputs_mask)
        outputs_xent = batch_sequence_crossentropy(
                pred_outputs, outputs[1:], outputs_mask[1:])
        # Note that pred_attention will contain zero elements for masked-out
        # character positions; to avoid trouble with log() we add 1 at those
        # positions (the extra term is removed by the multiplication with
        # attention_mask anyway).
        batch_size = attention.shape[1].astype(theano.config.floatX)
        attention_mask = (inputs_mask.dimshuffle('x', 1, 0) *
                          outputs_mask[1:].dimshuffle(0, 1, 'x')
                          ).astype(theano.config.floatX)
        epsilon = 1e-6
        attention_xent = (
                   -attention[1:]
                 * T.log(epsilon + pred_attention + (1-attention_mask))
                 * attention_mask).sum() / batch_size
        return outputs_xent, attention_xent
Project: EAC-Net    Author: wiibrew
def multi_label_ACE(outputs,y_labels):
    data_shape=outputs.shape
    loss_buff=0
    # num=T.iscalar(data_shape[0]) #theano int to get value from tensor
    # for i in range(int(num)):
    #     for j in range(12):
    #         y_exp=outputs[i,j]
    #         y_tru=y_labels[i,0,0,j]
    #         if y_tru==0:
    #             loss_ij=math.log(1-outputs[i,j])
    #             loss_buff-=loss_ij
    #         if y_tru>0:
    #             loss_ij=math.log(outputs[i,j])
    #             loss_buff-=loss_ij

    # wts=[ 0.24331649,  0.18382575,  0.23082499,  0.44545567,  0.52901483,  0.58482504, \
    # 0.57321465,  0.43411294,  0.15502839,  0.36377019,  0.19050646,  0.16083916]
    # for i in [3,4,5,6,7,9]:

    for i in range(12):
        target=y_labels[:,i]
        output=outputs[:,i]
        loss_au=T.sum(-(target * T.log((output+0.05)/1.05) + (1.0 - target) * T.log((1.05 - output)/1.05)))
        loss_buff+=loss_au
    return loss_buff/(12*BATCH_SIZE)
Project: KGP-ASR    Author: KGPML
def sequence_log_likelihood(y, y_hat, y_mask, y_hat_mask, blank_symbol):
    """
    Based on code from Shawn Tan.
    Credits to Kyle Kastner as well.
    """
    y_hat_mask_len = tensor.sum(y_hat_mask, axis=0, dtype='int32')
    y_mask_len = tensor.sum(y_mask, axis=0, dtype='int32')
    log_probabs = _log_path_probabs(
        y, tensor.log(y_hat), y_mask, y_hat_mask, blank_symbol)
    batch_size = log_probabs.shape[1]
    log_labels_probab = _log_add(
        log_probabs[y_hat_mask_len - 1,
                    tensor.arange(batch_size),
                    y_mask_len - 1],
        log_probabs[y_hat_mask_len - 1,
                    tensor.arange(batch_size),
                    y_mask_len - 2])
    return log_labels_probab
Project: GELUs    Author: hendrycks
def log_sum_exp(x, axis=1):
    m = T.max(x, axis=axis)
    return m+T.log(T.sum(T.exp(x-m.dimshuffle(0,'x')), axis=axis))
Project: GELUs    Author: hendrycks
def softmax_loss(p_true, output_before_softmax):
    output_before_softmax -= T.max(output_before_softmax, axis=1, keepdims=True)
    if p_true.ndim==2:
        return T.mean(T.log(T.sum(T.exp(output_before_softmax),axis=1)) - T.sum(p_true*output_before_softmax, axis=1))
    else:
        return T.mean(T.log(T.sum(T.exp(output_before_softmax),axis=1)) - output_before_softmax[T.arange(p_true.shape[0]),p_true])
Project: fxnn    Author: khaotik
def build_model(model_):
    global fn_predict, fn_record
    global g_ozer, g_mdl

    g_ozer = dict(simple=VanillaSGD, adam=AdamSGD)[OZER]()
    g_ozer.lr = LEARN_RATE

    s_x = T.tensor4('x')
    s_y = T.ivector('y')
    s_pdpo = T.scalar()
    s_out = model_(s_x, s_pdpo)

    s_y_onehot = T.extra_ops.to_one_hot(s_y, len(g_dataset.label_map))
    s_loss = T.mean(-s_y_onehot*T.log(s_out + 1e-3))
    s_accr = T.mean( T.switch(
            T.eq(T.argmax(s_out, axis=1), T.argmax(s_y_onehot, axis=1)), 1, 0))

    no_dropout = [(s_pdpo, T.constant(0., dtype=th.config.floatX))]
    fn_predict = th.function(
        [s_x, s_y],
        {'pred':s_out, 'accr':s_accr, 'loss':s_loss},
        givens=no_dropout, profile=PROFILE)
    rec_fetches = {
        'x': s_x, 'y': s_y,
        'pred': s_out}
    rec_fetches.update(g_mdl.params_di)
    fn_record = th.function(
        [s_x, s_y], rec_fetches, givens=no_dropout, profile=PROFILE)
    g_ozer.compile(
        [s_x, s_y],
        s_loss,
        g_mdl.params_di.values(),
        fetches_={'pred': s_out, 'loss': s_loss, 'accr': s_accr},
        givens_=[(s_pdpo, T.constant(TRAIN_PDPO, dtype=th.config.floatX))],
        profile_=PROFILE)
Project: recom-system    Author: tizot
def compute_loss(output, num_samples, num_entries=6, gamma=500.0):
    """Compute the loss of a dataset, given the output of the DSSM.

    Args:
        output (:class:`lasagne.layers.Layer`): the output of the DSSM
        num_samples (int): the number of samples in the dataset
        num_entries (int): the number of compared papers in the DSSM structure
        gamma (float): the coefficient applied in the softmax of the similarities

    Returns:
        theano.tensor.TensorType: the loss of the dataset
    """
    assert (num_entries > 2)
    assert (num_samples > 0)

    # Post-NN operations to compute the loss
    # First, we extract the first output of each bundle
    mask = np.zeros(num_entries * num_samples)
    mask[::num_entries] = 1
    unmask = np.ones(num_entries * num_samples) - mask
    cited = T.extra_ops.compress(mask, output, axis=0)
    odocs = T.extra_ops.compress(unmask, output, axis=0)

    # We duplicate each row 'x' num_entries-1 times
    cited = T.extra_ops.repeat(cited, num_entries-1, axis=0)
    # Then we compute element-wise product of x with each y, for each bundle
    sims = T.sum(cited * odocs, axis=1)

    # We reshape the similarities
    sims = T.reshape(sims, (num_samples, num_entries-1))
    sims = gamma * sims

    # We take the softmax of each row
    probs = T.nnet.softmax(sims)

    # We compute the loss as the sum of element on the first column
    loss_mask = np.zeros(num_entries-1)
    loss_mask[0] = 1
    loss = T.extra_ops.compress(loss_mask, probs, axis=1)

    return -T.log(T.prod(loss))
Project: dl4mt-multi    Author: nyu-dl
def cost(self, probs, y, y_mask):
        y_flat = y.flatten()
        y_flat_idx = tensor.arange(y_flat.shape[0]) * self.vocab_size + y_flat
        cost = -tensor.log(probs.flatten()[y_flat_idx])
        cost = cost.reshape([y.shape[0], y.shape[1]])
        cost = (cost * y_mask).sum(0)
        cost = cost.mean()
        return cost
Project: dl4mt-multi    Author: nyu-dl
def f_log_probs(self, probs, x, x_mask, y, y_mask,
                    src_selector, trg_selector, cg=None):
        y_flat = y.flatten()
        y_flat_idx = tensor.arange(y_flat.shape[0]) * self.vocab_size + y_flat
        cost = -tensor.log(probs.flatten()[y_flat_idx])
        cost = cost.reshape([y.shape[0], y.shape[1]])
        cost = (cost * y_mask).sum(0)
        func_inps = [x, x_mask, y, y_mask, src_selector, trg_selector]
        return theano.function(
            inputs=func_inps,
            outputs=cost, on_unused_input='warn')
Project: sampleRNN_ICLR2017    Author: soroushmehr
def GMM_nll(x, mus, sigmas, mix_weights):
    """
    D is dimension of each observation (e.g. frame_size) for each component
    (multivariate Normal with diagonal covariance matrix)
    See `gaussian_nll`

    x : (batch_size, D)
    mus : (batch_size, D, num_gaussians)
    sigmas : (batch_size, D, num_gaussians)
    mix_weights : (batch_size, num_gaussians)
    """
    x = x.dimshuffle(0, 1, 'x')

    # Similar to `gaussian_nll`
    ll_component_wise = lib.floatX(numpy.log(2. * numpy.pi))
    ll_component_wise += 2. * T.log(sigmas)
    ll_component_wise += ((x - mus) / sigmas) ** 2.
    ll_component_wise = ll_component_wise.sum(axis=1)  # on FRAME_SIZE
    ll_component_wise *= lib.floatX(-0.5)  # LL not NLL

    # Now ready to take care of weights of each component
    # Simply applying exp could potentially cause inf/NaN.
    # Look up LogSumExp trick, Softmax in theano, or this:
    # hips.seas.harvard.edu/blog/2013/01/09/computing-log-sum-exp/
    weighted_ll = ll_component_wise + T.log(mix_weights)
    ll_max = T.max(weighted_ll, axis=1, keepdims=True)
    nll = T.log(T.sum(T.exp(weighted_ll - ll_max), axis=1, keepdims=True))
    nll += ll_max
    nll = -nll.sum(axis=1)
    return nll
Project: mimic3-benchmarks    Author: YerevaNN
def multilabel_loss(preds, labels):
    eps = 1e-4
    preds = T.clip(preds, eps, 1-eps)
    return -(labels * T.log(preds) + (1 - labels) * T.log(1 - preds)).mean(axis=1).mean(axis=0)
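
A sanity check for multilabel_loss, assuming the function above is in scope; the clipping keeps both log terms finite:

import numpy as np
import theano
import theano.tensor as T

p, y = T.matrix('p'), T.matrix('y')
f = theano.function([p, y], multilabel_loss(p, y))
print(f(np.array([[0.9, 0.1]]), np.array([[1., 0.]])))   # -log(0.9) ~ 0.105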
Project: mimic3-benchmarks    Author: YerevaNN
def multilabel_loss_with_mask(preds, labels, mask):
    eps = 1e-4
    preds = T.clip(preds, eps, 1-eps)
    return -(mask * (labels * T.log(preds) + (1 - labels) * T.log(1 - preds))).mean(axis=1).mean(axis=0)
Project: Stein-Variational-Gradient-Descent    Author: DartML
def init_weights(self, a0, b0):
        w1 = 1.0 / np.sqrt(self.d + 1) * np.random.randn(self.d, self.n_hidden)
        b1 = np.zeros((self.n_hidden,))
        w2 = 1.0 / np.sqrt(self.n_hidden + 1) * np.random.randn(self.n_hidden)
        b2 = 0.
        loggamma = np.log(np.random.gamma(a0, b0))
        loglambda = np.log(np.random.gamma(a0, b0))
        return (w1, b1, w2, b2, loggamma, loglambda)
Project: Stein-Variational-Gradient-Descent    Author: DartML
def unpack_weights(self, z):
        w = z
        w1 = np.reshape(w[:self.d*self.n_hidden], [self.d, self.n_hidden])
        b1 = w[self.d*self.n_hidden:(self.d+1)*self.n_hidden]

        w = w[(self.d+1)*self.n_hidden:]
        w2, b2 = w[:self.n_hidden], w[-3] 

        # the last two parameters are log variance
        loggamma, loglambda= w[-2], w[-1]

        return (w1, b1, w2, b2, loggamma, loglambda)
Project: deligan    Author: val-iisc
def centered_softplus(x):
    return T.nnet.softplus(x) - np.cast[th.config.floatX](np.log(2.))
Project: deligan    Author: val-iisc
def log_sum_exp(x, axis=1):
    m = T.max(x, axis=axis)
    return m+T.log(T.sum(T.exp(x-m.dimshuffle(0,'x')), axis=axis))
Project: deligan    Author: val-iisc
def get_output_for(self, input, init=False, **kwargs):
        if input.ndim > 2:
            # if the input has more than two dimensions, flatten it into a
            # batch of feature vectors.
            input = input.flatten(2)

        activation = T.tensordot(input, self.W, [[1], [0]])
        abs_dif = (T.sum(abs(activation.dimshuffle(0,1,2,'x') - activation.dimshuffle('x',1,2,0)),axis=2)
                    + 1e6 * T.eye(input.shape[0]).dimshuffle(0,'x',1))

        if init:
            mean_min_abs_dif = 0.5 * T.mean(T.min(abs_dif, axis=2),axis=0)
            abs_dif /= mean_min_abs_dif.dimshuffle('x',0,'x')
            self.init_updates = [(self.log_weight_scale, self.log_weight_scale-T.log(mean_min_abs_dif).dimshuffle(0,'x'))]

        f = T.sum(T.exp(-abs_dif),axis=2)

        if init:
            mf = T.mean(f,axis=0)
            f -= mf.dimshuffle('x',0)
            self.init_updates.append((self.b, -mf))
        else:
            f += self.b.dimshuffle('x',0)

        return T.concatenate([input, f], axis=1)

# Input Mixture of Gaussian Layer
Project: deep_srl    Author: luheng
def connect(self, inputs):
    energy = tensor.dot(inputs, self.W) + self.b
    energy = energy.reshape([energy.shape[0] * energy.shape[1], energy.shape[2]])
    log_scores = tensor.log(tensor.nnet.softmax(energy))
    predictions = tensor.argmax(log_scores, axis=-1)
    return (log_scores, predictions)
Project: deep_srl    Author: luheng
def connect(self, inputs, weights, labels):
    """ - inputs: flattened log scores from the softmax layer.
    """    
    y_flat = labels.flatten()
    x_flat_idx = tensor.arange(y_flat.shape[0])
    cross_ent = - inputs[x_flat_idx, y_flat].reshape([labels.shape[0], labels.shape[1]])
    if weights is not None:
      cross_ent = cross_ent * weights
    # Summed over timesteps. Averaged across samples in the batch.
    return cross_ent.sum(axis=0).mean()
Project: CopyNet    Author: MultiPath
def kl_divergence(p, p_hat):
    return p_hat - p + p * T.log(p / p_hat)
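
A numeric look at this penalty (a generalized-KL form often used as a sparsity regularizer; the values are made up): it is zero when p_hat equals the target p and grows as the activation drifts away.

import numpy as np

p, p_hat = 0.05, np.array([0.05, 0.2])
print(p_hat - p + p * np.log(p / p_hat))   # [0.0, ~0.081]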
Project: CopyNet    Author: MultiPath
def mean_squared_logarithmic_error(y_true, y_pred):
    return T.sqr(T.log(T.clip(y_pred, epsilon, np.inf) + 1.) - T.log(T.clip(y_true, epsilon, np.inf) + 1.)).mean(axis=-1)
Project: CopyNet    Author: MultiPath
def poisson_loss(y_true, y_pred):
    return T.mean(y_pred - y_true * T.log(y_pred + epsilon), axis=-1)

####################################################
# Variational Auto-encoder
Project: CopyNet    Author: MultiPath
def gaussian_kl_divergence(mean, ln_var):
    """Computes the KL-divergence of Gaussian variables from the standard one.

    Given two variable ``mean`` representing :math:`\\mu` and ``ln_var``
    representing :math:`\\log(\\sigma^2)`, this function returns a variable
    representing the KL-divergence between the given multi-dimensional Gaussian
    :math:`N(\\mu, S)` and the standard Gaussian :math:`N(0, I)`

    .. math::

       D_{\\mathbf{KL}}(N(\\mu, S) \\| N(0, I)),

    where :math:`S` is a diagonal matrix such that :math:`S_{ii} = \\sigma_i^2`
    and :math:`I` is an identity matrix.

    Args:
        mean (~chainer.Variable): A variable representing mean of given
            gaussian distribution, :math:`\\mu`.
        ln_var (~chainer.Variable): A variable representing logarithm of
            variance of given gaussian distribution, :math:`\\log(\\sigma^2)`.

    Returns:
        ~chainer.Variable: A variable representing KL-divergence between
            given gaussian distribution and the standard gaussian.

    """
    var = T.exp(ln_var)
    return 0.5 * T.sum(mean * mean + var - ln_var - 1, 1)


# aliases
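
A usage sketch for gaussian_kl_divergence, assuming the function above is in scope; a standard normal has zero KL from itself:

import numpy as np
import theano
import theano.tensor as T

mean, ln_var = T.matrix('mean'), T.matrix('ln_var')
f = theano.function([mean, ln_var], gaussian_kl_divergence(mean, ln_var))
print(f(np.zeros((2, 4)), np.zeros((2, 4))))   # -> [0., 0.]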
Project: iterative_inference_segm    Author: adri-romsor
def entropy(y_pred):
    # Clip predictions to avoid numerical instability
    y_pred = T.clip(y_pred, _EPSILON, 1.0 - _EPSILON)

    ent = - T.sum(y_pred * T.log(y_pred), axis=1)

    return ent.mean()
Project: dsb3    Author: EliasVansteenkiste
def __init__(self, input1, input2, log=False, **kwargs):
        super(MultLayer, self).__init__([input1, input2], **kwargs)