def get_output_for(self, inputs, attention_only=False, **kwargs):
        """Gate each document state with an attention-weighted query summary.

        Per the shape notes carried with this layer: inputs[0] is the
        document (B x N x D), inputs[1] the query (B x Q x D), inputs[2] the
        pairwise scores (B x N x Q, or B x Q x N when ``self.transpose`` is
        set) and ``self.mask`` is B x Q.  ``attention_only`` is accepted but
        not consulted in this body.
        """
        doc, query = inputs[0], inputs[1]
        scores = inputs[2].dimshuffle((0, 2, 1)) if self.transpose else inputs[2]

        # Softmax over the query axis, done on a 2-D view of the scores.
        rows = scores.shape[0] * scores.shape[1]
        flat_probs = T.nnet.softmax(T.reshape(scores, (rows, scores.shape[2])))
        probs = T.reshape(
            flat_probs, (scores.shape[0], scores.shape[1], scores.shape[2]))

        # Drop masked query positions, then renormalise each row.
        probs = probs * self.mask[:, np.newaxis, :]  # B x N x Q
        probs = probs / probs.sum(axis=2)[:, :, np.newaxis]  # B x N x Q
        q_rep = T.batched_dot(probs, query)  # B x N x D

        # self.gating_fn is a string naming the gate; it is eval'd as-is.
        gate = eval(self.gating_fn)
        return gate(doc, q_rep)
项目:fg-gating    作者:kimiyoung    | 项目源码 | 文件源码
def get_output_for(self, inputs, **kwargs):
        """Attend over query steps for every passage step.

        ``inputs`` unpacks to (p_gru, q_gru, q_mask, feature).  Returns a
        tensor reshaped to (batch, time_p, units).
        """
        p_gru, q_gru, q_mask, feature = tuple(inputs)
        time_p = p_gru.shape[1]
        time_q = q_gru.shape[1]
        p_gru_re = p_gru.dimshuffle(0, 1, 'x', 2) # (batch, time_p, 1, units)
        q_gru_re = q_gru.dimshuffle(0, 'x', 1, 2) # (batch, 1, time_q, units)
        gru_merge = T.tanh(p_gru_re * q_gru_re).reshape((-1, time_q, self.units)) # (batch * time_p, time_q, units)

        # NOTE(review): the left operands of these two T.dot calls are
        # reconstructed from the shape comments (gru_merge for the pairwise
        # term, q_gru for the query-only term) -- confirm against upstream.
        att = T.dot(gru_merge, self.v1).reshape((-1, time_p, time_q)) # (batch, time_p, time_q)
        att_q = T.dot(q_gru, self.v2).squeeze() # (batch, time_q)
        att = att + att_q.dimshuffle(0, 'x', 1) + feature # (batch, time_p, time_q)
        att = T.nnet.softmax(att.reshape((-1, time_q))) # (batch * time_p, time_q)
        att = att.reshape((-1, time_p, time_q)) * q_mask.dimshuffle(0, 'x', 1) # (batch, time_p, time_q)
        # Renormalise after masking; the epsilon guards fully masked rows.
        att = att / (att.sum(axis = 2, keepdims = True) + 1e-8) # (batch, time_p, time_q)
        att = att.reshape((-1, time_q))

        output = T.batched_dot(att, gru_merge) # (batch * time_p, units)
        output = output.reshape((-1, time_p, self.units))
        return output
项目:fg-gating    作者:kimiyoung    | 项目源码 | 文件源码
def get_output_for(self, inputs, attention_only=False, **kwargs):
        """Gate each document state with an attention-weighted query summary.

        Per the shape notes carried with this layer: inputs[0] is the
        document (B x N x D), inputs[1] the query (B x Q x D), inputs[2] the
        pairwise scores (B x N x Q, or B x Q x N when ``self.transpose`` is
        set) and ``self.mask`` is B x Q.  ``attention_only`` is accepted but
        not consulted in this body.
        """
        doc, query = inputs[0], inputs[1]
        scores = inputs[2].dimshuffle((0, 2, 1)) if self.transpose else inputs[2]

        # Softmax over the query axis, done on a 2-D view of the scores.
        rows = scores.shape[0] * scores.shape[1]
        flat_probs = T.nnet.softmax(T.reshape(scores, (rows, scores.shape[2])))
        probs = T.reshape(
            flat_probs, (scores.shape[0], scores.shape[1], scores.shape[2]))

        # Drop masked query positions, then renormalise each row.
        probs = probs * self.mask[:, np.newaxis, :]  # B x N x Q
        probs = probs / probs.sum(axis=2)[:, :, np.newaxis]  # B x N x Q
        q_rep = T.batched_dot(probs, query)  # B x N x D

        # self.gating_fn is a string naming the gate; it is eval'd as-is.
        gate = eval(self.gating_fn)
        return gate(doc, q_rep)
项目:SNLI    作者:qiaojingy    | 项目源码 | 文件源码
def get_output_for(self, inputs, **kwargs):
        """Attend over the first half of inputs[0], conditioned on inputs[1].

        If inputs[1] is 3-D only its last time step is used.  The mask is read
        from ``inputs[self.mask_incoming_index]``.  Returns ``h_star``.
        """
        # Floor division keeps the slice bound an integer on Python 3 too.
        sequence_length = inputs[0].shape[1] // 2
        input_first = inputs[0][(slice(None),) + (slice(0, sequence_length),)]
        input_second = inputs[1]

        mask = inputs[self.mask_incoming_index]

        if input_second.ndim == 3:
            input_second = input_second[(slice(None), -1)]

        # NOTE(review): the T.dot operands below are reconstructed to match
        # the shape comments -- confirm against upstream.
        M = nonlinearities.tanh(T.dot(input_first, self.W_y) + T.dot(input_second.dimshuffle(0, 'x', 1), self.W_h))
        # M.shape = N * L * k
        alpha = nonlinearities.softmax(T.dot(M, self.w.T).reshape((inputs[0].shape[0], sequence_length)))
        # alpha.shape = N * L
        alpha = alpha * mask
        r = T.batched_dot(alpha, input_first)
        # r.shape = N * k
        h_star = nonlinearities.tanh(T.dot(r, self.W_p) + T.dot(input_second, self.W_x))
        return h_star
项目:DEEP-CLICK-MODEL    作者:THUIR    | 项目源码 | 文件源码
def output_func(self, input):
        """Concatenate the query, per-position bilinear scores and raw inputs.

        input[0] is the query; input[1..self.position] are the per-position
        inputs.  For each position a bilinear score against the query and the
        position's input are appended; then a score is appended for every
        pair of positions (j < i).  Everything is concatenated along axis 1.
        """
        q = input[0]
        all_list = [q]
        for i in range(self.position):
            dot = T.batched_dot(q, T.dot(input[i + 1], self.W[i].T))
            all_list.append(dot.dimshuffle(0, 'x'))
            all_list.append(input[i + 1])
        # NOTE(review): begin_index is never advanced, so every pairwise term
        # shares self.W[self.position] -- confirm this is intended.
        begin_index = self.position
        for i in range(1, self.position):
            for j in range(0, i):
                dot = T.batched_dot(input[j + 1], T.dot(input[i + 1], self.W[begin_index].T))
                all_list.append(dot.dimshuffle(0, 'x'))
        out = T.concatenate(all_list, axis=1)
        return out
项目:dl4nlp_in_theano    作者:luyaojie    | 项目源码 | 文件源码
def score_batch(self, e1, e2, r_index):
        # Scores a batch of (e1, r_index, e2) triples: a hidden vector is
        # built, passed through self.act.activate, and reduced against
        # self.U[r_index] with a sum over axis 1.
        # NOTE(review): this snippet appears truncated -- the docstring quotes
        # around the ':param' lines, the tail of the theano.scan(...) call and
        # the 'else:' before 'hidden = hidden1' look to be missing.  Restore
        # them from the upstream source before use.
        :param e1: (batch, entity_dim, )
        :param e2: (batch, entity_dim, )
        :param r_index: (batch, )
        # (batch, entity_dim) dot (batch, entity_dim, entity_dim, hidden) dot (batch, entity_dim) -> hidden * (batch, )
        hidden1_sep, _ = theano.scan(fn=self.step_batch,
                                     non_sequences=[e1, e2, self.W[r_index]],
        # hidden * (batch, ) -> (batch, hidden)
        hidden1 = T.concatenate([hidden1_sep], axis=1).transpose()
        if self.keep_normal:
            # (batch, 2 * entity_dim) dot (batch, 2 * entity_dim, hidden) -> (batch, hidden, )
            hidden2 = T.batched_dot(T.concatenate([e1, e2], axis=1), self.V[r_index])
            # (batch, hidden) + (batch, hidden) + (batch, hidden) -> (batch, hidden)
            hidden = hidden1 + hidden2 + self.b[r_index]
            # NOTE(review): as written the next line unconditionally discards
            # hidden2 and the bias; presumably it belongs to an 'else' branch.
            hidden = hidden1
        # (batch, hidden) -> (batch, hidden)
        act_hidden = self.act.activate(hidden)
        # (batch, hidden) dot (batch, hidden) -> (batch, )
        return T.sum(act_hidden * self.U[r_index], axis=1)
项目:neural_style    作者:metaflow-ai    | 项目源码 | 文件源码
def grams(X):
    """Return the batch of Gram matrices of feature maps ``X``.

    With 'tf' dim ordering the channel axis is first moved to position 1.
    Each sample is flattened to (c, h * w), multiplied by its transpose and
    divided by c * h * w.
    """
    dim_ordering = K.image_dim_ordering()
    if dim_ordering == 'tf':
        X = K.permute_dimensions(X, (0, 3, 1, 2))

    (samples, c, h, w) = get_shape(X)

    X_reshaped = K.reshape(X, (-1, c, h * w))
    X_T = K.permute_dimensions(X_reshaped, (0, 2, 1))
    # Pick the batched matmul of the active backend; without the else branch
    # the TensorFlow call would always overwrite the Theano result.
    if K._BACKEND == 'theano':
        X_gram = T.batched_dot(X_reshaped, X_T)
    else:
        X_gram = tf.batch_matmul(X_reshaped, X_T)
    X_gram /= c * h * w

    return X_gram
项目:dnc-theano    作者:khaotik    | 项目源码 | 文件源码
def op_matmul(s_x_, s_y_, axes_=(-2, -1)):
    '''
    Limited implementation of np.matmul; does not support broadcasting.

    Args:
        s_x_: (batch of) matrix(matrices)
        s_y_: (batch of) matrix(matrices)
        axes_: tuple of int, the axes for the matrix

    Both operands must have the same number of dimensions (at least 2).
    '''
    from functools import reduce  # only a builtin on Python 2

    assert s_x_.ndim == s_y_.ndim
    ndim = s_x_.ndim
    assert -ndim <= axes_[0] < ndim
    assert -ndim <= axes_[1] < ndim
    assert ndim >= 2
    axes = axes_[0]%ndim, axes_[1]%ndim
    if ndim == 2:
        # Plain matrices: swapped axes mean the operands are swapped.
        if axes == (0,1):
            return T.dot(s_x_, s_y_)
        else:
            return T.dot(s_y_, s_x_)
    s_shp = T.shape(s_x_)
    # Product of all non-matrix dimensions: the flattened batch size.
    s_size = reduce(T.mul, [s_shp[i] for i in range(s_x_.ndim) if i not in axes])
    s_szu = s_shp[axes[0]]
    s_szv = s_shp[axes[1]]
    s_szw = T.shape(s_y_)[axes[1]]
    # Permutation that moves the two matrix axes to the last two positions.
    transpp = list(range(ndim))
    transpp[axes[0]], transpp[ndim-2] = transpp[ndim-2], transpp[axes[0]]
    transpp[axes[1]], transpp[ndim-1] = transpp[ndim-1], transpp[axes[1]]
    s_shp2 = [s_shp[a] for a in transpp]
    s_shp2[axes[1]] = s_szw
    s_x = s_x_.transpose(*transpp).reshape((s_size, s_szu, s_szv))
    s_y = s_y_.transpose(*transpp).reshape((s_size, s_szv, s_szw))
    return T.batched_dot(s_x, s_y).reshape(s_shp2).transpose(transpp)
项目:ga-reader    作者:bdhingra    | 项目源码 | 文件源码
def get_output_for(self, inputs, **kwargs):
        """Return the per-example dot products of document and query states.

        inputs[0] is B x N x D and inputs[1] is B x Q x D; the result is
        B x N x Q.
        """
        doc, query = inputs[0], inputs[1]
        # Move the feature axis of the query in front of its token axis.
        return T.batched_dot(doc, query.dimshuffle(0, 2, 1))
项目:ga-reader    作者:bdhingra    | 项目源码 | 文件源码
def get_output_for(self, inputs, **kwargs):
        """Aggregate ``self.aggregator`` with attention from one query state.

        inputs[0] is B x N x D and inputs[1] is B x Q x D; ``self.pointer``
        selects one query position per example, ``self.mask`` is B x N and
        ``self.aggregator`` is B x N x C.
        """
        batch_index = T.arange(inputs[1].shape[0])
        picked_query = inputs[1][batch_index, self.pointer, :]  # B x D

        scores = T.batched_dot(inputs[0], picked_query)  # B x N
        # Mask after the softmax, then renormalise so each row sums to one.
        weights = T.nnet.softmax(scores) * self.mask  # B x N
        weights = weights / weights.sum(axis=1)[:, np.newaxis]  # B x N

        return T.batched_dot(weights, self.aggregator)
项目:planetoid    作者:kimiyoung    | 项目源码 | 文件源码
def get_output_for(self, input, **kwargs):
        """Apply a bilinear term plus a linear term and bias, then the nonlinearity."""
        # The bilinear part contracts input with self.V, then with input again.
        act = T.batched_dot(T.tensordot(input, self.V, axes = [1, 2]), input) + T.dot(input, self.W) + self.b.dimshuffle('x', 0)
        return self.nonlinearity(act)
项目:Hotpot    作者:Liang-Qiu    | 项目源码 | 文件源码
def dot_time_distributed_merge(model, layers, cos_norm=False):
    """ Merging two time series layers into one, producing a new time series that
    contains a dot-product scalar for each time step.

    If cos_norm=True, actually computes cosine similarity. """
    def batched_batched_dot(s):
        """ from (x,y,z)-shaped pair, produce (x,y)-shaped pair that replaces the z-vector pairs by their dot-products """
        import theano
        import theano.tensor as T
        return theano.scan(fn=lambda xm, ym: T.batched_dot(xm, ym),
                           outputs_info=None, sequences=s, non_sequences=None)[0]

    def batched_cos_sim(s):
        """ from (x,y,z)-shaped pair, produce (x,y)-shaped pair that replaces the z-vector pairs by their cosine similarities """
        import theano
        import theano.tensor as T
        return theano.scan(fn=lambda xm, ym: T.batched_dot(xm, ym) / T.sqrt(T.batched_dot(xm, xm) * T.batched_dot(ym, ym)),
                           outputs_info=None, sequences=s, non_sequences=None)[0]

    # Without the else branch the cosine variant would always be overwritten.
    if cos_norm:
        lmb = batched_cos_sim
    else:
        lmb = batched_batched_dot

    return Lambda([model.nodes[l] for l in layers], lmb,
                       lambda s: (s[1][0], s[1][1]))
项目:Hotpot    作者:Liang-Qiu    | 项目源码 | 文件源码
def dot_time_distributed_merge(model, layers, cos_norm=False):
    """ Merging two time series layers into one, producing a new time series that
    contains a dot-product scalar for each time step.

    If cos_norm=True, actually computes cosine similarity. """
    def batched_batched_dot(s):
        """ from (x,y,z)-shaped pair, produce (x,y)-shaped pair that replaces the z-vector pairs by their dot-products """
        import theano
        import theano.tensor as T
        return theano.scan(fn=lambda xm, ym: T.batched_dot(xm, ym),
                           outputs_info=None, sequences=s, non_sequences=None)[0]

    def batched_cos_sim(s):
        """ from (x,y,z)-shaped pair, produce (x,y)-shaped pair that replaces the z-vector pairs by their cosine similarities """
        import theano
        import theano.tensor as T
        return theano.scan(fn=lambda xm, ym: T.batched_dot(xm, ym) / T.sqrt(T.batched_dot(xm, xm) * T.batched_dot(ym, ym)),
                           outputs_info=None, sequences=s, non_sequences=None)[0]

    # Without the else branch the cosine variant would always be overwritten.
    if cos_norm:
        lmb = batched_cos_sim
    else:
        lmb = batched_batched_dot

    return Lambda([model.nodes[l] for l in layers], lmb,
                       lambda s: (s[1][0], s[1][1]))
项目:models    作者:Jonbean    | 项目源码 | 文件源码
def batch_cosine(self, doc_batch_proj, query_batch_proj):
        """Return the row-wise cosine similarity of two batches of vectors."""
        numerator = T.batched_dot(doc_batch_proj, query_batch_proj)

        # Euclidean length of every row, reduced over axis 1.
        doc_length = T.sqrt(T.sum(T.sqr(doc_batch_proj), axis=1))
        query_length = T.sqrt(T.sum(T.sqr(query_batch_proj), axis=1))

        return numerator / (doc_length * query_length)
项目:models    作者:Jonbean    | 项目源码 | 文件源码
def batch_cosine(self, doc_batch_proj, query_batch_proj):
        """Return the row-wise cosine similarity of two batches of vectors.

        Each row's dot product is divided by the product of that row's own
        norms; the norms are kept per example rather than summed over the
        batch, so one example's score does not depend on the others.
        """
        dot_prod = T.batched_dot(doc_batch_proj, query_batch_proj)

        doc_square = doc_batch_proj ** 2
        query_square = query_batch_proj ** 2

        doc_norm = T.sqrt(doc_square.sum(axis = 1))
        query_norm = T.sqrt(query_square.sum(axis = 1))

        batch_cosine_vec = dot_prod/(doc_norm * query_norm)
        return batch_cosine_vec
项目:models    作者:Jonbean    | 项目源码 | 文件源码
def batch_cosine(self, doc_batch_proj, query_batch_proj):
        """Return the row-wise cosine similarity of two batches of vectors.

        Each row's dot product is divided by the product of that row's own
        norms; the norms are kept per example rather than summed over the
        batch, so one example's score does not depend on the others.
        """
        dot_prod = T.batched_dot(doc_batch_proj, query_batch_proj)

        doc_square = doc_batch_proj ** 2
        query_square = query_batch_proj ** 2

        doc_norm = T.sqrt(doc_square.sum(axis = 1))
        query_norm = T.sqrt(query_square.sum(axis = 1))

        batch_cosine_vec = dot_prod/(doc_norm * query_norm)
        return batch_cosine_vec
项目:models    作者:Jonbean    | 项目源码 | 文件源码
def batch_cosine(self, doc_batch_proj, query_batch_proj):
        """Return the row-wise cosine similarity of two batches of vectors.

        Each row's dot product is divided by the product of that row's own
        norms; the norms are kept per example rather than summed over the
        batch, so one example's score does not depend on the others.
        """
        dot_prod = T.batched_dot(doc_batch_proj, query_batch_proj)

        doc_square = doc_batch_proj ** 2
        query_square = query_batch_proj ** 2

        doc_norm = T.sqrt(doc_square.sum(axis = 1))
        query_norm = T.sqrt(query_square.sum(axis = 1))

        batch_cosine_vec = dot_prod/(doc_norm * query_norm)
        return batch_cosine_vec
项目:models    作者:Jonbean    | 项目源码 | 文件源码
def batch_cosine(self, doc_batch_proj, query_batch_proj):
        """Return the row-wise cosine similarity of two batches of vectors.

        Each row's dot product is divided by the product of that row's own
        norms; the norms are kept per example rather than summed over the
        batch, so one example's score does not depend on the others.
        """
        dot_prod = T.batched_dot(doc_batch_proj, query_batch_proj)

        doc_square = doc_batch_proj ** 2
        query_square = query_batch_proj ** 2

        doc_norm = T.sqrt(doc_square.sum(axis = 1))
        query_norm = T.sqrt(query_square.sum(axis = 1))

        batch_cosine_vec = dot_prod/(doc_norm * query_norm)
        return batch_cosine_vec
项目:models    作者:Jonbean    | 项目源码 | 文件源码
def batch_cosine(self, doc_batch_proj, query_batch_proj):
        """Return the row-wise cosine similarity of two batches of vectors.

        Each row's dot product is divided by the product of that row's own
        norms; the norms are kept per example rather than summed over the
        batch, so one example's score does not depend on the others.
        """
        dot_prod = T.batched_dot(doc_batch_proj, query_batch_proj)

        doc_square = doc_batch_proj ** 2
        query_square = query_batch_proj ** 2

        doc_norm = T.sqrt(doc_square.sum(axis = 1))
        query_norm = T.sqrt(query_square.sum(axis = 1))

        batch_cosine_vec = dot_prod/(doc_norm * query_norm)
        return batch_cosine_vec
项目:models    作者:Jonbean    | 项目源码 | 文件源码
def batch_cosine(self, doc_batch_proj, query_batch_proj):
        """Return the row-wise cosine similarity of two batches of vectors.

        Each row's dot product is divided by the product of that row's own
        norms; the norms are kept per example rather than summed over the
        batch, so one example's score does not depend on the others.
        """
        dot_prod = T.batched_dot(doc_batch_proj, query_batch_proj)

        doc_square = doc_batch_proj ** 2
        query_square = query_batch_proj ** 2

        doc_norm = T.sqrt(doc_square.sum(axis = 1))
        query_norm = T.sqrt(query_square.sum(axis = 1))

        batch_cosine_vec = dot_prod/(doc_norm * query_norm)
        return batch_cosine_vec
项目:models    作者:Jonbean    | 项目源码 | 文件源码
def batch_cosine(self, doc_batch_proj, query_batch_proj):
        """Return the row-wise cosine similarity of two batches of vectors.

        Each row's dot product is divided by the product of that row's own
        norms; the norms are kept per example rather than summed over the
        batch, so one example's score does not depend on the others.
        """
        dot_prod = T.batched_dot(doc_batch_proj, query_batch_proj)

        doc_square = doc_batch_proj ** 2
        query_square = query_batch_proj ** 2

        doc_norm = T.sqrt(doc_square.sum(axis = 1))
        query_norm = T.sqrt(query_square.sum(axis = 1))

        batch_cosine_vec = dot_prod/(doc_norm * query_norm)
        return batch_cosine_vec
项目:models    作者:Jonbean    | 项目源码 | 文件源码
def batch_cosine(self, doc_batch_proj, query_batch_proj):
        """Return the row-wise cosine similarity of two batches of vectors.

        Each row's dot product is divided by the product of that row's own
        norms; the norms are kept per example rather than summed over the
        batch, so one example's score does not depend on the others.
        """
        dot_prod = T.batched_dot(doc_batch_proj, query_batch_proj)

        doc_square = doc_batch_proj ** 2
        query_square = query_batch_proj ** 2

        doc_norm = T.sqrt(doc_square.sum(axis = 1))
        query_norm = T.sqrt(query_square.sum(axis = 1))

        batch_cosine_vec = dot_prod/(doc_norm * query_norm)
        return batch_cosine_vec
项目:models    作者:Jonbean    | 项目源码 | 文件源码
def batch_cosine(self, doc_batch_proj, query_batch_proj):
        """Return the row-wise cosine similarity of two batches of vectors.

        Each row's dot product is divided by the product of that row's own
        norms; the norms are kept per example rather than summed over the
        batch, so one example's score does not depend on the others.
        """
        dot_prod = T.batched_dot(doc_batch_proj, query_batch_proj)

        doc_square = doc_batch_proj ** 2
        query_square = query_batch_proj ** 2

        doc_norm = T.sqrt(doc_square.sum(axis = 1))
        query_norm = T.sqrt(query_square.sum(axis = 1))

        batch_cosine_vec = dot_prod/(doc_norm * query_norm)
        return batch_cosine_vec
项目:senti    作者:stevenxxiu    | 项目源码 | 文件源码
def get_output_for(self, input_, **kwargs):
        """Add a square-rooted pairwise-interaction term to the input.

        The interaction weights are the strictly lower triangle of ``self.W``.
        """
        W = T.tril(self.W, -1)
        interactions = T.batched_dot(T.dot(input_, W), input_)
        # Element-wise clamp before the square root.  T.maximum is required:
        # the second positional argument of T.max is the reduction axis.
        interactions = T.sqrt(T.maximum(interactions, 1e-6))
        return self.nonlinearity(input_ + interactions)
项目:fg-gating    作者:kimiyoung    | 项目源码 | 文件源码
def get_output_for(self, inputs, **kwargs):
        """Return the per-example dot products of document and query states.

        inputs[0] is B x N x D and inputs[1] is B x Q x D; the result is
        B x N x Q.
        """
        doc, query = inputs[0], inputs[1]
        # Move the feature axis of the query in front of its token axis.
        return T.batched_dot(doc, query.dimshuffle(0, 2, 1))
项目:fg-gating    作者:kimiyoung    | 项目源码 | 文件源码
def get_output_for(self, inputs, **kwargs):
        """Aggregate ``self.aggregator`` with attention from one query state.

        inputs[0] is the document (B x N x D) and inputs[1] the query
        (B x Q x D); ``self.pointer`` selects one query position per example,
        ``self.mask`` is B x N and ``self.aggregator`` is B x N x C.
        """
        batch_index = T.arange(inputs[1].shape[0])
        picked_query = inputs[1][batch_index, self.pointer, :]  # B x D

        scores = T.batched_dot(inputs[0], picked_query)  # B x N
        # Mask after the softmax, then renormalise so each row sums to one.
        weights = T.nnet.softmax(scores) * self.mask  # B x N
        weights = weights / weights.sum(axis=1)[:, np.newaxis]  # B x N

        return T.batched_dot(weights, self.aggregator)
项目:DBQA    作者:nanfeng1101    | 项目源码 | 文件源码
def __call__(self, q, a):
        """Return the per-example bilinear form of ``q`` and ``a`` under ``self.W``."""
        # Contract axis 1 of q with axis 0 of W, then pair each row with a.
        projected_q = T.tensordot(q, self.W, axes=[1, 0])
        return T.batched_dot(projected_q, a)
项目:DBQA-KBQA    作者:Lucien-qiang    | 项目源码 | 文件源码
def output_func(self, input):
        """Score (q, a) pairs bilinearly, apply softmax, return the arg-max class.

        Also stores ``self.p_y_given_x``, ``self.prob`` (last softmax column)
        and ``self.y_pred`` on the instance.
        """
        q, a = input[0], input[1]
        # Per-example bilinear form of q and a under self.W.
        dot = T.batched_dot(q, T.dot(a, self.W))

        self.p_y_given_x = T.nnet.softmax(dot + self.b.dimshuffle('x', 0))
        self.prob = self.p_y_given_x[:,-1]
        self.y_pred = T.argmax(self.p_y_given_x, axis=1)
        return self.y_pred
项目:DBQA-KBQA    作者:Lucien-qiang    | 项目源码 | 文件源码
def output_func(self, input):
        """Mix a bilinear (q, a) score with a linear feature score and classify.

        The two scores are blended with ``self.lamda`` (l and 1 - l) before the
        softmax.  Also stores ``self.p_y_given_x``, ``self.prob`` (last softmax
        column) and ``self.y_pred`` on the instance.
        """
        q, a, feats = input[0], input[1], input[2]

        dot = T.batched_dot(q, T.dot(a, self.W))
        feats_dot = T.dot(feats, self.W_feats)
        l = self.lamda.dimshuffle('x', 0)
        self.p_y_given_x = T.nnet.softmax(l*dot + (1-l) * feats_dot + self.b.dimshuffle('x', 0))
        self.prob = self.p_y_given_x[:,-1]
        self.y_pred = T.argmax(self.p_y_given_x, axis=1)
        return self.y_pred
项目:DBQA-KBQA    作者:Lucien-qiang    | 项目源码 | 文件源码
def output_func(self, input):
        """Score (q, a) pairs bilinearly, apply softmax, return the arg-max class.

        Also stores ``self.p_y_given_x``, ``self.prob`` (last softmax column)
        and ``self.y_pred`` on the instance.
        """
        q, a = input[0], input[1]
        # Per-example bilinear form of q and a under self.W.
        dot = T.batched_dot(q, T.dot(a, self.W))
        self.p_y_given_x = T.nnet.softmax(dot + self.b.dimshuffle('x', 0))
        self.prob = self.p_y_given_x[:,-1]
        self.y_pred = T.argmax(self.p_y_given_x, axis=1)
        return self.y_pred
项目:DBQA-KBQA    作者:Lucien-qiang    | 项目源码 | 文件源码
def output_func(self, input):
        """Add a bilinear (q, a) score to a linear feature score and classify.

        Also stores ``self.p_y_given_x``, ``self.prob`` (last softmax column)
        and ``self.y_pred`` on the instance.
        """
        q, a, feats = input[0], input[1], input[2]

        dot = T.batched_dot(q, T.dot(a, self.W))
        feats_dot = T.dot(feats, self.W_feats)

        self.p_y_given_x = T.nnet.softmax(dot + feats_dot + self.b.dimshuffle('x', 0))
        self.prob = self.p_y_given_x[:,-1]
        self.y_pred = T.argmax(self.p_y_given_x, axis=1)
        return self.y_pred
项目:DBQA-KBQA    作者:Lucien-qiang    | 项目源码 | 文件源码
def output_func(self, input):
      """Concatenate the bilinear (q, a) score with q, a and feats along axis 1."""
      q, a, feats = input[0], input[1], input[2]

      # Per-example bilinear form of q and a under the transpose of self.W.
      dot = T.batched_dot(q, T.dot(a, self.W.T))
      out = T.concatenate([dot.dimshuffle(0, 'x'), q, a, feats], axis=1)
      return out
项目:DBQA-KBQA    作者:Lucien-qiang    | 项目源码 | 文件源码
def output_func(self, input):
      """Concatenate the bilinear (q, a) score with feats along axis 1."""
      q, a, feats = input[0], input[1], input[2]

      # Per-example bilinear form of q and a under the transpose of self.W.
      dot = T.batched_dot(q, T.dot(a, self.W.T))
      out = T.concatenate([dot.dimshuffle(0, 'x'), feats], axis=1)
      return out
项目:DBQA-KBQA    作者:Lucien-qiang    | 项目源码 | 文件源码
def output_func(self, input):
      """Concatenate the bilinear (q, a) score with q and a along axis 1."""
      q, a = input[0], input[1]

      # Per-example bilinear form of q and a under the transpose of self.W.
      dot = T.batched_dot(q, T.dot(a, self.W.T))
      out = T.concatenate([dot.dimshuffle(0, 'x'), q, a], axis=1)
      return out
项目:DBQA-KBQA    作者:Lucien-qiang    | 项目源码 | 文件源码
def output_func(self, input):
      """Project q and a separately, score the projections, and concatenate.

      The output is the per-example dot product of the two projections
      followed by the raw q and a, joined along axis 1.
      """
      q, a = input[0], input[1]

      qdot = T.dot(q, self.Wq)
      adot = T.dot(a, self.Wa)
      dot = T.batched_dot(qdot, adot)
      out = T.concatenate([dot.dimshuffle(0, 'x'), q, a], axis=1)
      return out
项目:DBQA-KBQA    作者:Lucien-qiang    | 项目源码 | 文件源码
def output_func(self, input):
      """Return the bilinear (q, a) score as a column (one value per example)."""
      q, a = input[0], input[1]

      # Per-example bilinear form of q and a under the transpose of self.W.
      out = T.batched_dot(q, T.dot(a, self.W.T)).dimshuffle(0, 'x')
      return out
项目:DBQA-KBQA    作者:Lucien-qiang    | 项目源码 | 文件源码
def output_func(self, input):
        """Classify from bilinear, feature and per-input linear scores.

        The softmax input sums the bilinear (q, a) score, a linear feature
        score, linear scores of q and a, and the bias.  Also stores
        ``self.p_y_given_x``, ``self.prob`` (last softmax column) and
        ``self.y_pred`` on the instance.
        """
        q, a, feats = input[0], input[1], input[2]

        dot = T.batched_dot(q, T.dot(a, self.W))
        feats_dot = T.dot(feats, self.W_feats)

        # NOTE(review): the q/a pairing with W_q/W_a is inferred from the
        # weight names -- confirm against upstream.
        self.p_y_given_x = T.nnet.softmax(dot + feats_dot + T.dot(q, self.W_q) + T.dot(a, self.W_a) + self.b.dimshuffle('x', 0))
        self.prob = self.p_y_given_x[:,-1]
        self.y_pred = T.argmax(self.p_y_given_x, axis=1)
        return self.y_pred
项目:hred-latent-piecewise    作者:julianser    | 项目源码 | 文件源码
def LayerNormalization(x, gamma, mask, estimated_mean=0.0, estimated_var=1.0):
    """Normalise ``x`` over its last axis and scale the result by ``gamma``.

    Accepts 2-D or 3-D ``x``.  Returns a triple: the scaled normalised
    tensor, plus the mean and variance entries taken at the leading index
    (``[0, 0]`` for 3-D input, ``[0]`` for 2-D).  ``mask``,
    ``estimated_mean`` and ``estimated_var`` are accepted but not used here.
    """
    assert x.ndim == 3 or x.ndim == 2
    if x.ndim == 3:
        pattern, head = (0, 1, 'x'), (0, 0)
    elif x.ndim == 2:
        pattern, head = (0, 'x'), 0
    else:
        # Only reachable when assertions are disabled.
        return None

    last_axis = x.ndim - 1
    mean = T.mean(x, axis=last_axis).dimshuffle(*pattern)
    var = T.var(x, axis=last_axis).dimshuffle(*pattern)
    # The small constant keeps the square root away from zero.
    normalised = gamma*((x - mean) / T.sqrt(var+1e-7))
    return normalised, mean[head], var[head]

# Does theano.batched_dot. If last_axis is on it will loop over the last axis, otherwise it will loop over the first axis.
项目:hred-latent-piecewise    作者:julianser    | 项目源码 | 文件源码
def BatchedDot(x, y, last_axis=False):
    """Batched dot product over the first axis, or the last if ``last_axis``.

    With ``last_axis`` set, the last axis of each operand is moved to the
    front, ``T.batched_dot`` is applied, and the batch axis of the result is
    moved back to the end.

    Raises:
        ValueError: if ``last_axis`` is set and ``x`` or ``y`` does not have
            between 2 and 4 dimensions.
    """
    if last_axis==False:
        return T.batched_dot(x, y)
    elif last_axis:
        if x.ndim == 2:
            shuffled_x = x.dimshuffle(1,0)
        elif x.ndim == 3:
            shuffled_x = x.dimshuffle(2,0,1)
        elif x.ndim == 4:
            shuffled_x = x.dimshuffle(3,0,1,2)
        else:
            raise ValueError('BatchedDot inputs must have between 2-4 dimensions, but x has ' + str(x.ndim) + ' dimensions')

        if y.ndim == 2:
            shuffled_y = y.dimshuffle(1,0)
        elif y.ndim == 3:
            shuffled_y = y.dimshuffle(2,0,1)
        elif y.ndim == 4:
            shuffled_y = y.dimshuffle(3,0,1,2)
        else:
            raise ValueError('BatchedDot inputs must have between 2-4 dimensions, but y has ' + str(y.ndim) + ' dimensions')

        dot = T.batched_dot(shuffled_x, shuffled_y)
        # Move the batch axis of the result back to the last position.
        if dot.ndim == 2:
            return dot.dimshuffle(1,0)
        elif dot.ndim == 3:
            return dot.dimshuffle(1,2,0)
        elif dot.ndim == 4:
            return dot.dimshuffle(1,2,3,0)
项目:gogh-figure    作者:joelmoniz    | 项目源码 | 文件源码
def batched_gram5d(self, fmap):
        """Return normalised Gram matrices for a 4-D stack of feature maps.

        ``fmap`` is (layer, batch, featuremaps, height*width); the first two
        axes are merged before the batched product.
        """
        # (layer*batch, featuremaps, height*width)
        fmap2=fmap.reshape((-1, fmap.shape[-2], fmap.shape[-1]))

        # The term can't be taken outside as a T.mean in style_loss(), since the width and height of the image might vary
        # NOTE(review): the divisor T.prod(fmap.shape[-2:]) is reconstructed
        # from the surviving '[-2:])' fragment -- confirm against upstream.
        return T.batched_dot(fmap2, fmap2.dimshuffle(0,2,1)).reshape(fmap.shape)/T.prod(fmap.shape[-2:])
项目:gogh-figure    作者:joelmoniz    | 项目源码 | 文件源码
def batched_gram(self, fmap):
        """Return normalised Gram matrices for a batch of feature maps.

        ``fmap`` is (batch, featuremaps, height*width).  The divisor depends
        on ``self.net_type``: the product of the last two dimensions for 0,
        the last dimension for 1.  Any other value falls through and returns
        None.
        """
        # The term can't be taken outside as a T.mean in style_loss(), since the width and height of the image might vary
        # NOTE(review): the T.prod(fmap.shape...) divisors are reconstructed
        # from the surviving '[-2:])' / '[-1])' fragments -- confirm upstream.
        if self.net_type == 0:
            return T.batched_dot(fmap, fmap.dimshuffle(0,2,1))/T.prod(fmap.shape[-2:])
        elif self.net_type == 1:
            return T.batched_dot(fmap, fmap.dimshuffle(0,2,1))/T.prod(fmap.shape[-1])
项目:DEEP-CLICK-MODEL    作者:THUIR    | 项目源码 | 文件源码
def output_func(self, input):
        """Score (q, a) pairs bilinearly, apply softmax, return the arg-max class.

        Also stores ``self.p_y_given_x``, ``self.prob`` (last softmax column)
        and ``self.y_pred`` on the instance.
        """
        q, a = input[0], input[1]
        # Per-example bilinear form of q and a under self.W.
        dot = T.batched_dot(q, T.dot(a, self.W))

        self.p_y_given_x = T.nnet.softmax(dot + self.b.dimshuffle('x', 0))
        self.prob = self.p_y_given_x[:, -1]
        self.y_pred = T.argmax(self.p_y_given_x, axis=1)
        return self.y_pred
项目:DEEP-CLICK-MODEL    作者:THUIR    | 项目源码 | 文件源码
def output_func(self, input):
        """Mix a bilinear (q, a) score with a linear feature score and classify.

        The two scores are blended with ``self.lamda`` (l and 1 - l) before the
        softmax.  Also stores ``self.p_y_given_x``, ``self.prob`` (last softmax
        column) and ``self.y_pred`` on the instance.
        """
        q, a, feats = input[0], input[1], input[2]

        dot = T.batched_dot(q, T.dot(a, self.W))
        feats_dot = T.dot(feats, self.W_feats)
        l = self.lamda.dimshuffle('x', 0)
        self.p_y_given_x = T.nnet.softmax(l * dot + (1 - l) * feats_dot + self.b.dimshuffle('x', 0))
        self.prob = self.p_y_given_x[:, -1]
        self.y_pred = T.argmax(self.p_y_given_x, axis=1)
        return self.y_pred
项目:DEEP-CLICK-MODEL    作者:THUIR    | 项目源码 | 文件源码
def output_func(self, input):
        """Score (q, a) pairs bilinearly, apply softmax, return the arg-max class.

        Also stores ``self.p_y_given_x``, ``self.prob`` (last softmax column)
        and ``self.y_pred`` on the instance.
        """
        q, a = input[0], input[1]
        # Per-example bilinear form of q and a under self.W.
        dot = T.batched_dot(q, T.dot(a, self.W))
        self.p_y_given_x = T.nnet.softmax(dot + self.b.dimshuffle('x', 0))
        self.prob = self.p_y_given_x[:, -1]
        self.y_pred = T.argmax(self.p_y_given_x, axis=1)
        return self.y_pred
项目:DEEP-CLICK-MODEL    作者:THUIR    | 项目源码 | 文件源码
def output_func(self, input):
        """Add a bilinear (q, a) score to a linear feature score and classify.

        Also stores ``self.p_y_given_x``, ``self.prob`` (last softmax column)
        and ``self.y_pred`` on the instance.
        """
        q, a, feats = input[0], input[1], input[2]

        dot = T.batched_dot(q, T.dot(a, self.W))
        feats_dot = T.dot(feats, self.W_feats)

        self.p_y_given_x = T.nnet.softmax(dot + feats_dot + self.b.dimshuffle('x', 0))
        self.prob = self.p_y_given_x[:, -1]
        self.y_pred = T.argmax(self.p_y_given_x, axis=1)
        return self.y_pred
项目:DEEP-CLICK-MODEL    作者:THUIR    | 项目源码 | 文件源码
def output_func(self, input):
        """Concatenate the bilinear (q, a) score with q, a and feats along axis 1."""
        q, a, feats = input[0], input[1], input[2]

        # Per-example bilinear form of q and a under the transpose of self.W.
        dot = T.batched_dot(q, T.dot(a, self.W.T))
        out = T.concatenate([dot.dimshuffle(0, 'x'), q, a, feats], axis=1)
        return out
项目:DEEP-CLICK-MODEL    作者:THUIR    | 项目源码 | 文件源码
def output_func(self, input):
        """Concatenate the query with per-position bilinear scores and inputs.

        input[0] is the query; for each of the ``self.position`` following
        inputs, a bilinear score against the query (under ``self.W[i].T``) and
        the input itself are appended.  Everything is joined along axis 1.
        """
        q = input[0]
        all_list = [q]
        for i in range(self.position):
            dot = T.batched_dot(q, T.dot(input[i + 1], self.W[i].T))
            all_list.append(dot.dimshuffle(0, 'x'))
            all_list.append(input[i + 1])
        out = T.concatenate(all_list, axis=1)
        return out
项目:DEEP-CLICK-MODEL    作者:THUIR    | 项目源码 | 文件源码
def output_func(self, input):
        """Concatenate the bilinear (q, a) score with feats along axis 1."""
        q, a, feats = input[0], input[1], input[2]

        # Per-example bilinear form of q and a under the transpose of self.W.
        dot = T.batched_dot(q, T.dot(a, self.W.T))
        out = T.concatenate([dot.dimshuffle(0, 'x'), feats], axis=1)
        return out
项目:DEEP-CLICK-MODEL    作者:THUIR    | 项目源码 | 文件源码
def output_func(self, input):
        """Concatenate the bilinear (q, a) score with q and a along axis 1."""
        q, a = input[0], input[1]

        # Per-example bilinear form of q and a under the transpose of self.W.
        dot = T.batched_dot(q, T.dot(a, self.W.T))
        out = T.concatenate([dot.dimshuffle(0, 'x'), q, a], axis=1)
        return out
项目:DEEP-CLICK-MODEL    作者:THUIR    | 项目源码 | 文件源码
def output_func(self, input):
        """Project q and a separately, score the projections, and concatenate.

        The output is the per-example dot product of the two projections
        followed by the raw q and a, joined along axis 1.
        """
        q, a = input[0], input[1]

        qdot = T.dot(q, self.Wq)
        adot = T.dot(a, self.Wa)
        dot = T.batched_dot(qdot, adot)
        out = T.concatenate([dot.dimshuffle(0, 'x'), q, a], axis=1)
        return out