Python chainer.functions module: batch_matmul() example source code

We extracted the following 35 code examples from open-source Python projects to illustrate how to use chainer.functions.batch_matmul(). The snippets are shown as extracted: surrounding imports and class definitions are omitted, and F, functions, and chaFunc all refer to chainer.functions.
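
Before diving into the project extracts, here is a minimal sketch of what F.batch_matmul computes (our own illustration, not taken from any project below): one matrix product per batch element over 3-D inputs, with transa/transb transposing the last two axes of the corresponding operand, and 2-D inputs treated as batches of column vectors.

import numpy as np
import chainer.functions as F

# batch of 4 independent matrix products: (4, 2, 3) x (4, 3, 5) -> (4, 2, 5)
a = np.random.rand(4, 2, 3).astype(np.float32)
b = np.random.rand(4, 3, 5).astype(np.float32)
assert F.batch_matmul(a, b).shape == (4, 2, 5)

# transa transposes the last two axes of a: (4, 3, 2) acts like (4, 2, 3)
at = np.random.rand(4, 3, 2).astype(np.float32)
assert F.batch_matmul(at, b, transa=True).shape == (4, 2, 5)

# 2-D inputs are treated as (batch, n, 1) column vectors, so this is a
# batched dot product with result shape (4, 1, 1)
u = np.random.rand(4, 3).astype(np.float32)
v = np.random.rand(4, 3).astype(np.float32)
assert F.batch_matmul(u, v, transa=True).shape == (4, 1, 1)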

Project: chainer_nmt    Author: odashi
def _context(self, p, fb_mat, fbe_mat):
    batch_size, source_length, _ = fb_mat.data.shape
    # {pe,e}_mat: shape = [batch * srclen, atten]
    pe_mat = F.reshape(
        F.broadcast_to(
            F.expand_dims(self.p_e(p), 1),
            [batch_size, source_length, self.atten_size]),
        [batch_size * source_length, self.atten_size])
    e_mat = F.tanh(fbe_mat + pe_mat)
    # a_mat: shape = [batch, srclen]
    a_mat = F.softmax(F.reshape(self.e_a(e_mat), [batch_size, source_length]))
    # q: shape = [batch, 2 * hidden]
    q = F.reshape(
        F.batch_matmul(a_mat, fb_mat, transa=True),
        [batch_size, 2 * self.hidden_size])

    return q
Project: convolutional_seq2seq    Author: soskek
def attend(self, query, key, value, mask, minfs=None):
        """
        Input shapes:
            q=(b, units, dec_l), k=(b, units, enc_l),
            v=(b, units, dec_l, enc_l), m=(b, dec_l, enc_l)
        """

        # Calculate Attention Scores with Mask for Zero-padded Areas
        pre_a = F.batch_matmul(query, key, transa=True)  # (b, dec_l, enc_l)
        minfs = self.xp.full(pre_a.shape, -np.inf, pre_a.dtype) \
            if minfs is None else minfs
        pre_a = F.where(mask, pre_a, minfs)
        a = F.softmax(pre_a, axis=2)
        # if every value along axis=2 is -inf, softmax yields NaN, so re-mask
        a = F.where(self.xp.isnan(a.data),
                    self.xp.zeros(a.shape, dtype=a.dtype), a)
        reshaped_a = a[:, None]  # (b, 1, dec_xl, enc_l)

        # Calculate Weighted Sum
        pre_c = F.broadcast_to(reshaped_a, value.shape) * value
        c = F.sum(pre_c, axis=3, keepdims=True)  # (b, units, dec_xl, 1)
        return c
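
The re-masking after the softmax above guards against rows in which every position is masked: a row of all -inf scores softmaxes to NaN. A standalone sketch of the same trick on hypothetical toy inputs:

import numpy as np
import chainer.functions as F

scores = np.array([[1.0, 2.0, 3.0],
                   [0.5, 0.5, 0.5]], dtype=np.float32)
mask = np.array([[True, True, False],
                 [False, False, False]])  # second row fully masked
minfs = np.full(scores.shape, -np.inf, dtype=scores.dtype)

a = F.softmax(F.where(mask, scores, minfs), axis=1)
# the fully masked row comes out as all NaN; re-mask it to zeros
a = F.where(np.isnan(a.data), np.zeros(a.shape, dtype=a.dtype), a)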
Project: nn_parsers    Author: odashi
def forward(self, data):
    ep_list = [self.p_embed(d[0], d[1]) for d in data]
    ec_list = [self.c_embed(d[0], d[1]) for d in data]
    er_list = [self.r_embed(d[0], d[1]) for d in data]
    p_list = self.p_encode(ep_list)
    c_list = self.c_encode(ec_list)
    r_list = self.r_encode(er_list)

    P = functions.reshape(
      functions.concat(p_list, 0),
      (1, len(data), self.hidden_size))
    C = functions.reshape(
      functions.concat(c_list, 0),
      (1, len(data), self.hidden_size))
    R = functions.concat(r_list, 0)

    parent_scores = functions.reshape(
      functions.batch_matmul(C, P, transb=True),
      (len(data), len(data)))
    root_scores = functions.reshape(
      self.r_scorer(R),
      (1, len(data)))

    return parent_scores, root_scores
Project: seq2seq_temporal_attention    Author: aistairc
def __call__(self, a_list, state, batch_size, xp):
        e_list = []
        sum_e = xp.zeros((batch_size, 1), dtype=xp.float32)
        for a in a_list:
            w = reshape(batch_matmul(state['h2'], a, transa=True), (batch_size, 1))
            w.data = xp.clip(w.data, -40, 40)
            e = exp(w)
            e_list.append(e)
            sum_e = sum_e + e

        context = xp.zeros((batch_size, self.hidden_size), dtype=xp.float32)

        for a, e in zip(a_list, e_list):
            e /= sum_e
            context = context + reshape(batch_matmul(a, e), (batch_size, self.hidden_size))
        return context, e_list, sum_e
Project: teras    Author: chantera
def __call__(self, x, hs):
        batch, dim = x.shape
        alphas = 0
        _sum = 0
        for h in F.transpose_sequence(hs[:batch]):
            size = h.shape[0]
            if size < batch:
                h = F.vstack([h, variable.Variable(
                    self.xp.zeros((batch - size, h.shape[1]), dtype='f'))])
            score = self._score_func(x, h)
            e = F.exp(score)
            _sum += e
            alphas += batch_matmul(h, e)
        c = F.reshape(batch_matmul(F.reshape(alphas, (batch, dim)),
                                   (1 / _sum)), (batch, dim))
        return c
Project: teras    Author: chantera
def __call__(self, x1, x2):
        xp = self.xp
        out_size = self.out_size
        batch_size, len1, dim1 = x1.shape
        if not self.nobias[0]:
            x1 = F.concat((x1, xp.ones((batch_size, len1, 1),
                                       dtype=xp.float32)), axis=2)
            dim1 += 1
        len2, dim2 = x2.shape[1:]
        if not self.nobias[1]:
            x2 = F.concat((x2, xp.ones((batch_size, len2, 1),
                                       dtype=xp.float32)), axis=2)
            dim2 += 1
        x1_reshaped = F.reshape(x1, (batch_size * len1, dim1))
        W_reshaped = F.reshape(F.transpose(self.W, (0, 2, 1)),
                               (dim1, out_size * dim2))
        affine = F.reshape(F.matmul(x1_reshaped, W_reshaped),
                           (batch_size, len1 * out_size, dim2))
        biaffine = F.transpose(
            F.reshape(batch_matmul(affine, x2, transb=True),
                      (batch_size, len1, out_size, len2)),
            (0, 1, 3, 2))
        if not self.nobias[2]:
            biaffine += F.broadcast_to(self.b, biaffine.shape)
        return biaffine
Project: ROCStory_skipthought_baseline    Author: soskek
def calculate_score(self, h, pos, neg, pos_score=None, neg_score=None, multipos=False):
        #h_pro = self.act1(self.W_predict(h))
        h_pro = h
        if multipos:
            # If multiple positive vectors are given,
            # max score is picked up. (other ones are not propagated)
            pos_scoreL = [F.batch_matmul(h_pro, pos_one, transa=True) for pos_one in pos]
            pos_score = F.max(F.concat(pos_scoreL, axis=1), axis=1, keepdims=True)
        else:
            pos_score = F.batch_matmul(h_pro, pos, transa=True)
        neg_score = F.batch_matmul(h_pro, neg, transa=True)

        return pos_score, neg_score
Project: chainerrl    Author: chainer
def matmul_v3(a, b, **kwargs):
        if (a.ndim, b.ndim) == (3, 3):
            return F.batch_matmul(a, b, **kwargs)
        elif (a.ndim, b.ndim) == (2, 2):
            return F.matmul(a, b, **kwargs)
        else:
            raise Exception("unsupported shapes: {}, {}".format(
                a.shape, b.shape))
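
A quick check of this dispatch, assuming matmul_v3 from the snippet above is in scope (the shapes are our own illustration):

import numpy as np

a3 = np.ones((2, 3, 4), dtype=np.float32)
b3 = np.ones((2, 4, 5), dtype=np.float32)
print(matmul_v3(a3, b3).shape)  # (2, 3, 5), dispatched to F.batch_matmul
a2 = np.ones((3, 4), dtype=np.float32)
b2 = np.ones((4, 5), dtype=np.float32)
print(matmul_v3(a2, b2).shape)  # (3, 5), dispatched to F.matmul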
Project: chainer-neural-style    Author: dsanno
def gram_matrix(x):
    b, ch, h, w = x.data.shape
    v = F.reshape(x, (b, ch, w * h))
    return F.batch_matmul(v, v, transb=True) / np.float32(ch * w * h)
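
A usage sketch for gram_matrix with a hypothetical feature-map batch; each (ch, ch) Gram matrix holds the channel-by-channel inner products of the flattened feature map, normalized by ch * w * h:

import numpy as np
import chainer

# hypothetical conv feature maps: (batch, channels, height, width)
x = chainer.Variable(np.random.rand(2, 64, 32, 32).astype(np.float32))
g = gram_matrix(x)  # uses the definition above
assert g.shape == (2, 64, 64)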
Project: depccg    Author: masashi-y
def forward_batch(self, x1, x2):
        xp = cuda.get_array_module(x1.data)
        batch, slen, hidden = x2.shape
        return F.batch_matmul(
                F.concat([x1, xp.ones((batch, slen, 1), 'f')], 2), # (batch, slen, hidden+1)
                F.reshape(F.linear(F.reshape(x2, (batch * slen, -1)), self.W),
                    (batch, slen, -1)), transb=True)
Project: depccg    Author: masashi-y
def __call__(self, e1, e2):
        ele2 = F.reshape(
                F.batch_matmul(e1[:,:,None], e2[:,None,:]), (-1, self.in_size1 * self.in_size2))

        res = F.matmul(ele2,
                F.reshape(self.W, (self.in_size1 * self.in_size2, self.out_size))) + \
            F.matmul(e1, self.V1) + \
            F.matmul(e2, self.V2)

        res, bias = F.broadcast(res, self.b)
        return res + bias
Project: lencon    Author: kiyukuta
def __call__(self, p, train=True):
        attention = self._attend(p)

        if self.history is not None:
            self.history.append(
                chainer.cuda.to_cpu(attention.data[0, :, 0]).tolist())

        ret = F.batch_matmul(F.swapaxes(self.source_hiddens, 2, 1), attention)
        return F.reshape(ret, (self.batchsize, self.dim_out))
Project: lencon    Author: kiyukuta
def _attend(self, p):
        weight = F.batch_matmul(self.source_hiddens, p)
        weight = F.where(self.mask, weight, self.minf)
        attention = F.softmax(weight)
        return attention
Project: chainer-deconv    Author: germanRos
def setUp(self):
        self.x1 = numpy.random.uniform(
            .5, 1, (batch_size, m, k)).astype(numpy.float32)
        self.x2 = numpy.random.uniform(
            .5, 1, (batch_size, k, n)).astype(numpy.float32)
        self.gy = numpy.random.uniform(
            -1, 1, (batch_size, m, n)).astype(numpy.float32)
        self.op = lambda x, y: F.batch_matmul(x, y)
        self.forward_answer = numpy.array([
            numpy.dot(self.x1[i], self.x2[i])
            for i in six.moves.range(batch_size)])
Project: chainer-deconv    Author: germanRos
def setUp(self):
        self.x1 = numpy.random.uniform(
            .5, 1, (batch_size, k, m)).astype(numpy.float32)
        self.x2 = numpy.random.uniform(
            .5, 1, (batch_size, k, n)).astype(numpy.float32)
        self.gy = numpy.random.uniform(
            -1, 1, (batch_size, m, n)).astype(numpy.float32)
        self.op = lambda x, y: F.batch_matmul(x, y, transa=True)
        self.forward_answer = numpy.array([
            numpy.dot(self.x1[i].T, self.x2[i])
            for i in six.moves.range(batch_size)])
Project: chainer-deconv    Author: germanRos
def setUp(self):
        self.x1 = numpy.random.uniform(
            .5, 1, (batch_size, m, k)).astype(numpy.float32)
        self.x2 = numpy.random.uniform(
            .5, 1, (batch_size, n, k)).astype(numpy.float32)
        self.gy = numpy.random.uniform(
            -1, 1, (batch_size, m, n)).astype(numpy.float32)
        self.op = lambda x, y: F.batch_matmul(x, y, transb=True)
        self.forward_answer = numpy.array([
            numpy.dot(self.x1[i], self.x2[i].T)
            for i in six.moves.range(batch_size)])
Project: chainer-deconv    Author: germanRos
def setUp(self):
        self.x1 = numpy.random.uniform(
            .5, 1, (batch_size, k, m)).astype(numpy.float32)
        self.x2 = numpy.random.uniform(
            .5, 1, (batch_size, n, k)).astype(numpy.float32)
        self.gy = numpy.random.uniform(
            -1, 1, (batch_size, m, n)).astype(numpy.float32)
        self.op = lambda x, y: F.batch_matmul(x, y, transa=True, transb=True)
        self.forward_answer = numpy.array([
            numpy.dot(self.x1[i].T, self.x2[i].T)
            for i in six.moves.range(batch_size)])
Project: chainer-deconv    Author: germanRos
def setUp(self):
        self.x1 = numpy.random.uniform(
            .5, 1, (batch_size, m,)).astype(numpy.float32)
        self.x2 = numpy.random.uniform(
            .5, 1, (batch_size, m,)).astype(numpy.float32)
        self.gy = numpy.random.uniform(
            -1, 1, (batch_size, 1, 1)).astype(numpy.float32)
        self.op = lambda x, y: F.batch_matmul(x, y, transa=True)
        self.forward_answer = numpy.array([
            numpy.dot(self.x1[i], self.x2[i])
            for i in six.moves.range(batch_size)]).reshape(batch_size, 1, 1)
Project: chainer-deconv    Author: germanRos
def setUp(self):
        self.x1 = numpy.random.uniform(
            .5, 1, (1, m, k)).astype(numpy.float32)
        self.x2 = numpy.random.uniform(
            .5, 1, (1, k, n)).astype(numpy.float32)
        self.gy = numpy.random.uniform(
            -1, 1, (1, m, n)).astype(numpy.float32)
        self.op = lambda x, y: F.batch_matmul(x, y)
        self.forward_answer = numpy.array([
            numpy.dot(self.x1[i], self.x2[i])
            for i in six.moves.range(1)])
Project: chainer-deconv    Author: germanRos
def setUp(self):
        self.x1 = numpy.random.uniform(
            .5, 1, (batch_size, m, k)).astype(numpy.float32)
        self.x2 = numpy.random.uniform(
            .5, 1, (1, k, n)).astype(numpy.float32)
        self.gy = numpy.random.uniform(
            -1, 1, (batch_size, m, n)).astype(numpy.float32)
        self.op = lambda x, y: F.batch_matmul(
            x, F.broadcast_to(y, (batch_size, k, n)))
        self.forward_answer = numpy.array([
            numpy.dot(self.x1[i], self.x2[0])
            for i in six.moves.range(batch_size)])
Project: chainer-deconv    Author: germanRos
def setUp(self):
        self.x1 = numpy.random.uniform(
            .5, 1, (batch_size, m, k)).astype(numpy.float32)
        self.x2 = numpy.random.uniform(
            .5, 1, (k, n)).astype(numpy.float32)
        self.gy = numpy.random.uniform(
            -1, 1, (batch_size, m, n)).astype(numpy.float32)
        self.op = lambda x, y: F.batch_matmul(
            x, F.broadcast_to(F.expand_dims(y, 0), (batch_size, k, n)))
        self.forward_answer = numpy.array([
            numpy.dot(self.x1[i], self.x2)
            for i in six.moves.range(batch_size)])
Project: chainer-deconv    Author: germanRos
def test_identity_cpu(self):
        eye = _make_eye(self.x.shape)
        x = chainer.Variable(self.x)
        y = functions.batch_matmul(x, functions.batch_inv(x))
        gradient_check.assert_allclose(y.data, eye,
                                       **self.check_forward_options)
Project: chainer-deconv    Author: germanRos
def test_identity_gpu(self):
        eye = cuda.to_gpu(_make_eye(self.x.shape))
        x = chainer.Variable(cuda.to_gpu(self.x))
        y = functions.batch_matmul(x, functions.batch_inv(x))
        gradient_check.assert_allclose(y.data, eye,
                                       **self.check_forward_options)
Project: deep_metric_learning    Author: ronekko
def angular_mc_loss(f, f_p, alpha=45, in_degree=True):
    '''
    Args:
        f (chainer.Variable or xp.ndarray):
            Anchor vectors. Each vector in f must be L2-normalized.
        f_p (chainer.Variable or xp.ndarray):
            Positive vectors. Each vector in f_p must be L2-normalized.
    '''
    xp = cuda.get_array_module(f)

    if in_degree:
        alpha = np.deg2rad(alpha)
    sq_tan_alpha = np.tan(alpha) ** 2
    n_pairs = len(f)

    # first and second term of f_{a,p,n}
    term1 = 4 * sq_tan_alpha + matmul(f + f_p, transpose(f_p))
    term2 = 2 * (1 + sq_tan_alpha) * F.sum(f * f_p, axis=1, keepdims=True)
#    term2 = 2 * (1 + sq_tan_alpha) * F.batch_matmul(f, f_p, transa=True).reshape(n_pairs, 1)

    f_apn = term1 - F.broadcast_to(term2, (n_pairs, n_pairs))
    # multiply zero to diagonal components of f_apn
    mask = xp.ones_like(f_apn.data) - xp.eye(n_pairs, dtype=f.dtype)
    f_apn = f_apn * mask

    return F.average(F.logsumexp(f_apn, axis=1))
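
A usage sketch for angular_mc_loss with hypothetical random embeddings; as the docstring requires, both inputs are L2-normalized row-wise (the snippet above also assumes matmul and transpose were imported from chainer.functions):

import numpy as np

f = np.random.rand(8, 16).astype(np.float32)
f_p = np.random.rand(8, 16).astype(np.float32)
f /= np.linalg.norm(f, axis=1, keepdims=True)      # normalize anchors
f_p /= np.linalg.norm(f_p, axis=1, keepdims=True)  # normalize positives
loss = angular_mc_loss(f, f_p)  # scalar chainer.Variable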
Project: nn_parsers    Author: odashi
def forward(self, data):
    self.reset_state()

    x_list = [XP.iarray([d[0]]) for d in data]
    ep_list = [self.p_embed(x) for x in x_list]
    ec_list = [self.c_embed(x) for x in x_list]
    er_list = [self.r_embed(x) for x in x_list]
    p_list = self.p_encode(ep_list)
    c_list = self.c_encode(ec_list)
    r_list = self.r_encode(er_list)

    P = functions.reshape(
      functions.concat(p_list, 0),
      (1, len(data), self.hidden_size))
    C = functions.reshape(
      functions.concat(c_list, 0),
      (1, len(data), self.hidden_size))
    R = functions.concat(r_list, 0)

    parent_scores = functions.reshape(
      functions.batch_matmul(C, P, transb=True),
      (len(data), len(data)))
    root_scores = functions.reshape(
      self.r_scorer(R),
      (1, len(data)))

    return parent_scores, root_scores
Project: nmtrain    Author: philip30
def __call__(self, S, h):
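    # S: (batch, src_len, hidden), h: (batch, hidden); h acts as (batch, hidden, 1),
    # so softmax over axis=1 plus squeeze yields weights of shape (batch, src_len)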
    return F.squeeze(F.softmax(F.batch_matmul(S, h)), axis=2)
Project: nmtrain    Author: philip30
def __call__(self, S, h):
    batch_size, src_len, hidden_size = S.data.shape
    S = self.inner_weight(F.reshape(S, (batch_size * src_len, hidden_size)))
    S = F.reshape(S, (batch_size, src_len, hidden_size))
    a = F.softmax(F.squeeze(F.batch_matmul(S, h), axis=2))
    return a

# MLP layer, as in Bahdanau+ 15
Project: seq2seq_temporal_attention    Author: aistairc
def __call__(self, a_list, state, batch_size, xp):
        e_list = []
        sum_e = xp.zeros((batch_size, 1), dtype=xp.float32)
        for a in a_list:
            w = self.aw(a, state['h2'])
            w.data = xp.clip(w.data, -20, 20)
            e = exp(w)
            e_list.append(e)
            sum_e = sum_e + e

        context = xp.zeros((batch_size, self.hidden_size), dtype=xp.float32)
        for a, e in zip(a_list, e_list):
            e /= sum_e
            context = context + reshape(batch_matmul(a, e), (batch_size, self.hidden_size))
        return context, e_list, sum_e
Project: seq2seq_temporal_attention    Author: aistairc
def __call__(self, a_list, state, batch_size, xp):
        e_list = []
        sum_e = xp.zeros((batch_size, 1), dtype=xp.float32)
        for a in a_list:
            v = tanh(self.av(array.concat.concat((a, state['h2']), axis=1)))
            w = self.vw(v)
            e = exp(w)
            e_list.append(e)
            sum_e = sum_e + e

        context = xp.zeros((batch_size, self.hidden_size), dtype=xp.float32)
        for a, e in zip(a_list, e_list):
            e /= sum_e
            context = context + reshape(batch_matmul(a, e), (batch_size, self.hidden_size))
        return context, e_list, sum_e
Project: teras    Author: chantera
def _score_general(self, x, h):
        batch, dim = x.shape
        return batch_matmul(F.reshape(self.W(x), (batch, 1, dim)), h)
Project: chainer_frmqn    Author: okdshin
def read(self, h):
        #M_key = F.swapaxes(F.stack(self.key_buff, axis=0), axis1=0, axis2=1) # (B, M, m)
        M_key = F.stack(self.key_buff, axis=1) # (B, M, m)

        self.p = F.softmax(F.reshape(F.batch_matmul(M_key, h, transa=False, transb=False), (h.shape[0], M_key.shape[1]))) # (B, M)
        #p = F.reshape(p, (h.shape[0], 1, M_key.shape[1])) # (B, 1, M)
        #print("p", p.shape)
        #M_val = F.swapaxes(F.stack(self.val_buff, axis=0), axis1=0, axis2=1) # (B, M, m)
        M_val = F.stack(self.val_buff, axis=1) # (B, M, m)
        #print("M_val", M_val.shape)
        o = F.batch_matmul(self.p, M_val, transa=True, transb=False) # (B, 1, m)
        o = F.reshape(o, (o.shape[0], o.shape[2])) # (B, m)
        #print("o", o.shape)
        return o, self.p
Project: chainer-qrnn    Author: musyoku
def __call__(self, X, ht_enc, H_enc, skip_mask=None):
        pad = self._kernel_size - 1
        WX = self.W(X)
        if pad > 0:
            WX = WX[:, :, :-pad]
        Vh = self.V(ht_enc)
        Vh, WX = functions.broadcast(functions.expand_dims(Vh, axis=2), WX)

        # f-pooling
        Z, F, O = functions.split_axis(WX + Vh, 3, axis=1)
        Z = functions.tanh(Z)
        F = self.zoneout(F)
        O = functions.sigmoid(O)
        T = Z.shape[2]

        # compute ungated hidden states
        self.contexts = []
        for t in xrange(T):
            z = Z[..., t]
            f = F[..., t]
            if t == 0:
                ct = (1 - f) * z
                self.contexts.append(ct)
            else:
                ct = f * self.contexts[-1] + (1 - f) * z
                self.contexts.append(ct)

        if skip_mask is not None:
            assert skip_mask.shape[1] == H_enc.shape[2]
            softmax_bias = (skip_mask == 0) * -1e6

        # compute attention weights (eq.8)
        H_enc = functions.swapaxes(H_enc, 1, 2)
        for t in xrange(T):
            ct = self.contexts[t]
            bias = 0 if skip_mask is None else softmax_bias[..., None]  # to skip PAD
            mask = 1 if skip_mask is None else skip_mask[..., None]     # to skip PAD
            alpha = functions.batch_matmul(H_enc, ct) + bias
            alpha = functions.softmax(alpha) * mask
            alpha = functions.broadcast_to(alpha, H_enc.shape)  # copy
            kt = functions.sum(alpha * H_enc, axis=1)
            ot = O[..., t]
            self.ht = ot * self.o(functions.concat((kt, ct), axis=1))

            if t == 0:
                self.H = functions.expand_dims(self.ht, 2)
            else:
                self.H = functions.concat((self.H, functions.expand_dims(self.ht, 2)), axis=2)

        return self.H
Project: chainer-qrnn    Author: musyoku
def forward_one_step(self, X, ht_enc, H_enc, skip_mask):
        pad = self._kernel_size - 1
        WX = self.W(X)[:, :, -pad-1, None]
        Vh = self.V(ht_enc)

        Vh, WX = functions.broadcast(functions.expand_dims(Vh, axis=2), WX)

        # f-pooling
        Z, F, O = functions.split_axis(WX + Vh, 3, axis=1)
        Z = functions.tanh(Z)
        F = self.zoneout(F)
        O = functions.sigmoid(O)
        T = Z.shape[2]

        # compute ungated hidden states
        for t in xrange(T):
            z = Z[..., t]
            f = F[..., t]
            if self.contexts is None:
                ct = (1 - f) * z
                self.contexts = [ct]
            else:
                ct = f * self.contexts[-1] + (1 - f) * z
                self.contexts.append(ct)

        if skip_mask is not None:
            assert skip_mask.shape[1] == H_enc.shape[2]
            softmax_bias = (skip_mask == 0) * -1e6

        # compute attention weights (eq.8)
        H_enc = functions.swapaxes(H_enc, 1, 2)
        for t in xrange(T):
            ct = self.contexts[t - T]
            bias = 0 if skip_mask is None else softmax_bias[..., None]  # to skip PAD
            mask = 1 if skip_mask is None else skip_mask[..., None]     # to skip PAD
            alpha = functions.batch_matmul(H_enc, ct) + bias
            alpha = functions.softmax(alpha) * mask
            alpha = functions.broadcast_to(alpha, H_enc.shape)  # copy
            kt = functions.sum(alpha * H_enc, axis=1)
            ot = O[..., t]
            self.ht = ot * self.o(functions.concat((kt, ct), axis=1))

            if self.H is None:
                self.H = functions.expand_dims(self.ht, 2)
            else:
                self.H = functions.concat((self.H, functions.expand_dims(self.ht, 2)), axis=2)

        return self.H
Project: nn_parsers    Author: odashi
def forward(self, data):
    self.reset_state()

    x_list = [XP.iarray([d[0]]) for d in data]
    pe_list = [self.p_embed(x) for x in x_list]
    ce_list = [self.c_embed(x) for x in x_list]
    re_list = [self.r_embed(x) for x in x_list]

    pf_list = []
    for pe in pe_list:
      pf_list.append(self.p_forward(pe))

    cf_list = []
    for ce in ce_list:
      cf_list.append(self.c_forward(ce))

    rf_list = []
    for re in re_list:
      rf_list.append(self.r_forward(re))

    pb_list = []
    for pe in reversed(pe_list):
      pb_list.append(self.p_backward(pe))

    cb_list = []
    for ce in reversed(ce_list):
      cb_list.append(self.c_backward(ce))

    rb_list = []
    for re in reversed(re_list):
      rb_list.append(self.r_backward(re))

    pc_list = [self.p_combine(pf, pb) for pf, pb in zip(pf_list, pb_list)]
    cc_list = [self.c_combine(cf, cb) for cf, cb in zip(cf_list, cb_list)]
    rc_list = [self.r_combine(rf, rb) for rf, rb in zip(rf_list, rb_list)]

    P = functions.reshape(
      functions.concat(pc_list, 0),
      (1, len(data), self.hidden_size))
    C = functions.reshape(
      functions.concat(cc_list, 0),
      (1, len(data), self.hidden_size))
    R = functions.concat(rc_list, 0)

    parent_scores = functions.reshape(
      functions.batch_matmul(C, P, transb=True),
      (len(data), len(data)))
    root_scores = functions.reshape(
      self.r_scorer(R),
      (1, len(data)))

    return parent_scores, root_scores
Project: mlpnlp-nmt    Author: mlpnlp
def calcAttention(self, h1, hList, aList, encLen, cMBSize, args):
        # if attention is disabled, return h1 unchanged
        if self.attn_mode == 0:
            return h1
        # 1. prepare the decoder-side query used for attention
        target1 = self.model.attnIn_L1(h1)  # affine transform of h1
        # (cMBSize, self.hDim) => (cMBSize, 1, self.hDim)
        target2 = chaFunc.expand_dims(target1, axis=1)
        # (cMBSize, 1, self.hDim) => (cMBSize, encLen, self.hDim)
        target3 = chaFunc.broadcast_to(target2, (cMBSize, encLen, self.hDim))
        # target3 = chaFunc.broadcast_to(chaFunc.reshape(
        #    target1, (cMBSize, 1, self.hDim)), (cMBSize, encLen, self.hDim))
        # 2. compute the attention scores
        if self.attn_mode == 1:  # bilinear
            # for bilinear attention, hList1 == hList2 holds
            # shape: (cMBSize, encLen)
            aval = chaFunc.sum(target3 * aList, axis=2)
        elif self.attn_mode == 2:  # MLP
            # compute attnSum in a single call
            t1 = chaFunc.reshape(target3, (cMBSize * encLen, self.hDim))
            # (cMBSize*encLen, self.hDim) => (cMBSize*encLen, 1)
            t2 = self.model.attnSum(chaFunc.tanh(t1 + aList))
            # shape: (cMBSize, encLen)
            aval = chaFunc.reshape(t2, (cMBSize, encLen))
            # aval = chaFunc.reshape(self.model.attnSum(
            #    chaFunc.tanh(t1 + aList)), (cMBSize, encLen))
        else:
            assert 0, "ERROR"
        # 3. apply softmax to get the attention weights
        cAttn1 = chaFunc.softmax(aval)   # (cMBSize, encLen)
        # 4. build the context vector weighted by the attention
        # (cMBSize, encLen) => (cMBSize, 1, encLen)
        cAttn2 = chaFunc.expand_dims(cAttn1, axis=1)
        # (1, encLen) x (encLen, hDim) matmul, batched over cMBSize
        #     => (cMBSize, 1, hDim)
        cAttn3 = chaFunc.batch_matmul(cAttn2, hList)
        # cAttn3 = chaFunc.batch_matmul(chaFunc.reshape(
        #    cAttn1, (cMBSize, 1, encLen)), hList)
        # drop the size-1 axis=1
        context = chaFunc.reshape(cAttn3, (cMBSize, self.hDim))
        # alternative way to build the context vector, kept for reference:
        # (cMBSize, encLen) => (cMBSize, encLen, 1)
        # cAttn2 = chaFunc.reshape(cAttn1, (cMBSize, encLen, 1))
        # (cMBSize, encLen, 1) => (cMBSize, encLen, hDim)
        # cAttn3 = chaFunc.broadcast_to(cAttn2, (cMBSize, encLen, self.hDim))
        # element-wise product, then sum over axis=1: (cMBSize, encLen, hDim)
        #     => (cMBSize, hDim)
        # context = chaFunc.sum(aList * cAttn3, axis=1)
        # 5. combine the attention context into the final hidden state
        c1 = chaFunc.concat((h1, context))
        c2 = self.model.attnOut_L2(c1)
        finalH = chaFunc.tanh(c2)
        # finalH = chaFunc.tanh(self.model.attnOut_L2(
        #    chaFunc.concat((h1, context))))
        return finalH  # context
