项目:python-utils    作者:zhijian-liu    | 项目源码 | 文件源码
def forward(self, inputs):
        """Classify every time step of a batch with the bidirectional RNN.

        :param inputs: batch-first input (``inputs.size(0)`` is the batch size).
        :return: tensor viewed as ``(batch, -1, num_classes)`` whose last
            dimension holds softmax probabilities.
        """
        batch_size = inputs.size(0)

        # Zero initial hidden/cell state; "* 2" covers both directions.
        # type_as() keeps the state on the same device/dtype as the inputs
        # instead of unconditionally requiring CUDA.
        state_shape = (self.num_layers * 2, batch_size, self.hidden_size)
        hidden = Variable(torch.zeros(*state_shape)).type_as(inputs)
        cell = Variable(torch.zeros(*state_shape)).type_as(inputs)

        # recurrent neural networks
        outputs, _ = self.rnn.forward(inputs, (hidden, cell))
        outputs = outputs.contiguous().view(-1, self.hidden_size * 2)

        # per-step classification; the flattened scores are 2-D, so the
        # class axis is dim 1 (made explicit, implicit dim is deprecated)
        outputs = self.classifier.forward(outputs)
        outputs = F.softmax(outputs, dim=1)
        return outputs.view(batch_size, -1, self.num_classes)
项目:deep-text-corrector    作者:andabi    | 项目源码 | 文件源码
def forward(self, hidden, encoder_outputs):
        """Return attention weights over the encoder positions.

        :param hidden: decoder state, documented upstream as ``(B, H)``.
        :param encoder_outputs: encoder states, ``(B, S, H)``.
        :return: ``(B, S)`` weights; each row sums to 1.
        """
        batch_size, encoder_outputs_len, _ = encoder_outputs.size()

        # Buffer for the attention energies, B x S
        attn_energies = Variable(torch.zeros((batch_size, encoder_outputs_len)))
        if Config.use_cuda:
            attn_energies = attn_energies.cuda()

        # Score the hidden state against every encoder position
        for i in range(encoder_outputs_len):
            attn_energies[:, i] = self.score(hidden, encoder_outputs[:, i])

        # Normalize over the sequence axis (explicit dim; same result as the
        # deprecated implicit choice for a 2-D input)
        return F.softmax(attn_energies, dim=1)
项目:ParlAI    作者:facebookresearch    | 项目源码 | 文件源码
def forward(self, input, source_hids):
        """Dot-product attention over the source hidden states.

        :param input: bsz x input_embed_dim
        :param source_hids: srclen x bsz x output_embed_dim
        :return: ``(x, attn_scores)`` with x of size bsz x (output_proj width)
            and attn_scores of size srclen x bsz (columns sum to 1).
        """
        # x: bsz x output_embed_dim
        x = self.input_proj(input)

        # compute attention; normalizing over dim 0 (srclen) is equivalent to
        # the original transpose -> softmax -> transpose on a 2-D tensor
        attn_scores = (source_hids * x.unsqueeze(0)).sum(dim=2)
        attn_scores = F.softmax(attn_scores, dim=0)  # srclen x bsz

        # sum weighted sources
        x = (attn_scores.unsqueeze(2) * source_hids).sum(dim=0)

        # NOTE(review): this line was garbled in the source ("output_proj(, input), dim=1)");
        # restored as a concatenation of the context with the input -- confirm upstream.
        x = torch.tanh(self.output_proj(torch.cat((x, input), dim=1)))
        return x, attn_scores
项目:ParlAI    作者:facebookresearch    | 项目源码 | 文件源码
def forward(self, x, target_embedding, encoder_out):
        """Attend over the encoder output and add the result to ``x``.

        :param x: decoder features (3-D; the attention scores are indexed
            with three sizes below).
        :param target_embedding: added to the projected ``x``.
        :param encoder_out: pair; ``encoder_out[0]`` is multiplied to obtain
            scores, ``encoder_out[1]`` is the value tensor.
        :return: ``(x, attn_scores)``.
        """
        residual = x

        # attention
        x = (self.in_projection(x) + target_embedding) * math.sqrt(0.5)
        x = self.bmm(x, encoder_out[0])

        # softmax over last dim (flattened to 2-D, so the axis is dim 1)
        sz = x.size()
        x = F.softmax(x.view(sz[0] * sz[1], sz[2]), dim=1)
        x = x.view(sz)
        attn_scores = x

        x = self.bmm(x, encoder_out[1])

        # scale attention output by s * sqrt(1/s), s = source length
        s = encoder_out[1].size(1)
        x = x * (s * math.sqrt(1.0 / s))

        # project back and combine with the residual
        x = (self.out_projection(x) + residual) * math.sqrt(0.5)
        return x, attn_scores
项目:ParlAI    作者:facebookresearch    | 项目源码 | 文件源码
def forward(self, x, y, x_mask):
        """Bilinear attention of ``y`` over the sequence ``x``.

        x = batch * len * h1
        y = batch * h2
        x_mask = batch * len (true entries are excluded from the softmax)

        Returns log-probabilities in training mode and probabilities
        otherwise, both of size batch * len.
        """
        Wy = self.linear(y) if self.linear is not None else y
        xWy = x.bmm(Wy.unsqueeze(2)).squeeze(2)
        # NOTE(review): the masking call and the training/eval branch were
        # garbled in the source; restored from the surviving fragments.
        xWy.data.masked_fill_(x_mask.data, -float('inf'))
        if self.training:
            # In training we output log-softmax for NLL
            alpha = F.log_softmax(xWy, dim=1)
        else:
            # ...Otherwise 0-1 probabilities
            alpha = F.softmax(xWy, dim=1)
        return alpha
项目:pyro    作者:uber    | 项目源码 | 文件源码
def softmax(x, dim=-1):
    """
    Softmax along an arbitrary dimension.

    TODO: change to use the default pyTorch implementation when available

    :param x: tensor
    :param dim: Dimension to apply the softmax function to. The elements of
        the *result* along this dimension sum to 1.
    :return: tensor having the same dimension as `x` rescaled along dim
    """
    input_size = x.size()

    # Move the target dimension last, flatten to 2-D, normalize, then undo.
    trans_input = x.transpose(dim, len(input_size) - 1)
    trans_size = trans_input.size()

    input_2d = trans_input.contiguous().view(-1, trans_size[-1])

    try:
        soft_max_2d = F.softmax(input_2d, 1)
    except TypeError:
        # Support older pytorch 0.2 release.
        soft_max_2d = F.softmax(input_2d)

    soft_max_nd = soft_max_2d.view(*trans_size)
    return soft_max_nd.transpose(dim, len(input_size) - 1)
项目:sru    作者:taolei87    | 项目源码 | 文件源码
def forward(self, x, y, x_mask):
        """Bilinear attention of ``y`` over the sequence ``x``.

        x = batch * len * h1
        y = batch * h2
        x_mask = batch * len (true entries are excluded from the softmax)

        Returns log-probabilities in training mode and probabilities
        otherwise, both of size batch * len.
        """
        Wy = self.linear(y) if self.linear is not None else y
        xWy = x.bmm(Wy.unsqueeze(2)).squeeze(2)
        # NOTE(review): the masking call and the training/eval branch were
        # garbled in the source; restored from the surviving fragments.
        xWy.data.masked_fill_(x_mask.data, -float('inf'))
        if self.training:
            # In training we output log-softmax for NLL
            alpha = F.log_softmax(xWy, dim=1)
        else:
            # ...Otherwise 0-1 probabilities
            alpha = F.softmax(xWy, dim=1)
        return alpha
项目:seq_tagger    作者:OSU-slatelab    | 项目源码 | 文件源码
def getAttnOutput(input, attnScorer, winSize=0):    # get attention output following [Liu and Lane, Interspeech 2016]. The input is seqlen X batchsize X dim. If winSize is 0, all the time steps are used for the weighted averaging
    # NOTE(review): this snippet was damaged during extraction and does not
    # parse as shown (see the lines starting with "curSeq =,", "attnSeq =,"
    # and "return[").  The notes below describe only what is visible.
    attnSeq = []
    for i in range(input.size(0)):
        curSeq = []
        # Window start: 0, or i - winSize (clamped at 0) when a window is set.
        if i > 0:
            leftBegin = 0
            if winSize > 0:
                leftBegin = max(0, i-winSize)
        # Window end: sequence length, or i + winSize + 1 (clamped) when a
        # window is set.  The condition is always true inside this loop.
        if i < input.size(0):
            leftEnd = input.size(0)
            if winSize > 0:
                leftEnd = min(i+winSize+1, input.size(0))
        # NOTE(review): leftBegin/leftEnd are never used in the visible code;
        # presumably slices of `input` were collected into curSeq and joined
        # along dim 0 here -- confirm against the upstream project.
        curSeq =, 0)
        # Repeat the current time step so it lines up with every window entry.
        cur = input[i:i+1].expand_as(curSeq)

        attnScores = attnScorer([cur, curSeq], 2).view(-1, 2*input.size(2)) )    # get attention scores
        transAttnScores = attnScores.view(curSeq.size(0), input.size(1)).transpose(0, 1)    # batchSize X curSeqLen
        # Softmax on the transposed 2-D scores, then transpose back.
        smOut = F.softmax(transAttnScores).transpose(0, 1)
        smOutSeq = smOut.unsqueeze(2).expand_as(curSeq)
        # Attention-weighted sum over the window entries.
        weightedAvgSeq = (curSeq * smOutSeq).sum(0)
        # NOTE(review): weightedAvgSeq is never stored in attnSeq in the
        # visible code -- an append was probably lost; confirm.
    attnSeq =, 0)
    return[input, attnSeq], 2)
项目:bandit-nmt    作者:khanhptnk    | 项目源码 | 文件源码
def sample(self, inputs, max_length):
        """Sample a translation of at most ``max_length`` steps.

        :return: ``(samples, outputs)``, each stacked over the decoding steps
            that were actually run.
        """
        targets, init_states = self.initialize(inputs, eval=False)
        emb, output, hidden, context = init_states

        outputs = []
        samples = []
        batch_size = targets.size(1)
        # One flag per sentence, set once that sentence has produced EOS.
        num_eos = targets[0].data.byte().new(batch_size).zero_()

        for i in range(max_length):
            output, hidden = self.decoder.step(emb, output, hidden, context)
            # The source never filled `outputs`/`samples`, so torch.stack()
            # below always failed on empty lists.
            # NOTE(review): collecting the decoder output (rather than the
            # distribution) is an assumption -- confirm against upstream.
            outputs.append(output)
            # dim=-1 matches the legacy implicit axis for 1-D/2-D scores.
            dist = F.softmax(self.generator(output), dim=-1)
            sample = dist.multinomial(1, replacement=False).view(-1).data
            samples.append(sample)

            # Stop if all sentences reach EOS.
            num_eos |= (sample == lib.Constants.EOS)
            if num_eos.sum() == batch_size:
                break

            emb = self.decoder.word_lut(Variable(sample))

        outputs = torch.stack(outputs)
        samples = torch.stack(samples)
        return samples, outputs
项目:open-reid    作者:Cysu    | 项目源码 | 文件源码
def test_forward_backward(self):
        """Check the OIM loss value and its gradient against softmax cross-entropy.

        With an identity lookup table the loss should equal the negative sum
        of the log-probabilities of the true classes, and the gradient with
        respect to ``x`` should equal ``softmax(x) - one_hot(y)``.
        """
        import torch
        import torch.nn.functional as F
        from torch.autograd import Variable
        from reid.loss import OIMLoss
        criterion = OIMLoss(3, 3, scalar=1.0, size_average=False)
        criterion.lut = torch.eye(3)
        x = Variable(torch.randn(3, 3), requires_grad=True)
        # torch.range is deprecated; arange(0, 3) yields the same 0, 1, 2.
        y = Variable(torch.arange(0, 3).long())
        loss = criterion(x, y)
        # NOTE(review): the backward pass and the gradient expressions were
        # garbled in the source; restored so that x.grad is populated and
        # compared with the analytic gradient -- confirm against upstream.
        loss.backward()
        probs = F.softmax(x, dim=1)
        grads = probs.data - torch.eye(3)
        abs_diff = torch.abs(grads - x.grad.data)
        # assertEquals is a deprecated alias (removed in Python 3.12).
        self.assertEqual(torch.log(probs).diag().sum(), -loss)
        self.assertTrue(torch.max(abs_diff) < 1e-6)
项目:faster_rcnn_pytorch    作者:longcw    | 项目源码 | 文件源码
def forward(self, im_data, im_info, gt_boxes=None, gt_ishard=None, dontcare_areas=None):
        """Run the RPN and the ROI classification/regression head.

        In training mode the proposals are replaced by the sampled training
        ROIs and the losses are stored on ``self.cross_entropy`` and
        ``self.loss_box``.

        :return: ``(cls_prob, bbox_pred, rois)``.
        """
        features, rois = self.rpn(im_data, im_info, gt_boxes, gt_ishard, dontcare_areas)

        # NOTE(review): the `if self.training:` guards and the dropout
        # arguments were stripped from the source; restored here.
        if self.training:
            roi_data = self.proposal_target_layer(rois, gt_boxes, gt_ishard, dontcare_areas, self.n_classes)
            rois = roi_data[0]

        # roi pool
        pooled_features = self.roi_pool(features, rois)
        x = pooled_features.view(pooled_features.size()[0], -1)
        x = self.fc6(x)
        x = F.dropout(x, training=self.training)
        x = self.fc7(x)
        x = F.dropout(x, training=self.training)

        cls_score = self.score_fc(x)
        cls_prob = F.softmax(cls_score, dim=1)
        bbox_pred = self.bbox_fc(x)

        if self.training:
            self.cross_entropy, self.loss_box = self.build_loss(cls_score, bbox_pred, roi_data)

        return cls_prob, bbox_pred, rois
项目:fairseq-py    作者:facebookresearch    | 项目源码 | 文件源码
def forward(self, input, source_hids):
        """Dot-product attention over the source hidden states.

        :param input: bsz x input_embed_dim
        :param source_hids: srclen x bsz x output_embed_dim
        :return: ``(x, attn_scores)`` with x of size bsz x (output_proj width)
            and attn_scores of size srclen x bsz (columns sum to 1).
        """
        # x: bsz x output_embed_dim
        x = self.input_proj(input)

        # compute attention; normalizing over dim 0 (srclen) is equivalent to
        # the original transpose -> softmax -> transpose on a 2-D tensor
        attn_scores = (source_hids * x.unsqueeze(0)).sum(dim=2)
        attn_scores = F.softmax(attn_scores, dim=0)  # srclen x bsz

        # sum weighted sources
        x = (attn_scores.unsqueeze(2) * source_hids).sum(dim=0)

        # NOTE(review): this line was garbled in the source ("output_proj(, input), dim=1)");
        # restored as a concatenation of the context with the input -- confirm upstream.
        x = torch.tanh(self.output_proj(torch.cat((x, input), dim=1)))
        return x, attn_scores
项目:fairseq-py    作者:facebookresearch    | 项目源码 | 文件源码
def forward(self, x, target_embedding, encoder_out):
        """Attend over the encoder output and add the result to ``x``.

        :param x: decoder features (3-D; the attention scores are indexed
            with three sizes below).
        :param target_embedding: added to the projected ``x``.
        :param encoder_out: pair; ``encoder_out[0]`` is multiplied to obtain
            scores, ``encoder_out[1]`` is the value tensor.
        :return: ``(x, attn_scores)``.
        """
        residual = x

        # attention
        x = (self.in_projection(x) + target_embedding) * math.sqrt(0.5)
        x = self.bmm(x, encoder_out[0])

        # softmax over last dim (flattened to 2-D, so the axis is dim 1)
        sz = x.size()
        x = F.softmax(x.view(sz[0] * sz[1], sz[2]), dim=1)
        x = x.view(sz)
        attn_scores = x

        x = self.bmm(x, encoder_out[1])

        # scale attention output by s * sqrt(1/s), s = source length
        s = encoder_out[1].size(1)
        x = x * (s * math.sqrt(1.0 / s))

        # project back and combine with the residual
        x = (self.out_projection(x) + residual) * math.sqrt(0.5)
        return x, attn_scores
项目:fairseq-py    作者:facebookresearch    | 项目源码 | 文件源码
def _decode(self, tokens, encoder_outs):
        # wrap in Variable
        tokens = Variable(tokens, volatile=True)

        avg_probs = None
        avg_attn = None
        for model, encoder_out in zip(self.models, encoder_outs):
            decoder_out, attn = model.decoder(tokens, encoder_out)
            probs = F.softmax(decoder_out[:, -1, :]).data
            attn = attn[:, -1, :].data
            if avg_probs is None or avg_attn is None:
                avg_probs = probs
                avg_attn = attn

        return avg_probs, avg_attn
项目:DrQA    作者:facebookresearch    | 项目源码 | 文件源码
def forward(self, x, y, x_mask):
        """Bilinear attention of ``y`` over the sequence ``x``.

        Args:
            x: batch * len * hdim1
            y: batch * hdim2
            x_mask: batch * len (1 for padding, 0 for true)
        Output:
            alpha = batch * len
        """
        Wy = self.linear(y) if self.linear is not None else y
        xWy = x.bmm(Wy.unsqueeze(2)).squeeze(2)
        # NOTE(review): the masking call and the else-branches were garbled
        # in the source; restored from the surviving fragments.
        xWy.data.masked_fill_(x_mask.data, -float('inf'))
        if self.normalize:
            if self.training:
                # In training we output log-softmax for NLL
                alpha = F.log_softmax(xWy, dim=1)
            else:
                # ...Otherwise 0-1 probabilities
                alpha = F.softmax(xWy, dim=1)
        else:
            # Unnormalized scores; padded positions become exp(-inf) == 0
            alpha = xWy.exp()
        return alpha
项目:DrQA    作者:facebookresearch    | 项目源码 | 文件源码
def forward(self, x, x_mask):
        """Self-attention weights from a linear scoring of each position.

        Args:
            x: batch * len * hdim
            x_mask: batch * len (1 for padding, 0 for true)
        Output:
            alpha: batch * len
        """
        x_flat = x.view(-1, x.size(-1))
        scores = self.linear(x_flat).view(x.size(0), x.size(1))
        # NOTE(review): the masking call was garbled in the source; restored
        # from the surviving "-float('inf'))" fragment.
        scores.data.masked_fill_(x_mask.data, -float('inf'))
        alpha = F.softmax(scores, dim=1)
        return alpha

# ------------------------------------------------------------------------------
# Functional
# ------------------------------------------------------------------------------
项目:MSDN    作者:yikang-li    | 项目源码 | 文件源码
def object_detection_gt_boxes(self, image_path, gt_boxes):
        """Classify the given ground-truth boxes of one image.

        Reads the image, rescales ``gt_boxes[:, :4]`` in place by the image
        scale, runs the model and returns the indices of the five highest
        scoring classes (column 0 skipped) for every box as a numpy array.
        """
        min_score = 1/150.
        image = cv2.imread(image_path)
        im_data, im_scales = self.get_image_blob_noscale(image)
        gt_boxes[:, :4] = gt_boxes[:, :4] * im_scales[0]
        # NOTE(review): the closing of this np.array(...) call was lost in
        # the source; dtype=np.float32 is the presumed original argument.
        im_info = np.array(
            [[im_data.shape[1], im_data.shape[2], im_scales[0]]],
            dtype=np.float32)
        object_result = self(im_data, im_info, gt_boxes)[0]
        cls_prob_object, bbox_object, object_rois = object_result[:]

        # Class axis is dim 1, matching the topk(dim=1) below.
        prob_object = F.softmax(cls_prob_object, dim=1)
        prob = prob_object.cpu().data
        # Skip column 0; returned indices are relative to prob[:, 1:].
        top_5_cls = torch.topk(prob[:, 1:], 5, dim=1)
        return top_5_cls[1].numpy()
项目:clevr-iep    作者:facebookresearch    | 项目源码 | 文件源码
def forward(self, v, u):
    """
    One step of stacked spatial attention.

    Input:
    - v: N x D x H x W
    - u: N x D

    Returns:
    - next_u: N x D

    Side effect: stores a copy of the attention map (N x 1 x H x W) on
    ``self.attention_maps``.
    """
    N, K = v.size(0), self.hidden_dim
    D, H, W = v.size(1), v.size(2), v.size(3)
    v_proj = self.Wv(v) # N x K x H x W
    u_proj = self.Wu(u) # N x K
    u_proj_expand = u_proj.view(N, K, 1, 1).expand(N, K, H, W)
    h = F.tanh(v_proj + u_proj_expand)
    # Normalize over all H * W locations (flattened, so dim 1)
    p = F.softmax(self.Wp(h).view(N, H * W), dim=1).view(N, 1, H, W)
    # NOTE(review): right-hand side was lost in the source; a detached copy
    # of p is the presumed original -- confirm.
    self.attention_maps = p.data.clone()

    # Sum over both spatial axes in one step; chaining .sum(2).sum(3) breaks
    # once sum() stops keeping the reduced dimension.
    v_tilde = (p.expand_as(v) * v).view(N, D, H * W).sum(2)
    next_u = u + v_tilde
    return next_u
项目:SeqMatchSeq    作者:pcgreat    | 项目源码 | 文件源码
def new_att_module(self):
        # Builds and returns a fresh NewAttModule instance.
        # NOTE(review): this snippet was damaged during extraction and does
        # not parse as shown (see the "self.M_r =,", "self.Yl =," lines and
        # the empty for-loop body).  Notes describe only what is visible.

        class NewAttModule(nn.Module):
            def __init__(self):
                super(NewAttModule, self).__init__()

            def forward(self, linput, rinput):
                # This reshaped value is overwritten two lines below.
                self.lPad = linput.view(-1, linput.size(0), linput.size(1))

                self.lPad = linput  # self.lPad = Padding(0, 0)(linput) TODO: figure out why padding?
                # NOTE(review): presumably a matrix product of lPad with
                # rinput transposed -- the call name was lost; confirm.
                self.M_r =, rinput.t())
                # Softmax of the transposed score matrix (implicit dim).
                self.alpha = F.softmax(self.M_r.transpose(0, 1))
                # NOTE(review): presumably alpha multiplied with lPad -- the
                # call name was lost; confirm.
                self.Yl =, self.lPad)
                return self.Yl

        att_module = NewAttModule()
        if getattr(self, "att_module_master", None):
            # NOTE(review): the loop body is missing in the source; it pairs
            # the new module's parameters with the master module's.
            for (tar_param, src_param) in zip(att_module.parameters(), self.att_module_master.parameters()):
        return att_module
项目:pytorch-deform-conv    作者:oeway    | 项目源码 | 文件源码
def forward(self, x):
        """Four conv/ReLU/batch-norm stages, global average pool, classifier.

        :param x: 4-D input (sizes 2 and 3 are used as the pooling window).
        :return: softmax class probabilities, one row per sample.
        """
        x = F.relu(self.conv11(x))
        x = self.bn11(x)

        x = F.relu(self.conv12(x))
        x = self.bn12(x)

        x = F.relu(self.conv21(x))
        x = self.bn21(x)

        x = F.relu(self.conv22(x))
        x = self.bn22(x)

        # Global average pooling: the window covers the whole feature map.
        x = F.avg_pool2d(x, kernel_size=[x.size(2), x.size(3)])
        x = self.fc(x.view(x.size()[:2]))
        # Explicit class axis; implicit dim is deprecated.
        x = F.softmax(x, dim=1)
        return x
项目:pytorch-deform-conv    作者:oeway    | 项目源码 | 文件源码
def forward(self, x):
        """Conv network with offset layers before the last three conv stages.

        :param x: 4-D input (sizes 2 and 3 are used as the pooling window).
        :return: softmax class probabilities, one row per sample.
        """
        x = F.relu(self.conv11(x))
        x = self.bn11(x)

        x = self.offset12(x)
        x = F.relu(self.conv12(x))
        x = self.bn12(x)

        x = self.offset21(x)
        x = F.relu(self.conv21(x))
        x = self.bn21(x)

        x = self.offset22(x)
        x = F.relu(self.conv22(x))
        x = self.bn22(x)

        # Global average pooling: the window covers the whole feature map.
        x = F.avg_pool2d(x, kernel_size=[x.size(2), x.size(3)])
        x = self.fc(x.view(x.size()[:2]))
        # Explicit class axis; implicit dim is deprecated.
        x = F.softmax(x, dim=1)
        return x
项目:Efficient-Dynamic-Batching    作者:jsuarez5341    | 项目源码 | 文件源码
def forward(self, x, trainable, fast=True):
      """Generate a program from the question and execute it on the image.

      :param x: tuple ``(q, img, ans, prog)``.
      :param trainable: when false, the answer is discarded.
      :param fast: forwarded to the execution engine.
      :return: whatever ``self.ExecutionEngine`` returns.
      """
      q, img, ans, prog = x #Need ans for reinforce
      if not trainable: ans = None #Safety

      p = self.ProgramGenerator(q)

      #Finicky handling of PG-EE transition
      batch, sLen, v = p.size()
      p = F.softmax(p.view(-1, v), dim=1).view(batch, sLen, v)
      p, pInds = t.max(p, 2)
      # Older torch kept the reduced dimension (batch x sLen x 1); newer
      # releases drop it.  Only strip it when it is actually there.
      if p.dim() == 3:
         pInds = pInds[:, :, 0]
         p = p[:, :, 0]

      a = self.ExecutionEngine((pInds, p, img), fast=fast)
      return a
项目:temperature_scaling    作者:gpleiss    | 项目源码 | 文件源码
def forward(self, logits, labels):
        """Expected calibration error of ``logits`` against ``labels``.

        Samples are grouped by confidence into the bins given by
        ``self.bin_lowers`` / ``self.bin_uppers`` (lower bound exclusive,
        upper bound inclusive); the result is the sum over bins of
        |mean confidence - accuracy| weighted by the share of samples in
        the bin.

        :return: one-element tensor of the same type as ``logits``.
        """
        softmaxes = F.softmax(logits, dim=1)
        confidences, predictions = torch.max(softmaxes, 1)
        accuracies = predictions.eq(labels)

        ece = Variable(torch.zeros(1)).type_as(logits)
        for bin_lower, bin_upper in zip(self.bin_lowers, self.bin_uppers):
            # Calculated |confidence - accuracy| in each bin
            # NOTE(review): the lower-bound test and the emptiness check were
            # garbled in the source; restored from the surviving fragments.
            in_bin = confidences.gt(bin_lower) & confidences.le(bin_upper)
            prop_in_bin = in_bin.float().mean()
            # float() works for 0-dim and one-element tensors alike
            if float(prop_in_bin) > 0:
                accuracy_in_bin = accuracies[in_bin].float().mean()
                avg_confidence_in_bin = confidences[in_bin].mean()
                ece += torch.abs(avg_confidence_in_bin - accuracy_in_bin) * prop_in_bin

        return ece
项目:DrQA    作者:hitvoice    | 项目源码 | 文件源码
def forward(self, x, y, x_mask):
        """Bilinear attention of ``y`` over the sequence ``x``.

        x = batch * len * h1
        y = batch * h2
        x_mask = batch * len (true entries are excluded from the softmax)

        Returns log-probabilities in training mode and probabilities
        otherwise, both of size batch * len.
        """
        Wy = self.linear(y) if self.linear is not None else y
        xWy = x.bmm(Wy.unsqueeze(2)).squeeze(2)
        # NOTE(review): the masking call and the training/eval branch were
        # garbled in the source; restored from the surviving fragments.
        xWy.data.masked_fill_(x_mask.data, -float('inf'))
        if self.training:
            # In training we output log-softmax for NLL
            alpha = F.log_softmax(xWy, dim=1)
        else:
            # ...Otherwise 0-1 probabilities
            alpha = F.softmax(xWy, dim=1)
        return alpha
项目:PyTorch-Encoding    作者:zhanghang1989    | 项目源码 | 文件源码
def forward(self, X):
        """Encode a 3-D (BxDxN) or 4-D (BxDxHxW) input with the codewords.

        :raises AssertionError: if the channel size differs from ``self.D``.
        :raises RuntimeError: for inputs that are neither 3-D nor 4-D.
        """
        # The original wrapped condition and message in parentheses, which
        # asserts a non-empty tuple and therefore never fails.
        assert X.size(1) == self.D, "Encoding Layer wrong channels!"
        if X.dim() == 3:
            # BxDxN
            B, N, K, D = X.size(0), X.size(2), self.K, self.D
            X = X.transpose(1,2).contiguous()
        elif X.dim() == 4:
            # BxDxHxW
            B, N, K, D = X.size(0), X.size(2)*X.size(3), self.K, self.D
            X = X.view(B,D,-1).transpose(1,2).contiguous()
        else:
            # The `else:` was lost in the source, making every 4-D input raise.
            raise RuntimeError('Encoding Layer unknown input dims!')
        # assignment weights, normalized over the codewords.
        # NOTE(review): assumes scaledL2 returns B x N x K, so the codeword
        # axis is dim 2 (the implicit choice for 3-D input would be dim 0).
        A = F.softmax(scaledL2()(X, self.codewords, self.scale), dim=2)
        # aggregate
        E = aggregate()(A, X, self.codewords)
        return E
项目:PyTorch-Encoding    作者:zhanghang1989    | 项目源码 | 文件源码
def assign(R, S):
    r"""
    Calculate assignment weights for given residuals (:math:`R`) and scale (:math:`S`)

    .. math::
        a_{ik} = \frac{exp(-s_k\|r_{ik}\|^2)}{\sum_{j=1}^K exp(-s_j\|r_{ik}\|^2)}

    Shape:
        - Input: :math:`R\in\mathcal{R}^{B\times N\times K\times D}` :math:`S\in \mathcal{R}^K` (where :math:`B` is batch, :math:`N` is total number of features, :math:`K` is number of codewords, :math:`D` is feature dimensions.)
        - Output :math:`A\in\mathcal{R}^{B\times N\times K}`
    """
    L = square_squeeze()(R)
    K = S.size(0)
    SL = L * S.view(1,1,K)
    # The formula normalizes over the K codewords, i.e. the last axis of the
    # B x N x K scores; the implicit dim for a 3-D input would be 0.
    return F.softmax(SL, dim=2)
项目:PyTorch-Encoding    作者:zhanghang1989    | 项目源码 | 文件源码
def forward(self, X):
        """Encode a 3-D (BxDxN) or 4-D (BxDxHxW) input with the codewords.

        A tuple or list input is dispatched to ``my_data_parallel``.

        :raises RuntimeError: for a non-Variable input or an input that is
            neither 3-D nor 4-D.
        """
        if isinstance(X, (tuple, list)):
            # for self-parallel mode, please see encoding.nn
            return my_data_parallel(self, X)
        elif not isinstance(X, Variable):
            raise RuntimeError('unknown input type')
        if X.dim() == 3:
            # BxDxN
            B, N, K, D = X.size(0), X.size(2), self.K, self.D
            X = X.transpose(1,2).contiguous()
        elif X.dim() == 4:
            # BxDxHxW
            B, N, K, D = X.size(0), X.size(2)*X.size(3), self.K, self.D
            X = X.view(B,D,-1).transpose(1,2).contiguous()
        else:
            # The `else:` was lost in the source, making every 4-D input raise.
            raise RuntimeError('Encoding Layer unknown input dims!')
        # assignment weights, normalized over the codewords.
        # NOTE(review): assumes scaledL2 returns B x N x K; dim=1 in the
        # source would normalize over the N features instead -- confirm.
        A = F.softmax(scaledL2(X, self.codewords, self.scale), dim=2)
        # aggregate
        E = aggregate(A, X, self.codewords)
        return E
项目:pytorch-avitm    作者:hyqneuron    | 项目源码 | 文件源码
def forward(self, input, compute_loss=False, avg_loss=True):
        """Encode, sample a topic mixture and reconstruct the input.

        :param input: batch of documents (rows).
        :param compute_loss: when true, also return ``self.loss(...)``.
        :param avg_loss: forwarded to ``self.loss``.
        :return: ``recon`` or ``(recon, loss)``; each row of ``recon`` is a
            softmax distribution.
        """
        # compute posterior
        en1 = F.softplus(self.en1_fc(input))                            # en1_fc   output
        en2 = F.softplus(self.en2_fc(en1))                              # encoder2 output
        en2 = self.en2_drop(en2)
        posterior_mean   = self.mean_bn  (self.mean_fc  (en2))          # posterior mean
        posterior_logvar = self.logvar_bn(self.logvar_fc(en2))          # posterior log variance
        posterior_var    = posterior_logvar.exp()
        # take sample
        # NOTE(review): the noise expression was truncated in the source;
        # standard-normal noise shaped like the posterior mean is assumed.
        eps = Variable(input.data.new(posterior_mean.size()).normal_())  # noise
        z = posterior_mean + posterior_var.sqrt() * eps                 # reparameterization
        p = F.softmax(z, dim=1)                                         # mixture probability
        p = self.p_drop(p)
        # do reconstruction
        recon = F.softmax(self.decoder_bn(self.decoder(p)), dim=1)      # reconstructed distribution over vocabulary

        if compute_loss:
            return recon, self.loss(input, recon, posterior_mean, posterior_logvar, posterior_var, avg_loss)
        else:
            return recon
项目:DrQA_cn    作者:AmoseKang    | 项目源码 | 文件源码
def forward(self, x, y, x_mask):
        """Bilinear attention of ``y`` over the sequence ``x``.

        Args:
            x: batch * len * hdim1
            y: batch * hdim2
            x_mask: batch * len (1 for padding, 0 for true)
        Output:
            alpha = batch * len
        """
        Wy = self.linear(y) if self.linear is not None else y
        xWy = x.bmm(Wy.unsqueeze(2)).squeeze(2)
        # NOTE(review): the masking call and the else-branches were garbled
        # in the source; restored from the surviving fragments.
        xWy.data.masked_fill_(x_mask.data, -float('inf'))
        if self.normalize:
            if self.training:
                # In training we output log-softmax for NLL
                alpha = F.log_softmax(xWy, dim=1)
            else:
                # ...Otherwise 0-1 probabilities
                alpha = F.softmax(xWy, dim=1)
        else:
            # Unnormalized scores; padded positions become exp(-inf) == 0
            alpha = xWy.exp()
        return alpha
项目:DrQA_cn    作者:AmoseKang    | 项目源码 | 文件源码
def forward(self, x, x_mask):
        """Self-attention weights from a linear scoring of each position.

        Args:
            x: batch * len * hdim
            x_mask: batch * len (1 for padding, 0 for true)
        Output:
            alpha: batch * len
        """
        x_flat = x.view(-1, x.size(-1))
        scores = self.linear(x_flat).view(x.size(0), x.size(1))
        # NOTE(review): the masking call was garbled in the source; restored
        # from the surviving "-float('inf'))" fragment.
        scores.data.masked_fill_(x_mask.data, -float('inf'))
        alpha = F.softmax(scores, dim=1)
        return alpha

# ------------------------------------------------------------------------------
# Functional
# ------------------------------------------------------------------------------
项目:MemNN    作者:berlino    | 项目源码 | 文件源码
def forward(self, qu, w, cand):
        """Score candidate answers with a multi-hop memory network.

        :param qu: question token ids.
        :param w: memory (sentence) token ids, one row per sentence.
        :param cand: candidate token ids.
        :return: log-softmax scores over the candidates.
        """
        qu = Variable(qu)
        w = Variable(w)
        cand = Variable(cand)
        embed_q = self.embed_B(qu)
        embed_w1 = self.embed_A(w)
        embed_w2 = self.embed_C(w)
        embed_c = self.embed_C(cand)

        # Bag-of-words states: sum the token embeddings along dim 1
        q_state = torch.sum(embed_q, 1).squeeze(1)
        w1_state = torch.sum(embed_w1, 1).squeeze(1)
        w2_state = torch.sum(embed_w2, 1).squeeze(1)

        # NOTE(review): the three matrix-product call names were lost in the
        # source; torch.mm is assumed from the operand shapes -- confirm.
        for _ in range(self.config.hop):
            # attention of the question over the memory sentences
            sent_dot = torch.mm(q_state, torch.transpose(w1_state, 0, 1))
            sent_att = F.softmax(sent_dot, dim=1)

            # attention-weighted read, then state update
            a_dot = torch.mm(sent_att, w2_state)
            a_dot = self.H(a_dot)
            q_state = torch.add(a_dot, q_state)

        f_feat = torch.mm(q_state, torch.transpose(embed_c, 0, 1))
        score = F.log_softmax(f_feat, dim=1)
        return score
项目:Seq2Seq-on-Word-Sense-Disambiguition    作者:lbwbowenLi    | 项目源码 | 文件源码
def forward(self, hidden, encoder_outputs):
        """Return attention weights of ``hidden`` over the encoder outputs.

        :param hidden: decoder state, indexed as ``hidden[:, b]``.
        :param encoder_outputs: encoder states, sequence-first
            (``size(0)`` is the length, ``size(1)`` the batch size).
        :return: weights of size B x 1 x S; each row sums to 1.
        """
        max_len = encoder_outputs.size(0)
        this_batch_size = encoder_outputs.size(1)

        # Create variable to store attention energies
        attn_energies = Variable(torch.zeros(this_batch_size, max_len)) # B x S

        if USE_CUDA:
            attn_energies = attn_energies.cuda()

        # For each batch of encoder outputs
        for b in range(this_batch_size):
            # Calculate energy for each encoder output
            for i in range(max_len):
                attn_energies[b, i] = self.score(hidden[:, b], encoder_outputs[i, b].unsqueeze(0))

        # Normalize energies to weights in range 0 to 1 over the sequence
        # axis, then insert a singleton axis: B x 1 x S
        return F.softmax(attn_energies, dim=1).unsqueeze(1)
项目:Seq2Seq-on-Word-Sense-Disambiguition    作者:lbwbowenLi    | 项目源码 | 文件源码
def forward(self, hidden, encoder_outputs):
        """Return attention weights of ``hidden`` over the encoder outputs.

        :param hidden: decoder state, indexed as ``hidden[:, b]``.
        :param encoder_outputs: encoder states, sequence-first
            (``size(0)`` is the length, ``size(1)`` the batch size).
        :return: weights of size B x 1 x S; each row sums to 1.
        """
        max_len = encoder_outputs.size(0)
        this_batch_size = encoder_outputs.size(1)

        # Create variable to store attention energies
        attn_energies = Variable(torch.zeros(this_batch_size, max_len)) # B x S

        if USE_CUDA:
            attn_energies = attn_energies.cuda()

        # For each batch of encoder outputs
        for b in range(this_batch_size):
            # Calculate energy for each encoder output
            for i in range(max_len):
                attn_energies[b, i] = self.score(hidden[:, b], encoder_outputs[i, b].unsqueeze(0))

        # Normalize energies to weights in range 0 to 1 over the sequence
        # axis, then insert a singleton axis: B x 1 x S
        return F.softmax(attn_energies, dim=1).unsqueeze(1)
项目:pytorch-seq2seq    作者:IBM    | 项目源码 | 文件源码
def forward(self, output, context):
        """Attend from decoder outputs over the encoder context.

        :param output: (batch, out_len, dim) decoder features.
        :param context: (batch, in_len, dim) encoder features.
        :return: ``(output, attn)`` with output (batch, out_len, dim) and
            attn (batch, out_len, in_len).
        """
        batch_size = output.size(0)
        hidden_size = output.size(2)
        input_size = context.size(1)
        # (batch, out_len, dim) * (batch, in_len, dim) -> (batch, out_len, in_len)
        attn = torch.bmm(output, context.transpose(1, 2))
        if self.mask is not None:
            # NOTE(review): the call was garbled in the source; masked
            # positions are assumed to be set to -inf before the softmax.
            attn.data.masked_fill_(self.mask, -float('inf'))
        attn = F.softmax(attn.view(-1, input_size), dim=1).view(batch_size, -1, input_size)

        # (batch, out_len, in_len) * (batch, in_len, dim) -> (batch, out_len, dim)
        mix = torch.bmm(attn, context)

        # concat -> (batch, out_len, 2*dim)
        combined = torch.cat((mix, output), dim=2)
        # output -> (batch, out_len, dim)
        output = torch.tanh(self.linear_out(combined.view(-1, 2 * hidden_size))).view(batch_size, -1, hidden_size)

        return output, attn
项目:self-critical.pytorch    作者:ruotianluo    | 项目源码 | 文件源码
def forward(self, h, att_feats, p_att_feats):
        """Additive attention over the attention features.

        :param h: hidden state, batch * rnn_size input of ``self.h2att``.
        :param att_feats: raw attention features.
        :param p_att_feats: the same features, already projected.
        :return: attended feature, batch * att_feat_size.
        """
        # The p_att_feats here is already projected
        att_size = att_feats.numel() // att_feats.size(0) // self.rnn_size
        att = p_att_feats.view(-1, att_size, self.att_hid_size)

        att_h = self.h2att(h)                        # batch * att_hid_size
        att_h = att_h.unsqueeze(1).expand_as(att)            # batch * att_size * att_hid_size
        dot = att + att_h                                   # batch * att_size * att_hid_size
        dot = F.tanh(dot)                                # batch * att_size * att_hid_size
        dot = dot.view(-1, self.att_hid_size)               # (batch * att_size) * att_hid_size
        dot = self.alpha_net(dot)                           # (batch * att_size) * 1
        dot = dot.view(-1, att_size)                        # batch * att_size

        # explicit axis; the scores are 2-D so this matches the implicit one
        weight = F.softmax(dot, dim=1)                      # batch * att_size
        att_feats_ = att_feats.view(-1, att_size, self.rnn_size) # batch * att_size * att_feat_size
        att_res = torch.bmm(weight.unsqueeze(1), att_feats_).squeeze(1) # batch * att_feat_size

        return att_res
项目:pytorch_RFCN    作者:PureDiors    | 项目源码 | 文件源码
def forward(self, im_data, im_info, gt_boxes=None, gt_ishard=None, dontcare_areas=None):
        """Run the RPN and the position-sensitive ROI head.

        In training mode the proposals are replaced by the sampled training
        ROIs and the losses are stored on ``self.cross_entropy`` and
        ``self.loss_box``.

        :return: ``(cls_prob, bbox_pred, rois)``.
        """
        features, rois = self.rpn(im_data, im_info, gt_boxes, gt_ishard, dontcare_areas)

        # NOTE(review): the `if self.training:` guards were stripped from the
        # source and the indentation was inconsistent; both restored here.
        if self.training:
            roi_data = self.proposal_target_layer(rois, gt_boxes, gt_ishard, dontcare_areas, self.n_classes)
            rois = roi_data[0]

        # position-sensitive score / box maps and their ROI pooling
        conv_new1 = self.new_conv(features)
        r_score_map = self.rfcn_score(conv_new1)
        r_bbox_map = self.rfcn_bbox(conv_new1)
        psroi_pooled_cls = self.psroi_pool_cls(r_score_map, rois)
        psroi_pooled_loc = self.psroi_pool_loc(r_bbox_map, rois)
        bbox_pred = self.bbox_pred(psroi_pooled_loc)
        bbox_pred = torch.squeeze(bbox_pred)
        cls_score = self.cls_score(psroi_pooled_cls)
        cls_score = torch.squeeze(cls_score)
        # dim=-1 equals the legacy implicit axis for 1-D and 2-D scores
        cls_prob = F.softmax(cls_score, dim=-1)

        if self.training:
            self.cross_entropy, self.loss_box = self.build_loss(cls_score, bbox_pred, roi_data)

        return cls_prob, bbox_pred, rois
项目:Rita    作者:RITct    | 项目源码 | 文件源码
def forward(self, input, hidden, encoder_output, encoder_outputs):
        """One step of the attention decoder.

        :param input: current input token.
        :param hidden: previous GRU hidden state.
        :param encoder_output: unused in this method.
        :param encoder_outputs: encoder states that are attended over.
        :return: ``(output, hidden, attn_weights)``; ``output`` holds
            log-probabilities.
        """
        embedded = self.embedding(input).view(1, 1, -1)
        embedded = self.dropout(embedded)

        # NOTE(review): three expressions were garbled in the source; they
        # are restored as concatenations of the embedded input with the
        # hidden state / attended context, and a bmm with the encoder
        # outputs -- confirm against upstream.
        attn_weights = F.softmax(
            self.attn(torch.cat((embedded[0], hidden[0]), 1)), dim=1)
        attn_weights = attn_weights.cuda() if use_cuda else attn_weights
        attn_applied = torch.bmm(attn_weights.unsqueeze(0),
                                 encoder_outputs.unsqueeze(0))
        attn_applied = attn_applied.cuda() if use_cuda else attn_applied

        output = torch.cat((embedded[0], attn_applied[0]), 1)
        output = output.cuda() if use_cuda else output
        output = self.attn_combine(output).unsqueeze(0)

        for i in range(self.n_layers):
            output = F.relu(output)
            output = output.cuda() if use_cuda else output
            output, hidden = self.gru(output, hidden)

        output = F.log_softmax(self.out(output[0]), dim=1)
        output = output.cuda() if use_cuda else output
        return output, hidden, attn_weights
项目:deepspeech.pytorch    作者:SeanNaren    | 项目源码 | 文件源码
def forward(self, x):
        """Convolutions, recurrent stack, optional lookahead, then classifier.

        The convolution output is indexed with four sizes; sizes 1 and 2 are
        merged into one feature axis.  The result is batch-major
        (N x T x H) after the final transpose.
        """
        features = self.conv(x)

        dims = features.size()
        # Collapse feature dimension, then reorder to time-major (TxNxH)
        sequence = features.view(dims[0], dims[1] * dims[2], dims[3])
        sequence = sequence.permute(2, 0, 1).contiguous()

        sequence = self.rnns(sequence)

        # no need for lookahead layer in bidirectional
        if not self._bidirectional:
            sequence = self.lookahead(sequence)

        scores = self.fc(sequence).transpose(0, 1)
        # identity in training mode, softmax in eval mode
        return self.inference_softmax(scores)
项目:seqmod    作者:emanjavacas    | 项目源码 | 文件源码
def forward(self, dec_out, enc_outs, enc_att=None, mask=None):
        """
        Parameters:
        -----------

        - dec_out: torch.Tensor(batch_size x hid_dim)
        - enc_outs: torch.Tensor(seq_len x batch_size x hid_dim)
        - enc_att: (optional), torch.Tensor(seq_len x batch_size x att_dim)
        - mask: (optional), torch.ByteTensor(batch_size x seq_len); positions
            where the mask is 0 receive no attention

        Returns ``(context, weights)`` with weights of size batch x seq_len.
        """
        # (batch x seq_len)
        weights = self.scorer(dec_out, enc_outs, enc_att=enc_att)

        if mask is not None:
            # NOTE(review): this call was garbled in the source; restored as
            # "fill where the mask is zero".  `mask == 0` is used instead of
            # `1 - mask` so that boolean masks work as well.
            weights.data.masked_fill_(mask.data == 0, -float('inf'))

        weights = F.softmax(weights, dim=1)

        # (eq 7)
        context = weights.unsqueeze(1).bmm(enc_outs.transpose(0, 1)).squeeze(1)
        # (eq 5) linear out combining context and hidden
        context = torch.tanh(self.linear_out(torch.cat([context, dec_out], 1)))

        return context, weights
项目:pytorch-dnc    作者:jingweiz    | 项目源码 | 文件源码
def _content_focus(self, memory_vb):
        r"""
        Compute the content-based attention weights and store them on
        ``self.wc_vb`` (nothing is returned).

        variables needed:
            key_vb:    [batch_size x num_heads x mem_wid]
                    -> similarity key vector, to compare to each row in memory
                    -> by cosine similarity
            beta_vb:   [batch_size x num_heads x 1]
                    -> NOTE: the reference link was lost in this copy
                    -> \in (1, +inf) after oneplus(); similarity key strength
                    -> amplify or attenuate the precision of the focus
            memory_vb: [batch_size x mem_hei   x mem_wid]
        returns:
            wc_vb:     [batch_size x num_heads x mem_hei]
                    -> the attention weight by content focus
        """
        K_vb = batch_cosine_sim(self.key_vb, memory_vb)  # [batch_size x num_heads x mem_hei]
        self.wc_vb = K_vb * self.beta_vb.expand_as(K_vb) # [batch_size x num_heads x mem_hei]
        # Normalize over the memory rows.  The original transposed dims 0/2
        # around an implicit-dim softmax (dim 0 for 3-D input), which is the
        # same as a softmax over dim 2.
        self.wc_vb = F.softmax(self.wc_vb, dim=2)
项目:pytorch-dnc    作者:jingweiz    | 项目源码 | 文件源码
def _content_focus(self, memory_vb):
        r"""
        Compute the content-based attention weights and store them on
        ``self.wc_vb`` (nothing is returned).

        variables needed:
            key_vb:    [batch_size x num_heads x mem_wid]
                    -> similarity key vector, to compare to each row in memory
                    -> by cosine similarity
            beta_vb:   [batch_size x num_heads x 1]
                    -> NOTE: the reference link was lost in this copy
                    -> \in (1, +inf) after oneplus(); similarity key strength
                    -> amplify or attenuate the precision of the focus
            memory_vb: [batch_size x mem_hei   x mem_wid]
        returns:
            wc_vb:     [batch_size x num_heads x mem_hei]
                    -> the attention weight by content focus
        """
        K_vb = batch_cosine_sim(self.key_vb, memory_vb)  # [batch_size x num_heads x mem_hei]
        self.wc_vb = K_vb * self.beta_vb.expand_as(K_vb) # [batch_size x num_heads x mem_hei]
        # Normalize over the memory rows.  The original transposed dims 0/2
        # around an implicit-dim softmax (dim 0 for 3-D input), which is the
        # same as a softmax over dim 2.
        self.wc_vb = F.softmax(self.wc_vb, dim=2)
项目:keita    作者:iwasaki-kenta    | 项目源码 | 文件源码
def forward(self, x):
        """
        A model for non-linear data that works off of mixing multiple Gaussian
        distributions together. Uses linear projections of a given input to generate
        a set of N Gaussian models' mixture components, means and standard deviations.

        :param x: (num. samples, input dim.)
        :return: Mixture components, means, and standard deviations
            in the form (num. samples, num. mixtures)
        """
        x = F.tanh(self.projection(x))

        # mixture weights sum to 1 per sample (explicit mixture axis)
        weights = F.softmax(self.weights_projection(x), dim=1)
        means = self.mean_projection(x)
        # exp keeps the standard deviations strictly positive
        stds = torch.exp(self.std_projection(x))

        return weights, means, stds
项目:keita    作者:iwasaki-kenta    | 项目源码 | 文件源码
def forward(self, *hidden_states):
        """Gate one or two hidden states with softmax attention weights.

        With a single state the result is ``softmax(tanh(projection(h))) * h``.
        With two states the behaviour depends on ``self.mode``:

        * 0 -- both states are scored with the shared ``self.projection``;
        * 1 -- the states are scored with ``self.left_projection`` and
          ``self.right_projection`` respectively;
        * 2 -- the states are concatenated along dim 1, scored once with
          ``self.projection``, and the same weights gate both states.

        :param hidden_states: one or two hidden-state tensors
        :return: the gated state (one input), a ``(left, right)`` tuple of gated
            states (two inputs), or None for any other argument count or mode
        """
        def weights_for(projection, state):
            # NOTE(review): the softmax axis is left implicit, exactly as in the
            # original, because the rank of the hidden states is not visible
            # here (the implicit rule picks dim 1 for 2-D input). Pass an
            # explicit ``dim`` once the expected shape is confirmed.
            return F.softmax(torch.tanh(projection(state)))

        if len(hidden_states) == 1:
            (hidden_state,) = hidden_states
            return weights_for(self.projection, hidden_state) * hidden_state

        if len(hidden_states) == 2:
            left_hidden_state, right_hidden_state = hidden_states

            if self.mode == 0:
                left_attention_weights = weights_for(self.projection, left_hidden_state)
                right_attention_weights = weights_for(self.projection, right_hidden_state)
                return (left_attention_weights * left_hidden_state,
                        right_attention_weights * right_hidden_state)

            if self.mode == 1:
                left_attention_weights = weights_for(self.left_projection, left_hidden_state)
                right_attention_weights = weights_for(self.right_projection, right_hidden_state)
                return (left_attention_weights * left_hidden_state,
                        right_attention_weights * right_hidden_state)

            if self.mode == 2:
                # One set of weights computed from both states jointly.
                hidden_state = torch.cat([left_hidden_state, right_hidden_state], dim=1)
                attention_weights = weights_for(self.projection, hidden_state)
                return (attention_weights * left_hidden_state,
                        attention_weights * right_hidden_state)

        # Unsupported number of states or unknown mode: the original fell off
        # the end of the function, which is kept (made explicit) here.
        return None
项目:keita    作者:iwasaki-kenta    | 项目源码 | 文件源码
def forward(self, last_state, states, mask=None):
        """Score every state against ``last_state`` and normalise the scores.

        The scoring function is selected by ``self.mode``:

        * ``"dot"``     -- element-wise product summed over the hidden axis;
        * ``"general"`` -- as ``"dot"``, with the states first multiplied by
          ``self.projection``;
        * ``"concat"``  -- ``[last_state, states]`` is concatenated on the hidden
          axis, multiplied by ``self.reduction``, passed through tanh and
          multiplied by ``self.projection``.

        :param last_state: (batch size, hidden dim.)
        :param states: (sequence length, batch size, hidden dim.)
        :param mask: optional tensor; positions equal to 0 get a bias of -10000
            added to their energy before the softmax
        :return: attention weights
        :raises ValueError: if ``self.mode`` is not one of the modes above
        """
        sequence_length, batch_size, hidden_dim = states.size()

        # Repeat the query along the sequence axis so that it lines up with states.
        last_state = last_state.unsqueeze(0).expand(sequence_length, batch_size, last_state.size(1))

        if self.mode == "dot":
            energies = (last_state * states).sum(dim=2).squeeze()
        elif self.mode == "general":
            expanded_projection = self.projection.expand(sequence_length, *self.projection.size())
            energies = (last_state * states.bmm(expanded_projection)).sum(dim=2).squeeze()
        elif self.mode == "concat":
            expanded_reduction = self.reduction.expand(sequence_length, *self.reduction.size())
            expanded_projection = self.projection.expand(sequence_length, *self.projection.size())
            joined = torch.cat([last_state, states], dim=2)
            energies = torch.tanh(joined.bmm(expanded_reduction))
            energies = energies.bmm(expanded_projection).squeeze()
        else:
            # Previously an unknown mode surfaced as an UnboundLocalError below.
            raise ValueError("unknown attention mode: {!r}".format(self.mode))

        # ``type(mask) == torch.autograd.Variable`` is never true for tensors on
        # PyTorch >= 0.4 (their type is torch.Tensor), which silently disabled
        # the mask; an isinstance check restores it.
        if isinstance(mask, torch.Tensor):
            energies = energies + ((mask == 0).float() * -10000)

        # NOTE(review): the softmax axis is left implicit as in the original.
        # After the bare ``squeeze()`` the energies are 1-D when the batch (or
        # sequence) has size 1 and 2-D otherwise, so the normalised axis depends
        # on the input size -- confirm the intended axis and pass ``dim``.
        attention_weights = F.softmax(energies)

        return attention_weights
项目:keita    作者:iwasaki-kenta    | 项目源码 | 文件源码
def forward(self, last_state, states):
        """Weight each state by its similarity to the projected ``last_state``.

        ``last_state`` is multiplied by ``self.projection``, broadcast against
        ``states`` and reduced to one energy per entry of the second axis of
        ``states``. A 2-D ``states`` gets a leading axis of size 1 first.

        :param last_state: tensor that can be matrix-multiplied with ``self.projection``
        :param states: 2-D or 3-D tensor; when 3-D it is unpacked as
            (sequence length, batch size, state dim.)
        :return: 1-D tensor with one attention weight per energy
        """
        if len(states.size()) == 2:
            states = states.unsqueeze(0)

        sequence_length, batch_size, _ = states.size()

        # Project the query, repeat it for every state, then flatten to (batch, -1).
        transformed_last_state = last_state @ self.projection
        transformed_last_state = transformed_last_state.expand(sequence_length, batch_size, self.encoder_dim)
        transformed_last_state = transformed_last_state.transpose(0, 1).contiguous()
        transformed_last_state = transformed_last_state.view(batch_size, -1)

        states = states.transpose(0, 1).contiguous().view(batch_size, -1)

        # One scalar energy per row.
        energies = (transformed_last_state * states).sum(dim=1)

        if self.encoder_dim is not None:
            # The first energy is exponentiated on its own; the remaining ones are
            # normalised together. ``energies[:1]`` keeps a 1-D slice, because
            # ``energies[0]`` is 0-dim on current PyTorch and cannot be concatenated.
            attention_weights = torch.cat(
                [torch.exp(energies[:1]), F.softmax(energies[1:], dim=0)], dim=0)
        else:
            # NOTE(review): this ``else`` is restored -- without it the branch above
            # was overwritten unconditionally. The ``expand`` call above already
            # needs ``self.encoder_dim``, so this path looks unreachable; confirm.
            attention_weights = F.softmax(energies, dim=0)

        return attention_weights
项目:pytorch_resnet    作者:taokong    | 项目源码 | 文件源码
def forward(self, x):
        """Run four conv -> ReLU -> batch-norm stages, pool globally and classify.

        :param x: 4-D input; axes 2 and 3 are averaged away by the global pooling
        :return: softmax over axis 1 of the ``self.fc`` output
        """
        stages = (
            (self.conv11, self.bn11),
            (self.conv12, self.bn12),
            (self.conv21, self.bn21),
            (self.conv22, self.bn22),
        )
        for conv, bn in stages:
            # The non-linearity is applied before the normalisation here.
            x = bn(F.relu(conv(x)))

        # Global average pooling: the kernel covers the whole spatial extent.
        x = F.avg_pool2d(x, kernel_size=(x.size(2), x.size(3)))
        # Drop the two pooled axes before the classifier.
        x = self.fc(x.view(x.size()[:2]))
        # The implicit-dim softmax is deprecated; the input is 2-D here, for which
        # the implicit rule chose dim 1, so naming it keeps the result unchanged.
        return F.softmax(x, dim=1)
项目:pytorch_resnet    作者:taokong    | 项目源码 | 文件源码
def forward(self, x):
        """Run four conv -> ReLU -> batch-norm stages, pool globally and classify.

        Stages two to four are each preceded by an offset layer
        (``self.offset12``, ``self.offset21``, ``self.offset22``); the first
        stage has none.

        :param x: 4-D input; axes 2 and 3 are averaged away by the global pooling
        :return: softmax over axis 1 of the ``self.fc`` output
        """
        stages = (
            (None, self.conv11, self.bn11),
            (self.offset12, self.conv12, self.bn12),
            (self.offset21, self.conv21, self.bn21),
            (self.offset22, self.conv22, self.bn22),
        )
        for offset, conv, bn in stages:
            if offset is not None:
                x = offset(x)
            # The non-linearity is applied before the normalisation here.
            x = bn(F.relu(conv(x)))

        # Global average pooling: the kernel covers the whole spatial extent.
        x = F.avg_pool2d(x, kernel_size=(x.size(2), x.size(3)))
        # Drop the two pooled axes before the classifier.
        x = self.fc(x.view(x.size()[:2]))
        # The implicit-dim softmax is deprecated; the input is 2-D here, for which
        # the implicit rule chose dim 1, so naming it keeps the result unchanged.
        return F.softmax(x, dim=1)
项目:intel-cervical-cancer    作者:wangg12    | 项目源码 | 文件源码
def forward(self, im_data, im_info, gt_boxes=None, gt_ishard=None, dontcare_areas=None):
        """Classify and regress boxes for the regions proposed by ``self.rpn``.

        In training mode the proposals are first replaced by the sampled ROIs
        from ``self.proposal_target_layer``, and the losses returned by
        ``self.build_loss`` are stored on ``self.cross_entropy`` and
        ``self.loss_box``.

        :param im_data: image batch, passed to ``self.rpn``
        :param im_info: image meta information, passed to ``self.rpn``
        :param gt_boxes: ground-truth boxes (used in training mode)
        :param gt_ishard: ground-truth "hard" flags (used in training mode)
        :param dontcare_areas: regions to ignore (used in training mode)
        :return: ``(cls_prob, bbox_pred, rois)``
        """
        features, rois = self.rpn(im_data, im_info, gt_boxes, gt_ishard, dontcare_areas)

        # The two training-only sections below were left orphan-indented, without
        # a guarding statement; the ``self.training`` guard is restored.
        if self.training:
            roi_data = self.proposal_target_layer(rois, gt_boxes, gt_ishard, dontcare_areas, self.n_classes)
            rois = roi_data[0]

        # roi pool, then flatten each region to one feature vector
        pooled_features = self.roi_pool(features, rois)
        x = pooled_features.view(pooled_features.size()[0], -1)
        # An explicit fc6 / dropout / fc7 / dropout stack was commented out in the
        # original at this point; ``self.fcs`` is what runs.
        x = self.fcs(x)

        cls_score = self.score_fc(x)
        # NOTE(review): assumes ``score_fc`` returns (num. rois, num. classes);
        # dim=1 is what the implicit rule chose for such a 2-D input.
        cls_prob = F.softmax(cls_score, dim=1)
        bbox_pred = self.bbox_fc(x)

        if self.training:
            # The loss is built from the raw scores, not the probabilities.
            self.cross_entropy, self.loss_box = self.build_loss(cls_score, bbox_pred, roi_data)

        return cls_prob, bbox_pred, rois
项目:tutorials    作者:pytorch    | 项目源码 | 文件源码
def forward(self, input, hidden, encoder_outputs):
        """Run one decoding step with attention over the encoder outputs.

        :param input: index of the current input token, fed to ``self.embedding``
        :param hidden: previous hidden state; ``hidden[0]`` is concatenated with
            the embedded input to compute the attention scores
        :param encoder_outputs: encoder outputs that the attention weights are
            applied to
        :return: ``(output, hidden, attn_weights)`` -- log-probabilities from
            ``self.out``, the new hidden state and the attention weights
        """
        embedded = self.embedding(input).view(1, 1, -1)
        embedded = self.dropout(embedded)

        # Attention scores come from the embedded input and the current hidden state.
        attn_weights = F.softmax(
            self.attn(torch.cat((embedded[0], hidden[0]), 1)), dim=1)
        # Weighted sum of the encoder outputs (batched matmul with a batch of 1).
        attn_applied = torch.bmm(attn_weights.unsqueeze(0),
                                 encoder_outputs.unsqueeze(0))

        # Merge the embedded input with the attended context.
        output = torch.cat((embedded[0], attn_applied[0]), 1)
        output = self.attn_combine(output).unsqueeze(0)

        # The same GRU is applied ``n_layers`` times, with a ReLU before each pass.
        for _ in range(self.n_layers):
            output = F.relu(output)
            output, hidden = self.gru(output, hidden)

        output = F.log_softmax(self.out(output[0]), dim=1)
        return output, hidden, attn_weights
项目:skorch    作者:dnouri    | 项目源码 | 文件源码
def module_cls(self):
        """Return a simple module that concatenates its 2 inputs in
        forward step.

        :return: the ``MyModule`` class itself, not an instance
        """
        class MyModule(nn.Module):
            """Concatenate two inputs along axis 1 and classify the result."""

            def __init__(self):
                super(MyModule, self).__init__()
                # The concatenated input has 20 features; there are 2 outputs.
                self.dense = nn.Linear(20, 2)

            # pylint: disable=arguments-differ
            def forward(self, X0, X1):
                X = torch.cat((X0, X1), 1)
                X = F.softmax(self.dense(X), dim=-1)
                return X

        return MyModule