Python torch.nn module: LogSoftmax() example source code

We extracted the following 45 code examples from open-source Python projects to illustrate how to use torch.nn.LogSoftmax().
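
A minimal, self-contained usage sketch first (assuming a recent PyTorch release, where the dim argument should be passed explicitly; many of the snippets below predate that requirement and call nn.LogSoftmax() with no arguments):

import torch
import torch.nn as nn

# LogSoftmax computes log(softmax(x)) along the given dimension.
log_softmax = nn.LogSoftmax(dim=1)
x = torch.randn(4, 10)       # batch of 4 samples, 10 classes
log_probs = log_softmax(x)   # shape (4, 10); log_probs.exp() sums to 1 per row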

Project: pyro | Author: uber
def call_nn_op(op, epsilon):
    """
    a helper function that adds appropriate parameters when calling
    an nn module representing an operation like Softmax
    :param op: the nn.Module operation to instantiate
    :param epsilon: a scaling parameter for certain custom modules
    :return: instantiation of the op module with appropriate parameters
    """
    if op in [ClippedSoftmax]:
        try:
            return op(epsilon, dim=1)
        except TypeError:
            # Support older pytorch 0.2 release.
            return op(epsilon)
    elif op in [ClippedSigmoid]:
        return op(epsilon)
    elif op in [nn.Softmax, nn.LogSoftmax]:
        return op(dim=1)
    else:
        return op()
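A hypothetical call, as a sketch: for standard modules such as nn.LogSoftmax, the epsilon argument is ignored and the helper simply supplies dim=1 (ClippedSoftmax and ClippedSigmoid are custom modules assumed to exist in pyro's example code).

log_softmax_op = call_nn_op(nn.LogSoftmax, epsilon=1e-7)  # returns nn.LogSoftmax(dim=1)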
Project: NeuralMT | Author: hlt-mt
def new_instance(src_dict, trg_dict, model_params=None, random_seed=None, gpu_ids=None, init_value=0.1):
        if model_params is None:
            from nmmt import NMTEngine
            model_params = NMTEngine.Parameters()

        if gpu_ids is not None and len(gpu_ids) > 0:
            torch.cuda.set_device(gpu_ids[0])

        encoder = Models.Encoder(model_params, src_dict)
        decoder = Models.Decoder(model_params, trg_dict)
        generator = nn.Sequential(nn.Linear(model_params.rnn_size, trg_dict.size()), nn.LogSoftmax())

        model = Models.NMTModel(encoder, decoder)

        if gpu_ids is not None and len(gpu_ids) > 0:
            model.cuda()
            generator.cuda()

            if len(gpu_ids) > 1:
                model = nn.DataParallel(model, device_ids=gpu_ids, dim=1)
                generator = nn.DataParallel(generator, device_ids=gpu_ids, dim=0)
        else:
            model.cpu()
            generator.cpu()

        model.generator = generator

        for p in model.parameters():
            p.data.uniform_(-init_value, init_value)

        optim = Optim(model_params.optim, model_params.learning_rate, model_params.max_grad_norm,
                      lr_decay=model_params.learning_rate_decay, start_decay_at=model_params.start_decay_at)
        optim.set_parameters(model.parameters())

        return NMTEngineTrainer(model, optim, src_dict, trg_dict,
                                model_params=model_params, gpu_ids=gpu_ids, random_seed=random_seed)
Project: baseline | Author: dpressel
def __init__(self, embeddings_in, embeddings_out, **kwargs):
        super(Seq2SeqAttnModel, self).__init__(embeddings_in, embeddings_out)
        self.hsz = kwargs['hsz']
        nlayers = kwargs['layers']
        rnntype = kwargs['rnntype']
        pdrop = kwargs.get('dropout', 0.5)
        dsz = embeddings_in.dsz
        self.gpu = kwargs.get('gpu', True)
        self.encoder_rnn = pytorch_rnn(dsz, self.hsz, rnntype, nlayers, pdrop)
        self.dropout = nn.Dropout(pdrop)
        self.decoder_rnn = pytorch_rnn_cell(self.hsz + dsz, self.hsz, rnntype, nlayers, pdrop)
        self.preds = nn.Linear(self.hsz, self.nc)
        self.probs = nn.LogSoftmax()
        self.output_to_attn = nn.Linear(self.hsz, self.hsz, bias=False)
        self.attn_softmax = nn.Softmax()
        self.attn_out = nn.Linear(2 * self.hsz, self.hsz, bias=False)
        self.attn_tanh = pytorch_activation("tanh")
        self.nlayers = nlayers
Project: ParlAI | Author: facebookresearch
def __init__(self, opt, data_agent):
        super().__init__()
        self.opt = opt

        self.input_emb = nn.Embedding(data_agent.wordcnt, opt['embedding_dim'], padding_idx=0)
        self.action_type_emb = nn.Embedding(data_agent.get_num_actions(), opt['action_type_emb_dim'])
        self.encoder = nn.GRU(opt['embedding_dim'], opt['rnn_h'], opt['rnn_layers'], batch_first=True, bidirectional=opt['bidir'])
        self.decoder = nn.Sequential(
            nn.Linear(opt['rnn_h'], 1),
        )
        self.log_softmax = nn.LogSoftmax()
        self.trans = nn.Sequential(
            nn.Linear(opt['rnn_h'] * (2 if opt['bidir'] else 1), opt['embedding_dim']),
            nn.Tanh(),
        )
        counter_emb = opt['counter_emb_dim']
        if opt['counter_ablation']:
            counter_emb = 0
        self.dec_gru = nn.GRU(
            opt['rnn_h'] * (2 if opt['bidir'] else 1) + counter_emb
            + (opt['embedding_dim'] if not opt['room_ablation'] else 0)
            + opt['action_type_emb_dim'] + opt['action_type_emb_dim']
            + opt['embedding_dim'] + opt['embedding_dim']
            + opt['rnn_h'] * (2 if opt['bidir'] else 1),
            opt['rnn_h'], opt['rnn_layers'], batch_first=True)
        self.merge = nn.Sequential(
            nn.Linear(opt['rnn_h'] * 2, opt['rnn_h']),
            nn.Tanh(),
        )
        self.counter_emb = nn.Embedding(opt['counter_max'] + 1, opt['counter_emb_dim'])
Project: ParlAI | Author: facebookresearch
def __init__(self, opt, data_agent):
        super().__init__()
        self.opt = opt

        self.y_dim = data_agent.y_dim

        self.input_emb = nn.Embedding(data_agent.wordcnt, opt['embedding_dim'], padding_idx=0)
        self.encoder = nn.GRU(opt['embedding_dim'], opt['rnn_h'], opt['rnn_layers'], batch_first=True)
        self.decoder = nn.GRU(self.y_dim, opt['rnn_h'], opt['rnn_layers'], batch_first=True)
        self.mapping = nn.Sequential(
            nn.Linear(opt['rnn_h'] * 2, self.y_dim),
            nn.LogSoftmax(),
        )
Project: PyTorchDemystified | Author: hhsecond
def __init__(self, vocab_size, hidden_size, embedding_size, num_layers=1):
        super().__init__()
        self.embed = nn.Embedding(vocab_size, embedding_size)
        self.gru = nn.GRU(embedding_size, hidden_size, num_layers)
        self.fc = nn.Linear(hidden_size, vocab_size)
        self.softmax = nn.LogSoftmax()
Project: attention-is-all-you-need-pytorch | Author: jadore801120
def __init__(self, opt):
        self.opt = opt
        self.tt = torch.cuda if opt.cuda else torch

        checkpoint = torch.load(opt.model)
        model_opt = checkpoint['settings']
        self.model_opt = model_opt

        model = Transformer(
            model_opt.src_vocab_size,
            model_opt.tgt_vocab_size,
            model_opt.max_token_seq_len,
            proj_share_weight=model_opt.proj_share_weight,
            embs_share_weight=model_opt.embs_share_weight,
            d_k=model_opt.d_k,
            d_v=model_opt.d_v,
            d_model=model_opt.d_model,
            d_word_vec=model_opt.d_word_vec,
            d_inner_hid=model_opt.d_inner_hid,
            n_layers=model_opt.n_layers,
            n_head=model_opt.n_head,
            dropout=model_opt.dropout)

        prob_projection = nn.LogSoftmax()

        model.load_state_dict(checkpoint['model'])
        print('[Info] Trained model state loaded.')

        if opt.cuda:
            model.cuda()
            prob_projection.cuda()
        else:
            model.cpu()
            prob_projection.cpu()

        model.prob_projection = prob_projection

        self.model = model
        self.model.eval()
Project: treehopper | Author: tomekkorbak
def __init__(self, args, dropout=0.5):
        super(SentimentModule, self).__init__()
        self.cuda_flag = args.cuda
        self.mem_dim = args.mem_dim
        self.num_classes = args.num_classes

        self.dropout = dropout
        self.linear_layer = nn.Linear(self.mem_dim, self.num_classes)
        self.logsoftmax = nn.LogSoftmax()
        self.softmax = nn.Softmax()
        if self.cuda_flag:
            self.linear_layer = self.linear_layer.cuda()
Project: NeuralMT | Author: hlt-mt
def load_from_checkpoint(checkpoint_path, using_cuda):
        checkpoint = torch.load(checkpoint_path, map_location=lambda storage, loc: storage)

        model_opt = NMTEngine.Parameters()
        model_opt.__dict__.update(checkpoint['opt'])

        src_dict = checkpoint['dicts']['src']
        trg_dict = checkpoint['dicts']['tgt']

        encoder = Models.Encoder(model_opt, src_dict)
        decoder = Models.Decoder(model_opt, trg_dict)

        model = Models.NMTModel(encoder, decoder)
        model.load_state_dict(checkpoint['model'])

        generator = nn.Sequential(nn.Linear(model_opt.rnn_size, trg_dict.size()), nn.LogSoftmax())
        generator.load_state_dict(checkpoint['generator'])

        if using_cuda:
            model.cuda()
            generator.cuda()
        else:
            model.cpu()
            generator.cpu()

        model.generator = generator
        model.eval()

        optim = checkpoint['optim']
        optim.set_parameters(model.parameters())
        optim.optimizer.load_state_dict(checkpoint['optim'].optimizer.state_dict())

        return NMTEngine(model_opt, src_dict, trg_dict, model, optim, checkpoint, using_cuda)
Project: NeuralMT | Author: hlt-mt
def __init__(self, opt):
        self.opt = opt
        self.tt = torch.cuda if opt.cuda else torch
        self.beam_accum = None

        checkpoint = torch.load(opt.model,
                                map_location=lambda storage, loc: storage)

        model_opt = checkpoint['opt']
        self.src_dict = checkpoint['dicts']['src']
        self.tgt_dict = checkpoint['dicts']['tgt']
        self._type = model_opt.encoder_type \
            if "encoder_type" in model_opt else "text"

        if self._type == "text":
            encoder = onmt.Models.Encoder(model_opt, self.src_dict)
        elif self._type == "img":
            loadImageLibs()
            encoder = onmt.modules.ImageEncoder(model_opt)

        decoder = onmt.Models.Decoder(model_opt, self.tgt_dict)
        model = onmt.Models.NMTModel(encoder, decoder)

        generator = nn.Sequential(
            nn.Linear(model_opt.rnn_size, self.tgt_dict.size()),
            nn.LogSoftmax())

        model.load_state_dict(checkpoint['model'])
        generator.load_state_dict(checkpoint['generator'])

        if opt.cuda:
            model.cuda()
            generator.cuda()
        else:
            model.cpu()
            generator.cpu()

        model.generator = generator

        self.model = model
        self.model.eval()
Project: alpha-dimt-icmlws | Author: sotetsuk
def __init__(self, opt, model=None, src_dict=None, tgt_dict=None):
        self.opt = opt
        self.tt = torch.cuda if opt.cuda else torch

        if model is None:
            checkpoint = torch.load(opt.model, map_location=lambda storage, loc: storage)

            model_opt = checkpoint['opt']
            src_dict = checkpoint['dicts']['src']
            tgt_dict = checkpoint['dicts']['tgt']

            encoder = onmt.Models.Encoder(model_opt, src_dict)
            decoder = onmt.Models.Decoder(model_opt, tgt_dict)
            model = onmt.Models.NMTModel(encoder, decoder)

            generator = nn.Sequential(
                nn.Linear(model_opt.rnn_size, tgt_dict.size()),
                nn.LogSoftmax())

            model.load_state_dict(checkpoint['model'])
            generator.load_state_dict(checkpoint['generator'])

            if opt.cuda:
                model.cuda()
                generator.cuda()
            else:
                model.cpu()
                generator.cpu()

            model.generator = generator

        self.src_dict = src_dict
        self.tgt_dict = tgt_dict

        self.model = model
        self.model.eval()
Project: pytorch-adda | Author: corenel
def __init__(self, input_dims, hidden_dims, output_dims):
        """Init discriminator."""
        super(Discriminator, self).__init__()

        self.restored = False

        self.layer = nn.Sequential(
            nn.Linear(input_dims, hidden_dims),
            nn.ReLU(),
            nn.Linear(hidden_dims, hidden_dims),
            nn.ReLU(),
            nn.Linear(hidden_dims, output_dims),
            nn.LogSoftmax()
        )
Project: covfefe | Author: deepnn
def log_softmax():
    return nn.LogSoftmax()
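On current PyTorch this factory triggers a deprecation warning because dim is omitted; a hedged variant that forwards the argument (log_softmax_with_dim is a hypothetical name, not the project's code):

def log_softmax_with_dim(dim=-1):
    # forward the dim argument, as newer PyTorch requires
    return nn.LogSoftmax(dim=dim)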
Project: squad_rasor_nn | Author: hsgodhia
def __init__(self, config, emb_data):
        super(SquadModel, self).__init__()
        # an embedding layer to lookup pre-trained word embeddings
        self.embed = nn.Embedding(config.vocab_size, config.emb_dim)
        self.embed.weight.requires_grad = False  # do not propagate into the pre-trained word embeddings
        self.embed.weight.data.copy_(emb_data)
        # used for eq(6) does FFNN(p_i)*FFNN(q_j)
        self.ff_align = nn.Linear(config.emb_dim, config.ff_dim)
        # used for eq(10) does FFNN(q_j')
        self.ff_q_indep = nn.Linear(2*config.hidden_dim, config.ff_dim)
        # used for eq(2) does FFNN(h_a) in a simplified form so that it can be re-used,
        # note: h_a = [u,v] where u and v are start and end words respectively
        # we have 2*config.hidden_dim since we are using a bi-directional LSTM
        self.p_end_ff = nn.Linear(2 * config.hidden_dim, config.ff_dim)
        self.p_start_ff = nn.Linear(2 * config.hidden_dim, config.ff_dim)
        # used for eq(2) plays the role of w_a
        self.w_a = nn.Linear(config.ff_dim, 1, bias=False)
        # used for eq(10) plays the role of w_q
        self.w_q = nn.Linear(config.ff_dim, 1, bias=False)
        self.relu = nn.ReLU()
        self.softmax = nn.Softmax()
        self.logsoftmax = nn.LogSoftmax()
        self.dropout = nn.Dropout(0.6)

        self.hidden_qindp = self.init_hidden(config.num_layers, config.hidden_dim, config.batch_size)
        self.hidden = self.init_hidden(config.num_layers, config.hidden_dim, config.batch_size)
        # since we are using q_align and p_emb as p_star we have input as 2*emb_dim
        # note: dropout is 0.6 here (not 0.1) and num_layers comes from config
        self.gru = nn.LSTM(input_size=2 * config.emb_dim + 2 * config.hidden_dim, hidden_size=config.hidden_dim,
                           num_layers=config.num_layers, dropout=0.6, bidirectional=True)
        self.q_indep_bilstm = nn.LSTM(input_size=config.emb_dim, hidden_size=config.hidden_dim,
                                      num_layers=config.num_layers, dropout=0.6, bidirectional=True)
        #change init_hidden when you change this gru/lstm

        parameters = ifilter(lambda p: p.requires_grad, self.parameters())
        for p in parameters:
            self.init_param(p)
Project: squad_rasor_nn | Author: hsgodhia
def __init__(self, config, emb_data):
        super(SquadModel, self).__init__()
        # an embedding layer to lookup pre-trained word embeddings
        self.embed = nn.Embedding(config.vocab_size, config.emb_dim)
        self.embed.weight.requires_grad = False  # do not propagate into the pre-trained word embeddings
        self.embed.weight.data.copy_(emb_data)
        # used for eq(6) does FFNN(p_i)*FFNN(q_j)
        self.ff_align = nn.Linear(config.emb_dim, config.ff_dim)
        # used for eq(10) does FFNN(q_j')
        self.ff_q_indep = nn.Linear(2*config.hidden_dim, config.ff_dim)
        # used for eq(2) does FFNN(h_a) in a simplified form so that it can be re-used,
        # note: h_a = [u,v] where u and v are start and end words respectively
        # we have 2*config.hidden_dim since we are using a bi-directional LSTM
        self.p_end_ff = nn.Linear(2 * config.hidden_dim, config.ff_dim)
        self.p_start_ff = nn.Linear(2 * config.hidden_dim, config.ff_dim)
        # used for eq(2) plays the role of w_a
        self.w_a = nn.Linear(config.ff_dim, 1, bias=False)
        # used for eq(10) plays the role of w_q
        self.w_q = nn.Linear(config.ff_dim, 1, bias=False)
        self.relu = nn.ReLU()
        self.softmax = nn.Softmax()
        self.logsoftmax = nn.LogSoftmax()
        self.dropout = nn.Dropout(0.2)

        self.hidden_qindp = self.init_hidden(config.num_layers, config.hidden_dim, config.batch_size)
        self.hidden = self.init_hidden(config.num_layers, config.hidden_dim, config.batch_size)
        # since we are using q_align and p_emb as p_star we have input as 2*emb_dim
        # num_layers = 2 and dropout = 0.1
        self.gru = nn.LSTM(input_size=2 * config.emb_dim + 1 + 2 * config.hidden_dim, hidden_size=config.hidden_dim,
                           num_layers=config.num_layers, dropout=0.1, bidirectional=True)
        self.q_indep_bilstm = nn.LSTM(input_size=config.emb_dim, hidden_size=config.hidden_dim,
                                      num_layers=config.num_layers, dropout=0.1, bidirectional=True)
        #change init_hidden when you change this gru/lstm

        parameters = ifilter(lambda p: p.requires_grad, self.parameters())
        for p in parameters:
            self.init_param(p)
Project: DBQA | Author: nanfeng1101
def __init__(self, n_in, n_hidden, n_out):
        super(MLPDropout, self).__init__()
        self.hidden_layer = nn.Linear(n_in, n_hidden)
        self.tanh = nn.Tanh()
        self.logistic_layer = nn.Linear(n_hidden, n_out)
        self.softmax = nn.LogSoftmax()
Project: DBQA | Author: nanfeng1101
def __init__(self, n_in, n_hidden, n_out):
        super(MLP, self).__init__()
        self.mlp = nn.Sequential(nn.Linear(n_in, n_hidden),
                                 nn.Tanh(),
                                 nn.Linear(n_hidden, n_out),
                                 nn.LogSoftmax()
                                 )
Project: NeuroNLP2 | Author: XuezheMax
def __init__(self, word_dim, num_words, char_dim, num_chars, num_filters, kernel_size,
                 rnn_mode, hidden_size, num_layers, num_labels, tag_space=0,
                 embedd_word=None, embedd_char=None, p_in=0.2, p_rnn=0.5):
        super(BiRecurrentConv, self).__init__()

        self.word_embedd = Embedding(num_words, word_dim, init_embedding=embedd_word)
        self.char_embedd = Embedding(num_chars, char_dim, init_embedding=embedd_char)
        self.conv1d = nn.Conv1d(char_dim, num_filters, kernel_size, padding=kernel_size - 1)
        self.dropout_in = nn.Dropout(p=p_in)
        self.dropout_rnn = nn.Dropout(p_rnn)

        if rnn_mode == 'RNN':
            RNN = nn.RNN
        elif rnn_mode == 'LSTM':
            RNN = nn.LSTM
        elif rnn_mode == 'GRU':
            RNN = nn.GRU
        else:
            raise ValueError('Unknown RNN mode: %s' % rnn_mode)

        self.rnn = RNN(word_dim + num_filters, hidden_size, num_layers=num_layers,
                       batch_first=True, bidirectional=True, dropout=p_rnn)

        self.dense = None
        out_dim = hidden_size * 2
        if tag_space:
            self.dense = nn.Linear(out_dim, tag_space)
            out_dim = tag_space
        self.dense_softmax = nn.Linear(out_dim, num_labels)

        # TODO set dim for log_softmax and set reduce=False to NLLLoss
        self.logsoftmax = nn.LogSoftmax()
        self.nll_loss = nn.NLLLoss(size_average=False)
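For reference, a sketch of what the TODO above might resolve to on a newer PyTorch release (an assumption, not the project's code; reduction='none' supersedes the older size_average/reduce flags):

        # sketch: explicit dim, per-element losses
        self.logsoftmax = nn.LogSoftmax(dim=-1)
        self.nll_loss = nn.NLLLoss(reduction='none')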
Project: NeuroNLP2 | Author: XuezheMax
def __init__(self, word_dim, num_words, char_dim, num_chars, pos_dim, num_pos, num_filters, kernel_size,
                 rnn_mode, hidden_size, num_layers, num_labels, arc_space, type_space,
                 embedd_word=None, embedd_char=None, embedd_pos=None,
                 p_in=0.2, p_out=0.5, p_rnn=(0.5, 0.5), biaffine=True):
        super(BiRecurrentConvBiAffine, self).__init__()

        self.word_embedd = Embedding(num_words, word_dim, init_embedding=embedd_word)
        self.char_embedd = Embedding(num_chars, char_dim, init_embedding=embedd_char)
        self.pos_embedd = Embedding(num_pos, pos_dim, init_embedding=embedd_pos)
        self.conv1d = nn.Conv1d(char_dim, num_filters, kernel_size, padding=kernel_size - 1)
        self.dropout_in = nn.Dropout2d(p=p_in)
        self.dropout_out = nn.Dropout2d(p=p_out)
        self.num_labels = num_labels

        if rnn_mode == 'RNN':
            RNN = VarMaskedRNN
        elif rnn_mode == 'LSTM':
            RNN = VarMaskedLSTM
        elif rnn_mode == 'FastLSTM':
            RNN = VarMaskedFastLSTM
        elif rnn_mode == 'GRU':
            RNN = VarMaskedGRU
        else:
            raise ValueError('Unknown RNN mode: %s' % rnn_mode)

        self.rnn = RNN(word_dim + num_filters + pos_dim, hidden_size, num_layers=num_layers,
                       batch_first=True, bidirectional=True, dropout=p_rnn)

        out_dim = hidden_size * 2
        self.arc_h = nn.Linear(out_dim, arc_space)
        self.arc_c = nn.Linear(out_dim, arc_space)
        self.attention = BiAAttention(arc_space, arc_space, 1, biaffine=biaffine)

        self.type_h = nn.Linear(out_dim, type_space)
        self.type_c = nn.Linear(out_dim, type_space)
        self.bilinear = BiLinear(type_space, type_space, self.num_labels)
        self.logsoftmax = nn.LogSoftmax()
Project: baseline | Author: dpressel
def _init_output(self, input_dim, nc):
        self.output = nn.Sequential()
        append2seq(self.output, (
            nn.Linear(input_dim, nc),
            nn.LogSoftmax()
        ))
Project: baseline | Author: dpressel
def __init__(self, embeddings_in, embeddings_out, **kwargs):
        super(Seq2SeqModel, self).__init__(embeddings_in, embeddings_out)

        self.hsz = kwargs['hsz']
        nlayers = kwargs['layers']
        rnntype = kwargs['rnntype']
        pdrop = kwargs.get('dropout', 0.5)
        dsz = embeddings_in.dsz
        self.gpu = kwargs.get('gpu', True)
        self.dropout = nn.Dropout(pdrop)
        self.encoder_rnn = pytorch_rnn(dsz, self.hsz, rnntype, nlayers, pdrop)
        self.preds = nn.Linear(self.hsz, self.nc)
        self.decoder_rnn = pytorch_rnn_cell(dsz, self.hsz, rnntype, nlayers, pdrop)
        self.probs = nn.LogSoftmax()
Project: Rita | Author: RITct
def __init__(self, input_size, hidden_size, output_size):
        super(ANN, self).__init__()
        self.i2h = nn.Linear(input_size, hidden_size)
        #self.h2h = nn.Linear(hidden_size, hidden_size)
        self.h2o = nn.Linear(hidden_size, output_size)
        self.softmax = nn.LogSoftmax()
Project: seqmod | Author: emanjavacas
def build_output(self, hid_dim, deepout_layers, deepout_act, tie_weights):
        """
        Create output projection (from decoder output to softmax)
        """
        output = []

        if deepout_layers > 0:
            output.append(
                Highway(hid_dim, num_layers=deepout_layers,
                        activation=deepout_act))

        emb_dim = self.embeddings.embedding_dim
        vocab_size = self.embeddings.num_embeddings

        if not tie_weights:
            proj = nn.Linear(hid_dim, vocab_size)
        else:
            proj = nn.Linear(emb_dim, vocab_size)
            proj.weight = self.embeddings.weight
            if emb_dim != hid_dim:
                # inp embeddings are (vocab x emb_dim); output is (hid x vocab)
                # if emb_dim != hidden, we insert a projection
                logging.warn("When tying weights, output layer and "
                             "embedding layer should have equal size. "
                             "A projection layer will be insterted.")
                proj = nn.Sequential(nn.Linear(hid_dim, emb_dim), proj)

        output.append(proj)
        output.append(nn.LogSoftmax(dim=1))

        return nn.Sequential(*output)
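A hypothetical call, assuming deepout_layers=0 (so the Highway block is skipped) and hid_dim equal to the embedding size, so that tying needs no extra projection:

output = self.build_output(hid_dim=self.embeddings.embedding_dim,
                           deepout_layers=0, deepout_act=None,
                           tie_weights=True)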
Project: nmp_qc | Author: priba
def forward(self, g, h_in, e):

        h = []

        # Padding to some larger dimension d
        h_t = torch.cat([h_in, Variable(torch.Tensor(h_in.size(0), h_in.size(1), self.args['out'] - h_in.size(2)).type_as(h_in.data).zero_())], 2)

        h.append(h_t.clone())

        # Layer
        for t in range(0, self.n_layers):

            h_t = Variable(torch.zeros(h[0].size(0), h[0].size(1), h[0].size(2)).type_as(h_in.data))

            # Apply one layer pass (Message + Update)
            for v in range(0, h_in.size(1)):

                m = self.m[0].forward(h[t][:, v, :], h[t], e[:, v, :])

                # Nodes without edge set message to 0
                m = g[:, v, :, None].expand_as(m) * m

                m = torch.sum(m, 1)

                # Update
                h_t[:, v, :] = self.u[0].forward(h[t][:, v, :], m)

            # Delete virtual nodes
            h_t = (torch.sum(torch.abs(h_in), 2).expand_as(h_t) > 0).type_as(h_t)*h_t
            h.append(h_t.clone())

        # Readout
        res = self.r.forward(h)
        if self.type == 'classification':
            res = nn.LogSoftmax()(res)
        return res
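Constructing nn.LogSoftmax() inside forward allocates a new module on every call; an equivalent, more idiomatic form on current PyTorch (a sketch, assuming res has shape (batch, num_classes)):

import torch.nn.functional as F

res = F.log_softmax(res, dim=1)  # functional form; no per-call module allocation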
Project: nmp_qc | Author: priba
def forward(self, g, h_in, e):

        h = []
        h.append(h_in)

        # Layer
        for t in range(0, len(self.m)):

            u_args = self.u[t].get_args()
            h_t = Variable(torch.zeros(h_in.size(0), h_in.size(1), u_args['out']).type_as(h[t].data))

            # Apply one layer pass (Message + Update)
            for v in range(0, h_in.size(1)):

                m = self.m[t].forward(h[t][:, v, :], h[t], e[:, v, :, :])

                # Nodes without edge set message to 0
                m = g[:, v, :, None].expand_as(m) * m

                m = torch.sum(m, 1)

                # Interaction Net
                opt = {}
                opt['x_v'] = Variable(torch.Tensor([]).type_as(m.data))

                h_t[:, v, :] = self.u[t].forward(h[t][:, v, :], m, opt)

            h.append(h_t.clone())

        # Readout
        res = self.r.forward(h)
        if self.type == 'classification':
            res = nn.LogSoftmax()(res)
        return res
Project: nmp_qc | Author: priba
def forward(self, g, h_in, e):

        h = []

        # Padding to some larger dimension d
        h_t = torch.cat([h_in, Variable(
            torch.zeros(h_in.size(0), h_in.size(1), self.args['out'] - h_in.size(2)).type_as(h_in.data))], 2)

        h.append(h_t.clone())

        # Layer
        for t in range(0, self.n_layers):
            e_aux = e.view(-1, e.size(3))

            h_aux = h[t].view(-1, h[t].size(2))

            m = self.m[0].forward(h[t], h_aux, e_aux)
            m = m.view(h[0].size(0), h[0].size(1), -1, m.size(1))

            # Nodes without edge set message to 0
            m = torch.unsqueeze(g, 3).expand_as(m) * m

            m = torch.squeeze(torch.sum(m, 1))

            h_t = self.u[0].forward(h[t], m)

            # Delete virtual nodes
            h_t = (torch.sum(h_in, 2).expand_as(h_t) > 0).type_as(h_t) * h_t
            h.append(h_t)

        # Readout
        res = self.r.forward(h)

        if self.type == 'classification':
            res = nn.LogSoftmax()(res)
        return res
Project: MachineLearning | Author: timomernick
def __init__(self, batch_size, size):
        super(Pool, self).__init__()

        self.size = size

        self.inputs = Variable(torch.FloatTensor(batch_size, 1, size, size)).cuda()
        self.targets = Variable(torch.LongTensor(batch_size)).cuda()

        self.medium = nn.Parameter(torch.randn(num_media, 1, size, size) * 0.02, requires_grad=True)

        self.conv0 = nn.Conv2d(1, 1, 3, padding=1, bias=False)

        self.fc0_size = 8 * 8
        self.fc0 = nn.Linear(self.fc0_size, num_classes)

        self.maxPool = nn.AvgPool2d(8)  # note: average pooling, despite the attribute name

        self.relu = nn.ReLU()
        self.tanh = nn.Tanh()

        self.logSoftmax = nn.LogSoftmax()

        self.loss = nn.NLLLoss()

        learning_rate = 0.0005

        self.conv0.weight.requires_grad = False
        s = 0.25
        kernel = torch.FloatTensor([0.0, s, 0.0,
                                    s, 0.0, s,
                                    0.0, s, 0.0]).view(3, 3)
        self.conv0.weight.data.copy_(kernel)

        parameters = ifilter(lambda p: p.requires_grad, self.parameters())
        parameters = list(parameters)
        parameters.append(self.medium)
        self.optimizer = optim.RMSprop(parameters, lr=learning_rate, momentum=0.0)
Project: pytorch-PersonReID | Author: huaijin-chen
def __init__(self):
        super(Net_cls,self).__init__()
        self.cls_model = nn.Sequential(OrderedDict([
            ('fc4', nn.Linear(1024, 512)),
            ('relu', nn.ReLU()),
            ('fc5', nn.Linear(512, 2)),
            ('log_softmax', nn.LogSoftmax()),
        ]))
Project: tutorials | Author: pytorch
def __init__(self, input_size, hidden_size, output_size):
        super(RNN, self).__init__()

        self.hidden_size = hidden_size

        self.i2h = nn.Linear(input_size + hidden_size, hidden_size)
        self.i2o = nn.Linear(input_size + hidden_size, output_size)
        self.softmax = nn.LogSoftmax(dim=1)
Project: tutorials | Author: pytorch
def __init__(self, hidden_size, output_size, n_layers=1):
        super(DecoderRNN, self).__init__()
        self.n_layers = n_layers
        self.hidden_size = hidden_size

        self.embedding = nn.Embedding(output_size, hidden_size)
        self.gru = nn.GRU(hidden_size, hidden_size)
        self.out = nn.Linear(hidden_size, output_size)
        self.softmax = nn.LogSoftmax(dim=1)
Project: tutorials | Author: pytorch
def __init__(self, input_size, hidden_size, output_size):
        super(RNN, self).__init__()
        self.hidden_size = hidden_size

        self.i2h = nn.Linear(n_categories + input_size + hidden_size, hidden_size)
        self.i2o = nn.Linear(n_categories + input_size + hidden_size, output_size)
        self.o2o = nn.Linear(hidden_size + output_size, output_size)
        self.dropout = nn.Dropout(0.1)
        self.softmax = nn.LogSoftmax(dim=1)
Project: SeqGAN-PyTorch | Author: ZiJianZhao
def __init__(self, num_emb, emb_dim, hidden_dim, use_cuda):
        super(TargetLSTM, self).__init__()
        self.num_emb = num_emb
        self.emb_dim = emb_dim
        self.hidden_dim = hidden_dim
        self.use_cuda = use_cuda
        self.emb = nn.Embedding(num_emb, emb_dim)
        self.lstm = nn.LSTM(emb_dim, hidden_dim, batch_first=True)
        self.lin = nn.Linear(hidden_dim, num_emb)
        self.softmax = nn.LogSoftmax()
        self.init_params()
Project: SeqGAN-PyTorch | Author: ZiJianZhao
def __init__(self, num_emb, emb_dim, hidden_dim, use_cuda):
        super(Generator, self).__init__()
        self.num_emb = num_emb
        self.emb_dim = emb_dim
        self.hidden_dim = hidden_dim
        self.use_cuda = use_cuda
        self.emb = nn.Embedding(num_emb, emb_dim)
        self.lstm = nn.LSTM(emb_dim, hidden_dim, batch_first=True)
        self.lin = nn.Linear(hidden_dim, num_emb)
        self.softmax = nn.LogSoftmax()
        self.init_params()
Project: SeqGAN-PyTorch | Author: ZiJianZhao
def __init__(self, num_classes, vocab_size, emb_dim, filter_sizes, num_filters, dropout):
        super(Discriminator, self).__init__()
        self.emb = nn.Embedding(vocab_size, emb_dim)
        self.convs = nn.ModuleList([
            nn.Conv2d(1, n, (f, emb_dim)) for (n, f) in zip(num_filters, filter_sizes)
        ])
        self.highway = nn.Linear(sum(num_filters), sum(num_filters))
        self.dropout = nn.Dropout(p=dropout)
        self.lin = nn.Linear(sum(num_filters), num_classes)
        self.softmax = nn.LogSoftmax()
        self.init_parameters()
Project: pytorch-poetry-gen | Author: justdark
def __init__(self, vocab_size, embedding_dim, hidden_dim):
        super(PoetryModel, self).__init__()
        self.hidden_dim = hidden_dim
        self.embeddings = nn.Embedding(vocab_size, embedding_dim)
        self.lstm = nn.LSTM(embedding_dim, self.hidden_dim)

        self.linear1 = nn.Linear(self.hidden_dim, vocab_size)
        # self.dropout = nn.Dropout(0.2)
        self.softmax = nn.LogSoftmax()
Project: MXSeq2Seq | Author: ZiyueHuang
def __init__(self, hidden_size, output_size, n_layers=1):
        super(DecoderRNN, self).__init__()
        self.n_layers = n_layers
        self.hidden_size = hidden_size

        self.embedding = nn.Embedding(output_size, hidden_size)
        self.gru = nn.GRU(hidden_size, hidden_size)
        self.out = nn.Linear(hidden_size, output_size)
        self.softmax = nn.LogSoftmax()
Project: age | Author: ly015
def __init__(self, opts):

        super(CM_Discriminator, self).__init__()

        cnn_feat_map = {'resnet18': 512, 'resnet50': 2048, 'vgg16': 2048}
        self.cnn_feat_size = cnn_feat_map[opts.cnn]
        self.num_cls = opts.D_num_cls

        # net1: parallel net
        hidden_lst1 = [self.cnn_feat_size] + opts.D_hidden
        layers1 = OrderedDict()
        if opts.input_relu == 1:
            layers1['relu'] = nn.ReLU()
        for n, (dim_in, dim_out) in enumerate(zip(hidden_lst1, hidden_lst1[1::])):
            layers1['fc%d' % n] = nn.Linear(dim_in, dim_out, bias = False)
            layers1['bn%d' % n] = nn.BatchNorm1d(dim_out)
            layers1['leaky_relu%d' % n] = nn.LeakyReLU(0.2)

        self.net1 = nn.Sequential(layers1)

        # net2: fusing net
        hidden_lst2 = [2 * hidden_lst1[-1]] + opts.D_hidden2 + [self.num_cls + 1]
        layers2 = OrderedDict()
        for n, (dim_in, dim_out) in enumerate(zip(hidden_lst2, hidden_lst2[1::])):
            layers2['fc%d' % n] = nn.Linear(dim_in, dim_out, bias = False)
            if n < len(hidden_lst2) - 2:
                layers2['bn%d' % n] = nn.BatchNorm1d(dim_out)
                layers2['leaky_relu%d' % n] = nn.LeakyReLU(0.2)

        layers2['logsoftmax'] = nn.LogSoftmax()
        self.net2 = nn.Sequential(layers2)
Project: TreeLSTMSentiment | Author: ttpro1995
def __init__(self, cuda, mem_dim, num_classes, dropout = False):
        super(SentimentModule, self).__init__()
        self.cudaFlag = cuda
        self.mem_dim = mem_dim
        self.num_classes = num_classes
        self.dropout = dropout
        # torch.manual_seed(456)
        self.l1 = nn.Linear(self.mem_dim, self.num_classes)
        self.logsoftmax = nn.LogSoftmax()
        if self.cudaFlag:
            self.l1 = self.l1.cuda()
Project: MP-CNN-Variants | Author: tuzhucheng
def __init__(self, embedding, n_holistic_filters, n_per_dim_filters, filter_widths, hidden_layer_units, num_classes, dropout, ext_feats):
        super(MPCNN, self).__init__()
        self.embedding = embedding
        self.n_word_dim = embedding.weight.size(1)
        self.n_holistic_filters = n_holistic_filters
        self.n_per_dim_filters = n_per_dim_filters
        self.filter_widths = filter_widths
        self.ext_feats = ext_feats
        holistic_conv_layers = []
        per_dim_conv_layers = []

        for ws in filter_widths:
            if np.isinf(ws):
                continue

            holistic_conv_layers.append(nn.Sequential(
                nn.Conv1d(self.n_word_dim, n_holistic_filters, ws),
                nn.Tanh()
            ))

            per_dim_conv_layers.append(nn.Sequential(
                nn.Conv1d(self.n_word_dim, self.n_word_dim * n_per_dim_filters, ws, groups=self.n_word_dim),
                nn.Tanh()
            ))

        self.holistic_conv_layers = nn.ModuleList(holistic_conv_layers)
        self.per_dim_conv_layers = nn.ModuleList(per_dim_conv_layers)

        # compute number of inputs to first hidden layer
        COMP_1_COMPONENTS_HOLISTIC, COMP_1_COMPONENTS_PER_DIM, COMP_2_COMPONENTS = 2 + n_holistic_filters, 2 + self.n_word_dim, 2
        EXT_FEATS = 4 if ext_feats else 0
        n_feat_h = 3 * len(self.filter_widths) * COMP_2_COMPONENTS
        n_feat_v = (
            # comparison units from holistic conv for min, max, mean pooling for non-infinite widths
            3 * ((len(self.filter_widths) - 1) ** 2) * COMP_1_COMPONENTS_HOLISTIC +
            # comparison units from holistic conv for min, max, mean pooling for infinite widths
            3 * 3 +
            # comparison units from per-dim conv
            2 * (len(self.filter_widths) - 1) * n_per_dim_filters * COMP_1_COMPONENTS_PER_DIM
        )
        n_feat = n_feat_h + n_feat_v + EXT_FEATS

        self.final_layers = nn.Sequential(
            nn.Linear(n_feat, hidden_layer_units),
            nn.Tanh(),
            nn.Dropout(dropout),
            nn.Linear(hidden_layer_units, num_classes),
            nn.LogSoftmax()
        )
Project: Pytorch-Deeplab | Author: speedinghzl
def loss_calc(pred, label, gpu):
    """
    This function returns cross entropy loss for semantic segmentation
    """
    # out shape batch_size x channels x h x w -> batch_size x channels x h x w
    # label shape h x w x 1 x batch_size  -> batch_size x 1 x h x w
    label = torch.from_numpy(label).long()
    label = Variable(label).cuda(gpu)
    m = nn.LogSoftmax()
    criterion = CrossEntropy2d().cuda(gpu)
    pred = m(pred)

    return criterion(pred, label)
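Here pred has shape batch_size x channels x h x w, so on newer PyTorch the normalization axis must be named explicitly (a sketch under that assumption):

m = nn.LogSoftmax(dim=1)  # normalize over the channel (class) axis of the 4-D prediction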
Project: NeuroNLP2 | Author: XuezheMax
def __init__(self, word_dim, num_words, char_dim, num_chars, pos_dim, num_pos, num_filters, kernel_size, rnn_mode, hidden_size, num_layers, num_labels, arc_space, type_space,
                 embedd_word=None, embedd_char=None, embedd_pos=None, p_in=0.2, p_out=0.5, p_rnn=(0.5, 0.5), biaffine=True, prior_order='deep_first', skipConnect=False,
                 biasArc=False, biasType=False):

        super(StackPtrNet, self).__init__()
        self.word_embedd = Embedding(num_words, word_dim, init_embedding=embedd_word)
        self.char_embedd = Embedding(num_chars, char_dim, init_embedding=embedd_char)
        self.pos_embedd = Embedding(num_pos, pos_dim, init_embedding=embedd_pos)
        self.conv1d = nn.Conv1d(char_dim, num_filters, kernel_size, padding=kernel_size - 1)
        self.dropout_in = nn.Dropout2d(p=p_in)
        self.dropout_out = nn.Dropout2d(p=p_out)
        self.num_labels = num_labels
        if prior_order in ['deep_first', 'shallow_first']:
            self.prior_order = PriorOrder.DEPTH
        elif prior_order == 'inside_out':
            self.prior_order = PriorOrder.INSIDE_OUT
        elif prior_order == 'left2right':
            self.prior_order = PriorOrder.LEFT2RIGTH
        else:
            raise ValueError('Unknown prior order: %s' % prior_order)
        self.skipConnect = skipConnect
        self.biasArc = biasArc
        self.biasType = biasType

        if rnn_mode == 'RNN':
            RNN_ENCODER = VarMaskedRNN
            RNN_DECODER = SkipConnectRNN if skipConnect else VarMaskedRNN
        elif rnn_mode == 'LSTM':
            RNN_ENCODER = VarMaskedLSTM
            RNN_DECODER = SkipConnectLSTM if skipConnect else VarMaskedLSTM
        elif rnn_mode == 'FastLSTM':
            RNN_ENCODER = VarMaskedFastLSTM
            RNN_DECODER = SkipConnectFastLSTM if skipConnect else VarMaskedFastLSTM
        elif rnn_mode == 'GRU':
            RNN_ENCODER = VarMaskedGRU
            RNN_DECODER = SkipConnectGRU if skipConnect else VarMaskedGRU
        else:
            raise ValueError('Unknown RNN mode: %s' % rnn_mode)

        self.encoder = RNN_ENCODER(word_dim + num_filters + pos_dim, hidden_size, num_layers=num_layers,
                                   batch_first=True, bidirectional=True, dropout=p_rnn)

        self.decoder = RNN_DECODER(word_dim + num_filters + pos_dim, hidden_size, num_layers=num_layers,
                                   batch_first=True, bidirectional=False, dropout=p_rnn)

        self.hx_dense = nn.Linear(2 * hidden_size, hidden_size)
        self.arc_h = nn.Linear(hidden_size * 3, arc_space) if self.biasArc else nn.Linear(hidden_size, arc_space)  # arc dense for decoder
        self.arc_c = nn.Linear(hidden_size * 2, arc_space)  # arc dense for encoder
        self.attention = BiAAttention(arc_space, arc_space, 1, biaffine=biaffine)

        self.type_h = nn.Linear(hidden_size * 3, type_space) if self.biasType else nn.Linear(hidden_size, type_space)  # type dense for decoder
        self.type_c = nn.Linear(hidden_size * 2, type_space)  # type dense for encoder
        self.bilinear = BiLinear(type_space, type_space, self.num_labels)

        self.logsoftmax = nn.LogSoftmax()
Project: nmp_qc | Author: priba
def forward(self, g, h_in, e, plotter=None):

        h = []
        h.append(h_in)

        # Layer
        for t in range(0, len(self.m)):

            u_args = self.u[t].get_args()

            h_t = Variable(torch.zeros(h_in.size(0), h_in.size(1), u_args['out']).type_as(h[t].data))

            # Apply one layer pass (Message + Update)
            for v in range(0, h_in.size(1)):

                m = self.m[t].forward(h[t][:, v, :], h[t], e[:, v, :])

                # Nodes without edge set message to 0
                m = g[:, v, :, None].expand_as(m) * m

                m = torch.sum(m, 1)

                # Duvenaud
                deg = torch.sum(g[:, v, :].data, 1)

                # Separate degrees
                for i in range(len(u_args['deg'])):
                    ind = deg == u_args['deg'][i]
                    ind = Variable(torch.squeeze(torch.nonzero(torch.squeeze(ind))), volatile=True)

                    opt = {'deg': i}

                    # Update
                    if len(ind) != 0:
                        aux = self.u[t].forward(torch.index_select(h[t], 0, ind)[:, v, :], torch.index_select(m, 0, ind), opt)

                        ind = ind.data.cpu().numpy()
                        for j in range(len(ind)):
                            h_t[ind[j], v, :] = aux[j, :]

            if plotter is not None:
                num_feat = h_t.size(2)
                color = h_t[0,:,:].data.cpu().numpy()
                for i in range(num_feat):
                    plotter(color[:, i], 'layer_' + str(t) + '_element_' + str(i) + '.png')

            h.append(h_t.clone())
        # Readout
        res = self.r.forward(h)
        if self.type == 'classification':
            res = nn.LogSoftmax()(res)
        return res
Project: pytorch-vqa | Author: Cyanogenoid
def run(net, loader, optimizer, tracker, train=False, prefix='', epoch=0):
    """ Run an epoch over the given loader """
    if train:
        net.train()
        tracker_class, tracker_params = tracker.MovingMeanMonitor, {'momentum': 0.99}
    else:
        net.eval()
        tracker_class, tracker_params = tracker.MeanMonitor, {}
        answ = []
        idxs = []
        accs = []

    tq = tqdm(loader, desc='{} E{:03d}'.format(prefix, epoch), ncols=0)
    loss_tracker = tracker.track('{}_loss'.format(prefix), tracker_class(**tracker_params))
    acc_tracker = tracker.track('{}_acc'.format(prefix), tracker_class(**tracker_params))

    log_softmax = nn.LogSoftmax().cuda()
    for v, q, a, idx, q_len in tq:
        var_params = {
            'volatile': not train,
            'requires_grad': False,
        }
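        # note: `async` became a reserved keyword in Python 3.7; on newer
        # Python/PyTorch the .cuda() calls below would use non_blocking=True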
        v = Variable(v.cuda(async=True), **var_params)
        q = Variable(q.cuda(async=True), **var_params)
        a = Variable(a.cuda(async=True), **var_params)
        q_len = Variable(q_len.cuda(async=True), **var_params)

        out = net(v, q, q_len)
        nll = -log_softmax(out)
        loss = (nll * a / 10).sum(dim=1).mean()
        acc = utils.batch_accuracy(out.data, a.data).cpu()

        if train:
            global total_iterations
            update_learning_rate(optimizer, total_iterations)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            total_iterations += 1
        else:
            # store information about evaluation of this minibatch
            _, answer = out.data.cpu().max(dim=1)
            answ.append(answer.view(-1))
            accs.append(acc.view(-1))
            idxs.append(idx.view(-1).clone())

        loss_tracker.append(loss.data[0])
        acc_tracker.append(acc.mean())
        fmt = '{:.4f}'.format
        tq.set_postfix(loss=fmt(loss_tracker.mean.value), acc=fmt(acc_tracker.mean.value))

    if not train:
        answ = list(torch.cat(answ, dim=0))
        accs = list(torch.cat(accs, dim=0))
        idxs = list(torch.cat(idxs, dim=0))
        return answ, accs, idxs
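The loss above is a soft-target negative log-likelihood over per-answer scores; the same computation on current PyTorch would read as follows (a sketch, assuming out is (batch, num_answers) and a holds answer counts scaled by 10):

import torch.nn.functional as F

nll = -F.log_softmax(out, dim=1)
loss = (nll * a / 10).sum(dim=1).mean()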
Project: char-cnn-pytorch | Author: srviest
def __init__(self, args):
        super(CharCNN, self).__init__()
        self.conv1 = nn.Sequential(
            nn.Conv1d(args.num_features, 256, kernel_size=7, stride=1),
            nn.ReLU(),
            nn.MaxPool1d(kernel_size=3, stride=3)
        )

        self.conv2 = nn.Sequential(
            nn.Conv1d(256, 256, kernel_size=7, stride=1),
            nn.ReLU(),
            nn.MaxPool1d(kernel_size=3, stride=3)
        )

        self.conv3 = nn.Sequential(
            nn.Conv1d(256, 256, kernel_size=3, stride=1),
            nn.ReLU()
        )

        self.conv4 = nn.Sequential(
            nn.Conv1d(256, 256, kernel_size=3, stride=1),
            nn.ReLU()    
        )

        self.conv5 = nn.Sequential(
            nn.Conv1d(256, 256, kernel_size=3, stride=1),
            nn.ReLU()
        )

        self.conv6 = nn.Sequential(
            nn.Conv1d(256, 256, kernel_size=3, stride=1),
            nn.ReLU(),
            nn.MaxPool1d(kernel_size=3, stride=3)
        )


        self.fc1 = nn.Sequential(
            nn.Linear(8704, 1024),
            nn.ReLU(),
            nn.Dropout(p=args.dropout)
        )

        self.fc2 = nn.Sequential(
            nn.Linear(1024, 1024),
            nn.ReLU(),
            nn.Dropout(p=args.dropout)
        )

        self.fc3 = nn.Linear(1024, 4)
        self.log_softmax = nn.LogSoftmax()
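The hard-coded 8704 in fc1 pins the input length: with sequences of length 1014 (the setting of Zhang et al.'s character-level CNN, assumed here), the conv/pool stack reduces 1014 positions to 34, and 256 channels * 34 positions = 8704. A hypothetical shape check under that assumption:

x = torch.zeros(2, args.num_features, 1014)  # batch of 2 one-hot character sequences
# conv1..conv6 shrink the length 1014 -> 34; flattening gives 256 * 34 = 8704,
# matching nn.Linear(8704, 1024) above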
Project: char-cnn-pytorch | Author: srviest
def __init__(self, num_features):
        super(CharCNN, self).__init__()

        self.num_features = num_features
        self.conv1 = nn.Sequential(
            nn.Conv2d(1, 256, kernel_size=(7, self.num_features), stride=1),
            nn.ReLU()
        )

        self.maxpool1 = nn.MaxPool2d(kernel_size=(3, 1), stride=(3, 1))

        self.conv2 = nn.Sequential(
            nn.Conv2d(1, 256, kernel_size=(7, 256), stride=1),
            nn.ReLU()
        )
        self.maxpool2 = nn.MaxPool2d(kernel_size=(3, 1), stride=(3, 1))

        self.conv3 = nn.Sequential(
            nn.Conv2d(1, 256, kernel_size=(3, 256), stride=1),
            nn.ReLU()
        )

        self.conv4 = nn.Sequential(
            nn.Conv2d(1, 256, kernel_size=(3, 256), stride=1),
            nn.ReLU()
        )

        self.conv5 = nn.Sequential(
            nn.Conv2d(1, 256, kernel_size=(3, 256), stride=1),
            nn.ReLU()
        )

        self.conv6 = nn.Sequential(
            nn.Conv2d(1, 256, kernel_size=(3, 256), stride=1),
            nn.ReLU()
        )

        self.maxpool6 = nn.MaxPool2d(kernel_size=(3, 1), stride=(3, 1))

        self.fc1 = nn.Sequential(
            nn.Linear(8704, 1024),
            nn.ReLU(),
            nn.Dropout(p=0.5)
        )
        self.fc2 = nn.Sequential(
            nn.Linear(1024, 1024),
            nn.ReLU(),
            nn.Dropout(p=0.5)
        )
        self.fc3 = nn.Linear(1024, 4)
        self.softmax = nn.LogSoftmax()

        # self.inference_log_softmax = InferenceBatchLogSoftmax()