Python torch.optim module: SGD usage examples

The following code examples, extracted from open-source Python projects, illustrate how to use torch.optim.SGD.
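Before the project excerpts, a minimal, self-contained sketch of the typical optim.SGD workflow; the model, data, and hyperparameters here are illustrative placeholders rather than code from any project below.

import torch
import torch.nn as nn
import torch.optim as optim

model = nn.Linear(10, 2)                       # toy stand-in for any nn.Module
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)

inputs = torch.randn(8, 10)                    # dummy batch
targets = torch.randint(0, 2, (8,))

optimizer.zero_grad()                          # clear stale gradients
loss = criterion(model(inputs), targets)
loss.backward()                                # populate .grad on every parameter
optimizer.step()                               # apply one SGD update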

Project: pytorch-dist    Author: apaszke    | project source | file source
def test_sgd(self):
        self._test_rosenbrock(
            lambda params: optim.SGD(params, lr=1e-3),
            wrap_old_fn(old_optim.sgd, learningRate=1e-3)
        )
        self._test_rosenbrock(
            lambda params: optim.SGD(params, lr=1e-3, momentum=0.9, dampening=0),
            wrap_old_fn(old_optim.sgd, learningRate=1e-3, momentum=0.9, dampening=0)
        )
        self._test_basic_cases(
            lambda weight, bias: optim.SGD([weight, bias], lr=1e-3)
        )
        self._test_basic_cases(
            lambda weight, bias: optim.SGD(
                self._build_params_dict(weight, bias, lr=1e-2),
                lr=1e-3)
        )
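The _build_params_dict helper above is test scaffolding, but the feature it exercises is the ordinary per-parameter-group API: optim.SGD accepts a list of dicts, each carrying its own overrides. A minimal sketch with illustrative tensors:

import torch
import torch.optim as optim

weight = torch.randn(4, 4, requires_grad=True)
bias = torch.randn(4, requires_grad=True)

# The first group overrides lr; the second falls back to the default lr=1e-3.
optimizer = optim.SGD(
    [{'params': [weight], 'lr': 1e-2},
     {'params': [bias]}],
    lr=1e-3)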
Project: DrQA    Author: facebookresearch    | project source | file source
def init_optimizer(self, state_dict=None):
        """Initialize an optimizer for the free parameters of the network.

        Args:
            state_dict: network parameters
        """
        if self.args.fix_embeddings:
            for p in self.network.embedding.parameters():
                p.requires_grad = False
        parameters = [p for p in self.network.parameters() if p.requires_grad]
        if self.args.optimizer == 'sgd':
            self.optimizer = optim.SGD(parameters, self.args.learning_rate,
                                       momentum=self.args.momentum,
                                       weight_decay=self.args.weight_decay)
        elif self.args.optimizer == 'adamax':
            self.optimizer = optim.Adamax(parameters,
                                          weight_decay=self.args.weight_decay)
        else:
            raise RuntimeError('Unsupported optimizer: %s' %
                               self.args.optimizer)

    # --------------------------------------------------------------------------
    # Learning
    # --------------------------------------------------------------------------
Project: end-to-end-negotiator    Author: facebookresearch    | project source | file source
def __init__(self, model, args, device_id=None, verbose=False):
        self.model = model
        self.args = args
        self.device_id = device_id
        self.verbose = verbose
        self.opt = optim.SGD(self.model.parameters(), lr=self.args.lr,
            momentum=self.args.momentum,
            nesterov=(self.args.nesterov and self.args.momentum > 0))
        self.crit = Criterion(self.model.word_dict, device_id=device_id)
        self.sel_crit = Criterion(
            self.model.item_dict, device_id=device_id, bad_toks=['<disconnect>', '<disagree>'])
        if self.args.visual:
            self.model_plot = vis.ModulePlot(self.model, plot_weight=False, plot_grad=True)
            self.loss_plot = vis.Plot(['train', 'valid', 'valid_select'],
                'loss', 'loss', 'epoch', running_n=1)
            self.ppl_plot = vis.Plot(['train', 'valid', 'valid_select'],
                'perplexity', 'ppl', 'epoch', running_n=1)
Project: optnet    Author: locuslab    | project source | file source
def get_optimizer(args, params):
    if args.dataset == 'mnist':
        if args.model == 'optnet-eq':
            params = list(params)
            A_param = params.pop(0)
            assert(A_param.size() == (args.neq, args.nHidden))
            optimizer = optim.Adam([
                {'params': params, 'lr': 1e-3},
                {'params': [A_param], 'lr': 1e-1}
            ])
        else:
            optimizer = optim.Adam(params)
    elif args.dataset in ('cifar-10', 'cifar-100'):
        if args.opt == 'sgd':
            optimizer = optim.SGD(params, lr=1e-1, momentum=0.9, weight_decay=args.weightDecay)
        elif args.opt == 'adam':
            optimizer = optim.Adam(params, weight_decay=args.weightDecay)
    else:
        assert(False)

    return optimizer
Project: pytorch    Author: tylergenter    | project source | file source
def test_sgd(self):
        self._test_rosenbrock(
            lambda params: optim.SGD(params, lr=1e-3),
            wrap_old_fn(old_optim.sgd, learningRate=1e-3)
        )
        self._test_rosenbrock(
            lambda params: optim.SGD(params, lr=1e-3, momentum=0.9,
                                     dampening=0, weight_decay=1e-4),
            wrap_old_fn(old_optim.sgd, learningRate=1e-3, momentum=0.9,
                        dampening=0, weightDecay=1e-4)
        )
        self._test_basic_cases(
            lambda weight, bias: optim.SGD([weight, bias], lr=1e-3)
        )
        self._test_basic_cases(
            lambda weight, bias: optim.SGD(
                self._build_params_dict(weight, bias, lr=1e-2),
                lr=1e-3)
        )
Project: ml-utils    Author: LinxiFan    | project source | file source
def train(rank, args, model):
    torch.manual_seed(args.seed + rank)

    train_loader = torch.utils.data.DataLoader(
        datasets.MNIST('../data', train=True, download=True,
                       transform=transforms.Compose([
                           transforms.ToTensor(),
                           transforms.Normalize((0.1307,), (0.3081,))
                       ])),
        batch_size=args.batch_size, shuffle=True, num_workers=1)
    test_loader = torch.utils.data.DataLoader(
        datasets.MNIST('../data', train=False, transform=transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.1307,), (0.3081,))
        ])),
        batch_size=args.batch_size, shuffle=True, num_workers=1)

    optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=args.momentum)
    for epoch in range(1, args.epochs + 1):
        train_epoch(epoch, args, model, train_loader, optimizer)
        test_epoch(model, test_loader)
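train_epoch and test_epoch are defined elsewhere in the project. A plausible train_epoch body, sketched under the assumptions that the model ends in log_softmax (as these MNIST examples typically do) and that args carries a log_interval field:

import torch.nn.functional as F

def train_epoch(epoch, args, model, data_loader, optimizer):
    model.train()
    for batch_idx, (data, target) in enumerate(data_loader):
        optimizer.zero_grad()
        output = model(data)
        loss = F.nll_loss(output, target)      # pairs with a log_softmax output layer
        loss.backward()
        optimizer.step()
        if batch_idx % args.log_interval == 0:
            print('Epoch {} batch {}\tloss: {:.6f}'.format(epoch, batch_idx, loss.item()))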
Project: pytorch-coriander    Author: hughperkins    | project source | file source
def test_sgd(self):
        self._test_rosenbrock(
            lambda params: optim.SGD(params, lr=1e-3),
            wrap_old_fn(old_optim.sgd, learningRate=1e-3)
        )
        self._test_rosenbrock(
            lambda params: optim.SGD(params, lr=1e-3, momentum=0.9,
                                     dampening=0, weight_decay=1e-4),
            wrap_old_fn(old_optim.sgd, learningRate=1e-3, momentum=0.9,
                        dampening=0, weightDecay=1e-4)
        )
        self._test_basic_cases(
            lambda weight, bias: optim.SGD([weight, bias], lr=1e-3)
        )
        self._test_basic_cases(
            lambda weight, bias: optim.SGD(
                self._build_params_dict(weight, bias, lr=1e-2),
                lr=1e-3)
        )
Project: pytorch_60min_blitz    Author: kyuhyoung    | project source | file source
def initialize(is_gpu, dir_data, di_set_transform, ext_img, n_img_per_batch, n_worker):

    trainloader, testloader, li_class = make_dataloader_custom_file(
        dir_data, di_set_transform, ext_img, n_img_per_batch, n_worker)

    #net = Net().cuda()
    net = Net_gap()
    #t1 = net.cuda()
    criterion = nn.CrossEntropyLoss()
    if is_gpu:
        net.cuda()
        criterion.cuda()
    optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)
    scheduler = ReduceLROnPlateau(optimizer, 'min', verbose=1, patience = 8, epsilon=0.00001, min_lr=0.000001) # set up scheduler

    return trainloader, testloader, net, criterion, optimizer, scheduler, li_class
Project: pytorch    Author: ezyang    | project source | file source
def test_sgd(self):
        self._test_rosenbrock(
            lambda params: optim.SGD(params, lr=1e-3),
            wrap_old_fn(old_optim.sgd, learningRate=1e-3)
        )
        self._test_rosenbrock(
            lambda params: optim.SGD(params, lr=1e-3, momentum=0.9,
                                     dampening=0, weight_decay=1e-4),
            wrap_old_fn(old_optim.sgd, learningRate=1e-3, momentum=0.9,
                        dampening=0, weightDecay=1e-4)
        )
        self._test_basic_cases(
            lambda weight, bias: optim.SGD([weight, bias], lr=1e-3)
        )
        self._test_basic_cases(
            lambda weight, bias: optim.SGD(
                self._build_params_dict(weight, bias, lr=1e-2),
                lr=1e-3)
        )
        self._test_basic_cases(
            lambda weight, bias: optim.SGD(
                self._build_params_dict_single(weight, bias, lr=1e-2),
                lr=1e-3)
        )
Project: DrQA_cn    Author: AmoseKang    | project source | file source
def init_optimizer(self, state_dict=None):
        """Initialize an optimizer for the free parameters of the network.

        Args:
            state_dict: network parameters
        """
        if self.args.fix_embeddings:
            for p in self.network.embedding.parameters():
                p.requires_grad = False
        parameters = [p for p in self.network.parameters() if p.requires_grad]
        if self.args.optimizer == 'sgd':
            self.optimizer = optim.SGD(parameters, self.args.learning_rate,
                                       momentum=self.args.momentum,
                                       weight_decay=self.args.weight_decay)
        elif self.args.optimizer == 'adamax':
            self.optimizer = optim.Adamax(parameters,
                                          weight_decay=self.args.weight_decay)
        else:
            raise RuntimeError('Unsupported optimizer: %s' %
                               self.args.optimizer)

    # --------------------------------------------------------------------------
    # Learning
    # --------------------------------------------------------------------------
Project: PytorchDL    Author: FredHuangBia    | project source | file source
def __init__(self, model, criterion, opt, optimState):
        self.model = model
        self.criterion = criterion
        self.optimState = optimState
        if self.optimState is None:
            self.optimState = { 'learningRate' : opt.LR,
                                'learningRateDecay' : opt.LRDParam,
                                'momentum' : opt.momentum,
                                'nesterov' : False,
                                'dampening'  : opt.dampening,
                                'weightDecay' : opt.weightDecay
                            }
        self.opt = opt
        if opt.optimizer == 'SGD':
            self.optimizer = optim.SGD(model.parameters(), lr=opt.LR, momentum=opt.momentum, dampening=opt.dampening, weight_decay=opt.weightDecay)
        elif opt.optimizer == 'Adam':
            self.optimizer = optim.Adam(model.parameters(), lr=opt.LR, betas=(opt.momentum, 0.999), eps=1e-8, weight_decay=opt.weightDecay)

        self.logger = { 'train' : open(os.path.join(opt.resume, 'train.log'), 'a+'), 
                        'val' : open(os.path.join(opt.resume, 'val.log'), 'a+')
                    }
Project: PytorchDL    Author: FredHuangBia    | project source | file source
def __init__(self, model, criterion, opt, optimState):
        self.model = model
        self.criterion = criterion
        self.optimState = optimState
        if self.optimState is None:
            self.optimState = { 'learningRate' : opt.LR,
                                'learningRateDecay' : opt.LRDParam,
                                'momentum' : opt.momentum,
                                'nesterov' : False,
                                'dampening'  : opt.dampening,
                                'weightDecay' : opt.weightDecay
                            }
        self.opt = opt
        if opt.optimizer == 'SGD':
            self.optimizer = optim.SGD(model.parameters(), lr=opt.LR, momentum=opt.momentum, dampening=opt.dampening, weight_decay=opt.weightDecay)
        elif opt.optimizer == 'Adam':
            self.optimizer = optim.Adam(model.parameters(), lr=opt.LR, betas=(0.9,0.999), eps=1e-8, weight_decay=opt.weightDecay)

        self.logger = { 'train' : open(os.path.join(opt.resume, 'train.log'), 'a+'), 
                        'val' : open(os.path.join(opt.resume, 'val.log'), 'a+')
                    }
Project: examples    Author: pytorch    | project source | file source
def train(rank, args, model):
    torch.manual_seed(args.seed + rank)

    train_loader = torch.utils.data.DataLoader(
        datasets.MNIST('../data', train=True, download=True,
                    transform=transforms.Compose([
                        transforms.ToTensor(),
                        transforms.Normalize((0.1307,), (0.3081,))
                    ])),
        batch_size=args.batch_size, shuffle=True, num_workers=1)
    test_loader = torch.utils.data.DataLoader(
        datasets.MNIST('../data', train=False, transform=transforms.Compose([
                        transforms.ToTensor(),
                        transforms.Normalize((0.1307,), (0.3081,))
                    ])),
        batch_size=args.batch_size, shuffle=True, num_workers=1)

    optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=args.momentum)
    for epoch in range(1, args.epochs + 1):
        train_epoch(epoch, args, model, train_loader, optimizer)
        test_epoch(model, test_loader)
Project: pytorch    Author: pytorch    | project source | file source
def test_sgd(self):
        self._test_rosenbrock(
            lambda params: optim.SGD(params, lr=1e-3),
            wrap_old_fn(old_optim.sgd, learningRate=1e-3)
        )
        self._test_rosenbrock(
            lambda params: optim.SGD(params, lr=1e-3, momentum=0.9,
                                     dampening=0, weight_decay=1e-4),
            wrap_old_fn(old_optim.sgd, learningRate=1e-3, momentum=0.9,
                        dampening=0, weightDecay=1e-4)
        )
        self._test_basic_cases(
            lambda weight, bias: optim.SGD([weight, bias], lr=1e-3)
        )
        self._test_basic_cases(
            lambda weight, bias: optim.SGD(
                self._build_params_dict(weight, bias, lr=1e-2),
                lr=1e-3)
        )
        self._test_basic_cases(
            lambda weight, bias: optim.SGD(
                self._build_params_dict_single(weight, bias, lr=1e-2),
                lr=1e-3)
        )
Project: DeepDependencyParsingProblemSet    Author: rguthrie3    | project source | file source
def test_predict_after_train_d3_1():
    """ 1 point(s) """

    global test_sent, gold, word_to_ix, vocab
    torch.manual_seed(1)
    feat_extract = SimpleFeatureExtractor()
    word_embed = VanillaWordEmbeddingLookup(word_to_ix, TEST_EMBEDDING_DIM)
    act_chooser = ActionChooserNetwork(TEST_EMBEDDING_DIM * NUM_FEATURES)
    combiner = MLPCombinerNetwork(TEST_EMBEDDING_DIM)

    parser = TransitionParser(feat_extract, word_embed, act_chooser, combiner)

    # Train
    for i in range(75):
        train([ (test_sent[:-1], gold) ], parser, optim.SGD(parser.parameters(), lr=0.01), verbose=False)

    # predict
    pred = parser.predict(test_sent[:-1])
    gold_graph = dependency_graph_from_oracle(test_sent[:-1], gold)
    assert pred == gold_graph
Project: restricted-boltzmann-machine-deep-belief-network-deep-boltzmann-machine-in-pytorch    Author: wmingwei    | project source | file source
def generative_fine_tune(dbn, lr = 1e-2, epoch = 100, batch_size = 50, input_data = None, CD_k = 1, optimization_method = "Adam", momentum = 0, weight_decay = 0, test_input = None):

    if optimization_method == "RMSprop":
        optimizer = optim.RMSprop(dbn.parameters(), lr = lr, momentum = momentum, weight_decay = weight_decay)
    elif optimization_method == "SGD":
        optimizer = optim.SGD(dbn.parameters(), lr = lr, momentum = momentum, weight_decay = weight_decay)
    elif optimization_method == "Adam":
        optimizer = optim.Adam(dbn.parameters(), lr = lr, weight_decay = weight_decay)   

    for i in dbn.parameters():
        i.mean().backward()

    train_set = torch.utils.data.dataset.TensorDataset(input_data, torch.zeros(input_data.size()[0]))
    train_loader = torch.utils.data.DataLoader(train_set, batch_size = batch_size, shuffle=True)

    for i in range(epoch):
        for batch_idx, (data, target) in enumerate(train_loader):

            sleep_wake(dbn = dbn, optimizer = optimizer, lr = lr, CD_k = CD_k, v = data, batch_size = batch_size)

        if test_input is not None:

            print("fine tune", i, ais_dbn.logp_ais(self, test_input, step = 1000, M_Z = 20, M_IS = 100, parallel = True))
Project: restricted-boltzmann-machine-deep-belief-network-deep-boltzmann-machine-in-pytorch    Author: wmingwei    | project source | file source
def joint_train(dbm, lr = 1e-3, epoch = 100, batch_size = 50, input_data = None, weight_decay = 0, k_positive=10, k_negative=10, alpha = [1e-1,1e-1,1]):
    u1 = nn.Parameter(torch.zeros(1))
    u2 = nn.Parameter(torch.zeros(1))
    # optimizer = optim.Adam(dbm.parameters(), lr = lr, weight_decay = weight_decay)
    optimizer = optim.SGD(dbm.parameters(), lr = lr, momentum = 0.5)
    train_set = torch.utils.data.dataset.TensorDataset(input_data, torch.zeros(input_data.size()[0]))
    train_loader = torch.utils.data.DataLoader(train_set, batch_size = batch_size, shuffle=True)
    optimizer_u = optim.Adam([u1,u2], lr = lr/1000, weight_decay = weight_decay)
    for _ in range(epoch):
        print("training epoch %i with u1 = %.4f, u2 = %.4f"%(_, u1.data.numpy()[0], u2.data.numpy()[0]))
        for batch_idx, (data, target) in enumerate(train_loader):
            data = Variable(data)
            positive_phase, negative_phase= dbm(v_input = data, k_positive = k_positive, k_negative=k_negative, greedy = False)
            loss = energy(dbm = dbm, layer = positive_phase) - energy(dbm = dbm, layer = negative_phase)+alpha[0] * torch.norm(torch.norm(dbm.W[0],2,1)-u1.repeat(dbm.W[0].size()[0],1))**2 + alpha[1]*torch.norm(torch.norm(dbm.W[1],2,1)-u2.repeat(dbm.W[1].size()[0],1))**2 + alpha[2] * (u1 - u2)**2
            loss.backward()
            optimizer.step()
            optimizer.zero_grad()
            optimizer_u.step()
            optimizer_u.zero_grad()
Project: drl.pth    Author: seba-1511    | project source | file source
def get_opt(name):
    opts = {
        'SGD': optim.SGD,
        'Adam': optim.Adam,
        'Adagrad': optim.Adagrad,
        'RMSprop': optim.RMSprop,
    }
    return opts[name]
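Since get_opt returns the optimizer class rather than an instance, the caller supplies the parameters and hyperparameters itself. A usage sketch with a placeholder model:

import torch.nn as nn

model = nn.Linear(4, 2)                        # placeholder module
opt_cls = get_opt('SGD')                       # resolves to optim.SGD
optimizer = opt_cls(model.parameters(), lr=0.01, momentum=0.9)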
Project: ladder    Author: abhiskk    | project source | file source
def pretrain(self, x, pt_epochs, verbose=True):
        n = x.data.size()[0]
        num_batches = n // self.batch_size
        t = x

        # Pre-train 1 autoencoder at a time
        for i, ae_re in enumerate(self.autoencoders_ref):
            # Get the current autoencoder
            ae = getattr(self.sequential, ae_re)

            # Getting encoded output from the previous autoencoder
            if i > 0:
                # Set the requires_grad to False so that backprop doesn't
                # travel all the way back to the previous autoencoder
                temp = Variable(torch.FloatTensor(n, ae.d_in), requires_grad=False)
                for k in range(num_batches):
                    start, end = k * self.batch_size, (k + 1) * self.batch_size
                    prev_ae = getattr(self.sequential, self.autoencoders_ref[i - 1])
                    temp.data[start:end] = prev_ae.encode(t[start:end], add_noise=False).data
                t = temp
            optimizer = SGD(ae.parameters(), lr=self.pre_lr)

            # Pre-training
            print("Pre-training Autoencoder:", i)
            for ep in range(pt_epochs):
                agg_cost = 0.
                for k in range(num_batches):
                    start, end = k * self.batch_size, (k + 1) * self.batch_size
                    bt = t[start:end]
                    optimizer.zero_grad()
                    z = ae.encode(bt, add_noise=True)
                    z = ae.decode(z)
                    loss = -torch.sum(bt * torch.log(z) + (1.0 - bt) * torch.log(1.0 - z), 1)
                    cost = torch.mean(loss)
                    cost.backward()
                    optimizer.step()
                    agg_cost += cost
                agg_cost /= num_batches
                if verbose:
                    print("Pre-training Autoencoder:", i, "Epoch:", ep, "Cost:", agg_cost.data[0])
Project: ladder    Author: abhiskk    | project source | file source
def finetune(self, train_X, train_y, valid_X, valid_y,
                 valid_actual_size, ft_epochs, verbose=True):
        n = train_X.data.size()[0]
        num_batches = n // self.batch_size
        n_v = valid_X.data.size()[0]
        num_batches_v = n_v // self.batch_size
        optimizer = SGD(self.parameters(), lr=self.ft_lr)
        loss = torch.nn.NLLLoss()

        for ef in range(ft_epochs):
            agg_cost = 0
            for k in range(num_batches):
                start, end = k * self.batch_size, (k + 1) * self.batch_size
                bX = train_X[start:end]
                by = train_y[start:end]
                optimizer.zero_grad()
                p = self.forward(bX)
                cost = loss.forward(p, by)
                agg_cost += cost
                cost.backward()
                optimizer.step()
            agg_cost /= num_batches
            preds = np.zeros((n_v, self.d_out))

            # Calculate accuracy on Validation set
            for k in range(num_batches_v):
                start, end = k * self.batch_size, (k + 1) * self.batch_size
                bX = valid_X[start:end]
                p = self.forward(bX).data.numpy()
                preds[start:end] = p
            correct = 0
            for actual, prediction in zip(valid_y[:valid_actual_size], preds[:valid_actual_size]):
                ind = np.argmax(prediction)
                actual = actual.data.numpy()
                if ind == actual:
                    correct += 1

            if verbose:
                print("Fine-tuning Epoch:", ef, "Cost:", agg_cost.data[0],
                      "Validation Accuracy:", "{0:.4f}".format(correct / float(valid_actual_size)))
Project: future-price-predictor    Author: htfy96    | project source | file source
def train(model, db, args, bsz=32, eph=1, use_cuda=False):
    print("Training...")

    trainloader = data_utils.DataLoader(dataset=db, batch_size=bsz, shuffle=True)
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(), lr=1e-4, momentum=0.9)
    best_loss = 100000

    for epoch in range(eph):
        running_loss = 0.0
        for i, data in enumerate(trainloader, 1):
            inputs, targets = data
            inputs = inputs.unsqueeze(1)
            targets = target_onehot_to_classnum_tensor(targets)
            if use_cuda and cuda_ava:
                inputs = Variable(inputs.float().cuda())
                targets = Variable(targets.cuda())
            else:
                inputs = Variable(inputs.float())
                targets = Variable(targets)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            loss.backward()
            optimizer.step()

            running_loss += loss.data[0]
            last_loss = loss.data[0]
            if i % 100 == 0:
                print("[%d, %5d] loss: %.3f" % (epoch + 1, i, running_loss / 100))
                running_loss = 0

            if last_loss < best_loss:
                best_loss = last_loss
                acc = evaluate(model, trainloader, use_cuda)
                torch.save(model.state_dict(), os.path.join('saved_model', 'cnnT1_epoch_{}_iter_{}_loss_{}_acc_{}_{}.t7'.format(epoch + 1, i, last_loss, acc, datetime.datetime.now().strftime("%b_%d_%H:%M:%S"))))
    acc = evaluate(model, trainloader, use_cuda)
    torch.save(model.state_dict(), os.path.join('saved_model', 'cnnT1_all_acc_{}.t7'.format(acc)))

    print("Finished Training!")
Project: pytorch_word2vec    Author: bamtercelboo    | project source | file source
def __init__(self,
                 input_file_name,
                 output_file_name,
                 emb_dimension=100,
                 batch_size=100,
                 window_size=5,
                 iteration=5,
                 initial_lr=0.025,
                 min_count=5,
                 using_hs=False,
                 using_neg=False,
                 context_size=2,
                 hidden_size=128,
                 cbow=None,
                 skip_gram=None):

        print("\nInput File loading......\n")
        self.data = InputData(input_file_name, min_count)
        print("\nInput File loaded.\n")
        self.output_file_name = output_file_name
        self.emb_size = len(self.data.word2id)
        self.emb_dimension = emb_dimension
        self.batch_size = batch_size
        self.window_size = window_size
        self.iteration = iteration
        self.initial_lr = initial_lr
        self.context_size = context_size
        self.hidden_size = hidden_size
        self.using_hs = using_hs
        self.using_neg = using_neg
        self.cbow = cbow
        self.skip_gram = skip_gram
        if self.skip_gram is not None and self.skip_gram:
            self.skip_gram_model = SkipGramModel(self.emb_size, self.emb_dimension)
            print("skip_gram_model", self.skip_gram_model)
            self.optimizer = optim.SGD(self.skip_gram_model.parameters(), lr=self.initial_lr)
        if self.cbow is not None and self.cbow:
            self.cbow_model = CBOW(self.emb_size, self.emb_dimension)
            print("CBOW_model", self.cbow_model)
            self.optimizer = optim.SGD(self.cbow_model.parameters(), lr=self.initial_lr)
Project: ParlAI    Author: facebookresearch    | project source | file source
def __init__(self, opt, word_dict, feature_dict, state_dict=None):
        # Book-keeping.
        self.opt = opt
        self.word_dict = word_dict
        self.feature_dict = feature_dict
        self.updates = 0
        self.train_loss = AverageMeter()

        # Building network.
        self.network = RnnDocReader(opt)
        if state_dict:
            new_state = set(self.network.state_dict().keys())
            for k in list(state_dict['network'].keys()):
                if k not in new_state:
                    del state_dict['network'][k]
            self.network.load_state_dict(state_dict['network'])

        # Building optimizer.
        parameters = [p for p in self.network.parameters() if p.requires_grad]
        if opt['optimizer'] == 'sgd':
            self.optimizer = optim.SGD(parameters, opt['learning_rate'],
                                       momentum=opt['momentum'],
                                       weight_decay=opt['weight_decay'])
        elif opt['optimizer'] == 'adamax':
            self.optimizer = optim.Adamax(parameters,
                                          weight_decay=opt['weight_decay'])
        else:
            raise RuntimeError('Unsupported optimizer: %s' % opt['optimizer'])
Project: sru    Author: taolei87    | project source | file source
def __init__(self, opt, embedding=None, state_dict=None):
        # Book-keeping.
        self.opt = opt
        self.updates = state_dict['updates'] if state_dict else 0
        self.train_loss = AverageMeter()

        # Building network.
        self.network = RnnDocReader(opt, embedding=embedding)
        if state_dict:
            new_state = set(self.network.state_dict().keys())
            for k in list(state_dict['network'].keys()):
                if k not in new_state:
                    del state_dict['network'][k]
            self.network.load_state_dict(state_dict['network'])

        # Building optimizer.
        parameters = [p for p in self.network.parameters() if p.requires_grad]
        if opt['optimizer'] == 'sgd':
            self.optimizer = optim.SGD(parameters, opt['learning_rate'],
                                       momentum=opt['momentum'],
                                       weight_decay=opt['weight_decay'])
        elif opt['optimizer'] == 'adamax':
            self.optimizer = optim.Adamax(parameters, opt['learning_rate'],
                                          weight_decay=opt['weight_decay'])
        else:
            raise RuntimeError('Unsupported optimizer: %s' % opt['optimizer'])
        if state_dict:
            self.optimizer.load_state_dict(state_dict['optimizer'])

        num_params = sum(p.data.numel() for p in parameters
            if p.data.data_ptr() != self.network.embedding.weight.data.data_ptr())
        print ("{} parameters".format(num_params))
Project: bandit-nmt    Author: khanhptnk    | project source | file source
def _makeOptimizer(self):
        if self.method == 'sgd':
            self.optimizer = optim.SGD(self.params, lr=self.lr)
        elif self.method == 'adagrad':
            self.optimizer = optim.Adagrad(self.params, lr=self.lr)
        elif self.method == 'adadelta':
            self.optimizer = optim.Adadelta(self.params, lr=self.lr)
        elif self.method == 'adam':
            self.optimizer = optim.Adam(self.params, lr=self.lr)
        else:
            raise RuntimeError("Invalid optim method: " + self.method)
Project: odin-pytorch    Author: ShiyuLiang    | project source | file source
def test(nnName, dataName, CUDA_DEVICE, epsilon, temperature):

    net1 = torch.load("../models/{}.pth".format(nnName))
    optimizer1 = optim.SGD(net1.parameters(), lr = 0, momentum = 0)
    net1.cuda(CUDA_DEVICE)

    if dataName != "Uniform" and dataName != "Gaussian":
        testsetout = torchvision.datasets.ImageFolder("../data/{}".format(dataName), transform=transform)
        testloaderOut = torch.utils.data.DataLoader(testsetout, batch_size=1,
                                         shuffle=False, num_workers=2)

    if nnName == "densenet10" or nnName == "wideresnet10":
        testset = torchvision.datasets.CIFAR10(root='../data', train=False, download=True, transform=transform)
        testloaderIn = torch.utils.data.DataLoader(testset, batch_size=1,
                                                   shuffle=False, num_workers=2)
    if nnName == "densenet100" or nnName == "wideresnet100":
        testset = torchvision.datasets.CIFAR100(root='../data', train=False, download=True, transform=transform)
        testloaderIn = torch.utils.data.DataLoader(testset, batch_size=1,
                                                   shuffle=False, num_workers=2)

    if dataName == "Gaussian":
        d.testGaussian(net1, criterion, CUDA_DEVICE, testloaderIn, testloaderIn, nnName, dataName, epsilon, temperature)
        m.metric(nnName, dataName)

    elif dataName == "Uniform":
        d.testUni(net1, criterion, CUDA_DEVICE, testloaderIn, testloaderIn, nnName, dataName, epsilon, temperature)
        m.metric(nnName, dataName)
    else:
        d.testData(net1, criterion, CUDA_DEVICE, testloaderIn, testloaderOut, nnName, dataName, epsilon, temperature)
        m.metric(nnName, dataName)
Project: end-to-end-negotiator    Author: facebookresearch    | project source | file source
def train(self, corpus):
        """Entry point."""
        N = len(corpus.word_dict)
        best_model, best_valid_select_loss = None, 1e100
        lr = self.args.lr
        last_decay_epoch = 0
        self.t = 0

        validdata = corpus.valid_dataset(self.args.bsz, device_id=self.device_id)
        for epoch in range(1, self.args.max_epoch + 1):
            traindata = corpus.train_dataset(self.args.bsz, device_id=self.device_id)
            _, _, valid_select_loss = self.iter(N, epoch, lr, traindata, validdata)

            if valid_select_loss < best_valid_select_loss:
                best_valid_select_loss = valid_select_loss
                best_model = copy.deepcopy(self.model)

        if self.verbose:
            print('| start annealing | best validselectloss %.3f | best validselectppl %.3f' % (
                best_valid_select_loss, np.exp(best_valid_select_loss)))

        self.model = best_model
        for epoch in range(self.args.max_epoch + 1, 100):
            if epoch - last_decay_epoch >= self.args.decay_every:
                last_decay_epoch = epoch
                lr /= self.args.decay_rate
                if lr < self.args.min_lr:
                    break
                self.opt = optim.SGD(self.model.parameters(), lr=lr)

            traindata = corpus.train_dataset(self.args.bsz, device_id=self.device_id)
            train_loss, valid_loss, valid_select_loss = self.iter(
                N, epoch, lr, traindata, validdata)

        return train_loss, valid_loss, valid_select_loss
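Rebuilding optim.SGD on every decay step, as above, also discards optimizer state such as momentum buffers. When that side effect is unwanted, the same annealing can be done in place; a sketch:

def decay_lr(optimizer, decay_rate):
    # Scale every parameter group's learning rate without recreating
    # the optimizer, so momentum buffers (if any) are preserved.
    for group in optimizer.param_groups:
        group['lr'] /= decay_rate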
Project: end-to-end-negotiator    Author: facebookresearch    | project source | file source
def __init__(self, model, args, name='Alice'):
        super(RlAgent, self).__init__(model, args, name=name)
        self.opt = optim.SGD(
            self.model.parameters(),
            lr=self.args.rl_lr,
            momentum=self.args.momentum,
            nesterov=(self.args.nesterov and self.args.momentum > 0))

        self.all_rewards = []

        if self.args.visual:
            self.model_plot = vis.ModulePlot(self.model, plot_weight=False, plot_grad=True)
            self.reward_plot = vis.Plot(['reward',], 'reward', 'reward')
            self.loss_plot = vis.Plot(['loss',], 'loss', 'loss')
        self.t = 0
Project: FreezeOut    Author: ajbrock    | project source | file source
def __init__(self,growthRate, depth, nClasses, epochs, t_0, scale_lr=True, how_scale = 'cubic',const_time=False, cfg=cfg['E'],batch_norm=True):
        super(DenseNet, self).__init__()

        self.epochs = epochs
        self.t_0 = t_0
        self.scale_lr = scale_lr
        self.how_scale = how_scale
        self.const_time = const_time

        self.layer_index = 0
        self.features = self.make_layers(cfg,batch_norm)

        self.classifier = nn.Sequential(
            nn.Linear(512, 512),
            nn.BatchNorm1d(512),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(512, 512),
            nn.BatchNorm1d(512),
            nn.ReLU(True),
            nn.BatchNorm1d(512),
            nn.Dropout(),
            nn.Linear(512, nClasses),
        )
        self.classifier.layer_index = self.layer_index
        self.classifier.active = True
        self._initialize_weights()

        # Optimizer
        self.optim = optim.SGD([{'params':m.parameters(), 'lr':m.lr, 'layer_index':m.layer_index} for m in self.modules() if hasattr(m,'active')],  
                         nesterov=True,momentum=0.9, weight_decay=1e-4)
        # Iteration Counter            
        self.j = 0  

        # A simple dummy variable that indicates we are using an iteration-wise
        # annealing scheme as opposed to epoch-wise. 
        self.lr_sched = {'itr':0}
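optim.SGD keeps extra keys such as 'layer_index' inside each param group, which is what lets FreezeOut anneal learning rates layer by layer later on. A sketch of such a per-group update; the cosine schedule and the assumed per-group 'max_lr' key are illustrative, not FreezeOut's exact formula:

import math

def anneal_layerwise(optimizer, j, j_max):
    # Cosine-anneal each group's lr toward zero over j_max iterations;
    # 'max_lr' is assumed to have been stored in each group beforehand.
    t = min(1.0, j / float(j_max))
    for group in optimizer.param_groups:
        group['lr'] = 0.5 * group['max_lr'] * (1 + math.cos(math.pi * t))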
Project: NeuralMT    Author: hlt-mt    | project source | file source
def set_parameters(self, params):
        self.params = list(params)  # careful: params may be a generator
        if self.method == 'sgd':
            self.optimizer = optim.SGD(self.params, lr=self.lr)
        elif self.method == 'adagrad':
            self.optimizer = optim.Adagrad(self.params, lr=self.lr)
        elif self.method == 'adadelta':
            self.optimizer = optim.Adadelta(self.params, lr=self.lr)
        elif self.method == 'adam':
            self.optimizer = optim.Adam(self.params, lr=self.lr)
        else:
            raise RuntimeError("Invalid optim method: " + self.method)
Project: alpha-dimt-icmlws    Author: sotetsuk    | project source | file source
def set_parameters(self, params):
        self.params = list(params)  # careful: params may be a generator
        if self.method == 'sgd':
            self.optimizer = optim.SGD(self.params, lr=self.lr)
        elif self.method == 'adagrad':
            self.optimizer = optim.Adagrad(self.params, lr=self.lr)
        elif self.method == 'adadelta':
            self.optimizer = optim.Adadelta(self.params, lr=self.lr)
        elif self.method == 'adam':
            self.optimizer = optim.Adam(self.params, lr=self.lr)
        else:
            raise RuntimeError("Invalid optim method: " + self.method)
Project: pytorch    Author: tylergenter    | project source | file source
def test_invalid_param_type(self):
        with self.assertRaises(TypeError):
            optim.SGD(Variable(torch.randn(5, 5)), lr=3)
Project: covfefe    Author: deepnn    | project source | file source
def sgd(w, lr=0.1, m=0, damp=0, w_decay=0, nesterov=False):
    return optim.SGD(params=w, lr=lr, momentum=m,
                     dampening=damp, weight_decay=w_decay,
                     nesterov=nesterov)
Project: pytorch-coriander    Author: hughperkins    | project source | file source
def test_invalid_param_type(self):
        with self.assertRaises(TypeError):
            optim.SGD(Variable(torch.randn(5, 5)), lr=3)
Project: pytorch_60min_blitz    Author: kyuhyoung    | project source | file source
def initialize(mode, is_gpu, dir_data, di_set_transform, ext_img, n_img_per_batch, n_worker):

    if 'TORCHVISION_MEMORY' == mode:
        trainloader, testloader, li_class = make_dataloader_torchvison_memory(
            dir_data, di_set_transform, n_img_per_batch, n_worker)
    elif 'TORCHVISION_IMAGEFOLDER' == mode:
        trainloader, testloader, li_class = make_dataloader_torchvison_imagefolder(
            dir_data, di_set_transform, ext_img, n_img_per_batch, n_worker)
    elif 'CUSTOM_MEMORY' == mode:
        trainloader, testloader, li_class = make_dataloader_custom_memory(
            dir_data, di_set_transform, ext_img, n_img_per_batch, n_worker)
    elif 'CUSTOM_FILE' == mode:
        trainloader, testloader, li_class = make_dataloader_custom_file(
            dir_data, di_set_transform, ext_img, n_img_per_batch, n_worker)
    else:
        trainloader, testloader, li_class = make_dataloader_custom_tensordataset(
            dir_data, di_set_transform, ext_img, n_img_per_batch, n_worker)


    #net = Net().cuda()
    net = Net()
    #t1 = net.cuda()
    criterion = nn.CrossEntropyLoss()
    if is_gpu:
        net.cuda()
        criterion.cuda()
    optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)
    scheduler = ReduceLROnPlateau(optimizer, 'min', verbose=1, patience = 8, epsilon=0.00001, min_lr=0.000001) # set up scheduler

    return trainloader, testloader, net, criterion, optimizer, scheduler, li_class
Project: DeepPoseComparison    Author: ynaka81    | project source | file source
def _get_optimizer(self, model):
        if self.opt == 'MomentumSGD':
            optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
        elif self.opt == "Adam":
            optimizer = optim.Adam(model.parameters())
        return optimizer
Project: forward-thinking-pytorch    Author: kimhc6028    | project source | file source
def __init__(self, deep):
        super(Net, self).__init__()

        self.deep = deep
        if deep:
            self.fc1 = nn.Linear(28*28, 100); self.fc2 = nn.Linear(100, 100); self.fc3 = nn.Linear(100, 100); self.fc4 = nn.Linear(100, 100);
            self.fc5 = nn.Linear(100, 100); self.fc6 = nn.Linear(100, 100); self.fc7 = nn.Linear(100, 100); self.fc8 = nn.Linear(100, 100);
            self.fc9 = nn.Linear(100, 100); self.fc10 = nn.Linear(100, 100); self.fc11 = nn.Linear(100, 100); self.fc12 = nn.Linear(100, 100);
            self.fc13 = nn.Linear(100, 100); self.fc14 = nn.Linear(100, 100); self.fc15 = nn.Linear(100, 100); self.fc16 = nn.Linear(100, 100);
            self.fc17 = nn.Linear(100, 100); self.fc18 = nn.Linear(100, 100); self.fc19 = nn.Linear(100, 100); self.fc20 = nn.Linear(100, 10);
            self.fcs = [self.fc1, self.fc2, self.fc3, self.fc4,
                        self.fc5, self.fc6, self.fc7, self.fc8,
                        self.fc9, self.fc10, self.fc11, self.fc12,
                        self.fc13, self.fc14, self.fc15, self.fc16,
                        self.fc17, self.fc18, self.fc19, self.fc20]

        else:
            self.fc1 = nn.Linear(28*28, 150)
            self.fc2 = nn.Linear(150, 100)
            self.fc3 = nn.Linear(100, 50)
            self.fc4 = nn.Linear(50, 10)
            self.fc5 = nn.Linear(10, 10)

        self.optimizer = optim.SGD(self.parameters(), lr=args.lr, momentum=args.momentum)

        self.train_acc = []
        self.test_acc = []
Project: forward-thinking-pytorch    Author: kimhc6028    | project source | file source
def add_layer(self):
        if self.training_c:
            self.training_c.requires_grad = False
            self.frozen_c.append(self.training_c)
        try:
            self.training_c = self.standby_c.pop(0)
            self.training_cf = self.standby_cf.pop(0)
            trainable_params = [{'params': self.training_c.parameters()},
                                {'params': self.training_cf.parameters()}
            ]
            self.optimizer = optim.SGD(trainable_params, lr=args.lr, momentum=args.momentum)
        except IndexError:
            print('No more standby layers!')
Project: pytorch    Author: ezyang    | project source | file source
def test_invalid_param_type(self):
        with self.assertRaises(TypeError):
            optim.SGD(Variable(torch.randn(5, 5)), lr=3)
Project: pytorch    Author: ezyang    | project source | file source
def setUp(self):
        self.net = SchedulerTestNet()
        self.opt = SGD(
            [{'params': self.net.conv1.parameters()}, {'params': self.net.conv2.parameters(), 'lr': 0.5}],
            lr=0.05)
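This fixture backs PyTorch's lr_scheduler tests; whichever scheduler is attached decays both parameter groups together. A standalone sketch of that pairing, with an illustrative module and scheduler choice:

import torch.nn as nn
from torch.optim import SGD
from torch.optim.lr_scheduler import StepLR

net = nn.Conv2d(1, 1, 1)                       # placeholder module
opt = SGD(net.parameters(), lr=0.05)
scheduler = StepLR(opt, step_size=3, gamma=0.1)  # multiply lr by 0.1 every 3 epochs

for epoch in range(10):
    # ... one epoch of training with opt ...
    scheduler.step()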
Project: DrQA    Author: hitvoice    | project source | file source
def __init__(self, opt, embedding=None, state_dict=None):
        # Book-keeping.
        self.opt = opt
        self.updates = state_dict['updates'] if state_dict else 0
        self.train_loss = AverageMeter()

        # Building network.
        self.network = RnnDocReader(opt, embedding=embedding)
        if state_dict:
            new_state = set(self.network.state_dict().keys())
            for k in list(state_dict['network'].keys()):
                if k not in new_state:
                    del state_dict['network'][k]
            self.network.load_state_dict(state_dict['network'])

        # Building optimizer.
        parameters = [p for p in self.network.parameters() if p.requires_grad]
        if opt['optimizer'] == 'sgd':
            self.optimizer = optim.SGD(parameters, opt['learning_rate'],
                                       momentum=opt['momentum'],
                                       weight_decay=opt['weight_decay'])
        elif opt['optimizer'] == 'adamax':
            self.optimizer = optim.Adamax(parameters,
                                          weight_decay=opt['weight_decay'])
        else:
            raise RuntimeError('Unsupported optimizer: %s' % opt['optimizer'])
        if state_dict:
            self.optimizer.load_state_dict(state_dict['optimizer'])
Project: StackGAN_pytorch    Author: qizhex    | project source | file source
def _makeOptimizer(self):
        if self.method == 'sgd':
            self.optimizer = optim.SGD(self.params, lr=self.lr)
        elif self.method == 'adagrad':
            self.optimizer = optim.Adagrad(self.params, lr=self.lr)
        elif self.method == 'adadelta':
            self.optimizer = optim.Adadelta(self.params, lr=self.lr)
        elif self.method == 'adam':
            self.optimizer = optim.Adam(self.params, lr=self.lr, betas=(0.5, 0.999))
        else:
            raise RuntimeError("Invalid optim method: " + self.method)
Project: MNIST_center_loss_pytorch    Author: jxgu1016    | project source | file source
def main():
    use_cuda = torch.cuda.is_available()
    # Dataset
    trainset = datasets.MNIST('../../data', download=True,train=True, transform=transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,))]))
    train_loader = DataLoader(trainset, batch_size=128, shuffle=True, num_workers=4)

    # Model
    model = Net()

    # NLLLoss
    nllloss = nn.NLLLoss() #CrossEntropyLoss = log_softmax + NLLLoss
    # CenterLoss
    loss_weight = 1.0
    centerloss = CenterLoss(10,2,loss_weight)
    if use_cuda:
        nllloss = nllloss.cuda()
        centerloss = centerloss.cuda()
        model = model.cuda()
    criterion = [nllloss, centerloss]

    # optimizer for the network
    optimizer4nn = optim.SGD(model.parameters(), lr=0.001, momentum=0.9, weight_decay=0.0005)
    scheduler = lr_scheduler.StepLR(optimizer4nn, 20, gamma=0.8)

    # optimizer for the center loss
    optimizer4center = optim.SGD(centerloss.parameters(), lr=0.5)

    for epoch in range(50):
        scheduler.step()
        # print optimizer4nn.param_groups[0]['lr']
        train(train_loader, model, criterion, [optimizer4nn, optimizer4center], epoch+1, use_cuda)
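train receives both optimizers; the usual pattern is a single backward pass through the combined loss followed by a step of each optimizer. One iteration sketched under that assumption, with illustrative tensor names and loss combination:

# inside the training loop (train itself is defined elsewhere in the project)
optimizer4nn.zero_grad()
optimizer4center.zero_grad()
loss = nllloss(pred, target) + centerloss(target, features)
loss.backward()
optimizer4nn.step()                            # update the network weights
optimizer4center.step()                        # update the class centers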
Project: facenet_pytorch    Author: liorshk    | project source | file source
def adjust_learning_rate(optimizer):
    """Updates the learning rate given the learning rate decay.
    The routine has been implemented according to the original Lua SGD optimizer
    """
    for group in optimizer.param_groups:
        if 'step' not in group:
            group['step'] = 0
        group['step'] += 1

        group['lr'] = args.lr / (1 + group['step'] * args.lr_decay)
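Because the decay is driven by a per-group 'step' counter that the routine increments itself, adjust_learning_rate is intended to be called once per optimizer step. A usage sketch with a placeholder forward pass:

for data, target in train_loader:              # illustrative loop
    optimizer.zero_grad()
    loss = criterion(model(data), target)      # placeholder forward + loss
    loss.backward()
    optimizer.step()
    adjust_learning_rate(optimizer)            # lr = args.lr / (1 + step * args.lr_decay)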
Project: facenet_pytorch    Author: liorshk    | project source | file source
def create_optimizer(model, new_lr):
    # setup optimizer
    if args.optimizer == 'sgd':
        optimizer = optim.SGD(model.parameters(), lr=new_lr,
                              momentum=0.9, dampening=0.9,
                              weight_decay=args.wd)
    elif args.optimizer == 'adam':
        optimizer = optim.Adam(model.parameters(), lr=new_lr,
                               weight_decay=args.wd, betas=(args.beta1, 0.999))
    elif args.optimizer == 'adagrad':
        optimizer = optim.Adagrad(model.parameters(),
                                  lr=new_lr,
                                  lr_decay=args.lr_decay,
                                  weight_decay=args.wd)
    return optimizer