Python torch.optim module: RMSprop() code examples

The following 38 code examples, extracted from open-source Python projects, illustrate how to use torch.optim.RMSprop().
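
Before the project samples, here is a minimal, self-contained sketch of the typical usage pattern (the model, data, and hyperparameter values are illustrative only, not taken from any of the projects below):

import torch
import torch.nn as nn
import torch.optim as optim

# Toy model and data, purely for illustration.
model = nn.Linear(10, 1)
inputs = torch.randn(32, 10)
targets = torch.randn(32, 1)

# RMSprop with its most commonly tuned arguments; alpha is the smoothing
# constant of the running average of squared gradients.
optimizer = optim.RMSprop(model.parameters(), lr=1e-2, alpha=0.99,
                          eps=1e-8, weight_decay=0, momentum=0)

criterion = nn.MSELoss()
for _ in range(100):
    optimizer.zero_grad()                     # clear old gradients
    loss = criterion(model(inputs), targets)
    loss.backward()                           # backpropagate
    optimizer.step()                          # apply the RMSprop update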

Project: pytorch-dist | Author: apaszke
def test_rmsprop(self):
        self._test_rosenbrock(
            lambda params: optim.RMSprop(params, lr=1e-2),
            wrap_old_fn(old_optim.rmsprop, learningRate=1e-2)
        )
        self._test_rosenbrock(
            lambda params: optim.RMSprop(params, lr=1e-2, weight_decay=1e-2),
            wrap_old_fn(old_optim.rmsprop, learningRate=1e-2, weightDecay=1e-2)
        )
        self._test_rosenbrock(
            lambda params: optim.RMSprop(params, lr=1e-2, alpha=0.95),
            wrap_old_fn(old_optim.rmsprop, learningRate=1e-2, alpha=0.95)
        )
        self._test_basic_cases(
            lambda weight, bias: optim.Adagrad([weight, bias], lr=1e-2)
        )
        self._test_basic_cases(
            lambda weight, bias: optim.Adagrad(
                self._build_params_dict(weight, bias, lr=1e-3),
                lr=1e-2)
        )
Project: pytorch | Author: tylergenter
def test_rmsprop(self):
        self._test_rosenbrock(
            lambda params: optim.RMSprop(params, lr=1e-2),
            wrap_old_fn(old_optim.rmsprop, learningRate=1e-2)
        )
        self._test_rosenbrock(
            lambda params: optim.RMSprop(params, lr=1e-2, weight_decay=1e-2),
            wrap_old_fn(old_optim.rmsprop, learningRate=1e-2, weightDecay=1e-2)
        )
        self._test_rosenbrock(
            lambda params: optim.RMSprop(params, lr=1e-2, alpha=0.95),
            wrap_old_fn(old_optim.rmsprop, learningRate=1e-2, alpha=0.95)
        )
        self._test_basic_cases(
            lambda weight, bias: optim.Adagrad([weight, bias], lr=1e-2)
        )
        self._test_basic_cases(
            lambda weight, bias: optim.Adagrad(
                self._build_params_dict(weight, bias, lr=1e-3),
                lr=1e-2)
        )
Project: pytorch-coriander | Author: hughperkins
def test_rmsprop(self):
        self._test_rosenbrock(
            lambda params: optim.RMSprop(params, lr=1e-2),
            wrap_old_fn(old_optim.rmsprop, learningRate=1e-2)
        )
        self._test_rosenbrock(
            lambda params: optim.RMSprop(params, lr=1e-2, weight_decay=1e-2),
            wrap_old_fn(old_optim.rmsprop, learningRate=1e-2, weightDecay=1e-2)
        )
        self._test_rosenbrock(
            lambda params: optim.RMSprop(params, lr=1e-2, alpha=0.95),
            wrap_old_fn(old_optim.rmsprop, learningRate=1e-2, alpha=0.95)
        )
        self._test_basic_cases(
            lambda weight, bias: optim.Adagrad([weight, bias], lr=1e-2)
        )
        self._test_basic_cases(
            lambda weight, bias: optim.Adagrad(
                self._build_params_dict(weight, bias, lr=1e-3),
                lr=1e-2)
        )
Project: deep-rl | Author: xinghai-sun
def __init__(self,
                 action_space,
                 observation_space,
                 batch_size=128,
                 learning_rate=1e-3,
                 discount=1.0,
                 epsilon=0.05):
        if not isinstance(action_space, spaces.Discrete):
            raise TypeError("Action space type should be Discrete.")
        self._action_space = action_space
        self._batch_size = batch_size
        self._discount = discount
        self._epsilon = epsilon
        self._q_network = ConvNet(
            num_channel_input=observation_space.shape[0],
            num_output=action_space.n)
        self._optimizer = optim.RMSprop(
            self._q_network.parameters(), lr=learning_rate)
        self._memory = ReplayMemory(100000)
Project: deep-rl | Author: xinghai-sun
def __init__(self,
                 action_space,
                 observation_space,
                 batch_size=128,
                 learning_rate=1e-3,
                 discount=1.0,
                 epsilon=0.05):
        if not isinstance(action_space, spaces.Discrete):
            raise TypeError("Action space type should be Discrete.")
        self._action_space = action_space
        self._batch_size = batch_size
        self._discount = discount
        self._epsilon = epsilon
        self._q_network = FCNet(
            input_size=reduce(lambda x, y: x * y, observation_space.shape),
            output_size=action_space.n)
        self._optimizer = optim.RMSprop(
            self._q_network.parameters(), lr=learning_rate)
        self._memory = ReplayMemory(100000)
Project: pytorch | Author: ezyang
def test_rmsprop(self):
        self._test_rosenbrock(
            lambda params: optim.RMSprop(params, lr=1e-2),
            wrap_old_fn(old_optim.rmsprop, learningRate=1e-2)
        )
        self._test_rosenbrock(
            lambda params: optim.RMSprop(params, lr=1e-2, weight_decay=1e-2),
            wrap_old_fn(old_optim.rmsprop, learningRate=1e-2, weightDecay=1e-2)
        )
        self._test_rosenbrock(
            lambda params: optim.RMSprop(params, lr=1e-2, alpha=0.95),
            wrap_old_fn(old_optim.rmsprop, learningRate=1e-2, alpha=0.95)
        )
        self._test_basic_cases(
            lambda weight, bias: optim.Adagrad([weight, bias], lr=1e-2)
        )
        self._test_basic_cases(
            lambda weight, bias: optim.Adagrad(
                self._build_params_dict(weight, bias, lr=1e-3),
                lr=1e-2)
        )
Project: pytorch | Author: pytorch
def test_rmsprop(self):
        self._test_rosenbrock(
            lambda params: optim.RMSprop(params, lr=1e-2),
            wrap_old_fn(old_optim.rmsprop, learningRate=1e-2)
        )
        self._test_rosenbrock(
            lambda params: optim.RMSprop(params, lr=1e-2, weight_decay=1e-2),
            wrap_old_fn(old_optim.rmsprop, learningRate=1e-2, weightDecay=1e-2)
        )
        self._test_rosenbrock(
            lambda params: optim.RMSprop(params, lr=1e-2, alpha=0.95),
            wrap_old_fn(old_optim.rmsprop, learningRate=1e-2, alpha=0.95)
        )
        self._test_basic_cases(
            lambda weight, bias: optim.Adagrad([weight, bias], lr=1e-2)
        )
        self._test_basic_cases(
            lambda weight, bias: optim.Adagrad(
                self._build_params_dict(weight, bias, lr=1e-3),
                lr=1e-2)
        )
Project: MachineLearning | Author: timomernick
def __init__(self):
        super(Generator, self).__init__()
        self.main = nn.Sequential(
            nn.ConvTranspose2d(nz, ngf * 8, 4, 1, 0, bias=False),
            nn.BatchNorm2d(ngf * 8),
            nn.ReLU(True),
            nn.ConvTranspose2d(ngf * 8, ngf * 4, 4, 2, 1, bias=False),
            nn.BatchNorm2d(ngf * 4),
            nn.ReLU(True),
            nn.ConvTranspose2d(ngf * 4, ngf * 2, 4, 2, 1, bias=False),
            nn.BatchNorm2d(ngf * 2),
            nn.ReLU(True),
            nn.ConvTranspose2d(ngf * 2, ngf * 1, 4, 2, 1, bias=False),
            nn.BatchNorm2d(ngf * 1),
            nn.ReLU(True),
            nn.ConvTranspose2d(ngf * 1, nc, 4, 2, 1, bias=False),
            nn.Tanh()
        )
        self.apply(weights_init)
        self.optimizer = optim.Adam(self.parameters(), lr=learning_rate, betas=(beta_1, beta_2))
        #self.optimizer = optim.RMSprop(self.parameters(), lr=learning_rate, alpha=beta_2)
Project: MachineLearning | Author: timomernick
def __init__(self):
        super(Discriminator, self).__init__()
        self.main = nn.Sequential(
            nn.Conv2d(nc, ndf, 4, 2, 1, bias=False),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Conv2d(ndf, ndf * 2, 4, 2, 1, bias=False),
            nn.BatchNorm2d(ndf * 2),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Conv2d(ndf * 2, ndf * 4, 4, 2, 1, bias=False),
            nn.BatchNorm2d(ndf * 4),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Conv2d(ndf * 4, ndf * 8, 4, 2, 1, bias=False),
            nn.BatchNorm2d(ndf * 8),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Conv2d(ndf * 8, 1, 4, 1, 0, bias=False),
            nn.Sigmoid()
        )
        self.apply(weights_init)
        self.optimizer = optim.Adam(self.parameters(), lr=learning_rate, betas=(beta_1, beta_2))
        #self.optimizer = optim.RMSprop(self.parameters(), lr=learning_rate, alpha=beta_2)
Project: MachineLearning | Author: timomernick
def __init__(self):
        super(Discriminator, self).__init__()

        self.conv0 = nn.Conv1d(nc, ndf, 4, 2, 1, bias=False)
        self.conv1 = nn.Conv1d(ndf, ndf * 2, 4, 2, 1, bias=False)
        self.conv2 = nn.Conv1d(ndf * 2, ndf * 4, 4, 2, 1, bias=False)
        self.conv3 = nn.Conv1d(ndf * 4, ndf * 8, 4, 2, 1, bias=False)

        self.fc0_size = 512 * 128
        self.fc0 = nn.Linear(self.fc0_size, 100)

        self.relu = nn.LeakyReLU(0.2, inplace=True)

        self.bn1 = nn.BatchNorm1d(ndf * 2)
        self.bn2 = nn.BatchNorm1d(ndf * 4)
        self.bn3 = nn.BatchNorm1d(ndf * 8)

        self.sigmoid = nn.Sigmoid()

        self.apply(weights_init)

        self.optimizer = optim.Adam(self.parameters(), lr=learning_rate, betas=(beta_1, beta_2))
        #self.optimizer = optim.RMSprop(self.parameters(), lr=learning_rate, alpha=beta_2)
Project: DCN | Author: alexnowakvila
def __init__(
                 self, input_size, batch_size,
                 num_units_split, split_layers, grad_clip_split, beta=1.0
                 ):
        super(DivideAndConquerNetwork, self).__init__()
        # General
        self.input_size = input_size
        self.batch_size = batch_size
        # Split
        self.num_units_split = num_units_split
        self.split_layers = split_layers
        self.beta = beta
        self.split = Split(input_size, num_units_split,
                           batch_size, split_layers)
        # Training
        self.grad_clip_split = grad_clip_split
        self.optim_split = optim.RMSprop(self.split.parameters())

    ###########################################################################
    #                           Load Parameters                               #
    ###########################################################################
Project: restricted-boltzmann-machine-deep-belief-network-deep-boltzmann-machine-in-pytorch | Author: wmingwei
def generative_fine_tune(dbn, lr = 1e-2, epoch = 100, batch_size = 50, input_data = None, CD_k = 1, optimization_method = "Adam", momentum = 0, weight_decay = 0, test_input = None):

    if optimization_method == "RMSprop":
        optimizer = optim.RMSprop(dbn.parameters(), lr = lr, momentum = momentum, weight_decay = weight_decay)
    elif optimization_method == "SGD":
        optimizer = optim.SGD(dbn.parameters(), lr = lr, momentum = momentum, weight_decay = weight_decay)
    elif optimization_method == "Adam":
        optimizer = optim.Adam(dbn.parameters(), lr = lr, weight_decay = weight_decay)   

    # Dummy backward pass so that every parameter gets a .grad buffer
    # allocated before the custom fine-tuning updates below.
    for i in dbn.parameters():
        i.mean().backward()

    train_set = torch.utils.data.dataset.TensorDataset(input_data, torch.zeros(input_data.size()[0]))
    train_loader = torch.utils.data.DataLoader(train_set, batch_size = batch_size, shuffle=True)

    for i in range(epoch):
        for batch_idx, (data, target) in enumerate(train_loader):

            sleep_wake(dbn = dbn, optimizer = optimizer, lr = lr, CD_k = CD_k, v = data, batch_size = batch_size)

        if test_input is not None:

            print("fine tune", i, ais_dbn.logp_ais(dbn, test_input, step = 1000, M_Z = 20, M_IS = 100, parallel = True))
Project: drl.pth | Author: seba-1511
def get_opt(name):
    opts = {
        'SGD': optim.SGD,
        'Adam': optim.Adam,
        'Adagrad': optim.Adagrad,
        'RMSprop': optim.RMSprop,
    }
    return opts[name]
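
A likely call site (model is hypothetical here) resolves the optimizer class first and constructs it afterwards:

# Hypothetical usage of get_opt: look up the class, then bind parameters.
opt_cls = get_opt('RMSprop')                      # -> optim.RMSprop
optimizer = opt_cls(model.parameters(), lr=1e-3)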
Project: pytorch.rl.learning | Author: moskomule
def __init__(self, agent: Agent, val_env: gym.Env, lr, memory_size, target_update_freq, gradient_update_freq,
                 batch_size, replay_start, val_freq, log_freq_by_step, log_freq_by_ep, val_epsilon,
                 log_dir, weight_dir):
        """
        :param agent: agent object
        :param val_env: environment for validation
        :param lr: learning rate of optimizer
        :param memory_size: size of replay memory
        :param target_update_freq: frequency of update target network in steps
        :param gradient_update_freq: frequency of q-network update in steps
        :param batch_size: batch size for q-net
        :param replay_start: number of random exploration before starting
        :param val_freq: frequency of validation in steps
        :param log_freq_by_step: frequency of logging in steps
        :param log_freq_by_ep: frequency of logging in episodes
        :param val_epsilon: exploration rate for validation
        :param log_dir: directory for saving tensorboard things
        :param weight_dir: directory for saving weights when validated
        """
        self.agent = agent
        self.env = self.agent.env
        self.val_env = val_env
        self.optimizer = optim.RMSprop(params=self.agent.net.parameters(), lr=lr)
        self.memory = Memory(memory_size)
        self.target_update_freq = target_update_freq
        self.batch_size = batch_size
        self.replay_start = replay_start
        self.gradient_update_freq = gradient_update_freq
        self._step = 0
        self._episode = 0
        self._warmed = False
        self._val_freq = val_freq
        self.log_freq_by_step = log_freq_by_step
        self.log_freq_by_ep = log_freq_by_ep
        self._val_epsilon = val_epsilon
        self._writer = SummaryWriter(os.path.join(log_dir, datetime.now().strftime('%b%d_%H-%M-%S')))
        if weight_dir is not None and not os.path.exists(weight_dir):
            os.makedirs(weight_dir)
        self.weight_dir = weight_dir
Project: dqn-mario | Author: nailo2c
def main(env):
    ### Iteration budget and exploration schedule
    # This is just a rough estimate
    num_iterations = float(40000000) / 4.0


    # define exploration schedule
    exploration_schedule = LinearSchedule(1000000, 0.1)


    # optimizer
    OptimizerSpec = namedtuple("OptimizerSpec", ["constructor", "kwargs"])

    optimizer = OptimizerSpec(
        constructor=optim.RMSprop,
        kwargs=dict(lr=LEARNING_RATE, alpha=ALPHA, eps=EPS),
    )


    mario_learning(
        env=env,
        q_func=DQN,
        optimizer_spec=optimizer,
        exploration=exploration_schedule,
        replay_buffer_size=REPLAY_BUFFER_SIZE,
        batch_size=BATCH_SIZE,
        gamma=GAMMA,
        learning_starts=LEARNING_STARTS,
        learning_freq=LEARNING_FREQ,
        frame_history_len=FRAME_HISTORY_LEN,
        target_update_freq=TARGET_UPDATE_FREQ
    )
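
The OptimizerSpec namedtuple defers construction to the training loop; presumably mario_learning instantiates it once it owns the network, roughly as in this sketch (q_net stands in for the network built inside that function):

optimizer = optimizer_spec.constructor(q_net.parameters(),
                                       **optimizer_spec.kwargs)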
Project: dqn-mario | Author: nailo2c
def main(env):
    ### Iteration budget and exploration schedule
    # This is just a rough estimate
    num_iterations = float(40000000) / 4.0


    # define exploration schedule
    exploration_schedule = LinearSchedule(1000000, 0.1)


    # optimizer
    OptimizerSpec = namedtuple("OptimizerSpec", ["constructor", "kwargs"])

    optimizer = OptimizerSpec(
        constructor=optim.RMSprop,
        kwargs=dict(lr=LEARNING_RATE, alpha=ALPHA, eps=EPS),
    )


    learning(
        env=env,
        q_func=DQN,
        optimizer_spec=optimizer,
        exploration=exploration_schedule,
        replay_buffer_size=REPLAY_BUFFER_SIZE,
        batch_size=BATCH_SIZE,
        gamma=GAMMA,
        learning_starts=LEARNING_STARTS,
        learning_freq=LEARNING_FREQ,
        frame_history_len=FRAME_HISTORY_LEN,
        target_update_freq=TARGET_UPDATE_FREQ
    )
Project: covfefe | Author: deepnn
def rms_prop(w, lr=0.01, alpha=0.99, eps=1e-08):
    # The original passed Rprop-style arguments (etas, step_sizes) to a
    # nonexistent nn.RMSprop; torch.optim.RMSprop takes lr, alpha and eps.
    return optim.RMSprop(params=w, lr=lr, alpha=alpha, eps=eps)
Project: gan-error-avoidance | Author: aleju
def embed_real_images(gen, r, images, code_size, lr=0.0001, test_steps=100000):
    """Function to embed images to noise vectors that result in as similar
    images as possible (when feeding the approximated noise vectors through
    G). This is intended for real images, not images that came from the
    generator. It also didn't seem to work very well."""
    testfunc = nn.MSELoss()

    for param in gen.parameters():
        param.requires_grad = False
    best_code = torch.Tensor(len(images), code_size).cuda()

    batch_size = len(images)
    batch_code = Variable(torch.zeros(batch_size, code_size).cuda())
    batch_code.requires_grad = True

    batch_target = torch.Tensor(batch_size, images[0].size(0), images[0].size(1), images[0].size(2))
    for i, image in enumerate(images):
        batch_target[i].copy_(image)
    batch_target = Variable(batch_target.cuda())
    batch_code.data.copy_(r(batch_target).data)

    test_opt = optim.Adam([batch_code], lr=lr)
    for j in range(test_steps):
        generated, _ = gen(batch_code)
        loss = testfunc(generated, batch_target)
        loss.backward()
        test_opt.step()
        batch_code.grad.data.zero_()
        if j % 100 == 0:
            #lr = lr * 0.98
            print("Embedding real images... iter %d with loss %.08f and lr %.08f" % (j,loss.data[0], lr))
            #test_opt = optim.RMSprop([batch_code], lr=lr)
    best_code = batch_code.data

    for param in gen.parameters():
        param.requires_grad = True

    return best_code
Project: MachineLearning | Author: timomernick
def __init__(self):
        super(Generator, self).__init__()

        self.deconv0 = nn.ConvTranspose1d(nz, ngf * 8, 4, 1, 0, bias=False)
        self.deconv1 = nn.ConvTranspose1d(ngf * 8, ngf * 4, 4, 2, 1, bias=False)
        self.deconv2 = nn.ConvTranspose1d(ngf * 4, ngf * 2, 4, 2, 1, bias=False)
        self.deconv3 = nn.ConvTranspose1d(ngf * 2, ngf * 1, 4, 2, 1, bias=False)
        self.deconv4 = nn.ConvTranspose1d(ngf * 1, ngf // 2, 4, 2, 1, bias=False)
        self.deconv5 = nn.ConvTranspose1d(ngf // 2, ngf // 4, 4, 2, 1, bias=False)
        self.deconv6 = nn.ConvTranspose1d(ngf // 4, ngf // 8, 4, 2, 1, bias=False)
        self.deconv7 = nn.ConvTranspose1d(ngf // 8, ngf // 16, 4, 2, 1, bias=False)
        self.deconv8 = nn.ConvTranspose1d(ngf // 16, ngf // 32, 4, 2, 1, bias=False)
        self.deconv9 = nn.ConvTranspose1d(ngf // 32, nc, 4, 2, 1, bias=False)


        self.bn0 = nn.BatchNorm1d(ngf * 8)
        self.bn1 = nn.BatchNorm1d(ngf * 4)
        self.bn2 = nn.BatchNorm1d(ngf * 2)
        self.bn3 = nn.BatchNorm1d(ngf * 1)
        self.bn4 = nn.BatchNorm1d(ngf // 2)
        self.bn5 = nn.BatchNorm1d(ngf // 4)
        self.bn6 = nn.BatchNorm1d(ngf // 8)
        self.bn7 = nn.BatchNorm1d(ngf // 16)
        self.bn8 = nn.BatchNorm1d(ngf // 32)

        self.relu = nn.ReLU(True)

        self.tanh = nn.Tanh()

        self.apply(weights_init)

        self.optimizer = optim.Adam(self.parameters(), lr=learning_rate, betas=(beta_1, beta_2))
        #self.optimizer = optim.RMSprop(self.parameters(), lr=learning_rate, alpha=beta_2)
Project: MachineLearning | Author: timomernick
def __init__(self, batch_size, size):
        super(Pool, self).__init__()

        self.size = size

        self.inputs = Variable(torch.FloatTensor(batch_size, 1, size, size)).cuda()
        self.targets = Variable(torch.LongTensor(batch_size)).cuda()

        self.medium = nn.Parameter(torch.randn(num_media, 1, size, size) * 0.02, requires_grad=True)

        self.conv0 = nn.Conv2d(1, 1, 3, padding=1, bias=False)

        self.fc0_size = 8 * 8
        self.fc0 = nn.Linear(self.fc0_size, num_classes)

        self.maxPool = nn.AvgPool2d(8)

        self.relu = nn.ReLU()
        self.tanh = nn.Tanh()

        self.logSoftmax = nn.LogSoftmax()

        self.loss = nn.NLLLoss()

        learning_rate = 0.0005

        self.conv0.weight.requires_grad = False
        s = 0.25
        kernel = torch.FloatTensor([0.0, s, 0.0,
                                    s, 0.0, s,
                                    0.0, s, 0.0]).view(3, 3)
        self.conv0.weight.data.copy_(kernel)

        # Keep only the trainable parameters, then add the learnable medium.
        parameters = list(filter(lambda p: p.requires_grad, self.parameters()))
        parameters.append(self.medium)
        self.optimizer = optim.RMSprop(parameters, lr=learning_rate, momentum=0.0)
Project: repeval_rivercorners | Author: jabalazs
def _makeOptimizer(self):
        if self.method == 'sgd':
            self.optimizer = optim.SGD(self.params, lr=self.lr)
        elif self.method == 'adagrad':
            self.optimizer = optim.Adagrad(self.params, lr=self.lr)
        elif self.method == 'adadelta':
            self.optimizer = optim.Adadelta(self.params, lr=self.lr)
        elif self.method == 'adam':
            self.optimizer = optim.Adam(self.params, lr=self.lr)
        elif self.method == 'rmsprop':
            self.optimizer = optim.RMSprop(self.params, lr=self.lr)
        else:
            raise RuntimeError("Invalid optim method: " + self.method)
Project: categorical-dqn | Author: floringogianu
def optim_factory(weights, cmdl):
    if cmdl.optim == "Adam":
        return optim.Adam(weights, lr=cmdl.lr, eps=cmdl.eps)
    elif cmdl.optim == "RMSprop":
        return optim.RMSprop(weights, lr=cmdl.lr, eps=cmdl.eps,
                             alpha=cmdl.alpha)
    else:
        # Fail loudly instead of silently returning None.
        raise ValueError("Unknown optimizer: %s" % cmdl.optim)
Project: DCN | Author: alexnowakvila
def __init__(
                 self, input_size, batch_size,
                 num_units_merge, rnn_layers, grad_clip_merge,
                 num_units_split, split_layers, grad_clip_split, beta=1.0,
                 ):
        super(DivideAndConquerNetwork, self).__init__()
        # General
        self.input_size = input_size
        self.batch_size = batch_size
        # Merge
        self.num_units_merge = num_units_merge
        self.rnn_layers = rnn_layers
        self.merge = Merge(input_size, num_units_merge, batch_size)
        # Split
        self.num_units_split = num_units_split
        self.split_layers = split_layers
        self.beta = beta
        self.split = Split(input_size, num_units_split,
                           batch_size, split_layers)
        # Training
        self.grad_clip_split = grad_clip_split
        self.optim_split = optim.RMSprop(self.split.parameters())
        self.grad_clip_merge = grad_clip_merge
        self.optim_merge = optim.Adam(self.merge.parameters())

    ###########################################################################
    #                           Load Parameters                               #
    ###########################################################################
Project: DCN | Author: alexnowakvila
def upd_learning_rate(self, epoch):
        # split
        lr = 0.01 / float(epoch + 1)
        self.optim_split = optim.RMSprop(self.split.parameters(), lr=lr)
        # merge
        lr = 0.001 / float(epoch + 1)
        self.optim_merge = optim.Adam(self.merge.parameters(), lr=lr)
        return lr

    ###########################################################################
    #                             Split Phase                                 #
    ###########################################################################
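
Note that rebuilding the optimizers every epoch, as above, also resets their internal running statistics. A sketch of an equivalent 1/(epoch+1) decay that preserves optimizer state (using torch.optim.lr_scheduler, not the project's code; it relies on the base learning rates being the RMSprop/Adam defaults of 0.01 and 0.001):

from torch.optim.lr_scheduler import LambdaLR

# Hypothetical stateful alternative; net stands in for the network object.
split_sched = LambdaLR(net.optim_split, lr_lambda=lambda ep: 1.0 / (ep + 1))
merge_sched = LambdaLR(net.optim_merge, lr_lambda=lambda ep: 1.0 / (ep + 1))
# Call split_sched.step() and merge_sched.step() once per epoch.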
Project: DCN | Author: alexnowakvila
def __init__(
                 self, input_size, batch_size,
                 num_units_merge, rnn_layers, grad_clip_merge,
                 num_units_split, split_layers, grad_clip_split, beta=1.0
                 ):
        super(DivideAndConquerNetwork, self).__init__()
        # General
        self.input_size = input_size
        self.batch_size = batch_size
        # Merge
        self.num_units_merge = num_units_merge
        self.rnn_layers = rnn_layers
        self.merge = Merge(input_size, num_units_merge, batch_size)
        # Split
        self.num_units_split = num_units_split
        self.split_layers = split_layers
        self.beta = beta
        self.split = Split(input_size, num_units_split,
                           batch_size, split_layers)
        # Training
        self.grad_clip_split = grad_clip_split
        self.optim_split = optim.RMSprop(self.split.parameters())
        self.grad_clip_merge = grad_clip_merge
        self.optim_merge = optim.Adam(self.merge.parameters())

    ###########################################################################
    #                           Load Parameters                               #
    ###########################################################################
Project: DCN | Author: alexnowakvila
def upd_learning_rate(self, epoch):
        # split
        lr = 0.01 / float(epoch + 1)
        self.optim_split = optim.RMSprop(self.split.parameters(), lr=lr)
        # merge
        lr = 0.001 / float(epoch + 1)
        self.optim_merge = optim.Adam(self.merge.parameters(), lr=lr)
        return lr
Project: DisentangleVAE | Author: Jueast
def __init__(self, network, dataset, visualizer,
                 args, optimizer="Adam", lr=1e-3, momentum=0.9, weight_decay=0):
        if args.ngpus > 0:
            self.network = network.cuda()
            self.gpuids = range(args.ngpus)
        else:
            self.network = network
        self.dataset = dataset
        self.visualizer = visualizer
        self.args = args
        self.maxiters = args.maxiters
        self.cuda = args.ngpus > 0
        if self.network.name == 'VAEGAN':
            self.lr= lr
            self.weight_decay = weight_decay
            self.momentum = momentum
            self.optimizer = optimizer
        else:
            if optimizer == "Adam":
                self.optimizer = optim.Adam(self.network.parameters(),
                                            lr=lr,
                                            weight_decay=weight_decay)
            elif optimizer == "RMSprop":
                self.optimizer = optim.RMSprop(self.network.parameters(),
                                               lr=lr,
                                               weight_decay=weight_decay)
            else:
                self.optimizer = optim.SGD(self.network.parameters(),
                                               lr=lr,
                                               momentum=momentum,
                                               weight_decay=weight_decay)
Project: pytorch-dqn | Author: transedward
def main(env, num_timesteps=int(4e7)):

    def stopping_criterion(env):
        # notice that here t is the number of steps of the wrapped env,
        # which is different from the number of steps in the underlying env
        return get_wrapper_by_name(env, "Monitor").get_total_steps() >= num_timesteps

    optimizer_spec = OptimizerSpec(
        constructor=optim.RMSprop,
        kwargs=dict(lr=LEARNING_RATE, alpha=ALPHA, eps=EPS),
    )

    exploration_schedule = LinearSchedule(1000000, 0.1)

    dqn_learing(
        env=env,
        q_func=DQN_RAM,
        optimizer_spec=optimizer_spec,
        exploration=exploration_schedule,
        stopping_criterion=stopping_criterion,
        replay_buffer_size=REPLAY_BUFFER_SIZE,
        batch_size=BATCH_SIZE,
        gamma=GAMMA,
        learning_starts=LEARNING_STARTS,
        learning_freq=LEARNING_FREQ,
        frame_history_len=FRAME_HISTORY_LEN,
        target_update_freq=TARGER_UPDATE_FREQ,
    )
Project: pytorch-dqn | Author: transedward
def main(env, num_timesteps):

    def stopping_criterion(env):
        # notice that here t is the number of steps of the wrapped env,
        # which is different from the number of steps in the underlying env
        return get_wrapper_by_name(env, "Monitor").get_total_steps() >= num_timesteps

    optimizer_spec = OptimizerSpec(
        constructor=optim.RMSprop,
        kwargs=dict(lr=LEARNING_RATE, alpha=ALPHA, eps=EPS),
    )

    exploration_schedule = LinearSchedule(1000000, 0.1)

    dqn_learing(
        env=env,
        q_func=DQN,
        optimizer_spec=optimizer_spec,
        exploration=exploration_schedule,
        stopping_criterion=stopping_criterion,
        replay_buffer_size=REPLAY_BUFFER_SIZE,
        batch_size=BATCH_SIZE,
        gamma=GAMMA,
        learning_starts=LEARNING_STARTS,
        learning_freq=LEARNING_FREQ,
        frame_history_len=FRAME_HISTORY_LEN,
        target_update_freq=TARGER_UPDATE_FREQ,
    )
Project: spatial-reasoning | Author: JannerM
def __init__(self, network, target_network, lr=0.01, learn_start = 1000, batch_size = 32, map_dim = 10, gamma = 0.95, replay_size = 10000, instr_len = 7, layout_channels = 1, object_channels = 1):
        self.network = network
        self.target_network = target_network
        self._copy_net()
        self.learn_start = learn_start
        self.batch_size = batch_size
        self.gamma = gamma
        self.replay_size = replay_size
        self.instr_len = instr_len
        self.layout_channels = layout_channels
        self.object_channels = object_channels
        self._refresh_size(map_dim, map_dim)

        self.criterion = F.smooth_l1_loss
        self.optimizer = optim.RMSprop(self.network.parameters(), lr=lr)
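
The constructor above wires up the Huber criterion and the RMSprop optimizer but not the update itself; under the standard DQN recipe a training step would look roughly like this hypothetical method (q_values and targets are assumed to come from the replay buffer and the target network):

    def _update(self, q_values, targets):
        loss = self.criterion(q_values, targets)  # smooth L1 (Huber) loss
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()
        return loss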
Project: sourceseparation_misc | Author: ycemsubakan
def VAE_trainer(loader_mix, train_loader, 
                generator, EP = 5,
                **kwargs):
    arguments = kwargs['arguments']
    criterion = kwargs['criterion']
    conditional_gen = kwargs['conditional_gen']

    generator.train()

    L1 = generator.L1
    L2 = generator.L2
    K = generator.K

    if arguments.optimizer == 'Adam':
        optimizerG = optim.Adam(generator.parameters(), lr=arguments.lr, betas=(0.9, 0.999))
    elif arguments.optimizer == 'RMSprop':
        optimizerG = optim.RMSprop(generator.parameters(), lr=arguments.lr)

    if not arguments.cuda and arguments.plot_training:
        figure(figsize=(4,4))
    true, false = 1, 0
    for ep in range(EP):
        for (ft, tar, lens), mix in zip(train_loader, loader_mix):
            if arguments.cuda:
                tar = tar.cuda()
                ft = ft.cuda()
                lens = lens.cuda()

            # sort the tensors within batch
            if arguments.task == 'images':
                tar = tar.contiguous().view(-1, arguments.L2)
                tar, ft = Variable(tar), Variable(ft)
            else:
                ft, tar = ut.sort_pack_tensors(ft, tar, lens)
                tar = Variable(tar[0])

            #if conditional_gen: 
            #    inp = mix.contiguous().view(-1, L)
            #else:
            #    inp = ft_rshape   # fixed_noise.contiguous().view(-1, L)

            # generator gradient
            generator.zero_grad()
            out_g, mu, logvar = generator.forward(ft)
            err_G = criterion(out_g, tar, mu, logvar)
            err_G.backward()

            # step 
            optimizerG.step()

            print(err_G)
            print(ep)
Project: gan-error-avoidance | Author: aleju
def test():
    test_loss = 0
    for param in gen.parameters():
        param.requires_grad = False
    gen.eval()
    best_code = torch.Tensor(test_index.size(0), opt.code_size).cuda()
    total_batch = (test_index.size(0) - 1) // opt.batch_size + 1

    for i in range(total_batch):
        if opt.final_test:
            print('Testing batch {0} of {1} ...'.format(i + 1, total_batch))
        batch_size = min(opt.batch_size, test_index.size(0) - i * opt.batch_size)
        batch_code = Variable(torch.zeros(batch_size, opt.code_size).cuda())
        batch_code.requires_grad = True

        batch_target = torch.Tensor(batch_size, 3, opt.height, opt.width)
        for j in range(batch_size):
            batch_target[j].copy_(get_data(test_index[i * opt.batch_size + j]))
        batch_target = Variable(batch_target.cuda())

        test_opt = optim.RMSprop([batch_code], lr = opt.test_lr, eps = 1e-6, alpha = 0.9)
        for j in range(opt.test_steps):
            loss = testfunc(gen(batch_code), batch_target)
            loss.backward()
            test_opt.step()
            batch_code.grad.data.zero_()
        best_code[i * opt.batch_size : i * opt.batch_size + batch_size].copy_(batch_code.data)

        generated = gen(batch_code)
        loss = testfunc(generated, batch_target)
        test_loss = test_loss + loss.data[0] * batch_size
        if opt.final_test:
            print('batch loss = {0}'.format(loss.data[0]))
            sample_rec_pair = torch.Tensor(2, 3, opt.height, opt.width)
            for j in range(batch_size):
                sample_rec_pair[0].copy_(get_data(test_index[i * opt.batch_size + j]))
                sample_rec_pair[1].copy_(generated.data[j])
                if opt.output_scale:
                    torchvision.utils.save_image(sample_rec_pair * 2 - 1, os.path.join(opt.load_path, '{0}_test'.format(opt.net), '{0}.png'.format(i * opt.batch_size + j)), 2)
                else:
                    torchvision.utils.save_image(sample_rec_pair, os.path.join(opt.load_path, '{0}_test'.format(opt.net), '{0}.png'.format(i * opt.batch_size + j)), 2)

    for param in gen.parameters():
        param.requires_grad = True
    gen.train()
    if not opt.final_test:
        visualize(
            best_code[0 : min(test_index.size(0), opt.vis_row * opt.vis_col)],
            filename=os.path.join(opt.save_path, 'running_test', 'test_{0}.jpg'.format(current_iter)),
            filename_r=os.path.join(opt.save_path, 'running_test', 'r{0}_test_%d.jpg' % (current_iter,)),
            filename_all=os.path.join(opt.save_path, 'running_test', 'all_test_{0}.jpg'.format(current_iter))
        )
    test_loss = test_loss / test_index.size(0)
    print('loss = {0}'.format(test_loss))
    return test_loss
Project: gan-error-avoidance | Author: aleju
def test():
    test_loss = 0
    for param in gen.parameters():
        param.requires_grad = False
    gen.eval()
    best_code = torch.Tensor(test_index.size(0), opt.code_size).cuda()
    total_batch = (test_index.size(0) - 1) // opt.batch_size + 1

    for i in range(total_batch):
        if opt.final_test:
            print('Testing batch {0} of {1} ...'.format(i + 1, total_batch))
        batch_size = min(opt.batch_size, test_index.size(0) - i * opt.batch_size)
        batch_code = Variable(torch.zeros(batch_size, opt.code_size).cuda())
        batch_code.requires_grad = True

        batch_target = torch.Tensor(batch_size, 3, opt.height, opt.width)
        for j in range(batch_size):
            batch_target[j].copy_(get_data(test_index[i * opt.batch_size + j]))
        batch_target = Variable(batch_target.cuda())

        test_opt = optim.RMSprop([batch_code], lr = opt.test_lr, eps = 1e-6, alpha = 0.9)
        for j in range(opt.test_steps):
            generated, _ = gen(batch_code)
            loss = testfunc(generated, batch_target)
            loss.backward()
            test_opt.step()
            batch_code.grad.data.zero_()
        best_code[i * opt.batch_size : i * opt.batch_size + batch_size].copy_(batch_code.data)

        generated, _ = gen(batch_code)
        loss = testfunc(generated, batch_target)
        test_loss = test_loss + loss.data[0] * batch_size
        if opt.final_test:
            print('batch loss = {0}'.format(loss.data[0]))
            sample_rec_pair = torch.Tensor(2, 3, opt.height, opt.width)
            for j in range(batch_size):
                sample_rec_pair[0].copy_(get_data(test_index[i * opt.batch_size + j]))
                sample_rec_pair[1].copy_(generated.data[j])
                if opt.output_scale:
                    torchvision.utils.save_image(sample_rec_pair * 2 - 1, os.path.join(opt.load_path, '{0}_test'.format(opt.net), '{0}.png'.format(i * opt.batch_size + j)), 2)
                else:
                    torchvision.utils.save_image(sample_rec_pair, os.path.join(opt.load_path, '{0}_test'.format(opt.net), '{0}.png'.format(i * opt.batch_size + j)), 2)

    for param in gen.parameters():
        param.requires_grad = True
    gen.train()
    if not opt.final_test:
        visualize(
            best_code[0 : min(test_index.size(0), opt.vis_row * opt.vis_col)],
            filename=os.path.join(opt.save_path, 'running_test', 'test_{0}.jpg'.format(current_iter)),
            filename_r=os.path.join(opt.save_path, 'running_test', 'r{0}_test_%d.jpg' % (current_iter,)),
            filename_all=os.path.join(opt.save_path, 'running_test', 'all_test_{0}.jpg'.format(current_iter))
        )
    test_loss = test_loss / test_index.size(0)
    print('loss = {0}'.format(test_loss))
    return test_loss
Project: dlcv_for_beginners | Author: frombeijingwithlove
def parse_args():
    parser = argparse.ArgumentParser(
        description='A Simple Demo of Generative Adversarial Networks with 2D Samples',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    parser.add_argument('input_path',
                        help='Image or directory containing images to define distribution')

    parser.add_argument('--z_dim',
                        help='Dimensionality of latent space',
                        type=int, default=2)
    parser.add_argument('--iterations',
                        help='Num of training iterations',
                        type=int, default=2000)
    parser.add_argument('--batch_size',
                        help='Batch size of each kind',
                        type=int, default=2000)
    parser.add_argument('--optimizer',
                        help='Optimizer: Adadelta/Adam/RMSprop/SGD',
                        type=str, default='Adadelta')
    parser.add_argument('--d_lr',
                        help='Learning rate of discriminator, for Adadelta it is the base learning rate',
                        type=float, default=1)
    parser.add_argument('--g_lr',
                        help='Learning rate of generator, for Adadelta it is the base learning rate',
                        type=float, default=1)
    parser.add_argument('--d_steps',
                        help='Steps of discriminators in each iteration',
                        type=int, default=3)
    parser.add_argument('--g_steps',
                        help='Steps of generator in each iteration',
                        type=int, default=1)
    parser.add_argument('--d_hidden_size',
                        help='Num of hidden units in discriminator',
                        type=int, default=100)
    parser.add_argument('--g_hidden_size',
                        help='Num of hidden units in generator',
                        type=int, default=50)
    parser.add_argument('--display_interval',
                        help='Interval of iterations to display/export images',
                        type=int, default=10)
    parser.add_argument('--no_display',
                        help='Do not show plots during training', action='store_true')
    parser.add_argument('--export',
                        help='Export images', action='store_true')
    parser.add_argument('--cpu',
                        help='Set to CPU mode', action='store_true')

    args = parser.parse_args()
    args.input_path = args.input_path.rstrip(os.sep)
    args.optimizer = OPTIMIZERS[args.optimizer.lower()]

    return args
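
The OPTIMIZERS table used just above is defined elsewhere in the script; given the --optimizer help text, it is presumably a mapping along these lines (a guess, not the verified source):

# Hypothetical lookup table consistent with the help text above.
OPTIMIZERS = {
    'adadelta': optim.Adadelta,
    'adam': optim.Adam,
    'rmsprop': optim.RMSprop,
    'sgd': optim.SGD,
}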
Project: pytorch-dnc | Author: jingweiz
def __init__(self):
        super(AgentParams, self).__init__()

        if self.agent_type == "sl":
            if self.circuit_type == "ntm":
                self.criteria       = nn.BCELoss()
                self.optim          = optim.RMSprop

                self.steps          = 100000    # max #iterations
                self.batch_size     = 16
                self.early_stop     = None      # max #steps per episode
                self.clip_grad      = 50.
                self.lr             = 1e-4
                self.optim_eps      = 1e-10     # NOTE: we use this setting to be equivalent w/ the default settings in tensorflow
                self.optim_alpha    = 0.9       # NOTE: only for rmsprop, alpha is the decay in tensorflow, whose default is 0.9
                self.eval_freq      = 500
                self.eval_steps     = 50
                self.prog_freq      = self.eval_freq
                self.test_nepisodes = 5
            elif self.circuit_type == "dnc":
                self.criteria       = nn.BCELoss()
                self.optim          = optim.RMSprop

                self.steps          = 100000    # max #iterations
                self.batch_size     = 16
                self.early_stop     = None      # max #steps per episode
                self.clip_grad      = 50.
                self.lr             = 1e-4
                self.optim_eps      = 1e-10     # NOTE: we use this setting to be equivalent w/ the default settings in tensorflow
                self.optim_alpha    = 0.9       # NOTE: only for rmsprop, alpha is the decay in tensorflow, whose default is 0.9
                self.eval_freq      = 500
                self.eval_steps     = 50
                self.prog_freq      = self.eval_freq
                self.test_nepisodes = 5
        elif self.agent_type == "empty":
            self.criteria       = nn.BCELoss()
            self.optim          = optim.RMSprop

            self.steps          = 100000    # max #iterations
            self.batch_size     = 16
            self.early_stop     = None      # max #steps per episode
            self.clip_grad      = 50.
            self.lr             = 1e-4
            self.optim_eps      = 1e-10     # NOTE: we use this setting to be equivalent w/ the default settings in tensorflow
            self.optim_alpha    = 0.9       # NOTE: only for rmsprop, alpha is the decay in tensorflow, whose default is 0.9
            self.eval_freq      = 500
            self.eval_steps     = 50
            self.prog_freq      = self.eval_freq
            self.test_nepisodes = 5

        self.env_params     = EnvParams()
        self.circuit_params = CircuitParams()
Project: SentEval | Author: facebookresearch
def get_optimizer(s):
    """
    Parse optimizer parameters.
    Input should be of the form:
        - "sgd,lr=0.01"
        - "adagrad,lr=0.1,lr_decay=0.05"
    """
    if "," in s:
        method = s[:s.find(',')]
        optim_params = {}
        for x in s[s.find(',') + 1:].split(','):
            split = x.split('=')
            assert len(split) == 2
            assert re.match(r"^[+-]?(\d+(\.\d*)?|\.\d+)$", split[1]) is not None
            optim_params[split[0]] = float(split[1])
    else:
        method = s
        optim_params = {}

    if method == 'adadelta':
        optim_fn = optim.Adadelta
    elif method == 'adagrad':
        optim_fn = optim.Adagrad
    elif method == 'adam':
        optim_fn = optim.Adam
    elif method == 'adamax':
        optim_fn = optim.Adamax
    elif method == 'asgd':
        optim_fn = optim.ASGD
    elif method == 'rmsprop':
        optim_fn = optim.RMSprop
    elif method == 'rprop':
        optim_fn = optim.Rprop
    elif method == 'sgd':
        optim_fn = optim.SGD
        assert 'lr' in optim_params
    else:
        raise Exception('Unknown optimization method: "%s"' % method)

    # check that we give good parameters to the optimizer
    expected_args = inspect.getfullargspec(optim_fn.__init__)[0]
    assert expected_args[:2] == ['self', 'params']
    if not all(k in expected_args[2:] for k in optim_params.keys()):
        raise Exception('Unexpected parameters: expected "%s", got "%s"' % (
            str(expected_args[2:]), str(optim_params.keys())))

    return optim_fn, optim_params
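
Note that this helper returns the optimizer class and its keyword arguments separately; a caller (model is hypothetical here) finishes the construction:

# Hypothetical caller: bind the parsed class and kwargs to the parameters.
optim_fn, optim_params = get_optimizer('rmsprop,lr=0.001')
optimizer = optim_fn(model.parameters(), **optim_params)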
Project: FaderNetworks | Author: facebookresearch
def get_optimizer(model, s):
    """
    Parse optimizer parameters.
    Input should be of the form:
        - "sgd,lr=0.01"
        - "adagrad,lr=0.1,lr_decay=0.05"
    """
    if "," in s:
        method = s[:s.find(',')]
        optim_params = {}
        for x in s[s.find(',') + 1:].split(','):
            split = x.split('=')
            assert len(split) == 2
            assert re.match(r"^[+-]?(\d+(\.\d*)?|\.\d+)$", split[1]) is not None
            optim_params[split[0]] = float(split[1])
    else:
        method = s
        optim_params = {}

    if method == 'adadelta':
        optim_fn = optim.Adadelta
    elif method == 'adagrad':
        optim_fn = optim.Adagrad
    elif method == 'adam':
        optim_fn = optim.Adam
        optim_params['betas'] = (optim_params.get('beta1', 0.5), optim_params.get('beta2', 0.999))
        optim_params.pop('beta1', None)
        optim_params.pop('beta2', None)
    elif method == 'adamax':
        optim_fn = optim.Adamax
    elif method == 'asgd':
        optim_fn = optim.ASGD
    elif method == 'rmsprop':
        optim_fn = optim.RMSprop
    elif method == 'rprop':
        optim_fn = optim.Rprop
    elif method == 'sgd':
        optim_fn = optim.SGD
        assert 'lr' in optim_params
    else:
        raise Exception('Unknown optimization method: "%s"' % method)

    # check that we give good parameters to the optimizer
    expected_args = inspect.getfullargspec(optim_fn.__init__)[0]
    assert expected_args[:2] == ['self', 'params']
    if not all(k in expected_args[2:] for k in optim_params.keys()):
        raise Exception('Unexpected parameters: expected "%s", got "%s"' % (
            str(expected_args[2:]), str(optim_params.keys())))

    return optim_fn(model.parameters(), **optim_params)
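
Unlike the SentEval helper above, this variant binds the parameters itself, so the (hypothetical) call site is a single line:

optimizer = get_optimizer(model, 'rmsprop,lr=0.0002')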
Project: InferSent | Author: facebookresearch
def get_optimizer(s):
    """
    Parse optimizer parameters.
    Input should be of the form:
        - "sgd,lr=0.01"
        - "adagrad,lr=0.1,lr_decay=0.05"
    """
    if "," in s:
        method = s[:s.find(',')]
        optim_params = {}
        for x in s[s.find(',') + 1:].split(','):
            split = x.split('=')
            assert len(split) == 2
            assert re.match(r"^[+-]?(\d+(\.\d*)?|\.\d+)$", split[1]) is not None
            optim_params[split[0]] = float(split[1])
    else:
        method = s
        optim_params = {}

    if method == 'adadelta':
        optim_fn = optim.Adadelta
    elif method == 'adagrad':
        optim_fn = optim.Adagrad
    elif method == 'adam':
        optim_fn = optim.Adam
    elif method == 'adamax':
        optim_fn = optim.Adamax
    elif method == 'asgd':
        optim_fn = optim.ASGD
    elif method == 'rmsprop':
        optim_fn = optim.RMSprop
    elif method == 'rprop':
        optim_fn = optim.Rprop
    elif method == 'sgd':
        optim_fn = optim.SGD
        assert 'lr' in optim_params
    else:
        raise Exception('Unknown optimization method: "%s"' % method)

    # check that we give good parameters to the optimizer
    expected_args = inspect.getfullargspec(optim_fn.__init__)[0]
    assert expected_args[:2] == ['self', 'params']
    if not all(k in expected_args[2:] for k in optim_params.keys()):
        raise Exception('Unexpected parameters: expected "%s", got "%s"' % (
            str(expected_args[2:]), str(optim_params.keys())))

    return optim_fn, optim_params