Python gym module: upload() usage examples

The following code examples, extracted from open-source Python projects, illustrate how to use gym.upload().
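For context, here is a minimal sketch of the end-to-end workflow these examples share: record results with a monitor, then upload the recording directory. This assumes one of the legacy gym releases that still shipped gym.upload() and gym.wrappers.Monitor (the scoreboard upload API was removed from later gym versions); the directory path and API key are placeholders.

import gym
from gym import wrappers

# Record episode statistics (and videos, where supported) to a local directory.
env = gym.make('CartPole-v0')
env = wrappers.Monitor(env, '/tmp/cartpole-experiment-1', force=True)

for episode in range(10):
    observation = env.reset()
    done = False
    while not done:
        action = env.action_space.sample()  # stand-in for a trained agent's policy
        observation, reward, done, info = env.step(action)

env.close()  # flush monitor data to disk

# Upload the recorded results to the (now retired) OpenAI Gym scoreboard.
gym.upload('/tmp/cartpole-experiment-1', api_key='YOUR_API_KEY')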

Project: gym-http-api    Author: openai
def upload():
    """
    Upload the results of training (as automatically recorded by
    your env's monitor) to OpenAI Gym.

    Parameters:
        - training_dir: A directory containing the results of a
        training run.
        - api_key: Your OpenAI API key
        - algorithm_id (default=None): An arbitrary string
        indicating the particular version of the algorithm
        (including choices of parameters) you are running.
    """
    j = request.get_json()
    training_dir = get_required_param(j, 'training_dir')
    api_key      = get_required_param(j, 'api_key')
    algorithm_id = get_optional_param(j, 'algorithm_id', None)

    try:
        gym.upload(training_dir, algorithm_id, writeup=None, api_key=api_key,
                   ignore_open_monitors=False)
        return ('', 204)
    except gym.error.AuthenticationError:
        raise InvalidUsage('You must provide an OpenAI Gym API key')
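For reference, a hedged sketch of calling this endpoint from a client. The route (/v1/upload/), default port (5000), and JSON field names follow the gym-http-api README; the training directory and API key are placeholders.

import requests

# POST the upload request to a locally running gym-http-api server.
resp = requests.post(
    'http://127.0.0.1:5000/v1/upload/',
    json={
        'training_dir': '/tmp/cartpole-experiment-1',
        'api_key': 'YOUR_API_KEY',
        'algorithm_id': None,  # optional; may be omitted
    },
)
print(resp.status_code)  # 204 with an empty body on success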
Project: chainer_pong    Author: icoxfog417
def run(submit_key, gpu):
    env = Environment()
    agent = DQNAgent(env.actions, epsilon=0.01, model_path=PATH, on_gpu=gpu)
    path = ""
    episode = 5
    if submit_key:
        print("make directory to submit result")
        path = os.path.join(os.path.dirname(__file__), "submit")
        episode = 100

    for ep, s, r in env.play(agent, episode=episode, render=True, action_interval=4, record_path=path):
        pass

    if submit_key:
        gym.upload(path, api_key=submit_key)
Project: OpenAI_Challenges    Author: AlwaysLearningDeeper
def upload_results(folder):
    gym.upload(folder, api_key=secrets.api_key)
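Loading the key from a separate secrets module keeps credentials out of the training script; several snippets below instead hard-code API keys inline. A minimal alternative sketch, assuming the key is stored in an environment variable (the name OPENAI_GYM_API_KEY is illustrative):

import os
import gym

def upload_results(folder):
    # Read the key from the environment rather than committing it to source control.
    api_key = os.environ['OPENAI_GYM_API_KEY']  # illustrative variable name
    gym.upload(folder, api_key=api_key)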
Project: gym    Author: openai
def close(self):
        """Flush all monitor data to disk and close any open rending windows."""
        if not self.enabled:
            return
        self.stats_recorder.close()
        if self.video_recorder is not None:
            self._close_video_recorder()
        self._flush(force=True)

        # Stop tracking this for autoclose
        monitor_closer.unregister(self._monitor_id)
        self.enabled = False

        logger.info('''Finished writing results. You can upload them to the scoreboard via gym.upload(%r)''', self.directory)
Project: trpo    Author: jjkke88
def test(self, model_name):
        self.load_model(model_name)
        if pms.record_movie:
            for i in range(100):
                self.storage.get_single_path()
            self.env.env.monitor.close()
            if pms.upload_to_gym:
                gym.upload("log/trpo",algorithm_id='alg_8BgjkAsQRNiWu11xAhS4Hg', api_key='sk_IJhy3b2QkqL3LWzgBXoVA')
        else:
            for i in range(50):
                self.storage.get_single_path()
Project: AI-Fight-the-Landlord    Author: YoungGer
def close(self):
        """Flush all monitor data to disk and close any open rending windows."""
        if not self.enabled:
            return
        self.stats_recorder.close()
        if self.video_recorder is not None:
            self._close_video_recorder()
        self._flush(force=True)

        # Stop tracking this for autoclose
        monitor_closer.unregister(self._monitor_id)
        self.enabled = False

        logger.info('''Finished writing results. You can upload them to the scoreboard via gym.upload(%r)''', self.directory)
Project: OpenAI_Gym_AI    Author: leimao
def Upload():

    # Upload training record
    gym.upload(RECORD_DIR + RECORD_FILENAME, api_key=API_KEY)
项目:OpenAI_Gym_AI    作者:leimao    | 项目源码 | 文件源码
def main():

    parser = argparse.ArgumentParser(description='Designate AI mode')
    parser.add_argument('-m', '--mode', help='train / test / upload', required=True)
    args = vars(parser.parse_args())

    if args['mode'] == 'train':
        Train_Model(env=env)
    elif args['mode'] == 'test':
        Test_Model(env=env)
    elif args['mode'] == 'upload':
        Upload()
    else:
        print('Please designate AI mode.')
Project: More-I-O    Author: koltafrickenfer
def handleUpload(self):
    gym.upload('tmp/' + self.envEntry.get(), api_key="sk_8j3LQ561SH20sk0YN3qpg")
Project: OpenAI_Challenges    Author: AlwaysLearningDeeper
def run(config_file):
    local_dir = os.path.dirname(__file__)
    config_path = os.path.join(local_dir, 'config')
    config = neat.Config(neat.DefaultGenome, neat.DefaultReproduction,
                         neat.DefaultSpeciesSet, neat.DefaultStagnation,
                         config_path)
    # Create the population, which is the top-level object for a NEAT run
    population = neat.Population(config)

    population.add_reporter(neat.StdOutReporter(True))
    stats = neat.StatisticsReporter()
    population.add_reporter(stats)
    population.add_reporter(neat.Checkpointer(5))

    # Run NEAT for the configured number of generations
    winner = population.run(eval_genomes, generations)

    # Display the winning genome.
    print('\nBest genome:\n{!s}'.format(winner))

    # Show output of the most fit genome against training data.
    winner_net = neat.nn.FeedForwardNetwork.create(winner, config)
    scores = []

    # Create the environment for the test and wrap it with a Monitor
    env = gym.make('MountainCar-v0')
    #env = wrappers.Monitor(env,'tmp/MountainCar-v0')

    for i in range(trials):
        score = 0
        observation = env.reset()
        for _ in range(goal_steps):
            action = np.argmax(winner_net.activate(observation))
            # do it!
            observation, reward, done, info = env.step(action)
            score += reward
            if done: break
        scores.append(score)
    print("The winning neural network obtained an average score of: "+str(np.average(scores)))
    if(np.average(scores)>-110):
        gym.upload('tmp/MountainCar-v0',api_key='sk_tiwKaUHVQDChjmO9JmK2Gg')
    p = neat.Checkpointer.restore_checkpoint('neat-checkpoint-4')
    p.run(eval_genomes, 10)
Project: rl    Author: wingedsheep
def run(self, 
            epochs,  
            steps, 
            api_key,
            rollouts_per_epoch = 100,
            updateTargetNetwork = defaultRunSettings['updateTargetNetwork'], 
            explorationRate = defaultRunSettings['explorationRate'], 
            miniBatchSize = defaultRunSettings['miniBatchSize'], 
            learnStart = defaultRunSettings['learnStart'], 
            renderPerXEpochs = defaultRunSettings['renderPerXEpochs'], 
            shouldRender = defaultRunSettings['shouldRender'], 
            experimentId = defaultRunSettings['experimentId'], 
            force = defaultRunSettings['force'], 
            upload = defaultRunSettings['upload']):

        last100Scores = [0] * 100
        last100ScoresIndex = 0
        last100Filled = False

        if experimentId is not None:
            self.env.monitor.start('tmp/' + experimentId, force=force)

        for epoch in xrange(epochs):
            paths = []
            for rollout in xrange(rollouts_per_epoch):
                path = {}
                path["actions"] = []
                path["rewards"] = []
                path["states"] = []
                path["isDone"] = []

                observation = self.env.reset()
                # number of timesteps
                totalReward = 0
                for t in xrange(steps):
                    policyValues = self.runModel(self.policyModel, observation)
                    action = self.selectActionByProbability(policyValues)
                    # action = self.selectActionByProbability(self.convertToProbabilities(policyValues))

                    path["states"].append(observation)
                    path["actions"].append(action)

                    newObservation, reward, done, info = self.env.step(action)

                    path["rewards"].append(reward)
                    path["isDone"].append(done)

                    totalReward += reward

                    observation = newObservation

                    if done:
                        break
                paths.append(path)

            self.learn(paths)

        self.env.monitor.close()
        if upload:
            gym.upload('tmp/' + experimentId, api_key=api_key)  # same directory the monitor wrote to
Project: rl    Author: wingedsheep
def run(self, 
            epochs,  
            steps, 
            api_key,
            rollouts_per_epoch = 20,
            updateTargetNetwork = defaultRunSettings['updateTargetNetwork'], 
            explorationRate = defaultRunSettings['explorationRate'], 
            miniBatchSize = defaultRunSettings['miniBatchSize'], 
            learnStart = defaultRunSettings['learnStart'], 
            renderPerXEpochs = defaultRunSettings['renderPerXEpochs'], 
            shouldRender = defaultRunSettings['shouldRender'], 
            experimentId = defaultRunSettings['experimentId'], 
            force = defaultRunSettings['force'], 
            upload = defaultRunSettings['upload']):

        last100Scores = [0] * 100
        last100ScoresIndex = 0
        last100Filled = False

        stepCounter = 0

        if experimentId is not None:
            self.env.monitor.start('tmp/' + experimentId, force=force)

        for epoch in xrange(epochs):
            I = 1
            observation = self.env.reset()
            for t in xrange(steps):
                policyValues = self.runModel(self.policyModel, observation)
                action = self.selectActionByProbability(policyValues)

                newObservation, reward, done, info = self.env.step(action)

                cost, grads = self.get_cost_grads(self.policyModel)
                print(theano.pp(grads[1][0]))

                if done:
                    delta = reward + self.discountFactor * self.runModel(self.valueModel, newObservation) - self.runModel(self.valueModel, observation)
                else:
                    delta = reward - self.runModel(self.valueModel, observation) # because the value for new obs is 0

        self.env.monitor.close()
        if upload:
            gym.upload('tmp/' + experimentId, api_key=api_key)  # same directory the monitor wrote to
Project: openai-mxnet    Author: boddmg
def main():
    env = gym.make('CartPole-v1')
    env.monitor.start("cartpole-ex", force=True)
    agent = DQN_agent(env)
    agent.Q_network_model.load_params(PARAMS_FILE_NAME)
    for episode in range(MAX_EPISODES):
        state = env.reset()
        for t in range(STEPS_PER_EPISODE):
            tic = time.time()
            env.render()
            # print "render time:" + str(time.time() - tic)
            tic = time.time()
            action = agent.egreedy_action(state)
            # print "react time:" + str(time.time() - tic)
            next_state, reward, done, info = env.step(action)

            tic = time.time()
            agent.learn(state, action, reward, next_state, done)
            # print "learn time:" + str(time.time() - tic)

            state = next_state
            if done:
                print("Episode {} finished after {} timesteps with epsilon {}.".format(episode, t+1, agent.epsilon))
                break
            if SAVE_PARAMS_INTERVAL:
                agent.Q_network_model.save_params(PARAMS_FILE_NAME)
    # Test every 100 episodes
        if episode % 100 == 0:
            total_reward = 0
            for i in range(TEST_EPISODES):
                state = env.reset()
                for j in xrange(STEPS_PER_EPISODE):
                    env.render()
                    action = agent.react(state)  # direct action for test
                    state, reward, done, _ = env.step(action)
                    total_reward += reward
                    if done:
                        break
            ave_reward = total_reward / TEST_EPISODES
            log_string = 'episode: {}, Evaluation Average Reward:{}'.format(episode, ave_reward)
            logging.debug(log_string)
            print log_string
            if ave_reward >= STEPS_PER_EPISODE:
                break

    env.monitor.close()
    # gym.upload("cartpole-ex", algorithm_id="x", api_key="x")
Project: openai-mxnet    Author: boddmg
def main():
    env = gym.make('CartPole-v1')
    env.monitor.start("cartpole-ex", force=True)
    agent = DQN_agent(env)
    agent.Q_network_model.load_params(PARAMS_FILE_NAME)
    for episode in range(MAX_EPISODES):
        state = env.reset()
        for t in range(STEPS_PER_EPISODE):
            tic = time.time()
            env.render()
            # print "render time:" + str(time.time() - tic)
            tic = time.time()
            action = agent.egreedy_action(state)
            # print "react time:" + str(time.time() - tic)
            next_state, reward, done, info = env.step(action)

            tic = time.time()
            agent.learn(state, action, reward, next_state, done)
            # print "learn time:" + str(time.time() - tic)

            state = next_state
            if done:
                logging.log(logging.DEBUG, "Episode {} finished after {} timesteps with epsilon {}.".format(episode, t+1, agent.epsilon))
                break
            if SAVE_PARAMS_INTERVAL:
                agent.Q_network_model.save_params(PARAMS_FILE_NAME)
    # Test every 100 episodes
        if episode % 100 == 0:
            total_reward = 0
            for i in range(TEST_EPISODES):
                state = env.reset()
                for j in xrange(STEPS_PER_EPISODE):
                    env.render()
                    action = agent.react(state)  # direct action for test
                    state, reward, done, _ = env.step(action)
                    total_reward += reward
                    if done:
                        break
            ave_reward = total_reward / TEST_EPISODES
            log_string = 'episode: {}, Evaluation Average Reward:{}'.format(episode, ave_reward)
            logging.debug(log_string)
            print log_string
            if ave_reward >= STEPS_PER_EPISODE:
                break

    env.monitor.close()
    # gym.upload("cartpole-ex", algorithm_id="x", api_key="x")
Project: openai-mxnet    Author: boddmg
def main():
    env = gym.make('Acrobot-v1')
    env.monitor.start("Acrobot-ex", force=True)
    agent = DQN_agent(env)
    if os.path.exists(PARAMS_FILE_NAME):
        agent.Q_network_model.load_params(PARAMS_FILE_NAME)
    for episode in range(MAX_EPISODES):
        state = env.reset()
        for t in range(STEPS_PER_EPISODE):
            tic = time.time()
            env.render()
            # print "render time:" + str(time.time() - tic)
            tic = time.time()
            action = agent.egreedy_action(state)
            # print "react time:" + str(time.time() - tic)
            next_state, reward, done, info = env.step(action)

            tic = time.time()
            agent.learn(state, action, reward, next_state, done)
            # print "learn time:" + str(time.time() - tic)

            state = next_state
            if done:
                print("Episode {} finished after {} timesteps with epsilon {}.".format(episode, t+1, agent.epsilon))
                break
            if SAVE_PARAMS_INTERVAL:
                agent.Q_network_model.save_params(PARAMS_FILE_NAME)
    # Test every 100 episodes
        if episode % 100 == 0:
            total_reward = 0
            for i in range(TEST_EPISODES):
                state = env.reset()
                for j in xrange(STEPS_PER_EPISODE):
                    env.render()
                    action = agent.react(state)  # direct action for test
                    state, reward, done, _ = env.step(action)
                    total_reward += reward
                    if done:
                        break
            ave_reward = total_reward / TEST_EPISODES
            log_string = 'episode: {}, Evaluation Average Reward:{}'.format(episode, ave_reward)
            logging.debug(log_string)
            print log_string
            if ave_reward >= STEPS_PER_EPISODE:
                break

    env.monitor.close()
    # gym.upload("cartpole-ex", algorithm_id="x", api_key="x")
Project: openai-mxnet    Author: boddmg
def main():
    env = gym.make('LunarLander-v2')
    env.monitor.start("LunarLander-v2", force=True)
    agent = DQN_agent(env)
    if os.path.exists(PARAMS_FILE_NAME):
        agent.Q_network_model.load_params(PARAMS_FILE_NAME)
    for episode in range(MAX_EPISODES):
        state = env.reset()
        acc = get_accumulator()
        for t in range(STEPS_PER_EPISODE):
            tic = time.time()
            env.render()
            # print "render time:" + str(time.time() - tic)
            tic = time.time()
            action = agent.egreedy_action(state)
            # print "react time:" + str(time.time() - tic)
            next_state, reward, done, info = env.step(action)
            acc(reward)

            tic = time.time()
            agent.learn(state, action, reward, next_state, done)
            # print "learn time:" + str(time.time() - tic)

            state = next_state
            if done:
                print("Episode {} finished after {} timesteps with epsilon {}.".format(episode, t+1, agent.epsilon))
                break
            if SAVE_PARAMS_INTERVAL:
                agent.Q_network_model.save_params(PARAMS_FILE_NAME)
        print acc(0)/t
    # Test every 100 episodes
        if episode % 100 == 0:
            total_reward = get_accumulator()
            for i in range(TEST_EPISODES):
                state = env.reset()
                for j in xrange(STEPS_PER_EPISODE):
                    env.render()
                    action = agent.react(state)  # direct action for test
                    state, reward, done, _ = env.step(action)
                    total_reward(reward)
                    if done:
                        break
            ave_reward = total_reward(0) / TEST_EPISODES
            log_string = 'episode: {}, Evaluation Average Reward:{}'.format(episode, ave_reward)
            logging.debug(log_string)
            print log_string
            if ave_reward >= STEPS_PER_EPISODE:
                break

    env.monitor.close()
    # gym.upload("cartpole-ex", algorithm_id="x", api_key="x")
Project: openai-mxnet    Author: boddmg
def main():
    env = gym.make('CartPole-v0')
    env.monitor.start("cartpole-ex", force=True)
    agent = DQN_agent(env)
    for episode in range(MAX_EPISODES):
        state = env.reset()
        for t in range(STEPS_PER_EPISODE):
            tic = time.time()
            env.render()
            # print "render time:" + str(time.time() - tic)
            tic = time.time()
            action = agent.egreedy_action(state)
            # print "react time:" + str(time.time() - tic)
            next_state, reward, done, info = env.step(action)

            tic = time.time()
            agent.learn(state, action, reward, next_state, done)
            # print "learn time:" + str(time.time() - tic)

            state = next_state
            if done:
                print("Episode {} finished after {} timesteps with epsilon {}.".format(episode, t+1, agent.epsilon))
                break
    # Test every 100 episodes
        if episode % 100 == 0:
            record_filename = 'cartpole-experiment-{}'.format(episode)
            is_record = episode % RECORD_INTERVAL == 0 #and episode != 0
            if is_record:
                # env.monitor.start(record_filename, force=True)
                agent.Q_network_model.save_params("current_params.params")
            total_reward = 0
            for i in range(TEST_EPISODES):
                state = env.reset()
                for j in xrange(STEPS_PER_EPISODE):
                    env.render()
                    action = agent.react(state)  # direct action for test
                    state, reward, done, _ = env.step(action)
                    total_reward += reward
                    if done:
                        break
            ave_reward = total_reward / TEST_EPISODES
            log_string = 'episode: {}, Evaluation Average Reward:{}'.format(episode, ave_reward)
            logging.debug(log_string)
            print log_string
            if ave_reward >= 200:
                break
            if is_record:
                pass

    env.monitor.close()
    # gym.upload("cartpole-ex", algorithm_id="x", api_key="x")
Project: More-I-O    Author: koltafrickenfer
def __init__(self, master):
    self.master = master
    self.frame = Frame(self.master,height=1000,width=450)
    self.frame.grid()
    #jobs label
    self.envLabel = Label(self.master,text="Jobs: ").grid(row=1,column=0,sticky=W)
    self.envNum = IntVar()
    self.envNumEntry  = Entry(self.master,textvariable=self.envNum)
    self.envNumEntry.insert(END,'2')
    self.envNum.set('2')
    self.envNumEntry.grid(row=1,column=0,sticky=E)
    # population label
    self.populationLabel = Label(self.master,text="Population")
    self.populationLabel.grid(row=2,column=0,sticky=W)
    self.population = IntVar()
    self.populationEntry = Entry(self.master,textvariable=self.population)
    self.populationEntry.insert(END,'300')
    self.population.set('300')
    self.populationEntry.grid(row=2,column=0,sticky=E)
    #file saver button
    self.fileSaverButton = Button(self.frame,text="save pool",command=self.saveFile)
    self.fileSaverButton.grid(row=2,column=1)
    self.fileLoaderButton = Button(self.frame,text="load pool", command=self.loadFile)
    self.fileLoaderButton.grid(row=2,column=2)
    #run button
    self.runButton = Button(self.frame,text="start run", command=self.toggleRun)
    self.runButton.grid(row=2,column=3)
    #play best button
    self.playBestButton = Button(self.frame,text='play best',command =self.playBest)
    self.playBestButton.grid(row=2,column=4)
    #uploadButton 
    self.uploadButton = Button(self.frame,text="upload",command=self.handleUpload)
    self.uploadButton.grid(row=2,column=5)
    # attempts label
    self.attempsLabel = Label(self.master,text="attempts")
    self.attempsLabel.grid(row=3,column=0,sticky=W)
    self.attemps = IntVar()
    self.attempsEntry = Entry(self.master,textvariable=self.attemps)
    self.attempsEntry.insert(END,'1')
    self.attemps.set('1')
    self.attempsEntry.grid(row=3,column=0,sticky=E)
    #env label
    self.envLabel = Label(self.master,text="enviroment")
    self.envLabel.grid(row=4,column=0,sticky=W)
    self.envEntry = Entry(self.master)
    self.envEntry.insert(END,'CartPole-v1')
    self.envEntry.grid(row=4,column=0,sticky=E)
    self.netProccess = None
    self.running= False
    self.poolInitialized = False
    self.pool = None
    self.lastPopulation = []
    self.plotDictionary = {}
    self.plotData = []
    self.genomeDictionary = {}
    self.specieID = 0
    self.fig,self.ax = plt.subplots(figsize=(10,6))
    self.ax.stackplot([],[],baseline='wiggle')
    canvas = FigureCanvasTkAgg(self.fig,self.master)
    canvas.get_tk_widget().grid(row=5,column=0,rowspan=4,sticky="nesw")
Project: deep-rl-tensorflow    Author: carpedm20
def play(self, test_ep, n_step=10000, n_episode=100):
    tf.initialize_all_variables().run()

    self.stat.load_model()
    self.target_network.run_copy()

    if not self.env.display:
      gym_dir = '/tmp/%s-%s' % (self.env_name, get_time())
      env = gym.wrappers.Monitor(self.env.env, gym_dir)

    best_reward, best_idx, best_count = 0, 0, 0
    try:
      itr = xrange(n_episode)
    except NameError:
      itr = range(n_episode)
    for idx in itr:
      observation, reward, terminal = self.new_game()
      current_reward = 0

      for _ in range(self.history_length):
        self.history.add(observation)

      for self.t in tqdm(range(n_step), ncols=70):
        # 1. predict
        action = self.predict(self.history.get(), test_ep)
        # 2. act
        observation, reward, terminal, info = self.env.step(action, is_training=False)
        # 3. observe
        q, loss, is_update = self.observe(observation, reward, action, terminal)

        logger.debug("a: %d, r: %d, t: %d, q: %.4f, l: %.2f" % \
            (action, reward, terminal, np.mean(q), loss))
        current_reward += reward

        if terminal:
          break

      if current_reward > best_reward:
        best_reward = current_reward
        best_idx = idx
        best_count = 0
      elif current_reward == best_reward:
        best_count += 1

      print ("="*30)
      print (" [%d] Best reward : %d (dup-percent: %d/%d)" % (best_idx, best_reward, best_count, n_episode))
      print ("="*30)

    #if not self.env.display:
      #gym.upload(gym_dir, writeup='https://github.com/devsisters/DQN-tensorflow', api_key='')