The following 32 code examples, extracted from open-source Python projects, illustrate how to use gym.upload().
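Before the project examples, here is a minimal end-to-end sketch of the workflow most of them share: record a run with the Monitor wrapper, then upload the resulting directory. This assumes an older Gym release that still ships gym.upload() (the scoreboard API was removed in later versions); 'YOUR_API_KEY' and the output directory are placeholders, not values from any example below.

import gym
from gym import wrappers

# Record one random-action episode; the Monitor writes stats and videos to disk.
env = gym.make('CartPole-v0')
env = wrappers.Monitor(env, '/tmp/cartpole-demo', force=True)

observation = env.reset()
done = False
while not done:
    observation, reward, done, info = env.step(env.action_space.sample())
env.close()

# Upload the recorded results; 'YOUR_API_KEY' is a placeholder for your own key.
gym.upload('/tmp/cartpole-demo', api_key='YOUR_API_KEY')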
def upload():
    """
    Upload the results of training (as automatically recorded by your
    env's monitor) to OpenAI Gym.

    Parameters:
        - training_dir: A directory containing the results of a training run.
        - api_key: Your OpenAI API key
        - algorithm_id (default=None): An arbitrary string indicating the
          particular version of the algorithm (including choices of
          parameters) you are running.
    """
    j = request.get_json()
    training_dir = get_required_param(j, 'training_dir')
    api_key = get_required_param(j, 'api_key')
    algorithm_id = get_optional_param(j, 'algorithm_id', None)

    try:
        gym.upload(training_dir, algorithm_id, writeup=None, api_key=api_key,
                   ignore_open_monitors=False)
        return ('', 204)
    except gym.error.AuthenticationError:
        raise InvalidUsage('You must provide an OpenAI Gym API key')
def run(submit_key, gpu):
    env = Environment()
    agent = DQNAgent(env.actions, epsilon=0.01, model_path=PATH, on_gpu=gpu)
    path = ""
    episode = 5

    if submit_key:
        print("make directory to submit result")
        path = os.path.join(os.path.dirname(__file__), "submit")
        episode = 100

    for ep, s, r in env.play(agent, episode=episode, render=True,
                             action_interval=4, record_path=path):
        pass

    if submit_key:
        gym.upload(path, api_key=submit_key)
def upload_results(folder):
    gym.upload(folder, api_key=secrets.api_key)
def close(self):
    """Flush all monitor data to disk and close any open rendering windows."""
    if not self.enabled:
        return
    self.stats_recorder.close()
    if self.video_recorder is not None:
        self._close_video_recorder()
    self._flush(force=True)

    # Stop tracking this for autoclose
    monitor_closer.unregister(self._monitor_id)
    self.enabled = False

    logger.info('''Finished writing results. You can upload them to the scoreboard via gym.upload(%r)''', self.directory)
def test(self, model_name):
    self.load_model(model_name)
    if pms.record_movie:
        for i in range(100):
            self.storage.get_single_path()
        self.env.env.monitor.close()
        if pms.upload_to_gym:
            gym.upload("log/trpo", algorithm_id='alg_8BgjkAsQRNiWu11xAhS4Hg',
                       api_key='sk_IJhy3b2QkqL3LWzgBXoVA')
    else:
        for i in range(50):
            self.storage.get_single_path()
def Upload():
    # Upload training record
    gym.upload(RECORD_DIR + RECORD_FILENAME, api_key=API_KEY)
def main():
    parser = argparse.ArgumentParser(description='Designate AI mode')
    parser.add_argument('-m', '--mode', help='train / test / upload', required=True)
    args = vars(parser.parse_args())

    if args['mode'] == 'train':
        Train_Model(env=env)
    elif args['mode'] == 'test':
        Test_Model(env=env)
    elif args['mode'] == 'upload':
        Upload()
    else:
        print('Please designate AI mode.')
def handleUpload(self):
    gym.upload('tmp/' + self.envEntry.get(), api_key="sk_8j3LQ561SH20sk0YN3qpg")
def run(config_file):
    local_dir = os.path.dirname(__file__)
    config_path = os.path.join(local_dir, 'config')
    config = neat.Config(neat.DefaultGenome, neat.DefaultReproduction,
                         neat.DefaultSpeciesSet, neat.DefaultStagnation,
                         config_path)

    # Create the population, which is the top-level object for a NEAT run
    population = neat.Population(config)
    population.add_reporter(neat.StdOutReporter(True))
    stats = neat.StatisticsReporter()
    population.add_reporter(stats)
    population.add_reporter(neat.Checkpointer(5))

    # Run for 300 generations
    winner = population.run(eval_genomes, generations)

    # Display the winning genome.
    print('\nBest genome:\n{!s}'.format(winner))

    # Show output of the most fit genome against training data.
    winner_net = neat.nn.FeedForwardNetwork.create(winner, config)
    scores = []

    # Create the environment for the test and wrap it with a Monitor
    env = gym.make('MountainCar-v0')
    #env = wrappers.Monitor(env, 'tmp/MountainCar-v0')
    for i in range(trials):
        score = 0
        observation = env.reset()
        for _ in range(goal_steps):
            action = np.argmax(winner_net.activate(observation))
            # do it!
            observation, reward, done, info = env.step(action)
            score += reward
            if done:
                break
        scores.append(score)

    print("The winning neural network obtained an average score of: " + str(np.average(scores)))
    if np.average(scores) > -110:
        gym.upload('tmp/MountainCar-v0', api_key='sk_tiwKaUHVQDChjmO9JmK2Gg')

    p = neat.Checkpointer.restore_checkpoint('neat-checkpoint-4')
    p.run(eval_genomes, 10)
def run(self, epochs, steps, api_key, rollouts_per_epoch=100,
        updateTargetNetwork=defaultRunSettings['updateTargetNetwork'],
        explorationRate=defaultRunSettings['explorationRate'],
        miniBatchSize=defaultRunSettings['miniBatchSize'],
        learnStart=defaultRunSettings['learnStart'],
        renderPerXEpochs=defaultRunSettings['renderPerXEpochs'],
        shouldRender=defaultRunSettings['shouldRender'],
        experimentId=defaultRunSettings['experimentId'],
        force=defaultRunSettings['force'],
        upload=defaultRunSettings['upload']):
    last100Scores = [0] * 100
    last100ScoresIndex = 0
    last100Filled = False

    if experimentId is not None:
        self.env.monitor.start('tmp/' + experimentId, force=force)

    for epoch in xrange(epochs):
        paths = []
        for rollout in xrange(rollouts_per_epoch):
            path = {}
            path["actions"] = []
            path["rewards"] = []
            path["states"] = []
            path["isDone"] = []
            observation = self.env.reset()
            totalReward = 0
            # number of timesteps
            for t in xrange(steps):
                policyValues = self.runModel(self.policyModel, observation)
                action = self.selectActionByProbability(policyValues)
                # action = self.selectActionByProbability(self.convertToProbabilities(policyValues))
                path["states"].append(observation)
                path["actions"].append(action)
                newObservation, reward, done, info = self.env.step(action)
                path["rewards"].append(reward)
                path["isDone"].append(done)
                totalReward += reward
                observation = newObservation
                if done:
                    break
            paths.append(path)
        self.learn(paths)

    self.env.monitor.close()
    if upload:
        # upload from the same directory the monitor wrote to
        gym.upload('tmp/' + experimentId, api_key=api_key)
def run(self, epochs, steps, api_key, rollouts_per_epoch=20,
        updateTargetNetwork=defaultRunSettings['updateTargetNetwork'],
        explorationRate=defaultRunSettings['explorationRate'],
        miniBatchSize=defaultRunSettings['miniBatchSize'],
        learnStart=defaultRunSettings['learnStart'],
        renderPerXEpochs=defaultRunSettings['renderPerXEpochs'],
        shouldRender=defaultRunSettings['shouldRender'],
        experimentId=defaultRunSettings['experimentId'],
        force=defaultRunSettings['force'],
        upload=defaultRunSettings['upload']):
    last100Scores = [0] * 100
    last100ScoresIndex = 0
    last100Filled = False
    stepCounter = 0

    if experimentId is not None:
        self.env.monitor.start('tmp/' + experimentId, force=force)

    for epoch in xrange(epochs):
        I = 1
        observation = self.env.reset()
        for t in xrange(steps):
            policyValues = self.runModel(self.policyModel, observation)
            action = self.selectActionByProbability(policyValues)
            newObservation, reward, done, info = self.env.step(action)
            cost, grads = self.get_cost_grads(self.policyModel)
            print(theano.pp(grads[1][0]))
            if done:
                delta = (reward
                         + self.discountFactor * self.runModel(self.valueModel, newObservation)
                         - self.runModel(self.valueModel, observation))
            else:
                # because the value for the new obs is 0
                delta = reward - self.runModel(self.valueModel, observation)

    self.env.monitor.close()
    if upload:
        # upload from the same directory the monitor wrote to
        gym.upload('tmp/' + experimentId, api_key=api_key)
def main():
    env = gym.make('CartPole-v1')
    env.monitor.start("cartpole-ex", force=True)
    agent = DQN_agent(env)
    agent.Q_network_model.load_params(PARAMS_FILE_NAME)

    for episode in range(MAX_EPISODES):
        state = env.reset()
        for t in range(STEPS_PER_EPISODE):
            tic = time.time()
            env.render()
            # print "render time:" + str(time.time() - tic)
            tic = time.time()
            action = agent.egreedy_action(state)
            # print "react time:" + str(time.time() - tic)
            next_state, reward, done, info = env.step(action)
            tic = time.time()
            agent.learn(state, action, reward, next_state, done)
            # print "learn time:" + str(time.time() - tic)
            state = next_state
            if done:
                print("Episode {} finished after {} timesteps with epsilon {}.".format(episode, t + 1, agent.epsilon))
                break

        if SAVE_PARAMS_INTERVAL:
            agent.Q_network_model.save_params(PARAMS_FILE_NAME)

        # Test every 100 episodes
        if episode % 100 == 0:
            total_reward = 0
            for i in range(TEST_EPISODES):
                state = env.reset()
                for j in xrange(STEPS_PER_EPISODE):
                    env.render()
                    action = agent.react(state)  # direct action for test
                    state, reward, done, _ = env.step(action)
                    total_reward += reward
                    if done:
                        break
            ave_reward = total_reward / TEST_EPISODES
            log_string = 'episode: {}, Evaluation Average Reward:{}'.format(episode, ave_reward)
            logging.debug(log_string)
            print log_string
            if ave_reward >= STEPS_PER_EPISODE:
                break

    env.monitor.close()
    # gym.upload("cartpole-ex", algorithm_id="x", api_key="x")
def main():
    env = gym.make('CartPole-v1')
    env.monitor.start("cartpole-ex", force=True)
    agent = DQN_agent(env)
    agent.Q_network_model.load_params(PARAMS_FILE_NAME)

    for episode in range(MAX_EPISODES):
        state = env.reset()
        for t in range(STEPS_PER_EPISODE):
            tic = time.time()
            env.render()
            # print "render time:" + str(time.time() - tic)
            tic = time.time()
            action = agent.egreedy_action(state)
            # print "react time:" + str(time.time() - tic)
            next_state, reward, done, info = env.step(action)
            tic = time.time()
            agent.learn(state, action, reward, next_state, done)
            # print "learn time:" + str(time.time() - tic)
            state = next_state
            if done:
                logging.log(logging.DEBUG, "Episode {} finished after {} timesteps with epsilon {}.".format(episode, t + 1, agent.epsilon))
                break

        if SAVE_PARAMS_INTERVAL:
            agent.Q_network_model.save_params(PARAMS_FILE_NAME)

        # Test every 100 episodes
        if episode % 100 == 0:
            total_reward = 0
            for i in range(TEST_EPISODES):
                state = env.reset()
                for j in xrange(STEPS_PER_EPISODE):
                    env.render()
                    action = agent.react(state)  # direct action for test
                    state, reward, done, _ = env.step(action)
                    total_reward += reward
                    if done:
                        break
            ave_reward = total_reward / TEST_EPISODES
            log_string = 'episode: {}, Evaluation Average Reward:{}'.format(episode, ave_reward)
            logging.debug(log_string)
            print log_string
            if ave_reward >= STEPS_PER_EPISODE:
                break

    env.monitor.close()
    # gym.upload("cartpole-ex", algorithm_id="x", api_key="x")
def main():
    env = gym.make('Acrobot-v1')
    env.monitor.start("Acrobot-ex", force=True)
    agent = DQN_agent(env)
    if os.path.exists(PARAMS_FILE_NAME):
        agent.Q_network_model.load_params(PARAMS_FILE_NAME)

    for episode in range(MAX_EPISODES):
        state = env.reset()
        for t in range(STEPS_PER_EPISODE):
            tic = time.time()
            env.render()
            # print "render time:" + str(time.time() - tic)
            tic = time.time()
            action = agent.egreedy_action(state)
            # print "react time:" + str(time.time() - tic)
            next_state, reward, done, info = env.step(action)
            tic = time.time()
            agent.learn(state, action, reward, next_state, done)
            # print "learn time:" + str(time.time() - tic)
            state = next_state
            if done:
                print("Episode {} finished after {} timesteps with epsilon {}.".format(episode, t + 1, agent.epsilon))
                break

        if SAVE_PARAMS_INTERVAL:
            agent.Q_network_model.save_params(PARAMS_FILE_NAME)

        # Test every 100 episodes
        if episode % 100 == 0:
            total_reward = 0
            for i in range(TEST_EPISODES):
                state = env.reset()
                for j in xrange(STEPS_PER_EPISODE):
                    env.render()
                    action = agent.react(state)  # direct action for test
                    state, reward, done, _ = env.step(action)
                    total_reward += reward
                    if done:
                        break
            ave_reward = total_reward / TEST_EPISODES
            log_string = 'episode: {}, Evaluation Average Reward:{}'.format(episode, ave_reward)
            logging.debug(log_string)
            print log_string
            if ave_reward >= STEPS_PER_EPISODE:
                break

    env.monitor.close()
    # gym.upload("cartpole-ex", algorithm_id="x", api_key="x")
def main():
    env = gym.make('LunarLander-v2')
    env.monitor.start("LunarLander-v2", force=True)
    agent = DQN_agent(env)
    if os.path.exists(PARAMS_FILE_NAME):
        agent.Q_network_model.load_params(PARAMS_FILE_NAME)

    for episode in range(MAX_EPISODES):
        state = env.reset()
        acc = get_accumulator()
        for t in range(STEPS_PER_EPISODE):
            tic = time.time()
            env.render()
            # print "render time:" + str(time.time() - tic)
            tic = time.time()
            action = agent.egreedy_action(state)
            # print "react time:" + str(time.time() - tic)
            next_state, reward, done, info = env.step(action)
            acc(reward)
            tic = time.time()
            agent.learn(state, action, reward, next_state, done)
            # print "learn time:" + str(time.time() - tic)
            state = next_state
            if done:
                print("Episode {} finished after {} timesteps with epsilon {}.".format(episode, t + 1, agent.epsilon))
                break

        if SAVE_PARAMS_INTERVAL:
            agent.Q_network_model.save_params(PARAMS_FILE_NAME)
        print acc(0) / t

        # Test every 100 episodes
        if episode % 100 == 0:
            total_reward = get_accumulator()
            for i in range(TEST_EPISODES):
                state = env.reset()
                for j in xrange(STEPS_PER_EPISODE):
                    env.render()
                    action = agent.react(state)  # direct action for test
                    state, reward, done, _ = env.step(action)
                    total_reward(reward)
                    if done:
                        break
            ave_reward = total_reward(0) / TEST_EPISODES
            log_string = 'episode: {}, Evaluation Average Reward:{}'.format(episode, ave_reward)
            logging.debug(log_string)
            print log_string
            if ave_reward >= STEPS_PER_EPISODE:
                break

    env.monitor.close()
    # gym.upload("cartpole-ex", algorithm_id="x", api_key="x")
def main():
    env = gym.make('CartPole-v0')
    env.monitor.start("cartpole-ex", force=True)
    agent = DQN_agent(env)

    for episode in range(MAX_EPISODES):
        state = env.reset()
        for t in range(STEPS_PER_EPISODE):
            tic = time.time()
            env.render()
            # print "render time:" + str(time.time() - tic)
            tic = time.time()
            action = agent.egreedy_action(state)
            # print "react time:" + str(time.time() - tic)
            next_state, reward, done, info = env.step(action)
            tic = time.time()
            agent.learn(state, action, reward, next_state, done)
            # print "learn time:" + str(time.time() - tic)
            state = next_state
            if done:
                print("Episode {} finished after {} timesteps with epsilon {}.".format(episode, t + 1, agent.epsilon))
                break

        # Test every 100 episodes
        if episode % 100 == 0:
            record_filename = 'cartpole-experiment-{}'.format(episode)
            is_record = episode % RECORD_INTERVAL == 0  # and episode != 0
            if is_record:
                # env.monitor.start(record_filename, force=True)
                agent.Q_network_model.save_params("current_params.params")
            total_reward = 0
            for i in range(TEST_EPISODES):
                state = env.reset()
                for j in xrange(STEPS_PER_EPISODE):
                    env.render()
                    action = agent.react(state)  # direct action for test
                    state, reward, done, _ = env.step(action)
                    total_reward += reward
                    if done:
                        break
            ave_reward = total_reward / TEST_EPISODES
            log_string = 'episode: {}, Evaluation Average Reward:{}'.format(episode, ave_reward)
            logging.debug(log_string)
            print log_string
            if ave_reward >= 200:
                break
            if is_record:
                pass

    env.monitor.close()
    # gym.upload("cartpole-ex", algorithm_id="x", api_key="x")
def __init__(self, master):
    self.master = master
    self.frame = Frame(self.master, height=1000, width=450)
    self.frame.grid()

    # jobs label
    self.envLabel = Label(self.master, text="Jobs: ").grid(row=1, column=0, sticky=W)
    self.envNum = IntVar()
    self.envNumEntry = Entry(self.master, textvariable=self.envNum)
    self.envNumEntry.insert(END, '2')
    self.envNum.set('2')
    self.envNumEntry.grid(row=1, column=0, sticky=E)

    # population label
    self.populationLabel = Label(self.master, text="Population")
    self.populationLabel.grid(row=2, column=0, sticky=W)
    self.population = IntVar()
    self.populationEntry = Entry(self.master, textvariable=self.population)
    self.populationEntry.insert(END, '300')
    self.population.set('300')
    self.populationEntry.grid(row=2, column=0, sticky=E)

    # file saver button
    self.fileSaverButton = Button(self.frame, text="save pool", command=self.saveFile)
    self.fileSaverButton.grid(row=2, column=1)
    self.fileLoaderButton = Button(self.frame, text="load pool", command=self.loadFile)
    self.fileLoaderButton.grid(row=2, column=2)

    # run button
    self.runButton = Button(self.frame, text="start run", command=self.toggleRun)
    self.runButton.grid(row=2, column=3)

    # play best button
    self.playBestButton = Button(self.frame, text='play best', command=self.playBest)
    self.playBestButton.grid(row=2, column=4)

    # upload button
    self.uploadButton = Button(self.frame, text="upload", command=self.handleUpload)
    self.uploadButton.grid(row=2, column=5)

    # attempts label
    self.attempsLabel = Label(self.master, text="attemps")
    self.attempsLabel.grid(row=3, column=0, sticky=W)
    self.attemps = IntVar()
    self.attempsEntry = Entry(self.master, textvariable=self.attemps)
    self.attempsEntry.insert(END, '1')
    self.attemps.set('1')
    self.attempsEntry.grid(row=3, column=0, sticky=E)

    # environment label
    self.envLabel = Label(self.master, text="enviroment")
    self.envLabel.grid(row=4, column=0, sticky=W)
    self.envEntry = Entry(self.master)
    self.envEntry.insert(END, 'CartPole-v1')
    self.envEntry.grid(row=4, column=0, sticky=E)

    self.netProccess = None
    self.running = False
    self.poolInitialized = False
    self.pool = None
    self.lastPopulation = []
    self.plotDictionary = {}
    self.plotData = []
    self.genomeDictionary = {}
    self.specieID = 0

    self.fig, self.ax = plt.subplots(figsize=(10, 6))
    self.ax.stackplot([], [], baseline='wiggle')
    canvas = FigureCanvasTkAgg(self.fig, self.master)
    canvas.get_tk_widget().grid(row=5, column=0, rowspan=4, sticky="nesw")
def play(self, test_ep, n_step=10000, n_episode=100):
    tf.initialize_all_variables().run()

    self.stat.load_model()
    self.target_network.run_copy()

    if not self.env.display:
        gym_dir = '/tmp/%s-%s' % (self.env_name, get_time())
        env = gym.wrappers.Monitor(self.env.env, gym_dir)

    best_reward, best_idx, best_count = 0, 0, 0
    try:
        itr = xrange(n_episode)
    except NameError:
        itr = range(n_episode)
    for idx in itr:
        observation, reward, terminal = self.new_game()
        current_reward = 0

        for _ in range(self.history_length):
            self.history.add(observation)

        for self.t in tqdm(range(n_step), ncols=70):
            # 1. predict
            action = self.predict(self.history.get(), test_ep)
            # 2. act
            observation, reward, terminal, info = self.env.step(action, is_training=False)
            # 3. observe
            q, loss, is_update = self.observe(observation, reward, action, terminal)

            logger.debug("a: %d, r: %d, t: %d, q: %.4f, l: %.2f" %
                         (action, reward, terminal, np.mean(q), loss))
            current_reward += reward
            if terminal:
                break

        if current_reward > best_reward:
            best_reward = current_reward
            best_idx = idx
            best_count = 0
        elif current_reward == best_reward:
            best_count += 1

        print("=" * 30)
        print(" [%d] Best reward : %d (dup-percent: %d/%d)" % (best_idx, best_reward, best_count, n_episode))
        print("=" * 30)

    #if not self.env.display:
    #    gym.upload(gym_dir, writeup='https://github.com/devsisters/DQN-tensorflow', api_key='')