The following 25 code examples, extracted from open-source Python projects, illustrate how to use environment.Environment().
def __init__(self, thread_id):
    print "create worker %d" % (thread_id)
    self.thread_id = thread_id
    self.env = env = Environment(gym.make(pms.environment_name))
    # print("Observation Space", env.observation_space)
    # print("Action Space", env.action_space)
    # print("Action area, high:%f, low%f" % (env.action_space.high, env.action_space.low))
    self.end_count = 0
    self.paths = []
    self.train = True
    self.baseline = Baseline()
    self.storage = Storage(self, self.env, self.baseline)
    self.distribution = DiagonalGaussian(pms.action_shape)
    self.session = self.master.session
    self.init_network()
def reset(self):
    """
    Resets the Environment to the initial state
    """
    ## Initialize the state to be the middle
    ## value for each parameter, e.g. if there are 13 and 19
    ## buckets for the arm and hand parameters, then the initial
    ## state should be (6,9)
    ##
    ## Also call self.crawlingRobot.setAngles()
    ## to set the initial arm and hand angle
    armState = self.nArmStates / 2
    handState = self.nHandStates / 2
    self.state = armState, handState
    self.crawlingRobot.setAngles(self.armBuckets[armState], self.handBuckets[handState])
    self.crawlingRobot.positions = [20, self.crawlingRobot.getRobotPosition()[0]]
def __init__(self, location, shape, plot):
    self.location = location
    self.shape = shape
    self.plot = plot
    self.env = Environment(self.shape)
    self.agent = Agent()
    self.child = Child(self.env.state)
def get_bash_script(script_dir, is_simulation=True, is_automated=False, is_testing=False):
    """
    Reads a README.md file in the indicated directory and builds an
    executable bash script from the commands contained within.
    """
    if not script_dir.endswith('/'):
        script_dir = script_dir + "/"

    script = ""
    env = Environment(script_dir, False).get()
    for key, value in env.items():
        script += key + "='" + value + "'\n"

    filename = env.get_script_file_name(script_dir)
    in_code_block = False
    in_results_section = False
    lines = list(open(script_dir + filename))
    for line in lines:
        if line.startswith("Results:"):
            # Entering results section
            in_results_section = True
        elif line.startswith("```") and not in_code_block:
            # Entering a code block; if in_results_section is True then it's a results block
            in_code_block = True
        elif line.startswith("```") and in_code_block:
            # Finishing code block
            in_results_section = False
            in_code_block = False
        elif in_code_block and not in_results_section:
            # Executable line
            script += line
        elif line.startswith("#") and not in_code_block and not in_results_section and not is_automated:
            # Heading in descriptive text
            script += "\n"
    return script
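For reference, a minimal usage sketch of the function above; the tutorial directory path and output file name are hypothetical and not part of the original project:

# Hypothetical usage: generate a bash script from a tutorial README and save it.
# The directory "demo_scripts/hello-world" is an assumption for illustration.
script = get_bash_script("demo_scripts/hello-world", is_simulation=False, is_automated=True)
with open("run_demo.sh", "w") as f:
    f.write("#!/usr/bin/env bash\n")
    f.write(script)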
def __init__(self, is_running_in_docker, script_dir="demo_scripts", filename="README.md",
             is_simulation=True, is_automated=False, is_testing=False, is_fast_fail=True,
             is_learning=False, parent_script_dir=None, is_prep_only=False,
             is_prerequisite=False, output_format="log"):
    """
    is_running_in_docker should be set to true if we are running inside a Docker container
    script_dir is the location to look for scripts
    filename is the filename of the script this demo represents
    is_simulation should be set to true if we want to simulate a human running the commands
    is_automated should be set to true if we don't want to wait for an operator to indicate
        it's time to execute the next command
    is_testing is set to true if we want to compare actual results with expected results;
        by default execution will stop if any test fails (see is_fast_fail)
    is_fast_fail should be set to false if we want to continue running tests even after a failure
    is_learning should be set to true if we want a human to type in the commands
    parent_script_dir should be the directory of the script that calls this one, or None
        if this is the root script
    is_prep_only should be set to true if we want to stop execution after all prerequisites
        are satisfied
    is_prerequisite indicates whether this is a prerequisite or not. It is used to decide
        behaviour with respect to simulation etc.
    """
    self.mode = None
    self.is_docker = is_running_in_docker
    self.filename = filename
    self.script_dir = ""
    self.set_script_dir(script_dir)
    self.is_simulation = is_simulation
    self.is_automated = is_automated
    self.is_testing = is_testing
    self.is_fast_fail = is_fast_fail
    self.is_learning = is_learning
    self.current_command = ""
    self.current_description = ""
    self.last_command = ""
    self.is_prep_only = is_prep_only
    self.parent_script_dir = parent_script_dir
    if self.parent_script_dir:
        self.env = Environment(self.parent_script_dir, is_test=self.is_testing)
    else:
        self.env = Environment(self.script_dir, is_test=self.is_testing)
    self.is_prerequisite = is_prerequisite
    self.output_format = output_format
    self.all_results = []
    self.completed_validation_steps = []
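A hedged sketch of how this constructor might be called; the class name Demo and the flag values are assumptions, since the enclosing class is not shown in the excerpt:

# Hypothetical instantiation; the class name Demo and the chosen flags are assumptions.
demo = Demo(
    is_running_in_docker=False,
    script_dir="demo_scripts",
    filename="README.md",
    is_simulation=False,   # run the commands without simulating human typing
    is_automated=True,     # do not wait for an operator between commands
    is_testing=True,       # compare actual results against expected results
)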
def make_environment(name, **args):
    """Make an environment of a given name, possibly using extra arguments."""
    env = environment_registry.get(name, Environment)
    if name in environment_registry:
        env = env(**args)
    else:
        env = env(name, **args)
    if "atari.atari_env" in env.unwrapped.__module__:
        to_dict = env.to_dict
        env = Vectorize(env)
        env = AtariRescale42x42(env)
        env = Unvectorize(env)
        env.to_dict = to_dict
    return env
def run():
    """Run the agent for a finite number of trials."""

    # Set up environment and agent
    e = Environment()  # create environment (also adds some dummy traffic)
    a = e.create_agent(LearningAgent)  # create agent
    # e.set_primary_agent(a, enforce_deadline=False)
    e.set_primary_agent(a, enforce_deadline=True)  # set agent to track

    # Now simulate it
    sim = Simulator(e, update_delay=1.0)  # reduce update_delay to speed up simulation
    sim.run(n_trials=10)  # press Esc or close pygame window to quit
def __init__(self, thread_id, master):
    print "create thread %d" % (thread_id)
    self.thread_id = thread_id
    threading.Thread.__init__(self, name="thread_%d" % thread_id)
    self.master = master
    self.env = env = Environment(gym.make(pms.environment_name))
    TRPOAgentBase.__init__(self, env)
    self.session = self.master.session
    self.init_network()
    self.saver = tf.train.Saver(max_to_keep=10)
def run():
    """Run the agent for a finite number of trials."""

    # Set up environment and agent
    e = Environment()  # create environment (also adds some dummy traffic)
    a = e.create_agent(LearningAgent)  # create agent
    e.set_primary_agent(a, enforce_deadline=False)  # specify agent to track
    # NOTE: You can set enforce_deadline=False while debugging to allow longer trials

    # Now simulate it
    sim = Simulator(e, update_delay=0.001, display=True)  # create simulator (uses pygame when display=True, if available)
    # NOTE: To speed up simulation, reduce update_delay and/or set display=False
    sim.run(n_trials=1)  # run for a specified number of trials
    # NOTE: To quit midway, press Esc or close pygame window, or hit Ctrl+C on the command-line
def main():
    env = None
    try:
        ssid = sys.argv[1]
        device = sys.argv[2]
        env = Environment(device, ssid, v=True)
    except IndexError:
        print 'Proper usage: python ddrone.py {ssid} {wireless device}'
    if env:
        env.crackdrone(0)
def test_network(self, reward_type):
    """
    Network testing is done on the validation data; the testing data is used to see
    whether the representation has been learnt efficiently. The testing data is not
    meant to check RL accuracy (the validation data is meant for that); it is meant
    to check whether the algorithm has learnt feature representations of objects.
    :return: None
    """
    imagenet = self.__get_image_loader(NUM_VALIDATION_IMAGES)
    imagenet.load_next_image()
    state = self.__get_initial_state()

    # initialize the environment
    env = Environment(imagenet.get_image(), state, self.grid_dim, imagenet.get_puzzle_pieces(),
                      IMAGE_HEIGHT, WINDOW_SIZE, SLIDING_STRIDE, self.input_channels, self.state_type)

    reward_list = []
    image_diff_list = []
    episode_reward = 0.0
    while True:
        state_new, a_t, reward, terminal = self.__play_one_move(state, env, reward_type, epsilon=0.0)
        episode_reward = reward + GAMMA * episode_reward

        # if the terminal state has been reached, then move to the next image
        if terminal:
            image_diff_list.append(env.get_normalized_image_diff())
            reward_list.append(episode_reward)
            image_present = imagenet.load_next_image()
            if image_present:
                env.update_puzzle_pieces(imagenet.get_puzzle_pieces())
                env.update_original_image(imagenet.get_image())
                episode_reward = 0.0
            else:
                break

        # update the old values
        state = env.get_state()

    # display the reward and image matching performance statistics
    performance_statistics(image_diff_list, reward_list)
def run():
    """
    Driving function for running the simulation.
    Press ESC to close the simulation, or [SPACE] to pause the simulation.
    """

    ##############
    # Create the environment
    # Flags:
    #   verbose     - set to True to display additional output from the simulation
    #   num_dummies - discrete number of dummy agents in the environment, default is 100
    #   grid_size   - discrete number of intersections (columns, rows), default is (8, 6)
    env = Environment()

    ##############
    # Create the driving agent
    # Flags:
    #   learning   - set to True to force the driving agent to use Q-learning
    #    * epsilon - continuous value for the exploration factor, default is 1
    #    * alpha   - continuous value for the learning rate, default is 0.5
    agent = env.create_agent(LearningAgent)

    ##############
    # Follow the driving agent
    # Flags:
    #   enforce_deadline - set to True to enforce a deadline metric
    env.set_primary_agent(agent)

    ##############
    # Create the simulation
    # Flags:
    #   update_delay - continuous time (in seconds) between actions, default is 2.0 seconds
    #   display      - set to False to disable the GUI if PyGame is enabled
    #   log_metrics  - set to True to log trial and simulation results to /logs
    #   optimized    - set to True to change the default log file name
    sim = Simulator(env)

    ##############
    # Run the simulator
    # Flags:
    #   tolerance - epsilon tolerance before beginning testing, default is 0.05
    #   n_test    - discrete number of testing trials to perform, default is 0
    sim.run()
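For comparison, a variant of the body of run() with the flags described in the comments above passed explicitly; the specific values are illustrative choices, not recommendations from the original project:

# Illustrative flag values only; they mirror the options documented in the comments above.
env = Environment(verbose=True, num_dummies=100, grid_size=(8, 6))
agent = env.create_agent(LearningAgent, learning=True, epsilon=1.0, alpha=0.5)
env.set_primary_agent(agent, enforce_deadline=True)
sim = Simulator(env, update_delay=0.01, display=False, log_metrics=True, optimized=True)
sim.run(tolerance=0.05, n_test=10)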
def doAction(self, action):
    """
    Perform the action and update the current state of the Environment
    and return the reward for the current state, the next state
    and the taken action.

    Returns: nextState, reward
    """
    nextState, reward = None, None

    oldX, oldY = self.crawlingRobot.getRobotPosition()

    armBucket, handBucket = self.state
    armAngle, handAngle = self.crawlingRobot.getAngles()
    if action == 'arm-up':
        newArmAngle = self.armBuckets[armBucket+1]
        self.crawlingRobot.moveArm(newArmAngle)
        nextState = (armBucket+1, handBucket)
    if action == 'arm-down':
        newArmAngle = self.armBuckets[armBucket-1]
        self.crawlingRobot.moveArm(newArmAngle)
        nextState = (armBucket-1, handBucket)
    if action == 'hand-up':
        newHandAngle = self.handBuckets[handBucket+1]
        self.crawlingRobot.moveHand(newHandAngle)
        nextState = (armBucket, handBucket+1)
    if action == 'hand-down':
        newHandAngle = self.handBuckets[handBucket-1]
        self.crawlingRobot.moveHand(newHandAngle)
        nextState = (armBucket, handBucket-1)

    newX, newY = self.crawlingRobot.getRobotPosition()

    # a simple reward function
    reward = newX - oldX

    self.state = nextState
    return nextState, reward
def run():
    """Run the agent for a finite number of trials."""

    # Set up environment and agent
    e = Environment()  # create environment (also adds some dummy traffic)
    a = e.create_agent(LearningAgent)  # create agent
    e.set_primary_agent(a, enforce_deadline=True)  # specify agent to track
    # NOTE: You can set enforce_deadline=False while debugging to allow longer trials

    # Now simulate it
    sim = Simulator(e, update_delay=0.005, display=False)  # create simulator (uses pygame when display=True, if available)
    # NOTE: To speed up simulation, reduce update_delay and/or set display=False
    sim.run(n_trials=100)  # run for a specified number of trials
    # NOTE: To quit midway, press Esc or close pygame window, or hit Ctrl+C on the command-line
def apply_function(self, function, args, env):
    # User-defined lambdas are evaluated in a fresh Environment that binds the
    # lambda's parameter names to the supplied arguments and chains to the
    # closure environment; anything else is treated as a built-in callable.
    if datatypes.isLambda(function):
        local_env = environment.Environment(
            {}, outer=function.env, bindings=function.args, args=args)
        return self.eval(function.body, local_env)
    return function(*args)
def main(argv):
    # Pretrained network to use
    inputfile = None
    # Whether to train or to test
    train = False
    # Trained network
    outputfile = None

    try:
        opts, args = getopt.getopt(argv, "hrl:s:", ["loadckpt=", "saveckpt="])
    except getopt.GetoptError:
        print 'Incorrect usage. For more information: test.py -h'
        sys.exit(2)
    for opt, arg in opts:
        if opt == '-h':
            print 'python test.py -r -l <ckptfile> -s <ckptfile>'
            print '-r for enabling training'
            print '-l for loading pre-existing model'
            print '-s for saving model to file'
            sys.exit()
        elif opt == '-r':
            train = True
        elif opt in ("-l", "--loadckpt"):
            inputfile = arg
        elif opt in ("-s", "--saveckpt"):
            outputfile = arg

    with tf.Session() as sess:
        env = Environment()
        agent = DQNAgent(env, sess, inputfile)
        if train:
            agent.train(6000000, outputfile)
        else:
            agent.test(2000)
def run():
    """Run the agent for a finite number of trials."""

    # Set up environment and agent
    e = Environment()  # create environment (also adds some dummy traffic)
    a = e.create_agent(LearningAgent)  # create agent
    e.set_primary_agent(a, enforce_deadline=True)  # set agent to track

    # Now simulate it
    sim = Simulator(e, update_delay=1.0)  # reduce update_delay to speed up simulation
    sim.run(n_trials=200)  # press Esc or close pygame window to quit
def generate_Environment():
    env = environment.Environment(260, 260)
    env.genTerrain(1)
    # env.saveElevationData()
    # env.loadElevationData()
    env.plotElevation()
    env.buildVisibilityMaps()
    # env.saveVisibilityMaps()
    # env.loadVisibilityMaps()
    return env
def plot_Visible_Area(force):
    plt.figure()
    c = ['r', 'b']
    a = 0
    for F in force:
        plt.plot(F.visible_area, c=c[a])
        a += 1
    plt.xlabel('Timestep')
    plt.ylabel('Visible Area')
    plt.title('Fraction of Environment Visible')
def main(_):
    ps_hosts = FLAGS.ps_hosts.split(',')
    worker_hosts = FLAGS.worker_hosts.split(',')

    # Create a cluster from the parameter server and worker hosts.
    cluster = tf.train.ClusterSpec({"ps": ps_hosts, "worker": worker_hosts})

    # Create and start a server for the local task, identified by task_index.
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.1 / 3.0)
    server = tf.train.Server(cluster,
                             job_name=FLAGS.job_name,
                             task_index=FLAGS.task_index,
                             config=tf.ConfigProto(gpu_options=gpu_options))

    if FLAGS.job_name == "ps":
        server.join()
    elif FLAGS.job_name == "worker":
        # Assign ops to the local worker.
        env = Environment(gym.make(pms.environment_name))
        with tf.device(tf.train.replica_device_setter(
                worker_device="/job:worker/task:%d" % (FLAGS.task_index),
                cluster=cluster)):
            agent = TRPOAgentParallel(env)
            saver = tf.train.Saver(max_to_keep=10)
            init_op = tf.initialize_all_variables()
            summary_op = tf.merge_all_summaries()

        # Create a "supervisor", which oversees the training process.
        sv = tf.train.Supervisor(is_chief=(FLAGS.task_index == 0),
                                 logdir="./checkpoint_parallel",
                                 init_op=init_op,
                                 global_step=agent.global_step,
                                 saver=saver,
                                 summary_op=None,
                                 save_model_secs=60)

        # The supervisor takes care of session initialization, restoring from
        # a checkpoint, and closing when done or an error occurs.
        with sv.managed_session(server.target) as sess:
            agent.session = sess
            agent.gf.session = sess
            agent.sff.session = sess
            agent.supervisor = sv
            if pms.train_flag:
                agent.learn()
            elif FLAGS.task_index == 0:
                agent.test(pms.checkpoint_file)

        # Ask for all the services to stop.
        sv.stop()
def run():
    """Run the agent for a finite number of trials."""

    for simulation in range(0, N_SIMULATIONS):
        # Set up environment and agent
        e = Environment()  # create environment (also adds some dummy traffic)
        a = e.create_agent(LearningAgent)  # create agent
        e.set_primary_agent(a, enforce_deadline=True)  # specify agent to track
        # NOTE: You can set enforce_deadline=False while debugging to allow longer trials
        # TODO: Change later enforce_deadline=True

        # Now simulate it
        sim = Simulator(e, update_delay=0.001, display=False)  # create simulator (uses pygame when display=True, if available)
        # NOTE: To speed up simulation, reduce update_delay and/or set display=False
        sim.run(n_trials=N_TRIALS)  # run for a specified number of trials
        # NOTE: To quit midway, press Esc or close pygame window, or hit Ctrl+C on the command-line

        if simulation == N_SIMULATIONS - 1:
            with open('results.csv', 'a') as csvfile:
                fieldnames = ['alpha', 'gamma', 'epsilon', 'success_rate', 'last_failure']
                writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
                for index in range(0, len(simulation_rates)):
                    writer.writerow({'alpha': get_simulation_params(0)[0],
                                     'gamma': get_simulation_params(0)[1],
                                     'epsilon': get_simulation_params(0)[2],
                                     'success_rate': simulation_rates[index],
                                     'last_failure': last_errors[index]})

            if N_SIMULATIONS > 1:  # multiple simulation AND last simulation
                plt.figure(1)
                plt.subplot(211)
                plt.plot(simulation_rates)
                plt.title('Success Rate/Simulation')
                plt.xlabel('# Simulation')
                plt.ylabel('Success Rate')

                plt.subplot(212)
                plt.plot(last_errors)
                plt.title('Last failed trial per simulation')
                plt.xlabel('# Simulation')
                plt.ylabel('Last failed trial')
                plt.show()