Python environment module: Environment() example source code

We have extracted the following 25 code examples from open-source Python projects to illustrate how to use environment.Environment().

Project: trpo    Author: jjkke88
def __init__(self, thread_id):
        print "create worker %d"%(thread_id)
        self.thread_id = thread_id
        self.env = env = Environment(gym.make(pms.environment_name))
        # print("Observation Space", env.observation_space)
        # print("Action Space", env.action_space)
        # print("Action area, high:%f, low%f" % (env.action_space.high, env.action_space.low))
        self.end_count = 0
        self.paths = []
        self.train = True
        self.baseline = Baseline()
        self.storage = Storage(self, self.env, self.baseline)
        self.distribution = DiagonalGaussian(pms.action_shape)

        self.session = self.master.session
        self.init_network()
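
This trpo worker wraps a Gym environment before handing it to the agent. A minimal sketch of what such a wrapper might look like (hypothetical; the project's actual Environment class is not shown here):

import gym

class Environment(object):
    """Thin wrapper that forwards to an underlying gym.Env."""
    def __init__(self, env):
        self.env = env
        self.observation_space = env.observation_space
        self.action_space = env.action_space

    def reset(self):
        # Delegate to the wrapped environment; projects often add
        # observation normalization or action rescaling here.
        return self.env.reset()

    def step(self, action):
        return self.env.step(action)

# Hypothetical usage, mirroring the constructor call in the snippet above:
# env = Environment(gym.make("Pendulum-v0"))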
Project: Reinforcement-Learning    Author: victorgrego
def reset(self):
       """
        Resets the Environment to the initial state
       """
       ## Initialize the state to be the middle
       ## value for each parameter e.g. if there are 13 and 19
       ## buckets for the arm and hand parameters, then the initial
       ## state should be (6,9)
       ##
       ## Also call self.crawlingRobot.setAngles()
       ## to the initial arm and hand angle

       armState = self.nArmStates/2
       handState = self.nHandStates/2
       self.state = armState,handState
       self.crawlingRobot.setAngles(self.armBuckets[armState],self.handBuckets[handState])
       self.crawlingRobot.positions = [20,self.crawlingRobot.getRobotPosition()[0]]
Project: AIclass    Author: mttk
def reset(self):
        """
         Resets the Environment to the initial state
        """
        ## Initialize the state to be the middle
        ## value for each parameter e.g. if there are 13 and 19
        ## buckets for the arm and hand parameters, then the initial
        ## state should be (6,9)
        ##
        ## Also call self.crawlingRobot.setAngles()
        ## to the initial arm and hand angle

        armState = self.nArmStates/2
        handState = self.nHandStates/2
        self.state = armState,handState
        self.crawlingRobot.setAngles(self.armBuckets[armState],self.handBuckets[handState])
        self.crawlingRobot.positions = [20,self.crawlingRobot.getRobotPosition()[0]]
Project: AIclass    Author: mttk
def reset(self):
        """
         Resets the Environment to the initial state
        """
        ## Initialize the state to be the middle
        ## value for each parameter e.g. if there are 13 and 19
        ## buckets for the arm and hand parameters, then the initial
        ## state should be (6,9)
        ##
        ## Also call self.crawlingRobot.setAngles()
        ## to the initial arm and hand angle

        armState = self.nArmStates/2
        handState = self.nHandStates/2
        self.state = armState,handState
        self.crawlingRobot.setAngles(self.armBuckets[armState],self.handBuckets[handState])
        self.crawlingRobot.positions = [20,self.crawlingRobot.getRobotPosition()[0]]
Project: VacuumAI    Author: avelkoski
def __init__(self,location,shape,plot):
        self.location = location
        self.shape = shape
        self.plot = plot
        self.env = Environment(self.shape)
        self.agent = Agent()
        self.child = Child(self.env.state)
Project: simdem    Author: Azure
def get_bash_script(script_dir, is_simulation = True, is_automated=False, is_testing=False):
    """
    Reads a README.md file in the indicated directory and builds an
    executable bash script from the commands contained within.
    """
    if not script_dir.endswith('/'):
        script_dir = script_dir + "/"

    script = ""
    # Prefix the script with the demo's environment variables.
    env = Environment(script_dir, False)
    for key, value in env.get().items():
        script += key + "='" + value + "'\n"

    filename = env.get_script_file_name(script_dir)
    in_code_block = False
    in_results_section = False
    lines = list(open(script_dir + filename))
    for line in lines:
        if line.startswith("Results:"):
            # Entering results section
            in_results_section = True
        elif line.startswith("```") and not in_code_block:
            # Entering a code block, if in_results_section = True then it's a results block
            in_code_block = True
        elif line.startswith("```") and in_code_block:
            # Finishing code block
            in_results_section = False
            in_code_block = False
        elif in_code_block and not in_results_section:
            # Executable line
            script += line
        elif line.startswith("#") and not in_code_block and not in_results_section and not is_automated:
            # Heading in descriptive text
            script += "\n"
    return script
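
A hypothetical call site for the function above (the demo directory and output file name are assumptions, not part of the project):

script = get_bash_script("demo_scripts/my_demo", is_simulation=False)
with open("run_demo.sh", "w") as f:
    f.write("#!/usr/bin/env bash\n")
    f.write(script)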
Project: simdem    Author: Azure
def __init__(self, is_running_in_docker, script_dir="demo_scripts", filename="README.md", is_simulation=True, is_automated=False, is_testing=False, is_fast_fail=True,is_learning = False, parent_script_dir = None, is_prep_only = False, is_prerequisite = False, output_format="log"):
        """
        is_running_in_docker should be set to true if we are running inside a Docker container
        script_dir is the location to look for scripts
        filename is the filename of the script this demo represents
        is_simulation should be set to true if we want to simulate a human running the commands
        is_automated should be set to true if we don't want to wait for an operator to indicate it's time to execute the next command
        is_testing is set to true if we want to compare actual results with expected results, by default execution will stop if any test fails (see is_fast_fail)
        is_fast_fail should be set to false if we want to continue running tests even after a failure
        is_learning should be set to true if we want a human to type in the commands
        parent_script_dir should be the directory of the script that calls this one, or None if this is the root script
        is_prep_only should be set to true if we want to stop execution after all prerequisites are satisfied
        is_prerequisite indicates whether this is a prerequisite or not. It is used to decide behaviour with respect to simulation etc.
        """
        self.mode = None
        self.is_docker = is_running_in_docker
        self.filename = filename
        self.script_dir = ""
        self.set_script_dir(script_dir)
        self.is_simulation = is_simulation
        self.is_automated = is_automated
        self.is_testing = is_testing
        self.is_fast_fail = is_fast_fail
        self.is_learning = is_learning
        self.current_command = ""
        self.current_description = ""
        self.last_command = ""
        self.is_prep_only = is_prep_only
        self.parent_script_dir = parent_script_dir
        if self.parent_script_dir:
            self.env = Environment(self.parent_script_dir, is_test = self.is_testing)
        else:
            self.env = Environment(self.script_dir, is_test = self.is_testing)
        self.is_prerequisite = is_prerequisite
        self.output_format = output_format
        self.all_results = []
        self.completed_validation_steps = []
Project: DeepRL    Author: arnomoonens
def make_environment(name, **args):
    """Make an environment of a given name, possibly using extra arguments."""
    env = environment_registry.get(name, Environment)
    if name in environment_registry:
        env = env(**args)
    else:
        env = env(name, **args)
    if "atari.atari_env" in env.unwrapped.__module__:
        to_dict = env.to_dict
        env = Vectorize(env)
        env = AtariRescale42x42(env)
        env = Unvectorize(env)
        env.to_dict = to_dict
    return env
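
A hypothetical usage of the factory above; the environment name is an assumption. Registered names resolve through environment_registry, and anything else falls back to the base Environment class:

env = make_environment("CartPole-v0")
observation = env.reset()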
Project: Smartcab-Trainer    Author: i-sultan
def run():
    """Run the agent for a finite number of trials."""

    # Set up environment and agent
    e = Environment()  # create environment (also adds some dummy traffic)
    a = e.create_agent(LearningAgent)  # create agent

    #e.set_primary_agent(a, enforce_deadline=False)
    e.set_primary_agent(a, enforce_deadline=True)  # set agent to track

    # Now simulate it
    sim = Simulator(e, update_delay=1.0)  # reduce update_delay to speed up simulation
    sim.run(n_trials=10)  # press Esc or close pygame window to quit
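
run() here is a module-level entry point; in these smartcab-style projects it is commonly invoked (not shown in the excerpt) from a __main__ guard:

if __name__ == '__main__':
    run()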
Project: trpo    Author: jjkke88
def __init__(self, thread_id, master):
        print "create thread %d"%(thread_id)
        self.thread_id = thread_id
        threading.Thread.__init__(self, name="thread_%d" % thread_id)
        self.master = master
        self.env = env = Environment(gym.make(pms.environment_name))
        TRPOAgentBase.__init__(self, env)

        self.session = self.master.session
        self.init_network()
        self.saver = tf.train.Saver(max_to_keep=10)
Project: smartcab    Author: manprets
def run():
    """Run the agent for a finite number of trials."""

    # Set up environment and agent
    e = Environment()  # create environment (also adds some dummy traffic)
    a = e.create_agent(LearningAgent)  # create agent
    e.set_primary_agent(a, enforce_deadline=False)  # specify agent to track
    # NOTE: You can set enforce_deadline=False while debugging to allow longer trials

    # Now simulate it
    sim = Simulator(e, update_delay=0.001, display=True)  # create simulator (uses pygame when display=True, if available)
    # NOTE: To speed up simulation, reduce update_delay and/or set display=False

    sim.run(n_trials=1)  # run for a specified number of trials
    # NOTE: To quit midway, press Esc or close pygame window, or hit Ctrl+C on the command-line
Project: drone-wars    Author: Howchoo
def main():

    env = None

    try:   
        ssid = sys.argv[1]
        device = sys.argv[2]
        env = Environment(device, ssid, v=True)
    except IndexError:
        print 'Proper usage: python ddrone.py {ssid} {wireless device}'

    if env:
        env.crackdrone(0)
Project: harpreif    Author: harpribot
def test_network(self, reward_type):
        """
        Network testing is run on the validation data; the testing data is used to check whether the representation
        has been learnt effectively. Testing data is not meant to check RL accuracy (the validation data is for that);
        it is meant to check whether the algorithm has learnt feature representations of the objects.
        :return: None
        """
        imagenet = self.__get_image_loader(NUM_VALIDATION_IMAGES)
        imagenet.load_next_image()

        state = self.__get_initial_state()
        # initialize the environment
        env = Environment(imagenet.get_image(), state, self.grid_dim, imagenet.get_puzzle_pieces(),
                          IMAGE_HEIGHT, WINDOW_SIZE, SLIDING_STRIDE,
                          self.input_channels, self.state_type)
        reward_list = []
        image_diff_list = []
        episode_reward = 0.0
        while True:
            state_new, a_t, reward, terminal = self.__play_one_move(state, env, reward_type, epsilon=0.0)
            episode_reward = reward + GAMMA * episode_reward
            # if terminal state has reached, then move to the next image
            if terminal:
                image_diff_list.append(env.get_normalized_image_diff())
                reward_list.append(episode_reward)

                image_present = imagenet.load_next_image()

                if image_present:
                    env.update_puzzle_pieces(imagenet.get_puzzle_pieces())
                    env.update_original_image(imagenet.get_image())
                    episode_reward = 0.0
                else:
                    break
            # update the old values
            state = env.get_state()
        # display the reward and image matching performance statistics
        performance_statistics(image_diff_list, reward_list)
Project: smartcab    Author: nd009
def run():
    """ Driving function for running the simulation. 
        Press ESC to close the simulation, or [SPACE] to pause the simulation. """

    ##############
    # Create the environment
    # Flags:
    #   verbose     - set to True to display additional output from the simulation
    #   num_dummies - discrete number of dummy agents in the environment, default is 100
    #   grid_size   - discrete number of intersections (columns, rows), default is (8, 6)
    env = Environment()

    ##############
    # Create the driving agent
    # Flags:
    #   learning   - set to True to force the driving agent to use Q-learning
    #    * epsilon - continuous value for the exploration factor, default is 1
    #    * alpha   - continuous value for the learning rate, default is 0.5
    agent = env.create_agent(LearningAgent)

    ##############
    # Follow the driving agent
    # Flags:
    #   enforce_deadline - set to True to enforce a deadline metric
    env.set_primary_agent(agent)

    ##############
    # Create the simulation
    # Flags:
    #   update_delay - continuous time (in seconds) between actions, default is 2.0 seconds
    #   display      - set to False to disable the GUI if PyGame is enabled
    #   log_metrics  - set to True to log trial and simulation results to /logs
    #   optimized    - set to True to change the default log file name
    sim = Simulator(env)

    ##############
    # Run the simulator
    # Flags:
    #   tolerance  - epsilon tolerance before beginning testing, default is 0.05 
    #   n_test     - discrete number of testing trials to perform, default is 0
    sim.run()
Project: Reinforcement-Learning    Author: victorgrego
def doAction(self, action):
       """
         Perform the action and update
         the current state of the Environment
         and return the reward for the
         current state, the next state
         and the taken action.

         Returns:
           nextState, reward
       """
       nextState, reward =  None, None

       oldX,oldY = self.crawlingRobot.getRobotPosition()

       armBucket,handBucket = self.state
       armAngle,handAngle = self.crawlingRobot.getAngles()
       if action == 'arm-up':
         newArmAngle = self.armBuckets[armBucket+1]
         self.crawlingRobot.moveArm(newArmAngle)
         nextState = (armBucket+1,handBucket)
       if action == 'arm-down':
         newArmAngle = self.armBuckets[armBucket-1]
         self.crawlingRobot.moveArm(newArmAngle)
         nextState = (armBucket-1,handBucket)
       if action == 'hand-up':
         newHandAngle = self.handBuckets[handBucket+1]
         self.crawlingRobot.moveHand(newHandAngle)
         nextState = (armBucket,handBucket+1)
       if action == 'hand-down':
         newHandAngle = self.handBuckets[handBucket-1]
         self.crawlingRobot.moveHand(newHandAngle)
         nextState = (armBucket,handBucket-1)

       newX,newY = self.crawlingRobot.getRobotPosition()

       # a simple reward function
       reward = newX - oldX

       self.state = nextState
       return nextState, reward
Project: SmartCab    Author: taochenshh
def run():
    """Run the agent for a finite number of trials."""

    # Set up environment and agent
    e = Environment()  # create environment (also adds some dummy traffic)
    a = e.create_agent(LearningAgent)  # create agent
    e.set_primary_agent(a, enforce_deadline=True)  # specify agent to track
    # NOTE: You can set enforce_deadline=False while debugging to allow longer trials

    # Now simulate it
    sim = Simulator(e, update_delay=0.005, display=False)  # create simulator (uses pygame when display=True, if available)
    # NOTE: To speed up simulation, reduce update_delay and/or set display=False

    sim.run(n_trials=100)  # run for a specified number of trials
    # NOTE: To quit midway, press Esc or close pygame window, or hit Ctrl+C on the command-line
Project: AIclass    Author: mttk
def doAction(self, action):
        """
          Perform the action and update
          the current state of the Environment
          and return the reward for the
          current state, the next state
          and the taken action.

          Returns:
            nextState, reward
        """
        nextState, reward =  None, None

        oldX,oldY = self.crawlingRobot.getRobotPosition()

        armBucket,handBucket = self.state
        armAngle,handAngle = self.crawlingRobot.getAngles()
        if action == 'arm-up':
            newArmAngle = self.armBuckets[armBucket+1]
            self.crawlingRobot.moveArm(newArmAngle)
            nextState = (armBucket+1,handBucket)
        if action == 'arm-down':
            newArmAngle = self.armBuckets[armBucket-1]
            self.crawlingRobot.moveArm(newArmAngle)
            nextState = (armBucket-1,handBucket)
        if action == 'hand-up':
            newHandAngle = self.handBuckets[handBucket+1]
            self.crawlingRobot.moveHand(newHandAngle)
            nextState = (armBucket,handBucket+1)
        if action == 'hand-down':
            newHandAngle = self.handBuckets[handBucket-1]
            self.crawlingRobot.moveHand(newHandAngle)
            nextState = (armBucket,handBucket-1)

        newX,newY = self.crawlingRobot.getRobotPosition()

        # a simple reward function
        reward = newX - oldX

        self.state = nextState
        return nextState, reward
Project: AIclass    Author: mttk
def doAction(self, action):
        """
          Perform the action and update
          the current state of the Environment
          and return the reward for the
          current state, the next state
          and the taken action.

          Returns:
            nextState, reward
        """
        nextState, reward =  None, None

        oldX,oldY = self.crawlingRobot.getRobotPosition()

        armBucket,handBucket = self.state
        armAngle,handAngle = self.crawlingRobot.getAngles()
        if action == 'arm-up':
            newArmAngle = self.armBuckets[armBucket+1]
            self.crawlingRobot.moveArm(newArmAngle)
            nextState = (armBucket+1,handBucket)
        if action == 'arm-down':
            newArmAngle = self.armBuckets[armBucket-1]
            self.crawlingRobot.moveArm(newArmAngle)
            nextState = (armBucket-1,handBucket)
        if action == 'hand-up':
            newHandAngle = self.handBuckets[handBucket+1]
            self.crawlingRobot.moveHand(newHandAngle)
            nextState = (armBucket,handBucket+1)
        if action == 'hand-down':
            newHandAngle = self.handBuckets[handBucket-1]
            self.crawlingRobot.moveHand(newHandAngle)
            nextState = (armBucket,handBucket-1)

        newX,newY = self.crawlingRobot.getRobotPosition()

        # a simple reward function
        reward = newX - oldX

        self.state = nextState
        return nextState, reward
Project: futhon    Author: delihiros
def apply_function(self, function, args, env):
        if datatypes.isLambda(function):
            local_env = environment.Environment(
                {},
                outer=function.env,
                bindings=function.args,
                args=args)
            return self.eval(function.body, local_env)
        return function(*args)
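
The call above builds a nested scope for evaluating a lambda body. A minimal sketch of an Environment compatible with that constructor signature (hypothetical; the real futhon environment.Environment may differ):

class Environment(dict):
    """A scope: local bindings plus a link to the enclosing scope."""
    def __init__(self, mapping=None, outer=None, bindings=(), args=()):
        super(Environment, self).__init__(mapping or {})
        self.update(zip(bindings, args))  # bind formal parameters to call arguments
        self.outer = outer                # enclosing scope, or None at top level

    def find(self, name):
        """Return the innermost scope in which name is bound."""
        if name in self:
            return self
        if self.outer is not None:
            return self.outer.find(name)
        raise NameError(name)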
Project: RobocupSSLSim    Author: cheng-xie
def main(argv):
    # Pretrained network to use
    inputfile = None
    # Whether to train or to test
    train = False
    # Trained network
    outputfile = None

    try:
        opts, args = getopt.getopt(argv,"hrl:s:",["loadckpt=","saveckpt="])
    except getopt.GetoptError:
        print 'Incorrect usage. For more information: test.py -h'
        sys.exit(2)
    for opt, arg in opts:
        if opt == '-h':
            print 'python test.py -r -l <ckptfile> -s <ckptfile>'
            print '-r for enabling training'
            print '-l for loading pre-existing model'
            print '-s for saving model to file'
            sys.exit()
        elif opt == '-r':
            train = True 
        elif opt in ("-l", "--loadckpt"):
            inputfile = arg
        elif opt in ("-s", "--saveckpt"):
            outputfile = arg

    with tf.Session() as sess:
        env = Environment()
        agent = DQNAgent(env, sess, inputfile) 
        if train:
            agent.train(6000000, outputfile) 
        else:
            agent.test(2000)
Project: smartcab    Author: martinbede
def run():
    """Run the agent for a finite number of trials."""

    # Set up environment and agent
    e = Environment()  # create environment (also adds some dummy traffic)
    a = e.create_agent(LearningAgent)  # create agent
    e.set_primary_agent(a, enforce_deadline=True)  # set agent to track

    # Now simulate it
    sim = Simulator(e, update_delay=1.0)  # reduce update_delay to speed up simulation
    sim.run(n_trials=200)  # press Esc or close pygame window to quit
Project: combat-sim    Author: LewisParr
def generate_Environment():
    env = environment.Environment(260, 260)
    env.genTerrain(1)
#    env.saveElevationData()
#    env.loadElevationData()
    env.plotElevation()
    env.buildVisibilityMaps()
#    env.saveVisibilityMaps()
#    env.loadVisibilityMaps()
    return env
Project: combat-sim    Author: LewisParr
def plot_Visible_Area(force):
    plt.figure()
    c = ['r', 'b']
    a = 0
    for F in force:
        plt.plot(F.visible_area, c=c[a])
        a += 1
    plt.xlabel('Timestep')
    plt.ylabel('Visible Area')
    plt.title('Fraction of Environment Visible')
Project: trpo    Author: jjkke88
def main(_):
    ps_hosts = FLAGS.ps_hosts.split(',')
    worker_hosts = FLAGS.worker_hosts.split(',')

    # Create a cluster from the parameter server and worker hosts.
    cluster = tf.train.ClusterSpec({"ps": ps_hosts, "worker": worker_hosts})

    # Create and start a server for the local task.
    # Each task is uniquely identified by its job name and task_index.
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.1 / 3.0)
    server = tf.train.Server(cluster,
                             job_name=FLAGS.job_name,
                             task_index=FLAGS.task_index,
                             config=tf.ConfigProto(gpu_options=gpu_options))

    if FLAGS.job_name == "ps":
        server.join()
    elif FLAGS.job_name == "worker":
        # Pin the ops built below to this worker's task.
        env = Environment(gym.make(pms.environment_name))
        with tf.device(tf.train.replica_device_setter(
            worker_device="/job:worker/task:%d" % (FLAGS.task_index),
            cluster=cluster)):
            agent = TRPOAgentParallel(env)
            saver = tf.train.Saver(max_to_keep=10)
            init_op = tf.initialize_all_variables()
            summary_op = tf.merge_all_summaries()
        # Create a "supervisor", which oversees the training process.
        sv = tf.train.Supervisor(is_chief=(FLAGS.task_index == 0),
                                 logdir="./checkpoint_parallel",
                                 init_op=init_op,
                                 global_step=agent.global_step,
                                 saver=saver,
                                 summary_op=None,
                                 save_model_secs=60)

        # The supervisor takes care of session initialization, restoring from
        # a checkpoint, and closing when done or an error occurs.
        with sv.managed_session(server.target) as sess:
            agent.session = sess
            agent.gf.session = sess
            agent.sff.session = sess
            agent.supervisor = sv

            if pms.train_flag:
                agent.learn()
            elif FLAGS.task_index == 0:
                agent.test(pms.checkpoint_file)
        # Ask for all the services to stop.
        sv.stop()
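
The script above reads its cluster layout from FLAGS. A sketch of the flag definitions it appears to assume, using the TF 1.x tf.app.flags API (the flag names come from the code above; the defaults are placeholders):

import tensorflow as tf

flags = tf.app.flags
flags.DEFINE_string("ps_hosts", "localhost:2222", "Comma-separated parameter-server host:port pairs")
flags.DEFINE_string("worker_hosts", "localhost:2223,localhost:2224", "Comma-separated worker host:port pairs")
flags.DEFINE_string("job_name", "worker", "Either 'ps' or 'worker'")
flags.DEFINE_integer("task_index", 0, "Index of this task within its job")
FLAGS = flags.FLAGS

if __name__ == "__main__":
    tf.app.run(main=main)  # main(_) is the function defined above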
Project: train-smartcab    Author: archelogos
def run():
    """Run the agent for a finite number of trials."""

    for simulation in range(0, N_SIMULATIONS):

        # Set up environment and agent
        e = Environment()  # create environment (also adds some dummy traffic)
        a = e.create_agent(LearningAgent)  # create agent
        e.set_primary_agent(a, enforce_deadline=True)  # specify agent to track
        # NOTE: You can set enforce_deadline=False while debugging to allow longer trials
        # TODO: Change later enforce_deadline=True

        # Now simulate it
        sim = Simulator(e, update_delay=0.001, display=False)  # create simulator (uses pygame when display=True, if available)
        # NOTE: To speed up simulation, reduce update_delay and/or set display=False

        sim.run(n_trials=N_TRIALS)  # run for a specified number of trials
        # NOTE: To quit midway, press Esc or close pygame window, or hit Ctrl+C on the command-line

        if simulation == N_SIMULATIONS - 1:

            with open('results.csv', 'a') as csvfile:
                fieldnames = ['alpha', 'gamma', 'epsilon', 'success_rate', 'last_failure']
                writer = csv.DictWriter(csvfile, fieldnames=fieldnames)

                for index in range(0,len(simulation_rates)):
                    writer.writerow({
                        'alpha': get_simulation_params(0)[0],
                        'gamma': get_simulation_params(0)[1],
                        'epsilon': get_simulation_params(0)[2],
                        'success_rate': simulation_rates[index],
                        'last_failure': last_errors[index]})


            if N_SIMULATIONS > 1:  # multiple simulations, and this is the last one

                plt.figure(1)

                plt.subplot(211)
                plt.plot(simulation_rates)
                plt.title('Success Rate/Simulation')
                plt.xlabel('# Simulation')
                plt.ylabel('Success Rate')

                plt.subplot(212)
                plt.plot(last_errors)
                plt.title('Last failed trial per simulation')
                plt.xlabel('# Simulation')
                plt.ylabel('Last failed trial')

                plt.show()