Python gym.wrappers module: Monitor() example source code

The following 50 code examples, extracted from open-source Python projects, illustrate how to use gym.wrappers.Monitor().
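
Before the per-project examples, here is a minimal, self-contained sketch of the basic pattern (this assumes the pre-0.21 gym API, in which Monitor wraps an environment and writes videos and episode statistics into a target directory; the environment id and output path below are placeholders):

import gym
from gym import wrappers

env = gym.make('CartPole-v0')
# force=True clears any recordings left in the directory by a previous run.
env = wrappers.Monitor(env, '/tmp/cartpole-monitor', force=True)

observation = env.reset()
done = False
while not done:
    action = env.action_space.sample()  # random policy, for illustration only
    observation, reward, done, info = env.step(action)
env.close()  # flush recorded videos and statistics to disk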

Project: drl.pth    Author: seba-1511
def test(args, env, agent):
    if args.record:
        if 'env' in vars(args):
            env = wrappers.Monitor(env, './videos/' + args.env + str(time()) + '/')
        else:
            env = wrappers.Monitor(env, './videos/' + str(time()) + '/')
    test_rewards = []
    test_start = time()
    test_steps = 0
    for iteration in range(1, 1 + args.n_test_iter):
        state = env.reset()
        iter_rewards = 0.0
        done = False
        while not done:
            test_steps += 1
            action, _ = agent.forward(state)
            state, reward, done, _ = env.step(action)
            iter_rewards += reward
        test_rewards.append(iter_rewards)
    print_stats('Test', test_rewards, args.n_test_iter,
                time() - test_start, test_steps, 0, agent)
    return test_rewards
Project: malmo-challenge    Author: Kaixhin
def __init__(self, env_name, state_builder=ALEStateBuilder(), repeat_action=4, no_op=30, monitoring_path=None):
        assert isinstance(state_builder, StateBuilder), 'state_builder should inherit from StateBuilder'
        assert isinstance(repeat_action, (int, tuple)), 'repeat_action should be int or tuple'
        if isinstance(repeat_action, int):
            assert repeat_action >= 1, "repeat_action should be >= 1"
        elif isinstance(repeat_action, tuple):
            assert len(repeat_action) == 2, 'repeat_action should be a length-2 tuple: (min frameskip, max frameskip)'
            assert repeat_action[0] < repeat_action[1], 'repeat_action[0] should be < repeat_action[1]'

        super(GymEnvironment, self).__init__()

        self._state_builder = state_builder
        self._env = gym.make(env_name)
        self._env.env.frameskip = repeat_action
        self._no_op = max(0, no_op)
        self._done = True

        if monitoring_path is not None:
            self._env = Monitor(self._env, monitoring_path, video_callable=need_record)
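
In this snippet, `video_callable=need_record` refers to a helper defined elsewhere in the project: Monitor calls it with the episode index before each episode and records video only when it returns a truthy value. A hypothetical sketch of such a predicate follows (the name matches the snippet, but the every-100-episodes policy is an assumption, not the project's actual rule):

def need_record(episode_id):
    # Hypothetical policy: record the first episode and every 100th one after it.
    return episode_id % 100 == 0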
Project: malmo-challenge    Author: Microsoft
def __init__(self, env_name, state_builder=ALEStateBuilder(), repeat_action=4, no_op=30, monitoring_path=None):
        assert isinstance(state_builder, StateBuilder), 'state_builder should inherit from StateBuilder'
        assert isinstance(repeat_action, (int, tuple)), 'repeat_action should be int or tuple'
        if isinstance(repeat_action, int):
            assert repeat_action >= 1, "repeat_action should be >= 1"
        elif isinstance(repeat_action, tuple):
            assert len(repeat_action) == 2, 'repeat_action should be a length-2 tuple: (min frameskip, max frameskip)'
            assert repeat_action[0] < repeat_action[1], 'repeat_action[0] should be < repeat_action[1]'

        super(GymEnvironment, self).__init__()

        self._state_builder = state_builder
        self._env = gym.make(env_name)
        self._env.env.frameskip = repeat_action
        self._no_op = max(0, no_op)
        self._done = True

        if monitoring_path is not None:
            self._env = Monitor(self._env, monitoring_path, video_callable=need_record)
Project: DeepRL    Author: arnomoonens
def __init__(self, master, thread_id, clip_gradients=True):
        super(A3CThread, self).__init__(name=thread_id)
        self.thread_id = thread_id
        self.clip_gradients = clip_gradients
        self.env = make_environment(master.env_name)
        self.master = master
        self.config = master.config
        if thread_id == 0 and self.master.monitor:
            self.env = wrappers.Monitor(self.env, master.monitor_path, force=True, video_callable=(None if self.master.video else False))

        # Only used (and overwritten) by agents that use an RNN
        self.initial_features = None

        # Build actor and critic networks
        with tf.variable_scope("t{}_net".format(self.thread_id)):
            self.action, self.value, self.actor_states, self.critic_states, self.actions_taken, self.losses, self.adv, self.r, self.n_steps = self.build_networks()
            self.sync_net = self.create_sync_net_op()
            inc_step = self.master.global_step.assign_add(self.n_steps)
            self.train_op = tf.group(self.make_trainer(), inc_step)
        # Write the summary of each thread in a different directory
        self.writer = tf.summary.FileWriter(os.path.join(self.master.monitor_path, "thread" + str(self.thread_id)), self.master.session.graph)

        self.runner = RunnerThread(self.env, self, 20, thread_id == 0 and self.master.video)
Project: DeepRL    Author: arnomoonens
def __init__(self, env, monitor_path, video=True, **usercfg):
        super(SarsaFA, self).__init__()
        self.env = env
        self.env = wrappers.Monitor(self.env, monitor_path, force=True, video_callable=(None if video else False))
        m = usercfg.get("m", 10)  # Number of tilings
        self.config = dict(
            m=m,
            n_x_tiles=9,
            n_y_tiles=9,
            Lambda=0.9,
            epsilon=0,  # fully greedy in this case
            alpha=(0.05 * (0.5 / m)),
            gamma=1,
            n_iter=1000,
            steps_per_episode=env.spec.tags.get("wrapper_config.TimeLimit.max_episode_steps")  # Maximum number of allowed steps per episode, as determined (for this environment) by the gym library
        )
        self.config.update(usercfg)
        O = env.observation_space
        self.x_low, self.y_low = O.low
        self.x_high, self.y_high = O.high

        self.nA = env.action_space.n
        self.policy = EGreedy(self.config["epsilon"])
        self.function_approximation = TileCoding(self.x_low, self.x_high, self.y_low, self.y_high, m, self.config["n_x_tiles"], self.config["n_y_tiles"], self.nA)
Project: DeepRL    Author: arnomoonens
def __init__(self, env, monitor_path, video=True, **usercfg):
        super(Karpathy, self).__init__(**usercfg)
        self.env = wrappers.Monitor(env, monitor_path, force=True, video_callable=(None if video else False))
        self.nA = self.env.action_space.n
        # Default configuration. Can be overwritten using keyword arguments.
        self.config.update(dict(
            # timesteps_per_batch=10000,
            # n_iter=100,
            episode_max_length=env.spec.tags.get("wrapper_config.TimeLimit.max_episode_steps"),
            gamma=0.99,
            learning_rate=0.05,
            batch_size=10,  # Amount of episodes after which to adapt gradients
            decay_rate=0.99,  # Used for RMSProp
            n_hidden_units=20,
            draw_frequency=50,  # Draw a plot every 50 episodes
            repeat_n_actions=1
        ))
        self.config.update(usercfg)
        self.build_network()
Project: DeepRL    Author: arnomoonens
def __init__(self, env, monitor_path, video=True, **usercfg):
        super(KarpathyCNN, self).__init__(**usercfg)
        self.env = wrappers.Monitor(env, monitor_path, force=True, video_callable=(None if video else False))
        self.nA = env.action_space.n
        self.monitor_path = monitor_path
        # Default configuration. Can be overwritten using keyword arguments.
        self.config.update(
            dict(
                # timesteps_per_batch=10000,
                # n_iter=100,
                n_hidden_units=200,
                learning_rate=1e-3,
                batch_size=10,  # Amount of episodes after which to adapt gradients
                gamma=0.99,  # Discount past rewards by a percentage
                decay=0.99,  # Decay of RMSProp optimizer
                epsilon=1e-9,  # Epsilon of RMSProp optimizer
                draw_frequency=50  # Draw a plot every 50 episodes
            )
        )
        self.config.update(usercfg)
        self.build_network()
        if self.config["save_model"]:
            tf.add_to_collection("action", self.action)
            tf.add_to_collection("states", self.states)
            self.saver = tf.train.Saver()
Project: gym    Author: openai
def test_env_reuse():
    with helpers.tempdir() as temp:
        env = gym.make('Autoreset-v0')
        env = Monitor(env, temp)

        env.reset()

        _, _, done, _ = env.step(None)
        assert not done
        _, _, done, _ = env.step(None)
        assert done

        _, _, done, _ = env.step(None)
        assert not done
        _, _, done, _ = env.step(None)
        assert done

        env.close()
Project: gym    Author: openai
def test_steps_limit_restart():
    with helpers.tempdir() as temp:
        env = gym.make('test.StepsLimitCartpole-v0')
        env = Monitor(env, temp, video_callable=False)
        env.reset()

        # Episode has started
        _, _, done, info = env.step(env.action_space.sample())
        assert not done

        # Limit reached, now we get a done signal and the env resets itself
        _, _, done, info = env.step(env.action_space.sample())
        assert done
        assert env.episode_id == 1

        env.close()
Project: htm-agents    Author: ryanjmccall
def main():
    episodeCount = 20
    stepsPerEpisode = 100

    env = gym.make("CartPole-v0")
    env = wrappers.Monitor(env, "/tmp/cartpole-experiment-1")
    for episode in range(episodeCount):
        observation = env.reset()
        for t in range(stepsPerEpisode):
            env.render()
            print(observation)
            action = env.action_space.sample()
            observation, reward, done, info = env.step(action)
            if done:
                print("Episode finished after {} timesteps".format(t + 1))
                break
Project: AI-Fight-the-Landlord    Author: YoungGer
def test_env_reuse():
    with helpers.tempdir() as temp:
        env = gym.make('Autoreset-v0')
        env = Monitor(env, temp)

        env.reset()

        _, _, done, _ = env.step(None)
        assert not done
        _, _, done, _ = env.step(None)
        assert done

        _, _, done, _ = env.step(None)
        assert not done
        _, _, done, _ = env.step(None)
        assert done

        env.close()
Project: AI-Fight-the-Landlord    Author: YoungGer
def test_steps_limit_restart():
    with helpers.tempdir() as temp:
        env = gym.make('test.StepsLimitCartpole-v0')
        env = Monitor(env, temp, video_callable=False)
        env.reset()

        # Episode has started
        _, _, done, info = env.step(env.action_space.sample())
        assert not done

        # Limit reached, now we get a done signal and the env resets itself
        _, _, done, info = env.step(env.action_space.sample())
        assert done
        assert env.episode_id == 1

        env.close()
Project: malmo-challenge    Author: rhaps0dy
def __init__(self, env_name, state_builder=ALEStateBuilder(), repeat_action=4, no_op=30, monitoring_path=None):
        assert isinstance(state_builder, StateBuilder), 'state_builder should inherit from StateBuilder'
        assert isinstance(repeat_action, (int, tuple)), 'repeat_action should be int or tuple'
        if isinstance(repeat_action, int):
            assert repeat_action >= 1, "repeat_action should be >= 1"
        elif isinstance(repeat_action, tuple):
            assert len(repeat_action) == 2, 'repeat_action should be a length-2 tuple: (min frameskip, max frameskip)'
            assert repeat_action[0] < repeat_action[1], 'repeat_action[0] should be < repeat_action[1]'

        super(GymEnvironment, self).__init__()

        self._state_builder = state_builder
        self._env = gym.make(env_name)
        self._env.env.frameskip = repeat_action
        self._no_op = max(0, no_op)
        self._done = True

        if monitoring_path is not None:
            self._env = Monitor(self._env, monitoring_path, video_callable=need_record)
Project: evolution-strategies-starter    Author: openai
def main(env_id, policy_file, record, stochastic, extra_kwargs):
    import gym
    from gym import wrappers
    import tensorflow as tf
    from es_distributed.policies import MujocoPolicy
    import numpy as np

    env = gym.make(env_id)
    if record:
        import uuid
        env = wrappers.Monitor(env, '/tmp/' + str(uuid.uuid4()), force=True)

    if extra_kwargs:
        import json
        extra_kwargs = json.loads(extra_kwargs)

    with tf.Session():
        pi = MujocoPolicy.Load(policy_file, extra_kwargs=extra_kwargs)
        while True:
            rews, t = pi.rollout(env, render=True, random_stream=np.random if stochastic else None)
            print('return={:.4f} len={}'.format(rews.sum(), t))

            if record:
                env.close()
                return
Project: gymmeforce    Author: lgvaz
def _create_env(self, monitor_dir, record_freq=None, max_episode_steps=None,
                    **kwargs):
        monitor_path = os.path.join(self.log_dir, monitor_dir)
        env = gym.make(self.env_name)
        if max_episode_steps is not None:
            env._max_episode_steps = max_episode_steps
        monitored_env = wrappers.Monitor(
            env=env,
            directory=monitor_path,
            resume=True,
            video_callable=lambda x: record_freq is not None and x % record_freq == 0)  # record every record_freq-th episode
        if self.env_wrapper is not None:
            env = self.env_wrapper.wrap_env(monitored_env)
        else:
            env = monitored_env

        return monitored_env, env
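
Note that Monitor invokes `video_callable` with the zero-based episode id, so the lambda above records every `record_freq`-th episode and records nothing when `record_freq` is None. An equivalent standalone predicate, written out for clarity (a sketch, not part of the project):

def should_record(episode_id, record_freq=None):
    # Same behaviour as the lambda in _create_env: no frequency means no recording.
    return record_freq is not None and episode_id % record_freq == 0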
Project: demeter    Author: evancasey
def __init__(self, name, log_dir, obs_f=None, reward_f=None,
             clamp_actions=False, monitor=False):

        self._env = gym.make(name)
        self.log_dir = log_dir
        if monitor:
            self._env = wrappers.Monitor(self._env, log_dir, force=True)

        self.obs_f = obs_f
        self.reward_f = reward_f
        self.clamp_actions = clamp_actions

        self.monitor = monitor
Project: deep-q-learning    Author: alvinwan
def get_env(seed):
    env = gym.make('Pong-ram-v0')

    set_global_seeds(seed)
    env.seed(seed)

    expt_dir = '/tmp/hw3_vid_dir/'
    env = wrappers.Monitor(env, osp.join(expt_dir, "gym"), force=True)
    env = wrap_deepmind_ram(env)

    return env
Project: deep-q-learning    Author: alvinwan
def get_env(env_id, seed):
    env = gym.make(env_id)

    set_global_seeds(seed)
    env.seed(seed)

    expt_dir = './tmp/hw3_vid_dir2/'
    env = wrappers.Monitor(env, osp.join(expt_dir, "gym"), force=True)
    env = wrap_deepmind(env)

    return env
Project: deep-q-learning    Author: alvinwan
def get_custom_env(env_id, seed):
    env = gym.make(env_id)

    set_global_seeds(seed)
    env.seed(seed)

    expt_dir = './tmp/hw3_vid_dir2/'
    env = wrappers.Monitor(env, osp.join(expt_dir, "gym"), force=True)
    env = wrap_custom(env)

    return env
Project: nesgym    Author: codescv
def get_env():
    env = gym.make('nesgym/NekketsuSoccerPK-v0')
    env = nesgym.wrap_nes_env(env)
    expt_dir = '/tmp/soccer/'
    env = wrappers.Monitor(env, os.path.join(expt_dir, "gym"), force=True)
    return env
Project: rl_algorithms    Author: DanielTakeshi
def get_env(seed):
    env = gym.make('Pong-ram-v0')

    set_global_seeds(seed)
    env.seed(seed)

    expt_dir = '/tmp/hw3_vid_dir/'
    env = wrappers.Monitor(env, osp.join(expt_dir, "gym"), force=True)
    env = wrap_deepmind_ram(env)

    return env
Project: rl_algorithms    Author: DanielTakeshi
def get_env(task, seed):
    env_id = task.env_id
    env = gym.make(env_id)
    set_global_seeds(seed)
    env.seed(seed)
    expt_dir = '/tmp/hw3_vid_dir2/'
    env = wrappers.Monitor(env, osp.join(expt_dir, "gym"), force=True)
    env = wrap_deepmind(env)
    return env
Project: rl_algorithms    Author: DanielTakeshi
def test(self, just_one=True):
        """ This is for test-time evaluation. No training is done here. By
        default, iterate through every snapshot.  If `just_one` is true, this
        only runs one set of weights, to ensure that we record right away since
        OpenAI will only record subsets and less frequently.  Changing the loop
        over snapshots is also needed.
        """
        os.makedirs(self.args.directory+'/videos')
        self.env = wrappers.Monitor(self.env, self.args.directory+'/videos', force=True)

        headdir = self.args.directory+'/snapshots/'
        snapshots = os.listdir(headdir)
        snapshots.sort()
        num_rollouts = 10
        if just_one:
            num_rollouts = 1

        for sn in snapshots:
            print("\n***** Currently on snapshot {} *****".format(sn))

            ### Add your own criteria here.
            # if "800" not in sn:
            #     continue
            ###

            with open(headdir+sn, 'rb') as f:
                weights = pickle.load(f)
            self.sess.run(self.set_params_op, 
                          feed_dict={self.new_weights_v: weights})
            returns = []
            for i in range(num_rollouts):
                returns.append(self._compute_return(test=True))
            print("mean: \t{}".format(np.mean(returns)))
            print("std: \t{}".format(np.std(returns)))
            print("max: \t{}".format(np.max(returns)))
            print("min: \t{}".format(np.min(returns)))
            print("returns:\n{}".format(returns))
Project: DeepRL    Author: arnomoonens
def main():
    args = parser.parse_args()
    env = make_environment(args.environment)
    runner = ModelRunner(env, args.model_directory, args.save_directory, n_iter=args.iterations)
    try:
        runner.env = wrappers.Monitor(runner.env, args.save_directory, video_callable=False, force=True)
        runner.run()
    except KeyboardInterrupt:
        pass
Project: DeepRL    Author: arnomoonens
def __init__(self, env, monitor_path, video=True, **usercfg):
        super(A2C, self).__init__(**usercfg)
        self.monitor_path = monitor_path

        self.env = wrappers.Monitor(env, monitor_path, force=True, video_callable=(None if video else False))
        self.env_runner = EnvRunner(self.env, self, usercfg)

        self.config.update(dict(
            timesteps_per_batch=10000,
            trajectories_per_batch=10,
            batch_update="timesteps",
            n_iter=100,
            gamma=0.99,
            actor_learning_rate=0.01,
            critic_learning_rate=0.05,
            actor_n_hidden=20,
            critic_n_hidden=20,
            repeat_n_actions=1,
            save_model=False
        ))
        self.config.update(usercfg)
        self.build_networks()
        init = tf.global_variables_initializer()
        # Launch the graph.
        self.session = tf.Session()
        self.session.run(init)
        if self.config["save_model"]:
            tf.add_to_collection("action", self.action)
            tf.add_to_collection("states", self.states)
            self.saver = tf.train.Saver()
        self.rewards = tf.placeholder("float", name="Rewards")
        self.episode_lengths = tf.placeholder("float", name="Episode_lengths")
        summary_actor_loss = tf.summary.scalar("Actor_loss", self.summary_actor_loss)
        summary_critic_loss = tf.summary.scalar("Critic_loss", self.summary_critic_loss)
        summary_rewards = tf.summary.scalar("Rewards", self.rewards)
        summary_episode_lengths = tf.summary.scalar("Episode_lengths", self.episode_lengths)
        self.summary_op = tf.summary.merge([summary_actor_loss, summary_critic_loss, summary_rewards, summary_episode_lengths])
        self.writer = tf.summary.FileWriter(os.path.join(self.monitor_path, "summaries"), self.session.graph)
        return
Project: DeepRL    Author: arnomoonens
def __init__(self, env, monitor_path, video=True, **usercfg):
        super(REINFORCE, self).__init__(**usercfg)
        self.env = wrappers.Monitor(env, monitor_path, force=True, video_callable=(None if video else False))
        self.env_runner = EnvRunner(self.env, self, usercfg)
        self.monitor_path = monitor_path
        # Default configuration. Can be overwritten using keyword arguments.
        self.config.update(dict(
            batch_update="timesteps",
            timesteps_per_batch=1000,
            n_iter=100,
            gamma=0.99,  # Discount past rewards by a percentage
            decay=0.9,  # Decay of RMSProp optimizer
            epsilon=1e-9,  # Epsilon of RMSProp optimizer
            learning_rate=0.05,
            n_hidden_units=20,
            repeat_n_actions=1,
            save_model=False
        ))
        self.config.update(usercfg)

        self.build_network()
        self.make_trainer()

        init = tf.global_variables_initializer()
        # Launch the graph.
        self.session = tf.Session()
        self.session.run(init)
        if self.config["save_model"]:
            tf.add_to_collection("action", self.action)
            tf.add_to_collection("states", self.states)
            self.saver = tf.train.Saver()
        self.rewards = tf.placeholder("float", name="Rewards")
        self.episode_lengths = tf.placeholder("float", name="Episode_lengths")
        summary_loss = tf.summary.scalar("Loss", self.summary_loss)
        summary_rewards = tf.summary.scalar("Rewards", self.rewards)
        summary_episode_lengths = tf.summary.scalar("Episode_lengths", self.episode_lengths)
        self.summary_op = tf.summary.merge([summary_loss, summary_rewards, summary_episode_lengths])
        self.writer = tf.summary.FileWriter(os.path.join(self.monitor_path, "task0"), self.session.graph)
Project: gym    Author: openai
def test_no_double_wrapping():
    temp = tempfile.mkdtemp()
    try:
        env = gym.make("FrozenLake-v0")
        env = wrappers.Monitor(env, temp)
        try:
            env = wrappers.Monitor(env, temp)
        except error.DoubleWrapperError:
            pass
        else:
            assert False, "Should not allow double wrapping"
        env.close()
    finally:
        shutil.rmtree(temp)
Project: gym    Author: openai
def test():
    benchmark = registration.Benchmark(
        id='MyBenchmark-v0',
        scorer=scoring.ClipTo01ThenAverage(),
        tasks=[
            {'env_id': 'CartPole-v0',
             'trials': 1,
             'max_timesteps': 5
            },
            {'env_id': 'CartPole-v0',
             'trials': 1,
             'max_timesteps': 100,
            }])

    with helpers.tempdir() as temp:
        env = gym.make('CartPole-v0')
        env = wrappers.Monitor(env, directory=temp, video_callable=False)
        env.seed(0)

        env.set_monitor_mode('evaluation')
        rollout(env)

        env.set_monitor_mode('training')
        for i in range(2):
            rollout(env)

        env.set_monitor_mode('evaluation')
        rollout(env, good=True)

        env.close()
        results = monitoring.load_results(temp)
        evaluation_score = benchmark.score_evaluation('CartPole-v0', results['data_sources'], results['initial_reset_timestamps'], results['episode_lengths'], results['episode_rewards'], results['episode_types'], results['timestamps'])
        benchmark_score = benchmark.score_benchmark({
            'CartPole-v0': evaluation_score['scores'],
        })

        assert np.all(np.isclose(evaluation_score['scores'], [0.00089999999999999998, 0.0054000000000000003])), "evaluation_score={}".format(evaluation_score)
        assert np.isclose(benchmark_score, 0.00315), "benchmark_score={}".format(benchmark_score)
Project: gym    Author: openai
def test_monitor_filename():
    with helpers.tempdir() as temp:
        env = gym.make('CartPole-v0')
        env = Monitor(env, directory=temp)
        env.close()

        manifests = glob.glob(os.path.join(temp, '*.manifest.*'))
        assert len(manifests) == 1
Project: gym    Author: openai
def test_write_upon_reset_false():
    with helpers.tempdir() as temp:
        env = gym.make('CartPole-v0')
        env = Monitor(env, directory=temp, video_callable=False, write_upon_reset=False)
        env.reset()

        files = glob.glob(os.path.join(temp, '*'))
        assert not files, "Files: {}".format(files)

        env.close()
        files = glob.glob(os.path.join(temp, '*'))
        assert len(files) > 0
Project: gym    Author: openai
def test_write_upon_reset_true():
    with helpers.tempdir() as temp:
        env = gym.make('CartPole-v0')

        env = Monitor(env, directory=temp, video_callable=False, write_upon_reset=True)
        env.reset()

        files = glob.glob(os.path.join(temp, '*'))
        assert len(files) > 0, "Files: {}".format(files)

        env.close()
        files = glob.glob(os.path.join(temp, '*'))
        assert len(files) > 0
Project: gym    Author: openai
def test_video_callable_false_does_not_record():
    with helpers.tempdir() as temp:
        env = gym.make('CartPole-v0')
        env = Monitor(env, temp, video_callable=False)
        env.reset()
        env.close()
        results = monitoring.load_results(temp)
        assert len(results['videos']) == 0
Project: gym    Author: openai
def test_video_callable_records_videos():
    with helpers.tempdir() as temp:
        env = gym.make('CartPole-v0')
        env = Monitor(env, temp)
        env.reset()
        env.close()
        results = monitoring.load_results(temp)
        assert len(results['videos']) == 1, "Videos: {}".format(results['videos'])
Project: gym    Author: openai
def test_semisuper_succeeds():
    """Regression test. Ensure that this can write"""
    with helpers.tempdir() as temp:
        env = gym.make('SemisuperPendulumDecay-v0')
        env = Monitor(env, temp)
        env.reset()
        env.step(env.action_space.sample())
        env.close()
Project: gym    Author: openai
def test_no_monitor_reset_unless_done():
    def assert_reset_raises(env):
        errored = False
        try:
            env.reset()
        except error.Error:
            errored = True
        assert errored, "Env allowed a reset when it shouldn't have"

    with helpers.tempdir() as temp:
        # Make sure we can reset as we please without monitor
        env = gym.make('CartPole-v0')
        env.reset()
        env.step(env.action_space.sample())
        env.step(env.action_space.sample())
        env.reset()

        # can reset once as soon as we start
        env = Monitor(env, temp, video_callable=False)
        env.reset()

        # can reset multiple times in a row
        env.reset()
        env.reset()

        env.step(env.action_space.sample())
        env.step(env.action_space.sample())
        assert_reset_raises(env)

        # should allow resets after the episode is done
        d = False
        while not d:
            _, _, d, _ = env.step(env.action_space.sample())

        env.reset()
        env.reset()

        env.step(env.action_space.sample())
        assert_reset_raises(env)

        env.close()
Project: deep-rl    Author: xinghai-sun
def create_env(conf, monitor_on=False):
    env = gym.make(conf['env'])
    if conf['monitor_dir'] != '' and monitor_on:
        env = wrappers.Monitor(env, conf['monitor_dir'], force=True)
    if conf['use_atari_wrapper']:
        env = AtariRescale42x42Wrapper(env)
        env = NormalizeWrapper(env)
    return env
Project: deep-rl    Author: xinghai-sun
def create_env(conf):
    env = gym.make(conf['env'])
    if conf['monitor_dir']:
        env = wrappers.Monitor(env, conf['monitor_dir'], force=True)
    if conf['use_atari_wrapper']:
        env = AtariRescale42x42Wrapper(env)
        env = NormalizeWrapper(env)
    return env
Project: AI-Fight-the-Landlord    Author: YoungGer
def test_no_double_wrapping():
    temp = tempfile.mkdtemp()
    try:
        env = gym.make("FrozenLake-v0")
        env = wrappers.Monitor(env, temp)
        try:
            env = wrappers.Monitor(env, temp)
        except error.DoubleWrapperError:
            pass
        else:
            assert False, "Should not allow double wrapping"
        env.close()
    finally:
        shutil.rmtree(temp)
Project: AI-Fight-the-Landlord    Author: YoungGer
def test():
    benchmark = registration.Benchmark(
        id='MyBenchmark-v0',
        scorer=scoring.ClipTo01ThenAverage(),
        tasks=[
            {'env_id': 'CartPole-v0',
             'trials': 1,
             'max_timesteps': 5
            },
            {'env_id': 'CartPole-v0',
             'trials': 1,
             'max_timesteps': 100,
            }])

    with helpers.tempdir() as temp:
        env = gym.make('CartPole-v0')
        env = wrappers.Monitor(env, directory=temp, video_callable=False)
        env.seed(0)

        env.set_monitor_mode('evaluation')
        rollout(env)

        env.set_monitor_mode('training')
        for i in range(2):
            rollout(env)

        env.set_monitor_mode('evaluation')
        rollout(env, good=True)

        env.close()
        results = monitoring.load_results(temp)
        evaluation_score = benchmark.score_evaluation('CartPole-v0', results['data_sources'], results['initial_reset_timestamps'], results['episode_lengths'], results['episode_rewards'], results['episode_types'], results['timestamps'])
        benchmark_score = benchmark.score_benchmark({
            'CartPole-v0': evaluation_score['scores'],
        })

        assert np.all(np.isclose(evaluation_score['scores'], [0.00089999999999999998, 0.0054000000000000003])), "evaluation_score={}".format(evaluation_score)
        assert np.isclose(benchmark_score, 0.00315), "benchmark_score={}".format(benchmark_score)
Project: AI-Fight-the-Landlord    Author: YoungGer
def test_monitor_filename():
    with helpers.tempdir() as temp:
        env = gym.make('CartPole-v0')
        env = Monitor(env, directory=temp)
        env.close()

        manifests = glob.glob(os.path.join(temp, '*.manifest.*'))
        assert len(manifests) == 1
Project: AI-Fight-the-Landlord    Author: YoungGer
def test_write_upon_reset_false():
    with helpers.tempdir() as temp:
        env = gym.make('CartPole-v0')
        env = Monitor(env, directory=temp, video_callable=False, write_upon_reset=False)
        env.reset()

        files = glob.glob(os.path.join(temp, '*'))
        assert not files, "Files: {}".format(files)

        env.close()
        files = glob.glob(os.path.join(temp, '*'))
        assert len(files) > 0
Project: AI-Fight-the-Landlord    Author: YoungGer
def test_write_upon_reset_true():
    with helpers.tempdir() as temp:
        env = gym.make('CartPole-v0')

        env = Monitor(env, directory=temp, video_callable=False, write_upon_reset=True)
        env.reset()

        files = glob.glob(os.path.join(temp, '*'))
        assert len(files) > 0, "Files: {}".format(files)

        env.close()
        files = glob.glob(os.path.join(temp, '*'))
        assert len(files) > 0
Project: AI-Fight-the-Landlord    Author: YoungGer
def test_video_callable_false_does_not_record():
    with helpers.tempdir() as temp:
        env = gym.make('CartPole-v0')
        env = Monitor(env, temp, video_callable=False)
        env.reset()
        env.close()
        results = monitoring.load_results(temp)
        assert len(results['videos']) == 0
Project: AI-Fight-the-Landlord    Author: YoungGer
def test_video_callable_records_videos():
    with helpers.tempdir() as temp:
        env = gym.make('CartPole-v0')
        env = Monitor(env, temp)
        env.reset()
        env.close()
        results = monitoring.load_results(temp)
        assert len(results['videos']) == 1, "Videos: {}".format(results['videos'])
Project: AI-Fight-the-Landlord    Author: YoungGer
def test_semisuper_succeeds():
    """Regression test. Ensure that this can write"""
    with helpers.tempdir() as temp:
        env = gym.make('SemisuperPendulumDecay-v0')
        env = Monitor(env, temp)
        env.reset()
        env.step(env.action_space.sample())
        env.close()
Project: AI-Fight-the-Landlord    Author: YoungGer
def test_no_monitor_reset_unless_done():
    def assert_reset_raises(env):
        errored = False
        try:
            env.reset()
        except error.Error:
            errored = True
        assert errored, "Env allowed a reset when it shouldn't have"

    with helpers.tempdir() as temp:
        # Make sure we can reset as we please without monitor
        env = gym.make('CartPole-v0')
        env.reset()
        env.step(env.action_space.sample())
        env.step(env.action_space.sample())
        env.reset()

        # can reset once as soon as we start
        env = Monitor(env, temp, video_callable=False)
        env.reset()

        # can reset multiple times in a row
        env.reset()
        env.reset()

        env.step(env.action_space.sample())
        env.step(env.action_space.sample())
        assert_reset_raises(env)

        # should allow resets after the episode is done
        d = False
        while not d:
            _, _, d, _ = env.step(env.action_space.sample())

        env.reset()
        env.reset()

        env.step(env.action_space.sample())
        assert_reset_raises(env)

        env.close()
Project: DQfD    Author: go2sea
def get_demo_data(env):
    # env = wrappers.Monitor(env, '/tmp/CartPole-v0', force=True)
    # agent.restore_model()
    with tf.variable_scope('get_demo_data'):
        agent = DQfDDDQN(env, DDQNConfig())

    e = 0
    while True:
        done = False
        score = 0  # sum of reward in one episode
        state = env.reset()
        demo = []
        while not done:
            action = agent.egreedy_action(state)  # epsilon-greedy action for training
            next_state, reward, done, _ = env.step(action)
            score += reward
            reward = reward if not done or score == 499 else -100
            agent.perceive([state, action, reward, next_state, done, 0.0])  # 0.0 marks a non-demo transition
            demo.append([state, action, reward, next_state, done, 1.0])  # keep as a candidate expert-demo transition
            agent.train_Q_network(update=False)
            state = next_state
        if done:
            if score == 500:  # expert demo data
                demo = set_n_step(demo, Config.trajectory_n)
                agent.demo_buffer.extend(demo)
            agent.sess.run(agent.update_target_net)
            print("episode:", e, "  score:", score, "  demo_buffer:", len(agent.demo_buffer),
                  "  memory length:", len(agent.replay_buffer), "  epsilon:", agent.epsilon)
            if len(agent.demo_buffer) >= Config.demo_buffer_size:
                agent.demo_buffer = deque(itertools.islice(agent.demo_buffer, 0, Config.demo_buffer_size))
                break
        e += 1

    with open(Config.DEMO_DATA_PATH, 'wb') as f:
        pickle.dump(agent.demo_buffer, f, protocol=2)
Project: MLAlgorithms    Author: rushter
def init_environment(self, name='CartPole-v0', monitor=False):
        self.env = gym.make(name)
        if monitor:
            self.env = wrappers.Monitor(self.env, name, force=True, video_callable=False)

        self.n_states = self.env.observation_space.shape[0]
        self.n_actions = self.env.action_space.n

        # Experience replay
        self.replay = []
Project: DeepRL    Author: ChiWeiHsiao
def __init__(self, game_name, history_length, render=False):
        self.env = gym.make(game_name)
        #self.env = wrappers.Monitor(self.env, 'records/atari-experiment-1')
        self.render = render
        self.n_actions = self.env.action_space.n
        self.n_observation = len(self.env.observation_space.high)
        self.resize_width = 80
        self.resize_height = 80
        self.history_length = history_length  # One state contains 'history_length' frames
        self.state_buffer = deque()  # Buffer keeps the last 'history_length - 1' frames
        self.show_game_info()
Project: DeepRL    Author: ChiWeiHsiao
def __init__(self, game_name, history_length, render=False):
        self.env = gym.make(game_name)
        #self.env = wrappers.Monitor(self.env, 'records/atari-experiment-1')
        self.render = render
        self.n_actions = self.env.action_space.n
        self.n_observation = len(self.env.observation_space.high)
        self.history_length = history_length  # One state contains 'history_length' observations
        self.state_buffer = deque()  # Buffer keeps the last 'history_length - 1' observations
        self.show_game_info()