Open AI Gym Cart-Pole на основах PPO2

Был на этом уже два дня и никуда не денется.

Я хочу создать свою собственную спортивную среду и запустить ее с помощью базовых линий ppo2 (cnn).

Я полагаю, что если я заставлю Cart Pole работать, мой env тоже будет работать. Мой код работает на DeepQ, как и Cart Pole. Но одна и та же ошибка на my и env и CartPoleEnv.

Я скопировал большую часть кода из файла ppo2 run_atary.py. Но удалили все свои специфичные для atari обертки. Похоже, мне нужна собственная оболочка, но поскольку в базовом коде нет комментариев, я теряюсь в том, что использовать.

Для простоты использования Baslines я думаю, что это хорошая идея, чтобы показать все условия тренажерного зала, работающие на всех

Исключение из отладчика

 File "C:\PyProj\gi-bcspec\gi_bcspec\envs\train_ppo2.py", line 72, in <module>
  main()
 File "C:\PyProj\gi-bcspec\gi_bcspec\envs\train_ppo2.py", line 69, in main
  policy='cnn')
 File "C:\PyProj\gi-bcspec\gi_bcspec\envs\train_ppo2.py", line 38, in train
  env = SubprocVecEnv([env1, env2])
File "c:\PyProj\baselines\baselines\common\vec_env\subproc_vec_env.py", line 49, in __init__
  observation_space, action_space = self.remotes[0].recv()
File "C:\Python\Lib\multiprocessing\connection.py", line 250, in recv
  buf = self._recv_bytes()
File "C:\Python\Lib\multiprocessing\connection.py", line 321, in _recv_bytes
  raise EOFError

builtins.EOFError:

Вывод командной строки

C:\Python\lib\site-packages\h5py\__init__.py:36: FutureWarning: Conversion of the second argument of issubdtype from `float` to `np.floating` is deprecated. In future, it will be treated as `np.float64 == np.dtype(float).type`.
  from ._conv import register_converters as _register_converters
Logging to C:\Users\matti\AppData\Local\Temp\openai-2018-04-20-10-44-45-912947
2018-04-20 10:44:46.164246: I C:\tf_jenkins\workspace\tf-nightly-windows\M\windows-gpu\PY\36\tensorflow\core\common_runtime\gpu\gpu_device.cc:1355] Found device 0 with properties: 
name: GeForce GTX 670 major: 3 minor: 0 memoryClockRate(GHz): 1.0585
pciBusID: 0000:01:00.0
totalMemory: 2.00GiB freeMemory: 1.59GiB
2018-04-20 10:44:46.194118: I C:\tf_jenkins\workspace\tf-nightly-windows\M\windows-gpu\PY\36\tensorflow\core\common_runtime\gpu\gpu_device.cc:1355] Found device 1 with properties: 
name: GeForce GTX 670 major: 3 minor: 0 memoryClockRate(GHz): 1.0585
pciBusID: 0000:02:00.0
totalMemory: 2.00GiB freeMemory: 1.59GiB
2018-04-20 10:44:46.194764: I C:\tf_jenkins\workspace\tf-nightly-windows\M\windows-gpu\PY\36\tensorflow\core\common_runtime\gpu\gpu_device.cc:1434] Adding visible gpu devices: 0, 1
2018-04-20 10:44:46.779295: I C:\tf_jenkins\workspace\tf-nightly-windows\M\windows-gpu\PY\36\tensorflow\core\common_runtime\gpu\gpu_device.cc:922] Device interconnect StreamExecutor with strength 1 edge matrix:
2018-04-20 10:44:46.779672: I C:\tf_jenkins\workspace\tf-nightly-windows\M\windows-gpu\PY\36\tensorflow\core\common_runtime\gpu\gpu_device.cc:928]      0 1 
2018-04-20 10:44:46.779917: I C:\tf_jenkins\workspace\tf-nightly-windows\M\windows-gpu\PY\36\tensorflow\core\common_runtime\gpu\gpu_device.cc:941] 0:   N N 
2018-04-20 10:44:46.780154: I C:\tf_jenkins\workspace\tf-nightly-windows\M\windows-gpu\PY\36\tensorflow\core\common_runtime\gpu\gpu_device.cc:941] 1:   N N 
2018-04-20 10:44:46.780544: I C:\tf_jenkins\workspace\tf-nightly-windows\M\windows-gpu\PY\36\tensorflow\core\common_runtime\gpu\gpu_device.cc:1052] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 1374 MB memory) -> physical GPU (device: 0, name: GeForce GTX 670, pci bus id: 0000:01:00.0, compute capability: 3.0)
2018-04-20 10:44:46.782447: I C:\tf_jenkins\workspace\tf-nightly-windows\M\windows-gpu\PY\36\tensorflow\core\common_runtime\gpu\gpu_device.cc:1052] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:1 with 1374 MB memory) -> physical GPU (device: 1, name: GeForce GTX 670, pci bus id: 0000:02:00.0, compute capability: 3.0)
[33mWARN: gym.spaces.Box autodetected dtype as <class 'numpy.float32'>. Please provide explicit dtype.[0m
[33mWARN: gym.spaces.Box autodetected dtype as <class 'numpy.float32'>. Please provide explicit dtype.[0m
C:\Python\lib\site-packages\h5py\__init__.py:36: FutureWarning: Conversion of the second argument of issubdtype from `float` to `np.floating` is deprecated. In future, it will be treated as `np.float64 == np.dtype(float).type`.
  from ._conv import register_converters as _register_converters
Process Process-1:
Traceback (most recent call last):
  File "C:\Python\lib\multiprocessing\process.py", line 258, in _bootstrap
    self.run()
  File "C:\Python\lib\multiprocessing\process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "c:\pyproj\baselines\baselines\common\vec_env\subproc_vec_env.py", line 8, in worker
    env = env_fn_wrapper.x()
TypeError: 'TimeLimit' object is not callable
C:\Python\lib\site-packages\h5py\__init__.py:36: FutureWarning: Conversion of the second argument of issubdtype from `float` to `np.floating` is deprecated. In future, it will be treated as `np.float64 == np.dtype(float).type`.
  from ._conv import register_converters as _register_converters
Process Process-2:
Traceback (most recent call last):
  File "C:\Python\lib\multiprocessing\process.py", line 258, in _bootstrap
    self.run()
  File "C:\Python\lib\multiprocessing\process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "c:\pyproj\baselines\baselines\common\vec_env\subproc_vec_env.py", line 8, in worker
    env = env_fn_wrapper.x()
TypeError: 'TimeLimit' object is not callable

Мой код

import sys
import gym
from gym.wrappers import FlattenDictWrapper
from baselines import logger
from baselines.common.vec_env.vec_frame_stack import VecFrameStack
from baselines.common.vec_env.subproc_vec_env import SubprocVecEnv
from baselines.ppo2 import ppo2
from baselines.ppo2.policies import CnnPolicy, LstmPolicy, LnLstmPolicy
from baselines.bench import Monitor
import multiprocessing
import tensorflow as tf

def train(env_id, num_timesteps, seed, policy):
    ncpu = multiprocessing.cpu_count()
    if sys.platform == 'darwin': ncpu //= 2
    config = tf.ConfigProto(allow_soft_placement=True,
                            intra_op_parallelism_threads=ncpu,
                            inter_op_parallelism_threads=ncpu)
    config.gpu_options.allow_growth = True #pylint: disable=E1101
    tf.Session(config=config).__enter__()

    env1 =  gym.make(env_id)
    env2 =  gym.make(env_id)
    env = SubprocVecEnv([env1, env2])

    policy = {'cnn' : CnnPolicy, 'lstm' : LstmPolicy, 'lnlstm' : LnLstmPolicy}[policy]
    ppo2.learn(policy=policy, env=env, nsteps=128, nminibatches=4,
        lam=0.95, gamma=0.99, noptepochs=4, log_interval=1,
        ent_coef=.01,
        lr=lambda f : f * 2.5e-4,
        cliprange=lambda f : f * 0.1,
        total_timesteps=int(num_timesteps * 1.1))

def main():
    logger.configure()
    train('CartPole-v1', num_timesteps=1000, seed=10,
        policy='cnn')

if __name__ == '__main__':
    main()

0 ответов