Не удаётся запустить POMDP-лабиринт в PyBrain
Мне нужно преобразовать приведённый ниже MDP-лабиринт в POMDP-лабиринт, используя библиотеку PyBrain. Следующий код реализует MDP-лабиринт (он взят из учебных примеров PyBrain):
rl.py
from scipy import * #@unusedwildimport
import matplotlib.pyplot as plt
from pybrain.rl.environments.mazes import Maze, MDPMazeTask
from pybrain.rl.learners.valuebased import ActionValueTable
from pybrain.rl.agents import LearningAgent
from pybrain.rl.learners import Q, SARSA #@unusedimport
from pybrain.rl.experiments import Experiment
# Draw the value map in grayscale and switch matplotlib to interactive mode
# so the figure can be refreshed while the learning loop keeps running.
plt.gray()
plt.ion()

# Maze layout passed to the Maze environment. The outer border is all 1s;
# presumably 1 marks a wall cell and 0 a free cell (confirm against Maze).
structure = array([[1, 1, 1, 1, 1, 1, 1, 1, 1],
                   [1, 0, 0, 1, 0, 0, 0, 0, 1],
                   [1, 0, 0, 1, 0, 0, 1, 0, 1],
                   [1, 0, 0, 1, 0, 0, 0, 0, 1],
                   [1, 0, 0, 1, 0, 1, 0, 0, 1],
                   [1, 0, 0, 0, 0, 0, 0, 0, 1],
                   [1, 0, 0, 0, 0, 0, 0, 0, 1],
                   [1, 0, 0, 0, 0, 0, 0, 0, 1],
                   [1, 1, 1, 1, 1, 1, 1, 1, 1]])

# One table row per maze cell (9 * 9 = 81 here) and one column per action.
# Deriving the sizes from `structure` keeps the table and the plot in sync
# with the layout if the maze is ever edited.
num_states = structure.size
num_actions = 4

# The second Maze argument is the goal cell.
environment = Maze(structure, (7, 7))
controller = ActionValueTable(num_states, num_actions)
learner = Q()
agent = LearningAgent(controller, learner)
task = MDPMazeTask(environment)
experiment = Experiment(task, agent)

# Learn forever: run 100 interactions, update the table from them, clear the
# agent's history, then redraw the best action value of every cell.
while True:
    experiment.doInteractions(100)
    agent.learn()
    agent.reset()

    # max(1) keeps, for each state row, the largest value over its actions;
    # the result is folded back into the maze's 2-D shape for plotting.
    best_values = controller.params.reshape(num_states, num_actions).max(1)
    plt.pcolor(best_values.reshape(structure.shape))
    plt.show()
    plt.pause(0.1)
mdp.py
from pybrain.rl.environments import Task
from scipy import array
class MDPMazeTask(Task):
    """MDP task for the maze environment.

    The state is fully observable: the agent is given the current position
    of perseus. Reward is given on reaching the goal, otherwise no reward.
    """

    def getReward(self):
        """Compute and return the current reward.

        The reward corresponds to the last action performed: 1.0 when
        perseus stands on the goal (the environment is then reset), and
        0.0 otherwise.
        """
        if self.env.goal == self.env.perseus:
            # Goal reached: restart the environment before paying out.
            self.env.reset()
            reward = 1.
        else:
            reward = 0.
        return reward

    def performAction(self, action):
        """Forward the action to the base class as a plain integer.

        The action vector is stripped: only its first element is used,
        cast to int, and handed to ``Task.performAction``.
        """
        Task.performAction(self, int(action[0]))

    def getObservation(self):
        """Return the agent's position as a one-element array.

        The position is encoded as a row-major flat index into the maze
        table, which makes this a fully observable MDP problem.
        Assumes ``self.env.perseus`` is a (row, col) pair indexing
        ``self.env.mazeTable`` -- confirm against the Maze environment.
        """
        row, col = self.env.perseus[0], self.env.perseus[1]
        # A row-major flat index multiplies the row by the number of
        # COLUMNS. The row count (shape[0]) gives the same result only for
        # square mazes; for other shapes it aliases cells or overshoots the
        # table size.
        num_cols = self.env.mazeTable.shape[1]
        obs = array([row * num_cols + col])
        return obs
maze.py
from scipy import array
from .pomdp import POMDPTask
from pybrain.rl.environments.mazes import Maze
from pybrain.rl.environments.task import Task
class MazeTask(POMDPTask):
    """A task corresponding to a maze environment.

    Subclasses describe a concrete maze by overriding the class attributes
    below (``topology``, ``goal``, ``initPos``, ...).
    """

    # Values returned by getReward() for the three possible outcomes.
    bangPenalty = 0
    defaultPenalty = 0
    finalReward = 1

    # Maze description handed to ``mazeclass`` when the task is built.
    topology = None
    goal = None
    initPos = None
    mazeclass = Maze

    # Noise settings forwarded to the environment constructor.
    stochObs = 0
    stochAction = 0

    @property
    def noisy(self):
        """True when observation noise is enabled (``stochObs > 0``)."""
        return self.stochObs > 0

    def __init__(self, **args):
        """Build the maze environment from the class/instance settings.

        ``setArgs`` is defined outside this file; presumably it applies the
        keyword arguments as attribute overrides -- confirm before relying
        on that.
        """
        self.setArgs(**args)
        Task.__init__(self, self.mazeclass(self.topology, self.goal, initPos=self.initPos,
                                           stochObs=self.stochObs, stochAction=self.stochAction))
        # Smallest reward getReward() can return on a non-goal step.
        self.minReward = min(self.bangPenalty, self.defaultPenalty)
        self.reset()

    def getReward(self):
        """Return the reward for the current environment state.

        ``finalReward`` when perseus is on the goal, ``bangPenalty`` when
        the environment's ``bang`` flag is set, ``defaultPenalty`` otherwise.
        """
        if self.env.perseus == self.env.goal:
            return self.finalReward
        elif self.env.bang:
            return self.bangPenalty
        else:
            return self.defaultPenalty

    def isFinished(self):
        """Return a truthy value once the goal is reached or the base task reports it is finished."""
        return self.env.perseus == self.env.goal or POMDPTask.isFinished(self)

    def __str__(self):
        """Return the string form of the underlying environment."""
        return str(self.env)
class TrivialMaze(MazeTask):
    """A single three-cell corridor.

    Layout, matching ``topology`` cell by cell: ``#`` is a 1 entry, ``.`` is
    the start cell ``initPos`` (1, 1) and ``*`` is the ``goal`` (1, 3)::

        #####
        #. *#
        #####
    """
    discount = 0.8
    # The only starting position offered to the environment.
    initPos = [(1, 1)]
    topology = array([[1] * 5,
                      [1, 0, 0, 0, 1],
                      [1] * 5, ])
    goal = (1, 3)
Я попытался превратить его в POMDP-лабиринт, заменив строку task = MDPMazeTask(environment)
на task = TrivialMaze(),
но при запуске возникла ошибка.
error:
Traceback (most recent call last):
File "/home/web/pybrain-master/docs/tutorials/rl.py", line 140, in <module>
experiment.doInteractions(100)# speed of learning
File "/home/web/pybrain-master/pybrain/rl/experiments/experiment.py", line 18, in doInteractions
self._oneInteraction()
File "/home/web/pybrain-master/pybrain/rl/experiments/experiment.py", line 27, in _oneInteraction
self.task.performAction(self.agent.getAction())
File "/home/web/pybrain-master/pybrain/rl/agents/learning.py", line 50, in getAction
self.lastaction = self.module.activate(self.lastobs)
File "/home/web/pybrain-master/pybrain/structure/modules/module.py", line 121, in activate
assert len(self.inputbuffer[self.offset]) == len(inpt), str((len(self.inputbuffer[self.offset]), len(inpt)))
AssertionError: (1, 4)
Это нигде не объяснено на их сайте http://pybrain.org/. Может ли кто-нибудь мне помочь, пожалуйста?