Не удаётся запустить POMDP-лабиринт в PyBrain

Мне нужно переделать приведённый ниже MDP-лабиринт в POMDP-лабиринт с помощью библиотеки PyBrain. Следующий код реализует MDP-лабиринт (взят из учебных примеров PyBrain):

rl.py

from scipy import * #@unusedwildimport
import matplotlib.pyplot as plt
from pybrain.rl.environments.mazes import Maze, MDPMazeTask
from pybrain.rl.learners.valuebased import ActionValueTable
from pybrain.rl.agents import LearningAgent
from pybrain.rl.learners import Q, SARSA #@unusedimport
from pybrain.rl.experiments import Experiment

# Grayscale colormap + interactive mode, so the figure can be redrawn on
# every pass of the training loop below.
plt.gray()
plt.ion()

# 9x9 maze layout handed to Maze. Presumably 1 marks a wall and 0 a free
# cell -- confirm against the Maze implementation.
structure = array([[1, 1, 1, 1, 1, 1, 1, 1, 1],
                   [1, 0, 0, 1, 0, 0, 0, 0, 1],
                   [1, 0, 0, 1, 0, 0, 1, 0, 1],
                   [1, 0, 0, 1, 0, 0, 0, 0, 1],
                   [1, 0, 0, 1, 0, 1, 0, 0, 1],
                   [1, 0, 0, 0, 0, 0, 0, 0, 1],
                   [1, 0, 0, 0, 0, 0, 0, 0, 1],
                   [1, 0, 0, 0, 0, 0, 0, 0, 1],
                   [1, 1, 1, 1, 1, 1, 1, 1, 1]])

# Second argument is the goal cell (row 7, column 7).
environment = Maze(structure, (7, 7))

# Value table of 81 x 4 entries: one row per cell of the 9x9 grid, four
# values per cell (the same shape is used by the reshape in the loop below).
controller = ActionValueTable(81, 4)
learner = Q()
agent = LearningAgent(controller, learner)
task = MDPMazeTask(environment)
experiment = Experiment(task, agent)

# Train forever: run a batch of 100 interactions, let the agent learn from
# it, reset the agent, then plot the best value of every cell as a 9x9 map.
while True:
    experiment.doInteractions(100)
    agent.learn()
    agent.reset()
    plt.pcolor(controller.params.reshape(81, 4).max(1).reshape(9, 9))
    plt.show()
    plt.pause(0.1)

mdp.py

from pybrain.rl.environments import Task
from scipy import array

class MDPMazeTask(Task):
    """ This is a MDP task for the MazeEnvironment. The state is fully observable,
        giving the agent the current position of perseus. Reward is given on reaching
        the goal, otherwise no reward. """

    def getReward(self):
        """ Compute and return the current reward (i.e. corresponding to the last
            action performed): 1.0 when perseus is on the goal, 0.0 otherwise.
            Reaching the goal also resets the environment.
        """
        if self.env.goal == self.env.perseus:
            self.env.reset()
            return 1.
        return 0.

    def performAction(self, action):
        """ The action vector is stripped and the only element is cast to integer and given
            to the super class.
        """
        Task.performAction(self, int(action[0]))

    def getObservation(self):
        """ The agent receives its position in the maze, to make this a fully observable
            MDP problem. The (row, column) position is flattened into a single row-major
            index and returned as a one-element array.
        """
        row, col = self.env.perseus[0], self.env.perseus[1]
        # Row-major flattening must use the number of COLUMNS as the stride;
        # using the number of rows gives colliding indices on non-square mazes.
        n_cols = self.env.mazeTable.shape[1]
        return array([row * n_cols + col])

maze.py

from scipy import array

from .pomdp import POMDPTask
from pybrain.rl.environments.mazes import Maze
from pybrain.rl.environments.task import Task


class MazeTask(POMDPTask):
    """ POMDP task built around a maze environment.

        Subclasses describe a concrete maze through the class attributes
        `topology`, `goal` and `initPos`; the environment itself is created
        in the constructor from `mazeclass`.
    """

    # rewards returned by getReward() for its three cases
    finalReward = 1
    bangPenalty = 0
    defaultPenalty = 0

    # maze description, forwarded to `mazeclass` on construction
    mazeclass = Maze
    topology = None
    goal = None
    initPos = None

    # noise settings, forwarded to `mazeclass` on construction
    stochObs = 0
    stochAction = 0

    @property
    def noisy(self):
        """ True when the observation noise setting is above zero. """
        return self.stochObs > 0

    def __init__(self, **args):
        """ Pass the keyword arguments to setArgs(), build the maze environment
            from the current settings and initialise the task with it.
        """
        self.setArgs(**args)
        maze = self.mazeclass(self.topology, self.goal,
                              initPos=self.initPos,
                              stochObs=self.stochObs,
                              stochAction=self.stochAction)
        Task.__init__(self, maze)
        self.minReward = min(self.bangPenalty, self.defaultPenalty)
        self.reset()

    def getReward(self):
        """ Return finalReward on the goal, bangPenalty when the environment
            flags a bang, and defaultPenalty in every other case.
        """
        if self.env.perseus == self.env.goal:
            return self.finalReward
        if self.env.bang:
            return self.bangPenalty
        return self.defaultPenalty

    def isFinished(self):
        """ The episode ends on reaching the goal, or when POMDPTask says so. """
        reached_goal = self.env.perseus == self.env.goal
        return reached_goal or POMDPTask.isFinished(self)

    def __str__(self):
        """ The task is printed as its environment. """
        return str(self.env)


class TrivialMaze(MazeTask):
    """ A single corridor of three cells, enclosed on all sides:

    #####
    #. *#
    #####

    '.' is the initial position (1, 1), '*' is the goal (1, 3).
    """
    discount = 0.8

    # 3 rows x 5 columns; only the middle three cells of row 1 are 0
    topology = array([[1, 1, 1, 1, 1],
                      [1, 0, 0, 0, 1],
                      [1, 1, 1, 1, 1]])
    initPos = [(1, 1)]
    goal = (1, 3)

Я попытался превратить его в POMDP-лабиринт, заменив task = MDPMazeTask(environment) на task = TrivialMaze(),

но при этом возникли ошибки.

error:
    Traceback (most recent call last):
  File "/home/web/pybrain-master/docs/tutorials/rl.py", line 140, in <module>
    experiment.doInteractions(100)# speed of learning
  File "/home/web/pybrain-master/pybrain/rl/experiments/experiment.py", line 18, in doInteractions
    self._oneInteraction()
  File "/home/web/pybrain-master/pybrain/rl/experiments/experiment.py", line 27, in _oneInteraction
    self.task.performAction(self.agent.getAction())
  File "/home/web/pybrain-master/pybrain/rl/agents/learning.py", line 50, in getAction
    self.lastaction = self.module.activate(self.lastobs)
  File "/home/web/pybrain-master/pybrain/structure/modules/module.py", line 121, in activate
    assert len(self.inputbuffer[self.offset]) == len(inpt),    str((len(self.inputbuffer[self.offset]), len(inpt)))
AssertionError: (1, 4)

Это нигде не объяснено на их сайте (http://pybrain.org/). Может кто-нибудь мне помочь, пожалуйста?

0 ответов

Другие вопросы по тегам