# Source code for jaclearn.rl.space

#! /usr/bin/env python3
# -*- coding: utf-8 -*-
# File   : space.py
# Author : Jiayuan Mao
# Email  : maojiayuan@gmail.com
# Date   : 02/17/2018
#
# This file is part of Jacinle.
# Distributed under terms of the MIT license.

import numpy as np

import jacinle.random as random

# Public names exported by ``from <this module> import *``.
__all__ = [
    'ActionSpaceBase',
    'DiscreteActionSpace',
    'ContinuousActionSpace',
    'CompositionalActionSpace',
    'ConcatenatedActionSpace',
]


class ActionSpaceBase(object):
    """Base class for action spaces.

    Each instance owns a random number generator obtained from
    ``random.gen_rng()`` and an optional ``action_meanings`` value that is
    stored as given. Subclasses implement sampling by overriding
    :meth:`_sample`.
    """

    def __init__(self, action_meanings=None):
        """Store ``action_meanings`` and create the per-instance generator.

        Args:
            action_meanings: optional description of the actions; kept as-is
                and exposed through :attr:`action_meanings`.
        """
        self._action_meanings = action_meanings
        # Name-mangled on purpose: subclasses reach the generator through the
        # read-only ``rng`` property instead of the attribute itself.
        self.__rng = random.gen_rng()

    @property
    def rng(self):
        """The random number generator created for this instance."""
        return self.__rng

    @property
    def action_meanings(self):
        """The ``action_meanings`` value passed to the constructor."""
        return self._action_meanings

    def sample(self, theta=None):
        """Draw an action by delegating to :meth:`_sample`.

        Args:
            theta: optional sampling parameters, forwarded unchanged.

        Returns:
            Whatever :meth:`_sample` returns.
        """
        return self._sample(theta)

    def _sample(self, theta=None):
        """Sampling hook for subclasses; the base implementation yields None."""
        return None
class DiscreteActionSpace(ActionSpaceBase):
    """Action space made of ``nr_actions`` discrete choices."""

    def __init__(self, nr_actions, action_meanings=None):
        """Create the space.

        Args:
            nr_actions: number of available actions; passed as the first
                argument to ``rng.choice`` when sampling.
            action_meanings: optional description of the actions, forwarded
                to the base class.
        """
        super().__init__(action_meanings=action_meanings)
        self._nr_actions = nr_actions

    @property
    def nr_actions(self):
        """The ``nr_actions`` value passed to the constructor."""
        return self._nr_actions

    def _sample(self, theta=None):
        """Draw one action through ``rng.choice``.

        Args:
            theta: when given, forwarded to ``rng.choice`` as ``p``
                (presumably a probability per action -- confirm against the
                generator's API).

        Returns:
            The value returned by ``rng.choice``.
        """
        generator = self.rng
        if theta is not None:
            return generator.choice(self._nr_actions, p=theta)
        return generator.choice(self._nr_actions)
class ContinuousActionSpace(ActionSpaceBase):
    """Continuous action space bounded element-wise by ``low`` and ``high``.

    The bounds can be given in three ways:

    * ``low`` only: the space is symmetric, spanning ``-low`` to ``low``.
    * ``low`` and ``high`` without ``shape``: both are converted with
      ``np.array`` and must have the same shape, which becomes the shape of
      the space.
    * ``low`` and ``high`` with ``shape``: each bound is either an array
      (or list/tuple) of exactly that shape, or a scalar that is broadcast
      to it.
    """

    @staticmethod
    def __canonize_bound(v, shape):
        """Return the bound ``v`` as an array of shape ``shape``.

        Args:
            v: a NumPy array, a list/tuple convertible to one, or a real
                scalar (Python or NumPy; booleans are rejected).
            shape: the required shape of the bound.

        Returns:
            ``v`` itself when it is already an array of the right shape,
            otherwise a new array of shape ``shape`` filled with ``v``.

        Raises:
            AssertionError: on a shape mismatch or an unsupported type.
        """
        if isinstance(v, (list, tuple)):
            v = np.array(v)
        if isinstance(v, np.ndarray):
            assert v.shape == shape, 'Invalid shape for bound value: expect {}, got {}.'.format(
                shape, v.shape)
            return v
        # isinstance (rather than an exact type match) also admits NumPy
        # scalars such as np.float32; bool is excluded explicitly because it
        # is a subclass of int.
        is_real_scalar = (
            isinstance(v, (int, float, np.integer, np.floating))
            and not isinstance(v, bool)
        )
        assert is_real_scalar, 'Invalid type for bound value.'
        return np.ones(shape=shape, dtype='float32') * v

    def __init__(self, low, high=None, shape=None, action_meanings=None):
        """Create the space.

        Args:
            low: lower bound; when ``high`` is None it is the symmetric
                magnitude and the bounds become ``(-low, low)``.
            high: upper bound, or None for a symmetric space.
            shape: optional explicit shape of the space.
            action_meanings: optional description of the actions, forwarded
                to the base class.

        Raises:
            AssertionError: if a required bound is missing, the shapes of the
                bounds disagree, or a bound has an unsupported type.
        """
        super().__init__(action_meanings=action_meanings)

        if high is None:
            assert low is not None, 'Must provide low and high.'
            # Lists and tuples do not support unary minus, so convert them
            # before building the symmetric lower bound.
            if isinstance(low, (list, tuple)):
                low = np.array(low)
            low, high = -low, low

        if shape is None:
            assert low is not None and high is not None, 'Must provide low and high.'
            low, high = np.array(low), np.array(high)
            assert low.shape == high.shape, 'Low and high must have same shape, got: {} and {}.'.format(
                low.shape, high.shape)
            self._low = low
            self._high = high
            self._shape = low.shape
        else:
            self._low = self.__canonize_bound(low, shape)
            self._high = self.__canonize_bound(high, shape)
            self._shape = shape

    @property
    def low(self):
        """Lower bound of the space."""
        return self._low

    @property
    def high(self):
        """Upper bound of the space."""
        return self._high

    @property
    def shape(self):
        """Shape of the space."""
        return self._shape

    def _sample(self, theta=None):
        """Draw one action.

        Args:
            theta: optional ``(mu, std)`` pair. When given, the sample is
                ``rng.randn(*shape) * std + mu``; the result is not clipped
                to the bounds. When None, the sample is
                ``rng.uniform(low, high)``.

        Returns:
            The sampled action.
        """
        if theta is not None:
            mu, std = theta
            return self.rng.randn(*self.shape) * std + mu
        return self.rng.uniform(self._low, self._high)
class CompositionalActionSpace(ActionSpaceBase):
    """Product of several sub-spaces: the agent acts in every one at once.

    A sample is a tuple holding one sample from each sub-space, in the order
    the sub-spaces were given.
    """

    def __init__(self, *spaces):
        """Create the space from the given sub-spaces.

        Args:
            *spaces: the sub-spaces; stored as the tuple ``self.spaces``.
        """
        super().__init__()
        self.spaces = spaces

    def _sample(self, theta=None):
        """Sample every sub-space with its default parameters.

        Args:
            theta: must be None; sampling parameters are not supported here.

        Returns:
            tuple: one sampled action per sub-space.
        """
        assert theta is None
        actions = []
        for space in self.spaces:
            actions.append(space.sample())
        return tuple(actions)
class ConcatenatedActionSpace(ActionSpaceBase):
    """Union of several sub-spaces: the agent performs one action at a time.

    A sample is a pair ``(idx, action)`` where ``idx`` selects a sub-space
    and ``action`` is a sample drawn from it.
    """

    def __init__(self, *spaces):
        """Create the space from the given sub-spaces.

        Args:
            *spaces: the sub-spaces; stored as the tuple ``self.spaces``.
        """
        super().__init__()
        self.spaces = spaces

    def _sample(self, theta=None):
        """Pick a sub-space and sample from it.

        Args:
            theta: accepted for interface compatibility; not used.

        Returns:
            tuple: ``(idx, action)``, the chosen index and the sub-space's
            sample.
        """
        # NOTE(review): presumably choice_list(n) returns an index in
        # range(n) -- confirm against the generator's implementation.
        chosen = self.rng.choice_list(len(self.spaces))
        sub_action = self.spaces[chosen].sample()
        return (chosen, sub_action)