Source code for jaclearn.rl.envs.maze.maze

#! /usr/bin/env python3
# -*- coding: utf-8 -*-
# File   : maze.py
# Author : Jiayuan Mao
# Email  : maojiayuan@gmail.com
# Date   : 02/17/2018
#
# This file is part of Jacinle.
# Distributed under terms of the MIT license.

import numpy as np
import collections
import itertools

import jacinle.random as random
from jacinle.utils.meta import notnone_property
from jacinle.utils.argument import get_2dshape

from ...env import SimpleRLEnvBase
from ...space import DiscreteActionSpace
from ._maze_visualizer import render_maze


# Public API of this module: the two environment classes defined below.
__all__ = ['MazeEnv', 'CustomLavaWorldEnv']


class MazeEnv(SimpleRLEnvBase):
    """Create a maze environment.

    Points are expressed in *map* coordinates ``(row, col)`` with ``0 <= row < h`` and
    ``0 <= col < w``. The canvas is an ``(h + 2, w + 2, 3)`` uint8 image with a one-cell
    border, so map cell ``(r, c)`` lives at canvas position ``(r + 1, c + 1)``.
    """

    _obstacles = None
    _start_point = None
    _final_point = None
    _shortest_path = None
    _distance_mat = None
    _distance_prev = None
    _inv_distance_mat = None
    _inv_distance_prev = None
    _quick_distance_mat = None
    _quick_distance_prev = None
    _current_point = None
    _canvas = None
    _origin_canvas = None

    # Cell labels, in order: empty, obstacle, current, final, border.
    _total_dim = 5
    # Colors indexed by cell label. OpenCV format: BGR.
    _colors = [(255, 255, 255), (0, 0, 0), (0, 255, 0), (0, 0, 255), (255, 255, 0)]

    # The four real moves, used by the shortest-path search.
    _action_delta_valid = [(-1, 0), (0, 1), (1, 0), (0, -1)]

    # Just default values; __init__ overrides both on the instance.
    _action_delta = [(0, 0), (-1, 0), (0, 1), (1, 0), (0, -1)]
    _action_mapping = [0, 1, 2, 3, 4]

    def __init__(self, map_size=14, visible_size=None, obs_ratio=0.3, enable_path_checking=True,
                 random_action_mapping=None, enable_noaction=False, dense_reward=False,
                 reward_move=None, reward_noaction=0, reward_final=10, reward_error=-2,
                 state_mode='DEFAULT'):
        """
        :param map_size: A single int or a tuple (h, w), representing the map size.
        :param visible_size: A single int or a tuple (h, w), representing the visible size. The agent will be at the
            center of the visible window, and the out-of-border part will be colored by the obstacle color.
        :param obs_ratio: Obstacle ratio (how many obstacles will be in the map).
        :param enable_path_checking: Enable path computation in map construction. Turn it off only when you are sure
            the maze is valid.
        :param random_action_mapping: Whether to enable random action mapping. If a single bool True is provided,
            the action mapping is randomly shuffled. Otherwise, it should be a list with the same length as the
            action space (5 when noaction is enabled, 4 otherwise). None or False disables the feature.
        :param enable_noaction: Whether to enable no-action operation.
        :param dense_reward: Whether the reward is dense.
        :param reward_move: Reward for a valid move. For dense reward setting, it should be a positive number.
            While in sparse reward setting, it is expected to be a non-positive number.
            Defaults to 1 for dense reward and -1 otherwise.
        :param reward_noaction: Reward for a no-action.
        :param reward_final: Reward when you arrive at the final point.
        :param reward_error: Reward when you perform an invalid move.
        :param state_mode: State mode, either 'DEFAULT' or 'RENDER'.
        """
        super().__init__()

        self._rng = random.gen_rng()
        self._map_size = get_2dshape(map_size)
        self._visible_size = visible_size
        self._enable_path_checking = enable_path_checking
        if self._visible_size is not None:
            self._visible_size = get_2dshape(self._visible_size)
        self._obs_ratio = obs_ratio

        if enable_noaction:
            self._action_space = DiscreteActionSpace(5, action_meanings=['NOOP', 'UP', 'RIGHT', 'DOWN', 'LEFT'])
            self._action_delta = [(0, 0), (-1, 0), (0, 1), (1, 0), (0, -1)]
            self._action_mapping = [0, 1, 2, 3, 4]
        else:
            self._action_space = DiscreteActionSpace(4, action_meanings=['UP', 'RIGHT', 'DOWN', 'LEFT'])
            self._action_delta = [(-1, 0), (0, 1), (1, 0), (0, -1)]
            self._action_mapping = [0, 1, 2, 3]

        # An explicit False means "disabled", exactly like None (it used to crash on len(False)).
        if random_action_mapping is not None and random_action_mapping is not False:
            if random_action_mapping is True:
                self._rng.shuffle(self._action_mapping)
            else:
                assert len(self._action_mapping) == len(random_action_mapping)
                self._action_mapping = random_action_mapping

        self._enable_noaction = enable_noaction
        self._dense_reward = dense_reward
        if reward_move is None:
            reward_move = -1 if not dense_reward else 1
        self._rewards = (reward_move, reward_noaction, reward_final, reward_error)

        assert state_mode in ('DEFAULT', 'RENDER')
        self._state_mode = state_mode

    @notnone_property
    def canvas(self):
        """Return the raw canvas (full)"""
        return self._canvas

    @notnone_property
    def origin_canvas(self):
        """Return the original canvas (at time 0, full)"""
        return self._origin_canvas

    @notnone_property
    def obstacles(self):
        """Obstacle list, as provided to or generated by the last map construction"""
        return self._obstacles

    @notnone_property
    def start_point(self):
        """Start point (r, c)"""
        return self._start_point

    @notnone_property
    def final_point(self):
        """Finish point (r, c)"""
        return self._final_point

    @notnone_property
    def current_point(self):
        """Current point (r, c)"""
        return self._current_point

    @notnone_property
    def shortest_path(self):
        """One of the shortest paths from start to finish, list of point (r, c)"""
        if self._shortest_path is None:
            self._gen_distance_info()
        return self._shortest_path

    @notnone_property
    def quick_distance_mat(self):
        """Distance matrix: this is done during the first run of SPFA, so if you ensure that all valid points
        are in the same connected component, you can use it"""
        return self._quick_distance_mat

    @notnone_property
    def quick_distance_prev(self):
        """Distance-prev matrix: see also `quick_distance_mat`"""
        return self._quick_distance_prev

    @property
    def distance_mat(self):
        """Distance matrix"""
        self._gen_distance_info()
        return self._distance_mat

    @property
    def distance_prev(self):
        """Distance-prev matrix"""
        self._gen_distance_info()
        return self._distance_prev

    @property
    def inv_distance_mat(self):
        """Distance matrix computed from the final point"""
        self._gen_inv_distance_info()
        return self._inv_distance_mat

    @property
    def inv_distance_prev(self):
        """Distance-prev matrix computed from the final point"""
        self._gen_inv_distance_info()
        return self._inv_distance_prev

    @property
    def action_delta(self):
        """Action deltas: the tuple (dy, dx) when you perform action i"""
        return self._action_delta

    @property
    def action_mapping(self):
        """If random action mapping is enabled, return the internal mapping"""
        return self._action_mapping

    @property
    def canvas_size(self):
        """Canvas size"""
        return self.canvas.shape[:2]

    @property
    def map_size(self):
        """Map size"""
        return self._map_size

    @property
    def visible_size(self):
        """Visible size"""
        return self._visible_size

    @property
    def rewards(self):
        """A tuple of 4 value, representing the rewards for each action:
        (Move, Noaction, Arrive final point, Move Err)"""
        return self._rewards

    @staticmethod
    def _same_point(a, b):
        """Compare two (r, c) points by value, regardless of container type (list / tuple / array)."""
        return a[0] == b[0] and a[1] == b[1]

    def _color2label(self, cc):
        """Map a color triple back to its cell label; raise ValueError for an unknown color."""
        for i, c in enumerate(self._colors):
            if np.all(c == cc):
                return i
        raise ValueError()

    def _get_canvas_color(self, yy, xx):
        """Color of map cell (yy, xx) on the current canvas."""
        return self._canvas[yy+1, xx+1]

    def _get_canvas_label(self, yy, xx):
        """Label of map cell (yy, xx) on the current canvas; (-1, *) and (h, *) hit the border."""
        return self._color2label(self._canvas[yy+1, xx+1])

    def _gen_rpt(self):
        """Generate a random point uniformly"""
        return [self._rng.randint(d) for d in self._map_size]

    def _fill_canvas(self, c, y, x, v, delta=1):
        """Paint cell (y, x) of canvas `c` with the color of label `v`.

        `delta` is the offset added to both coordinates: 1 for map coordinates (default),
        0 for raw canvas coordinates.
        """
        c[y + delta, x + delta, :] = self._colors[v]

    def _gen_shortest_path(self, c, start_point, final_point):
        """Run a queue-based relaxation (SPFA) from `start_point` over canvas `c`.

        Obstacles are passable at a cost of `obs_dis` (the canvas area) while every other
        non-border cell costs 1, so a path always exists and prefers to cross as few
        obstacles as possible.

        :return: (path, d, p): the path from start to final as a list of (r, c), the distance matrix and the
            predecessor matrix (the start cell's predecessor is (-1, -1)).
        """
        sy, sx = start_point
        fy, fx = final_point
        obs_dis = self.canvas_size[0] * self.canvas_size[1]

        def label(yy, xx):
            # Read labels from the canvas that was passed in (offset by the border).
            return self._color2label(c[yy + 1, xx + 1])

        q = collections.deque()
        v = set()
        d = np.full(self._map_size, obs_dis * obs_dis, dtype='int32')
        p = np.zeros(self._map_size + (2, ), dtype='int32')

        q.append((sy, sx))
        v.add((sy, sx))
        d[sy, sx] = 0
        p[sy, sx, :] = -1

        while len(q):
            y, x = q.popleft()
            v.remove((y, x))
            assert label(y, x) < 4

            for dy, dx in self._action_delta_valid:
                yy, xx = y + dy, x + dx
                tt = label(yy, xx)
                if tt < 4:
                    dd = obs_dis if tt == 1 else 1
                    if d[yy, xx] > d[y, x] + dd:
                        d[yy, xx] = d[y, x] + dd
                        p[yy, xx, :] = (y, x)
                        if (yy, xx) not in v:
                            q.append((yy, xx))
                            v.add((yy, xx))

        # Walk the predecessor chain back from the final point.
        path = []
        y, x = fy, fx
        while y != -1 and x != -1:
            path.append((int(y), int(x)))
            y, x = p[y, x]
        return list(reversed(path)), d, p

    def _gen_map(self, obstacles=None, start_point=None, final_point=None):
        """Build a fresh canvas and (re)initialize obstacles, start, final and current points.

        Missing obstacles and points are drawn from the environment's RNG.

        :raises ValueError: if the map has fewer than two cells, or the provided start and final points coincide.
        """
        h, w = self._map_size
        if h * w < 2:
            raise ValueError('Map size {} is too small to hold distinct start and final points'.format(
                self._map_size))

        canvas = np.empty((h + 2, w + 2, 3), dtype='uint8')
        canvas[:, :, :] = self._colors[0]

        # reference: helpers such as canvas_size read self._canvas while the map is being built.
        self._canvas = canvas

        for i in range(h + 2):
            self._fill_canvas(canvas, i, 0, 4, delta=0)
            self._fill_canvas(canvas, i, w + 1, 4, delta=0)
        for i in range(w + 2):
            self._fill_canvas(canvas, 0, i, 4, delta=0)
            # The bottom border is row h + 1 (it used to be indexed by the width).
            self._fill_canvas(canvas, h + 1, i, 4, delta=0)

        if obstacles is None:
            obstacles = []
            for i in range(int(h * w * self._obs_ratio)):
                obstacles.append(self._gen_rpt())
        for y, x in obstacles:
            self._fill_canvas(canvas, y, x, v=1)
        # NOTE: cells later cleared by path checking are not removed from this list.
        self._obstacles = obstacles

        if start_point is not None and final_point is not None:
            if self._same_point(start_point, final_point):
                raise ValueError('Invalid start and final point: {} {}'.format(start_point, final_point))
        elif start_point is None and final_point is not None:
            # Only the goal is fixed: resample the start until it differs from it.
            start_point = self._gen_rpt()
            while self._same_point(start_point, final_point):
                start_point = self._gen_rpt()
        else:
            if start_point is None:
                start_point = self._gen_rpt()
            final_point = self._gen_rpt()
            while self._same_point(start_point, final_point):
                final_point = self._gen_rpt()
        self._start_point = start_point
        self._final_point = final_point

        self._fill_canvas(canvas, *self._start_point, v=2)
        self._fill_canvas(canvas, *self._final_point, v=3)

        if self._enable_path_checking:
            path, d, p = self._gen_shortest_path(canvas, self._start_point, self._final_point)
            # Carve the path so that the final point is guaranteed to be reachable.
            for y, x in path:
                self._fill_canvas(canvas, y, x, v=0)
            self._fill_canvas(canvas, *self._start_point, v=2)
            self._fill_canvas(canvas, *self._final_point, v=3)
            self._shortest_path = path
            self._quick_distance_mat = d
            self._quick_distance_prev = p
        else:
            self._shortest_path = None
            self._quick_distance_mat = None
            self._quick_distance_prev = None

        self._current_point = self._start_point
        self._origin_canvas = canvas.copy()

    def _clear_distance_info(self):
        """Drop the lazily computed distance matrices."""
        self._distance_mat = None
        self._distance_prev = None
        self._inv_distance_mat = None
        self._inv_distance_prev = None

    def _gen_distance_info(self):
        """Lazily compute the start-rooted distance info (and the shortest path if still unknown)."""
        if self._distance_mat is not None:
            return
        path, d, p = self._gen_shortest_path(self.origin_canvas, self._start_point, self._final_point)
        if self._shortest_path is None:
            self._shortest_path = path
        self._distance_mat = d
        self._distance_prev = p

    def _gen_inv_distance_info(self):
        """Lazily compute the distance info rooted at the final point."""
        if self._inv_distance_mat is not None:
            return
        path, d, p = self._gen_shortest_path(self.origin_canvas, self._final_point, self._start_point)
        self._inv_distance_mat = d
        self._inv_distance_prev = p

    def _refresh_view(self):
        """Publish the current observation: the full canvas, or a window centered on the agent."""
        if self._visible_size is None:
            self._set_current_state(self._canvas.copy())
            return

        vh, vw = self._visible_size
        view = np.empty((vh, vw, 3), dtype='uint8')
        view[:, :, :] = self._colors[1]

        # The current point is in map coordinates; shift by the border to get canvas coordinates.
        y, x = self._current_point
        cy, cx = y + 1, x + 1
        ch, cw = self.canvas_size

        # visible up, left, down, right
        vu, vl = (vh - 1) // 2, (vw - 1) // 2
        vd, vr = vh - vu, vw - vl
        # visible center y, x
        vcy, vcx = vu, vl

        # Clip the window against the canvas; the clipped-off part keeps the obstacle color.
        y0, x0 = max(0, cy - vu), max(0, cx - vl)
        y1, x1 = min(ch, cy + vd), min(cw, cx + vr)
        vu, vl = cy - y0, cx - x0
        vd, vr = y1 - cy, x1 - cx

        view[vcy-vu:vcy+vd, vcx-vl:vcx+vr, :] = self._canvas[y0:y1, x0:x1, :]
        self._set_current_state(view)

    def _get_action_space(self):
        return self._action_space

    def _set_current_state(self, o):
        """Store observation `o`, rendering it first when the state mode is 'RENDER'."""
        if self._state_mode == 'DEFAULT':
            pass
        elif self._state_mode == 'RENDER':
            o = render_maze(o)
        super()._set_current_state(o)

    def _action(self, action):
        """Perform `action` and return (reward, is_over).

        Moving into an obstacle or the border leaves the agent in place and yields the error reward.
        """
        if self._enable_noaction and action == 0:
            return self._rewards[1], False

        dy, dx = self._action_delta[self._action_mapping[action]]
        y, x = self._current_point
        oy, ox = y, x
        y += dy
        x += dx

        if self._get_canvas_label(y, x) in (1, 4):
            return self._rewards[3], False

        if y == self._final_point[0] and x == self._final_point[1]:
            reward = self._rewards[2]
            is_over = True
        else:
            if self._dense_reward:
                # Reward only the moves that get strictly closer to the final point.
                inv_dist = self.inv_distance_mat
                if inv_dist[oy, ox] > inv_dist[y, x]:
                    reward = self._rewards[0]
                else:
                    reward = self._rewards[1]
            else:
                reward = self._rewards[0]
            is_over = False

        self._fill_canvas(self._canvas, *self._current_point, v=0)
        self._current_point = (y, x)
        self._fill_canvas(self._canvas, *self._current_point, v=2)
        self._refresh_view()

        return reward, is_over

    def restart(self, obstacles=None, start_point=None, final_point=None):
        """Start a new episode on a newly generated map.

        :param obstacles: Optional list of obstacle points (r, c); random when None.
        :param start_point: Optional start point (r, c); random when None.
        :param final_point: Optional final point (r, c); random when None.
        """
        super().restart()

        if start_point is not None and final_point is not None:
            assert start_point[0] != final_point[0] or start_point[1] != final_point[1], \
                'Invalid start and final point: {} {}'.format(start_point, final_point)
        self._gen_map(obstacles=obstacles, start_point=start_point, final_point=final_point)
        self._refresh_view()
        self._clear_distance_info()

    def _restart(self):
        pass

    def _finish(self):
        """Record whether the episode ended on the final point."""
        cur, fin = self._current_point, self._final_point
        # Compare by value: the current point is a tuple after a move while a randomly
        # generated final point is a list, so a plain `==` would never match.
        reached = cur is not None and fin is not None and tuple(cur) == tuple(fin)
        self.append_stat('success', 1 if reached else 0)
class CustomLavaWorldEnv(MazeEnv):
    """A maze similar to Lava World in OpenAI Gym.

    The map is split into four rooms by one vertical and one horizontal wall through the
    center; each wall half has a single gap. The wall layout is fixed for the lifetime of the
    environment, only the start and final points change between episodes.
    """

    # Canvas holding only the border and the walls; built on the first restart and reused afterwards.
    _empty_canvas = None

    def __init__(self, map_size=15, mode=None, **kwargs):
        """
        :param map_size: A single int or a tuple (h, w); both h and w must satisfy ``n % 4 == 3``.
        :param mode: One of 'ALL', 'TRAIN', 'VAL' (None means 'ALL'). It selects the candidate final points:
            every free cell, the free cells with ``r < h // 2 or c < w // 2``, or the remaining free cells.
        :param kwargs: Forwarded to :class:`MazeEnv`; `enable_path_checking` defaults to False here.
        """
        kwargs.setdefault('enable_path_checking', False)
        super().__init__(map_size, **kwargs)

        mode = mode or 'ALL'
        assert mode in ('ALL', 'TRAIN', 'VAL')

        h, w = get_2dshape(map_size)
        assert h % 4 == 3 and w % 4 == 3

        # Rows (resp. columns) where the vertical (resp. horizontal) wall is left open.
        row_gaps = ((h-3) // 4, (h-1)//2 + (h+1)//4)
        col_gaps = ((w-3) // 4, (w-1)//2 + (w+1)//4)
        self._lv_obstacles = list(itertools.chain(
            [(i, (w-1) // 2) for i in range(h) if i not in row_gaps],
            [((h-1) // 2, i) for i in range(w) if i not in col_gaps]
        ))

        # Set lookup instead of scanning the obstacle list once per cell.
        blocked = set(self._lv_obstacles)
        free_cells = [(i, j) for i in range(h) for j in range(w) if (i, j) not in blocked]

        self._lv_starts = free_cells
        if mode == 'ALL':
            self._lv_finals = free_cells.copy()
        elif mode == 'TRAIN':
            self._lv_finals = [(i, j) for i, j in free_cells if i < h // 2 or j < w // 2]
        elif mode == 'VAL':
            self._lv_finals = [(i, j) for i, j in free_cells if not (i < h // 2 or j < w // 2)]

    @property
    def lv_obstacles(self):
        """Wall cells (r, c) of the lava world"""
        return self._lv_obstacles

    @property
    def lv_starts(self):
        """Candidate start points (r, c)"""
        return self._lv_starts

    @property
    def lv_finals(self):
        """Candidate final points (r, c)"""
        return self._lv_finals

    def restart(self, obstacles=None, start_point=None, final_point=None):
        """Start a new episode; missing start / final points are sampled from the candidate lists.

        :param obstacles: Must be None; the wall layout is fixed.
        :param start_point: Optional start point (r, c).
        :param final_point: Optional final point (r, c), must differ from the start point.
        """
        assert obstacles is None, 'Can not provide obstacles to CustomLavaWorldEnv'

        if start_point is None:
            i = random.choice(len(self.lv_starts))
            start_point = self.lv_starts[i]
        start_point = tuple(start_point)

        if final_point is None:
            while True:
                j = random.choice(len(self.lv_finals))
                final_point = self.lv_finals[j]
                if start_point != tuple(final_point):
                    break
        final_point = tuple(final_point)

        assert start_point != final_point, 'Invalid start and final point: {} {}'.format(
            start_point, final_point)

        if self._empty_canvas is None:
            # First episode: let MazeEnv build the whole map (this also runs the base-class restart),
            # then keep a copy without the start / final markers for later reuse.
            super().restart(self.lv_obstacles, start_point, final_point)
            self._empty_canvas = self._canvas.copy()
            self._fill_canvas(self._empty_canvas, *self._start_point, v=0)
            self._fill_canvas(self._empty_canvas, *self._final_point, v=0)
        else:
            # do partial reload: run only the base-class restart, skipping MazeEnv.restart so that
            # no throwaway random maze is generated.
            super(MazeEnv, self).restart()
            self._start_point = start_point
            self._final_point = final_point
            self._current_point = start_point
            self._canvas = self._empty_canvas.copy()
            self._fill_canvas(self._canvas, *self._start_point, v=2)
            self._fill_canvas(self._canvas, *self._final_point, v=3)
            self._origin_canvas = self._canvas.copy()
            # Path info cached from an earlier episode no longer matches the new points.
            self._shortest_path = None
            self._quick_distance_mat = None
            self._quick_distance_prev = None
            self._refresh_view()
            self._clear_distance_info()