#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
@author: huiming zhou
"""
  6. class Env():
  7. def __init__(self, xI, xG):
  8. self.x_range = 14 # size of background
  9. self.y_range = 6
  10. self.motions = [(1, 0), (-1, 0), (0, 1), (0, -1)]
  11. self.xI = xI
  12. self.xG = xG
  13. self.obs = self.obs_map()
  14. self.lose = self.lose_map()
  15. self.stateSpace = self.state_space()
  16. def obs_map(self):
  17. """
  18. Initialize obstacles' positions
  19. :return: map of obstacles
  20. """
  21. x = self.x_range
  22. y = self.y_range
  23. obs = []
  24. for i in range(x):
  25. obs.append((i, 0))
  26. for i in range(x):
  27. obs.append((i, y - 1))
  28. for i in range(y):
  29. obs.append((0, i))
  30. for i in range(y):
  31. obs.append((x - 1, i))
  32. return obs
  33. def lose_map(self):
  34. """
  35. Initialize losing states' positions
  36. :return: losing states
  37. """
  38. lose = []
  39. for i in range(2, 12):
  40. lose.append((i, 1))
  41. return lose
  42. def state_space(self):
  43. """
  44. generate state space
  45. :return: state space
  46. """
  47. state_space = []
  48. for i in range(self.x_range):
  49. for j in range(self.y_range):
  50. if (i, j) not in self.obs:
  51. state_space.append((i, j))
  52. return state_space
  53. def get_reward(self, x_next):
  54. """
  55. calculate reward of next state
  56. :param x_next: next state
  57. :return: reward
  58. """
  59. if x_next in self.lose:
  60. return -100 # reward : -100, for lose states
  61. return -1 # reward : -1, for other states