# env.py (1.7 KB)
  1. class Env:
  2. def __init__(self, xI, xG):
  3. self.x_range = 14 # size of background
  4. self.y_range = 6
  5. self.motions = [(1, 0), (-1, 0), (0, 1), (0, -1)]
  6. self.xI = xI
  7. self.xG = xG
  8. self.obs = self.obs_map()
  9. self.lose = self.lose_map()
  10. self.stateSpace = self.state_space()
  11. def obs_map(self):
  12. """
  13. Initialize obstacles' positions
  14. :return: map of obstacles
  15. """
  16. x = self.x_range
  17. y = self.y_range
  18. obs = []
  19. for i in range(x):
  20. obs.append((i, 0))
  21. for i in range(x):
  22. obs.append((i, y - 1))
  23. for i in range(y):
  24. obs.append((0, i))
  25. for i in range(y):
  26. obs.append((x - 1, i))
  27. return obs
  28. def lose_map(self):
  29. """
  30. Initialize losing states' positions
  31. :return: losing states
  32. """
  33. lose = []
  34. for i in range(2, 12):
  35. lose.append((i, 1))
  36. return lose
  37. def state_space(self):
  38. """
  39. generate state space
  40. :return: state space
  41. """
  42. state_space = []
  43. for i in range(self.x_range):
  44. for j in range(self.y_range):
  45. if (i, j) not in self.obs:
  46. state_space.append((i, j))
  47. return state_space
  48. def get_reward(self, x_next):
  49. """
  50. calculate reward of next state
  51. :param x_next: next state
  52. :return: reward
  53. """
  54. if x_next in self.lose:
  55. return -100 # reward : -100, for lose states
  56. return -1 # reward : -1, for other states