# env.py — grid-world environment definition
  1. class Env:
  2. def __init__(self, xI, xG):
  3. self.x_range = 51 # size of background
  4. self.y_range = 31
  5. self.motions = [(1, 0), (-1, 0), (0, 1), (0, -1)]
  6. self.xI = xI
  7. self.xG = xG
  8. self.obs = self.obs_map()
  9. self.lose = self.lose_map()
  10. self.stateSpace = self.state_space()
  11. def obs_map(self):
  12. """
  13. Initialize obstacles' positions
  14. :return: map of obstacles
  15. """
  16. x = self.x_range
  17. y = self.y_range
  18. obs = []
  19. for i in range(x):
  20. obs.append((i, 0))
  21. for i in range(x):
  22. obs.append((i, y - 1))
  23. for i in range(y):
  24. obs.append((0, i))
  25. for i in range(y):
  26. obs.append((x - 1, i))
  27. for i in range(10, 21):
  28. obs.append((i, 15))
  29. for i in range(15):
  30. obs.append((20, i))
  31. for i in range(15, 30):
  32. obs.append((30, i))
  33. for i in range(16):
  34. obs.append((40, i))
  35. return obs
  36. def lose_map(self):
  37. """
  38. Initialize losing states' positions
  39. :return: losing states
  40. """
  41. lose = []
  42. for i in range(25, 36):
  43. lose.append((i, 13))
  44. return lose
  45. def state_space(self):
  46. """
  47. generate state space
  48. :return: state space
  49. """
  50. state_space = []
  51. for i in range(self.x_range):
  52. for j in range(self.y_range):
  53. if (i, j) not in self.obs:
  54. state_space.append((i, j))
  55. return state_space
  56. def get_reward(self, x_next):
  57. """
  58. calculate reward of next state
  59. :param x_next: next state
  60. :return: reward
  61. """
  62. reward = []
  63. for x in x_next:
  64. if x in self.xG:
  65. reward.append(10) # reward : 10, for goal states
  66. elif x in self.lose:
  67. reward.append(-10) # reward : -10, for lose states
  68. else:
  69. reward.append(0) # reward : 0, for other states
  70. return reward