#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
env.py — 2-D grid-world environment definition.

@author: huiming zhou
"""
  6. class Env():
  7. def __init__(self, xI, xG):
  8. self.x_range = 51 # size of background
  9. self.y_range = 31
  10. self.motions = [(1, 0), (-1, 0), (0, 1), (0, -1)]
  11. self.xI = xI
  12. self.xG = xG
  13. self.obs = self.obs_map()
  14. self.lose = self.lose_map()
  15. self.stateSpace = self.state_space()
  16. def obs_map(self):
  17. """
  18. Initialize obstacles' positions
  19. :return: map of obstacles
  20. """
  21. x = self.x_range
  22. y = self.y_range
  23. obs = []
  24. for i in range(x):
  25. obs.append((i, 0))
  26. for i in range(x):
  27. obs.append((i, y - 1))
  28. for i in range(y):
  29. obs.append((0, i))
  30. for i in range(y):
  31. obs.append((x - 1, i))
  32. for i in range(10, 21):
  33. obs.append((i, 15))
  34. for i in range(15):
  35. obs.append((20, i))
  36. for i in range(15, 30):
  37. obs.append((30, i))
  38. for i in range(16):
  39. obs.append((40, i))
  40. return obs
  41. def lose_map(self):
  42. """
  43. Initialize losing states' positions
  44. :return: losing states
  45. """
  46. lose = []
  47. for i in range(25, 36):
  48. lose.append((i, 13))
  49. return lose
  50. def state_space(self):
  51. """
  52. generate state space
  53. :return: state space
  54. """
  55. state_space = []
  56. for i in range(self.x_range):
  57. for j in range(self.y_range):
  58. if (i, j) not in self.obs:
  59. state_space.append((i, j))
  60. return state_space
  61. def get_reward(self, x_next):
  62. """
  63. calculate reward of next state
  64. :param x_next: next state
  65. :return: reward
  66. """
  67. reward = []
  68. for x in x_next:
  69. if x in self.xG:
  70. reward.append(10) # reward : 10, for goal states
  71. elif x in self.lose:
  72. reward.append(-10) # reward : -10, for lose states
  73. else:
  74. reward.append(0) # reward : 0, for other states
  75. return reward