# env.py
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
@author: huiming zhou
"""
x_range, y_range = 51, 31  # grid dimensions (width, height); the border cells become walls in obs_map()
  7. def obs_map():
  8. """
  9. Initialize obstacles' positions
  10. :return: map of obstacles
  11. """
  12. obs = []
  13. for i in range(x_range):
  14. obs.append((i, 0))
  15. for i in range(x_range):
  16. obs.append((i, y_range - 1))
  17. for i in range(y_range):
  18. obs.append((0, i))
  19. for i in range(y_range):
  20. obs.append((x_range - 1, i))
  21. for i in range(10, 21):
  22. obs.append((i, 15))
  23. for i in range(15):
  24. obs.append((20, i))
  25. for i in range(15, 30):
  26. obs.append((30, i))
  27. for i in range(16):
  28. obs.append((40, i))
  29. return obs
  30. def lose_map():
  31. """
  32. Initialize losing states' positions
  33. :return: losing states
  34. """
  35. lose = []
  36. for i in range(25, 36):
  37. lose.append((i, 13))
  38. return lose
  39. def get_reward(x_next, xG, lose):
  40. """
  41. calculate reward of next state
  42. :param x_next: next state
  43. :return: reward
  44. """
  45. reward = []
  46. for x in x_next:
  47. if x in xG:
  48. reward.append(10) # reward : 10, for goal states
  49. elif x in lose:
  50. reward.append(-10) # reward : -10, for lose states
  51. else:
  52. reward.append(0) # reward : 0, for other states
  53. return reward