@@ -10,12 +10,11 @@ import motion_model
 import numpy as np


-
 class SARSA:
     def __init__(self, x_start, x_goal):
         self.xI, self.xG = x_start, x_goal
-        self.M = 500  # iteration numbers
-        self.gamma = 0.9  # discount factor
+        self.M = 500  # iteration numbers
+        self.gamma = 0.9  # discount factor
         self.alpha = 0.5
         self.epsilon = 0.1

@@ -23,18 +22,17 @@ class SARSA:
         self.motion = motion_model.Motion_model(self.xI, self.xG)
         self.plotting = plotting.Plotting(self.xI, self.xG)

-        self.u_set = self.env.motions  # feasible input set
-        self.stateSpace = self.env.stateSpace  # state space
-        self.obs = self.env.obs_map()  # position of obstacles
-        self.lose = self.env.lose_map()  # position of lose states
+        self.u_set = self.env.motions  # feasible input set
+        self.stateSpace = self.env.stateSpace  # state space
+        self.obs = self.env.obs_map()  # position of obstacles
+        self.lose = self.env.lose_map()  # position of lose states

-        self.name1 = "SARSA, M=" + str(self.M)
+        self.name1 = "Q-learning, M=" + str(self.M)

         [self.value, self.policy] = self.Monte_Carlo(self.xI, self.xG)
         self.path = self.extract_path(self.xI, self.xG, self.policy)
         self.plotting.animation(self.path, self.name1)

-
     def Monte_Carlo(self, xI, xG):
         """
         Monte_Carlo experiments
@@ -48,9 +46,9 @@ class SARSA:
         for k in range(self.M):  # iterations
             x = self.state_init()  # initial state
             u = self.epsilon_greedy(int(np.argmax(Q_table[x])), self.epsilon)
-            while x != xG:  # stop condition
+            while x != xG:  # stop condition
                 x_next = self.move_next(x, self.u_set[u])  # next state
-                reward = self.env.get_reward(x_next)  # reward observed
+                reward = self.env.get_reward(x_next)  # reward observed
                 u_next = self.epsilon_greedy(int(np.argmax(Q_table[x_next])), self.epsilon)
                 Q_table[x][u] = (1 - self.alpha) * Q_table[x][u] + \
                     self.alpha * (reward + self.gamma * Q_table[x_next][u_next])
@@ -61,7 +59,6 @@ class SARSA:

         return Q_table, policy

-
     def table_init(self):
         """
         Initialize Q_table: Q(s, a)
@@ -81,7 +78,6 @@ class SARSA:
             Q_table[x] = u
         return Q_table

-
     def state_init(self):
         """
         initialize a starting state
@@ -93,7 +89,6 @@ class SARSA:
             if (i, j) not in self.obs:
                 return (i, j)

-
     def epsilon_greedy(self, u, error):
         """
         generate a policy using epsilon_greedy algorithm
@@ -114,7 +109,6 @@ class SARSA:
             return u_e
         return u

-
     def move_next(self, x, u):
         """
         get next state.
@@ -140,12 +134,12 @@ class SARSA:
         """

         x, path = xI, [xI]
-        while x not in xG:
+        while x != xG:
             u = self.u_set[policy[x]]
             x_next = (x[0] + u[0], x[1] + u[1])
             if x_next in self.obs:
                 print("Collision! Please run again!")
-                break
+                return path
             else:
                 path.append(x_next)
                 x = x_next
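
For reference, the update kept by the `@@ -48,9 +46,9 @@` hunk bootstraps from `Q_table[x_next][u_next]`, where `u_next` is the action sampled by the same epsilon-greedy behaviour policy, i.e. the on-policy SARSA rule, while the new plot title refers to Q-learning. Below is a minimal standalone sketch contrasting the two tabular TD updates; the `q`, `alpha`, and `gamma` names are illustrative only and not taken from this file.

import numpy as np

def sarsa_update(q, x, u, reward, x_next, u_next, alpha=0.5, gamma=0.9):
    # On-policy SARSA: bootstrap from the action u_next actually taken at x_next.
    q[x][u] = (1 - alpha) * q[x][u] + alpha * (reward + gamma * q[x_next][u_next])

def q_learning_update(q, x, u, reward, x_next, alpha=0.5, gamma=0.9):
    # Off-policy Q-learning: bootstrap from the greedy (max) action value at x_next.
    q[x][u] = (1 - alpha) * q[x][u] + alpha * (reward + gamma * np.max(q[x_next]))

# Toy check with two states and two actions per state.
q = {(0, 0): np.zeros(2), (0, 1): np.zeros(2)}
sarsa_update(q, (0, 0), 0, reward=-1.0, x_next=(0, 1), u_next=1)
q_learning_update(q, (0, 0), 0, reward=-1.0, x_next=(0, 1))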