zhm-real il y a 5 ans
Parent
commit
a035aa5504

+ 9 - 12
Model-free Control/Q-learning.py

@@ -8,16 +8,13 @@ import env
 import plotting
 import motion_model
 
-import matplotlib.pyplot as plt
 import numpy as np
-import sys
-
 
 class QLEARNING:
     def __init__(self, x_start, x_goal):
         self.xI, self.xG = x_start, x_goal
-        self.M = 500  # iteration numbers
-        self.gamma = 0.9  # discount factor
+        self.M = 500                                        # iteration numbers
+        self.gamma = 0.9                                    # discount factor
         self.alpha = 0.5
         self.epsilon = 0.1
 
@@ -25,10 +22,10 @@ class QLEARNING:
         self.motion = motion_model.Motion_model(self.xI, self.xG)
         self.plotting = plotting.Plotting(self.xI, self.xG)
 
-        self.u_set = self.env.motions  # feasible input set
-        self.stateSpace = self.env.stateSpace  # state space
-        self.obs = self.env.obs_map()  # position of obstacles
-        self.lose = self.env.lose_map()  # position of lose states
+        self.u_set = self.env.motions                       # feasible input set
+        self.stateSpace = self.env.stateSpace               # state space
+        self.obs = self.env.obs_map()                       # position of obstacles
+        self.lose = self.env.lose_map()                     # position of lose states
 
         self.name1 = "SARSA, M=" + str(self.M)
 
@@ -49,10 +46,10 @@ class QLEARNING:
 
         for k in range(self.M):                                                     # iterations
             x = self.state_init()                                                   # initial state
-            while x != xG:                                                     # stop condition
+            while x != xG:                                                          # stop condition
                 u = self.epsilon_greedy(int(np.argmax(Q_table[x])), self.epsilon)   # epsilon_greedy policy
                 x_next = self.move_next(x, self.u_set[u])                           # next state
-                reward = self.env.get_reward(x_next)                          # reward observed
+                reward = self.env.get_reward(x_next)                                # reward observed
                 Q_table[x][u] = (1 - self.alpha) * Q_table[x][u] + \
                                 self.alpha * (reward + self.gamma * max(Q_table[x_next]))
                 x = x_next
@@ -139,7 +136,7 @@ class QLEARNING:
         """
 
         x, path = xI, [xI]
-        while x not in xG:
+        while x != xG:
             u = self.u_set[policy[x]]
             x_next = (x[0] + u[0], x[1] + u[1])
             if x_next in self.obs:

+ 11 - 17
Model-free Control/Sarsa.py

@@ -10,12 +10,11 @@ import motion_model
 
 import numpy as np
 
-
 class SARSA:
     def __init__(self, x_start, x_goal):
         self.xI, self.xG = x_start, x_goal
-        self.M = 500  # iteration numbers
-        self.gamma = 0.9  # discount factor
+        self.M = 500                                # iteration numbers
+        self.gamma = 0.9                            # discount factor
         self.alpha = 0.5
         self.epsilon = 0.1
 
@@ -23,18 +22,17 @@ class SARSA:
         self.motion = motion_model.Motion_model(self.xI, self.xG)
         self.plotting = plotting.Plotting(self.xI, self.xG)
 
-        self.u_set = self.env.motions  # feasible input set
-        self.stateSpace = self.env.stateSpace  # state space
-        self.obs = self.env.obs_map()  # position of obstacles
-        self.lose = self.env.lose_map()  # position of lose states
+        self.u_set = self.env.motions               # feasible input set
+        self.stateSpace = self.env.stateSpace       # state space
+        self.obs = self.env.obs_map()               # position of obstacles
+        self.lose = self.env.lose_map()             # position of lose states
 
-        self.name1 = "SARSA, M=" + str(self.M)
+        self.name1 = "Q-learning, M=" + str(self.M)
 
         [self.value, self.policy] = self.Monte_Carlo(self.xI, self.xG)
         self.path = self.extract_path(self.xI, self.xG, self.policy)
         self.plotting.animation(self.path, self.name1)
 
-
     def Monte_Carlo(self, xI, xG):
         """
         Monte_Carlo experiments
@@ -48,9 +46,9 @@ class SARSA:
         for k in range(self.M):                                                 # iterations
             x = self.state_init()                                               # initial state
             u = self.epsilon_greedy(int(np.argmax(Q_table[x])), self.epsilon)
-            while x != xG:                                                 # stop condition
+            while x != xG:                                                      # stop condition
                 x_next = self.move_next(x, self.u_set[u])                       # next state
-                reward = self.env.get_reward(x_next)                      # reward observed
+                reward = self.env.get_reward(x_next)                            # reward observed
                 u_next = self.epsilon_greedy(int(np.argmax(Q_table[x_next])), self.epsilon)
                 Q_table[x][u] = (1 - self.alpha) * Q_table[x][u] + \
                                 self.alpha * (reward + self.gamma * Q_table[x_next][u_next])
@@ -61,7 +59,6 @@ class SARSA:
 
         return Q_table, policy
 
-
     def table_init(self):
         """
         Initialize Q_table: Q(s, a)
@@ -81,7 +78,6 @@ class SARSA:
                     Q_table[x] = u
         return Q_table
 
-
     def state_init(self):
         """
         initialize a starting state
@@ -93,7 +89,6 @@ class SARSA:
             if (i, j) not in self.obs:
                 return (i, j)
 
-
     def epsilon_greedy(self, u, error):
         """
         generate a policy using epsilon_greedy algorithm
@@ -114,7 +109,6 @@ class SARSA:
             return u_e
         return u
 
-
     def move_next(self, x, u):
         """
         get next state.
@@ -140,12 +134,12 @@ class SARSA:
         """
 
         x, path = xI, [xI]
-        while x not in xG:
+        while x != xG:
             u = self.u_set[policy[x]]
             x_next = (x[0] + u[0], x[1] + u[1])
             if x_next in self.obs:
                 print("Collision! Please run again!")
-                break
+                return path
             else:
                 path.append(x_next)
                 x = x_next

BIN
Model-free Control/__pycache__/env.cpython-37.pyc


BIN
Model-free Control/__pycache__/motion_model.cpython-37.pyc


BIN
Model-free Control/__pycache__/plotting.cpython-37.pyc


+ 3 - 3
Model-free Control/env.py

@@ -6,7 +6,7 @@
 
 class Env():
     def __init__(self, xI, xG):
-        self.x_range = 14  # size of background
+        self.x_range = 14                           # size of background
         self.y_range = 6
         self.motions = [(1, 0), (-1, 0), (0, 1), (0, -1)]
         self.xI = xI
@@ -72,5 +72,5 @@ class Env():
         """
 
         if x_next in self.lose:
-            return -100  # reward : -100, for lose states
-        return -1  # reward : -1, for other states
+            return -100                         # reward : -100, for lose states
+        return -1                               # reward : -1, for other states

+ 0 - 1
Model-free Control/motion_model.py

@@ -11,7 +11,6 @@ class Motion_model():
         self.env = env.Env(xI, xG)
         self.obs = self.env.obs_map()
 
-
     def move_next(self, x, u, eta=0.2):
         """
         Motion model of robots,

+ 0 - 6
Model-free Control/plotting.py

@@ -14,7 +14,6 @@ class Plotting():
         self.obs = self.env.obs_map()
         self.lose = self.env.lose_map()
 
-
     def animation(self, path, name):
         """
         animation.
@@ -29,7 +28,6 @@ class Plotting():
         self.plot_lose()
         self.plot_path(path)
 
-
     def plot_grid(self, name):
         """
         plot the obstacles in environment.
@@ -48,7 +46,6 @@ class Plotting():
         plt.title(name)
         plt.axis("equal")
 
-
     def plot_lose(self):
         """
         plot losing states in environment.
@@ -60,7 +57,6 @@ class Plotting():
 
         plt.plot(lose_x, lose_y, color = '#A52A2A', marker = 's', ms = 24)
 
-
     def plot_visited(self, visited):
         """
         animation of order of visited nodes.
@@ -87,7 +83,6 @@ class Plotting():
 
             if count % length == 0: plt.pause(0.001)
 
-
     def plot_path(self, path):
         path.remove(self.xI)
         path.remove(self.xG)
@@ -100,7 +95,6 @@ class Plotting():
         plt.show()
         plt.pause(0.5)
 
-
     def plot_diff(self, diff, name):
         plt.figure(2)
         plt.title(name, fontdict=None)

+ 1 - 13
Search-based Planning/.idea/workspace.xml

@@ -1,19 +1,7 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <project version="4">
   <component name="ChangeListManager">
-    <list default="true" id="025aff36-a6aa-4945-ab7e-b2c625055f47" name="Default Changelist" comment="">
-      <change beforePath="$PROJECT_DIR$/../Model-free Control/Q-learning.py" beforeDir="false" afterPath="$PROJECT_DIR$/../Model-free Control/Q-learning.py" afterDir="false" />
-      <change beforePath="$PROJECT_DIR$/../Model-free Control/Sarsa.py" beforeDir="false" afterPath="$PROJECT_DIR$/../Model-free Control/Sarsa.py" afterDir="false" />
-      <change beforePath="$PROJECT_DIR$/../Model-free Control/env.py" beforeDir="false" afterPath="$PROJECT_DIR$/../Model-free Control/env.py" afterDir="false" />
-      <change beforePath="$PROJECT_DIR$/../Model-free Control/motion_model.py" beforeDir="false" afterPath="$PROJECT_DIR$/../Model-free Control/motion_model.py" afterDir="false" />
-      <change beforePath="$PROJECT_DIR$/../Model-free Control/tools.py" beforeDir="false" afterPath="$PROJECT_DIR$/../Model-free Control/plotting.py" afterDir="false" />
-      <change beforePath="$PROJECT_DIR$/.idea/workspace.xml" beforeDir="false" afterPath="$PROJECT_DIR$/.idea/workspace.xml" afterDir="false" />
-      <change beforePath="$PROJECT_DIR$/a_star.py" beforeDir="false" afterPath="$PROJECT_DIR$/a_star.py" afterDir="false" />
-      <change beforePath="$PROJECT_DIR$/bfs.py" beforeDir="false" afterPath="$PROJECT_DIR$/bfs.py" afterDir="false" />
-      <change beforePath="$PROJECT_DIR$/dfs.py" beforeDir="false" afterPath="$PROJECT_DIR$/dfs.py" afterDir="false" />
-      <change beforePath="$PROJECT_DIR$/dijkstra.py" beforeDir="false" afterPath="$PROJECT_DIR$/dijkstra.py" afterDir="false" />
-      <change beforePath="$PROJECT_DIR$/queue.py" beforeDir="false" afterPath="$PROJECT_DIR$/queue.py" afterDir="false" />
-    </list>
+    <list default="true" id="025aff36-a6aa-4945-ab7e-b2c625055f47" name="Default Changelist" comment="" />
     <option name="EXCLUDED_CONVERTED_TO_IGNORED" value="true" />
     <option name="SHOW_DIALOG" value="false" />
     <option name="HIGHLIGHT_CONFLICTS" value="true" />