فهرست منبع

regulate format

zhm-real 5 سال پیش
والد
کامیت
5f8ba1601f
27 فایل‌های تغییر یافته به همراه 156 افزوده شده و 257 حذف شده
  1. 1 4
      Model-free Control/.idea/Model-free Control.iml
  2. 1 1
      Model-free Control/.idea/misc.xml
  3. 24 28
      Model-free Control/Q-learning.py
  4. 23 24
      Model-free Control/Sarsa.py
  5. 1 7
      Model-free Control/env.py
  6. 2 7
      Model-free Control/motion_model.py
  7. 6 11
      Model-free Control/plotting.py
  8. 1 1
      Search-based Planning/.idea/Search-based Planning.iml
  9. 1 1
      Search-based Planning/.idea/misc.xml
  10. 12 21
      Search-based Planning/.idea/workspace.xml
  11. BIN
      Search-based Planning/__pycache__/env.cpython-37.pyc
  12. BIN
      Search-based Planning/__pycache__/plotting.cpython-35.pyc
  13. BIN
      Search-based Planning/__pycache__/plotting.cpython-37.pyc
  14. BIN
      Search-based Planning/__pycache__/queue.cpython-35.pyc
  15. BIN
      Search-based Planning/__pycache__/queue.cpython-37.pyc
  16. 1 2
      Stochastic Shortest Path/.idea/Stochastic Shortest Path.iml
  17. 1 1
      Stochastic Shortest Path/.idea/misc.xml
  18. 17 28
      Stochastic Shortest Path/Q-policy_iteration.py
  19. 13 22
      Stochastic Shortest Path/Q-value_iteration.py
  20. BIN
      Stochastic Shortest Path/__pycache__/env.cpython-37.pyc
  21. BIN
      Stochastic Shortest Path/__pycache__/motion_model.cpython-37.pyc
  22. BIN
      Stochastic Shortest Path/__pycache__/plotting.cpython-37.pyc
  23. 5 15
      Stochastic Shortest Path/env.py
  24. 2 8
      Stochastic Shortest Path/motion_model.py
  25. 2 13
      Stochastic Shortest Path/plotting.py
  26. 15 26
      Stochastic Shortest Path/policy_iteration.py
  27. 28 37
      Stochastic Shortest Path/value_iteration.py

+ 1 - 4
Model-free Control/.idea/Model-free Control.iml

@@ -2,10 +2,7 @@
 <module type="PYTHON_MODULE" version="4">
   <component name="NewModuleRootManager">
     <content url="file://$MODULE_DIR$" />
-    <orderEntry type="jdk" jdkName="Python 3.7" jdkType="Python SDK" />
+    <orderEntry type="jdk" jdkName="Python 3.7 (Search-based Planning)" jdkType="Python SDK" />
     <orderEntry type="sourceFolder" forTests="false" />
   </component>
-  <component name="TestRunnerService">
-    <option name="PROJECT_TEST_RUNNER" value="Unittests" />
-  </component>
 </module>

+ 1 - 1
Model-free Control/.idea/misc.xml

@@ -1,4 +1,4 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <project version="4">
-  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.7" project-jdk-type="Python SDK" />
+  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.7 (Search-based Planning)" project-jdk-type="Python SDK" />
 </project>

+ 24 - 28
Model-free Control/Q-learning.py

@@ -1,20 +1,15 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-"""
-@author: huiming zhou
-"""
-
 import env
 import plotting
 import motion_model
 
 import numpy as np
 
+
 class QLEARNING:
     def __init__(self, x_start, x_goal):
         self.xI, self.xG = x_start, x_goal
-        self.M = 500                                        # iteration numbers
-        self.gamma = 0.9                                    # discount factor
+        self.M = 500  # iteration numbers
+        self.gamma = 0.9  # discount factor
         self.alpha = 0.5
         self.epsilon = 0.1
 
@@ -22,10 +17,10 @@ class QLEARNING:
         self.motion = motion_model.Motion_model(self.xI, self.xG)
         self.plotting = plotting.Plotting(self.xI, self.xG)
 
-        self.u_set = self.env.motions                       # feasible input set
-        self.stateSpace = self.env.stateSpace               # state space
-        self.obs = self.env.obs_map()                       # position of obstacles
-        self.lose = self.env.lose_map()                     # position of lose states
+        self.u_set = self.env.motions  # feasible input set
+        self.stateSpace = self.env.stateSpace  # state space
+        self.obs = self.env.obs_map()  # position of obstacles
+        self.lose = self.env.lose_map()  # position of lose states
 
         self.name1 = "SARSA, M=" + str(self.M)
 
@@ -33,7 +28,6 @@ class QLEARNING:
         self.path = self.extract_path(self.xI, self.xG, self.policy)
         self.plotting.animation(self.path, self.name1)
 
-
     def Monte_Carlo(self, xI, xG):
         """
         Monte_Carlo experiments
@@ -41,21 +35,21 @@ class QLEARNING:
         :return: Q_table, policy
         """
 
-        Q_table = self.table_init()                                                 # Q_table initialization
-        policy = {}                                                                 # policy table
+        Q_table = self.table_init()  # Q_table initialization
+        policy = {}  # policy table
 
-        for k in range(self.M):                                                     # iterations
-            x = self.state_init()                                                   # initial state
-            while x != xG:                                                          # stop condition
-                u = self.epsilon_greedy(int(np.argmax(Q_table[x])), self.epsilon)   # epsilon_greedy policy
-                x_next = self.move_next(x, self.u_set[u])                           # next state
-                reward = self.env.get_reward(x_next)                                # reward observed
+        for k in range(self.M):  # iterations
+            x = self.state_init()  # initial state
+            while x != xG:  # stop condition
+                u = self.epsilon_greedy(int(np.argmax(Q_table[x])), self.epsilon)  # epsilon_greedy policy
+                x_next = self.move_next(x, self.u_set[u])  # next state
+                reward = self.env.get_reward(x_next)  # reward observed
                 Q_table[x][u] = (1 - self.alpha) * Q_table[x][u] + \
                                 self.alpha * (reward + self.gamma * max(Q_table[x_next]))
                 x = x_next
 
         for x in Q_table:
-            policy[x] = int(np.argmax(Q_table[x]))                                  # extract policy
+            policy[x] = int(np.argmax(Q_table[x]))  # extract policy
 
         return Q_table, policy
 
@@ -89,7 +83,6 @@ class QLEARNING:
             if (i, j) not in self.obs:
                 return (i, j)
 
-
     def epsilon_greedy(self, u, error):
         """
         generate a policy using epsilon_greedy algorithm
@@ -103,14 +96,17 @@ class QLEARNING:
             u_e = u
             while u_e == u:
                 p = np.random.random_sample()
-                if p < 0.25: u_e = 0
-                elif p < 0.5: u_e = 1
-                elif p < 0.75: u_e = 2
-                else: u_e = 3
+                if p < 0.25:
+                    u_e = 0
+                elif p < 0.5:
+                    u_e = 1
+                elif p < 0.75:
+                    u_e = 2
+                else:
+                    u_e = 3
             return u_e
         return u
 
-
     def move_next(self, x, u):
         """
         get next state.

+ 23 - 24
Model-free Control/Sarsa.py

@@ -1,20 +1,15 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-"""
-@author: huiming zhou
-"""
-
 import env
 import plotting
 import motion_model
 
 import numpy as np
 
+
 class SARSA:
     def __init__(self, x_start, x_goal):
         self.xI, self.xG = x_start, x_goal
-        self.M = 500                                # iteration numbers
-        self.gamma = 0.9                            # discount factor
+        self.M = 500  # iteration numbers
+        self.gamma = 0.9  # discount factor
         self.alpha = 0.5
         self.epsilon = 0.1
 
@@ -22,10 +17,10 @@ class SARSA:
         self.motion = motion_model.Motion_model(self.xI, self.xG)
         self.plotting = plotting.Plotting(self.xI, self.xG)
 
-        self.u_set = self.env.motions               # feasible input set
-        self.stateSpace = self.env.stateSpace       # state space
-        self.obs = self.env.obs_map()               # position of obstacles
-        self.lose = self.env.lose_map()             # position of lose states
+        self.u_set = self.env.motions  # feasible input set
+        self.stateSpace = self.env.stateSpace  # state space
+        self.obs = self.env.obs_map()  # position of obstacles
+        self.lose = self.env.lose_map()  # position of lose states
 
         self.name1 = "Q-learning, M=" + str(self.M)
 
@@ -40,22 +35,22 @@ class SARSA:
         :return: Q_table, policy
         """
 
-        Q_table = self.table_init()                                             # Q_table initialization
-        policy = {}                                                             # policy table
+        Q_table = self.table_init()  # Q_table initialization
+        policy = {}  # policy table
 
-        for k in range(self.M):                                                 # iterations
-            x = self.state_init()                                               # initial state
+        for k in range(self.M):  # iterations
+            x = self.state_init()  # initial state
             u = self.epsilon_greedy(int(np.argmax(Q_table[x])), self.epsilon)
-            while x != xG:                                                      # stop condition
-                x_next = self.move_next(x, self.u_set[u])                       # next state
-                reward = self.env.get_reward(x_next)                            # reward observed
+            while x != xG:  # stop condition
+                x_next = self.move_next(x, self.u_set[u])  # next state
+                reward = self.env.get_reward(x_next)  # reward observed
                 u_next = self.epsilon_greedy(int(np.argmax(Q_table[x_next])), self.epsilon)
                 Q_table[x][u] = (1 - self.alpha) * Q_table[x][u] + \
                                 self.alpha * (reward + self.gamma * Q_table[x_next][u_next])
                 x, u = x_next, u_next
 
         for x in Q_table:
-            policy[x] = int(np.argmax(Q_table[x]))                              # extract policy
+            policy[x] = int(np.argmax(Q_table[x]))  # extract policy
 
         return Q_table, policy
 
@@ -102,10 +97,14 @@ class SARSA:
             u_e = u
             while u_e == u:
                 p = np.random.random_sample()
-                if p < 0.25: u_e = 0
-                elif p < 0.5: u_e = 1
-                elif p < 0.75: u_e = 2
-                else: u_e = 3
+                if p < 0.25:
+                    u_e = 0
+                elif p < 0.5:
+                    u_e = 1
+                elif p < 0.75:
+                    u_e = 2
+                else:
+                    u_e = 3
             return u_e
         return u
 

+ 1 - 7
Model-free Control/env.py

@@ -1,10 +1,4 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-"""
-@author: huiming zhou
-"""
-
-class Env():
+class Env:
     def __init__(self, xI, xG):
         self.x_range = 14                           # size of background
         self.y_range = 6

+ 2 - 7
Model-free Control/motion_model.py

@@ -1,11 +1,6 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-"""
-@author: huiming zhou
-"""
-
 import env
 
+
 class Motion_model():
     def __init__(self, xI, xG):
         self.env = env.Env(xI, xG)
@@ -40,4 +35,4 @@ class Motion_model():
             else:
                 x_next.append(x_check)
 
-        return x_next, p_next
+        return x_next, p_next

+ 6 - 11
Model-free Control/plotting.py

@@ -1,12 +1,7 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-"""
-@author: huiming zhou
-"""
-
 import matplotlib.pyplot as plt
 import env
 
+
 class Plotting():
     def __init__(self, xI, xG):
         self.xI, self.xG = xI, xG
@@ -39,10 +34,10 @@ class Plotting():
         obs_x = [self.obs[i][0] for i in range(len(self.obs))]
         obs_y = [self.obs[i][1] for i in range(len(self.obs))]
 
-        plt.plot(self.xI[0], self.xI[1], "bs", ms = 24)
-        plt.plot(self.xG[0], self.xG[1], "gs", ms = 24)
+        plt.plot(self.xI[0], self.xI[1], "bs", ms=24)
+        plt.plot(self.xG[0], self.xG[1], "gs", ms=24)
 
-        plt.plot(obs_x, obs_y, "sk", ms = 24)
+        plt.plot(obs_x, obs_y, "sk", ms=24)
         plt.title(name)
         plt.axis("equal")
 
@@ -55,7 +50,7 @@ class Plotting():
         lose_x = [self.lose[i][0] for i in range(len(self.lose))]
         lose_y = [self.lose[i][1] for i in range(len(self.lose))]
 
-        plt.plot(lose_x, lose_y, color = '#A52A2A', marker = 's', ms = 24)
+        plt.plot(lose_x, lose_y, color='#A52A2A', marker='s', ms=24)
 
     def plot_visited(self, visited):
         """
@@ -88,7 +83,7 @@ class Plotting():
         path.remove(self.xG)
 
         for x in path:
-            plt.plot(x[0], x[1], color='#808080', marker='o', ms = 23)
+            plt.plot(x[0], x[1], color='#808080', marker='o', ms=23)
             plt.gcf().canvas.mpl_connect('key_release_event', lambda event:
             [exit(0) if event.key == 'escape' else None])
             plt.pause(0.001)

+ 1 - 1
Search-based Planning/.idea/Search-based Planning.iml

@@ -2,7 +2,7 @@
 <module type="PYTHON_MODULE" version="4">
   <component name="NewModuleRootManager">
     <content url="file://$MODULE_DIR$" />
-    <orderEntry type="jdk" jdkName="Python 3.5" jdkType="Python SDK" />
+    <orderEntry type="jdk" jdkName="Python 3.7 (Search-based Planning)" jdkType="Python SDK" />
     <orderEntry type="sourceFolder" forTests="false" />
   </component>
 </module>

+ 1 - 1
Search-based Planning/.idea/misc.xml

@@ -1,4 +1,4 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <project version="4">
-  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.5" project-jdk-type="Python SDK" />
+  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.7 (Search-based Planning)" project-jdk-type="Python SDK" />
 </project>

+ 12 - 21
Search-based Planning/.idea/workspace.xml

@@ -2,16 +2,7 @@
 <project version="4">
   <component name="ChangeListManager">
     <list default="true" id="025aff36-a6aa-4945-ab7e-b2c625055f47" name="Default Changelist" comment="">
-      <change beforePath="$PROJECT_DIR$/.idea/Search-based Planning.iml" beforeDir="false" afterPath="$PROJECT_DIR$/.idea/Search-based Planning.iml" afterDir="false" />
-      <change beforePath="$PROJECT_DIR$/.idea/misc.xml" beforeDir="false" afterPath="$PROJECT_DIR$/.idea/misc.xml" afterDir="false" />
       <change beforePath="$PROJECT_DIR$/.idea/workspace.xml" beforeDir="false" afterPath="$PROJECT_DIR$/.idea/workspace.xml" afterDir="false" />
-      <change beforePath="$PROJECT_DIR$/a_star.py" beforeDir="false" afterPath="$PROJECT_DIR$/a_star.py" afterDir="false" />
-      <change beforePath="$PROJECT_DIR$/bfs.py" beforeDir="false" afterPath="$PROJECT_DIR$/bfs.py" afterDir="false" />
-      <change beforePath="$PROJECT_DIR$/dfs.py" beforeDir="false" afterPath="$PROJECT_DIR$/dfs.py" afterDir="false" />
-      <change beforePath="$PROJECT_DIR$/dijkstra.py" beforeDir="false" afterPath="$PROJECT_DIR$/dijkstra.py" afterDir="false" />
-      <change beforePath="$PROJECT_DIR$/env.py" beforeDir="false" afterPath="$PROJECT_DIR$/env.py" afterDir="false" />
-      <change beforePath="$PROJECT_DIR$/plotting.py" beforeDir="false" afterPath="$PROJECT_DIR$/plotting.py" afterDir="false" />
-      <change beforePath="$PROJECT_DIR$/queue.py" beforeDir="false" afterPath="$PROJECT_DIR$/queue.py" afterDir="false" />
     </list>
     <option name="SHOW_DIALOG" value="false" />
     <option name="HIGHLIGHT_CONFLICTS" value="true" />
@@ -44,7 +35,7 @@
     <property name="restartRequiresConfirmation" value="false" />
     <property name="settings.editor.selected.configurable" value="com.jetbrains.python.configuration.PyActiveSdkModuleConfigurable" />
   </component>
-  <component name="RunManager" selected="Python.bfs">
+  <component name="RunManager" selected="Python.a_star">
     <configuration name="a_star" type="PythonConfigurationType" factoryName="Python" temporary="true">
       <module name="Search-based Planning" />
       <option name="INTERPRETER_OPTIONS" value="" />
@@ -152,8 +143,8 @@
     </configuration>
     <recent_temporary>
       <list>
-        <item itemvalue="Python.bfs" />
         <item itemvalue="Python.a_star" />
+        <item itemvalue="Python.bfs" />
         <item itemvalue="Python.dfs" />
         <item itemvalue="Python.dijkstra" />
         <item itemvalue="Python.searching" />
@@ -186,25 +177,25 @@
     <option name="oldMeFiltersMigrated" value="true" />
   </component>
   <component name="WindowStateProjectService">
-    <state width="1832" height="296" key="GridCell.Tab.0.bottom" timestamp="1592800940514">
+    <state width="1832" height="296" key="GridCell.Tab.0.bottom" timestamp="1592801510790">
       <screen x="1920" y="0" width="1920" height="1080" />
     </state>
-    <state width="1832" height="296" key="GridCell.Tab.0.bottom/65.24.1855.1056/1920.0.1920.1080@1920.0.1920.1080" timestamp="1592800940514" />
-    <state width="1832" height="296" key="GridCell.Tab.0.center" timestamp="1592800940514">
+    <state width="1832" height="296" key="GridCell.Tab.0.bottom/65.24.1855.1056/1920.0.1920.1080@1920.0.1920.1080" timestamp="1592801510790" />
+    <state width="1832" height="296" key="GridCell.Tab.0.center" timestamp="1592801510790">
       <screen x="1920" y="0" width="1920" height="1080" />
     </state>
-    <state width="1832" height="296" key="GridCell.Tab.0.center/65.24.1855.1056/1920.0.1920.1080@1920.0.1920.1080" timestamp="1592800940514" />
-    <state width="1832" height="296" key="GridCell.Tab.0.left" timestamp="1592800940514">
+    <state width="1832" height="296" key="GridCell.Tab.0.center/65.24.1855.1056/1920.0.1920.1080@1920.0.1920.1080" timestamp="1592801510790" />
+    <state width="1832" height="296" key="GridCell.Tab.0.left" timestamp="1592801510789">
       <screen x="1920" y="0" width="1920" height="1080" />
     </state>
-    <state width="1832" height="296" key="GridCell.Tab.0.left/65.24.1855.1056/1920.0.1920.1080@1920.0.1920.1080" timestamp="1592800940514" />
-    <state width="1832" height="296" key="GridCell.Tab.0.right" timestamp="1592800940514">
+    <state width="1832" height="296" key="GridCell.Tab.0.left/65.24.1855.1056/1920.0.1920.1080@1920.0.1920.1080" timestamp="1592801510789" />
+    <state width="1832" height="296" key="GridCell.Tab.0.right" timestamp="1592801510790">
       <screen x="1920" y="0" width="1920" height="1080" />
     </state>
-    <state width="1832" height="296" key="GridCell.Tab.0.right/65.24.1855.1056/1920.0.1920.1080@1920.0.1920.1080" timestamp="1592800940514" />
-    <state x="2406" y="174" key="SettingsEditor" timestamp="1592800563456">
+    <state width="1832" height="296" key="GridCell.Tab.0.right/65.24.1855.1056/1920.0.1920.1080@1920.0.1920.1080" timestamp="1592801510790" />
+    <state x="2406" y="174" key="SettingsEditor" timestamp="1592801555194">
       <screen x="1920" y="0" width="1920" height="1080" />
     </state>
-    <state x="2406" y="174" key="SettingsEditor/65.24.1855.1056/1920.0.1920.1080@1920.0.1920.1080" timestamp="1592800563456" />
+    <state x="2406" y="174" key="SettingsEditor/65.24.1855.1056/1920.0.1920.1080@1920.0.1920.1080" timestamp="1592801555194" />
   </component>
 </project>

BIN
Search-based Planning/__pycache__/env.cpython-37.pyc


BIN
Search-based Planning/__pycache__/plotting.cpython-35.pyc


BIN
Search-based Planning/__pycache__/plotting.cpython-37.pyc


BIN
Search-based Planning/__pycache__/queue.cpython-35.pyc


BIN
Search-based Planning/__pycache__/queue.cpython-37.pyc


+ 1 - 2
Stochastic Shortest Path/.idea/Stochastic Shortest Path.iml

@@ -2,11 +2,10 @@
 <module type="PYTHON_MODULE" version="4">
   <component name="NewModuleRootManager">
     <content url="file://$MODULE_DIR$" />
-    <orderEntry type="inheritedJdk" />
+    <orderEntry type="jdk" jdkName="Python 3.7 (Search-based Planning)" jdkType="Python SDK" />
     <orderEntry type="sourceFolder" forTests="false" />
   </component>
   <component name="TestRunnerService">
-    <option name="projectConfiguration" value="pytest" />
     <option name="PROJECT_TEST_RUNNER" value="pytest" />
   </component>
 </module>

+ 1 - 1
Stochastic Shortest Path/.idea/misc.xml

@@ -1,4 +1,4 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <project version="4">
-  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.7" project-jdk-type="Python SDK" />
+  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.7 (Search-based Planning)" project-jdk-type="Python SDK" />
 </project>

+ 17 - 28
Stochastic Shortest Path/Q-policy_iteration.py

@@ -1,9 +1,3 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-"""
-@author: huiming zhou
-"""
-
 import env
 import plotting
 import motion_model
@@ -12,20 +6,21 @@ import numpy as np
 import copy
 import sys
 
+
 class Q_policy_iteration:
     def __init__(self, x_start, x_goal):
         self.xI, self.xG = x_start, x_goal
-        self.e = 0.001                                  # threshold for convergence
-        self.gamma = 0.9                                # discount factor
+        self.e = 0.001  # threshold for convergence
+        self.gamma = 0.9  # discount factor
 
-        self.env = env.Env(self.xI, self.xG)                        # class Env
-        self.motion = motion_model.Motion_model(self.xI, self.xG)   # class Motion_model
-        self.plotting = plotting.Plotting(self.xI, self.xG)         # class Plotting
+        self.env = env.Env(self.xI, self.xG)  # class Env
+        self.motion = motion_model.Motion_model(self.xI, self.xG)  # class Motion_model
+        self.plotting = plotting.Plotting(self.xI, self.xG)  # class Plotting
 
-        self.u_set = self.env.motions                               # feasible input set
-        self.stateSpace = self.env.stateSpace                       # state space
-        self.obs = self.env.obs_map()                               # position of obstacles
-        self.lose = self.env.lose_map()                             # position of lose states
+        self.u_set = self.env.motions  # feasible input set
+        self.stateSpace = self.env.stateSpace  # state space
+        self.obs = self.env.obs_map()  # position of obstacles
+        self.lose = self.env.lose_map()  # position of lose states
 
         self.name1 = "Q-policy_iteration, gamma=" + str(self.gamma)
 
@@ -33,7 +28,6 @@ class Q_policy_iteration:
         self.path = self.extract_path(self.xI, self.xG, self.policy)
         self.plotting.animation(self.path, self.name1)
 
-
     def policy_evaluation(self, policy, value):
         """
         evaluation process using current policy.
@@ -45,7 +39,7 @@ class Q_policy_iteration:
 
         delta = sys.maxsize
 
-        while delta > self.e:               # convergence condition
+        while delta > self.e:  # convergence condition
             x_value = 0
             for x in value:
                 if x not in self.xG:
@@ -60,7 +54,6 @@ class Q_policy_iteration:
 
         return value
 
-
     def policy_improvement(self, policy, value):
         """
         policy improvement process.
@@ -76,7 +69,6 @@ class Q_policy_iteration:
 
         return policy
 
-
     def iteration(self):
         """
         Q-policy iteration
@@ -88,21 +80,20 @@ class Q_policy_iteration:
         count = 0
 
         for x in self.stateSpace:
-            Q_table[x] = [0, 0, 0, 0]              # initialize Q_value table
-            policy[x] = 0                          # initialize policy table
+            Q_table[x] = [0, 0, 0, 0]  # initialize Q_value table
+            policy[x] = 0  # initialize policy table
 
         while True:
             count += 1
             policy_back = copy.deepcopy(policy)
-            Q_table = self.policy_evaluation(policy, Q_table)   # evaluation process
-            policy = self.policy_improvement(policy, Q_table)   # improvement process
-            if policy_back == policy: break                     # convergence condition
+            Q_table = self.policy_evaluation(policy, Q_table)  # evaluation process
+            policy = self.policy_improvement(policy, Q_table)  # improvement process
+            if policy_back == policy: break  # convergence condition
 
         self.message(count)
 
         return Q_table, policy
 
-
     def cal_Q_value(self, x, p, policy, table):
         """
         cal Q_value.
@@ -114,13 +105,12 @@ class Q_policy_iteration:
         """
 
         value = 0
-        reward = self.env.get_reward(x)                  # get reward of next state
+        reward = self.env.get_reward(x)  # get reward of next state
         for i in range(len(x)):
             value += p[i] * (reward[i] + self.gamma * table[x[i]][policy[x[i]]])
 
         return value
 
-
     def extract_path(self, xI, xG, policy):
         """
         extract path from converged policy.
@@ -143,7 +133,6 @@ class Q_policy_iteration:
                 x = x_next
         return path
 
-
     def message(self, count):
         """
         print important message.

+ 13 - 22
Stochastic Shortest Path/Q-value_iteration.py

@@ -1,9 +1,3 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-"""
-@author: huiming zhou
-"""
-
 import env
 import plotting
 import motion_model
@@ -11,20 +5,21 @@ import motion_model
 import numpy as np
 import sys
 
+
 class Q_value_iteration:
     def __init__(self, x_start, x_goal):
         self.xI, self.xG = x_start, x_goal
-        self.e = 0.001                                          # threshold for convergence
-        self.gamma = 0.9                                        # discount factor
+        self.e = 0.001  # threshold for convergence
+        self.gamma = 0.9  # discount factor
 
-        self.env = env.Env(self.xI, self.xG)                            # class Env
-        self.motion = motion_model.Motion_model(self.xI, self.xG)       # class Motion_model
-        self.plotting = plotting.Plotting(self.xI, self.xG)             # class Plotting
+        self.env = env.Env(self.xI, self.xG)  # class Env
+        self.motion = motion_model.Motion_model(self.xI, self.xG)  # class Motion_model
+        self.plotting = plotting.Plotting(self.xI, self.xG)  # class Plotting
 
-        self.u_set = self.env.motions                                   # feasible input set
-        self.stateSpace = self.env.stateSpace                           # state space
-        self.obs = self.env.obs_map()                                   # position of obstacles
-        self.lose = self.env.lose_map()                                 # position of lose states
+        self.u_set = self.env.motions  # feasible input set
+        self.stateSpace = self.env.stateSpace  # state space
+        self.obs = self.env.obs_map()  # position of obstacles
+        self.lose = self.env.lose_map()  # position of lose states
 
         self.name1 = "Q-value_iteration, gamma=" + str(self.gamma)
         self.name2 = "converge process, e=" + str(self.e)
@@ -34,7 +29,6 @@ class Q_value_iteration:
         self.plotting.animation(self.path, self.name1)
         self.plotting.plot_diff(self.diff, self.name2)
 
-
     def iteration(self, xI, xG):
         """
         Q_value_iteration
@@ -48,9 +42,9 @@ class Q_value_iteration:
         count = 0
 
         for x in self.stateSpace:
-            Q_table[x] = [0, 0, 0, 0]                       # initialize Q_table
+            Q_table[x] = [0, 0, 0, 0]  # initialize Q_table
 
-        while delta > self.e:                               # convergence condition
+        while delta > self.e:  # convergence condition
             count += 1
             x_value = 0
             for x in self.stateSpace:
@@ -73,7 +67,6 @@ class Q_value_iteration:
 
         return Q_table, policy, diff
 
-
     def cal_Q_value(self, x, p, table):
         """
         cal Q_value.
@@ -85,13 +78,12 @@ class Q_value_iteration:
         """
 
         value = 0
-        reward = self.env.get_reward(x)                  # get reward of next state
+        reward = self.env.get_reward(x)  # get reward of next state
         for i in range(len(x)):
             value += p[i] * (reward[i] + self.gamma * max(table[x[i]]))
 
         return value
 
-
     def extract_path(self, xI, xG, policy):
         """
         extract path from converged policy.
@@ -114,7 +106,6 @@ class Q_value_iteration:
                 x = x_next
         return path
 
-
     def message(self, count):
         """
         print important message.

BIN
Stochastic Shortest Path/__pycache__/env.cpython-37.pyc


BIN
Stochastic Shortest Path/__pycache__/motion_model.cpython-37.pyc


BIN
Stochastic Shortest Path/__pycache__/plotting.cpython-37.pyc


+ 5 - 15
Stochastic Shortest Path/env.py

@@ -1,12 +1,6 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-"""
-@author: huiming zhou
-"""
-
-class Env():
+class Env:
     def __init__(self, xI, xG):
-        self.x_range = 51           # size of background
+        self.x_range = 51  # size of background
         self.y_range = 31
         self.motions = [(1, 0), (-1, 0), (0, 1), (0, -1)]
         self.xI = xI
@@ -15,7 +9,6 @@ class Env():
         self.lose = self.lose_map()
         self.stateSpace = self.state_space()
 
-
     def obs_map(self):
         """
         Initialize obstacles' positions
@@ -48,7 +41,6 @@ class Env():
 
         return obs
 
-
     def lose_map(self):
         """
         Initialize losing states' positions
@@ -61,7 +53,6 @@ class Env():
 
         return lose
 
-
     def state_space(self):
         """
         generate state space
@@ -76,7 +67,6 @@ class Env():
 
         return state_space
 
-
     def get_reward(self, x_next):
         """
         calculate reward of next state
@@ -88,10 +78,10 @@ class Env():
         reward = []
         for x in x_next:
             if x in self.xG:
-                reward.append(10)       # reward : 10, for goal states
+                reward.append(10)  # reward : 10, for goal states
             elif x in self.lose:
-                reward.append(-10)      # reward : -10, for lose states
+                reward.append(-10)  # reward : -10, for lose states
             else:
-                reward.append(0)        # reward : 0, for other states
+                reward.append(0)  # reward : 0, for other states
 
         return reward

+ 2 - 8
Stochastic Shortest Path/motion_model.py

@@ -1,17 +1,11 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-"""
-@author: huiming zhou
-"""
-
 import env
 
+
 class Motion_model():
     def __init__(self, xI, xG):
         self.env = env.Env(xI, xG)
         self.obs = self.env.obs_map()
 
-
     def move_next(self, x, u, eta=0.2):
         """
         Motion model of robots,
@@ -41,4 +35,4 @@ class Motion_model():
             else:
                 x_next.append(x_check)
 
-        return x_next, p_next
+        return x_next, p_next

+ 2 - 13
Stochastic Shortest Path/plotting.py

@@ -1,12 +1,7 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-"""
-@author: huiming zhou
-"""
-
 import matplotlib.pyplot as plt
 import env
 
+
 class Plotting():
     def __init__(self, xI, xG):
         self.xI, self.xG = xI, xG
@@ -14,7 +9,6 @@ class Plotting():
         self.obs = self.env.obs_map()
         self.lose = self.env.lose_map()
 
-
     def animation(self, path, name):
         """
         animation.
@@ -29,7 +23,6 @@ class Plotting():
         self.plot_lose()
         self.plot_path(path)
 
-
     def plot_grid(self, name):
         """
         plot the obstacles in environment.
@@ -49,7 +42,6 @@ class Plotting():
         plt.title(name)
         plt.axis("equal")
 
-
     def plot_lose(self):
         """
         plot losing states in environment.
@@ -59,8 +51,7 @@ class Plotting():
         lose_x = [self.lose[i][0] for i in range(len(self.lose))]
         lose_y = [self.lose[i][1] for i in range(len(self.lose))]
 
-        plt.plot(lose_x, lose_y, color = '#A52A2A', marker = 's')
-
+        plt.plot(lose_x, lose_y, color='#A52A2A', marker='s')
 
     def plot_visited(self, visited):
         """
@@ -88,7 +79,6 @@ class Plotting():
 
             if count % length == 0: plt.pause(0.001)
 
-
     def plot_path(self, path):
         path.remove(self.xI)
         for x in self.xG:
@@ -103,7 +93,6 @@ class Plotting():
         plt.show()
         plt.pause(0.5)
 
-
     def plot_diff(self, diff, name):
         plt.figure(2)
         plt.title(name, fontdict=None)

+ 15 - 26
Stochastic Shortest Path/policy_iteration.py

@@ -1,9 +1,3 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-"""
-@author: huiming zhou
-"""
-
 import env
 import plotting
 import motion_model
@@ -12,20 +6,21 @@ import numpy as np
 import sys
 import copy
 
+
 class Policy_iteration:
     def __init__(self, x_start, x_goal):
         self.xI, self.xG = x_start, x_goal
-        self.e = 0.001                                      # threshold for convergence
-        self.gamma = 0.9                                    # discount factor
+        self.e = 0.001  # threshold for convergence
+        self.gamma = 0.9  # discount factor
 
         self.env = env.Env(self.xI, self.xG)
         self.motion = motion_model.Motion_model(self.xI, self.xG)
         self.plotting = plotting.Plotting(self.xI, self.xG)
 
-        self.u_set = self.env.motions                       # feasible input set
-        self.stateSpace = self.env.stateSpace               # state space
-        self.obs = self.env.obs_map()                       # position of obstacles
-        self.lose = self.env.lose_map()                     # position of lose states
+        self.u_set = self.env.motions  # feasible input set
+        self.stateSpace = self.env.stateSpace  # state space
+        self.obs = self.env.obs_map()  # position of obstacles
+        self.lose = self.env.lose_map()  # position of lose states
 
         self.name1 = "policy_iteration, gamma=" + str(self.gamma)
 
@@ -33,7 +28,6 @@ class Policy_iteration:
         self.path = self.extract_path(self.xI, self.xG, self.policy)
         self.plotting.animation(self.path, self.name1)
 
-
     def policy_evaluation(self, policy, value):
         """
         Evaluate current policy.
@@ -45,7 +39,7 @@ class Policy_iteration:
 
         delta = sys.maxsize
 
-        while delta > self.e:                               # convergence condition
+        while delta > self.e:  # convergence condition
             x_value = 0
             for x in self.stateSpace:
                 if x not in self.xG:
@@ -59,7 +53,6 @@ class Policy_iteration:
 
         return value
 
-
     def policy_improvement(self, policy, value):
         """
         Improve policy using current value table.
@@ -79,7 +72,6 @@ class Policy_iteration:
 
         return policy
 
-
     def iteration(self):
         """
         polity iteration: using evaluate and improvement process until convergence.
@@ -91,21 +83,20 @@ class Policy_iteration:
         count = 0
 
         for x in self.stateSpace:
-            value_table[x] = 0                                              # initialize value table
-            policy[x] = self.u_set[0]                                       # initialize policy table
+            value_table[x] = 0  # initialize value table
+            policy[x] = self.u_set[0]  # initialize policy table
 
         while True:
             count += 1
             policy_back = copy.deepcopy(policy)
-            value_table = self.policy_evaluation(policy, value_table)       # evaluation process
-            policy = self.policy_improvement(policy, value_table)           # policy improvement process
-            if policy_back == policy: break                                 # convergence condition
+            value_table = self.policy_evaluation(policy, value_table)  # evaluation process
+            policy = self.policy_improvement(policy, value_table)  # policy improvement process
+            if policy_back == policy: break  # convergence condition
 
         self.message(count)
 
         return value_table, policy
 
-
     def cal_Q_value(self, x, p, table):
         """
         cal Q_value.
@@ -117,13 +108,12 @@ class Policy_iteration:
         """
 
         value = 0
-        reward = self.env.get_reward(x)                                 # get reward of next state
+        reward = self.env.get_reward(x)  # get reward of next state
         for i in range(len(x)):
-            value += p[i] * (reward[i] + self.gamma * table[x[i]])      # cal Q-value
+            value += p[i] * (reward[i] + self.gamma * table[x[i]])  # cal Q-value
 
         return value
 
-
     def extract_path(self, xI, xG, policy):
         """
         extract path from converged policy.
@@ -146,7 +136,6 @@ class Policy_iteration:
                 x = x_next
         return path
 
-
     def message(self, count):
         """
         print important message.

+ 28 - 37
Stochastic Shortest Path/value_iteration.py

@@ -1,9 +1,3 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-"""
-@author: huiming zhou
-"""
-
 import env
 import plotting
 import motion_model
@@ -11,20 +5,21 @@ import motion_model
 import numpy as np
 import sys
 
+
 class Value_iteration:
     def __init__(self, x_start, x_goal):
         self.xI, self.xG = x_start, x_goal
-        self.e = 0.001                                              # threshold for convergence
-        self.gamma = 0.9                                            # discount factor
+        self.e = 0.001  # threshold for convergence
+        self.gamma = 0.9  # discount factor
 
-        self.env = env.Env(self.xI, self.xG)                        # class Env
-        self.motion = motion_model.Motion_model(self.xI, self.xG)   # class Motion_model
-        self.plotting = plotting.Plotting(self.xI, self.xG)         # class Plotting
+        self.env = env.Env(self.xI, self.xG)  # class Env
+        self.motion = motion_model.Motion_model(self.xI, self.xG)  # class Motion_model
+        self.plotting = plotting.Plotting(self.xI, self.xG)  # class Plotting
 
-        self.u_set = self.env.motions                               # feasible input set
-        self.stateSpace = self.env.stateSpace                       # state space
-        self.obs = self.env.obs_map()                               # position of obstacles
-        self.lose = self.env.lose_map()                             # position of lose states
+        self.u_set = self.env.motions  # feasible input set
+        self.stateSpace = self.env.stateSpace  # state space
+        self.obs = self.env.obs_map()  # position of obstacles
+        self.lose = self.env.lose_map()  # position of lose states
 
         self.name1 = "value_iteration, gamma=" + str(self.gamma)
         self.name2 = "converge process, e=" + str(self.e)
@@ -34,7 +29,6 @@ class Value_iteration:
         self.plotting.animation(self.path, self.name1)
         self.plotting.plot_diff(self.diff, self.name2)
 
-
     def iteration(self, xI, xG):
         """
         value_iteration.
@@ -42,36 +36,35 @@ class Value_iteration:
         :return: converged value table, optimal policy and variation of difference,
         """
 
-        value_table = {}                        # value table
-        policy = {}                             # policy
-        diff = []                               # maximum difference between two successive iteration
-        delta = sys.maxsize                     # initialize maximum difference
-        count = 0                               # iteration times
+        value_table = {}  # value table
+        policy = {}  # policy
+        diff = []  # maximum difference between two successive iteration
+        delta = sys.maxsize  # initialize maximum difference
+        count = 0  # iteration times
 
-        for x in self.stateSpace:               # initialize value table for feasible states
+        for x in self.stateSpace:  # initialize value table for feasible states
             value_table[x] = 0
 
-        while delta > self.e:                   # converged condition
+        while delta > self.e:  # converged condition
             count += 1
             x_value = 0
             for x in self.stateSpace:
                 if x not in xG:
                     value_list = []
                     for u in self.u_set:
-                        [x_next, p_next] = self.motion.move_next(x, u)                      # recall motion model
-                        value_list.append(self.cal_Q_value(x_next, p_next, value_table))    # cal Q value
-                    policy[x] = self.u_set[int(np.argmax(value_list))]                      # update policy
-                    v_diff = abs(value_table[x] - max(value_list))                          # maximum difference
-                    value_table[x] = max(value_list)                                        # update value table
+                        [x_next, p_next] = self.motion.move_next(x, u)  # recall motion model
+                        value_list.append(self.cal_Q_value(x_next, p_next, value_table))  # cal Q value
+                    policy[x] = self.u_set[int(np.argmax(value_list))]  # update policy
+                    v_diff = abs(value_table[x] - max(value_list))  # maximum difference
+                    value_table[x] = max(value_list)  # update value table
                     x_value = max(x_value, v_diff)
-            delta = x_value                                                                 # update delta
+            delta = x_value  # update delta
             diff.append(delta)
 
-        self.message(count)                                                                 # print messages
+        self.message(count)  # print messages
 
         return value_table, policy, diff
 
-
     def cal_Q_value(self, x, p, table):
         """
         cal Q_value.
@@ -83,13 +76,12 @@ class Value_iteration:
         """
 
         value = 0
-        reward = self.env.get_reward(x)                                 # get reward of next state
+        reward = self.env.get_reward(x)  # get reward of next state
         for i in range(len(x)):
-            value += p[i] * (reward[i] + self.gamma * table[x[i]])      # cal Q-value
+            value += p[i] * (reward[i] + self.gamma * table[x[i]])  # cal Q-value
 
         return value
 
-
     def extract_path(self, xI, xG, policy):
         """
         extract path from converged policy.
@@ -112,7 +104,6 @@ class Value_iteration:
                 x = x_next
         return path
 
-
     def message(self, count):
         """
         print important message.
@@ -129,7 +120,7 @@ class Value_iteration:
 
 
 if __name__ == '__main__':
-    x_Start = (5, 5)                    # starting state
-    x_Goal = [(49, 5), (49, 25)]        # goal states
+    x_Start = (5, 5)  # starting state
+    x_Goal = [(49, 5), (49, 25)]  # goal states
 
     VI = Value_iteration(x_Start, x_Goal)