|
@@ -1,9 +1,3 @@
|
|
|
-#!/usr/bin/env python3
|
|
|
|
|
-# -*- coding: utf-8 -*-
|
|
|
|
|
-"""
|
|
|
|
|
-@author: huiming zhou
|
|
|
|
|
-"""
|
|
|
|
|
-
|
|
|
|
|
import env
|
|
import env
|
|
|
import plotting
|
|
import plotting
|
|
|
import motion_model
|
|
import motion_model
|
|
@@ -11,20 +5,21 @@ import motion_model
|
|
|
import numpy as np
|
|
import numpy as np
|
|
|
import sys
|
|
import sys
|
|
|
|
|
|
|
|
|
|
+
|
|
|
class Value_iteration:
|
|
class Value_iteration:
|
|
|
def __init__(self, x_start, x_goal):
|
|
def __init__(self, x_start, x_goal):
|
|
|
self.xI, self.xG = x_start, x_goal
|
|
self.xI, self.xG = x_start, x_goal
|
|
|
- self.e = 0.001 # threshold for convergence
|
|
|
|
|
- self.gamma = 0.9 # discount factor
|
|
|
|
|
|
|
+ self.e = 0.001 # threshold for convergence
|
|
|
|
|
+ self.gamma = 0.9 # discount factor
|
|
|
|
|
|
|
|
- self.env = env.Env(self.xI, self.xG) # class Env
|
|
|
|
|
- self.motion = motion_model.Motion_model(self.xI, self.xG) # class Motion_model
|
|
|
|
|
- self.plotting = plotting.Plotting(self.xI, self.xG) # class Plotting
|
|
|
|
|
|
|
+ self.env = env.Env(self.xI, self.xG) # class Env
|
|
|
|
|
+ self.motion = motion_model.Motion_model(self.xI, self.xG) # class Motion_model
|
|
|
|
|
+ self.plotting = plotting.Plotting(self.xI, self.xG) # class Plotting
|
|
|
|
|
|
|
|
- self.u_set = self.env.motions # feasible input set
|
|
|
|
|
- self.stateSpace = self.env.stateSpace # state space
|
|
|
|
|
- self.obs = self.env.obs_map() # position of obstacles
|
|
|
|
|
- self.lose = self.env.lose_map() # position of lose states
|
|
|
|
|
|
|
+ self.u_set = self.env.motions # feasible input set
|
|
|
|
|
+ self.stateSpace = self.env.stateSpace # state space
|
|
|
|
|
+ self.obs = self.env.obs_map() # position of obstacles
|
|
|
|
|
+ self.lose = self.env.lose_map() # position of lose states
|
|
|
|
|
|
|
|
self.name1 = "value_iteration, gamma=" + str(self.gamma)
|
|
self.name1 = "value_iteration, gamma=" + str(self.gamma)
|
|
|
self.name2 = "converge process, e=" + str(self.e)
|
|
self.name2 = "converge process, e=" + str(self.e)
|
|
@@ -34,7 +29,6 @@ class Value_iteration:
|
|
|
self.plotting.animation(self.path, self.name1)
|
|
self.plotting.animation(self.path, self.name1)
|
|
|
self.plotting.plot_diff(self.diff, self.name2)
|
|
self.plotting.plot_diff(self.diff, self.name2)
|
|
|
|
|
|
|
|
-
|
|
|
|
|
def iteration(self, xI, xG):
    """
    Run value iteration until the value table converges.

    Performs repeated in-place (Gauss-Seidel style) Bellman sweeps over
    ``self.stateSpace``: for every non-goal state, evaluates the Q-value of
    each feasible input via ``self.cal_Q_value`` and keeps the greedy
    maximum.  Convergence is declared once the largest single-state change
    in one sweep drops to ``self.e`` or below.

    :param xI: start state (not used by the sweep; kept for interface parity)
    :param xG: collection of goal states, excluded from the Bellman update
    :return: tuple ``(value_table, policy, diff)`` — converged value table,
             greedy policy, and the per-sweep maximum differences
    """
    value_table = {}        # state -> current value estimate
    policy = {}             # state -> greedy input
    diff = []               # max change per sweep (for convergence plotting)
    delta = sys.maxsize     # max change of the most recent sweep
    count = 0               # number of sweeps performed

    # Initialize value table for all feasible states.
    for x in self.stateSpace:
        value_table[x] = 0

    while delta > self.e:   # stop once the largest update is within e
        count += 1
        x_value = 0
        for x in self.stateSpace:
            if x not in xG:  # goal states keep their terminal value
                value_list = []
                for u in self.u_set:
                    # Motion model yields candidate next states and their
                    # transition probabilities for input u.
                    [x_next, p_next] = self.motion.move_next(x, u)
                    value_list.append(self.cal_Q_value(x_next, p_next, value_table))
                # Hoisted: the original recomputed max(value_list) twice.
                best = max(value_list)
                policy[x] = self.u_set[int(np.argmax(value_list))]  # greedy input
                v_diff = abs(value_table[x] - best)
                value_table[x] = best
                x_value = max(x_value, v_diff)
        delta = x_value      # largest single-state change this sweep
        diff.append(delta)

    self.message(count)      # report iteration statistics
    return value_table, policy, diff
|
|
|
|
|
|
|
|
-
|
|
|
|
|
def cal_Q_value(self, x, p, table):
    """
    Compute the Q-value of an action: the expectation over candidate next
    states of immediate reward plus the discounted current value estimate.

    :param x: list of candidate next states
    :param p: transition probabilities aligned element-wise with ``x``
    :param table: current value table mapping state -> value
    :return: expected discounted return (a number)
    """
    reward = self.env.get_reward(x)   # reward of each candidate next state
    gamma = self.gamma                # hoist attribute lookup out of the loop
    # zip keeps states, probabilities and rewards aligned without indexing.
    return sum(p_i * (r_i + gamma * table[x_i])
               for x_i, p_i, r_i in zip(x, p, reward))
|
|
|
|
|
|
|
|
-
|
|
|
|
|
def extract_path(self, xI, xG, policy):
|
|
def extract_path(self, xI, xG, policy):
|
|
|
"""
|
|
"""
|
|
|
extract path from converged policy.
|
|
extract path from converged policy.
|
|
@@ -112,7 +104,6 @@ class Value_iteration:
|
|
|
x = x_next
|
|
x = x_next
|
|
|
return path
|
|
return path
|
|
|
|
|
|
|
|
-
|
|
|
|
|
def message(self, count):
|
|
def message(self, count):
|
|
|
"""
|
|
"""
|
|
|
print important message.
|
|
print important message.
|
|
@@ -129,7 +120,7 @@ class Value_iteration:
|
|
|
|
|
|
|
|
|
|
|
|
|
if __name__ == '__main__':
|
|
if __name__ == '__main__':
|
|
|
- x_Start = (5, 5) # starting state
|
|
|
|
|
- x_Goal = [(49, 5), (49, 25)] # goal states
|
|
|
|
|
|
|
+ x_Start = (5, 5) # starting state
|
|
|
|
|
+ x_Goal = [(49, 5), (49, 25)] # goal states
|
|
|
|
|
|
|
|
VI = Value_iteration(x_Start, x_Goal)
|
|
VI = Value_iteration(x_Start, x_Goal)
|