فهرست منبع

regulate format

zhm-real 5 سال پیش
والد
کامیت
5f8ba1601f
27 فایل‌های تغییر یافته به همراه 156 افزوده شده و 257 حذف شده
  1. 1 4
      Model-free Control/.idea/Model-free Control.iml
  2. 1 1
      Model-free Control/.idea/misc.xml
  3. 24 28
      Model-free Control/Q-learning.py
  4. 23 24
      Model-free Control/Sarsa.py
  5. 1 7
      Model-free Control/env.py
  6. 2 7
      Model-free Control/motion_model.py
  7. 6 11
      Model-free Control/plotting.py
  8. 1 1
      Search-based Planning/.idea/Search-based Planning.iml
  9. 1 1
      Search-based Planning/.idea/misc.xml
  10. 12 21
      Search-based Planning/.idea/workspace.xml
  11. BIN
      Search-based Planning/__pycache__/env.cpython-37.pyc
  12. BIN
      Search-based Planning/__pycache__/plotting.cpython-35.pyc
  13. BIN
      Search-based Planning/__pycache__/plotting.cpython-37.pyc
  14. BIN
      Search-based Planning/__pycache__/queue.cpython-35.pyc
  15. BIN
      Search-based Planning/__pycache__/queue.cpython-37.pyc
  16. 1 2
      Stochastic Shortest Path/.idea/Stochastic Shortest Path.iml
  17. 1 1
      Stochastic Shortest Path/.idea/misc.xml
  18. 17 28
      Stochastic Shortest Path/Q-policy_iteration.py
  19. 13 22
      Stochastic Shortest Path/Q-value_iteration.py
  20. BIN
      Stochastic Shortest Path/__pycache__/env.cpython-37.pyc
  21. BIN
      Stochastic Shortest Path/__pycache__/motion_model.cpython-37.pyc
  22. BIN
      Stochastic Shortest Path/__pycache__/plotting.cpython-37.pyc
  23. 5 15
      Stochastic Shortest Path/env.py
  24. 2 8
      Stochastic Shortest Path/motion_model.py
  25. 2 13
      Stochastic Shortest Path/plotting.py
  26. 15 26
      Stochastic Shortest Path/policy_iteration.py
  27. 28 37
      Stochastic Shortest Path/value_iteration.py

+ 1 - 4
Model-free Control/.idea/Model-free Control.iml

@@ -2,10 +2,7 @@
 <module type="PYTHON_MODULE" version="4">
   <component name="NewModuleRootManager">
     <content url="file://$MODULE_DIR$" />
-    <orderEntry type="jdk" jdkName="Python 3.7" jdkType="Python SDK" />
+    <orderEntry type="jdk" jdkName="Python 3.7 (Search-based Planning)" jdkType="Python SDK" />
     <orderEntry type="sourceFolder" forTests="false" />
   </component>
-  <component name="TestRunnerService">
-    <option name="PROJECT_TEST_RUNNER" value="Unittests" />
-  </component>
 </module>

+ 1 - 1
Model-free Control/.idea/misc.xml

@@ -1,4 +1,4 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <project version="4">
-  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.7" project-jdk-type="Python SDK" />
+  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.7 (Search-based Planning)" project-jdk-type="Python SDK" />
 </project>

+ 24 - 28
Model-free Control/Q-learning.py

@@ -1,20 +1,15 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-"""
-@author: huiming zhou
-"""
-
 import env
 import plotting
 import motion_model
 
 import numpy as np
 
+
 class QLEARNING:
     def __init__(self, x_start, x_goal):
         self.xI, self.xG = x_start, x_goal
-        self.M = 500                                        # iteration numbers
-        self.gamma = 0.9                                    # discount factor
+        self.M = 500  # iteration numbers
+        self.gamma = 0.9  # discount factor
         self.alpha = 0.5
         self.epsilon = 0.1
 
@@ -22,10 +17,10 @@ class QLEARNING:
         self.motion = motion_model.Motion_model(self.xI, self.xG)
         self.plotting = plotting.Plotting(self.xI, self.xG)
 
-        self.u_set = self.env.motions                       # feasible input set
-        self.stateSpace = self.env.stateSpace               # state space
-        self.obs = self.env.obs_map()                       # position of obstacles
-        self.lose = self.env.lose_map()                     # position of lose states
+        self.u_set = self.env.motions  # feasible input set
+        self.stateSpace = self.env.stateSpace  # state space
+        self.obs = self.env.obs_map()  # position of obstacles
+        self.lose = self.env.lose_map()  # position of lose states
 
         self.name1 = "SARSA, M=" + str(self.M)
 
@@ -33,7 +28,6 @@ class QLEARNING:
         self.path = self.extract_path(self.xI, self.xG, self.policy)
         self.plotting.animation(self.path, self.name1)
 
-
     def Monte_Carlo(self, xI, xG):
         """
         Monte_Carlo experiments
@@ -41,21 +35,21 @@ class QLEARNING:
         :return: Q_table, policy
         """
 
-        Q_table = self.table_init()                                                 # Q_table initialization
-        policy = {}                                                                 # policy table
+        Q_table = self.table_init()  # Q_table initialization
+        policy = {}  # policy table
 
-        for k in range(self.M):                                                     # iterations
-            x = self.state_init()                                                   # initial state
-            while x != xG:                                                          # stop condition
-                u = self.epsilon_greedy(int(np.argmax(Q_table[x])), self.epsilon)   # epsilon_greedy policy
-                x_next = self.move_next(x, self.u_set[u])                           # next state
-                reward = self.env.get_reward(x_next)                                # reward observed
+        for k in range(self.M):  # iterations
+            x = self.state_init()  # initial state
+            while x != xG:  # stop condition
+                u = self.epsilon_greedy(int(np.argmax(Q_table[x])), self.epsilon)  # epsilon_greedy policy
+                x_next = self.move_next(x, self.u_set[u])  # next state
+                reward = self.env.get_reward(x_next)  # reward observed
                 Q_table[x][u] = (1 - self.alpha) * Q_table[x][u] + \
                                 self.alpha * (reward + self.gamma * max(Q_table[x_next]))
                 x = x_next
 
         for x in Q_table:
-            policy[x] = int(np.argmax(Q_table[x]))                                  # extract policy
+            policy[x] = int(np.argmax(Q_table[x]))  # extract policy
 
         return Q_table, policy
 
@@ -89,7 +83,6 @@ class QLEARNING:
             if (i, j) not in self.obs:
                 return (i, j)
 
-
     def epsilon_greedy(self, u, error):
         """
         generate a policy using epsilon_greedy algorithm
@@ -103,14 +96,17 @@ class QLEARNING:
             u_e = u
             while u_e == u:
                 p = np.random.random_sample()
-                if p < 0.25: u_e = 0
-                elif p < 0.5: u_e = 1
-                elif p < 0.75: u_e = 2
-                else: u_e = 3
+                if p < 0.25:
+                    u_e = 0
+                elif p < 0.5:
+                    u_e = 1
+                elif p < 0.75:
+                    u_e = 2
+                else:
+                    u_e = 3
             return u_e
         return u
 
-
     def move_next(self, x, u):
         """
         get next state.

+ 23 - 24
Model-free Control/Sarsa.py

@@ -1,20 +1,15 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-"""
-@author: huiming zhou
-"""
-
 import env
 import plotting
 import motion_model
 
 import numpy as np
 
+
 class SARSA:
     def __init__(self, x_start, x_goal):
         self.xI, self.xG = x_start, x_goal
-        self.M = 500                                # iteration numbers
-        self.gamma = 0.9                            # discount factor
+        self.M = 500  # iteration numbers
+        self.gamma = 0.9  # discount factor
         self.alpha = 0.5
         self.epsilon = 0.1
 
@@ -22,10 +17,10 @@ class SARSA:
         self.motion = motion_model.Motion_model(self.xI, self.xG)
         self.plotting = plotting.Plotting(self.xI, self.xG)
 
-        self.u_set = self.env.motions               # feasible input set
-        self.stateSpace = self.env.stateSpace       # state space
-        self.obs = self.env.obs_map()               # position of obstacles
-        self.lose = self.env.lose_map()             # position of lose states
+        self.u_set = self.env.motions  # feasible input set
+        self.stateSpace = self.env.stateSpace  # state space
+        self.obs = self.env.obs_map()  # position of obstacles
+        self.lose = self.env.lose_map()  # position of lose states
 
         self.name1 = "Q-learning, M=" + str(self.M)
 
@@ -40,22 +35,22 @@ class SARSA:
         :return: Q_table, policy
         """
 
-        Q_table = self.table_init()                                             # Q_table initialization
-        policy = {}                                                             # policy table
+        Q_table = self.table_init()  # Q_table initialization
+        policy = {}  # policy table
 
-        for k in range(self.M):                                                 # iterations
-            x = self.state_init()                                               # initial state
+        for k in range(self.M):  # iterations
+            x = self.state_init()  # initial state
             u = self.epsilon_greedy(int(np.argmax(Q_table[x])), self.epsilon)
-            while x != xG:                                                      # stop condition
-                x_next = self.move_next(x, self.u_set[u])                       # next state
-                reward = self.env.get_reward(x_next)                            # reward observed
+            while x != xG:  # stop condition
+                x_next = self.move_next(x, self.u_set[u])  # next state
+                reward = self.env.get_reward(x_next)  # reward observed
                 u_next = self.epsilon_greedy(int(np.argmax(Q_table[x_next])), self.epsilon)
                 Q_table[x][u] = (1 - self.alpha) * Q_table[x][u] + \
                                 self.alpha * (reward + self.gamma * Q_table[x_next][u_next])
                 x, u = x_next, u_next
 
         for x in Q_table:
-            policy[x] = int(np.argmax(Q_table[x]))                              # extract policy
+            policy[x] = int(np.argmax(Q_table[x]))  # extract policy
 
         return Q_table, policy
 
@@ -102,10 +97,14 @@ class SARSA:
             u_e = u
             while u_e == u:
                 p = np.random.random_sample()
-                if p < 0.25: u_e = 0
-                elif p < 0.5: u_e = 1
-                elif p < 0.75: u_e = 2
-                else: u_e = 3
+                if p < 0.25:
+                    u_e = 0
+                elif p < 0.5:
+                    u_e = 1
+                elif p < 0.75:
+                    u_e = 2
+                else:
+                    u_e = 3
             return u_e
         return u
 

+ 1 - 7
Model-free Control/env.py

@@ -1,10 +1,4 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-"""
-@author: huiming zhou
-"""
-
-class Env():
+class Env:
     def __init__(self, xI, xG):
         self.x_range = 14                           # size of background
         self.y_range = 6

+ 2 - 7
Model-free Control/motion_model.py

@@ -1,11 +1,6 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-"""
-@author: huiming zhou
-"""
-
 import env
 
+
 class Motion_model():
     def __init__(self, xI, xG):
         self.env = env.Env(xI, xG)
@@ -40,4 +35,4 @@ class Motion_model():
             else:
                 x_next.append(x_check)
 
-        return x_next, p_next
+        return x_next, p_next

+ 6 - 11
Model-free Control/plotting.py

@@ -1,12 +1,7 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-"""
-@author: huiming zhou
-"""
-
 import matplotlib.pyplot as plt
 import env
 
+
 class Plotting():
     def __init__(self, xI, xG):
         self.xI, self.xG = xI, xG
@@ -39,10 +34,10 @@ class Plotting():
         obs_x = [self.obs[i][0] for i in range(len(self.obs))]
         obs_y = [self.obs[i][1] for i in range(len(self.obs))]
 
-        plt.plot(self.xI[0], self.xI[1], "bs", ms = 24)
-        plt.plot(self.xG[0], self.xG[1], "gs", ms = 24)
+        plt.plot(self.xI[0], self.xI[1], "bs", ms=24)
+        plt.plot(self.xG[0], self.xG[1], "gs", ms=24)
 
-        plt.plot(obs_x, obs_y, "sk", ms = 24)
+        plt.plot(obs_x, obs_y, "sk", ms=24)
         plt.title(name)
         plt.axis("equal")
 
@@ -55,7 +50,7 @@ class Plotting():
         lose_x = [self.lose[i][0] for i in range(len(self.lose))]
         lose_y = [self.lose[i][1] for i in range(len(self.lose))]
 
-        plt.plot(lose_x, lose_y, color = '#A52A2A', marker = 's', ms = 24)
+        plt.plot(lose_x, lose_y, color='#A52A2A', marker='s', ms=24)
 
     def plot_visited(self, visited):
         """
@@ -88,7 +83,7 @@ class Plotting():
         path.remove(self.xG)
 
         for x in path:
-            plt.plot(x[0], x[1], color='#808080', marker='o', ms = 23)
+            plt.plot(x[0], x[1], color='#808080', marker='o', ms=23)
             plt.gcf().canvas.mpl_connect('key_release_event', lambda event:
             [exit(0) if event.key == 'escape' else None])
             plt.pause(0.001)

+ 1 - 1
Search-based Planning/.idea/Search-based Planning.iml

@@ -2,7 +2,7 @@
 <module type="PYTHON_MODULE" version="4">
   <component name="NewModuleRootManager">
     <content url="file://$MODULE_DIR$" />
-    <orderEntry type="jdk" jdkName="Python 3.5" jdkType="Python SDK" />
+    <orderEntry type="jdk" jdkName="Python 3.7 (Search-based Planning)" jdkType="Python SDK" />
     <orderEntry type="sourceFolder" forTests="false" />
   </component>
 </module>

+ 1 - 1
Search-based Planning/.idea/misc.xml

@@ -1,4 +1,4 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <project version="4">
-  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.5" project-jdk-type="Python SDK" />
+  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.7 (Search-based Planning)" project-jdk-type="Python SDK" />
 </project>

+ 12 - 21
Search-based Planning/.idea/workspace.xml

@@ -2,16 +2,7 @@
 <project version="4">
   <component name="ChangeListManager">
     <list default="true" id="025aff36-a6aa-4945-ab7e-b2c625055f47" name="Default Changelist" comment="">
-      <change beforePath="$PROJECT_DIR$/.idea/Search-based Planning.iml" beforeDir="false" afterPath="$PROJECT_DIR$/.idea/Search-based Planning.iml" afterDir="false" />
-      <change beforePath="$PROJECT_DIR$/.idea/misc.xml" beforeDir="false" afterPath="$PROJECT_DIR$/.idea/misc.xml" afterDir="false" />
       <change beforePath="$PROJECT_DIR$/.idea/workspace.xml" beforeDir="false" afterPath="$PROJECT_DIR$/.idea/workspace.xml" afterDir="false" />
-      <change beforePath="$PROJECT_DIR$/a_star.py" beforeDir="false" afterPath="$PROJECT_DIR$/a_star.py" afterDir="false" />
-      <change beforePath="$PROJECT_DIR$/bfs.py" beforeDir="false" afterPath="$PROJECT_DIR$/bfs.py" afterDir="false" />
-      <change beforePath="$PROJECT_DIR$/dfs.py" beforeDir="false" afterPath="$PROJECT_DIR$/dfs.py" afterDir="false" />
-      <change beforePath="$PROJECT_DIR$/dijkstra.py" beforeDir="false" afterPath="$PROJECT_DIR$/dijkstra.py" afterDir="false" />
-      <change beforePath="$PROJECT_DIR$/env.py" beforeDir="false" afterPath="$PROJECT_DIR$/env.py" afterDir="false" />
-      <change beforePath="$PROJECT_DIR$/plotting.py" beforeDir="false" afterPath="$PROJECT_DIR$/plotting.py" afterDir="false" />
-      <change beforePath="$PROJECT_DIR$/queue.py" beforeDir="false" afterPath="$PROJECT_DIR$/queue.py" afterDir="false" />
     </list>
     <option name="SHOW_DIALOG" value="false" />
     <option name="HIGHLIGHT_CONFLICTS" value="true" />
@@ -44,7 +35,7 @@
     <property name="restartRequiresConfirmation" value="false" />
     <property name="settings.editor.selected.configurable" value="com.jetbrains.python.configuration.PyActiveSdkModuleConfigurable" />
   </component>
-  <component name="RunManager" selected="Python.bfs">
+  <component name="RunManager" selected="Python.a_star">
     <configuration name="a_star" type="PythonConfigurationType" factoryName="Python" temporary="true">
       <module name="Search-based Planning" />
       <option name="INTERPRETER_OPTIONS" value="" />
@@ -152,8 +143,8 @@
     </configuration>
     <recent_temporary>
       <list>
-        <item itemvalue="Python.bfs" />
         <item itemvalue="Python.a_star" />
+        <item itemvalue="Python.bfs" />
         <item itemvalue="Python.dfs" />
         <item itemvalue="Python.dijkstra" />
         <item itemvalue="Python.searching" />
@@ -186,25 +177,25 @@
     <option name="oldMeFiltersMigrated" value="true" />
   </component>
   <component name="WindowStateProjectService">
-    <state width="1832" height="296" key="GridCell.Tab.0.bottom" timestamp="1592800940514">
+    <state width="1832" height="296" key="GridCell.Tab.0.bottom" timestamp="1592801510790">
       <screen x="1920" y="0" width="1920" height="1080" />
     </state>
-    <state width="1832" height="296" key="GridCell.Tab.0.bottom/65.24.1855.1056/1920.0.1920.1080@1920.0.1920.1080" timestamp="1592800940514" />
-    <state width="1832" height="296" key="GridCell.Tab.0.center" timestamp="1592800940514">
+    <state width="1832" height="296" key="GridCell.Tab.0.bottom/65.24.1855.1056/1920.0.1920.1080@1920.0.1920.1080" timestamp="1592801510790" />
+    <state width="1832" height="296" key="GridCell.Tab.0.center" timestamp="1592801510790">
       <screen x="1920" y="0" width="1920" height="1080" />
     </state>
-    <state width="1832" height="296" key="GridCell.Tab.0.center/65.24.1855.1056/1920.0.1920.1080@1920.0.1920.1080" timestamp="1592800940514" />
-    <state width="1832" height="296" key="GridCell.Tab.0.left" timestamp="1592800940514">
+    <state width="1832" height="296" key="GridCell.Tab.0.center/65.24.1855.1056/1920.0.1920.1080@1920.0.1920.1080" timestamp="1592801510790" />
+    <state width="1832" height="296" key="GridCell.Tab.0.left" timestamp="1592801510789">
       <screen x="1920" y="0" width="1920" height="1080" />
     </state>
-    <state width="1832" height="296" key="GridCell.Tab.0.left/65.24.1855.1056/1920.0.1920.1080@1920.0.1920.1080" timestamp="1592800940514" />
-    <state width="1832" height="296" key="GridCell.Tab.0.right" timestamp="1592800940514">
+    <state width="1832" height="296" key="GridCell.Tab.0.left/65.24.1855.1056/1920.0.1920.1080@1920.0.1920.1080" timestamp="1592801510789" />
+    <state width="1832" height="296" key="GridCell.Tab.0.right" timestamp="1592801510790">
       <screen x="1920" y="0" width="1920" height="1080" />
     </state>
-    <state width="1832" height="296" key="GridCell.Tab.0.right/65.24.1855.1056/1920.0.1920.1080@1920.0.1920.1080" timestamp="1592800940514" />
-    <state x="2406" y="174" key="SettingsEditor" timestamp="1592800563456">
+    <state width="1832" height="296" key="GridCell.Tab.0.right/65.24.1855.1056/1920.0.1920.1080@1920.0.1920.1080" timestamp="1592801510790" />
+    <state x="2406" y="174" key="SettingsEditor" timestamp="1592801555194">
       <screen x="1920" y="0" width="1920" height="1080" />
     </state>
-    <state x="2406" y="174" key="SettingsEditor/65.24.1855.1056/1920.0.1920.1080@1920.0.1920.1080" timestamp="1592800563456" />
+    <state x="2406" y="174" key="SettingsEditor/65.24.1855.1056/1920.0.1920.1080@1920.0.1920.1080" timestamp="1592801555194" />
   </component>
 </project>

BIN
Search-based Planning/__pycache__/env.cpython-37.pyc


BIN
Search-based Planning/__pycache__/plotting.cpython-35.pyc


BIN
Search-based Planning/__pycache__/plotting.cpython-37.pyc


BIN
Search-based Planning/__pycache__/queue.cpython-35.pyc


BIN
Search-based Planning/__pycache__/queue.cpython-37.pyc


+ 1 - 2
Stochastic Shortest Path/.idea/Stochastic Shortest Path.iml

@@ -2,11 +2,10 @@
 <module type="PYTHON_MODULE" version="4">
   <component name="NewModuleRootManager">
     <content url="file://$MODULE_DIR$" />
-    <orderEntry type="inheritedJdk" />
+    <orderEntry type="jdk" jdkName="Python 3.7 (Search-based Planning)" jdkType="Python SDK" />
     <orderEntry type="sourceFolder" forTests="false" />
   </component>
   <component name="TestRunnerService">
-    <option name="projectConfiguration" value="pytest" />
     <option name="PROJECT_TEST_RUNNER" value="pytest" />
   </component>
 </module>

+ 1 - 1
Stochastic Shortest Path/.idea/misc.xml

@@ -1,4 +1,4 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <project version="4">
-  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.7" project-jdk-type="Python SDK" />
+  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.7 (Search-based Planning)" project-jdk-type="Python SDK" />
 </project>

+ 17 - 28
Stochastic Shortest Path/Q-policy_iteration.py

@@ -1,9 +1,3 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-"""
-@author: huiming zhou
-"""
-
 import env
 import plotting
 import motion_model
@@ -12,20 +6,21 @@ import numpy as np
 import copy
 import sys
 
+
 class Q_policy_iteration:
     def __init__(self, x_start, x_goal):
         self.xI, self.xG = x_start, x_goal
-        self.e = 0.001                                  # threshold for convergence
-        self.gamma = 0.9                                # discount factor
+        self.e = 0.001  # threshold for convergence
+        self.gamma = 0.9  # discount factor
 
-        self.env = env.Env(self.xI, self.xG)                        # class Env
-        self.motion = motion_model.Motion_model(self.xI, self.xG)   # class Motion_model
-        self.plotting = plotting.Plotting(self.xI, self.xG)         # class Plotting
+        self.env = env.Env(self.xI, self.xG)  # class Env
+        self.motion = motion_model.Motion_model(self.xI, self.xG)  # class Motion_model
+        self.plotting = plotting.Plotting(self.xI, self.xG)  # class Plotting
 
-        self.u_set = self.env.motions                               # feasible input set
-        self.stateSpace = self.env.stateSpace                       # state space
-        self.obs = self.env.obs_map()                               # position of obstacles
-        self.lose = self.env.lose_map()                             # position of lose states
+        self.u_set = self.env.motions  # feasible input set
+        self.stateSpace = self.env.stateSpace  # state space
+        self.obs = self.env.obs_map()  # position of obstacles
+        self.lose = self.env.lose_map()  # position of lose states
 
         self.name1 = "Q-policy_iteration, gamma=" + str(self.gamma)
 
@@ -33,7 +28,6 @@ class Q_policy_iteration:
         self.path = self.extract_path(self.xI, self.xG, self.policy)
         self.plotting.animation(self.path, self.name1)
 
-
     def policy_evaluation(self, policy, value):
         """
         evaluation process using current policy.
@@ -45,7 +39,7 @@ class Q_policy_iteration:
 
         delta = sys.maxsize
 
-        while delta > self.e:               # convergence condition
+        while delta > self.e:  # convergence condition
             x_value = 0
             for x in value:
                 if x not in self.xG:
@@ -60,7 +54,6 @@ class Q_policy_iteration:
 
         return value
 
-
     def policy_improvement(self, policy, value):
         """
         policy improvement process.
@@ -76,7 +69,6 @@ class Q_policy_iteration:
 
         return policy
 
-
     def iteration(self):
         """
         Q-policy iteration
@@ -88,21 +80,20 @@ class Q_policy_iteration:
         count = 0
 
         for x in self.stateSpace:
-            Q_table[x] = [0, 0, 0, 0]              # initialize Q_value table
-            policy[x] = 0                          # initialize policy table
+            Q_table[x] = [0, 0, 0, 0]  # initialize Q_value table
+            policy[x] = 0  # initialize policy table
 
         while True:
             count += 1
             policy_back = copy.deepcopy(policy)
-            Q_table = self.policy_evaluation(policy, Q_table)   # evaluation process
-            policy = self.policy_improvement(policy, Q_table)   # improvement process
-            if policy_back == policy: break                     # convergence condition
+            Q_table = self.policy_evaluation(policy, Q_table)  # evaluation process
+            policy = self.policy_improvement(policy, Q_table)  # improvement process
+            if policy_back == policy: break  # convergence condition
 
         self.message(count)
 
         return Q_table, policy
 
-
     def cal_Q_value(self, x, p, policy, table):
         """
         cal Q_value.
@@ -114,13 +105,12 @@ class Q_policy_iteration:
         """
 
         value = 0
-        reward = self.env.get_reward(x)                  # get reward of next state
+        reward = self.env.get_reward(x)  # get reward of next state
         for i in range(len(x)):
             value += p[i] * (reward[i] + self.gamma * table[x[i]][policy[x[i]]])
 
         return value
 
-
     def extract_path(self, xI, xG, policy):
         """
         extract path from converged policy.
@@ -143,7 +133,6 @@ class Q_policy_iteration:
                 x = x_next
         return path
 
-
     def message(self, count):
         """
         print important message.

+ 13 - 22
Stochastic Shortest Path/Q-value_iteration.py

@@ -1,9 +1,3 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-"""
-@author: huiming zhou
-"""
-
 import env
 import plotting
 import motion_model
@@ -11,20 +5,21 @@ import motion_model
 import numpy as np
 import sys
 
+
 class Q_value_iteration:
     def __init__(self, x_start, x_goal):
         self.xI, self.xG = x_start, x_goal
-        self.e = 0.001                                          # threshold for convergence
-        self.gamma = 0.9                                        # discount factor
+        self.e = 0.001  # threshold for convergence
+        self.gamma = 0.9  # discount factor
 
-        self.env = env.Env(self.xI, self.xG)                            # class Env
-        self.motion = motion_model.Motion_model(self.xI, self.xG)       # class Motion_model
-        self.plotting = plotting.Plotting(self.xI, self.xG)             # class Plotting
+        self.env = env.Env(self.xI, self.xG)  # class Env
+        self.motion = motion_model.Motion_model(self.xI, self.xG)  # class Motion_model
+        self.plotting = plotting.Plotting(self.xI, self.xG)  # class Plotting
 
-        self.u_set = self.env.motions                                   # feasible input set
-        self.stateSpace = self.env.stateSpace                           # state space
-        self.obs = self.env.obs_map()                                   # position of obstacles
-        self.lose = self.env.lose_map()                                 # position of lose states
+        self.u_set = self.env.motions  # feasible input set
+        self.stateSpace = self.env.stateSpace  # state space
+        self.obs = self.env.obs_map()  # position of obstacles
+        self.lose = self.env.lose_map()  # position of lose states
 
         self.name1 = "Q-value_iteration, gamma=" + str(self.gamma)
         self.name2 = "converge process, e=" + str(self.e)
@@ -34,7 +29,6 @@ class Q_value_iteration:
         self.plotting.animation(self.path, self.name1)
         self.plotting.plot_diff(self.diff, self.name2)
 
-
     def iteration(self, xI, xG):
         """
         Q_value_iteration
@@ -48,9 +42,9 @@ class Q_value_iteration:
         count = 0
 
         for x in self.stateSpace:
-            Q_table[x] = [0, 0, 0, 0]                       # initialize Q_table
+            Q_table[x] = [0, 0, 0, 0]  # initialize Q_table
 
-        while delta > self.e:                               # convergence condition
+        while delta > self.e:  # convergence condition
             count += 1
             x_value = 0
             for x in self.stateSpace:
@@ -73,7 +67,6 @@ class Q_value_iteration:
 
         return Q_table, policy, diff
 
-
     def cal_Q_value(self, x, p, table):
         """
         cal Q_value.
@@ -85,13 +78,12 @@ class Q_value_iteration:
         """
 
         value = 0
-        reward = self.env.get_reward(x)                  # get reward of next state
+        reward = self.env.get_reward(x)  # get reward of next state
         for i in range(len(x)):
             value += p[i] * (reward[i] + self.gamma * max(table[x[i]]))
 
         return value
 
-
     def extract_path(self, xI, xG, policy):
         """
         extract path from converged policy.
@@ -114,7 +106,6 @@ class Q_value_iteration:
                 x = x_next
         return path
 
-
     def message(self, count):
         """
         print important message.

BIN
Stochastic Shortest Path/__pycache__/env.cpython-37.pyc


BIN
Stochastic Shortest Path/__pycache__/motion_model.cpython-37.pyc


BIN
Stochastic Shortest Path/__pycache__/plotting.cpython-37.pyc


+ 5 - 15
Stochastic Shortest Path/env.py

@@ -1,12 +1,6 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-"""
-@author: huiming zhou
-"""
-
-class Env():
+class Env:
     def __init__(self, xI, xG):
-        self.x_range = 51           # size of background
+        self.x_range = 51  # size of background
         self.y_range = 31
         self.motions = [(1, 0), (-1, 0), (0, 1), (0, -1)]
         self.xI = xI
@@ -15,7 +9,6 @@ class Env():
         self.lose = self.lose_map()
         self.stateSpace = self.state_space()
 
-
     def obs_map(self):
         """
         Initialize obstacles' positions
@@ -48,7 +41,6 @@ class Env():
 
         return obs
 
-
     def lose_map(self):
         """
         Initialize losing states' positions
@@ -61,7 +53,6 @@ class Env():
 
         return lose
 
-
     def state_space(self):
         """
         generate state space
@@ -76,7 +67,6 @@ class Env():
 
         return state_space
 
-
     def get_reward(self, x_next):
         """
         calculate reward of next state
@@ -88,10 +78,10 @@ class Env():
         reward = []
         for x in x_next:
             if x in self.xG:
-                reward.append(10)       # reward : 10, for goal states
+                reward.append(10)  # reward : 10, for goal states
             elif x in self.lose:
-                reward.append(-10)      # reward : -10, for lose states
+                reward.append(-10)  # reward : -10, for lose states
             else:
-                reward.append(0)        # reward : 0, for other states
+                reward.append(0)  # reward : 0, for other states
 
         return reward

+ 2 - 8
Stochastic Shortest Path/motion_model.py

@@ -1,17 +1,11 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-"""
-@author: huiming zhou
-"""
-
 import env
 
+
 class Motion_model():
     def __init__(self, xI, xG):
         self.env = env.Env(xI, xG)
         self.obs = self.env.obs_map()
 
-
     def move_next(self, x, u, eta=0.2):
         """
         Motion model of robots,
@@ -41,4 +35,4 @@ class Motion_model():
             else:
                 x_next.append(x_check)
 
-        return x_next, p_next
+        return x_next, p_next

+ 2 - 13
Stochastic Shortest Path/plotting.py

@@ -1,12 +1,7 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-"""
-@author: huiming zhou
-"""
-
 import matplotlib.pyplot as plt
 import env
 
+
 class Plotting():
     def __init__(self, xI, xG):
         self.xI, self.xG = xI, xG
@@ -14,7 +9,6 @@ class Plotting():
         self.obs = self.env.obs_map()
         self.lose = self.env.lose_map()
 
-
     def animation(self, path, name):
         """
         animation.
@@ -29,7 +23,6 @@ class Plotting():
         self.plot_lose()
         self.plot_path(path)
 
-
     def plot_grid(self, name):
         """
         plot the obstacles in environment.
@@ -49,7 +42,6 @@ class Plotting():
         plt.title(name)
         plt.axis("equal")
 
-
     def plot_lose(self):
         """
         plot losing states in environment.
@@ -59,8 +51,7 @@ class Plotting():
         lose_x = [self.lose[i][0] for i in range(len(self.lose))]
         lose_y = [self.lose[i][1] for i in range(len(self.lose))]
 
-        plt.plot(lose_x, lose_y, color = '#A52A2A', marker = 's')
-
+        plt.plot(lose_x, lose_y, color='#A52A2A', marker='s')
 
     def plot_visited(self, visited):
         """
@@ -88,7 +79,6 @@ class Plotting():
 
             if count % length == 0: plt.pause(0.001)
 
-
     def plot_path(self, path):
         path.remove(self.xI)
         for x in self.xG:
@@ -103,7 +93,6 @@ class Plotting():
         plt.show()
         plt.pause(0.5)
 
-
     def plot_diff(self, diff, name):
         plt.figure(2)
         plt.title(name, fontdict=None)

+ 15 - 26
Stochastic Shortest Path/policy_iteration.py

@@ -1,9 +1,3 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-"""
-@author: huiming zhou
-"""
-
 import env
 import plotting
 import motion_model
@@ -12,20 +6,21 @@ import numpy as np
 import sys
 import copy
 
+
 class Policy_iteration:
     def __init__(self, x_start, x_goal):
         self.xI, self.xG = x_start, x_goal
-        self.e = 0.001                                      # threshold for convergence
-        self.gamma = 0.9                                    # discount factor
+        self.e = 0.001  # threshold for convergence
+        self.gamma = 0.9  # discount factor
 
         self.env = env.Env(self.xI, self.xG)
         self.motion = motion_model.Motion_model(self.xI, self.xG)
         self.plotting = plotting.Plotting(self.xI, self.xG)
 
-        self.u_set = self.env.motions                       # feasible input set
-        self.stateSpace = self.env.stateSpace               # state space
-        self.obs = self.env.obs_map()                       # position of obstacles
-        self.lose = self.env.lose_map()                     # position of lose states
+        self.u_set = self.env.motions  # feasible input set
+        self.stateSpace = self.env.stateSpace  # state space
+        self.obs = self.env.obs_map()  # position of obstacles
+        self.lose = self.env.lose_map()  # position of lose states
 
         self.name1 = "policy_iteration, gamma=" + str(self.gamma)
 
@@ -33,7 +28,6 @@ class Policy_iteration:
         self.path = self.extract_path(self.xI, self.xG, self.policy)
         self.plotting.animation(self.path, self.name1)
 
-
     def policy_evaluation(self, policy, value):
         """
         Evaluate current policy.
@@ -45,7 +39,7 @@ class Policy_iteration:
 
         delta = sys.maxsize
 
-        while delta > self.e:                               # convergence condition
+        while delta > self.e:  # convergence condition
             x_value = 0
             for x in self.stateSpace:
                 if x not in self.xG:
@@ -59,7 +53,6 @@ class Policy_iteration:
 
         return value
 
-
     def policy_improvement(self, policy, value):
         """
         Improve policy using current value table.
@@ -79,7 +72,6 @@ class Policy_iteration:
 
         return policy
 
-
     def iteration(self):
         """
         polity iteration: using evaluate and improvement process until convergence.
@@ -91,21 +83,20 @@ class Policy_iteration:
         count = 0
 
         for x in self.stateSpace:
-            value_table[x] = 0                                              # initialize value table
-            policy[x] = self.u_set[0]                                       # initialize policy table
+            value_table[x] = 0  # initialize value table
+            policy[x] = self.u_set[0]  # initialize policy table
 
         while True:
             count += 1
             policy_back = copy.deepcopy(policy)
-            value_table = self.policy_evaluation(policy, value_table)       # evaluation process
-            policy = self.policy_improvement(policy, value_table)           # policy improvement process
-            if policy_back == policy: break                                 # convergence condition
+            value_table = self.policy_evaluation(policy, value_table)  # evaluation process
+            policy = self.policy_improvement(policy, value_table)  # policy improvement process
+            if policy_back == policy: break  # convergence condition
 
         self.message(count)
 
         return value_table, policy
 
-
     def cal_Q_value(self, x, p, table):
         """
         cal Q_value.
@@ -117,13 +108,12 @@ class Policy_iteration:
         """
 
         value = 0
-        reward = self.env.get_reward(x)                                 # get reward of next state
+        reward = self.env.get_reward(x)  # get reward of next state
         for i in range(len(x)):
-            value += p[i] * (reward[i] + self.gamma * table[x[i]])      # cal Q-value
+            value += p[i] * (reward[i] + self.gamma * table[x[i]])  # cal Q-value
 
         return value
 
-
     def extract_path(self, xI, xG, policy):
         """
         extract path from converged policy.
@@ -146,7 +136,6 @@ class Policy_iteration:
                 x = x_next
         return path
 
-
     def message(self, count):
         """
         print important message.

+ 28 - 37
Stochastic Shortest Path/value_iteration.py

@@ -1,9 +1,3 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-"""
-@author: huiming zhou
-"""
-
 import env
 import plotting
 import motion_model
@@ -11,20 +5,21 @@ import motion_model
 import numpy as np
 import sys
 
+
 class Value_iteration:
     def __init__(self, x_start, x_goal):
         self.xI, self.xG = x_start, x_goal
-        self.e = 0.001                                              # threshold for convergence
-        self.gamma = 0.9                                            # discount factor
+        self.e = 0.001  # threshold for convergence
+        self.gamma = 0.9  # discount factor
 
-        self.env = env.Env(self.xI, self.xG)                        # class Env
-        self.motion = motion_model.Motion_model(self.xI, self.xG)   # class Motion_model
-        self.plotting = plotting.Plotting(self.xI, self.xG)         # class Plotting
+        self.env = env.Env(self.xI, self.xG)  # class Env
+        self.motion = motion_model.Motion_model(self.xI, self.xG)  # class Motion_model
+        self.plotting = plotting.Plotting(self.xI, self.xG)  # class Plotting
 
-        self.u_set = self.env.motions                               # feasible input set
-        self.stateSpace = self.env.stateSpace                       # state space
-        self.obs = self.env.obs_map()                               # position of obstacles
-        self.lose = self.env.lose_map()                             # position of lose states
+        self.u_set = self.env.motions  # feasible input set
+        self.stateSpace = self.env.stateSpace  # state space
+        self.obs = self.env.obs_map()  # position of obstacles
+        self.lose = self.env.lose_map()  # position of lose states
 
         self.name1 = "value_iteration, gamma=" + str(self.gamma)
         self.name2 = "converge process, e=" + str(self.e)
@@ -34,7 +29,6 @@ class Value_iteration:
         self.plotting.animation(self.path, self.name1)
         self.plotting.plot_diff(self.diff, self.name2)
 
-
     def iteration(self, xI, xG):
         """
         value_iteration.
@@ -42,36 +36,35 @@ class Value_iteration:
         :return: converged value table, optimal policy and variation of difference,
         """
 
-        value_table = {}                        # value table
-        policy = {}                             # policy
-        diff = []                               # maximum difference between two successive iteration
-        delta = sys.maxsize                     # initialize maximum difference
-        count = 0                               # iteration times
+        value_table = {}  # value table
+        policy = {}  # policy
+        diff = []  # maximum difference between two successive iteration
+        delta = sys.maxsize  # initialize maximum difference
+        count = 0  # iteration times
 
-        for x in self.stateSpace:               # initialize value table for feasible states
+        for x in self.stateSpace:  # initialize value table for feasible states
             value_table[x] = 0
 
-        while delta > self.e:                   # converged condition
+        while delta > self.e:  # converged condition
             count += 1
             x_value = 0
             for x in self.stateSpace:
                 if x not in xG:
                     value_list = []
                     for u in self.u_set:
-                        [x_next, p_next] = self.motion.move_next(x, u)                      # recall motion model
-                        value_list.append(self.cal_Q_value(x_next, p_next, value_table))    # cal Q value
-                    policy[x] = self.u_set[int(np.argmax(value_list))]                      # update policy
-                    v_diff = abs(value_table[x] - max(value_list))                          # maximum difference
-                    value_table[x] = max(value_list)                                        # update value table
+                        [x_next, p_next] = self.motion.move_next(x, u)  # recall motion model
+                        value_list.append(self.cal_Q_value(x_next, p_next, value_table))  # cal Q value
+                    policy[x] = self.u_set[int(np.argmax(value_list))]  # update policy
+                    v_diff = abs(value_table[x] - max(value_list))  # maximum difference
+                    value_table[x] = max(value_list)  # update value table
                     x_value = max(x_value, v_diff)
-            delta = x_value                                                                 # update delta
+            delta = x_value  # update delta
             diff.append(delta)
 
-        self.message(count)                                                                 # print messages
+        self.message(count)  # print messages
 
         return value_table, policy, diff
 
-
     def cal_Q_value(self, x, p, table):
         """
         cal Q_value.
@@ -83,13 +76,12 @@ class Value_iteration:
         """
 
         value = 0
-        reward = self.env.get_reward(x)                                 # get reward of next state
+        reward = self.env.get_reward(x)  # get reward of next state
         for i in range(len(x)):
-            value += p[i] * (reward[i] + self.gamma * table[x[i]])      # cal Q-value
+            value += p[i] * (reward[i] + self.gamma * table[x[i]])  # cal Q-value
 
         return value
 
-
     def extract_path(self, xI, xG, policy):
         """
         extract path from converged policy.
@@ -112,7 +104,6 @@ class Value_iteration:
                 x = x_next
         return path
 
-
     def message(self, count):
         """
         print important message.
@@ -129,7 +120,7 @@ class Value_iteration:
 
 
 if __name__ == '__main__':
-    x_Start = (5, 5)                    # starting state
-    x_Goal = [(49, 5), (49, 25)]        # goal states
+    x_Start = (5, 5)  # starting state
+    x_Goal = [(49, 5), (49, 25)]  # goal states
 
     VI = Value_iteration(x_Start, x_Goal)