преди 5 години · fd59affd71
--- a/Planning/.idea/workspace.xml
+++ b/Planning/.idea/workspace.xml
@@ -2,9 +2,17 @@
 
				 <project version="4">
			
 
				   <component name="ChangeListManager">
			
 
				     <list default="true" id="025aff36-a6aa-4945-ab7e-b2c625055f47" name="Default Changelist" comment="">
			
 
				-      <change afterPath="$PROJECT_DIR$/../Stochastic Shortest Path/Q-value_iteration.py" afterDir="false" />
			
 
				       <change beforePath="$PROJECT_DIR$/.idea/workspace.xml" beforeDir="false" afterPath="$PROJECT_DIR$/.idea/workspace.xml" afterDir="false" />
			
 
				+      <change beforePath="$PROJECT_DIR$/a_star.py" beforeDir="false" afterPath="$PROJECT_DIR$/a_star.py" afterDir="false" />
			
 
				+      <change beforePath="$PROJECT_DIR$/bfs.py" beforeDir="false" afterPath="$PROJECT_DIR$/bfs.py" afterDir="false" />
			
 
				+      <change beforePath="$PROJECT_DIR$/dfs.py" beforeDir="false" afterPath="$PROJECT_DIR$/dfs.py" afterDir="false" />
			
 
				+      <change beforePath="$PROJECT_DIR$/dijkstra.py" beforeDir="false" afterPath="$PROJECT_DIR$/dijkstra.py" afterDir="false" />
			
 
				+      <change beforePath="$PROJECT_DIR$/env.py" beforeDir="false" afterPath="$PROJECT_DIR$/env.py" afterDir="false" />
			
 
				+      <change beforePath="$PROJECT_DIR$/tools.py" beforeDir="false" afterPath="$PROJECT_DIR$/tools.py" afterDir="false" />
			
 
				+      <change beforePath="$PROJECT_DIR$/../Stochastic Shortest Path/Q-policy_iteration.py" beforeDir="false" afterPath="$PROJECT_DIR$/../Stochastic Shortest Path/Q-policy_iteration.py" afterDir="false" />
			
 
				+      <change beforePath="$PROJECT_DIR$/../Stochastic Shortest Path/Q-value_iteration.py" beforeDir="false" afterPath="$PROJECT_DIR$/../Stochastic Shortest Path/Q-value_iteration.py" afterDir="false" />
			
 
				       <change beforePath="$PROJECT_DIR$/../Stochastic Shortest Path/policy_iteration.py" beforeDir="false" afterPath="$PROJECT_DIR$/../Stochastic Shortest Path/policy_iteration.py" afterDir="false" />
			
 
				+      <change beforePath="$PROJECT_DIR$/../Stochastic Shortest Path/value_iteration.py" beforeDir="false" afterPath="$PROJECT_DIR$/../Stochastic Shortest Path/value_iteration.py" afterDir="false" />
			
 
				     </list>
			
 
				     <option name="EXCLUDED_CONVERTED_TO_IGNORED" value="true" />
			
 
				     <option name="SHOW_DIALOG" value="false" />
			
@@ -43,7 +51,7 @@
 
				       </list>
			
 
				     </option>
			
 
				   </component>
			
 
				-  <component name="RunManager" selected="Python.dfs">
			
 
				+  <component name="RunManager" selected="Python.a_star">
			
 
				     <configuration name="a_star" type="PythonConfigurationType" factoryName="Python" temporary="true">
			
 
				       <module name="Search-based Planning" />
			
 
				       <option name="INTERPRETER_OPTIONS" value="" />
			
@@ -151,10 +159,10 @@
 
				     </configuration>
			
 
				     <recent_temporary>
			
 
				       <list>
			
 
				-        <item itemvalue="Python.dfs" />
			
 
				-        <item itemvalue="Python.bfs" />
			
 
				         <item itemvalue="Python.a_star" />
			
 
				         <item itemvalue="Python.dijkstra" />
			
 
				+        <item itemvalue="Python.dfs" />
			
 
				+        <item itemvalue="Python.bfs" />
			
 
				         <item itemvalue="Python.searching" />
			
 
				       </list>
			
 
				     </recent_temporary>
			
--- a/Planning/__pycache__/env.cpython-37.pyc
+++ b/Planning/__pycache__/env.cpython-37.pyc
--- a/Planning/__pycache__/tools.cpython-37.pyc
+++ b/Planning/__pycache__/tools.cpython-37.pyc
--- a/Planning/a_star.py
+++ b/Planning/a_star.py
@@ -9,6 +9,7 @@ import tools
 
				 import env
			
 
				 import motion_model
			
 
				 
			
 
				+
			
 
				 class Astar:
			
 
				     def __init__(self, x_start, x_goal, heuristic_type):
			
 
				         self.u_set = motion_model.motions                   # feasible input set
			
@@ -16,7 +17,8 @@ class Astar:
 
				         self.obs = env.obs_map()                            # position of obstacles
			
 
				         self.heuristic_type = heuristic_type
			
 
				 
			
 
				-        env.show_map(self.xI, self.xG, self.obs, "a_star searching")
			
 
				+        tools.show_map(self.xI, self.xG, self.obs, "a_star searching")
			
 
				+
			
 
				 
			
 
				     def searching(self):
			
 
				         """
			
@@ -48,8 +50,10 @@ class Astar:
 
				                         parent[x_next] = x_current
			
 
				                         action[x_next] = u_next
			
 
				         [path_astar, actions_astar] = tools.extract_path(self.xI, self.xG, parent, action)
			
 
				+
			
 
				         return path_astar, actions_astar
			
 
				 
			
 
				+
			
 
				     def get_cost(self, x, u):
			
 
				         """
			
 
				         Calculate cost for this motion
			
@@ -62,6 +66,7 @@ class Astar:
 
				 
			
 
				         return 1
			
 
				 
			
 
				+
			
 
				     def Heuristic(self, state, goal, heuristic_type):
			
 
				         """
			
 
				         Calculate heuristic.
			
@@ -85,4 +90,4 @@ if __name__ == '__main__':
 
				     x_Goal = (49, 5)                # Goal node
			
 
				     astar = Astar(x_Start, x_Goal, "manhattan")
			
 
				     [path_astar, actions_astar] = astar.searching()
			
 
				-    tools.showPath(x_Start, x_Goal, path_astar)      # Plot path and visited nodes
			
 
				+    tools.showPath(x_Start, x_Goal, path_astar)
			
--- a/Planning/bfs.py
+++ b/Planning/bfs.py
@@ -9,17 +9,15 @@ import tools
 
				 import env
			
 
				 import motion_model
			
 
				 
			
 
				-class BFS:
			
 
				-    """
			
 
				-    BFS -> Breadth-first Searching
			
 
				-    """
			
 
				 
			
 
				+class BFS:
			
 
				     def __init__(self, x_start, x_goal):
			
 
				         self.u_set = motion_model.motions                             # feasible input set
			
 
				         self.xI, self.xG = x_start, x_goal
			
 
				         self.obs = env.obs_map()                                      # position of obstacles
			
 
				 
			
 
				-        env.show_map(self.xI, self.xG, self.obs, "breadth-first searching")
			
 
				+        tools.show_map(self.xI, self.xG, self.obs, "breadth-first searching")
			
 
				+
			
 
				 
			
 
				     def searching(self):
			
 
				         """
			
@@ -46,6 +44,7 @@ class BFS:
 
				                     parent[x_next] = x_current
			
 
				                     action[x_next] = u_next
			
 
				         [path_bfs, action_bfs] = tools.extract_path(self.xI, self.xG, parent, action)     # extract path
			
 
				+
			
 
				         return path_bfs, action_bfs
			
 
				 
			
 
				 
			
--- a/Planning/dfs.py
+++ b/Planning/dfs.py
@@ -9,17 +9,15 @@ import tools
 
				 import env
			
 
				 import motion_model
			
 
				 
			
 
				-class DFS:
			
 
				-    """
			
 
				-    DFS -> Depth-first Searching
			
 
				-    """
			
 
				 
			
 
				+class DFS:
			
 
				     def __init__(self, x_start, x_goal):
			
 
				         self.u_set = motion_model.motions                       # feasible input set
			
 
				         self.xI, self.xG = x_start, x_goal
			
 
				         self.obs = env.obs_map()                                # position of obstacles
			
 
				 
			
 
				-        env.show_map(self.xI, self.xG, self.obs, "depth-first searching")
			
 
				+        tools.show_map(self.xI, self.xG, self.obs, "depth-first searching")
			
 
				+
			
 
				 
			
 
				     def searching(self):
			
 
				         """
			
@@ -46,6 +44,7 @@ class DFS:
 
				                     parent[x_next] = x_current
			
 
				                     action[x_next] = u_next
			
 
				         [path_dfs, action_dfs] = tools.extract_path(self.xI, self.xG, parent, action)
			
 
				+
			
 
				         return path_dfs, action_dfs
			
 
				 
			
 
				 
			
@@ -54,4 +53,4 @@ if __name__ == '__main__':
 
				     x_Goal = (49, 5)                # Goal node
			
 
				     dfs = DFS(x_Start, x_Goal)
			
 
				     [path_dfs, action_dfs] = dfs.searching()
			
 
				-    tools.showPath(x_Start, x_Goal, path_dfs)
			
 
				+    tools.showPath(x_Start, x_Goal, path_dfs)
			
--- a/Planning/dijkstra.py
+++ b/Planning/dijkstra.py
@@ -9,13 +9,15 @@ import env
 
				 import tools
			
 
				 import motion_model
			
 
				 
			
 
				+
			
 
				 class Dijkstra:
			
 
				     def __init__(self, x_start, x_goal):
			
 
				         self.u_set = motion_model.motions                      # feasible input set
			
 
				         self.xI, self.xG = x_start, x_goal
			
 
				         self.obs = env.obs_map()                               # position of obstacles
			
 
				 
			
 
				-        env.show_map(self.xI, self.xG, self.obs, "dijkstra searching")
			
 
				+        tools.show_map(self.xI, self.xG, self.obs, "dijkstra searching")
			
 
				+
			
 
				 
			
 
				     def searching(self):
			
 
				         """
			
@@ -47,8 +49,10 @@ class Dijkstra:
 
				                         parent[x_next] = x_current
			
 
				                         action[x_next] = u_next
			
 
				         [path_dijk, action_dijk] = tools.extract_path(self.xI, self.xG, parent, action)
			
 
				+
			
 
				         return path_dijk, action_dijk
			
 
				 
			
 
				+
			
 
				     def get_cost(self, x, u):
			
 
				         """
			
 
				         Calculate cost for this motion
			
@@ -67,4 +71,4 @@ if __name__ == '__main__':
 
				     x_Goal = (49, 5)                # Goal node
			
 
				     dijkstra = Dijkstra(x_Start, x_Goal)
			
 
				     [path_dijk, actions_dijk] = dijkstra.searching()
			
 
				-    tools.showPath(x_Start, x_Goal, path_dijk)
			
 
				+    tools.showPath(x_Start, x_Goal, path_dijk)
			
--- a/Planning/env.py
+++ b/Planning/env.py
@@ -4,8 +4,6 @@
 
				 @author: huiming zhou
			
 
				 """
			
 
				 
			
 
				-import matplotlib.pyplot as plt
			
 
				-
			
 
				 x_range, y_range = 51, 31     # size of background
			
 
				 
			
 
				 def obs_map():
			
@@ -37,16 +35,3 @@ def obs_map():
 
				         obs.append((40, i))
			
 
				 
			
 
				     return obs
			
 
				-
			
 
				-
			
 
				-def show_map(xI, xG, obs_map, name):
			
 
				-    obs_x = [obs_map[i][0] for i in range(len(obs_map))]
			
 
				-    obs_y = [obs_map[i][1] for i in range(len(obs_map))]
			
 
				-
			
 
				-    plt.plot(xI[0], xI[1], "bs")
			
 
				-    plt.plot(xG[0], xG[1], "gs")
			
 
				-    plt.plot(obs_x, obs_y, "sk")
			
 
				-    plt.title(name, fontdict=None)
			
 
				-    plt.grid(True)
			
 
				-    plt.axis("equal")
			
 
				-
			
--- a/Planning/tools.py
+++ b/Planning/tools.py
@@ -6,6 +6,7 @@
 
				 
			
 
				 import matplotlib.pyplot as plt
			
 
				 
			
 
				+
			
 
				 def extract_path(xI, xG, parent, actions):
			
 
				     """
			
 
				     Extract the path based on the relationship of nodes.
			
@@ -25,9 +26,21 @@ def extract_path(xI, xG, parent, actions):
 
				         path_back.append(x_current)
			
 
				         acts_back.append(actions[x_current])
			
 
				         if x_current == xI: break
			
 
				+
			
 
				     return list(reversed(path_back)), list(reversed(acts_back))
			
 
				 
			
 
				 
			
 
				+def show_map(xI, xG, obs_map, name):
			
 
				+    obs_x = [obs_map[i][0] for i in range(len(obs_map))]
			
 
				+    obs_y = [obs_map[i][1] for i in range(len(obs_map))]
			
 
				+
			
 
				+    plt.plot(xI[0], xI[1], "bs")
			
 
				+    plt.plot(xG[0], xG[1], "gs")
			
 
				+    plt.plot(obs_x, obs_y, "sk")
			
 
				+    plt.title(name, fontdict=None)
			
 
				+    plt.axis("equal")
			
 
				+
			
 
				+
			
 
				 def showPath(xI, xG, path):
			
 
				     """
			
 
				     Plot the path.
			
@@ -37,6 +50,7 @@ def showPath(xI, xG, path):
 
				     :param path: Planning path
			
 
				     :return: A plot
			
 
				     """
			
 
				+
			
 
				     path.remove(xI)
			
 
				     path.remove(xG)
			
 
				     path_x = [path[i][0] for i in range(len(path))]
			
@@ -50,6 +64,5 @@ def plot_dots(x, length):
 
				     plt.plot(x[0], x[1], linewidth='3', color='#808080', marker='o')
			
 
				     plt.gcf().canvas.mpl_connect('key_release_event',
			
 
				                                  lambda event: [exit(0) if event.key == 'escape' else None])
			
 
				-    if length % 15 == 0:
			
 
				-        plt.pause(0.001)
			
 
				+    if length % 15 == 0: plt.pause(0.001)
			
 
				 
			
--- a/Path/Q-policy_iteration.py
+++ b/Path/Q-policy_iteration.py
@@ -16,13 +16,13 @@ import sys
 
				 
			
 
				 class Q_policy_iteration:
			
 
				     def __init__(self, x_start, x_goal):
			
 
				-        self.u_set = motion_model.motions  # feasible input set
			
 
				+        self.u_set = motion_model.motions                       # feasible input set
			
 
				         self.xI, self.xG = x_start, x_goal
			
 
				-        self.e = 0.001
			
 
				-        self.gamma = 0.9
			
 
				-        self.obs = env.obs_map()  # position of obstacles
			
 
				-        self.lose = env.lose_map()
			
 
				-        self.name1 = "policy_iteration, e=" + str(self.e) + ", gamma=" + str(self.gamma)
			
 
				+        self.e = 0.001                                          # threshold for convergence
			
 
				+        self.gamma = 0.9                                        # discount factor
			
 
				+        self.obs = env.obs_map()                                # position of obstacles
			
 
				+        self.lose = env.lose_map()                              # position of lose states
			
 
				+        self.name1 = "Q-policy_iteration, e=" + str(self.e) + ", gamma=" + str(self.gamma)
			
 
				         self.name2 = "convergence of error"
			
 
				 
			
 
				 
			
--- a/Path/Q-value_iteration.py
+++ b/Path/Q-value_iteration.py
@@ -14,13 +14,13 @@ import sys
 
				 
			
 
				 class Q_value_iteration:
			
 
				     def __init__(self, x_start, x_goal):
			
 
				-        self.u_set = motion_model.motions                      # feasible input set
			
 
				+        self.u_set = motion_model.motions                           # feasible input set
			
 
				         self.xI, self.xG = x_start, x_goal
			
 
				-        self.e = 0.001
			
 
				-        self.gamma = 0.9
			
 
				-        self.obs = env.obs_map()                               # position of obstacles
			
 
				-        self.lose = env.lose_map()
			
 
				-        self.name1 = "value_iteration, e=" + str(self.e) + ", gamma=" + str(self.gamma)
			
 
				+        self.e = 0.001                                              # threshold for convergence
			
 
				+        self.gamma = 0.9                                            # discount factor
			
 
				+        self.obs = env.obs_map()                                    # position of obstacles
			
 
				+        self.lose = env.lose_map()                                  # position of lose states
			
 
				+        self.name1 = "Q-value_iteration, e=" + str(self.e) + ", gamma=" + str(self.gamma)
			
 
				         self.name2 = "convergence of error"
			
 
				 
			
 
				 
			
--- a/Path/policy_iteration.py
+++ b/Path/policy_iteration.py
@@ -16,12 +16,12 @@ import sys
 
				 
			
 
				 class Policy_iteration:
			
 
				     def __init__(self, x_start, x_goal):
			
 
				-        self.u_set = motion_model.motions  # feasible input set
			
 
				+        self.u_set = motion_model.motions                       # feasible input set
			
 
				         self.xI, self.xG = x_start, x_goal
			
 
				-        self.e = 0.001
			
 
				-        self.gamma = 0.9
			
 
				-        self.obs = env.obs_map()  # position of obstacles
			
 
				-        self.lose = env.lose_map()
			
 
				+        self.e = 0.001                                          # threshold for convergence
			
 
				+        self.gamma = 0.9                                        # discount factor
			
 
				+        self.obs = env.obs_map()                                # position of obstacles
			
 
				+        self.lose = env.lose_map()                              # position of lose states
			
 
				         self.name1 = "policy_iteration, e=" + str(self.e) + ", gamma=" + str(self.gamma)
			
 
				         self.name2 = "convergence of error"
			
 
				 
			
--- a/Path/value_iteration.py
+++ b/Path/value_iteration.py
@@ -12,102 +12,146 @@ import matplotlib.pyplot as plt
 
				 import numpy as np
			
 
				 import sys
			
 
				 
			
 
				+
			
 
				 class Value_iteration:
			
 
				     def __init__(self, x_start, x_goal):
			
 
				         self.u_set = motion_model.motions                      # feasible input set
			
 
				         self.xI, self.xG = x_start, x_goal
			
 
				-        self.e = 0.001
			
 
				-        self.gamma = 0.9
			
 
				+        self.e = 0.001                                         # threshold for convergence
			
 
				+        self.gamma = 0.9                                       # discount factor
			
 
				         self.obs = env.obs_map()                               # position of obstacles
			
 
				-        self.lose = env.lose_map()
			
 
				-        self.name1 = "value_iteration, e=" + str(self.e) + ", gamma=" + str(self.gamma)
			
 
				-        self.name2 = "convergence of error"
			
 
				+        self.lose = env.lose_map()                             # position of lose states
			
 
				+        self.name1 = "value_iteration, e=" + str(self.e) \
			
 
				+                     + ", gamma=" + str(self.gamma)
			
 
				+        self.name2 = "convergence of error, e=0.001"
			
 
				 
			
 
				 
			
 
				     def iteration(self):
			
 
				-        value_table = {}
			
 
				-        policy = {}
			
 
				-        diff = []
			
 
				-        delta = sys.maxsize
			
 
				+        """
			
 
				+        value_iteration.
			
 
				+
			
 
				+        :return: converged value table, optimal policy and variation of difference,
			
 
				+        """
			
 
				+
			
 
				+        value_table = {}                        # value table
			
 
				+        policy = {}                             # policy
			
 
				+        diff = []                               # maximum difference between two successive iteration
			
 
				+        delta = sys.maxsize                     # initialize maximum difference
			
 
				+        count = 0                               # iteration times
			
 
				 
			
 
				         for i in range(env.x_range):
			
 
				             for j in range(env.y_range):
			
 
				                 if (i, j) not in self.obs:
			
 
				-                    value_table[(i, j)] = 0
			
 
				+                    value_table[(i, j)] = 0     # initialize value table for feasible states
			
 
				 
			
 
				-        while delta > self.e:
			
 
				+        while delta > self.e:                   # converged condition
			
 
				+            count += 1
			
 
				             x_value = 0
			
 
				             for x in value_table:
			
 
				-                if x in self.xG: continue
			
 
				-                else:
			
 
				+                if x not in self.xG:
			
 
				                     value_list = []
			
 
				                     for u in self.u_set:
			
 
				-                        [x_next, p_next] = motion_model.move_prob(x, u, self.obs)
			
 
				-                        value_list.append(self.cal_Q_value(x_next, p_next, value_table))
			
 
				-                    policy[x] = self.u_set[int(np.argmax(value_list))]
			
 
				-                    v_diff = abs(value_table[x] - max(value_list))
			
 
				-                    value_table[x] = max(value_list)
			
 
				+                        [x_next, p_next] = motion_model.move_prob(x, u, self.obs)           # recall motion model
			
 
				+                        value_list.append(self.cal_Q_value(x_next, p_next, value_table))    # cal Q value
			
 
				+                    policy[x] = self.u_set[int(np.argmax(value_list))]                      # update policy
			
 
				+                    v_diff = abs(value_table[x] - max(value_list))                          # maximum difference
			
 
				+                    value_table[x] = max(value_list)                                        # update value table
			
 
				                     if v_diff > 0:
			
 
				                         x_value = max(x_value, v_diff)
			
 
				-            delta = x_value
			
 
				+            delta = x_value                                                                 # update delta
			
 
				             diff.append(delta)
			
 
				-        return value_table, policy, diff
			
 
				-
			
 
				-
			
 
				-    def simulation(self, xI, xG, policy):
			
 
				-        path = []
			
 
				-        x = xI
			
 
				-        while x not in xG:
			
 
				-            u = policy[x]
			
 
				-            x_next = (x[0] + u[0], x[1] + u[1])
			
 
				-            if x_next not in self.obs:
			
 
				-                x = x_next
			
 
				-                path.append(x)
			
 
				-        path.pop()
			
 
				-        return path
			
 
				+        self.message(count)
			
 
				 
			
 
				+        return value_table, policy, diff
			
 
				 
			
 
				-    def animation(self, path, diff):
			
 
				-        plt.figure(1)
			
 
				-        tools.show_map(self.xI, self.xG, self.obs, self.lose, self.name1)
			
 
				-        for x in path:
			
 
				-            tools.plot_dots(x)
			
 
				-        plt.show()
			
 
				 
			
 
				-        plt.figure(2)
			
 
				-        plt.plot(diff, color='#808080', marker='o')
			
 
				-        plt.title(self.name2, fontdict=None)
			
 
				-        plt.xlabel('iterations')
			
 
				-        plt.grid('on')
			
 
				-        plt.show()
			
 
				+    def cal_Q_value(self, x, p, table):
			
 
				+        """
			
 
				+        cal Q_value.
			
 
				 
			
 
				+        :param x: next state vector
			
 
				+        :param p: probability of each state
			
 
				+        :param table: value table
			
 
				+        :return: Q-value
			
 
				+        """
			
 
				 
			
 
				-    def cal_Q_value(self, x, p, table):
			
 
				         value = 0
			
 
				-        reward = self.get_reward(x)
			
 
				+        reward = self.get_reward(x)                                     # get reward of next state
			
 
				         for i in range(len(x)):
			
 
				-            value += p[i] * (reward[i] + self.gamma * table[x[i]])
			
 
				+            value += p[i] * (reward[i] + self.gamma * table[x[i]])      # cal Q-value
			
 
				+
			
 
				         return value
			
 
				 
			
 
				 
			
 
				     def get_reward(self, x_next):
			
 
				+        """
			
 
				+        calculate reward of next state
			
 
				+
			
 
				+        :param x_next: next state
			
 
				+        :return: reward
			
 
				+        """
			
 
				+
			
 
				         reward = []
			
 
				         for x in x_next:
			
 
				             if x in self.xG:
			
 
				-                reward.append(10)
			
 
				+                reward.append(10)           # reward : 10, for goal states
			
 
				             elif x in self.lose:
			
 
				-                reward.append(-10)
			
 
				+                reward.append(-10)          # reward : -10, for lose states
			
 
				             else:
			
 
				-                reward.append(0)
			
 
				+                reward.append(0)            # reward : 0, for other states
			
 
				+
			
 
				         return reward
			
 
				 
			
 
				 
			
 
				+    def simulation(self, xI, xG, policy, diff):
			
 
				+        """
			
 
				+        simulate a path using converged policy.
			
 
				+
			
 
				+        :param xI: starting state
			
 
				+        :param xG: goal state
			
 
				+        :param policy: converged policy
			
 
				+        :return: simulation path
			
 
				+        """
			
 
				+
			
 
				+        plt.figure(1)                                               # path animation
			
 
				+        tools.show_map(xI, xG, self.obs, self.lose, self.name1)     # show background
			
 
				+
			
 
				+        x, path = xI, []
			
 
				+        while True:
			
 
				+            u = policy[x]
			
 
				+            x_next = (x[0] + u[0], x[1] + u[1])
			
 
				+            if x_next in self.obs:
			
 
				+                print("Collision!")                                 # collision: simulation failed
			
 
				+            else:
			
 
				+                x = x_next
			
 
				+                if x_next in xG: break
			
 
				+                else:
			
 
				+                    tools.plot_dots(x)                              # each state in optimal path
			
 
				+                    path.append(x)
			
 
				+
			
 
				+        plt.figure(2)                                               # difference between two successive iteration
			
 
				+        plt.plot(diff, color='#808080', marker='o')
			
 
				+        plt.title(self.name2, fontdict=None)
			
 
				+        plt.xlabel('iterations')
			
 
				+        plt.grid('on')
			
 
				+        plt.show()
			
 
				+
			
 
				+        return path
			
 
				+
			
 
				+
			
 
				+    def message(self, count):
			
 
				+        print("starting state: ", self.xI)
			
 
				+        print("goal states: ", self.xG)
			
 
				+        print("condition for convergence: ", self.e)
			
 
				+        print("discount factor: ", self.gamma)
			
 
				+        print("iteration times: ", count)
			
 
				+
			
 
				+
			
 
				 if __name__ == '__main__':
			
 
				     x_Start = (5, 5)
			
 
				     x_Goal = [(49, 5), (49, 25)]
			
 
				 
			
 
				     VI = Value_iteration(x_Start, x_Goal)
			
 
				     [value_VI, policy_VI, diff_VI] = VI.iteration()
			
 
				-    path_VI = VI.simulation(x_Start, x_Goal, policy_VI)
			
 
				+    path_VI = VI.simulation(x_Start, x_Goal, policy_VI, diff_VI)
			
 
				 
			
 
				-    VI.animation(path_VI, diff_VI)