5 anos atrás · 45089174cb
--- a/Control/Q-learning.py
+++ b/Control/Q-learning.py
@@ -5,7 +5,7 @@
 
				 """
			
 
				 
			
 
				 import env
			
 
				-import tools
			
 
				+import plotting
			
 
				 import motion_model
			
 
				 
			
 
				 import matplotlib.pyplot as plt
			
@@ -15,18 +15,29 @@ import sys
 
				 
			
 
				 class QLEARNING:
			
 
				     def __init__(self, x_start, x_goal):
			
 
				-        self.u_set = motion_model.motions                       # feasible input set
			
 
				         self.xI, self.xG = x_start, x_goal
			
 
				-        self.M = 500                                            # iteration numbers
			
 
				-        self.gamma = 0.9                                        # discount factor
			
 
				+        self.M = 500  # iteration numbers
			
 
				+        self.gamma = 0.9  # discount factor
			
 
				         self.alpha = 0.5
			
 
				-        self.epsilon = 0.1                                      # epsilon error
			
 
				-        self.obs = env.obs_map()                                # position of obstacles
			
 
				-        self.lose = env.lose_map()                              # position of lose states
			
 
				-        self.name1 = "Qlearning, M=" + str(self.M)
			
 
				+        self.epsilon = 0.1
			
 
				 
			
 
				+        self.env = env.Env(self.xI, self.xG)
			
 
				+        self.motion = motion_model.Motion_model(self.xI, self.xG)
			
 
				+        self.plotting = plotting.Plotting(self.xI, self.xG)
			
 
				 
			
 
				-    def Monte_Carlo(self):
			
 
				+        self.u_set = self.env.motions  # feasible input set
			
 
				+        self.stateSpace = self.env.stateSpace  # state space
			
 
				+        self.obs = self.env.obs_map()  # position of obstacles
			
 
				+        self.lose = self.env.lose_map()  # position of lose states
			
 
				+
			
 
				+        self.name1 = "SARSA, M=" + str(self.M)
			
 
				+
			
 
				+        [self.value, self.policy] = self.Monte_Carlo(self.xI, self.xG)
			
 
				+        self.path = self.extract_path(self.xI, self.xG, self.policy)
			
 
				+        self.plotting.animation(self.path, self.name1)
			
 
				+
			
 
				+
			
 
				+    def Monte_Carlo(self, xI, xG):
			
 
				         """
			
 
				         Monte_Carlo experiments
			
 
				 
			
@@ -38,10 +49,10 @@ class QLEARNING:
 
				 
			
 
				         for k in range(self.M):                                                     # iterations
			
 
				             x = self.state_init()                                                   # initial state
			
 
				-            while x != self.xG:                                                     # stop condition
			
 
				+            while x != xG:                                                     # stop condition
			
 
				                 u = self.epsilon_greedy(int(np.argmax(Q_table[x])), self.epsilon)   # epsilon_greedy policy
			
 
				                 x_next = self.move_next(x, self.u_set[u])                           # next state
			
 
				-                reward = env.get_reward(x_next, self.lose)                          # reward observed
			
 
				+                reward = self.env.get_reward(x_next)                          # reward observed
			
 
				                 Q_table[x][u] = (1 - self.alpha) * Q_table[x][u] + \
			
 
				                                 self.alpha * (reward + self.gamma * max(Q_table[x_next]))
			
 
				                 x = x_next
			
@@ -51,7 +62,6 @@ class QLEARNING:
 
				 
			
 
				         return Q_table, policy
			
 
				 
			
 
				-
			
 
				     def table_init(self):
			
 
				         """
			
 
				         Initialize Q_table: Q(s, a)
			
@@ -60,28 +70,25 @@ class QLEARNING:
 
				 
			
 
				         Q_table = {}
			
 
				 
			
 
				-        for i in range(env.x_range):
			
 
				-            for j in range(env.y_range):
			
 
				-                u = []
			
 
				-                if (i, j) not in self.obs:
			
 
				-                    for k in range(len(self.u_set)):
			
 
				-                        if (i, j) == self.xG:
			
 
				-                            u.append(0)
			
 
				-                        else:
			
 
				-                            u.append(np.random.random_sample())
			
 
				-                    Q_table[(i, j)] = u
			
 
				-
			
 
				+        for x in self.stateSpace:
			
 
				+            u = []
			
 
				+            if x not in self.obs:
			
 
				+                for k in range(len(self.u_set)):
			
 
				+                    if x == self.xG:
			
 
				+                        u.append(0)
			
 
				+                    else:
			
 
				+                        u.append(np.random.random_sample())
			
 
				+                    Q_table[x] = u
			
 
				         return Q_table
			
 
				 
			
 
				-
			
 
				     def state_init(self):
			
 
				         """
			
 
				         initialize a starting state
			
 
				         :return: starting state
			
 
				         """
			
 
				         while True:
			
 
				-            i = np.random.randint(0, env.x_range - 1)
			
 
				-            j = np.random.randint(0, env.y_range - 1)
			
 
				+            i = np.random.randint(0, self.env.x_range - 1)
			
 
				+            j = np.random.randint(0, self.env.y_range - 1)
			
 
				             if (i, j) not in self.obs:
			
 
				                 return (i, j)
			
 
				 
			
@@ -121,40 +128,36 @@ class QLEARNING:
 
				             return x
			
 
				         return x_next
			
 
				 
			
 
				-
			
 
				-    def simulation(self, xI, xG, policy):
			
 
				+    def extract_path(self, xI, xG, policy):
			
 
				         """
			
 
				-        simulate a path using converged policy.
			
 
				+        extract path from converged policy.
			
 
				 
			
 
				         :param xI: starting state
			
 
				-        :param xG: goal state
			
 
				+        :param xG: goal states
			
 
				         :param policy: converged policy
			
 
				-        :return: simulation path
			
 
				+        :return: path
			
 
				         """
			
 
				 
			
 
				-        plt.figure(1)                                                   # path animation
			
 
				-        tools.show_map(xI, xG, self.obs, self.lose, self.name1)         # show background
			
 
				-
			
 
				-        x, path = xI, []
			
 
				-        while True:
			
 
				+        x, path = xI, [xI]
			
 
				+        while x not in xG:
			
 
				             u = self.u_set[policy[x]]
			
 
				             x_next = (x[0] + u[0], x[1] + u[1])
			
 
				             if x_next in self.obs:
			
 
				-                print("Collision!")                                     # collision: simulation failed
			
 
				+                print("Collision! Please run again!")
			
 
				+                break
			
 
				             else:
			
 
				+                path.append(x_next)
			
 
				                 x = x_next
			
 
				-                if x_next == xG:
			
 
				-                    break
			
 
				-                else:
			
 
				-                    tools.plot_dots(x)                                  # each state in optimal path
			
 
				-                    path.append(x)
			
 
				-        plt.show()
			
 
				-        self.message()
			
 
				-
			
 
				         return path
			
 
				 
			
 
				-
			
 
				     def message(self):
			
 
				+        """
			
 
				+        print important message.
			
 
				+
			
 
				+        :param count: iteration numbers
			
 
				+        :return: print
			
 
				+        """
			
 
				+
			
 
				         print("starting state: ", self.xI)
			
 
				         print("goal state: ", self.xG)
			
 
				         print("iteration numbers: ", self.M)
			
@@ -168,5 +171,3 @@ if __name__ == '__main__':
 
				     x_Goal = (12, 1)
			
 
				 
			
 
				     Q_CALL = QLEARNING(x_Start, x_Goal)
			
 
				-    [value_SARSA, policy_SARSA] = Q_CALL.Monte_Carlo()
			
 
				-    path_VI = Q_CALL.simulation(x_Start, x_Goal, policy_SARSA)
			
--- a/Control/Sarsa.py
+++ b/Control/Sarsa.py
@@ -5,27 +5,37 @@
 
				 """
			
 
				 
			
 
				 import env
			
 
				-import tools
			
 
				+import plotting
			
 
				 import motion_model
			
 
				 
			
 
				-import matplotlib.pyplot as plt
			
 
				 import numpy as np
			
 
				 
			
 
				 
			
 
				 class SARSA:
			
 
				     def __init__(self, x_start, x_goal):
			
 
				-        self.u_set = motion_model.motions                       # feasible input set
			
 
				         self.xI, self.xG = x_start, x_goal
			
 
				-        self.M = 500                                            # iteration numbers
			
 
				-        self.gamma = 0.9                                        # discount factor
			
 
				+        self.M = 500  # iteration numbers
			
 
				+        self.gamma = 0.9  # discount factor
			
 
				         self.alpha = 0.5
			
 
				-        self.epsilon = 0.1                                      # epsilon error
			
 
				-        self.obs = env.obs_map()                                # position of obstacles
			
 
				-        self.lose = env.lose_map()                              # position of lose states
			
 
				+        self.epsilon = 0.1
			
 
				+
			
 
				+        self.env = env.Env(self.xI, self.xG)
			
 
				+        self.motion = motion_model.Motion_model(self.xI, self.xG)
			
 
				+        self.plotting = plotting.Plotting(self.xI, self.xG)
			
 
				+
			
 
				+        self.u_set = self.env.motions  # feasible input set
			
 
				+        self.stateSpace = self.env.stateSpace  # state space
			
 
				+        self.obs = self.env.obs_map()  # position of obstacles
			
 
				+        self.lose = self.env.lose_map()  # position of lose states
			
 
				+
			
 
				         self.name1 = "SARSA, M=" + str(self.M)
			
 
				 
			
 
				+        [self.value, self.policy] = self.Monte_Carlo(self.xI, self.xG)
			
 
				+        self.path = self.extract_path(self.xI, self.xG, self.policy)
			
 
				+        self.plotting.animation(self.path, self.name1)
			
 
				+
			
 
				 
			
 
				-    def Monte_Carlo(self):
			
 
				+    def Monte_Carlo(self, xI, xG):
			
 
				         """
			
 
				         Monte_Carlo experiments
			
 
				 
			
@@ -38,9 +48,9 @@ class SARSA:
 
				         for k in range(self.M):                                                 # iterations
			
 
				             x = self.state_init()                                               # initial state
			
 
				             u = self.epsilon_greedy(int(np.argmax(Q_table[x])), self.epsilon)
			
 
				-            while x != self.xG:                                                 # stop condition
			
 
				+            while x != xG:                                                 # stop condition
			
 
				                 x_next = self.move_next(x, self.u_set[u])                       # next state
			
 
				-                reward = env.get_reward(x_next, self.lose)                      # reward observed
			
 
				+                reward = self.env.get_reward(x_next)                      # reward observed
			
 
				                 u_next = self.epsilon_greedy(int(np.argmax(Q_table[x_next])), self.epsilon)
			
 
				                 Q_table[x][u] = (1 - self.alpha) * Q_table[x][u] + \
			
 
				                                 self.alpha * (reward + self.gamma * Q_table[x_next][u_next])
			
@@ -60,17 +70,15 @@ class SARSA:
 
				 
			
 
				         Q_table = {}
			
 
				 
			
 
				-        for i in range(env.x_range):
			
 
				-            for j in range(env.y_range):
			
 
				-                u = []
			
 
				-                if (i, j) not in self.obs:
			
 
				-                    for k in range(len(self.u_set)):
			
 
				-                        if (i, j) == self.xG:
			
 
				-                            u.append(0)
			
 
				-                        else:
			
 
				-                            u.append(np.random.random_sample())
			
 
				-                    Q_table[(i, j)] = u
			
 
				-
			
 
				+        for x in self.stateSpace:
			
 
				+            u = []
			
 
				+            if x not in self.obs:
			
 
				+                for k in range(len(self.u_set)):
			
 
				+                    if x == self.xG:
			
 
				+                        u.append(0)
			
 
				+                    else:
			
 
				+                        u.append(np.random.random_sample())
			
 
				+                    Q_table[x] = u
			
 
				         return Q_table
			
 
				 
			
 
				 
			
@@ -80,8 +88,8 @@ class SARSA:
 
				         :return: starting state
			
 
				         """
			
 
				         while True:
			
 
				-            i = np.random.randint(0, env.x_range - 1)
			
 
				-            j = np.random.randint(0, env.y_range - 1)
			
 
				+            i = np.random.randint(0, self.env.x_range - 1)
			
 
				+            j = np.random.randint(0, self.env.y_range - 1)
			
 
				             if (i, j) not in self.obs:
			
 
				                 return (i, j)
			
 
				 
			
@@ -121,40 +129,36 @@ class SARSA:
 
				             return x
			
 
				         return x_next
			
 
				 
			
 
				-
			
 
				-    def simulation(self, xI, xG, policy):
			
 
				+    def extract_path(self, xI, xG, policy):
			
 
				         """
			
 
				-        simulate a path using converged policy.
			
 
				+        extract path from converged policy.
			
 
				 
			
 
				         :param xI: starting state
			
 
				-        :param xG: goal state
			
 
				+        :param xG: goal states
			
 
				         :param policy: converged policy
			
 
				-        :return: simulation path
			
 
				+        :return: path
			
 
				         """
			
 
				 
			
 
				-        plt.figure(1)                                                   # path animation
			
 
				-        tools.show_map(xI, xG, self.obs, self.lose, self.name1)         # show background
			
 
				-
			
 
				-        x, path = xI, []
			
 
				-        while True:
			
 
				+        x, path = xI, [xI]
			
 
				+        while x not in xG:
			
 
				             u = self.u_set[policy[x]]
			
 
				             x_next = (x[0] + u[0], x[1] + u[1])
			
 
				             if x_next in self.obs:
			
 
				-                print("Collision!")                                     # collision: simulation failed
			
 
				+                print("Collision! Please run again!")
			
 
				+                break
			
 
				             else:
			
 
				+                path.append(x_next)
			
 
				                 x = x_next
			
 
				-                if x_next == xG:
			
 
				-                    break
			
 
				-                else:
			
 
				-                    tools.plot_dots(x)                                  # each state in optimal path
			
 
				-                    path.append(x)
			
 
				-        plt.show()
			
 
				-        self.message()
			
 
				-
			
 
				         return path
			
 
				 
			
 
				-
			
 
				     def message(self):
			
 
				+        """
			
 
				+        print important message.
			
 
				+
			
 
				+        :param count: iteration numbers
			
 
				+        :return: print
			
 
				+        """
			
 
				+
			
 
				         print("starting state: ", self.xI)
			
 
				         print("goal state: ", self.xG)
			
 
				         print("iteration numbers: ", self.M)
			
@@ -168,5 +172,3 @@ if __name__ == '__main__':
 
				     x_Goal = (12, 1)
			
 
				 
			
 
				     SARSA_CALL = SARSA(x_Start, x_Goal)
			
 
				-    [value_SARSA, policy_SARSA] = SARSA_CALL.Monte_Carlo()
			
 
				-    path_VI = SARSA_CALL.simulation(x_Start, x_Goal, policy_SARSA)
			
--- a/Control/__pycache__/env.cpython-37.pyc
+++ b/Control/__pycache__/env.cpython-37.pyc
--- a/Control/__pycache__/motion_model.cpython-37.pyc
+++ b/Control/__pycache__/motion_model.cpython-37.pyc
--- a/Control/__pycache__/plotting.cpython-37.pyc
+++ b/Control/__pycache__/plotting.cpython-37.pyc
--- a/Control/__pycache__/tools.cpython-37.pyc
+++ b/Control/__pycache__/tools.cpython-37.pyc
--- a/Control/env.py
+++ b/Control/env.py
@@ -4,53 +4,73 @@
 
				 @author: huiming zhou
			
 
				 """
			
 
				 
			
 
				-x_range, y_range = 14, 6     # size of background
			
 
				-
			
 
				-
			
 
				-def obs_map():
			
 
				-    """
			
 
				-    Initialize obstacles' positions
			
 
				-
			
 
				-    :return: map of obstacles
			
 
				-    """
			
 
				-
			
 
				-    obs = []
			
 
				-    for i in range(x_range):
			
 
				-        obs.append((i, 0))
			
 
				-    for i in range(x_range):
			
 
				-        obs.append((i, y_range - 1))
			
 
				-
			
 
				-    for i in range(y_range):
			
 
				-        obs.append((0, i))
			
 
				-    for i in range(y_range):
			
 
				-        obs.append((x_range - 1, i))
			
 
				-
			
 
				-    return obs
			
 
				-
			
 
				-
			
 
				-def lose_map():
			
 
				-    """
			
 
				-    Initialize losing states' positions
			
 
				-    :return: losing states
			
 
				-    """
			
 
				-
			
 
				-    lose = []
			
 
				-    for i in range(2, 12):
			
 
				-        lose.append((i, 1))
			
 
				-
			
 
				-    return lose
			
 
				-
			
 
				-
			
 
				-def get_reward(x_next, lose):
			
 
				-    """
			
 
				-    calculate reward of next state
			
 
				-
			
 
				-    :param x_next: next state
			
 
				-    :return: reward
			
 
				-    """
			
 
				-
			
 
				-    if x_next in lose:
			
 
				-        return -100                      # reward : -100, for lose states
			
 
				-    return -1                            # reward : -1, for other states
			
 
				-
			
 
				-
			
 
				+class Env():
			
 
				+    def __init__(self, xI, xG):
			
 
				+        self.x_range = 14  # size of background
			
 
				+        self.y_range = 6
			
 
				+        self.motions = [(1, 0), (-1, 0), (0, 1), (0, -1)]
			
 
				+        self.xI = xI
			
 
				+        self.xG = xG
			
 
				+        self.obs = self.obs_map()
			
 
				+        self.lose = self.lose_map()
			
 
				+        self.stateSpace = self.state_space()
			
 
				+
			
 
				+    def obs_map(self):
			
 
				+        """
			
 
				+        Initialize obstacles' positions
			
 
				+
			
 
				+        :return: map of obstacles
			
 
				+        """
			
 
				+        x = self.x_range
			
 
				+        y = self.y_range
			
 
				+        obs = []
			
 
				+
			
 
				+        for i in range(x):
			
 
				+            obs.append((i, 0))
			
 
				+        for i in range(x):
			
 
				+            obs.append((i, y - 1))
			
 
				+
			
 
				+        for i in range(y):
			
 
				+            obs.append((0, i))
			
 
				+        for i in range(y):
			
 
				+            obs.append((x - 1, i))
			
 
				+
			
 
				+        return obs
			
 
				+
			
 
				+    def lose_map(self):
			
 
				+        """
			
 
				+        Initialize losing states' positions
			
 
				+        :return: losing states
			
 
				+        """
			
 
				+
			
 
				+        lose = []
			
 
				+        for i in range(2, 12):
			
 
				+            lose.append((i, 1))
			
 
				+
			
 
				+        return lose
			
 
				+
			
 
				+    def state_space(self):
			
 
				+        """
			
 
				+        generate state space
			
 
				+        :return: state space
			
 
				+        """
			
 
				+
			
 
				+        state_space = []
			
 
				+        for i in range(self.x_range):
			
 
				+            for j in range(self.y_range):
			
 
				+                if (i, j) not in self.obs:
			
 
				+                    state_space.append((i, j))
			
 
				+
			
 
				+        return state_space
			
 
				+
			
 
				+    def get_reward(self, x_next):
			
 
				+        """
			
 
				+        calculate reward of next state
			
 
				+
			
 
				+        :param x_next: next state
			
 
				+        :return: reward
			
 
				+        """
			
 
				+
			
 
				+        if x_next in self.lose:
			
 
				+            return -100  # reward : -100, for lose states
			
 
				+        return -1  # reward : -1, for other states
			
--- a/Control/motion_model.py
+++ b/Control/motion_model.py
@@ -3,37 +3,42 @@
 
				 """
			
 
				 @author: huiming zhou
			
 
				 """
			
 
				-import numpy as np
			
 
				-
			
 
				-motions = [(1, 0), (-1, 0), (0, 1), (0, -1)]                # feasible motion sets
			
 
				-
			
 
				-
			
 
				-def move_prob(x, u, obs, eta = 0.2):
			
 
				-    """
			
 
				-    Motion model of robots,
			
 
				-
			
 
				-    :param x: current state (node)
			
 
				-    :param u: input
			
 
				-    :param obs: obstacle map
			
 
				-    :param eta: noise in motion model
			
 
				-    :return: next states and corresponding probability
			
 
				-    """
			
 
				-
			
 
				-    p_next = [1 - eta, eta / 2, eta / 2]
			
 
				-    x_next = []
			
 
				-    if u == (0, 1):
			
 
				-        u_real = [(0, 1), (-1, 0), (1, 0)]
			
 
				-    elif u == (0, -1):
			
 
				-        u_real = [(0, -1), (-1, 0), (1, 0)]
			
 
				-    elif u == (-1, 0):
			
 
				-        u_real = [(-1, 0), (0, 1), (0, -1)]
			
 
				-    else:
			
 
				-        u_real = [(1, 0), (0, 1), (0, -1)]
			
 
				-
			
 
				-    for act in u_real:
			
 
				-        if (x[0] + act[0], x[1] + act[1]) in obs:
			
 
				-            x_next.append(x)
			
 
				+
			
 
				+import env
			
 
				+
			
 
				+class Motion_model():
			
 
				+    def __init__(self, xI, xG):
			
 
				+        self.env = env.Env(xI, xG)
			
 
				+        self.obs = self.env.obs_map()
			
 
				+
			
 
				+
			
 
				+    def move_next(self, x, u, eta=0.2):
			
 
				+        """
			
 
				+        Motion model of robots,
			
 
				+
			
 
				+        :param x: current state (node)
			
 
				+        :param u: input
			
 
				+        :param obs: obstacle map
			
 
				+        :param eta: noise in motion model
			
 
				+        :return: next states and corresponding probability
			
 
				+        """
			
 
				+
			
 
				+        p_next = [1 - eta, eta / 2, eta / 2]
			
 
				+        x_next = []
			
 
				+        if u == (0, 1):
			
 
				+            u_real = [(0, 1), (-1, 0), (1, 0)]
			
 
				+        elif u == (0, -1):
			
 
				+            u_real = [(0, -1), (-1, 0), (1, 0)]
			
 
				+        elif u == (-1, 0):
			
 
				+            u_real = [(-1, 0), (0, 1), (0, -1)]
			
 
				         else:
			
 
				-            x_next.append((x[0] + act[0], x[1] + act[1]))
			
 
				+            u_real = [(1, 0), (0, 1), (0, -1)]
			
 
				+
			
 
				+        for act in u_real:
			
 
				+            x_check = (x[0] + act[0], x[1] + act[1])
			
 
				+            if x_check in self.obs:
			
 
				+                x_next.append(x)
			
 
				+            else:
			
 
				+                x_next.append(x_check)
			
 
				 
			
 
				-    return x_next, p_next
			
 
				+        return x_next, p_next
			
--- a/Control/plotting.py
+++ b/Control/plotting.py
@@ -0,0 +1,121 @@
 
				+#!/usr/bin/env python3
			
 
				+# -*- coding: utf-8 -*-
			
 
				+"""
			
 
				+@author: huiming zhou
			
 
				+"""
			
 
				+
			
 
				+import matplotlib.pyplot as plt
			
 
				+import env
			
 
				+
			
 
				+class Plotting():
			
 
				+    def __init__(self, xI, xG):
			
 
				+        self.xI, self.xG = xI, xG
			
 
				+        self.env = env.Env(self.xI, self.xG)
			
 
				+        self.obs = self.env.obs_map()
			
 
				+        self.lose = self.env.lose_map()
			
 
				+
			
 
				+
			
 
				+    def animation(self, path, name):
			
 
				+        """
			
 
				+        animation.
			
 
				+
			
 
				+        :param path: optimal path
			
 
				+        :param name: tile of figure
			
 
				+        :return: an animation
			
 
				+        """
			
 
				+
			
 
				+        plt.figure(1)
			
 
				+        self.plot_grid(name)
			
 
				+        self.plot_lose()
			
 
				+        self.plot_path(path)
			
 
				+
			
 
				+
			
 
				+    def plot_grid(self, name):
			
 
				+        """
			
 
				+        plot the obstacles in environment.
			
 
				+
			
 
				+        :param name: title of figure
			
 
				+        :return: plot
			
 
				+        """
			
 
				+
			
 
				+        obs_x = [self.obs[i][0] for i in range(len(self.obs))]
			
 
				+        obs_y = [self.obs[i][1] for i in range(len(self.obs))]
			
 
				+
			
 
				+        plt.plot(self.xI[0], self.xI[1], "bs", ms = 24)
			
 
				+        plt.plot(self.xG[0], self.xG[1], "gs", ms = 24)
			
 
				+
			
 
				+        plt.plot(obs_x, obs_y, "sk", ms = 24)
			
 
				+        plt.title(name)
			
 
				+        plt.axis("equal")
			
 
				+
			
 
				+
			
 
				+    def plot_lose(self):
			
 
				+        """
			
 
				+        plot losing states in environment.
			
 
				+        :return: a plot
			
 
				+        """
			
 
				+
			
 
				+        lose_x = [self.lose[i][0] for i in range(len(self.lose))]
			
 
				+        lose_y = [self.lose[i][1] for i in range(len(self.lose))]
			
 
				+
			
 
				+        plt.plot(lose_x, lose_y, color = '#A52A2A', marker = 's', ms = 24)
			
 
				+
			
 
				+
			
 
				+    def plot_visited(self, visited):
			
 
				+        """
			
 
				+        animation of order of visited nodes.
			
 
				+
			
 
				+        :param visited: visited nodes
			
 
				+        :return: animation
			
 
				+        """
			
 
				+
			
 
				+        visited.remove(self.xI)
			
 
				+        count = 0
			
 
				+
			
 
				+        for x in visited:
			
 
				+            count += 1
			
 
				+            plt.plot(x[0], x[1], linewidth='3', color='#808080', marker='o')
			
 
				+            plt.gcf().canvas.mpl_connect('key_release_event', lambda event:
			
 
				+            [exit(0) if event.key == 'escape' else None])
			
 
				+
			
 
				+            if count < len(visited) / 3:
			
 
				+                length = 15
			
 
				+            elif count < len(visited) * 2 / 3:
			
 
				+                length = 30
			
 
				+            else:
			
 
				+                length = 45
			
 
				+
			
 
				+            if count % length == 0: plt.pause(0.001)
			
 
				+
			
 
				+
			
 
				+    def plot_path(self, path):
			
 
				+        path.remove(self.xI)
			
 
				+        path.remove(self.xG)
			
 
				+
			
 
				+        for x in path:
			
 
				+            plt.plot(x[0], x[1], color='#808080', marker='o', ms = 23)
			
 
				+            plt.gcf().canvas.mpl_connect('key_release_event', lambda event:
			
 
				+            [exit(0) if event.key == 'escape' else None])
			
 
				+            plt.pause(0.001)
			
 
				+        plt.show()
			
 
				+        plt.pause(0.5)
			
 
				+
			
 
				+
			
 
				+    def plot_diff(self, diff, name):
			
 
				+        plt.figure(2)
			
 
				+        plt.title(name, fontdict=None)
			
 
				+        plt.xlabel('iterations')
			
 
				+        plt.ylabel('difference of successive iterations')
			
 
				+        plt.grid('on')
			
 
				+
			
 
				+        count = 0
			
 
				+        for x in diff:
			
 
				+            plt.plot(count, x, color='#808080', marker='o')  # plot dots for animation
			
 
				+            plt.gcf().canvas.mpl_connect('key_release_event', lambda event:
			
 
				+            [exit(0) if event.key == 'escape' else None])
			
 
				+            plt.pause(0.07)
			
 
				+            count += 1
			
 
				+
			
 
				+        plt.plot(diff, color='#808080')
			
 
				+        plt.pause(0.01)
			
 
				+        plt.show()
			
--- a/Control/tools.py
+++ b/Control/tools.py
@@ -1,92 +0,0 @@
 
				-#!/usr/bin/env python3
			
 
				-# -*- coding: utf-8 -*-
			
 
				-"""
			
 
				-@author: huiming zhou
			
 
				-"""
			
 
				-
			
 
				-import matplotlib.pyplot as plt
			
 
				-
			
 
				-
			
 
				-def extract_path(xI, xG, parent, actions):
			
 
				-    """
			
 
				-    Extract the path based on the relationship of nodes.
			
 
				-
			
 
				-    :param xI: Starting node
			
 
				-    :param xG: Goal node
			
 
				-    :param parent: Relationship between nodes
			
 
				-    :param actions: Action needed for transfer between two nodes
			
 
				-    :return: The planning path
			
 
				-    """
			
 
				-
			
 
				-    path_back = [xG]
			
 
				-    acts_back = [actions[xG]]
			
 
				-    x_current = xG
			
 
				-    while True:
			
 
				-        x_current = parent[x_current]
			
 
				-        path_back.append(x_current)
			
 
				-        acts_back.append(actions[x_current])
			
 
				-        if x_current == xI: break
			
 
				-
			
 
				-    return list(reversed(path_back)), list(reversed(acts_back))
			
 
				-
			
 
				-
			
 
				-def showPath(xI, xG, path):
			
 
				-    """
			
 
				-    Plot the path.
			
 
				-
			
 
				-    :param xI: Starting node
			
 
				-    :param xG: Goal node
			
 
				-    :param path: Planning path
			
 
				-    :return: A plot
			
 
				-    """
			
 
				-
			
 
				-    path.remove(xI)
			
 
				-    path.remove(xG)
			
 
				-    path_x = [path[i][0] for i in range(len(path))]
			
 
				-    path_y = [path[i][1] for i in range(len(path))]
			
 
				-    plt.plot(path_x, path_y, linewidth='5', color='r', linestyle='-')
			
 
				-    plt.pause(0.001)
			
 
				-    plt.show()
			
 
				-
			
 
				-
			
 
				-def show_map(xI, xG, obs_map, lose_map, name):
			
 
				-    """
			
 
				-    Plot the background you designed.
			
 
				-
			
 
				-    :param xI: starting state
			
 
				-    :param xG: goal states
			
 
				-    :param obs_map: positions of obstacles
			
 
				-    :param lose_map: positions of losing state
			
 
				-    :param name: name of this figure
			
 
				-    :return: a figure
			
 
				-    """
			
 
				-
			
 
				-    obs_x = [obs_map[i][0] for i in range(len(obs_map))]
			
 
				-    obs_y = [obs_map[i][1] for i in range(len(obs_map))]
			
 
				-
			
 
				-    lose_x = [lose_map[i][0] for i in range(len(lose_map))]
			
 
				-    lose_y = [lose_map[i][1] for i in range(len(lose_map))]
			
 
				-
			
 
				-    plt.plot(xI[0], xI[1], "bs", ms = 24)                                    # plot starting state (blue)
			
 
				-    plt.plot(xG[0], xG[1], "gs", ms = 24)                                    # plot goal states (green)
			
 
				-
			
 
				-    plt.plot(obs_x, obs_y, "sk", ms = 24)                                    # plot obstacles (black)
			
 
				-    plt.plot(lose_x, lose_y, marker = 's', color = '#A52A2A', ms = 24)       # plot losing states (grown)
			
 
				-    plt.title(name, fontdict=None)
			
 
				-    plt.axis("equal")
			
 
				-
			
 
				-
			
 
				-def plot_dots(x):
			
 
				-    """
			
 
				-    Plot state x for animation
			
 
				-
			
 
				-    :param x: current node
			
 
				-    :return: a plot
			
 
				-    """
			
 
				-
			
 
				-    plt.plot(x[0], x[1], linewidth='3', color='#808080', marker='o', ms = 23)    # plot dots for animation
			
 
				-    plt.gcf().canvas.mpl_connect('key_release_event',
			
 
				-                                 lambda event: [exit(0) if event.key == 'escape' else None])
			
 
				-    plt.pause(0.1)
			
 
				-
			
 
				-
			
--- a/Planning/.idea/workspace.xml
+++ b/Planning/.idea/workspace.xml
@@ -2,21 +2,17 @@
 
				 <project version="4">
			
 
				   <component name="ChangeListManager">
			
 
				     <list default="true" id="025aff36-a6aa-4945-ab7e-b2c625055f47" name="Default Changelist" comment="">
			
 
				+      <change beforePath="$PROJECT_DIR$/../Model-free Control/Q-learning.py" beforeDir="false" afterPath="$PROJECT_DIR$/../Model-free Control/Q-learning.py" afterDir="false" />
			
 
				+      <change beforePath="$PROJECT_DIR$/../Model-free Control/Sarsa.py" beforeDir="false" afterPath="$PROJECT_DIR$/../Model-free Control/Sarsa.py" afterDir="false" />
			
 
				+      <change beforePath="$PROJECT_DIR$/../Model-free Control/env.py" beforeDir="false" afterPath="$PROJECT_DIR$/../Model-free Control/env.py" afterDir="false" />
			
 
				+      <change beforePath="$PROJECT_DIR$/../Model-free Control/motion_model.py" beforeDir="false" afterPath="$PROJECT_DIR$/../Model-free Control/motion_model.py" afterDir="false" />
			
 
				+      <change beforePath="$PROJECT_DIR$/../Model-free Control/tools.py" beforeDir="false" afterPath="$PROJECT_DIR$/../Model-free Control/plotting.py" afterDir="false" />
			
 
				       <change beforePath="$PROJECT_DIR$/.idea/workspace.xml" beforeDir="false" afterPath="$PROJECT_DIR$/.idea/workspace.xml" afterDir="false" />
			
 
				       <change beforePath="$PROJECT_DIR$/a_star.py" beforeDir="false" afterPath="$PROJECT_DIR$/a_star.py" afterDir="false" />
			
 
				       <change beforePath="$PROJECT_DIR$/bfs.py" beforeDir="false" afterPath="$PROJECT_DIR$/bfs.py" afterDir="false" />
			
 
				       <change beforePath="$PROJECT_DIR$/dfs.py" beforeDir="false" afterPath="$PROJECT_DIR$/dfs.py" afterDir="false" />
			
 
				       <change beforePath="$PROJECT_DIR$/dijkstra.py" beforeDir="false" afterPath="$PROJECT_DIR$/dijkstra.py" afterDir="false" />
			
 
				-      <change beforePath="$PROJECT_DIR$/plotting.py" beforeDir="false" afterPath="$PROJECT_DIR$/plotting.py" afterDir="false" />
			
 
				       <change beforePath="$PROJECT_DIR$/queue.py" beforeDir="false" afterPath="$PROJECT_DIR$/queue.py" afterDir="false" />
			
 
				-      <change beforePath="$PROJECT_DIR$/../Stochastic Shortest Path/Q-policy_iteration.py" beforeDir="false" afterPath="$PROJECT_DIR$/../Stochastic Shortest Path/Q-policy_iteration.py" afterDir="false" />
			
 
				-      <change beforePath="$PROJECT_DIR$/../Stochastic Shortest Path/Q-value_iteration.py" beforeDir="false" afterPath="$PROJECT_DIR$/../Stochastic Shortest Path/Q-value_iteration.py" afterDir="false" />
			
 
				-      <change beforePath="$PROJECT_DIR$/../Stochastic Shortest Path/env.py" beforeDir="false" afterPath="$PROJECT_DIR$/../Stochastic Shortest Path/env.py" afterDir="false" />
			
 
				-      <change beforePath="$PROJECT_DIR$/../Stochastic Shortest Path/gif/VI_E.gif" beforeDir="false" afterPath="$PROJECT_DIR$/../Stochastic Shortest Path/gif/VI_E.gif" afterDir="false" />
			
 
				-      <change beforePath="$PROJECT_DIR$/../Stochastic Shortest Path/motion_model.py" beforeDir="false" afterPath="$PROJECT_DIR$/../Stochastic Shortest Path/motion_model.py" afterDir="false" />
			
 
				-      <change beforePath="$PROJECT_DIR$/../Stochastic Shortest Path/policy_iteration.py" beforeDir="false" afterPath="$PROJECT_DIR$/../Stochastic Shortest Path/policy_iteration.py" afterDir="false" />
			
 
				-      <change beforePath="$PROJECT_DIR$/../Stochastic Shortest Path/tools.py" beforeDir="false" afterPath="$PROJECT_DIR$/../Stochastic Shortest Path/plotting.py" afterDir="false" />
			
 
				-      <change beforePath="$PROJECT_DIR$/../Stochastic Shortest Path/value_iteration.py" beforeDir="false" afterPath="$PROJECT_DIR$/../Stochastic Shortest Path/value_iteration.py" afterDir="false" />
			
 
				     </list>
			
 
				     <option name="EXCLUDED_CONVERTED_TO_IGNORED" value="true" />
			
 
				     <option name="SHOW_DIALOG" value="false" />
			
@@ -40,7 +36,7 @@
 
				   </component>
			
 
				   <component name="PropertiesComponent">
			
 
				     <property name="ASKED_ADD_EXTERNAL_FILES" value="true" />
			
 
				-    <property name="last_opened_file_path" value="$PROJECT_DIR$/../Stochastic Shortest Path" />
			
 
				+    <property name="last_opened_file_path" value="$PROJECT_DIR$" />
			
 
				     <property name="restartRequiresConfirmation" value="false" />
			
 
				     <property name="settings.editor.selected.configurable" value="com.jetbrains.python.configuration.PyActiveSdkModuleConfigurable" />
			
 
				   </component>
			
--- a/Planning/a_star.py
+++ b/Planning/a_star.py
@@ -12,16 +12,16 @@ class Astar:
 
				     def __init__(self, x_start, x_goal, heuristic_type):
			
 
				         self.xI, self.xG = x_start, x_goal
			
 
				 
			
 
				-        self.Env = env.Env()
			
 
				-        self.plotting = plotting.Plotting(self.xI, self.xG)
			
 
				+        self.Env = env.Env()                                                        # class Env
			
 
				+        self.plotting = plotting.Plotting(self.xI, self.xG)                         # class Plotting
			
 
				 
			
 
				-        self.u_set = self.Env.motions                               # feasible input set
			
 
				-        self.obs = self.Env.obs                                     # position of obstacles
			
 
				+        self.u_set = self.Env.motions                                               # feasible input set
			
 
				+        self.obs = self.Env.obs                                                     # position of obstacles
			
 
				 
			
 
				         [self.path, self.policy, self.visited] = self.searching(self.xI, self.xG, heuristic_type)
			
 
				 
			
 
				         self.fig_name = "A* Algorithm"
			
 
				-        self.plotting.animation(self.path, self.visited, self.fig_name)  # animation generate
			
 
				+        self.plotting.animation(self.path, self.visited, self.fig_name)             # animation generate
			
 
				 
			
 
				 
			
 
				     def searching(self, xI, xG, heuristic_type):
			
@@ -31,26 +31,26 @@ class Astar:
 
				         :return: planning path, action in each node, visited nodes in the planning process
			
 
				         """
			
 
				 
			
 
				-        q_astar = queue.QueuePrior()                            # priority queue
			
 
				+        q_astar = queue.QueuePrior()                                                # priority queue
			
 
				         q_astar.put(xI, 0)
			
 
				-        parent = {xI: xI}                                       # record parents of nodes
			
 
				-        action = {xI: (0, 0)}                                   # record actions of nodes
			
 
				+        parent = {xI: xI}                                                           # record parents of nodes
			
 
				+        action = {xI: (0, 0)}                                                       # record actions of nodes
			
 
				         visited = []
			
 
				         cost = {xI: 0}
			
 
				 
			
 
				         while not q_astar.empty():
			
 
				             x_current = q_astar.get()
			
 
				-            if x_current == xG:                                 # stop condition
			
 
				+            if x_current == xG:                                                     # stop condition
			
 
				                 break
			
 
				             visited.append(x_current)
			
 
				-            for u_next in self.u_set:                           # explore neighborhoods of current node
			
 
				+            for u_next in self.u_set:                                   # explore neighborhoods of current node
			
 
				                 x_next = tuple([x_current[i] + u_next[i] for i in range(len(x_current))])
			
 
				                 if x_next not in self.obs:
			
 
				                     new_cost = cost[x_current] + self.get_cost(x_current, u_next)
			
 
				-                    if x_next not in cost or new_cost < cost[x_next]:           # conditions for updating cost
			
 
				+                    if x_next not in cost or new_cost < cost[x_next]:   # conditions for updating cost
			
 
				                         cost[x_next] = new_cost
			
 
				                         priority = new_cost + self.Heuristic(x_next, xG, heuristic_type)
			
 
				-                        q_astar.put(x_next, priority)           # put node into queue using priority "f+h"
			
 
				+                        q_astar.put(x_next, priority)                   # put node into queue using priority "f+h"
			
 
				                         parent[x_next], action[x_next] = x_current, u_next
			
 
				 
			
 
				         [path, policy] = self.extract_path(xI, xG, parent, action)
			
@@ -113,6 +113,7 @@ class Astar:
 
				 
			
 
				 
			
 
				 if __name__ == '__main__':
			
 
				-    x_Start = (5, 5)                # Starting node
			
 
				-    x_Goal = (49, 5)                # Goal node
			
 
				+    x_Start = (5, 5)  # Starting node
			
 
				+    x_Goal = (49, 5)  # Goal node
			
 
				+
			
 
				     astar = Astar(x_Start, x_Goal, "manhattan")
			
--- a/Planning/bfs.py
+++ b/Planning/bfs.py
@@ -15,13 +15,13 @@ class BFS:
 
				         self.Env = env.Env()
			
 
				         self.plotting = plotting.Plotting(self.xI, self.xG)
			
 
				 
			
 
				-        self.u_set = self.Env.motions                               # feasible input set
			
 
				-        self.obs = self.Env.obs                                     # position of obstacles
			
 
				+        self.u_set = self.Env.motions                                       # feasible input set
			
 
				+        self.obs = self.Env.obs                                             # position of obstacles
			
 
				 
			
 
				         [self.path, self.policy, self.visited] = self.searching(self.xI, self.xG)
			
 
				 
			
 
				         self.fig_name = "Breadth-first Searching"
			
 
				-        self.plotting.animation(self.path, self.visited, self.fig_name)  # animation generate
			
 
				+        self.plotting.animation(self.path, self.visited, self.fig_name)     # animation generate
			
 
				 
			
 
				 
			
 
				     def searching(self, xI, xG):
			
@@ -31,10 +31,10 @@ class BFS:
 
				         :return: planning path, action in each node, visited nodes in the planning process
			
 
				         """
			
 
				 
			
 
				-        q_bfs = queue.QueueFIFO()                                     # first-in-first-out queue
			
 
				+        q_bfs = queue.QueueFIFO()                                           # first-in-first-out queue
			
 
				         q_bfs.put(xI)
			
 
				-        parent = {xI: xI}                                   # record parents of nodes
			
 
				-        action = {xI: (0, 0)}                                    # record actions of nodes
			
 
				+        parent = {xI: xI}                                                   # record parents of nodes
			
 
				+        action = {xI: (0, 0)}                                               # record actions of nodes
			
 
				         visited = []
			
 
				 
			
 
				         while not q_bfs.empty():
			
@@ -42,13 +42,13 @@ class BFS:
 
				             if x_current == xG:
			
 
				                 break
			
 
				             visited.append(x_current)
			
 
				-            for u_next in self.u_set:                                 # explore neighborhoods of current node
			
 
				+            for u_next in self.u_set:                                       # explore neighborhoods of current node
			
 
				                 x_next = tuple([x_current[i] + u_next[i] for i in range(len(x_current))])
			
 
				-                if x_next not in parent and x_next not in self.obs:   # node not visited and not in obstacles
			
 
				+                if x_next not in parent and x_next not in self.obs:         # node not visited and not in obstacles
			
 
				                     q_bfs.put(x_next)
			
 
				                     parent[x_next], action[x_next] = x_current, u_next
			
 
				 
			
 
				-        [path, policy] = self.extract_path(xI, xG, parent, action)     # extract path
			
 
				+        [path, policy] = self.extract_path(xI, xG, parent, action)          # extract path
			
 
				 
			
 
				         return path, policy, visited
			
 
				 
			
--- a/Planning/dfs.py
+++ b/Planning/dfs.py
@@ -15,13 +15,13 @@ class DFS:
 
				         self.Env = env.Env()
			
 
				         self.plotting = plotting.Plotting(self.xI, self.xG)
			
 
				 
			
 
				-        self.u_set = self.Env.motions                   # feasible input set
			
 
				-        self.obs = self.Env.obs                         # position of obstacles
			
 
				+        self.u_set = self.Env.motions                                           # feasible input set
			
 
				+        self.obs = self.Env.obs                                                 # position of obstacles
			
 
				 
			
 
				         [self.path, self.policy, self.visited] = self.searching(self.xI, self.xG)
			
 
				 
			
 
				         self.fig_name = "Depth-first Searching"
			
 
				-        self.plotting.animation(self.path, self.visited, self.fig_name)      # animation generate
			
 
				+        self.plotting.animation(self.path, self.visited, self.fig_name)         # animation generate
			
 
				 
			
 
				 
			
 
				     def searching(self, xI, xG):
			
--- a/Planning/dijkstra.py
+++ b/Planning/dijkstra.py
@@ -15,13 +15,13 @@ class Dijkstra:
 
				         self.Env = env.Env()
			
 
				         self.plotting = plotting.Plotting(self.xI, self.xG)
			
 
				 
			
 
				-        self.u_set = self.Env.motions                               # feasible input set
			
 
				-        self.obs = self.Env.obs                                     # position of obstacles
			
 
				+        self.u_set = self.Env.motions                                           # feasible input set
			
 
				+        self.obs = self.Env.obs                                                 # position of obstacles
			
 
				 
			
 
				         [self.path, self.policy, self.visited] = self.searching(self.xI, self.xG)
			
 
				 
			
 
				         self.fig_name = "Dijkstra's Algorithm"
			
 
				-        self.plotting.animation(self.path, self.visited, self.fig_name)  # animation generate
			
 
				+        self.plotting.animation(self.path, self.visited, self.fig_name)         # animation generate
			
 
				 
			
 
				 
			
 
				     def searching(self, xI, xG):
			
--- a/Planning/queue.py
+++ b/Planning/queue.py
@@ -3,7 +3,6 @@
 
				 
			
 
				 """
			
 
				 @author: Huiming Zhou
			
 
				-@description: this file defines three kinds of queues that will be used in algorithms.
			
 
				 """
			
 
				 
			
 
				 import collections