@@ -5,7 +5,7 @@
 """

 import env
-import tools
+import plotting
 import motion_model

 import matplotlib.pyplot as plt
@@ -15,18 +15,29 @@ import sys


 class QLEARNING:
     def __init__(self, x_start, x_goal):
-        self.u_set = motion_model.motions  # feasible input set
         self.xI, self.xG = x_start, x_goal
-        self.M = 500  # iteration numbers
-        self.gamma = 0.9  # discount factor
+        self.M = 500  # iteration numbers
+        self.gamma = 0.9  # discount factor
         self.alpha = 0.5
-        self.epsilon = 0.1  # epsilon error
-        self.obs = env.obs_map()  # position of obstacles
-        self.lose = env.lose_map()  # position of lose states
-        self.name1 = "Qlearning, M=" + str(self.M)
+        self.epsilon = 0.1
+        self.env = env.Env(self.xI, self.xG)
+        self.motion = motion_model.Motion_model(self.xI, self.xG)
+        self.plotting = plotting.Plotting(self.xI, self.xG)

-    def Monte_Carlo(self):
+        self.u_set = self.env.motions  # feasible input set
+        self.stateSpace = self.env.stateSpace  # state space
+        self.obs = self.env.obs_map()  # position of obstacles
+        self.lose = self.env.lose_map()  # position of lose states
+
+        self.name1 = "Q-learning, M=" + str(self.M)
+
+        [self.value, self.policy] = self.Monte_Carlo(self.xI, self.xG)
+        self.path = self.extract_path(self.xI, self.xG, self.policy)
+        self.plotting.animation(self.path, self.name1)
+
+
+    def Monte_Carlo(self, xI, xG):
         """
         Monte_Carlo experiments
@@ -38,10 +49,10 @@ class QLEARNING:

         for k in range(self.M):  # iterations
             x = self.state_init()  # initial state
-            while x != self.xG:  # stop condition
+            while x != xG:  # stop condition
                 u = self.epsilon_greedy(int(np.argmax(Q_table[x])), self.epsilon)  # epsilon_greedy policy
                 x_next = self.move_next(x, self.u_set[u])  # next state
-                reward = env.get_reward(x_next, self.lose)  # reward observed
+                reward = self.env.get_reward(x_next)  # reward observed
                 Q_table[x][u] = (1 - self.alpha) * Q_table[x][u] + \
                     self.alpha * (reward + self.gamma * max(Q_table[x_next]))
                 x = x_next
@@ -51,7 +62,6 @@ class QLEARNING:

         return Q_table, policy

-
     def table_init(self):
         """
         Initialize Q_table: Q(s, a)
@@ -60,28 +70,25 @@ class QLEARNING:

         Q_table = {}

-        for i in range(env.x_range):
-            for j in range(env.y_range):
-                u = []
-                if (i, j) not in self.obs:
-                    for k in range(len(self.u_set)):
-                        if (i, j) == self.xG:
-                            u.append(0)
-                        else:
-                            u.append(np.random.random_sample())
-                    Q_table[(i, j)] = u
-
+        for x in self.stateSpace:
+            u = []
+            if x not in self.obs:
+                for k in range(len(self.u_set)):
+                    if x == self.xG:
+                        u.append(0)
+                    else:
+                        u.append(np.random.random_sample())
+                Q_table[x] = u
         return Q_table

-
     def state_init(self):
         """
         initialize a starting state
         :return: starting state
         """
         while True:
-            i = np.random.randint(0, env.x_range - 1)
-            j = np.random.randint(0, env.y_range - 1)
+            i = np.random.randint(0, self.env.x_range - 1)
+            j = np.random.randint(0, self.env.y_range - 1)
             if (i, j) not in self.obs:
                 return (i, j)
@@ -121,40 +128,36 @@ class QLEARNING:
                 return x
         return x_next

-
-    def simulation(self, xI, xG, policy):
+    def extract_path(self, xI, xG, policy):
         """
-        simulate a path using converged policy.
+        extract path from converged policy.

         :param xI: starting state
-        :param xG: goal state
+        :param xG: goal states
         :param policy: converged policy
-        :return: simulation path
+        :return: path
         """

-        plt.figure(1)  # path animation
-        tools.show_map(xI, xG, self.obs, self.lose, self.name1)  # show background
-
-        x, path = xI, []
-        while True:
+        x, path = xI, [xI]
+        while x not in xG:
             u = self.u_set[policy[x]]
             x_next = (x[0] + u[0], x[1] + u[1])
             if x_next in self.obs:
-                print("Collision!")  # collision: simulation failed
+                print("Collision! Please run again!")
+                break
             else:
+                path.append(x_next)
                 x = x_next
-                if x_next == xG:
-                    break
-                else:
-                    tools.plot_dots(x)  # each state in optimal path
-                    path.append(x)
-        plt.show()
-        self.message()
-
         return path

-
     def message(self):
+        """
+        print important message.
+
+        :param count: iteration numbers
+        :return: print
+        """
+
         print("starting state: ", self.xI)
         print("goal state: ", self.xG)
         print("iteration numbers: ", self.M)
@@ -168,5 +171,3 @@ if __name__ == '__main__':
     x_Goal = (12, 1)

     Q_CALL = QLEARNING(x_Start, x_Goal)
-    [value_SARSA, policy_SARSA] = Q_CALL.Monte_Carlo()
-    path_VI = Q_CALL.simulation(x_Start, x_Goal, policy_SARSA)
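For reference, the loop that the patched Monte_Carlo() runs is plain tabular Q-learning with an epsilon-greedy behaviour policy. The sketch below is a minimal, self-contained illustration of that same update on a hypothetical 3x3 grid; the grid size, reward function, start state, and step() helper are invented for this example and are not part of env.Env or of this patch.

import numpy as np


def q_learning_sketch(x_goal=(2, 2), size=3, M=200, alpha=0.5, gamma=0.9, epsilon=0.1):
    """Tabular Q-learning with an epsilon-greedy policy on a toy grid (illustration only)."""
    motions = [(-1, 0), (1, 0), (0, -1), (0, 1)]          # feasible input set
    Q = {(i, j): [0.0] * len(motions) for i in range(size) for j in range(size)}

    def step(x, u):
        # clamp the successor to the grid; reward 1 at the goal, 0 elsewhere (assumed)
        x_next = (min(max(x[0] + u[0], 0), size - 1),
                  min(max(x[1] + u[1], 0), size - 1))
        return x_next, (1.0 if x_next == x_goal else 0.0)

    for _ in range(M):                                     # episodes
        x = (0, 0)                                         # fixed start state for the sketch
        while x != x_goal:
            # epsilon-greedy action selection, mirroring the epsilon_greedy() call in the patch
            if np.random.rand() < epsilon:
                u = np.random.randint(len(motions))
            else:
                u = int(np.argmax(Q[x]))
            x_next, reward = step(x, motions[u])
            # same update as Monte_Carlo():
            # Q(x, u) <- (1 - alpha) * Q(x, u) + alpha * (r + gamma * max_a Q(x_next, a))
            Q[x][u] = (1 - alpha) * Q[x][u] + alpha * (reward + gamma * max(Q[x_next]))
            x = x_next
    return Q


if __name__ == '__main__':
    print(q_learning_sketch()[(0, 0)])   # learned action-values at the start state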