zhm-real 5 anos atrás
pai
commit
45089174cb

+ 49 - 48
Model-free Control/Q-learning.py

@@ -5,7 +5,7 @@
 """
 
 import env
-import tools
+import plotting
 import motion_model
 
 import matplotlib.pyplot as plt
@@ -15,18 +15,29 @@ import sys
 
 class QLEARNING:
     def __init__(self, x_start, x_goal):
-        self.u_set = motion_model.motions                       # feasible input set
         self.xI, self.xG = x_start, x_goal
-        self.M = 500                                            # iteration numbers
-        self.gamma = 0.9                                        # discount factor
+        self.M = 500  # iteration numbers
+        self.gamma = 0.9  # discount factor
         self.alpha = 0.5
-        self.epsilon = 0.1                                      # epsilon error
-        self.obs = env.obs_map()                                # position of obstacles
-        self.lose = env.lose_map()                              # position of lose states
-        self.name1 = "Qlearning, M=" + str(self.M)
+        self.epsilon = 0.1
 
+        self.env = env.Env(self.xI, self.xG)
+        self.motion = motion_model.Motion_model(self.xI, self.xG)
+        self.plotting = plotting.Plotting(self.xI, self.xG)
 
-    def Monte_Carlo(self):
+        self.u_set = self.env.motions  # feasible input set
+        self.stateSpace = self.env.stateSpace  # state space
+        self.obs = self.env.obs_map()  # position of obstacles
+        self.lose = self.env.lose_map()  # position of lose states
+
+        self.name1 = "SARSA, M=" + str(self.M)
+
+        [self.value, self.policy] = self.Monte_Carlo(self.xI, self.xG)
+        self.path = self.extract_path(self.xI, self.xG, self.policy)
+        self.plotting.animation(self.path, self.name1)
+
+
+    def Monte_Carlo(self, xI, xG):
         """
         Monte_Carlo experiments
 
@@ -38,10 +49,10 @@ class QLEARNING:
 
         for k in range(self.M):                                                     # iterations
             x = self.state_init()                                                   # initial state
-            while x != self.xG:                                                     # stop condition
+            while x != xG:                                                     # stop condition
                 u = self.epsilon_greedy(int(np.argmax(Q_table[x])), self.epsilon)   # epsilon_greedy policy
                 x_next = self.move_next(x, self.u_set[u])                           # next state
-                reward = env.get_reward(x_next, self.lose)                          # reward observed
+                reward = self.env.get_reward(x_next)                          # reward observed
                 Q_table[x][u] = (1 - self.alpha) * Q_table[x][u] + \
                                 self.alpha * (reward + self.gamma * max(Q_table[x_next]))
                 x = x_next
@@ -51,7 +62,6 @@ class QLEARNING:
 
         return Q_table, policy
 
-
     def table_init(self):
         """
         Initialize Q_table: Q(s, a)
@@ -60,28 +70,25 @@ class QLEARNING:
 
         Q_table = {}
 
-        for i in range(env.x_range):
-            for j in range(env.y_range):
-                u = []
-                if (i, j) not in self.obs:
-                    for k in range(len(self.u_set)):
-                        if (i, j) == self.xG:
-                            u.append(0)
-                        else:
-                            u.append(np.random.random_sample())
-                    Q_table[(i, j)] = u
-
+        for x in self.stateSpace:
+            u = []
+            if x not in self.obs:
+                for k in range(len(self.u_set)):
+                    if x == self.xG:
+                        u.append(0)
+                    else:
+                        u.append(np.random.random_sample())
+                    Q_table[x] = u
         return Q_table
 
-
     def state_init(self):
         """
         initialize a starting state
         :return: starting state
         """
         while True:
-            i = np.random.randint(0, env.x_range - 1)
-            j = np.random.randint(0, env.y_range - 1)
+            i = np.random.randint(0, self.env.x_range - 1)
+            j = np.random.randint(0, self.env.y_range - 1)
             if (i, j) not in self.obs:
                 return (i, j)
 
@@ -121,40 +128,36 @@ class QLEARNING:
             return x
         return x_next
 
-
-    def simulation(self, xI, xG, policy):
+    def extract_path(self, xI, xG, policy):
         """
-        simulate a path using converged policy.
+        extract path from converged policy.
 
         :param xI: starting state
-        :param xG: goal state
+        :param xG: goal states
         :param policy: converged policy
-        :return: simulation path
+        :return: path
         """
 
-        plt.figure(1)                                                   # path animation
-        tools.show_map(xI, xG, self.obs, self.lose, self.name1)         # show background
-
-        x, path = xI, []
-        while True:
+        x, path = xI, [xI]
+        while x not in xG:
             u = self.u_set[policy[x]]
             x_next = (x[0] + u[0], x[1] + u[1])
             if x_next in self.obs:
-                print("Collision!")                                     # collision: simulation failed
+                print("Collision! Please run again!")
+                break
             else:
+                path.append(x_next)
                 x = x_next
-                if x_next == xG:
-                    break
-                else:
-                    tools.plot_dots(x)                                  # each state in optimal path
-                    path.append(x)
-        plt.show()
-        self.message()
-
         return path
 
-
     def message(self):
+        """
+        print important message.
+
+        :param count: iteration numbers
+        :return: print
+        """
+
         print("starting state: ", self.xI)
         print("goal state: ", self.xG)
         print("iteration numbers: ", self.M)
@@ -168,5 +171,3 @@ if __name__ == '__main__':
     x_Goal = (12, 1)
 
     Q_CALL = QLEARNING(x_Start, x_Goal)
-    [value_SARSA, policy_SARSA] = Q_CALL.Monte_Carlo()
-    path_VI = Q_CALL.simulation(x_Start, x_Goal, policy_SARSA)

+ 48 - 46
Model-free Control/Sarsa.py

@@ -5,27 +5,37 @@
 """
 
 import env
-import tools
+import plotting
 import motion_model
 
-import matplotlib.pyplot as plt
 import numpy as np
 
 
 class SARSA:
     def __init__(self, x_start, x_goal):
-        self.u_set = motion_model.motions                       # feasible input set
         self.xI, self.xG = x_start, x_goal
-        self.M = 500                                            # iteration numbers
-        self.gamma = 0.9                                        # discount factor
+        self.M = 500  # iteration numbers
+        self.gamma = 0.9  # discount factor
         self.alpha = 0.5
-        self.epsilon = 0.1                                      # epsilon error
-        self.obs = env.obs_map()                                # position of obstacles
-        self.lose = env.lose_map()                              # position of lose states
+        self.epsilon = 0.1
+
+        self.env = env.Env(self.xI, self.xG)
+        self.motion = motion_model.Motion_model(self.xI, self.xG)
+        self.plotting = plotting.Plotting(self.xI, self.xG)
+
+        self.u_set = self.env.motions  # feasible input set
+        self.stateSpace = self.env.stateSpace  # state space
+        self.obs = self.env.obs_map()  # position of obstacles
+        self.lose = self.env.lose_map()  # position of lose states
+
         self.name1 = "SARSA, M=" + str(self.M)
 
+        [self.value, self.policy] = self.Monte_Carlo(self.xI, self.xG)
+        self.path = self.extract_path(self.xI, self.xG, self.policy)
+        self.plotting.animation(self.path, self.name1)
+
 
-    def Monte_Carlo(self):
+    def Monte_Carlo(self, xI, xG):
         """
         Monte_Carlo experiments
 
@@ -38,9 +48,9 @@ class SARSA:
         for k in range(self.M):                                                 # iterations
             x = self.state_init()                                               # initial state
             u = self.epsilon_greedy(int(np.argmax(Q_table[x])), self.epsilon)
-            while x != self.xG:                                                 # stop condition
+            while x != xG:                                                 # stop condition
                 x_next = self.move_next(x, self.u_set[u])                       # next state
-                reward = env.get_reward(x_next, self.lose)                      # reward observed
+                reward = self.env.get_reward(x_next)                      # reward observed
                 u_next = self.epsilon_greedy(int(np.argmax(Q_table[x_next])), self.epsilon)
                 Q_table[x][u] = (1 - self.alpha) * Q_table[x][u] + \
                                 self.alpha * (reward + self.gamma * Q_table[x_next][u_next])
@@ -60,17 +70,15 @@ class SARSA:
 
         Q_table = {}
 
-        for i in range(env.x_range):
-            for j in range(env.y_range):
-                u = []
-                if (i, j) not in self.obs:
-                    for k in range(len(self.u_set)):
-                        if (i, j) == self.xG:
-                            u.append(0)
-                        else:
-                            u.append(np.random.random_sample())
-                    Q_table[(i, j)] = u
-
+        for x in self.stateSpace:
+            u = []
+            if x not in self.obs:
+                for k in range(len(self.u_set)):
+                    if x == self.xG:
+                        u.append(0)
+                    else:
+                        u.append(np.random.random_sample())
+                    Q_table[x] = u
         return Q_table
 
 
@@ -80,8 +88,8 @@ class SARSA:
         :return: starting state
         """
         while True:
-            i = np.random.randint(0, env.x_range - 1)
-            j = np.random.randint(0, env.y_range - 1)
+            i = np.random.randint(0, self.env.x_range - 1)
+            j = np.random.randint(0, self.env.y_range - 1)
             if (i, j) not in self.obs:
                 return (i, j)
 
@@ -121,40 +129,36 @@ class SARSA:
             return x
         return x_next
 
-
-    def simulation(self, xI, xG, policy):
+    def extract_path(self, xI, xG, policy):
         """
-        simulate a path using converged policy.
+        extract path from converged policy.
 
         :param xI: starting state
-        :param xG: goal state
+        :param xG: goal states
         :param policy: converged policy
-        :return: simulation path
+        :return: path
         """
 
-        plt.figure(1)                                                   # path animation
-        tools.show_map(xI, xG, self.obs, self.lose, self.name1)         # show background
-
-        x, path = xI, []
-        while True:
+        x, path = xI, [xI]
+        while x not in xG:
             u = self.u_set[policy[x]]
             x_next = (x[0] + u[0], x[1] + u[1])
             if x_next in self.obs:
-                print("Collision!")                                     # collision: simulation failed
+                print("Collision! Please run again!")
+                break
             else:
+                path.append(x_next)
                 x = x_next
-                if x_next == xG:
-                    break
-                else:
-                    tools.plot_dots(x)                                  # each state in optimal path
-                    path.append(x)
-        plt.show()
-        self.message()
-
         return path
 
-
     def message(self):
+        """
+        print important message.
+
+        :param count: iteration numbers
+        :return: print
+        """
+
         print("starting state: ", self.xI)
         print("goal state: ", self.xG)
         print("iteration numbers: ", self.M)
@@ -168,5 +172,3 @@ if __name__ == '__main__':
     x_Goal = (12, 1)
 
     SARSA_CALL = SARSA(x_Start, x_Goal)
-    [value_SARSA, policy_SARSA] = SARSA_CALL.Monte_Carlo()
-    path_VI = SARSA_CALL.simulation(x_Start, x_Goal, policy_SARSA)

BIN
Model-free Control/__pycache__/env.cpython-37.pyc


BIN
Model-free Control/__pycache__/motion_model.cpython-37.pyc


BIN
Model-free Control/__pycache__/plotting.cpython-37.pyc


BIN
Model-free Control/__pycache__/tools.cpython-37.pyc


+ 70 - 50
Model-free Control/env.py

@@ -4,53 +4,73 @@
 @author: huiming zhou
 """
 
-x_range, y_range = 14, 6     # size of background
-
-
-def obs_map():
-    """
-    Initialize obstacles' positions
-
-    :return: map of obstacles
-    """
-
-    obs = []
-    for i in range(x_range):
-        obs.append((i, 0))
-    for i in range(x_range):
-        obs.append((i, y_range - 1))
-
-    for i in range(y_range):
-        obs.append((0, i))
-    for i in range(y_range):
-        obs.append((x_range - 1, i))
-
-    return obs
-
-
-def lose_map():
-    """
-    Initialize losing states' positions
-    :return: losing states
-    """
-
-    lose = []
-    for i in range(2, 12):
-        lose.append((i, 1))
-
-    return lose
-
-
-def get_reward(x_next, lose):
-    """
-    calculate reward of next state
-
-    :param x_next: next state
-    :return: reward
-    """
-
-    if x_next in lose:
-        return -100                      # reward : -100, for lose states
-    return -1                            # reward : -1, for other states
-
-
+class Env():
+    def __init__(self, xI, xG):
+        self.x_range = 14  # size of background
+        self.y_range = 6
+        self.motions = [(1, 0), (-1, 0), (0, 1), (0, -1)]
+        self.xI = xI
+        self.xG = xG
+        self.obs = self.obs_map()
+        self.lose = self.lose_map()
+        self.stateSpace = self.state_space()
+
+    def obs_map(self):
+        """
+        Initialize obstacles' positions
+
+        :return: map of obstacles
+        """
+        x = self.x_range
+        y = self.y_range
+        obs = []
+
+        for i in range(x):
+            obs.append((i, 0))
+        for i in range(x):
+            obs.append((i, y - 1))
+
+        for i in range(y):
+            obs.append((0, i))
+        for i in range(y):
+            obs.append((x - 1, i))
+
+        return obs
+
+    def lose_map(self):
+        """
+        Initialize losing states' positions
+        :return: losing states
+        """
+
+        lose = []
+        for i in range(2, 12):
+            lose.append((i, 1))
+
+        return lose
+
+    def state_space(self):
+        """
+        generate state space
+        :return: state space
+        """
+
+        state_space = []
+        for i in range(self.x_range):
+            for j in range(self.y_range):
+                if (i, j) not in self.obs:
+                    state_space.append((i, j))
+
+        return state_space
+
+    def get_reward(self, x_next):
+        """
+        calculate reward of next state
+
+        :param x_next: next state
+        :return: reward
+        """
+
+        if x_next in self.lose:
+            return -100  # reward : -100, for lose states
+        return -1  # reward : -1, for other states

+ 37 - 32
Model-free Control/motion_model.py

@@ -3,37 +3,42 @@
 """
 @author: huiming zhou
 """
-import numpy as np
-
-motions = [(1, 0), (-1, 0), (0, 1), (0, -1)]                # feasible motion sets
-
-
-def move_prob(x, u, obs, eta = 0.2):
-    """
-    Motion model of robots,
-
-    :param x: current state (node)
-    :param u: input
-    :param obs: obstacle map
-    :param eta: noise in motion model
-    :return: next states and corresponding probability
-    """
-
-    p_next = [1 - eta, eta / 2, eta / 2]
-    x_next = []
-    if u == (0, 1):
-        u_real = [(0, 1), (-1, 0), (1, 0)]
-    elif u == (0, -1):
-        u_real = [(0, -1), (-1, 0), (1, 0)]
-    elif u == (-1, 0):
-        u_real = [(-1, 0), (0, 1), (0, -1)]
-    else:
-        u_real = [(1, 0), (0, 1), (0, -1)]
-
-    for act in u_real:
-        if (x[0] + act[0], x[1] + act[1]) in obs:
-            x_next.append(x)
+
+import env
+
+class Motion_model():
+    def __init__(self, xI, xG):
+        self.env = env.Env(xI, xG)
+        self.obs = self.env.obs_map()
+
+
+    def move_next(self, x, u, eta=0.2):
+        """
+        Motion model of robots,
+
+        :param x: current state (node)
+        :param u: input
+        :param obs: obstacle map
+        :param eta: noise in motion model
+        :return: next states and corresponding probability
+        """
+
+        p_next = [1 - eta, eta / 2, eta / 2]
+        x_next = []
+        if u == (0, 1):
+            u_real = [(0, 1), (-1, 0), (1, 0)]
+        elif u == (0, -1):
+            u_real = [(0, -1), (-1, 0), (1, 0)]
+        elif u == (-1, 0):
+            u_real = [(-1, 0), (0, 1), (0, -1)]
         else:
-            x_next.append((x[0] + act[0], x[1] + act[1]))
+            u_real = [(1, 0), (0, 1), (0, -1)]
+
+        for act in u_real:
+            x_check = (x[0] + act[0], x[1] + act[1])
+            if x_check in self.obs:
+                x_next.append(x)
+            else:
+                x_next.append(x_check)
 
-    return x_next, p_next
+        return x_next, p_next

+ 121 - 0
Model-free Control/plotting.py

@@ -0,0 +1,121 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+@author: huiming zhou
+"""
+
+import matplotlib.pyplot as plt
+import env
+
+class Plotting():
+    def __init__(self, xI, xG):
+        self.xI, self.xG = xI, xG
+        self.env = env.Env(self.xI, self.xG)
+        self.obs = self.env.obs_map()
+        self.lose = self.env.lose_map()
+
+
+    def animation(self, path, name):
+        """
+        animation.
+
+        :param path: optimal path
+        :param name: tile of figure
+        :return: an animation
+        """
+
+        plt.figure(1)
+        self.plot_grid(name)
+        self.plot_lose()
+        self.plot_path(path)
+
+
+    def plot_grid(self, name):
+        """
+        plot the obstacles in environment.
+
+        :param name: title of figure
+        :return: plot
+        """
+
+        obs_x = [self.obs[i][0] for i in range(len(self.obs))]
+        obs_y = [self.obs[i][1] for i in range(len(self.obs))]
+
+        plt.plot(self.xI[0], self.xI[1], "bs", ms = 24)
+        plt.plot(self.xG[0], self.xG[1], "gs", ms = 24)
+
+        plt.plot(obs_x, obs_y, "sk", ms = 24)
+        plt.title(name)
+        plt.axis("equal")
+
+
+    def plot_lose(self):
+        """
+        plot losing states in environment.
+        :return: a plot
+        """
+
+        lose_x = [self.lose[i][0] for i in range(len(self.lose))]
+        lose_y = [self.lose[i][1] for i in range(len(self.lose))]
+
+        plt.plot(lose_x, lose_y, color = '#A52A2A', marker = 's', ms = 24)
+
+
+    def plot_visited(self, visited):
+        """
+        animation of order of visited nodes.
+
+        :param visited: visited nodes
+        :return: animation
+        """
+
+        visited.remove(self.xI)
+        count = 0
+
+        for x in visited:
+            count += 1
+            plt.plot(x[0], x[1], linewidth='3', color='#808080', marker='o')
+            plt.gcf().canvas.mpl_connect('key_release_event', lambda event:
+            [exit(0) if event.key == 'escape' else None])
+
+            if count < len(visited) / 3:
+                length = 15
+            elif count < len(visited) * 2 / 3:
+                length = 30
+            else:
+                length = 45
+
+            if count % length == 0: plt.pause(0.001)
+
+
+    def plot_path(self, path):
+        path.remove(self.xI)
+        path.remove(self.xG)
+
+        for x in path:
+            plt.plot(x[0], x[1], color='#808080', marker='o', ms = 23)
+            plt.gcf().canvas.mpl_connect('key_release_event', lambda event:
+            [exit(0) if event.key == 'escape' else None])
+            plt.pause(0.001)
+        plt.show()
+        plt.pause(0.5)
+
+
+    def plot_diff(self, diff, name):
+        plt.figure(2)
+        plt.title(name, fontdict=None)
+        plt.xlabel('iterations')
+        plt.ylabel('difference of successive iterations')
+        plt.grid('on')
+
+        count = 0
+        for x in diff:
+            plt.plot(count, x, color='#808080', marker='o')  # plot dots for animation
+            plt.gcf().canvas.mpl_connect('key_release_event', lambda event:
+            [exit(0) if event.key == 'escape' else None])
+            plt.pause(0.07)
+            count += 1
+
+        plt.plot(diff, color='#808080')
+        plt.pause(0.01)
+        plt.show()

+ 0 - 92
Model-free Control/tools.py

@@ -1,92 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-"""
-@author: huiming zhou
-"""
-
-import matplotlib.pyplot as plt
-
-
-def extract_path(xI, xG, parent, actions):
-    """
-    Extract the path based on the relationship of nodes.
-
-    :param xI: Starting node
-    :param xG: Goal node
-    :param parent: Relationship between nodes
-    :param actions: Action needed for transfer between two nodes
-    :return: The planning path
-    """
-
-    path_back = [xG]
-    acts_back = [actions[xG]]
-    x_current = xG
-    while True:
-        x_current = parent[x_current]
-        path_back.append(x_current)
-        acts_back.append(actions[x_current])
-        if x_current == xI: break
-
-    return list(reversed(path_back)), list(reversed(acts_back))
-
-
-def showPath(xI, xG, path):
-    """
-    Plot the path.
-
-    :param xI: Starting node
-    :param xG: Goal node
-    :param path: Planning path
-    :return: A plot
-    """
-
-    path.remove(xI)
-    path.remove(xG)
-    path_x = [path[i][0] for i in range(len(path))]
-    path_y = [path[i][1] for i in range(len(path))]
-    plt.plot(path_x, path_y, linewidth='5', color='r', linestyle='-')
-    plt.pause(0.001)
-    plt.show()
-
-
-def show_map(xI, xG, obs_map, lose_map, name):
-    """
-    Plot the background you designed.
-
-    :param xI: starting state
-    :param xG: goal states
-    :param obs_map: positions of obstacles
-    :param lose_map: positions of losing state
-    :param name: name of this figure
-    :return: a figure
-    """
-
-    obs_x = [obs_map[i][0] for i in range(len(obs_map))]
-    obs_y = [obs_map[i][1] for i in range(len(obs_map))]
-
-    lose_x = [lose_map[i][0] for i in range(len(lose_map))]
-    lose_y = [lose_map[i][1] for i in range(len(lose_map))]
-
-    plt.plot(xI[0], xI[1], "bs", ms = 24)                                    # plot starting state (blue)
-    plt.plot(xG[0], xG[1], "gs", ms = 24)                                    # plot goal states (green)
-
-    plt.plot(obs_x, obs_y, "sk", ms = 24)                                    # plot obstacles (black)
-    plt.plot(lose_x, lose_y, marker = 's', color = '#A52A2A', ms = 24)       # plot losing states (grown)
-    plt.title(name, fontdict=None)
-    plt.axis("equal")
-
-
-def plot_dots(x):
-    """
-    Plot state x for animation
-
-    :param x: current node
-    :return: a plot
-    """
-
-    plt.plot(x[0], x[1], linewidth='3', color='#808080', marker='o', ms = 23)    # plot dots for animation
-    plt.gcf().canvas.mpl_connect('key_release_event',
-                                 lambda event: [exit(0) if event.key == 'escape' else None])
-    plt.pause(0.1)
-
-

+ 6 - 10
Search-based Planning/.idea/workspace.xml

@@ -2,21 +2,17 @@
 <project version="4">
   <component name="ChangeListManager">
     <list default="true" id="025aff36-a6aa-4945-ab7e-b2c625055f47" name="Default Changelist" comment="">
+      <change beforePath="$PROJECT_DIR$/../Model-free Control/Q-learning.py" beforeDir="false" afterPath="$PROJECT_DIR$/../Model-free Control/Q-learning.py" afterDir="false" />
+      <change beforePath="$PROJECT_DIR$/../Model-free Control/Sarsa.py" beforeDir="false" afterPath="$PROJECT_DIR$/../Model-free Control/Sarsa.py" afterDir="false" />
+      <change beforePath="$PROJECT_DIR$/../Model-free Control/env.py" beforeDir="false" afterPath="$PROJECT_DIR$/../Model-free Control/env.py" afterDir="false" />
+      <change beforePath="$PROJECT_DIR$/../Model-free Control/motion_model.py" beforeDir="false" afterPath="$PROJECT_DIR$/../Model-free Control/motion_model.py" afterDir="false" />
+      <change beforePath="$PROJECT_DIR$/../Model-free Control/tools.py" beforeDir="false" afterPath="$PROJECT_DIR$/../Model-free Control/plotting.py" afterDir="false" />
       <change beforePath="$PROJECT_DIR$/.idea/workspace.xml" beforeDir="false" afterPath="$PROJECT_DIR$/.idea/workspace.xml" afterDir="false" />
       <change beforePath="$PROJECT_DIR$/a_star.py" beforeDir="false" afterPath="$PROJECT_DIR$/a_star.py" afterDir="false" />
       <change beforePath="$PROJECT_DIR$/bfs.py" beforeDir="false" afterPath="$PROJECT_DIR$/bfs.py" afterDir="false" />
       <change beforePath="$PROJECT_DIR$/dfs.py" beforeDir="false" afterPath="$PROJECT_DIR$/dfs.py" afterDir="false" />
       <change beforePath="$PROJECT_DIR$/dijkstra.py" beforeDir="false" afterPath="$PROJECT_DIR$/dijkstra.py" afterDir="false" />
-      <change beforePath="$PROJECT_DIR$/plotting.py" beforeDir="false" afterPath="$PROJECT_DIR$/plotting.py" afterDir="false" />
       <change beforePath="$PROJECT_DIR$/queue.py" beforeDir="false" afterPath="$PROJECT_DIR$/queue.py" afterDir="false" />
-      <change beforePath="$PROJECT_DIR$/../Stochastic Shortest Path/Q-policy_iteration.py" beforeDir="false" afterPath="$PROJECT_DIR$/../Stochastic Shortest Path/Q-policy_iteration.py" afterDir="false" />
-      <change beforePath="$PROJECT_DIR$/../Stochastic Shortest Path/Q-value_iteration.py" beforeDir="false" afterPath="$PROJECT_DIR$/../Stochastic Shortest Path/Q-value_iteration.py" afterDir="false" />
-      <change beforePath="$PROJECT_DIR$/../Stochastic Shortest Path/env.py" beforeDir="false" afterPath="$PROJECT_DIR$/../Stochastic Shortest Path/env.py" afterDir="false" />
-      <change beforePath="$PROJECT_DIR$/../Stochastic Shortest Path/gif/VI_E.gif" beforeDir="false" afterPath="$PROJECT_DIR$/../Stochastic Shortest Path/gif/VI_E.gif" afterDir="false" />
-      <change beforePath="$PROJECT_DIR$/../Stochastic Shortest Path/motion_model.py" beforeDir="false" afterPath="$PROJECT_DIR$/../Stochastic Shortest Path/motion_model.py" afterDir="false" />
-      <change beforePath="$PROJECT_DIR$/../Stochastic Shortest Path/policy_iteration.py" beforeDir="false" afterPath="$PROJECT_DIR$/../Stochastic Shortest Path/policy_iteration.py" afterDir="false" />
-      <change beforePath="$PROJECT_DIR$/../Stochastic Shortest Path/tools.py" beforeDir="false" afterPath="$PROJECT_DIR$/../Stochastic Shortest Path/plotting.py" afterDir="false" />
-      <change beforePath="$PROJECT_DIR$/../Stochastic Shortest Path/value_iteration.py" beforeDir="false" afterPath="$PROJECT_DIR$/../Stochastic Shortest Path/value_iteration.py" afterDir="false" />
     </list>
     <option name="EXCLUDED_CONVERTED_TO_IGNORED" value="true" />
     <option name="SHOW_DIALOG" value="false" />
@@ -40,7 +36,7 @@
   </component>
   <component name="PropertiesComponent">
     <property name="ASKED_ADD_EXTERNAL_FILES" value="true" />
-    <property name="last_opened_file_path" value="$PROJECT_DIR$/../Stochastic Shortest Path" />
+    <property name="last_opened_file_path" value="$PROJECT_DIR$" />
     <property name="restartRequiresConfirmation" value="false" />
     <property name="settings.editor.selected.configurable" value="com.jetbrains.python.configuration.PyActiveSdkModuleConfigurable" />
   </component>

+ 15 - 14
Search-based Planning/a_star.py

@@ -12,16 +12,16 @@ class Astar:
     def __init__(self, x_start, x_goal, heuristic_type):
         self.xI, self.xG = x_start, x_goal
 
-        self.Env = env.Env()
-        self.plotting = plotting.Plotting(self.xI, self.xG)
+        self.Env = env.Env()                                                        # class Env
+        self.plotting = plotting.Plotting(self.xI, self.xG)                         # class Plotting
 
-        self.u_set = self.Env.motions                               # feasible input set
-        self.obs = self.Env.obs                                     # position of obstacles
+        self.u_set = self.Env.motions                                               # feasible input set
+        self.obs = self.Env.obs                                                     # position of obstacles
 
         [self.path, self.policy, self.visited] = self.searching(self.xI, self.xG, heuristic_type)
 
         self.fig_name = "A* Algorithm"
-        self.plotting.animation(self.path, self.visited, self.fig_name)  # animation generate
+        self.plotting.animation(self.path, self.visited, self.fig_name)             # animation generate
 
 
     def searching(self, xI, xG, heuristic_type):
@@ -31,26 +31,26 @@ class Astar:
         :return: planning path, action in each node, visited nodes in the planning process
         """
 
-        q_astar = queue.QueuePrior()                            # priority queue
+        q_astar = queue.QueuePrior()                                                # priority queue
         q_astar.put(xI, 0)
-        parent = {xI: xI}                                       # record parents of nodes
-        action = {xI: (0, 0)}                                   # record actions of nodes
+        parent = {xI: xI}                                                           # record parents of nodes
+        action = {xI: (0, 0)}                                                       # record actions of nodes
         visited = []
         cost = {xI: 0}
 
         while not q_astar.empty():
             x_current = q_astar.get()
-            if x_current == xG:                                 # stop condition
+            if x_current == xG:                                                     # stop condition
                 break
             visited.append(x_current)
-            for u_next in self.u_set:                           # explore neighborhoods of current node
+            for u_next in self.u_set:                                   # explore neighborhoods of current node
                 x_next = tuple([x_current[i] + u_next[i] for i in range(len(x_current))])
                 if x_next not in self.obs:
                     new_cost = cost[x_current] + self.get_cost(x_current, u_next)
-                    if x_next not in cost or new_cost < cost[x_next]:           # conditions for updating cost
+                    if x_next not in cost or new_cost < cost[x_next]:   # conditions for updating cost
                         cost[x_next] = new_cost
                         priority = new_cost + self.Heuristic(x_next, xG, heuristic_type)
-                        q_astar.put(x_next, priority)           # put node into queue using priority "f+h"
+                        q_astar.put(x_next, priority)                   # put node into queue using priority "f+h"
                         parent[x_next], action[x_next] = x_current, u_next
 
         [path, policy] = self.extract_path(xI, xG, parent, action)
@@ -113,6 +113,7 @@ class Astar:
 
 
 if __name__ == '__main__':
-    x_Start = (5, 5)                # Starting node
-    x_Goal = (49, 5)                # Goal node
+    x_Start = (5, 5)  # Starting node
+    x_Goal = (49, 5)  # Goal node
+
     astar = Astar(x_Start, x_Goal, "manhattan")

+ 9 - 9
Search-based Planning/bfs.py

@@ -15,13 +15,13 @@ class BFS:
         self.Env = env.Env()
         self.plotting = plotting.Plotting(self.xI, self.xG)
 
-        self.u_set = self.Env.motions                               # feasible input set
-        self.obs = self.Env.obs                                     # position of obstacles
+        self.u_set = self.Env.motions                                       # feasible input set
+        self.obs = self.Env.obs                                             # position of obstacles
 
         [self.path, self.policy, self.visited] = self.searching(self.xI, self.xG)
 
         self.fig_name = "Breadth-first Searching"
-        self.plotting.animation(self.path, self.visited, self.fig_name)  # animation generate
+        self.plotting.animation(self.path, self.visited, self.fig_name)     # animation generate
 
 
     def searching(self, xI, xG):
@@ -31,10 +31,10 @@ class BFS:
         :return: planning path, action in each node, visited nodes in the planning process
         """
 
-        q_bfs = queue.QueueFIFO()                                     # first-in-first-out queue
+        q_bfs = queue.QueueFIFO()                                           # first-in-first-out queue
         q_bfs.put(xI)
-        parent = {xI: xI}                                   # record parents of nodes
-        action = {xI: (0, 0)}                                    # record actions of nodes
+        parent = {xI: xI}                                                   # record parents of nodes
+        action = {xI: (0, 0)}                                               # record actions of nodes
         visited = []
 
         while not q_bfs.empty():
@@ -42,13 +42,13 @@ class BFS:
             if x_current == xG:
                 break
             visited.append(x_current)
-            for u_next in self.u_set:                                 # explore neighborhoods of current node
+            for u_next in self.u_set:                                       # explore neighborhoods of current node
                 x_next = tuple([x_current[i] + u_next[i] for i in range(len(x_current))])
-                if x_next not in parent and x_next not in self.obs:   # node not visited and not in obstacles
+                if x_next not in parent and x_next not in self.obs:         # node not visited and not in obstacles
                     q_bfs.put(x_next)
                     parent[x_next], action[x_next] = x_current, u_next
 
-        [path, policy] = self.extract_path(xI, xG, parent, action)     # extract path
+        [path, policy] = self.extract_path(xI, xG, parent, action)          # extract path
 
         return path, policy, visited
 

+ 3 - 3
Search-based Planning/dfs.py

@@ -15,13 +15,13 @@ class DFS:
         self.Env = env.Env()
         self.plotting = plotting.Plotting(self.xI, self.xG)
 
-        self.u_set = self.Env.motions                   # feasible input set
-        self.obs = self.Env.obs                         # position of obstacles
+        self.u_set = self.Env.motions                                           # feasible input set
+        self.obs = self.Env.obs                                                 # position of obstacles
 
         [self.path, self.policy, self.visited] = self.searching(self.xI, self.xG)
 
         self.fig_name = "Depth-first Searching"
-        self.plotting.animation(self.path, self.visited, self.fig_name)      # animation generate
+        self.plotting.animation(self.path, self.visited, self.fig_name)         # animation generate
 
 
     def searching(self, xI, xG):

+ 3 - 3
Search-based Planning/dijkstra.py

@@ -15,13 +15,13 @@ class Dijkstra:
         self.Env = env.Env()
         self.plotting = plotting.Plotting(self.xI, self.xG)
 
-        self.u_set = self.Env.motions                               # feasible input set
-        self.obs = self.Env.obs                                     # position of obstacles
+        self.u_set = self.Env.motions                                           # feasible input set
+        self.obs = self.Env.obs                                                 # position of obstacles
 
         [self.path, self.policy, self.visited] = self.searching(self.xI, self.xG)
 
         self.fig_name = "Dijkstra's Algorithm"
-        self.plotting.animation(self.path, self.visited, self.fig_name)  # animation generate
+        self.plotting.animation(self.path, self.visited, self.fig_name)         # animation generate
 
 
     def searching(self, xI, xG):

+ 0 - 1
Search-based Planning/queue.py

@@ -3,7 +3,6 @@
 
 """
 @author: Huiming Zhou
-@description: this file defines three kinds of queues that will be used in algorithms.
 """
 
 import collections