@@ -15,17 +15,17 @@ import copy
 
 class Policy_iteration:
     def __init__(self, x_start, x_goal):
         self.xI, self.xG = x_start, x_goal
-        self.e = 0.001  # threshold for convergence
-        self.gamma = 0.9  # discount factor
+        self.e = 0.001  # threshold for convergence
+        self.gamma = 0.9  # discount factor
 
         self.env = env.Env(self.xI, self.xG)
         self.motion = motion_model.Motion_model(self.xI, self.xG)
         self.plotting = plotting.Plotting(self.xI, self.xG)
 
-        self.u_set = self.env.motions  # feasible input set
-        self.stateSpace = self.env.stateSpace  # state space
-        self.obs = self.env.obs_map()  # position of obstacles
-        self.lose = self.env.lose_map()  # position of lose states
+        self.u_set = self.env.motions  # feasible input set
+        self.stateSpace = self.env.stateSpace  # state space
+        self.obs = self.env.obs_map()  # position of obstacles
+        self.lose = self.env.lose_map()  # position of lose states
 
         self.name1 = "policy_iteration, gamma=" + str(self.gamma)
 
@@ -45,7 +45,7 @@ class Policy_iteration:
 
         delta = sys.maxsize
 
-        while delta > self.e:  # convergence condition
+        while delta > self.e:  # convergence condition
             x_value = 0
             for x in self.stateSpace:
                 if x not in self.xG:
 
@@ -91,8 +91,8 @@ class Policy_iteration:
         count = 0
 
         for x in self.stateSpace:
-            value_table[x] = 0  # initialize value table
-            policy[x] = self.u_set[0]  # initialize policy table
+            value_table[x] = 0  # initialize value table
+            policy[x] = self.u_set[0]  # initialize policy table
 
         while True:
             count += 1
 
@@ -117,7 +117,7 @@ class Policy_iteration:
         """
 
         value = 0
-        reward = self.env.get_reward(x)  # get reward of next state
+        reward = self.env.get_reward(x)  # get reward of next state
         for i in range(len(x)):
             value += p[i] * (reward[i] + self.gamma * table[x[i]])  # cal Q-value
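
Reviewer note: the changes above appear to be comment re-alignment only, so behavior should be unchanged. For context, the Q-value line in the last hunk is the usual Bellman backup. Below is a minimal sketch of that computation, assuming `x` is the list of candidate next states, `p` their transition probabilities, `reward` the per-successor rewards from `env.get_reward(x)`, and `table` the current value estimates; the helper name is hypothetical and not part of this diff.

def q_value_sketch(next_states, probs, rewards, value_table, gamma=0.9):
    # expected one-step return: sum_i p_i * (r_i + gamma * V(s_i'))
    value = 0.0
    for s_next, p_i, r_i in zip(next_states, probs, rewards):
        value += p_i * (r_i + gamma * value_table[s_next])
    return value

# example: two equally likely successors valued 1.0 and 0.0, reward -1 for each
# q = 0.5 * (-1 + 0.9 * 1.0) + 0.5 * (-1 + 0.9 * 0.0) = -0.55

Policy iteration then alternates evaluation (re-estimating value_table under the current policy until the change per sweep drops below self.e) and improvement (setting policy[x] to the action with the largest Q-value), which is the loop the earlier hunks belong to.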