zhm-real 5 years ago
parent
commit
85d773890a

+ 9 - 9
Stochastic Shortest Path/Q-policy_iteration.py

@@ -15,17 +15,17 @@ import sys
 class Q_policy_iteration:
     def __init__(self, x_start, x_goal):
         self.xI, self.xG = x_start, x_goal
-        self.e = 0.001  # threshold for convergence
-        self.gamma = 0.9  # discount factor
+        self.e = 0.001                                  # threshold for convergence
+        self.gamma = 0.9                                # discount factor
 
-        self.env = env.Env(self.xI, self.xG)
-        self.motion = motion_model.Motion_model(self.xI, self.xG)
-        self.plotting = plotting.Plotting(self.xI, self.xG)
+        self.env = env.Env(self.xI, self.xG)                        # class Env
+        self.motion = motion_model.Motion_model(self.xI, self.xG)   # class Motion_model
+        self.plotting = plotting.Plotting(self.xI, self.xG)         # class Plotting
 
-        self.u_set = self.env.motions  # feasible input set
-        self.stateSpace = self.env.stateSpace  # state space
-        self.obs = self.env.obs_map()  # position of obstacles
-        self.lose = self.env.lose_map()  # position of lose states
+        self.u_set = self.env.motions                               # feasible input set
+        self.stateSpace = self.env.stateSpace                       # state space
+        self.obs = self.env.obs_map()                               # position of obstacles
+        self.lose = self.env.lose_map()                             # position of lose states
 
         self.name1 = "Q-policy_iteration, gamma=" + str(self.gamma)
 

+ 10 - 10
Stochastic Shortest Path/Q-value_iteration.py

@@ -14,17 +14,17 @@ import sys
 class Q_value_iteration:
     def __init__(self, x_start, x_goal):
         self.xI, self.xG = x_start, x_goal
-        self.e = 0.001  # threshold for convergence
-        self.gamma = 0.9  # discount factor
+        self.e = 0.001                                          # threshold for convergence
+        self.gamma = 0.9                                        # discount factor
 
-        self.env = env.Env(self.xI, self.xG)
-        self.motion = motion_model.Motion_model(self.xI, self.xG)
-        self.plotting = plotting.Plotting(self.xI, self.xG)
+        self.env = env.Env(self.xI, self.xG)                            # class Env
+        self.motion = motion_model.Motion_model(self.xI, self.xG)       # class Motion_model
+        self.plotting = plotting.Plotting(self.xI, self.xG)             # class Plotting
 
-        self.u_set = self.env.motions  # feasible input set
-        self.stateSpace = self.env.stateSpace  # state space
-        self.obs = self.env.obs_map()  # position of obstacles
-        self.lose = self.env.lose_map()  # position of lose states
+        self.u_set = self.env.motions                                   # feasible input set
+        self.stateSpace = self.env.stateSpace                           # state space
+        self.obs = self.env.obs_map()                                   # position of obstacles
+        self.lose = self.env.lose_map()                                 # position of lose states
 
         self.name1 = "Q-value_iteration, gamma=" + str(self.gamma)
         self.name2 = "converge process, e=" + str(self.e)
@@ -48,7 +48,7 @@ class Q_value_iteration:
         count = 0
 
         for x in self.stateSpace:
-            Q_table[x] = [0, 0, 0, 0]          # initialize Q_table
+            Q_table[x] = [0, 0, 0, 0]                       # initialize Q_table
 
         while delta > self.e:                               # convergence condition
             count += 1
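
Note: the second hunk shows the core loop. Each sweep applies the Bellman optimality backup to every Q_table entry, Q(x, u) = sum_i p_i * (r_i + gamma * max_u' Q(x'_i, u')), and iteration stops once the largest update delta falls below the threshold e set in __init__. A hedged sketch of that backup, again with next_states/trans_prob as assumed stand-ins for the motion model:

def q_value_iteration(states, actions, next_states, trans_prob, reward,
                      gamma=0.9, e=0.001):
    # next_states/trans_prob are hypothetical stand-ins for Motion_model
    Q = {x: [0.0] * len(actions) for x in states}       # initialize Q_table
    delta = float("inf")
    while delta > e:                                    # convergence condition
        delta = 0.0
        for x in states:
            for k, u in enumerate(actions):
                q_new = sum(p * (reward(x_n) + gamma * max(Q[x_n]))
                            for x_n, p in zip(next_states(x, u),
                                              trans_prob(x, u)))
                delta = max(delta, abs(q_new - Q[x][k]))
                Q[x][k] = q_new
    return Q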

+ 3 - 3
Stochastic Shortest Path/env.py

@@ -88,10 +88,10 @@ class Env():
         reward = []
         for x in x_next:
             if x in self.xG:
-                reward.append(10)  # reward : 10, for goal states
+                reward.append(10)       # reward : 10, for goal states
             elif x in self.lose:
-                reward.append(-10)  # reward : -10, for lose states
+                reward.append(-10)      # reward : -10, for lose states
             else:
-                reward.append(0)  # reward : 0, for other states
+                reward.append(0)        # reward : 0, for other states
 
         return reward
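
Note: the reward scheme above is sparse: +10 on entering a goal state, -10 on entering a lose state, 0 everywhere else. The same logic in isolation, handy for a quick sanity check without constructing Env (goal and lose are assumed here to be sets of grid cells):

def get_reward(x_next, goal, lose):
    # +10 for goal states, -10 for lose states, 0 otherwise
    return [10 if x in goal else -10 if x in lose else 0 for x in x_next]

# get_reward([(5, 5), (0, 1), (2, 3)], goal={(5, 5)}, lose={(0, 1)})
# -> [10, -10, 0]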

+ 10 - 10
Stochastic Shortest Path/policy_iteration.py

@@ -15,17 +15,17 @@ import copy
 class Policy_iteration:
     def __init__(self, x_start, x_goal):
         self.xI, self.xG = x_start, x_goal
-        self.e = 0.001  # threshold for convergence
-        self.gamma = 0.9  # discount factor
+        self.e = 0.001                                      # threshold for convergence
+        self.gamma = 0.9                                    # discount factor
 
         self.env = env.Env(self.xI, self.xG)
         self.motion = motion_model.Motion_model(self.xI, self.xG)
         self.plotting = plotting.Plotting(self.xI, self.xG)
 
-        self.u_set = self.env.motions  # feasible input set
-        self.stateSpace = self.env.stateSpace  # state space
-        self.obs = self.env.obs_map()  # position of obstacles
-        self.lose = self.env.lose_map()  # position of lose states
+        self.u_set = self.env.motions                       # feasible input set
+        self.stateSpace = self.env.stateSpace               # state space
+        self.obs = self.env.obs_map()                       # position of obstacles
+        self.lose = self.env.lose_map()                     # position of lose states
 
         self.name1 = "policy_iteration, gamma=" + str(self.gamma)
 
@@ -45,7 +45,7 @@ class Policy_iteration:
 
         delta = sys.maxsize
 
-        while delta > self.e:           # convergence condition
+        while delta > self.e:                               # convergence condition
             x_value = 0
             for x in self.stateSpace:
                 if x not in self.xG:
@@ -91,8 +91,8 @@ class Policy_iteration:
         count = 0
 
         for x in self.stateSpace:
-            value_table[x] = 0             # initialize value table
-            policy[x] = self.u_set[0]      # initialize policy table
+            value_table[x] = 0                                              # initialize value table
+            policy[x] = self.u_set[0]                                       # initialize policy table
 
         while True:
             count += 1
@@ -117,7 +117,7 @@ class Policy_iteration:
         """
 
         value = 0
-        reward = self.env.get_reward(x)                  # get reward of next state
+        reward = self.env.get_reward(x)                                 # get reward of next state
         for i in range(len(x)):
             value += p[i] * (reward[i] + self.gamma * table[x[i]])      # cal Q-value
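
Note: the "cal Q-value" line in the last hunk is the expected one-step return Q(x, u) = sum_i p_i * (r_i + gamma * V(x'_i)), where x here is the list of successor states of the current state-action pair and p their transition probabilities. Restated as a standalone function with clearer naming (x_next instead of the overloaded x); a sketch, not the repository's exact helper:

def cal_Q_value(x_next, p, reward, table, gamma=0.9):
    # expected one-step return: sum_i p_i * (r_i + gamma * V(x_next_i))
    value = 0.0
    for i in range(len(x_next)):
        value += p[i] * (reward[i] + gamma * table[x_next[i]])
    return value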
 

+ 3 - 3
Stochastic Shortest Path/value_iteration.py

@@ -17,9 +17,9 @@ class Value_iteration:
         self.e = 0.001                                              # threshold for convergence
         self.gamma = 0.9                                            # discount factor
 
-        self.env = env.Env(self.xI, self.xG)
-        self.motion = motion_model.Motion_model(self.xI, self.xG)
-        self.plotting = plotting.Plotting(self.xI, self.xG)
+        self.env = env.Env(self.xI, self.xG)                        # class Env
+        self.motion = motion_model.Motion_model(self.xI, self.xG)   # class Motion_model
+        self.plotting = plotting.Plotting(self.xI, self.xG)         # class Plotting
 
         self.u_set = self.env.motions                               # feasible input set
         self.stateSpace = self.env.stateSpace                       # state space
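
Note: for contrast with the Q-table variant above, value_iteration.py backs up a state-value table directly: V(x) = max_u sum_i p_i * (r_i + gamma * V(x'_i)), swept until the largest update falls below e. A minimal sketch under the same assumed next_states/trans_prob helpers:

def value_iteration(states, actions, next_states, trans_prob, reward,
                    gamma=0.9, e=0.001):
    # next_states/trans_prob are hypothetical stand-ins for Motion_model
    V = {x: 0.0 for x in states}
    delta = float("inf")
    while delta > e:                            # convergence condition
        delta = 0.0
        for x in states:
            v_new = max(sum(p * (reward(x_n) + gamma * V[x_n])
                            for x_n, p in zip(next_states(x, u),
                                              trans_prob(x, u)))
                        for u in actions)
            delta = max(delta, abs(v_new - V[x]))
            V[x] = v_new
    return V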