zhm-real 5 anos atrás
pai
commit
3232325783

+ 1 - 0
Stochastic Shortest Path/Q-policy_iteration.py

@@ -112,6 +112,7 @@ class Q_policy_iteration:
         reward = env.get_reward(x, self.xG, self.lose)                  # get reward of next state
         for i in range(len(x)):
             value += p[i] * (reward[i] + self.gamma * table[x[i]][policy[x[i]]])
+
         return value
 
 

+ 1 - 0
Stochastic Shortest Path/env.py

@@ -47,6 +47,7 @@ def lose_map():
     lose = []
     for i in range(25, 36):
         lose.append((i, 13))
+        
     return lose
 
 

+ 1 - 0
Stochastic Shortest Path/motion_model.py

@@ -34,4 +34,5 @@ def move_prob(x, u, obs, eta = 0.2):
             x_next.append(x)
         else:
             x_next.append((x[0] + act[0], x[1] + act[1]))
+
     return x_next, p_next