5 سال پیش · 3232325783
--- a/Path/Q-policy_iteration.py
+++ b/Path/Q-policy_iteration.py
@@ -112,6 +112,7 @@ class Q_policy_iteration:
 
				         reward = env.get_reward(x, self.xG, self.lose)                  # get reward of next state
			
 
				         for i in range(len(x)):
			
 
				             value += p[i] * (reward[i] + self.gamma * table[x[i]][policy[x[i]]])
			
 
				+
			
 
				         return value
			
 
				 
			
 
				 
			
--- a/Path/env.py
+++ b/Path/env.py
@@ -47,6 +47,7 @@ def lose_map():
 
				     lose = []
			
 
				     for i in range(25, 36):
			
 
				         lose.append((i, 13))
			
 
				+        
			
 
				     return lose
			
 
				 
			
 
				 
			
--- a/Path/motion_model.py
+++ b/Path/motion_model.py
@@ -34,4 +34,5 @@ def move_prob(x, u, obs, eta = 0.2):
 
				             x_next.append(x)
			
 
				         else:
			
 
				             x_next.append((x[0] + act[0], x[1] + act[1]))
			
 
				+
			
 
				     return x_next, p_next