4 rokov pred · fc37432a4a
--- a/Planning/dynamic_prog.py
+++ b/Planning/dynamic_prog.py
@@ -0,0 +1,103 @@
 
				+# ----------
			
 
				+# User Instructions:
			
 
				+#
			
 
				+# Create a function compute_value which returns
			
 
				+# a grid of values. The value of a cell is the minimum
			
 
				+# number of moves required to get from the cell to the goal.
			
 
				+#
			
 
				+# If a cell is a wall or it is impossible to reach the goal from a cell,
			
 
				+# assign that cell a value of 99.
			
 
				+# ----------
			
 
				+import heapq
			
 
				+from collections import deque
			
 
				+
			
 
				+import numpy as np
			
 
				+from icecream import ic
			
 
				+
			
 
				+from first_search import check_navigable
			
 
				+
			
 
				+# grid = [[0, 1, 0, 0, 0, 0],
			
 
				+#         [0, 1, 0, 0, 0, 0],
			
 
				+#         [0, 1, 0, 0, 0, 0],
			
 
				+#         [0, 1, 0, 0, 0, 0],
			
 
				+#         [0, 0, 0, 0, 1, 0]]
			
 
				+grid = np.array([[0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0],
			
 
				+                [0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0],
			
 
				+                [0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0],
			
 
				+                [0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0],
			
 
				+                [0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0],
			
 
				+                [0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0],
			
 
				+                [0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0],
			
 
				+                [0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0],
			
 
				+                [0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0],
			
 
				+                [0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0]])
			
 
				+
			
 
				+# init = [0,0]
			
 
				+goal = [len(grid)-1, len(grid[0])-1]
			
 
				+cost = 1  # the cost associated with moving from a cell to an adjacent one
			
 
				+
			
 
				+delta = [[-1, 0],  # go up
			
 
				+         [0, -1],  # go left
			
 
				+         [1, 0],  # go down
			
 
				+         [0, 1]]  # go right
			
 
				+
			
 
				+delta_name = ['^', '<', 'v', '>']
			
 
				+
			
 
				+def heuristic(*args):
			
 
				+    return 0
			
 
				+
			
 
				+def compute_value(grid, goal, cost):
			
 
				+    """Compute the reversed cost map. The goal point will have zero cost and
			
 
				+    each grid has a cost value representing the cost from here to the goal.
			
 
				+
			
 
				+    Parameters
			
 
				+    ----------
			
 
				+    grid : np.ndarray
			
 
				+        Map of the env.
			
 
				+    goal : np.ndarray
			
 
				+        Goal position.
			
 
				+    cost : float
			
 
				+        Cost value for each step.
			
 
				+
			
 
				+    Returns
			
 
				+    -------
			
 
				+    np.ndarray
			
 
				+        A reversed cost map.
			
 
				+    """
			
 
				+    grid = np.array(grid)
			
 
				+    goal = np.array(goal)
			
 
				+
			
 
				+    open_list = []
			
 
				+    heapq.heappush(open_list, np.concatenate(
			
 
				+        ([heuristic(goal, goal), 0], goal)))
			
 
				+    closed_list = dict()
			
 
				+    closed_list[tuple(goal)] = 0
			
 
				+    cost_map = np.zeros([len(grid), len(grid[0])]) + float('inf')
			
 
				+    cost_map[goal[0], goal[1]] = 0
			
 
				+    expand_map = np.zeros([len(grid), len(grid[0])]) - float('inf')
			
 
				+    current_expand_num = 0
			
 
				+    expand_map[goal[0], goal[1]] = current_expand_num
			
 
				+
			
 
				+    while open_list:
			
 
				+        current_center_grid = heapq.heappop(open_list)
			
 
				+        current_center_grid = current_center_grid[2:4]  # type: list[int]
			
 
				+        if check_navigable(grid, current_center_grid):
			
 
				+            for move_num in range(len(delta)):
			
 
				+                next = np.array(current_center_grid)+delta[move_num]
			
 
				+                next = next.astype('int')  # type: np.ndarray[int]
			
 
				+                if check_navigable(grid, next) and tuple(next) not in closed_list:
			
 
				+                    current_expand_num += 1
			
 
				+                    current_cost = closed_list[current_center_grid[0],
			
 
				+                                               current_center_grid[1]]+cost
			
 
				+                    expand_grid = [heuristic(next, goal)+current_expand_num,
			
 
				+                                   current_expand_num] + list(next)
			
 
				+                    heapq.heappush(open_list, expand_grid)
			
 
				+                    closed_list[tuple(next)] = current_cost
			
 
				+                    cost_map[next[0], next[1]] = current_cost
			
 
				+                    expand_map[next[0], next[1]] = current_expand_num
			
 
				+    return cost_map
			
 
				+
			
 
				+def generate_policy(cost_map):
			
 
				+    pass
			
 
				+
			
 
				+ic(compute_value(grid, goal, cost))