# ----------
# User Instructions:
#
# Create a function compute_value which returns
# a grid of values. The value of a cell is the minimum
# number of moves required to get from the cell to the goal.
#
# If a cell is a wall or it is impossible to reach the goal from a cell,
# assign that cell a value of 99.
# ----------
import heapq
from collections import deque

import numpy as np
from icecream import ic

from first_search import check_navigable

grid = [[0, 1, 0, 0, 0, 0],
        [0, 1, 0, 0, 0, 0],
        [0, 1, 0, 0, 0, 0],
        [0, 1, 0, 0, 0, 0],
        [0, 0, 0, 0, 1, 0]]
grid = np.array([[0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0],
                [0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0],
                [0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0],
                [0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0],
                [0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0],
                [0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0],
                [0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0],
                [0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0],
                [0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0],
                [0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0]])

# init = [0,0]
goal = [len(grid)-1, len(grid[0])-1]
cost = 1  # the cost associated with moving from a cell to an adjacent one

delta = [[-1, 0],  # go up
         [0, -1],  # go left
         [1, 0],  # go down
         [0, 1]]  # go right

delta_name = ['^', '<', 'v', '>']


def heuristic(*args):
    return 0


def compute_value(grid, goal, cost):
    """Compute the reversed cost map. The goal point will have zero cost and
    each grid has a cost value representing the cost from here to the goal.

    Parameters
    ----------
    grid : np.ndarray
        Map of the env.
    goal : np.ndarray
        Goal position.
    cost : float
        Cost value for each step.

    Returns
    -------
    np.ndarray
        A reversed cost map.
    """
    grid = np.array(grid)
    goal = np.array(goal)

    open_list = []
    heapq.heappush(open_list, np.concatenate(
        ([heuristic(goal, goal), 0], goal)))
    closed_list = dict()
    closed_list[tuple(goal)] = 0
    cost_map = np.zeros([len(grid), len(grid[0])]) + float('inf')
    cost_map[goal[0], goal[1]] = 0
    expand_map = np.zeros([len(grid), len(grid[0])]) - float('inf')
    current_expand_num = 0
    expand_map[goal[0], goal[1]] = current_expand_num

    while open_list:
        current_center_grid = heapq.heappop(open_list)
        current_center_grid = current_center_grid[2:4]  # type: list[int]
        if check_navigable(grid, current_center_grid):
            for move_num in range(len(delta)):
                next = np.array(current_center_grid)+delta[move_num]
                next = next.astype('int')  # type: np.ndarray[int]
                if check_navigable(grid, next) and tuple(next) not in closed_list:
                    current_expand_num += 1
                    current_cost = closed_list[current_center_grid[0],
                                               current_center_grid[1]]+cost
                    expand_grid = [heuristic(next, goal)+current_expand_num,
                                   current_expand_num] + list(next)
                    heapq.heappush(open_list, expand_grid)
                    closed_list[tuple(next)] = current_cost
                    cost_map[next[0], next[1]] = current_cost
                    expand_map[next[0], next[1]] = current_expand_num
    return cost_map


def generate_policy(cost_map, goal):

    def check_navigable(cost_map, pos):
        goal = np.array(pos).astype(int)
        if all(goal >= np.array([0, 0])) and all(goal <= np.array([len(cost_map)-1, len(cost_map[0])-1])):
            if cost_map[goal[0], goal[1]] != float('inf'):
                return True
        return False

    delta = np.array([[-1, 0],  # type: np.ndarray[int]
                      [0, -1],  # go left
                      [1, 0],  # go down
                      [0, 1]])  # go right
    cost_map = np.array(cost_map)
    policy_map = np.array([['+' for _ in range(len(cost_map[0]))]
                          for _ in range(len(cost_map))])
    policy_map[goal[0], goal[1]] = '*'
    open_list = deque()
    checked = set([tuple(goal)])
    neighbor_grids = np.array([goal for _ in range(4)]) + delta
    neighbor_grids = map(lambda pos: pos if check_navigable(
        cost_map, pos) else False, neighbor_grids)
    #  print(list(neighbor_grids))
    for pos in neighbor_grids:
        if type(pos) == np.ndarray:
            open_list.append(pos)

    while open_list:
        current_grid = open_list.popleft()
        checked.add(tuple(current_grid))
        neighbor_grids = np.array([current_grid for _ in range(4)]) + delta
        neighbor_costs = list(map(lambda pos: cost_map[pos[0], pos[1]] if check_navigable(
            cost_map, pos) else float('inf'), neighbor_grids))
        # expand the open list
        for i in range(4):
            if neighbor_costs[i] != float('inf') and tuple(neighbor_grids[i]) not in checked:
                open_list.append(neighbor_grids[i])
        minimum_cost_grid_index = np.argmin(neighbor_costs)
        dlt_opst = ['^', '<', 'v', '>']
        policy_map[current_grid[0], current_grid[1]
                   ] = dlt_opst[minimum_cost_grid_index]

    return policy_map


cost_map = compute_value(grid, goal, cost)
ic(generate_policy(cost_map, goal))