from __future__ import annotations

import time
from collections.abc import Callable
from functools import lru_cache

from .dynamics import Counts, allocations_from_counts, legal_next_counts, move_cost, round_service_reward
from .models import HiddenRecipe, V3Action


ProgressCallback = Callable[[dict[str, object]], None]


def solve_exact(
    recipe: HiddenRecipe,
    start_round: int = 0,
    start_counts: Counts | None = None,
    progress_callback: ProgressCallback | None = None,
) -> tuple[float, list[V3Action]]:
    counts = start_counts or recipe.initial_courier_counts
    legal_cache: dict[Counts, tuple[Counts, ...]] = {}
    started_at = time.perf_counter()
    state_counter = {"visited": 0}

    @lru_cache(maxsize=None)
    def value(round_index: int, current_counts: Counts) -> tuple[float, tuple[Counts, ...]]:
        state_counter["visited"] += 1
        if progress_callback and state_counter["visited"] % 200 == 0:
            progress_callback(
                {
                    "round_index": round_index,
                    "elapsed_seconds": time.perf_counter() - started_at,
                    "states_visited": state_counter["visited"],
                }
            )

        round_template = recipe.rounds[round_index]
        if round_index >= recipe.profile.total_rounds - 1:
            reward, _, _ = round_service_reward(
                round_template=round_template,
                current_counts=current_counts,
                missed_order_penalty=recipe.profile.missed_order_penalty,
            )
            return reward, ()

        best_total = float("-inf")
        best_path: tuple[Counts, ...] = ()
        candidate_next_counts = legal_cache.get(current_counts)
        if candidate_next_counts is None:
            candidate_next_counts = legal_next_counts(recipe, current_counts)
            legal_cache[current_counts] = candidate_next_counts

        service_reward, _, _ = round_service_reward(
            round_template=round_template,
            current_counts=current_counts,
            missed_order_penalty=recipe.profile.missed_order_penalty,
        )
        for next_counts in candidate_next_counts:
            total = service_reward - move_cost(recipe, round_index, current_counts, next_counts)
            future_total, future_path = value(round_index + 1, next_counts)
            total += future_total
            if total > best_total:
                best_total = total
                best_path = (next_counts, *future_path)
        return best_total, best_path

    reward, path = value(start_round, counts)
    return reward, [allocations_from_counts(recipe, next_counts) for next_counts in path]


def best_action(recipe: HiddenRecipe, round_index: int, current_counts: Counts) -> V3Action:
    _, plan = solve_exact(recipe, start_round=round_index, start_counts=current_counts)
    return plan[0] if plan else allocations_from_counts(recipe, current_counts)