"""Deterministic grader for trajectory scoring.

Scoring weights (difficulty-aware):
  base score      5%   (participation — guarantees score > 0)
  partial fixes  35%   (proportional to fix ratio)
  complete bonus 25%   (all issues fixed — scales with difficulty)
  efficiency     25%   (decays with extra steps — slower decay for harder tasks)
  hint penalty   -4%   each (reduced to -3% for hard tasks)
  failed edit    -2%   each
  difficulty     +3%   bonus for hard tasks when fully solved

Score is clamped to [0.01, 0.99].
"""

from typing import Any, Dict, List

from server.models import GraderResult, TaskDifficulty
from server.tasks.task_registry import TASK_REGISTRY

# ── Base weights ──────────────────────────────────────────────
# Additive score components and per-event penalties combined by run_grader().
BASE_SCORE = 0.05  # always included, even for an empty trajectory
PARTIAL_FIX_WEIGHT = 0.35  # scaled by issues_fixed / issues_total
COMPLETE_BONUS = 0.25  # added only when issues_fixed == issues_total
EFFICIENCY_MAX = 0.25  # upper bound of the efficiency component
EFFICIENCY_DECAY = 0.03  # per extra step beyond optimal
HINT_PENALTY = 0.04  # per "request_hint" action, before the difficulty multiplier
FAILED_ACTION_PENALTY = 0.02  # per edit action with no edit carrying a file_path

# ── Difficulty modifiers ──────────────────────────────────────
# Maps difficulty → (complete_bonus_extra, efficiency_decay_mult, hint_penalty_mult)
#   complete_bonus_extra: added on top of COMPLETE_BONUS when all issues are fixed
#   efficiency_decay_mult: multiplier on EFFICIENCY_DECAY (lower = more forgiving)
#   hint_penalty_mult: multiplier on HINT_PENALTY (lower = cheaper hints)
# Difficulties missing from this table fall back to (0.00, 1.0, 1.0) in run_grader().
# NOTE(review): only EASY, MEDIUM and HARD are listed; any other TaskDifficulty
# member (if one exists) silently gets the fallback — confirm that is intended.
DIFFICULTY_MODIFIERS = {
    TaskDifficulty.EASY:   (0.00, 1.0, 1.0),
    TaskDifficulty.MEDIUM: (0.00, 0.9, 1.0),
    TaskDifficulty.HARD:   (0.03, 0.7, 0.75),
}

# Final scores are bounded to [SCORE_FLOOR, SCORE_CEIL] by _clamp(), so a
# reported score is never exactly 0.0 or 1.0.
SCORE_FLOOR = 0.01
SCORE_CEIL = 0.99

# Action types that run_grader() treats as edits when counting failed edits.
EDIT_ACTION_TYPES = frozenset({
    "edit_file", "replace_line", "add_line",
    "delete_line", "add_block", "delete_block",
})


def _clamp(value: float) -> float:
    """Round *value* to 4 decimals and bound it to [SCORE_FLOOR, SCORE_CEIL]."""
    rounded = round(value, 4)
    capped = min(SCORE_CEIL, rounded)
    return max(SCORE_FLOOR, capped)


def _get_difficulty(task_id: str) -> TaskDifficulty:
    """Return the registered task's ``DIFFICULTY``; MEDIUM if the id is unknown."""
    registered = TASK_REGISTRY.get(task_id)
    return TaskDifficulty.MEDIUM if registered is None else registered.DIFFICULTY


def _count_failed_edits(trajectory: List[Dict[str, Any]]) -> int:
    """Count edit actions in which no edit entry carries a truthy ``file_path``."""
    failed = 0
    for step in trajectory:
        # ``or {}`` tolerates steps whose "action" key is present but None.
        action = step.get("action") or {}
        if action.get("action_type") not in EDIT_ACTION_TYPES:
            continue
        edits = action.get("edits") or []
        # Skip falsy entries (e.g. None) instead of crashing on ``.get``.
        if not any(e and e.get("file_path") for e in edits):
            failed += 1
    return failed


def _feedback_for(score: float) -> str:
    """Map a clamped score to its human-readable feedback message."""
    if score >= 0.85:
        return "Excellent — all issues fixed efficiently."
    if score >= 0.65:
        return "Good job — most issues fixed."
    if score >= 0.45:
        return "Partial success — some issues remain."
    if score >= 0.25:
        return "Limited progress — review the error messages carefully."
    return "Needs improvement — try analyzing the error phase first."


def run_grader(task_id: str, trajectory: List[Dict[str, Any]]) -> GraderResult:
    """Score a trajectory for ``task_id`` and return the itemised result.

    The score is the sum of a base score, proportional partial-fix credit,
    a completion bonus, a difficulty bonus and an efficiency bonus, minus
    hint and failed-edit penalties, clamped by ``_clamp``.

    Args:
        task_id: Key into ``TASK_REGISTRY``.
        trajectory: Ordered list of step dicts. Each step may carry an
            ``"action"`` dict (``"action_type"``, optional ``"edits"`` list of
            dicts with ``"file_path"``). The last step's ``"info"`` dict
            supplies ``"issues_fixed"`` and ``"issues_total"``.

    Returns:
        A ``GraderResult`` with the clamped score, per-component breakdown,
        feedback text, number of steps and number of hints requested.

    Raises:
        ValueError: If ``task_id`` is not in ``TASK_REGISTRY``.
    """
    if task_id not in TASK_REGISTRY:
        raise ValueError(f"Unknown task: {task_id}")

    difficulty = _get_difficulty(task_id)
    bonus_extra, decay_mult, hint_mult = DIFFICULTY_MODIFIERS.get(
        difficulty, (0.00, 1.0, 1.0)
    )

    if not trajectory:
        return GraderResult(
            task_id=task_id,
            score=_clamp(BASE_SCORE),
            breakdown={
                "base": BASE_SCORE,
                "partial_fixes": 0.0,
                "complete_solution": 0.0,
                "efficiency": 0.0,
                "difficulty_bonus": 0.0,
                "hint_penalty": 0.0,
                "failed_action_penalty": 0.0,
            },
            feedback="No actions taken.",
            steps_taken=0,
            hints_used=0,
        )

    steps_taken = len(trajectory)
    hints_used = sum(
        1 for step in trajectory
        if (step.get("action") or {}).get("action_type") == "request_hint"
    )

    # Progress counters come from the final step only. ``or`` fallbacks
    # tolerate a missing/None "info" dict and None counter values.
    info = trajectory[-1].get("info") or {}
    issues_total = max(1, int(info.get("issues_total") or 1))
    # Bound the fixed count to [0, issues_total] so a malformed report cannot
    # produce a fix ratio outside [0, 1] or miss the completion bonus.
    issues_fixed = min(issues_total, max(0, int(info.get("issues_fixed") or 0)))
    fully_solved = issues_fixed == issues_total
    fix_ratio = issues_fixed / issues_total

    # ── Component 1: Partial fix credit (proportional) ────────
    partial_score = PARTIAL_FIX_WEIGHT * fix_ratio

    # ── Component 2: Full-solution bonus ──────────────────────
    complete_bonus = COMPLETE_BONUS if fully_solved else 0.0

    # ── Component 3: Difficulty bonus ─────────────────────────
    # Extra reward for fully solving harder tasks
    diff_bonus = bonus_extra if fully_solved else 0.0

    # ── Component 4: Efficiency bonus ─────────────────────────
    # One step per issue is treated as optimal; each extra step costs
    # EFFICIENCY_DECAY scaled by the difficulty multiplier. No credit is
    # given when nothing was fixed.
    if issues_fixed == 0:
        efficiency_score = 0.0
    elif steps_taken <= issues_total:
        efficiency_score = EFFICIENCY_MAX
    else:
        extra_steps = steps_taken - issues_total
        effective_decay = EFFICIENCY_DECAY * decay_mult
        efficiency_score = max(0.0, EFFICIENCY_MAX - effective_decay * extra_steps)

    # ── Component 5: Hint penalty ─────────────────────────────
    # Harder tasks get reduced hint penalty (hints are more reasonable)
    hint_pen = HINT_PENALTY * hint_mult * hints_used

    # ── Component 6: Failed action penalty ────────────────────
    failed_pen = FAILED_ACTION_PENALTY * _count_failed_edits(trajectory)

    raw = (
        BASE_SCORE
        + partial_score
        + complete_bonus
        + diff_bonus
        + efficiency_score
        - hint_pen
        - failed_pen
    )
    score = _clamp(raw)

    return GraderResult(
        task_id=task_id,
        score=score,
        breakdown={
            "base": BASE_SCORE,
            "partial_fixes": round(partial_score, 4),
            "complete_solution": round(complete_bonus, 4),
            "difficulty_bonus": round(diff_bonus, 4),
            "efficiency": round(efficiency_score, 4),
            "hint_penalty": round(-hint_pen, 4),
            "failed_action_penalty": round(-failed_pen, 4),
        },
        feedback=_feedback_for(score),
        steps_taken=steps_taken,
        hints_used=hints_used,
    )