"""Gap Analysis + Recommendation services.

Gap analysis — Weighted Competency Fulfillment Algorithm.
See research/design-decision-changes.md §Module 4 for derivation. Pure functions,
no DB writes: the report is computed on demand from UserSkill + RoleSkill state.

Recommendations — composite ranking over the Resource catalog.
See research/08-next-modules-build-plan.md §Module A. Type weights from
research/03-gap-analysis-engine-research.md §2.1 are the single source of truth
for type preference.
"""
from __future__ import annotations

from dataclasses import asdict, dataclass, field
from typing import Iterable

from apps.resources.models import Resource, SkillResource
from apps.roles.models import Role, RoleSkill
from apps.skills.models import UserSkill
from apps.skills.utils import proficiency_to_level


# Proficiency a user must reach for a requirement at each level to count as
# met; compute_gap_report looks the threshold up by RoleSkill.required_level.
LEVEL_THRESHOLDS: dict[str, int] = {
    'BEGINNER': 40,
    'INTERMEDIATE': 60,
    'ADVANCED': 100,
}

# Ceiling applied to readiness when at least one mandatory skill is below its
# threshold and the raw weighted readiness would otherwise exceed this value.
MANDATORY_CAP = 60.0

# (floor, band) pairs in DESCENDING floor order. _band_for returns the first
# band whose floor the readiness reaches, so the order here is significant.
BANDS: list[tuple[float, str]] = [
    (85.0, 'STRONG'),
    (65.0, 'GOOD'),
    (45.0, 'PARTIAL'),
    (0.0, 'WEAK'),
]


@dataclass
class SkillGap:
    """One role requirement compared against the user's proficiency.

    compute_gap_report builds one instance per RoleSkill row of the role.
    """

    skill_id: int
    skill_name: str
    category: str
    required_level: str  # key into LEVEL_THRESHOLDS
    threshold: int  # LEVEL_THRESHOLDS[required_level]
    user_proficiency: int  # 0 when the user has no UserSkill row for the skill
    weight: float
    is_mandatory: bool
    gap: int  # max(threshold - user_proficiency, 0)
    satisfaction: float  # min(user_proficiency / threshold, 1.0), rounded to 4 dp
    gap_type: str  # 'MET', 'MISSING' or 'INSUFFICIENT' (see _classify)
    severity: str | None  # 'CRITICAL' / 'HIGH' / 'MEDIUM' / 'LOW'; None when MET

    def to_dict(self) -> dict:
        """Return every field as a plain dict via dataclasses.asdict."""
        return asdict(self)


@dataclass
class GapReport:
    """Readiness summary for one user against one role, plus per-skill gaps."""

    role_id: int
    role_name: str
    readiness: float
    band: str
    mandatory_cap_applied: bool
    no_requirements: bool
    gaps: list[SkillGap] = field(default_factory=list)

    def to_dict(self) -> dict:
        """Serialise the report to a plain dict.

        Readiness is rounded to 2 decimal places, the module-level
        MANDATORY_CAP is echoed under 'mandatory_cap', and each gap is
        serialised through its own to_dict().
        """
        rounded_readiness = round(self.readiness, 2)
        serialised_gaps = [gap.to_dict() for gap in self.gaps]
        # Keyword order fixes the key order of the resulting dict.
        return dict(
            role_id=self.role_id,
            role_name=self.role_name,
            readiness=rounded_readiness,
            band=self.band,
            mandatory_cap_applied=self.mandatory_cap_applied,
            mandatory_cap=MANDATORY_CAP,
            no_requirements=self.no_requirements,
            gaps=serialised_gaps,
        )


def _band_for(readiness: float) -> str:
    """Map a readiness value to its band name.

    BANDS is scanned in its declared order and the first band whose floor is
    <= readiness wins. 'WEAK' is the fallback when no floor is reached.
    """
    return next(
        (label for minimum, label in BANDS if readiness >= minimum),
        'WEAK',
    )


def _classify(
    user_proficiency: int,
    threshold: int,
    is_mandatory: bool,
) -> tuple[str, str | None]:
    """Return (gap_type, severity) per §3 of design-decision-changes.md."""
    if user_proficiency >= threshold:
        return 'MET', None

    if user_proficiency == 0:
        return 'MISSING', 'CRITICAL' if is_mandatory else 'MEDIUM'

    ratio = (threshold - user_proficiency) / threshold
    if is_mandatory or ratio >= 0.5:
        severity = 'HIGH'
    elif ratio >= 0.25:
        severity = 'MEDIUM'
    else:
        severity = 'LOW'
    return 'INSUFFICIENT', severity


def _user_proficiency_map(user_id: int, skill_ids: Iterable[int]) -> dict[int, int]:
    """Return {skill_id: proficiency} for the user's rows among skill_ids.

    Skills the user has no UserSkill row for are simply absent from the
    result; callers supply their own default.
    """
    wanted_ids = list(skill_ids)
    proficiency_by_skill: dict[int, int] = {}
    for user_skill in UserSkill.objects.filter(
        user_id=user_id, skill_id__in=wanted_ids
    ):
        proficiency_by_skill[user_skill.skill_id] = user_skill.proficiency
    return proficiency_by_skill


# Sort rank per severity: larger means more severe. None is the severity of a
# MET skill, so MET entries sort after every real gap.
_SEVERITY_RANK: dict[str | None, int] = {
    'CRITICAL': 4,
    'HIGH': 3,
    'MEDIUM': 2,
    'LOW': 1,
    None: 0,
}


def _gap_sort_key(gap: SkillGap) -> tuple[int, float, str]:
    """Order gaps most-severe first, then heaviest weight, then by skill name."""
    return (-_SEVERITY_RANK[gap.severity], -gap.weight, gap.skill_name)


def compute_gap_report(user, role: Role) -> GapReport:
    """Compute the readiness report of ``user`` for ``role``.

    Readiness is the weight-averaged satisfaction over the role's RoleSkill
    rows, expressed as a percentage, where satisfaction for one skill is
    ``min(user_proficiency / threshold, 1.0)``. If any mandatory skill is below
    its threshold, readiness is capped at MANDATORY_CAP.

    Args:
        user: object exposing ``.id``; used to look up UserSkill rows.
        role: the Role whose RoleSkill requirements are evaluated.

    Returns:
        A GapReport. ``gaps`` is ALWAYS sorted most-severe first (then by
        weight descending, then skill name), including on the zero-weight
        early return. A role without RoleSkill rows yields readiness 100 with
        ``no_requirements=True`` and no gaps.

    Raises:
        KeyError: if a RoleSkill's ``required_level`` is not a key of
            LEVEL_THRESHOLDS.
    """
    role_skills = list(
        RoleSkill.objects.filter(role=role).select_related('skill')
    )

    if not role_skills:
        return GapReport(
            role_id=role.id,
            role_name=role.role_name,
            readiness=100.0,
            band=_band_for(100.0),
            mandatory_cap_applied=False,
            no_requirements=True,
        )

    prof_map = _user_proficiency_map(user.id, (rs.skill_id for rs in role_skills))

    gaps: list[SkillGap] = []
    weighted_sat_sum = 0.0
    weight_sum = 0.0
    mandatory_shortfall = False

    for rs in role_skills:
        threshold = LEVEL_THRESHOLDS[rs.required_level]
        # A skill the user has never recorded counts as proficiency 0.
        user_prof = prof_map.get(rs.skill_id, 0)
        satisfaction = min(user_prof / threshold, 1.0)
        gap_points = max(threshold - user_prof, 0)
        gap_type, severity = _classify(user_prof, threshold, rs.is_mandatory)

        if rs.is_mandatory and satisfaction < 1.0:
            mandatory_shortfall = True

        weight_sum += rs.weight
        weighted_sat_sum += satisfaction * rs.weight

        gaps.append(SkillGap(
            skill_id=rs.skill_id,
            skill_name=rs.skill.skill_name,
            category=rs.skill.category,
            required_level=rs.required_level,
            threshold=threshold,
            user_proficiency=user_prof,
            weight=rs.weight,
            is_mandatory=rs.is_mandatory,
            gap=gap_points,
            satisfaction=round(satisfaction, 4),
            gap_type=gap_type,
            severity=severity,
        ))

    # Sort BEFORE any return so every code path hands back severity-ordered
    # gaps; consumers that take "the first non-MET gaps" as the most critical
    # ones rely on this ordering, and previously the zero-weight branch below
    # returned the gaps in raw query order.
    gaps.sort(key=_gap_sort_key)

    if weight_sum <= 0:
        # No positive weight to aggregate. With the RoleSkill DB CheckConstraints
        # in force a mandatory skill always has weight>0, so weight_sum<=0 implies
        # no mandatory shortfall and the documented zero-weight contract returns
        # 100/no_requirements. The mandatory branch is defense-in-depth against a
        # constraint-bypassing bulk write — it avoids a 0.0/0.0 ZeroDivisionError.
        readiness = 0.0 if mandatory_shortfall else 100.0
        return GapReport(
            role_id=role.id,
            role_name=role.role_name,
            readiness=readiness,
            band=_band_for(readiness),
            # readiness here is 0 (mandatory unmet) or 100 (no requirements);
            # neither is a value "capped down to 60", so the cap is not applied.
            mandatory_cap_applied=False,
            no_requirements=not mandatory_shortfall,
            gaps=gaps,
        )

    raw_readiness = (weighted_sat_sum / weight_sum) * 100.0
    # The cap only counts as "applied" when it actually lowers the value.
    cap_applied = mandatory_shortfall and raw_readiness > MANDATORY_CAP
    readiness = MANDATORY_CAP if cap_applied else raw_readiness
    readiness = max(0.0, min(readiness, 100.0))

    return GapReport(
        role_id=role.id,
        role_name=role.role_name,
        readiness=readiness,
        band=_band_for(readiness),
        mandatory_cap_applied=cap_applied,
        no_requirements=False,
        gaps=gaps,
    )


# ---------------------------------------------------------------------------
# Recommendation service
# ---------------------------------------------------------------------------
#
# Recommendations are ranked by a TWO-KEYED order so that material which is
# clearly the wrong level can never leapfrog appropriately-levelled material on
# quality (type/relevance/rating) alone:
#
#   1. LEVEL TIER (primary) — the distance between the user's CURRENT level for
#      the skill (proficiency_to_level(), NOT the role's required level) and the
#      resource's difficulty. distance 0-1 = "near" (tier 0); distance >=2 =
#      "far" (tier 1). Every near resource ranks above every far one. So a
#      beginner always sees BEGINNER/INTERMEDIATE content before ADVANCED — a
#      hard guarantee, independent of how good the far resource looks otherwise.
#
#   2. COMPOSITE QUALITY SCORE (secondary) — orders resources WITHIN a tier:
#
#        score = TYPE_WEIGHT[type] * w_type
#              + DIFFICULTY_MATCH(user_current_level, difficulty) * w_diff
#              + relevance_score * 10 * w_rel          # normalise to 0..10
#              + (rating / 5.0) * 10 * w_rating        # normalise to 0..10
#
#      Within the near tier the difficulty term still gives exact-level a gentle
#      edge over one-level-off, but it stays SOFT: a much more relevant/stronger
#      one-level-off resource can still outrank a weak exact-level one. The only
#      HARD rule is the near-over-far split above.
#
# Coefficients (single source of truth — keep in sync with the formula above
# and the W_* constants below):
#
#   w_type    = 1.0   (decides the order when all else is equal — follows the
#                      type preference from research §2.1
#                      "Video > Course > Article > Docs")
#   w_diff    = 1.0
#   w_rel     = 1.5   (relevance is heavily weighted — a very-relevant DOCS
#                      can outrank a somewhat-relevant VIDEO at the same level)
#   w_rating  = 0.5
#
# Notable consequences, verified by unit tests:
#   * Equal rating/relevance/difficulty → VIDEO > COURSE > ARTICLE > DOCS.
#   * A resource <=1 level from the user ALWAYS ranks above one >=2 levels away,
#     regardless of type/relevance/rating (the level-tier guarantee).
#   * Within a tier, exact-level beats one-level-off on equal quality, yet a
#     relevance_score=1.0 DOCS can outrank a relevance_score=0.3 VIDEO at the
#     same level (relevance is weighted 1.5x), both at rating 3.5/5:
#       DOCS : 4*1.0 + 10*1.0 + (1.0*10)*1.5 + (3.5/5*10)*0.5 = 32.5
#       VIDEO: 10*1.0 + 10*1.0 + (0.3*10)*1.5 + (3.5/5*10)*0.5 = 28.0
#   * Stable tie-break by resource_id ASC so API snapshots don't flap.

# Base points per resource type. _score looks types up with .get(type_, 0),
# so a type missing from this table contributes 0 rather than raising.
TYPE_WEIGHTS: dict[str, int] = {
    'VIDEO': 10,
    'COURSE': 8,
    'ARTICLE': 6,
    'DOCS': 4,
}

# Ordinal rank of each level; the "distance" between two levels is the
# absolute difference of their ranks.
DIFFICULTY_ORDER: dict[str, int] = {
    'BEGINNER': 0,
    'INTERMEDIATE': 1,
    'ADVANCED': 2,
}

# Multipliers for the four terms of the composite score computed by _score
# (type, difficulty match, relevance, rating — in that order).
W_TYPE = 1.0
W_DIFF = 1.0
W_REL = 1.5
W_RATING = 0.5

# Points returned by _difficulty_match_score for a level distance of 0, of 1,
# and of anything else, respectively.
DIFFICULTY_EXACT = 10.0
DIFFICULTY_ADJACENT = 5.0
DIFFICULTY_MISMATCH = 0.0

# A resource within this many levels of the user's current level is "near"
# (tier 0); anything further is "far" (tier 1) and is ranked below every near
# resource. With the 3-level scale this means: appropriate-level + one step are
# near, a two-step mismatch (e.g. ADVANCED for a BEGINNER) is far.
NEAR_MAX_DISTANCE = 1

# Default value of compute_recommendations' limit_per_skill parameter.
DEFAULT_LIMIT_PER_SKILL = 3

# Upper bound on resources returned per skill gap. Caps an adversarial
# ?limit=99999999 so a single request can't try to materialise the whole
# catalog per skill; the curated catalog is small, so 50 is generous.
MAX_LIMIT_PER_SKILL = 50


@dataclass
class RecommendationItem:
    """One ranked resource in a skill's recommendation list.

    compute_recommendations builds it from a SkillResource link and the
    Resource that link points at.
    """

    resource_id: int
    title: str
    provider: str
    url: str
    type: str  # looked up in TYPE_WEIGHTS when scoring
    difficulty_level: str  # key into DIFFICULTY_ORDER
    duration: int  # copied from Resource.duration; unit not visible here — TODO confirm
    rating: float  # scored as rating / 5.0, i.e. presumably a 0..5 scale — confirm
    relevance_score: float  # taken from the SkillResource link, not the Resource
    score: float  # composite score from _score, rounded to 4 dp

    def to_dict(self) -> dict:
        """Return every field as a plain dict via dataclasses.asdict."""
        return asdict(self)


def _difficulty_match_score(anchor_level: str, resource_level: str) -> float:
    """Return the difficulty-match points for a resource.

    The level distance is the absolute difference of the two DIFFICULTY_ORDER
    ranks: distance 0 earns DIFFICULTY_EXACT, distance 1 earns
    DIFFICULTY_ADJACENT, and anything else earns DIFFICULTY_MISMATCH.
    Raises KeyError for a level that is not in DIFFICULTY_ORDER.
    """
    anchor_rank = DIFFICULTY_ORDER[anchor_level]
    resource_rank = DIFFICULTY_ORDER[resource_level]
    points_by_distance = {0: DIFFICULTY_EXACT, 1: DIFFICULTY_ADJACENT}
    return points_by_distance.get(
        abs(anchor_rank - resource_rank), DIFFICULTY_MISMATCH
    )


def _difficulty_tier(anchor_level: str, resource_level: str) -> int:
    """Return the primary ranking key for a resource: 0 = "near", 1 = "far".

    A resource is "near" when its level is at most NEAR_MAX_DISTANCE ranks
    away from the user's current level. Because callers sort on this tier
    BEFORE the composite score, a clearly mismatched resource (e.g. ADVANCED
    for a beginner) can never outrank an appropriately-levelled one on
    type/relevance/rating alone.
    """
    anchor_rank = DIFFICULTY_ORDER[anchor_level]
    resource_rank = DIFFICULTY_ORDER[resource_level]
    if abs(anchor_rank - resource_rank) > NEAR_MAX_DISTANCE:
        return 1
    return 0


def _score(
    type_: str,
    anchor_level: str,
    resource_level: str,
    relevance_score: float,
    rating: float,
) -> float:
    """Return the composite quality score used to order resources within a tier.

    Sum of four weighted terms: type points (0 for a type not in
    TYPE_WEIGHTS), difficulty-match points, relevance scaled by 10, and
    rating / 5 scaled by 10 — weighted by W_TYPE, W_DIFF, W_REL and W_RATING.
    """
    type_term = TYPE_WEIGHTS.get(type_, 0) * W_TYPE
    difficulty_term = _difficulty_match_score(anchor_level, resource_level) * W_DIFF
    relevance_term = (relevance_score * 10.0) * W_REL
    rating_term = (rating / 5.0 * 10.0) * W_RATING
    return type_term + difficulty_term + relevance_term + rating_term


def compute_recommendations(
    user,
    role: Role,
    limit_per_skill: int = DEFAULT_LIMIT_PER_SKILL,
) -> dict:
    """Return {"role_id", "role_name", "recommendations": {skill_id: [...]}}.

    Only returns entries for skills with an actual gap (MISSING or INSUFFICIENT);
    MET skills are skipped. A gap skill with no linked resources maps to an
    empty list.

    Ordering per skill: level tier (near before far), then composite score
    descending, then resource_id ascending as a stable tie-break.

    Args:
        user: object exposing ``.id``.
        role: the Role to analyse.
        limit_per_skill: maximum resources per skill. Clamped to the range
            0..MAX_LIMIT_PER_SKILL; ``None`` is tolerated and treated as
            MAX_LIMIT_PER_SKILL.
    """
    # Clamp at the service boundary. A negative value would otherwise turn the
    # slice below into "drop the last N items", and an oversized value would
    # bypass the documented MAX_LIMIT_PER_SKILL bound.
    if limit_per_skill is None:
        limit = MAX_LIMIT_PER_SKILL
    else:
        limit = max(0, min(limit_per_skill, MAX_LIMIT_PER_SKILL))

    report = compute_gap_report(user, role)

    gap_skills = [g for g in report.gaps if g.gap_type != 'MET']

    recommendations: dict[int, list[dict]] = {}
    if not gap_skills:
        return {
            'role_id': role.id,
            'role_name': role.role_name,
            'recommendations': recommendations,
        }

    skill_ids = [g.skill_id for g in gap_skills]
    links = (
        SkillResource.objects
        .filter(skill_id__in=skill_ids)
        .select_related('resource')
    )

    # Pre-seed every gap skill so one without resources still gets an entry.
    by_skill: dict[int, list[tuple[SkillResource, Resource]]] = {
        sid: [] for sid in skill_ids
    }
    for link in links:
        by_skill[link.skill_id].append((link, link.resource))

    # Anchor difficulty matching to the user's CURRENT level for each gap skill
    # (derived from proficiency), not the role's required level — so learners are
    # steered to material at their level first.
    anchor_by_skill = {
        g.skill_id: proficiency_to_level(g.user_proficiency) for g in gap_skills
    }

    for skill_id, pairs in by_skill.items():
        anchor_level = anchor_by_skill[skill_id]
        scored: list[tuple[int, float, int, RecommendationItem]] = []
        for link, resource in pairs:
            score = _score(
                resource.type,
                anchor_level,
                resource.difficulty_level,
                link.relevance_score,
                resource.rating,
            )
            tier = _difficulty_tier(anchor_level, resource.difficulty_level)
            scored.append((
                tier,
                score,
                resource.id,
                RecommendationItem(
                    resource_id=resource.id,
                    title=resource.title,
                    provider=resource.provider,
                    url=resource.url,
                    type=resource.type,
                    difficulty_level=resource.difficulty_level,
                    duration=resource.duration,
                    rating=resource.rating,
                    relevance_score=link.relevance_score,
                    score=round(score, 4),
                ),
            ))

        # Primary: level tier (near before far). Secondary: composite score desc.
        # Tertiary: resource_id asc for stable, snapshot-friendly ordering.
        # The sort uses the UNROUNDED score; only the reported score is rounded.
        scored.sort(key=lambda t: (t[0], -t[1], t[2]))
        recommendations[skill_id] = [
            item.to_dict() for _, _, _, item in scored[:limit]
        ]

    return {
        'role_id': role.id,
        'role_name': role.role_name,
        'recommendations': recommendations,
    }


# ---------------------------------------------------------------------------
# Role suggestion service
# ---------------------------------------------------------------------------
#
# Ranks every active role by how well the user's current skills fit it, so the
# UI can surface "Recommended for you" (top fit) above the rest of the catalog.
# The fit reflects an uploaded resume automatically: CV → UserSkill → gap
# analysis is already wired, so suggestions update once parsed skills are saved.
#
# Reuses compute_gap_report per role (no duplicated readiness math). For the
# small curated catalog (~10 roles, ceiling ~50) the ~2 queries/role cost is
# negligible; batch into a single RoleSkill + UserSkill pass if it ever grows.

# Default number of missing-skill names listed per role row by suggest_roles.
DEFAULT_TOP_MISSING = 3


def suggest_roles(user, top_missing: int = DEFAULT_TOP_MISSING) -> dict:
    """Return {"has_skills", "roles": [...]} ranked by skill fit.

    Each role row: role_id, role_name, industry, readiness, band,
    matched_skills/total_skills, top_missing_skills, no_requirements.
    Sorted by (no_requirements last, readiness desc, role_name asc,
    role_id asc) — roles with no requirements report readiness=100
    spuriously, so they sink to the bottom rather than top the list. The
    trailing role_id key keeps the order deterministic when two roles share
    a name and a readiness.

    Args:
        user: object exposing ``.id``.
        top_missing: maximum number of missing-skill names per role. Negative
            values are treated as 0; ``None`` is tolerated and means "all".
    """
    has_skills = UserSkill.objects.filter(user_id=user.id).exists()

    # A negative count would make the slice below drop names from the END of
    # the list instead of limiting it, so clamp to zero.
    missing_limit = None if top_missing is None else max(top_missing, 0)

    rows: list[dict] = []
    for role in Role.objects.filter(is_active=True):
        report = compute_gap_report(user, role)
        total = len(report.gaps)
        matched = sum(1 for g in report.gaps if g.gap_type == 'MET')
        # Names are taken in report.gaps order; compute_gap_report sorts gaps
        # most-severe first on its normal path, so the leading non-MET names
        # are the most critical missing skills.
        missing = [g.skill_name for g in report.gaps if g.gap_type != 'MET']
        rows.append({
            'role_id': role.id,
            'role_name': role.role_name,
            'industry': role.industry,
            'readiness': round(report.readiness, 2),
            'band': report.band,
            'matched_skills': matched,
            'total_skills': total,
            'top_missing_skills': missing[:missing_limit],
            'no_requirements': report.no_requirements,
        })

    # False sorts before True, so roles with real requirements come first.
    rows.sort(
        key=lambda r: (
            r['no_requirements'],
            -r['readiness'],
            r['role_name'],
            r['role_id'],
        )
    )
    return {'has_skills': has_skills, 'roles': rows}