import re
from collections import defaultdict
from typing import Dict, List

from src.models import CompanyRanking, JobMatch, JobPosting, ResumeProfile

# Title keywords that earn the entry-level bonus in score_job_match.
ENTRY_LEVEL_TERMS = {"intern", "internship", "entry", "junior", "new grad", "associate", "graduate"}
# Title keywords that trigger the senior-level penalty in score_job_match.
SENIOR_TERMS = {"senior", "staff", "principal", "lead", "manager", "director", "architect"}


def _tokenize(text: str) -> set[str]:
    return set(re.findall(r"[a-zA-Z0-9\+#\.]+", text.lower()))


def _contains_phrase(text: str, phrases: List[str]) -> bool:
    lowered = text.lower()
    return any(phrase.lower() in lowered for phrase in phrases)


def _mentions_term(text: str, term: str) -> bool:
    """Return True when ``term`` appears in ``text`` as a whole word or phrase.

    Matching is case-insensitive. The term must not be glued to a word
    character on either side, and must not be followed by ``+`` or ``#``, so
    ``go`` does not match ``google``, ``java`` does not match ``javascript``
    and ``c`` does not match ``c++`` or ``c#``. Blank terms never match.
    """
    needle = term.strip()
    if not needle:
        return False
    pattern = r"(?<!\w)" + re.escape(needle) + r"(?![\w+#])"
    return re.search(pattern, text, flags=re.IGNORECASE) is not None


def score_job_match(job: JobPosting, profile: ResumeProfile) -> JobMatch:
    """Score how well ``job`` fits ``profile`` on a 0-100 scale.

    The score is the sum of four components, clamped to ``[0, 100]`` and
    rounded to two decimals:

    * skills: 8 points per distinct profile skill/language/framework/tool
      mentioned in the posting's title, department, description or location,
      capped at 40;
    * role: 25 points when the title contains one of ``profile.target_titles``;
      when the profile has no target titles, 12 points for a generic
      engineer/analyst/developer/scientist title;
    * level: +20 when the title contains an entry-level term and -25 when it
      contains a senior term (both can apply to the same title);
    * location: 10 points when any of ``profile.locations`` is mentioned.

    Skills, level terms and locations are matched as whole words rather than
    raw substrings so short terms do not match inside unrelated words.

    Returns:
        A ``JobMatch`` carrying the posting's identifying fields, the score
        and a ``"; "``-joined explanation of the contributing factors.
    """
    blob = " ".join([job.title, job.department, job.description, job.location])

    # Deduplicate case-insensitively so "Python" listed under both skills and
    # languages counts once; keep the first spelling for the explanation.
    skill_pool: dict[str, str] = {}
    for item in profile.skills + profile.languages + profile.frameworks + profile.tools:
        label = item.strip()
        skill_pool.setdefault(label.lower(), label)
    overlap = {label for label in skill_pool.values() if _mentions_term(blob, label)}

    skill_score = min(40.0, 8.0 * len(overlap))

    role_score = 0.0
    if profile.target_titles and _contains_phrase(job.title, profile.target_titles):
        role_score = 25.0
    elif not profile.target_titles and _contains_phrase(job.title, ["engineer", "analyst", "developer", "scientist"]):
        role_score = 12.0

    # Level signals are read from the title only; a title can carry both an
    # entry-level and a senior term, in which case the penalty outweighs the bonus.
    entry_score = 0.0
    if any(_mentions_term(job.title, term) for term in ENTRY_LEVEL_TERMS):
        entry_score += 20.0
    if any(_mentions_term(job.title, term) for term in SENIOR_TERMS):
        entry_score -= 25.0

    location_score = 0.0
    if profile.locations and any(_mentions_term(blob, loc) for loc in profile.locations):
        location_score = 10.0

    total = max(0.0, min(100.0, skill_score + role_score + entry_score + location_score))

    reasons = []
    if overlap:
        reasons.append(f"skill overlap ({', '.join(sorted(overlap)[:4])})")
    if role_score > 0:
        reasons.append("role alignment")
    if entry_score > 0:
        reasons.append("entry-level title")
    if entry_score < 0:
        reasons.append("senior-level penalty")
    if location_score > 0:
        reasons.append("location fit")
    if not reasons:
        reasons.append("limited overlap but still relevant board")

    return JobMatch(
        company=job.company,
        title=job.title,
        location=job.location,
        url=job.url,
        score=round(total, 2),
        explanation="; ".join(reasons),
        ats=job.ats,
    )


def rank_companies(
    matches: List[JobMatch],
    *,
    top_n: int = 5,
    min_match_score: float = 20.0,
) -> List[CompanyRanking]:
    """Aggregate job matches into one ranking entry per company.

    Args:
        matches: Scored job matches, in any order.
        top_n: How many of a company's best-scoring matches are averaged into
            its ``company_score``. Must be at least 1.
        min_match_score: Minimum score for a match to be counted in
            ``match_count``.

    Returns:
        One ``CompanyRanking`` per distinct ``match.company``, sorted by
        ``company_score`` descending. Companies with equal scores keep the
        order in which they first appear in ``matches``. The best role, ATS
        and explanation are taken from the company's highest-scoring match.

    Raises:
        ValueError: If ``top_n`` is less than 1.
    """
    if top_n < 1:
        raise ValueError("top_n must be at least 1")

    grouped: Dict[str, List[JobMatch]] = defaultdict(list)
    for match in matches:
        grouped[match.company].append(match)

    rankings: List[CompanyRanking] = []
    for company, company_matches in grouped.items():
        # Every group holds at least one match, so ``top`` is never empty and
        # the average below cannot divide by zero.
        top = sorted(company_matches, key=lambda m: m.score, reverse=True)[:top_n]
        best = top[0]
        avg_top = sum(match.score for match in top) / len(top)

        rankings.append(
            CompanyRanking(
                company=company,
                company_score=round(avg_top, 2),
                match_count=sum(1 for m in company_matches if m.score >= min_match_score),
                best_role=best.title,
                ats=best.ats,
                explanation=best.explanation,
            )
        )

    return sorted(rankings, key=lambda item: item.company_score, reverse=True)