import os
import re

import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from sentence_transformers import SentenceTransformer
from utilities.skills import (
    extract_resume_skills,
    extract_required_skills_from_jd,
    SKILLS_SORTED_BY_LENGTH,
    clean_text,
)

# Default embedding model; override it with the SEMANTIC_MODEL environment variable.
# NOTE(review): this comment used to say MPNet is stronger than MiniLM for
# long-form resume/JD similarity, but the default below is
# msmarco-distilbert-base-v4 — confirm which model is actually intended.
SEMANTIC_MODEL_ID = os.getenv("SEMANTIC_MODEL", "msmarco-distilbert-base-v4")
MAX_DOC_CHARS = 8000  # default character cap used by truncate_text()
MAX_CHUNKS = 24  # default cap on the number of segments from split_into_chunks()
MIN_CHUNK_CHARS = 35  # segments shorter than this are discarded as too short

# Instantiated once at import time and reused by the embedding-based scorers below.
model = SentenceTransformer(SEMANTIC_MODEL_ID)


# ---------------------------------------------------------------------------
# Stop-word list — common English words that pollute keyword matching
# ---------------------------------------------------------------------------
STOP_WORDS: set[str] = {
    # Function words: articles, conjunctions, prepositions, auxiliaries, pronouns.
    "a", "an", "the", "and", "or", "but", "in", "on", "at", "to", "for",
    "of", "with", "by", "from", "as", "is", "was", "are", "were", "be",
    "been", "being", "have", "has", "had", "do", "does", "did", "will",
    "would", "could", "should", "may", "might", "shall", "can", "need",
    "that", "this", "these", "those", "it", "its", "we", "our", "you",
    "your", "they", "their", "he", "she", "his", "her", "i", "my",
    "not", "no", "so", "if", "then", "than", "also", "just", "only",
    "about", "up", "out", "over", "into", "through", "during", "including",
    # Generic resume / job-description filler that carries no skill information.
    "used", "use", "using", "work", "working", "works", "strong", "good",
    "experience", "experiences", "role", "team", "company", "environment",
    "ability", "skills", "skill", "looking", "required", "requirement",
    "plus", "bonus", "nice", "preferred", "knowledge", "understanding",
    # "on" already appears in the first row; the repeat is harmless in a set.
    # Presumably "hands" / "on" are the two halves of "hands-on" — TODO confirm.
    "familiarity", "proficiency", "proficient", "hands", "on",
}


# ---------------------------------------------------------------------------
# Text utilities
# ---------------------------------------------------------------------------

def truncate_text(text: str, max_chars: int = MAX_DOC_CHARS) -> str:
    """
    Shorten ``text`` to at most ``max_chars`` characters without splitting a word.

    Text that already fits is returned unchanged. Otherwise the result is the
    longest prefix that ends on a word boundary. When the prefix holds no usable
    boundary (for example a single very long token) the hard character cut is
    returned instead of an empty string. A non-positive ``max_chars`` yields "".
    """
    if max_chars <= 0:
        return ""
    if len(text) <= max_chars:
        return text

    head = text[:max_chars]

    # The cut fell exactly between two words, so the last word in ``head`` is
    # complete and must not be thrown away.
    if text[max_chars].isspace() or head[-1].isspace():
        return head.rstrip()

    # The cut landed inside a word: drop the partial trailing word.
    trimmed = head.rsplit(" ", 1)[0]

    # A prefix such as " verylongtoken" would trim down to nothing; prefer the
    # hard cut over returning an empty document.
    return trimmed.rstrip() if trimmed.strip() else head


def split_into_chunks(text: str, max_chunks: int = MAX_CHUNKS) -> list[str]:
    """
    Break a resume/JD into short comparable segments.

    The text is first cut on line breaks and on whitespace that follows ".",
    "!" or "?". Segments shorter than MIN_CHUNK_CHARS (after stripping) are
    dropped. If nothing survives, the text is re-cut into fixed windows of
    55 words, again keeping only windows of at least MIN_CHUNK_CHARS
    characters. At most ``max_chunks`` segments are returned, in document order.
    """
    if not text:
        return []

    segments: list[str] = []
    for fragment in re.split(r"[\n\r]+|(?<=[.!?])\s+", text):
        trimmed = fragment.strip()
        if len(trimmed) >= MIN_CHUNK_CHARS:
            segments.append(trimmed)

    if segments:
        return segments[:max_chunks]

    # Fallback: no line/sentence was long enough, so slide a word window instead.
    tokens = text.split()
    words_per_window = 55
    windows = (
        " ".join(tokens[start : start + words_per_window])
        for start in range(0, len(tokens), words_per_window)
    )
    segments = [piece for piece in windows if len(piece) >= MIN_CHUNK_CHARS]
    return segments[:max_chunks]


def extract_skill_sentences(text: str) -> str:
    """
    Keep only the segments of ``text`` that mention a known skill.

    The text is cut on line breaks, "." and ";". Each segment is passed through
    clean_text(); segments shorter than MIN_CHUNK_CHARS or containing no entry
    of SKILLS_SORTED_BY_LENGTH are discarded. The survivors are joined with
    single spaces. When no segment qualifies, clean_text(text) is returned so
    the caller still gets the whole cleaned document.

    NOTE(review): the skill test is a plain ``in`` containment check on the
    cleaned segment, so short skill names may match inside longer words
    depending on the taxonomy — confirm against utilities.skills.
    """
    cleaned_segments = (clean_text(raw) for raw in re.split(r"[\n\r.;]+", text))
    kept = [
        segment
        for segment in cleaned_segments
        if len(segment) >= MIN_CHUNK_CHARS
        and any(skill in segment for skill in SKILLS_SORTED_BY_LENGTH)
    ]

    if not kept:
        return clean_text(text)
    return " ".join(kept)


def calibrate_semantic_score(cosine: float) -> float:
    """
    Rescale a raw cosine similarity onto a 0–100 ATS-style score.

    The input is first clamped to [0, 1]. It is then stretched linearly so that
    0.20 maps to 0 and 0.78 maps to 100; anything outside that band saturates
    at the nearest end. The result is rounded to two decimals.
    """
    floor, ceiling = 0.20, 0.78
    span = ceiling - floor

    bounded = float(np.clip(cosine, 0.0, 1.0))
    percent = (bounded - floor) / span * 100.0

    # Saturate instead of reporting negative or >100 scores.
    percent = min(max(percent, 0.0), 100.0)
    return round(float(percent), 2)


def _pairwise_cosine(a: np.ndarray, b: np.ndarray) -> float:
    """Return the cosine similarity of two embedding vectors as a plain float."""
    # cosine_similarity works on batches, so wrap each vector as a one-row batch
    # and read back the single cell of the resulting 1x1 matrix.
    similarity = cosine_similarity([a], [b])
    return float(similarity[0][0])


def _chunk_bidirectional_score(resume_chunks: list[str], jd_chunks: list[str]) -> float:
    """How well JD requirements are covered by resume (and vice versa)."""
    if not resume_chunks or not jd_chunks:
        return 0.0

    resume_emb = model.encode(resume_chunks, convert_to_numpy=True)
    jd_emb = model.encode(jd_chunks, convert_to_numpy=True)
    sim_matrix = cosine_similarity(resume_emb, jd_emb)

    jd_coverage = float(sim_matrix.max(axis=0).mean())
    resume_coverage = float(sim_matrix.max(axis=1).mean())
    return (jd_coverage + resume_coverage) / 2.0


def remove_stop_words(text: str) -> set:
    """
    Return the distinct whitespace-separated tokens of ``text`` that are not stop words.

    NOTE(review): the comparison is exact, and every STOP_WORDS entry is
    lowercase, so callers presumably pass already-lowercased text — confirm.
    """
    return {token for token in text.split() if token not in STOP_WORDS}


# ---------------------------------------------------------------------------
# Scoring functions
# ---------------------------------------------------------------------------

def keyword_match_score(resume_text: str, jd_text: str) -> float:
    """
    Frequency-weighted skill coverage of the JD by the resume, as a percentage.

    Skills are pulled from both texts with the taxonomy helpers. Each JD skill
    carries a weight equal to the frequency reported by
    extract_required_skills_from_jd(), so a skill the JD repeats counts for more
    (and costs more when the resume lacks it).

        score = sum(weight of JD skills found in resume) / sum(all JD weights) * 100

    Returns 0.0 when the JD yields no recognised skills. The result is rounded
    to two decimals.
    """
    required = extract_required_skills_from_jd(jd_text)  # mapping: skill -> frequency
    candidate_skills = extract_resume_skills(resume_text)

    if not required:
        return 0.0

    weight_total = 0
    weight_matched = 0
    for skill, freq in required.items():
        weight_total += freq
        if skill in candidate_skills:
            weight_matched += freq

    return round(weight_matched / weight_total * 100, 2)


def _normalize_for_embedding(text: str) -> str:
    """
    Rewrite a resume or JD into a shared "skills: ... context: ..." layout.

    Resumes and JDs are phrased differently (first-person achievements versus
    third-person requirements). Putting both into the same skill-centric shape
    is meant to make the embedding compare skill vocabulary rather than
    writing style.

    The "skills" part is the alphabetically sorted output of
    extract_resume_skills() on the cleaned text; the "context" part is the
    cleaned output of extract_skill_sentences() on the original text.
    """
    skills_found = extract_resume_skills(clean_text(text))
    context_part = clean_text(extract_skill_sentences(text))
    skills_part = " ".join(sorted(skills_found))
    return f"skills: {skills_part} context: {context_part}"


def semantic_match_score(resume_text: str, jd_text: str) -> float:
    """
    Blend three embedding similarities into one calibrated 0–100 score.

    Signals and weights:
      50%  whole-document similarity on the skill-centric normalised texts
      35%  chunk-level bi-directional coverage on the raw texts
      15%  similarity of the skill-bearing sentences only

    Signals 2 and 3 fall back to the whole-document similarity when they cannot
    be computed (fewer than two chunks on either side, or an empty skill text).
    Returns 0.0 when either input is empty or whitespace only.
    """
    if not (resume_text.strip() and jd_text.strip()):
        return 0.0

    # Signal 1: both documents in the same neutral layout, embedded together.
    normalised_pair = [
        _normalize_for_embedding(resume_text),
        _normalize_for_embedding(jd_text),
    ]
    resume_vec, jd_vec = model.encode(normalised_pair, convert_to_numpy=True)
    doc_similarity = _pairwise_cosine(resume_vec, jd_vec)

    # Signal 2: chunking runs on the RAW text because it relies on the
    # newline / sentence punctuation that cleaning may remove.
    resume_parts = split_into_chunks(resume_text)
    jd_parts = split_into_chunks(jd_text)
    enough_chunks = len(resume_parts) > 1 and len(jd_parts) > 1
    if enough_chunks:
        chunk_similarity = _chunk_bidirectional_score(resume_parts, jd_parts)
    else:
        chunk_similarity = doc_similarity

    # Signal 3: compare only the sentences that mention skills.
    resume_skill_lines = extract_skill_sentences(resume_text)
    jd_skill_lines = extract_skill_sentences(jd_text)
    skill_similarity = doc_similarity
    if resume_skill_lines and jd_skill_lines:
        skill_vectors = model.encode(
            [
                truncate_text(resume_skill_lines, 4000),
                truncate_text(jd_skill_lines, 4000),
            ],
            convert_to_numpy=True,
        )
        skill_similarity = _pairwise_cosine(skill_vectors[0], skill_vectors[1])

    blended = 0.50 * doc_similarity + 0.35 * chunk_similarity + 0.15 * skill_similarity
    return calibrate_semantic_score(blended)


# Seniority markers are matched as whole words (optionally plural) so that
# "lead" does not fire on "leading"/"misleading", nor "architect" on
# "architecture".
_SENIOR_SIGNAL_RE = re.compile(
    r"\b(?:(?:senior|lead|principal|architect)s?|staff|head\s+of)\b",
    re.IGNORECASE,
)
# Likewise "intern" must not fire on "internal"/"international", nor "graduate"
# on "undergraduate". "internship(s)" is kept as an explicit junior marker.
_JUNIOR_SIGNAL_RE = re.compile(
    r"\b(?:(?:junior|graduate|fresher|intern(?:ship)?)s?|entry[\s-]level)\b",
    re.IGNORECASE,
)


def experience_level_penalty(resume_text: str, jd_text: str) -> float:
    """
    Detect a seniority mismatch between resume and JD and return a penalty.

    Both texts are scanned, case-insensitively, for whole-word seniority
    markers (senior / lead / principal / architect / staff / "head of") and
    junior markers (junior / graduate / fresher / intern / internship /
    "entry level" / "entry-level").

    Returns:
        10.0 when the JD signals a senior role and the resume signals junior,
        3.0 when the JD signals a junior role and the resume signals senior
        (over-qualification, minor penalty), otherwise 0.0. The senior-JD
        rule is checked first when a JD carries both kinds of marker.
    """
    jd_is_senior = _SENIOR_SIGNAL_RE.search(jd_text) is not None
    jd_is_junior = _JUNIOR_SIGNAL_RE.search(jd_text) is not None

    resume_is_senior = _SENIOR_SIGNAL_RE.search(resume_text) is not None
    resume_is_junior = _JUNIOR_SIGNAL_RE.search(resume_text) is not None

    # JD wants senior but the resume reads as junior.
    if jd_is_senior and resume_is_junior:
        return 10.0
    # JD wants junior but the resume reads as senior (over-qualified).
    if jd_is_junior and resume_is_senior:
        return 3.0
    return 0.0


# ---------------------------------------------------------------------------
# Final composite score
# ---------------------------------------------------------------------------

def final_ats_score(resume_text: str, jd_text: str) -> dict:
    """
    Composite ATS score weighted as:
      45%  semantic similarity  (contextual understanding)
      55%  keyword match        (skill taxonomy match, frequency-weighted)

    A seniority mismatch penalty (0–10 pts) is subtracted from the blended
    score, and the result is floored at 0.

    NOTE(review): this docstring previously advertised a 60/40 split while the
    code applied 0.45/0.55. The documentation now follows the code; if 60/40
    was the intended split, change the two weights below instead.

    Returns a dict with the keys "semantic_score", "keyword_score" and
    "final_ats_score" (each rounded to two decimals), compatible with the
    ScoreResponse schema. The penalty itself is not reported separately.
    """
    # Named so the weights and their documentation cannot silently drift apart.
    semantic_weight = 0.45
    keyword_weight = 0.55

    semantic = semantic_match_score(resume_text, jd_text)
    keyword = keyword_match_score(resume_text, jd_text)
    penalty = experience_level_penalty(resume_text, jd_text)

    blended = round(semantic_weight * semantic + keyword_weight * keyword, 2)
    final = round(max(0.0, blended - penalty), 2)

    return {
        "semantic_score": round(semantic, 2),
        "keyword_score": round(keyword, 2),
        "final_ats_score": final,
    }


# ---------------------------------------------------------------------------
# Smoke-test
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    # (resume, job description) pairs covering: a near-identical match, a vague
    # resume, keyword stuffing, an unrelated profession, a seniority mismatch,
    # and a close NLP/ML match.
    sample_pairs = (
        (
            "Python developer with FastAPI, SQL, and machine learning experience",
            "Looking for a Python developer with FastAPI, SQL, and ML skills",
        ),
        (
            "Built backend services using Python frameworks and databases",
            "Python developer with FastAPI and SQL",
        ),
        (
            "Python Python Python SQL SQL FastAPI",
            "Python developer with FastAPI and SQL",
        ),
        (
            "Professional photographer specialising in portraits and wildlife",
            "Hiring a machine learning engineer with Python and PyTorch",
        ),
        (
            "Led ML teams, deployed large-scale models, optimised transformers",
            "Junior Python developer with basic ML",
        ),
        (
            "NLP engineer: PyTorch, HuggingFace transformers, LLM fine-tuning, RAG pipelines",
            "Senior ML engineer: LLM, RAG, fine-tuning, Python, AWS SageMaker",
        ),
    )

    header = f"{'#':<3} {'Semantic':>10} {'Keyword':>10} {'Final ATS':>10}"
    print(header)
    print("-" * 38)

    # Rows are numbered from 0, in the order the pairs are listed above.
    for idx, (resume, jd) in enumerate(sample_pairs):
        scores = final_ats_score(resume, jd)
        print(
            f"{idx:<3} {scores['semantic_score']:>10} "
            f"{scores['keyword_score']:>10} "
            f"{scores['final_ats_score']:>10}"
        )