# Source: Hugging Face Space "Klnimri/AI_CV_Matching" (status: Running)
# File size: 49,775 bytes

# app.py — SGS ATS Candidate Matcher (HF Inference API ONLY, Spaces-safe)
# ✅ No transformers / torch / sentence-transformers
# ✅ Uses ONLY huggingface_hub.InferenceClient (works with hub 1.x)
# ✅ Top 10 executive report + shortlist + exports + contacts + progress
# ✅ Max CV uploads = 10
#
# Space secret required:
#   HF_TOKEN  (Settings → Secrets)
#
# Optional env vars:
#   LLM_MODEL   (default: Qwen/Qwen2.5-7B-Instruct)
#   EMBED_MODEL (default: sentence-transformers/all-MiniLM-L6-v2)
#   LLM_BATCH_SIZE, LLM_MAX_TOKENS, LLM_TEMPERATURE

import os
import re
import json
import time
import csv
import hashlib
import tempfile
from typing import List, Dict, Any, Optional, Tuple

import numpy as np
import pandas as pd
import gradio as gr

from huggingface_hub import InferenceClient
from huggingface_hub.errors import BadRequestError, HfHubHTTPError

from pydantic import BaseModel, Field
from pypdf import PdfReader
import docx2txt


# =========================================================
# Models (Inference API)
# =========================================================
# NOTE: Meta Llama repos are often gated on Hugging Face.
# If you have access, you can set LLM_MODEL to e.g. "meta-llama/Llama-3.1-8B-Instruct".
# Model ids are resolved once at import time; an environment variable overrides the default.
LLM_MODEL = os.environ.get("LLM_MODEL", "Qwen/Qwen2.5-7B-Instruct")
EMBED_MODEL = os.environ.get("EMBED_MODEL", "sentence-transformers/all-MiniLM-L6-v2")


# =========================================================
# Controls
# =========================================================
# Upload / truncation limits (characters are counted after text extraction).
MAX_CV_UPLOADS = 10
MAX_CV_CHARS = 120_000
MAX_JD_CHARS = 60_000

# Sliding-window chunking of CV text; the overlap must stay below the chunk size.
CHUNK_SIZE_CHARS = 1100
CHUNK_OVERLAP_CHARS = 180

TOP_CHUNKS_PER_CV = 10          # retrieval
EVIDENCE_CHUNKS_PER_CV = 4      # sent to LLM judge

# Environment overrides are floored to sane minimums: rank_app divides by
# LLM_BATCH_SIZE when it computes the number of batches, so 0 would raise
# ZeroDivisionError, and a non-positive token budget or a negative temperature
# is never a meaningful request.
LLM_BATCH_SIZE = max(1, int(os.getenv("LLM_BATCH_SIZE", "3")))
LLM_MAX_TOKENS = max(1, int(os.getenv("LLM_MAX_TOKENS", "2600")))
LLM_TEMPERATURE = max(0.0, float(os.getenv("LLM_TEMPERATURE", "0.15")))

# When True, retrieval falls back to token-overlap ranking if embeddings fail.
ALLOW_LEXICAL_FALLBACK = True


# =========================================================
# Output schemas
# =========================================================
class RequirementCheck(BaseModel):
    """One row of a candidate's requirements checklist, as returned by the LLM judge."""

    # Short label naming the requirement that was checked.
    requirement: str
    # Plain string, not an enum: apply_strict_scoring counts "met" and "partial"
    # (case-insensitive, trimmed) and treats every other value as missing.
    status: str = Field(..., description="met | partial | missing")
    # Supporting quote; may be an empty string.
    evidence: str = Field(..., description="short quote <=160 chars or empty")


class CandidateLLMResult(BaseModel):
    """The judged result for a single CV; all fields are required when validating."""

    # CV file name; used as the key to join with contact info and local scores.
    filename: str
    # Overwritten by apply_strict_scoring after the LLM returns.
    final_score: float = Field(..., description="0-100")
    # Recomputed from final_score by apply_strict_scoring.
    fit_level: str = Field(..., description="excellent | good | maybe | weak")
    summary: str
    strengths: List[str]
    gaps: List[str]
    risks: List[str]
    # Per-requirement verdicts; an empty list triggers the "no checklist" penalty.
    checklist: List[RequirementCheck]
    # Short quotes shown in the report's "Evidence" section.
    top_evidence: List[str]


class LLMRankingOutput(BaseModel):
    """Top-level JSON object the LLM is asked to return for a batch of candidates."""

    # One entry per judged candidate.
    ranked: List[CandidateLLMResult]
    # Free-text remark about the batch; surfaced in the run metadata.
    overall_notes: str


# =========================================================
# Client
# =========================================================
# Module-wide client instance, created on first use by get_hf_client().
_hf_client: Optional[InferenceClient] = None


def get_hf_client() -> InferenceClient:
    """Return the shared ``InferenceClient``, building it from HF_TOKEN on first call.

    Raises:
        gr.Error: if the HF_TOKEN environment variable is missing or blank.
    """
    global _hf_client
    if _hf_client is None:
        token = os.getenv("HF_TOKEN", "").strip()
        if not token:
            raise gr.Error("HF_TOKEN is not set. Add it in Space Settings → Repository secrets.")
        _hf_client = InferenceClient(token=token)
    return _hf_client


# =========================================================
# Text + files
# =========================================================
def gr_file_to_path(f: Any) -> Optional[str]:
    """Resolve an uploaded-file value to a filesystem path.

    Accepts a plain path string, a dict carrying a ``"path"`` key, or any object
    exposing a ``name`` attribute. Anything else (including ``None``) yields ``None``.
    """
    if isinstance(f, str):
        return f
    if isinstance(f, dict) and "path" in f:
        return f["path"]
    # None and unrecognised objects have no ``name`` attribute, so they map to None.
    return getattr(f, "name", None)


def clean_text(t: str) -> str:
    """Normalise extracted text: NULs become spaces, whitespace runs are collapsed.

    Runs of spaces/tabs shrink to one space, three or more consecutive newlines
    shrink to a single blank line, and the result is trimmed. ``None`` is treated
    as an empty string.
    """
    without_nul = (t or "").replace("\x00", " ")
    single_spaced = re.sub(r"[ \t]+", " ", without_nul)
    return re.sub(r"\n{3,}", "\n\n", single_spaced).strip()


def read_file_to_text(file_path: str) -> str:
    """Extract plain text from a PDF, DOCX or text-like file.

    The format is chosen from the (case-insensitive) file extension:
    ``.pdf`` goes through ``PdfReader``, ``.docx`` through ``docx2txt``, and
    everything else is read as bytes and decoded.

    Args:
        file_path: Path of the file to read.

    Returns:
        The extracted text with leading/trailing whitespace removed.

    Raises:
        OSError: if the file cannot be opened. Errors raised by the PDF/DOCX
            libraries propagate unchanged.
    """
    lower = file_path.lower()
    if lower.endswith(".pdf"):
        reader = PdfReader(file_path)
        # extract_text() may return None for pages without a text layer.
        return "\n".join(page.extract_text() or "" for page in reader.pages).strip()

    if lower.endswith(".docx"):
        return (docx2txt.process(file_path) or "").strip()

    with open(file_path, "rb") as f:
        raw = f.read()

    # UTF-16 files (common for Windows "Unicode" text exports) start with a BOM;
    # decoding them as UTF-8 would interleave every character with NUL bytes.
    if raw.startswith((b"\xff\xfe", b"\xfe\xff")):
        return raw.decode("utf-16", errors="ignore").strip()

    # "utf-8-sig" drops a leading UTF-8 BOM, which str.strip() would not remove.
    # errors="ignore" never raises, so no fallback branch is needed.
    return raw.decode("utf-8-sig", errors="ignore").strip()


def file_bytes_hash(path: str) -> str:
    """Return the SHA-256 hex digest of the file's raw bytes."""
    digest = hashlib.sha256()
    with open(path, "rb") as handle:
        # Feed the hasher in 1 MiB slices; the digest equals hashing the whole file at once.
        for block in iter(lambda: handle.read(1 << 20), b""):
            digest.update(block)
    return digest.hexdigest()


def chunk_text_safe(text: str, chunk_size: int = CHUNK_SIZE_CHARS, overlap: int = CHUNK_OVERLAP_CHARS) -> List[str]:
    """Split *text* into overlapping, whitespace-trimmed character windows.

    Args:
        text: Text to split; ``None``/blank input yields an empty list.
        chunk_size: Maximum number of characters per window. Must be positive.
        overlap: Number of characters shared by consecutive windows. Values
            outside ``0 .. chunk_size - 1`` are clamped into that range so the
            window always advances by at least one character.

    Returns:
        The non-empty chunks in document order.

    Raises:
        ValueError: if *chunk_size* is not positive (for non-empty text).
    """
    text = (text or "").strip()
    if not text:
        return []
    if chunk_size <= 0:
        raise ValueError("chunk_size must be a positive number of characters")

    # An overlap >= chunk_size would keep the window start in place forever.
    overlap = max(0, min(overlap, chunk_size - 1))
    step = chunk_size - overlap

    n = len(text)
    chunks: List[str] = []
    for start in range(0, n, step):
        end = start + chunk_size
        piece = text[start:end].strip()
        if piece:
            chunks.append(piece)
        if end >= n:
            # This window already reached the end; a further one would only repeat the tail.
            break
    return chunks


def mask_pii(text: str) -> str:
    """Replace e-mail addresses with ``[EMAIL]`` and phone-like digit runs with ``[PHONE]``.

    Note: the phone pattern is broad (a digit, 7+ digits/dashes/whitespace, a
    digit), so it also matches other digit runs such as ``2019 - 2021``.
    """
    redactions = (
        (r"[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}", "[EMAIL]"),
        (r"(\+?\d[\d\-\s]{7,}\d)", "[PHONE]"),
    )
    masked = text
    # E-mails first, then phones, exactly in that order.
    for pattern, placeholder in redactions:
        masked = re.sub(pattern, placeholder, masked)
    return masked


# =========================================================
# Contact extraction
# =========================================================
# E-mail address delimited by word boundaries.
_EMAIL_RE = re.compile(r"\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b")
# Phone-like number: optional country code (1-3 digits, optional "+"), optional
# 2-4 digit area code (optionally parenthesised), then two groups of 3-4 digits,
# each part optionally separated by one space or dash. All groups are
# non-capturing, so findall() returns whole matches.
_PHONE_RE = re.compile(r"(?:\+?\d{1,3}[\s\-]?)?(?:\(?\d{2,4}\)?[\s\-]?)?\d{3,4}[\s\-]?\d{3,4}")


def _normalize_phone(p: str) -> str:
    """Strip everything except digits and ``+`` from a phone string."""
    not_digit_or_plus = re.compile(r"[^\d+]")
    return not_digit_or_plus.sub("", p)


def guess_name(text: str) -> str:
    """Heuristically pick the candidate's name from the top of a CV.

    Scans the first 14 non-blank lines and returns the first one that has no
    ``@``, is at most 55 characters, has no run of 3+ digits, contains at least
    one Latin or Arabic letter, and is not a generic heading such as "Resume".
    Returns an empty string when no line qualifies.
    """
    boilerplate = {"curriculum vitae", "cv", "resume", "profile"}
    stripped = (ln.strip() for ln in (text or "").splitlines())
    header = [ln for ln in stripped if ln][:14]

    for line in header:
        looks_like_contact = "@" in line or re.search(r"\d{3,}", line) is not None
        if looks_like_contact or len(line) > 55:
            continue
        if not re.search(r"[A-Za-z\u0600-\u06FF]", line):
            continue
        if line.lower() not in boilerplate:
            return line
    return ""


def extract_contact_info(text: str) -> Dict[str, str]:
    """Pull a best-guess name, the first e-mail and the first plausible phone from CV text.

    A phone match is accepted only if it has between 8 and 16 digits after
    normalisation. Missing values are returned as empty strings.
    """
    body = text or ""

    email_match = _EMAIL_RE.search(body)
    first_email = email_match.group(0) if email_match else ""

    first_phone = ""
    for match in _PHONE_RE.finditer(body):
        normalized = _normalize_phone(match.group(0))
        digit_count = len(re.sub(r"\D", "", normalized))
        if 8 <= digit_count <= 16:
            first_phone = normalized
            break

    return {"name": guess_name(body), "email": first_email, "phone": first_phone}


# =========================================================
# Embeddings via HF Inference API (feature-extraction)
# =========================================================
def _l2norm(v: np.ndarray) -> np.ndarray:
    """Scale *v* to unit Euclidean length; the epsilon keeps a zero vector at zero."""
    length = np.linalg.norm(v) + 1e-12
    return v / length


def embed_texts_api(texts: List[str]) -> np.ndarray:
    """Embed each text through the HF Inference ``feature-extraction`` task.

    Args:
        texts: Strings to embed; one API request is issued per string.

    Returns:
        A float32 array of shape ``[len(texts), d]`` with L2-normalised rows, or
        an empty ``[0, 384]`` array when *texts* is empty.

    Raises:
        gr.Error: from ``get_hf_client`` when HF_TOKEN is not configured.
        Exception: any error raised by the inference call propagates; callers
            use that to switch to the lexical fallback.
    """
    client = get_hf_client()

    vecs = []
    for t in texts:
        # The text is passed positionally: the documented parameter is named
        # ``text``, so an ``inputs=`` keyword is rejected before any request is made.
        raw = client.feature_extraction(t, model=EMBED_MODEL)
        arr = np.asarray(raw, dtype=np.float32)
        if arr.ndim > 1:
            # Some models return per-token vectors ([tokens, d] or [1, tokens, d]);
            # mean-pool them into one sentence vector instead of flattening, which
            # would give a different length for every text. A [1, d] result is unchanged.
            arr = arr.reshape(-1, arr.shape[-1]).mean(axis=0)
        vecs.append(_l2norm(arr.reshape(-1)))

    return np.stack(vecs, axis=0) if vecs else np.zeros((0, 384), dtype=np.float32)


def cosine_sim_matrix(a: np.ndarray, b: np.ndarray) -> np.ndarray:
    """Return the pairwise dot products of the rows of *a* and *b*.

    This equals cosine similarity only when both inputs are already row-normalised.
    """
    return a @ b.T


# =========================================================
# Lexical fallback (no embeddings)
# =========================================================
# A token is a run of ASCII letters, Arabic-block characters, or ASCII digits.
_WORD_RE = re.compile(r"[A-Za-z\u0600-\u06FF0-9]+")


def _tokenize(text: str) -> List[str]:
    """Return the lower-cased tokens of *text* that are at least two characters long."""
    tokens = []
    for match in _WORD_RE.finditer(text or ""):
        word = match.group(0)
        if len(word) >= 2:
            tokens.append(word.lower())
    return tokens


def lexical_rank_chunks(jd: str, chunks: List[str], top_k: int) -> List[Tuple[int, float]]:
    """Rank chunks by the share of distinct JD tokens they contain.

    Returns up to *top_k* ``(chunk_index, score)`` pairs, best first; ties keep
    their original chunk order. Returns an empty list when the JD has no tokens
    or there are no chunks.
    """
    vocabulary = set(_tokenize(jd))
    if not vocabulary or not chunks:
        return []

    # The tiny epsilon mirrors the guard against a zero denominator.
    denominator = float(len(vocabulary) + 1e-9)

    def overlap_ratio(chunk: str) -> float:
        shared = vocabulary & set(_tokenize(chunk))
        return float(len(shared)) / denominator

    scored = [(position, overlap_ratio(chunk)) for position, chunk in enumerate(chunks)]
    return sorted(scored, key=lambda pair: pair[1], reverse=True)[:top_k]


# =========================================================
# LLM Judge (Ranking) with robust JSON parsing
# =========================================================
def build_llm_prompt(jd_text: str, must_haves: str, candidates: List[Dict[str, Any]]) -> str:
    """Assemble the judge prompt: schema example, limits, rules, JD, must-haves, candidates.

    The JD is cut to its first 4000 characters and the trimmed must-haves to 1200.
    *candidates* is embedded as JSON exactly as given.
    """
    checklist_item = {
        "requirement": "SHORT label (<=8 words)",
        "status": "met",
        "evidence": "short quote <=160 chars",
    }
    candidate_template = {
        "filename": "<cv_filename>",
        "final_score": 0,
        "fit_level": "weak",
        "summary": "one short paragraph",
        "strengths": ["max 4 items"],
        "gaps": ["max 4 items"],
        "risks": ["max 3 items"],
        "checklist": [checklist_item],
        "top_evidence": ["max 3 short quotes"],
    }
    schema_json = json.dumps({"ranked": [candidate_template], "overall_notes": "short"}, ensure_ascii=False)
    jd_excerpt = jd_text[:4000]
    must_have_block = (must_haves or "").strip()[:1200]
    candidates_json = json.dumps(candidates, ensure_ascii=False)

    prompt = f"""
You are an expert recruiter and ATS evaluator.

Return ONLY one JSON object, EXACTLY matching this schema:
{schema_json}

Hard limits (MUST follow):
- strengths: max 4 bullets
- gaps: max 4 bullets
- risks: max 3 bullets
- checklist: max 6 requirements total
- requirement: SHORT label (<=8 words). Do NOT paste long JD sentences.
- evidence: <=160 chars or empty
- top_evidence: max 3 short quotes

Rules:
- Use ONLY the provided evidence_chunks. Do NOT invent experience.
- final_score 0-100 (be strict: missing must-haves should significantly reduce score)
- fit_level: excellent | good | maybe | weak
- status: met | partial | missing

Job Description (compressed):
\"\"\"{jd_excerpt}\"\"\"

Must-haves (optional):
\"\"\"{must_have_block}\"\"\"

Candidates:
{candidates_json}

Output JSON only. No markdown. No extra text.
"""
    return prompt.strip()


def _extract_first_complete_json_object(text: str) -> Optional[str]:
    """Return the substring from the first ``{`` through its matching ``}``.

    Braces inside double-quoted strings (with backslash escapes) are ignored.
    Returns ``None`` for empty input, when no ``{`` exists, or when the object
    that starts at the first ``{`` is never closed.
    """
    opening = text.find("{") if text else -1
    if opening < 0:
        return None

    nesting = 0
    inside_string = False
    escaped = False

    for position, char in enumerate(text[opening:], start=opening):
        if inside_string:
            if escaped:
                # The character after a backslash never ends the string.
                escaped = False
            elif char == "\\":
                escaped = True
            elif char == '"':
                inside_string = False
        elif char == '"':
            inside_string = True
        elif char == "{":
            nesting += 1
        elif char == "}":
            nesting -= 1
            if nesting == 0:
                return text[opening : position + 1]
    return None


def fit_level_from_score(score: float) -> str:
    """Map a 0-100 score to a fit label: >=85 excellent, >=70 good, >=55 maybe, else weak."""
    value = float(score)
    for floor, label in ((85, "excellent"), (70, "good"), (55, "maybe")):
        if value >= floor:
            return label
    return "weak"


def clamp(x: float, lo: float, hi: float) -> float:
    """Limit *x* to the closed interval ``[lo, hi]``."""
    capped_above = min(hi, x)
    return max(lo, capped_above)


# -------------------------
# STRICTER scoring (post-process)
# -------------------------
def apply_strict_scoring(c: CandidateLLMResult) -> CandidateLLMResult:
    """Tighten the LLM's score using its own checklist; mutates and returns *c*.

    - No checklist: the score is multiplied by 0.85 and clamped to 0-100.
    - Otherwise a fulfilment ratio is computed (met = 1, partial = 0.5, anything
      else = 0) and the score is scaled by ``0.20 + 0.80 * ratio ** 1.6``.
    - With at least 3 requirements and none met, the result is capped at 25
      (no partials either) or 35 (some partials).
    ``fit_level`` is always recomputed from the adjusted score.
    """
    raw_score = float(c.final_score)
    items = c.checklist or []

    if not items:
        # Mild penalty only, so such candidates can still be ranked.
        penalised = clamp(raw_score * 0.85, 0.0, 100.0)
        c.final_score = penalised
        c.fit_level = fit_level_from_score(penalised)
        return c

    statuses = [(entry.status or "").strip().lower() for entry in items]
    n_items = len(statuses)
    n_met = statuses.count("met")
    n_partial = statuses.count("partial")

    fulfilment = (n_met + 0.5 * n_partial) / float(max(1, n_items))  # 0..1

    # Multiplier ranges from 0.20 (nothing fulfilled) to 1.00 (everything met).
    scaled = raw_score * (0.20 + 0.80 * (fulfilment ** 1.6))

    if n_items >= 3 and n_met == 0:
        ceiling = 25.0 if n_partial == 0 else 35.0
        scaled = min(scaled, ceiling)

    scaled = clamp(scaled, 0.0, 100.0)
    c.final_score = float(round(scaled, 2))
    c.fit_level = fit_level_from_score(c.final_score)
    return c


def fallback_candidate(filename: str, local_score: float) -> CandidateLLMResult:
    """Build a placeholder result from the retrieval score when the LLM gave nothing usable.

    The score is the local score rounded to two decimals; all list fields are empty.
    """
    score = float(round(local_score, 2))
    empty_lists = {name: [] for name in ("strengths", "gaps", "risks", "checklist", "top_evidence")}
    return CandidateLLMResult(
        filename=filename,
        final_score=score,
        fit_level=fit_level_from_score(score),
        summary="LLM output incomplete; fallback score based on retrieval signals.",
        **empty_lists,
    )


def _llm_call_or_raise(prompt: str, temperature: float, max_tokens: int) -> str:
    """Send *prompt* to the chat model and return the stripped reply text.

    Raises:
        gr.Error: with a model-selection hint on ``BadRequestError``, or with
            the raw message on any other ``HfHubHTTPError``.
    """
    conversation = [
        {"role": "system", "content": "Return ONLY valid JSON matching the schema. No markdown."},
        {"role": "user", "content": prompt},
    ]
    hf = get_hf_client()
    try:
        completion = hf.chat_completion(
            model=LLM_MODEL,
            messages=conversation,
            max_tokens=max_tokens,
            temperature=temperature,
        )
        reply = completion.choices[0].message.content
        return (reply or "").strip()
    except BadRequestError as bad_request:
        # Listed before HfHubHTTPError so the more specific hint wins.
        hint = (
            "LLM call failed. This usually means the model name is wrong or the model is gated.\n\n"
            f"Current LLM_MODEL: {LLM_MODEL}\n"
            "Try setting LLM_MODEL to a public model like:\n"
            "- Qwen/Qwen2.5-7B-Instruct\n"
            "- mistralai/Mistral-7B-Instruct-v0.3\n"
            "Or if you have Meta access:\n"
            "- meta-llama/Llama-3.1-8B-Instruct\n\n"
            f"Raw error: {bad_request!s}"
        )
        raise gr.Error(hint) from bad_request
    except HfHubHTTPError as http_error:
        raise gr.Error(f"HF Inference error: {http_error}") from http_error


def _parse_ranking_output(text: str) -> Optional[LLMRankingOutput]:
    """Best-effort parse of an LLM reply; returns None instead of raising."""
    if not text:
        return None
    # Strategy 1: the reply is pure JSON. Strategy 2: JSON wrapped in extra text.
    for extract in (lambda s: s, _extract_first_complete_json_object):
        payload = extract(text)
        if not payload:
            continue
        try:
            return LLMRankingOutput.model_validate(json.loads(payload))
        except Exception:
            # Deliberately broad: any decode/validation failure just means
            # "try the next strategy"; the caller handles a None result.
            continue
    return None


def llm_judge_rank_batch(jd_text: str, must_haves: str, batch: List[Dict[str, Any]]) -> LLMRankingOutput:
    """Ask the LLM to judge one batch of candidates, with retries and fallbacks.

    Flow:
      1. Call the model at the configured temperature and parse the reply.
      2. If unparseable, retry once at temperature 0 with a larger token budget.
      3. If still unparseable, return retrieval-based fallback results.
      4. Candidates the model omitted are re-judged one at a time; if that
         also fails they receive a fallback result.

    Args:
        jd_text: Job description text.
        must_haves: Optional must-have requirements (``None`` is treated as "").
        batch: Dicts with ``filename`` and ``evidence_chunks``; ``local_score``
            is used for fallbacks (defaults to 50.0 when absent).

    Returns:
        An ``LLMRankingOutput`` with exactly the batch's filenames, sorted by
        ``final_score`` descending.

    Raises:
        gr.Error: propagated from ``_llm_call_or_raise`` on API failures.
    """
    must = must_haves or ""

    def as_payload(items: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        # Only these two keys are shown to the model; local_score stays private.
        return [{"filename": b["filename"], "evidence_chunks": b["evidence_chunks"]} for b in items]

    prompt = build_llm_prompt(jd_text, must, as_payload(batch))

    out = _parse_ranking_output(_llm_call_or_raise(prompt, LLM_TEMPERATURE, LLM_MAX_TOKENS))
    if out is None:
        out = _parse_ranking_output(_llm_call_or_raise(prompt, 0.0, max(LLM_MAX_TOKENS, 3200)))

    if out is None:
        ranked = [fallback_candidate(b["filename"], b.get("local_score", 50.0)) for b in batch]
        return LLMRankingOutput(ranked=ranked, overall_notes="LLM parsing failed; used retrieval-based fallback.")

    # Keep only results for files that were actually in this batch: an invented
    # or altered filename would otherwise appear as an extra candidate.
    expected = {b["filename"] for b in batch}
    returned = {c.filename: c for c in out.ranked if c.filename in expected}
    missing = [b for b in batch if b["filename"] not in returned]

    for b in missing:
        single_prompt = build_llm_prompt(jd_text, must, as_payload([b]))
        single_text = _llm_call_or_raise(single_prompt, 0.0, min(2200, LLM_MAX_TOKENS))
        single_out = _parse_ranking_output(single_text)

        if single_out and single_out.ranked:
            result = single_out.ranked[0]
            # Only one candidate was sent, so the answer belongs to it even if
            # the model echoed the filename differently.
            result.filename = b["filename"]
            returned[b["filename"]] = result
        else:
            returned[b["filename"]] = fallback_candidate(b["filename"], b.get("local_score", 50.0))

    merged_ranked = sorted(returned.values(), key=lambda x: float(x.final_score), reverse=True)
    notes = (out.overall_notes or "").strip()
    if missing:
        notes = (notes + " | Some candidates re-judged individually / fallback used.").strip(" |")

    return LLMRankingOutput(ranked=merged_ranked, overall_notes=notes)


def merge_llm_batches(batch_outputs: List[LLMRankingOutput]) -> LLMRankingOutput:
    """Combine per-batch results into one ranking, re-scored and sorted best first.

    Strict scoring is applied here, after the LLM has answered, so that a
    candidate missing every requirement cannot keep a high raw score. Non-empty
    batch notes are joined with " | " and cut to 1200 characters.
    """
    candidates = [cand for out in batch_outputs for cand in out.ranked]
    rescored = [apply_strict_scoring(cand) for cand in candidates]
    rescored.sort(key=lambda cand: float(cand.final_score), reverse=True)

    joined_notes = " | ".join(out.overall_notes for out in batch_outputs if out.overall_notes)
    return LLMRankingOutput(ranked=rescored, overall_notes=joined_notes[:1200])


# =========================================================
# Local scoring (retrieval-only, scaled to 0-100)
# =========================================================
def compute_retrieval_score(top_sims: List[float]) -> float:
    """Turn chunk similarities into a 0-100 score.

    Uses the five highest similarities: 65% of their mean plus 35% of their
    maximum, multiplied by 100 and limited to ``[0, 100]``. An empty list scores 0.
    """
    if not top_sims:
        return 0.0
    best = sorted(top_sims, reverse=True)[:5]
    average = float(np.mean(best))
    peak = float(np.max(best))
    blended = 0.65 * average + 0.35 * peak
    # Same bounds as clamp(value, 0.0, 100.0), written out inline.
    return float(max(0.0, min(100.0, blended * 100.0)))


# =========================================================
# UI rendering (SGS)
# =========================================================
def fit_badge(level: str) -> str:
    """Return the HTML badge for a fit level; unknown or empty levels render as "Weak"."""
    badges = {
        "excellent": ("b-exc", "Excellent"),
        "good": ("b-good", "Good"),
        "maybe": ("b-maybe", "Potential"),
    }
    key = (level or "").lower().strip()
    css_class, label = badges.get(key, ("b-weak", "Weak"))
    return f'<span class="badge {css_class}">{label}</span>'


def score_pill(score: float) -> str:
    """Return the HTML score pill; colour class is chosen at 80 / 65 / 45 thresholds."""
    value = float(score)
    if value >= 80:
        css_class = "p-high"
    elif value >= 65:
        css_class = "p-mid"
    elif value >= 45:
        css_class = "p-low"
    else:
        css_class = "p-bad"
    return f'<span class="pill {css_class}">{value:.1f}</span>'


def candidate_card_html(rank: int, c: CandidateLLMResult) -> str:
    """Render one candidate's report card as an HTML fragment.

    Every piece of text that comes from the LLM or from the uploaded file name
    (filename, summary, strengths, gaps, risks, checklist fields, evidence
    quotes) is HTML-escaped, including ``&`` and quotes, so CV content cannot
    inject markup into the page.

    Args:
        rank: 1-based position shown in the card header.
        c: The judged candidate. At most 4 strengths, 4 gaps, 3 risks,
            6 checklist rows and 3 evidence quotes are rendered.

    Returns:
        The card's HTML.
    """
    # Local import: ``html`` is not among the module's top-level imports.
    from html import escape

    def esc(value: Any) -> str:
        """Escape *value* for HTML text/attribute context; None becomes ''."""
        return escape("" if value is None else str(value), quote=True)

    score = float(c.final_score)
    # Width of the progress bar, as a whole percentage in 0..100.
    w = max(0, min(100, int(round(score))))

    checklist_rows = ""
    for item in (c.checklist or [])[:6]:
        st = (item.status or "").lower().strip()
        # Any status other than met/partial is styled as missing.
        cls = "ok" if st == "met" else ("partial" if st == "partial" else "miss")
        ev = esc((item.evidence or "").strip())
        req = esc((item.requirement or "").strip())
        checklist_rows += f"""
          <div class="checkrow {cls}">
            <div class="req">{req}</div>
            <div class="st">{esc(st.upper())}</div>
            <div class="ev">{ev if ev else "—"}</div>
          </div>
        """

    strengths = "".join(f"<li>{esc(s)}</li>" for s in (c.strengths or [])[:4]) or "<li>—</li>"
    gaps = "".join(f"<li>{esc(g)}</li>" for g in (c.gaps or [])[:4]) or "<li>—</li>"
    risks = "".join(f"<li>{esc(r)}</li>" for r in (c.risks or [])[:3]) or "<li>—</li>"

    evidence_html = "".join(f'<div class="quote">“{esc(q)}”</div>' for q in (c.top_evidence or [])[:3])

    checklist_block = checklist_rows or '<div class="quote muted">No checklist produced.</div>'
    evidence_block = evidence_html or '<div class="quote muted">No evidence produced.</div>'

    return f"""
    <div class="card">
      <div class="card-top">
        <div class="card-title">
          <div class="rank">#{rank}</div>
          <div class="file">{esc(c.filename)}</div>
        </div>
        <div class="card-meta">
          {fit_badge(c.fit_level)}
          {score_pill(score)}
        </div>
      </div>

      <div class="bar"><div class="fill" style="width:{w}%"></div></div>
      <div class="summary">{esc(c.summary)}</div>

      <div class="grid">
        <div>
          <div class="section-title">Strengths</div>
          <ul class="list">{strengths}</ul>
        </div>
        <div>
          <div class="section-title">Gaps</div>
          <ul class="list">{gaps}</ul>
        </div>
      </div>

      <div class="section-title">Risks</div>
      <ul class="list">{risks}</ul>

      <div class="section-title">Requirements Checklist</div>
      <div class="checklist">
        {checklist_block}
      </div>

      <div class="section-title">Evidence</div>
      <div class="quotes">
        {evidence_block}
      </div>
    </div>
    """


def _safe_int(x, default: int = 0) -> int:
    """Convert *x* with ``int()``; return *default* if the conversion raises."""
    try:
        converted = int(x)
    except Exception:
        return default
    else:
        return converted


def render_single_html(ranked_dicts: List[Dict[str, Any]], idx: int) -> Tuple[str, str, int]:
    """Render the report header plus exactly one candidate card.

    Only one card is rendered at a time to keep the DOM small.

    Returns:
        ``(html, nav_markdown, index)`` where *index* is *idx* clamped into the
        valid range. With no results, a placeholder header, "—" and 0 are returned.
    """
    if not ranked_dicts:
        html = '''
        <div class="hero report-hero">
          <div class="hero-left">
            <div class="hero-title">SGS Candidate Fit Report</div>
            <div class="hero-sub">Run matching to generate results.</div>
          </div>
        </div>
        '''
        return html, "—", 0

    total = len(ranked_dicts)
    # Out-of-range or non-numeric indices snap to the nearest valid card.
    idx = max(0, min(_safe_int(idx, 0), total - 1))
    position = idx + 1

    candidate = CandidateLLMResult.model_validate(ranked_dicts[idx])
    card = candidate_card_html(position, candidate)
    # The list is expected to be sorted best-first, so entry 0 holds the top score.
    top_score = float(ranked_dicts[0].get("final_score", 0.0))

    html = f'''
    <div class="hero report-hero">
      <div class="hero-left">
        <div class="hero-title">SGS Candidate Fit Report</div>
        <div class="hero-sub">Navigate candidates using ◀ / ▶ (renders one card to reduce lag)</div>
      </div>
      <div class="hero-right">
        <div class="kpi">
          <div class="kpi-label">Candidate</div>
          <div class="kpi-val">{position}/{total}</div>
        </div>
        <div class="kpi">
          <div class="kpi-label">Top Score</div>
          <div class="kpi-val">{top_score:.1f}</div>
        </div>
      </div>
    </div>
    {card}
    '''
    nav = f"**Showing:** {position} / {total}"
    return html, nav, idx


def nav_prev(ranked_dicts: List[Dict[str, Any]], idx: int):
    """Render the card before *idx*; render_single_html clamps at the first card."""
    current = _safe_int(idx, 0)
    return render_single_html(ranked_dicts, current - 1)


def nav_next(ranked_dicts: List[Dict[str, Any]], idx: int):
    """Render the card after *idx*; render_single_html clamps at the last card."""
    current = _safe_int(idx, 0)
    return render_single_html(ranked_dicts, current + 1)


# =========================================================
# Shortlist export
# =========================================================
def export_shortlist(shortlist_table: pd.DataFrame) -> Tuple[str, str, str]:
    """Write the shortlisted rows to a CSV file and collect their e-mail addresses.

    Args:
        shortlist_table: Table with at least ``Shortlisted`` and ``Email``
            columns. A row counts as shortlisted when its flag is a true
            boolean/1, or one of the strings "true", "1", "yes"
            (case-insensitive), so string-encoded checkbox values still work.

    Returns:
        ``(csv_path, status_message, emails)`` where *emails* is the sorted,
        de-duplicated, comma-separated list of non-empty addresses.

    Raises:
        gr.Error: if the table is empty/None or no row is shortlisted.
    """
    if shortlist_table is None or shortlist_table.empty:
        raise gr.Error("No shortlist data yet. Run ranking first.")

    def is_checked(value: Any) -> bool:
        if isinstance(value, str):
            return value.strip().lower() in {"true", "1", "yes"}
        try:
            # True == 1 holds for bool/numpy bool/int/float; NaN and None compare unequal.
            return bool(value == 1)
        except (TypeError, ValueError):
            return False

    selected = shortlist_table["Shortlisted"].map(is_checked).astype(bool)
    shortlisted_df = shortlist_table[selected]
    if shortlisted_df.empty:
        raise gr.Error("No candidates marked as shortlisted.")

    # Reserve a path and close the handle immediately: the file is kept
    # (delete=False) for download, but an open handle would leak and would
    # block the write below on platforms that lock open files.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".csv") as tmp:
        csv_path = tmp.name
    shortlisted_df.to_csv(csv_path, index=False)

    emails = shortlisted_df["Email"].dropna().astype(str).str.strip().tolist()
    email_block = ", ".join(sorted({e for e in emails if e}))

    msg = f"Exported {len(shortlisted_df)} shortlisted candidate(s)."
    return csv_path, msg, email_block


# =========================================================
# Mini refresh HTML (fix scroll lag after report generation)
# =========================================================
def build_mini_refresh_script() -> str:
    """Return an HTML snippet whose inline script nudges the browser to re-layout.

    The snippet is a hidden marker ``<div>`` with a millisecond-timestamp id
    (so each call yields different markup) followed by a script that:
      - dispatches ``resize`` twice across animation frames,
      - temporarily sets ``will-change``/``transform`` on ``.gradio-container``,
      - restores the previous scroll position.
    """
    stamp = int(time.time() * 1000)
    marker = f'<div id="mini-refresh-{stamp}" style="display:none"></div>'
    script = """
<script>
(() => {
  try {
    const y = window.scrollY || 0;
    const root = document.querySelector('.gradio-container');
    if (root) {
      root.style.willChange = 'transform';
      root.style.transform = 'translateZ(0)';
    }
    requestAnimationFrame(() => {
      window.dispatchEvent(new Event('resize'));
      requestAnimationFrame(() => {
        window.dispatchEvent(new Event('resize'));
        setTimeout(() => {
          try { window.scrollTo(0, y); } catch(e) {}
          if (root) {
            root.style.transform = '';
            root.style.willChange = 'auto';
          }
        }, 60);
      });
    });
  } catch(e) {}
})();
</script>
""".strip()
    return marker + "\n" + script


# =========================================================
# Main app pipeline
# =========================================================
def rank_app(
    jd_file_obj,
    cv_file_objs,
    must_haves: str,
    mask_pii_toggle: bool,
    show_contacts_toggle: bool,
    progress=gr.Progress(track_tqdm=False),
):
    """Run the full matching pipeline for one job description and up to 10 CVs.

    Steps: read the JD, de-duplicate CVs by content hash, retrieve the most
    relevant chunks per CV (embeddings, or token overlap if embeddings fail),
    have the LLM judge the candidates in batches, apply strict scoring, and
    build the report, the CSV export and the shortlist table.

    A CV that cannot be read is skipped and counted in the run metadata
    instead of aborting the whole run.

    Args:
        jd_file_obj: Uploaded job-description file value.
        cv_file_objs: Uploaded CV file values (at most ``MAX_CV_UPLOADS``).
        must_haves: Optional must-have requirements passed to the LLM.
        mask_pii_toggle: Mask e-mails/phones in the evidence sent to the LLM.
        show_contacts_toggle: Extract name/e-mail/phone for the exports.
        progress: Gradio progress reporter.

    Returns:
        A 10-tuple: first card HTML, metadata markdown, CSV path, shortlist
        DataFrame, two empty strings, the ranked results as dicts, the current
        card index, the navigation label, and the mini-refresh HTML.

    Raises:
        gr.Error: for missing/unreadable inputs, too many uploads, a missing
            token, or LLM/API failures.
    """
    t0 = time.time()
    get_hf_client()  # validate token early

    progress(0.05, desc="Loading Job Description...")
    jd_path = gr_file_to_path(jd_file_obj)
    if not jd_path:
        raise gr.Error("Please upload a Job Description file (PDF/DOCX/TXT).")

    try:
        jd_text = clean_text(read_file_to_text(jd_path))[:MAX_JD_CHARS]
    except Exception as e:
        # A corrupt/encrypted file should produce the same friendly message as an empty one.
        raise gr.Error("Could not extract text from the Job Description file.") from e
    if not jd_text:
        raise gr.Error("Could not extract text from the Job Description file.")

    if not cv_file_objs:
        raise gr.Error("Please upload at least 1 CV.")
    if len(cv_file_objs) > MAX_CV_UPLOADS:
        raise gr.Error(f"Maximum allowed CV uploads is {MAX_CV_UPLOADS}. You uploaded {len(cv_file_objs)}.")

    cv_paths = [p for p in (gr_file_to_path(f) for f in cv_file_objs) if p]
    if not cv_paths:
        raise gr.Error("Could not read uploaded CV files (no valid paths).")

    progress(0.10, desc="Checking duplicates...")
    seen = {}
    duplicates = []
    unique_paths = []
    for p in cv_paths:
        fname = os.path.basename(p)
        try:
            h = file_bytes_hash(p)
        except Exception:
            try:
                h = hashlib.sha256(clean_text(read_file_to_text(p)).encode("utf-8", errors="ignore")).hexdigest()
            except Exception:
                # Cannot be hashed either way: keep it as unique so the
                # processing loop below reports it as unreadable.
                h = f"unreadable:{p}"
        if h in seen:
            duplicates.append((fname, seen[h]))
            continue
        seen[h] = fname
        unique_paths.append(p)

    progress(0.14, desc="Preparing retrieval engine...")
    use_embeddings = True
    jd_vec = None
    try:
        jd_vec = embed_texts_api([jd_text])  # [1,d]
    except Exception as e:
        if not ALLOW_LEXICAL_FALLBACK:
            raise gr.Error("Embedding endpoint failed. Try again later.") from e
        use_embeddings = False

    local_pool = []
    contacts_map: Dict[str, Dict[str, str]] = {}
    unreadable: List[str] = []

    total = len(unique_paths)
    for idx, p in enumerate(unique_paths, start=1):
        filename = os.path.basename(p)
        prog = 0.14 + 0.54 * (idx / max(1, total))
        progress(prog, desc=f"Processing CVs ({idx}/{total}) — {filename}")

        try:
            raw = clean_text(read_file_to_text(p))[:MAX_CV_CHARS]
        except Exception:
            # One broken upload must not abort the ranking of the others.
            unreadable.append(filename)
            continue
        if not raw:
            continue

        contacts_map[filename] = (
            extract_contact_info(raw) if show_contacts_toggle else {"name": "", "email": "", "phone": ""}
        )

        chunks = chunk_text_safe(raw)
        if not chunks:
            continue

        if use_embeddings and jd_vec is not None:
            try:
                chunk_vecs = embed_texts_api(chunks)  # [n,d]
                sims = cosine_sim_matrix(jd_vec, chunk_vecs)[0]  # [n]
                idxs = np.argsort(sims)[::-1][:TOP_CHUNKS_PER_CV]
                top_chunks = [(int(i), float(sims[int(i)]), chunks[int(i)]) for i in idxs]
            except Exception:
                # Embeddings failed mid-run: stay on lexical ranking from here on.
                use_embeddings = False
                scored = lexical_rank_chunks(jd_text, chunks, TOP_CHUNKS_PER_CV)
                top_chunks = [(i, s, chunks[i]) for i, s in scored]
        else:
            scored = lexical_rank_chunks(jd_text, chunks, TOP_CHUNKS_PER_CV)
            top_chunks = [(i, s, chunks[i]) for i, s in scored]

        retr_sims = [s for _, s, _ in top_chunks]
        local_score = compute_retrieval_score(retr_sims)

        evidence_chunks = [txt for _, _, txt in top_chunks[:EVIDENCE_CHUNKS_PER_CV]]
        if mask_pii_toggle:
            evidence_chunks = [mask_pii(x) for x in evidence_chunks]

        local_pool.append({"filename": filename, "local_score": local_score, "evidence_chunks": evidence_chunks})

    if not local_pool:
        raise gr.Error("Could not extract usable text from the uploaded CVs.")

    progress(0.70, desc="Preparing LLM ranking...")
    local_pool = sorted(local_pool, key=lambda x: float(x["local_score"]), reverse=True)

    batch_outputs: List[LLMRankingOutput] = []
    batches = max(1, (len(local_pool) + LLM_BATCH_SIZE - 1) // LLM_BATCH_SIZE)

    for b in range(batches):
        start = b * LLM_BATCH_SIZE
        end = start + LLM_BATCH_SIZE
        batch = local_pool[start:end]

        prog = 0.70 + 0.22 * ((b + 1) / batches)
        progress(prog, desc=f"LLM judging batches ({b+1}/{batches})...")

        out = llm_judge_rank_batch(jd_text, must_haves or "", batch)
        batch_outputs.append(out)

    progress(0.94, desc="Finalizing report...")
    judged = merge_llm_batches(batch_outputs)
    ranked = judged.ranked
    if not ranked:
        raise gr.Error("LLM returned an empty ranking.")

    # Re-sort after strict scoring (already sorted in merge, but keep safe)
    ranked = sorted(ranked, key=lambda x: float(x.final_score), reverse=True)

    ranked_dicts = [c.model_dump() for c in ranked]
    idx0 = 0
    first_html, nav, idx0 = render_single_html(ranked_dicts, idx0)

    no_contact = {"name": "", "email": "", "phone": ""}

    # filename -> local score; setdefault keeps the first entry for a repeated name.
    local_scores: Dict[str, Any] = {}
    for x in local_pool:
        local_scores.setdefault(x["filename"], x["local_score"])

    # Write through the temp-file handle itself so it is closed on exit;
    # delete=False keeps the file on disk for the download component.
    with tempfile.NamedTemporaryFile(
        "w", delete=False, suffix=".csv", newline="", encoding="utf-8"
    ) as f:
        csv_path = f.name
        w = csv.writer(f)
        w.writerow(
            ["Rank", "Filename", "FinalScore(0-100)", "FitLevel", "Name", "Email", "Phone", "Summary", "LocalScore"]
        )
        for ridx, c in enumerate(ranked, start=1):
            ci = contacts_map.get(c.filename, no_contact)
            w.writerow(
                [
                    ridx,
                    c.filename,
                    round(float(c.final_score), 2),
                    c.fit_level,
                    ci.get("name", ""),
                    ci.get("email", ""),
                    ci.get("phone", ""),
                    c.summary,
                    local_scores.get(c.filename, ""),
                ]
            )

    shortlist_rows = []
    for ridx, c in enumerate(ranked, start=1):
        ci = contacts_map.get(c.filename, no_contact)
        shortlist_rows.append(
            [
                False,
                ridx,
                c.filename,
                round(float(c.final_score), 2),
                c.fit_level,
                ci.get("name", ""),
                ci.get("email", ""),
                ci.get("phone", ""),
            ]
        )

    shortlist_df = pd.DataFrame(
        shortlist_rows, columns=["Shortlisted", "Rank", "Filename", "Score", "Fit", "Name", "Email", "Phone"]
    )

    elapsed = time.time() - t0
    meta = (
        f"**LLM model:** `{LLM_MODEL}`  \n"
        f"**Embedding model:** `{EMBED_MODEL}`  \n\n"
        f"**CVs uploaded:** {len(cv_paths)} (max {MAX_CV_UPLOADS}) → **Unique processed:** {len(unique_paths)}  \n"
        f"**Ranked (ALL):** {len(ranked)}  \n"
        f"**LLM batches:** {batches} (batch size={LLM_BATCH_SIZE})  \n"
        f"**Time:** {elapsed:.2f}s  \n"
        f"**Duplicates skipped:** {len(duplicates)}  \n"
        f"**Unreadable skipped:** {len(unreadable)}  \n"
        f"**Retrieval mode:** {'Embeddings (API)' if use_embeddings else 'Lexical fallback'}  \n\n"
        f"**LLM Notes:** {(judged.overall_notes or '').strip()}"
    )

    # Mini refresh to remove scroll lag after render
    refresh_html = build_mini_refresh_script()

    progress(1.0, desc="Done ✅")
    return first_html, meta, csv_path, shortlist_df, "", "", ranked_dicts, idx0, nav, refresh_html


# =========================================================
# SGS CSS (neutral light-grey + visible borders)
# + file uploader readable on both themes
# + progress text white (like you asked)
# =========================================================
# Stylesheet handed to gr.Blocks(css=...) when the UI is built.
# Sections, in order: theme variables, layout width, page background and its
# animated veil, forced dark text, hero banner, KPI cards, Gradio blocks,
# inputs, buttons, tabs, candidate cards, badges/pills, score bar,
# summary/list text, evidence quotes, requirement checklist (with ok/partial/miss
# status colors), file-uploader overrides, progress-label overrides, and a
# reduced-motion fallback that switches the animations off.
# NOTE(review): `.file` is styled twice — once under "Cards" and again through
# `.gradio-container .file` in the uploader section. If the card filename is
# rendered inside the Gradio container it would also pick up the uploader
# background/border/shadow — confirm this is intended.
CUSTOM_CSS = """
:root{
  --sgs-blue:#0B3D91;
  --sgs-green:#00A651;

  --text:#111827;
  --muted: rgba(17,24,39,.70);

  --bg1:#f2f4f7;
  --bg2:#e9edf2;

  --line: rgba(17,24,39,.22);
  --line2: rgba(17,24,39,.28);

  --shadow: 0 14px 28px rgba(2,6,23,.10);
}

/* Layout */
.gradio-container{max-width:1180px !important;}

/* Background */
body, .gradio-container{
  background:
    radial-gradient(1200px 700px at 10% 10%, rgba(11,61,145,.08), transparent 55%),
    radial-gradient(900px 600px at 90% 20%, rgba(0,166,81,.07), transparent 60%),
    radial-gradient(800px 520px at 55% 90%, rgba(79,178,255,.07), transparent 60%),
    linear-gradient(180deg, var(--bg1), var(--bg2)) !important;
}

/* Subtle moving veil */
body:before{
  content:"";
  position: fixed;
  inset: 0;
  pointer-events:none;
  background: linear-gradient(120deg,
    rgba(11,61,145,.06),
    rgba(0,166,81,.05),
    rgba(79,178,255,.05),
    rgba(11,61,145,.06)
  );
  background-size: 320% 320%;
  mix-blend-mode: multiply;
  opacity: .35;
  animation: bgShift 10s ease-in-out infinite;
}
@keyframes bgShift{
  0%{ background-position: 0% 50%; }
  50%{ background-position: 100% 50%; }
  100%{ background-position: 0% 50%; }
}

/* Keep text dark always */
.gradio-container, .gradio-container *{ color: var(--text) !important; }

/* Hero */
.hero{
  border:1.2px solid var(--line2);
  background: linear-gradient(135deg, rgba(255,255,255,.86), rgba(247,248,250,.82));
  border-radius: 22px;
  padding: 20px 20px 18px;
  display:flex;
  align-items:flex-end;
  justify-content:space-between;
  gap:16px;
  box-shadow: 0 18px 40px rgba(2,6,23,.12);
  margin: 12px 0 16px;
  position: relative;
  overflow: hidden;
  backdrop-filter: blur(10px);
  -webkit-backdrop-filter: blur(10px);
  animation: heroIn .65s ease-out both;
}
@keyframes heroIn{
  from{ opacity:0; transform: translateY(10px); }
  to{ opacity:1; transform: translateY(0); }
}
.hero-left{max-width: 740px;}
.hero *{ position: relative; z-index: 1; }

.hero:before, .hero:after{
  content:"";
  position:absolute;
  width: 360px;
  height: 360px;
  border-radius: 999px;
  filter: blur(44px);
  opacity: .26;
  pointer-events:none;
  animation: floaty 7s ease-in-out infinite;
}
.hero:before{
  background: radial-gradient(circle at 35% 35%, rgba(11,61,145,.22), transparent 62%),
              radial-gradient(circle at 35% 35%, rgba(79,178,255,.18), transparent 70%);
  top:-190px; left:-170px;
}
.hero:after{
  background: radial-gradient(circle at 60% 40%, rgba(0,166,81,.18), transparent 64%),
              radial-gradient(circle at 60% 40%, rgba(11,61,145,.10), transparent 72%);
  bottom:-220px; right:-190px;
  animation-delay: -2.8s;
}
@keyframes floaty{
  0%,100%{ transform: translate(0,0); }
  50%{ transform: translate(18px, -12px); }
}

.hero-title{
  font-weight: 1000;
  font-size: 28px;
  letter-spacing: -0.02em;
  line-height: 1.08;
}
.hero-title .accent{ display:inline-block; position: relative; }
.hero-title .accent:after{
  content:"";
  position:absolute;
  left:0; right:0;
  height: 10px;
  bottom: -7px;
  background: linear-gradient(90deg,
    rgba(11,61,145,0),
    rgba(11,61,145,.34),
    rgba(79,178,255,.34),
    rgba(0,166,81,.26),
    rgba(0,166,81,0)
  );
  filter: blur(1px);
  opacity: .90;
  transform: scaleX(0);
  transform-origin: left;
  animation: underlineIn .9s ease-out .25s both;
}
@keyframes underlineIn{
  from{ transform: scaleX(0); opacity: 0; }
  to{ transform: scaleX(1); opacity: .90; }
}
.hero-sub{
  color: var(--muted) !important;
  margin-top: 8px;
  font-size: 13.5px;
  line-height: 1.55rem;
  max-width: 74ch;
}
.hero-right{ display:flex; gap:10px; flex-wrap:wrap; justify-content:flex-end; }

/* KPI cards */
.kpi{
  background: rgba(255,255,255,.78);
  border:1.2px solid var(--line);
  border-radius: 16px;
  padding: 10px 12px;
  min-width: 150px;
  backdrop-filter: blur(8px);
  -webkit-backdrop-filter: blur(8px);
  transition: transform .18s ease, box-shadow .18s ease, border-color .18s ease;
}
.kpi:hover{
  transform: translateY(-2px);
  box-shadow: 0 18px 38px rgba(2,6,23,.12);
  border-color: var(--line2);
}
.kpi-label{ color:rgba(17,24,39,.78) !important; font-size:12px; font-weight:800; }
.kpi-val{ font-size:18px; font-weight:1000; margin-top:2px; }

/* Blocks */
.gradio-container .block{
  border-radius: 18px !important;
  border: 1.2px solid var(--line) !important;
  background: rgba(255,255,255,.72) !important;
  box-shadow: var(--shadow);
}

/* Inputs */
textarea, input[type="text"]{
  background: rgba(255,255,255,.90) !important;
  border: 1.2px solid var(--line) !important;
  border-radius: 14px !important;
}
textarea:focus, input[type="text"]:focus{
  outline: none !important;
  box-shadow: 0 0 0 3px rgba(79,178,255,.18) !important;
  border-color: var(--line2) !important;
}

/* Buttons */
button.primary, .gradio-container button{
  border-radius: 14px !important;
  border: 1px solid rgba(15,23,42,.18) !important;
  background: linear-gradient(90deg, rgba(11,61,145,.92), rgba(0,166,81,.78)) !important;
  color: #fff !important;
  transition: transform .15s ease, box-shadow .15s ease, filter .15s ease;
}
button.primary:hover, .gradio-container button:hover{
  transform: translateY(-1px);
  box-shadow: 0 14px 35px rgba(11,61,145,.16);
  filter: brightness(1.05);
}
button.primary:active, .gradio-container button:active{ transform: translateY(0) scale(.99); }

/* Tabs */
.gradio-container .tabs{
  border: 1.2px solid var(--line) !important;
  border-radius: 18px !important;
  overflow: hidden;
}
.gradio-container .tabitem{ background: rgba(255,255,255,.70) !important; }
.gradio-container .tab-nav{
  background: rgba(255,255,255,.70) !important;
  border-bottom: 1.2px solid var(--line) !important;
}

/* Cards */
.cards{display:grid;grid-template-columns: 1fr; gap: 12px;}
.card{
  background: linear-gradient(180deg, rgba(255,255,255,.92), rgba(247,248,250,.88));
  border:1.2px solid var(--line);
  border-radius: 18px;
  padding: 14px;
  box-shadow: var(--shadow);
  transition: transform .18s ease, box-shadow .18s ease, border-color .18s ease;
}
.card:hover{
  transform: translateY(-2px);
  box-shadow: 0 20px 40px rgba(2,6,23,.12);
  border-color: var(--line2);
}
.card-top{display:flex;align-items:flex-start;justify-content:space-between;gap:10px;}
.card-title{display:flex;gap:10px;align-items:baseline;flex-wrap:wrap;}
.rank{
  background: rgba(11,61,145,.10);
  border:1.2px solid rgba(11,61,145,.22);
  font-weight: 1000;
  border-radius: 999px;
  padding: 6px 10px;
  font-size: 12px;
}
.file{font-weight:1000;font-size:16px;}
.card-meta{display:flex;gap:8px;align-items:center;flex-wrap:wrap;justify-content:flex-end;}

/* Badges / Pills */
.badge{
  display:inline-flex;align-items:center;
  padding: 6px 10px;border-radius: 999px;font-size:12px;font-weight:1000;
  border:1.2px solid var(--line);
  color: var(--text) !important;
}
.b-exc{ background: rgba(0,166,81,.12); border-color: rgba(0,166,81,.26); }
.b-good{ background: rgba(11,61,145,.10); border-color: rgba(11,61,145,.24); }
.b-maybe{ background: rgba(245,158,11,.12); border-color: rgba(245,158,11,.28); }
.b-weak{ background: rgba(239,68,68,.10); border-color: rgba(239,68,68,.26); }

.pill{
  display:inline-flex;align-items:center;justify-content:center;
  min-width:60px;padding: 6px 10px;border-radius: 999px;font-weight: 1000;
  border:1.2px solid var(--line);
  background: rgba(255,255,255,.78);
  color: var(--text) !important;
}
.p-high{ background: rgba(0,166,81,.12); border-color: rgba(0,166,81,.26); }
.p-mid{  background: rgba(11,61,145,.10); border-color: rgba(11,61,145,.24); }
.p-low{  background: rgba(245,158,11,.12); border-color: rgba(245,158,11,.28); }
.p-bad{  background: rgba(239,68,68,.10); border-color: rgba(239,68,68,.26); }

/* Score bar */
.bar{
  width: 100%; height: 10px; border-radius: 999px;
  background: rgba(17,24,39,.08); overflow: hidden;
  border:1.2px solid var(--line);
  margin: 10px 0 10px;
}
.fill{
  height:100%; border-radius: 999px;
  background: linear-gradient(90deg, var(--sgs-green), #4fb2ff, var(--sgs-blue));
}

.summary{font-size:13px;line-height:1.55rem;margin: 6px 0 10px;color:var(--text) !important;}
.section-title{font-size:13px;font-weight:1000;margin:10px 0 6px;color:var(--text) !important;}

.grid{display:grid;grid-template-columns: 1fr 1fr; gap: 14px;}
@media(max-width:860px){
  .grid{grid-template-columns:1fr;}
  .hero{flex-direction:column; align-items:flex-start;}
  .hero-right{justify-content:flex-start;}
  .kpi{min-width: 160px;}
  .hero-title{font-size: 24px;}
}

.list{margin:0;padding-left:18px;color:var(--text) !important;}
.list li{margin:6px 0;line-height:1.30rem;color:var(--text) !important;}

/* Quotes / Evidence */
.quotes{display:grid;gap:10px;margin-top:6px;}
.quote{
  background: rgba(255,255,255,.82);
  border:1.2px solid var(--line);
  border-radius: 14px;
  padding: 10px 12px;
  color: var(--text) !important;
  font-size: 13px;
  line-height: 1.45rem;
}
.quote.muted{opacity:.85;}

/* Checklist */
.checklist{display:grid;gap:8px;margin-top:6px;}
.checkrow{
  display:grid; grid-template-columns: 1.1fr .4fr 1.5fr; gap:10px;
  padding:10px 12px; border-radius:14px;
  border:1.2px solid var(--line);
  background: rgba(255,255,255,.82);
  font-size:13px;
  position: relative;
  overflow: hidden;
}
.checkrow:before{
  content:"";
  position:absolute;
  left:0; top:0; bottom:0;
  width:4px;
  background: rgba(17,24,39,.22);
}
.checkrow .req{font-weight:1000;color:var(--text) !important;}
.checkrow .ev{color:rgba(17,24,39,0.88) !important;}
.checkrow .st{font-weight:1000;text-align:center;letter-spacing:.4px;}

/* Status colors */
.checkrow.ok:before{ background: rgba(0,166,81,.95); }
.checkrow.partial:before{ background: rgba(245,158,11,.95); }
.checkrow.miss:before{ background: rgba(239,68,68,.95); }

.checkrow.ok .st{ color: rgba(0,120,70,1) !important; }
.checkrow.partial .st{ color: rgba(150,95,10,1) !important; }
.checkrow.miss .st{ color: rgba(160,20,20,1) !important; }

/* =========================================================
   File uploader: readable label/filename ALWAYS
   ========================================================= */
.gradio-container .file,
.gradio-container .file-upload,
.gradio-container .upload-button,
.gradio-container .file-upload > div,
.gradio-container [data-testid="file"]{
  background: rgba(245,247,250,.92) !important;
  border: 1.4px solid rgba(17,24,39,.28) !important;
  border-radius: 16px !important;
  box-shadow: 0 12px 24px rgba(2,6,23,.10) !important;
}
.gradio-container .file *,
.gradio-container .file-upload *,
.gradio-container .upload-button *,
.gradio-container [data-testid="file"] *{
  color: #111827 !important;
}
.gradio-container .file-upload .file-title,
.gradio-container .file-upload .file-label,
.gradio-container .file-upload .label,
.gradio-container .file-upload .wrap,
.gradio-container .file-upload .header,
.gradio-container [data-testid="file"] .label{
  background: rgba(245,247,250,.92) !important;
  border-bottom: 1.4px solid rgba(17,24,39,.20) !important;
}
.gradio-container .file-upload .file-name,
.gradio-container .file-upload .filename,
.gradio-container [data-testid="file"] .file-name{
  font-weight: 900 !important;
}
.gradio-container .file-upload button,
.gradio-container [data-testid="file"] button{
  background: rgba(255,255,255,.85) !important;
  border: 1.2px solid rgba(17,24,39,.28) !important;
  color: #111827 !important;
}
.gradio-container .file:hover,
.gradio-container .file-upload:hover,
.gradio-container [data-testid="file"]:hover{
  border-color: rgba(17,24,39,.36) !important;
  box-shadow: 0 16px 32px rgba(2,6,23,.12) !important;
}

/* =========================================================
   Progress label text = white
   ========================================================= */
.gradio-container .progress-text,
.gradio-container .progress_label,
.gradio-container .progress-label,
.gradio-container .eta,
.gradio-container [data-testid="progress-text"],
.gradio-container [data-testid="progress-label"],
.gradio-container [data-testid="progress-bar"] *{
  color: #ffffff !important;
  text-shadow: 0 1px 2px rgba(0,0,0,.55);
}

/* Respect reduced motion */
@media (prefers-reduced-motion: reduce){
  body:before, .hero, .hero:before, .hero:after{
    animation: none !important;
  }
}
"""


# =========================================================
# UI
# =========================================================
# Gradio "Soft" base theme tuned to the blue/green palette used by CUSTOM_CSS.
# Font preference list: the Inter Google font, then generic sans-serif stacks.
_theme_fonts = [gr.themes.GoogleFont("Inter"), "ui-sans-serif", "system-ui"]
theme = gr.themes.Soft(
    font=_theme_fonts,
    radius_size="lg",
    neutral_hue="slate",
    secondary_hue="green",
    primary_hue="blue",
)

with gr.Blocks(title="SGS ATS Candidate Matcher", theme=theme, css=CUSTOM_CSS) as demo:
    # ---- Hero banner (static markup; only the upload cap is interpolated) ----
    hero_markup = f"""
    <div class="hero">
      <div class="hero-left">
        <div class="hero-title"><span class="accent">Intelligent</span> CV–JD matching for SGS</div>
        <div class="hero-sub">
          Analyze job descriptions and candidate CVs to deliver accurate matching, structured insights,
          and data-driven hiring decisions — all in minutes, not weeks.
        </div>
      </div>

      <div class="hero-right">
        <div class="kpi">
          <div class="kpi-label">Max CV uploads</div>
          <div class="kpi-val">{MAX_CV_UPLOADS}</div>
        </div>
        <div class="kpi">
          <div class="kpi-label">Important</div>
          <div class="kpi-val">Set HF_TOKEN</div>
        </div>
      </div>
    </div>
    """
    gr.HTML(hero_markup)

    # ---- Uploads: one job description plus a batch of CVs ----
    accepted_doc_types = (".pdf", ".docx", ".txt")
    with gr.Row():
        jd_file = gr.File(
            label="Job Description file (PDF/DOCX/TXT)",
            file_types=list(accepted_doc_types),
        )
        cv_files = gr.File(
            label=f"Upload CVs (max {MAX_CV_UPLOADS})",
            file_count="multiple",
            file_types=list(accepted_doc_types),
        )

    # ---- Optional knobs, collapsed by default ----
    with gr.Accordion("Settings", open=False):
        must_haves = gr.Textbox(
            placeholder="Example:\nRecruitment lifecycle\nATS usage\nInterview scheduling\nOffer negotiation",
            lines=5,
            label="Must-have requirements (optional) — one per line",
        )
        mask_pii_toggle = gr.Checkbox(
            value=True,
            label="Mask PII (emails/phones) in evidence",
        )
        show_contacts_toggle = gr.Checkbox(
            value=True,
            label="Extract contact info (Name / Email / Phone) from CVs",
        )

    run_btn = gr.Button("Generate Candidate Fit Report", variant="primary")

    with gr.Tabs():
        with gr.Tab("Executive Report"):
            # Per-session state: the ranked candidates and the index currently shown.
            ranked_state = gr.State([])
            idx_state = gr.State(0)

            # Hidden HTML slot; rank_app writes its mini-refresh snippet here
            # each time a report is generated.
            mini_refresh = gr.HTML(visible=False)

            # Pager: previous / position label / next.
            with gr.Row():
                prev_btn = gr.Button("◀", size="sm")
                nav_text = gr.Markdown("—")
                next_btn = gr.Button("▶", size="sm")

            report_html = gr.HTML()
            meta_md = gr.Markdown()
            export_full = gr.File(label="Download Full Ranking CSV (includes contacts)")

        with gr.Tab("Shortlist & Export"):
            gr.Markdown("Tick **Shortlisted** candidates, then click **Export Shortlist**.")

            # Editable table; headers and datatypes are listed pairwise below.
            shortlist_headers = ["Shortlisted", "Rank", "Filename", "Score", "Fit", "Name", "Email", "Phone"]
            shortlist_dtypes = ["bool", "number", "str", "number", "str", "str", "str", "str"]
            shortlist_df = gr.Dataframe(
                headers=shortlist_headers,
                datatype=shortlist_dtypes,
                interactive=True,
            )

            with gr.Row():
                export_shortlist_btn = gr.Button("Export Shortlist CSV", variant="secondary")
                export_shortlist_file = gr.File(label="Download Shortlist CSV")

            export_shortlist_msg = gr.Markdown()
            email_list = gr.Textbox(
                placeholder="Emails will appear here after exporting shortlist...",
                lines=3,
                label="Email list (copy/paste) — shortlisted only",
            )

    # ---- Event wiring ----
    # The main run fills the report, the exports, the shortlist table, the
    # navigation state and the hidden refresh slot in one call.
    rank_inputs = [jd_file, cv_files, must_haves, mask_pii_toggle, show_contacts_toggle]
    rank_outputs = [
        report_html,
        meta_md,
        export_full,
        shortlist_df,
        export_shortlist_msg,
        email_list,
        ranked_state,
        idx_state,
        nav_text,
        mini_refresh,
    ]
    run_btn.click(fn=rank_app, inputs=rank_inputs, outputs=rank_outputs)

    # Both pager buttons read and write the same components.
    pager_inputs = [ranked_state, idx_state]
    pager_outputs = [report_html, nav_text, idx_state]
    prev_btn.click(fn=nav_prev, inputs=pager_inputs, outputs=pager_outputs)
    next_btn.click(fn=nav_next, inputs=pager_inputs, outputs=pager_outputs)

    export_shortlist_btn.click(
        fn=export_shortlist,
        inputs=[shortlist_df],
        outputs=[export_shortlist_file, export_shortlist_msg, email_list],
    )

# Start the web server, listening on all interfaces (0.0.0.0) at port 7860.
demo.launch(
    server_port=7860,
    server_name="0.0.0.0",
)