# (Removed non-code residue copied from the Hugging Face Spaces page header: "Spaces: Running Running")
# app.py — SGS ATS Candidate Matcher (HF Inference API ONLY, Spaces-safe)
# - No transformers / torch / sentence-transformers
# - Uses ONLY huggingface_hub.InferenceClient (works with hub 1.x)
# - Top 10 executive report + shortlist + exports + contacts + progress
# - Max CV uploads = 10
#
# Space secret required:
#   HF_TOKEN (Settings -> Secrets)
#
# Optional env vars:
#   LLM_MODEL (default: Qwen/Qwen2.5-7B-Instruct)
#   EMBED_MODEL (default: sentence-transformers/all-MiniLM-L6-v2)
#   LLM_BATCH_SIZE, LLM_MAX_TOKENS, LLM_TEMPERATURE
| import os | |
| import re | |
| import json | |
| import time | |
| import csv | |
| import hashlib | |
| import tempfile | |
| from typing import List, Dict, Any, Optional, Tuple | |
| import numpy as np | |
| import pandas as pd | |
| import gradio as gr | |
| from huggingface_hub import InferenceClient | |
| from huggingface_hub.errors import BadRequestError, HfHubHTTPError | |
| from pydantic import BaseModel, Field | |
| from pypdf import PdfReader | |
| import docx2txt | |
# =========================================================
# Models (Inference API)
# =========================================================
# NOTE: Meta Llama repos are often gated on Hugging Face.
# If you have access, you can set LLM_MODEL to e.g. "meta-llama/Llama-3.1-8B-Instruct".
LLM_MODEL = os.getenv("LLM_MODEL", "Qwen/Qwen2.5-7B-Instruct")  # chat model used as the ranking judge
EMBED_MODEL = os.getenv("EMBED_MODEL", "sentence-transformers/all-MiniLM-L6-v2")  # feature-extraction model
# =========================================================
# Controls
# =========================================================
MAX_CV_UPLOADS = 10  # hard cap on CVs accepted per run
MAX_CV_CHARS = 120_000  # truncate each CV's extracted text to this length
MAX_JD_CHARS = 60_000  # truncate the job-description text to this length
CHUNK_SIZE_CHARS = 1100  # character window used when chunking a CV
CHUNK_OVERLAP_CHARS = 180  # overlap carried between consecutive chunks
TOP_CHUNKS_PER_CV = 10  # retrieval
EVIDENCE_CHUNKS_PER_CV = 4  # sent to LLM judge
LLM_BATCH_SIZE = int(os.getenv("LLM_BATCH_SIZE", "3"))  # candidates judged per LLM request
LLM_MAX_TOKENS = int(os.getenv("LLM_MAX_TOKENS", "2600"))  # completion token budget per request
LLM_TEMPERATURE = float(os.getenv("LLM_TEMPERATURE", "0.15"))  # low temperature for stable JSON output
ALLOW_LEXICAL_FALLBACK = True  # fall back to token-overlap retrieval if the embedding API fails
| # ========================================================= | |
| # Output schemas | |
| # ========================================================= | |
class RequirementCheck(BaseModel):
    """One job-description requirement judged against a candidate's CV evidence."""

    # requirement: short label (<=8 words per the prompt contract)
    requirement: str
    status: str = Field(..., description="met | partial | missing")
    evidence: str = Field(..., description="short quote <=160 chars or empty")
class CandidateLLMResult(BaseModel):
    """Per-candidate judgment returned by the LLM (later tightened by strict scoring)."""

    # filename is the join key used across the whole pipeline (contacts, CSV, shortlist).
    filename: str
    final_score: float = Field(..., description="0-100")
    fit_level: str = Field(..., description="excellent | good | maybe | weak")
    summary: str
    strengths: List[str]  # prompt contract: max 4 items
    gaps: List[str]  # prompt contract: max 4 items
    risks: List[str]  # prompt contract: max 3 items
    checklist: List[RequirementCheck]  # prompt contract: max 6 checks
    top_evidence: List[str]  # prompt contract: max 3 short quotes
class LLMRankingOutput(BaseModel):
    """Top-level JSON payload the LLM must return for a batch of candidates."""

    ranked: List[CandidateLLMResult]  # candidates, best first
    overall_notes: str  # free-text notes about the batch
| # ========================================================= | |
| # Client | |
| # ========================================================= | |
_hf_client: Optional[InferenceClient] = None


def get_hf_client() -> InferenceClient:
    """Return a process-wide cached InferenceClient, creating it on first use.

    Raises:
        gr.Error: when the HF_TOKEN Space secret is not configured.
    """
    global _hf_client
    if _hf_client is None:
        token = os.getenv("HF_TOKEN", "").strip()
        if not token:
            raise gr.Error("HF_TOKEN is not set. Add it in Space Settings β Repository secrets.")
        _hf_client = InferenceClient(token=token)
    return _hf_client
| # ========================================================= | |
| # Text + files | |
| # ========================================================= | |
def gr_file_to_path(f: Any) -> Optional[str]:
    """Best-effort extraction of a filesystem path from a Gradio file value.

    Accepts None, a plain path string, a dict carrying a "path" key, or any
    object exposing a `.name` attribute (e.g. tempfile wrappers).
    Returns None when no path can be determined.
    """
    if f is None:
        return None
    if isinstance(f, str):
        return f
    if isinstance(f, dict):
        return f.get("path")
    return getattr(f, "name", None)
def clean_text(t: str) -> str:
    """Normalize whitespace: drop NUL bytes, collapse space/tab runs, squeeze blank lines."""
    normalized = (t or "").replace("\x00", " ")
    normalized = re.sub(r"[ \t]+", " ", normalized)
    normalized = re.sub(r"\n{3,}", "\n\n", normalized)
    return normalized.strip()
def read_file_to_text(file_path: str) -> str:
    """Extract plain text from a PDF, DOCX, or any other (assumed text) file.

    PDF pages with no extractable text (e.g. scanned images) contribute empty
    strings. Unknown extensions are read as bytes and decoded as UTF-8 with
    invalid bytes silently dropped.

    Note: the original wrapped the decode in try/except, but
    decode(errors="ignore") can never raise UnicodeDecodeError, so the
    fallback branch was unreachable and has been removed.
    """
    lower = file_path.lower()
    if lower.endswith(".pdf"):
        reader = PdfReader(file_path)
        parts = [page.extract_text() or "" for page in reader.pages]
        return "\n".join(parts).strip()
    if lower.endswith(".docx"):
        return (docx2txt.process(file_path) or "").strip()
    with open(file_path, "rb") as f:
        return f.read().decode("utf-8", errors="ignore").strip()
def file_bytes_hash(path: str) -> str:
    """SHA-256 hex digest of the file's raw bytes (used for duplicate detection)."""
    with open(path, "rb") as fh:
        digest = hashlib.sha256(fh.read())
    return digest.hexdigest()
def chunk_text_safe(text: str, chunk_size: int = CHUNK_SIZE_CHARS, overlap: int = CHUNK_OVERLAP_CHARS) -> List[str]:
    """Split text into overlapping character windows.

    Args:
        text: raw text; None/empty yields [].
        chunk_size: window length in characters.
        overlap: characters of context repeated between consecutive windows.

    Returns:
        List of stripped, non-empty chunk strings covering the whole text.

    Fix: the original stepped with `i = max(0, j - overlap)`, which never
    advances (infinite loop) when overlap >= chunk_size. The step now always
    moves forward by at least one character.
    """
    text = (text or "").strip()
    if not text:
        return []
    chunks: List[str] = []
    i = 0
    n = len(text)
    while i < n:
        j = min(i + chunk_size, n)
        piece = text[i:j].strip()
        if piece:
            chunks.append(piece)
        if j == n:
            break
        # Step back by `overlap` for context, but guarantee forward progress.
        i = max(i + 1, j - overlap)
    return chunks
def mask_pii(text: str) -> str:
    """Replace e-mail addresses and phone-like digit runs with placeholder tokens."""
    substitutions = (
        (r"[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}", "[EMAIL]"),
        (r"(\+?\d[\d\-\s]{7,}\d)", "[PHONE]"),
    )
    for pattern, placeholder in substitutions:
        text = re.sub(pattern, placeholder, text)
    return text
| # ========================================================= | |
| # Contact extraction | |
| # ========================================================= | |
# Regexes shared by contact extraction below.
_EMAIL_RE = re.compile(r"\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b")
# Loose phone matcher: optional country code, optional area code, 6-8 final digits.
_PHONE_RE = re.compile(r"(?:\+?\d{1,3}[\s\-]?)?(?:\(?\d{2,4}\)?[\s\-]?)?\d{3,4}[\s\-]?\d{3,4}")
def _normalize_phone(p: str) -> str:
    """Strip everything except digits and a leading '+' from a raw phone match."""
    return re.sub(r"[^\d+]", "", p)
def guess_name(text: str) -> str:
    """Heuristically pick the candidate's name from the top of a CV.

    Scans the first 14 non-empty lines and returns the first one that is
    reasonably short, contains no e-mail or long digit run, has letters
    (Latin or Arabic), and is not a generic header like "CV" or "Resume".
    Returns "" when nothing qualifies.
    """
    generic_headers = {"curriculum vitae", "cv", "resume", "profile"}
    candidate_lines = [ln.strip() for ln in (text or "").splitlines() if ln.strip()]
    for line in candidate_lines[:14]:
        if "@" in line or len(line) > 55:
            continue
        if re.search(r"\d{3,}", line):
            continue
        if not re.search(r"[A-Za-z\u0600-\u06FF]", line):
            continue
        if line.lower() in generic_headers:
            continue
        return line
    return ""
def extract_contact_info(text: str) -> Dict[str, str]:
    """Pull a best-guess name, first e-mail, and first plausible phone from CV text.

    A phone match counts only if its normalized form has 8-16 digits.
    Missing fields come back as empty strings.
    """
    body = text or ""
    email_matches = _EMAIL_RE.findall(body)
    first_email = email_matches[0] if email_matches else ""
    first_phone = ""
    for raw in _PHONE_RE.findall(body):
        normalized = _normalize_phone(raw)
        digit_count = len(re.sub(r"\D", "", normalized))
        if 8 <= digit_count <= 16:
            first_phone = normalized
            break
    return {"name": guess_name(body), "email": first_email, "phone": first_phone}
| # ========================================================= | |
| # Embeddings via HF Inference API (feature-extraction) | |
| # ========================================================= | |
| def _l2norm(v: np.ndarray) -> np.ndarray: | |
| return v / (np.linalg.norm(v) + 1e-12) | |
def embed_texts_api(texts: List[str]) -> np.ndarray:
    """
    Embed each text via the HF Inference API 'feature-extraction' task.

    Returns a [len(texts), d] float32 matrix of L2-normalized vectors; an
    empty input yields a (0, 384) placeholder (MiniLM-L6-v2 dimensionality).

    The text is passed POSITIONALLY: the first parameter of
    InferenceClient.feature_extraction is named `text` in current
    huggingface_hub releases, so the previous `inputs=` keyword would raise
    TypeError there. A positional call works regardless of the parameter name.
    """
    client = get_hf_client()
    vecs = []
    for t in texts:
        v = client.feature_extraction(t, model=EMBED_MODEL)
        v = np.array(v, dtype=np.float32).reshape(-1)
        vecs.append(_l2norm(v))
    return np.stack(vecs, axis=0) if vecs else np.zeros((0, 384), dtype=np.float32)
def cosine_sim_matrix(a: np.ndarray, b: np.ndarray) -> np.ndarray:
    """Pairwise cosine similarity; both inputs are assumed row-normalized, so this is a plain dot product."""
    return a @ b.T
| # ========================================================= | |
| # Lexical fallback (no embeddings) | |
| # ========================================================= | |
| _WORD_RE = re.compile(r"[A-Za-z\u0600-\u06FF0-9]+") | |
| def _tokenize(text: str) -> List[str]: | |
| return [w.lower() for w in _WORD_RE.findall(text or "") if len(w) >= 2] | |
def lexical_rank_chunks(jd: str, chunks: List[str], top_k: int) -> List[Tuple[int, float]]:
    """Rank chunks by the fraction of distinct JD tokens they contain.

    Score per chunk = |JD vocab ∩ chunk vocab| / |JD vocab| (with a tiny
    epsilon in the denominator, matching the embedding-free fallback path).
    Returns up to top_k (chunk_index, score) pairs, best first.
    """
    jd_vocab = set(_tokenize(jd))
    if not jd_vocab or not chunks:
        return []
    denom = float(len(jd_vocab) + 1e-9)
    scored: List[Tuple[int, float]] = []
    for index, chunk in enumerate(chunks):
        chunk_tokens = _tokenize(chunk)
        if not chunk_tokens:
            scored.append((index, 0.0))
            continue
        overlap = len(jd_vocab & set(chunk_tokens))
        scored.append((index, float(overlap) / denom))
    scored.sort(key=lambda pair: pair[1], reverse=True)
    return scored[:top_k]
| # ========================================================= | |
| # LLM Judge (Ranking) with robust JSON parsing | |
| # ========================================================= | |
def build_llm_prompt(jd_text: str, must_haves: str, candidates: List[Dict[str, Any]]) -> str:
    """Assemble the single-shot judging prompt for a batch of candidates.

    The prompt embeds a literal schema example, hard output limits, the
    truncated JD (4000 chars) and must-haves (1200 chars), plus each
    candidate's filename and evidence chunks. The model is instructed to
    answer with exactly one JSON object and nothing else.
    """
    # Literal example of the expected output shape, serialized into the prompt.
    schema_example = {
        "ranked": [
            {
                "filename": "<cv_filename>",
                "final_score": 0,
                "fit_level": "weak",
                "summary": "one short paragraph",
                "strengths": ["max 4 items"],
                "gaps": ["max 4 items"],
                "risks": ["max 3 items"],
                "checklist": [
                    {"requirement": "SHORT label (<=8 words)", "status": "met", "evidence": "short quote <=160 chars"}
                ],
                "top_evidence": ["max 3 short quotes"],
            }
        ],
        "overall_notes": "short",
    }
    return f"""
You are an expert recruiter and ATS evaluator.
Return ONLY one JSON object, EXACTLY matching this schema:
{json.dumps(schema_example, ensure_ascii=False)}
Hard limits (MUST follow):
- strengths: max 4 bullets
- gaps: max 4 bullets
- risks: max 3 bullets
- checklist: max 6 requirements total
- requirement: SHORT label (<=8 words). Do NOT paste long JD sentences.
- evidence: <=160 chars or empty
- top_evidence: max 3 short quotes
Rules:
- Use ONLY the provided evidence_chunks. Do NOT invent experience.
- final_score 0-100 (be strict: missing must-haves should significantly reduce score)
- fit_level: excellent | good | maybe | weak
- status: met | partial | missing
Job Description (compressed):
\"\"\"{jd_text[:4000]}\"\"\"
Must-haves (optional):
\"\"\"{(must_haves or '').strip()[:1200]}\"\"\"
Candidates:
{json.dumps(candidates, ensure_ascii=False)}
Output JSON only. No markdown. No extra text.
""".strip()
| def _extract_first_complete_json_object(text: str) -> Optional[str]: | |
| if not text: | |
| return None | |
| start = text.find("{") | |
| if start < 0: | |
| return None | |
| depth = 0 | |
| in_str = False | |
| esc = False | |
| for i in range(start, len(text)): | |
| ch = text[i] | |
| if in_str: | |
| if esc: | |
| esc = False | |
| elif ch == "\\": | |
| esc = True | |
| elif ch == '"': | |
| in_str = False | |
| continue | |
| else: | |
| if ch == '"': | |
| in_str = True | |
| continue | |
| if ch == "{": | |
| depth += 1 | |
| elif ch == "}": | |
| depth -= 1 | |
| if depth == 0: | |
| return text[start : i + 1] | |
| return None | |
def fit_level_from_score(score: float) -> str:
    """Map a 0-100 score onto the four fit buckets used throughout the report."""
    s = float(score)
    for threshold, label in ((85, "excellent"), (70, "good"), (55, "maybe")):
        if s >= threshold:
            return label
    return "weak"
def clamp(x: float, lo: float, hi: float) -> float:
    """Constrain x to the closed interval [lo, hi]."""
    upper_bounded = min(hi, x)
    return max(lo, upper_bounded)
| # ------------------------- | |
| # STRICTER scoring (post-process) | |
| # ------------------------- | |
def apply_strict_scoring(c: CandidateLLMResult) -> CandidateLLMResult:
    """
    Tighten the LLM's score using its own requirements checklist.

    met=1, partial=0.5, missing=0 are folded into a fulfillment ratio; the
    base score is scaled by a penalty multiplier in [0.20, 1.00] that drops
    sharply for low ratios (ratio ** 1.6 curve). With >=3 requirements and
    nothing fully met, a hard cap applies: 25 when nothing is even partial,
    35 otherwise. A missing checklist earns a mild 15% penalty instead, so
    the candidate stays rankable. Mutates and returns `c`.
    """
    base = float(c.final_score)
    checklist = c.checklist or []
    if not checklist:
        # Model produced no checklist: penalize slightly, keep in the ranking.
        adjusted = clamp(base * 0.85, 0.0, 100.0)
        c.final_score = adjusted
        c.fit_level = fit_level_from_score(adjusted)
        return c
    counts = {"met": 0, "partial": 0, "missing": 0}
    for item in checklist:
        status = (item.status or "").strip().lower()
        if status in ("met", "partial"):
            counts[status] += 1
        else:
            counts["missing"] += 1
    total = len(checklist)
    ratio = (counts["met"] + 0.5 * counts["partial"]) / float(max(1, total))  # 0..1
    # Penalty curve: multiplier between 0.20 and 1.00, dropping hard when ratio is low.
    adjusted = base * (0.20 + 0.80 * (ratio ** 1.6))
    if total >= 3 and counts["met"] == 0:
        # Nothing fully met: hard cap (tighter still when nothing is even partial).
        cap = 25.0 if counts["partial"] == 0 else 35.0
        adjusted = min(adjusted, cap)
    adjusted = clamp(adjusted, 0.0, 100.0)
    c.final_score = float(round(adjusted, 2))
    c.fit_level = fit_level_from_score(c.final_score)
    return c
def fallback_candidate(filename: str, local_score: float) -> CandidateLLMResult:
    """Build a minimal result from retrieval signals when the LLM output is unusable.

    The retrieval-based score is kept as-is so a mid local score does not
    masquerade as a strong LLM judgment.
    """
    score = float(round(local_score, 2))
    return CandidateLLMResult(
        filename=filename,
        final_score=score,
        fit_level=fit_level_from_score(score),
        summary="LLM output incomplete; fallback score based on retrieval signals.",
        strengths=[],
        gaps=[],
        risks=[],
        checklist=[],
        top_evidence=[],
    )
def _llm_call_or_raise(prompt: str, temperature: float, max_tokens: int) -> str:
    """Send one chat-completion request and return the raw assistant text.

    Raises:
        gr.Error: with actionable guidance when the request is rejected
            (BadRequestError — usually a wrong or gated model name), or
            wrapping any other HF Hub HTTP failure.
    """
    client = get_hf_client()
    try:
        resp = client.chat_completion(
            model=LLM_MODEL,
            messages=[
                {"role": "system", "content": "Return ONLY valid JSON matching the schema. No markdown."},
                {"role": "user", "content": prompt},
            ],
            max_tokens=max_tokens,
            temperature=temperature,
        )
        # content can be None on some backends; coerce to "" before stripping.
        return (resp.choices[0].message.content or "").strip()
    except BadRequestError as e:
        msg = str(e)
        # Most common cause: LLM_MODEL typo, or a gated repo the token cannot access.
        raise gr.Error(
            "LLM call failed. This usually means the model name is wrong or the model is gated.\n\n"
            f"Current LLM_MODEL: {LLM_MODEL}\n"
            "Try setting LLM_MODEL to a public model like:\n"
            "- Qwen/Qwen2.5-7B-Instruct\n"
            "- mistralai/Mistral-7B-Instruct-v0.3\n"
            "Or if you have Meta access:\n"
            "- meta-llama/Llama-3.1-8B-Instruct\n\n"
            f"Raw error: {msg}"
        ) from e
    except HfHubHTTPError as e:
        raise gr.Error(f"HF Inference error: {e}") from e
def llm_judge_rank_batch(jd_text: str, must_haves: str, batch: List[Dict[str, Any]]) -> LLMRankingOutput:
    """Judge one batch of candidates with the LLM, with layered fallbacks.

    Parse strategy, in order:
      1. whole response as JSON;
      2. first balanced {...} object found inside the response;
      3. one retry at temperature 0 with a larger token budget (same two parses);
      4. retrieval-score-based fallback results for the entire batch.
    Candidates the LLM silently dropped from its answer are re-judged one at
    a time; if that also fails they get a fallback entry — so every input
    filename appears exactly once in the returned ranking.
    """
    prompt = build_llm_prompt(
        jd_text,
        must_haves or "",
        [{"filename": b["filename"], "evidence_chunks": b["evidence_chunks"]} for b in batch],
    )
    out: Optional[LLMRankingOutput] = None
    text = _llm_call_or_raise(prompt, LLM_TEMPERATURE, LLM_MAX_TOKENS)
    try:
        out = LLMRankingOutput.model_validate(json.loads(text))
    except Exception:
        # Response may wrap the JSON in prose/markdown; salvage the first object.
        obj = _extract_first_complete_json_object(text)
        if obj:
            out = LLMRankingOutput.model_validate(json.loads(obj))
    if out is None:
        # Retry once: deterministic, with extra room for the JSON to complete.
        text2 = _llm_call_or_raise(prompt, 0.0, max(LLM_MAX_TOKENS, 3200))
        try:
            out = LLMRankingOutput.model_validate(json.loads(text2))
        except Exception:
            obj2 = _extract_first_complete_json_object(text2)
            if obj2:
                out = LLMRankingOutput.model_validate(json.loads(obj2))
    if out is None:
        ranked = [fallback_candidate(b["filename"], b.get("local_score", 50.0)) for b in batch]
        return LLMRankingOutput(ranked=ranked, overall_notes="LLM parsing failed; used retrieval-based fallback.")
    returned = {c.filename: c for c in out.ranked}
    # Re-judge any candidate the model omitted from its answer, one at a time.
    missing = [b for b in batch if b["filename"] not in returned]
    for b in missing:
        single_prompt = build_llm_prompt(
            jd_text,
            must_haves or "",
            [{"filename": b["filename"], "evidence_chunks": b["evidence_chunks"]}],
        )
        single_text = _llm_call_or_raise(single_prompt, 0.0, min(2200, LLM_MAX_TOKENS))
        single_out: Optional[LLMRankingOutput] = None
        try:
            single_out = LLMRankingOutput.model_validate(json.loads(single_text))
        except Exception:
            single_obj = _extract_first_complete_json_object(single_text)
            if single_obj:
                single_out = LLMRankingOutput.model_validate(json.loads(single_obj))
        if single_out and single_out.ranked:
            returned[b["filename"]] = single_out.ranked[0]
        else:
            returned[b["filename"]] = fallback_candidate(b["filename"], b.get("local_score", 50.0))
    merged_ranked = sorted(returned.values(), key=lambda x: float(x.final_score), reverse=True)
    notes = (out.overall_notes or "").strip()
    if missing:
        notes = (notes + " | Some candidates re-judged individually / fallback used.").strip(" |")
    return LLMRankingOutput(ranked=merged_ranked, overall_notes=notes)
def merge_llm_batches(batch_outputs: List[LLMRankingOutput]) -> LLMRankingOutput:
    """Flatten per-batch results, apply strict re-scoring, and globally re-rank.

    Notes from all batches are joined with " | " and capped at 1200 chars.
    """
    notes: List[str] = []
    candidates: List[CandidateLLMResult] = []
    for batch in batch_outputs:
        notes.append(batch.overall_notes)
        candidates.extend(batch.ranked)
    # Strict scoring runs AFTER the LLM, so "missing everything but scored 65"
    # cases get corrected before the global sort.
    rescored = [apply_strict_scoring(c) for c in candidates]
    rescored.sort(key=lambda c: float(c.final_score), reverse=True)
    joined_notes = " | ".join(n for n in notes if n)[:1200]
    return LLMRankingOutput(ranked=rescored, overall_notes=joined_notes)
| # ========================================================= | |
| # Local scoring (retrieval-only, scaled to 0-100) | |
| # ========================================================= | |
def compute_retrieval_score(top_sims: List[float]) -> float:
    """Collapse top chunk similarities into a 0-100 retrieval-only score.

    Blends the mean (65%) and max (35%) of the best five similarities,
    scales to 0-100, and clamps. An empty list scores 0.
    """
    if not top_sims:
        return 0.0
    best_five = sorted(top_sims, reverse=True)[:5]
    blended = 0.65 * float(np.mean(best_five)) + 0.35 * float(np.max(best_five))
    return float(clamp(blended * 100.0, 0.0, 100.0))
| # ========================================================= | |
| # UI rendering (SGS) | |
| # ========================================================= | |
def fit_badge(level: str) -> str:
    """HTML badge span for a fit level; anything unrecognized renders as Weak."""
    badges = {
        "excellent": '<span class="badge b-exc">Excellent</span>',
        "good": '<span class="badge b-good">Good</span>',
        "maybe": '<span class="badge b-maybe">Potential</span>',
    }
    key = (level or "").lower().strip()
    return badges.get(key, '<span class="badge b-weak">Weak</span>')
def score_pill(score: float) -> str:
    """HTML pill showing the score to one decimal, color-classed by threshold band."""
    s = float(score)
    if s >= 80:
        cls = "p-high"
    elif s >= 65:
        cls = "p-mid"
    elif s >= 45:
        cls = "p-low"
    else:
        cls = "p-bad"
    return f'<span class="pill {cls}">{s:.1f}</span>'
def _escape_html(s: str) -> str:
    """Escape &, < and > for safe interpolation into the report HTML."""
    return s.replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;")


def candidate_card_html(rank: int, c: CandidateLLMResult) -> str:
    """Render one candidate's result as a self-contained HTML card.

    Includes rank, fit badge, score pill, score bar, summary, strengths/gaps,
    risks, the requirements checklist, and the top evidence quotes.

    Fix: the original "escaping" calls were no-ops (`.replace("<", "<")`,
    `.replace(">", ">")` — they replaced characters with themselves), so
    LLM-produced text could inject markup. Requirement labels, evidence text,
    and evidence quotes are now HTML-escaped via _escape_html.
    """
    score = float(c.final_score)
    w = max(0, min(100, int(round(score))))  # score-bar width in percent
    checklist_rows = ""
    for item in (c.checklist or [])[:6]:
        st = (item.status or "").lower().strip()
        cls = "ok" if st == "met" else ("partial" if st == "partial" else "miss")
        ev = _escape_html((item.evidence or "").strip())
        req = _escape_html((item.requirement or "").strip())
        checklist_rows += f"""
<div class="checkrow {cls}">
<div class="req">{req}</div>
<div class="st">{st.upper()}</div>
<div class="ev">{ev if ev else "β"}</div>
</div>
"""
    strengths = "".join([f"<li>{s}</li>" for s in (c.strengths or [])[:4]]) or "<li>β</li>"
    gaps = "".join([f"<li>{g}</li>" for g in (c.gaps or [])[:4]]) or "<li>β</li>"
    risks = "".join([f"<li>{r}</li>" for r in (c.risks or [])[:3]]) or "<li>β</li>"
    evidence_html = ""
    for q in (c.top_evidence or [])[:3]:
        q = _escape_html(q)
        evidence_html += f'<div class="quote">β{q}β</div>'
    return f"""
<div class="card">
<div class="card-top">
<div class="card-title">
<div class="rank">#{rank}</div>
<div class="file">{c.filename}</div>
</div>
<div class="card-meta">
{fit_badge(c.fit_level)}
{score_pill(score)}
</div>
</div>
<div class="bar"><div class="fill" style="width:{w}%"></div></div>
<div class="summary">{c.summary}</div>
<div class="grid">
<div>
<div class="section-title">Strengths</div>
<ul class="list">{strengths}</ul>
</div>
<div>
<div class="section-title">Gaps</div>
<ul class="list">{gaps}</ul>
</div>
</div>
<div class="section-title">Risks</div>
<ul class="list">{risks}</ul>
<div class="section-title">Requirements Checklist</div>
<div class="checklist">
{checklist_rows if checklist_rows else '<div class="quote muted">No checklist produced.</div>'}
</div>
<div class="section-title">Evidence</div>
<div class="quotes">
{evidence_html if evidence_html else '<div class="quote muted">No evidence produced.</div>'}
</div>
</div>
"""
| def _safe_int(x, default: int = 0) -> int: | |
| try: | |
| return int(x) | |
| except Exception: | |
| return default | |
def render_single_html(ranked_dicts: List[Dict[str, Any]], idx: int) -> Tuple[str, str, int]:
    """Render ONE candidate card at a time to reduce DOM size / fullscreen lag.

    Args:
        ranked_dicts: serialized CandidateLLMResult dicts, best first.
        idx: requested candidate index; coerced with _safe_int and clamped
            into [0, len(ranked_dicts) - 1].

    Returns:
        (report_html, nav_markdown, clamped_index). With no results yet, an
        empty-state hero is returned with nav "β" and index 0.
    """
    if not ranked_dicts:
        # Empty state shown before the first matching run.
        html = '''
<div class="hero report-hero">
<div class="hero-left">
<div class="hero-title">SGS Candidate Fit Report</div>
<div class="hero-sub">Run matching to generate results.</div>
</div>
</div>
'''
        return html, "β", 0
    idx = max(0, min(_safe_int(idx, 0), len(ranked_dicts) - 1))
    # Re-validate the stored dict back into the pydantic model for rendering.
    c = CandidateLLMResult.model_validate(ranked_dicts[idx])
    card = candidate_card_html(idx + 1, c)
    # KPI header shows the best score of the whole ranking, not of this card.
    top_score = float(ranked_dicts[0].get("final_score", 0.0))
    html = f'''
<div class="hero report-hero">
<div class="hero-left">
<div class="hero-title">SGS Candidate Fit Report</div>
<div class="hero-sub">Navigate candidates using β / βΆ (renders one card to reduce lag)</div>
</div>
<div class="hero-right">
<div class="kpi">
<div class="kpi-label">Candidate</div>
<div class="kpi-val">{idx+1}/{len(ranked_dicts)}</div>
</div>
<div class="kpi">
<div class="kpi-label">Top Score</div>
<div class="kpi-val">{top_score:.1f}</div>
</div>
</div>
</div>
{card}
'''
    nav = f"**Showing:** {idx+1} / {len(ranked_dicts)}"
    return html, nav, idx
def nav_prev(ranked_dicts: List[Dict[str, Any]], idx: int):
    """Step to the previous candidate card; index clamping happens in the renderer."""
    current = _safe_int(idx, 0)
    return render_single_html(ranked_dicts, current - 1)
def nav_next(ranked_dicts: List[Dict[str, Any]], idx: int):
    """Step to the next candidate card; index clamping happens in the renderer."""
    current = _safe_int(idx, 0)
    return render_single_html(ranked_dicts, current + 1)
| # ========================================================= | |
| # Shortlist export | |
| # ========================================================= | |
def export_shortlist(shortlist_table: pd.DataFrame) -> Tuple[str, str, str]:
    """Export shortlisted rows to a CSV and gather their e-mail addresses.

    Returns:
        (csv_path, status_message, comma_joined_unique_sorted_emails).

    Raises:
        gr.Error: when there is no table yet or nothing is marked shortlisted.
    """
    if shortlist_table is None or shortlist_table.empty:
        raise gr.Error("No shortlist data yet. Run ranking first.")
    selected = shortlist_table[shortlist_table["Shortlisted"] == True]  # noqa: E712 (pandas elementwise compare)
    if selected.empty:
        raise gr.Error("No candidates marked as shortlisted.")
    out_file = tempfile.NamedTemporaryFile(delete=False, suffix=".csv")
    selected.to_csv(out_file.name, index=False)
    cleaned_emails = [e for e in selected["Email"].dropna().astype(str).str.strip().tolist() if e]
    email_block = ", ".join(sorted(set(cleaned_emails)))
    status = f"Exported {len(selected)} shortlisted candidate(s)."
    return out_file.name, status, email_block
| # ========================================================= | |
| # Mini refresh HTML (fix scroll lag after report generation) | |
| # ========================================================= | |
def build_mini_refresh_script() -> str:
    """Return a tiny hidden HTML+JS snippet that forces a layout reflow.

    Injected after report generation to clear scroll lag: it dispatches two
    resize events across animation frames, temporarily hints GPU compositing
    via will-change/translateZ on the Gradio root, and restores the scroll
    position afterwards. The millisecond nonce makes each injection unique so
    the browser re-executes the script on every run.
    """
    nonce = str(int(time.time() * 1000))
    # Forces a layout reflow similar to what happens when opening the accordion:
    # - dispatch resize twice across frames
    # - apply temporary will-change to hint GPU
    # - keep scroll position stable
    return f"""
<div id="mini-refresh-{nonce}" style="display:none"></div>
<script>
(() => {{
try {{
const y = window.scrollY || 0;
const root = document.querySelector('.gradio-container');
if (root) {{
root.style.willChange = 'transform';
root.style.transform = 'translateZ(0)';
}}
requestAnimationFrame(() => {{
window.dispatchEvent(new Event('resize'));
requestAnimationFrame(() => {{
window.dispatchEvent(new Event('resize'));
setTimeout(() => {{
try {{ window.scrollTo(0, y); }} catch(e) {{}}
if (root) {{
root.style.transform = '';
root.style.willChange = 'auto';
}}
}}, 60);
}});
}});
}} catch(e) {{}}
}})();
</script>
""".strip()
| # ========================================================= | |
| # Main app pipeline | |
| # ========================================================= | |
def rank_app(
    jd_file_obj,
    cv_file_objs,
    must_haves: str,
    mask_pii_toggle: bool,
    show_contacts_toggle: bool,
    progress=gr.Progress(track_tqdm=False),
):
    """End-to-end matching pipeline driven by the Gradio UI.

    Steps: validate inputs -> dedupe CVs by content hash -> chunk each CV and
    retrieve JD-relevant evidence (API embeddings, with lexical fallback) ->
    LLM judging in batches -> strict re-scoring and global ranking -> build
    the report HTML, results CSV, and shortlist table.

    Returns the tuple of Gradio outputs:
    (report_html, meta_markdown, csv_path, shortlist_df, "", "",
     ranked_dicts_state, current_index, nav_markdown, refresh_html).

    Raises gr.Error for every user-recoverable problem (missing files, too
    many CVs, unreadable text, empty LLM ranking).
    """
    t0 = time.time()
    get_hf_client()  # validate token early
    progress(0.05, desc="Loading Job Description...")
    jd_path = gr_file_to_path(jd_file_obj)
    if not jd_path:
        raise gr.Error("Please upload a Job Description file (PDF/DOCX/TXT).")
    jd_text = clean_text(read_file_to_text(jd_path))[:MAX_JD_CHARS]
    if not jd_text:
        raise gr.Error("Could not extract text from the Job Description file.")
    if not cv_file_objs:
        raise gr.Error("Please upload at least 1 CV.")
    if len(cv_file_objs) > MAX_CV_UPLOADS:
        raise gr.Error(f"Maximum allowed CV uploads is {MAX_CV_UPLOADS}. You uploaded {len(cv_file_objs)}.")
    cv_paths = []
    for f in cv_file_objs:
        p = gr_file_to_path(f)
        if p:
            cv_paths.append(p)
    if not cv_paths:
        raise gr.Error("Could not read uploaded CV files (no valid paths).")
    progress(0.10, desc="Checking duplicates...")
    # Dedupe by SHA-256 of raw bytes; if the file can't be re-read, fall back
    # to a hash of its cleaned extracted text.
    seen = {}
    duplicates = []
    unique_paths = []
    for p in cv_paths:
        fname = os.path.basename(p)
        try:
            h = file_bytes_hash(p)
        except Exception:
            h = hashlib.sha256(clean_text(read_file_to_text(p)).encode("utf-8", errors="ignore")).hexdigest()
        if h in seen:
            duplicates.append((fname, seen[h]))
            continue
        seen[h] = fname
        unique_paths.append(p)
    progress(0.14, desc="Preparing retrieval engine...")
    # Try API embeddings first; drop to lexical token-overlap ranking on failure.
    use_embeddings = True
    jd_vec = None
    try:
        jd_vec = embed_texts_api([jd_text])  # [1,d]
    except Exception:
        if not ALLOW_LEXICAL_FALLBACK:
            raise gr.Error("Embedding endpoint failed. Try again later.")
        use_embeddings = False
    local_pool = []
    contacts_map: Dict[str, Dict[str, str]] = {}
    total = len(unique_paths)
    for idx, p in enumerate(unique_paths, start=1):
        prog = 0.14 + 0.54 * (idx / max(1, total))
        progress(prog, desc=f"Processing CVs ({idx}/{total}) β {os.path.basename(p)}")
        raw = clean_text(read_file_to_text(p))[:MAX_CV_CHARS]
        if not raw:
            continue
        filename = os.path.basename(p)
        # Contacts are extracted from the UNMASKED text, before any PII masking.
        contacts_map[filename] = (
            extract_contact_info(raw) if show_contacts_toggle else {"name": "", "email": "", "phone": ""}
        )
        chunks = chunk_text_safe(raw)
        if not chunks:
            continue
        if use_embeddings and jd_vec is not None:
            try:
                chunk_vecs = embed_texts_api(chunks)  # [n,d]
                sims = cosine_sim_matrix(jd_vec, chunk_vecs)[0]  # [n]
                idxs = np.argsort(sims)[::-1][:TOP_CHUNKS_PER_CV]
                top_chunks = [(int(i), float(sims[int(i)]), chunks[int(i)]) for i in idxs]
            except Exception:
                # Mid-run embedding failure: switch to lexical for the rest of the run.
                use_embeddings = False
                scored = lexical_rank_chunks(jd_text, chunks, TOP_CHUNKS_PER_CV)
                top_chunks = [(i, s, chunks[i]) for i, s in scored]
        else:
            scored = lexical_rank_chunks(jd_text, chunks, TOP_CHUNKS_PER_CV)
            top_chunks = [(i, s, chunks[i]) for i, s in scored]
        retr_sims = [s for _, s, _ in top_chunks]
        local_score = compute_retrieval_score(retr_sims)
        evidence_chunks = [txt for _, _, txt in top_chunks[:EVIDENCE_CHUNKS_PER_CV]]
        if mask_pii_toggle:
            evidence_chunks = [mask_pii(x) for x in evidence_chunks]
        local_pool.append({"filename": filename, "local_score": local_score, "evidence_chunks": evidence_chunks})
    if not local_pool:
        raise gr.Error("Could not extract usable text from the uploaded CVs.")
    progress(0.70, desc="Preparing LLM ranking...")
    # Judge strongest retrieval candidates first so early batches carry the best CVs.
    local_pool = sorted(local_pool, key=lambda x: float(x["local_score"]), reverse=True)
    batch_outputs: List[LLMRankingOutput] = []
    batches = max(1, (len(local_pool) + LLM_BATCH_SIZE - 1) // LLM_BATCH_SIZE)  # ceil division
    for b in range(batches):
        start = b * LLM_BATCH_SIZE
        end = start + LLM_BATCH_SIZE
        batch = local_pool[start:end]
        prog = 0.70 + 0.22 * ((b + 1) / batches)
        progress(prog, desc=f"LLM judging batches ({b+1}/{batches})...")
        out = llm_judge_rank_batch(jd_text, must_haves or "", batch)
        batch_outputs.append(out)
    progress(0.94, desc="Finalizing report...")
    judged = merge_llm_batches(batch_outputs)
    ranked = judged.ranked
    if not ranked:
        raise gr.Error("LLM returned an empty ranking.")
    # Re-sort after strict scoring (already sorted in merge, but keep safe)
    ranked = sorted(ranked, key=lambda x: float(x.final_score), reverse=True)
    ranked_dicts = [c.model_dump() for c in ranked]
    idx0 = 0
    first_html, nav, idx0 = render_single_html(ranked_dicts, idx0)
    # Full-results CSV export: one row per ranked candidate.
    tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".csv")
    with open(tmp.name, "w", newline="", encoding="utf-8") as f:
        w = csv.writer(f)
        w.writerow(
            ["Rank", "Filename", "FinalScore(0-100)", "FitLevel", "Name", "Email", "Phone", "Summary", "LocalScore"]
        )
        for ridx, c in enumerate(ranked, start=1):
            ci = contacts_map.get(c.filename, {"name": "", "email": "", "phone": ""})
            local = next((x["local_score"] for x in local_pool if x["filename"] == c.filename), "")
            w.writerow(
                [
                    ridx,
                    c.filename,
                    round(float(c.final_score), 2),
                    c.fit_level,
                    ci.get("name", ""),
                    ci.get("email", ""),
                    ci.get("phone", ""),
                    c.summary,
                    local,
                ]
            )
    # Interactive shortlist table; the user ticks "Shortlisted" in the UI.
    shortlist_rows = []
    for ridx, c in enumerate(ranked, start=1):
        ci = contacts_map.get(c.filename, {"name": "", "email": "", "phone": ""})
        shortlist_rows.append(
            [
                False,
                ridx,
                c.filename,
                round(float(c.final_score), 2),
                c.fit_level,
                ci.get("name", ""),
                ci.get("email", ""),
                ci.get("phone", ""),
            ]
        )
    shortlist_df = pd.DataFrame(
        shortlist_rows, columns=["Shortlisted", "Rank", "Filename", "Score", "Fit", "Name", "Email", "Phone"]
    )
    elapsed = time.time() - t0
    meta = (
        f"**LLM model:** `{LLM_MODEL}` \n"
        f"**Embedding model:** `{EMBED_MODEL}` \n\n"
        f"**CVs uploaded:** {len(cv_paths)} (max {MAX_CV_UPLOADS}) β **Unique processed:** {len(unique_paths)} \n"
        f"**Ranked (ALL):** {len(ranked)} \n"
        f"**LLM batches:** {batches} (batch size={LLM_BATCH_SIZE}) \n"
        f"**Time:** {elapsed:.2f}s \n"
        f"**Duplicates skipped:** {len(duplicates)} \n"
        f"**Retrieval mode:** {'Embeddings (API)' if use_embeddings else 'Lexical fallback'} \n\n"
        f"**LLM Notes:** {(judged.overall_notes or '').strip()}"
    )
    # Mini refresh to remove scroll lag after render
    refresh_html = build_mini_refresh_script()
    progress(1.0, desc="Done β ")
    return first_html, meta, tmp.name, shortlist_df, "", "", ranked_dicts, idx0, nav, refresh_html
| # ========================================================= | |
| # SGS CSS (neutral light-grey + visible borders) | |
| # + file uploader readable on both themes | |
| # + progress text white (like you asked) | |
| # ========================================================= | |
| CUSTOM_CSS = """ | |
| :root{ | |
| --sgs-blue:#0B3D91; | |
| --sgs-green:#00A651; | |
| --text:#111827; | |
| --muted: rgba(17,24,39,.70); | |
| --bg1:#f2f4f7; | |
| --bg2:#e9edf2; | |
| --line: rgba(17,24,39,.22); | |
| --line2: rgba(17,24,39,.28); | |
| --shadow: 0 14px 28px rgba(2,6,23,.10); | |
| } | |
| /* Layout */ | |
| .gradio-container{max-width:1180px !important;} | |
| /* Background */ | |
| body, .gradio-container{ | |
| background: | |
| radial-gradient(1200px 700px at 10% 10%, rgba(11,61,145,.08), transparent 55%), | |
| radial-gradient(900px 600px at 90% 20%, rgba(0,166,81,.07), transparent 60%), | |
| radial-gradient(800px 520px at 55% 90%, rgba(79,178,255,.07), transparent 60%), | |
| linear-gradient(180deg, var(--bg1), var(--bg2)) !important; | |
| } | |
| /* Subtle moving veil */ | |
| body:before{ | |
| content:""; | |
| position: fixed; | |
| inset: 0; | |
| pointer-events:none; | |
| background: linear-gradient(120deg, | |
| rgba(11,61,145,.06), | |
| rgba(0,166,81,.05), | |
| rgba(79,178,255,.05), | |
| rgba(11,61,145,.06) | |
| ); | |
| background-size: 320% 320%; | |
| mix-blend-mode: multiply; | |
| opacity: .35; | |
| animation: bgShift 10s ease-in-out infinite; | |
| } | |
| @keyframes bgShift{ | |
| 0%{ background-position: 0% 50%; } | |
| 50%{ background-position: 100% 50%; } | |
| 100%{ background-position: 0% 50%; } | |
| } | |
| /* Keep text dark always */ | |
| .gradio-container, .gradio-container *{ color: var(--text) !important; } | |
| /* Hero */ | |
| .hero{ | |
| border:1.2px solid var(--line2); | |
| background: linear-gradient(135deg, rgba(255,255,255,.86), rgba(247,248,250,.82)); | |
| border-radius: 22px; | |
| padding: 20px 20px 18px; | |
| display:flex; | |
| align-items:flex-end; | |
| justify-content:space-between; | |
| gap:16px; | |
| box-shadow: 0 18px 40px rgba(2,6,23,.12); | |
| margin: 12px 0 16px; | |
| position: relative; | |
| overflow: hidden; | |
| backdrop-filter: blur(10px); | |
| -webkit-backdrop-filter: blur(10px); | |
| animation: heroIn .65s ease-out both; | |
| } | |
| @keyframes heroIn{ | |
| from{ opacity:0; transform: translateY(10px); } | |
| to{ opacity:1; transform: translateY(0); } | |
| } | |
| .hero-left{max-width: 740px;} | |
| .hero *{ position: relative; z-index: 1; } | |
| .hero:before, .hero:after{ | |
| content:""; | |
| position:absolute; | |
| width: 360px; | |
| height: 360px; | |
| border-radius: 999px; | |
| filter: blur(44px); | |
| opacity: .26; | |
| pointer-events:none; | |
| animation: floaty 7s ease-in-out infinite; | |
| } | |
| .hero:before{ | |
| background: radial-gradient(circle at 35% 35%, rgba(11,61,145,.22), transparent 62%), | |
| radial-gradient(circle at 35% 35%, rgba(79,178,255,.18), transparent 70%); | |
| top:-190px; left:-170px; | |
| } | |
| .hero:after{ | |
| background: radial-gradient(circle at 60% 40%, rgba(0,166,81,.18), transparent 64%), | |
| radial-gradient(circle at 60% 40%, rgba(11,61,145,.10), transparent 72%); | |
| bottom:-220px; right:-190px; | |
| animation-delay: -2.8s; | |
| } | |
| @keyframes floaty{ | |
| 0%,100%{ transform: translate(0,0); } | |
| 50%{ transform: translate(18px, -12px); } | |
| } | |
| .hero-title{ | |
| font-weight: 1000; | |
| font-size: 28px; | |
| letter-spacing: -0.02em; | |
| line-height: 1.08; | |
| } | |
| .hero-title .accent{ display:inline-block; position: relative; } | |
| .hero-title .accent:after{ | |
| content:""; | |
| position:absolute; | |
| left:0; right:0; | |
| height: 10px; | |
| bottom: -7px; | |
| background: linear-gradient(90deg, | |
| rgba(11,61,145,0), | |
| rgba(11,61,145,.34), | |
| rgba(79,178,255,.34), | |
| rgba(0,166,81,.26), | |
| rgba(0,166,81,0) | |
| ); | |
| filter: blur(1px); | |
| opacity: .90; | |
| transform: scaleX(0); | |
| transform-origin: left; | |
| animation: underlineIn .9s ease-out .25s both; | |
| } | |
| @keyframes underlineIn{ | |
| from{ transform: scaleX(0); opacity: 0; } | |
| to{ transform: scaleX(1); opacity: .90; } | |
| } | |
| .hero-sub{ | |
| color: var(--muted) !important; | |
| margin-top: 8px; | |
| font-size: 13.5px; | |
| line-height: 1.55rem; | |
| max-width: 74ch; | |
| } | |
| .hero-right{ display:flex; gap:10px; flex-wrap:wrap; justify-content:flex-end; } | |
| /* KPI cards */ | |
| .kpi{ | |
| background: rgba(255,255,255,.78); | |
| border:1.2px solid var(--line); | |
| border-radius: 16px; | |
| padding: 10px 12px; | |
| min-width: 150px; | |
| backdrop-filter: blur(8px); | |
| -webkit-backdrop-filter: blur(8px); | |
| transition: transform .18s ease, box-shadow .18s ease, border-color .18s ease; | |
| } | |
| .kpi:hover{ | |
| transform: translateY(-2px); | |
| box-shadow: 0 18px 38px rgba(2,6,23,.12); | |
| border-color: var(--line2); | |
| } | |
| .kpi-label{ color:rgba(17,24,39,.78) !important; font-size:12px; font-weight:800; } | |
| .kpi-val{ font-size:18px; font-weight:1000; margin-top:2px; } | |
| /* Blocks */ | |
| .gradio-container .block{ | |
| border-radius: 18px !important; | |
| border: 1.2px solid var(--line) !important; | |
| background: rgba(255,255,255,.72) !important; | |
| box-shadow: var(--shadow); | |
| } | |
| /* Inputs */ | |
| textarea, input[type="text"]{ | |
| background: rgba(255,255,255,.90) !important; | |
| border: 1.2px solid var(--line) !important; | |
| border-radius: 14px !important; | |
| } | |
| textarea:focus, input[type="text"]:focus{ | |
| outline: none !important; | |
| box-shadow: 0 0 0 3px rgba(79,178,255,.18) !important; | |
| border-color: var(--line2) !important; | |
| } | |
| /* Buttons */ | |
| button.primary, .gradio-container button{ | |
| border-radius: 14px !important; | |
| border: 1px solid rgba(15,23,42,.18) !important; | |
| background: linear-gradient(90deg, rgba(11,61,145,.92), rgba(0,166,81,.78)) !important; | |
| color: #fff !important; | |
| transition: transform .15s ease, box-shadow .15s ease, filter .15s ease; | |
| } | |
| button.primary:hover, .gradio-container button:hover{ | |
| transform: translateY(-1px); | |
| box-shadow: 0 14px 35px rgba(11,61,145,.16); | |
| filter: brightness(1.05); | |
| } | |
| button.primary:active, .gradio-container button:active{ transform: translateY(0) scale(.99); } | |
| /* Tabs */ | |
| .gradio-container .tabs{ | |
| border: 1.2px solid var(--line) !important; | |
| border-radius: 18px !important; | |
| overflow: hidden; | |
| } | |
| .gradio-container .tabitem{ background: rgba(255,255,255,.70) !important; } | |
| .gradio-container .tab-nav{ | |
| background: rgba(255,255,255,.70) !important; | |
| border-bottom: 1.2px solid var(--line) !important; | |
| } | |
| /* Cards */ | |
| .cards{display:grid;grid-template-columns: 1fr; gap: 12px;} | |
| .card{ | |
| background: linear-gradient(180deg, rgba(255,255,255,.92), rgba(247,248,250,.88)); | |
| border:1.2px solid var(--line); | |
| border-radius: 18px; | |
| padding: 14px; | |
| box-shadow: var(--shadow); | |
| transition: transform .18s ease, box-shadow .18s ease, border-color .18s ease; | |
| } | |
| .card:hover{ | |
| transform: translateY(-2px); | |
| box-shadow: 0 20px 40px rgba(2,6,23,.12); | |
| border-color: var(--line2); | |
| } | |
| .card-top{display:flex;align-items:flex-start;justify-content:space-between;gap:10px;} | |
| .card-title{display:flex;gap:10px;align-items:baseline;flex-wrap:wrap;} | |
| .rank{ | |
| background: rgba(11,61,145,.10); | |
| border:1.2px solid rgba(11,61,145,.22); | |
| font-weight: 1000; | |
| border-radius: 999px; | |
| padding: 6px 10px; | |
| font-size: 12px; | |
| } | |
| .file{font-weight:1000;font-size:16px;} | |
| .card-meta{display:flex;gap:8px;align-items:center;flex-wrap:wrap;justify-content:flex-end;} | |
| /* Badges / Pills */ | |
| .badge{ | |
| display:inline-flex;align-items:center; | |
| padding: 6px 10px;border-radius: 999px;font-size:12px;font-weight:1000; | |
| border:1.2px solid var(--line); | |
| color: var(--text) !important; | |
| } | |
| .b-exc{ background: rgba(0,166,81,.12); border-color: rgba(0,166,81,.26); } | |
| .b-good{ background: rgba(11,61,145,.10); border-color: rgba(11,61,145,.24); } | |
| .b-maybe{ background: rgba(245,158,11,.12); border-color: rgba(245,158,11,.28); } | |
| .b-weak{ background: rgba(239,68,68,.10); border-color: rgba(239,68,68,.26); } | |
| .pill{ | |
| display:inline-flex;align-items:center;justify-content:center; | |
| min-width:60px;padding: 6px 10px;border-radius: 999px;font-weight: 1000; | |
| border:1.2px solid var(--line); | |
| background: rgba(255,255,255,.78); | |
| color: var(--text) !important; | |
| } | |
| .p-high{ background: rgba(0,166,81,.12); border-color: rgba(0,166,81,.26); } | |
| .p-mid{ background: rgba(11,61,145,.10); border-color: rgba(11,61,145,.24); } | |
| .p-low{ background: rgba(245,158,11,.12); border-color: rgba(245,158,11,.28); } | |
| .p-bad{ background: rgba(239,68,68,.10); border-color: rgba(239,68,68,.26); } | |
| /* Score bar */ | |
| .bar{ | |
| width: 100%; height: 10px; border-radius: 999px; | |
| background: rgba(17,24,39,.08); overflow: hidden; | |
| border:1.2px solid var(--line); | |
| margin: 10px 0 10px; | |
| } | |
| .fill{ | |
| height:100%; border-radius: 999px; | |
| background: linear-gradient(90deg, var(--sgs-green), #4fb2ff, var(--sgs-blue)); | |
| } | |
| .summary{font-size:13px;line-height:1.55rem;margin: 6px 0 10px;color:var(--text) !important;} | |
| .section-title{font-size:13px;font-weight:1000;margin:10px 0 6px;color:var(--text) !important;} | |
| .grid{display:grid;grid-template-columns: 1fr 1fr; gap: 14px;} | |
| @media(max-width:860px){ | |
| .grid{grid-template-columns:1fr;} | |
| .hero{flex-direction:column; align-items:flex-start;} | |
| .hero-right{justify-content:flex-start;} | |
| .kpi{min-width: 160px;} | |
| .hero-title{font-size: 24px;} | |
| } | |
| .list{margin:0;padding-left:18px;color:var(--text) !important;} | |
| .list li{margin:6px 0;line-height:1.30rem;color:var(--text) !important;} | |
| /* Quotes / Evidence */ | |
| .quotes{display:grid;gap:10px;margin-top:6px;} | |
| .quote{ | |
| background: rgba(255,255,255,.82); | |
| border:1.2px solid var(--line); | |
| border-radius: 14px; | |
| padding: 10px 12px; | |
| color: var(--text) !important; | |
| font-size: 13px; | |
| line-height: 1.45rem; | |
| } | |
| .quote.muted{opacity:.85;} | |
| /* Checklist */ | |
| .checklist{display:grid;gap:8px;margin-top:6px;} | |
| .checkrow{ | |
| display:grid; grid-template-columns: 1.1fr .4fr 1.5fr; gap:10px; | |
| padding:10px 12px; border-radius:14px; | |
| border:1.2px solid var(--line); | |
| background: rgba(255,255,255,.82); | |
| font-size:13px; | |
| position: relative; | |
| overflow: hidden; | |
| } | |
| .checkrow:before{ | |
| content:""; | |
| position:absolute; | |
| left:0; top:0; bottom:0; | |
| width:4px; | |
| background: rgba(17,24,39,.22); | |
| } | |
| .checkrow .req{font-weight:1000;color:var(--text) !important;} | |
| .checkrow .ev{color:rgba(17,24,39,0.88) !important;} | |
| .checkrow .st{font-weight:1000;text-align:center;letter-spacing:.4px;} | |
| /* Status colors */ | |
| .checkrow.ok:before{ background: rgba(0,166,81,.95); } | |
| .checkrow.partial:before{ background: rgba(245,158,11,.95); } | |
| .checkrow.miss:before{ background: rgba(239,68,68,.95); } | |
| .checkrow.ok .st{ color: rgba(0,120,70,1) !important; } | |
| .checkrow.partial .st{ color: rgba(150,95,10,1) !important; } | |
| .checkrow.miss .st{ color: rgba(160,20,20,1) !important; } | |
| /* ========================================================= | |
| File uploader: readable label/filename ALWAYS | |
| ========================================================= */ | |
| .gradio-container .file, | |
| .gradio-container .file-upload, | |
| .gradio-container .upload-button, | |
| .gradio-container .file-upload > div, | |
| .gradio-container [data-testid="file"]{ | |
| background: rgba(245,247,250,.92) !important; | |
| border: 1.4px solid rgba(17,24,39,.28) !important; | |
| border-radius: 16px !important; | |
| box-shadow: 0 12px 24px rgba(2,6,23,.10) !important; | |
| } | |
| .gradio-container .file *, | |
| .gradio-container .file-upload *, | |
| .gradio-container .upload-button *, | |
| .gradio-container [data-testid="file"] *{ | |
| color: #111827 !important; | |
| } | |
| .gradio-container .file-upload .file-title, | |
| .gradio-container .file-upload .file-label, | |
| .gradio-container .file-upload .label, | |
| .gradio-container .file-upload .wrap, | |
| .gradio-container .file-upload .header, | |
| .gradio-container [data-testid="file"] .label{ | |
| background: rgba(245,247,250,.92) !important; | |
| border-bottom: 1.4px solid rgba(17,24,39,.20) !important; | |
| } | |
| .gradio-container .file-upload .file-name, | |
| .gradio-container .file-upload .filename, | |
| .gradio-container [data-testid="file"] .file-name{ | |
| font-weight: 900 !important; | |
| } | |
| .gradio-container .file-upload button, | |
| .gradio-container [data-testid="file"] button{ | |
| background: rgba(255,255,255,.85) !important; | |
| border: 1.2px solid rgba(17,24,39,.28) !important; | |
| color: #111827 !important; | |
| } | |
| .gradio-container .file:hover, | |
| .gradio-container .file-upload:hover, | |
| .gradio-container [data-testid="file"]:hover{ | |
| border-color: rgba(17,24,39,.36) !important; | |
| box-shadow: 0 16px 32px rgba(2,6,23,.12) !important; | |
| } | |
| /* ========================================================= | |
| Progress label text = white | |
| ========================================================= */ | |
| .gradio-container .progress-text, | |
| .gradio-container .progress_label, | |
| .gradio-container .progress-label, | |
| .gradio-container .eta, | |
| .gradio-container [data-testid="progress-text"], | |
| .gradio-container [data-testid="progress-label"], | |
| .gradio-container [data-testid="progress-bar"] *{ | |
| color: #ffffff !important; | |
| text-shadow: 0 1px 2px rgba(0,0,0,.55); | |
| } | |
| /* Respect reduced motion */ | |
| @media (prefers-reduced-motion: reduce){ | |
| body:before, .hero, .hero:before, .hero:after{ | |
| animation: none !important; | |
| } | |
| } | |
| """ | |
| # ========================================================= | |
| # UI | |
| # ========================================================= | |
| theme = gr.themes.Soft( | |
| primary_hue="blue", | |
| secondary_hue="green", | |
| neutral_hue="slate", | |
| radius_size="lg", | |
| font=[gr.themes.GoogleFont("Inter"), "ui-sans-serif", "system-ui"], | |
| ) | |
with gr.Blocks(title="SGS ATS Candidate Matcher", theme=theme, css=CUSTOM_CSS) as demo:
    # --- Hero banner: static HTML header with blurb + two KPI cards.
    # The only runtime substitution is MAX_CV_UPLOADS via the f-string.
    # NOTE(review): several "β" characters below look like mojibake of the
    # original glyphs (e.g. "CV→JD", "—") — confirm the file's encoding
    # before changing any user-visible text.
    gr.HTML(f"""
    <div class="hero">
      <div class="hero-left">
        <div class="hero-title"><span class="accent">Intelligent</span> CVβJD matching for SGS</div>
        <div class="hero-sub">
          Analyze job descriptions and candidate CVs to deliver accurate matching, structured insights,
          and data-driven hiring decisions β all in minutes, not weeks.
        </div>
      </div>
      <div class="hero-right">
        <div class="kpi">
          <div class="kpi-label">Max CV uploads</div>
          <div class="kpi-val">{MAX_CV_UPLOADS}</div>
        </div>
        <div class="kpi">
          <div class="kpi-label">Important</div>
          <div class="kpi-val">Set HF_TOKEN</div>
        </div>
      </div>
    </div>
    """)

    # --- Inputs: one JD file plus up to MAX_CV_UPLOADS CV files ---
    with gr.Row():
        jd_file = gr.File(label="Job Description file (PDF/DOCX/TXT)", file_types=[".pdf", ".docx", ".txt"])
        cv_files = gr.File(label=f"Upload CVs (max {MAX_CV_UPLOADS})", file_count="multiple", file_types=[".pdf", ".docx", ".txt"])

    # --- Optional matching settings, collapsed by default ---
    with gr.Accordion("Settings", open=False):
        must_haves = gr.Textbox(
            label="Must-have requirements (optional) β one per line",
            lines=5,
            placeholder="Example:\nRecruitment lifecycle\nATS usage\nInterview scheduling\nOffer negotiation",
        )
        mask_pii_toggle = gr.Checkbox(label="Mask PII (emails/phones) in evidence", value=True)
        show_contacts_toggle = gr.Checkbox(label="Extract contact info (Name / Email / Phone) from CVs", value=True)

    run_btn = gr.Button("Generate Candidate Fit Report", variant="primary")

    with gr.Tabs():
        with gr.Tab("Executive Report"):
            # State shared by the prev/next navigation handlers.
            ranked_state = gr.State([])  # ranked candidate dicts, as produced by rank_app
            idx_state = gr.State(0)      # index of the candidate currently rendered
            # invisible HTML output used to run the mini-refresh script after report generation
            mini_refresh = gr.HTML(visible=False)
            with gr.Row():
                # NOTE(review): the "β"/"βΆ" button labels look like mojibaked
                # arrow glyphs (likely "◀"/"▶") — confirm before editing.
                prev_btn = gr.Button("β", size="sm")
                nav_text = gr.Markdown("β")
                next_btn = gr.Button("βΆ", size="sm")
            report_html = gr.HTML()
            meta_md = gr.Markdown()
            export_full = gr.File(label="Download Full Ranking CSV (includes contacts)")
        with gr.Tab("Shortlist & Export"):
            gr.Markdown("Tick **Shortlisted** candidates, then click **Export Shortlist**.")
            # Editable grid; the boolean "Shortlisted" column drives export_shortlist.
            shortlist_df = gr.Dataframe(
                headers=["Shortlisted", "Rank", "Filename", "Score", "Fit", "Name", "Email", "Phone"],
                datatype=["bool", "number", "str", "number", "str", "str", "str", "str"],
                interactive=True,
            )
            with gr.Row():
                export_shortlist_btn = gr.Button("Export Shortlist CSV", variant="secondary")
                export_shortlist_file = gr.File(label="Download Shortlist CSV")
            export_shortlist_msg = gr.Markdown()
            email_list = gr.Textbox(
                label="Email list (copy/paste) β shortlisted only",
                lines=3,
                placeholder="Emails will appear here after exporting shortlist...",
            )

    # --- Event wiring ---
    # rank_app returns 10 values; their order must match this outputs list.
    run_btn.click(
        fn=rank_app,
        inputs=[jd_file, cv_files, must_haves, mask_pii_toggle, show_contacts_toggle],
        outputs=[report_html, meta_md, export_full, shortlist_df, export_shortlist_msg, email_list, ranked_state, idx_state, nav_text, mini_refresh],
    )
    # Navigation re-renders a single candidate card and updates the index state.
    prev_btn.click(
        fn=nav_prev,
        inputs=[ranked_state, idx_state],
        outputs=[report_html, nav_text, idx_state],
    )
    next_btn.click(
        fn=nav_next,
        inputs=[ranked_state, idx_state],
        outputs=[report_html, nav_text, idx_state],
    )
    # Reads the (possibly user-edited) dataframe and emits CSV + email list.
    export_shortlist_btn.click(
        fn=export_shortlist,
        inputs=[shortlist_df],
        outputs=[export_shortlist_file, export_shortlist_msg, email_list],
    )
| demo.launch(server_name="0.0.0.0", server_port=7860) | |