Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -23,12 +23,7 @@ import docx2txt
|
|
| 23 |
# Models
|
| 24 |
# =========================================================
|
| 25 |
EMBED_MODEL_NAME = os.getenv("EMBED_MODEL_NAME", "BAAI/bge-base-en-v1.5")
|
| 26 |
-
|
| 27 |
-
# If CPU Space is slow, set Space Variable:
|
| 28 |
-
# RERANK_MODEL_NAME=BAAI/bge-reranker-base
|
| 29 |
RERANK_MODEL_NAME = os.getenv("RERANK_MODEL_NAME", "BAAI/bge-reranker-large")
|
| 30 |
-
|
| 31 |
-
# Default LLM judge (override via Space Variables)
|
| 32 |
LLM_MODEL = os.getenv("LLM_MODEL", "meta-llama/Meta-Llama-3.1-8B-Instruct")
|
| 33 |
|
| 34 |
|
|
@@ -41,7 +36,6 @@ CHUNK_OVERLAP_CHARS = 180
|
|
| 41 |
TOP_CHUNKS_PER_CV = 10
|
| 42 |
EVIDENCE_CHUNKS_PER_CV = 4
|
| 43 |
|
| 44 |
-
# Smaller batch reduces truncation and "only 1 candidate" outputs
|
| 45 |
LLM_BATCH_SIZE = int(os.getenv("LLM_BATCH_SIZE", "4"))
|
| 46 |
LLM_MAX_TOKENS = int(os.getenv("LLM_MAX_TOKENS", "3500"))
|
| 47 |
LLM_TEMPERATURE = float(os.getenv("LLM_TEMPERATURE", "0.15"))
|
|
@@ -49,6 +43,8 @@ LLM_TEMPERATURE = float(os.getenv("LLM_TEMPERATURE", "0.15"))
|
|
| 49 |
MAX_CV_CHARS = 120_000
|
| 50 |
MAX_JD_CHARS = 60_000
|
| 51 |
|
|
|
|
|
|
|
| 52 |
# Global singletons
|
| 53 |
_embedder: Optional[SentenceTransformer] = None
|
| 54 |
_reranker: Optional[CrossEncoder] = None
|
|
@@ -258,7 +254,6 @@ def compute_local_score(retr_sims: List[float], rerank_logits: List[float]) -> f
|
|
| 258 |
# LLM Prompt (compact to avoid truncation)
|
| 259 |
# =========================================================
|
| 260 |
def build_llm_prompt(jd_text: str, must_haves: str, candidates: List[Dict[str, Any]]) -> str:
|
| 261 |
-
# IMPORTANT: no example filename like "example.pdf"
|
| 262 |
schema_example = {
|
| 263 |
"ranked": [
|
| 264 |
{
|
|
@@ -374,11 +369,6 @@ def fallback_candidate(filename: str, score: float) -> CandidateLLMResult:
|
|
| 374 |
|
| 375 |
|
| 376 |
def llm_judge_rank_batch(jd_text: str, must_haves: str, batch: List[Dict[str, Any]]) -> LLMRankingOutput:
|
| 377 |
-
"""
|
| 378 |
-
Guarantees: returns a result for EVERY candidate in `batch`.
|
| 379 |
-
If LLM returns incomplete list, re-judge missing CVs individually.
|
| 380 |
-
If still missing, fallback to local_score.
|
| 381 |
-
"""
|
| 382 |
client = get_hf_client()
|
| 383 |
|
| 384 |
prompt = build_llm_prompt(
|
|
@@ -401,7 +391,6 @@ def llm_judge_rank_batch(jd_text: str, must_haves: str, batch: List[Dict[str, An
|
|
| 401 |
|
| 402 |
out: Optional[LLMRankingOutput] = None
|
| 403 |
|
| 404 |
-
# Attempt 1
|
| 405 |
text = _call(LLM_TEMPERATURE, LLM_MAX_TOKENS, prompt)
|
| 406 |
try:
|
| 407 |
out = LLMRankingOutput.model_validate(json.loads(text))
|
|
@@ -410,7 +399,6 @@ def llm_judge_rank_batch(jd_text: str, must_haves: str, batch: List[Dict[str, An
|
|
| 410 |
if obj:
|
| 411 |
out = LLMRankingOutput.model_validate(json.loads(obj))
|
| 412 |
|
| 413 |
-
# Retry once if parsing failed
|
| 414 |
if out is None:
|
| 415 |
text2 = _call(0.0, max(LLM_MAX_TOKENS, 4500), prompt)
|
| 416 |
try:
|
|
@@ -420,7 +408,6 @@ def llm_judge_rank_batch(jd_text: str, must_haves: str, batch: List[Dict[str, An
|
|
| 420 |
if obj2:
|
| 421 |
out = LLMRankingOutput.model_validate(json.loads(obj2))
|
| 422 |
|
| 423 |
-
# If still failing: fallback all
|
| 424 |
if out is None:
|
| 425 |
ranked = [fallback_candidate(b["filename"], b.get("local_score", 50.0)) for b in batch]
|
| 426 |
return LLMRankingOutput(ranked=ranked, overall_notes="LLM parsing failed; used local scoring fallback.")
|
|
@@ -428,7 +415,6 @@ def llm_judge_rank_batch(jd_text: str, must_haves: str, batch: List[Dict[str, An
|
|
| 428 |
returned = {c.filename: c for c in out.ranked}
|
| 429 |
missing = [b for b in batch if b["filename"] not in returned]
|
| 430 |
|
| 431 |
-
# Re-judge missing individually (more reliable)
|
| 432 |
for b in missing:
|
| 433 |
single_prompt = build_llm_prompt(
|
| 434 |
jd_text,
|
|
@@ -590,22 +576,19 @@ def render_top10_html(ranked: List[CandidateLLMResult], total_count: int) -> str
|
|
| 590 |
|
| 591 |
|
| 592 |
# =========================================================
|
| 593 |
-
# Shortlist export (
|
| 594 |
# =========================================================
|
| 595 |
def export_shortlist(shortlist_table: pd.DataFrame) -> Tuple[str, str, str]:
|
| 596 |
if shortlist_table is None or shortlist_table.empty:
|
| 597 |
raise gr.Error("No shortlist data yet. Run ranking first.")
|
| 598 |
|
| 599 |
-
# First column is Shortlisted (bool)
|
| 600 |
shortlisted_df = shortlist_table[shortlist_table.iloc[:, 0] == True]
|
| 601 |
if shortlisted_df.empty:
|
| 602 |
raise gr.Error("No candidates marked as shortlisted.")
|
| 603 |
|
| 604 |
-
# Export CSV
|
| 605 |
tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".csv")
|
| 606 |
shortlisted_df.to_csv(tmp.name, index=False)
|
| 607 |
|
| 608 |
-
# Email list from Email column (index 6 in our table)
|
| 609 |
emails = (
|
| 610 |
shortlisted_df.iloc[:, 6]
|
| 611 |
.dropna()
|
|
@@ -622,7 +605,7 @@ def export_shortlist(shortlist_table: pd.DataFrame) -> Tuple[str, str, str]:
|
|
| 622 |
|
| 623 |
|
| 624 |
# =========================================================
|
| 625 |
-
# Main pipeline
|
| 626 |
# =========================================================
|
| 627 |
def rank_app(
|
| 628 |
jd_file_obj,
|
|
@@ -630,13 +613,15 @@ def rank_app(
|
|
| 630 |
must_haves: str,
|
| 631 |
mask_pii_toggle: bool,
|
| 632 |
show_contacts_toggle: bool,
|
|
|
|
| 633 |
):
|
| 634 |
t0 = time.time()
|
| 635 |
ensure_models()
|
| 636 |
embedder = _embedder
|
| 637 |
reranker = _reranker
|
| 638 |
|
| 639 |
-
|
|
|
|
| 640 |
jd_path = gr_file_to_path(jd_file_obj)
|
| 641 |
if not jd_path:
|
| 642 |
raise gr.Error("Please upload a Job Description file (PDF/DOCX/TXT).")
|
|
@@ -645,10 +630,13 @@ def rank_app(
|
|
| 645 |
if not jd_text:
|
| 646 |
raise gr.Error("Could not extract text from the Job Description file.")
|
| 647 |
|
| 648 |
-
# ---- CV paths
|
| 649 |
if not cv_file_objs:
|
| 650 |
raise gr.Error("Please upload at least 1 CV.")
|
| 651 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 652 |
cv_paths = []
|
| 653 |
for f in cv_file_objs:
|
| 654 |
p = gr_file_to_path(f)
|
|
@@ -657,7 +645,8 @@ def rank_app(
|
|
| 657 |
if not cv_paths:
|
| 658 |
raise gr.Error("Could not read uploaded CV files (no valid paths).")
|
| 659 |
|
| 660 |
-
|
|
|
|
| 661 |
seen = {}
|
| 662 |
duplicates = []
|
| 663 |
unique_paths = []
|
|
@@ -674,14 +663,19 @@ def rank_app(
|
|
| 674 |
seen[h] = fname
|
| 675 |
unique_paths.append(p)
|
| 676 |
|
| 677 |
-
|
|
|
|
| 678 |
jd_vec = np.array(embedder.encode([jd_text], normalize_embeddings=True), dtype=np.float32)
|
| 679 |
|
| 680 |
-
# ---- Process ALL CVs (retrieval + rerank + local_score + contacts)
|
| 681 |
local_pool = []
|
| 682 |
contacts_map: Dict[str, Dict[str, str]] = {}
|
| 683 |
|
| 684 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 685 |
raw = clean_text(read_file_to_text(p))[:MAX_CV_CHARS]
|
| 686 |
if not raw:
|
| 687 |
continue
|
|
@@ -721,42 +715,50 @@ def rank_app(
|
|
| 721 |
if not local_pool:
|
| 722 |
raise gr.Error("Could not extract usable text from the uploaded CVs.")
|
| 723 |
|
| 724 |
-
|
|
|
|
| 725 |
local_pool = sorted(local_pool, key=lambda x: float(x["local_score"]), reverse=True)
|
| 726 |
|
| 727 |
batch_outputs: List[LLMRankingOutput] = []
|
| 728 |
-
|
| 729 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 730 |
|
| 731 |
llm_batch = [
|
| 732 |
{
|
| 733 |
"filename": c["filename"],
|
| 734 |
"evidence_chunks": c["evidence_chunks"],
|
| 735 |
-
"local_score": c["local_score"],
|
| 736 |
}
|
| 737 |
for c in batch
|
| 738 |
]
|
| 739 |
-
|
| 740 |
out = llm_judge_rank_batch(jd_text, must_haves or "", llm_batch)
|
| 741 |
batch_outputs.append(out)
|
| 742 |
|
|
|
|
|
|
|
| 743 |
judged = merge_llm_batches(batch_outputs)
|
| 744 |
ranked = judged.ranked
|
| 745 |
if not ranked:
|
| 746 |
raise gr.Error("LLM returned an empty ranking.")
|
| 747 |
|
| 748 |
-
# ---- Top 10 report
|
| 749 |
report_html = render_top10_html(ranked, total_count=len(ranked))
|
| 750 |
|
| 751 |
-
# ---- Full ranking export (with contacts)
|
| 752 |
tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".csv")
|
| 753 |
with open(tmp.name, "w", newline="", encoding="utf-8") as f:
|
| 754 |
w = csv.writer(f)
|
| 755 |
w.writerow(["Rank", "Filename", "FinalScore(0-100)", "FitLevel", "Name", "Email", "Phone", "Summary"])
|
| 756 |
-
for
|
| 757 |
ci = contacts_map.get(c.filename, {"name": "", "email": "", "phone": ""})
|
| 758 |
w.writerow([
|
| 759 |
-
|
| 760 |
c.filename,
|
| 761 |
round(float(c.final_score), 2),
|
| 762 |
c.fit_level,
|
|
@@ -766,13 +768,12 @@ def rank_app(
|
|
| 766 |
c.summary,
|
| 767 |
])
|
| 768 |
|
| 769 |
-
# ---- Shortlist table (pandas DataFrame so export works)
|
| 770 |
shortlist_rows = []
|
| 771 |
-
for
|
| 772 |
ci = contacts_map.get(c.filename, {"name": "", "email": "", "phone": ""})
|
| 773 |
shortlist_rows.append([
|
| 774 |
False,
|
| 775 |
-
|
| 776 |
c.filename,
|
| 777 |
round(float(c.final_score), 2),
|
| 778 |
c.fit_level,
|
|
@@ -788,28 +789,27 @@ def rank_app(
|
|
| 788 |
|
| 789 |
elapsed = time.time() - t0
|
| 790 |
meta = (
|
| 791 |
-
f"**CVs uploaded:** {len(cv_paths)} → **Unique processed:** {len(unique_paths)} \n"
|
| 792 |
f"**Ranked (ALL):** {len(ranked)} \n"
|
| 793 |
-
f"**LLM batches:** {
|
| 794 |
f"**Time:** {elapsed:.2f}s \n"
|
| 795 |
f"**Duplicates skipped:** {len(duplicates)} \n\n"
|
| 796 |
f"**LLM Notes:** {(judged.overall_notes or '').strip()}"
|
| 797 |
)
|
| 798 |
|
|
|
|
| 799 |
return report_html, meta, tmp.name, shortlist_df, "", ""
|
| 800 |
|
| 801 |
|
| 802 |
# =========================================================
|
| 803 |
-
# SGS Theme / CSS (
|
| 804 |
# =========================================================
|
| 805 |
CUSTOM_CSS = """
|
| 806 |
:root{
|
| 807 |
--sgs-blue:#0B3D91;
|
| 808 |
--sgs-green:#00A651;
|
| 809 |
--text:#F3F7FF;
|
| 810 |
-
--muted:#D7E3FF;
|
| 811 |
--line:rgba(255,255,255,.14);
|
| 812 |
-
--soft:rgba(255,255,255,.09);
|
| 813 |
}
|
| 814 |
|
| 815 |
.gradio-container{max-width:1180px !important;}
|
|
@@ -819,11 +819,9 @@ body, .gradio-container{
|
|
| 819 |
linear-gradient(180deg, #060914, #060914) !important;
|
| 820 |
}
|
| 821 |
|
| 822 |
-
|
| 823 |
-
.gradio-container, .gradio-container *{
|
| 824 |
-
color: var(--text);
|
| 825 |
-
}
|
| 826 |
|
|
|
|
| 827 |
.hero{
|
| 828 |
border:1px solid var(--line);
|
| 829 |
background: linear-gradient(135deg, rgba(11,61,145,.40), rgba(0,166,81,.20));
|
|
@@ -835,10 +833,20 @@ body, .gradio-container{
|
|
| 835 |
gap:16px;
|
| 836 |
box-shadow: 0 18px 40px rgba(0,0,0,.38);
|
| 837 |
margin: 12px 0 16px;
|
|
|
|
|
|
|
| 838 |
}
|
| 839 |
-
.hero
|
| 840 |
-
|
| 841 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 842 |
|
| 843 |
.kpi{
|
| 844 |
background: rgba(255,255,255,.08);
|
|
@@ -846,17 +854,25 @@ body, .gradio-container{
|
|
| 846 |
border-radius: 16px;
|
| 847 |
padding: 10px 12px;
|
| 848 |
min-width: 140px;
|
|
|
|
| 849 |
}
|
| 850 |
-
.kpi-label{color:rgba(243,247,255,.
|
| 851 |
-
.kpi-val{
|
| 852 |
|
|
|
|
| 853 |
.cards{display:grid;grid-template-columns: 1fr; gap: 12px;}
|
| 854 |
.card{
|
| 855 |
background: linear-gradient(180deg, rgba(16,26,44,.98), rgba(12,19,34,.88));
|
| 856 |
-
border:1px solid
|
| 857 |
border-radius: 18px;
|
| 858 |
padding: 14px;
|
| 859 |
box-shadow: 0 14px 28px rgba(0,0,0,.28);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 860 |
}
|
| 861 |
.card-top{display:flex;align-items:flex-start;justify-content:space-between;gap:10px;}
|
| 862 |
.card-title{display:flex;gap:10px;align-items:baseline;flex-wrap:wrap;}
|
|
@@ -871,6 +887,7 @@ body, .gradio-container{
|
|
| 871 |
.file{font-weight:900;font-size:16px;}
|
| 872 |
.card-meta{display:flex;gap:8px;align-items:center;flex-wrap:wrap;justify-content:flex-end;}
|
| 873 |
|
|
|
|
| 874 |
.badge{
|
| 875 |
display:inline-flex;align-items:center;
|
| 876 |
padding: 6px 10px;border-radius: 999px;font-size:12px;font-weight:900;
|
|
@@ -892,6 +909,7 @@ body, .gradio-container{
|
|
| 892 |
.p-low{ background: rgba(245,158,11,.16); border-color: rgba(245,158,11,.28); }
|
| 893 |
.p-bad{ background: rgba(239,68,68,.14); border-color: rgba(239,68,68,.28); }
|
| 894 |
|
|
|
|
| 895 |
.bar{
|
| 896 |
width: 100%; height: 10px; border-radius: 999px;
|
| 897 |
background: rgba(255,255,255,.10); overflow: hidden;
|
|
@@ -903,79 +921,60 @@ body, .gradio-container{
|
|
| 903 |
background: linear-gradient(90deg, var(--sgs-green), #4fb2ff, var(--sgs-blue));
|
| 904 |
}
|
| 905 |
|
| 906 |
-
.summary{
|
| 907 |
-
|
| 908 |
-
font-size:13px;
|
| 909 |
-
line-height:1.55rem;
|
| 910 |
-
margin: 6px 0 10px;
|
| 911 |
-
}
|
| 912 |
-
.section-title{
|
| 913 |
-
color:rgba(224,234,255,.98);
|
| 914 |
-
font-size:13px;
|
| 915 |
-
font-weight:900;
|
| 916 |
-
margin:10px 0 6px;
|
| 917 |
-
}
|
| 918 |
|
| 919 |
.grid{display:grid;grid-template-columns: 1fr 1fr; gap: 14px;}
|
| 920 |
@media(max-width:860px){.grid{grid-template-columns:1fr;}}
|
| 921 |
|
| 922 |
-
.list{margin:0;padding-left:18px;color
|
| 923 |
-
.list li{margin:6px 0;line-height:1.30rem;color
|
| 924 |
|
|
|
|
| 925 |
.quotes{display:grid;gap:10px;margin-top:6px;}
|
| 926 |
.quote{
|
| 927 |
-
background: rgba(255,255,255,.
|
| 928 |
-
border:1px solid rgba(255,255,255,.
|
| 929 |
border-radius: 14px;
|
| 930 |
padding: 10px 12px;
|
| 931 |
-
color:
|
| 932 |
font-size: 13px;
|
| 933 |
line-height: 1.45rem;
|
| 934 |
}
|
| 935 |
|
|
|
|
| 936 |
.checklist{display:grid;gap:8px;margin-top:6px;}
|
| 937 |
.checkrow{
|
| 938 |
display:grid; grid-template-columns: 1.1fr .4fr 1.5fr; gap:10px;
|
| 939 |
padding:10px 12px; border-radius:14px;
|
| 940 |
-
border:1px solid rgba(255,255,255,.
|
| 941 |
-
background: rgba(255,255,255,.
|
| 942 |
font-size:13px;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 943 |
}
|
| 944 |
-
.checkrow .req{font-weight:900;}
|
| 945 |
-
.checkrow .
|
| 946 |
-
.checkrow .
|
| 947 |
|
| 948 |
-
|
| 949 |
-
.checkrow.
|
| 950 |
-
.checkrow.
|
|
|
|
| 951 |
|
| 952 |
-
|
| 953 |
-
|
|
|
|
| 954 |
|
| 955 |
-
/*
|
| 956 |
-
|
| 957 |
-
========================================================= */
|
| 958 |
-
.checkrow, .checkrow *{
|
| 959 |
-
color: #FFFFFF !important;
|
| 960 |
-
}
|
| 961 |
-
.checkrow .ev{
|
| 962 |
-
color: rgba(255,255,255,0.95) !important;
|
| 963 |
-
}
|
| 964 |
-
.checkrow .st{
|
| 965 |
-
color: #FFFFFF !important;
|
| 966 |
-
opacity: 1 !important;
|
| 967 |
-
}
|
| 968 |
-
.quote, .quote *{
|
| 969 |
-
color: #FFFFFF !important;
|
| 970 |
-
}
|
| 971 |
-
.summary, .section-title, .list, .list li{
|
| 972 |
-
color: #FFFFFF !important;
|
| 973 |
-
opacity: 1 !important;
|
| 974 |
-
}
|
| 975 |
-
.checkrow{
|
| 976 |
-
background: rgba(255,255,255,0.10) !important;
|
| 977 |
-
border-color: rgba(255,255,255,0.18) !important;
|
| 978 |
-
}
|
| 979 |
"""
|
| 980 |
|
| 981 |
|
|
@@ -991,15 +990,16 @@ theme = gr.themes.Soft(
|
|
| 991 |
)
|
| 992 |
|
| 993 |
with gr.Blocks(title="SGS ATS Candidate Matcher", theme=theme, css=CUSTOM_CSS) as demo:
|
| 994 |
-
gr.Markdown("""
|
| 995 |
# SGS ATS Candidate Matcher
|
| 996 |
Evidence-based CV ranking against a Job Description (Top 10 Report + Shortlisting).
|
|
|
|
| 997 |
**Important:** set `HF_TOKEN` in Space secrets.
|
| 998 |
""")
|
| 999 |
|
| 1000 |
with gr.Row():
|
| 1001 |
jd_file = gr.File(label="Job Description file (PDF/DOCX/TXT)", file_types=[".pdf", ".docx", ".txt"])
|
| 1002 |
-
cv_files = gr.File(label="Upload CVs (
|
| 1003 |
|
| 1004 |
with gr.Accordion("Settings", open=False):
|
| 1005 |
must_haves = gr.Textbox(
|
|
|
|
| 23 |
# Models
|
| 24 |
# =========================================================
|
| 25 |
EMBED_MODEL_NAME = os.getenv("EMBED_MODEL_NAME", "BAAI/bge-base-en-v1.5")
|
|
|
|
|
|
|
|
|
|
| 26 |
RERANK_MODEL_NAME = os.getenv("RERANK_MODEL_NAME", "BAAI/bge-reranker-large")
|
|
|
|
|
|
|
| 27 |
LLM_MODEL = os.getenv("LLM_MODEL", "meta-llama/Meta-Llama-3.1-8B-Instruct")
|
| 28 |
|
| 29 |
|
|
|
|
| 36 |
TOP_CHUNKS_PER_CV = 10
|
| 37 |
EVIDENCE_CHUNKS_PER_CV = 4
|
| 38 |
|
|
|
|
| 39 |
LLM_BATCH_SIZE = int(os.getenv("LLM_BATCH_SIZE", "4"))
|
| 40 |
LLM_MAX_TOKENS = int(os.getenv("LLM_MAX_TOKENS", "3500"))
|
| 41 |
LLM_TEMPERATURE = float(os.getenv("LLM_TEMPERATURE", "0.15"))
|
|
|
|
| 43 |
MAX_CV_CHARS = 120_000
|
| 44 |
MAX_JD_CHARS = 60_000
|
| 45 |
|
| 46 |
+
MAX_CV_UPLOADS = 20 # ✅ requested max
|
| 47 |
+
|
| 48 |
# Global singletons
|
| 49 |
_embedder: Optional[SentenceTransformer] = None
|
| 50 |
_reranker: Optional[CrossEncoder] = None
|
|
|
|
| 254 |
# LLM Prompt (compact to avoid truncation)
|
| 255 |
# =========================================================
|
| 256 |
def build_llm_prompt(jd_text: str, must_haves: str, candidates: List[Dict[str, Any]]) -> str:
|
|
|
|
| 257 |
schema_example = {
|
| 258 |
"ranked": [
|
| 259 |
{
|
|
|
|
| 369 |
|
| 370 |
|
| 371 |
def llm_judge_rank_batch(jd_text: str, must_haves: str, batch: List[Dict[str, Any]]) -> LLMRankingOutput:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 372 |
client = get_hf_client()
|
| 373 |
|
| 374 |
prompt = build_llm_prompt(
|
|
|
|
| 391 |
|
| 392 |
out: Optional[LLMRankingOutput] = None
|
| 393 |
|
|
|
|
| 394 |
text = _call(LLM_TEMPERATURE, LLM_MAX_TOKENS, prompt)
|
| 395 |
try:
|
| 396 |
out = LLMRankingOutput.model_validate(json.loads(text))
|
|
|
|
| 399 |
if obj:
|
| 400 |
out = LLMRankingOutput.model_validate(json.loads(obj))
|
| 401 |
|
|
|
|
| 402 |
if out is None:
|
| 403 |
text2 = _call(0.0, max(LLM_MAX_TOKENS, 4500), prompt)
|
| 404 |
try:
|
|
|
|
| 408 |
if obj2:
|
| 409 |
out = LLMRankingOutput.model_validate(json.loads(obj2))
|
| 410 |
|
|
|
|
| 411 |
if out is None:
|
| 412 |
ranked = [fallback_candidate(b["filename"], b.get("local_score", 50.0)) for b in batch]
|
| 413 |
return LLMRankingOutput(ranked=ranked, overall_notes="LLM parsing failed; used local scoring fallback.")
|
|
|
|
| 415 |
returned = {c.filename: c for c in out.ranked}
|
| 416 |
missing = [b for b in batch if b["filename"] not in returned]
|
| 417 |
|
|
|
|
| 418 |
for b in missing:
|
| 419 |
single_prompt = build_llm_prompt(
|
| 420 |
jd_text,
|
|
|
|
| 576 |
|
| 577 |
|
| 578 |
# =========================================================
|
| 579 |
+
# Shortlist export (DataFrame-safe)
|
| 580 |
# =========================================================
|
| 581 |
def export_shortlist(shortlist_table: pd.DataFrame) -> Tuple[str, str, str]:
|
| 582 |
if shortlist_table is None or shortlist_table.empty:
|
| 583 |
raise gr.Error("No shortlist data yet. Run ranking first.")
|
| 584 |
|
|
|
|
| 585 |
shortlisted_df = shortlist_table[shortlist_table.iloc[:, 0] == True]
|
| 586 |
if shortlisted_df.empty:
|
| 587 |
raise gr.Error("No candidates marked as shortlisted.")
|
| 588 |
|
|
|
|
| 589 |
tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".csv")
|
| 590 |
shortlisted_df.to_csv(tmp.name, index=False)
|
| 591 |
|
|
|
|
| 592 |
emails = (
|
| 593 |
shortlisted_df.iloc[:, 6]
|
| 594 |
.dropna()
|
|
|
|
| 605 |
|
| 606 |
|
| 607 |
# =========================================================
|
| 608 |
+
# Main pipeline (with progress bar)
|
| 609 |
# =========================================================
|
| 610 |
def rank_app(
|
| 611 |
jd_file_obj,
|
|
|
|
| 613 |
must_haves: str,
|
| 614 |
mask_pii_toggle: bool,
|
| 615 |
show_contacts_toggle: bool,
|
| 616 |
+
progress=gr.Progress(track_tqdm=False), # ✅ progress bar
|
| 617 |
):
|
| 618 |
t0 = time.time()
|
| 619 |
ensure_models()
|
| 620 |
embedder = _embedder
|
| 621 |
reranker = _reranker
|
| 622 |
|
| 623 |
+
progress(0.02, desc="Loading Job Description...")
|
| 624 |
+
|
| 625 |
jd_path = gr_file_to_path(jd_file_obj)
|
| 626 |
if not jd_path:
|
| 627 |
raise gr.Error("Please upload a Job Description file (PDF/DOCX/TXT).")
|
|
|
|
| 630 |
if not jd_text:
|
| 631 |
raise gr.Error("Could not extract text from the Job Description file.")
|
| 632 |
|
|
|
|
| 633 |
if not cv_file_objs:
|
| 634 |
raise gr.Error("Please upload at least 1 CV.")
|
| 635 |
|
| 636 |
+
# ✅ enforce max 20
|
| 637 |
+
if len(cv_file_objs) > MAX_CV_UPLOADS:
|
| 638 |
+
raise gr.Error(f"Maximum allowed CV uploads is {MAX_CV_UPLOADS}. You uploaded {len(cv_file_objs)}.")
|
| 639 |
+
|
| 640 |
cv_paths = []
|
| 641 |
for f in cv_file_objs:
|
| 642 |
p = gr_file_to_path(f)
|
|
|
|
| 645 |
if not cv_paths:
|
| 646 |
raise gr.Error("Could not read uploaded CV files (no valid paths).")
|
| 647 |
|
| 648 |
+
progress(0.06, desc="Checking duplicates...")
|
| 649 |
+
|
| 650 |
seen = {}
|
| 651 |
duplicates = []
|
| 652 |
unique_paths = []
|
|
|
|
| 663 |
seen[h] = fname
|
| 664 |
unique_paths.append(p)
|
| 665 |
|
| 666 |
+
progress(0.10, desc="Embedding Job Description...")
|
| 667 |
+
|
| 668 |
jd_vec = np.array(embedder.encode([jd_text], normalize_embeddings=True), dtype=np.float32)
|
| 669 |
|
|
|
|
| 670 |
local_pool = []
|
| 671 |
contacts_map: Dict[str, Dict[str, str]] = {}
|
| 672 |
|
| 673 |
+
total = len(unique_paths)
|
| 674 |
+
for idx, p in enumerate(unique_paths, start=1):
|
| 675 |
+
# progress 10% -> 70% while processing CVs
|
| 676 |
+
prog = 0.10 + 0.60 * (idx / max(1, total))
|
| 677 |
+
progress(prog, desc=f"Processing CVs ({idx}/{total}) — {os.path.basename(p)}")
|
| 678 |
+
|
| 679 |
raw = clean_text(read_file_to_text(p))[:MAX_CV_CHARS]
|
| 680 |
if not raw:
|
| 681 |
continue
|
|
|
|
| 715 |
if not local_pool:
|
| 716 |
raise gr.Error("Could not extract usable text from the uploaded CVs.")
|
| 717 |
|
| 718 |
+
progress(0.72, desc="Preparing LLM ranking...")
|
| 719 |
+
|
| 720 |
local_pool = sorted(local_pool, key=lambda x: float(x["local_score"]), reverse=True)
|
| 721 |
|
| 722 |
batch_outputs: List[LLMRankingOutput] = []
|
| 723 |
+
batches = max(1, (len(local_pool) + LLM_BATCH_SIZE - 1) // LLM_BATCH_SIZE)
|
| 724 |
+
|
| 725 |
+
for b in range(batches):
|
| 726 |
+
start = b * LLM_BATCH_SIZE
|
| 727 |
+
end = start + LLM_BATCH_SIZE
|
| 728 |
+
batch = local_pool[start:end]
|
| 729 |
+
|
| 730 |
+
# progress 72% -> 92% while LLM runs
|
| 731 |
+
prog = 0.72 + 0.20 * ((b + 1) / batches)
|
| 732 |
+
progress(prog, desc=f"LLM judging batches ({b+1}/{batches})...")
|
| 733 |
|
| 734 |
llm_batch = [
|
| 735 |
{
|
| 736 |
"filename": c["filename"],
|
| 737 |
"evidence_chunks": c["evidence_chunks"],
|
| 738 |
+
"local_score": c["local_score"],
|
| 739 |
}
|
| 740 |
for c in batch
|
| 741 |
]
|
|
|
|
| 742 |
out = llm_judge_rank_batch(jd_text, must_haves or "", llm_batch)
|
| 743 |
batch_outputs.append(out)
|
| 744 |
|
| 745 |
+
progress(0.94, desc="Finalizing report...")
|
| 746 |
+
|
| 747 |
judged = merge_llm_batches(batch_outputs)
|
| 748 |
ranked = judged.ranked
|
| 749 |
if not ranked:
|
| 750 |
raise gr.Error("LLM returned an empty ranking.")
|
| 751 |
|
|
|
|
| 752 |
report_html = render_top10_html(ranked, total_count=len(ranked))
|
| 753 |
|
|
|
|
| 754 |
tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".csv")
|
| 755 |
with open(tmp.name, "w", newline="", encoding="utf-8") as f:
|
| 756 |
w = csv.writer(f)
|
| 757 |
w.writerow(["Rank", "Filename", "FinalScore(0-100)", "FitLevel", "Name", "Email", "Phone", "Summary"])
|
| 758 |
+
for ridx, c in enumerate(ranked, start=1):
|
| 759 |
ci = contacts_map.get(c.filename, {"name": "", "email": "", "phone": ""})
|
| 760 |
w.writerow([
|
| 761 |
+
ridx,
|
| 762 |
c.filename,
|
| 763 |
round(float(c.final_score), 2),
|
| 764 |
c.fit_level,
|
|
|
|
| 768 |
c.summary,
|
| 769 |
])
|
| 770 |
|
|
|
|
| 771 |
shortlist_rows = []
|
| 772 |
+
for ridx, c in enumerate(ranked, start=1):
|
| 773 |
ci = contacts_map.get(c.filename, {"name": "", "email": "", "phone": ""})
|
| 774 |
shortlist_rows.append([
|
| 775 |
False,
|
| 776 |
+
ridx,
|
| 777 |
c.filename,
|
| 778 |
round(float(c.final_score), 2),
|
| 779 |
c.fit_level,
|
|
|
|
| 789 |
|
| 790 |
elapsed = time.time() - t0
|
| 791 |
meta = (
|
| 792 |
+
f"**CVs uploaded:** {len(cv_paths)} → **Unique processed:** {len(unique_paths)} (Max allowed: {MAX_CV_UPLOADS}) \n"
|
| 793 |
f"**Ranked (ALL):** {len(ranked)} \n"
|
| 794 |
+
f"**LLM batches:** {batches} (batch size={LLM_BATCH_SIZE}) \n"
|
| 795 |
f"**Time:** {elapsed:.2f}s \n"
|
| 796 |
f"**Duplicates skipped:** {len(duplicates)} \n\n"
|
| 797 |
f"**LLM Notes:** {(judged.overall_notes or '').strip()}"
|
| 798 |
)
|
| 799 |
|
| 800 |
+
progress(1.0, desc="Done ✅")
|
| 801 |
return report_html, meta, tmp.name, shortlist_df, "", ""
|
| 802 |
|
| 803 |
|
| 804 |
# =========================================================
|
| 805 |
+
# SGS Theme / CSS (white text + MET green + nice touches)
|
| 806 |
# =========================================================
|
| 807 |
CUSTOM_CSS = """
|
| 808 |
:root{
|
| 809 |
--sgs-blue:#0B3D91;
|
| 810 |
--sgs-green:#00A651;
|
| 811 |
--text:#F3F7FF;
|
|
|
|
| 812 |
--line:rgba(255,255,255,.14);
|
|
|
|
| 813 |
}
|
| 814 |
|
| 815 |
.gradio-container{max-width:1180px !important;}
|
|
|
|
| 819 |
linear-gradient(180deg, #060914, #060914) !important;
|
| 820 |
}
|
| 821 |
|
| 822 |
+
.gradio-container, .gradio-container *{ color: var(--text); }
|
|
|
|
|
|
|
|
|
|
| 823 |
|
| 824 |
+
/* Hero */
|
| 825 |
.hero{
|
| 826 |
border:1px solid var(--line);
|
| 827 |
background: linear-gradient(135deg, rgba(11,61,145,.40), rgba(0,166,81,.20));
|
|
|
|
| 833 |
gap:16px;
|
| 834 |
box-shadow: 0 18px 40px rgba(0,0,0,.38);
|
| 835 |
margin: 12px 0 16px;
|
| 836 |
+
position: relative;
|
| 837 |
+
overflow: hidden;
|
| 838 |
}
|
| 839 |
+
.hero:before{
|
| 840 |
+
content:"";
|
| 841 |
+
position:absolute;
|
| 842 |
+
inset:-40%;
|
| 843 |
+
background: radial-gradient(circle at 30% 30%, rgba(255,255,255,.10), transparent 45%);
|
| 844 |
+
transform: rotate(18deg);
|
| 845 |
+
pointer-events:none;
|
| 846 |
+
}
|
| 847 |
+
.hero-title{font-weight:900;font-size:22px;position:relative;}
|
| 848 |
+
.hero-sub{color:rgba(243,247,255,.90);margin-top:6px;font-size:13px;position:relative;}
|
| 849 |
+
.hero-right{display:flex;gap:10px;flex-wrap:wrap;justify-content:flex-end;position:relative;}
|
| 850 |
|
| 851 |
.kpi{
|
| 852 |
background: rgba(255,255,255,.08);
|
|
|
|
| 854 |
border-radius: 16px;
|
| 855 |
padding: 10px 12px;
|
| 856 |
min-width: 140px;
|
| 857 |
+
backdrop-filter: blur(6px);
|
| 858 |
}
|
| 859 |
+
.kpi-label{color:rgba(243,247,255,.82);font-size:12px;font-weight:700;}
|
| 860 |
+
.kpi-val{font-size:18px;font-weight:900;margin-top:2px;}
|
| 861 |
|
| 862 |
+
/* Cards */
|
| 863 |
.cards{display:grid;grid-template-columns: 1fr; gap: 12px;}
|
| 864 |
.card{
|
| 865 |
background: linear-gradient(180deg, rgba(16,26,44,.98), rgba(12,19,34,.88));
|
| 866 |
+
border:1px solid rgba(255,255,255,.14);
|
| 867 |
border-radius: 18px;
|
| 868 |
padding: 14px;
|
| 869 |
box-shadow: 0 14px 28px rgba(0,0,0,.28);
|
| 870 |
+
transition: transform .18s ease, box-shadow .18s ease, border-color .18s ease;
|
| 871 |
+
}
|
| 872 |
+
.card:hover{
|
| 873 |
+
transform: translateY(-2px);
|
| 874 |
+
box-shadow: 0 20px 40px rgba(0,0,0,.38);
|
| 875 |
+
border-color: rgba(255,255,255,.20);
|
| 876 |
}
|
| 877 |
.card-top{display:flex;align-items:flex-start;justify-content:space-between;gap:10px;}
|
| 878 |
.card-title{display:flex;gap:10px;align-items:baseline;flex-wrap:wrap;}
|
|
|
|
| 887 |
.file{font-weight:900;font-size:16px;}
|
| 888 |
.card-meta{display:flex;gap:8px;align-items:center;flex-wrap:wrap;justify-content:flex-end;}
|
| 889 |
|
| 890 |
+
/* Badges */
|
| 891 |
.badge{
|
| 892 |
display:inline-flex;align-items:center;
|
| 893 |
padding: 6px 10px;border-radius: 999px;font-size:12px;font-weight:900;
|
|
|
|
| 909 |
.p-low{ background: rgba(245,158,11,.16); border-color: rgba(245,158,11,.28); }
|
| 910 |
.p-bad{ background: rgba(239,68,68,.14); border-color: rgba(239,68,68,.28); }
|
| 911 |
|
| 912 |
+
/* Score bar */
|
| 913 |
.bar{
|
| 914 |
width: 100%; height: 10px; border-radius: 999px;
|
| 915 |
background: rgba(255,255,255,.10); overflow: hidden;
|
|
|
|
| 921 |
background: linear-gradient(90deg, var(--sgs-green), #4fb2ff, var(--sgs-blue));
|
| 922 |
}
|
| 923 |
|
| 924 |
+
.summary{font-size:13px;line-height:1.55rem;margin: 6px 0 10px;color:#fff;}
|
| 925 |
+
.section-title{font-size:13px;font-weight:900;margin:10px 0 6px;color:#fff;}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 926 |
|
| 927 |
.grid{display:grid;grid-template-columns: 1fr 1fr; gap: 14px;}
|
| 928 |
@media(max-width:860px){.grid{grid-template-columns:1fr;}}
|
| 929 |
|
| 930 |
+
.list{margin:0;padding-left:18px;color:#fff;}
|
| 931 |
+
.list li{margin:6px 0;line-height:1.30rem;color:#fff;}
|
| 932 |
|
| 933 |
+
/* Quotes / Evidence */
|
| 934 |
.quotes{display:grid;gap:10px;margin-top:6px;}
|
| 935 |
.quote{
|
| 936 |
+
background: rgba(255,255,255,.10);
|
| 937 |
+
border:1px solid rgba(255,255,255,.16);
|
| 938 |
border-radius: 14px;
|
| 939 |
padding: 10px 12px;
|
| 940 |
+
color: #fff;
|
| 941 |
font-size: 13px;
|
| 942 |
line-height: 1.45rem;
|
| 943 |
}
|
| 944 |
|
| 945 |
+
/* Checklist */
|
| 946 |
.checklist{display:grid;gap:8px;margin-top:6px;}
|
| 947 |
.checkrow{
|
| 948 |
display:grid; grid-template-columns: 1.1fr .4fr 1.5fr; gap:10px;
|
| 949 |
padding:10px 12px; border-radius:14px;
|
| 950 |
+
border:1px solid rgba(255,255,255,.18);
|
| 951 |
+
background: rgba(255,255,255,.10);
|
| 952 |
font-size:13px;
|
| 953 |
+
position: relative;
|
| 954 |
+
overflow: hidden;
|
| 955 |
+
}
|
| 956 |
+
.checkrow:before{
|
| 957 |
+
content:"";
|
| 958 |
+
position:absolute;
|
| 959 |
+
left:0; top:0; bottom:0;
|
| 960 |
+
width:4px;
|
| 961 |
+
background: rgba(255,255,255,.20);
|
| 962 |
}
|
| 963 |
+
.checkrow .req{font-weight:900;color:#fff;}
|
| 964 |
+
.checkrow .ev{color:rgba(255,255,255,0.95);}
|
| 965 |
+
.checkrow .st{font-weight:1000;text-align:center;letter-spacing:.4px;}
|
| 966 |
|
| 967 |
+
/* ✅ Status colors (MET green) */
|
| 968 |
+
.checkrow.ok:before{ background: rgba(0,166,81,.95); }
|
| 969 |
+
.checkrow.partial:before{ background: rgba(245,158,11,.95); }
|
| 970 |
+
.checkrow.miss:before{ background: rgba(239,68,68,.95); }
|
| 971 |
|
| 972 |
+
.checkrow.ok .st{ color:#22ffb6 !important; text-shadow: 0 0 10px rgba(34,255,182,.18); }
|
| 973 |
+
.checkrow.partial .st{ color:#ffd27a !important; }
|
| 974 |
+
.checkrow.miss .st{ color:#ff9a9a !important; }
|
| 975 |
|
| 976 |
+
/* Dataframe border */
|
| 977 |
+
table { border-color: rgba(255,255,255,.14) !important; }
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 978 |
"""
|
| 979 |
|
| 980 |
|
|
|
|
| 990 |
)
|
| 991 |
|
| 992 |
with gr.Blocks(title="SGS ATS Candidate Matcher", theme=theme, css=CUSTOM_CSS) as demo:
|
| 993 |
+
gr.Markdown(f"""
|
| 994 |
# SGS ATS Candidate Matcher
|
| 995 |
Evidence-based CV ranking against a Job Description (Top 10 Report + Shortlisting).
|
| 996 |
+
**Max CV uploads:** {MAX_CV_UPLOADS}
|
| 997 |
**Important:** set `HF_TOKEN` in Space secrets.
|
| 998 |
""")
|
| 999 |
|
| 1000 |
with gr.Row():
|
| 1001 |
jd_file = gr.File(label="Job Description file (PDF/DOCX/TXT)", file_types=[".pdf", ".docx", ".txt"])
|
| 1002 |
+
cv_files = gr.File(label=f"Upload CVs (max {MAX_CV_UPLOADS})", file_count="multiple", file_types=[".pdf", ".docx", ".txt"])
|
| 1003 |
|
| 1004 |
with gr.Accordion("Settings", open=False):
|
| 1005 |
must_haves = gr.Textbox(
|