Klnimri committed on
Commit
db3bd3d
·
verified ·
1 Parent(s): 7622d61

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +107 -107
app.py CHANGED
@@ -23,12 +23,7 @@ import docx2txt
23
  # Models
24
  # =========================================================
25
  EMBED_MODEL_NAME = os.getenv("EMBED_MODEL_NAME", "BAAI/bge-base-en-v1.5")
26
-
27
- # If CPU Space is slow, set Space Variable:
28
- # RERANK_MODEL_NAME=BAAI/bge-reranker-base
29
  RERANK_MODEL_NAME = os.getenv("RERANK_MODEL_NAME", "BAAI/bge-reranker-large")
30
-
31
- # Default LLM judge (override via Space Variables)
32
  LLM_MODEL = os.getenv("LLM_MODEL", "meta-llama/Meta-Llama-3.1-8B-Instruct")
33
 
34
 
@@ -41,7 +36,6 @@ CHUNK_OVERLAP_CHARS = 180
41
  TOP_CHUNKS_PER_CV = 10
42
  EVIDENCE_CHUNKS_PER_CV = 4
43
 
44
- # Smaller batch reduces truncation and "only 1 candidate" outputs
45
  LLM_BATCH_SIZE = int(os.getenv("LLM_BATCH_SIZE", "4"))
46
  LLM_MAX_TOKENS = int(os.getenv("LLM_MAX_TOKENS", "3500"))
47
  LLM_TEMPERATURE = float(os.getenv("LLM_TEMPERATURE", "0.15"))
@@ -49,6 +43,8 @@ LLM_TEMPERATURE = float(os.getenv("LLM_TEMPERATURE", "0.15"))
49
  MAX_CV_CHARS = 120_000
50
  MAX_JD_CHARS = 60_000
51
 
 
 
52
  # Global singletons
53
  _embedder: Optional[SentenceTransformer] = None
54
  _reranker: Optional[CrossEncoder] = None
@@ -258,7 +254,6 @@ def compute_local_score(retr_sims: List[float], rerank_logits: List[float]) -> f
258
  # LLM Prompt (compact to avoid truncation)
259
  # =========================================================
260
  def build_llm_prompt(jd_text: str, must_haves: str, candidates: List[Dict[str, Any]]) -> str:
261
- # IMPORTANT: no example filename like "example.pdf"
262
  schema_example = {
263
  "ranked": [
264
  {
@@ -374,11 +369,6 @@ def fallback_candidate(filename: str, score: float) -> CandidateLLMResult:
374
 
375
 
376
  def llm_judge_rank_batch(jd_text: str, must_haves: str, batch: List[Dict[str, Any]]) -> LLMRankingOutput:
377
- """
378
- Guarantees: returns a result for EVERY candidate in `batch`.
379
- If LLM returns incomplete list, re-judge missing CVs individually.
380
- If still missing, fallback to local_score.
381
- """
382
  client = get_hf_client()
383
 
384
  prompt = build_llm_prompt(
@@ -401,7 +391,6 @@ def llm_judge_rank_batch(jd_text: str, must_haves: str, batch: List[Dict[str, An
401
 
402
  out: Optional[LLMRankingOutput] = None
403
 
404
- # Attempt 1
405
  text = _call(LLM_TEMPERATURE, LLM_MAX_TOKENS, prompt)
406
  try:
407
  out = LLMRankingOutput.model_validate(json.loads(text))
@@ -410,7 +399,6 @@ def llm_judge_rank_batch(jd_text: str, must_haves: str, batch: List[Dict[str, An
410
  if obj:
411
  out = LLMRankingOutput.model_validate(json.loads(obj))
412
 
413
- # Retry once if parsing failed
414
  if out is None:
415
  text2 = _call(0.0, max(LLM_MAX_TOKENS, 4500), prompt)
416
  try:
@@ -420,7 +408,6 @@ def llm_judge_rank_batch(jd_text: str, must_haves: str, batch: List[Dict[str, An
420
  if obj2:
421
  out = LLMRankingOutput.model_validate(json.loads(obj2))
422
 
423
- # If still failing: fallback all
424
  if out is None:
425
  ranked = [fallback_candidate(b["filename"], b.get("local_score", 50.0)) for b in batch]
426
  return LLMRankingOutput(ranked=ranked, overall_notes="LLM parsing failed; used local scoring fallback.")
@@ -428,7 +415,6 @@ def llm_judge_rank_batch(jd_text: str, must_haves: str, batch: List[Dict[str, An
428
  returned = {c.filename: c for c in out.ranked}
429
  missing = [b for b in batch if b["filename"] not in returned]
430
 
431
- # Re-judge missing individually (more reliable)
432
  for b in missing:
433
  single_prompt = build_llm_prompt(
434
  jd_text,
@@ -590,22 +576,19 @@ def render_top10_html(ranked: List[CandidateLLMResult], total_count: int) -> str
590
 
591
 
592
  # =========================================================
593
- # Shortlist export (FIXED for pandas DataFrame)
594
  # =========================================================
595
  def export_shortlist(shortlist_table: pd.DataFrame) -> Tuple[str, str, str]:
596
  if shortlist_table is None or shortlist_table.empty:
597
  raise gr.Error("No shortlist data yet. Run ranking first.")
598
 
599
- # First column is Shortlisted (bool)
600
  shortlisted_df = shortlist_table[shortlist_table.iloc[:, 0] == True]
601
  if shortlisted_df.empty:
602
  raise gr.Error("No candidates marked as shortlisted.")
603
 
604
- # Export CSV
605
  tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".csv")
606
  shortlisted_df.to_csv(tmp.name, index=False)
607
 
608
- # Email list from Email column (index 6 in our table)
609
  emails = (
610
  shortlisted_df.iloc[:, 6]
611
  .dropna()
@@ -622,7 +605,7 @@ def export_shortlist(shortlist_table: pd.DataFrame) -> Tuple[str, str, str]:
622
 
623
 
624
  # =========================================================
625
- # Main pipeline
626
  # =========================================================
627
  def rank_app(
628
  jd_file_obj,
@@ -630,13 +613,15 @@ def rank_app(
630
  must_haves: str,
631
  mask_pii_toggle: bool,
632
  show_contacts_toggle: bool,
 
633
  ):
634
  t0 = time.time()
635
  ensure_models()
636
  embedder = _embedder
637
  reranker = _reranker
638
 
639
- # ---- Load JD
 
640
  jd_path = gr_file_to_path(jd_file_obj)
641
  if not jd_path:
642
  raise gr.Error("Please upload a Job Description file (PDF/DOCX/TXT).")
@@ -645,10 +630,13 @@ def rank_app(
645
  if not jd_text:
646
  raise gr.Error("Could not extract text from the Job Description file.")
647
 
648
- # ---- CV paths
649
  if not cv_file_objs:
650
  raise gr.Error("Please upload at least 1 CV.")
651
 
 
 
 
 
652
  cv_paths = []
653
  for f in cv_file_objs:
654
  p = gr_file_to_path(f)
@@ -657,7 +645,8 @@ def rank_app(
657
  if not cv_paths:
658
  raise gr.Error("Could not read uploaded CV files (no valid paths).")
659
 
660
- # ---- Duplicate detection
 
661
  seen = {}
662
  duplicates = []
663
  unique_paths = []
@@ -674,14 +663,19 @@ def rank_app(
674
  seen[h] = fname
675
  unique_paths.append(p)
676
 
677
- # ---- Embed JD once
 
678
  jd_vec = np.array(embedder.encode([jd_text], normalize_embeddings=True), dtype=np.float32)
679
 
680
- # ---- Process ALL CVs (retrieval + rerank + local_score + contacts)
681
  local_pool = []
682
  contacts_map: Dict[str, Dict[str, str]] = {}
683
 
684
- for p in unique_paths:
 
 
 
 
 
685
  raw = clean_text(read_file_to_text(p))[:MAX_CV_CHARS]
686
  if not raw:
687
  continue
@@ -721,42 +715,50 @@ def rank_app(
721
  if not local_pool:
722
  raise gr.Error("Could not extract usable text from the uploaded CVs.")
723
 
724
- # ---- Sort by local_score, then LLM judge ALL (batched)
 
725
  local_pool = sorted(local_pool, key=lambda x: float(x["local_score"]), reverse=True)
726
 
727
  batch_outputs: List[LLMRankingOutput] = []
728
- for i in range(0, len(local_pool), LLM_BATCH_SIZE):
729
- batch = local_pool[i:i + LLM_BATCH_SIZE]
 
 
 
 
 
 
 
 
730
 
731
  llm_batch = [
732
  {
733
  "filename": c["filename"],
734
  "evidence_chunks": c["evidence_chunks"],
735
- "local_score": c["local_score"], # fallback only
736
  }
737
  for c in batch
738
  ]
739
-
740
  out = llm_judge_rank_batch(jd_text, must_haves or "", llm_batch)
741
  batch_outputs.append(out)
742
 
 
 
743
  judged = merge_llm_batches(batch_outputs)
744
  ranked = judged.ranked
745
  if not ranked:
746
  raise gr.Error("LLM returned an empty ranking.")
747
 
748
- # ---- Top 10 report
749
  report_html = render_top10_html(ranked, total_count=len(ranked))
750
 
751
- # ---- Full ranking export (with contacts)
752
  tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".csv")
753
  with open(tmp.name, "w", newline="", encoding="utf-8") as f:
754
  w = csv.writer(f)
755
  w.writerow(["Rank", "Filename", "FinalScore(0-100)", "FitLevel", "Name", "Email", "Phone", "Summary"])
756
- for idx, c in enumerate(ranked, start=1):
757
  ci = contacts_map.get(c.filename, {"name": "", "email": "", "phone": ""})
758
  w.writerow([
759
- idx,
760
  c.filename,
761
  round(float(c.final_score), 2),
762
  c.fit_level,
@@ -766,13 +768,12 @@ def rank_app(
766
  c.summary,
767
  ])
768
 
769
- # ---- Shortlist table (pandas DataFrame so export works)
770
  shortlist_rows = []
771
- for idx, c in enumerate(ranked, start=1):
772
  ci = contacts_map.get(c.filename, {"name": "", "email": "", "phone": ""})
773
  shortlist_rows.append([
774
  False,
775
- idx,
776
  c.filename,
777
  round(float(c.final_score), 2),
778
  c.fit_level,
@@ -788,28 +789,27 @@ def rank_app(
788
 
789
  elapsed = time.time() - t0
790
  meta = (
791
- f"**CVs uploaded:** {len(cv_paths)} → **Unique processed:** {len(unique_paths)} \n"
792
  f"**Ranked (ALL):** {len(ranked)} \n"
793
- f"**LLM batches:** {len(batch_outputs)} (batch size={LLM_BATCH_SIZE}) \n"
794
  f"**Time:** {elapsed:.2f}s \n"
795
  f"**Duplicates skipped:** {len(duplicates)} \n\n"
796
  f"**LLM Notes:** {(judged.overall_notes or '').strip()}"
797
  )
798
 
 
799
  return report_html, meta, tmp.name, shortlist_df, "", ""
800
 
801
 
802
  # =========================================================
803
- # SGS Theme / CSS (with WHITE FONT FIX)
804
  # =========================================================
805
  CUSTOM_CSS = """
806
  :root{
807
  --sgs-blue:#0B3D91;
808
  --sgs-green:#00A651;
809
  --text:#F3F7FF;
810
- --muted:#D7E3FF;
811
  --line:rgba(255,255,255,.14);
812
- --soft:rgba(255,255,255,.09);
813
  }
814
 
815
  .gradio-container{max-width:1180px !important;}
@@ -819,11 +819,9 @@ body, .gradio-container{
819
  linear-gradient(180deg, #060914, #060914) !important;
820
  }
821
 
822
- /* Improve default component text contrast */
823
- .gradio-container, .gradio-container *{
824
- color: var(--text);
825
- }
826
 
 
827
  .hero{
828
  border:1px solid var(--line);
829
  background: linear-gradient(135deg, rgba(11,61,145,.40), rgba(0,166,81,.20));
@@ -835,10 +833,20 @@ body, .gradio-container{
835
  gap:16px;
836
  box-shadow: 0 18px 40px rgba(0,0,0,.38);
837
  margin: 12px 0 16px;
 
 
838
  }
839
- .hero-title{color:var(--text);font-weight:900;font-size:22px;}
840
- .hero-sub{color:rgba(243,247,255,.88);margin-top:6px;font-size:13px;}
841
- .hero-right{display:flex;gap:10px;flex-wrap:wrap;justify-content:flex-end;}
 
 
 
 
 
 
 
 
842
 
843
  .kpi{
844
  background: rgba(255,255,255,.08);
@@ -846,17 +854,25 @@ body, .gradio-container{
846
  border-radius: 16px;
847
  padding: 10px 12px;
848
  min-width: 140px;
 
849
  }
850
- .kpi-label{color:rgba(243,247,255,.80);font-size:12px;font-weight:700;}
851
- .kpi-val{color:var(--text);font-size:18px;font-weight:900;margin-top:2px;}
852
 
 
853
  .cards{display:grid;grid-template-columns: 1fr; gap: 12px;}
854
  .card{
855
  background: linear-gradient(180deg, rgba(16,26,44,.98), rgba(12,19,34,.88));
856
- border:1px solid var(--line);
857
  border-radius: 18px;
858
  padding: 14px;
859
  box-shadow: 0 14px 28px rgba(0,0,0,.28);
 
 
 
 
 
 
860
  }
861
  .card-top{display:flex;align-items:flex-start;justify-content:space-between;gap:10px;}
862
  .card-title{display:flex;gap:10px;align-items:baseline;flex-wrap:wrap;}
@@ -871,6 +887,7 @@ body, .gradio-container{
871
  .file{font-weight:900;font-size:16px;}
872
  .card-meta{display:flex;gap:8px;align-items:center;flex-wrap:wrap;justify-content:flex-end;}
873
 
 
874
  .badge{
875
  display:inline-flex;align-items:center;
876
  padding: 6px 10px;border-radius: 999px;font-size:12px;font-weight:900;
@@ -892,6 +909,7 @@ body, .gradio-container{
892
  .p-low{ background: rgba(245,158,11,.16); border-color: rgba(245,158,11,.28); }
893
  .p-bad{ background: rgba(239,68,68,.14); border-color: rgba(239,68,68,.28); }
894
 
 
895
  .bar{
896
  width: 100%; height: 10px; border-radius: 999px;
897
  background: rgba(255,255,255,.10); overflow: hidden;
@@ -903,79 +921,60 @@ body, .gradio-container{
903
  background: linear-gradient(90deg, var(--sgs-green), #4fb2ff, var(--sgs-blue));
904
  }
905
 
906
- .summary{
907
- color:rgba(243,247,255,.92);
908
- font-size:13px;
909
- line-height:1.55rem;
910
- margin: 6px 0 10px;
911
- }
912
- .section-title{
913
- color:rgba(224,234,255,.98);
914
- font-size:13px;
915
- font-weight:900;
916
- margin:10px 0 6px;
917
- }
918
 
919
  .grid{display:grid;grid-template-columns: 1fr 1fr; gap: 14px;}
920
  @media(max-width:860px){.grid{grid-template-columns:1fr;}}
921
 
922
- .list{margin:0;padding-left:18px;color:rgba(243,247,255,.92);}
923
- .list li{margin:6px 0;line-height:1.30rem;color:rgba(243,247,255,.92);}
924
 
 
925
  .quotes{display:grid;gap:10px;margin-top:6px;}
926
  .quote{
927
- background: rgba(255,255,255,.09);
928
- border:1px solid rgba(255,255,255,.14);
929
  border-radius: 14px;
930
  padding: 10px 12px;
931
- color: rgba(243,247,255,.94);
932
  font-size: 13px;
933
  line-height: 1.45rem;
934
  }
935
 
 
936
  .checklist{display:grid;gap:8px;margin-top:6px;}
937
  .checkrow{
938
  display:grid; grid-template-columns: 1.1fr .4fr 1.5fr; gap:10px;
939
  padding:10px 12px; border-radius:14px;
940
- border:1px solid rgba(255,255,255,.14);
941
- background: rgba(255,255,255,.07);
942
  font-size:13px;
 
 
 
 
 
 
 
 
 
943
  }
944
- .checkrow .req{font-weight:900;}
945
- .checkrow .st{font-weight:900;text-align:center;}
946
- .checkrow .ev{opacity:.95;}
947
 
948
- .checkrow.ok{border-color: rgba(0,166,81,.30); background: rgba(0,166,81,.12);}
949
- .checkrow.partial{border-color: rgba(245,158,11,.30); background: rgba(245,158,11,.12);}
950
- .checkrow.miss{border-color: rgba(239,68,68,.30); background: rgba(239,68,68,.11);}
 
951
 
952
- /* Dataframe border */
953
- table { border-color: rgba(255,255,255,.12) !important; }
 
954
 
955
- /* =========================================================
956
- VISIBILITY FIX: Force WHITE text for checklist/evidence
957
- ========================================================= */
958
- .checkrow, .checkrow *{
959
- color: #FFFFFF !important;
960
- }
961
- .checkrow .ev{
962
- color: rgba(255,255,255,0.95) !important;
963
- }
964
- .checkrow .st{
965
- color: #FFFFFF !important;
966
- opacity: 1 !important;
967
- }
968
- .quote, .quote *{
969
- color: #FFFFFF !important;
970
- }
971
- .summary, .section-title, .list, .list li{
972
- color: #FFFFFF !important;
973
- opacity: 1 !important;
974
- }
975
- .checkrow{
976
- background: rgba(255,255,255,0.10) !important;
977
- border-color: rgba(255,255,255,0.18) !important;
978
- }
979
  """
980
 
981
 
@@ -991,15 +990,16 @@ theme = gr.themes.Soft(
991
  )
992
 
993
  with gr.Blocks(title="SGS ATS Candidate Matcher", theme=theme, css=CUSTOM_CSS) as demo:
994
- gr.Markdown("""
995
  # SGS ATS Candidate Matcher
996
  Evidence-based CV ranking against a Job Description (Top 10 Report + Shortlisting).
 
997
  **Important:** set `HF_TOKEN` in Space secrets.
998
  """)
999
 
1000
  with gr.Row():
1001
  jd_file = gr.File(label="Job Description file (PDF/DOCX/TXT)", file_types=[".pdf", ".docx", ".txt"])
1002
- cv_files = gr.File(label="Upload CVs (multiple)", file_count="multiple", file_types=[".pdf", ".docx", ".txt"])
1003
 
1004
  with gr.Accordion("Settings", open=False):
1005
  must_haves = gr.Textbox(
 
23
  # Models
24
  # =========================================================
25
  EMBED_MODEL_NAME = os.getenv("EMBED_MODEL_NAME", "BAAI/bge-base-en-v1.5")
 
 
 
26
  RERANK_MODEL_NAME = os.getenv("RERANK_MODEL_NAME", "BAAI/bge-reranker-large")
 
 
27
  LLM_MODEL = os.getenv("LLM_MODEL", "meta-llama/Meta-Llama-3.1-8B-Instruct")
28
 
29
 
 
36
  TOP_CHUNKS_PER_CV = 10
37
  EVIDENCE_CHUNKS_PER_CV = 4
38
 
 
39
  LLM_BATCH_SIZE = int(os.getenv("LLM_BATCH_SIZE", "4"))
40
  LLM_MAX_TOKENS = int(os.getenv("LLM_MAX_TOKENS", "3500"))
41
  LLM_TEMPERATURE = float(os.getenv("LLM_TEMPERATURE", "0.15"))
 
43
  MAX_CV_CHARS = 120_000
44
  MAX_JD_CHARS = 60_000
45
 
46
+ MAX_CV_UPLOADS = 20 # ✅ requested max
47
+
48
  # Global singletons
49
  _embedder: Optional[SentenceTransformer] = None
50
  _reranker: Optional[CrossEncoder] = None
 
254
  # LLM Prompt (compact to avoid truncation)
255
  # =========================================================
256
  def build_llm_prompt(jd_text: str, must_haves: str, candidates: List[Dict[str, Any]]) -> str:
 
257
  schema_example = {
258
  "ranked": [
259
  {
 
369
 
370
 
371
  def llm_judge_rank_batch(jd_text: str, must_haves: str, batch: List[Dict[str, Any]]) -> LLMRankingOutput:
 
 
 
 
 
372
  client = get_hf_client()
373
 
374
  prompt = build_llm_prompt(
 
391
 
392
  out: Optional[LLMRankingOutput] = None
393
 
 
394
  text = _call(LLM_TEMPERATURE, LLM_MAX_TOKENS, prompt)
395
  try:
396
  out = LLMRankingOutput.model_validate(json.loads(text))
 
399
  if obj:
400
  out = LLMRankingOutput.model_validate(json.loads(obj))
401
 
 
402
  if out is None:
403
  text2 = _call(0.0, max(LLM_MAX_TOKENS, 4500), prompt)
404
  try:
 
408
  if obj2:
409
  out = LLMRankingOutput.model_validate(json.loads(obj2))
410
 
 
411
  if out is None:
412
  ranked = [fallback_candidate(b["filename"], b.get("local_score", 50.0)) for b in batch]
413
  return LLMRankingOutput(ranked=ranked, overall_notes="LLM parsing failed; used local scoring fallback.")
 
415
  returned = {c.filename: c for c in out.ranked}
416
  missing = [b for b in batch if b["filename"] not in returned]
417
 
 
418
  for b in missing:
419
  single_prompt = build_llm_prompt(
420
  jd_text,
 
576
 
577
 
578
  # =========================================================
579
+ # Shortlist export (DataFrame-safe)
580
  # =========================================================
581
  def export_shortlist(shortlist_table: pd.DataFrame) -> Tuple[str, str, str]:
582
  if shortlist_table is None or shortlist_table.empty:
583
  raise gr.Error("No shortlist data yet. Run ranking first.")
584
 
 
585
  shortlisted_df = shortlist_table[shortlist_table.iloc[:, 0] == True]
586
  if shortlisted_df.empty:
587
  raise gr.Error("No candidates marked as shortlisted.")
588
 
 
589
  tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".csv")
590
  shortlisted_df.to_csv(tmp.name, index=False)
591
 
 
592
  emails = (
593
  shortlisted_df.iloc[:, 6]
594
  .dropna()
 
605
 
606
 
607
  # =========================================================
608
+ # Main pipeline (with progress bar)
609
  # =========================================================
610
  def rank_app(
611
  jd_file_obj,
 
613
  must_haves: str,
614
  mask_pii_toggle: bool,
615
  show_contacts_toggle: bool,
616
+ progress=gr.Progress(track_tqdm=False), # ✅ progress bar
617
  ):
618
  t0 = time.time()
619
  ensure_models()
620
  embedder = _embedder
621
  reranker = _reranker
622
 
623
+ progress(0.02, desc="Loading Job Description...")
624
+
625
  jd_path = gr_file_to_path(jd_file_obj)
626
  if not jd_path:
627
  raise gr.Error("Please upload a Job Description file (PDF/DOCX/TXT).")
 
630
  if not jd_text:
631
  raise gr.Error("Could not extract text from the Job Description file.")
632
 
 
633
  if not cv_file_objs:
634
  raise gr.Error("Please upload at least 1 CV.")
635
 
636
+ # ✅ enforce max 20
637
+ if len(cv_file_objs) > MAX_CV_UPLOADS:
638
+ raise gr.Error(f"Maximum allowed CV uploads is {MAX_CV_UPLOADS}. You uploaded {len(cv_file_objs)}.")
639
+
640
  cv_paths = []
641
  for f in cv_file_objs:
642
  p = gr_file_to_path(f)
 
645
  if not cv_paths:
646
  raise gr.Error("Could not read uploaded CV files (no valid paths).")
647
 
648
+ progress(0.06, desc="Checking duplicates...")
649
+
650
  seen = {}
651
  duplicates = []
652
  unique_paths = []
 
663
  seen[h] = fname
664
  unique_paths.append(p)
665
 
666
+ progress(0.10, desc="Embedding Job Description...")
667
+
668
  jd_vec = np.array(embedder.encode([jd_text], normalize_embeddings=True), dtype=np.float32)
669
 
 
670
  local_pool = []
671
  contacts_map: Dict[str, Dict[str, str]] = {}
672
 
673
+ total = len(unique_paths)
674
+ for idx, p in enumerate(unique_paths, start=1):
675
+ # progress 10% -> 70% while processing CVs
676
+ prog = 0.10 + 0.60 * (idx / max(1, total))
677
+ progress(prog, desc=f"Processing CVs ({idx}/{total}) — {os.path.basename(p)}")
678
+
679
  raw = clean_text(read_file_to_text(p))[:MAX_CV_CHARS]
680
  if not raw:
681
  continue
 
715
  if not local_pool:
716
  raise gr.Error("Could not extract usable text from the uploaded CVs.")
717
 
718
+ progress(0.72, desc="Preparing LLM ranking...")
719
+
720
  local_pool = sorted(local_pool, key=lambda x: float(x["local_score"]), reverse=True)
721
 
722
  batch_outputs: List[LLMRankingOutput] = []
723
+ batches = max(1, (len(local_pool) + LLM_BATCH_SIZE - 1) // LLM_BATCH_SIZE)
724
+
725
+ for b in range(batches):
726
+ start = b * LLM_BATCH_SIZE
727
+ end = start + LLM_BATCH_SIZE
728
+ batch = local_pool[start:end]
729
+
730
+ # progress 72% -> 92% while LLM runs
731
+ prog = 0.72 + 0.20 * ((b + 1) / batches)
732
+ progress(prog, desc=f"LLM judging batches ({b+1}/{batches})...")
733
 
734
  llm_batch = [
735
  {
736
  "filename": c["filename"],
737
  "evidence_chunks": c["evidence_chunks"],
738
+ "local_score": c["local_score"],
739
  }
740
  for c in batch
741
  ]
 
742
  out = llm_judge_rank_batch(jd_text, must_haves or "", llm_batch)
743
  batch_outputs.append(out)
744
 
745
+ progress(0.94, desc="Finalizing report...")
746
+
747
  judged = merge_llm_batches(batch_outputs)
748
  ranked = judged.ranked
749
  if not ranked:
750
  raise gr.Error("LLM returned an empty ranking.")
751
 
 
752
  report_html = render_top10_html(ranked, total_count=len(ranked))
753
 
 
754
  tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".csv")
755
  with open(tmp.name, "w", newline="", encoding="utf-8") as f:
756
  w = csv.writer(f)
757
  w.writerow(["Rank", "Filename", "FinalScore(0-100)", "FitLevel", "Name", "Email", "Phone", "Summary"])
758
+ for ridx, c in enumerate(ranked, start=1):
759
  ci = contacts_map.get(c.filename, {"name": "", "email": "", "phone": ""})
760
  w.writerow([
761
+ ridx,
762
  c.filename,
763
  round(float(c.final_score), 2),
764
  c.fit_level,
 
768
  c.summary,
769
  ])
770
 
 
771
  shortlist_rows = []
772
+ for ridx, c in enumerate(ranked, start=1):
773
  ci = contacts_map.get(c.filename, {"name": "", "email": "", "phone": ""})
774
  shortlist_rows.append([
775
  False,
776
+ ridx,
777
  c.filename,
778
  round(float(c.final_score), 2),
779
  c.fit_level,
 
789
 
790
  elapsed = time.time() - t0
791
  meta = (
792
+ f"**CVs uploaded:** {len(cv_paths)} → **Unique processed:** {len(unique_paths)} (Max allowed: {MAX_CV_UPLOADS}) \n"
793
  f"**Ranked (ALL):** {len(ranked)} \n"
794
+ f"**LLM batches:** {batches} (batch size={LLM_BATCH_SIZE}) \n"
795
  f"**Time:** {elapsed:.2f}s \n"
796
  f"**Duplicates skipped:** {len(duplicates)} \n\n"
797
  f"**LLM Notes:** {(judged.overall_notes or '').strip()}"
798
  )
799
 
800
+ progress(1.0, desc="Done ✅")
801
  return report_html, meta, tmp.name, shortlist_df, "", ""
802
 
803
 
804
  # =========================================================
805
+ # SGS Theme / CSS (white text + MET green + nice touches)
806
  # =========================================================
807
  CUSTOM_CSS = """
808
  :root{
809
  --sgs-blue:#0B3D91;
810
  --sgs-green:#00A651;
811
  --text:#F3F7FF;
 
812
  --line:rgba(255,255,255,.14);
 
813
  }
814
 
815
  .gradio-container{max-width:1180px !important;}
 
819
  linear-gradient(180deg, #060914, #060914) !important;
820
  }
821
 
822
+ .gradio-container, .gradio-container *{ color: var(--text); }
 
 
 
823
 
824
+ /* Hero */
825
  .hero{
826
  border:1px solid var(--line);
827
  background: linear-gradient(135deg, rgba(11,61,145,.40), rgba(0,166,81,.20));
 
833
  gap:16px;
834
  box-shadow: 0 18px 40px rgba(0,0,0,.38);
835
  margin: 12px 0 16px;
836
+ position: relative;
837
+ overflow: hidden;
838
  }
839
+ .hero:before{
840
+ content:"";
841
+ position:absolute;
842
+ inset:-40%;
843
+ background: radial-gradient(circle at 30% 30%, rgba(255,255,255,.10), transparent 45%);
844
+ transform: rotate(18deg);
845
+ pointer-events:none;
846
+ }
847
+ .hero-title{font-weight:900;font-size:22px;position:relative;}
848
+ .hero-sub{color:rgba(243,247,255,.90);margin-top:6px;font-size:13px;position:relative;}
849
+ .hero-right{display:flex;gap:10px;flex-wrap:wrap;justify-content:flex-end;position:relative;}
850
 
851
  .kpi{
852
  background: rgba(255,255,255,.08);
 
854
  border-radius: 16px;
855
  padding: 10px 12px;
856
  min-width: 140px;
857
+ backdrop-filter: blur(6px);
858
  }
859
+ .kpi-label{color:rgba(243,247,255,.82);font-size:12px;font-weight:700;}
860
+ .kpi-val{font-size:18px;font-weight:900;margin-top:2px;}
861
 
862
+ /* Cards */
863
  .cards{display:grid;grid-template-columns: 1fr; gap: 12px;}
864
  .card{
865
  background: linear-gradient(180deg, rgba(16,26,44,.98), rgba(12,19,34,.88));
866
+ border:1px solid rgba(255,255,255,.14);
867
  border-radius: 18px;
868
  padding: 14px;
869
  box-shadow: 0 14px 28px rgba(0,0,0,.28);
870
+ transition: transform .18s ease, box-shadow .18s ease, border-color .18s ease;
871
+ }
872
+ .card:hover{
873
+ transform: translateY(-2px);
874
+ box-shadow: 0 20px 40px rgba(0,0,0,.38);
875
+ border-color: rgba(255,255,255,.20);
876
  }
877
  .card-top{display:flex;align-items:flex-start;justify-content:space-between;gap:10px;}
878
  .card-title{display:flex;gap:10px;align-items:baseline;flex-wrap:wrap;}
 
887
  .file{font-weight:900;font-size:16px;}
888
  .card-meta{display:flex;gap:8px;align-items:center;flex-wrap:wrap;justify-content:flex-end;}
889
 
890
+ /* Badges */
891
  .badge{
892
  display:inline-flex;align-items:center;
893
  padding: 6px 10px;border-radius: 999px;font-size:12px;font-weight:900;
 
909
  .p-low{ background: rgba(245,158,11,.16); border-color: rgba(245,158,11,.28); }
910
  .p-bad{ background: rgba(239,68,68,.14); border-color: rgba(239,68,68,.28); }
911
 
912
+ /* Score bar */
913
  .bar{
914
  width: 100%; height: 10px; border-radius: 999px;
915
  background: rgba(255,255,255,.10); overflow: hidden;
 
921
  background: linear-gradient(90deg, var(--sgs-green), #4fb2ff, var(--sgs-blue));
922
  }
923
 
924
+ .summary{font-size:13px;line-height:1.55rem;margin: 6px 0 10px;color:#fff;}
925
+ .section-title{font-size:13px;font-weight:900;margin:10px 0 6px;color:#fff;}
 
 
 
 
 
 
 
 
 
 
926
 
927
  .grid{display:grid;grid-template-columns: 1fr 1fr; gap: 14px;}
928
  @media(max-width:860px){.grid{grid-template-columns:1fr;}}
929
 
930
+ .list{margin:0;padding-left:18px;color:#fff;}
931
+ .list li{margin:6px 0;line-height:1.30rem;color:#fff;}
932
 
933
+ /* Quotes / Evidence */
934
  .quotes{display:grid;gap:10px;margin-top:6px;}
935
  .quote{
936
+ background: rgba(255,255,255,.10);
937
+ border:1px solid rgba(255,255,255,.16);
938
  border-radius: 14px;
939
  padding: 10px 12px;
940
+ color: #fff;
941
  font-size: 13px;
942
  line-height: 1.45rem;
943
  }
944
 
945
+ /* Checklist */
946
  .checklist{display:grid;gap:8px;margin-top:6px;}
947
  .checkrow{
948
  display:grid; grid-template-columns: 1.1fr .4fr 1.5fr; gap:10px;
949
  padding:10px 12px; border-radius:14px;
950
+ border:1px solid rgba(255,255,255,.18);
951
+ background: rgba(255,255,255,.10);
952
  font-size:13px;
953
+ position: relative;
954
+ overflow: hidden;
955
+ }
956
+ .checkrow:before{
957
+ content:"";
958
+ position:absolute;
959
+ left:0; top:0; bottom:0;
960
+ width:4px;
961
+ background: rgba(255,255,255,.20);
962
  }
963
+ .checkrow .req{font-weight:900;color:#fff;}
964
+ .checkrow .ev{color:rgba(255,255,255,0.95);}
965
+ .checkrow .st{font-weight:1000;text-align:center;letter-spacing:.4px;}
966
 
967
+ /* Status colors (MET green) */
968
+ .checkrow.ok:before{ background: rgba(0,166,81,.95); }
969
+ .checkrow.partial:before{ background: rgba(245,158,11,.95); }
970
+ .checkrow.miss:before{ background: rgba(239,68,68,.95); }
971
 
972
+ .checkrow.ok .st{ color:#22ffb6 !important; text-shadow: 0 0 10px rgba(34,255,182,.18); }
973
+ .checkrow.partial .st{ color:#ffd27a !important; }
974
+ .checkrow.miss .st{ color:#ff9a9a !important; }
975
 
976
+ /* Dataframe border */
977
+ table { border-color: rgba(255,255,255,.14) !important; }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
978
  """
979
 
980
 
 
990
  )
991
 
992
  with gr.Blocks(title="SGS ATS Candidate Matcher", theme=theme, css=CUSTOM_CSS) as demo:
993
+ gr.Markdown(f"""
994
  # SGS ATS Candidate Matcher
995
  Evidence-based CV ranking against a Job Description (Top 10 Report + Shortlisting).
996
+ **Max CV uploads:** {MAX_CV_UPLOADS}
997
  **Important:** set `HF_TOKEN` in Space secrets.
998
  """)
999
 
1000
  with gr.Row():
1001
  jd_file = gr.File(label="Job Description file (PDF/DOCX/TXT)", file_types=[".pdf", ".docx", ".txt"])
1002
+ cv_files = gr.File(label=f"Upload CVs (max {MAX_CV_UPLOADS})", file_count="multiple", file_types=[".pdf", ".docx", ".txt"])
1003
 
1004
  with gr.Accordion("Settings", open=False):
1005
  must_haves = gr.Textbox(