yhng2525 commited on
Commit
1e7bf9e
·
0 Parent(s):

Initial clean release: production code only

Browse files
.gitattributes ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Secrets
2
+ .env
3
+ *.env
4
+
5
+ # Python
6
+ __pycache__/
7
+ *.pyc
8
+ *.pyo
9
+ *.pyd
10
+
11
+ # Data outputs
12
+ data/
13
+ *.json
14
+
15
+ # OS / IDE
16
+ .DS_Store
17
+ .vscode/
18
+ .ipynb_checkpoints/
19
+ app-history/
README.md ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Ai Resume Ranking (hybrid)
3
+ emoji: 🚀
4
+ colorFrom: pink
5
+ colorTo: purple
6
+ sdk: gradio
7
+ sdk_version: 5.50.0
8
+ python_version: "3.11"
9
+ app_file: app.py
10
+ pinned: false
11
+ short_description: iti123-project AI-Powered JD-based Resume Ranking System
12
+ ---
13
+
14
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
15
+ # was sdk_version: 6.2.0
app.py ADDED
@@ -0,0 +1,1034 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # app.py
2
+ # with Step1 patch, add try/except wrapper
3
+ # with added step6 agentic explain & interview kit
4
+ # _with_diagnostics_fixed
5
+
6
+ import os, json, shutil
7
+ from pathlib import Path
8
+
9
+ import gradio as gr
10
+ import pandas as pd
11
+
12
+ from dotenv import load_dotenv
13
+ load_dotenv()
14
+
15
+ from utils.file_loader import load_text_from_file
16
+ from core.jd_processor import generate_jd_rubric
17
+ from core.resume_parser import parse_resume
18
+ from core.matcher import match_candidate_to_jd
19
+ from core.ranking import build_ranking
20
+ from core.explainability import explain_candidate
21
+
22
+ from openai import OpenAI
23
+
24
+ import re
25
+ import datetime
26
+ import zipfile
27
+
28
+
29
# ---------- Folders ----------
# On-disk layout for pipeline artifacts; each numbered step reads/writes one of these.
RESUME_DIR = Path("data/resumes")    # raw uploaded resumes (Step 2)
CAND_DIR = Path("data/candidates")   # parsed candidate JSON (Step 3)
MATCH_DIR = Path("data/matches")     # JD-vs-candidate match JSON (Step 4)
DATA_DIR = Path("data")              # root for logs, rubric, ranking output

# Create the working folders up front so later steps can assume they exist.
for p in [RESUME_DIR, CAND_DIR, MATCH_DIR]:
    p.mkdir(parents=True, exist_ok=True)

# JD rubric produced by Step 1 and consumed by Steps 4-6.
JD_PATH = DATA_DIR / "jd.json"
39
+
40
+
41
# ---------- Theme CSS ----------
def theme_css(mode: str) -> str:
    """Return the full CSS payload for the selected reader theme.

    Args:
        mode: One of "Light", "HF Dark", "High Contrast". Any other value
            falls back to the HF dark styles.

    Returns:
        A CSS string: shared layout rules ("base") followed by the
        per-theme colour rules.
    """
    # Projector-friendly + layout improvements shared by all themes.
    base = """
.gradio-container {
  max-width: 1200px !important;
  margin: 0 auto !important;
  padding-bottom: 24px !important;
}

/* Bigger default text + spacing (projector-friendly) */
.gradio-container, .gradio-container * {
  font-size: 16px;
  line-height: 1.55;
}

/* Buttons + inputs spacing */
button, .gr-button, .gr-input, .gr-textbox, .gr-file, .gr-dropdown {
  padding-top: 10px !important;
  padding-bottom: 10px !important;
}

/* Code/JSON blocks */
pre, code, .cm-editor, .cm-scroller {
  font-family: ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, "Liberation Mono", "Courier New", monospace !important;
  font-size: 15px !important;
  line-height: 1.65 !important;
  border-radius: 10px !important;
}

/* Make Code component taller by default */
.cm-scroller { min-height: 420px !important; }
"""

    # Light theme
    light = """
:root {
  --panel-border: #e5e7eb;
  --code-bg: #f8fafc;
  --code-fg: #0f172a;
}
body { background: #f5f5f5; }
pre, code, .cm-editor {
  background-color: var(--code-bg) !important;
  color: var(--code-fg) !important;
  border: 1px solid var(--panel-border) !important;
}
"""

    # HF-like dark theme (matches HF dark vibe better)
    hf_dark = """
:root {
  --panel-border: #1f2937;
  --code-bg: #0f172a;
  --code-fg: #e5e7eb;

  /* Text colors */
  --title-fg: #ffffff;   /* pure white */
  --header-fg: #d1d5db;  /* light grey */
  --muted-fg: #9ca3af;   /* passive text */
}

body { background: #0b1220; }

/* ===== Main page title ===== */
h1, h1 * {
  color: var(--title-fg) !important;
  font-weight: 700 !important;
}

/* ===== Section headers ===== */
h2, h3, h4,
h2 *, h3 *, h4 * {
  color: var(--header-fg) !important;
  font-weight: 600 !important;
}

/* ===== Tab labels (inactive) ===== */
button[role="tab"] {
  color: var(--muted-fg) !important;
  font-weight: 500 !important;
}

/* ===== Active tab ===== */
button[role="tab"][aria-selected="true"] {
  color: #fb923c !important; /* your orange highlight */
  font-weight: 700 !important;
}

/* ===== Code / JSON blocks ===== */
pre, code, .cm-editor {
  background-color: var(--code-bg) !important;
  color: var(--code-fg) !important;
  border: 1px solid var(--panel-border) !important;
}
"""

    # High contrast theme
    high_contrast = """
:root {
  --panel-border: #ffffff;
  --code-bg: #000000;
  --code-fg: #ffffff;

  /* seeable text colors */
  --title-fg: #ffffff;
  --header-fg: #ffffff;
  --muted-fg: #e5e7eb; /* brighter grey for inactive tabs */
}

body { background: #000000; }

/* ===== Main page title ===== */
h1, h1 * {
  color: var(--title-fg) !important;
  font-weight: 800 !important;
}

/* ===== Section headers ===== */
h2, h3, h4,
h2 *, h3 *, h4 * {
  color: var(--header-fg) !important;
  font-weight: 700 !important;
}

/* ===== Tab labels (inactive) ===== */
button[role="tab"] {
  color: var(--muted-fg) !important;
  font-weight: 600 !important;
}

/* ===== Active tab ===== */
button[role="tab"][aria-selected="true"] {
  color: #ffd500 !important; /* high contrast yellow */
  font-weight: 800 !important;
}

/* ===== Code / JSON blocks ===== */
pre, code, .cm-editor {
  background-color: var(--code-bg) !important;
  color: var(--code-fg) !important;
  border: 2px solid var(--panel-border) !important;
}
"""

    css_map = {
        "Light": light,
        "HF Dark": hf_dark,
        "High Contrast": high_contrast,
    }
    # Unknown mode falls back to the HF dark styles.
    return base + css_map.get(mode, hf_dark)
192
+
193
+
194
+ def _ensure_api_key():
195
+ if not os.getenv("OPENAI_API_KEY"):
196
+ raise RuntimeError(
197
+ "OPENAI_API_KEY is not set. "
198
+ "On Hugging Face: Settings → Secrets → OPENAI_API_KEY. "
199
+ "On local/Colab: set environment variable before running."
200
+ )
201
+
202
+
203
+ def _pretty_json(obj) -> str:
204
+ return json.dumps(obj, indent=2, ensure_ascii=False)
205
+
206
+
207
# ---------- Steps ----------

def step1_generate_jd(file_obj):
    """Step 1: read an uploaded JD file, build its rubric, persist to data/jd.json.

    Returns (rubric_json_text, status_message); on any failure the first
    element is "" and the status carries the error.
    """
    try:
        _ensure_api_key()
        if file_obj is None:
            return "", "Please upload a JD file."

        # Gradio hands us a tempfile wrapper; its .name is the on-disk path.
        with open(file_obj.name, "rb") as fh:
            raw_text = load_text_from_file(fh)

        rubric = generate_jd_rubric(raw_text)

        # Persist so Steps 4-6 can reload the rubric without regenerating it.
        JD_PATH.parent.mkdir(parents=True, exist_ok=True)
        with open(JD_PATH, "w", encoding="utf-8") as sink:
            json.dump(rubric, sink, indent=2, ensure_ascii=False)

        return _pretty_json(rubric), f"✅ JD rubric saved to {JD_PATH}"

    except Exception as e:
        return "", f"❌ Step 1 failed: {type(e).__name__}: {e}"
228
+
229
+
230
def step2_save_resumes(files, overwrite):
    """Step 2: copy uploaded resume files into RESUME_DIR.

    When *overwrite* is falsy, files whose names already exist are skipped.
    Returns a one-line status message.
    """
    if not files:
        return "Please upload at least one resume."

    saved = 0
    skipped = 0
    for upload in files:
        target = RESUME_DIR / Path(upload.name).name
        if target.exists() and not overwrite:
            # Same filename already stored and overwriting is disabled.
            skipped += 1
        else:
            shutil.copyfile(upload.name, target)
            saved += 1

    msg = f"✅ Saved {saved} resume(s) into {RESUME_DIR}"
    if skipped:
        msg += f" | ⏭ Skipped {skipped} duplicate(s) (overwrite=False)"
    return msg
247
+
248
+
249
+
250
def step3_parse_resumes():
    """Step 3: parse every PDF/DOCX in RESUME_DIR into candidate JSON files.

    Per-file failures are appended to data/parse_step3_errors.log and do
    not abort the batch. Returns a one-line status message.
    """
    try:
        _ensure_api_key()
        resume_paths = sorted(
            p for p in RESUME_DIR.iterdir() if p.suffix.lower() in [".pdf", ".docx"]
        )
        if not resume_paths:
            return "No resumes found in data/resumes. Upload resumes first."

        CAND_DIR.mkdir(parents=True, exist_ok=True)

        failed = 0
        for resume_path in resume_paths:
            try:
                with open(resume_path, "rb") as fh:
                    text = load_text_from_file(fh)

                parsed = parse_resume(text, resume_path.name)

                dest = CAND_DIR / (resume_path.stem + ".json")
                with open(dest, "w", encoding="utf-8") as sink:
                    json.dump(parsed, sink, indent=2, ensure_ascii=False)

            except Exception as e:
                # Log and continue: one bad resume must not kill the batch.
                failed += 1
                DATA_DIR.mkdir(parents=True, exist_ok=True)
                with open(DATA_DIR / "parse_step3_errors.log", "a", encoding="utf-8") as log:
                    log.write(f"{resume_path.name}: {type(e).__name__}: {e}\n")

        msg = f"✅ Parsed {len(resume_paths) - failed} resume(s) into {CAND_DIR}"
        if failed:
            msg += f" | ⚠️ Failed: {failed} (see data/parse_step3_errors.log)"
        return msg

    except Exception as e:
        return f"❌ Step 3 failed: {type(e).__name__}: {e}"
285
+
286
+
287
+
288
def step4_generate_matches():
    """Step 4: score every parsed candidate against the saved JD rubric.

    Writes one `<stem>_match.json` per candidate into MATCH_DIR. Per-candidate
    failures are appended to data/match_errors.log without stopping the batch.
    Returns a one-line status message.
    """
    try:
        _ensure_api_key()
        if not JD_PATH.exists():
            return "No JD rubric found. Run Step 1 first."

        candidate_paths = sorted(
            p for p in CAND_DIR.iterdir() if p.suffix.lower() == ".json"
        )
        if not candidate_paths:
            return "No candidates found. Run Step 3 first."

        with open(JD_PATH, "r", encoding="utf-8") as fh:
            rubric = json.load(fh)

        created = 0
        failed = 0
        for cand_path in candidate_paths:
            try:
                with open(cand_path, "r", encoding="utf-8") as fh:
                    candidate = json.load(fh)

                result = match_candidate_to_jd(rubric, candidate)

                dest = MATCH_DIR / (cand_path.stem + "_match.json")
                with open(dest, "w", encoding="utf-8") as sink:
                    json.dump(result, sink, indent=2, ensure_ascii=False)

                created += 1
            except Exception as e:
                # Log and continue so one bad candidate doesn't kill the run.
                failed += 1
                DATA_DIR.mkdir(parents=True, exist_ok=True)
                with open(DATA_DIR / "match_errors.log", "a", encoding="utf-8") as log:
                    log.write(f"{cand_path.name}: {type(e).__name__}: {e}\n")

        msg = f"✅ Created {created} match file(s) in {MATCH_DIR}"
        if failed:
            msg += f" | ⚠️ Failed: {failed} (see data/match_errors.log)"
        return msg

    except Exception as e:
        return f"❌ Step 4 failed: {type(e).__name__}: {e}"
327
+
328
+
329
def step4_view_match(selected):
    """Return the pretty-printed JSON of one match file, or "" if unavailable."""
    if not selected:
        return ""
    target = MATCH_DIR / selected
    if not target.exists():
        return ""
    with open(target, "r", encoding="utf-8") as fh:
        data = json.load(fh)
    return _pretty_json(data)
337
+
338
+
339
+
340
def step5_rank(top_k):
    """Step 5: build the top-*top_k* candidate ranking as a DataFrame.

    Returns (DataFrame_or_None, status_message). build_ranking() also
    persists the full ranking to data/ranking.json.
    """
    try:
        _ensure_api_key()
        if not JD_PATH.exists():
            return None, "No JD rubric found. Run Step 1 first."

        if not any(p.name.endswith("_match.json") for p in MATCH_DIR.iterdir()):
            return None, "No match files found. Run Step 4 first."

        ranking = build_ranking(top_k=int(top_k))

        # Flatten each ranked entry (plus its score breakdown) into one table row.
        rows = []
        for position, entry in enumerate(ranking.get("ranking", []), start=1):
            breakdown = entry.get("breakdown", {}) or {}
            rows.append({
                "Rank": position,
                "Candidate": entry.get("candidate_name", ""),
                "Score": entry.get("total_score", 0),
                "Base Score": breakdown.get("base_score", 0),
                "Adj": breakdown.get("bonus_penalty_adjustment", 0),
                "Pos": breakdown.get("positive_count", 0),
                "Neg": breakdown.get("negative_count", 0),
                "Must-have": breakdown.get("must_have_coverage", 0),
                "Nice-to-have": breakdown.get("nice_to_have_coverage", 0),
                "Experience": breakdown.get("experience_score", 0),
                "Match File": entry.get("match_file", ""),
            })

        return pd.DataFrame(rows), "✅ Ranking generated (also saved to data/ranking.json)"

    except Exception as e:
        return None, f"❌ Step 5 failed: {type(e).__name__}: {e}"
373
+
374
+
375
def list_match_files():
    """Names of all *_match.json files in MATCH_DIR, sorted alphabetically."""
    names = (p.name for p in MATCH_DIR.iterdir())
    return sorted(n for n in names if n.endswith("_match.json"))
377
+
378
def step6_explain(match_filename):
    """Step 6: produce an agentic explanation for one candidate's match file.

    Returns (explanation_json_text, status_message); on failure the first
    element is "".
    """
    try:
        _ensure_api_key()
        if not match_filename:
            return "", "Please select a match file."

        match_path = MATCH_DIR / match_filename
        if not match_path.exists():
            return "", "Match file not found. Run Step 4 first."
        if not JD_PATH.exists():
            return "", "JD rubric not found. Run Step 1 first."

        explanation = explain_candidate(
            jd_path=str(JD_PATH),
            match_path=str(match_path),
            rank=None,
        )
        return json.dumps(explanation, indent=2, ensure_ascii=False), "✅ Explanation generated"

    except Exception as e:
        return "", f"❌ Step 6 failed: {type(e).__name__}: {e}"
401
+
402
+ def _tail_file(path: Path, max_lines: int = 200) -> str:
403
+ """Read the last N lines of a text file safely."""
404
+ if not path.exists():
405
+ return f"(No file found) {path}"
406
+
407
+ try:
408
+ with open(path, "r", encoding="utf-8", errors="replace") as f:
409
+ lines = f.readlines()
410
+ if len(lines) <= max_lines:
411
+ return "".join(lines)
412
+ return "".join(lines[-max_lines:])
413
+ except Exception as e:
414
+ return f"Failed to read {path}: {type(e).__name__}: {e}"
415
+
416
+
417
def view_diagnostics_log(show: bool, which: str, last_n: int) -> str:
    """Show the tail of one pipeline error log when diagnostics are enabled."""
    if not show:
        return "Diagnostics hidden. Enable 'Show diagnostics' to view logs."

    log_map = {
        "Resume parser (data/parse_errors.log)": DATA_DIR / "parse_errors.log",
        "Step 3 batch parse (data/parse_step3_errors.log)": DATA_DIR / "parse_step3_errors.log",
        "Step 4 matching (data/match_errors.log)": DATA_DIR / "match_errors.log",
    }
    target = log_map.get(which)
    if target is None:
        return "Unknown log selection."

    # Clamp the requested line count to a sane window (20..2000, default 200).
    limit = int(last_n) if last_n else 200
    limit = max(20, min(limit, 2000))
    return _tail_file(target, max_lines=limit)
433
+
434
+
435
def clear_diagnostics_log(show: bool, which: str) -> str:
    """Truncate one pipeline error log when diagnostics are enabled."""
    if not show:
        return "Diagnostics hidden. Enable 'Show diagnostics' to clear logs."

    log_map = {
        "Resume parser (data/parse_errors.log)": DATA_DIR / "parse_errors.log",
        "Step 3 batch parse (data/parse_step3_errors.log)": DATA_DIR / "parse_step3_errors.log",
        "Step 4 matching (data/match_errors.log)": DATA_DIR / "match_errors.log",
    }
    target = log_map.get(which)
    if target is None:
        return "Unknown log selection."

    try:
        DATA_DIR.mkdir(parents=True, exist_ok=True)
        # Opening in "w" mode truncates the log to zero bytes.
        with open(target, "w", encoding="utf-8") as fh:
            fh.write("")
        return f"✅ Cleared {target}"
    except Exception as e:
        return f"Failed to clear {target}: {type(e).__name__}: {e}"
455
+
456
+ # ---------- Weight Review & Adjustment (Step 1 enhancement) ----------
457
+
458
+ def _get_weight_dict(jd_rubric: dict):
459
+ """
460
+ Returns (weight_dict, mode) where mode indicates which key was used.
461
+ Preference order:
462
+ 1) recommended_weights (expected to be percentage numbers that sum to 100)
463
+ 2) weights (optional legacy key)
464
+ """
465
+ if not isinstance(jd_rubric, dict):
466
+ return None, "none"
467
+ rw = jd_rubric.get("recommended_weights")
468
+ if isinstance(rw, dict) and rw:
469
+ return rw, "recommended_weights"
470
+ w = jd_rubric.get("weights")
471
+ if isinstance(w, dict) and w:
472
+ return w, "weights"
473
+ return None, "none"
474
+
475
+
476
def step1_prepare_weight_editor(jd_json_text: str):
    """Build the editable weight table from the JD rubric JSON text.

    Table columns: label | recommended | adjusted | key.
    Returns (DataFrame, mode, status_message).
    """
    try:
        jd_rubric = json.loads(jd_json_text) if jd_json_text else {}
    except Exception:
        return pd.DataFrame([]), "none", "⚠️ Could not parse JD JSON. Generate JD rubric first."

    weight_dict, mode = _get_weight_dict(jd_rubric)
    if mode == "none" or not weight_dict:
        return pd.DataFrame([]), "none", "ℹ️ No editable weights detected. Expected 'recommended_weights' in JD JSON."

    rows = []
    for key, value in weight_dict.items():
        try:
            numeric = float(value)
        except Exception:
            numeric = value  # keep non-numeric values as-is; validated on apply
        rows.append({
            "label": key.replace("_", " ").title(),
            "recommended": numeric,
            "adjusted": numeric,  # starts equal to the recommendation
            "key": key,
        })

    df = pd.DataFrame(rows)[["label", "recommended", "adjusted", "key"]]

    try:
        total = float(df["adjusted"].sum())
    except Exception:
        total = 0.0

    hint = ""
    if mode == "recommended_weights":
        hint = f" Current total = {total:.2f} (must be 100)."
    return df, mode, f"✅ Loaded {len(rows)} weight item(s). Edit 'adjusted' then click Apply.{hint}"
515
+
516
+
517
def step1_apply_weight_adjustments(jd_json_text: str, weight_df, mode: str):
    """Write edited weights back into the JD rubric and save data/jd.json.

    Closed-loop validation: when mode == 'recommended_weights' the adjusted
    values must sum to exactly 100 or nothing is applied.
    Returns (rubric_json_text, status_message).
    """
    try:
        jd_rubric = json.loads(jd_json_text) if jd_json_text else {}
    except Exception as e:
        return "", f"❌ Cannot apply weights: JD JSON invalid: {type(e).__name__}: {e}"

    if mode not in ("recommended_weights", "weights"):
        return _pretty_json(jd_rubric), "ℹ️ No weight adjustments applied (no editable weights detected)."

    try:
        df = weight_df if isinstance(weight_df, pd.DataFrame) else pd.DataFrame(weight_df)
    except Exception as e:
        return _pretty_json(jd_rubric), f"❌ Cannot read edited weights table: {type(e).__name__}: {e}"

    if not {"key", "adjusted"}.issubset(set(df.columns)):
        return _pretty_json(jd_rubric), "❌ Weights table format unexpected. Click 'Load weights' again."

    adjusted = {}
    try:
        for _, row in df.iterrows():
            adjusted[str(row.get("key"))] = float(row.get("adjusted"))
    except Exception as e:
        return _pretty_json(jd_rubric), f"❌ Adjusted weights must be numeric: {type(e).__name__}: {e}"

    total = sum(adjusted.values())
    if mode == "recommended_weights" and abs(total - 100.0) > 1e-6:
        return _pretty_json(jd_rubric), f"⚠️ Adjusted weights must add up to 100. Current total = {total:.2f}. No changes applied."

    if mode == "recommended_weights":
        # Preserve the model's original recommendation the first time the
        # user overrides it.
        if "recommended_weights_original" not in jd_rubric and isinstance(jd_rubric.get("recommended_weights"), dict):
            jd_rubric["recommended_weights_original"] = jd_rubric["recommended_weights"]
        jd_rubric["recommended_weights"] = adjusted
    else:
        jd_rubric["weights"] = adjusted

    try:
        JD_PATH.parent.mkdir(parents=True, exist_ok=True)
        with open(JD_PATH, "w", encoding="utf-8") as out:
            json.dump(jd_rubric, out, indent=2, ensure_ascii=False)
    except Exception as e:
        return _pretty_json(jd_rubric), f"⚠️ Weights updated but failed to save: {type(e).__name__}: {e}"

    return _pretty_json(jd_rubric), "✅ Weight adjustments applied and saved to data/jd.json"
566
+
567
# ---------- Email Generator (Step 7) ----------

def _llm_polish_email(draft_email: str,
                      candidate_name: str,
                      role_title: str,
                      company_name: str,
                      strengths,
                      gaps,
                      next_step: str,
                      tone: str,
                      model: str,
                      temperature: float):
    """Rewrite a template email with the OpenAI Responses API (SDK v2 compatible).

    Returns:
        (email_text, used_ai: bool, err: str) — on any failure the original
        *draft_email* is returned unchanged with used_ai=False and a reason
        string, so callers can always fall back to the template.
    """
    api_key = os.getenv("OPENAI_API_KEY")
    if not api_key:
        return draft_email, False, "OPENAI_API_KEY missing"

    try:
        client = OpenAI(api_key=api_key)

        strengths_list = strengths or []
        gaps_list = gaps or []

        # Cap bullets at 5 each; fall back to placeholder text when empty.
        strengths_txt = "\n".join([f"- {s}" for s in strengths_list[:5]]) or "- (To be discussed)"
        gaps_txt = "\n".join([f"- {g}" for g in gaps_list[:5]]) or "- (None noted)"

        style_rules = {
            "Warm": "Warm, respectful, encouraging.",
            "Formal": "Formal, concise, professional.",
            "Casual": "Friendly, short, professional (not slangy).",
        }
        # Unknown/empty tone falls back to "Warm".
        style = style_rules.get(tone or "Warm", style_rules["Warm"])

        system_prompt = (
            "You are an HR assistant. Rewrite the email draft into a polished, professional hiring email.\n"
            "Hard requirements:\n"
            "- Output plain text only.\n"
            "- Keep a Subject line starting with 'Subject:'.\n"
            "- 150–220 words.\n"
            "- Do NOT invent facts.\n"
            "- IMPORTANT: Do not reuse any sentence from the draft longer than 8 words. Rephrase fully.\n"
        )

        user_prompt = f"""
Tone: {style}

Role title: {role_title}
Company: {company_name}
Candidate: {candidate_name}

Key strengths (use if relevant):
{strengths_txt}

Potential gaps / points to clarify (use gently):
{gaps_txt}

Next step text (must include):
{next_step}

Draft to rewrite (rewrite fully, don’t copy sentences):
{draft_email}
""".strip()

        resp = client.responses.create(
            model=(model or "gpt-4o-mini"),
            input=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt},
            ],
            temperature=float(temperature) if temperature is not None else 0.6,
            max_output_tokens=450,
        )

        out = (resp.output_text or "").strip()
        if not out:
            return draft_email, False, "Empty response from model"

        # Model echoing the draft back counts as "AI used" but is flagged
        # so the caller can report why nothing changed.
        if out.strip() == draft_email.strip():
            return draft_email, True, "Model returned identical text"

        return out, True, ""

    except Exception as e:
        return draft_email, False, f"{type(e).__name__}: {e}"
654
+
655
+
656
+
657
# Ranking produced by Step 5 (build_ranking) and consumed by Step 7 emails.
RANKING_PATH = DATA_DIR / "ranking.json"
658
+
659
+ def _safe_filename(s: str) -> str:
660
+ s = re.sub(r"[^\w\-\. ]+", "", s.strip())
661
+ s = re.sub(r"\s+", "_", s)
662
+ return s[:80] or "candidate"
663
+
664
+
665
+ def _load_json(path: Path) -> dict:
666
+ with open(path, "r", encoding="utf-8") as f:
667
+ return json.load(f)
668
+
669
+
670
def load_ranking_for_email():
    """Load data/ranking.json into rows for email generation.

    Returns (rows, status_message) where each row is a dict with keys
    rank, candidate_name, score, match_file. Accepts either the Step 5
    JSON keys or the DataFrame-style column names.
    """
    if not RANKING_PATH.exists():
        return [], "No ranking.json found. Run Step 5 first."

    try:
        data = _load_json(RANKING_PATH)
        items = data.get("ranking", []) if isinstance(data, dict) else []
        rows = [
            {
                "rank": entry.get("rank") or entry.get("Rank") or "",
                "candidate_name": entry.get("candidate_name") or entry.get("Candidate") or "",
                "score": entry.get("total_score") or entry.get("Score") or 0,
                "match_file": entry.get("match_file") or entry.get("Match File") or "",
            }
            for entry in items
        ]
        # Fill in positional ranks where the file didn't record one.
        for position, row in enumerate(rows, start=1):
            if not row.get("rank"):
                row["rank"] = position
        return rows, f"✅ Loaded {len(rows)} candidate(s) from data/ranking.json"
    except Exception as e:
        return [], f"❌ Failed to read ranking.json: {type(e).__name__}: {e}"
695
+
696
+
697
+ def _extract_candidate_highlights(match_data: dict):
698
+ """
699
+ Best-effort extraction of strengths/gaps from match JSON.
700
+ Returns (strengths_list, gaps_list, summary_text)
701
+ """
702
+ if not isinstance(match_data, dict):
703
+ return [], [], ""
704
+
705
+ strengths = match_data.get("strengths") or match_data.get("positives") or match_data.get("highlights") or []
706
+ gaps = match_data.get("gaps") or match_data.get("negatives") or match_data.get("risks") or []
707
+
708
+ # If stored as strings, wrap
709
+ if isinstance(strengths, str):
710
+ strengths = [strengths]
711
+ if isinstance(gaps, str):
712
+ gaps = [gaps]
713
+
714
+ # fallback: sometimes stored under "analysis" or "summary"
715
+ summary = match_data.get("summary") or match_data.get("jd_aligned_summary") or match_data.get("final_summary") or ""
716
+ if isinstance(summary, dict):
717
+ summary = json.dumps(summary, ensure_ascii=False)
718
+
719
+ # ensure lists
720
+ strengths = [str(x) for x in strengths if x]
721
+ gaps = [str(x) for x in gaps if x]
722
+ return strengths[:5], gaps[:5], str(summary)[:1200]
723
+
724
+
725
+ def _template_email(candidate_name: str, role_title: str, company_name: str, sender_name: str,
726
+ strengths, next_step: str, contact: str, tone: str):
727
+ """
728
+ Deterministic, fast email template (no LLM) suitable for HF stability.
729
+ """
730
+ greet_name = candidate_name.strip() or "there"
731
+
732
+ if tone == "Warm":
733
+ opening = f"Thank you for taking the time to apply for the {role_title} position at {company_name}."
734
+ elif tone == "Formal":
735
+ opening = f"Thank you for your application for the {role_title} position at {company_name}."
736
+ else:
737
+ opening = f"Thanks for applying for the {role_title} role at {company_name}."
738
+
739
+ bullets = ""
740
+ if strengths:
741
+ bullets = "\n".join([f"- {s}" for s in strengths])
742
+
743
+ body = f"""Subject: Next steps — {role_title} at {company_name}
744
+
745
+ Hi {greet_name},
746
+
747
+ {opening}
748
+
749
+ After reviewing your application, we’d like to move you forward to the next stage.
750
+
751
+ Key highlights from your profile:
752
+ {bullets if bullets else "- (Highlights will be discussed during the interview)"}
753
+
754
+ Next step:
755
+ {next_step}
756
+
757
+ If you have any questions, reply to this email or contact us at {contact}.
758
+
759
+ Best regards,
760
+ {sender_name}
761
+ {company_name}
762
+ """
763
+ return body
764
+
765
+
766
def step7_generate_emails(top_k: int,
                          role_title: str,
                          company_name: str,
                          sender_name: str,
                          contact_email: str,
                          next_step_text: str,
                          tone: str,
                          use_ai: bool,
                          ai_model: str,
                          ai_temperature: float):
    """
    V2.1 — Email generator with optional AI polish (OpenAI Responses API).
    Always falls back to template output on error.

    Returns (zip_path_or_None, preview_text, status_message). Drafts for the
    top-k ranked candidates are written into a timestamped zip under
    data/out_emails/; the first two drafts are returned as a preview.
    """
    try:
        rows, msg = load_ranking_for_email()
        if not rows:
            return None, "", msg

        # Clamp the requested count to what the ranking actually contains.
        k = int(top_k) if top_k else 5
        k = max(1, min(k, len(rows)))
        rows = rows[:k]

        out_dir = DATA_DIR / "out_emails"
        out_dir.mkdir(parents=True, exist_ok=True)

        stamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
        zip_path = out_dir / f"emails_top{k}_{stamp}.zip"

        previews = []
        ai_used = False
        last_ai_reason = ''

        with zipfile.ZipFile(zip_path, "w", compression=zipfile.ZIP_DEFLATED) as zf:
            for r in rows:
                match_file = r.get("match_file", "")
                match_path = MATCH_DIR / match_file if match_file else None

                # Best-effort: a missing/corrupt match file still yields a
                # generic email. (summary is extracted but not yet used here.)
                strengths, gaps, summary = [], [], ""
                if match_path and match_path.exists():
                    try:
                        mdata = _load_json(match_path)
                        strengths, gaps, summary = _extract_candidate_highlights(mdata)
                    except Exception:
                        pass

                email_text = _template_email(
                    candidate_name=r.get("candidate_name", ""),
                    role_title=role_title or "the role",
                    company_name=company_name or "our company",
                    sender_name=sender_name or "Hiring Team",
                    strengths=strengths,
                    next_step=next_step_text or "Please reply with your availability for a short interview this week.",
                    contact=contact_email or "hr@example.com",
                    tone=tone or "Warm"
                )

                # Optional AI polish (V2)
                if use_ai:
                    polished, used_ai, reason = _llm_polish_email(
                        draft_email=email_text,
                        candidate_name=r.get("candidate_name", "") or "",
                        role_title=role_title or "the role",
                        company_name=company_name or "our company",
                        strengths=strengths,
                        gaps=gaps,
                        next_step=next_step_text or "Please reply with your availability for a short interview this week.",
                        tone=tone or "Warm",
                        model=ai_model or "gpt-4o-mini",
                        temperature=ai_temperature if ai_temperature is not None else 0.6,
                    )

                    # Track why AI didn't change content (for clarity)
                    last_ai_reason = reason or last_ai_reason

                    if used_ai and polished and polished.strip() != email_text.strip():
                        ai_used = True
                        email_text = polished

                filename = f"Rank{r.get('rank')}_{_safe_filename(r.get('candidate_name','candidate'))}.txt"
                zf.writestr(filename, email_text)

                if len(previews) < 2:
                    previews.append(email_text)

        preview_text = ""
        if previews:
            # BUG FIX: the previous expression
            #   "\n\n" + ("-" * 40) + "\n\n".join(previews)
            # placed the dashed divider once at the top (precedence: join
            # binds tighter than +). The divider is meant to SEPARATE the
            # previews, so use it as the join separator instead.
            preview_text = ("\n\n" + "-" * 40 + "\n\n").join(previews)

        status = f"✅ Generated {k} email draft(s)."
        if use_ai:
            if ai_used:
                status += f" ✨ AI polish applied (model={ai_model or 'gpt-4o-mini'})."
            else:
                reason = last_ai_reason or "AI not applied (fallback to template)."
                status += f" ⚠️ AI polish not applied. Reason: {reason}"

        return str(zip_path), preview_text, status

    except Exception as e:
        return None, "", f"❌ Step 7 failed: {type(e).__name__}: {e}"
867
+
868
+
869
# ---------- UI ----------
# Gradio front-end: one tab per pipeline step (JD rubric -> resumes ->
# parse -> match -> rank -> explain -> emails) plus an admin diagnostics tab.
# Component wiring order matters; handlers are the step*_ functions above.
with gr.Blocks(title="AI-Powered Resume Screening & Ranking System") as demo:
    gr.Markdown("# 📄 AI-Powered JD-Based Resumes Ranking System")

    # Theme selector + CSS injector
    with gr.Row():
        theme_mode = gr.Dropdown(
            choices=["Light", "HF Dark", "High Contrast"],
            value="HF Dark",
            label="Reader Theme"
        )

    # A raw <style> tag re-rendered on theme change; theme_css builds the CSS.
    style_tag = gr.HTML(value=f"<style>{theme_css('HF Dark')}</style>")

    def apply_theme(mode):
        # Replace the injected stylesheet with the CSS for the chosen theme.
        return gr.HTML(value=f"<style>{theme_css(mode)}</style>")

    theme_mode.change(fn=apply_theme, inputs=theme_mode, outputs=style_tag)

    with gr.Tab("Step 1 — JD Upload & Rubric"):
        jd_file = gr.File(label="Upload JD (PDF/DOCX/TXT)")
        jd_btn = gr.Button("Generate JD Rubric")

        # Use Code for better readability + styling control
        jd_json = gr.Code(label="JD Rubric (JSON)", language="json")
        jd_status = gr.Textbox(label="Status")

        jd_btn.click(step1_generate_jd, inputs=[jd_file], outputs=[jd_json, jd_status])

        gr.Markdown("### Review & adjust weightage (optional)")
        gr.Markdown("Edit the **adjusted** column. For `recommended_weights`, the adjusted values must sum to **100**.")

        load_weights_btn = gr.Button("Load weights from JD JSON")
        # weight_mode tracks which weight schema was loaded (session state).
        weight_mode = gr.State(value="none")
        weight_msg = gr.Textbox(label="Weight editor status", interactive=False)

        weight_df = gr.Dataframe(
            label="Editable weights (edit 'adjusted' column)",
            headers=["label", "recommended", "adjusted", "key"],
            datatype=["str", "number", "number", "str"],
            interactive=True,
            wrap=True,
            row_count=(0, "dynamic"),
            col_count=(4, "fixed"),
        )

        apply_weights_btn = gr.Button("Apply weight adjustments to JD (save)")

        load_weights_btn.click(
            step1_prepare_weight_editor,
            inputs=[jd_json],
            outputs=[weight_df, weight_mode, weight_msg],
        )

        # Writes the edited weights back into the JD JSON shown above.
        apply_weights_btn.click(
            step1_apply_weight_adjustments,
            inputs=[jd_json, weight_df, weight_mode],
            outputs=[jd_json, jd_status],
        )


    with gr.Tab("Step 2 — Resume Batch Upload"):
        resume_files = gr.File(label="Upload resumes (PDF/DOCX)", file_count="multiple")
        overwrite = gr.Checkbox(label="Overwrite duplicates", value=False)
        save_btn = gr.Button("Save Resumes to data/resumes")
        save_status = gr.Textbox(label="Status")
        save_btn.click(step2_save_resumes, inputs=[resume_files, overwrite], outputs=[save_status])

    with gr.Tab("Step 3 — Parse Resumes"):
        parse_btn = gr.Button("Parse Resumes → data/candidates")
        parse_status = gr.Textbox(label="Status")
        parse_btn.click(step3_parse_resumes, inputs=[], outputs=[parse_status])

    with gr.Tab("Step 4 — JD-aligned Summaries"):
        match_btn = gr.Button("Generate match files → data/matches")
        match_status = gr.Textbox(label="Status")

        refresh_btn = gr.Button("Refresh match list")
        match_list = gr.Dropdown(choices=[], label="Select a match file")
        view_btn = gr.Button("View selected match JSON")

        match_json = gr.Code(label="Match JSON", language="json")

        match_btn.click(step4_generate_matches, inputs=[], outputs=[match_status])
        # Dropdown choices are refreshed lazily from disk on demand.
        refresh_btn.click(lambda: gr.update(choices=list_match_files()), inputs=[], outputs=[match_list])
        view_btn.click(step4_view_match, inputs=[match_list], outputs=[match_json])

    with gr.Tab("Step 5 — Scoring & Ranking"):
        topk = gr.Number(value=10, label="Top K", precision=0)
        rank_btn = gr.Button("Generate Ranking")
        rank_df = gr.Dataframe(label="Top K Ranking")
        rank_status = gr.Textbox(label="Status")

        rank_btn.click(step5_rank, inputs=[topk], outputs=[rank_df, rank_status])

    with gr.Tab("Step 6 — Explain & Interview Kit (Agentic)"):
        refresh6 = gr.Button("Refresh match list")
        match_pick6 = gr.Dropdown(choices=[], label="Select a match file")
        explain_btn = gr.Button("Generate Explanation (CrewAI)")
        explain_json = gr.Code(label="Explainability Output (JSON)", language="json")
        explain_status = gr.Textbox(label="Status")

        refresh6.click(lambda: gr.update(choices=list_match_files()), inputs=[], outputs=[match_pick6])
        explain_btn.click(step6_explain, inputs=[match_pick6], outputs=[explain_json, explain_status])


    with gr.Tab("Step 7 — Email Generator"):
        gr.Markdown("### Generate professional email drafts for Top‑K candidates")
        gr.Markdown("This step uses a fast **template-based** generator (HF-stable). We can add AI polishing later if you want.")

        with gr.Row():
            topk_email = gr.Number(value=5, precision=0, label="Top-K candidates")
            tone = gr.Dropdown(choices=["Warm", "Formal", "Casual"], value="Warm", label="Tone")

        with gr.Accordion("V2 (Optional) — AI polish emails", open=False):
            use_ai = gr.Checkbox(label="Use AI to polish emails (requires OPENAI_API_KEY on HF)", value=False)
            ai_model = gr.Textbox(label="Model", value="gpt-4o-mini")
            ai_temperature = gr.Slider(minimum=0.0, maximum=1.0, value=0.3, step=0.1, label="Temperature")

        role_title = gr.Textbox(label="Role title", value="(Your role title)")
        company_name = gr.Textbox(label="Company name", value="(Your company)")
        sender_name = gr.Textbox(label="Sender name", value="Hiring Team")
        contact_email = gr.Textbox(label="Contact email", value="hr@yourcompany.com")
        next_step_text = gr.Textbox(
            label="Next step message (shown in the email)",
            value="Please reply with your availability for a 20–30 minute interview this week.",
            lines=2
        )

        gen_btn = gr.Button("Generate emails + ZIP download")
        email_zip = gr.File(label="Download ZIP (Top‑K emails)")
        email_preview = gr.Textbox(label="Preview (first 1–2 emails)", lines=16, interactive=False)
        email_status = gr.Textbox(label="Status", interactive=False)

        # NOTE: input order here must match step7_generate_emails' signature.
        gen_btn.click(
            step7_generate_emails,
            inputs=[topk_email, role_title, company_name, sender_name, contact_email, next_step_text, tone, use_ai, ai_model, ai_temperature],
            outputs=[email_zip, email_preview, email_status]
        )

    with gr.Tab("Admin — Diagnostics"):
        gr.Markdown("### Diagnostics (Admin)\nEnable this only when you need to troubleshoot or validate robustness.")
        # Gate: handlers below check this flag before reading/clearing logs.
        show_diag = gr.Checkbox(label="Show diagnostics", value=False)

        log_choice = gr.Dropdown(
            choices=[
                "Resume parser (data/parse_errors.log)",
                "Step 3 batch parse (data/parse_step3_errors.log)",
                "Step 4 matching (data/match_errors.log)",
            ],
            value="Resume parser (data/parse_errors.log)",
            label="Select log"
        )

        last_n = gr.Number(value=200, precision=0, label="Show last N lines (20–2000)")
        with gr.Row():
            refresh_log = gr.Button("Refresh log")
            clear_log = gr.Button("Clear selected log")

        log_view = gr.Textbox(label="Log output", lines=20, interactive=False)

        refresh_log.click(view_diagnostics_log, inputs=[show_diag, log_choice, last_n], outputs=[log_view])
        clear_log.click(clear_diagnostics_log, inputs=[show_diag, log_choice], outputs=[log_view])
    # HF Spaces: share should be False (share links are mainly for local use)

demo.launch(share=False)
core/__init__.py ADDED
@@ -0,0 +1,2 @@
 
 
 
1
# core/__init__.py
core/crew_explainer.py ADDED
@@ -0,0 +1,101 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import json
3
+ from pathlib import Path
4
+
5
+ from crewai import Agent, Task, Crew, Process
6
+
7
+ # ---------- Helpers ----------
8
+ def _ensure_key():
9
+ if not os.getenv("OPENAI_API_KEY"):
10
+ raise RuntimeError("OPENAI_API_KEY is not set (HF: Settings → Secrets → OPENAI_API_KEY).")
11
+
12
+ def _load_json(path: str):
13
+ with open(path, "r", encoding="utf-8") as f:
14
+ return json.load(f)
15
+
16
+ def _dump_json(obj) -> str:
17
+ return json.dumps(obj, ensure_ascii=False, indent=2)
18
+
19
+ # ---------- Agent ----------
20
def _explainer_agent():
    """Build the CrewAI agent that writes the explainability report.

    The agent is read-only with respect to scoring: its backstory instructs
    it to justify the existing ranking, never to recompute it.  max_iter=1
    keeps the run to a single pass (no self-revision loop), bounding latency
    and token usage.
    """
    return Agent(
        role="Hiring Explainability Analyst",
        goal="Explain and justify candidate ranking using provided JD rubric and match JSON evidence.",
        backstory=(
            "You help HR reviewers understand why a candidate is ranked, what gaps exist, "
            "and what interview questions to ask. You do not change the original scoring."
        ),
        verbose=False,
        max_iter=1,
    )
31
+
32
+ # ---------- Public API (called by app.py) ----------
33
def generate_explanation(jd_path: str, match_path: str, top_k_rank: int | None = None) -> dict:
    """Produce a structured explainability report for one candidate.

    Parameters
    ----------
    jd_path : str
        Path to the JD rubric JSON file.
    match_path : str
        Path to the candidate's match JSON file.
    top_k_rank : int | None
        The candidate's position in the Top-K ranking, if known.

    Returns a dict of the shape:
    {
      "summary": "...",
      "strengths": [...],
      "gaps": [...],
      "risk_flags": [...],
      "interview_questions": [...],
      "recommended_next_step": "..."
    }

    Raises RuntimeError (via _ensure_key) if OPENAI_API_KEY is not set.
    If the agent output cannot be parsed as JSON, a safe fallback dict is
    returned instead of raising, so the UI never crashes.
    """
    _ensure_key()

    jd = _load_json(jd_path)
    match_obj = _load_json(match_path)

    # Keep prompt compact and safe for latency
    prompt_payload = {
        "jd_rubric": jd,
        "match": match_obj,
        "ranking_context": {"top_k_rank": top_k_rank},
        "output_schema": {
            "summary": "string (2-4 sentences)",
            "strengths": ["bullet strings"],
            "gaps": ["bullet strings"],
            "risk_flags": ["bullet strings"],
            "interview_questions": ["bullet strings (5-8 questions)"],
            "recommended_next_step": "string (one action)"
        }
    }

    # FIX: build the agent once and reuse it.  Previously _explainer_agent()
    # was called twice, so the Crew's `agents` list held a *different* Agent
    # instance than the one assigned to the Task.
    agent = _explainer_agent()

    task = Task(
        description=(
            "You will produce an explainability report for a candidate.\n"
            "Use the provided payload which includes the JD rubric and the candidate match JSON.\n"
            "Rules:\n"
            "- Do NOT invent skills/experience not present in the match JSON.\n"
            "- Tie strengths/gaps to rubric categories if possible.\n"
            "- If evidence is insufficient, say so as a risk_flag.\n"
            "- Output MUST be valid JSON only, matching the output_schema exactly.\n\n"
            f"PAYLOAD:\n{_dump_json(prompt_payload)}"
        ),
        expected_output="Valid JSON only.",
        agent=agent,
    )

    crew = Crew(
        agents=[agent],
        tasks=[task],
        process=Process.sequential,
        planning=False,
        verbose=False,
    )

    result = crew.kickoff()
    # CrewAI returns text; the prompt demands JSON-only output.
    try:
        return json.loads(str(result))
    except Exception as e:
        # Fallback that doesn't crash the UI
        return {
            "summary": "",
            "strengths": [],
            "gaps": [],
            "risk_flags": [f"Failed to parse agent output as JSON: {type(e).__name__}: {e}"],
            "interview_questions": [],
            "recommended_next_step": "Retry generation."
        }
core/explainability.py ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ from pathlib import Path
2
+
3
+ from core.crew_explainer import generate_explanation
4
+
5
def explain_candidate(jd_path: str, match_path: str, rank: int | None = None) -> dict:
    """Thin wrapper around core.crew_explainer.generate_explanation.

    Kept as a separate seam so caching (e.g. keyed on a hash of the
    match file) can be added later without touching callers.
    """
    # You can add caching later if you want (by match_path hash)
    return generate_explanation(jd_path=jd_path, match_path=match_path, top_k_rank=rank)
core/jd_processor.py ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # %%writefile core/jd_processor.py
2
+ import logging
3
+ import json
4
+ import os
5
+ from openai import OpenAI, APIError
6
+ from utils.prompts import JD_PROMPT
7
+
8
+ logger = logging.getLogger(__name__)
9
+ client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
10
+
11
def generate_jd_rubric(jd_text, max_retries=3):
    """Generate a structured JD rubric from raw JD text via the LLM.

    Parameters
    ----------
    jd_text : str
        Raw job-description text.
    max_retries : int
        Attempts before giving up and returning the empty template.

    Returns
    -------
    dict
        Parsed rubric on success, otherwise get_empty_template().
        Never raises: all failure modes degrade to the empty template.
    """
    for attempt in range(max_retries):
        try:
            response = client.chat.completions.create(
                model="gpt-4o-mini",
                messages=[
                    {"role": "system", "content": "You are an experienced HR Talent Manager AI assistant. Your specialty is analyzing job descriptions and creating structured hiring rubrics."},
                    {"role": "user", "content": JD_PROMPT.format(jd_text=jd_text)}
                ],
                temperature=0.2,
                response_format={"type": "json_object"}  # force a bare JSON object response
            )

            content = response.choices[0].message.content
            # Lazy %-style logging args: no string building unless DEBUG is on.
            logger.debug("LLM response (attempt %d): %.200s...", attempt + 1, content)

            # response_format guarantees a bare JSON object, so no brace scan needed.
            jd_data = json.loads(content)

            # Validate structure before handing it to downstream steps.
            required_keys = {"role_title", "must_have_skills", "nice_to_have_skills",
                             "soft_skills", "minimum_years_experience", "recommended_weights"}
            if not all(key in jd_data for key in required_keys):
                raise ValueError("Missing required keys in response")

            return jd_data

        except json.JSONDecodeError as e:
            logger.warning("JSON decode failed (attempt %d): %s", attempt + 1, e)
            if attempt == max_retries - 1:
                return get_empty_template()
        except APIError as e:
            logger.error("OpenAI API error: %s", e)
            if attempt == max_retries - 1:
                return get_empty_template()
        except Exception as e:
            # NOTE: unexpected errors (including the missing-keys ValueError
            # above) abort immediately rather than retrying — intentional
            # fail-fast, preserved from the original behavior.
            logger.error("Unexpected error: %s", e)
            return get_empty_template()

    return get_empty_template()
54
+
55
def get_empty_template():
    """Return a blank rubric with every expected field present.

    A fresh dict is built on each call so callers can mutate the result
    without affecting later calls.
    """
    template = {
        "role_title": "",
        "must_have_skills": [],
        "nice_to_have_skills": [],
        "soft_skills": [],
    }
    template["minimum_years_experience"] = 0
    template["recommended_weights"] = {}
    return template
core/matcher.py ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # %%writefile core/matcher.py
2
+ import json
3
+ from openai import OpenAI
4
+ from utils.prompts import JD_RESUME_MATCH_PROMPT
5
+ import os
6
+
7
+ client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
8
+
9
def match_candidate_to_jd(jd_rubric, candidate_profile):
    """Ask the LLM to compare a candidate profile against the JD rubric.

    Parameters
    ----------
    jd_rubric : dict
        Structured rubric produced by core.jd_processor.
    candidate_profile : dict
        Structured profile produced by core.resume_parser.

    Returns
    -------
    dict
        Parsed match JSON (schema defined in JD_RESUME_MATCH_PROMPT).

    Raises json.JSONDecodeError if no parseable JSON can be found in the
    model output.
    """
    response = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[
            {
                "role": "system",
                "content": JD_RESUME_MATCH_PROMPT
            },
            {
                "role": "user",
                "content": f"""
Job Description Rubric:
{json.dumps(jd_rubric, indent=2)}

Candidate Profile:
{json.dumps(candidate_profile, indent=2)}
"""
            }
        ],
        temperature=0.2
    )

    content = response.choices[0].message.content.strip()

    # FIX: models sometimes wrap the JSON in markdown fences or prose, which
    # made the bare json.loads(content) raise.  Extract the outermost {...}
    # span first — same defensive pattern core.resume_parser already uses.
    start = content.find("{")
    end = content.rfind("}") + 1
    if start != -1 and end > start:
        content = content[start:end]
    return json.loads(content)
core/ranking.py ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # %%writefile core/ranking.py
2
+ import os
3
+ import json
4
+ from core.scoring import score_candidate
5
+
6
+ JD_PATH = "data/jd.json"
7
+ MATCH_DIR = "data/matches"
8
+ OUT_PATH = "data/ranking.json"
9
+
10
+
11
def build_ranking(top_k: int = 10):
    """Score every match file, rank candidates, and persist the result.

    Loads the JD rubric from JD_PATH, scores every ``*_match.json`` under
    MATCH_DIR with score_candidate, sorts by total_score (descending),
    writes the combined result to OUT_PATH and returns it.
    """
    with open(JD_PATH, "r", encoding="utf-8") as fh:
        jd_rubric = json.load(fh)

    scored_rows = []
    for fname in os.listdir(MATCH_DIR):
        if not fname.endswith("_match.json"):
            continue

        with open(os.path.join(MATCH_DIR, fname), "r", encoding="utf-8") as fh:
            match_summary = json.load(fh)

        row = score_candidate(jd_rubric, match_summary)
        row["match_file"] = fname
        scored_rows.append(row)

    # Highest score first.
    scored_rows.sort(key=lambda row: row["total_score"], reverse=True)

    result = {
        "jd_role_title": jd_rubric.get("role_title", ""),
        "top_k": top_k,
        "ranking": scored_rows[:top_k],
        "all_candidates": scored_rows,
    }

    with open(OUT_PATH, "w", encoding="utf-8") as fh:
        json.dump(result, fh, indent=2)

    return result
41
+
core/resume_parser.py ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # added Step1 patch, added parser_error.log
2
+ import os
3
+ import json
4
+ from datetime import datetime
5
+
6
+ from openai import OpenAI
7
+ from utils.prompts import RESUME_PARSE_PROMPT
8
+
9
+ client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
10
+
11
+
12
+ def _append_log(path: str, line: str) -> None:
13
+ os.makedirs(os.path.dirname(path) or ".", exist_ok=True)
14
+ with open(path, "a", encoding="utf-8") as f:
15
+ f.write(line.rstrip("\n") + "\n")
16
+
17
+
18
def parse_resume(resume_text, filename):
    """Parse raw resume text into a structured candidate profile via the LLM.

    Parameters
    ----------
    resume_text : str
        Full plain text of the resume.
    filename : str
        Source file name; stored as candidate_id and used in error logs.

    Returns
    -------
    dict
        Parsed profile.  Never raises on bad model output: failures are
        appended to data/parse_errors.log and an empty profile skeleton is
        returned instead.
    """
    response = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[
            {"role": "system", "content": "You are an expert HR resume analyst specializing in extracting structured information from resumes with high accuracy."},
            {"role": "user", "content": RESUME_PARSE_PROMPT.format(resume_text=resume_text)}
        ],
        temperature=0.2
    )

    content = response.choices[0].message.content

    try:
        # The model may surround the JSON with prose or markdown fences;
        # take the outermost {...} span before parsing.
        start = content.find("{")
        end = content.rfind("}") + 1
        json_str = content[start:end]
        data = json.loads(json_str)
    except Exception as e:
        ts = datetime.now().isoformat(timespec="seconds")
        # FIX: log the actual source filename — this previously hard-coded
        # the placeholder "(unknown)" although `filename` was in scope.
        msg = f"{ts} | {filename} | {type(e).__name__}: {e} | content_len={len(content)}"
        _append_log("data/parse_errors.log", msg)

        # Empty skeleton so downstream steps always see the expected keys.
        data = {
            "name": "",
            "skills": [],
            "education": [],
            "work_experience": [],
            "total_years_experience": 0,
            "summary": ""
        }

    data["candidate_id"] = filename
    return data
core/scoring.py ADDED
@@ -0,0 +1,113 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # %%writefile core/scoring.py
2
+ from __future__ import annotations
3
+ from dataclasses import dataclass
4
+ from typing import Dict, Any, List
5
+
6
+
7
+ def _safe_len(x) -> int:
8
+ return len(x) if isinstance(x, list) else 0
9
+
10
+
11
def normalize_weights(weights: Dict[str, Any]) -> Dict[str, float]:
    """Normalize the four rubric weights so they sum to 1.0.

    Missing keys count as 0.  When all weights are absent or sum to zero
    (or negative), a sensible default split is returned instead.
    """
    raw = {
        "must_have": float(weights.get("must_have", 0)),
        "nice_to_have": float(weights.get("nice_to_have", 0)),
        "experience": float(weights.get("experience", 0)),
        "soft_skills": float(weights.get("soft_skills", 0)),
    }

    total = sum(raw.values())
    if total <= 0:
        # Default emphasis: must-haves dominate, soft skills unused.
        return {"must_have": 0.6, "nice_to_have": 0.25, "experience": 0.15, "soft_skills": 0.0}

    return {name: value / total for name, value in raw.items()}
27
+
28
+
29
def compute_coverage(matched: List[Any], missing: List[Any], partial: List[Any] | None = None) -> float:
    """Fraction of requirements covered; a partial match counts as half.

    Returns 0.0 when there is nothing to cover (avoids division by zero).
    """
    n_matched = _safe_len(matched)
    n_missing = _safe_len(missing)
    n_partial = 0 if partial is None else _safe_len(partial)

    total = n_matched + n_missing + n_partial
    if not total:
        return 0.0

    return (n_matched + 0.5 * n_partial) / total
39
+
40
+
41
def experience_score(assessment: str) -> float:
    """Map an experience assessment label to a binary score.

    "meets" / "exceeds" -> 1.0; "below", unknown labels, empty or None
    -> 0.0.  Comparison is case-insensitive and whitespace-tolerant.
    """
    label = (assessment or "").strip().lower()
    return 1.0 if label in ("meets", "exceeds") else 0.0
48
+
49
+
50
def clamp(x: float, lo: float, hi: float) -> float:
    """Clamp *x* into the closed interval [lo, hi]."""
    bounded_above = min(hi, x)
    return max(lo, bounded_above)
52
+
53
+
54
def score_candidate(jd_rubric: Dict[str, Any], match_summary: Dict[str, Any]) -> Dict[str, Any]:
    """Turn a Step-4 match summary into a 0–100 score with a breakdown.

    base = weighted sum of must-have coverage, nice-to-have coverage,
    experience fit and (currently unused) soft skills, scaled to 100.
    A small, capped bonus/penalty derived from the positive/negative
    indicators is then applied, and the result is clamped to [0, 100].
    """
    norm_w = normalize_weights(jd_rubric.get("recommended_weights", {}))

    must = match_summary.get("must_have_match", {}) or {}
    nice = match_summary.get("nice_to_have_match", {}) or {}
    experience = match_summary.get("experience_analysis", {}) or {}

    must_coverage = compute_coverage(
        matched=must.get("matched", []),
        missing=must.get("missing", []),
        partial=must.get("partial", []),
    )

    nice_coverage = compute_coverage(
        matched=nice.get("matched", []),
        missing=nice.get("missing", []),
        partial=None,
    )

    exp_fit = experience_score(experience.get("assessment", ""))

    # Soft skills scoring is a placeholder (always 0) until logic is added.
    soft_fit = 0.0

    base_score = (
        norm_w["must_have"] * must_coverage +
        norm_w["nice_to_have"] * nice_coverage +
        norm_w["experience"] * exp_fit +
        norm_w["soft_skills"] * soft_fit
    ) * 100.0

    # Capped bonus/penalty from the Step-4 indicators — transparent and
    # deliberately too small to dominate the base score.
    positives = match_summary.get("positive_indicators", []) or []
    negatives = match_summary.get("negative_indicators", []) or []

    bonus_per_positive = 1.0
    penalty_per_negative = 1.5
    raw_adjustment = (len(positives) * bonus_per_positive) - (len(negatives) * penalty_per_negative)

    adjustment_cap = 8.0
    adjustment = clamp(raw_adjustment, -adjustment_cap, adjustment_cap)

    final_score = clamp(base_score + adjustment, 0.0, 100.0)

    return {
        "candidate_name": match_summary.get("candidate_name", ""),
        "total_score": round(final_score, 2),
        "breakdown": {
            "base_score": round(base_score, 2),
            "bonus_penalty_adjustment": round(adjustment, 2),
            "positive_count": len(positives),
            "negative_count": len(negatives),
            "must_have_coverage": round(must_coverage, 3),
            "nice_to_have_coverage": round(nice_coverage, 3),
            "experience_score": round(exp_fit, 3),
            "weights_normalized": norm_w
        }
    }
113
+
requirements.txt ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # from pip freeze
2
+
3
+ gradio==5.50.0
4
+ numpy==2.0.2
5
+ pandas==2.2.2
6
+ openai==2.12.0
7
+ pydantic==2.12.3
8
+ PyPDF2==3.0.1
9
+ python-docx==1.2.0
10
+ python-dotenv==1.2.1
11
+ tqdm==4.67.1
12
+ crewai==0.175.0
13
+
14
# Colab already has both numpy 2.0.2 and pandas 2.2.2 installed.
15
+ # !pip install -r requirements.txt
16
+ # pip freeze > requirements.txt
17
+ # added pandas==2.2.2
18
+ # added crewai==0.175.0
utils/__init__.py ADDED
@@ -0,0 +1,2 @@
 
 
 
1
# utils/__init__.py
utils/file_loader.py ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # %%writefile file_loader.py
2
+ from PyPDF2 import PdfReader
3
+ from docx import Document
4
+
5
+
6
def load_text_from_file(uploaded_file):
    """Extract plain text from an uploaded PDF/DOCX/DOC/TXT file.

    Parameters
    ----------
    uploaded_file : file-like
        Object with a ``.name`` attribute (used for extension sniffing)
        and, for txt files, a ``.read()`` method returning bytes.

    Returns
    -------
    str
        The extracted text.

    Raises
    ------
    ValueError
        Missing/unsupported extension, undecodable text file, or any
        wrapped extraction failure.
    ImportError
        A required parser library is missing.
    """
    if "." not in uploaded_file.name:
        raise ValueError("File has no extension")

    file_type = uploaded_file.name.split(".")[-1].lower()

    # FIX: reject unsupported types *before* the blanket try/except so the
    # caller gets a clean "Unsupported file type" error instead of one
    # re-wrapped as "Error processing file: ...".
    if file_type not in ("pdf", "docx", "doc", "txt"):
        raise ValueError(f"Unsupported file type: {file_type}")

    try:
        if file_type == "pdf":
            reader = PdfReader(uploaded_file)
            # FIX: extract_text() may return None (e.g. image-only pages),
            # which previously made "\n".join raise TypeError.
            return "\n".join([(page.extract_text() or "") for page in reader.pages])

        elif file_type in ["docx", "doc"]:
            doc = Document(uploaded_file)
            return "\n".join([para.text for para in doc.paragraphs])

        else:  # txt — try common encodings in order of likelihood
            content = uploaded_file.read()
            for encoding in ["utf-8", "latin-1", "cp1252"]:
                try:
                    return content.decode(encoding)
                except UnicodeDecodeError:
                    continue
            raise ValueError("Unable to decode text file")

    except ImportError as e:
        raise ImportError(f"Required library not installed: {e}")
    except ValueError:
        # Preserve our own messages (e.g. undecodable txt) unwrapped.
        raise
    except Exception as e:
        raise ValueError(f"Error processing file: {e}")
utils/prompts.py ADDED
@@ -0,0 +1,84 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# %%writefile utils/prompts.py
# Prompt templates for the pipeline's three LLM calls.  All of them demand
# STRICT JSON output; downstream code parses the responses with json.loads.

# Step 1 (core.jd_processor): turn a raw job description into a rubric.
# NOTE(review): the field list here includes "minimum education requirements"
# and an "education" weight, but generate_jd_rubric only validates/uses the
# other keys — confirm whether education should be wired into scoring.
JD_PROMPT = """
Given the following Job Description, extract a structured hiring rubric.

Return STRICT JSON with the following fields:
- role_title
- must_have_skills (list)
- nice_to_have_skills (list)
- soft_skills (list)
- minimum_years_experience (number)
- minimum education requirements (list)
- recommended_weights (object with must_have, nice_to_have, experience, education, soft_skills)

Job Description:
----------------
{jd_text}
"""

# Step 3 (core.resume_parser): structured candidate profile from resume text.
# NOTE(review): this asks for "technical skills"/"soft skills" while the
# parser's fallback skeleton uses a single "skills" key — verify the keys
# actually returned by the model match what downstream steps read.
RESUME_PARSE_PROMPT = """
You are an experienced HR resume analyst.

Given the following resume text, extract a structured candidate profile.

Return STRICT JSON with these fields:
- name
- technical skills (list)
- soft skills (list)
- education (list)
- work_experience (list of short role summaries)
- total_years_experience (number)
- summary (2–3 sentence professional summary)

Resume Text:
-------------
{resume_text}

IMPORTANT:
- Output JSON only
- Do not include explanations
"""

# Step 4 (core.matcher): JD-vs-candidate comparison.  Used verbatim as the
# system message (never .format()-ed), so the literal braces below are safe.
JD_RESUME_MATCH_PROMPT = """
You are an experienced HR hiring analyst.

You will be given:
1. A structured Job Description rubric (JSON)
2. A structured candidate profile (JSON)

Your task:
- Compare the candidate against the JD rubric
- Identify matches, partial matches, and gaps
- Identify positive and negative indicators
- DO NOT calculate a score
- Be factual and conservative
- Do NOT infer sensitive personal attributes
- Output ONLY valid JSON in the specified schema

Matching rules:
- A skill is matched if clearly demonstrated in experience or skills
- Partial if loosely related or implied
- Missing if not found

Output JSON schema:
{
  "candidate_name": "",
  "must_have_match": {
    "matched": [],
    "missing": [],
    "partial": []
  },
  "nice_to_have_match": {
    "matched": [],
    "missing": []
  },
  "experience_analysis": {
    "required_years": 0,
    "candidate_years": 0,
    "assessment": "below | meets | exceeds"
  },
  "positive_indicators": [],
  "negative_indicators": [],
  "overall_fit_summary": ""
}
"""
utils/resume_loader.py ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # %%writefile utils/resume_loader.py
2
+ import os
3
+
4
def load_resume_files(resume_dir="data/resumes"):
    """List resume file paths (.pdf/.docx/.txt) inside *resume_dir*.

    Hidden entries (dot-prefixed) and subdirectories are skipped; order
    follows os.listdir.
    """
    accepted_suffixes = (".pdf", ".docx", ".txt")
    paths = []
    for entry in os.listdir(resume_dir):
        if entry.startswith("."):
            continue
        if not entry.lower().endswith(accepted_suffixes):
            continue
        full_path = os.path.join(resume_dir, entry)
        if os.path.isfile(full_path):
            paths.append(full_path)
    return paths
+ ]