yhng2525 commited on
Commit
1595f22
·
verified ·
1 Parent(s): a4fe886

Upload 15 files

Browse files
README.md CHANGED
@@ -1,14 +1,13 @@
1
- ---
2
- title: Ai Resume Ranking
3
- emoji: 🐠
4
- colorFrom: green
5
- colorTo: gray
6
- sdk: gradio
7
- sdk_version: 6.3.0
8
- app_file: app.py
9
- pinned: false
10
- license: apache-2.0
11
- short_description: CrewAI Version
12
- ---
13
-
14
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
+ ---
2
+ title: Ai Resume Match
3
+ emoji: 🚀
4
+ colorFrom: pink
5
+ colorTo: purple
6
+ sdk: gradio
7
+ sdk_version: 6.2.0
8
+ app_file: app.py
9
+ pinned: false
10
+ short_description: iti123-project
11
+ ---
12
+
13
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
app.py ADDED
@@ -0,0 +1,260 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os, json, shutil
2
+ from pathlib import Path
3
+
4
+ import gradio as gr
5
+ import pandas as pd
6
+
7
+ from dotenv import load_dotenv
8
+ load_dotenv()
9
+
10
+ from utils.file_loader import load_text_from_file
11
+ from core.crew_pipeline import (
12
+ crew_step1_generate_jd,
13
+ crew_step3_parse_resumes,
14
+ crew_step4_generate_matches,
15
+ crew_step5_rank,
16
+ )
17
+
18
# ---------- Folders ----------
# Working directories for pipeline artifacts (uploads and intermediate JSON).
RESUME_DIR = Path("data/resumes")     # uploaded resume files (PDF/DOCX)
CAND_DIR = Path("data/candidates")    # parsed candidate profiles (JSON, Step 3)
MATCH_DIR = Path("data/matches")      # JD-candidate match summaries (JSON, Step 4)
DATA_DIR = Path("data")

# Create the writable directories up front so every later step can assume they exist.
for p in [RESUME_DIR, CAND_DIR, MATCH_DIR]:
    p.mkdir(parents=True, exist_ok=True)

# JD rubric produced by Step 1 and consumed by Steps 4-5.
JD_PATH = DATA_DIR / "jd.json"
28
+
29
# ---------- Theme CSS ----------
def theme_css(mode: str) -> str:
    """Build the CSS string for the selected reader theme.

    Args:
        mode: "Light", "HF Dark", or "High Contrast"; any other value falls
            back to the HF Dark palette.

    Returns:
        Shared layout/typography rules concatenated with the palette CSS.
    """
    # Layout and typography shared by every theme.
    base = """
    .gradio-container { max-width: 1200px !important; margin: 0 auto !important; padding-bottom: 24px !important; }
    .gradio-container, .gradio-container * { font-size: 16px; line-height: 1.55; }
    button, .gr-button, .gr-input, .gr-textbox, .gr-file, .gr-dropdown { padding-top: 10px !important; padding-bottom: 10px !important; }
    pre, code, .cm-editor, .cm-scroller {
    font-family: ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, "Liberation Mono", "Courier New", monospace !important;
    font-size: 15px !important; line-height: 1.65 !important; border-radius: 10px !important;
    }
    .cm-scroller { min-height: 420px !important; }
    """

    # Light palette: dark code panels on a near-white page.
    light = """
    :root { --panel-border: #e5e7eb; --code-bg: #f8fafc; --code-fg: #0f172a; }
    body { background: #f5f5f5; }
    pre, code, .cm-editor { background-color: var(--code-bg) !important; color: var(--code-fg) !important; border: 1px solid var(--panel-border) !important; }
    """

    # Hugging-Face-style dark palette; selected tab highlighted in orange.
    hf_dark = """
    :root {
    --panel-border: #1f2937;
    --code-bg: #0f172a;
    --code-fg: #e5e7eb;
    --title-fg: #ffffff;
    --header-fg: #d1d5db;
    --muted-fg: #9ca3af;
    }
    body { background: #0b1220; }
    h1, h1 * { color: var(--title-fg) !important; font-weight: 700 !important; }
    h2, h3, h4, h2 *, h3 *, h4 * { color: var(--header-fg) !important; font-weight: 600 !important; }
    button[role="tab"] { color: var(--muted-fg) !important; font-weight: 500 !important; }
    button[role="tab"][aria-selected="true"] { color: #fb923c !important; font-weight: 700 !important; }
    pre, code, .cm-editor { background-color: var(--code-bg) !important; color: var(--code-fg) !important; border: 1px solid var(--panel-border) !important; }
    """

    # Black/white/yellow palette for maximum readability.
    high_contrast = """
    :root {
    --panel-border: #ffffff;
    --code-bg: #000000;
    --code-fg: #ffffff;
    --title-fg: #ffffff;
    --header-fg: #ffffff;
    --muted-fg: #e5e7eb;
    }
    body { background: #000000; }
    h1, h1 * { color: var(--title-fg) !important; font-weight: 800 !important; }
    h2, h3, h4, h2 *, h3 *, h4 * { color: var(--header-fg) !important; font-weight: 700 !important; }
    button[role="tab"] { color: var(--muted-fg) !important; font-weight: 600 !important; }
    button[role="tab"][aria-selected="true"] { color: #ffd500 !important; font-weight: 800 !important; }
    pre, code, .cm-editor { background-color: var(--code-bg) !important; color: var(--code-fg) !important; border: 2px solid var(--panel-border) !important; }
    """

    # Unknown modes fall back to the dark palette.
    css_map = {"Light": light, "HF Dark": hf_dark, "High Contrast": high_contrast}
    return base + css_map.get(mode, hf_dark)
84
+
85
+
86
+ def _ensure_api_key():
87
+ if not os.getenv("OPENAI_API_KEY"):
88
+ raise RuntimeError(
89
+ "OPENAI_API_KEY is not set. "
90
+ "On Hugging Face: Settings → Secrets → OPENAI_API_KEY. "
91
+ "On local: set environment variable before running."
92
+ )
93
+
94
+
95
+ def _pretty_json(obj) -> str:
96
+ return json.dumps(obj, indent=2, ensure_ascii=False)
97
+
98
+
99
# ---------- Steps ----------
def step1_generate_jd(file_obj):
    """Step 1: read an uploaded JD file and produce/persist its rubric.

    Returns a (rubric_json_text, status_message) pair for the UI.
    """
    _ensure_api_key()
    if file_obj is None:
        return "", "Please upload a JD file."

    with open(file_obj.name, "rb") as handle:
        jd_text = load_text_from_file(handle)

    # CrewAI orchestration (JD Analyst agent uses a tool that calls your existing generate_jd_rubric)
    rubric = crew_step1_generate_jd(jd_text, jd_path=str(JD_PATH))
    status = f"✅ JD rubric saved to {JD_PATH}"
    return _pretty_json(rubric), status
111
+
112
+
113
def step2_save_resumes(files, overwrite):
    """Step 2: copy uploaded resume files into RESUME_DIR.

    Existing files are skipped unless *overwrite* is true.
    Returns a status string for the UI.
    """
    if not files:
        return "Please upload at least one resume."

    saved = 0
    skipped = 0
    for upload in files:
        target = RESUME_DIR / Path(upload.name).name
        if not overwrite and target.exists():
            skipped += 1
        else:
            shutil.copyfile(upload.name, target)
            saved += 1

    msg = f"✅ Saved {saved} resume(s) into {RESUME_DIR}"
    if skipped:
        msg += f" | ⏭ Skipped {skipped} duplicate(s) (overwrite=False)"
    return msg
130
+
131
+
132
def step3_parse_resumes():
    """Step 3: parse every PDF/DOCX resume in RESUME_DIR into candidate JSON."""
    _ensure_api_key()
    supported = {".pdf", ".docx"}
    pending = sorted(p for p in RESUME_DIR.iterdir() if p.suffix.lower() in supported)
    if not pending:
        return "No resumes found in data/resumes. Upload resumes first."

    # CrewAI orchestration (Resume Parser agent runs the parsing tool per file)
    return crew_step3_parse_resumes(
        resume_dir=str(RESUME_DIR),
        cand_dir=str(CAND_DIR),
    )
144
+
145
+
146
def step4_generate_matches():
    """Step 4: build a JD-aligned match JSON per candidate.

    Requires the Step-1 rubric and at least one Step-3 candidate file.
    Returns a status string for the UI.
    """
    _ensure_api_key()
    if not JD_PATH.exists():
        return "No JD rubric found. Run Step 1 first."
    # glob("*.json") already filters by suffix; the previous per-entry
    # `p.suffix.lower() == ".json"` test inside any() was tautological.
    if not any(CAND_DIR.glob("*.json")):
        return "No candidates found. Run Step 3 first."

    # CrewAI orchestration (Matcher agent calls your existing match_candidate_to_jd)
    summary = crew_step4_generate_matches(
        jd_path=str(JD_PATH),
        cand_dir=str(CAND_DIR),
        match_dir=str(MATCH_DIR),
    )
    return summary
160
+
161
+
162
def step4_view_match(selected):
    """Return pretty-printed JSON for the chosen match file ('' if unavailable)."""
    if not selected:
        return ""
    target = MATCH_DIR / selected
    if not target.exists():
        return ""
    with open(target, "r", encoding="utf-8") as handle:
        payload = json.load(handle)
    return _pretty_json(payload)
170
+
171
+
172
def step5_rank(top_k):
    """Step 5: run the ranking crew and shape the result for the UI.

    Returns (dataframe_or_None, status_message).
    """
    _ensure_api_key()
    if not JD_PATH.exists():
        return None, "No JD rubric found. Run Step 1 first."
    if not any(p.name.endswith("_match.json") for p in MATCH_DIR.iterdir()):
        return None, "No match files found. Run Step 4 first."

    ranking = crew_step5_rank(top_k=int(top_k), match_dir=str(MATCH_DIR))

    rows = []
    for rank, entry in enumerate(ranking["ranking"], start=1):
        breakdown = entry.get("breakdown", {})
        rows.append({
            "Rank": rank,
            "Candidate": entry.get("candidate_name", ""),
            "Score": entry.get("total_score", 0),
            "Base Score": breakdown.get("base_score", 0),
            "Adj": breakdown.get("bonus_penalty_adjustment", 0),
            "Pos": breakdown.get("positive_count", 0),
            "Neg": breakdown.get("negative_count", 0),
            "Must-have": breakdown.get("must_have_coverage", 0),
            "Nice-to-have": breakdown.get("nice_to_have_coverage", 0),
            "Experience": breakdown.get("experience_score", 0),
            "Match File": entry.get("match_file", ""),
        })
    return pd.DataFrame(rows), "✅ Ranking generated (also saved to data/ranking.json)"
198
+
199
+
200
def list_match_files():
    """Names of generated match files in MATCH_DIR, sorted alphabetically."""
    names = [entry.name for entry in MATCH_DIR.iterdir() if entry.name.endswith("_match.json")]
    return sorted(names)
202
+
203
+
204
# ---------- UI ----------
# Five-tab workflow mirroring the pipeline: JD rubric -> resume upload ->
# parsing -> matching -> ranking.
with gr.Blocks(title="AI-Powered Resume Screening & Ranking System") as demo:
    gr.Markdown("# 📄 AI-Powered JD-Based Resumes Ranking System")

    # Theme picker plus a <style> tag carrying the generated CSS.
    with gr.Row():
        theme_mode = gr.Dropdown(
            choices=["Light", "HF Dark", "High Contrast"],
            value="HF Dark",
            label="Reader Theme"
        )
        style_tag = gr.HTML(value=f"<style>{theme_css('HF Dark')}</style>")

    def apply_theme(mode):
        # Regenerate the injected stylesheet whenever the dropdown changes.
        return gr.HTML(value=f"<style>{theme_css(mode)}</style>")
    theme_mode.change(fn=apply_theme, inputs=theme_mode, outputs=style_tag)

    # Step 1: upload the JD and generate/persist its rubric.
    with gr.Tab("Step 1 — JD Upload & Rubric"):
        jd_file = gr.File(label="Upload JD (PDF/DOCX/TXT)")
        jd_btn = gr.Button("Generate JD Rubric")
        jd_json = gr.Code(label="JD Rubric (JSON)", language="json")
        jd_status = gr.Textbox(label="Status")
        jd_btn.click(step1_generate_jd, inputs=[jd_file], outputs=[jd_json, jd_status])

    # Step 2: stage resume files on disk for later parsing.
    with gr.Tab("Step 2 — Resume Batch Upload"):
        resume_files = gr.File(label="Upload resumes (PDF/DOCX)", file_count="multiple")
        overwrite = gr.Checkbox(label="Overwrite duplicates", value=False)
        save_btn = gr.Button("Save Resumes to data/resumes")
        save_status = gr.Textbox(label="Status")
        save_btn.click(step2_save_resumes, inputs=[resume_files, overwrite], outputs=[save_status])

    # Step 3: parse staged resumes into candidate JSON files.
    with gr.Tab("Step 3 — Parse Resumes (CrewAI)"):
        parse_btn = gr.Button("Parse Resumes → data/candidates")
        parse_status = gr.Textbox(label="Status")
        parse_btn.click(step3_parse_resumes, inputs=[], outputs=[parse_status])

    # Step 4: generate JD-aligned match files and browse them.
    with gr.Tab("Step 4 — JD-aligned Summaries (CrewAI)"):
        match_btn = gr.Button("Generate match files → data/matches")
        match_status = gr.Textbox(label="Status")

        refresh_btn = gr.Button("Refresh match list")
        match_list = gr.Dropdown(choices=[], label="Select a match file")
        view_btn = gr.Button("View selected match JSON")

        match_json = gr.Code(label="Match JSON", language="json")

        match_btn.click(step4_generate_matches, inputs=[], outputs=[match_status])
        refresh_btn.click(lambda: gr.update(choices=list_match_files()), inputs=[], outputs=[match_list])
        view_btn.click(step4_view_match, inputs=[match_list], outputs=[match_json])

    # Step 5: score and rank candidates into a table.
    with gr.Tab("Step 5 — Scoring & Ranking (CrewAI)"):
        topk = gr.Number(value=10, label="Top K", precision=0)
        rank_btn = gr.Button("Generate Ranking")
        rank_df = gr.Dataframe(label="Top K Ranking")
        rank_status = gr.Textbox(label="Status")
        rank_btn.click(step5_rank, inputs=[topk], outputs=[rank_df, rank_status])

# share=False: the Hugging Face Space itself provides the public URL.
demo.launch(share=False)
core/__init__.py ADDED
@@ -0,0 +1,2 @@
 
 
 
1
# core/__init__.py
core/crew_pipeline.py ADDED
@@ -0,0 +1,214 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import os
3
+ from pathlib import Path
4
+
5
+ from crewai import Agent, Task, Crew, Process
6
+
7
+ from utils.file_loader import load_text_from_file
8
+ from core.crew_tools import (
9
+ generate_jd_rubric_tool,
10
+ parse_resume_tool,
11
+ match_candidate_tool,
12
+ build_ranking_tool,
13
+ )
14
+
15
+ # ---------- Helpers ----------
16
+ def _ensure_openai_env():
17
+ # CrewAI expects OPENAI_API_KEY to exist (you already use this in app.py)
18
+ if not os.getenv("OPENAI_API_KEY"):
19
+ raise RuntimeError("OPENAI_API_KEY is not set (HF: Settings → Secrets → OPENAI_API_KEY).")
20
+
21
+ # Optional: pin model for CrewAI if you want consistency
22
+ # (CrewAI supports different LLM providers; leaving it unset is fine if defaults work in your env)
23
+ os.environ.setdefault("OPENAI_MODEL_NAME", "gpt-4o-mini")
24
+
25
+
26
+ def _json(s: str):
27
+ return json.loads(s)
28
+
29
+
30
# ---------- Agents ----------
def _jd_agent():
    """Factory for the JD Analyst agent (wraps generate_jd_rubric_tool)."""
    config = dict(
        role="JD Analyst",
        goal="Convert JD text into a structured rubric JSON.",
        backstory="You are an HR analyst who produces consistent rubric structures for automated screening.",
        tools=[generate_jd_rubric_tool],
        verbose=False,
    )
    return Agent(**config)
39
+
40
def _resume_agent():
    """Factory for the Resume Parser agent (wraps parse_resume_tool)."""
    config = dict(
        role="Resume Parser",
        goal="Extract structured candidate profile JSON from resume text.",
        backstory="You are an ATS-style parser; you output consistent JSON that downstream scoring depends on.",
        tools=[parse_resume_tool],
        verbose=False,
    )
    return Agent(**config)
48
+
49
def _matcher_agent():
    """Factory for the JD-Candidate Matcher agent (wraps match_candidate_tool)."""
    config = dict(
        role="JD-Candidate Matcher",
        goal="Create a JD-aligned match JSON for each candidate.",
        backstory="You score alignment and produce structured evidence for ranking.",
        tools=[match_candidate_tool],
        verbose=False,
    )
    return Agent(**config)
57
+
58
def _ranker_agent():
    """Factory for the Ranker agent (wraps build_ranking_tool)."""
    config = dict(
        role="Ranker",
        goal="Build a final ranking JSON from match files.",
        backstory="You turn match outputs into a clean Top-K ranking.",
        tools=[build_ranking_tool],
        verbose=False,
    )
    return Agent(**config)
66
+
67
+
68
# ---------- Crew wrappers (called by app.py) ----------
def crew_step1_generate_jd(jd_text: str, jd_path: str) -> dict:
    """Run the JD Analyst crew on *jd_text* and persist the rubric to *jd_path*.

    Returns the parsed rubric dict (also written to disk as JSON).
    """
    _ensure_openai_env()
    jd_path = Path(jd_path)
    jd_path.parent.mkdir(parents=True, exist_ok=True)

    # Create the agent exactly once. The previous code called _jd_agent()
    # twice, so the Task's agent instance was not the one registered in the
    # Crew's agents list.
    agent = _jd_agent()

    task = Task(
        description=(
            "Use generate_jd_rubric_tool on the provided JD text and return the JSON rubric.\n"
            "Return ONLY the JSON."
        ),
        expected_output="A valid JSON object as a string.",
        agent=agent,
    )

    crew = Crew(
        agents=[agent],
        tasks=[task],
        process=Process.sequential,
        verbose=False,
        planning=False,
    )

    result = crew.kickoff(inputs={"jd_text": jd_text})
    rubric = _json(str(result))

    with open(jd_path, "w", encoding="utf-8") as f:
        json.dump(rubric, f, indent=2, ensure_ascii=False)

    return rubric
98
+
99
+
100
def crew_step3_parse_resumes(resume_dir: str, cand_dir: str) -> str:
    """Parse every PDF/DOCX resume in *resume_dir* into candidate JSON files.

    A single-task crew is kicked off per resume; individual failures are
    counted but do not abort the batch. Returns a summary string.
    """
    _ensure_openai_env()
    resume_dir = Path(resume_dir)
    cand_dir = Path(cand_dir)
    cand_dir.mkdir(parents=True, exist_ok=True)

    resume_files = sorted(p for p in resume_dir.iterdir() if p.suffix.lower() in (".pdf", ".docx"))
    if not resume_files:
        return "No resumes found in data/resumes. Upload resumes first."

    agent = _resume_agent()
    ok = 0
    failed = 0

    for path in resume_files:
        try:
            with open(path, "rb") as f:
                resume_text = load_text_from_file(f)

            task = Task(
                description=(
                    f"Parse this resume into candidate JSON.\n"
                    f"Filename: {path.name}\n"
                    "Call parse_resume_tool(resume_text, filename) and return ONLY JSON."
                ),
                expected_output="A valid candidate JSON object as a string.",
                agent=agent,
            )
            crew = Crew(agents=[agent], tasks=[task], process=Process.sequential, verbose=False, planning=False)
            kicked = crew.kickoff(inputs={"resume_text": resume_text, "filename": path.name})

            profile = _json(str(kicked))
            destination = cand_dir / f"{path.stem}.json"
            with open(destination, "w", encoding="utf-8") as out:
                json.dump(profile, out, indent=2, ensure_ascii=False)
        except Exception:
            # Best-effort batch: a single bad resume must not stop the rest.
            failed += 1
        else:
            ok += 1

    return f"✅ Parsed {ok} resume(s) into {cand_dir} | ⚠️ Failed: {failed}"
140
+
141
+
142
def crew_step4_generate_matches(jd_path: str, cand_dir: str, match_dir: str) -> str:
    """Create one *_match.json per candidate by matching against the JD rubric.

    A single-task crew is kicked off per candidate file; per-candidate
    failures are counted without aborting the batch. Returns a summary string.
    """
    _ensure_openai_env()
    jd_path = Path(jd_path)
    cand_dir = Path(cand_dir)
    match_dir = Path(match_dir)
    match_dir.mkdir(parents=True, exist_ok=True)

    if not jd_path.exists():
        return "No JD rubric found. Run Step 1 first."

    cand_files = sorted(p for p in cand_dir.iterdir() if p.suffix.lower() == ".json")
    if not cand_files:
        return "No candidates found. Run Step 3 first."

    with open(jd_path, "r", encoding="utf-8") as f:
        jd_rubric_json = json.dumps(json.load(f), ensure_ascii=False)

    agent = _matcher_agent()
    created = 0
    failed = 0

    for cf in cand_files:
        try:
            with open(cf, "r", encoding="utf-8") as f:
                candidate_json = json.dumps(json.load(f), ensure_ascii=False)

            task = Task(
                description=(
                    f"Create a JD-aligned match JSON for candidate file {cf.name}.\n"
                    "Call match_candidate_tool(jd_rubric_json, candidate_json) and return ONLY JSON."
                ),
                expected_output="A valid match JSON object as a string.",
                agent=agent,
            )
            crew = Crew(agents=[agent], tasks=[task], process=Process.sequential, verbose=False, planning=False)
            kicked = crew.kickoff(inputs={"jd_rubric_json": jd_rubric_json, "candidate_json": candidate_json})

            match_obj = _json(str(kicked))
            destination = match_dir / f"{cf.stem}_match.json"
            with open(destination, "w", encoding="utf-8") as out:
                json.dump(match_obj, out, indent=2, ensure_ascii=False)
        except Exception:
            # Best-effort batch: one bad candidate must not stop the rest.
            failed += 1
        else:
            created += 1

    return f"✅ Created {created} match file(s) in {match_dir} | ⚠️ Failed: {failed}"
190
+
191
+
192
def crew_step5_rank(top_k: int, match_dir: str) -> dict:
    """Kick off the Ranker crew and persist the ranking to data/ranking.json.

    Returns the parsed ranking dict.
    """
    _ensure_openai_env()

    # build_ranking() already reads from data/matches internally in the
    # current design; this crew step just triggers it via a tool.
    k = int(top_k)
    agent = _ranker_agent()
    task = Task(
        description=f"Build Top-{k} ranking JSON using build_ranking_tool(top_k). Return ONLY JSON.",
        expected_output="A valid ranking JSON object as a string.",
        agent=agent,
    )
    crew = Crew(agents=[agent], tasks=[task], process=Process.sequential, verbose=False, planning=False)
    outcome = crew.kickoff(inputs={"top_k": k})
    ranking = _json(str(outcome))

    # Save alongside the app's existing behavior.
    destination = Path("data/ranking.json")
    destination.parent.mkdir(parents=True, exist_ok=True)
    with open(destination, "w", encoding="utf-8") as f:
        json.dump(ranking, f, indent=2, ensure_ascii=False)

    return ranking
core/crew_tools.py ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ from pathlib import Path
3
+ from typing import List, Dict, Any
4
+
5
+ from crewai.tools import tool
6
+
7
+ from core.jd_processor import generate_jd_rubric
8
+ from core.resume_parser import parse_resume
9
+ from core.matcher import match_candidate_to_jd
10
+ from core.ranking import build_ranking
11
+ from utils.file_loader import load_text_from_file
12
+
13
+
14
@tool("generate_jd_rubric_tool")
def generate_jd_rubric_tool(jd_text: str) -> str:
    """Generate a structured JD rubric JSON (string) from JD text."""
    # Delegate to the existing processor and serialize its dict result.
    return json.dumps(generate_jd_rubric(jd_text), ensure_ascii=False)
19
+
20
+
21
@tool("parse_resume_tool")
def parse_resume_tool(resume_text: str, filename: str) -> str:
    """Parse a resume into candidate JSON (string) from resume text."""
    # Delegate to the existing parser and serialize its dict result.
    profile = parse_resume(resume_text, filename)
    return json.dumps(profile, ensure_ascii=False)
26
+
27
+
28
@tool("match_candidate_tool")
def match_candidate_tool(jd_rubric_json: str, candidate_json: str) -> str:
    """Match a candidate against JD rubric; returns match JSON (string)."""
    # Decode both JSON payloads, run the matcher, and re-serialize the result.
    result = match_candidate_to_jd(json.loads(jd_rubric_json), json.loads(candidate_json))
    return json.dumps(result, ensure_ascii=False)
35
+
36
+
37
@tool("build_ranking_tool")
def build_ranking_tool(top_k: int) -> str:
    """Build ranking from data/matches; returns ranking JSON (string)."""
    # Coerce top_k defensively (LLM tool calls may pass it as a string).
    return json.dumps(build_ranking(top_k=int(top_k)), ensure_ascii=False)
core/jd_processor.py ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # %%writefile core/jd_processor.py
2
+ import logging
3
+ import json
4
+ import os
5
+ from openai import OpenAI, APIError
6
+ from utils.prompts import JD_PROMPT
7
+
8
+ logger = logging.getLogger(__name__)
9
+ client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
10
+
11
def generate_jd_rubric(jd_text, max_retries=3):
    """Generate a JD rubric dict from raw JD text via the OpenAI API.

    Retries up to *max_retries* times on malformed or schema-incomplete
    responses, and falls back to get_empty_template() when all attempts fail
    or an unexpected error occurs.
    """
    required_keys = {"role_title", "must_have_skills", "nice_to_have_skills",
                     "soft_skills", "minimum_years_experience", "recommended_weights"}

    for attempt in range(max_retries):
        try:
            response = client.chat.completions.create(
                model="gpt-4o-mini",
                messages=[
                    {"role": "system", "content": "You are an experienced HR Talent Manager AI assistant. Your specialty is analyzing job descriptions and creating structured hiring rubrics."},
                    {"role": "user", "content": JD_PROMPT.format(jd_text=jd_text)}
                ],
                temperature=0.2,
                response_format={"type": "json_object"}  # force a JSON response
            )

            content = response.choices[0].message.content
            # Lazy %-args: the message is only built when DEBUG logging is on.
            logger.debug("LLM response (attempt %d): %.200s...", attempt + 1, content)

            jd_data = json.loads(content)  # response_format guarantees bare JSON

            if not all(key in jd_data for key in required_keys):
                raise ValueError("Missing required keys in response")

            return jd_data

        except ValueError as e:
            # Covers json.JSONDecodeError (a ValueError subclass) AND the
            # missing-keys ValueError above. Previously the latter fell into
            # the generic handler below and returned the empty template
            # immediately, defeating the retry loop.
            logger.warning("Invalid rubric (attempt %d): %s", attempt + 1, e)
            if attempt == max_retries - 1:
                return get_empty_template()
        except APIError as e:
            logger.error("OpenAI API error: %s", e)
            if attempt == max_retries - 1:
                return get_empty_template()
        except Exception as e:
            # Unknown failure mode: don't spin on it, bail out once.
            logger.error("Unexpected error: %s", e)
            return get_empty_template()

    return get_empty_template()
54
+
55
def get_empty_template():
    """Return an empty rubric dict with every schema key present."""
    template = {}
    template["role_title"] = ""
    template["must_have_skills"] = []
    template["nice_to_have_skills"] = []
    template["soft_skills"] = []
    template["minimum_years_experience"] = 0
    template["recommended_weights"] = {}
    return template
core/matcher.py ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # %%writefile core/matcher.py
2
+ import json
3
+ from openai import OpenAI
4
+ from utils.prompts import JD_RESUME_MATCH_PROMPT
5
+ import os
6
+
7
+ client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
8
+
9
def match_candidate_to_jd(jd_rubric, candidate_profile):
    """Ask the LLM to compare a candidate profile against the JD rubric.

    Returns the match-summary dict parsed from the model's JSON output.
    Raises json.JSONDecodeError if no JSON object can be recovered.
    """
    response = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[
            {
                "role": "system",
                "content": JD_RESUME_MATCH_PROMPT
            },
            {
                "role": "user",
                "content": f"""
Job Description Rubric:
{json.dumps(jd_rubric, indent=2)}

Candidate Profile:
{json.dumps(candidate_profile, indent=2)}
"""
            }
        ],
        temperature=0.2
    )

    content = response.choices[0].message.content.strip()
    # Models sometimes wrap JSON in markdown fences or prose; extract the
    # outermost {...} span before parsing (same recovery strategy as
    # core/resume_parser.py). Previously a fenced reply crashed json.loads.
    start = content.find("{")
    end = content.rfind("}") + 1
    if start != -1 and end > start:
        content = content[start:end]
    return json.loads(content)
core/ranking.py ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # %%writefile core/ranking.py
2
+ import os
3
+ import json
4
+ from core.scoring import score_candidate
5
+
6
JD_PATH = "data/jd.json"        # rubric produced by Step 1
MATCH_DIR = "data/matches"      # *_match.json files produced by Step 4
OUT_PATH = "data/ranking.json"  # where the final ranking is persisted


def build_ranking(top_k: int = 10):
    """Score every match file against the JD rubric and rank candidates.

    Returns a dict with the role title, the Top-K slice, and the full sorted
    candidate list; the same dict is written to OUT_PATH.
    """
    top_k = int(top_k)  # tolerate float/str values coming from UI widgets

    with open(JD_PATH, "r", encoding="utf-8") as f:
        jd_rubric = json.load(f)

    rows = []
    for fname in os.listdir(MATCH_DIR):
        if not fname.endswith("_match.json"):
            continue

        fpath = os.path.join(MATCH_DIR, fname)
        with open(fpath, "r", encoding="utf-8") as f:
            match_summary = json.load(f)

        scored = score_candidate(jd_rubric, match_summary)
        scored["match_file"] = fname  # traceability back to the Step-4 file
        rows.append(scored)

    rows.sort(key=lambda x: x["total_score"], reverse=True)

    result = {
        "jd_role_title": jd_rubric.get("role_title", ""),
        "top_k": top_k,
        "ranking": rows[:top_k],
        "all_candidates": rows,
    }

    # ensure_ascii=False for consistency with every other JSON writer in the
    # project (non-ASCII candidate names were previously \u-escaped here).
    with open(OUT_PATH, "w", encoding="utf-8") as f:
        json.dump(result, f, indent=2, ensure_ascii=False)

    return result
41
+
core/resume_parser.py ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # %%writefile core/resume_parser.py
2
+ import os
3
+ import json
4
+ from openai import OpenAI
5
+ from utils.prompts import RESUME_PARSE_PROMPT
6
+
7
+ client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
8
+
9
def parse_resume(resume_text, filename):
    """Extract a structured candidate-profile dict from raw resume text.

    Falls back to an empty-but-complete profile when the model output is not
    parseable. The source *filename* is recorded as candidate_id either way.
    """
    response = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[
            {"role": "system", "content": "You are an expert HR resume analyst specializing in extracting structured information from resumes with high accuracy."},
            {"role": "user", "content": RESUME_PARSE_PROMPT.format(resume_text=resume_text)}
        ],
        temperature=0.2
    )

    content = response.choices[0].message.content

    # Pull the outermost {...} span out of the reply; the model may wrap the
    # JSON in prose or markdown fences.
    try:
        first = content.find("{")
        last = content.rfind("}") + 1
        data = json.loads(content[first:last])
    except Exception:
        # Unparseable reply: return an empty profile with all expected keys.
        data = {
            "name": "",
            "skills": [],
            "education": [],
            "work_experience": [],
            "total_years_experience": 0,
            "summary": ""
        }

    data["candidate_id"] = filename
    return data
core/scoring.py ADDED
@@ -0,0 +1,113 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # %%writefile core/scoring.py
2
+ from __future__ import annotations
3
+ from dataclasses import dataclass
4
+ from typing import Dict, Any, List
5
+
6
+
7
+ def _safe_len(x) -> int:
8
+ return len(x) if isinstance(x, list) else 0
9
+
10
+
11
def normalize_weights(weights: Dict[str, Any]) -> Dict[str, float]:
    """Normalize the rubric's recommended weights so they sum to 1.0.

    Falls back to a fixed default split when the provided weights are
    missing or their total is zero/negative.
    """
    raw = {
        "must_have": float(weights.get("must_have", 0)),
        "nice_to_have": float(weights.get("nice_to_have", 0)),
        "experience": float(weights.get("experience", 0)),
        "soft_skills": float(weights.get("soft_skills", 0)),
    }

    total = sum(raw.values())
    if total <= 0:
        # Sensible default emphasis when the rubric provides no usable weights.
        return {"must_have": 0.6, "nice_to_have": 0.25, "experience": 0.15, "soft_skills": 0.0}

    return {name: value / total for name, value in raw.items()}
27
+
28
+
29
def compute_coverage(matched: List[Any], missing: List[Any], partial: List[Any] | None = None) -> float:
    """Fraction of requirements covered, counting partial matches as half.

    Returns 0.0 when there are no requirements at all.
    """
    full = _safe_len(matched)
    absent = _safe_len(missing)
    half = 0 if partial is None else _safe_len(partial)

    total = full + absent + half
    if total == 0:
        return 0.0
    return (full + 0.5 * half) / total
39
+
40
+
41
def experience_score(assessment: str) -> float:
    """Map an experience assessment label onto {0.0, 1.0}.

    'meets' and 'exceeds' earn full credit; 'below', unknown labels, and
    empty/None input earn nothing.
    """
    label = (assessment or "").strip().lower()
    return 1.0 if label in ("meets", "exceeds") else 0.0
48
+
49
+
50
def clamp(x: float, lo: float, hi: float) -> float:
    """Clamp *x* into the closed interval [lo, hi]."""
    if x < lo:
        return lo
    if x > hi:
        return hi
    return x
52
+
53
+
54
def score_candidate(jd_rubric: Dict[str, Any], match_summary: Dict[str, Any]) -> Dict[str, Any]:
    """Turn a Step-4 match summary into a transparent 0-100 score.

    Combines weighted coverage of must-have / nice-to-have skills and the
    experience assessment, then applies a capped bonus/penalty adjustment
    derived from the positive/negative indicators.
    """
    weights = normalize_weights(jd_rubric.get("recommended_weights", {}))

    must = match_summary.get("must_have_match", {}) or {}
    nice = match_summary.get("nice_to_have_match", {}) or {}
    experience = match_summary.get("experience_analysis", {}) or {}

    must_cov = compute_coverage(
        matched=must.get("matched", []),
        missing=must.get("missing", []),
        partial=must.get("partial", []),
    )
    nice_cov = compute_coverage(
        matched=nice.get("matched", []),
        missing=nice.get("missing", []),
        partial=None,
    )
    exp_sc = experience_score(experience.get("assessment", ""))

    # Soft skills scoring (optional). Kept at 0 for the student MVP.
    soft_sc = 0.0

    base_total = (
        weights["must_have"] * must_cov +
        weights["nice_to_have"] * nice_cov +
        weights["experience"] * exp_sc +
        weights["soft_skills"] * soft_sc
    ) * 100.0

    # Bonus/penalty from Step-4 indicators: transparent, and capped so the
    # adjustment cannot dominate the weighted base score.
    positives = match_summary.get("positive_indicators", []) or []
    negatives = match_summary.get("negative_indicators", []) or []

    bonus_per_positive = 1.0
    penalty_per_negative = 1.5
    adjustment_cap = 8.0
    raw_adjustment = (len(positives) * bonus_per_positive) - (len(negatives) * penalty_per_negative)
    adjustment = clamp(raw_adjustment, -adjustment_cap, adjustment_cap)

    final_total = clamp(base_total + adjustment, 0.0, 100.0)

    return {
        "candidate_name": match_summary.get("candidate_name", ""),
        "total_score": round(final_total, 2),
        "breakdown": {
            "base_score": round(base_total, 2),
            "bonus_penalty_adjustment": round(adjustment, 2),
            "positive_count": len(positives),
            "negative_count": len(negatives),
            "must_have_coverage": round(must_cov, 3),
            "nice_to_have_coverage": round(nice_cov, 3),
            "experience_score": round(exp_sc, 3),
            "weights_normalized": weights
        }
    }
113
+
requirements.txt ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ gradio==5.50.0
2
+ numpy==2.0.2
3
+ pandas==2.2.2
4
+ openai==2.12.0
5
+ pydantic==2.12.3
6
+ PyPDF2==3.0.1
7
+ python-docx==1.2.0
8
+ python-dotenv==1.2.1
9
+ tqdm==4.67.1
10
+
11
+ # CrewAI core (enough for our custom tool wrappers)
12
+ crewai==0.175.0
13
+ crewai-tools
utils/__init__.py ADDED
@@ -0,0 +1,2 @@
 
 
 
1
# utils/__init__.py
utils/file_loader.py ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # %%writefile file_loader.py
2
+ from PyPDF2 import PdfReader
3
+ from docx import Document
4
+
5
+
6
def load_text_from_file(uploaded_file):
    """Extract plain text from an uploaded PDF/DOCX/TXT file object.

    *uploaded_file* must expose .name (for extension detection); PdfReader /
    Document consume the object directly, TXT uses .read(). Raises ValueError
    for unsupported or unprocessable input.
    """
    if "." not in uploaded_file.name:
        raise ValueError("File has no extension")

    file_type = uploaded_file.name.split(".")[-1].lower()

    try:
        if file_type == "pdf":
            reader = PdfReader(uploaded_file)
            # extract_text() returns None for pages with no extractable text
            # (e.g. scanned images); the previous code crashed on such pages
            # because "\n".join() rejects None.
            return "\n".join((page.extract_text() or "") for page in reader.pages)

        elif file_type in ["docx", "doc"]:
            # NOTE(review): python-docx only reads .docx; a legacy .doc will
            # fail here and surface as the ValueError below — confirm intent.
            doc = Document(uploaded_file)
            return "\n".join(para.text for para in doc.paragraphs)

        elif file_type == "txt":
            content = uploaded_file.read()
            # Try common encodings in order of likelihood.
            for encoding in ["utf-8", "latin-1", "cp1252"]:
                try:
                    return content.decode(encoding)
                except UnicodeDecodeError:
                    continue
            raise ValueError("Unable to decode text file")

        else:
            raise ValueError(f"Unsupported file type: {file_type}")

    except ImportError as e:
        raise ImportError(f"Required library not installed: {e}")
    except Exception as e:
        # Normalize all processing failures into ValueError for callers.
        raise ValueError(f"Error processing file: {e}")
utils/prompts.py ADDED
@@ -0,0 +1,84 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# %%writefile utils/prompts.py
# Prompt templates for the three LLM calls in the pipeline.

# Step 1 (core/jd_processor.py): JD text -> hiring rubric.
# Filled via JD_PROMPT.format(jd_text=...).
# NOTE(review): this asks for "minimum education requirements" and an
# "education" weight, but core/jd_processor.py does not validate either key
# and core/scoring.py ignores the education weight — confirm intended schema.
JD_PROMPT = """
Given the following Job Description, extract a structured hiring rubric.

Return STRICT JSON with the following fields:
- role_title
- must_have_skills (list)
- nice_to_have_skills (list)
- soft_skills (list)
- minimum_years_experience (number)
- minimum education requirements (list)
- recommended_weights (object with must_have, nice_to_have, experience, education, soft_skills)

Job Description:
----------------
{jd_text}
"""

# Step 3 (core/resume_parser.py): resume text -> candidate profile.
# Filled via RESUME_PARSE_PROMPT.format(resume_text=...).
RESUME_PARSE_PROMPT = """
You are an experienced HR resume analyst.

Given the following resume text, extract a structured candidate profile.

Return STRICT JSON with these fields:
- name
- technical skills (list)
- soft skills (list)
- education (list)
- work_experience (list of short role summaries)
- total_years_experience (number)
- summary (2–3 sentence professional summary)

Resume Text:
-------------
{resume_text}

IMPORTANT:
- Output JSON only
- Do not include explanations
"""

# Step 4 (core/matcher.py): used verbatim as the system message; the rubric
# and candidate JSON are supplied in the user message. The literal braces in
# the schema are safe because this string is never passed through .format().
JD_RESUME_MATCH_PROMPT = """
You are an experienced HR hiring analyst.

You will be given:
1. A structured Job Description rubric (JSON)
2. A structured candidate profile (JSON)

Your task:
- Compare the candidate against the JD rubric
- Identify matches, partial matches, and gaps
- Identify positive and negative indicators
- DO NOT calculate a score
- Be factual and conservative
- Do NOT infer sensitive personal attributes
- Output ONLY valid JSON in the specified schema

Matching rules:
- A skill is matched if clearly demonstrated in experience or skills
- Partial if loosely related or implied
- Missing if not found

Output JSON schema:
{
"candidate_name": "",
"must_have_match": {
"matched": [],
"missing": [],
"partial": []
},
"nice_to_have_match": {
"matched": [],
"missing": []
},
"experience_analysis": {
"required_years": 0,
"candidate_years": 0,
"assessment": "below | meets | exceeds"
},
"positive_indicators": [],
"negative_indicators": [],
"overall_fit_summary": ""
}
"""
utils/resume_loader.py ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # %%writefile utils/resume_loader.py
2
+ import os
3
+
4
def load_resume_files(resume_dir="data/resumes"):
    """List paths of visible .pdf/.docx/.txt files directly under *resume_dir*."""
    allowed = (".pdf", ".docx", ".txt")
    paths = []
    for entry in os.listdir(resume_dir):
        if entry.startswith("."):
            continue  # skip hidden files (e.g. .DS_Store)
        if not entry.lower().endswith(allowed):
            continue
        full = os.path.join(resume_dir, entry)
        if os.path.isfile(full):
            paths.append(full)
    return paths