Spaces:
Sleeping
Sleeping
Upload 15 files
Browse files- README.md +13 -14
- app.py +260 -0
- core/__init__.py +2 -0
- core/crew_pipeline.py +214 -0
- core/crew_tools.py +41 -0
- core/jd_processor.py +64 -0
- core/matcher.py +32 -0
- core/ranking.py +41 -0
- core/resume_parser.py +37 -0
- core/scoring.py +113 -0
- requirements.txt +13 -0
- utils/__init__.py +2 -0
- utils/file_loader.py +36 -0
- utils/prompts.py +84 -0
- utils/resume_loader.py +11 -0
README.md
CHANGED
|
@@ -1,14 +1,13 @@
|
|
| 1 |
-
---
|
| 2 |
-
title: Ai Resume
|
| 3 |
-
emoji:
|
| 4 |
-
colorFrom:
|
| 5 |
-
colorTo:
|
| 6 |
-
sdk: gradio
|
| 7 |
-
sdk_version: 6.
|
| 8 |
-
app_file: app.py
|
| 9 |
-
pinned: false
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
|
|
| 1 |
+
---
|
| 2 |
+
title: Ai Resume Match
|
| 3 |
+
emoji: 🚀
|
| 4 |
+
colorFrom: pink
|
| 5 |
+
colorTo: purple
|
| 6 |
+
sdk: gradio
|
| 7 |
+
sdk_version: 6.2.0
|
| 8 |
+
app_file: app.py
|
| 9 |
+
pinned: false
|
| 10 |
+
short_description: iti123-project
|
| 11 |
+
---
|
| 12 |
+
|
| 13 |
+
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
|
app.py
ADDED
|
@@ -0,0 +1,260 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os, json, shutil
|
| 2 |
+
from pathlib import Path
|
| 3 |
+
|
| 4 |
+
import gradio as gr
|
| 5 |
+
import pandas as pd
|
| 6 |
+
|
| 7 |
+
from dotenv import load_dotenv
|
| 8 |
+
load_dotenv()
|
| 9 |
+
|
| 10 |
+
from utils.file_loader import load_text_from_file
|
| 11 |
+
from core.crew_pipeline import (
|
| 12 |
+
crew_step1_generate_jd,
|
| 13 |
+
crew_step3_parse_resumes,
|
| 14 |
+
crew_step4_generate_matches,
|
| 15 |
+
crew_step5_rank,
|
| 16 |
+
)
|
| 17 |
+
|
| 18 |
+
# ---------- Folders ----------
|
| 19 |
+
RESUME_DIR = Path("data/resumes")
|
| 20 |
+
CAND_DIR = Path("data/candidates")
|
| 21 |
+
MATCH_DIR = Path("data/matches")
|
| 22 |
+
DATA_DIR = Path("data")
|
| 23 |
+
|
| 24 |
+
for p in [RESUME_DIR, CAND_DIR, MATCH_DIR]:
|
| 25 |
+
p.mkdir(parents=True, exist_ok=True)
|
| 26 |
+
|
| 27 |
+
JD_PATH = DATA_DIR / "jd.json"
|
| 28 |
+
|
| 29 |
+
# ---------- Theme CSS ----------
|
| 30 |
+
def theme_css(mode: str) -> str:
    """Return the full CSS string for the requested reader theme.

    A shared ``base`` stylesheet (layout, typography, code blocks) is
    concatenated with one of three palettes: "Light", "HF Dark", or
    "High Contrast".  Unknown modes fall back to the HF Dark palette.
    """
    # Rules shared by every theme: container width, font sizing, and
    # monospace styling for code blocks / the CodeMirror editor.
    base = """
    .gradio-container { max-width: 1200px !important; margin: 0 auto !important; padding-bottom: 24px !important; }
    .gradio-container, .gradio-container * { font-size: 16px; line-height: 1.55; }
    button, .gr-button, .gr-input, .gr-textbox, .gr-file, .gr-dropdown { padding-top: 10px !important; padding-bottom: 10px !important; }
    pre, code, .cm-editor, .cm-scroller {
      font-family: ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, "Liberation Mono", "Courier New", monospace !important;
      font-size: 15px !important; line-height: 1.65 !important; border-radius: 10px !important;
    }
    .cm-scroller { min-height: 420px !important; }
    """

    # Light palette: dark text on light panels.
    light = """
    :root { --panel-border: #e5e7eb; --code-bg: #f8fafc; --code-fg: #0f172a; }
    body { background: #f5f5f5; }
    pre, code, .cm-editor { background-color: var(--code-bg) !important; color: var(--code-fg) !important; border: 1px solid var(--panel-border) !important; }
    """

    # Hugging-Face-style dark palette with orange active-tab accent.
    hf_dark = """
    :root {
      --panel-border: #1f2937;
      --code-bg: #0f172a;
      --code-fg: #e5e7eb;
      --title-fg: #ffffff;
      --header-fg: #d1d5db;
      --muted-fg: #9ca3af;
    }
    body { background: #0b1220; }
    h1, h1 * { color: var(--title-fg) !important; font-weight: 700 !important; }
    h2, h3, h4, h2 *, h3 *, h4 * { color: var(--header-fg) !important; font-weight: 600 !important; }
    button[role="tab"] { color: var(--muted-fg) !important; font-weight: 500 !important; }
    button[role="tab"][aria-selected="true"] { color: #fb923c !important; font-weight: 700 !important; }
    pre, code, .cm-editor { background-color: var(--code-bg) !important; color: var(--code-fg) !important; border: 1px solid var(--panel-border) !important; }
    """

    # Accessibility palette: pure black/white with yellow active-tab accent.
    high_contrast = """
    :root {
      --panel-border: #ffffff;
      --code-bg: #000000;
      --code-fg: #ffffff;
      --title-fg: #ffffff;
      --header-fg: #ffffff;
      --muted-fg: #e5e7eb;
    }
    body { background: #000000; }
    h1, h1 * { color: var(--title-fg) !important; font-weight: 800 !important; }
    h2, h3, h4, h2 *, h3 *, h4 * { color: var(--header-fg) !important; font-weight: 700 !important; }
    button[role="tab"] { color: var(--muted-fg) !important; font-weight: 600 !important; }
    button[role="tab"][aria-selected="true"] { color: #ffd500 !important; font-weight: 800 !important; }
    pre, code, .cm-editor { background-color: var(--code-bg) !important; color: var(--code-fg) !important; border: 2px solid var(--panel-border) !important; }
    """

    css_map = {"Light": light, "HF Dark": hf_dark, "High Contrast": high_contrast}
    # Fallback to the dark palette for any unrecognized mode string.
    return base + css_map.get(mode, hf_dark)
|
| 84 |
+
|
| 85 |
+
|
| 86 |
+
def _ensure_api_key():
|
| 87 |
+
if not os.getenv("OPENAI_API_KEY"):
|
| 88 |
+
raise RuntimeError(
|
| 89 |
+
"OPENAI_API_KEY is not set. "
|
| 90 |
+
"On Hugging Face: Settings → Secrets → OPENAI_API_KEY. "
|
| 91 |
+
"On local: set environment variable before running."
|
| 92 |
+
)
|
| 93 |
+
|
| 94 |
+
|
| 95 |
+
def _pretty_json(obj) -> str:
|
| 96 |
+
return json.dumps(obj, indent=2, ensure_ascii=False)
|
| 97 |
+
|
| 98 |
+
|
| 99 |
+
# ---------- Steps ----------
|
| 100 |
+
def step1_generate_jd(file_obj):
    """Step 1 UI handler: build a JD rubric from an uploaded JD file.

    Returns a (rubric_json_text, status_message) pair for the Gradio outputs.
    """
    _ensure_api_key()
    if file_obj is None:
        return "", "Please upload a JD file."

    with open(file_obj.name, "rb") as handle:
        jd_text = load_text_from_file(handle)

    # Delegate to the CrewAI pipeline; the JD Analyst agent wraps the
    # existing generate_jd_rubric and the result is persisted at JD_PATH.
    rubric = crew_step1_generate_jd(jd_text, jd_path=str(JD_PATH))
    status = f"✅ JD rubric saved to {JD_PATH}"
    return _pretty_json(rubric), status
|
| 111 |
+
|
| 112 |
+
|
| 113 |
+
def step2_save_resumes(files, overwrite):
    """Step 2 UI handler: copy uploaded resumes into RESUME_DIR.

    Existing files are skipped unless *overwrite* is truthy.  Returns a
    status string reporting saved/skipped counts.
    """
    if not files:
        return "Please upload at least one resume."

    saved = 0
    skipped = 0
    for upload in files:
        target = RESUME_DIR / Path(upload.name).name
        if target.exists() and not overwrite:
            skipped += 1
        else:
            shutil.copyfile(upload.name, target)
            saved += 1

    msg = f"✅ Saved {saved} resume(s) into {RESUME_DIR}"
    if skipped:
        msg += f" | ⏭ Skipped {skipped} duplicate(s) (overwrite=False)"
    return msg
|
| 130 |
+
|
| 131 |
+
|
| 132 |
+
def step3_parse_resumes():
    """Step 3 UI handler: parse every resume in RESUME_DIR into candidate JSON."""
    _ensure_api_key()
    supported = {".pdf", ".docx"}
    resume_files = sorted(p for p in RESUME_DIR.iterdir() if p.suffix.lower() in supported)
    if not resume_files:
        return "No resumes found in data/resumes. Upload resumes first."

    # The Resume Parser agent runs the parsing tool once per file and
    # writes candidate JSONs into CAND_DIR.
    return crew_step3_parse_resumes(
        resume_dir=str(RESUME_DIR),
        cand_dir=str(CAND_DIR),
    )
|
| 144 |
+
|
| 145 |
+
|
| 146 |
+
def step4_generate_matches():
    """Step 4 UI handler: produce one JD-aligned match JSON per candidate.

    Preconditions: the JD rubric (Step 1) and parsed candidates (Step 3)
    must already exist on disk.  Returns a status message for the UI.
    """
    _ensure_api_key()
    if not JD_PATH.exists():
        return "No JD rubric found. Run Step 1 first."
    # glob("*.json") already filters by suffix, so the per-element suffix
    # re-check in the original was redundant; just test for any hit.
    if not any(CAND_DIR.glob("*.json")):
        return "No candidates found. Run Step 3 first."

    # The Matcher agent calls the existing match_candidate_to_jd per candidate.
    return crew_step4_generate_matches(
        jd_path=str(JD_PATH),
        cand_dir=str(CAND_DIR),
        match_dir=str(MATCH_DIR),
    )
|
| 160 |
+
|
| 161 |
+
|
| 162 |
+
def step4_view_match(selected):
    """Step 4 viewer: load and pretty-print one saved match JSON file.

    Returns an empty string when nothing is selected or the file is gone.
    """
    if not selected:
        return ""
    match_path = MATCH_DIR / selected
    if not match_path.exists():
        return ""
    with open(match_path, "r", encoding="utf-8") as fh:
        payload = json.load(fh)
    return _pretty_json(payload)
|
| 170 |
+
|
| 171 |
+
|
| 172 |
+
def step5_rank(top_k):
    """Step 5 UI handler: score all match files and build a Top-K table.

    Returns (DataFrame, status_message); the DataFrame slot is None when a
    precondition (JD rubric or match files) is missing.
    """
    _ensure_api_key()
    if not JD_PATH.exists():
        return None, "No JD rubric found. Run Step 1 first."
    if not any(p.name.endswith("_match.json") for p in MATCH_DIR.iterdir()):
        return None, "No match files found. Run Step 4 first."

    ranking = crew_step5_rank(top_k=int(top_k), match_dir=str(MATCH_DIR))

    def _row(position, entry):
        # Flatten one ranking entry (plus its score breakdown) into a table row.
        detail = entry.get("breakdown", {})
        return {
            "Rank": position,
            "Candidate": entry.get("candidate_name", ""),
            "Score": entry.get("total_score", 0),
            "Base Score": detail.get("base_score", 0),
            "Adj": detail.get("bonus_penalty_adjustment", 0),
            "Pos": detail.get("positive_count", 0),
            "Neg": detail.get("negative_count", 0),
            "Must-have": detail.get("must_have_coverage", 0),
            "Nice-to-have": detail.get("nice_to_have_coverage", 0),
            "Experience": detail.get("experience_score", 0),
            "Match File": entry.get("match_file", ""),
        }

    rows = [_row(i, entry) for i, entry in enumerate(ranking["ranking"], start=1)]
    return pd.DataFrame(rows), "✅ Ranking generated (also saved to data/ranking.json)"
|
| 198 |
+
|
| 199 |
+
|
| 200 |
+
def list_match_files():
    """Return the sorted filenames of all saved *_match.json files."""
    names = (p.name for p in MATCH_DIR.iterdir())
    return sorted(name for name in names if name.endswith("_match.json"))
|
| 202 |
+
|
| 203 |
+
|
| 204 |
+
# ---------- UI ----------
# Five-tab Gradio app, one tab per pipeline step.  Handlers are the
# step*_ functions above; all state lives on disk under data/.
with gr.Blocks(title="AI-Powered Resume Screening & Ranking System") as demo:
    gr.Markdown("# 📄 AI-Powered JD-Based Resumes Ranking System")

    with gr.Row():
        # Theme picker; the selected palette is injected as a raw <style> tag.
        theme_mode = gr.Dropdown(
            choices=["Light", "HF Dark", "High Contrast"],
            value="HF Dark",
            label="Reader Theme"
        )
        style_tag = gr.HTML(value=f"<style>{theme_css('HF Dark')}</style>")

    def apply_theme(mode):
        # Re-render the <style> tag with the CSS for the chosen theme.
        return gr.HTML(value=f"<style>{theme_css(mode)}</style>")
    theme_mode.change(fn=apply_theme, inputs=theme_mode, outputs=style_tag)

    with gr.Tab("Step 1 — JD Upload & Rubric"):
        jd_file = gr.File(label="Upload JD (PDF/DOCX/TXT)")
        jd_btn = gr.Button("Generate JD Rubric")
        jd_json = gr.Code(label="JD Rubric (JSON)", language="json")
        jd_status = gr.Textbox(label="Status")
        jd_btn.click(step1_generate_jd, inputs=[jd_file], outputs=[jd_json, jd_status])

    with gr.Tab("Step 2 — Resume Batch Upload"):
        resume_files = gr.File(label="Upload resumes (PDF/DOCX)", file_count="multiple")
        overwrite = gr.Checkbox(label="Overwrite duplicates", value=False)
        save_btn = gr.Button("Save Resumes to data/resumes")
        save_status = gr.Textbox(label="Status")
        save_btn.click(step2_save_resumes, inputs=[resume_files, overwrite], outputs=[save_status])

    with gr.Tab("Step 3 — Parse Resumes (CrewAI)"):
        parse_btn = gr.Button("Parse Resumes → data/candidates")
        parse_status = gr.Textbox(label="Status")
        parse_btn.click(step3_parse_resumes, inputs=[], outputs=[parse_status])

    with gr.Tab("Step 4 — JD-aligned Summaries (CrewAI)"):
        match_btn = gr.Button("Generate match files → data/matches")
        match_status = gr.Textbox(label="Status")

        # Match-file browser: refresh the dropdown, then view one file.
        refresh_btn = gr.Button("Refresh match list")
        match_list = gr.Dropdown(choices=[], label="Select a match file")
        view_btn = gr.Button("View selected match JSON")

        match_json = gr.Code(label="Match JSON", language="json")

        match_btn.click(step4_generate_matches, inputs=[], outputs=[match_status])
        refresh_btn.click(lambda: gr.update(choices=list_match_files()), inputs=[], outputs=[match_list])
        view_btn.click(step4_view_match, inputs=[match_list], outputs=[match_json])

    with gr.Tab("Step 5 — Scoring & Ranking (CrewAI)"):
        topk = gr.Number(value=10, label="Top K", precision=0)
        rank_btn = gr.Button("Generate Ranking")
        rank_df = gr.Dataframe(label="Top K Ranking")
        rank_status = gr.Textbox(label="Status")
        rank_btn.click(step5_rank, inputs=[topk], outputs=[rank_df, rank_status])

# Local/HF-Spaces entry point; share=False keeps the app on its own host.
demo.launch(share=False)
|
core/__init__.py
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# utils/__init__.py
|
| 2 |
+
# core/__init__.py
|
core/crew_pipeline.py
ADDED
|
@@ -0,0 +1,214 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import json
|
| 2 |
+
import os
|
| 3 |
+
from pathlib import Path
|
| 4 |
+
|
| 5 |
+
from crewai import Agent, Task, Crew, Process
|
| 6 |
+
|
| 7 |
+
from utils.file_loader import load_text_from_file
|
| 8 |
+
from core.crew_tools import (
|
| 9 |
+
generate_jd_rubric_tool,
|
| 10 |
+
parse_resume_tool,
|
| 11 |
+
match_candidate_tool,
|
| 12 |
+
build_ranking_tool,
|
| 13 |
+
)
|
| 14 |
+
|
| 15 |
+
# ---------- Helpers ----------
|
| 16 |
+
def _ensure_openai_env():
|
| 17 |
+
# CrewAI expects OPENAI_API_KEY to exist (you already use this in app.py)
|
| 18 |
+
if not os.getenv("OPENAI_API_KEY"):
|
| 19 |
+
raise RuntimeError("OPENAI_API_KEY is not set (HF: Settings → Secrets → OPENAI_API_KEY).")
|
| 20 |
+
|
| 21 |
+
# Optional: pin model for CrewAI if you want consistency
|
| 22 |
+
# (CrewAI supports different LLM providers; leaving it unset is fine if defaults work in your env)
|
| 23 |
+
os.environ.setdefault("OPENAI_MODEL_NAME", "gpt-4o-mini")
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
def _json(s: str):
|
| 27 |
+
return json.loads(s)
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
# ---------- Agents ----------
|
| 31 |
+
def _jd_agent():
    """Build the JD Analyst agent (wraps generate_jd_rubric_tool)."""
    return Agent(
        role="JD Analyst",
        goal="Convert JD text into a structured rubric JSON.",
        backstory="You are an HR analyst who produces consistent rubric structures for automated screening.",
        tools=[generate_jd_rubric_tool],
        verbose=False,
    )
|
| 39 |
+
|
| 40 |
+
def _resume_agent():
    """Build the Resume Parser agent (wraps parse_resume_tool)."""
    return Agent(
        role="Resume Parser",
        goal="Extract structured candidate profile JSON from resume text.",
        backstory="You are an ATS-style parser; you output consistent JSON that downstream scoring depends on.",
        tools=[parse_resume_tool],
        verbose=False,
    )
|
| 48 |
+
|
| 49 |
+
def _matcher_agent():
    """Build the JD-Candidate Matcher agent (wraps match_candidate_tool)."""
    return Agent(
        role="JD-Candidate Matcher",
        goal="Create a JD-aligned match JSON for each candidate.",
        backstory="You score alignment and produce structured evidence for ranking.",
        tools=[match_candidate_tool],
        verbose=False,
    )
|
| 57 |
+
|
| 58 |
+
def _ranker_agent():
    """Build the Ranker agent (wraps build_ranking_tool)."""
    return Agent(
        role="Ranker",
        goal="Build a final ranking JSON from match files.",
        backstory="You turn match outputs into a clean Top-K ranking.",
        tools=[build_ranking_tool],
        verbose=False,
    )
|
| 66 |
+
|
| 67 |
+
|
| 68 |
+
# ---------- Crew wrappers (called by app.py) ----------
|
| 69 |
+
def crew_step1_generate_jd(jd_text: str, jd_path: str) -> dict:
    """Run the JD Analyst crew on *jd_text* and persist the rubric.

    Args:
        jd_text: Raw job-description text.
        jd_path: Destination path for the rubric JSON.

    Returns:
        The parsed rubric dict.  Propagates json.JSONDecodeError if the
        crew output is not valid JSON.
    """
    _ensure_openai_env()
    jd_path = Path(jd_path)
    jd_path.parent.mkdir(parents=True, exist_ok=True)

    # Build the agent ONCE and share it between the task and the crew.
    # The original called _jd_agent() twice, so the Task's agent was a
    # different instance than the one registered with the Crew.
    agent = _jd_agent()

    task = Task(
        description=(
            "Use generate_jd_rubric_tool on the provided JD text and return the JSON rubric.\n"
            "Return ONLY the JSON."
        ),
        expected_output="A valid JSON object as a string.",
        agent=agent,
    )

    crew = Crew(
        agents=[agent],
        tasks=[task],
        process=Process.sequential,
        verbose=False,
        planning=False,
    )

    result = crew.kickoff(inputs={"jd_text": jd_text})
    rubric = _json(str(result))

    with open(jd_path, "w", encoding="utf-8") as f:
        json.dump(rubric, f, indent=2, ensure_ascii=False)

    return rubric
|
| 98 |
+
|
| 99 |
+
|
| 100 |
+
def crew_step3_parse_resumes(resume_dir: str, cand_dir: str) -> str:
    """Parse every resume in *resume_dir* into candidate JSON files.

    A single-task crew is kicked off per resume.  Failures are collected
    instead of raised so that one bad file cannot abort the batch, but —
    unlike the original silent `except: failed += 1` — the failing
    filenames and exception types are reported in the summary.

    Returns:
        A human-readable summary string for the UI.
    """
    _ensure_openai_env()
    resume_dir = Path(resume_dir)
    cand_dir = Path(cand_dir)
    cand_dir.mkdir(parents=True, exist_ok=True)

    resume_files = sorted(p for p in resume_dir.iterdir() if p.suffix.lower() in {".pdf", ".docx"})
    if not resume_files:
        return "No resumes found in data/resumes. Upload resumes first."

    agent = _resume_agent()
    ok = 0
    failures = []

    for path in resume_files:
        try:
            with open(path, "rb") as f:
                resume_text = load_text_from_file(f)

            task = Task(
                description=(
                    f"Parse this resume into candidate JSON.\n"
                    f"Filename: {path.name}\n"
                    "Call parse_resume_tool(resume_text, filename) and return ONLY JSON."
                ),
                expected_output="A valid candidate JSON object as a string.",
                agent=agent,
            )
            crew = Crew(agents=[agent], tasks=[task], process=Process.sequential, verbose=False, planning=False)
            result = crew.kickoff(inputs={"resume_text": resume_text, "filename": path.name})

            candidate = _json(str(result))
            out_path = cand_dir / (path.stem + ".json")
            with open(out_path, "w", encoding="utf-8") as out:
                json.dump(candidate, out, indent=2, ensure_ascii=False)

            ok += 1
        except Exception as exc:  # best-effort batch: record and continue
            failures.append(f"{path.name} ({type(exc).__name__})")

    msg = f"✅ Parsed {ok} resume(s) into {cand_dir} | ⚠️ Failed: {len(failures)}"
    if failures:
        msg += " [" + ", ".join(failures) + "]"
    return msg
|
| 140 |
+
|
| 141 |
+
|
| 142 |
+
def crew_step4_generate_matches(jd_path: str, cand_dir: str, match_dir: str) -> str:
    """Create one JD-aligned match JSON per candidate file.

    Reads the JD rubric from *jd_path*, iterates candidate JSONs in
    *cand_dir*, and writes ``<stem>_match.json`` files into *match_dir*.
    Per-file failures are recorded (with filename and exception type)
    instead of being silently counted, and do not abort the batch.

    Returns:
        A human-readable summary string for the UI.
    """
    _ensure_openai_env()
    jd_path = Path(jd_path)
    cand_dir = Path(cand_dir)
    match_dir = Path(match_dir)
    match_dir.mkdir(parents=True, exist_ok=True)

    if not jd_path.exists():
        return "No JD rubric found. Run Step 1 first."

    cand_files = sorted(p for p in cand_dir.iterdir() if p.suffix.lower() == ".json")
    if not cand_files:
        return "No candidates found. Run Step 3 first."

    with open(jd_path, "r", encoding="utf-8") as f:
        jd_rubric = json.load(f)
    # Serialize once; the same rubric string is fed to every task.
    jd_rubric_json = json.dumps(jd_rubric, ensure_ascii=False)

    agent = _matcher_agent()
    created = 0
    failures = []

    for cf in cand_files:
        try:
            with open(cf, "r", encoding="utf-8") as f:
                candidate = json.load(f)
            candidate_json = json.dumps(candidate, ensure_ascii=False)

            task = Task(
                description=(
                    f"Create a JD-aligned match JSON for candidate file {cf.name}.\n"
                    "Call match_candidate_tool(jd_rubric_json, candidate_json) and return ONLY JSON."
                ),
                expected_output="A valid match JSON object as a string.",
                agent=agent,
            )
            crew = Crew(agents=[agent], tasks=[task], process=Process.sequential, verbose=False, planning=False)
            result = crew.kickoff(inputs={"jd_rubric_json": jd_rubric_json, "candidate_json": candidate_json})

            match_obj = _json(str(result))
            out_path = match_dir / (cf.stem + "_match.json")
            with open(out_path, "w", encoding="utf-8") as out:
                json.dump(match_obj, out, indent=2, ensure_ascii=False)

            created += 1
        except Exception as exc:  # best-effort batch: record and continue
            failures.append(f"{cf.name} ({type(exc).__name__})")

    msg = f"✅ Created {created} match file(s) in {match_dir} | ⚠️ Failed: {len(failures)}"
    if failures:
        msg += " [" + ", ".join(failures) + "]"
    return msg
|
| 190 |
+
|
| 191 |
+
|
| 192 |
+
def crew_step5_rank(top_k: int, match_dir: str) -> dict:
    """Kick off the Ranker crew and persist the resulting ranking JSON.

    build_ranking() itself reads from data/matches internally; this
    wrapper triggers it through the ranking tool, parses the crew
    output, and mirrors it to data/ranking.json.
    """
    _ensure_openai_env()

    limit = int(top_k)
    agent = _ranker_agent()
    task = Task(
        description=f"Build Top-{limit} ranking JSON using build_ranking_tool(top_k). Return ONLY JSON.",
        expected_output="A valid ranking JSON object as a string.",
        agent=agent,
    )
    crew = Crew(agents=[agent], tasks=[task], process=Process.sequential, verbose=False, planning=False)
    outcome = crew.kickoff(inputs={"top_k": limit})
    ranking = _json(str(outcome))

    # Save alongside the non-crew code path's output location.
    destination = Path("data/ranking.json")
    destination.parent.mkdir(parents=True, exist_ok=True)
    with open(destination, "w", encoding="utf-8") as f:
        json.dump(ranking, f, indent=2, ensure_ascii=False)

    return ranking
|
core/crew_tools.py
ADDED
|
@@ -0,0 +1,41 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import json
|
| 2 |
+
from pathlib import Path
|
| 3 |
+
from typing import List, Dict, Any
|
| 4 |
+
|
| 5 |
+
from crewai.tools import tool
|
| 6 |
+
|
| 7 |
+
from core.jd_processor import generate_jd_rubric
|
| 8 |
+
from core.resume_parser import parse_resume
|
| 9 |
+
from core.matcher import match_candidate_to_jd
|
| 10 |
+
from core.ranking import build_ranking
|
| 11 |
+
from utils.file_loader import load_text_from_file
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
@tool("generate_jd_rubric_tool")
def generate_jd_rubric_tool(jd_text: str) -> str:
    """Generate a structured JD rubric JSON (string) from JD text."""
    # Thin CrewAI wrapper: delegate to the plain function, return JSON text.
    rubric = generate_jd_rubric(jd_text)
    return json.dumps(rubric, ensure_ascii=False)
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
@tool("parse_resume_tool")
def parse_resume_tool(resume_text: str, filename: str) -> str:
    """Parse a resume into candidate JSON (string) from resume text."""
    # Thin CrewAI wrapper around the plain parser.
    candidate = parse_resume(resume_text, filename)
    return json.dumps(candidate, ensure_ascii=False)
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
@tool("match_candidate_tool")
def match_candidate_tool(jd_rubric_json: str, candidate_json: str) -> str:
    """Match a candidate against JD rubric; returns match JSON (string)."""
    # Tool I/O is strings (JSON text); decode, delegate, re-encode.
    jd_rubric = json.loads(jd_rubric_json)
    candidate = json.loads(candidate_json)
    match_result = match_candidate_to_jd(jd_rubric, candidate)
    return json.dumps(match_result, ensure_ascii=False)
|
| 35 |
+
|
| 36 |
+
|
| 37 |
+
@tool("build_ranking_tool")
def build_ranking_tool(top_k: int) -> str:
    """Build ranking from data/matches; returns ranking JSON (string)."""
    # build_ranking reads data/matches internally; only top_k is passed.
    ranking = build_ranking(top_k=int(top_k))
    return json.dumps(ranking, ensure_ascii=False)
|
core/jd_processor.py
ADDED
|
@@ -0,0 +1,64 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# %%writefile core/jd_processor.py
|
| 2 |
+
import logging
|
| 3 |
+
import json
|
| 4 |
+
import os
|
| 5 |
+
from openai import OpenAI, APIError
|
| 6 |
+
from utils.prompts import JD_PROMPT
|
| 7 |
+
|
| 8 |
+
logger = logging.getLogger(__name__)
|
| 9 |
+
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
|
| 10 |
+
|
| 11 |
+
def generate_jd_rubric(jd_text, max_retries=3):
    """Generate JD rubric with retry logic and better error handling.

    Calls the chat API in JSON mode, validates the returned rubric
    structure, and retries up to *max_retries* times.  On final failure
    an empty rubric template is returned instead of raising.

    Fixes vs. the original: a generic Exception no longer returns the
    empty template immediately (it now retries like the other handlers),
    and a None completion content is guarded before slicing/parsing.
    """
    required_keys = {"role_title", "must_have_skills", "nice_to_have_skills",
                     "soft_skills", "minimum_years_experience", "recommended_weights"}

    for attempt in range(max_retries):
        try:
            response = client.chat.completions.create(
                model="gpt-4o-mini",
                messages=[
                    {"role": "system", "content": "You are an experienced HR Talent Manager AI assistant. Your specialty is analyzing job descriptions and creating structured hiring rubrics."},
                    {"role": "user", "content": JD_PROMPT.format(jd_text=jd_text)}
                ],
                temperature=0.2,
                response_format={"type": "json_object"}  # force JSON-only output
            )

            content = response.choices[0].message.content
            if content is None:
                # e.g. filtered/refused completion — retryable failure
                raise ValueError("Empty completion content")
            logger.debug(f"LLM response (attempt {attempt+1}): {content[:200]}...")

            jd_data = json.loads(content)

            # Validate structure before trusting the rubric downstream.
            if not required_keys.issubset(jd_data):
                raise ValueError("Missing required keys in response")

            return jd_data

        except json.JSONDecodeError as e:
            logger.warning(f"JSON decode failed (attempt {attempt+1}): {e}")
        except APIError as e:
            logger.error(f"OpenAI API error: {e}")
        except Exception as e:
            # Previously returned get_empty_template() here immediately,
            # skipping remaining retries; treat all failures uniformly.
            logger.error(f"Unexpected error: {e}")

    # All attempts exhausted — degrade gracefully.
    return get_empty_template()
|
| 54 |
+
|
| 55 |
+
def get_empty_template():
    """Return a fresh rubric dict with all expected keys and empty/zero values."""
    template = {"role_title": ""}
    for skills_key in ("must_have_skills", "nice_to_have_skills", "soft_skills"):
        template[skills_key] = []
    template["minimum_years_experience"] = 0
    template["recommended_weights"] = {}
    return template
|
core/matcher.py
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# %%writefile core/matcher.py
|
| 2 |
+
import json
|
| 3 |
+
from openai import OpenAI
|
| 4 |
+
from utils.prompts import JD_RESUME_MATCH_PROMPT
|
| 5 |
+
import os
|
| 6 |
+
|
| 7 |
+
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
|
| 8 |
+
|
| 9 |
+
def match_candidate_to_jd(jd_rubric, candidate_profile):
    """Score *candidate_profile* against *jd_rubric* with the LLM.

    Returns the parsed match dict.  Unlike jd_processor, this call does
    not use JSON mode, so the model can wrap its JSON in markdown fences;
    those are stripped before parsing.  json.JSONDecodeError still
    propagates if the payload is not valid JSON.
    """
    response = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[
            {
                "role": "system",
                "content": JD_RESUME_MATCH_PROMPT
            },
            {
                "role": "user",
                "content": f"""
Job Description Rubric:
{json.dumps(jd_rubric, indent=2)}

Candidate Profile:
{json.dumps(candidate_profile, indent=2)}
"""
            }
        ],
        temperature=0.2
    )

    content = response.choices[0].message.content.strip()
    # Models occasionally return ```json ... ``` fenced blocks even when
    # asked for raw JSON; unwrap them so json.loads doesn't fail.
    if content.startswith("```"):
        content = content.strip("`").strip()
        if content.startswith("json"):
            content = content[4:].strip()
    return json.loads(content)
|
core/ranking.py
ADDED
|
@@ -0,0 +1,41 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# %%writefile core/ranking.py
import json
import os

from core.scoring import score_candidate

JD_PATH = "data/jd.json"
MATCH_DIR = "data/matches"
OUT_PATH = "data/ranking.json"


def build_ranking(top_k: int = 10):
    """Score every saved match summary against the JD rubric and rank them.

    Reads the JD rubric from JD_PATH and each ``*_match.json`` file in
    MATCH_DIR, scores each candidate with core.scoring.score_candidate,
    sorts by total score (descending) and writes the result to OUT_PATH.

    Args:
        top_k: number of top candidates exposed under ``"ranking"``.

    Returns:
        dict with ``jd_role_title``, ``top_k``, ``ranking`` (top slice) and
        ``all_candidates`` (full sorted list).
    """
    with open(JD_PATH, "r", encoding="utf-8") as f:
        jd_rubric = json.load(f)

    # sorted() makes the scan (and tie) order deterministic; guard against
    # the matches directory not existing yet (no candidates processed).
    match_files = sorted(os.listdir(MATCH_DIR)) if os.path.isdir(MATCH_DIR) else []

    rows = []
    for fname in match_files:
        if not fname.endswith("_match.json"):
            continue

        fpath = os.path.join(MATCH_DIR, fname)
        with open(fpath, "r", encoding="utf-8") as f:
            match_summary = json.load(f)

        scored = score_candidate(jd_rubric, match_summary)
        scored["match_file"] = fname
        rows.append(scored)

    # Highest score first; .get guards against malformed scored rows.
    rows.sort(key=lambda row: row.get("total_score", 0.0), reverse=True)

    result = {
        "jd_role_title": jd_rubric.get("role_title", ""),
        "top_k": top_k,
        "ranking": rows[:top_k],
        "all_candidates": rows,
    }

    # Ensure the output directory exists before writing.
    os.makedirs(os.path.dirname(OUT_PATH) or ".", exist_ok=True)
    with open(OUT_PATH, "w", encoding="utf-8") as f:
        json.dump(result, f, indent=2)

    return result
|
| 41 |
+
|
core/resume_parser.py
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# %%writefile core/resume_parser.py
import json
import os

from openai import OpenAI

from utils.prompts import RESUME_PARSE_PROMPT

client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

# Fallback returned when the model reply cannot be parsed, so downstream
# matching/scoring always receives a profile with the expected keys.
_EMPTY_PROFILE = {
    "name": "",
    "skills": [],
    "education": [],
    "work_experience": [],
    "total_years_experience": 0,
    "summary": ""
}


def parse_resume(resume_text, filename):
    """Extract a structured candidate profile from raw resume text via LLM.

    Args:
        resume_text: plain text extracted from the resume file.
        filename: original file name; stored as ``candidate_id``.

    Returns:
        dict: candidate profile (name, skills, education, work_experience,
        total_years_experience, summary) plus ``candidate_id``. Falls back
        to an empty profile if the model reply is not valid JSON.
    """
    response = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[
            {"role": "system", "content": "You are an expert HR resume analyst specializing in extracting structured information from resumes with high accuracy."},
            {"role": "user", "content": RESUME_PARSE_PROMPT.format(resume_text=resume_text)}
        ],
        temperature=0.2
    )

    content = response.choices[0].message.content or ""

    # Best-effort: pull the outermost JSON object out of the reply (models
    # sometimes add fences/prose); fall back to an empty profile on failure.
    try:
        start = content.find("{")
        end = content.rfind("}") + 1
        if start == -1 or end <= start:
            raise ValueError("no JSON object in model reply")
        data = json.loads(content[start:end])
    except (ValueError, json.JSONDecodeError):
        data = dict(_EMPTY_PROFILE)  # copy so callers can mutate safely

    data["candidate_id"] = filename
    return data
|
core/scoring.py
ADDED
|
@@ -0,0 +1,113 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# %%writefile core/scoring.py
from typing import Any, Dict, List, Optional


def _safe_len(x) -> int:
    """Length of *x* if it is a list, else 0 (LLM output may be malformed)."""
    return len(x) if isinstance(x, list) else 0


def _to_float(value: Any, default: float = 0.0) -> float:
    """Coerce *value* to float; fall back to *default* on junk input.

    JD rubrics come from an LLM, so weights may arrive as strings like
    "0.6" (coercible) or as "60%" / None (not coercible).
    """
    try:
        return float(value)
    except (TypeError, ValueError):
        return default


def normalize_weights(weights: Dict[str, Any]) -> Dict[str, float]:
    """Normalize rubric weights so the four categories sum to 1.0.

    Falls back to a sensible default split when the rubric supplies no
    usable (positive-total) weights at all.
    """
    w_must = _to_float(weights.get("must_have", 0))
    w_nice = _to_float(weights.get("nice_to_have", 0))
    w_exp = _to_float(weights.get("experience", 0))
    w_soft = _to_float(weights.get("soft_skills", 0))

    total = w_must + w_nice + w_exp + w_soft
    if total <= 0:
        # Default emphasis: must-haves dominate, soft skills unscored.
        return {"must_have": 0.6, "nice_to_have": 0.25, "experience": 0.15, "soft_skills": 0.0}

    return {
        "must_have": w_must / total,
        "nice_to_have": w_nice / total,
        "experience": w_exp / total,
        "soft_skills": w_soft / total
    }


def compute_coverage(matched: List[Any], missing: List[Any], partial: Optional[List[Any]] = None) -> float:
    """Fraction of requirements covered; partial matches count half.

    Returns 0.0 when there is nothing to measure (all lists empty/absent).
    """
    m = _safe_len(matched)
    miss = _safe_len(missing)
    p = _safe_len(partial) if partial is not None else 0

    denom = m + miss + p
    if denom == 0:
        return 0.0

    return (m + 0.5 * p) / denom


def experience_score(assessment: str) -> float:
    """Map the LLM's experience assessment to a 0/1 score.

    "meets"/"exceeds" -> 1.0; "below" or anything unrecognized -> 0.0.
    """
    assessment = (assessment or "").strip().lower()
    if assessment in ("meets", "exceeds"):
        return 1.0
    return 0.0


def clamp(x: float, lo: float, hi: float) -> float:
    """Clamp *x* into the inclusive range [lo, hi]."""
    return max(lo, min(hi, x))


def score_candidate(jd_rubric: Dict[str, Any], match_summary: Dict[str, Any]) -> Dict[str, Any]:
    """Turn a Step-4 match summary into a transparent 0-100 score.

    base = 100 * (w_must*must_cov + w_nice*nice_cov + w_exp*exp + w_soft*soft),
    then a capped bonus/penalty derived from positive/negative indicators is
    added and the total is clamped to [0, 100].

    Args:
        jd_rubric: JD rubric dict (uses ``recommended_weights``).
        match_summary: LLM match summary (must_have_match, nice_to_have_match,
            experience_analysis, positive/negative indicators).

    Returns:
        dict with ``candidate_name``, ``total_score`` and a full ``breakdown``.
    """
    weights = normalize_weights(jd_rubric.get("recommended_weights", {}))

    # `or {}` guards against explicit nulls in the LLM output.
    mh = match_summary.get("must_have_match", {}) or {}
    nh = match_summary.get("nice_to_have_match", {}) or {}
    exp = match_summary.get("experience_analysis", {}) or {}

    must_cov = compute_coverage(
        matched=mh.get("matched", []),
        missing=mh.get("missing", []),
        partial=mh.get("partial", [])
    )

    nice_cov = compute_coverage(
        matched=nh.get("matched", []),
        missing=nh.get("missing", []),
        partial=None
    )

    exp_sc = experience_score(exp.get("assessment", ""))

    # Soft skills scoring (optional). Keep 0 for student MVP unless you add logic later.
    soft_sc = 0.0

    base_total = (
        weights["must_have"] * must_cov +
        weights["nice_to_have"] * nice_cov +
        weights["experience"] * exp_sc +
        weights["soft_skills"] * soft_sc
    ) * 100.0

    # Bonus/Penalty based on Step-4 indicators (transparent & capped)
    positives = match_summary.get("positive_indicators", []) or []
    negatives = match_summary.get("negative_indicators", []) or []

    bonus_per_positive = 1.0
    penalty_per_negative = 1.5
    raw_adjustment = (len(positives) * bonus_per_positive) - (len(negatives) * penalty_per_negative)

    # Cap adjustment so it doesn't dominate scoring
    adjustment_cap = 8.0
    adjustment = clamp(raw_adjustment, -adjustment_cap, adjustment_cap)

    final_total = clamp(base_total + adjustment, 0.0, 100.0)

    return {
        "candidate_name": match_summary.get("candidate_name", ""),
        "total_score": round(final_total, 2),
        "breakdown": {
            "base_score": round(base_total, 2),
            "bonus_penalty_adjustment": round(adjustment, 2),
            "positive_count": len(positives),
            "negative_count": len(negatives),
            "must_have_coverage": round(must_cov, 3),
            "nice_to_have_coverage": round(nice_cov, 3),
            "experience_score": round(exp_sc, 3),
            "weights_normalized": weights
        }
    }
|
| 113 |
+
|
requirements.txt
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
gradio==5.50.0
|
| 2 |
+
numpy==2.0.2
|
| 3 |
+
pandas==2.2.2
|
| 4 |
+
openai==2.12.0
|
| 5 |
+
pydantic==2.12.3
|
| 6 |
+
PyPDF2==3.0.1
|
| 7 |
+
python-docx==1.2.0
|
| 8 |
+
python-dotenv==1.2.1
|
| 9 |
+
tqdm==4.67.1
|
| 10 |
+
|
| 11 |
+
# CrewAI core (enough for our custom tool wrappers)
|
| 12 |
+
crewai==0.175.0
|
| 13 |
+
crewai-tools
|
utils/__init__.py
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# utils/__init__.py
|
| 2 |
+
# Package marker: makes `utils` importable as a package.
|
utils/file_loader.py
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# %%writefile file_loader.py
from PyPDF2 import PdfReader
from docx import Document

# Encodings tried, in order, when decoding plain-text uploads.
_TEXT_ENCODINGS = ("utf-8", "latin-1", "cp1252")


def load_text_from_file(uploaded_file):
    """Return the plain-text content of an uploaded resume file.

    Supports PDF, DOC/DOCX and TXT uploads, dispatched on file extension.

    Args:
        uploaded_file: file-like object with a ``name`` attribute
            (e.g. a Gradio upload).

    Returns:
        str: extracted text; pages/paragraphs joined with newlines.

    Raises:
        ValueError: missing extension, unsupported type, undecodable text
            file, or a parser failure while reading the file.
    """
    if "." not in uploaded_file.name:
        raise ValueError("File has no extension")

    file_type = uploaded_file.name.rsplit(".", 1)[-1].lower()

    if file_type == "pdf":
        try:
            reader = PdfReader(uploaded_file)
            # extract_text() can return None for image-only pages; treat as "".
            return "\n".join((page.extract_text() or "") for page in reader.pages)
        except Exception as e:
            raise ValueError(f"Error processing file: {e}")

    if file_type in ("docx", "doc"):
        try:
            doc = Document(uploaded_file)
            return "\n".join(para.text for para in doc.paragraphs)
        except Exception as e:
            raise ValueError(f"Error processing file: {e}")

    if file_type == "txt":
        content = uploaded_file.read()
        for encoding in _TEXT_ENCODINGS:
            try:
                return content.decode(encoding)
            except UnicodeDecodeError:
                continue
        raise ValueError("Unable to decode text file")

    # Raised outside any try block so it is not re-wrapped into a generic
    # "Error processing file" message (bug in the previous version).
    raise ValueError(f"Unsupported file type: {file_type}")
|
utils/prompts.py
ADDED
|
@@ -0,0 +1,84 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# %%writefile utils/prompts.py
# Prompt templates for the three LLM steps of the pipeline:
#   1. JD_PROMPT            - JD text -> structured hiring rubric
#   2. RESUME_PARSE_PROMPT  - resume text -> structured candidate profile
#   3. JD_RESUME_MATCH_PROMPT - rubric + profile -> qualitative match summary
# Placeholders in braces ({jd_text}, {resume_text}) are filled via str.format.

# Step 1: extract a hiring rubric from a raw job description.
JD_PROMPT = """
Given the following Job Description, extract a structured hiring rubric.

Return STRICT JSON with the following fields:
- role_title
- must_have_skills (list)
- nice_to_have_skills (list)
- soft_skills (list)
- minimum_years_experience (number)
- minimum education requirements (list)
- recommended_weights (object with must_have, nice_to_have, experience, education, soft_skills)

Job Description:
----------------
{jd_text}
"""

# Step 2: extract a structured candidate profile from resume text.
RESUME_PARSE_PROMPT = """
You are an experienced HR resume analyst.

Given the following resume text, extract a structured candidate profile.

Return STRICT JSON with these fields:
- name
- technical skills (list)
- soft skills (list)
- education (list)
- work_experience (list of short role summaries)
- total_years_experience (number)
- summary (2–3 sentence professional summary)

Resume Text:
-------------
{resume_text}

IMPORTANT:
- Output JSON only
- Do not include explanations
"""

# Step 3: qualitative JD-vs-candidate comparison (no scoring here; the
# numeric score is computed deterministically in core/scoring.py).
JD_RESUME_MATCH_PROMPT = """
You are an experienced HR hiring analyst.

You will be given:
1. A structured Job Description rubric (JSON)
2. A structured candidate profile (JSON)

Your task:
- Compare the candidate against the JD rubric
- Identify matches, partial matches, and gaps
- Identify positive and negative indicators
- DO NOT calculate a score
- Be factual and conservative
- Do NOT infer sensitive personal attributes
- Output ONLY valid JSON in the specified schema

Matching rules:
- A skill is matched if clearly demonstrated in experience or skills
- Partial if loosely related or implied
- Missing if not found

Output JSON schema:
{
  "candidate_name": "",
  "must_have_match": {
    "matched": [],
    "missing": [],
    "partial": []
  },
  "nice_to_have_match": {
    "matched": [],
    "missing": []
  },
  "experience_analysis": {
    "required_years": 0,
    "candidate_years": 0,
    "assessment": "below | meets | exceeds"
  },
  "positive_indicators": [],
  "negative_indicators": [],
  "overall_fit_summary": ""
}
"""
|
utils/resume_loader.py
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# %%writefile utils/resume_loader.py
import os

# Resume formats the pipeline can parse (see utils/file_loader.py).
_ALLOWED_EXTENSIONS = (".pdf", ".docx", ".txt")


def load_resume_files(resume_dir="data/resumes"):
    """Return sorted paths of parseable resume files in *resume_dir*.

    Only regular files with a supported extension are included; hidden
    files (dotfiles) are skipped. Sorting makes the processing order
    deterministic across runs (os.listdir order is arbitrary).

    Args:
        resume_dir: directory to scan (default ``data/resumes``).

    Returns:
        list[str]: full file paths, sorted by name.
    """
    return sorted(
        os.path.join(resume_dir, f)
        for f in os.listdir(resume_dir)
        if os.path.isfile(os.path.join(resume_dir, f))
        and f.lower().endswith(_ALLOWED_EXTENSIONS)
        and not f.startswith(".")
    )
|