Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -16,7 +16,7 @@ from openai import OpenAI
|
|
| 16 |
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
|
| 17 |
|
| 18 |
# Global Configuration
|
| 19 |
-
DEEPINFRA_API_KEY = "kPEm10rrnxXrCf0TuB6Xcd7Y7lp3YgKa"
|
| 20 |
DEEPINFRA_BASE_URL = "https://api.deepinfra.com/v1/openai"
|
| 21 |
DEFAULT_MODEL = "Qwen/Qwen3-32B"
|
| 22 |
REQUEST_TIMEOUT_SECS = 120
|
|
@@ -27,7 +27,7 @@ default_client = OpenAI(
|
|
| 27 |
base_url=DEEPINFRA_BASE_URL,
|
| 28 |
)
|
| 29 |
|
| 30 |
-
# Prompts for LLM Calls
|
| 31 |
JD_SYSTEM = """You are an expert recruitment analyst. Extract a job description into STRICT JSON.
|
| 32 |
Rules:
|
| 33 |
- Output ONLY JSON (no markdown, no prose).
|
|
@@ -136,14 +136,33 @@ def read_file_safely(path: str) -> str:
|
|
| 136 |
return f"[Error reading file: {e}]"
|
| 137 |
|
| 138 |
def safe_json_loads(text: str) -> dict:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 139 |
try:
|
| 140 |
-
|
| 141 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 142 |
return json.loads(block)
|
| 143 |
except Exception as e:
|
| 144 |
logging.error(f"Failed to parse JSON: {e}\nRaw Text: {text[:500]}...")
|
| 145 |
return {}
|
| 146 |
|
|
|
|
| 147 |
# --- LLM Chat Wrapper ---
|
| 148 |
def deepinfra_chat(messages: List[Dict[str, str]], api_key: str, model: str, temperature: float = 0.2) -> str:
|
| 149 |
try:
|
|
@@ -254,7 +273,7 @@ def prompt_for_match(jd_struct: Dict[str, Any], cv_structs: List[Dict[str, Any]]
|
|
| 254 |
|
| 255 |
system = (
|
| 256 |
"You are ranking candidates for a role. Output STRICT JSON ONLY:\n"
|
| 257 |
-
|
| 258 |
"Scoring criteria (weight them reasonably):\n"
|
| 259 |
"- Must-have skills coverage and relevant years\n"
|
| 260 |
"- Nice-to-have skills and domain fit\n"
|
|
@@ -274,8 +293,6 @@ def prompt_for_match(jd_struct: Dict[str, Any], cv_structs: List[Dict[str, Any]]
|
|
| 274 |
)
|
| 275 |
return [{"role": "system", "content": system}, {"role": "user", "content": user}]
|
| 276 |
|
| 277 |
-
RANK_LINE_RE = re.compile(r"^\s*(\d+)\.\s*(.*?)\s*[—\-]\s*([0-9]+(?:\.[0-9]+)?)\s*/\s*10\b", re.M)
|
| 278 |
-
|
| 279 |
def parse_ranked_output(content: str) -> List[Dict[str, Any]]:
|
| 280 |
rows: List[Dict[str, Any]] = []
|
| 281 |
parsed = safe_json_loads(content or "")
|
|
@@ -287,6 +304,7 @@ def parse_ranked_output(content: str) -> List[Dict[str, Any]]:
|
|
| 287 |
"justification": str(it.get("justification","")).strip(),
|
| 288 |
})
|
| 289 |
return rows
|
|
|
|
| 290 |
if isinstance(parsed, list):
|
| 291 |
for it in parsed:
|
| 292 |
rows.append({
|
|
@@ -295,9 +313,8 @@ def parse_ranked_output(content: str) -> List[Dict[str, Any]]:
|
|
| 295 |
"justification": str(it.get("justification","")).strip(),
|
| 296 |
})
|
| 297 |
return rows
|
| 298 |
-
|
| 299 |
-
|
| 300 |
-
if not rows:
|
| 301 |
rows = [{"candidate": "RAW_OUTPUT", "score": 0.0, "justification": (content or "")[:2000]}]
|
| 302 |
return rows
|
| 303 |
|
|
@@ -305,9 +322,6 @@ def process(
|
|
| 305 |
jd_text,
|
| 306 |
jd_file,
|
| 307 |
resume_files,
|
| 308 |
-
w_skill,
|
| 309 |
-
w_qual,
|
| 310 |
-
w_resp,
|
| 311 |
conditional_req
|
| 312 |
):
|
| 313 |
t0 = time.perf_counter()
|
|
@@ -334,13 +348,17 @@ def process(
|
|
| 334 |
parsed_cands = []
|
| 335 |
name_to_file = {}
|
| 336 |
t_parse_total = 0.0
|
| 337 |
-
for f in resume_files[:50]:
|
| 338 |
t_parse_s = time.perf_counter()
|
| 339 |
text, fname = load_resume(f)
|
| 340 |
contacts = quick_contacts(text)
|
| 341 |
raw_resume = llm_extract_resume(text, api_key=api_key, model=model_name)
|
| 342 |
cand_struct = normalize_resume(raw_resume)
|
| 343 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 344 |
cand_struct.setdefault("email", cand_struct.get("email") or contacts["email_guess"])
|
| 345 |
cand_struct.setdefault("phone", cand_struct.get("phone") or contacts["phone_guess"])
|
| 346 |
|
|
@@ -351,6 +369,9 @@ def process(
|
|
| 351 |
cand_struct['strengths'] = detailed_feedback.get('strengths', [])
|
| 352 |
cand_struct['weaknesses'] = detailed_feedback.get('weaknesses', [])
|
| 353 |
cand_struct['missing_requirements'] = detailed_feedback.get('missing_requirements', [])
|
|
|
|
|
|
|
|
|
|
| 354 |
|
| 355 |
parsed_cands.append(cand_struct)
|
| 356 |
name_to_file[cand_struct["name"]] = fname
|
|
@@ -371,7 +392,7 @@ def process(
|
|
| 371 |
table_rows, export_rows = [], []
|
| 372 |
for c in parsed_cands:
|
| 373 |
nm = c.get("name","")
|
| 374 |
-
sc, just = score_map.get(nm, (0.0, ""))
|
| 375 |
detailed_scores = c.get('detailed_scores', {})
|
| 376 |
table_rows.append({
|
| 377 |
"Candidate": nm,
|
|
@@ -388,8 +409,9 @@ def process(
|
|
| 388 |
})
|
| 389 |
export_rows.append({
|
| 390 |
"candidate": nm,
|
| 391 |
-
"
|
| 392 |
**detailed_scores,
|
|
|
|
| 393 |
"summary_feedback": c.get('summary_feedback', ''),
|
| 394 |
"strengths": ", ".join(c.get("strengths", [])),
|
| 395 |
"weaknesses": ", ".join(c.get("weaknesses", [])),
|
|
@@ -397,40 +419,62 @@ def process(
|
|
| 397 |
"justification": just
|
| 398 |
})
|
| 399 |
|
| 400 |
-
|
| 401 |
-
c["recommendation"] = llm_recommend(jd_struct, c, api_key, model_name)
|
| 402 |
-
|
| 403 |
-
df_export = pd.DataFrame(export_rows).sort_values("Score", ascending=False)
|
| 404 |
df_table = pd.DataFrame(table_rows).sort_values("Score (0-10)", ascending=False)
|
| 405 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 406 |
|
| 407 |
t_total = time.perf_counter() - t0
|
| 408 |
logging.info(f"Total process time: {t_total:.2f}s")
|
| 409 |
|
| 410 |
-
return df_table,
|
| 411 |
|
| 412 |
# --- Gradio App ---
|
| 413 |
with gr.Blocks(title="AI Resume Matcher & Ranking") as demo:
|
|
|
|
| 414 |
with gr.Row():
|
| 415 |
-
with gr.Column():
|
| 416 |
jd_text = gr.Textbox(label="Paste Job Description", lines=10)
|
| 417 |
jd_file = gr.File(label="Or Upload JD File (.txt, .pdf, .docx)")
|
| 418 |
-
resume_files = gr.File(label="Upload Resumes (.pdf, .docx, .txt)", file_types=[".pdf", ".docx", ".txt"], file_count="multiple")
|
| 419 |
-
|
| 420 |
-
|
| 421 |
-
|
| 422 |
-
|
| 423 |
-
|
| 424 |
-
|
| 425 |
-
|
| 426 |
-
|
| 427 |
-
|
|
|
|
|
|
|
|
|
|
| 428 |
top_table = gr.DataFrame(label="Top Candidates", interactive=False)
|
|
|
|
|
|
|
|
|
|
| 429 |
|
| 430 |
submit_btn.click(
|
| 431 |
process,
|
| 432 |
-
inputs
|
|
|
|
| 433 |
outputs=[results_table, csv_export, top_table]
|
| 434 |
)
|
| 435 |
|
| 436 |
-
|
|
|
|
|
|
| 16 |
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
|
| 17 |
|
| 18 |
# Global Configuration
|
| 19 |
+
# SECURITY: the DeepInfra key used to be hard-coded on this line, so it is exposed
# in the repository history and must be revoked/rotated. The key is now read from
# the DEEPINFRA_API_KEY environment variable (for a hosted Space, configure it as a
# secret). When the variable is unset the value is an empty string, so requests made
# with it are expected to be rejected until the secret is configured.
import os  # harmless if `os` is already imported at the top of the file

DEEPINFRA_API_KEY = os.environ.get("DEEPINFRA_API_KEY", "")
|
| 20 |
DEEPINFRA_BASE_URL = "https://api.deepinfra.com/v1/openai"
|
| 21 |
DEFAULT_MODEL = "Qwen/Qwen3-32B"
|
| 22 |
REQUEST_TIMEOUT_SECS = 120
|
|
|
|
| 27 |
base_url=DEEPINFRA_BASE_URL,
|
| 28 |
)
|
| 29 |
|
| 30 |
+
# --- Prompts for LLM Calls ---
|
| 31 |
JD_SYSTEM = """You are an expert recruitment analyst. Extract a job description into STRICT JSON.
|
| 32 |
Rules:
|
| 33 |
- Output ONLY JSON (no markdown, no prose).
|
|
|
|
| 136 |
return f"[Error reading file: {e}]"
|
| 137 |
|
| 138 |
def _outer_span(text: str, opener: str, closer: str):
    """Return ``(start, end)`` of the widest ``opener ... closer`` slice of *text*, or None."""
    start = text.find(opener)
    end = text.rfind(closer)
    if start == -1 or end <= start:
        return None
    return start, end + 1


def safe_json_loads(text: str) -> Any:
    """Extract and parse a JSON object or array embedded in *text*.

    Candidates are tried in this order and the first one that decodes to a
    ``dict`` or ``list`` is returned:

    1. the contents of a fenced ```` ```json ```` markdown block, if present;
    2. the outermost ``{...}`` slice of the text;
    3. the outermost ``[...]`` slice of the text.

    When the ``[...]`` slice fully encloses the ``{...}`` slice (a top-level
    array of objects), the array is tried before the object so the whole list
    is returned rather than a fragment of it.

    Args:
        text: Raw model output; ``None`` and empty strings are accepted.

    Returns:
        The decoded ``dict`` or ``list``, or ``{}`` when nothing parseable is
        found. Failures are logged, never raised.
    """
    text = text or ""
    if not isinstance(text, str):
        logging.error(f"Could not find any JSON object in the text: {str(text)[:500]}...")
        return {}

    candidates = []

    # A fenced block is the most explicit signal, so it goes first.
    fenced = re.search(r"```json\s*(.*?)```", text, re.DOTALL | re.IGNORECASE)
    if fenced:
        candidates.append(fenced.group(1))

    obj_span = _outer_span(text, "{", "}")
    arr_span = _outer_span(text, "[", "]")
    spans = [obj_span, arr_span]
    # Only prefer the array when it wraps the object span; otherwise stray
    # brackets in surrounding prose (e.g. "[3] results: {...}") would win.
    if obj_span and arr_span and arr_span[0] < obj_span[0] and arr_span[1] > obj_span[1]:
        spans.reverse()
    candidates.extend(text[start:end] for start, end in filter(None, spans))

    if not candidates:
        logging.error(f"Could not find any JSON object in the text: {text[:500]}...")
        return {}

    last_error = None
    for candidate in candidates:
        try:
            parsed = json.loads(candidate)
        except (ValueError, RecursionError) as exc:  # JSONDecodeError is a ValueError
            last_error = exc
            continue
        # Scalars are not useful to callers, which expect a dict or a list.
        if isinstance(parsed, (dict, list)):
            return parsed

    logging.error(f"Failed to parse JSON: {last_error}\nRaw Text: {text[:500]}...")
    return {}
|
| 164 |
|
| 165 |
+
|
| 166 |
# --- LLM Chat Wrapper ---
|
| 167 |
def deepinfra_chat(messages: List[Dict[str, str]], api_key: str, model: str, temperature: float = 0.2) -> str:
|
| 168 |
try:
|
|
|
|
| 273 |
|
| 274 |
system = (
|
| 275 |
"You are ranking candidates for a role. Output STRICT JSON ONLY:\n"
|
| 276 |
+
'{ "candidates": [ { "candidate": str, "score": number (0-10), "justification": str } ] }\n'
|
| 277 |
"Scoring criteria (weight them reasonably):\n"
|
| 278 |
"- Must-have skills coverage and relevant years\n"
|
| 279 |
"- Nice-to-have skills and domain fit\n"
|
|
|
|
| 293 |
)
|
| 294 |
return [{"role": "system", "content": system}, {"role": "user", "content": user}]
|
| 295 |
|
|
|
|
|
|
|
| 296 |
def parse_ranked_output(content: str) -> List[Dict[str, Any]]:
|
| 297 |
rows: List[Dict[str, Any]] = []
|
| 298 |
parsed = safe_json_loads(content or "")
|
|
|
|
| 304 |
"justification": str(it.get("justification","")).strip(),
|
| 305 |
})
|
| 306 |
return rows
|
| 307 |
+
# Add another check for a list of candidates directly
|
| 308 |
if isinstance(parsed, list):
|
| 309 |
for it in parsed:
|
| 310 |
rows.append({
|
|
|
|
| 313 |
"justification": str(it.get("justification","")).strip(),
|
| 314 |
})
|
| 315 |
return rows
|
| 316 |
+
if not rows: # Fallback for unexpected output
|
| 317 |
+
logging.warning(f"Could not parse ranked output as JSON. Raw: {content[:500]}")
|
|
|
|
| 318 |
rows = [{"candidate": "RAW_OUTPUT", "score": 0.0, "justification": (content or "")[:2000]}]
|
| 319 |
return rows
|
| 320 |
|
|
|
|
| 322 |
jd_text,
|
| 323 |
jd_file,
|
| 324 |
resume_files,
|
|
|
|
|
|
|
|
|
|
| 325 |
conditional_req
|
| 326 |
):
|
| 327 |
t0 = time.perf_counter()
|
|
|
|
| 348 |
parsed_cands = []
|
| 349 |
name_to_file = {}
|
| 350 |
t_parse_total = 0.0
|
| 351 |
+
for f in resume_files[:50]: # Limit to 50 resumes
|
| 352 |
t_parse_s = time.perf_counter()
|
| 353 |
text, fname = load_resume(f)
|
| 354 |
contacts = quick_contacts(text)
|
| 355 |
raw_resume = llm_extract_resume(text, api_key=api_key, model=model_name)
|
| 356 |
cand_struct = normalize_resume(raw_resume)
|
| 357 |
+
|
| 358 |
+
# Ensure name is not empty
|
| 359 |
+
if not cand_struct.get("name"):
|
| 360 |
+
cand_struct["name"] = os.path.splitext(fname)[0]
|
| 361 |
+
|
| 362 |
cand_struct.setdefault("email", cand_struct.get("email") or contacts["email_guess"])
|
| 363 |
cand_struct.setdefault("phone", cand_struct.get("phone") or contacts["phone_guess"])
|
| 364 |
|
|
|
|
| 369 |
cand_struct['strengths'] = detailed_feedback.get('strengths', [])
|
| 370 |
cand_struct['weaknesses'] = detailed_feedback.get('weaknesses', [])
|
| 371 |
cand_struct['missing_requirements'] = detailed_feedback.get('missing_requirements', [])
|
| 372 |
+
|
| 373 |
+
# Generate recommendations
|
| 374 |
+
cand_struct["recommendation"] = llm_recommend(jd_struct, cand_struct, api_key, model_name)
|
| 375 |
|
| 376 |
parsed_cands.append(cand_struct)
|
| 377 |
name_to_file[cand_struct["name"]] = fname
|
|
|
|
| 392 |
table_rows, export_rows = [], []
|
| 393 |
for c in parsed_cands:
|
| 394 |
nm = c.get("name","")
|
| 395 |
+
sc, just = score_map.get(nm, (0.0, "Not ranked by model"))
|
| 396 |
detailed_scores = c.get('detailed_scores', {})
|
| 397 |
table_rows.append({
|
| 398 |
"Candidate": nm,
|
|
|
|
| 409 |
})
|
| 410 |
export_rows.append({
|
| 411 |
"candidate": nm,
|
| 412 |
+
"score": round(sc, 1),
|
| 413 |
**detailed_scores,
|
| 414 |
+
"recommendation": c.get("recommendation", ""),
|
| 415 |
"summary_feedback": c.get('summary_feedback', ''),
|
| 416 |
"strengths": ", ".join(c.get("strengths", [])),
|
| 417 |
"weaknesses": ", ".join(c.get("weaknesses", [])),
|
|
|
|
| 419 |
"justification": just
|
| 420 |
})
|
| 421 |
|
| 422 |
+
df_export = pd.DataFrame(export_rows).sort_values("score", ascending=False)
|
|
|
|
|
|
|
|
|
|
| 423 |
df_table = pd.DataFrame(table_rows).sort_values("Score (0-10)", ascending=False)
|
| 424 |
+
|
| 425 |
+
# Prepare top candidates DF for display, including the recommendation
|
| 426 |
+
top_candidates_data = []
|
| 427 |
+
for _, row in df_export.head(top_n).iterrows():
|
| 428 |
+
top_candidates_data.append({
|
| 429 |
+
"Candidate": row["candidate"],
|
| 430 |
+
"Score": row["score"],
|
| 431 |
+
"Recommendation": row["recommendation"],
|
| 432 |
+
"Justification": row["justification"],
|
| 433 |
+
})
|
| 434 |
+
top_df = pd.DataFrame(top_candidates_data)
|
| 435 |
+
|
| 436 |
+
|
| 437 |
+
# --- Create a temporary file for the CSV export ---
|
| 438 |
+
with tempfile.NamedTemporaryFile(mode='w+', delete=False, suffix='.csv', encoding='utf-8') as tmp_file:
|
| 439 |
+
df_export.to_csv(tmp_file.name, index=False)
|
| 440 |
+
csv_file_path = tmp_file.name # Get the path of the saved file
|
| 441 |
|
| 442 |
t_total = time.perf_counter() - t0
|
| 443 |
logging.info(f"Total process time: {t_total:.2f}s")
|
| 444 |
|
| 445 |
+
return df_table, csv_file_path, top_df
|
| 446 |
|
| 447 |
# --- Gradio App ---
|
| 448 |
with gr.Blocks(title="AI Resume Matcher & Ranking") as demo:
|
| 449 |
+
gr.Markdown("## 🤖 AI Resume Matcher & Ranking")
|
| 450 |
with gr.Row():
|
| 451 |
+
with gr.Column(scale=1):
|
| 452 |
jd_text = gr.Textbox(label="Paste Job Description", lines=10)
|
| 453 |
jd_file = gr.File(label="Or Upload JD File (.txt, .pdf, .docx)")
|
| 454 |
+
resume_files = gr.File(label="Upload Resumes (.pdf, .docx, .txt)", file_types=[".pdf", ".docx", ".txt"], file_count="multiple")
|
| 455 |
+
|
| 456 |
+
with gr.Accordion("Advanced Options", open=False):
|
| 457 |
+
# Note: The sliders are for future use and are not currently wired into the LLM ranking prompt.
|
| 458 |
+
w_skill = gr.Slider(label="Weight: Skills", minimum=0, maximum=1, value=0.25, interactive=False)
|
| 459 |
+
w_qual = gr.Slider(label="Weight: Qualifications", minimum=0, maximum=1, value=0.25, interactive=False)
|
| 460 |
+
w_resp = gr.Slider(label="Weight: Responsibilities", minimum=0, maximum=1, value=0.25, interactive=False)
|
| 461 |
+
conditional_req = gr.Textbox(label="Conditional Requirement (optional)", placeholder="e.g., 'Must have experience with AWS services'")
|
| 462 |
+
|
| 463 |
+
submit_btn = gr.Button("Run Matching & Ranking", variant="primary")
|
| 464 |
+
|
| 465 |
+
with gr.Column(scale=2):
|
| 466 |
+
gr.Markdown("### Top Candidates Summary")
|
| 467 |
top_table = gr.DataFrame(label="Top Candidates", interactive=False)
|
| 468 |
+
gr.Markdown("### Detailed Ranking")
|
| 469 |
+
results_table = gr.DataFrame(label="Candidate Ranking")
|
| 470 |
+
csv_export = gr.File(label="Download Full Report (CSV)")
|
| 471 |
|
| 472 |
submit_btn.click(
|
| 473 |
process,
|
| 474 |
+
# Note: Sliders are removed from inputs as they are not used in the backend logic.
|
| 475 |
+
inputs=[jd_text, jd_file, resume_files, conditional_req],
|
| 476 |
outputs=[results_table, csv_export, top_table]
|
| 477 |
)
|
| 478 |
|
| 479 |
+
if __name__ == "__main__":
|
| 480 |
+
demo.launch()
|