afouda committed on
Commit
6192e6f
·
verified ·
1 Parent(s): 658a650

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +80 -36
app.py CHANGED
@@ -16,7 +16,7 @@ from openai import OpenAI
16
  logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
17
 
18
  # Global Configuration
19
- DEEPINFRA_API_KEY = "kPEm10rrnxXrCf0TuB6Xcd7Y7lp3YgKa"
20
  DEEPINFRA_BASE_URL = "https://api.deepinfra.com/v1/openai"
21
  DEFAULT_MODEL = "Qwen/Qwen3-32B"
22
  REQUEST_TIMEOUT_SECS = 120
@@ -27,7 +27,7 @@ default_client = OpenAI(
27
  base_url=DEEPINFRA_BASE_URL,
28
  )
29
 
30
- # Prompts for LLM Calls
31
  JD_SYSTEM = """You are an expert recruitment analyst. Extract a job description into STRICT JSON.
32
  Rules:
33
  - Output ONLY JSON (no markdown, no prose).
@@ -136,14 +136,33 @@ def read_file_safely(path: str) -> str:
136
  return f"[Error reading file: {e}]"
137
 
138
  def safe_json_loads(text: str) -> dict:
 
 
 
 
 
139
  try:
140
- m = re.search(r"```json\s*(.*?)```", text or "", re.DOTALL | re.IGNORECASE)
141
- block = m.group(1) if m else text
 
 
 
 
 
 
 
 
 
 
 
 
 
142
  return json.loads(block)
143
  except Exception as e:
144
  logging.error(f"Failed to parse JSON: {e}\nRaw Text: {text[:500]}...")
145
  return {}
146
 
 
147
  # --- LLM Chat Wrapper ---
148
  def deepinfra_chat(messages: List[Dict[str, str]], api_key: str, model: str, temperature: float = 0.2) -> str:
149
  try:
@@ -254,7 +273,7 @@ def prompt_for_match(jd_struct: Dict[str, Any], cv_structs: List[Dict[str, Any]]
254
 
255
  system = (
256
  "You are ranking candidates for a role. Output STRICT JSON ONLY:\n"
257
- "{ \"candidates\": [ { \"candidate\": str, \"score\": number (0-10), \"justification\": str } ] }\n"
258
  "Scoring criteria (weight them reasonably):\n"
259
  "- Must-have skills coverage and relevant years\n"
260
  "- Nice-to-have skills and domain fit\n"
@@ -274,8 +293,6 @@ def prompt_for_match(jd_struct: Dict[str, Any], cv_structs: List[Dict[str, Any]]
274
  )
275
  return [{"role": "system", "content": system}, {"role": "user", "content": user}]
276
 
277
- RANK_LINE_RE = re.compile(r"^\s*(\d+)\.\s*(.*?)\s*[—\-]\s*([0-9]+(?:\.[0-9]+)?)\s*/\s*10\b", re.M)
278
-
279
  def parse_ranked_output(content: str) -> List[Dict[str, Any]]:
280
  rows: List[Dict[str, Any]] = []
281
  parsed = safe_json_loads(content or "")
@@ -287,6 +304,7 @@ def parse_ranked_output(content: str) -> List[Dict[str, Any]]:
287
  "justification": str(it.get("justification","")).strip(),
288
  })
289
  return rows
 
290
  if isinstance(parsed, list):
291
  for it in parsed:
292
  rows.append({
@@ -295,9 +313,8 @@ def parse_ranked_output(content: str) -> List[Dict[str, Any]]:
295
  "justification": str(it.get("justification","")).strip(),
296
  })
297
  return rows
298
- for m in RANK_LINE_RE.finditer(content or ""):
299
- rows.append({"candidate": m.group(2).strip(), "score": float(m.group(3)), "justification": ""})
300
- if not rows:
301
  rows = [{"candidate": "RAW_OUTPUT", "score": 0.0, "justification": (content or "")[:2000]}]
302
  return rows
303
 
@@ -305,9 +322,6 @@ def process(
305
  jd_text,
306
  jd_file,
307
  resume_files,
308
- w_skill,
309
- w_qual,
310
- w_resp,
311
  conditional_req
312
  ):
313
  t0 = time.perf_counter()
@@ -334,13 +348,17 @@ def process(
334
  parsed_cands = []
335
  name_to_file = {}
336
  t_parse_total = 0.0
337
- for f in resume_files[:50]:
338
  t_parse_s = time.perf_counter()
339
  text, fname = load_resume(f)
340
  contacts = quick_contacts(text)
341
  raw_resume = llm_extract_resume(text, api_key=api_key, model=model_name)
342
  cand_struct = normalize_resume(raw_resume)
343
- cand_struct.setdefault("name", os.path.splitext(fname)[0])
 
 
 
 
344
  cand_struct.setdefault("email", cand_struct.get("email") or contacts["email_guess"])
345
  cand_struct.setdefault("phone", cand_struct.get("phone") or contacts["phone_guess"])
346
 
@@ -351,6 +369,9 @@ def process(
351
  cand_struct['strengths'] = detailed_feedback.get('strengths', [])
352
  cand_struct['weaknesses'] = detailed_feedback.get('weaknesses', [])
353
  cand_struct['missing_requirements'] = detailed_feedback.get('missing_requirements', [])
 
 
 
354
 
355
  parsed_cands.append(cand_struct)
356
  name_to_file[cand_struct["name"]] = fname
@@ -371,7 +392,7 @@ def process(
371
  table_rows, export_rows = [], []
372
  for c in parsed_cands:
373
  nm = c.get("name","")
374
- sc, just = score_map.get(nm, (0.0, ""))
375
  detailed_scores = c.get('detailed_scores', {})
376
  table_rows.append({
377
  "Candidate": nm,
@@ -388,8 +409,9 @@ def process(
388
  })
389
  export_rows.append({
390
  "candidate": nm,
391
- "Score": round(sc, 1),
392
  **detailed_scores,
 
393
  "summary_feedback": c.get('summary_feedback', ''),
394
  "strengths": ", ".join(c.get("strengths", [])),
395
  "weaknesses": ", ".join(c.get("weaknesses", [])),
@@ -397,40 +419,62 @@ def process(
397
  "justification": just
398
  })
399
 
400
- # Generate recommendations
401
- c["recommendation"] = llm_recommend(jd_struct, c, api_key, model_name)
402
-
403
- df_export = pd.DataFrame(export_rows).sort_values("Score", ascending=False)
404
  df_table = pd.DataFrame(table_rows).sort_values("Score (0-10)", ascending=False)
405
- top_df = df_export.head(top_n)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
406
 
407
  t_total = time.perf_counter() - t0
408
  logging.info(f"Total process time: {t_total:.2f}s")
409
 
410
- return df_table, df_export.to_csv(index=False), top_df
411
 
412
  # --- Gradio App ---
413
  with gr.Blocks(title="AI Resume Matcher & Ranking") as demo:
 
414
  with gr.Row():
415
- with gr.Column():
416
  jd_text = gr.Textbox(label="Paste Job Description", lines=10)
417
  jd_file = gr.File(label="Or Upload JD File (.txt, .pdf, .docx)")
418
- resume_files = gr.File(label="Upload Resumes (.pdf, .docx, .txt)", file_types=[".pdf", ".docx", ".txt"], file_count="multiple")
419
- w_skill = gr.Slider(label="Weight: Skills", minimum=0, maximum=1, value=0.25)
420
- w_qual = gr.Slider(label="Weight: Qualifications", minimum=0, maximum=1, value=0.25)
421
- w_resp = gr.Slider(label="Weight: Responsibilities", minimum=0, maximum=1, value=0.25)
422
- conditional_req = gr.Textbox(label="Conditional Requirement (optional)")
423
- submit_btn = gr.Button("Run Matching & Ranking")
424
-
425
- with gr.Column():
426
- results_table = gr.DataFrame(label="Candidate Ranking")
427
- csv_export = gr.File(label="Download CSV")
 
 
 
428
  top_table = gr.DataFrame(label="Top Candidates", interactive=False)
 
 
 
429
 
430
  submit_btn.click(
431
  process,
432
- inputs=[jd_text, jd_file, resume_files, w_skill, w_qual, w_resp, conditional_req],
 
433
  outputs=[results_table, csv_export, top_table]
434
  )
435
 
436
- demo.launch()
 
 
16
  logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
17
 
18
  # Global Configuration
19
+ DEEPINFRA_API_KEY = "kPEm10rrnxXrCf0TuB6Xcd7Y7lp3YgKa"
20
  DEEPINFRA_BASE_URL = "https://api.deepinfra.com/v1/openai"
21
  DEFAULT_MODEL = "Qwen/Qwen3-32B"
22
  REQUEST_TIMEOUT_SECS = 120
 
27
  base_url=DEEPINFRA_BASE_URL,
28
  )
29
 
30
+ # --- Prompts for LLM Calls ---
31
  JD_SYSTEM = """You are an expert recruitment analyst. Extract a job description into STRICT JSON.
32
  Rules:
33
  - Output ONLY JSON (no markdown, no prose).
 
136
  return f"[Error reading file: {e}]"
137
 
138
  def safe_json_loads(text: str) -> dict:
139
+ """
140
+ Robustly parses JSON from a string, even if it's embedded in other text
141
+ or a markdown block.
142
+ """
143
+ text = text or ""
144
  try:
145
+ # First, attempt to find a JSON markdown block
146
+ match = re.search(r"```json\s*(.*?)```", text, re.DOTALL | re.IGNORECASE)
147
+ if match:
148
+ block = match.group(1)
149
+ else:
150
+ # If no markdown, find the outermost curly braces
151
+ start_index = text.find('{')
152
+ end_index = text.rfind('}')
153
+ if start_index != -1 and end_index != -1 and end_index > start_index:
154
+ block = text[start_index : end_index + 1]
155
+ else:
156
+ # Fallback if no JSON structure is found at all
157
+ logging.error(f"Could not find any JSON object in the text: {text[:500]}...")
158
+ return {}
159
+
160
  return json.loads(block)
161
  except Exception as e:
162
  logging.error(f"Failed to parse JSON: {e}\nRaw Text: {text[:500]}...")
163
  return {}
164
 
165
+
166
  # --- LLM Chat Wrapper ---
167
  def deepinfra_chat(messages: List[Dict[str, str]], api_key: str, model: str, temperature: float = 0.2) -> str:
168
  try:
 
273
 
274
  system = (
275
  "You are ranking candidates for a role. Output STRICT JSON ONLY:\n"
276
+ '{ "candidates": [ { "candidate": str, "score": number (0-10), "justification": str } ] }\n'
277
  "Scoring criteria (weight them reasonably):\n"
278
  "- Must-have skills coverage and relevant years\n"
279
  "- Nice-to-have skills and domain fit\n"
 
293
  )
294
  return [{"role": "system", "content": system}, {"role": "user", "content": user}]
295
 
 
 
296
  def parse_ranked_output(content: str) -> List[Dict[str, Any]]:
297
  rows: List[Dict[str, Any]] = []
298
  parsed = safe_json_loads(content or "")
 
304
  "justification": str(it.get("justification","")).strip(),
305
  })
306
  return rows
307
+ # Add another check for a list of candidates directly
308
  if isinstance(parsed, list):
309
  for it in parsed:
310
  rows.append({
 
313
  "justification": str(it.get("justification","")).strip(),
314
  })
315
  return rows
316
+ if not rows: # Fallback for unexpected output
317
+ logging.warning(f"Could not parse ranked output as JSON. Raw: {content[:500]}")
 
318
  rows = [{"candidate": "RAW_OUTPUT", "score": 0.0, "justification": (content or "")[:2000]}]
319
  return rows
320
 
 
322
  jd_text,
323
  jd_file,
324
  resume_files,
 
 
 
325
  conditional_req
326
  ):
327
  t0 = time.perf_counter()
 
348
  parsed_cands = []
349
  name_to_file = {}
350
  t_parse_total = 0.0
351
+ for f in resume_files[:50]: # Limit to 50 resumes
352
  t_parse_s = time.perf_counter()
353
  text, fname = load_resume(f)
354
  contacts = quick_contacts(text)
355
  raw_resume = llm_extract_resume(text, api_key=api_key, model=model_name)
356
  cand_struct = normalize_resume(raw_resume)
357
+
358
+ # Ensure name is not empty
359
+ if not cand_struct.get("name"):
360
+ cand_struct["name"] = os.path.splitext(fname)[0]
361
+
362
  cand_struct.setdefault("email", cand_struct.get("email") or contacts["email_guess"])
363
  cand_struct.setdefault("phone", cand_struct.get("phone") or contacts["phone_guess"])
364
 
 
369
  cand_struct['strengths'] = detailed_feedback.get('strengths', [])
370
  cand_struct['weaknesses'] = detailed_feedback.get('weaknesses', [])
371
  cand_struct['missing_requirements'] = detailed_feedback.get('missing_requirements', [])
372
+
373
+ # Generate recommendations
374
+ cand_struct["recommendation"] = llm_recommend(jd_struct, cand_struct, api_key, model_name)
375
 
376
  parsed_cands.append(cand_struct)
377
  name_to_file[cand_struct["name"]] = fname
 
392
  table_rows, export_rows = [], []
393
  for c in parsed_cands:
394
  nm = c.get("name","")
395
+ sc, just = score_map.get(nm, (0.0, "Not ranked by model"))
396
  detailed_scores = c.get('detailed_scores', {})
397
  table_rows.append({
398
  "Candidate": nm,
 
409
  })
410
  export_rows.append({
411
  "candidate": nm,
412
+ "score": round(sc, 1),
413
  **detailed_scores,
414
+ "recommendation": c.get("recommendation", ""),
415
  "summary_feedback": c.get('summary_feedback', ''),
416
  "strengths": ", ".join(c.get("strengths", [])),
417
  "weaknesses": ", ".join(c.get("weaknesses", [])),
 
419
  "justification": just
420
  })
421
 
422
+ df_export = pd.DataFrame(export_rows).sort_values("score", ascending=False)
 
 
 
423
  df_table = pd.DataFrame(table_rows).sort_values("Score (0-10)", ascending=False)
424
+
425
+ # Prepare top candidates DF for display, including the recommendation
426
+ top_candidates_data = []
427
+ for _, row in df_export.head(top_n).iterrows():
428
+ top_candidates_data.append({
429
+ "Candidate": row["candidate"],
430
+ "Score": row["score"],
431
+ "Recommendation": row["recommendation"],
432
+ "Justification": row["justification"],
433
+ })
434
+ top_df = pd.DataFrame(top_candidates_data)
435
+
436
+
437
+ # --- Create a temporary file for the CSV export ---
438
+ with tempfile.NamedTemporaryFile(mode='w+', delete=False, suffix='.csv', encoding='utf-8') as tmp_file:
439
+ df_export.to_csv(tmp_file.name, index=False)
440
+ csv_file_path = tmp_file.name # Get the path of the saved file
441
 
442
  t_total = time.perf_counter() - t0
443
  logging.info(f"Total process time: {t_total:.2f}s")
444
 
445
+ return df_table, csv_file_path, top_df
446
 
447
  # --- Gradio App ---
448
  with gr.Blocks(title="AI Resume Matcher & Ranking") as demo:
449
+ gr.Markdown("## 🤖 AI Resume Matcher & Ranking")
450
  with gr.Row():
451
+ with gr.Column(scale=1):
452
  jd_text = gr.Textbox(label="Paste Job Description", lines=10)
453
  jd_file = gr.File(label="Or Upload JD File (.txt, .pdf, .docx)")
454
+ resume_files = gr.File(label="Upload Resumes (.pdf, .docx, .txt)", file_types=[".pdf", ".docx", ".txt"], file_count="multiple")
455
+
456
+ with gr.Accordion("Advanced Options", open=False):
457
+ # Note: The sliders are for future use and are not currently wired into the LLM ranking prompt.
458
+ w_skill = gr.Slider(label="Weight: Skills", minimum=0, maximum=1, value=0.25, interactive=False)
459
+ w_qual = gr.Slider(label="Weight: Qualifications", minimum=0, maximum=1, value=0.25, interactive=False)
460
+ w_resp = gr.Slider(label="Weight: Responsibilities", minimum=0, maximum=1, value=0.25, interactive=False)
461
+ conditional_req = gr.Textbox(label="Conditional Requirement (optional)", placeholder="e.g., 'Must have experience with AWS services'")
462
+
463
+ submit_btn = gr.Button("Run Matching & Ranking", variant="primary")
464
+
465
+ with gr.Column(scale=2):
466
+ gr.Markdown("### Top Candidates Summary")
467
  top_table = gr.DataFrame(label="Top Candidates", interactive=False)
468
+ gr.Markdown("### Detailed Ranking")
469
+ results_table = gr.DataFrame(label="Candidate Ranking")
470
+ csv_export = gr.File(label="Download Full Report (CSV)")
471
 
472
  submit_btn.click(
473
  process,
474
+ # Note: Sliders are removed from inputs as they are not used in the backend logic.
475
+ inputs=[jd_text, jd_file, resume_files, conditional_req],
476
  outputs=[results_table, csv_export, top_table]
477
  )
478
 
479
+ if __name__ == "__main__":
480
+ demo.launch()