Spaces:

afouda
/

Rank_of_CVS

Sleeping

App Files Files Community

afouda committed on Aug 27, 2025

Commit

6192e6f

verified ·

1 Parent(s): 658a650

Update app.py

Browse files

Files changed (1) hide show

app.py +80 -36

app.py CHANGED Viewed

@@ -16,7 +16,7 @@ from openai import OpenAI
 logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
 # Global Configuration
-DEEPINFRA_API_KEY = "kPEm10rrnxXrCf0TuB6Xcd7Y7lp3YgKa"
 DEEPINFRA_BASE_URL = "https://api.deepinfra.com/v1/openai"
 DEFAULT_MODEL = "Qwen/Qwen3-32B"
 REQUEST_TIMEOUT_SECS = 120
@@ -27,7 +27,7 @@ default_client = OpenAI(
     base_url=DEEPINFRA_BASE_URL,
 )
-# Prompts for LLM Calls
 JD_SYSTEM = """You are an expert recruitment analyst. Extract a job description into STRICT JSON.
 Rules:
 - Output ONLY JSON (no markdown, no prose).
@@ -136,14 +136,33 @@ def read_file_safely(path: str) -> str:
         return f"[Error reading file: {e}]"
 def safe_json_loads(text: str) -> dict:
     try:
-        m = re.search(r"```json\s*(.*?)```", text or "", re.DOTALL | re.IGNORECASE)
-        block = m.group(1) if m else text
         return json.loads(block)
     except Exception as e:
         logging.error(f"Failed to parse JSON: {e}\nRaw Text: {text[:500]}...")
         return {}
 # --- LLM Chat Wrapper ---
 def deepinfra_chat(messages: List[Dict[str, str]], api_key: str, model: str, temperature: float = 0.2) -> str:
     try:
@@ -254,7 +273,7 @@ def prompt_for_match(jd_struct: Dict[str, Any], cv_structs: List[Dict[str, Any]]
     system = (
         "You are ranking candidates for a role. Output STRICT JSON ONLY:\n"
-        "{ \"candidates\": [ { \"candidate\": str, \"score\": number (0-10), \"justification\": str } ] }\n"
         "Scoring criteria (weight them reasonably):\n"
         "- Must-have skills coverage and relevant years\n"
         "- Nice-to-have skills and domain fit\n"
@@ -274,8 +293,6 @@ def prompt_for_match(jd_struct: Dict[str, Any], cv_structs: List[Dict[str, Any]]
     )
     return [{"role": "system", "content": system}, {"role": "user", "content": user}]
-RANK_LINE_RE = re.compile(r"^\s*(\d+)\.\s*(.*?)\s*[—\-]\s*([0-9]+(?:\.[0-9]+)?)\s*/\s*10\b", re.M)
 def parse_ranked_output(content: str) -> List[Dict[str, Any]]:
     rows: List[Dict[str, Any]] = []
     parsed = safe_json_loads(content or "")
@@ -287,6 +304,7 @@ def parse_ranked_output(content: str) -> List[Dict[str, Any]]:
                 "justification": str(it.get("justification","")).strip(),
             })
         return rows
     if isinstance(parsed, list):
         for it in parsed:
             rows.append({
@@ -295,9 +313,8 @@ def parse_ranked_output(content: str) -> List[Dict[str, Any]]:
                 "justification": str(it.get("justification","")).strip(),
             })
         return rows
-    for m in RANK_LINE_RE.finditer(content or ""):
-        rows.append({"candidate": m.group(2).strip(), "score": float(m.group(3)), "justification": ""})
-    if not rows:
         rows = [{"candidate": "RAW_OUTPUT", "score": 0.0, "justification": (content or "")[:2000]}]
     return rows
@@ -305,9 +322,6 @@ def process(
     jd_text,
     jd_file,
     resume_files,
-    w_skill,
-    w_qual,
-    w_resp,
     conditional_req
 ):
     t0 = time.perf_counter()
@@ -334,13 +348,17 @@ def process(
     parsed_cands = []
     name_to_file = {}
     t_parse_total = 0.0
-    for f in resume_files[:50]:
         t_parse_s = time.perf_counter()
         text, fname = load_resume(f)
         contacts = quick_contacts(text)
         raw_resume = llm_extract_resume(text, api_key=api_key, model=model_name)
         cand_struct = normalize_resume(raw_resume)
-        cand_struct.setdefault("name", os.path.splitext(fname)[0])
         cand_struct.setdefault("email", cand_struct.get("email") or contacts["email_guess"])
         cand_struct.setdefault("phone", cand_struct.get("phone") or contacts["phone_guess"])
@@ -351,6 +369,9 @@ def process(
         cand_struct['strengths'] = detailed_feedback.get('strengths', [])
         cand_struct['weaknesses'] = detailed_feedback.get('weaknesses', [])
         cand_struct['missing_requirements'] = detailed_feedback.get('missing_requirements', [])
         parsed_cands.append(cand_struct)
         name_to_file[cand_struct["name"]] = fname
@@ -371,7 +392,7 @@ def process(
     table_rows, export_rows = [], []
     for c in parsed_cands:
         nm = c.get("name","")
-        sc, just = score_map.get(nm, (0.0, ""))
         detailed_scores = c.get('detailed_scores', {})
         table_rows.append({
             "Candidate": nm,
@@ -388,8 +409,9 @@ def process(
         })
         export_rows.append({
             "candidate": nm,
-            "Score": round(sc, 1),
             **detailed_scores,
             "summary_feedback": c.get('summary_feedback', ''),
             "strengths": ", ".join(c.get("strengths", [])),
             "weaknesses": ", ".join(c.get("weaknesses", [])),
@@ -397,40 +419,62 @@ def process(
             "justification": just
         })
-        # Generate recommendations
-        c["recommendation"] = llm_recommend(jd_struct, c, api_key, model_name)
-    df_export = pd.DataFrame(export_rows).sort_values("Score", ascending=False)
     df_table = pd.DataFrame(table_rows).sort_values("Score (0-10)", ascending=False)
-    top_df = df_export.head(top_n)
     t_total = time.perf_counter() - t0
     logging.info(f"Total process time: {t_total:.2f}s")
-    return df_table, df_export.to_csv(index=False), top_df
 # --- Gradio App ---
 with gr.Blocks(title="AI Resume Matcher & Ranking") as demo:
     with gr.Row():
-        with gr.Column():
             jd_text = gr.Textbox(label="Paste Job Description", lines=10)
             jd_file = gr.File(label="Or Upload JD File (.txt, .pdf, .docx)")
-            resume_files = gr.File(label="Upload Resumes (.pdf, .docx, .txt)", file_types=[".pdf", ".docx", ".txt"], file_count="multiple")
-            w_skill = gr.Slider(label="Weight: Skills", minimum=0, maximum=1, value=0.25)
-            w_qual = gr.Slider(label="Weight: Qualifications", minimum=0, maximum=1, value=0.25)
-            w_resp = gr.Slider(label="Weight: Responsibilities", minimum=0, maximum=1, value=0.25)
-            conditional_req = gr.Textbox(label="Conditional Requirement (optional)")
-            submit_btn = gr.Button("Run Matching & Ranking")
-        with gr.Column():
-            results_table = gr.DataFrame(label="Candidate Ranking")
-            csv_export = gr.File(label="Download CSV")
             top_table = gr.DataFrame(label="Top Candidates", interactive=False)
     submit_btn.click(
         process,
-        inputs=[jd_text, jd_file, resume_files, w_skill, w_qual, w_resp, conditional_req],
         outputs=[results_table, csv_export, top_table]
     )
-demo.launch()

 logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
 # Global Configuration
+DEEPINFRA_API_KEY = "kPEm10rrnxXrCf0TuB6Xcd7Y7lp3YgKa"
 DEEPINFRA_BASE_URL = "https://api.deepinfra.com/v1/openai"
 DEFAULT_MODEL = "Qwen/Qwen3-32B"
 REQUEST_TIMEOUT_SECS = 120
     base_url=DEEPINFRA_BASE_URL,
 )
+# --- Prompts for LLM Calls ---
 JD_SYSTEM = """You are an expert recruitment analyst. Extract a job description into STRICT JSON.
 Rules:
 - Output ONLY JSON (no markdown, no prose).
         return f"[Error reading file: {e}]"
 def safe_json_loads(text: str) -> dict:
+    """
+    Robustly parses JSON from a string, even if it's embedded in other text
+    or a markdown block.
+    """
+    text = text or ""
     try:
+        # First, attempt to find a JSON markdown block
+        match = re.search(r"```json\s*(.*?)```", text, re.DOTALL | re.IGNORECASE)
+        if match:
+            block = match.group(1)
+        else:
+            # If no markdown, find the outermost curly braces
+            start_index = text.find('{')
+            end_index = text.rfind('}')
+            if start_index != -1 and end_index != -1 and end_index > start_index:
+                block = text[start_index : end_index + 1]
+            else:
+                # Fallback if no JSON structure is found at all
+                logging.error(f"Could not find any JSON object in the text: {text[:500]}...")
+                return {}
         return json.loads(block)
     except Exception as e:
         logging.error(f"Failed to parse JSON: {e}\nRaw Text: {text[:500]}...")
         return {}
 # --- LLM Chat Wrapper ---
 def deepinfra_chat(messages: List[Dict[str, str]], api_key: str, model: str, temperature: float = 0.2) -> str:
     try:
     system = (
         "You are ranking candidates for a role. Output STRICT JSON ONLY:\n"
+        '{ "candidates": [ { "candidate": str, "score": number (0-10), "justification": str } ] }\n'
         "Scoring criteria (weight them reasonably):\n"
         "- Must-have skills coverage and relevant years\n"
         "- Nice-to-have skills and domain fit\n"
     )
     return [{"role": "system", "content": system}, {"role": "user", "content": user}]
 def parse_ranked_output(content: str) -> List[Dict[str, Any]]:
     rows: List[Dict[str, Any]] = []
     parsed = safe_json_loads(content or "")
                 "justification": str(it.get("justification","")).strip(),
             })
         return rows
+    # Add another check for a list of candidates directly
     if isinstance(parsed, list):
         for it in parsed:
             rows.append({
                 "justification": str(it.get("justification","")).strip(),
             })
         return rows
+    if not rows: # Fallback for unexpected output
+        logging.warning(f"Could not parse ranked output as JSON. Raw: {content[:500]}")
         rows = [{"candidate": "RAW_OUTPUT", "score": 0.0, "justification": (content or "")[:2000]}]
     return rows
     jd_text,
     jd_file,
     resume_files,
     conditional_req
 ):
     t0 = time.perf_counter()
     parsed_cands = []
     name_to_file = {}
     t_parse_total = 0.0
+    for f in resume_files[:50]: # Limit to 50 resumes
         t_parse_s = time.perf_counter()
         text, fname = load_resume(f)
         contacts = quick_contacts(text)
         raw_resume = llm_extract_resume(text, api_key=api_key, model=model_name)
         cand_struct = normalize_resume(raw_resume)
+        # Ensure name is not empty
+        if not cand_struct.get("name"):
+            cand_struct["name"] = os.path.splitext(fname)[0]
         cand_struct.setdefault("email", cand_struct.get("email") or contacts["email_guess"])
         cand_struct.setdefault("phone", cand_struct.get("phone") or contacts["phone_guess"])
         cand_struct['strengths'] = detailed_feedback.get('strengths', [])
         cand_struct['weaknesses'] = detailed_feedback.get('weaknesses', [])
         cand_struct['missing_requirements'] = detailed_feedback.get('missing_requirements', [])
+        # Generate recommendations
+        cand_struct["recommendation"] = llm_recommend(jd_struct, cand_struct, api_key, model_name)
         parsed_cands.append(cand_struct)
         name_to_file[cand_struct["name"]] = fname
     table_rows, export_rows = [], []
     for c in parsed_cands:
         nm = c.get("name","")
+        sc, just = score_map.get(nm, (0.0, "Not ranked by model"))
         detailed_scores = c.get('detailed_scores', {})
         table_rows.append({
             "Candidate": nm,
         })
         export_rows.append({
             "candidate": nm,
+            "score": round(sc, 1),
             **detailed_scores,
+            "recommendation": c.get("recommendation", ""),
             "summary_feedback": c.get('summary_feedback', ''),
             "strengths": ", ".join(c.get("strengths", [])),
             "weaknesses": ", ".join(c.get("weaknesses", [])),
             "justification": just
         })
+    df_export = pd.DataFrame(export_rows).sort_values("score", ascending=False)
     df_table = pd.DataFrame(table_rows).sort_values("Score (0-10)", ascending=False)
+    # Prepare top candidates DF for display, including the recommendation
+    top_candidates_data = []
+    for _, row in df_export.head(top_n).iterrows():
+        top_candidates_data.append({
+            "Candidate": row["candidate"],
+            "Score": row["score"],
+            "Recommendation": row["recommendation"],
+            "Justification": row["justification"],
+        })
+    top_df = pd.DataFrame(top_candidates_data)
+    # --- Create a temporary file for the CSV export ---
+    with tempfile.NamedTemporaryFile(mode='w+', delete=False, suffix='.csv', encoding='utf-8') as tmp_file:
+        df_export.to_csv(tmp_file.name, index=False)
+        csv_file_path = tmp_file.name # Get the path of the saved file
     t_total = time.perf_counter() - t0
     logging.info(f"Total process time: {t_total:.2f}s")
+    return df_table, csv_file_path, top_df
 # --- Gradio App ---
 with gr.Blocks(title="AI Resume Matcher & Ranking") as demo:
+    gr.Markdown("## 🤖 AI Resume Matcher & Ranking")
     with gr.Row():
+        with gr.Column(scale=1):
             jd_text = gr.Textbox(label="Paste Job Description", lines=10)
             jd_file = gr.File(label="Or Upload JD File (.txt, .pdf, .docx)")
+            resume_files = gr.File(label="Upload Resumes (.pdf, .docx, .txt)", file_types=[".pdf", ".docx", ".txt"], file_count="multiple")
+            with gr.Accordion("Advanced Options", open=False):
+                # Note: The sliders are for future use and are not currently wired into the LLM ranking prompt.
+                w_skill = gr.Slider(label="Weight: Skills", minimum=0, maximum=1, value=0.25, interactive=False)
+                w_qual = gr.Slider(label="Weight: Qualifications", minimum=0, maximum=1, value=0.25, interactive=False)
+                w_resp = gr.Slider(label="Weight: Responsibilities", minimum=0, maximum=1, value=0.25, interactive=False)
+                conditional_req = gr.Textbox(label="Conditional Requirement (optional)", placeholder="e.g., 'Must have experience with AWS services'")
+            submit_btn = gr.Button("Run Matching & Ranking", variant="primary")
+        with gr.Column(scale=2):
+            gr.Markdown("### Top Candidates Summary")
             top_table = gr.DataFrame(label="Top Candidates", interactive=False)
+            gr.Markdown("### Detailed Ranking")
+            results_table = gr.DataFrame(label="Candidate Ranking")
+            csv_export = gr.File(label="Download Full Report (CSV)")
     submit_btn.click(
         process,
+        # Note: Sliders are removed from inputs as they are not used in the backend logic.
+        inputs=[jd_text, jd_file, resume_files, conditional_req],
         outputs=[results_table, csv_export, top_table]
     )
+if __name__ == "__main__":
+    demo.launch()