Spaces:
Sleeping
Sleeping
Upload 15 files
Browse files- README.md +13 -14
- app.py +260 -0
- core/__init__.py +2 -0
- core/crew_pipeline.py +214 -0
- core/crew_tools.py +41 -0
- core/jd_processor.py +64 -0
- core/matcher.py +32 -0
- core/ranking.py +41 -0
- core/resume_parser.py +37 -0
- core/scoring.py +113 -0
- requirements.txt +13 -0
- utils/__init__.py +2 -0
- utils/file_loader.py +36 -0
- utils/prompts.py +84 -0
- utils/resume_loader.py +11 -0
README.md
CHANGED
|
@@ -1,14 +1,13 @@
|
|
| 1 |
-
---
|
| 2 |
-
title: Ai Resume
|
| 3 |
-
emoji:
|
| 4 |
-
colorFrom:
|
| 5 |
-
colorTo:
|
| 6 |
-
sdk: gradio
|
| 7 |
-
sdk_version: 6.
|
| 8 |
-
app_file: app.py
|
| 9 |
-
pinned: false
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
|
|
| 1 |
+
---
|
| 2 |
+
title: Ai Resume Match
|
| 3 |
+
emoji: 🚀
|
| 4 |
+
colorFrom: pink
|
| 5 |
+
colorTo: purple
|
| 6 |
+
sdk: gradio
|
| 7 |
+
sdk_version: 6.2.0
|
| 8 |
+
app_file: app.py
|
| 9 |
+
pinned: false
|
| 10 |
+
short_description: iti123-project
|
| 11 |
+
---
|
| 12 |
+
|
| 13 |
+
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
|
app.py
ADDED
|
@@ -0,0 +1,260 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os, json, shutil
|
| 2 |
+
from pathlib import Path
|
| 3 |
+
|
| 4 |
+
import gradio as gr
|
| 5 |
+
import pandas as pd
|
| 6 |
+
|
| 7 |
+
from dotenv import load_dotenv
|
| 8 |
+
load_dotenv()
|
| 9 |
+
|
| 10 |
+
from utils.file_loader import load_text_from_file
|
| 11 |
+
from core.crew_pipeline import (
|
| 12 |
+
crew_step1_generate_jd,
|
| 13 |
+
crew_step3_parse_resumes,
|
| 14 |
+
crew_step4_generate_matches,
|
| 15 |
+
crew_step5_rank,
|
| 16 |
+
)
|
| 17 |
+
|
| 18 |
+
# ---------- Folders ----------
|
| 19 |
+
RESUME_DIR = Path("data/resumes")
|
| 20 |
+
CAND_DIR = Path("data/candidates")
|
| 21 |
+
MATCH_DIR = Path("data/matches")
|
| 22 |
+
DATA_DIR = Path("data")
|
| 23 |
+
|
| 24 |
+
for p in [RESUME_DIR, CAND_DIR, MATCH_DIR]:
|
| 25 |
+
p.mkdir(parents=True, exist_ok=True)
|
| 26 |
+
|
| 27 |
+
JD_PATH = DATA_DIR / "jd.json"
|
| 28 |
+
|
| 29 |
+
# ---------- Theme CSS ----------
|
| 30 |
+
def theme_css(mode: str) -> str:
    """Return the full CSS string for the requested reader theme.

    A shared ``base`` stylesheet (layout, typography, code blocks) is
    concatenated with one of three palettes: "Light", "HF Dark", or
    "High Contrast".  Unknown modes fall back to the HF Dark palette.
    """
    # Rules shared by every theme: container width, font sizing, and
    # monospace styling for code blocks / the CodeMirror editor.
    base = """
    .gradio-container { max-width: 1200px !important; margin: 0 auto !important; padding-bottom: 24px !important; }
    .gradio-container, .gradio-container * { font-size: 16px; line-height: 1.55; }
    button, .gr-button, .gr-input, .gr-textbox, .gr-file, .gr-dropdown { padding-top: 10px !important; padding-bottom: 10px !important; }
    pre, code, .cm-editor, .cm-scroller {
      font-family: ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, "Liberation Mono", "Courier New", monospace !important;
      font-size: 15px !important; line-height: 1.65 !important; border-radius: 10px !important;
    }
    .cm-scroller { min-height: 420px !important; }
    """

    # Light palette: dark text on light panels.
    light = """
    :root { --panel-border: #e5e7eb; --code-bg: #f8fafc; --code-fg: #0f172a; }
    body { background: #f5f5f5; }
    pre, code, .cm-editor { background-color: var(--code-bg) !important; color: var(--code-fg) !important; border: 1px solid var(--panel-border) !important; }
    """

    # Hugging-Face-style dark palette with orange active-tab accent.
    hf_dark = """
    :root {
      --panel-border: #1f2937;
      --code-bg: #0f172a;
      --code-fg: #e5e7eb;
      --title-fg: #ffffff;
      --header-fg: #d1d5db;
      --muted-fg: #9ca3af;
    }
    body { background: #0b1220; }
    h1, h1 * { color: var(--title-fg) !important; font-weight: 700 !important; }
    h2, h3, h4, h2 *, h3 *, h4 * { color: var(--header-fg) !important; font-weight: 600 !important; }
    button[role="tab"] { color: var(--muted-fg) !important; font-weight: 500 !important; }
    button[role="tab"][aria-selected="true"] { color: #fb923c !important; font-weight: 700 !important; }
    pre, code, .cm-editor { background-color: var(--code-bg) !important; color: var(--code-fg) !important; border: 1px solid var(--panel-border) !important; }
    """

    # Accessibility palette: pure black/white with yellow active-tab accent.
    high_contrast = """
    :root {
      --panel-border: #ffffff;
      --code-bg: #000000;
      --code-fg: #ffffff;
      --title-fg: #ffffff;
      --header-fg: #ffffff;
      --muted-fg: #e5e7eb;
    }
    body { background: #000000; }
    h1, h1 * { color: var(--title-fg) !important; font-weight: 800 !important; }
    h2, h3, h4, h2 *, h3 *, h4 * { color: var(--header-fg) !important; font-weight: 700 !important; }
    button[role="tab"] { color: var(--muted-fg) !important; font-weight: 600 !important; }
    button[role="tab"][aria-selected="true"] { color: #ffd500 !important; font-weight: 800 !important; }
    pre, code, .cm-editor { background-color: var(--code-bg) !important; color: var(--code-fg) !important; border: 2px solid var(--panel-border) !important; }
    """

    css_map = {"Light": light, "HF Dark": hf_dark, "High Contrast": high_contrast}
    # Fallback to the dark palette for any unrecognized mode string.
    return base + css_map.get(mode, hf_dark)
|
| 84 |
+
|
| 85 |
+
|
| 86 |
+
def _ensure_api_key():
|
| 87 |
+
if not os.getenv("OPENAI_API_KEY"):
|
| 88 |
+
raise RuntimeError(
|
| 89 |
+
"OPENAI_API_KEY is not set. "
|
| 90 |
+
"On Hugging Face: Settings → Secrets → OPENAI_API_KEY. "
|
| 91 |
+
"On local: set environment variable before running."
|
| 92 |
+
)
|
| 93 |
+
|
| 94 |
+
|
| 95 |
+
def _pretty_json(obj) -> str:
|
| 96 |
+
return json.dumps(obj, indent=2, ensure_ascii=False)
|
| 97 |
+
|
| 98 |
+
|
| 99 |
+
# ---------- Steps ----------
|
| 100 |
+
def step1_generate_jd(file_obj):
    """Step 1 UI handler: build a JD rubric from an uploaded JD file.

    Returns a (rubric_json_text, status_message) pair for the Gradio outputs.
    """
    _ensure_api_key()
    if file_obj is None:
        return "", "Please upload a JD file."

    with open(file_obj.name, "rb") as handle:
        jd_text = load_text_from_file(handle)

    # Delegate to the CrewAI pipeline; the JD Analyst agent wraps the
    # existing generate_jd_rubric and the result is persisted at JD_PATH.
    rubric = crew_step1_generate_jd(jd_text, jd_path=str(JD_PATH))
    status = f"✅ JD rubric saved to {JD_PATH}"
    return _pretty_json(rubric), status
|
| 111 |
+
|
| 112 |
+
|
| 113 |
+
def step2_save_resumes(files, overwrite):
    """Step 2 UI handler: copy uploaded resumes into RESUME_DIR.

    Existing files are skipped unless *overwrite* is truthy.  Returns a
    status string reporting saved/skipped counts.
    """
    if not files:
        return "Please upload at least one resume."

    saved = 0
    skipped = 0
    for upload in files:
        target = RESUME_DIR / Path(upload.name).name
        if target.exists() and not overwrite:
            skipped += 1
        else:
            shutil.copyfile(upload.name, target)
            saved += 1

    msg = f"✅ Saved {saved} resume(s) into {RESUME_DIR}"
    if skipped:
        msg += f" | ⏭ Skipped {skipped} duplicate(s) (overwrite=False)"
    return msg
|
| 130 |
+
|
| 131 |
+
|
| 132 |
+
def step3_parse_resumes():
    """Step 3 UI handler: parse every resume in RESUME_DIR into candidate JSON."""
    _ensure_api_key()
    supported = {".pdf", ".docx"}
    resume_files = sorted(p for p in RESUME_DIR.iterdir() if p.suffix.lower() in supported)
    if not resume_files:
        return "No resumes found in data/resumes. Upload resumes first."

    # The Resume Parser agent runs the parsing tool once per file and
    # writes candidate JSONs into CAND_DIR.
    return crew_step3_parse_resumes(
        resume_dir=str(RESUME_DIR),
        cand_dir=str(CAND_DIR),
    )
|
| 144 |
+
|
| 145 |
+
|
| 146 |
+
def step4_generate_matches():
    """Step 4 UI handler: produce one JD-aligned match JSON per candidate.

    Preconditions: the JD rubric (Step 1) and parsed candidates (Step 3)
    must already exist on disk.  Returns a status message for the UI.
    """
    _ensure_api_key()
    if not JD_PATH.exists():
        return "No JD rubric found. Run Step 1 first."
    # glob("*.json") already filters by suffix, so the per-element suffix
    # re-check in the original was redundant; just test for any hit.
    if not any(CAND_DIR.glob("*.json")):
        return "No candidates found. Run Step 3 first."

    # The Matcher agent calls the existing match_candidate_to_jd per candidate.
    return crew_step4_generate_matches(
        jd_path=str(JD_PATH),
        cand_dir=str(CAND_DIR),
        match_dir=str(MATCH_DIR),
    )
|
| 160 |
+
|
| 161 |
+
|
| 162 |
+
def step4_view_match(selected):
    """Step 4 viewer: load and pretty-print one saved match JSON file.

    Returns an empty string when nothing is selected or the file is gone.
    """
    if not selected:
        return ""
    match_path = MATCH_DIR / selected
    if not match_path.exists():
        return ""
    with open(match_path, "r", encoding="utf-8") as fh:
        payload = json.load(fh)
    return _pretty_json(payload)
|
| 170 |
+
|
| 171 |
+
|
| 172 |
+
def step5_rank(top_k):
    """Step 5 UI handler: score all match files and build a Top-K table.

    Returns (DataFrame, status_message); the DataFrame slot is None when a
    precondition (JD rubric or match files) is missing.
    """
    _ensure_api_key()
    if not JD_PATH.exists():
        return None, "No JD rubric found. Run Step 1 first."
    if not any(p.name.endswith("_match.json") for p in MATCH_DIR.iterdir()):
        return None, "No match files found. Run Step 4 first."

    ranking = crew_step5_rank(top_k=int(top_k), match_dir=str(MATCH_DIR))

    def _row(position, entry):
        # Flatten one ranking entry (plus its score breakdown) into a table row.
        detail = entry.get("breakdown", {})
        return {
            "Rank": position,
            "Candidate": entry.get("candidate_name", ""),
            "Score": entry.get("total_score", 0),
            "Base Score": detail.get("base_score", 0),
            "Adj": detail.get("bonus_penalty_adjustment", 0),
            "Pos": detail.get("positive_count", 0),
            "Neg": detail.get("negative_count", 0),
            "Must-have": detail.get("must_have_coverage", 0),
            "Nice-to-have": detail.get("nice_to_have_coverage", 0),
            "Experience": detail.get("experience_score", 0),
            "Match File": entry.get("match_file", ""),
        }

    rows = [_row(i, entry) for i, entry in enumerate(ranking["ranking"], start=1)]
    return pd.DataFrame(rows), "✅ Ranking generated (also saved to data/ranking.json)"
|
| 198 |
+
|
| 199 |
+
|
| 200 |
+
def list_match_files():
    """Return the sorted filenames of all saved *_match.json files."""
    names = (p.name for p in MATCH_DIR.iterdir())
    return sorted(name for name in names if name.endswith("_match.json"))
|
| 202 |
+
|
| 203 |
+
|
| 204 |
+
# ---------- UI ----------
# Five-tab Gradio app, one tab per pipeline step.  Handlers are the
# step*_ functions above; all state lives on disk under data/.
with gr.Blocks(title="AI-Powered Resume Screening & Ranking System") as demo:
    gr.Markdown("# 📄 AI-Powered JD-Based Resumes Ranking System")

    with gr.Row():
        # Theme picker; the selected palette is injected as a raw <style> tag.
        theme_mode = gr.Dropdown(
            choices=["Light", "HF Dark", "High Contrast"],
            value="HF Dark",
            label="Reader Theme"
        )
        style_tag = gr.HTML(value=f"<style>{theme_css('HF Dark')}</style>")

    def apply_theme(mode):
        # Re-render the <style> tag with the CSS for the chosen theme.
        return gr.HTML(value=f"<style>{theme_css(mode)}</style>")
    theme_mode.change(fn=apply_theme, inputs=theme_mode, outputs=style_tag)

    with gr.Tab("Step 1 — JD Upload & Rubric"):
        jd_file = gr.File(label="Upload JD (PDF/DOCX/TXT)")
        jd_btn = gr.Button("Generate JD Rubric")
        jd_json = gr.Code(label="JD Rubric (JSON)", language="json")
        jd_status = gr.Textbox(label="Status")
        jd_btn.click(step1_generate_jd, inputs=[jd_file], outputs=[jd_json, jd_status])

    with gr.Tab("Step 2 — Resume Batch Upload"):
        resume_files = gr.File(label="Upload resumes (PDF/DOCX)", file_count="multiple")
        overwrite = gr.Checkbox(label="Overwrite duplicates", value=False)
        save_btn = gr.Button("Save Resumes to data/resumes")
        save_status = gr.Textbox(label="Status")
        save_btn.click(step2_save_resumes, inputs=[resume_files, overwrite], outputs=[save_status])

    with gr.Tab("Step 3 — Parse Resumes (CrewAI)"):
        parse_btn = gr.Button("Parse Resumes → data/candidates")
        parse_status = gr.Textbox(label="Status")
        parse_btn.click(step3_parse_resumes, inputs=[], outputs=[parse_status])

    with gr.Tab("Step 4 — JD-aligned Summaries (CrewAI)"):
        match_btn = gr.Button("Generate match files → data/matches")
        match_status = gr.Textbox(label="Status")

        # Match-file browser: refresh the dropdown, then view one file.
        refresh_btn = gr.Button("Refresh match list")
        match_list = gr.Dropdown(choices=[], label="Select a match file")
        view_btn = gr.Button("View selected match JSON")

        match_json = gr.Code(label="Match JSON", language="json")

        match_btn.click(step4_generate_matches, inputs=[], outputs=[match_status])
        refresh_btn.click(lambda: gr.update(choices=list_match_files()), inputs=[], outputs=[match_list])
        view_btn.click(step4_view_match, inputs=[match_list], outputs=[match_json])

    with gr.Tab("Step 5 — Scoring & Ranking (CrewAI)"):
        topk = gr.Number(value=10, label="Top K", precision=0)
        rank_btn = gr.Button("Generate Ranking")
        rank_df = gr.Dataframe(label="Top K Ranking")
        rank_status = gr.Textbox(label="Status")
        rank_btn.click(step5_rank, inputs=[topk], outputs=[rank_df, rank_status])

# Local/HF-Spaces entry point; share=False keeps the app on its own host.
demo.launch(share=False)
|
core/__init__.py
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# utils/__init__.py
|
| 2 |
+
# core/__init__.py
|
core/crew_pipeline.py
ADDED
|
@@ -0,0 +1,214 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import json
|
| 2 |
+
import os
|
| 3 |
+
from pathlib import Path
|
| 4 |
+
|
| 5 |
+
from crewai import Agent, Task, Crew, Process
|
| 6 |
+
|
| 7 |
+
from utils.file_loader import load_text_from_file
|
| 8 |
+
from core.crew_tools import (
|
| 9 |
+
generate_jd_rubric_tool,
|
| 10 |
+
parse_resume_tool,
|
| 11 |
+
match_candidate_tool,
|
| 12 |
+
build_ranking_tool,
|
| 13 |
+
)
|
| 14 |
+
|
| 15 |
+
# ---------- Helpers ----------
|
| 16 |
+
def _ensure_openai_env():
|
| 17 |
+
# CrewAI expects OPENAI_API_KEY to exist (you already use this in app.py)
|
| 18 |
+
if not os.getenv("OPENAI_API_KEY"):
|
| 19 |
+
raise RuntimeError("OPENAI_API_KEY is not set (HF: Settings → Secrets → OPENAI_API_KEY).")
|
| 20 |
+
|
| 21 |
+
# Optional: pin model for CrewAI if you want consistency
|
| 22 |
+
# (CrewAI supports different LLM providers; leaving it unset is fine if defaults work in your env)
|
| 23 |
+
os.environ.setdefault("OPENAI_MODEL_NAME", "gpt-4o-mini")
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
def _json(s: str):
|
| 27 |
+
return json.loads(s)
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
# ---------- Agents ----------
|
| 31 |
+
def _jd_agent():
    """Build the JD Analyst agent (wraps generate_jd_rubric_tool)."""
    return Agent(
        role="JD Analyst",
        goal="Convert JD text into a structured rubric JSON.",
        backstory="You are an HR analyst who produces consistent rubric structures for automated screening.",
        tools=[generate_jd_rubric_tool],
        verbose=False,
    )
|
| 39 |
+
|
| 40 |
+
def _resume_agent():
    """Build the Resume Parser agent (wraps parse_resume_tool)."""
    return Agent(
        role="Resume Parser",
        goal="Extract structured candidate profile JSON from resume text.",
        backstory="You are an ATS-style parser; you output consistent JSON that downstream scoring depends on.",
        tools=[parse_resume_tool],
        verbose=False,
    )
|
| 48 |
+
|
| 49 |
+
def _matcher_agent():
    """Build the JD-Candidate Matcher agent (wraps match_candidate_tool)."""
    return Agent(
        role="JD-Candidate Matcher",
        goal="Create a JD-aligned match JSON for each candidate.",
        backstory="You score alignment and produce structured evidence for ranking.",
        tools=[match_candidate_tool],
        verbose=False,
    )
|
| 57 |
+
|
| 58 |
+
def _ranker_agent():
    """Build the Ranker agent (wraps build_ranking_tool)."""
    return Agent(
        role="Ranker",
        goal="Build a final ranking JSON from match files.",
        backstory="You turn match outputs into a clean Top-K ranking.",
        tools=[build_ranking_tool],
        verbose=False,
    )
|
| 66 |
+
|
| 67 |
+
|
| 68 |
+
# ---------- Crew wrappers (called by app.py) ----------
|
| 69 |
+
def crew_step1_generate_jd(jd_text: str, jd_path: str) -> dict:
    """Run the JD Analyst crew on *jd_text* and persist the rubric.

    Args:
        jd_text: Raw job-description text.
        jd_path: Destination path for the rubric JSON.

    Returns:
        The parsed rubric dict.  Propagates json.JSONDecodeError if the
        crew output is not valid JSON.
    """
    _ensure_openai_env()
    jd_path = Path(jd_path)
    jd_path.parent.mkdir(parents=True, exist_ok=True)

    # Build the agent ONCE and share it between the task and the crew.
    # The original called _jd_agent() twice, so the Task's agent was a
    # different instance than the one registered with the Crew.
    agent = _jd_agent()

    task = Task(
        description=(
            "Use generate_jd_rubric_tool on the provided JD text and return the JSON rubric.\n"
            "Return ONLY the JSON."
        ),
        expected_output="A valid JSON object as a string.",
        agent=agent,
    )

    crew = Crew(
        agents=[agent],
        tasks=[task],
        process=Process.sequential,
        verbose=False,
        planning=False,
    )

    result = crew.kickoff(inputs={"jd_text": jd_text})
    rubric = _json(str(result))

    with open(jd_path, "w", encoding="utf-8") as f:
        json.dump(rubric, f, indent=2, ensure_ascii=False)

    return rubric
|
| 98 |
+
|
| 99 |
+
|
| 100 |
+
def crew_step3_parse_resumes(resume_dir: str, cand_dir: str) -> str:
    """Parse every resume in *resume_dir* into candidate JSON files.

    A single-task crew is kicked off per resume.  Failures are collected
    instead of raised so that one bad file cannot abort the batch, but —
    unlike the original silent `except: failed += 1` — the failing
    filenames and exception types are reported in the summary.

    Returns:
        A human-readable summary string for the UI.
    """
    _ensure_openai_env()
    resume_dir = Path(resume_dir)
    cand_dir = Path(cand_dir)
    cand_dir.mkdir(parents=True, exist_ok=True)

    resume_files = sorted(p for p in resume_dir.iterdir() if p.suffix.lower() in {".pdf", ".docx"})
    if not resume_files:
        return "No resumes found in data/resumes. Upload resumes first."

    agent = _resume_agent()
    ok = 0
    failures = []

    for path in resume_files:
        try:
            with open(path, "rb") as f:
                resume_text = load_text_from_file(f)

            task = Task(
                description=(
                    f"Parse this resume into candidate JSON.\n"
                    f"Filename: {path.name}\n"
                    "Call parse_resume_tool(resume_text, filename) and return ONLY JSON."
                ),
                expected_output="A valid candidate JSON object as a string.",
                agent=agent,
            )
            crew = Crew(agents=[agent], tasks=[task], process=Process.sequential, verbose=False, planning=False)
            result = crew.kickoff(inputs={"resume_text": resume_text, "filename": path.name})

            candidate = _json(str(result))
            out_path = cand_dir / (path.stem + ".json")
            with open(out_path, "w", encoding="utf-8") as out:
                json.dump(candidate, out, indent=2, ensure_ascii=False)

            ok += 1
        except Exception as exc:  # best-effort batch: record and continue
            failures.append(f"{path.name} ({type(exc).__name__})")

    msg = f"✅ Parsed {ok} resume(s) into {cand_dir} | ⚠️ Failed: {len(failures)}"
    if failures:
        msg += " [" + ", ".join(failures) + "]"
    return msg
|
| 140 |
+
|
| 141 |
+
|
| 142 |
+
def crew_step4_generate_matches(jd_path: str, cand_dir: str, match_dir: str) -> str:
    """Create one JD-aligned match JSON per candidate file.

    Reads the JD rubric from *jd_path*, iterates candidate JSONs in
    *cand_dir*, and writes ``<stem>_match.json`` files into *match_dir*.
    Per-file failures are recorded (with filename and exception type)
    instead of being silently counted, and do not abort the batch.

    Returns:
        A human-readable summary string for the UI.
    """
    _ensure_openai_env()
    jd_path = Path(jd_path)
    cand_dir = Path(cand_dir)
    match_dir = Path(match_dir)
    match_dir.mkdir(parents=True, exist_ok=True)

    if not jd_path.exists():
        return "No JD rubric found. Run Step 1 first."

    cand_files = sorted(p for p in cand_dir.iterdir() if p.suffix.lower() == ".json")
    if not cand_files:
        return "No candidates found. Run Step 3 first."

    with open(jd_path, "r", encoding="utf-8") as f:
        jd_rubric = json.load(f)
    # Serialize once; the same rubric string is fed to every task.
    jd_rubric_json = json.dumps(jd_rubric, ensure_ascii=False)

    agent = _matcher_agent()
    created = 0
    failures = []

    for cf in cand_files:
        try:
            with open(cf, "r", encoding="utf-8") as f:
                candidate = json.load(f)
            candidate_json = json.dumps(candidate, ensure_ascii=False)

            task = Task(
                description=(
                    f"Create a JD-aligned match JSON for candidate file {cf.name}.\n"
                    "Call match_candidate_tool(jd_rubric_json, candidate_json) and return ONLY JSON."
                ),
                expected_output="A valid match JSON object as a string.",
                agent=agent,
            )
            crew = Crew(agents=[agent], tasks=[task], process=Process.sequential, verbose=False, planning=False)
            result = crew.kickoff(inputs={"jd_rubric_json": jd_rubric_json, "candidate_json": candidate_json})

            match_obj = _json(str(result))
            out_path = match_dir / (cf.stem + "_match.json")
            with open(out_path, "w", encoding="utf-8") as out:
                json.dump(match_obj, out, indent=2, ensure_ascii=False)

            created += 1
        except Exception as exc:  # best-effort batch: record and continue
            failures.append(f"{cf.name} ({type(exc).__name__})")

    msg = f"✅ Created {created} match file(s) in {match_dir} | ⚠️ Failed: {len(failures)}"
    if failures:
        msg += " [" + ", ".join(failures) + "]"
    return msg
|
| 190 |
+
|
| 191 |
+
|
| 192 |
+
def crew_step5_rank(top_k: int, match_dir: str) -> dict:
    """Kick off the Ranker crew and persist the resulting ranking JSON.

    build_ranking() itself reads from data/matches internally; this
    wrapper triggers it through the ranking tool, parses the crew
    output, and mirrors it to data/ranking.json.
    """
    _ensure_openai_env()

    limit = int(top_k)
    agent = _ranker_agent()
    task = Task(
        description=f"Build Top-{limit} ranking JSON using build_ranking_tool(top_k). Return ONLY JSON.",
        expected_output="A valid ranking JSON object as a string.",
        agent=agent,
    )
    crew = Crew(agents=[agent], tasks=[task], process=Process.sequential, verbose=False, planning=False)
    outcome = crew.kickoff(inputs={"top_k": limit})
    ranking = _json(str(outcome))

    # Save alongside the non-crew code path's output location.
    destination = Path("data/ranking.json")
    destination.parent.mkdir(parents=True, exist_ok=True)
    with open(destination, "w", encoding="utf-8") as f:
        json.dump(ranking, f, indent=2, ensure_ascii=False)

    return ranking
|
core/crew_tools.py
ADDED
|
@@ -0,0 +1,41 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import json
|
| 2 |
+
from pathlib import Path
|
| 3 |
+
from typing import List, Dict, Any
|
| 4 |
+
|
| 5 |
+
from crewai.tools import tool
|
| 6 |
+
|
| 7 |
+
from core.jd_processor import generate_jd_rubric
|
| 8 |
+
from core.resume_parser import parse_resume
|
| 9 |
+
from core.matcher import match_candidate_to_jd
|
| 10 |
+
from core.ranking import build_ranking
|
| 11 |
+
from utils.file_loader import load_text_from_file
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
@tool("generate_jd_rubric_tool")
def generate_jd_rubric_tool(jd_text: str) -> str:
    """Generate a structured JD rubric JSON (string) from JD text."""
    # Thin CrewAI wrapper: delegate to the plain function, return JSON text.
    rubric = generate_jd_rubric(jd_text)
    return json.dumps(rubric, ensure_ascii=False)
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
@tool("parse_resume_tool")
def parse_resume_tool(resume_text: str, filename: str) -> str:
    """Parse a resume into candidate JSON (string) from resume text."""
    # Thin CrewAI wrapper around the plain parser.
    candidate = parse_resume(resume_text, filename)
    return json.dumps(candidate, ensure_ascii=False)
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
@tool("match_candidate_tool")
def match_candidate_tool(jd_rubric_json: str, candidate_json: str) -> str:
    """Match a candidate against JD rubric; returns match JSON (string)."""
    # Tool I/O is strings (JSON text); decode, delegate, re-encode.
    jd_rubric = json.loads(jd_rubric_json)
    candidate = json.loads(candidate_json)
    match_result = match_candidate_to_jd(jd_rubric, candidate)
    return json.dumps(match_result, ensure_ascii=False)
|
| 35 |
+
|
| 36 |
+
|
| 37 |
+
@tool("build_ranking_tool")
def build_ranking_tool(top_k: int) -> str:
    """Build ranking from data/matches; returns ranking JSON (string)."""
    # build_ranking reads data/matches internally; only top_k is passed.
    ranking = build_ranking(top_k=int(top_k))
    return json.dumps(ranking, ensure_ascii=False)
|
core/jd_processor.py
ADDED
|
@@ -0,0 +1,64 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# %%writefile core/jd_processor.py
|
| 2 |
+
import logging
|
| 3 |
+
import json
|
| 4 |
+
import os
|
| 5 |
+
from openai import OpenAI, APIError
|
| 6 |
+
from utils.prompts import JD_PROMPT
|
| 7 |
+
|
| 8 |
+
logger = logging.getLogger(__name__)
|
| 9 |
+
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
|
| 10 |
+
|
| 11 |
+
def generate_jd_rubric(jd_text, max_retries=3):
    """Generate JD rubric with retry logic and better error handling.

    Calls the chat API in JSON mode, validates the returned rubric
    structure, and retries up to *max_retries* times.  On final failure
    an empty rubric template is returned instead of raising.

    Fixes vs. the original: a generic Exception no longer returns the
    empty template immediately (it now retries like the other handlers),
    and a None completion content is guarded before slicing/parsing.
    """
    required_keys = {"role_title", "must_have_skills", "nice_to_have_skills",
                     "soft_skills", "minimum_years_experience", "recommended_weights"}

    for attempt in range(max_retries):
        try:
            response = client.chat.completions.create(
                model="gpt-4o-mini",
                messages=[
                    {"role": "system", "content": "You are an experienced HR Talent Manager AI assistant. Your specialty is analyzing job descriptions and creating structured hiring rubrics."},
                    {"role": "user", "content": JD_PROMPT.format(jd_text=jd_text)}
                ],
                temperature=0.2,
                response_format={"type": "json_object"}  # force JSON-only output
            )

            content = response.choices[0].message.content
            if content is None:
                # e.g. filtered/refused completion — retryable failure
                raise ValueError("Empty completion content")
            logger.debug(f"LLM response (attempt {attempt+1}): {content[:200]}...")

            jd_data = json.loads(content)

            # Validate structure before trusting the rubric downstream.
            if not required_keys.issubset(jd_data):
                raise ValueError("Missing required keys in response")

            return jd_data

        except json.JSONDecodeError as e:
            logger.warning(f"JSON decode failed (attempt {attempt+1}): {e}")
        except APIError as e:
            logger.error(f"OpenAI API error: {e}")
        except Exception as e:
            # Previously returned get_empty_template() here immediately,
            # skipping remaining retries; treat all failures uniformly.
            logger.error(f"Unexpected error: {e}")

    # All attempts exhausted — degrade gracefully.
    return get_empty_template()
|
| 54 |
+
|
| 55 |
+
def get_empty_template():
    """Return a fresh rubric dict with all expected keys and empty/zero values."""
    template = {"role_title": ""}
    for skills_key in ("must_have_skills", "nice_to_have_skills", "soft_skills"):
        template[skills_key] = []
    template["minimum_years_experience"] = 0
    template["recommended_weights"] = {}
    return template
|
core/matcher.py
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# %%writefile core/matcher.py
|
| 2 |
+
import json
|
| 3 |
+
from openai import OpenAI
|
| 4 |
+
from utils.prompts import JD_RESUME_MATCH_PROMPT
|
| 5 |
+
import os
|
| 6 |
+
|
| 7 |
+
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
|
| 8 |
+
|
| 9 |
+
def match_candidate_to_jd(jd_rubric, candidate_profile):
    """Score *candidate_profile* against *jd_rubric* with the LLM.

    Returns the parsed match dict.  Unlike jd_processor, this call does
    not use JSON mode, so the model can wrap its JSON in markdown fences;
    those are stripped before parsing.  json.JSONDecodeError still
    propagates if the payload is not valid JSON.
    """
    response = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[
            {
                "role": "system",
                "content": JD_RESUME_MATCH_PROMPT
            },
            {
                "role": "user",
                "content": f"""
Job Description Rubric:
{json.dumps(jd_rubric, indent=2)}

Candidate Profile:
{json.dumps(candidate_profile, indent=2)}
"""
            }
        ],
        temperature=0.2
    )

    content = response.choices[0].message.content.strip()
    # Models occasionally return ```json ... ``` fenced blocks even when
    # asked for raw JSON; unwrap them so json.loads doesn't fail.
    if content.startswith("```"):
        content = content.strip("`").strip()
        if content.startswith("json"):
            content = content[4:].strip()
    return json.loads(content)
|
core/ranking.py
ADDED
|
@@ -0,0 +1,41 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# %%writefile core/ranking.py
import json
import os

from core.scoring import score_candidate

JD_PATH = "data/jd.json"
MATCH_DIR = "data/matches"
OUT_PATH = "data/ranking.json"


def build_ranking(top_k: int = 10):
    """Score every saved match summary against the JD rubric and rank them.

    Reads the JD rubric from JD_PATH and each ``*_match.json`` file in
    MATCH_DIR, scores each candidate with core.scoring.score_candidate,
    sorts by total score (descending) and writes the result to OUT_PATH.

    Args:
        top_k: number of top candidates exposed under ``"ranking"``.

    Returns:
        dict with ``jd_role_title``, ``top_k``, ``ranking`` (top slice) and
        ``all_candidates`` (full sorted list).
    """
    with open(JD_PATH, "r", encoding="utf-8") as f:
        jd_rubric = json.load(f)

    # sorted() makes the scan (and tie) order deterministic; guard against
    # the matches directory not existing yet (no candidates processed).
    match_files = sorted(os.listdir(MATCH_DIR)) if os.path.isdir(MATCH_DIR) else []

    rows = []
    for fname in match_files:
        if not fname.endswith("_match.json"):
            continue

        fpath = os.path.join(MATCH_DIR, fname)
        with open(fpath, "r", encoding="utf-8") as f:
            match_summary = json.load(f)

        scored = score_candidate(jd_rubric, match_summary)
        scored["match_file"] = fname
        rows.append(scored)

    # Highest score first; .get guards against malformed scored rows.
    rows.sort(key=lambda row: row.get("total_score", 0.0), reverse=True)

    result = {
        "jd_role_title": jd_rubric.get("role_title", ""),
        "top_k": top_k,
        "ranking": rows[:top_k],
        "all_candidates": rows,
    }

    # Ensure the output directory exists before writing.
    os.makedirs(os.path.dirname(OUT_PATH) or ".", exist_ok=True)
    with open(OUT_PATH, "w", encoding="utf-8") as f:
        json.dump(result, f, indent=2)

    return result
|
| 41 |
+
|
core/resume_parser.py
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# %%writefile core/resume_parser.py
import json
import os

from openai import OpenAI

from utils.prompts import RESUME_PARSE_PROMPT

client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

# Fallback returned when the model reply cannot be parsed, so downstream
# matching/scoring always receives a profile with the expected keys.
_EMPTY_PROFILE = {
    "name": "",
    "skills": [],
    "education": [],
    "work_experience": [],
    "total_years_experience": 0,
    "summary": ""
}


def parse_resume(resume_text, filename):
    """Extract a structured candidate profile from raw resume text via LLM.

    Args:
        resume_text: plain text extracted from the resume file.
        filename: original file name; stored as ``candidate_id``.

    Returns:
        dict: candidate profile (name, skills, education, work_experience,
        total_years_experience, summary) plus ``candidate_id``. Falls back
        to an empty profile if the model reply is not valid JSON.
    """
    response = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[
            {"role": "system", "content": "You are an expert HR resume analyst specializing in extracting structured information from resumes with high accuracy."},
            {"role": "user", "content": RESUME_PARSE_PROMPT.format(resume_text=resume_text)}
        ],
        temperature=0.2
    )

    content = response.choices[0].message.content or ""

    # Best-effort: pull the outermost JSON object out of the reply (models
    # sometimes add fences/prose); fall back to an empty profile on failure.
    try:
        start = content.find("{")
        end = content.rfind("}") + 1
        if start == -1 or end <= start:
            raise ValueError("no JSON object in model reply")
        data = json.loads(content[start:end])
    except (ValueError, json.JSONDecodeError):
        data = dict(_EMPTY_PROFILE)  # copy so callers can mutate safely

    data["candidate_id"] = filename
    return data
|
core/scoring.py
ADDED
|
@@ -0,0 +1,113 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# %%writefile core/scoring.py
from typing import Any, Dict, List, Optional


def _safe_len(x) -> int:
    """Length of *x* if it is a list, else 0 (LLM output may be malformed)."""
    return len(x) if isinstance(x, list) else 0


def _to_float(value: Any, default: float = 0.0) -> float:
    """Coerce *value* to float; fall back to *default* on junk input.

    JD rubrics come from an LLM, so weights may arrive as strings like
    "0.6" (coercible) or as "60%" / None (not coercible).
    """
    try:
        return float(value)
    except (TypeError, ValueError):
        return default


def normalize_weights(weights: Dict[str, Any]) -> Dict[str, float]:
    """Normalize rubric weights so the four categories sum to 1.0.

    Falls back to a sensible default split when the rubric supplies no
    usable (positive-total) weights at all.
    """
    w_must = _to_float(weights.get("must_have", 0))
    w_nice = _to_float(weights.get("nice_to_have", 0))
    w_exp = _to_float(weights.get("experience", 0))
    w_soft = _to_float(weights.get("soft_skills", 0))

    total = w_must + w_nice + w_exp + w_soft
    if total <= 0:
        # Default emphasis: must-haves dominate, soft skills unscored.
        return {"must_have": 0.6, "nice_to_have": 0.25, "experience": 0.15, "soft_skills": 0.0}

    return {
        "must_have": w_must / total,
        "nice_to_have": w_nice / total,
        "experience": w_exp / total,
        "soft_skills": w_soft / total
    }


def compute_coverage(matched: List[Any], missing: List[Any], partial: Optional[List[Any]] = None) -> float:
    """Fraction of requirements covered; partial matches count half.

    Returns 0.0 when there is nothing to measure (all lists empty/absent).
    """
    m = _safe_len(matched)
    miss = _safe_len(missing)
    p = _safe_len(partial) if partial is not None else 0

    denom = m + miss + p
    if denom == 0:
        return 0.0

    return (m + 0.5 * p) / denom


def experience_score(assessment: str) -> float:
    """Map the LLM's experience assessment to a 0/1 score.

    "meets"/"exceeds" -> 1.0; "below" or anything unrecognized -> 0.0.
    """
    assessment = (assessment or "").strip().lower()
    if assessment in ("meets", "exceeds"):
        return 1.0
    return 0.0


def clamp(x: float, lo: float, hi: float) -> float:
    """Clamp *x* into the inclusive range [lo, hi]."""
    return max(lo, min(hi, x))


def score_candidate(jd_rubric: Dict[str, Any], match_summary: Dict[str, Any]) -> Dict[str, Any]:
    """Turn a Step-4 match summary into a transparent 0-100 score.

    base = 100 * (w_must*must_cov + w_nice*nice_cov + w_exp*exp + w_soft*soft),
    then a capped bonus/penalty derived from positive/negative indicators is
    added and the total is clamped to [0, 100].

    Args:
        jd_rubric: JD rubric dict (uses ``recommended_weights``).
        match_summary: LLM match summary (must_have_match, nice_to_have_match,
            experience_analysis, positive/negative indicators).

    Returns:
        dict with ``candidate_name``, ``total_score`` and a full ``breakdown``.
    """
    weights = normalize_weights(jd_rubric.get("recommended_weights", {}))

    # `or {}` guards against explicit nulls in the LLM output.
    mh = match_summary.get("must_have_match", {}) or {}
    nh = match_summary.get("nice_to_have_match", {}) or {}
    exp = match_summary.get("experience_analysis", {}) or {}

    must_cov = compute_coverage(
        matched=mh.get("matched", []),
        missing=mh.get("missing", []),
        partial=mh.get("partial", [])
    )

    nice_cov = compute_coverage(
        matched=nh.get("matched", []),
        missing=nh.get("missing", []),
        partial=None
    )

    exp_sc = experience_score(exp.get("assessment", ""))

    # Soft skills scoring (optional). Keep 0 for student MVP unless you add logic later.
    soft_sc = 0.0

    base_total = (
        weights["must_have"] * must_cov +
        weights["nice_to_have"] * nice_cov +
        weights["experience"] * exp_sc +
        weights["soft_skills"] * soft_sc
    ) * 100.0

    # Bonus/Penalty based on Step-4 indicators (transparent & capped)
    positives = match_summary.get("positive_indicators", []) or []
    negatives = match_summary.get("negative_indicators", []) or []

    bonus_per_positive = 1.0
    penalty_per_negative = 1.5
    raw_adjustment = (len(positives) * bonus_per_positive) - (len(negatives) * penalty_per_negative)

    # Cap adjustment so it doesn't dominate scoring
    adjustment_cap = 8.0
    adjustment = clamp(raw_adjustment, -adjustment_cap, adjustment_cap)

    final_total = clamp(base_total + adjustment, 0.0, 100.0)

    return {
        "candidate_name": match_summary.get("candidate_name", ""),
        "total_score": round(final_total, 2),
        "breakdown": {
            "base_score": round(base_total, 2),
            "bonus_penalty_adjustment": round(adjustment, 2),
            "positive_count": len(positives),
            "negative_count": len(negatives),
            "must_have_coverage": round(must_cov, 3),
            "nice_to_have_coverage": round(nice_cov, 3),
            "experience_score": round(exp_sc, 3),
            "weights_normalized": weights
        }
    }
|
| 113 |
+
|
requirements.txt
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
gradio==5.50.0
|
| 2 |
+
numpy==2.0.2
|
| 3 |
+
pandas==2.2.2
|
| 4 |
+
openai==2.12.0
|
| 5 |
+
pydantic==2.12.3
|
| 6 |
+
PyPDF2==3.0.1
|
| 7 |
+
python-docx==1.2.0
|
| 8 |
+
python-dotenv==1.2.1
|
| 9 |
+
tqdm==4.67.1
|
| 10 |
+
|
| 11 |
+
# CrewAI core (enough for our custom tool wrappers)
|
| 12 |
+
crewai==0.175.0
|
| 13 |
+
crewai-tools
|
utils/__init__.py
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# utils/__init__.py
|
| 2 |
+
# Package marker: makes `utils` importable as a package.
|
utils/file_loader.py
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# %%writefile file_loader.py
from PyPDF2 import PdfReader
from docx import Document

# Encodings tried, in order, when decoding plain-text uploads.
_TEXT_ENCODINGS = ("utf-8", "latin-1", "cp1252")


def load_text_from_file(uploaded_file):
    """Return the plain-text content of an uploaded resume file.

    Supports PDF, DOC/DOCX and TXT uploads, dispatched on file extension.

    Args:
        uploaded_file: file-like object with a ``name`` attribute
            (e.g. a Gradio upload).

    Returns:
        str: extracted text; pages/paragraphs joined with newlines.

    Raises:
        ValueError: missing extension, unsupported type, undecodable text
            file, or a parser failure while reading the file.
    """
    if "." not in uploaded_file.name:
        raise ValueError("File has no extension")

    file_type = uploaded_file.name.rsplit(".", 1)[-1].lower()

    if file_type == "pdf":
        try:
            reader = PdfReader(uploaded_file)
            # extract_text() can return None for image-only pages; treat as "".
            return "\n".join((page.extract_text() or "") for page in reader.pages)
        except Exception as e:
            raise ValueError(f"Error processing file: {e}")

    if file_type in ("docx", "doc"):
        try:
            doc = Document(uploaded_file)
            return "\n".join(para.text for para in doc.paragraphs)
        except Exception as e:
            raise ValueError(f"Error processing file: {e}")

    if file_type == "txt":
        content = uploaded_file.read()
        for encoding in _TEXT_ENCODINGS:
            try:
                return content.decode(encoding)
            except UnicodeDecodeError:
                continue
        raise ValueError("Unable to decode text file")

    # Raised outside any try block so it is not re-wrapped into a generic
    # "Error processing file" message (bug in the previous version).
    raise ValueError(f"Unsupported file type: {file_type}")
|
utils/prompts.py
ADDED
|
@@ -0,0 +1,84 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# %%writefile utils/prompts.py
# Prompt templates for the three LLM steps of the pipeline:
#   1. JD_PROMPT            - JD text -> structured hiring rubric
#   2. RESUME_PARSE_PROMPT  - resume text -> structured candidate profile
#   3. JD_RESUME_MATCH_PROMPT - rubric + profile -> qualitative match summary
# Placeholders in braces ({jd_text}, {resume_text}) are filled via str.format.

# Step 1: extract a hiring rubric from a raw job description.
JD_PROMPT = """
Given the following Job Description, extract a structured hiring rubric.

Return STRICT JSON with the following fields:
- role_title
- must_have_skills (list)
- nice_to_have_skills (list)
- soft_skills (list)
- minimum_years_experience (number)
- minimum education requirements (list)
- recommended_weights (object with must_have, nice_to_have, experience, education, soft_skills)

Job Description:
----------------
{jd_text}
"""

# Step 2: extract a structured candidate profile from resume text.
RESUME_PARSE_PROMPT = """
You are an experienced HR resume analyst.

Given the following resume text, extract a structured candidate profile.

Return STRICT JSON with these fields:
- name
- technical skills (list)
- soft skills (list)
- education (list)
- work_experience (list of short role summaries)
- total_years_experience (number)
- summary (2–3 sentence professional summary)

Resume Text:
-------------
{resume_text}

IMPORTANT:
- Output JSON only
- Do not include explanations
"""

# Step 3: qualitative JD-vs-candidate comparison (no scoring here; the
# numeric score is computed deterministically in core/scoring.py).
JD_RESUME_MATCH_PROMPT = """
You are an experienced HR hiring analyst.

You will be given:
1. A structured Job Description rubric (JSON)
2. A structured candidate profile (JSON)

Your task:
- Compare the candidate against the JD rubric
- Identify matches, partial matches, and gaps
- Identify positive and negative indicators
- DO NOT calculate a score
- Be factual and conservative
- Do NOT infer sensitive personal attributes
- Output ONLY valid JSON in the specified schema

Matching rules:
- A skill is matched if clearly demonstrated in experience or skills
- Partial if loosely related or implied
- Missing if not found

Output JSON schema:
{
  "candidate_name": "",
  "must_have_match": {
    "matched": [],
    "missing": [],
    "partial": []
  },
  "nice_to_have_match": {
    "matched": [],
    "missing": []
  },
  "experience_analysis": {
    "required_years": 0,
    "candidate_years": 0,
    "assessment": "below | meets | exceeds"
  },
  "positive_indicators": [],
  "negative_indicators": [],
  "overall_fit_summary": ""
}
"""
|
utils/resume_loader.py
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# %%writefile utils/resume_loader.py
import os

# Resume formats the pipeline can parse (see utils/file_loader.py).
_ALLOWED_EXTENSIONS = (".pdf", ".docx", ".txt")


def load_resume_files(resume_dir="data/resumes"):
    """Return sorted paths of parseable resume files in *resume_dir*.

    Only regular files with a supported extension are included; hidden
    files (dotfiles) are skipped. Sorting makes the processing order
    deterministic across runs (os.listdir order is arbitrary).

    Args:
        resume_dir: directory to scan (default ``data/resumes``).

    Returns:
        list[str]: full file paths, sorted by name.
    """
    return sorted(
        os.path.join(resume_dir, f)
        for f in os.listdir(resume_dir)
        if os.path.isfile(os.path.join(resume_dir, f))
        and f.lower().endswith(_ALLOWED_EXTENSIONS)
        and not f.startswith(".")
    )
|