Spaces:

feliponi
/

hirly-ner

Sleeping

File size: 6,147 Bytes

"""

app.py (MULTI-LABEL V2 - English UI)



Gradio interface for the Entity Extraction Model

(SKILL, SOFT_SKILL, LANG, CERT, EXPERIENCE_DURATION)

Loads the trained model and provides a UI to compare CV and JD.

"""

import gradio as gr
import re
from typing import List, Dict, Set, Tuple

# Import the extractor we already created
from scripts.inference import EntityExtractor

# --- 1. Model Loading ---
# Identifier handed to EntityExtractor. The value has the form of a
# Hugging Face Hub repo id ("<user>/<model>") -- NOTE(review): confirm whether
# EntityExtractor also accepts a local directory path here.
MODEL_PATH = "feliponi/hirly-ner-multi"
try:
    extractor = EntityExtractor(MODEL_PATH)
except Exception as e:
    # Deliberately broad: whatever goes wrong while loading, the module must
    # still import so the UI can start and report "Model not loaded".
    # The underlying error is printed so the failure can be diagnosed.
    print(f"CRITICAL ERROR: Could not load model from {MODEL_PATH}.")
    print(f"Reason: {type(e).__name__}: {e}")
    print("Ensure the trained model is in the correct directory.")
    extractor = None
else:
    print(f"Model loaded successfully from {MODEL_PATH}")

# --- 2. Business Logic (Unchanged) ---


def parse_and_sum_experience(entities: List[Dict]) -> float:
    """
    Sum every 'EXPERIENCE_DURATION' span in *entities* into a total in years.

    Each span contributes a single quantity: the first digit-based number it
    contains ("5", "2.5" or "2,5") or, when it has no digits, the first
    spelled-out number from "one" to "twenty". The quantity counts as months
    when the span contains a month word ("month...", "mes", "meses", "mês")
    and as years otherwise. Spans without a recognisable quantity are skipped.

    Words are compared as whole alphabetic tokens rather than substrings, so
    "extensive" is not read as "ten", "eighteen" is not read as "eight", and
    "times" is not read as a month marker.

    Args:
        entities: Dicts carrying an "entity" text and a "label". Entries whose
            label is missing or is not 'EXPERIENCE_DURATION' are ignored.

    Returns:
        Total experience in years, rounded to one decimal place.
    """
    num_words = {
        "one": 1,
        "two": 2,
        "three": 3,
        "four": 4,
        "five": 5,
        "six": 6,
        "seven": 7,
        "eight": 8,
        "nine": 9,
        "ten": 10,
        "eleven": 11,
        "twelve": 12,
        "thirteen": 13,
        "fourteen": 14,
        "fifteen": 15,
        "sixteen": 16,
        "seventeen": 17,
        "eighteen": 18,
        "nineteen": 19,
        "twenty": 20,
    }
    month_words = {"mes", "meses", "mês"}

    total_experience = 0.0
    for entity in entities:
        # .get() so an entry without a label is skipped instead of raising.
        if entity.get("label") != "EXPERIENCE_DURATION":
            continue

        text = entity["entity"].lower()
        # Alphabetic tokens only: "6months" and "6-month" still yield
        # "months" / "month", while digits and punctuation are dropped.
        words = re.findall(r"[^\W\d_]+", text)

        match = re.search(r"(\d+[\.,]\d+|\d+)", text)
        if match:
            # Accept a decimal comma ("2,5") as well as a decimal point.
            amount = float(match.group(1).replace(",", "."))
        else:
            amount = next(
                (float(num_words[word]) for word in words if word in num_words),
                None,
            )

        if amount is None:
            continue

        is_months = any(
            word.startswith("month") or word in month_words for word in words
        )
        total_experience += amount / 12 if is_months else amount

    return round(total_experience, 1)


def extract_and_group_entities(
    text: str, confidence_threshold: float
) -> Dict[str, Set[str]]:
    """
    Run the extractor on *text* and bucket the lower-cased entity texts by label.

    Only the five tracked labels get a bucket; entities carrying any other
    label (or none) are dropped. Every tracked label is present in the result,
    with an empty set when nothing was found for it.
    """
    tracked_labels = ("SKILL", "SOFT_SKILL", "LANG", "CERT", "EXPERIENCE_DURATION")
    buckets: Dict[str, Set[str]] = {name: set() for name in tracked_labels}

    for found in extractor.extract_entities_with_details(text, confidence_threshold):
        bucket = buckets.get(found.get("label"))
        if bucket is None:
            # Label is missing or not one we track.
            continue
        bucket.add(found["entity"].lower())

    return buckets


def _group_entities_by_label(entities: List[Dict]) -> Dict[str, Set[str]]:
    """Bucket lower-cased entity texts under the four labels compared between CV and JD."""
    grouped: Dict[str, Set[str]] = {
        "SKILL": set(),
        "SOFT_SKILL": set(),
        "LANG": set(),
        "CERT": set(),
    }
    for entity in entities:
        bucket = grouped.get(entity.get("label"))
        if bucket is not None:
            bucket.add(entity["entity"].lower())
    return grouped


def analyze_cv_and_jd(
    cv_text: str, jd_text: str
) -> Tuple[str, str, str, Dict, Dict]:
    """
    Main function called by Gradio.

    Extracts entities from the CV and the JD, sums the experience durations
    found in each, and reports which skills, soft skills, languages and
    certifications appear in both.

    Args:
        cv_text: Raw resume text.
        jd_text: Raw job-description text.

    Returns:
        A 5-tuple, in the order the UI outputs are wired:
        (match analysis as Markdown, CV experience string, JD experience
        string, CV entities by label, JD entities by label). The two dicts
        map a label to a sorted list of entity texts and omit labels with no
        entities. When the model failed to load, an error message is returned
        in the first slot and the remaining slots are empty.
    """
    if not extractor:
        return "ERROR: Model not loaded.", "", "", {}, {}

    confidence_threshold = 0.7

    # 1. Run the model once per text; the same entity list feeds both the
    #    per-label grouping and the experience total. list() guards against
    #    the extractor returning a one-shot iterator.
    cv_entities = list(
        extractor.extract_entities_with_details(cv_text, confidence_threshold)
    )
    jd_entities = list(
        extractor.extract_entities_with_details(jd_text, confidence_threshold)
    )
    cv_groups = _group_entities_by_label(cv_entities)
    jd_groups = _group_entities_by_label(jd_entities)

    # 2. Sum experience
    cv_exp = parse_and_sum_experience(cv_entities)
    jd_exp = parse_and_sum_experience(jd_entities)

    # 3. Format Match Analysis output
    sections = ["## 🚀 Match Analysis\n\n"]
    for label in ("SKILL", "SOFT_SKILL", "LANG", "CERT"):
        matching = cv_groups[label] & jd_groups[label]

        sections.append(
            f"**Matching {label.replace('_', ' ')}S: {len(matching)}**\n"
        )
        if matching:
            sections.append(f"_{', '.join(sorted(matching))}_\n")
        else:
            sections.append("_No matching items found._\n")
        # The blank line matters: "---" directly under a text line is a
        # Markdown setext heading underline, not a horizontal rule.
        sections.append("\n---\n")
    match_output = "".join(sections)

    # 4. Format JSON outputs (labels with no entities are left out)
    cv_json_output = {k: sorted(v) for k, v in cv_groups.items() if v}
    jd_json_output = {k: sorted(v) for k, v in jd_groups.items() if v}

    cv_exp_str = f"{cv_exp} years"
    jd_exp_str = f"{jd_exp} years (Requirement extracted from JD)"

    return (match_output, cv_exp_str, jd_exp_str, cv_json_output, jd_json_output)


# --- 3. Gradio Interface Definition (All English) ---

# Components are laid out in the order they are created inside the context
# managers below, so statement order here is the page layout.
with gr.Blocks(title="Hirly - Resume & JD Analyzer") as demo:
    # Page heading and short usage instructions.
    gr.Markdown("# 🚀 Resume vs. Job Description Analyzer")
    gr.Markdown(
        "Provide the text from a Resume (CV) and a Job Description (JD) to extract "
        "skills, soft skills, languages, certifications, years of experience, and see their compatibility."
    )

    # Inputs: CV and JD text boxes side by side.
    with gr.Row():
        with gr.Column():
            cv_input = gr.Textbox(lines=20, label="Resume (CV) Text")
        with gr.Column():
            jd_input = gr.Textbox(lines=20, label="Job Description (JD) Text")

    analyze_button = gr.Button("Analyze Compatibility", variant="primary")

    gr.Markdown("---")

    # Results, first row: match report next to the two read-only experience totals.
    with gr.Row():
        with gr.Column(scale=2):
            match_output = gr.Markdown(label="Match Analysis")
        with gr.Column(scale=1):
            cv_exp_output = gr.Textbox(label="Total Experience (CV)", interactive=False)
            jd_exp_output = gr.Textbox(label="Total Experience (JD)", interactive=False)

    # Results, second row: entities extracted from each text, shown as JSON.
    with gr.Row():
        cv_only_output = gr.JSON(label="Entities Found in CV")
        jd_only_output = gr.JSON(label="Entities Required by JD")

    # Wire the button to analyze_cv_and_jd. The order of `outputs` must match
    # the order of the 5-tuple that function returns.
    analyze_button.click(
        fn=analyze_cv_and_jd,
        inputs=[cv_input, jd_input],
        outputs=[
            match_output,
            cv_exp_output,
            jd_exp_output,
            cv_only_output,
            jd_only_output,
        ],
    )

# Launch the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()