"""
app.py (MULTI-LABEL V2 - English UI)

Gradio interface for the Entity Extraction Model
(SKILL, SOFT_SKILL, LANG, CERT, EXPERIENCE_DURATION).
Loads the trained model and provides a UI to compare CV and JD.
"""

import re
from typing import Dict, List, Optional, Set, Tuple

import gradio as gr

# Import the extractor we already created
from scripts.inference import EntityExtractor

# --- 1. Model Loading ---

# Model path/identifier handed to EntityExtractor.
MODEL_PATH = "feliponi/hirly-ner-multi"

# Single confidence cut-off used for every extraction performed by the UI.
CONFIDENCE_THRESHOLD = 0.7

# Labels that are compared between CV and JD (experience is summed instead).
MATCH_LABELS = ["SKILL", "SOFT_SKILL", "LANG", "CERT"]

# Every label the app groups entities under.
ALL_LABELS = MATCH_LABELS + ["EXPERIENCE_DURATION"]

extractor: Optional[EntityExtractor]
try:
    extractor = EntityExtractor(MODEL_PATH)
    print(f"Model loaded successfully from {MODEL_PATH}")
except Exception as e:
    # Best-effort start-up: keep the UI importable and report the failure
    # through analyze_cv_and_jd instead of crashing at import time.
    print(f"CRITICAL ERROR: Could not load model from {MODEL_PATH}.")
    print("Ensure the trained model is in the correct directory.")
    print(f"Details: {e}")
    extractor = None


# --- 2. Business Logic ---

_NUMBER_WORDS = {
    "one": 1,
    "two": 2,
    "three": 3,
    "four": 4,
    "five": 5,
    "six": 6,
    "seven": 7,
    "eight": 8,
    "nine": 9,
    "ten": 10,
}

# Integer or decimal number; both "." and "," are accepted as separators.
_NUMERIC_RE = re.compile(r"(\d+[\.,]\d+|\d+)")

# Whole-word match only, so "one" does not fire inside "someone" and
# "eight" does not fire inside "eighteen".
_NUMBER_WORD_RE = re.compile(r"\b(" + "|".join(_NUMBER_WORDS) + r")\b")


def parse_and_sum_experience(entities: List[Dict]) -> float:
    """
    Parses 'EXPERIENCE_DURATION' spans and sums them into years.

    Each entity is a dict with at least the keys "entity" (the span text)
    and "label". For every span labelled EXPERIENCE_DURATION the first
    numeric value is used; if there is none, the first spelled-out number
    word (one..ten) is used. Spans containing "month" or "mes" are treated
    as months and divided by 12; all others are treated as years. Spans
    with no recognisable number contribute nothing.

    Returns the total in years, rounded to one decimal place.
    """
    total_experience = 0.0

    durations = [
        e["entity"].lower() for e in entities if e["label"] == "EXPERIENCE_DURATION"
    ]

    for text in durations:
        found_number = None

        match = _NUMERIC_RE.search(text)
        if match:
            found_number = float(match.group(1).replace(",", "."))
        else:
            word_match = _NUMBER_WORD_RE.search(text)
            if word_match:
                found_number = _NUMBER_WORDS[word_match.group(1)]

        if found_number is None:
            continue

        if "month" in text or "mes" in text:
            total_experience += found_number / 12
        else:
            total_experience += found_number

    return round(total_experience, 1)


def _group_entities(entities: List[Dict]) -> Dict[str, Set[str]]:
    """Groups lower-cased entity texts by label, ignoring unknown labels."""
    grouped_entities: Dict[str, Set[str]] = {label: set() for label in ALL_LABELS}
    for entity in entities:
        label = entity.get("label")
        if label in grouped_entities:
            grouped_entities[label].add(entity["entity"].lower())
    return grouped_entities


def extract_and_group_entities(
    text: str, confidence_threshold: float
) -> Dict[str, Set[str]]:
    """
    Extracts entities from text and groups them by label.

    Runs the module-level extractor on `text` with `confidence_threshold`
    and returns a dict with one (possibly empty) set of lower-cased entity
    strings per known label.
    """
    entities = extractor.extract_entities_with_details(text, confidence_threshold)
    return _group_entities(entities)


def _to_json_payload(groups: Dict[str, Set[str]]) -> Dict[str, List[str]]:
    """Builds the JSON view: non-empty groups, sorted, without experience."""
    return {
        label: sorted(values)
        for label, values in groups.items()
        if values and label != "EXPERIENCE_DURATION"
    }


def analyze_cv_and_jd(
    cv_text: str, jd_text: str
) -> Tuple[str, str, str, Dict, Dict]:
    """
    Main function called by Gradio.

    Processes CV and JD, finds all entities, sums experience, and compares.

    Returns a 5-tuple: (match analysis markdown, CV experience string,
    JD experience string, CV entities for the JSON view, JD entities for
    the JSON view). If the model failed to load, the first element carries
    an error message and the remaining elements are empty.
    """
    if extractor is None:
        return "ERROR: Model not loaded.", "", "", {}, {}

    # 1. Run the model once per text and reuse the result for both the
    #    grouping and the experience sum.
    cv_entities = extractor.extract_entities_with_details(
        cv_text, CONFIDENCE_THRESHOLD
    )
    jd_entities = extractor.extract_entities_with_details(
        jd_text, CONFIDENCE_THRESHOLD
    )
    cv_groups = _group_entities(cv_entities)
    jd_groups = _group_entities(jd_entities)

    # 2. Sum experience
    cv_exp = parse_and_sum_experience(cv_entities)
    jd_exp = parse_and_sum_experience(jd_entities)

    # 3. Format Match Analysis output
    sections = ["## 🚀 Match Analysis\n\n"]
    for label in MATCH_LABELS:
        matching = cv_groups[label] & jd_groups[label]
        sections.append(
            f"**Matching {label.replace('_', ' ')}S: {len(matching)}**\n"
        )
        if matching:
            sections.append(f"_{', '.join(sorted(matching))}_\n")
        else:
            sections.append("_No matching items found._\n")
        # The blank line before '---' is required: directly under a text
        # line, Markdown reads '---' as a heading underline, not a rule.
        sections.append("\n---\n\n")
    match_output = "".join(sections)

    # 4. Format JSON outputs
    cv_json_output = _to_json_payload(cv_groups)
    jd_json_output = _to_json_payload(jd_groups)

    cv_exp_str = f"{cv_exp} years"
    jd_exp_str = f"{jd_exp} years (Requirement extracted from JD)"

    return (match_output, cv_exp_str, jd_exp_str, cv_json_output, jd_json_output)


# --- 3. Gradio Interface Definition (All English) ---
with gr.Blocks(title="Hirly - Resume & JD Analyzer") as demo:
    gr.Markdown("# 🚀 Resume vs. Job Description Analyzer")
    gr.Markdown(
        "Provide the text from a Resume (CV) and a Job Description (JD) to extract "
        "skills, soft skills, languages, certifications, years of experience, and see their compatibility."
    )

    with gr.Row():
        with gr.Column():
            cv_input = gr.Textbox(lines=20, label="Resume (CV) Text")
        with gr.Column():
            jd_input = gr.Textbox(lines=20, label="Job Description (JD) Text")

    analyze_button = gr.Button("Analyze Compatibility", variant="primary")

    gr.Markdown("---")

    with gr.Row():
        with gr.Column(scale=2):
            match_output = gr.Markdown(label="Match Analysis")
        with gr.Column(scale=1):
            cv_exp_output = gr.Textbox(label="Total Experience (CV)", interactive=False)
            jd_exp_output = gr.Textbox(label="Total Experience (JD)", interactive=False)

    with gr.Row():
        cv_only_output = gr.JSON(label="Entities Found in CV")
        jd_only_output = gr.JSON(label="Entities Required by JD")

    # Connect button to function
    analyze_button.click(
        fn=analyze_cv_and_jd,
        inputs=[cv_input, jd_input],
        outputs=[
            match_output,
            cv_exp_output,
            jd_exp_output,
            cv_only_output,
            jd_only_output,
        ],
    )

if __name__ == "__main__":
    demo.launch()