|
|
"""
|
|
|
app.py (MULTI-LABEL V2 - English UI)
|
|
|
|
|
|
Gradio interface for the Entity Extraction Model
|
|
|
(SKILL, SOFT_SKILL, LANG, CERT, EXPERIENCE_DURATION)
|
|
|
Loads the trained model and provides a UI to compare CV and JD.
|
|
|
"""
|
|
|
|
|
|
import gradio as gr
|
|
|
import re
|
|
|
from typing import List, Dict, Set, Tuple
|
|
|
|
|
|
|
|
|
from scripts.inference import EntityExtractor
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Identifier of the trained model handed to EntityExtractor.
MODEL_PATH = "feliponi/hirly-ner-multi"

# Load the model once at import time. A failure must not crash the app:
# `extractor` falls back to None so the UI can still start and report the
# problem when an analysis is requested.
try:
    extractor = EntityExtractor(MODEL_PATH)
except Exception as e:
    # Report the underlying exception; without it the root cause of a failed
    # load (bad path, missing dependency, network error, ...) is invisible.
    print(
        f"CRITICAL ERROR: Could not load model from {MODEL_PATH}: "
        f"{type(e).__name__}: {e}"
    )
    print("Ensure the trained model is in the correct directory.")
    extractor = None
else:
    print(f"Model loaded successfully from {MODEL_PATH}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Spelled-out numbers recognised when a duration span contains no digits.
_NUMBER_WORDS = {
    "one": 1,
    "two": 2,
    "three": 3,
    "four": 4,
    "five": 5,
    "six": 6,
    "seven": 7,
    "eight": 8,
    "nine": 9,
    "ten": 10,
}

# Whole-word matcher, so "ten" does not fire inside "extensive" or "often".
_NUMBER_WORD_RE = re.compile(r"\b(" + "|".join(_NUMBER_WORDS) + r")\b")

# Digits with an optional decimal part written with "." or ",".
_DIGITS_RE = re.compile(r"\d+(?:[.,]\d+)?")

# Month units: any "month..." form, or "mes"/"meses"/"mês" as a standalone
# word. The lookbehind rejects a preceding letter (so "games" is not a month
# unit) while still allowing a preceding digit ("6meses").
_MONTH_RE = re.compile(r"month|(?<![^\W\d_])m[eê]s(?:es)?\b")


def parse_and_sum_experience(entities: List[Dict]) -> float:
    """
    Sum all 'EXPERIENCE_DURATION' entities into a number of years.

    For each entity labelled 'EXPERIENCE_DURATION' the first number in its
    text is used: digits (with "." or "," as decimal separator) take
    precedence, otherwise a spelled-out number from "one" to "ten". The value
    is treated as months when the text contains a month unit, and as years
    otherwise. Spans without a recognisable number are ignored.

    Args:
        entities: Entity dicts carrying "entity" (the span text) and "label".
            Dicts missing either key are skipped instead of raising.

    Returns:
        Total experience in years, rounded to one decimal place.
    """
    total_years = 0.0

    for entity in entities:
        if entity.get("label") != "EXPERIENCE_DURATION":
            continue

        text = (entity.get("entity") or "").lower()

        digits = _DIGITS_RE.search(text)
        if digits:
            amount = float(digits.group().replace(",", "."))
        else:
            word = _NUMBER_WORD_RE.search(text)
            if word is None:
                # No usable quantity in this span.
                continue
            amount = float(_NUMBER_WORDS[word.group(1)])

        total_years += amount / 12 if _MONTH_RE.search(text) else amount

    return round(total_years, 1)
|
|
|
|
|
|
|
|
|
# Every label this app groups; also fixes the key order of the grouped dict.
_ENTITY_LABELS = ("SKILL", "SOFT_SKILL", "LANG", "CERT", "EXPERIENCE_DURATION")

# Labels compared set-wise between CV and JD (durations are summed instead).
_MATCHED_LABELS = ("SKILL", "SOFT_SKILL", "LANG", "CERT")

# Confidence threshold passed to the extractor for both documents.
_CONFIDENCE_THRESHOLD = 0.7


def _detect_entities(text: str, confidence_threshold: float) -> List[Dict]:
    """Run the extractor on *text*; blank input yields [] without a model call."""
    if not text or not text.strip():
        return []
    # Materialise the result so it can be iterated more than once.
    return list(extractor.extract_entities_with_details(text, confidence_threshold))


def _group_by_label(entities: List[Dict]) -> Dict[str, Set[str]]:
    """Bucket lower-cased entity texts by label, ignoring unknown labels."""
    grouped = {label: set() for label in _ENTITY_LABELS}
    for entity in entities:
        bucket = grouped.get(entity.get("label"))
        value = entity.get("entity")
        # Skip unknown labels and entities that carry no text.
        if bucket is not None and value:
            bucket.add(value.lower())
    return grouped


def _build_match_report(
    cv_groups: Dict[str, Set[str]], jd_groups: Dict[str, Set[str]]
) -> str:
    """Render the Markdown report of entities present in both CV and JD."""
    sections = ["## π Match Analysis\n\n"]
    for label in _MATCHED_LABELS:
        matching = sorted(cv_groups[label] & jd_groups[label])
        if matching:
            items = f"_{', '.join(matching)}_"
        else:
            items = "_No matching items found._"
        # Blank lines matter: a "---" directly below a text line is a setext
        # heading underline in Markdown, not a horizontal rule.
        sections.append(
            f"**Matching {label.replace('_', ' ')}S: {len(matching)}**\n\n"
            f"{items}\n\n---\n\n"
        )
    return "".join(sections)


def extract_and_group_entities(
    text: str, confidence_threshold: float
) -> Dict[str, Set[str]]:
    """
    Extract entities from text and group them by label.

    Args:
        text: Raw CV or JD text. Blank text returns empty groups without
            calling the model.
        confidence_threshold: Threshold forwarded to the extractor.

    Returns:
        A dict with one set of lower-cased entity texts per known label
        (SKILL, SOFT_SKILL, LANG, CERT, EXPERIENCE_DURATION). Every label is
        always present, possibly with an empty set.
    """
    return _group_by_label(_detect_entities(text, confidence_threshold))


def analyze_cv_and_jd(
    cv_text: str, jd_text: str
) -> Tuple[str, str, str, Dict, Dict]:
    """
    Main function called by Gradio.

    Runs the extractor once per document, groups the entities by label, sums
    the experience durations, and compares CV against JD.

    Args:
        cv_text: Resume text.
        jd_text: Job description text.

    Returns:
        A 5-tuple of: the Markdown match report, the CV experience string,
        the JD experience string, and the CV and JD entities as
        ``{label: sorted list}`` dicts (empty labels and
        EXPERIENCE_DURATION omitted). If the model failed to load, an error
        message is returned in the first slot and the rest are empty.
    """
    if not extractor:
        return "ERROR: Model not loaded.", "", "", {}, {}

    # One inference pass per document; grouping and experience parsing both
    # reuse the same entity list.
    cv_entities = _detect_entities(cv_text, _CONFIDENCE_THRESHOLD)
    jd_entities = _detect_entities(jd_text, _CONFIDENCE_THRESHOLD)

    cv_groups = _group_by_label(cv_entities)
    jd_groups = _group_by_label(jd_entities)

    cv_exp = parse_and_sum_experience(cv_entities)
    jd_exp = parse_and_sum_experience(jd_entities)

    match_output = _build_match_report(cv_groups, jd_groups)

    # Durations are reported as totals, so they are left out of the JSON view.
    cv_json_output = {
        label: sorted(values)
        for label, values in cv_groups.items()
        if values and label != "EXPERIENCE_DURATION"
    }
    jd_json_output = {
        label: sorted(values)
        for label, values in jd_groups.items()
        if values and label != "EXPERIENCE_DURATION"
    }

    cv_exp_str = f"{cv_exp} years"
    jd_exp_str = f"{jd_exp} years (Requirement extracted from JD)"

    return (match_output, cv_exp_str, jd_exp_str, cv_json_output, jd_json_output)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Page layout: two input boxes, one action button, then the results area.
with gr.Blocks(title="Hirly - Resume & JD Analyzer") as demo:
    # Header and short usage note.
    gr.Markdown("# π Resume vs. Job Description Analyzer")
    gr.Markdown(
        "Provide the text from a Resume (CV) and a Job Description (JD) to extract "
        "skills, soft skills, languages, certifications, years of experience, and see their compatibility."
    )

    # Inputs, side by side.
    with gr.Row():
        with gr.Column():
            cv_input = gr.Textbox(lines=20, label="Resume (CV) Text")
        with gr.Column():
            jd_input = gr.Textbox(lines=20, label="Job Description (JD) Text")

    analyze_button = gr.Button("Analyze Compatibility", variant="primary")

    gr.Markdown("---")

    # Results: match report in the wider column, experience totals beside it.
    with gr.Row():
        with gr.Column(scale=2):
            match_output = gr.Markdown(label="Match Analysis")
        with gr.Column(scale=1):
            cv_exp_output = gr.Textbox(label="Total Experience (CV)", interactive=False)
            jd_exp_output = gr.Textbox(label="Total Experience (JD)", interactive=False)

    # Extracted entities for each document.
    with gr.Row():
        cv_only_output = gr.JSON(label="Entities Found in CV")
        jd_only_output = gr.JSON(label="Entities Required by JD")

    # The output order must match the tuple returned by analyze_cv_and_jd.
    analyze_button.click(
        analyze_cv_and_jd,
        [cv_input, jd_input],
        [match_output, cv_exp_output, jd_exp_output, cv_only_output, jd_only_output],
    )


# Start the web server only when the file is executed as a script.
if __name__ == "__main__":
    demo.launch()