Spaces:

feliponi
/

hirly-ner

Sleeping

App Files Files Community

hirly-ner / app.py

feliponi

Upload app.py

e2e5a5f verified about 1 month ago

raw

history blame contribute delete

6.15 kB

	"""
	app.py (MULTI-LABEL V2 - English UI)

	Gradio interface for the Entity Extraction Model
	(SKILL, SOFT_SKILL, LANG, CERT, EXPERIENCE_DURATION)
	Loads the trained model and provides a UI to compare CV and JD.
	"""

	import gradio as gr
	import re
	from typing import List, Dict, Set, Tuple

	# Import the extractor we already created
	from scripts.inference import EntityExtractor

	# --- 1. Model Loading ---
	# Model location handed to EntityExtractor.
	# NOTE(review): the value looks like a hub-style "owner/name" identifier rather
	# than a local directory - confirm which forms EntityExtractor accepts.
	MODEL_PATH = "feliponi/hirly-ner-multi"
	try:
	    extractor = EntityExtractor(MODEL_PATH)
	    print(f"Model loaded successfully from {MODEL_PATH}")
	except Exception as e:
	    # Deliberately broad: this is the start-up boundary. Any failure leaves
	    # `extractor` as None so the rest of the module can still be imported and
	    # report "model not loaded" instead of crashing.
	    print(f"CRITICAL ERROR: Could not load model from {MODEL_PATH}.")
	    # Surface the underlying cause; without it the failure cannot be diagnosed.
	    print(f"Reason: {type(e).__name__}: {e}")
	    print("Ensure the trained model is in the correct directory.")
	    extractor = None

	# --- 2. Business Logic (Unchanged) ---


	def parse_and_sum_experience(entities: List[Dict]) -> float:
	    """
	    Sum every 'EXPERIENCE_DURATION' entity into a total number of years.

	    Each duration span is lower-cased and scanned for a quantity: a numeral
	    first ("5", "2.5", "1,5"), otherwise an English number word from "one"
	    to "ten". Spans that mention months ("month" / "mes") are divided by 12;
	    every other span is counted as years. Spans without a recognisable
	    quantity contribute nothing.

	    Args:
	        entities: Entity dicts; "label" selects the duration spans and
	            "entity" holds the span text.

	    Returns:
	        Total experience in years, rounded to one decimal place
	        (0.0 when no duration is found).
	    """
	    num_words = {
	        "one": 1,
	        "two": 2,
	        "three": 3,
	        "four": 4,
	        "five": 5,
	        "six": 6,
	        "seven": 7,
	        "eight": 8,
	        "nine": 9,
	        "ten": 10,
	    }
	    # Decimal alternative comes first so "2.5" is not cut short to "2".
	    # The "|" must be a real alternation: an escaped "\|" only matches a
	    # literal pipe and would make plain numerals such as "5 years" unparseable.
	    number_pattern = re.compile(r"\d+[.,]\d+|\d+")
	    # Whole words only, so "ten" inside "extensive" is not read as a quantity.
	    word_pattern = re.compile(r"\b(" + "|".join(num_words) + r")\b")

	    durations = [
	        e["entity"].lower()
	        for e in entities
	        if e.get("label") == "EXPERIENCE_DURATION"
	    ]

	    total_experience = 0.0
	    for text in durations:
	        numeric = number_pattern.search(text)
	        if numeric:
	            # Accept a decimal comma ("1,5") as well as a decimal point.
	            found_number = float(numeric.group().replace(",", "."))
	        else:
	            spelled = word_pattern.search(text)
	            if spelled is None:
	                continue  # no quantity in this span
	            found_number = num_words[spelled.group(1)]

	        if "month" in text or "mes" in text:
	            total_experience += found_number / 12
	        else:
	            total_experience += found_number

	    return round(total_experience, 1)


	def extract_and_group_entities(
	    text: str, confidence_threshold: float
	) -> Dict[str, Set[str]]:
	    """
	    Run the module-level extractor on `text` and bucket the hits by label.

	    Returns a dict with one set per known label (SKILL, SOFT_SKILL, LANG,
	    CERT, EXPERIENCE_DURATION); each set holds the lower-cased entity texts
	    found for that label. Entities carrying any other label are dropped.
	    """
	    known_labels = ("SKILL", "SOFT_SKILL", "LANG", "CERT", "EXPERIENCE_DURATION")
	    buckets = {name: set() for name in known_labels}

	    for hit in extractor.extract_entities_with_details(text, confidence_threshold):
	        bucket = buckets.get(hit.get("label"))
	        if bucket is None:
	            # Label missing or not one we track.
	            continue
	        bucket.add(hit["entity"].lower())

	    return buckets


	def _group_entities_by_label(
	    entities: List[Dict], labels: Tuple[str, ...]
	) -> Dict[str, Set[str]]:
	    """Bucket lower-cased entity texts into one set per requested label."""
	    grouped: Dict[str, Set[str]] = {label: set() for label in labels}
	    for entity in entities:
	        bucket = grouped.get(entity.get("label"))
	        if bucket is not None:
	            bucket.add(entity["entity"].lower())
	    return grouped


	def analyze_cv_and_jd(
	    cv_text: str, jd_text: str, confidence_threshold: float = 0.7
	) -> Tuple[str, str, str, Dict, Dict]:
	    """
	    Main function called by Gradio: compare a resume against a job description.

	    Extracts entities from both texts, sums the experience durations of each,
	    and reports which skills, soft skills, languages and certifications the
	    two documents have in common.

	    Args:
	        cv_text: Raw resume text.
	        jd_text: Raw job-description text.
	        confidence_threshold: Threshold forwarded to the extractor
	            (defaults to 0.7, the value previously hard-coded here).

	    Returns:
	        A 5-tuple of:
	        the Markdown match report, the CV experience string, the JD
	        experience string, the CV entities per label (sorted lists, empty
	        labels omitted) and the same mapping for the JD.
	        When the model failed to load, the first item is an error message
	        and the remaining items are empty.
	    """
	    if not extractor:
	        return "ERROR: Model not loaded.", "", "", {}, {}

	    labels_to_match = ("SKILL", "SOFT_SKILL", "LANG", "CERT")

	    # 1. Run the model once per document and reuse the result for both the
	    #    label grouping and the experience sum (it used to run twice per text).
	    #    list() guards against the extractor handing back a one-shot iterator.
	    cv_entities = list(
	        extractor.extract_entities_with_details(cv_text, confidence_threshold)
	    )
	    jd_entities = list(
	        extractor.extract_entities_with_details(jd_text, confidence_threshold)
	    )

	    cv_groups = _group_entities_by_label(cv_entities, labels_to_match)
	    jd_groups = _group_entities_by_label(jd_entities, labels_to_match)

	    # 2. Sum experience from the full entity lists: repeated durations must
	    #    each count, so the de-duplicated sets above cannot be used here.
	    cv_exp = parse_and_sum_experience(cv_entities)
	    jd_exp = parse_and_sum_experience(jd_entities)

	    # 3. Format Match Analysis output
	    # NOTE(review): in CommonMark a "---" line directly under text is a
	    # setext-heading underline rather than a horizontal rule - confirm the
	    # rendered result is the intended one.
	    report_parts = ["## 🚀 Match Analysis\n\n"]
	    for label in labels_to_match:
	        matching = cv_groups[label] & jd_groups[label]

	        report_parts.append(
	            f"Matching {label.replace('_', ' ')}S: {len(matching)}\n"
	        )
	        if matching:
	            report_parts.append(f"_{', '.join(sorted(matching))}_\n")
	        else:
	            report_parts.append("_No matching items found._\n")
	        report_parts.append("---\n")
	    match_output = "".join(report_parts)

	    # 4. Format JSON outputs (labels with no entities are left out)
	    cv_json_output = {k: sorted(v) for k, v in cv_groups.items() if v}
	    jd_json_output = {k: sorted(v) for k, v in jd_groups.items() if v}

	    cv_exp_str = f"{cv_exp} years"
	    jd_exp_str = f"{jd_exp} years (Requirement extracted from JD)"

	    return (match_output, cv_exp_str, jd_exp_str, cv_json_output, jd_json_output)


	# --- 3. Gradio Interface Definition (All English) ---
	# Builds the `demo` Blocks app: two text inputs (CV and JD), an analyze button,
	# and five output components fed by analyze_cv_and_jd.
	# NOTE(review): the nesting of the `with` blocks below is not visible in this
	# copy of the file - confirm the Row/Column layout against the original.

	with gr.Blocks(title="Hirly - Resume & JD Analyzer") as demo:
	# Page heading and short usage instructions.
	gr.Markdown("# 🚀 Resume vs. Job Description Analyzer")
	gr.Markdown(
	"Provide the text from a Resume (CV) and a Job Description (JD) to extract "
	"skills, soft skills, languages, certifications, years of experience, and see their compatibility."
	)

	# Inputs: one 20-line textbox for the resume, one for the job description.
	with gr.Row():
	with gr.Column():
	cv_input = gr.Textbox(lines=20, label="Resume (CV) Text")
	with gr.Column():
	jd_input = gr.Textbox(lines=20, label="Job Description (JD) Text")

	analyze_button = gr.Button("Analyze Compatibility", variant="primary")

	gr.Markdown("---")

	# Results: Markdown match report plus the two read-only experience totals.
	with gr.Row():
	with gr.Column(scale=2):
	match_output = gr.Markdown(label="Match Analysis")
	with gr.Column(scale=1):
	cv_exp_output = gr.Textbox(label="Total Experience (CV)", interactive=False)
	jd_exp_output = gr.Textbox(label="Total Experience (JD)", interactive=False)

	# Results: extracted entities for each document, shown as JSON.
	with gr.Row():
	cv_only_output = gr.JSON(label="Entities Found in CV")
	jd_only_output = gr.JSON(label="Entities Required by JD")

	# Connect button to function
	# `outputs` is listed in the same order as the 5-tuple returned by
	# analyze_cv_and_jd: report, CV experience, JD experience, CV JSON, JD JSON.
	analyze_button.click(
	fn=analyze_cv_and_jd,
	inputs=[cv_input, jd_input],
	outputs=[
	match_output,
	cv_exp_output,
	jd_exp_output,
	cv_only_output,
	jd_only_output,
	],
	)

	# Start the app only when this file is run as a script.
	if __name__ == "__main__":
	demo.launch()