Spaces:

nadish1210
/

Ai_Resume_Screening_System

Sleeping

App Files Files Community

Ai_Resume_Screening_System / model.py

nadish1210

Update model.py

97597a5 verified about 2 months ago

raw

history blame contribute delete

7.62 kB

	import os
	import fitz
	from docx import Document
	from sentence_transformers import SentenceTransformer, util
	from transformers import pipeline
	import pandas as pd
	from datetime import datetime
	import zipfile
	import shutil
	import re

	# ================== Models ==================
	# Maps the option label accepted by get_model() to the Hugging Face model id
	# that is handed to SentenceTransformer.
	MODELS = {
	"Fast (MiniLM)": "sentence-transformers/all-MiniLM-L6-v2",
	"Balanced (Recommended)": "sentence-transformers/all-mpnet-base-v2",
	"High Accuracy": "sentence-transformers/multi-qa-mpnet-base-dot-v1"
	}

	# Cache of already-constructed SentenceTransformer objects, keyed by option label
	# (populated on demand by get_model()).
	loaded_models = {}
	# Zero-shot classification pipeline; stays None until get_skills_classifier()
	# builds it on first use.
	skills_classifier = None

	def get_model(model_name: str):
	    """Return the sentence-embedding model registered under ``model_name``.

	    The model is constructed the first time a label is requested and then
	    served from the module-level ``loaded_models`` cache.

	    Args:
	        model_name: One of the keys of ``MODELS``.

	    Returns:
	        The cached ``SentenceTransformer`` instance for that label.

	    Raises:
	        KeyError: If ``model_name`` is not a key of ``MODELS``.
	    """
	    cached = loaded_models.get(model_name)
	    if cached is None:
	        print(f"🔄 Loading model: {model_name}")
	        cached = SentenceTransformer(MODELS[model_name])
	        loaded_models[model_name] = cached
	    return cached

	def get_skills_classifier():
	    """Return the shared zero-shot classification pipeline, creating it on first call.

	    The pipeline is stored in the module-level ``skills_classifier`` variable so
	    that later calls reuse the same object instead of rebuilding it.
	    """
	    global skills_classifier
	    if skills_classifier is not None:
	        return skills_classifier

	    skills_classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
	    return skills_classifier

	def extract_text_from_file(file_path: str) -> str:
	    """Extract plain text from a PDF or Word document.

	    Args:
	        file_path: Path of the file to read. The handler is chosen from the
	            (case-insensitive) file extension.

	    Returns:
	        The stripped text of the document. An empty string is returned for
	        unsupported extensions and whenever extraction raises; the error is
	        printed rather than propagated (best-effort behaviour).
	    """
	    ext = os.path.splitext(file_path)[1].lower()
	    try:
	        if ext == ".pdf":
	            # Context manager guarantees the document handle is released even
	            # when get_text() fails part-way through the pages.
	            with fitz.open(file_path) as doc:
	                text = "\n".join(page.get_text("text") for page in doc)
	            return text.strip()

	        if ext in (".docx", ".doc"):
	            # NOTE(review): legacy binary .doc files are presumably not readable
	            # by Document(); if so they fall through to the except branch below
	            # and yield "". Confirm against python-docx.
	            doc = Document(file_path)
	            full_text = [para.text.strip() for para in doc.paragraphs if para.text.strip()]
	            # Table content is not part of doc.paragraphs, so collect it separately.
	            for table in doc.tables:
	                for row in table.rows:
	                    for cell in row.cells:
	                        cell_text = cell.text.strip()
	                        if cell_text:
	                            full_text.append(cell_text)
	            return "\n".join(full_text).strip()

	        return ""
	    except Exception as e:
	        print(f"Extraction error: {e}")
	        return ""

	def extract_qualifications(text: str):
	    """Extract degree / qualification keywords mentioned in ``text``.

	    Matching is case-insensitive and whole-word. Each hit is upper-cased and
	    stripped of dots, duplicates are removed, and the remaining values are
	    listed in order of first appearance.

	    Args:
	        text: Raw resume text.

	    Returns:
	        A comma-separated string of degree keywords, or ``"Not mentioned"``
	        when ``text`` is empty or contains none of them.
	    """
	    if not text:
	        return "Not mentioned"

	    # The alternatives must be separated by a plain "|": a backslash-escaped
	    # pipe matches a literal pipe character and the pattern would never hit.
	    # NOTE(review): "be" and "bit" are also everyday English words, so this
	    # keyword list can report false positives on ordinary prose.
	    degree_pattern = r'\b(bba|bs|be|bit|bsc|ba|b\.?a\.?|b\.?sc\.?|bachelor|master|mba|msc|m\.?sc\.?|mphil|phd|doctorate|intermediate|matric)\b'

	    matches = re.findall(degree_pattern, text.lower())
	    # dict.fromkeys de-duplicates while keeping a deterministic order.
	    degrees = list(dict.fromkeys(m.upper().replace('.', '') for m in matches))

	    return ", ".join(degrees) if degrees else "Not mentioned"

	def extract_courses(text: str):
	    """Extract sentences that mention courses, certifications or trainings.

	    The text is split on sentence punctuation and newlines. A fragment is kept
	    when it contains one of the course keywords and, after whitespace is
	    collapsed, is longer than 15 and shorter than 110 characters.

	    Args:
	        text: Raw resume text.

	    Returns:
	        Up to eight unique fragments joined by ``" | "``; ``"Not mentioned"``
	        for empty input; ``"No courses/certifications mentioned"`` when no
	        fragment qualifies.
	    """
	    if not text:
	        return "Not mentioned"

	    course_keywords = ("course", "certification", "certificate", "certified", "training", "workshop", "bootcamp", "specialization", "diploma")

	    courses = []
	    for sentence in re.split(r'[.!?;\n]', text):
	        clean_sent = re.sub(r'\s+', ' ', sentence.strip())
	        if not 15 < len(clean_sent) < 110:  # skip headings and whole paragraphs
	            continue
	        lowered = clean_sent.lower()
	        if any(kw in lowered for kw in course_keywords):
	            courses.append(clean_sent)

	    courses = list(dict.fromkeys(courses))[:8]  # unique, first eight only
	    # Separator is a plain pipe; no backslash belongs in the output.
	    return " | ".join(courses) if courses else "No courses/certifications mentioned"

	def _mentions_any(text: str, keywords) -> bool:
	    """Return True when ``text`` contains any keyword as a whole word (optional plural ``s``)."""
	    alternation = "|".join(re.escape(kw) for kw in keywords)
	    return re.search(rf"\b(?:{alternation})s?\b", text) is not None


	def get_required_qualifications(job_description: str):
	    """Decide from a job description whether a degree is mandatory.

	    Keywords are matched as whole words so that fragments of unrelated words
	    (for example "labor" inside "collaborate") no longer trigger a decision.
	    Job types listed as low-education take precedence over degree keywords.

	    Args:
	        job_description: Free-text job description; ``None`` is treated as empty.

	    Returns:
	        A ``(degree_required, note)`` tuple: ``(False, "Degree not required")``
	        for low-education job types, ``(True, "Degree required")`` when a
	        degree keyword is present, else ``(False, "Qualification preferred")``.
	    """
	    jd = job_description or ""
	    jd_lower = jd.lower()
	    degree_keywords = ["bba", "bachelor", "master", "mba", "phd", "degree", "graduation", "qualification", "engineering"]
	    low_edu_keywords = ["guard", "security", "watchman", "peon", "driver", "helper", "labor"]

	    if _mentions_any(jd_lower, low_edu_keywords):
	        return False, "Degree not required"

	    # "bs" and "be" cannot be told apart from ordinary text once lower-cased
	    # ("be" is a common verb), so these two abbreviations are only accepted
	    # in upper case in the original text: BS, B.S, BE, B.E.
	    if _mentions_any(jd_lower, degree_keywords) or re.search(r"\bB\.?[SE]\b", jd):
	        return True, "Degree required"
	    return False, "Qualification preferred"

	def extract_skills(resume_text: str):
	    """Label a resume with skills using the zero-shot classifier.

	    Only the first 2000 characters of the resume are classified, against a
	    fixed list of candidate skills; labels scoring above 0.40 are reported.

	    Args:
	        resume_text: Extracted resume text.

	    Returns:
	        A comma-separated string of skill labels; ``"Basic skills"`` when no
	        label passes the score cut-off; ``"Insufficient content"`` when the
	        text is missing or shorter than 100 characters; ``"Skills extraction
	        failed"`` when classification raises.
	    """
	    if not resume_text or len(resume_text) < 100:
	        return "Insufficient content"

	    classifier = get_skills_classifier()
	    labels = ["Python", "Machine Learning", "Deep Learning", "Data Science", "SQL", "FastAPI", "AWS", "Docker", "Communication", "Leadership"]
	    try:
	        result = classifier(resume_text[:2000], labels, multi_label=True)
	        skills = [label for label, score in zip(result['labels'], result['scores']) if score > 0.40]
	        return ", ".join(skills[:12]) if skills else "Basic skills"
	    except Exception as e:
	        # Best-effort: report the failure instead of aborting the screening run,
	        # but let KeyboardInterrupt / SystemExit propagate.
	        print(f"Skills extraction error: {e}")
	        return "Skills extraction failed"

	# ================== Main Function ==================
	def screen_resumes_backend(job_description: str, cv_folder, model_option: str, threshold: float = 0.65):
	    """Score every CV against a job description and collect the shortlisted ones.

	    For each CV the text is extracted, qualifications / courses / skills are
	    derived, and the cosine similarity between the CV embedding and the job
	    description embedding is computed. A CV is rejected when a degree is
	    required but none was found, or when the similarity is below ``threshold``.
	    Shortlisted files are copied to ``outputs/selected_cvs``, a CSV report is
	    written to ``outputs/reports``, and the CVs shortlisted in this call are
	    bundled into a ZIP archive under ``outputs``.

	    Args:
	        job_description: Job description text to compare against.
	        cv_folder: A single CV or a list of CVs; each item is either a path or
	            an object exposing the path through a ``name`` attribute.
	        model_option: Key of ``MODELS`` selecting the embedding model.
	        threshold: Minimum cosine similarity (0-1) needed to be shortlisted.

	    Returns:
	        A 4-tuple ``(results, summary, selected_dir, zip_path)``: the per-CV
	        result dicts, a human-readable summary, ``"outputs/selected_cvs/"``
	        (or ``""`` when nothing was shortlisted) and the ZIP path (or ``None``).
	        When ``cv_folder`` is empty the tuple is
	        ``([], "No CVs found.", None, None)``.
	    """
	    if not cv_folder:
	        return [], "No CVs found.", None, None

	    files = cv_folder if isinstance(cv_folder, list) else [cv_folder]
	    model = get_model(model_option)
	    results = []
	    # Paths copied during THIS call. The output directory is never cleared, so
	    # zipping its full listing would also ship CVs shortlisted by earlier runs.
	    selected_paths = []

	    os.makedirs("outputs/selected_cvs", exist_ok=True)
	    os.makedirs("outputs/reports", exist_ok=True)

	    # One timestamp for both artefacts so the report and the ZIP always pair up.
	    run_stamp = datetime.now().strftime('%Y%m%d_%H%M')

	    job_embedding = model.encode(job_description, convert_to_tensor=True)
	    degree_required, qual_note = get_required_qualifications(job_description)

	    for file_obj in files:
	        source_path = file_obj.name if hasattr(file_obj, 'name') else str(file_obj)
	        filename = os.path.basename(source_path)

	        resume_text = extract_text_from_file(source_path)

	        # Too little text means extraction failed (or the file is effectively empty).
	        if len(resume_text) < 100:
	            results.append({
	                "Candidate": filename,
	                "Score (%)": 0.0,
	                "Qualifications": "❌ Extraction Failed",
	                "Courses": "N/A",
	                "Status": "❌ Extraction Failed",
	                "Extracted Skills": "N/A",
	                "Model Used": model_option
	            })
	            continue

	        qualifications = extract_qualifications(resume_text)
	        courses = extract_courses(resume_text)
	        skills = extract_skills(resume_text)

	        resume_embedding = model.encode(resume_text, convert_to_tensor=True)
	        similarity = util.cos_sim(job_embedding, resume_embedding)[0][0].item()

	        # A missing qualification outranks a low score as the rejection reason.
	        final_status = "✅ Shortlisted"
	        if degree_required and qualifications == "Not mentioned":
	            final_status = "❌ Rejected (Missing Qualification)"
	        elif similarity < threshold:
	            final_status = "❌ Rejected (Low Score)"

	        if final_status == "✅ Shortlisted":
	            target_path = os.path.join("outputs/selected_cvs", filename)
	            shutil.copy2(source_path, target_path)
	            selected_paths.append(target_path)

	        results.append({
	            "Candidate": filename,
	            "Score (%)": round(similarity * 100, 2),
	            "Qualifications": qualifications,
	            "Courses": courses,
	            "Status": final_status,
	            "Extracted Skills": skills,
	            "Model Used": model_option
	        })

	    df = pd.DataFrame(results)
	    df.to_csv(f"outputs/reports/report_{run_stamp}.csv", index=False)

	    shortlisted = sum(1 for r in results if "Shortlisted" in r["Status"])

	    summary = f"""
	✅ Screening Completed Successfully!
	Model Used: {model_option}
	Total CVs Processed: {len(files)}
	Shortlisted: {shortlisted}
	Threshold: {threshold*100}%
	Note: Qualifications and Courses are now shown in separate columns.
	"""

	    zip_path = None
	    if shortlisted > 0:
	        zip_path = f"outputs/shortlisted_cvs_{run_stamp}.zip"
	        with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
	            # dict.fromkeys drops repeats when two uploads share a basename.
	            for path in dict.fromkeys(selected_paths):
	                zipf.write(path, os.path.basename(path))

	    return results, summary.strip(), "outputs/selected_cvs/" if shortlisted > 0 else "", zip_path