Spaces:

TKM03
/

RESUME_FILTERING

Sleeping

App Files Community

RESUME_FILTERING / app.py

TKM03

Update app.py

239934b verified 8 months ago

raw

history blame contribute delete

4.01 kB

	import os
	import re
	import shutil
	import PyPDF2
	import gradio as gr
	from transformers import pipeline

	# Text-classification pipeline shared by every request. It is built once,
	# at import time, from the checkpoint named below.
	_MODEL_ID = "liberatoratif/BERT-resume-job-recommender"
	classifier = pipeline(
	    "text-classification",
	    model=_MODEL_ID,
	    truncation=True,
	)

	# Display names for the job categories, listed in label-index order: the
	# name at position i is shown for the classifier label "LABEL_<i>".
	# NOTE(review): this pairing is maintained by hand in this file -- confirm it
	# against the model's own label definitions. Append more names here if the
	# model supports more labels.
	_CATEGORY_NAMES = (
	    "Information Technology / Software Engineering",
	    "Finance / Accounting / Auditing",
	    "Marketing / Sales / Business Development",
	    "Engineering / Mechanical / Civil / Electrical",
	    "Data Science / Machine Learning / AI",
	    "Human Resources / Recruitment / Talent Acquisition",
	    "Healthcare / Medical / Nursing",
	    "Legal / Compliance",
	    "Education / Training / Teaching",
	    "Customer Service / Support",
	    "Operations / Logistics / Supply Chain",
	)

	# Lookup from raw classifier label ("LABEL_0", "LABEL_1", ...) to display name.
	CATEGORY_MAP = {
	    f"LABEL_{index}": name for index, name in enumerate(_CATEGORY_NAMES)
	}

	# Destination paths of the resumes copied during the most recent
	# classification run, grouped by category display name. classify_resumes()
	# rebinds this on every run and filter_by_category() reads it.
	classified_files_by_category = {}

	def clean_text(text):
	    """Normalise raw resume text before it is handed to the classifier.

	    Three regex passes run in order:

	    1. every ``http...`` run up to the next whitespace is removed;
	    2. every character that is not A-Z, a-z, 0-9 or whitespace is deleted
	       (deleted, not replaced, so ``a@b.com`` becomes ``abcom``);
	    3. each run of whitespace is collapsed to a single space.

	    The result is returned with leading and trailing whitespace stripped.
	    """
	    passes = (
	        (r'http\S+', ''),
	        (r'[^A-Za-z0-9\s]', ''),
	        (r'\s+', ' '),
	    )
	    for pattern, replacement in passes:
	        text = re.sub(pattern, replacement, text)
	    return text.strip()

	def extract_text_from_pdf(file):
	    """Read a PDF and return its cleaned text as a ``(text, error)`` pair.

	    ``file`` is passed straight to ``PyPDF2.PdfReader``. The text of all
	    pages is joined with single spaces and run through ``clean_text``.

	    Returns:
	        ``(cleaned_text, None)`` on success, or ``("", message)`` where
	        ``message`` is ``str()`` of whatever exception was raised while
	        reading or cleaning. This function never raises for such failures.
	    """
	    try:
	        pdf = PyPDF2.PdfReader(file)
	        page_texts = []
	        for page in pdf.pages:
	            # A page may produce no text at all; count it as an empty string.
	            page_texts.append(page.extract_text() or "")
	        cleaned = clean_text(" ".join(page_texts))
	    except Exception as exc:
	        return "", str(exc)
	    return cleaned, None

	def _category_dir_name(category):
	    """Turn a category display name into one flat directory name."""
	    # Display names contain " / ". Used verbatim inside a path, every slash
	    # opens another nested directory level whose name has stray spaces.
	    parts = (part.strip() for part in category.replace("\\", "/").split("/"))
	    return " - ".join(part for part in parts if part)


	def classify_resumes(files):
	    """Classify uploaded resume PDFs and store a copy of each by category.

	    Every call starts from scratch: the ``classified_resumes`` directory is
	    deleted and recreated, and the module-level
	    ``classified_files_by_category`` mapping is rebound to a new dict.

	    Args:
	        files: Iterable of uploaded files, each exposing its path on disk as
	            ``.name``. ``None`` (nothing uploaded) is treated as an empty
	            batch instead of raising ``TypeError``.

	    Returns:
	        A ``(results, dropdown_update)`` tuple. ``results`` maps each file's
	        base name to either ``{"error": ...}`` or a dict holding the raw
	        label, the category display name and the score rounded to four
	        decimals. ``dropdown_update`` is a ``gr.update`` listing the
	        categories that received at least one file, sorted, with the first
	        one selected (or ``None`` when there are none).
	    """
	    global classified_files_by_category
	    output_root = "classified_resumes"
	    results = {}
	    classified_files_by_category = {}

	    # Wipe the previous run's copies so stale files never show up in a filter.
	    if os.path.exists(output_root):
	        shutil.rmtree(output_root)
	    os.makedirs(output_root, exist_ok=True)

	    # `files` may be None when the button is clicked with nothing uploaded.
	    for file in files or []:
	        # NOTE(review): results and copies are keyed by base name, so two
	        # uploads sharing a base name overwrite each other.
	        filename = os.path.basename(file.name)
	        text, error = extract_text_from_pdf(file)
	        if error or not text:
	            results[filename] = {"error": error or "No text found in PDF"}
	            continue

	        # Only the first 512 characters are scored.
	        # NOTE(review): this is a character cut, not a token cut, and the
	        # pipeline is already created with truncation=True -- confirm whether
	        # the slice is still wanted before changing it.
	        prediction = classifier(text[:512])[0]
	        label = prediction["label"]
	        category = CATEGORY_MAP.get(label, "Other / Miscellaneous")

	        results[filename] = {
	            "Predicted Job Category": label,
	            "Category Name": category,
	            "Confidence Score": round(prediction["score"], 4),
	        }

	        # One flat directory per category; the mapping itself stays keyed by
	        # the unmodified display name because that is what the dropdown shows.
	        category_dir = os.path.join(output_root, _category_dir_name(category))
	        os.makedirs(category_dir, exist_ok=True)
	        dest_path = os.path.join(category_dir, filename)
	        shutil.copyfile(file.name, dest_path)
	        classified_files_by_category.setdefault(category, []).append(dest_path)

	    available_categories = sorted(classified_files_by_category)
	    return results, gr.update(
	        choices=available_categories,
	        value=available_categories[0] if available_categories else None,
	    )

	def filter_by_category(selected_category):
	    """Return the stored resume paths for the chosen category.

	    The lookup is done in the module-level ``classified_files_by_category``
	    mapping. An empty or missing selection, or a category with no entry in
	    the mapping, yields an empty list.
	    """
	    if not selected_category:
	        return []
	    if selected_category not in classified_files_by_category:
	        return []
	    return classified_files_by_category[selected_category]

	# User interface: upload PDFs, classify them, then browse the copies by category.
	with gr.Blocks(title="Resume Classifier by Job Category") as demo:
	    gr.Markdown(
	        "## 📄 Resume Screening System\n"
	        "Upload resumes and classify them into job categories."
	    )

	    # Input side: the PDFs to classify and the button that starts the run.
	    file_input = gr.File(
	        label="Upload Resume PDFs",
	        file_types=[".pdf"],
	        file_count="multiple",
	    )
	    classify_button = gr.Button("🔍 Classify All Resumes")

	    # Output side: per-file results, the category picker and the matching files.
	    output_json = gr.JSON(label="Classification Result (JSON)")
	    category_dropdown = gr.Dropdown(
	        label="Select a Job Category",
	        choices=[],
	        interactive=True,
	    )
	    resume_output = gr.File(
	        label="Filtered Resumes",
	        file_types=[".pdf"],
	        file_count="multiple",
	    )

	    # Classifying fills the JSON panel and repopulates the dropdown.
	    classify_button.click(
	        fn=classify_resumes,
	        inputs=[file_input],
	        outputs=[output_json, category_dropdown],
	    )
	    # A change of the dropdown value lists the files stored for that category.
	    category_dropdown.change(
	        fn=filter_by_category,
	        inputs=category_dropdown,
	        outputs=resume_output,
	    )

	# Start the app only when this file is run as a script.
	if __name__ == "__main__":
	    demo.launch()