Spaces:
Sleeping
Sleeping
| import os | |
| import re | |
| import shutil | |
| import PyPDF2 | |
| import gradio as gr | |
| from transformers import pipeline | |
# Load the model once at import time; classify_resumes() calls this shared
# pipeline object for every resume.
classifier = pipeline(
    task="text-classification",
    model="liberatoratif/BERT-resume-job-recommender",
    truncation=True,
)
# Display names for the classifier labels, ordered so that the entry at
# index i is the name used for the raw label "LABEL_<i>".
# NOTE(review): the label-to-name pairing cannot be verified from this file;
# confirm it against the model's label configuration. Append further names
# here if the model supports more labels.
_CATEGORY_NAMES = (
    "Information Technology / Software Engineering",
    "Finance / Accounting / Auditing",
    "Marketing / Sales / Business Development",
    "Engineering / Mechanical / Civil / Electrical",
    "Data Science / Machine Learning / AI",
    "Human Resources / Recruitment / Talent Acquisition",
    "Healthcare / Medical / Nursing",
    "Legal / Compliance",
    "Education / Training / Teaching",
    "Customer Service / Support",
    "Operations / Logistics / Supply Chain",
)

# Category mapping (expand as needed): raw model label -> display name.
CATEGORY_MAP = {
    f"LABEL_{index}": name for index, name in enumerate(_CATEGORY_NAMES)
}

# Destination paths of the resumes from the most recent classification run,
# grouped by category display name. Rebuilt by classify_resumes().
classified_files_by_category = {}
def clean_text(text):
    """Normalize raw text before it is handed to the classifier.

    Applies three substitutions in order: drop URLs (``http`` followed by
    any run of non-whitespace), drop every character that is not an ASCII
    letter, a digit, or whitespace, and collapse each whitespace run to a
    single space. The result is stripped of leading/trailing whitespace.
    """
    substitutions = (
        (r'http\S+', ''),
        (r'[^A-Za-z0-9\s]', ''),
        (r'\s+', ' '),
    )
    cleaned = text
    for pattern, replacement in substitutions:
        cleaned = re.sub(pattern, replacement, cleaned)
    return cleaned.strip()
def extract_text_from_pdf(file):
    """Extract the text of every page of a PDF and return it cleaned.

    Returns a ``(text, error)`` pair. On success ``text`` is the output of
    ``clean_text`` over the space-joined page texts and ``error`` is
    ``None``. If anything raises, ``text`` is ``""`` and ``error`` is the
    exception message.
    """
    try:
        page_texts = []
        for page in PyPDF2.PdfReader(file).pages:
            # A falsy extraction result (e.g. None) counts as empty text.
            page_texts.append(page.extract_text() or "")
        cleaned = clean_text(" ".join(page_texts))
    except Exception as exc:
        return "", str(exc)
    return cleaned, None
def _category_dirname(category):
    """Return a single-path-component directory name for *category*."""
    # Category names contain " / ". Spliced into a path verbatim, each slash
    # would start a nested directory whose name has stray surrounding spaces.
    return re.sub(r"\s*[\\/]+\s*", " - ", category).strip()


def classify_resumes(files):
    """Classify uploaded resume PDFs and file a copy of each by category.

    Args:
        files: Uploaded file objects exposing a ``.name`` path, or ``None``
            / an empty list when nothing was uploaded.

    Returns:
        A ``(results, dropdown_update)`` pair. ``results`` maps each
        file's base name to either ``{"error": ...}`` or a dict with the
        raw label, the category display name, and the confidence score
        rounded to 4 places. ``dropdown_update`` is a ``gr.update`` listing
        the categories found (sorted), with the first one selected, or
        ``None`` selected when there are none.

    Side effects:
        Deletes and recreates the ``classified_resumes`` directory and
        rebinds the module-level ``classified_files_by_category``.
        NOTE(review): both are shared by every caller of this module, so
        concurrent runs would overwrite each other's output.
    """
    global classified_files_by_category
    results = {}
    classified_files_by_category = {}

    # Start every run from an empty output tree.
    output_root = "classified_resumes"
    if os.path.exists(output_root):
        shutil.rmtree(output_root)
    os.makedirs(output_root, exist_ok=True)

    # `files` is None when the button is clicked with nothing uploaded;
    # treat that as an empty batch instead of failing in the loop.
    for file in files or []:
        # NOTE(review): results and destination paths are keyed by base
        # name, so two uploads with the same base name overwrite each other.
        filename = os.path.basename(file.name)
        text, error = extract_text_from_pdf(file)
        if error or not text:
            results[filename] = {"error": error or "No text found in PDF"}
            continue

        # NOTE(review): this keeps only the first 512 *characters* of the
        # resume; confirm that is intended rather than a token limit.
        prediction = classifier(text[:512])[0]
        label = prediction["label"]
        category = CATEGORY_MAP.get(label, "Other / Miscellaneous")
        results[filename] = {
            "Predicted Job Category": label,
            "Category Name": category,
            "Confidence Score": round(prediction["score"], 4),
        }

        # Save a copy of the file under its category directory.
        category_dir = os.path.join(output_root, _category_dirname(category))
        os.makedirs(category_dir, exist_ok=True)
        dest_path = os.path.join(category_dir, filename)
        shutil.copyfile(file.name, dest_path)
        classified_files_by_category.setdefault(category, []).append(dest_path)

    available_categories = sorted(classified_files_by_category)
    return results, gr.update(
        choices=available_categories,
        value=available_categories[0] if available_categories else None,
    )
def filter_by_category(selected_category):
    """Return the saved resume paths recorded for *selected_category*.

    Returns an empty list when no category is selected or when the
    selected category has no entry in ``classified_files_by_category``.
    """
    if not selected_category:
        return []
    return classified_files_by_category.get(selected_category, [])
# Gradio UI
with gr.Blocks(title="Resume Classifier by Job Category") as demo:
    # NOTE(review): the "π" in the two labels below looks like a mis-encoded
    # emoji; confirm the intended glyphs before changing these strings.
    gr.Markdown("## π Resume Screening System\nUpload resumes and classify them into job categories.")
    file_input = gr.File(label="Upload Resume PDFs", file_types=[".pdf"], file_count="multiple")
    classify_button = gr.Button("π Classify All Resumes")
    output_json = gr.JSON(label="Classification Result (JSON)")
    category_dropdown = gr.Dropdown(label="Select a Job Category", choices=[], interactive=True)
    resume_output = gr.File(label="Filtered Resumes", file_types=[".pdf"], file_count="multiple")

    # Classification deletes and rebuilds the saved files, so refresh the
    # file list right after every run. Relying only on the dropdown's change
    # event would presumably leave stale paths on screen whenever the
    # selected category stays the same between two runs.
    classify_button.click(
        fn=classify_resumes,
        inputs=[file_input],
        outputs=[output_json, category_dropdown],
    ).then(
        fn=filter_by_category,
        inputs=category_dropdown,
        outputs=resume_output,
    )
    # Picking another category shows that category's files.
    category_dropdown.change(fn=filter_by_category, inputs=category_dropdown, outputs=resume_output)

if __name__ == "__main__":
    demo.launch()