# Hugging Face Spaces page banner ("Spaces / Sleeping") — scrape artifact from the
# Space listing, not part of the application code.
# Third-party UI / model stack
import gradio as gr
from transformers import pipeline, BitsAndBytesConfig
import PyPDF2
# Standard library
import io
import re
import json
import os
import gc
# Torch + Hub auth / env loading
import torch
from huggingface_hub import login
from dotenv import load_dotenv
# --- Configuration --- #
load_dotenv()  # load HF_TOKEN (and any other secrets) from a local .env file
login(token=os.getenv("HF_TOKEN"))  # authenticate to the Hub (needed for the gated Mistral model)

# Quantization config for memory efficiency: 4-bit NF4 weights, fp16 compute.
quant_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_quant_type="nf4"
)
# Initialize pipeline with memory optimization.
# BUG FIX: quantization_config is not a direct pipeline() keyword -- it must be
# routed to the model's from_pretrained() via model_kwargs, otherwise it is
# treated as an unknown generation kwarg (or rejected) and quantization never
# happens, blowing the Space's memory budget.
analyzer = pipeline(
    "text-generation",
    model="mistralai/Mistral-7B-Instruct-v0.3",
    device_map="auto",
    model_kwargs={"quantization_config": quant_config},
    torch_dtype=torch.float16
)
# Skills list converted to set for faster lookups (O(1) membership tests)
GENERAL_SKILLS = {
    'communication', 'problem solving', 'project management',
    'python', 'sql', 'excel', 'teamwork'
}

# Precompiled regex patterns -- compiled once at import, reused per analysis.
# Date ranges such as "2019 - 2023" or "2020 – Present" (hyphen or en-dash).
YEAR_PATTERN = re.compile(r'\d{4}\s*[-–]\s*(?:Present|\d{4})')
# Quantified achievements, e.g. "increased by 30%" or "saved by $5000".
ACHIEVEMENT_PATTERN = re.compile(r'(increased|reduced|saved|improved)\s+by\s+(\d+%|\$\d+)', re.I)
# NOTE(review): only "experiance" is a misspelling -- "responsibilities" and
# "accomplishment" are correctly spelled, so the clarity score penalizes
# perfectly normal resume wording. Confirm the intended word list.
TYPO_PATTERN = re.compile(r'\b(?:responsibilities|accomplishment|experiance)\b', re.I)
| # --- Core Functions --- # | |
def extract_text_from_pdf(pdf_file):
    """Extract text from an uploaded PDF, with memory cleanup.

    BUG FIX: the original read only ``pages[0]``, silently discarding every
    page after the first -- most resumes longer than one page lost their
    entire work history. All pages are now concatenated.

    Args:
        pdf_file: Raw PDF file contents as ``bytes`` (Gradio ``type="binary"``).

    Returns:
        str: Page text joined with newlines, truncated to 10,000 characters
        to bound downstream prompt size and memory.
    """
    try:
        reader = PyPDF2.PdfReader(io.BytesIO(pdf_file))
        # extract_text() may return None for image-only pages; guard with `or ""`.
        text = "\n".join((page.extract_text() or "") for page in reader.pages)
        return text[:10000]  # limit to first 10k chars
    finally:
        # Release PyPDF2's parse structures promptly on a memory-tight Space.
        gc.collect()
def calculate_scores(resume_text, job_desc=None):
    """Compute heuristic quality scores for a resume.

    BUG FIX: a truthy ``job_desc`` containing no word characters (e.g. only
    punctuation/whitespace) produced an empty ``job_words`` set and raised
    ``ZeroDivisionError``; that case now leaves relevance at 0.

    Args:
        resume_text: Plain text extracted from the resume PDF.
        job_desc: Optional job-description text to score relevance against.

    Returns:
        tuple[dict, int]: (per-category score breakdown, total capped at 100).
    """
    resume_lower = resume_text.lower()
    scores = {
        "relevance_to_job": 0,
        "experience_quality": 0,
        "skills_match": 0,
        "education": 0,
        "achievements": 0,
        # Start from 10, subtract up to 8 points for flagged words.
        "clarity": 10 - min(8, len(TYPO_PATTERN.findall(resume_text))),
        "customization": 0
    }

    # Relevance: word overlap with the job description when provided,
    # otherwise count general skills mentioned in the resume.
    if job_desc:
        job_words = set(re.findall(r'\w+', job_desc.lower()))
        if job_words:  # guard: punctuation-only job_desc would divide by zero
            resume_words = set(re.findall(r'\w+', resume_lower))
            scores["relevance_to_job"] = min(
                20, int(20 * len(job_words & resume_words) / len(job_words))
            )
    else:
        scores["relevance_to_job"] = min(
            10, sum(1 for skill in GENERAL_SKILLS if skill in resume_lower)
        )

    # Experience: dated ranges (up to 10) plus quantified achievements
    # (2 points each, up to 10) -- the two parts can sum to 20.
    scores["experience_quality"] = min(10, len(YEAR_PATTERN.findall(resume_text)))
    scores["experience_quality"] += min(10, len(ACHIEVEMENT_PATTERN.findall(resume_text)) * 2)

    # Education: highest degree mentioned wins; earlier branches shadow later ones.
    if 'phd' in resume_lower or 'doctorate' in resume_lower:
        scores["education"] = 8
    elif 'master' in resume_lower or 'msc' in resume_lower or 'mba' in resume_lower:
        scores["education"] = 6
    elif 'bachelor' in resume_lower or ' bs ' in resume_lower or ' ba ' in resume_lower:
        scores["education"] = 4
    elif 'high school' in resume_lower:
        scores["education"] = 2

    return scores, min(100, sum(scores.values()))
def analyze_resume(pdf_file, job_desc=None):
    """Run the full analysis: extract text, score heuristically, query the LLM.

    BUG FIXES:
    * The original prompt never contained the resume text or job description,
      so the model had literally nothing to analyze. Both are now embedded
      (resume truncated to 4k chars to bound prompt size).
    * Text-generation pipelines echo the prompt inside ``generated_text`` by
      default, so ``json.loads`` on the raw output could never succeed.
      ``return_full_text=False`` returns only the completion, and the
      outermost JSON object is extracted before parsing.

    Args:
        pdf_file: Raw PDF bytes from the Gradio file component.
        job_desc: Optional job-description text.

    Returns:
        dict: ``{"score": ..., "analysis": ..., "raw_text": ...}`` on success,
        or ``{"error": <message>}`` on any failure (surfaced to the UI).
    """
    resume_text = extract_text_from_pdf(pdf_file)
    scores, total_score = calculate_scores(resume_text, job_desc)

    prompt = f"""Analyze this resume and return JSON with:
2 key strengths (reference these scores: {scores}),
3 specific improvements,
2 missing skills (if job description provided).
Return ONLY valid JSON without markdown:

Resume:
{resume_text[:4000]}

Job description:
{job_desc or "N/A"}"""

    try:
        result = analyzer(
            prompt,
            max_new_tokens=300,
            do_sample=False,          # more deterministic output
            return_full_text=False    # drop the echoed prompt before JSON parsing
        )[0]["generated_text"]
        # Models often wrap JSON in prose or fences; parse the outermost object.
        match = re.search(r'\{.*\}', result, re.S)
        analysis = json.loads(match.group(0) if match else result)
        return {
            "score": {"total": total_score, "breakdown": scores},
            "analysis": analysis,
            "raw_text": resume_text[:500]  # truncated for memory
        }
    except Exception as e:
        # Surface the failure to the UI as JSON rather than crashing the Space.
        return {"error": str(e)}
# --- Gradio Interface --- #
# NOTE(review): gr.themes.Soft takes text_size (not title_size) in current
# Gradio releases -- confirm against the version pinned for this Space.
with gr.Blocks(theme=gr.themes.Soft(title_size="sm")) as demo:
    gr.Markdown("""
    # 🎯 Resume Analyzer
    *Optimized for memory efficiency*
    """)
    with gr.Row():
        with gr.Column(scale=1):
            # inputs[0] = PDF upload (delivered as bytes), inputs[1] = optional job description
            inputs = [
                gr.File(label="PDF Resume", type="binary"),
                gr.Textbox(label="Job Description (Optional)", lines=3)
            ]
            gr.Examples(
                examples=[["sample.pdf", "Data Scientist with Python experience"]],
                inputs=inputs
            )
        with gr.Column(scale=2):
            output = gr.JSON(label="Analysis")
    # Analysis is triggered by the file upload event; the textbox value is
    # passed along as the second argument to analyze_resume.
    inputs[0].upload(
        fn=analyze_resume,
        inputs=inputs,
        outputs=output,
        queue=True  # process one request at a time
    )
demo.launch(max_threads=1)  # further reduce memory pressure