Spaces:
Sleeping
Sleeping
import gc
import io
import json
import logging
import os
import re

import gradio as gr
import PyPDF2
from dotenv import load_dotenv
from huggingface_hub import login
# --- Configuration --- #
# Load variables from a .env file (if one is found) into the environment.
load_dotenv()

# Authenticate with the Hugging Face Hub only when a token is configured.
# Calling login(token=None) falls back to an interactive prompt, which
# cannot be answered when the app runs unattended.
_hf_token = os.getenv("HF_TOKEN")
if _hf_token:
    login(token=_hf_token)
# Generic skills looked up (as lowercase substrings) when scoring a resume.
# Kept as a set for fast membership tests and set arithmetic.
GENERAL_SKILLS = {
    'communication', 'problem solving', 'project management',
    'python', 'sql', 'excel', 'teamwork',
}

# Precompiled regex patterns
# Date ranges such as "2019 - 2021", "2019–Present" or "2019 — present".
YEAR_PATTERN = re.compile(r'\d{4}\s*[-–—]\s*(?:Present|\d{4})', re.I)
# Quantified results such as "increased by 20%" or "saved by $500".
ACHIEVEMENT_PATTERN = re.compile(
    r'(increased|reduced|saved|improved)\s+by\s+(\d+%|\$\d+)', re.I
)
# Common misspellings only: the correctly spelled words must NOT be listed
# here, otherwise well-written resumes are penalised for "typos".
TYPO_PATTERN = re.compile(
    r'\b(?:responsibilites|accomplishement|experiance)\b', re.I
)
def extract_text_from_pdf(pdf_file, max_chars=10000):
    """Extract the text of a PDF, truncated to ``max_chars`` characters.

    Args:
        pdf_file: Path to a PDF file (``str`` or ``os.PathLike``) or the raw
            PDF content as ``bytes``/``bytearray``.
        max_chars: Maximum number of characters to return. ``None`` disables
            truncation. Defaults to 10,000.

    Returns:
        The text of all pages joined with newlines, cut to ``max_chars``.

    Raises:
        ValueError: If ``pdf_file`` is ``None``.
        TypeError: If ``pdf_file`` is neither a path nor bytes.
        RuntimeError: If the PDF cannot be read, has no pages, or yields no
            text. The message is prefixed with "PDF read error: " or
            "Extraction error: " and the original exception is chained.
    """
    if pdf_file is None:
        raise ValueError("No PDF file uploaded")

    # Handle both file path and in-memory bytes input.
    if isinstance(pdf_file, (str, os.PathLike)):
        with open(pdf_file, 'rb') as f:
            file_bytes = f.read()
    elif isinstance(pdf_file, (bytes, bytearray)):
        file_bytes = bytes(pdf_file)
    else:
        raise TypeError(f"Expected file path or bytes, got {type(pdf_file)}")

    try:
        pdf_reader = PyPDF2.PdfReader(io.BytesIO(file_bytes))
        if len(pdf_reader.pages) == 0:
            raise ValueError("PDF has no pages")

        # Guard against pages that yield no text object at all, so a single
        # such page cannot break the join for the whole document.
        text = "\n".join(page.extract_text() or "" for page in pdf_reader.pages)
        if not text.strip():
            raise ValueError("No text extracted from PDF (possibly image-based or empty)")

        return text[:max_chars]
    except PyPDF2.errors.PdfReadError as e:
        raise RuntimeError(f"PDF read error: {e}") from e
    except Exception as e:
        # Also wraps the ValueErrors raised above, so callers see one
        # consistent "Extraction error: ..." message format.
        raise RuntimeError(f"Extraction error: {e}") from e
    finally:
        # NOTE(review): explicit collection kept from the original,
        # presumably to release parser memory promptly — confirm it is needed.
        gc.collect()
def calculate_scores(resume_text, job_desc=None):
    """Score a resume with keyword and regex heuristics.

    Args:
        resume_text: Plain text of the resume.
        job_desc: Optional job description. When it contains at least one
            word, relevance is the share of its distinct words that also
            appear in the resume (0-20). Otherwise relevance is the number of
            ``GENERAL_SKILLS`` found in the resume (capped at 10).

    Returns:
        A ``(scores, total)`` tuple: ``scores`` maps each category name to
        its integer score and ``total`` is their sum, capped at 100.

    Note:
        ``skills_match``, ``achievements`` and ``customization`` are never
        populated by this function and always stay 0. Quantified
        achievements are counted inside ``experience_quality`` instead.
    """
    resume_lower = resume_text.lower()
    scores = {
        "relevance_to_job": 0,
        "experience_quality": 0,
        "skills_match": 0,
        "education": 0,
        "achievements": 0,
        # Start at 10 and lose one point per known typo, never below 2.
        "clarity": 10 - min(8, len(TYPO_PATTERN.findall(resume_text))),
        "customization": 0,
    }

    # A job description made only of whitespace/punctuation has no words;
    # treat it like a missing one instead of dividing by zero.
    job_words = set(re.findall(r'\w+', job_desc.lower())) if job_desc else set()
    if job_words:
        resume_words = set(re.findall(r'\w+', resume_lower))
        overlap = len(job_words & resume_words)
        scores["relevance_to_job"] = min(20, int(20 * overlap / len(job_words)))
    else:
        scores["relevance_to_job"] = min(
            10, sum(1 for skill in GENERAL_SKILLS if skill in resume_lower)
        )

    # Up to 10 points for date ranges plus up to 10 for quantified results
    # (two points per match).
    scores["experience_quality"] = min(10, len(YEAR_PATTERN.findall(resume_text)))
    scores["experience_quality"] += min(10, len(ACHIEVEMENT_PATTERN.findall(resume_text)) * 2)

    # Highest degree keyword found wins.
    if 'phd' in resume_lower or 'doctorate' in resume_lower:
        scores["education"] = 8
    elif 'master' in resume_lower or 'msc' in resume_lower or 'mba' in resume_lower:
        scores["education"] = 6
    elif 'bachelor' in resume_lower or ' bs ' in resume_lower or ' ba ' in resume_lower:
        scores["education"] = 4
    elif 'high school' in resume_lower:
        scores["education"] = 2

    return scores, min(100, sum(scores.values()))
def analyze_resume(pdf_file, job_desc=None, inference_fn=None):
    """Analyze a resume PDF and return its text plus a structured analysis.

    Args:
        pdf_file: The uploaded PDF, passed to ``extract_text_from_pdf``.
        job_desc: Optional job description used for scoring and included in
            the inference prompt.
        inference_fn: Optional callable taking a prompt string and returning
            a JSON string. When it is missing, fails, or returns an empty
            result, the rule-based analysis is used instead.

    Returns:
        A ``(text, result)`` tuple for the two UI outputs. On success
        ``text`` is the first 5000 characters of the resume and ``result``
        holds ``score``, ``analysis`` and ``raw_text_sample``. If extraction
        fails, ``text`` is an error message and ``result`` is
        ``{"error": message}``.
    """
    try:
        resume_text = extract_text_from_pdf(pdf_file)
    except Exception as e:
        return (
            f"Extraction failed: {str(e)}",  # First output for textbox
            {"error": str(e)},  # Second output for JSON
        )

    scores, total_score = calculate_scores(resume_text, job_desc)
    resume_lower = resume_text.lower()

    # (condition, message) pairs; only messages whose condition holds are
    # reported, so the output lists never contain null placeholders.
    strength_rules = [
        (scores["clarity"] > 7, "Good clarity score"),
        (scores["relevance_to_job"] > 5, "Relevant skills"),
    ]
    improvement_rules = [
        (scores["achievements"] < 3, "Add more measurable achievements"),
        (scores["relevance_to_job"] < 5, "Include more relevant keywords"),
        (scores["clarity"] < 9, "Check for typos"),
    ]

    # Rule-based analysis, used whenever inference is unavailable or fails.
    basic_analysis = {
        "score": {
            "total": total_score,
            "breakdown": scores,
        },
        "strengths": [msg for applies, msg in strength_rules if applies],
        "improvements": [msg for applies, msg in improvement_rules if applies],
        # Substring test (same as the scoring code) so multi-word skills such
        # as "problem solving" can be detected; sorted for a stable result.
        "missing_skills": sorted(
            skill for skill in GENERAL_SKILLS if skill not in resume_lower
        )[:2],
    }

    analysis = basic_analysis
    # Try to get an enhanced analysis if an inference function is available.
    if inference_fn:
        prompt = (
            f"[Return valid JSON]: Based on these scores: {scores}, provide:\n"
            '- "strengths": 2 key strengths,\n'
            '- "improvements": 3 specific improvements,\n'
            f'- "missing_skills": 2 missing skills (use job description if provided: {job_desc or "None"}).\n'
            "Output a valid JSON string only, no extra text."
        )
        try:
            result = inference_fn(prompt)
            if result and result.strip():
                analysis = json.loads(result)
        except Exception as e:
            # Best effort: any inference or parsing failure falls back to
            # the rule-based analysis.
            logging.getLogger(__name__).warning("Inference error: %s", e)

    return (
        resume_text[:5000],  # First output for textbox (limited to 5000 chars)
        {
            "score": {"total": total_score, "breakdown": scores},
            "analysis": analysis,
            "raw_text_sample": resume_text[:200],
        },
    )
# --- Gradio Interface --- #
with gr.Blocks(theme=gr.themes.Soft(), fill_height=True) as demo:
    with gr.Sidebar():
        gr.Markdown("# Resume Analyzer")
        gr.Markdown("Upload your resume in PDF format for analysis")

    with gr.Row():
        # Left column: inputs.
        with gr.Column(scale=1):
            # type="binary" hands the upload to the callback as bytes, one of
            # the input forms extract_text_from_pdf accepts.
            pdf_input = gr.File(label="PDF Resume", type="binary")
            job_desc_input = gr.Textbox(label="Job Description (Optional)", lines=3)
            submit_btn = gr.Button("Analyze")
        # Right column: results.
        with gr.Column(scale=2):
            extracted_text = gr.Textbox(label="Extracted Text", lines=10, interactive=False)
            analysis_output = gr.JSON(label="Analysis Results")

    # Only two inputs are wired, so analyze_resume runs with its default
    # inference_fn=None and returns the rule-based analysis.
    submit_btn.click(
        fn=analyze_resume,
        inputs=[pdf_input, job_desc_input],
        outputs=[extracted_text, analysis_output],
    )

# Start the server only when run as a script, so importing this module
# (e.g. for testing) does not launch the app.
if __name__ == "__main__":
    demo.launch(share=True)