DefendModel / app.py
Mangesh223's picture
Update app.py
dfa143b verified
raw
history blame
6.61 kB
import gradio as gr
import PyPDF2
import io
import re
import json
import os
import gc
from huggingface_hub import login
from dotenv import load_dotenv
# --- Configuration --- #
# Pull variables from a local .env file (when one exists) into the process
# environment so HF_TOKEN can be supplied without exporting it manually.
load_dotenv()

# Authenticate with the Hugging Face Hub only when a token is configured.
# Passing token=None makes login() fall back to an interactive prompt, which
# cannot be answered when the app runs headless.
_hf_token = os.getenv("HF_TOKEN")
if _hf_token:
    login(token=_hf_token)
# Generic skill keywords searched for in a resume when scoring it and when
# reporting which skills are missing.
GENERAL_SKILLS = {
    'communication',
    'problem solving',
    'project management',
    'python',
    'sql',
    'excel',
    'teamwork',
}

# Regexes are compiled once at import time and reused for every resume.

# Year ranges such as "2019 - 2022" or "2020–Present" (case-sensitive).
YEAR_PATTERN = re.compile(r'\d{4}\s*[-–]\s*(?:Present|\d{4})')

# Quantified results such as "increased by 20%" or "saved by $500".
ACHIEVEMENT_PATTERN = re.compile(
    r'(increased|reduced|saved|improved)\s+by\s+(\d+%|\$\d+)',
    re.IGNORECASE,
)

# Words that lower the clarity score when they appear in the resume.
# NOTE(review): "responsibilities" and "accomplishment" are correctly spelled;
# only "experiance" is an actual misspelling — confirm the intended word list.
TYPO_PATTERN = re.compile(
    r'\b(?:responsibilities|accomplishment|experiance)\b',
    re.IGNORECASE,
)
def extract_text_from_pdf(pdf_file):
    """Extract up to 10,000 characters of text from a PDF.

    Args:
        pdf_file: Filesystem path of the PDF (``str`` or ``os.PathLike``), or
            its raw content as ``bytes``/``bytearray``.

    Returns:
        The text of every page joined with newlines, truncated to the first
        10,000 characters.

    Raises:
        ValueError: If ``pdf_file`` is ``None``.
        TypeError: If ``pdf_file`` is neither a path nor a bytes-like value.
        OSError: If a path is given and the file cannot be opened or read.
        Exception: With a "PDF read error: ..." message when PyPDF2 rejects
            the document, or an "Extraction error: ..." message for any other
            failure while parsing, including a PDF with no pages or no
            extractable text. The original exception is chained as the cause.
    """
    if pdf_file is None:
        raise ValueError("No PDF file uploaded")

    # Normalise both supported input kinds to an in-memory bytes object.
    if isinstance(pdf_file, (str, os.PathLike)):
        with open(pdf_file, 'rb') as f:
            file_bytes = f.read()
    elif isinstance(pdf_file, (bytes, bytearray)):
        file_bytes = bytes(pdf_file)
    else:
        raise TypeError(f"Expected file path or bytes, got {type(pdf_file)}")

    try:
        pdf_reader = PyPDF2.PdfReader(io.BytesIO(file_bytes))
        if len(pdf_reader.pages) == 0:
            raise ValueError("PDF has no pages")

        # A page without a text layer may yield None instead of "" on some
        # PyPDF2 versions; coalesce so the join cannot raise TypeError.
        text = "\n".join(page.extract_text() or "" for page in pdf_reader.pages)
        if not text.strip():
            raise ValueError("No text extracted from PDF (possibly image-based or empty)")

        return text[:10000]  # Limit to first 10,000 characters
    except PyPDF2.errors.PdfReadError as e:
        raise Exception(f"PDF read error: {str(e)}") from e
    except Exception as e:
        # Also wraps the ValueErrors raised above, so callers always see the
        # "Extraction error: ..." prefix for non-parser failures.
        raise Exception(f"Extraction error: {str(e)}") from e
    finally:
        # Encourage prompt release of the parsed PDF structures.
        gc.collect()
def calculate_scores(resume_text, job_desc=None):
    """Score a resume with keyword and regex heuristics.

    Args:
        resume_text: Plain text of the resume.
        job_desc: Optional job description. When it contains at least one
            word, relevance is the share of its distinct words that also
            appear in the resume (0-20). Otherwise relevance is the number of
            ``GENERAL_SKILLS`` found in the resume, capped at 10.

    Returns:
        A ``(scores, total)`` tuple. ``scores`` maps each category name to an
        integer; ``total`` is the sum of all categories capped at 100.
    """
    resume_lower = resume_text.lower()
    scores = {
        "relevance_to_job": 0,
        "experience_quality": 0,
        # NOTE(review): "skills_match", "achievements" and "customization" are
        # never updated below and therefore always stay 0; achievement matches
        # are credited to "experience_quality" instead.
        "skills_match": 0,
        "education": 0,
        "achievements": 0,
        # Start from 10 and lose one point per flagged word, at most 8.
        "clarity": 10 - min(8, len(TYPO_PATTERN.findall(resume_text))),
        "customization": 0
    }

    # A job description made only of whitespace/punctuation is truthy but has
    # no words; treat it like a missing description to avoid dividing by zero.
    job_words = set(re.findall(r'\w+', job_desc.lower())) if job_desc else set()
    if job_words:
        resume_words = set(re.findall(r'\w+', resume_lower))
        scores["relevance_to_job"] = min(20, int(20 * len(job_words & resume_words) / len(job_words)))
    else:
        scores["relevance_to_job"] = min(10, sum(1 for skill in GENERAL_SKILLS if skill in resume_lower))

    # Up to 10 points for dated positions plus up to 10 for quantified
    # achievements (2 points each).
    scores["experience_quality"] = min(10, len(YEAR_PATTERN.findall(resume_text)))
    scores["experience_quality"] += min(10, len(ACHIEVEMENT_PATTERN.findall(resume_text)) * 2)

    # The first matching tier wins; these are plain substring checks.
    if 'phd' in resume_lower or 'doctorate' in resume_lower:
        scores["education"] = 8
    elif 'master' in resume_lower or 'msc' in resume_lower or 'mba' in resume_lower:
        scores["education"] = 6
    elif 'bachelor' in resume_lower or ' bs ' in resume_lower or ' ba ' in resume_lower:
        scores["education"] = 4
    elif 'high school' in resume_lower:
        scores["education"] = 2

    return scores, min(100, sum(scores.values()))
def analyze_resume(pdf_file, job_desc=None, inference_fn=None):
    """Analyze a resume PDF and return its text plus a scoring report.

    Args:
        pdf_file: PDF input accepted by ``extract_text_from_pdf``.
        job_desc: Optional job description used for relevance scoring and
            quoted in the inference prompt.
        inference_fn: Optional callable that takes a prompt string and
            returns a JSON string. When it is missing, fails, or returns an
            empty string, the rule-based analysis is used instead.

    Returns:
        A ``(text, report)`` tuple. On success ``text`` is the first 5,000
        characters of the resume and ``report`` holds ``"score"``,
        ``"analysis"`` and ``"raw_text_sample"``. When extraction fails,
        ``text`` is an ``"Extraction failed: ..."`` message and ``report`` is
        ``{"error": <message>}``.
    """
    try:
        resume_text = extract_text_from_pdf(pdf_file)
    except Exception as e:
        # Report the failure through both outputs instead of raising.
        message = str(e)
        return f"Extraction failed: {message}", {"error": message}

    scores, total_score = calculate_scores(resume_text, job_desc)
    resume_lower = resume_text.lower()

    # Keep only the remarks whose condition holds, so the lists never carry
    # None placeholders into the JSON output.
    strengths = [
        remark
        for remark, applies in (
            ("Good clarity score", scores["clarity"] > 7),
            ("Relevant skills", scores["relevance_to_job"] > 5),
        )
        if applies
    ]
    improvements = [
        remark
        for remark, applies in (
            ("Add more measurable achievements", scores["achievements"] < 3),
            ("Include more relevant keywords", scores["relevance_to_job"] < 5),
            ("Check for typos", scores["clarity"] < 9),
        )
        if applies
    ]

    # Substring test (same rule as the scoring step) so multi-word skills such
    # as "problem solving" can be detected; sorted for a stable result.
    missing_skills = sorted(
        skill for skill in GENERAL_SKILLS if skill not in resume_lower
    )[:2]

    # Rule-based analysis, used whenever no enhanced analysis is produced.
    analysis = {
        "score": {
            "total": total_score,
            "breakdown": scores
        },
        "strengths": strengths,
        "improvements": improvements,
        "missing_skills": missing_skills
    }

    # Try to get enhanced analysis if inference function is available
    if inference_fn:
        prompt = f"""[Return valid JSON]: Based on these scores: {scores}, provide:
- "strengths": 2 key strengths,
- "improvements": 3 specific improvements,
- "missing_skills": 2 missing skills (use job description if provided: {job_desc or "None"}).
Output a valid JSON string only, no extra text."""
        try:
            result = inference_fn(prompt)
            if result and result.strip():
                analysis = json.loads(result)
        except Exception as e:
            # Best effort: any inference or parsing failure keeps the
            # rule-based analysis.
            print(f"Inference error: {str(e)}")

    return (
        resume_text[:5000],  # First output for textbox (limited to 5000 chars)
        {
            "score": {"total": total_score, "breakdown": scores},
            "analysis": analysis,
            "raw_text_sample": resume_text[:200]
        }
    )
# --- Gradio Interface --- #
# NOTE(review): the nesting below was reconstructed from a copy of the file
# whose indentation was lost — confirm the Row is a sibling of the Sidebar.
with gr.Blocks(theme=gr.themes.Soft(), fill_height=True) as demo:
    # Sidebar with the app title and a short usage hint.
    with gr.Sidebar():
        gr.Markdown("# Resume Analyzer")
        gr.Markdown("Upload your resume in PDF format for analysis")
    with gr.Row():
        # Left column: inputs and the trigger button.
        with gr.Column(scale=1):
            # type="binary" hands the uploaded file to the handler as bytes.
            pdf_input = gr.File(label="PDF Resume", type="binary")
            job_desc_input = gr.Textbox(label="Job Description (Optional)", lines=3)
            submit_btn = gr.Button("Analyze")
        # Right column: read-only results.
        with gr.Column(scale=2):
            extracted_text = gr.Textbox(label="Extracted Text", lines=10, interactive=False)
            analysis_output = gr.JSON(label="Analysis Results")
    # analyze_resume returns a (text, report) pair that maps onto the two
    # outputs in order; inference_fn is not wired here, so it stays None.
    submit_btn.click(
        fn=analyze_resume,
        inputs=[pdf_input, job_desc_input],
        outputs=[extracted_text, analysis_output]
    )
# Start the web server when the module is executed; share=True asks Gradio
# for a public share link.
demo.launch(share=True)