Spaces:

Mangesh223
/

DefendModel

Sleeping

App Files Files Community

DefendModel / app.py

Mangesh223

Update app.py

dfa143b verified about 1 year ago

raw

history blame

6.61 kB

	import gradio as gr
	import PyPDF2
	import io
	import re
	import json
	import os
	import gc
	from huggingface_hub import login
	from dotenv import load_dotenv

	# --- Configuration --- #
	# Pull settings from a local .env file (if present) into the environment.
	load_dotenv()

	# Authenticate with the Hugging Face Hub only when a token is configured.
	# Passing token=None makes login() fall back to an interactive prompt,
	# which cannot be answered when the app runs without a terminal.
	_hf_token = os.getenv("HF_TOKEN")
	if _hf_token:
	    login(token=_hf_token)
	else:
	    print("HF_TOKEN is not set; skipping Hugging Face Hub login")

	# Generic skills looked for in every resume; kept as a set so that
	# membership checks and set differences stay cheap.
	GENERAL_SKILLS = {
	    'communication',
	    'excel',
	    'problem solving',
	    'project management',
	    'python',
	    'sql',
	    'teamwork',
	}

	# Precompiled regex patterns (compiled once at import, reused per request).
	# The alternations use a bare "|": an escaped "\|" would match a literal
	# pipe character and the patterns would never fire on real resume text.

	# Date ranges such as "2019 - 2021" or "2021 – Present" (hyphen or en dash,
	# exactly one whitespace character on each side).
	YEAR_PATTERN = re.compile(r'\d{4}\s[-–]\s(?:Present|\d{4})')

	# Quantified achievements such as "increased by 20%" or "saved by $500";
	# group 1 is the verb, group 2 is the amount.
	ACHIEVEMENT_PATTERN = re.compile(
	    r'(increased|reduced|saved|improved)\s+by\s+(\d+%|\$\d+)', re.I
	)

	# Words counted against the clarity score.
	# NOTE(review): "responsibilities" and "accomplishment" are valid spellings;
	# this list presumably meant common misspellings - confirm the intended words.
	TYPO_PATTERN = re.compile(r'\b(?:responsibilities|accomplishment|experiance)\b', re.I)

	def extract_text_from_pdf(pdf_file):
	    """Extract text from a PDF supplied as a file path or as raw bytes.

	    Args:
	        pdf_file: Path to a PDF file (str), or the PDF content itself
	            (bytes or bytearray).

	    Returns:
	        The text of all pages joined with newlines, truncated to the
	        first 10,000 characters.

	    Raises:
	        ValueError: If pdf_file is None.
	        TypeError: If pdf_file is neither a path string nor bytes.
	        OSError: If a path was given and the file cannot be opened.
	        Exception: "PDF read error: ..." when PyPDF2 cannot parse the
	            data; "Extraction error: ..." for any other failure during
	            extraction, including a PDF with no pages or no text.
	    """
	    if pdf_file is None:
	        raise ValueError("No PDF file uploaded")

	    # Handle both file path and bytes input
	    if isinstance(pdf_file, str):
	        with open(pdf_file, 'rb') as f:
	            file_bytes = f.read()
	    elif isinstance(pdf_file, (bytes, bytearray)):
	        file_bytes = bytes(pdf_file)
	    else:
	        raise TypeError(f"Expected file path or bytes, got {type(pdf_file)}")

	    try:
	        pdf_reader = PyPDF2.PdfReader(io.BytesIO(file_bytes))
	        if len(pdf_reader.pages) == 0:
	            raise ValueError("PDF has no pages")

	        # extract_text() may yield None for a page without a text layer;
	        # substitute "" so the join cannot fail on such pages.
	        text = "\n".join((page.extract_text() or "") for page in pdf_reader.pages)
	        if not text.strip():
	            raise ValueError("No text extracted from PDF (possibly image-based or empty)")

	        return text[:10000]  # Limit to first 10,000 characters
	    except PyPDF2.errors.PdfReadError as e:
	        # Chain the cause so the original traceback is not lost.
	        raise Exception(f"PDF read error: {str(e)}") from e
	    except Exception as e:
	        raise Exception(f"Extraction error: {str(e)}") from e
	    finally:
	        # Runs on success and failure alike to reclaim parser objects.
	        gc.collect()

	def calculate_scores(resume_text, job_desc=None):
	    """Score a resume using keyword and regex heuristics.

	    Args:
	        resume_text: Plain text of the resume.
	        job_desc: Optional job description. When it contains at least one
	            word, relevance is the share of its distinct words that also
	            appear in the resume (0-20). Otherwise relevance is the number
	            of GENERAL_SKILLS found in the resume (capped at 10).

	    Returns:
	        A tuple (scores, total): scores maps each category name to an int,
	        and total is the sum of all categories capped at 100.
	    """
	    resume_lower = resume_text.lower()
	    # "skills_match", "achievements" and "customization" are never assigned
	    # below, so they stay at 0 in the returned breakdown.
	    scores = {
	        "relevance_to_job": 0,
	        "experience_quality": 0,
	        "skills_match": 0,
	        "education": 0,
	        "achievements": 0,
	        # Start at 10 and lose one point per flagged word, at most 8 points.
	        "clarity": 10 - min(8, len(TYPO_PATTERN.findall(resume_text))),
	        "customization": 0
	    }

	    # A job description made only of whitespace/punctuation yields no words;
	    # treat it like a missing description instead of dividing by zero.
	    job_words = set(re.findall(r'\w+', job_desc.lower())) if job_desc else set()
	    if job_words:
	        resume_words = set(re.findall(r'\w+', resume_lower))
	        scores["relevance_to_job"] = min(20, int(20 * len(job_words & resume_words) / len(job_words)))
	    else:
	        scores["relevance_to_job"] = min(10, sum(1 for skill in GENERAL_SKILLS if skill in resume_lower))

	    # Up to 10 points for date ranges plus up to 10 (2 each) for
	    # quantified achievement phrases.
	    scores["experience_quality"] = min(10, len(YEAR_PATTERN.findall(resume_text)))
	    scores["experience_quality"] += min(10, len(ACHIEVEMENT_PATTERN.findall(resume_text)) * 2)

	    # Highest matching education level wins; these are substring checks.
	    if 'phd' in resume_lower or 'doctorate' in resume_lower:
	        scores["education"] = 8
	    elif 'master' in resume_lower or 'msc' in resume_lower or 'mba' in resume_lower:
	        scores["education"] = 6
	    elif 'bachelor' in resume_lower or ' bs ' in resume_lower or ' ba ' in resume_lower:
	        scores["education"] = 4
	    elif 'high school' in resume_lower:
	        scores["education"] = 2

	    return scores, min(100, sum(scores.values()))

	def _build_basic_analysis(resume_text, scores, total_score):
	    """Build the rule-based analysis used when no model output is available."""
	    resume_lower = resume_text.lower()

	    # Collect only the messages that apply, so the lists never contain None.
	    strengths = []
	    if scores["clarity"] > 7:
	        strengths.append("Good clarity score")
	    if scores["relevance_to_job"] > 5:
	        strengths.append("Relevant skills")

	    improvements = []
	    if scores["achievements"] < 3:
	        improvements.append("Add more measurable achievements")
	    if scores["relevance_to_job"] < 5:
	        improvements.append("Include more relevant keywords")
	    if scores["clarity"] < 9:
	        improvements.append("Check for typos")

	    # Substring check so multi-word skills ("problem solving") can be found;
	    # sorted so the two reported skills are the same on every run.
	    missing_skills = sorted(
	        skill for skill in GENERAL_SKILLS if skill not in resume_lower
	    )[:2]

	    return {
	        "score": {
	            "total": total_score,
	            "breakdown": scores
	        },
	        "strengths": strengths,
	        "improvements": improvements,
	        "missing_skills": missing_skills
	    }


	def analyze_resume(pdf_file, job_desc=None, inference_fn=None):
	    """Analyze a PDF resume and return two outputs: text and analysis.

	    Args:
	        pdf_file: The resume, in any form extract_text_from_pdf accepts.
	        job_desc: Optional job description used for relevance scoring and
	            included in the model prompt.
	        inference_fn: Optional callable taking a prompt string and
	            returning a JSON string. When it is missing, returns nothing,
	            raises, or returns invalid JSON, the rule-based analysis is
	            used instead.

	    Returns:
	        A tuple (text, result). On success, text is the first 5,000
	        characters of the extracted resume and result is a dict with the
	        keys "score", "analysis" and "raw_text_sample". If extraction
	        fails, text is "Extraction failed: <reason>" and result is
	        {"error": <reason>}.
	    """
	    try:
	        resume_text = extract_text_from_pdf(pdf_file)
	    except Exception as e:
	        return (
	            f"Extraction failed: {str(e)}",  # First output for textbox
	            {"error": str(e)}  # Second output for JSON
	        )

	    scores, total_score = calculate_scores(resume_text, job_desc)

	    # Start from the rule-based analysis; replaced below only if the
	    # inference function returns parseable JSON.
	    analysis = _build_basic_analysis(resume_text, scores, total_score)

	    if inference_fn:
	        prompt = f"""[Return valid JSON]: Based on these scores: {scores}, provide:
	- "strengths": 2 key strengths,
	- "improvements": 3 specific improvements,
	- "missing_skills": 2 missing skills (use job description if provided: {job_desc or "None"}).
	Output a valid JSON string only, no extra text."""

	        try:
	            result = inference_fn(prompt)
	            if result and result.strip():
	                analysis = json.loads(result)
	        except Exception as e:
	            # Best effort: report the problem and keep the basic analysis.
	            print(f"Inference error: {str(e)}")

	    return (
	        resume_text[:5000],  # First output for textbox (limited to 5000 chars)
	        {
	            "score": {"total": total_score, "breakdown": scores},
	            "analysis": analysis,
	            "raw_text_sample": resume_text[:200]
	        }
	    )

	# --- Gradio Interface --- #
	# Sidebar with the title, then a row with the inputs on the left and the
	# results on the right.
	with gr.Blocks(theme=gr.themes.Soft(), fill_height=True) as demo:
	    with gr.Sidebar():
	        gr.Markdown("# Resume Analyzer")
	        gr.Markdown("Upload your resume in PDF format for analysis")

	    with gr.Row():
	        with gr.Column(scale=1):
	            # type="binary" hands the uploaded file to the callback as bytes.
	            pdf_input = gr.File(label="PDF Resume", type="binary")
	            job_desc_input = gr.Textbox(label="Job Description (Optional)", lines=3)
	            submit_btn = gr.Button("Analyze")

	        with gr.Column(scale=2):
	            extracted_text = gr.Textbox(label="Extracted Text", lines=10, interactive=False)
	            analysis_output = gr.JSON(label="Analysis Results")

	    # Only two inputs are wired, so analyze_resume runs with its default
	    # inference_fn=None and returns the rule-based analysis.
	    submit_btn.click(
	        fn=analyze_resume,
	        inputs=[pdf_input, job_desc_input],
	        outputs=[extracted_text, analysis_output]
	    )

	# Start the server only when executed as a script, so importing this module
	# (for reuse or testing) does not launch a web server as a side effect.
	if __name__ == "__main__":
	    demo.launch(share=True)