Spaces:

aqsa-123
/

Resume-completeness

Runtime error

App Files Files Community

Resume-completeness / app.py

aqsa-123

Update app.py

c1e0635 verified 4 months ago

raw

history blame contribute delete

4 kB

	# IMPORTS
	import io
	import re
	from PIL import Image
	import gradio as gr
	import pdfplumber
	from docx import Document

	# Hugging Face OCR model imports
	from transformers import TrOCRProcessor, VisionEncoderDecoderModel
	import torch

	# ---------------- HF OCR SETUP ----------------
	# One checkpoint id shared by the processor and the model so the two always agree.
	_TROCR_CHECKPOINT = "microsoft/trocr-base-stage1"
	# Both objects are created once at import time and reused by extract_text_from_image.
	processor = TrOCRProcessor.from_pretrained(_TROCR_CHECKPOINT)
	model = VisionEncoderDecoderModel.from_pretrained(_TROCR_CHECKPOINT)

	def extract_text_from_image(file_bytes):
	    """Run the module-level TrOCR processor/model on an image and return its text.

	    Args:
	        file_bytes: Raw bytes of the image file.

	    Returns:
	        The first decoded sequence produced by the model, as a string.
	    """
	    # The image is decoded from memory and forced to 3-channel RGB before encoding.
	    rgb_image = Image.open(io.BytesIO(file_bytes)).convert("RGB")
	    encoded = processor(images=rgb_image, return_tensors="pt")
	    token_ids = model.generate(encoded.pixel_values)
	    decoded = processor.batch_decode(token_ids, skip_special_tokens=True)
	    return decoded[0]

	# ---------------- PDF TEXT ----------------
	def extract_text_from_pdf(file_bytes):
	    """Return the text of all pages of a PDF, with pages separated by a newline.

	    Pages are joined with "\n" rather than concatenated directly, so the last
	    word of one page is never fused with the first word of the next.

	    Args:
	        file_bytes: Raw bytes of the PDF file.

	    Returns:
	        The extracted text, or "" when no page yields any text.
	    """
	    with pdfplumber.open(io.BytesIO(file_bytes)) as pdf:
	        # Pages must be read while the document is still open.
	        page_texts = [page.extract_text() for page in pdf.pages]
	    # Skip pages that produced nothing (None or "") so an all-empty document
	    # still results in an empty string.
	    return "\n".join(chunk for chunk in page_texts if chunk)

	# ---------------- DOCX TEXT ----------------
	def extract_text_from_docx(file_bytes):
	    """Return the paragraph text of a DOCX file, one paragraph per line.

	    Args:
	        file_bytes: Raw bytes of the .docx file.

	    Returns:
	        The `.text` of every entry in the document's `paragraphs`, joined
	        with newlines.
	    """
	    document = Document(io.BytesIO(file_bytes))
	    paragraph_texts = (paragraph.text for paragraph in document.paragraphs)
	    return "\n".join(paragraph_texts)

	# ---------------- FILE HANDLER ----------------
	def extract_text_from_file(file_bytes, file_ext):
	    """Dispatch to the text extractor that matches a file extension.

	    Args:
	        file_bytes: Raw bytes of the uploaded file.
	        file_ext: File extension. Matching ignores case, surrounding
	            whitespace and a leading dot, so "pdf", "PDF" and ".pdf" are
	            equivalent.

	    Returns:
	        The extracted text, or "" when the extension is not supported.
	    """
	    # Normalise once so callers need not pre-clean the extension.
	    ext = (file_ext or "").strip().lower().lstrip(".")
	    if ext == "pdf":
	        return extract_text_from_pdf(file_bytes)
	    if ext == "docx":
	        return extract_text_from_docx(file_bytes)
	    if ext in ("jpg", "jpeg", "png"):
	        return extract_text_from_image(file_bytes)
	    return ""

	# ---------------- RESUME VALIDATION ----------------
	# Patterns are compiled once at import time. Alternatives are separated by a bare
	# "|": an escaped "\|" matches a literal pipe character and disables alternation.
	_EMAIL_RE = re.compile(r'[\w\.-]+@[\w\.-]+\.\w+')
	_PHONE_RE = re.compile(r'\d{3}[-.\s]?\d{3}[-.\s]?\d{4}|\d{10,11}')
	# Loose substrings that also count as evidence of contact details.
	_EMAIL_HINTS = ('@gmail', '@yahoo', '@hotmail', '.com', 'email', 'e-mail')
	_EDUCATION_RE = re.compile(
	    r'education|degree|bachelor|master|university|school|college'
	    # Abbreviations are anchored at a word start so "jobs" or "items" do not
	    # count as a degree, while "bs", "bsc", "msc" and "phd" still do.
	    r'|\b(?:bs|ms|phd)'
	)
	_EXPERIENCE_RE = re.compile(
	    r'experience|worked|roles?|employment|projects?|internship|career|manager'
	    r'|designer|assistant|executive|specialist|developer|engineer|analyst'
	    r'|officer|coordinator'
	)
	_SKILLS_RE = re.compile(
	    r'marketing|communication|skills|technologies|tools|competencies|python'
	    r'|excel|sql|java|c\+\+|javascript|html|css|react|node|git|linux|aws'
	    r'|docker|kubernetes|leadership|teamwork'
	)


	def validate_resume(text):
	    """Report which standard resume sections appear to be missing.

	    The text is lower-cased and scanned for contact details, education,
	    experience and skills keywords.

	    Args:
	        text: Plain text extracted from a resume.

	    Returns:
	        A list of "Missing ..." messages in the fixed order contact,
	        education, experience, skills; or a single success message when
	        nothing is missing.
	    """
	    text_lower = text.lower()

	    # Contact info is satisfied by an e-mail address, a phone number, or any
	    # of the loose e-mail hints.
	    has_contact = (
	        _EMAIL_RE.search(text_lower)
	        or _PHONE_RE.search(text_lower)
	        or any(hint in text_lower for hint in _EMAIL_HINTS)
	    )

	    checks = (
	        ("Missing Contact Info", has_contact),
	        ("Missing Education", _EDUCATION_RE.search(text_lower)),
	        ("Missing Experience", _EXPERIENCE_RE.search(text_lower)),
	        ("Missing Skills", _SKILLS_RE.search(text_lower)),
	    )
	    issues = [message for message, found in checks if not found]
	    return issues or ["✅ Resume is Complete!"]

	# ---------------- MAIN FUNCTION ----------------
	def check_resume(file):
	    """Gradio callback: extract text from an uploaded resume and validate it.

	    Args:
	        file: The uploaded file, either an object exposing the file path as
	            `.name` or a plain path string; `None` when nothing was uploaded.

	    Returns:
	        A `(result, preview)` tuple of strings. `result` is the validation
	        outcome or a warning/error message; `preview` is the first 1000
	        characters of the extracted text ("" on any warning or error).
	    """
	    if file is None:
	        return "⚠️ Please upload a file", ""
	    try:
	        # Accept both an object carrying the path in `.name` and a bare path
	        # string, so the callback does not depend on how the upload is passed.
	        path = str(getattr(file, "name", file))
	        file_ext = path.split(".")[-1].lower()

	        with open(path, "rb") as f:
	            file_bytes = f.read()

	        text = extract_text_from_file(file_bytes, file_ext)
	        if not text.strip():
	            return "⚠️ Could not extract text", ""
	        result = "\n".join(validate_resume(text))
	        return result, text[:1000]  # show first 1000 chars
	    except Exception as e:
	        # UI boundary: surface any failure as a message instead of crashing.
	        return f"⚠️ Error: {e}", ""

	# ---------------- GRADIO INTERFACE ----------------
	# Components are created in the same order as before: upload input first, then
	# the two outputs, matching the (result, extracted text) tuple of check_resume.
	upload_input = gr.File(label="Upload Resume (PDF, DOCX, JPG, PNG)")
	result_box = gr.Textbox(label="Result", lines=6)
	preview_box = gr.Textbox(label="Extracted Text", lines=6)

	demo = gr.Interface(
	    fn=check_resume,
	    inputs=upload_input,
	    outputs=[result_box, preview_box],
	    title="📄 Resume Completeness Checker",
	    description="Upload clear resume files for better results.",
	)

	# Starts the app at import time; share=True is passed through to Gradio unchanged.
	demo.launch(share=True)