Spaces:
Runtime error
Runtime error
| # IMPORTS | |
| import io | |
| import re | |
| from PIL import Image | |
| import gradio as gr | |
| import pdfplumber | |
| from docx import Document | |
| # Hugging Face OCR model imports | |
| from transformers import TrOCRProcessor, VisionEncoderDecoderModel | |
| import torch | |
| # ---------------- HF OCR SETUP ---------------- | |
# Checkpoint used for OCR. The "stage1" TrOCR checkpoints are pre-training-only
# weights; the "printed" variant is fine-tuned for recognising printed text,
# which is what resume scans contain.
# NOTE(review): TrOCR is documented for single text-line images -- full-page
# scans may need line segmentation before recognition; confirm on real uploads.
_TROCR_CHECKPOINT = "microsoft/trocr-base-printed"

# Loaded once at import time so every request reuses the same weights.
processor = TrOCRProcessor.from_pretrained(_TROCR_CHECKPOINT)
model = VisionEncoderDecoderModel.from_pretrained(_TROCR_CHECKPOINT)
def extract_text_from_image(file_bytes):
    """Run the module-level TrOCR processor and model over an encoded image.

    Args:
        file_bytes: Raw bytes of an image file that PIL can open.

    Returns:
        The first decoded sequence produced by the model, as a string.
    """
    # The image is forced to RGB before being handed to the processor.
    rgb_image = Image.open(io.BytesIO(file_bytes)).convert("RGB")
    encoded = processor(images=rgb_image, return_tensors="pt")
    token_ids = model.generate(encoded.pixel_values)
    decoded = processor.batch_decode(token_ids, skip_special_tokens=True)
    return decoded[0]
| # ---------------- PDF TEXT ---------------- | |
def extract_text_from_pdf(file_bytes):
    """Extract the text of every page of a PDF.

    Page texts are joined with a newline so the last word of one page is not
    fused with the first word of the next. Pages for which ``extract_text()``
    returns nothing are skipped.

    Args:
        file_bytes: Raw bytes of the PDF file.

    Returns:
        The concatenated page text, or ``""`` when no page yields any text.
    """
    with pdfplumber.open(io.BytesIO(file_bytes)) as pdf:
        # Materialise inside the ``with`` block: pages need the open document.
        page_texts = [page.extract_text() or "" for page in pdf.pages]
    return "\n".join(page_text for page_text in page_texts if page_text)
| # ---------------- DOCX TEXT ---------------- | |
def extract_text_from_docx(file_bytes):
    """Extract text from a DOCX file, including text placed inside tables.

    Body paragraphs come first (one line each, exactly as before), followed by
    the non-empty text of every table cell. Resumes are frequently laid out
    with tables, and ``doc.paragraphs`` alone does not include their content.

    Args:
        file_bytes: Raw bytes of the DOCX file.

    Returns:
        The collected lines joined with newlines.
    """
    doc = Document(io.BytesIO(file_bytes))
    lines = [paragraph.text for paragraph in doc.paragraphs]

    for table in doc.tables:
        for row in table.rows:
            previous_text = None
            for cell in row.cells:
                cell_text = cell.text.strip()
                # A horizontally merged cell is reported once per column it
                # spans, so skip consecutive repeats within the same row.
                if cell_text and cell_text != previous_text:
                    lines.append(cell_text)
                previous_text = cell_text

    return "\n".join(lines)
| # ---------------- FILE HANDLER ---------------- | |
def extract_text_from_file(file_bytes, file_ext):
    """Route raw file bytes to the extractor matching the file extension.

    Args:
        file_bytes: Raw bytes of the uploaded file.
        file_ext: Extension without the dot; compared as-is against
            ``"pdf"``, ``"docx"``, ``"jpg"``, ``"jpeg"`` and ``"png"``.

    Returns:
        The extracted text, or ``""`` for any other extension.
    """
    image_extensions = ("jpg", "jpeg", "png")

    if file_ext in image_extensions:
        return extract_text_from_image(file_bytes)
    if file_ext == "docx":
        return extract_text_from_docx(file_bytes)
    if file_ext == "pdf":
        return extract_text_from_pdf(file_bytes)

    # Unsupported extension: nothing to extract.
    return ""
| # ---------------- RESUME VALIDATION ---------------- | |
# Contact detection: an e-mail address, a phone-like digit run, or a loose hint.
_EMAIL_RE = re.compile(r'[\w\.-]+@[\w\.-]+\.\w+')
_PHONE_RE = re.compile(r'\d{3}[-.\s]?\d{3}[-.\s]?\d{4}|\d{10,11}')
_EMAIL_HINTS = ('@gmail', '@yahoo', '@hotmail', '.com', 'email', 'e-mail')

# (issue label, pattern) pairs checked in order against the lower-cased text.
_SECTION_CHECKS = (
    (
        "Missing Education",
        # Degree abbreviations are anchored with word boundaries: as bare
        # substrings, "bs" and "ms" match words such as "jobs" or "systems",
        # which made this check pass on almost any text.
        re.compile(
            r'education|degree|bachelor|master|university|school|college'
            r'|\b(?:bsc?|msc?|phd)\b'
        ),
    ),
    (
        "Missing Experience",
        re.compile(
            r'(experience|worked|roles?|employment|projects?|internship|career|manager|designer|assistant|executive|specialist|developer|engineer|analyst|officer|coordinator)'
        ),
    ),
    (
        "Missing Skills",
        re.compile(
            r'(marketing|communication|skills|technologies|tools|competencies|python|excel|sql|java|c\+\+|javascript|html|css|react|node|git|linux|aws|docker|kubernetes|leadership|teamwork)'
        ),
    ),
)


def validate_resume(text):
    """Report which standard resume sections appear to be missing.

    The check is keyword-based and case-insensitive (the text is lower-cased
    first). Contact info counts as present when an e-mail address, a
    phone-like number, or any e-mail hint keyword is found.

    Args:
        text: Plain text extracted from the resume.

    Returns:
        A list of issue labels in the order contact, education, experience,
        skills; or ``["✅ Resume is Complete!"]`` when nothing is missing.
    """
    text_lower = text.lower()
    issues = []

    has_contact = (
        _EMAIL_RE.search(text_lower) is not None
        or _PHONE_RE.search(text_lower) is not None
        or any(hint in text_lower for hint in _EMAIL_HINTS)
    )
    if not has_contact:
        issues.append("Missing Contact Info")

    issues.extend(
        label
        for label, pattern in _SECTION_CHECKS
        if pattern.search(text_lower) is None
    )

    return issues if issues else ["✅ Resume is Complete!"]
| # ---------------- MAIN FUNCTION ---------------- | |
def check_resume(file):
    """Gradio callback: extract text from an uploaded resume and validate it.

    Args:
        file: The uploaded file as provided by the ``gr.File`` input. Either
            an object exposing the temp-file path as ``.name`` or a plain
            path string is accepted; ``None`` means nothing was uploaded.

    Returns:
        A ``(result, extracted_text)`` tuple of strings. ``result`` is the
        validation report or a warning message; ``extracted_text`` is the
        first 1000 characters of the extracted text (empty on failure).
    """
    import os  # local import keeps this block self-contained

    if file is None:
        return "⚠️ Please upload a file", ""

    try:
        # Fall back to the value itself when it has no ``.name`` attribute,
        # i.e. when the upload is delivered as a plain path string.
        file_path = getattr(file, "name", file)

        # splitext yields "" for extension-less names instead of echoing the
        # whole path the way split(".")[-1] does.
        file_ext = os.path.splitext(file_path)[1].lstrip(".").lower()

        with open(file_path, "rb") as f:
            file_bytes = f.read()

        text = extract_text_from_file(file_bytes, file_ext)
        if not text.strip():
            return "⚠️ Could not extract text", ""

        result = "\n".join(validate_resume(text))
        return result, text[:1000]  # show first 1000 chars
    except Exception as e:
        # UI boundary: surface any failure as a message instead of crashing.
        return f"⚠️ Error: {str(e)}", ""
| # ---------------- GRADIO INTERFACE ---------------- | |
# UI widgets: one upload input, two text outputs matching the
# (result, extracted_text) tuple returned by check_resume.
_resume_upload = gr.File(label="Upload Resume (PDF, DOCX, JPG, PNG)")
_result_box = gr.Textbox(label="Result", lines=6)
_extracted_box = gr.Textbox(label="Extracted Text", lines=6)

demo = gr.Interface(
    fn=check_resume,
    inputs=_resume_upload,
    outputs=[_result_box, _extracted_box],
    title="📄 Resume Completeness Checker",
    description="Upload clear resume files for better results.",
)

# Start the web app when the module is executed.
demo.launch(share=True)