# Hugging Face Spaces page capture -- Space status: Runtime error
import os
import re

import gradio as gr
from docx import Document
from PyPDF2 import PdfReader  # Use PdfReader from PyPDF2
# Function to extract text from a PDF file
# NOTE: this module defines `extract_text_from_pdf` a second time further
# down; Python binds the name to the later definition, so this one is kept
# behaviorally aligned with it (including the empty-PDF check).
def extract_text_from_pdf(pdf_file):
    """Return the concatenated text of every page in a PDF.

    Args:
        pdf_file: Whatever ``PdfReader`` accepts as its source (this module
            passes it straight through).

    Returns:
        The text of all pages joined together with no separator.

    Raises:
        ValueError: If the PDF contains no pages.
    """
    reader = PdfReader(pdf_file)
    if not reader.pages:
        raise ValueError("The PDF file is empty.")
    # `or ""` keeps the join safe should a page yield no text object at all
    # (defensive; a single join also avoids repeated string concatenation).
    return "".join(page.extract_text() or "" for page in reader.pages)
# Function to extract text from a DOCX file
def extract_text_from_docx(docx_file):
    """Return the text of a DOCX document, one paragraph per line.

    Args:
        docx_file: Source handed directly to ``Document``.

    Returns:
        The ``text`` of each entry in ``Document(...).paragraphs`` joined
        with newline characters.
    """
    # NOTE(review): only `paragraphs` is read here; text held in tables or
    # headers is presumably not included -- confirm against python-docx.
    return "\n".join(
        paragraph.text for paragraph in Document(docx_file).paragraphs
    )
def extract_text_from_pdf(pdf_file):
    """Return the concatenated text of every page in a PDF.

    This is the second definition of this name in the module and therefore
    the one callers actually get.

    Args:
        pdf_file: Whatever ``PdfReader`` accepts as its source (this module
            passes it straight through).

    Returns:
        The text of all pages joined together with no separator.

    Raises:
        ValueError: If the PDF contains no pages.
    """
    reader = PdfReader(pdf_file)
    if not reader.pages:
        raise ValueError("The PDF file is empty.")
    # `or ""` keeps the join safe should a page yield no text object at all
    # (defensive; a single join also avoids repeated string concatenation).
    return "".join(page.extract_text() or "" for page in reader.pages)
# Heuristic patterns used to pull contact details out of resume text.
# They are deliberately simple: the name pattern returns the first run of two
# or more capitalized words, which is not guaranteed to be a person's name.
_NAME_PATTERN = re.compile(r"([A-Z][a-z]+(?: [A-Z][a-z]+)+)")
_EMAIL_PATTERN = re.compile(r"[\w\.-]+@[\w\.-]+")
_PHONE_PATTERN = re.compile(r"(\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4})")


def _resolve_resume_path(resume):
    """Return the filesystem path for a path-like value or an upload object."""
    if isinstance(resume, (str, os.PathLike)):
        return os.fspath(resume)
    # Upload objects expose the location of the stored file as `.name`.
    return resume.name


def _first_match(pattern, text, fallback):
    """Return the first match of *pattern* in *text*, or *fallback* if none."""
    found = pattern.search(text)
    return found.group() if found else fallback


# Function to extract information from a resume
def extract_info_from_resume(resume_path):
    """Extract name, email and phone number from a PDF or DOCX resume.

    Args:
        resume_path: Either a filesystem path (``str`` / ``os.PathLike``) or
            an upload object whose ``name`` attribute is the path of the
            uploaded file.

    Returns:
        A dict with the keys ``"Name"``, ``"Email"`` and ``"Phone"``. Each
        value is the first match found in the document text, or a
        "... not found" placeholder string.

    Raises:
        ValueError: If no file is given or the extension is neither
            ``.pdf`` nor ``.docx`` (compared case-insensitively).
    """
    if resume_path is None:
        raise ValueError("No resume file was provided.")

    path = _resolve_resume_path(resume_path)
    lowered = path.lower()  # accept ".PDF" / ".Docx" as well

    # The parsers are given the path rather than the upload handle: the path
    # is what the upload object advertises via `.name`, and opening it fresh
    # avoids depending on the state of a handle this code did not create.
    if lowered.endswith(".pdf"):
        text = extract_text_from_pdf(path)
    elif lowered.endswith(".docx"):
        text = extract_text_from_docx(path)
    else:
        raise ValueError("Unsupported file format. Only PDF and DOCX are supported.")

    return {
        "Name": _first_match(_NAME_PATTERN, text, "Name not found"),
        "Email": _first_match(_EMAIL_PATTERN, text, "Email not found"),
        "Phone": _first_match(_PHONE_PATTERN, text, "Phone number not found"),
    }
# Define a Gradio interface
# `gr.File` is the top-level file-upload component. The legacy `gr.inputs`
# namespace used previously is not available in current Gradio releases, so
# referencing it makes the script fail at startup. The component's default
# `type` is left in place so the same line works across Gradio versions; what
# the callback receives (an upload object or a path string) depends on the
# installed version.
iface = gr.Interface(
    fn=extract_info_from_resume,
    inputs=gr.File(),
    outputs="json",
)

# Deploy the Gradio interface
iface.launch(share=True)