Spaces:

mkoot007
/

Classification-testing

Runtime error

App Files Files Community

mkoot007 committed on Oct 26, 2023

Commit

406399b

1 Parent(s): 3b2fed7

Update app.py

Browse files

Files changed (1) hide show

app.py +68 -0

app.py CHANGED Viewed

	@@ -0,0 +1,68 @@

+import gradio as gr
+import re
+from docx import Document  # Use python-docx to read DOCX files
+from PyPDF2 import PdfFileReader  # Import PdfFileReader from PyPDF2
+# Function to extract text from a PDF file
+def extract_text_from_pdf(pdf_file):
+    text = ""
+    pdf = PdfFileReader(pdf_file)
+    for page_num in range(pdf.getNumPages()):
+        page = pdf.getPage(page_num)
+        text += page.extractText()
+    return text
+# Function to extract text from a DOCX file
+def extract_text_from_docx(docx_file):
+    doc = Document(docx_file)
+    text = "\n".join([para.text for para in doc.paragraphs])
+    return text
+# Function to extract information from a resume
+def extract_info_from_resume(resume_path):
+    if resume_path.name.endswith('.pdf'):
+        text = extract_text_from_pdf(resume_path)
+    elif resume_path.name.endswith('.docx'):
+        text = extract_text_from_docx(resume_path)
+    else:
+        raise ValueError("Unsupported file format. Only PDF and DOCX are supported.")
+    # Define regular expressions to extract information
+    name_pattern = r"([A-Z][a-z]+(?: [A-Z][a-z]+)+)"
+    email_pattern = r"[\w\.-]+@[\w\.-]+"
+    phone_pattern = r"(\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4})"
+    name = re.search(name_pattern, text)
+    email = re.search(email_pattern, text)
+    phone = re.search(phone_pattern, text)
+    if name:
+        name = name.group()
+    else:
+        name = "Name not found"
+    if email:
+        email = email.group()
+    else:
+        email = "Email not found"
+    if phone:
+        phone = phone.group()
+    else:
+        phone = "Phone number not found"
+    extracted_info = {
+        "Name": name,
+        "Email": email,
+        "Phone": phone,
+    }
+    return extracted_info
+# Define a Gradio interface
+iface = gr.Interface(
+    fn=extract_info_from_resume,
+    inputs=gr.inputs.File(type="file"),
+    outputs="json"
+)
+# Deploy the Gradio interface
+iface.launch(share=True)