mkoot007 committed on
Commit
406399b
·
1 Parent(s): 3b2fed7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +68 -0
app.py CHANGED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import re
3
+ from docx import Document # Use python-docx to read DOCX files
4
+ from PyPDF2 import PdfFileReader # Import PdfFileReader from PyPDF2
5
+
6
+ # Function to extract text from a PDF file
7
def extract_text_from_pdf(pdf_file):
    """Return the text of every page of a PDF, concatenated in page order.

    Args:
        pdf_file: Whatever the PyPDF2 reader accepts as its source; the
            caller in this file passes the uploaded resume object.

    Returns:
        A single string holding the text of all pages. Pages that yield no
        text contribute an empty string.
    """
    try:
        # The camelCase API (PdfFileReader / getNumPages / getPage /
        # extractText) raises a deprecation error in PyPDF2 3.x; PdfReader,
        # .pages and extract_text() are its replacements.
        from PyPDF2 import PdfReader
    except ImportError:
        # Older PyPDF2 without the renamed API: keep using the legacy reader
        # that the file imports at module level.
        legacy_reader = PdfFileReader(pdf_file)
        return "".join(
            legacy_reader.getPage(page_num).extractText() or ""
            for page_num in range(legacy_reader.getNumPages())
        )

    reader = PdfReader(pdf_file)
    # `or ""` keeps the join safe if a page produces no text at all.
    return "".join(page.extract_text() or "" for page in reader.pages)
14
+
15
+ # Function to extract text from a DOCX file
16
def extract_text_from_docx(docx_file):
    """Return the text of a DOCX document, one paragraph per line.

    Args:
        docx_file: The source handed straight to python-docx's ``Document``.

    Returns:
        The ``text`` of each entry in ``Document(...).paragraphs``, joined
        with newline characters.
    """
    document = Document(docx_file)
    paragraph_texts = []
    for paragraph in document.paragraphs:
        paragraph_texts.append(paragraph.text)
    return "\n".join(paragraph_texts)
20
+
21
+ # Function to extract information from a resume
22
def extract_info_from_resume(resume_path):
    """Extract a candidate's name, email address and phone number from a resume.

    Args:
        resume_path: The resume to parse. Either a path string or an object
            exposing the file name as ``.name`` (such as an uploaded-file
            wrapper). The value is passed on unchanged to the PDF/DOCX text
            extractor.

    Returns:
        A dict with the keys ``"Name"``, ``"Email"`` and ``"Phone"``. Each
        value is the first regex match found in the resume text, or a
        "... not found" placeholder when nothing matched.

    Raises:
        ValueError: If no file was given, or the file name does not end in
            ``.pdf`` or ``.docx`` (compared case-insensitively).
    """
    if resume_path is None:
        raise ValueError("No resume file was provided.")

    # A plain path string has no `.name`, so it stands in for itself.
    # Lower-casing makes ".PDF" / ".Docx" uploads work too.
    filename = str(getattr(resume_path, "name", resume_path)).lower()
    if filename.endswith(".pdf"):
        text = extract_text_from_pdf(resume_path)
    elif filename.endswith(".docx"):
        text = extract_text_from_docx(resume_path)
    else:
        raise ValueError("Unsupported file format. Only PDF and DOCX are supported.")

    # Two or more consecutive capitalised words separated by single spaces.
    name_pattern = r"([A-Z][a-z]+(?: [A-Z][a-z]+)+)"
    # The domain must be dot-separated labels, so sentence punctuation after
    # an address ("mail me at a@b.com.") is not swallowed into the match.
    email_pattern = r"[\w.+-]+@[\w-]+(?:\.[\w-]+)+"
    # 3-3-4 digit groups with optional parentheses and -, . or space separators.
    phone_pattern = r"(\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4})"

    return {
        "Name": _first_match(name_pattern, text, "Name not found"),
        "Email": _first_match(email_pattern, text, "Email not found"),
        "Phone": _first_match(phone_pattern, text, "Phone number not found"),
    }


def _first_match(pattern, text, fallback):
    """Return the first match of ``pattern`` in ``text``, or ``fallback``."""
    found = re.search(pattern, text)
    return found.group() if found else fallback
59
+
60
+ # Define a Gradio interface
61
# Single file-upload input; the uploaded resume is handed to
# extract_info_from_resume and the returned dict is rendered as JSON.
# NOTE(review): `gr.inputs` is Gradio's legacy component namespace — confirm
# it still exists in the Gradio version this app is pinned to.
resume_upload = gr.inputs.File(type="file")

iface = gr.Interface(
    fn=extract_info_from_resume,
    inputs=resume_upload,
    outputs="json",
)

# Start the app at import time; share=True asks Gradio for a public link.
iface.launch(share=True)