Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -4,6 +4,7 @@ from fastapi import FastAPI
|
|
| 4 |
import docx
|
| 5 |
import re
|
| 6 |
|
|
|
|
| 7 |
def extract_text_from_pdf(file):
|
| 8 |
doc = fitz.open(stream=file.read(), filetype="pdf")
|
| 9 |
text = ""
|
|
@@ -11,11 +12,13 @@ def extract_text_from_pdf(file):
|
|
| 11 |
text += page.get_text()
|
| 12 |
return text
|
| 13 |
|
|
|
|
| 14 |
def extract_text_from_docx(file):
|
| 15 |
doc = docx.Document(file)
|
| 16 |
text = "\n".join([para.text for para in doc.paragraphs])
|
| 17 |
return text
|
| 18 |
|
|
|
|
| 19 |
def extract_info(text):
|
| 20 |
data = {}
|
| 21 |
|
|
@@ -27,25 +30,28 @@ def extract_info(text):
|
|
| 27 |
data["email"] = email_match.group(0) if email_match else ""
|
| 28 |
data["phone"] = phone_match.group(0) if phone_match else ""
|
| 29 |
|
|
|
|
| 30 |
skill_keywords = ["Python", "Java", "C++", "NLP", "Machine Learning", "Data Science", "SQL", "React"]
|
| 31 |
found_skills = [skill for skill in skill_keywords if skill.lower() in text.lower()]
|
| 32 |
data["skills"] = found_skills
|
| 33 |
|
| 34 |
return data
|
| 35 |
|
|
|
|
| 36 |
def process_resume(file):
|
| 37 |
if file.name.endswith(".pdf"):
|
| 38 |
text = extract_text_from_pdf(file)
|
| 39 |
elif file.name.endswith(".docx"):
|
| 40 |
text = extract_text_from_docx(file)
|
| 41 |
else:
|
| 42 |
-
return {"error": "Unsupported file format"}
|
| 43 |
|
| 44 |
extracted_data = extract_info(text)
|
| 45 |
return extracted_data, "✅ Resume processed successfully!"
|
| 46 |
|
| 47 |
-
|
| 48 |
-
|
|
|
|
| 49 |
|
| 50 |
with gr.Row():
|
| 51 |
with gr.Column(scale=1):
|
|
@@ -58,9 +64,7 @@ with gr.Blocks(title="Smart Resume Parser - JSON Edition", css="body { max-width
|
|
| 58 |
with gr.Column(scale=2):
|
| 59 |
output_json = gr.JSON(label="🧠 Extracted Resume Data")
|
| 60 |
|
| 61 |
-
#
|
| 62 |
-
reset_flag = gr.State(False)
|
| 63 |
-
|
| 64 |
analyze_button.click(
|
| 65 |
fn=process_resume,
|
| 66 |
inputs=[file_input],
|
|
@@ -73,4 +77,15 @@ with gr.Blocks(title="Smart Resume Parser - JSON Edition", css="body { max-width
|
|
| 73 |
outputs=[output_json, status_box]
|
| 74 |
)
|
| 75 |
|
|
|
|
| 76 |
app = gr.mount_gradio_app(app=FastAPI(), blocks=demo, path="/")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4 |
import docx
|
| 5 |
import re
|
| 6 |
|
| 7 |
+
# π Extract text from PDF file
|
| 8 |
def extract_text_from_pdf(file):
|
| 9 |
doc = fitz.open(stream=file.read(), filetype="pdf")
|
| 10 |
text = ""
|
|
|
|
| 12 |
text += page.get_text()
|
| 13 |
return text
|
| 14 |
|
| 15 |
+
# π Extract text from DOCX file
|
| 16 |
def extract_text_from_docx(file):
|
| 17 |
doc = docx.Document(file)
|
| 18 |
text = "\n".join([para.text for para in doc.paragraphs])
|
| 19 |
return text
|
| 20 |
|
| 21 |
+
# 🧠 Extract structured info from text
|
| 22 |
def extract_info(text):
|
| 23 |
data = {}
|
| 24 |
|
|
|
|
| 30 |
data["email"] = email_match.group(0) if email_match else ""
|
| 31 |
data["phone"] = phone_match.group(0) if phone_match else ""
|
| 32 |
|
| 33 |
+
# 🛠️ Simple keyword match for skills
|
| 34 |
skill_keywords = ["Python", "Java", "C++", "NLP", "Machine Learning", "Data Science", "SQL", "React"]
|
| 35 |
found_skills = [skill for skill in skill_keywords if skill.lower() in text.lower()]
|
| 36 |
data["skills"] = found_skills
|
| 37 |
|
| 38 |
return data
|
| 39 |
|
| 40 |
+
# 🎯 Main function to process uploaded resume
|
| 41 |
def process_resume(file):
|
| 42 |
if file.name.endswith(".pdf"):
|
| 43 |
text = extract_text_from_pdf(file)
|
| 44 |
elif file.name.endswith(".docx"):
|
| 45 |
text = extract_text_from_docx(file)
|
| 46 |
else:
|
| 47 |
+
return {"error": "Unsupported file format"}, "❌ File format not supported"
|
| 48 |
|
| 49 |
extracted_data = extract_info(text)
|
| 50 |
return extracted_data, "✅ Resume processed successfully!"
|
| 51 |
|
| 52 |
+
# 🎨 Gradio UI Layout
|
| 53 |
+
with gr.Blocks(title="Smart Resume Parser", css="body { max-width: 1100px; margin: auto; }") as demo:
|
| 54 |
+
gr.Markdown("## π Smart Resume Parser β Extract structured info from PDF/DOCX")
|
| 55 |
|
| 56 |
with gr.Row():
|
| 57 |
with gr.Column(scale=1):
|
|
|
|
| 64 |
with gr.Column(scale=2):
|
| 65 |
output_json = gr.JSON(label="🧠 Extracted Resume Data")
|
| 66 |
|
| 67 |
+
# π Button Actions
|
|
|
|
|
|
|
| 68 |
analyze_button.click(
|
| 69 |
fn=process_resume,
|
| 70 |
inputs=[file_input],
|
|
|
|
| 77 |
outputs=[output_json, status_box]
|
| 78 |
)
|
| 79 |
|
| 80 |
+
# π Mount Gradio to FastAPI for Hugging Face Spaces
|
| 81 |
app = gr.mount_gradio_app(app=FastAPI(), blocks=demo, path="/")
|
| 82 |
+
|
| 83 |
+
# 🧪 Local Dev Testing
|
| 84 |
+
if __name__ == "__main__":
|
| 85 |
+
import uvicorn
|
| 86 |
+
uvicorn.run("app:app", host="0.0.0.0", port=7860)
|
| 87 |
+
|
| 88 |
+
# ✅ Hugging Face Compatibility Fix
|
| 89 |
+
import sys
|
| 90 |
+
if __name__ != "__main__":
|
| 91 |
+
sys.modules["app"] = sys.modules[__name__]
|