Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -11,6 +11,9 @@ import PyPDF2
|
|
| 11 |
from PIL import Image
|
| 12 |
import pytesseract
|
| 13 |
from pdf2image import convert_from_path
|
|
|
|
|
|
|
|
|
|
| 14 |
|
| 15 |
# Download necessary NLTK data
|
| 16 |
nltk.download('punkt')
|
|
@@ -54,6 +57,13 @@ def extract_text_with_ocr(pdf_file):
|
|
| 54 |
text += pytesseract.image_to_string(image)
|
| 55 |
return text
|
| 56 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 57 |
# Unified PDF extraction function
|
| 58 |
def extract_text_from_pdf(pdf_file):
|
| 59 |
"""Extract text using multiple methods."""
|
|
@@ -67,6 +77,9 @@ def extract_text_from_pdf(pdf_file):
|
|
| 67 |
except Exception as e:
|
| 68 |
print(f"Error with PyMuPDF: {e}")
|
| 69 |
|
|
|
|
|
|
|
|
|
|
| 70 |
# Attempt PyPDF2 extraction
|
| 71 |
try:
|
| 72 |
text = extract_text_with_pypdf2(pdf_file)
|
|
@@ -75,6 +88,9 @@ def extract_text_from_pdf(pdf_file):
|
|
| 75 |
except Exception as e:
|
| 76 |
print(f"Error with PyPDF2: {e}")
|
| 77 |
|
|
|
|
|
|
|
|
|
|
| 78 |
# Attempt OCR as a last resort
|
| 79 |
try:
|
| 80 |
text = extract_text_with_ocr(pdf_file)
|
|
@@ -134,6 +150,10 @@ def analyze_documents(resume_text, job_description):
|
|
| 134 |
Resume: {resume_text}
|
| 135 |
"""
|
| 136 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 137 |
url = f"https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash-latest:generateContent?key={API_KEY}"
|
| 138 |
headers = {'Content-Type': 'application/json'}
|
| 139 |
data = {
|
|
@@ -142,7 +162,19 @@ def analyze_documents(resume_text, job_description):
|
|
| 142 |
]
|
| 143 |
}
|
| 144 |
response = requests.post(url, headers=headers, json=data)
|
| 145 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 146 |
|
| 147 |
# Streamlit app configuration
|
| 148 |
st.set_page_config(page_title="ATS Resume Evaluation System", layout="wide")
|
|
@@ -166,16 +198,28 @@ resume_file = st.file_uploader("Upload Resume (PDF or DOCX)", type=["pdf", "docx
|
|
| 166 |
# Process the uploaded resume and job description
|
| 167 |
if resume_file:
|
| 168 |
if job_description:
|
| 169 |
-
|
| 170 |
-
|
| 171 |
-
|
| 172 |
-
|
| 173 |
-
|
| 174 |
-
|
| 175 |
-
|
| 176 |
-
|
| 177 |
-
|
| 178 |
-
|
| 179 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 180 |
else:
|
| 181 |
st.warning("Please enter the job description to begin analysis.")
|
|
|
|
| 11 |
from PIL import Image
|
| 12 |
import pytesseract
|
| 13 |
from pdf2image import convert_from_path
|
| 14 |
+
import docx # For DOCX processing
|
| 15 |
+
import io
|
| 16 |
+
import os
|
| 17 |
|
| 18 |
# Download necessary NLTK data
|
| 19 |
nltk.download('punkt')
|
|
|
|
| 57 |
text += pytesseract.image_to_string(image)
|
| 58 |
return text
|
| 59 |
|
| 60 |
+
# DOCX text extraction
def extract_text_from_docx(docx_file):
    """Return the paragraph text of a DOCX document as a single string.

    Args:
        docx_file: Whatever ``docx.Document`` accepts as its source; the
            caller in this file passes the uploaded resume object.

    Returns:
        The ``text`` of every entry in the document's ``paragraphs``
        collection, joined with newline characters.
    """
    # NOTE(review): only `paragraphs` is read here; text held in tables,
    # headers or footers is presumably not part of that collection --
    # confirm against the python-docx documentation.
    document = docx.Document(docx_file)
    paragraph_texts = [paragraph.text for paragraph in document.paragraphs]
    return '\n'.join(paragraph_texts)
|
| 66 |
+
|
| 67 |
# Unified PDF extraction function
|
| 68 |
def extract_text_from_pdf(pdf_file):
|
| 69 |
"""Extract text using multiple methods."""
|
|
|
|
| 77 |
except Exception as e:
|
| 78 |
print(f"Error with PyMuPDF: {e}")
|
| 79 |
|
| 80 |
+
# Reset file pointer
|
| 81 |
+
pdf_file.seek(0)
|
| 82 |
+
|
| 83 |
# Attempt PyPDF2 extraction
|
| 84 |
try:
|
| 85 |
text = extract_text_with_pypdf2(pdf_file)
|
|
|
|
| 88 |
except Exception as e:
|
| 89 |
print(f"Error with PyPDF2: {e}")
|
| 90 |
|
| 91 |
+
# Reset file pointer
|
| 92 |
+
pdf_file.seek(0)
|
| 93 |
+
|
| 94 |
# Attempt OCR as a last resort
|
| 95 |
try:
|
| 96 |
text = extract_text_with_ocr(pdf_file)
|
|
|
|
| 150 |
Resume: {resume_text}
|
| 151 |
"""
|
| 152 |
|
| 153 |
+
API_KEY = os.getenv("GEMINI_API_KEY") # Ensure you set this environment variable securely
|
| 154 |
+
if not API_KEY:
|
| 155 |
+
return {"Match Percentage": "API Key Missing", "Recommendations": "Please set the GEMINI_API_KEY environment variable."}
|
| 156 |
+
|
| 157 |
url = f"https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash-latest:generateContent?key={API_KEY}"
|
| 158 |
headers = {'Content-Type': 'application/json'}
|
| 159 |
data = {
|
|
|
|
| 162 |
]
|
| 163 |
}
|
| 164 |
response = requests.post(url, headers=headers, json=data)
|
| 165 |
+
|
| 166 |
+
if response.status_code == 200:
|
| 167 |
+
try:
|
| 168 |
+
result = response.json()
|
| 169 |
+
# Parse the response as needed. This is a placeholder.
|
| 170 |
+
return {
|
| 171 |
+
"Match Percentage": result.get('choices', [{}])[0].get('text', 'N/A').strip(),
|
| 172 |
+
"Recommendations": "Placeholder for actual recommendations."
|
| 173 |
+
}
|
| 174 |
+
except ValueError:
|
| 175 |
+
return {"Match Percentage": "Error", "Recommendations": "Failed to parse response."}
|
| 176 |
+
else:
|
| 177 |
+
return {"Match Percentage": "Error", "Recommendations": f"API request failed with status code {response.status_code}."}
|
| 178 |
|
| 179 |
# Streamlit app configuration
|
| 180 |
st.set_page_config(page_title="ATS Resume Evaluation System", layout="wide")
|
|
|
|
| 198 |
# Process the uploaded resume and job description.
#
# Flow: pick a text extractor for the upload, extract the resume text,
# normalise both documents, send them for analysis and render the result.
# Any exception raised along the way is reported in the UI instead of
# crashing the app.
if resume_file:
    if job_description:
        try:
            pdf_mime = "application/pdf"
            docx_mime = "application/vnd.openxmlformats-officedocument.wordprocessingml.document"

            # The reported MIME type is trusted first. When it is neither of
            # the two supported values, fall back to the file extension so a
            # valid .pdf/.docx reported under a generic type is still handled.
            file_kind = resume_file.type
            if file_kind not in (pdf_mime, docx_mime):
                file_name = (getattr(resume_file, "name", "") or "").lower()
                if file_name.endswith(".pdf"):
                    file_kind = pdf_mime
                elif file_name.endswith(".docx"):
                    file_kind = docx_mime

            extractors = {
                pdf_mime: extract_text_from_pdf,
                docx_mime: extract_text_from_docx,
            }
            extractor = extractors.get(file_kind)

            resume_text = ""
            if extractor is None:
                # Report the unsupported type once; do not fall through to
                # the extraction-failure message below, which would be
                # misleading because no extraction was attempted.
                st.error("Unsupported file type.")
            else:
                resume_text = extractor(resume_file)

                if resume_text:
                    cleaned_resume = clean_and_normalize_text(resume_text)
                    cleaned_job_description = clean_and_normalize_text(job_description)

                    # Analyze the resume and job description
                    result = analyze_documents(cleaned_resume, cleaned_job_description)

                    # Display the analysis results
                    st.write(f"**Match Percentage**: {result.get('Match Percentage', 'N/A')}")
                    st.write(f"**Recommendations**: {result.get('Recommendations', 'N/A')}")
                else:
                    st.error("Failed to extract text from the uploaded file.")
        except Exception as e:
            # Top-level UI boundary: surface the failure to the user.
            st.error(f"An error occurred during processing: {e}")
    else:
        st.warning("Please enter the job description to begin analysis.")
|