Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,30 +1,17 @@
|
|
| 1 |
-
import re
|
| 2 |
import streamlit as st
|
| 3 |
import requests
|
| 4 |
-
|
| 5 |
-
import pdfplumber # pdfplumber for more accurate text extraction
|
| 6 |
from docx import Document
|
| 7 |
-
import io
|
| 8 |
|
| 9 |
# Set up API key for Google Generative Language
|
| 10 |
API_KEY = st.secrets["GOOGLE_API_KEY"]
|
| 11 |
|
| 12 |
-
def
|
| 13 |
-
"""Extract text from PDF
|
|
|
|
| 14 |
text = ""
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
for page_num in range(len(pdf_document)):
|
| 18 |
-
page = pdf_document.load_page(page_num)
|
| 19 |
-
text += page.get_text("text") # Extract text from page
|
| 20 |
-
return text
|
| 21 |
-
|
| 22 |
def extract_text_from_pdf_pdfplumber(pdf_file):
    """Extract text from a PDF using pdfplumber.

    Args:
        pdf_file: A file path or file-like object accepted by pdfplumber.open().

    Returns:
        The concatenated text of every page. Pages with no extractable text
        (e.g. scanned/image-only pages) contribute nothing instead of raising.
    """
    text = ""
    with pdfplumber.open(pdf_file) as pdf:
        for page in pdf.pages:
            # pdfplumber's extract_text() returns None when a page has no
            # text layer; the original `text += page.extract_text()` raised
            # TypeError in that case. Coalesce None to "".
            text += page.extract_text() or ""
    return text
|
| 29 |
|
| 30 |
def extract_text_from_docx(docx_file):
|
|
@@ -57,51 +44,14 @@ def analyze_documents(resume_text, job_description):
|
|
| 57 |
]
|
| 58 |
}
|
| 59 |
response = requests.post(url, headers=headers, json=data)
|
| 60 |
-
|
| 61 |
-
# Return the full API response
|
| 62 |
return response.json()
|
| 63 |
|
| 64 |
-
def extract_full_analysis(response):
    """Extract the full analysis (match percentage, missing keywords, etc.) from the API response.

    Args:
        response: Parsed JSON dict from the analysis API. Expected to carry the
            model text at response["choices"][0]["text"].
            NOTE(review): the rest of this file reads Gemini responses via
            "candidates"/"content"/"parts" — confirm this "choices" schema
            is ever produced by the endpoint being called.

    Returns:
        Dict with keys "match_percentage", "missing_keywords",
        "final_thoughts", "recommendations"; each falls back to a
        human-readable placeholder when its section is absent.
    """
    try:
        # Get the analysis content from the API response.
        analysis_content = response.get("choices", [{}])[0].get("text", "")

        # Regexes to pull each labelled section out of the free-form text.
        match_percentage = re.search(r"Match Percentage:.*?([a-zA-Z0-9\s\-\(\)<>\d]+%)", analysis_content)
        missing_keywords = re.search(r"Missing Keywords:([\s\S]*?)(?=\n\n|Final Thoughts)", analysis_content)
        final_thoughts = re.search(r"Final Thoughts:\n\n([\s\S]*?)(?=\n\n|Recommendations)", analysis_content)
        recommendations = re.search(r"Recommendations:\n\n([\s\S]*?)(?=\n\n|$)", analysis_content)

        # Extracted content. The percentage character class includes \s, so the
        # raw group carries the leading space after the colon (" 85%"); strip()
        # fixes that — previously the value was returned unstripped.
        match_percentage = match_percentage.group(1).strip() if match_percentage else "Match Percentage: N/A"
        missing_keywords = missing_keywords.group(1).strip() if missing_keywords else "No missing keywords identified."
        final_thoughts = final_thoughts.group(1).strip() if final_thoughts else "No final thoughts provided."
        recommendations = recommendations.group(1).strip() if recommendations else "No recommendations provided."

        return {
            "match_percentage": match_percentage,
            "missing_keywords": missing_keywords,
            "final_thoughts": final_thoughts,
            "recommendations": recommendations
        }

    except Exception as e:
        # Best-effort boundary: surface the problem in the UI and return
        # placeholder values so callers always get the same dict shape.
        st.error(f"Error extracting analysis: {str(e)}")
        return {
            "match_percentage": "Match Percentage: N/A",
            "missing_keywords": "Error extracting missing keywords.",
            "final_thoughts": "Error extracting final thoughts.",
            "recommendations": "Error extracting recommendations."
        }
|
| 97 |
-
|
| 98 |
def display_resume(file, index):
|
| 99 |
"""Display uploaded resume content."""
|
| 100 |
file_type = file.name.split('.')[-1].lower()
|
| 101 |
unique_key = f"{file.name}_{index}" # Ensure the key is unique by appending an index
|
| 102 |
if file_type == 'pdf':
|
| 103 |
-
|
| 104 |
-
text = extract_text_from_pdf_pymupdf(file)
|
| 105 |
st.text_area(f"Parsed Resume Content - {file.name}", text, height=400, key=unique_key)
|
| 106 |
elif file_type == 'docx':
|
| 107 |
text = extract_text_from_docx(file)
|
|
@@ -110,27 +60,55 @@ def display_resume(file, index):
|
|
| 110 |
st.error(f"Unsupported file type for {file.name}. Please upload a PDF or DOCX file.")
|
| 111 |
|
| 112 |
def analyze_multiple_resumes(resumes, job_description):
|
| 113 |
-
"""Analyze multiple resumes."""
|
| 114 |
-
|
| 115 |
for index, resume in enumerate(resumes):
|
| 116 |
resume.seek(0) # Reset file pointer
|
| 117 |
file_type = resume.name.split('.')[-1].lower()
|
| 118 |
|
| 119 |
# Extract resume text based on file type
|
| 120 |
if file_type == 'pdf':
|
| 121 |
-
|
| 122 |
elif file_type == 'docx':
|
| 123 |
-
|
| 124 |
|
| 125 |
-
# Analyze the resume
|
| 126 |
-
analysis = analyze_documents(
|
| 127 |
-
full_analysis = extract_full_analysis(analysis)
|
| 128 |
|
| 129 |
-
|
| 130 |
-
|
| 131 |
-
|
| 132 |
-
|
| 133 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 134 |
|
| 135 |
# Streamlit app configuration
|
| 136 |
st.set_page_config(page_title="ATS Resume Evaluation System", layout="wide")
|
|
|
|
|
|
|
| 1 |
import streamlit as st
|
| 2 |
import requests
|
| 3 |
+
from PyPDF2 import PdfReader
|
|
|
|
| 4 |
from docx import Document
|
|
|
|
| 5 |
|
| 6 |
# Set up API key for Google Generative Language
|
| 7 |
API_KEY = st.secrets["GOOGLE_API_KEY"]
|
| 8 |
|
| 9 |
+
def extract_text_from_pdf(pdf_file):
    """Extract text from a PDF file.

    Args:
        pdf_file: A file path or binary file-like object readable by
            PyPDF2's PdfReader (e.g. a Streamlit UploadedFile).

    Returns:
        The concatenated text of every page. Pages with no extractable
        text (e.g. scanned/image-only pages) contribute nothing.
    """
    reader = PdfReader(pdf_file)
    text = ""
    for page in reader.pages:
        # extract_text() can yield None/empty for image-only pages; the
        # original `text += page.extract_text()` raised TypeError on None.
        text += page.extract_text() or ""
    return text
|
| 16 |
|
| 17 |
def extract_text_from_docx(docx_file):
|
|
|
|
| 44 |
]
|
| 45 |
}
|
| 46 |
response = requests.post(url, headers=headers, json=data)
|
|
|
|
|
|
|
| 47 |
return response.json()
|
| 48 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 49 |
def display_resume(file, index):
|
| 50 |
"""Display uploaded resume content."""
|
| 51 |
file_type = file.name.split('.')[-1].lower()
|
| 52 |
unique_key = f"{file.name}_{index}" # Ensure the key is unique by appending an index
|
| 53 |
if file_type == 'pdf':
|
| 54 |
+
text = extract_text_from_pdf(file)
|
|
|
|
| 55 |
st.text_area(f"Parsed Resume Content - {file.name}", text, height=400, key=unique_key)
|
| 56 |
elif file_type == 'docx':
|
| 57 |
text = extract_text_from_docx(file)
|
|
|
|
| 60 |
st.error(f"Unsupported file type for {file.name}. Please upload a PDF or DOCX file.")
|
| 61 |
|
| 62 |
def analyze_multiple_resumes(resumes, job_description):
    """Analyze multiple resumes against a job description and display results.

    For each uploaded file: extract its text, send it with the job
    description to analyze_documents(), render the model's response, and
    show a per-resume match percentage. Finishes with the average match
    percentage across all successfully analyzed resumes.

    Args:
        resumes: Iterable of uploaded file objects (PDF or DOCX), each with
            .name and .seek().
        job_description: Job description text to compare each resume against.
    """
    match_percentages = []
    for resume in resumes:
        resume.seek(0)  # Reset file pointer
        file_type = resume.name.split('.')[-1].lower()

        # Extract resume text based on file type.
        if file_type == 'pdf':
            resume_text = extract_text_from_pdf(resume)
        elif file_type == 'docx':
            resume_text = extract_text_from_docx(resume)
        else:
            # Previously an unsupported type fell through with resume_text
            # unbound, raising NameError on the analyze_documents call.
            st.error(f"Unsupported file type for {resume.name}. Please upload a PDF or DOCX file.")
            continue

        # Analyze the resume text.
        analysis = analyze_documents(resume_text, job_description)

        if "candidates" in analysis:
            for candidate in analysis["candidates"]:
                if "content" in candidate and "parts" in candidate["content"]:
                    for part in candidate["content"]["parts"]:
                        response_text = part["text"]
                        st.write(response_text)

                        # Extract match percentage safely from the free-form
                        # model output: keep only the digits after the colon.
                        match_percentage = None
                        for line in response_text.split("\n"):
                            if "match percentage" in line.lower():
                                percentage_str = ''.join(filter(str.isdigit, line.split(":")[-1].strip()))
                                if percentage_str:  # valid numeric match percentage
                                    try:
                                        match_percentage = int(percentage_str)
                                        # Cap the match percentage to 100.
                                        if match_percentage > 100:
                                            match_percentage = 100
                                    except ValueError:
                                        st.error(f"Error processing match percentage in resume {resume.name}")
                                        match_percentage = 0  # default to 0 on failure

                        if match_percentage is not None:
                            match_percentages.append(match_percentage)
                            st.write(f"### Match Percentage for {resume.name}: {match_percentage}%")
                            st.progress(match_percentage / 100)  # progress wants 0.0-1.0

    # Display overall match percentage across all resumes.
    if match_percentages:
        avg_match_percentage = sum(match_percentages) / len(match_percentages)
        st.write(f"### Average Match Percentage for All Resumes: {avg_match_percentage:.2f}%")
        st.progress(avg_match_percentage / 100)  # progress wants 0.0-1.0
|
| 112 |
|
| 113 |
# Streamlit app configuration
|
| 114 |
st.set_page_config(page_title="ATS Resume Evaluation System", layout="wide")
|