Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -4,6 +4,7 @@ from PyPDF2 import PdfReader
|
|
| 4 |
from docx import Document
|
| 5 |
import pandas as pd
|
| 6 |
import matplotlib.pyplot as plt
|
|
|
|
| 7 |
|
| 8 |
# Set up API key for Google Generative Language
|
| 9 |
API_KEY = st.secrets["GOOGLE_API_KEY"]
|
|
@@ -24,16 +25,45 @@ def extract_text_from_docx(docx_file):
|
|
| 24 |
text += para.text + "\n"
|
| 25 |
return text
|
| 26 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 27 |
def analyze_documents(resume_text, job_description):
|
| 28 |
"""Analyze resume text against the job description."""
|
| 29 |
custom_prompt = f"""
|
| 30 |
Please analyze the following resume in the context of the job description provided.
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
3. Final thoughts on the resume's overall match in 3 lines.
|
| 36 |
-
4. Recommendations on how to add missing keywords and improve the resume in 3-4 points.
|
| 37 |
Job Description: {job_description}
|
| 38 |
Resume: {resume_text}
|
| 39 |
"""
|
|
@@ -54,9 +84,11 @@ def display_resume(file, index):
|
|
| 54 |
unique_key = f"{file.name}_{index}" # Ensure the key is unique by appending an index
|
| 55 |
if file_type == 'pdf':
|
| 56 |
text = extract_text_from_pdf(file)
|
|
|
|
| 57 |
st.text_area(f"Parsed Resume Content - {file.name}", text, height=400, key=unique_key)
|
| 58 |
elif file_type == 'docx':
|
| 59 |
text = extract_text_from_docx(file)
|
|
|
|
| 60 |
st.text_area(f"Parsed Resume Content - {file.name}", text, height=400, key=unique_key)
|
| 61 |
else:
|
| 62 |
st.error(f"Unsupported file type for {file.name}. Please upload a PDF or DOCX file.")
|
|
@@ -67,53 +99,34 @@ def analyze_multiple_resumes(resumes, job_description):
|
|
| 67 |
for index, resume in enumerate(resumes):
|
| 68 |
resume.seek(0) # Reset file pointer
|
| 69 |
file_type = resume.name.split('.')[-1].lower()
|
| 70 |
-
|
| 71 |
# Extract resume text based on file type
|
| 72 |
if file_type == 'pdf':
|
| 73 |
resume_text = extract_text_from_pdf(resume)
|
| 74 |
elif file_type == 'docx':
|
| 75 |
resume_text = extract_text_from_docx(resume)
|
| 76 |
-
|
|
|
|
|
|
|
|
|
|
| 77 |
# Analyze the resume text
|
| 78 |
analysis = analyze_documents(resume_text, job_description)
|
| 79 |
-
|
| 80 |
-
if "
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
|
| 88 |
-
|
| 89 |
-
|
| 90 |
-
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
try:
|
| 96 |
-
match_percentage = int(percentage_str)
|
| 97 |
-
# Cap the match percentage to 100
|
| 98 |
-
if match_percentage > 100:
|
| 99 |
-
match_percentage = 100
|
| 100 |
-
except ValueError:
|
| 101 |
-
st.error(f"Error processing match percentage in resume {resume.name}")
|
| 102 |
-
match_percentage = 0 # Default to 0 if there's an issue
|
| 103 |
-
|
| 104 |
-
if match_percentage is not None:
|
| 105 |
-
match_percentages.append(match_percentage)
|
| 106 |
-
st.write(f"### Match Percentage for {resume.name}: {match_percentage}%")
|
| 107 |
-
st.progress(match_percentage / 100) # Convert to decimal format
|
| 108 |
-
|
| 109 |
-
# Pie chart for skills
|
| 110 |
-
labels = ["Matched", "Missing"]
|
| 111 |
-
sizes = [match_percentage, 100 - match_percentage]
|
| 112 |
-
fig, ax = plt.subplots()
|
| 113 |
-
ax.pie(sizes, labels=labels, autopct='%1.1f%%', startangle=140)
|
| 114 |
-
ax.axis('equal')
|
| 115 |
-
st.pyplot(fig)
|
| 116 |
-
|
| 117 |
# Display overall match percentage across all resumes
|
| 118 |
if match_percentages:
|
| 119 |
avg_match_percentage = sum(match_percentages) / len(match_percentages)
|
|
|
|
| 4 |
from docx import Document
|
| 5 |
import pandas as pd
|
| 6 |
import matplotlib.pyplot as plt
|
| 7 |
+
import re
|
| 8 |
|
| 9 |
# Set up API key for Google Generative Language
|
| 10 |
API_KEY = st.secrets["GOOGLE_API_KEY"]
|
|
|
|
| 25 |
text += para.text + "\n"
|
| 26 |
return text
|
| 27 |
|
| 28 |
+
def normalize_text(text):
    """Return *text* lowercased with each whitespace run collapsed to one space.

    Leading and trailing whitespace disappears as a by-product of splitting
    on whitespace and re-joining the pieces with single spaces.
    """
    # No-argument split() breaks on any run of whitespace (spaces, tabs,
    # newlines) and never yields empty tokens.
    tokens = text.split()
    single_spaced = " ".join(tokens)
    # Lowercase so later comparisons are not affected by letter case.
    return single_spaced.lower()
|
| 33 |
+
|
| 34 |
+
def clean_resume_text(text):
    """Collapse every whitespace run in *text* to one space and trim the ends.

    Args:
        text: The raw resume text (a ``str``).

    Returns:
        The text with spaces, tabs, carriage returns and newlines squeezed
        into single spaces and no leading or trailing whitespace.
    """
    # r'\s+' already matches \r, \n and \t, so a single pass is sufficient.
    # The former follow-up substitution for [\r\n\t]+ could never match
    # anything once this one had run.
    return re.sub(r'\s+', ' ', text).strip()
|
| 39 |
+
|
| 40 |
+
def _parse_percentage(line):
    """Return the percentage stated on *line* as an int in 0-100, or None.

    Only the integer part of the number is used ("85.5%" -> 85). The text
    after the phrase "match percentage" is searched first so that list
    numbering such as "1." in front of the phrase is not mistaken for the
    score.
    """
    lowered = line.lower()
    _, _, tail = lowered.partition("match percentage")
    number = r"([0-9]+)(?:\.[0-9]+)?"
    found = (
        re.search(number + r"\s*%", tail)        # "...: 85%"
        or re.search(number, tail)               # "...: 85"
        or re.search(number + r"\s*%", lowered)  # "85% match percentage"
    )
    if found is None:
        return None
    # Clamp so a malformed answer such as "250%" cannot break the progress
    # bar or pie chart that consume this value.
    return min(100, max(0, int(found.group(1))))


def extract_match_percentage(response):
    """Extract the match percentage from an analysis response.

    Scans the response text line by line for the phrase "match percentage"
    (case-insensitive) and parses the number stated on that line. If several
    lines qualify, the last one that contains a number wins.

    Args:
        response: Mapping expected to look like
            ``{"choices": [{"text": "..."}]}``.
            NOTE(review): this is the shape the code reads; confirm it
            matches what ``analyze_documents`` actually returns for the
            configured API.

    Returns:
        An int between 0 and 100. 0 is returned when no percentage is found
        or when the response does not have the expected shape (in which case
        the problem is also reported through ``st.error``).
    """
    try:
        analysis_content = response.get("choices", [{}])[0].get("text", "")
        match_percentage = None

        for line in analysis_content.split("\n"):
            if "match percentage" in line.lower():
                parsed = _parse_percentage(line)
                if parsed is not None:
                    match_percentage = parsed
        return match_percentage if match_percentage is not None else 0
    except (AttributeError, IndexError, KeyError, TypeError, ValueError) as e:
        # Malformed response (wrong container types, empty "choices", ...):
        # surface it in the UI and fall back to 0 rather than crash the page.
        st.error(f"Error extracting match percentage: {str(e)}")
        return 0  # Default to 0 if there's an error
|
| 58 |
+
|
| 59 |
def analyze_documents(resume_text, job_description):
|
| 60 |
"""Analyze resume text against the job description."""
|
| 61 |
custom_prompt = f"""
|
| 62 |
Please analyze the following resume in the context of the job description provided.
|
| 63 |
+
Consider exact matches for hard skills, soft skills, and experience keywords.
|
| 64 |
+
1. Report the exact match percentage of the resume to the job description.
|
| 65 |
+
2. List the missing keywords from the resume.
|
| 66 |
+
3. Provide final feedback on the resume’s overall relevance to the job description.
|
|
|
|
|
|
|
| 67 |
Job Description: {job_description}
|
| 68 |
Resume: {resume_text}
|
| 69 |
"""
|
|
|
|
| 84 |
unique_key = f"{file.name}_{index}" # Ensure the key is unique by appending an index
|
| 85 |
if file_type == 'pdf':
|
| 86 |
text = extract_text_from_pdf(file)
|
| 87 |
+
text = normalize_text(text) # Normalize text
|
| 88 |
st.text_area(f"Parsed Resume Content - {file.name}", text, height=400, key=unique_key)
|
| 89 |
elif file_type == 'docx':
|
| 90 |
text = extract_text_from_docx(file)
|
| 91 |
+
text = normalize_text(text) # Normalize text
|
| 92 |
st.text_area(f"Parsed Resume Content - {file.name}", text, height=400, key=unique_key)
|
| 93 |
else:
|
| 94 |
st.error(f"Unsupported file type for {file.name}. Please upload a PDF or DOCX file.")
|
|
|
|
| 99 |
for index, resume in enumerate(resumes):
|
| 100 |
resume.seek(0) # Reset file pointer
|
| 101 |
file_type = resume.name.split('.')[-1].lower()
|
| 102 |
+
|
| 103 |
# Extract resume text based on file type
|
| 104 |
if file_type == 'pdf':
|
| 105 |
resume_text = extract_text_from_pdf(resume)
|
| 106 |
elif file_type == 'docx':
|
| 107 |
resume_text = extract_text_from_docx(resume)
|
| 108 |
+
|
| 109 |
+
# Normalize and clean extracted text
|
| 110 |
+
resume_text = clean_resume_text(normalize_text(resume_text))
|
| 111 |
+
|
| 112 |
# Analyze the resume text
|
| 113 |
analysis = analyze_documents(resume_text, job_description)
|
| 114 |
+
|
| 115 |
+
if "choices" in analysis:
|
| 116 |
+
match_percentage = extract_match_percentage(analysis)
|
| 117 |
+
|
| 118 |
+
match_percentages.append(match_percentage)
|
| 119 |
+
st.write(f"### Match Percentage for {resume.name}: {match_percentage}%")
|
| 120 |
+
st.progress(match_percentage / 100) # Convert to decimal format
|
| 121 |
+
|
| 122 |
+
# Pie chart for skills
|
| 123 |
+
labels = ["Matched", "Missing"]
|
| 124 |
+
sizes = [match_percentage, 100 - match_percentage]
|
| 125 |
+
fig, ax = plt.subplots()
|
| 126 |
+
ax.pie(sizes, labels=labels, autopct='%1.1f%%', startangle=140)
|
| 127 |
+
ax.axis('equal')
|
| 128 |
+
st.pyplot(fig)
|
| 129 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 130 |
# Display overall match percentage across all resumes
|
| 131 |
if match_percentages:
|
| 132 |
avg_match_percentage = sum(match_percentages) / len(match_percentages)
|