Spaces:
Sleeping
Sleeping
File size: 8,311 Bytes
3e26ed6 c28a311 3524511 3cd34b3 ac1dad7 3524511 ac1dad7 4690f7b 9a1feff 3cd34b3 716287a 3cd34b3 716287a d607db1 ac1dad7 716287a ac1dad7 9a1feff 3e26ed6 0e2e798 4926c60 0e2e798 14dcd22 0e2e798 14dcd22 583146a ac1dad7 583146a ac1dad7 4926c60 583146a ac1dad7 4926c60 583146a d607db1 4926c60 ac1dad7 5036f94 4926c60 efa524a 3eec01c 5036f94 3e26ed6 3eec01c 3e26ed6 3eec01c 3e26ed6 3eec01c 5036f94 3e26ed6 3eec01c 3e26ed6 3eec01c 3e26ed6 3eec01c c28a311 3eec01c 3e26ed6 3eec01c 3e26ed6 3eec01c 3e26ed6 c28a311 3e26ed6 c28a311 3e26ed6 c28a311 ac1dad7 3eec01c 3e26ed6 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 |
import html
import re

import pandas as pd
import requests
import streamlit as st
from docx import Document
from PyPDF2 import PdfReader
# API key for the Google Generative Language (Gemini) endpoint, read from
# Streamlit secrets under "GOOGLE_API_KEY". analyze_documents() uses it to
# authenticate its request.
API_KEY = st.secrets["GOOGLE_API_KEY"]
def extract_text_from_pdf(pdf_file):
    """Return the text of every page of a PDF, concatenated in page order.

    ``pdf_file`` is passed straight to ``PdfReader``. No separator is
    inserted between pages.
    """
    pdf = PdfReader(pdf_file)
    return "".join(page.extract_text() for page in pdf.pages)
def extract_text_from_docx(docx_file):
    """Return the paragraph text of a DOCX file, one paragraph per line.

    ``docx_file`` is passed straight to ``Document``. Every paragraph in
    ``paragraphs`` contributes its text followed by a newline, so the result
    ends with a trailing newline whenever the document has any paragraph.
    """
    document = Document(docx_file)
    lines = [paragraph.text + "\n" for paragraph in document.paragraphs]
    return "".join(lines)
# Compiled once at import time; both patterns are reused for every resume.
_EMAIL_RE = re.compile(r'[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}')
_PHONE_RE = re.compile(r'\+?\d[\d -]{8,12}\d')


def extract_contact_info(resume_text):
    """Extract name, email, and phone number from resume text.

    Args:
        resume_text: Plain text of the resume. ``None`` is treated like an
            empty string.

    Returns:
        A dict with the keys ``"name"``, ``"email"`` and ``"phone"``:

        * ``name`` is the first non-blank line, stripped (a heuristic: the
          name is assumed to head the resume), or ``"Name Not Found"``.
        * ``email`` is the first email-looking match, or
          ``"Email Not Found"``.
        * ``phone`` is the first phone-looking match, or
          ``"Contact Not Found"``.
    """
    resume_text = resume_text or ""

    email = _EMAIL_RE.search(resume_text)
    phone = _PHONE_RE.search(resume_text)

    # Extracted text frequently starts with blank or whitespace-only lines;
    # skip them so the name is not reported as an empty string.
    name = next(
        (line.strip() for line in resume_text.splitlines() if line.strip()),
        "Name Not Found",
    )

    return {
        "name": name,
        "email": email.group() if email else "Email Not Found",
        "phone": phone.group() if phone else "Contact Not Found",
    }
def analyze_documents(resume_text, job_description, timeout=60):
    """Ask the Gemini API to evaluate a resume against a job description.

    Builds a fixed ATS-style prompt around the two texts and posts it to the
    ``gemini-1.5-flash-latest`` ``generateContent`` endpoint.

    Args:
        resume_text: Plain text of the resume.
        job_description: Plain text of the job description.
        timeout: Seconds to wait for the HTTP request before giving up.
            Without a timeout ``requests`` can block indefinitely.

    Returns:
        The decoded JSON body of the response, whatever its HTTP status.
        Callers in this file look for generated text under
        ``candidates[*].content.parts[*].text``.

    Raises:
        requests.RequestException: On connection failure or timeout.
        ValueError: If the response body is not valid JSON.
    """
    custom_prompt = f"""
Please analyze the following resume in the context of the job description provided. Strictly check every single line in the job description and analyze my resume whether there is a match exactly. Strictly maintain high ATS standards and give scores only to the correct ones. Focus on hard skills which are missing and also soft skills which are missing. Provide the following details.:
1. The match percentage of the resume to the job description. Display this.
2. A list of missing keywords accurate ones.
3. Final thoughts on the resume's overall match with the job description in 3 lines.
4. Recommendations on how to add the missing keywords and improve the resume in 3-4 points with examples.
Please display in the above order don't mention the numbers like 1. 2. etc and strictly follow ATS standards so that analysis will be accurate. Strictly follow the above templates omg. don't keep changing every time.
Strictly follow the above things and template which has to be displayed and don't keep changing again and again. Don't fucking change the template from above.
Title should be Resume analysis and maintain the same title for all. Also if someone uploads the same unchanged resume twice, keep in mind to give the same results. Display new ones only if they have changed their resume according to your suggestions or at least few changes.
Job Description: {job_description}
Resume: {resume_text}
"""
    url = "https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash-latest:generateContent"
    headers = {
        'Content-Type': 'application/json',
        # The key travels in a header rather than the query string so it
        # cannot leak through logged URLs or exception messages that echo
        # the request URL.
        'x-goog-api-key': API_KEY,
    }
    payload = {
        "contents": [
            {"role": "user", "parts": [{"text": custom_prompt}]}
        ]
    }
    response = requests.post(url, headers=headers, json=payload, timeout=timeout)
    return response.json()
def display_resume(file, index):
    """Display the parsed text of an uploaded resume in a read-out text area.

    Args:
        file: Uploaded file object exposing ``name``; its extension
            (case-insensitive) selects the PDF or DOCX extractor.
        index: Position of the file in the upload list. It is appended to
            the file name to form the widget key, so two uploads with the
            same name still get distinct keys.

    For any other extension an error message is shown and nothing else is
    rendered. This function does not rewind ``file``; callers that read it
    again should ``seek(0)`` first.
    """
    file_type = file.name.split('.')[-1].lower()

    # Reuse the module's extractors instead of repeating their loops here,
    # so preview and analysis always parse a file the same way.
    if file_type == 'pdf':
        text = extract_text_from_pdf(file)
    elif file_type == 'docx':
        text = extract_text_from_docx(file)
    else:
        st.error("Unsupported file type. Please upload a PDF or DOCX file.")
        return

    st.text_area(
        f"Parsed Resume Content - {file.name}",
        text,
        height=400,
        key=f"{file.name}_{index}",
    )
# Page-level configuration: browser tab title and a wide layout.
st.set_page_config(page_title="ATS Resume Evaluation System", layout="wide")

# Stylesheet for the per-resume result card: `.highlight` is the card
# container, `.highlight-bold` the green field labels inside it.
_HIGHLIGHT_CSS = """
<style>
.highlight {
background-color: #f9f9f9;
border-left: 6px solid #4CAF50;
padding: 10px;
margin-bottom: 10px;
font-size: 16px;
}
.highlight-bold {
font-weight: bold;
color: #4CAF50;
}
</style>
"""
st.markdown(_HIGHLIGHT_CSS, unsafe_allow_html=True)
# Centered page heading, rendered as raw HTML.
_TITLE_HTML = '<h1 style="text-align: center; color: #4CAF50;">ππ Resume Evaluation System π</h1>'
st.markdown(_TITLE_HTML, unsafe_allow_html=True)

# Free-text job description the resumes are compared against.
job_description = st.text_area(
    "Job Description:",
    height=200,
    placeholder="Paste the job description here...",
)

# One or more resumes, restricted to PDF and DOCX.
resumes = st.file_uploader(
    "Upload Your Resumes (PDF or DOCX)",
    type=["pdf", "docx"],
    accept_multiple_files=True,
)

# One row per analysed resume; feeds the results table and the CSV export.
highlighted_data = []

# Preview the parsed text of every upload as soon as it is available.
if resumes:
    st.write("Uploaded Resumes:")
    for position, uploaded in enumerate(resumes):
        st.write(f"π {uploaded.name}")
        display_resume(uploaded, position)
# ---------------------------------------------------------------------------
# Analysis: runs when the user clicks "Analyze Resumes".
# ---------------------------------------------------------------------------
MAX_RESUMES = 10  # upper bound on resumes analysed per click
_PERCENT_RE = re.compile(r'\d{1,3}%')


def _extract_match_percentage(analysis):
    """Return the first ``NN%`` figure from the model's reply, or "Not Found".

    Only text parts that mention "match percentage" (case-insensitive) are
    searched, because the prompt asks the model to report the score under
    that wording. The first hit wins, so a later part without a figure
    cannot overwrite a value that was already found. Parts without a
    ``"text"`` field and error payloads without ``"candidates"`` are skipped.
    """
    if not isinstance(analysis, dict):
        return "Not Found"
    for candidate in analysis.get("candidates", []):
        for part in candidate.get("content", {}).get("parts", []):
            response_text = part.get("text", "")
            if "match percentage" not in response_text.lower():
                continue
            found = _PERCENT_RE.search(response_text)
            if found:
                return found.group(0)
    return "Not Found"


def _result_card_html(contact_info, match_percentage):
    """Build the HTML summary card for one resume.

    Every interpolated value is HTML-escaped: the name is the raw first line
    of an uploaded document and the card is rendered with
    ``unsafe_allow_html=True``, so unescaped text could inject markup.
    """
    name = html.escape(contact_info['name'])
    email = html.escape(contact_info['email'])
    phone = html.escape(contact_info['phone'])
    percentage = html.escape(match_percentage)
    # The markup stays flush-left: Markdown would treat indented lines as a
    # code block instead of rendering them.
    return f"""
<div class="highlight">
<p><span class="highlight-bold">Name:</span> {name}</p>
<p><span class="highlight-bold">Email:</span> {email}</p>
<p><span class="highlight-bold">Contact:</span> {phone}</p>
<p><span class="highlight-bold">Match Percentage:</span> <span style="color: green; font-weight: bold;">{percentage}</span></p>
</div>
"""


if st.button("Analyze Resumes"):
    if not (job_description and resumes):
        st.error("Please provide the job description and upload resumes.")
    elif len(resumes) > MAX_RESUMES:
        st.error(f"You can upload a maximum of {MAX_RESUMES} resumes.")
    else:
        for resume in resumes:
            # The preview section has already read the stream; rewind it.
            resume.seek(0)

            file_type = resume.name.split('.')[-1].lower()
            if file_type == 'pdf':
                resume_text = extract_text_from_pdf(resume)
            elif file_type == 'docx':
                resume_text = extract_text_from_docx(resume)
            else:
                # Without this branch resume_text would be unbound, or left
                # over from the previous file.
                st.error("Unsupported file type. Please upload a PDF or DOCX file.")
                continue

            contact_info = extract_contact_info(resume_text)
            analysis = analyze_documents(resume_text, job_description)
            match_percentage = _extract_match_percentage(analysis)

            # Row for the summary table and the CSV export.
            highlighted_data.append({
                "Name": contact_info["name"],
                "Email": contact_info["email"],
                "Contact": contact_info["phone"],
                "Match Percentage": match_percentage,
            })

            # Per-resume card with contact details and score.
            st.markdown(
                _result_card_html(contact_info, match_percentage),
                unsafe_allow_html=True,
            )

        if highlighted_data:
            # Summary table of all analysed resumes.
            df = pd.DataFrame(highlighted_data)
            st.write("### Highlighted Results")
            st.table(df)

            # Same data offered as a CSV download.
            csv = df.to_csv(index=False)
            st.download_button(
                label="Download Results as CSV",
                data=csv,
                file_name="resume_analysis_results.csv",
                mime="text/csv",
            )
            st.success("Analysis Complete!")
|