Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,198 +1,91 @@
|
|
| 1 |
-
import
|
| 2 |
-
import pandas as pd
|
| 3 |
-
import google.generativeai as genai
|
| 4 |
-
import PyPDF2 as pdf
|
| 5 |
-
import io
|
| 6 |
-
import re
|
| 7 |
import streamlit as st
|
| 8 |
-
from transformers import pipeline, AutoModelForSequenceClassification, AutoTokenizer
|
| 9 |
from sklearn.feature_extraction.text import TfidfVectorizer
|
| 10 |
from sklearn.metrics.pairwise import cosine_similarity
|
| 11 |
-
import
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
|
| 13 |
-
#
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
raise ValueError("API key not found. Please set GOOGLE_API_KEY as an environment variable.")
|
| 17 |
|
| 18 |
-
#
|
| 19 |
-
|
| 20 |
|
| 21 |
-
#
|
| 22 |
-
|
| 23 |
-
education_extractor = pipeline("ner", model="dbmdz/bert-large-cased-finetuned-conll03-english", aggregation_strategy="simple")
|
| 24 |
|
| 25 |
-
#
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
#
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
Args:
|
| 71 |
-
text: Resume text.
|
| 72 |
-
Returns:
|
| 73 |
-
str: Comma-separated skills or "Not Available".
|
| 74 |
-
"""
|
| 75 |
-
ner_results = skill_extractor(text)
|
| 76 |
-
skills = [entity['word'] for entity in ner_results if entity['entity_group'] == 'SKILL']
|
| 77 |
-
return ", ".join(skills) if skills else "Not Available"
|
| 78 |
-
|
| 79 |
-
# Function to extract education details
|
| 80 |
-
def extract_education(text):
    """
    Extract education information using NER with a regex fallback.

    The module-level ``education_extractor`` pipeline is consulted first.
    When it reports no entity tagged ``EDUCATION``, the text is scanned for
    common degree names and "University of X" phrases instead.

    Args:
        text: Resume text.
    Returns:
        str: Comma-separated education details, or "Not Available".
    """
    # Blank or missing input has nothing to extract; bail out before
    # handing it to the model.
    if not text or not text.strip():
        return "Not Available"

    # NOTE(review): education_extractor is built from a CoNLL-03 checkpoint,
    # whose labels are normally PER/ORG/LOC/MISC, so an 'EDUCATION' group may
    # never be produced and the regex fallback below does the real work.
    # Confirm against the model card.
    ner_results = education_extractor(text)
    education_entities = [
        entity['word']
        for entity in ner_results
        if entity['entity_group'] == 'EDUCATION'
    ]
    if education_entities:
        return ", ".join(education_entities)

    education_patterns = [
        # Dots are escaped so "B.A" no longer matches e.g. "BEA", and the
        # short tokens are word-bounded so they cannot match inside longer
        # words.
        r"(Bachelor of .+|Master of .+"
        r"|\b(?:PhD|BSc|MSc|MBA|B\.A|M\.A|B\.Tech|M\.Tech|Engineering|Data Science)\b)",
        r"(University of [A-Za-z]+)",
    ]
    matches = []
    for pattern in education_patterns:
        matches.extend(re.findall(pattern, text))

    # Drop repeated hits (e.g. "Engineering" mentioned many times) while
    # keeping first-seen order.
    unique_matches = list(dict.fromkeys(matches))
    return ", ".join(unique_matches) if unique_matches else "Not Available"
|
| 103 |
-
|
| 104 |
-
# Function to calculate match percentage using TF-IDF
|
| 105 |
-
def calculate_match_percentage(resume_text, job_description):
    """
    Calculate the match percentage using TF-IDF and cosine similarity.

    Args:
        resume_text: Resume text.
        job_description: Job description.
    Returns:
        float: Match percentage (0-100). 0.0 when either input is blank or
        when no usable terms remain after stop-word removal.
    """
    # A blank document cannot match anything; answer directly instead of
    # letting the vectorizer fail on it.
    if not (resume_text or "").strip() or not (job_description or "").strip():
        return 0.0

    documents = [resume_text, job_description]
    tfidf_vectorizer = TfidfVectorizer(stop_words='english')
    try:
        tfidf_matrix = tfidf_vectorizer.fit_transform(documents)
    except ValueError:
        # Raised when the vocabulary is empty, e.g. both texts consist only
        # of stop words or punctuation.
        return 0.0

    cosine_sim = cosine_similarity(tfidf_matrix[0:1], tfidf_matrix[1:2])
    # Convert the NumPy scalar so callers receive a plain Python float.
    return round(float(cosine_sim[0][0]) * 100, 2)
|
| 120 |
|
| 121 |
-
|
| 122 |
-
def analyze_with_gemini(resume_text, job_description):
|
| 123 |
-
"""
|
| 124 |
-
Use Gemini Flash 1.5 to generate an ATS analysis.
|
| 125 |
|
| 126 |
-
|
| 127 |
-
|
| 128 |
-
job_description: Job description content.
|
| 129 |
-
Returns:
|
| 130 |
-
str: AI-generated analysis.
|
| 131 |
-
"""
|
| 132 |
prompt = f"""
|
| 133 |
-
|
|
|
|
| 134 |
Resume: {resume_text}
|
| 135 |
Job Description: {job_description}
|
| 136 |
-
|
| 137 |
-
|
| 138 |
-
- Skills
|
| 139 |
-
- Education
|
| 140 |
-
- Leadership Experience (years)
|
| 141 |
-
- Match Percentage
|
| 142 |
-
Provide a summary of the candidate's strengths in bullet points.
|
| 143 |
"""
|
| 144 |
-
|
| 145 |
-
|
| 146 |
-
|
| 147 |
-
|
| 148 |
-
|
| 149 |
-
|
| 150 |
-
|
| 151 |
-
|
| 152 |
-
|
| 153 |
-
|
|
|
|
|
|
|
| 154 |
|
| 155 |
-
# Streamlit Interface
# Page header, the two inputs, and a one-click analysis whose single-row
# result table can be viewed and downloaded as CSV.
st.title("Resume ATS Analysis Tool")
st.markdown("### Upload Resume PDF and Enter Job Description for Analysis")

uploaded_file = st.file_uploader("Upload Resume (PDF format)", type=["pdf"])
job_description = st.text_area("Job Description", height=200)

if not (uploaded_file and job_description.strip()):
    # One of the inputs is still missing: prompt for it.
    st.info("Upload a resume and provide a job description to start the analysis.")
elif st.button("Analyze"):
    resume_text = extract_pdf_text(uploaded_file)
    if not resume_text:
        st.error("No text extracted from PDF. Please upload a valid file.")
        st.stop()

    # Extract candidate details
    email, phone = extract_contact_info(resume_text)

    # Key order here determines the column order of the result table.
    results = {
        "Email": email,
        "Contact": phone,
        "Skills": extract_skills(resume_text),
        "Education": extract_education(resume_text),
        "Match Percentage": calculate_match_percentage(resume_text, job_description),
        "Gemini Analysis": analyze_with_gemini(resume_text, job_description),
    }
    results_frame = pd.DataFrame([results])

    # Display results
    st.write(results_frame)

    # Allow download as CSV
    st.download_button(
        label="Download Results as CSV",
        data=results_frame.to_csv(index=False),
        file_name="resume_analysis_results.csv",
        mime="text/csv",
    )
|
|
|
|
| 1 |
+
import spacy
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
import streamlit as st
|
|
|
|
| 3 |
from sklearn.feature_extraction.text import TfidfVectorizer
|
| 4 |
from sklearn.metrics.pairwise import cosine_similarity
|
| 5 |
+
import PyPDF2
|
| 6 |
+
import nltk
|
| 7 |
+
from nltk.corpus import stopwords
|
| 8 |
+
from nltk.tokenize import word_tokenize
|
| 9 |
+
from gemini_flash import GeminiFlash # Assuming Gemini Flash is installed
|
| 10 |
|
| 11 |
+
# Ensure that NLTK's tokenizer and stopword data are available
nltk.download('punkt')
nltk.download('stopwords')

# Load spaCy model for NER
nlp = spacy.load("en_core_web_sm")

# Initialize Gemini Flash for prompt engineering
prompt_engineer = GeminiFlash()

# Streamlit Interface
st.title("AI Resume and Job Description Analyzer")

# Step 1: Resume Upload
uploaded_file = st.file_uploader("Upload Resume (PDF)", type="pdf")

# Bound up front so the later steps can test it even when nothing has been
# uploaded yet (otherwise they would hit a NameError).
resume_text = ""

if uploaded_file is not None:
    # Read the PDF file
    pdf_reader = PyPDF2.PdfReader(uploaded_file)
    # `or ""` keeps the concatenation safe should a page yield no text.
    resume_text = "".join(page.extract_text() or "" for page in pdf_reader.pages)

    # Display the resume text
    st.text_area("Resume Text", resume_text, height=300)
    if not resume_text.strip():
        st.warning("No text could be extracted from the uploaded PDF.")

# Step 2: Job Description Input
job_description = st.text_area("Enter Job Description")
# Whitespace-only input is treated the same as no input.
has_job_description = bool(job_description.strip())
has_resume_text = bool(resume_text.strip())

if has_job_description:
    # Preprocess job description using NLTK: drop stop words, then stem.
    stop_words = set(stopwords.words("english"))
    ps = nltk.PorterStemmer()
    stemmed_tokens = [
        ps.stem(word)
        for word in word_tokenize(job_description)
        if word.lower() not in stop_words
    ]
    preprocessed_job_description = " ".join(stemmed_tokens)

    # Display preprocessed job description
    st.text_area("Processed Job Description", preprocessed_job_description)

# Step 3: Named Entity Recognition (NER) on Resume
if has_resume_text:
    doc = nlp(resume_text)
    entities = [(ent.text, ent.label_) for ent in doc.ents]

    # Display extracted entities
    st.subheader("Named Entities from Resume")
    st.write(entities)

if has_resume_text and has_job_description:
    # Step 4: Candidate-Job Relevance Using Cosine Similarity
    try:
        tfidf_matrix = TfidfVectorizer().fit_transform([job_description, resume_text])
    except ValueError:
        # The vectorizer rejects input that contains no usable terms
        # (for example, punctuation only).
        st.warning("Not enough text to compute a similarity score.")
    else:
        cosine_sim = cosine_similarity(tfidf_matrix[0:1], tfidf_matrix[1:2])
        st.write(f"Cosine Similarity (Relevance): {cosine_sim[0][0]:.2f}")

    # Step 5: Gemini Flash - Prompt Engineering (using Gemini Flash to craft relevant prompts for an LLM)
    prompt = f"""
    Given the resume text and job description, evaluate how well the candidate's qualifications match the job requirements.

    Resume: {resume_text}
    Job Description: {job_description}

    Based on the information provided, generate a detailed match score between the candidate and the job.
    """

    # Use Gemini Flash to refine and enhance the prompt (assuming Gemini Flash enhances the prompt)
    enhanced_prompt = prompt_engineer.refine_prompt(prompt)

    # Display the enhanced prompt (for debugging or transparency)
    st.subheader("Enhanced Prompt for LLM")
    st.write(enhanced_prompt)

    # The enhanced prompt is only displayed; it is not yet sent to an LLM.
    # A call that submits `enhanced_prompt` to a model and writes the
    # response would go here.
|
| 91 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|