# ahmedumeraziz's picture
# Update app.py
# 0b04e39 verified
import streamlit as st
import docx2txt
import PyPDF2
import re
from sentence_transformers import SentenceTransformer, util
import torch
from sklearn.feature_extraction.text import CountVectorizer
import difflib
# Load the sentence-embedding model once and share it module-wide.
@st.cache_resource
def load_model():
    """Construct the ``all-MiniLM-L6-v2`` SentenceTransformer.

    The function is wrapped in ``st.cache_resource`` so Streamlit can hand
    back the already-built model instead of constructing it again.
    """
    model_name = 'all-MiniLM-L6-v2'
    return SentenceTransformer(model_name)


model = load_model()
def extract_text_from_pdf(file):
    """Extract the text of every page of a PDF document.

    Args:
        file: The PDF source handed to ``PyPDF2.PdfReader`` (the app passes
            the Streamlit upload object).

    Returns:
        The text of all pages that produced any text, joined with newlines.
        An empty string when no page yields text.
    """
    pdf_reader = PyPDF2.PdfReader(file)
    page_texts = (page.extract_text() for page in pdf_reader.pages)
    # Pages are joined with a newline so the last word of one page is not
    # fused with the first word of the next; pages with no extractable text
    # (falsy result) are skipped.
    return "\n".join(page_text for page_text in page_texts if page_text)
def extract_text_from_docx(file):
    """Return the text that ``docx2txt.process`` extracts from ``file``."""
    document_text = docx2txt.process(file)
    return document_text
def clean_text(text):
    """Normalize text for comparison.

    Every run of whitespace becomes a single space, leading and trailing
    whitespace is dropped, and the result is lower-cased.
    """
    # split() with no separator breaks on whitespace runs and discards the
    # empty edges, so joining with one space collapses and trims in one step.
    collapsed = " ".join(text.split())
    return collapsed.lower()
def calculate_semantic_match(resume_text, job_text):
    """Score how similar a resume is to a job description.

    Both texts are embedded with the module-level ``model`` and compared by
    cosine similarity.

    Args:
        resume_text: Text of the resume.
        job_text: Text of the job description.

    Returns:
        The similarity as a percentage, rounded to two decimals and clamped
        to the range 0.0-100.0.
    """
    resume_embedding = model.encode(resume_text, convert_to_tensor=True)
    job_embedding = model.encode(job_text, convert_to_tensor=True)
    similarity = float(util.cos_sim(resume_embedding, job_embedding).item())
    percentage = round(similarity * 100, 2)
    # Cosine similarity spans [-1, 1] and floating-point error can push it a
    # hair past 1, so clamp: callers feed this to a 0-100 progress bar.
    return min(100.0, max(0.0, percentage))
def extract_keywords(text, top_n=30):
    """Return the most frequent non-stop-word terms of ``text``.

    Args:
        text: The text to analyse.
        top_n: Maximum number of terms to keep (passed to
            ``CountVectorizer`` as ``max_features``).

    Returns:
        A set of terms. Empty when ``text`` is empty, whitespace-only, or
        contains nothing but English stop words.
    """
    # Nothing to tokenize; skip the vectorizer, which would raise.
    if not text or not text.strip():
        return set()
    vectorizer = CountVectorizer(stop_words='english', max_features=top_n)
    try:
        # Only the learned vocabulary is needed, not the count matrix.
        vectorizer.fit([text])
    except ValueError:
        # scikit-learn raises ValueError ("empty vocabulary") when every
        # token is filtered out, e.g. a text made only of stop words.
        return set()
    return set(vectorizer.get_feature_names_out())
def find_missing_keywords(resume_text, job_text):
    """Return the job-description keywords that the resume does not contain.

    Keywords for both texts come from ``extract_keywords``; the result is
    the set of job keywords minus the set of resume keywords.
    """
    present = extract_keywords(resume_text)
    wanted = extract_keywords(job_text)
    return wanted.difference(present)
# Function to extract exact missing sentences/phrases
def find_missing_sentences(resume_text, job_text):
    """List the job-description sentences that do not appear in the resume.

    Both texts are split on ``". "``. Sentences are compared
    case-insensitively, ignoring surrounding whitespace and trailing
    periods, so the final sentence of a text (which keeps its ``"."`` after
    splitting) still matches its counterpart.

    Args:
        resume_text: Text of the resume.
        job_text: Text of the job description.

    Returns:
        The job sentences, as produced by the split and in their original
        order, that have no match in the resume. Empty fragments are never
        reported.
    """
    def _normalize(sentence):
        # Comparison key: trimmed, without trailing periods, lower-cased.
        return sentence.strip().rstrip(".").strip().lower()

    # Build the lookup once; a set gives O(1) membership per job sentence.
    resume_seen = {_normalize(sentence) for sentence in resume_text.split(". ")}
    missing_sentences = []
    for sentence in job_text.split(". "):
        key = _normalize(sentence)
        if key and key not in resume_seen:
            missing_sentences.append(sentence)
    return missing_sentences
# --- Streamlit Interface ---
# Page flow: upload a resume, paste a job description, then press the button
# to see the similarity score and the job sentences missing from the resume.
st.set_page_config(page_title="Resume Matcher App", page_icon="📝")
st.title("📝 Resume Matcher App")
st.write(
    "Upload your resume and paste the job description. "
    "This app will analyze and show how well your resume matches the job requirements."
)
uploaded_file = st.file_uploader("Upload Your Resume (PDF or DOCX)", type=["pdf", "docx"])
if uploaded_file is not None:
    # Reject uploads above 5 MB before any parsing is attempted.
    if uploaded_file.size > 5 * 1024 * 1024:
        st.error("⚠️ File size exceeds 5MB. Please upload a smaller file.")
        st.stop()
job_description = st.text_area("Paste the Job Description Below:")
if uploaded_file and job_description:
    # Choose the extractor from the MIME type reported for the upload.
    if uploaded_file.type == "application/pdf":
        extract_text = extract_text_from_pdf
    elif uploaded_file.type in [
        "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
        "application/msword"
    ]:
        extract_text = extract_text_from_docx
    else:
        st.error("⚠️ Unsupported file format. Please upload a PDF or DOCX file.")
        st.stop()
    try:
        resume_text = extract_text(uploaded_file)
    except Exception as exc:
        # UI boundary: the parsing libraries raise assorted exception types
        # on damaged files, so report the failure instead of a traceback.
        st.error(f"⚠️ Could not read the uploaded file: {exc}")
        st.stop()
    resume_text = clean_text(resume_text or "")
    job_description = clean_text(job_description)
    if not resume_text:
        # E.g. a PDF whose pages yield no extractable text: there is
        # nothing to score.
        st.error("⚠️ No readable text was found in the uploaded file.")
        st.stop()
    if not job_description:
        # The text area held only whitespace.
        st.info("📄 Please upload your resume and paste the job description to proceed.")
        st.stop()
    if st.button("🔍 Check Match"):
        with st.spinner('Analyzing...'):
            match_percentage = calculate_semantic_match(resume_text, job_description)
            missing_sentences = find_missing_sentences(resume_text, job_description)
        st.subheader("✅ Results:")
        # st.progress only accepts integers from 0 to 100, so clamp the score.
        st.progress(max(0, min(100, int(match_percentage))))
        st.metric(label="Matching Percentage", value=f"{match_percentage}%")
        # Show Missing Sentences (exact as in job description)
        if missing_sentences:
            st.markdown("### 🚀 Important Missing Sentences (Add these to improve):")
            for sentence in missing_sentences:
                st.write(f"- **{sentence}**")
            st.info("🔔 If you include these important sentences, your match percentage can improve significantly and may exceed 90%!")
        else:
            st.success("🎯 No missing sentences detected! Your resume covers most important points.")
        # Tips based on percentage
        if match_percentage < 70:
            st.warning("Your resume matches less than 70% of the job description. Consider adding the above missing sentences.")
        elif match_percentage < 90:
            st.info("Good match! Minor improvements can make it even better.")
        else:
            st.success("Excellent match! Your resume strongly aligns with the job description.")
else:
    st.info("📄 Please upload your resume and paste the job description to proceed.")