Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| import docx2txt | |
| import PyPDF2 | |
| import re | |
| from sentence_transformers import SentenceTransformer, util | |
| import torch | |
| from sklearn.feature_extraction.text import CountVectorizer | |
| import difflib | |
# Load the semantic model once and share it across reruns.
@st.cache_resource
def load_model():
    """Return the shared ``all-MiniLM-L6-v2`` sentence-embedding model.

    Streamlit re-executes this script on every widget interaction, so the
    loader is wrapped in ``st.cache_resource``: the model is constructed on
    the first call and the same instance is returned on later reruns
    instead of being rebuilt each time.

    Returns:
        A ``SentenceTransformer`` instance for ``all-MiniLM-L6-v2``.
    """
    return SentenceTransformer('all-MiniLM-L6-v2')


model = load_model()
def extract_text_from_pdf(file):
    """Extract the text of every page of a PDF and return it as one string.

    Args:
        file: The PDF source handed to ``PyPDF2.PdfReader`` (the app passes
            the Streamlit upload object).

    Returns:
        The text of all pages that produced any, joined with newlines.
        Returns ``""`` when no page yields text.
    """
    pdf_reader = PyPDF2.PdfReader(file)
    page_texts = (page.extract_text() for page in pdf_reader.pages)
    # Join pages with a newline so the last word of one page is not fused
    # with the first word of the next; pages whose extraction result is
    # empty or None are skipped.
    return "\n".join(chunk for chunk in page_texts if chunk)
def extract_text_from_docx(file):
    """Return the plain text of a DOCX document.

    Args:
        file: The document source handed straight to ``docx2txt.process``
            (the app passes the Streamlit upload object).

    Returns:
        Whatever ``docx2txt.process`` produces for the document.
    """
    document_text = docx2txt.process(file)
    return document_text
def clean_text(text):
    """Normalise *text* for comparison.

    Every run of whitespace becomes a single space, leading and trailing
    whitespace is removed, and the result is lowercased.

    Args:
        text: The raw string to normalise.

    Returns:
        The normalised, lowercased string.
    """
    # split() with no argument breaks on whitespace runs and drops the
    # leading/trailing ones, so re-joining with one space collapses them.
    words = text.split()
    collapsed = " ".join(words)
    return collapsed.lower()
def calculate_semantic_match(resume_text, job_text):
    """Score how semantically close a resume is to a job description.

    Both texts are embedded with the module-level ``model`` and the two
    embeddings are compared with ``util.cos_sim``.

    Args:
        resume_text: Text of the resume.
        job_text: Text of the job description.

    Returns:
        The similarity multiplied by 100 and rounded to two decimals.
    """
    # NOTE(review): the encoder may truncate long inputs to its maximum
    # sequence length, so only the start of a long resume might be
    # scored - confirm against the model configuration.
    resume_vector, job_vector = (
        model.encode(document, convert_to_tensor=True)
        for document in (resume_text, job_text)
    )
    cosine = util.cos_sim(resume_vector, job_vector).item()
    percentage = float(cosine) * 100
    return round(percentage, 2)
def extract_keywords(text, top_n=30):
    """Return the most frequent non-stop-word terms of *text* as a set.

    Args:
        text: The document to analyse.
        top_n: Upper bound on the number of terms kept; forwarded to
            ``CountVectorizer(max_features=...)``. ``None`` keeps every
            term.

    Returns:
        A set of vocabulary terms. The set is empty when the text yields
        no usable terms (empty text, or only English stop words).
    """
    vectorizer = CountVectorizer(stop_words='english', max_features=top_n)
    try:
        # Only the learned vocabulary is needed, not the count matrix.
        vectorizer.fit([text])
    except ValueError:
        # CountVectorizer raises ValueError ("empty vocabulary") when no
        # term survives tokenisation and stop-word removal; report that
        # as "no keywords" instead of crashing the app.
        return set()
    return set(vectorizer.get_feature_names_out())
def find_missing_keywords(resume_text, job_text):
    """Return the job-description keywords that never occur in the resume.

    Args:
        resume_text: Text of the resume.
        job_text: Text of the job description.

    Returns:
        The set of top job-description keywords absent from the resume.
    """
    job_keywords = extract_keywords(job_text)
    # Compare against the resume's entire vocabulary (top_n=None), not
    # just its most frequent terms. Otherwise a job keyword that the
    # resume mentions, but not often enough to rank in its top terms,
    # would be wrongly reported as missing.
    resume_vocabulary = extract_keywords(resume_text, top_n=None)
    return job_keywords - resume_vocabulary
def _sentence_key(sentence):
    """Return the case-, whitespace- and trailing-period-insensitive form of a sentence."""
    return sentence.strip().rstrip(".").strip().lower()


# Function to extract exact missing sentences/phrases
def find_missing_sentences(resume_text, job_text):
    """List the job-description sentences that do not appear in the resume.

    Both texts are split on ``". "``. Sentences are compared ignoring case,
    surrounding whitespace and trailing periods, so the final sentence of a
    text (which keeps its ``"."`` after splitting) still matches the same
    sentence elsewhere.

    Args:
        resume_text: Text of the resume.
        job_text: Text of the job description.

    Returns:
        The job-description sentences (whitespace-stripped, original case,
        original order) with no counterpart in the resume. Empty fragments
        are never reported.
    """
    # Build the lookup set once instead of re-lowercasing the whole resume
    # for every job sentence.
    resume_keys = {_sentence_key(piece) for piece in resume_text.split(". ")}

    missing_sentences = []
    for sentence in job_text.split(". "):
        key = _sentence_key(sentence)
        # Skip blank fragments (e.g. from an empty job description).
        if key and key not in resume_keys:
            missing_sentences.append(sentence.strip())
    return missing_sentences
# --- Streamlit Interface ---
# NOTE(review): several UI strings below contain what looks like mis-decoded
# emoji (e.g. "β οΈ", "π"). They are kept verbatim here - confirm the
# intended characters against the original file.
st.set_page_config(page_title="Resume Matcher App", page_icon="π")
st.title("π Resume Matcher App")
st.write(
    "Upload your resume and paste the job description. "
    "This app will analyze and show how well your resume matches the job requirements."
)

uploaded_file = st.file_uploader("Upload Your Resume (PDF or DOCX)", type=["pdf", "docx"])
if uploaded_file is not None:
    # Check file size limit (5 MB)
    file_size = uploaded_file.size
    if file_size > 5 * 1024 * 1024:
        st.error("β οΈ File size exceeds 5MB. Please upload a smaller file.")
        st.stop()

job_description = st.text_area("Paste the Job Description Below:")

# A whitespace-only job description counts as "not provided".
if uploaded_file and job_description.strip():
    # Pick the extractor from the MIME type reported for the upload.
    if uploaded_file.type == "application/pdf":
        resume_text = extract_text_from_pdf(uploaded_file)
    elif uploaded_file.type in [
        "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
        "application/msword"
    ]:
        resume_text = extract_text_from_docx(uploaded_file)
    else:
        st.error("β οΈ Unsupported file format. Please upload a PDF or DOCX file.")
        st.stop()

    resume_text = clean_text(resume_text)
    job_description = clean_text(job_description)

    # Without any extracted text the similarity score would be computed
    # against an empty string and every job sentence would be reported as
    # missing, so stop with an explicit error instead.
    if not resume_text:
        st.error("Could not extract any text from the uploaded file. Please upload a text-based PDF or DOCX file.")
        st.stop()

    if st.button("π Check Match"):
        with st.spinner('Analyzing...'):
            match_percentage = calculate_semantic_match(resume_text, job_description)
            missing_sentences = find_missing_sentences(resume_text, job_description)

        st.subheader("β Results:")
        # st.progress only accepts integers in 0..100, while the score is
        # derived from a cosine similarity and can be negative; clamp it
        # so the bar never raises.
        st.progress(max(0, min(100, int(match_percentage))))
        st.metric(label="Matching Percentage", value=f"{match_percentage}%")

        # Show Missing Sentences (exact as in job description)
        if missing_sentences:
            st.markdown("### π Important Missing Sentences (Add these to improve):")
            for sentence in missing_sentences:
                st.write(f"- **{sentence}**")
            st.info("π If you include these important sentences, your match percentage can improve significantly and may exceed 90%!")
        else:
            st.success("π― No missing sentences detected! Your resume covers most important points.")

        # Tips based on percentage
        if match_percentage < 70:
            st.warning("Your resume matches less than 70% of the job description. Consider adding the above missing sentences.")
        elif match_percentage < 90:
            st.info("Good match! Minor improvements can make it even better.")
        else:
            st.success("Excellent match! Your resume strongly aligns with the job description.")
else:
    st.info("π Please upload your resume and paste the job description to proceed.")