"""Streamlit app that scores a resume against a job description.

The score blends two signals: the cosine similarity of sentence embeddings
of the two texts, and the share of nouns found in the job description that
also appear in the resume.
"""

import os
import tempfile

import spacy
import streamlit as st
from docx import Document
from pdf2image import convert_from_path
from PIL import Image
from PyPDF2 import PdfReader
from pytesseract import image_to_string
from sentence_transformers import SentenceTransformer, util
from unstructured.partition.auto import partition

# Load NLP models
# NOTE(review): these loads sit at module level, so they run every time the
# script is executed; if that is slow in practice, consider a cached loader.
nlp = spacy.load("en_core_web_sm")
model = SentenceTransformer('all-MiniLM-L6-v2')

# Extensions routed to OCR by extract_text().
_IMAGE_EXTENSIONS = (".jpg", ".jpeg", ".png")

# Relative weight of each signal in the final ATS score.
_SIMILARITY_WEIGHT = 0.7
_SKILL_WEIGHT = 0.3


# Text extraction functions
def extract_pdf_text(pdf_path: str) -> str:
    """Return the text layer of every page of a PDF, one page per line block.

    Pages are joined with a newline so the last word of one page cannot fuse
    with the first word of the next. ``or ""`` guards against a page yielding
    ``None`` instead of a string.
    """
    reader = PdfReader(pdf_path)
    return "\n".join(page.extract_text() or "" for page in reader.pages)


def extract_image_pdf_text(pdf_path: str) -> str:
    """Rasterise a PDF with pdf2image and OCR each page image."""
    return "".join(image_to_string(page) for page in convert_from_path(pdf_path))


def extract_image_text(image_path: str) -> str:
    """OCR a single image file."""
    # The context manager closes the underlying file handle once OCR is done.
    with Image.open(image_path) as image:
        return image_to_string(image)


def extract_word_text(docx_path: str) -> str:
    """Return the paragraph text of a .docx file, one paragraph per line."""
    doc = Document(docx_path)
    # Newline-separated so adjacent paragraphs do not merge into one token.
    return "\n".join(paragraph.text for paragraph in doc.paragraphs)


def extract_generic_text(file_path: str) -> str:
    """Extract text from any other file type via ``unstructured``'s partition."""
    elements = partition(filename=file_path)
    return " ".join(str(el) for el in elements)


def _extract_by_extension(file_path: str) -> str:
    """Dispatch to the extractor matching the file extension; errors propagate."""
    extension = os.path.splitext(file_path)[1].lower()

    if extension == ".pdf":
        try:
            text = extract_pdf_text(file_path)
        except Exception:
            # Text-layer parsing failed outright: OCR is the only option left.
            return extract_image_pdf_text(file_path)
        if text.strip():
            return text
        # A PDF that parses but yields no text is treated as scanned: try OCR,
        # and keep the (blank) text-layer result if OCR itself is unavailable.
        try:
            return extract_image_pdf_text(file_path)
        except Exception:
            return text

    if extension in _IMAGE_EXTENSIONS:
        return extract_image_text(file_path)
    if extension == ".docx":
        return extract_word_text(file_path)
    return extract_generic_text(file_path)


def extract_text(file_path: str) -> str:
    """Extract text from a file, choosing the extractor by extension.

    Never raises. Returns ``"File not found. Please check the path."`` when
    the path does not exist and ``"Error processing file: <message>"`` when
    extraction fails; otherwise returns the extracted text.
    """
    if not os.path.exists(file_path):
        return "File not found. Please check the path."
    try:
        return _extract_by_extension(file_path)
    except Exception as e:
        return f"Error processing file: {str(e)}"


# Skill extraction function
def extract_skills(text: str) -> list:
    """Return the sorted, lower-cased, de-duplicated skill terms found in text.

    A term is either an entity labelled "SKILL" or any non-stop-word token
    tagged as a noun. Sorting makes the result deterministic across runs.
    """
    doc = nlp(text)
    # NOTE(review): the "SKILL" branch presumably only fires with a custom NER
    # component; confirm the loaded pipeline actually emits that label.
    skills = {ent.text.lower() for ent in doc.ents if ent.label_ == "SKILL"}
    skills.update(
        token.text.lower()
        for token in doc
        if token.pos_ == "NOUN" and not token.is_stop
    )
    return sorted(skills)


# ATS score calculation
def calculate_ats_score(resume_text: str, job_text: str) -> tuple:
    """Score a resume against a job description.

    Returns a 4-tuple ``(final_ats_score, similarity_score,
    skill_match_score, missing_skills)``. The two component scores are
    percentages; the final score is their 0.7 / 0.3 weighted sum.
    ``missing_skills`` lists the job-description terms absent from the resume.
    ``skill_match_score`` is 0 when the job description yields no terms.
    """
    required_skills = extract_skills(job_text)
    resume_skills = set(extract_skills(resume_text))
    missing_skills = [
        skill for skill in required_skills if skill not in resume_skills
    ]

    resume_embedding = model.encode(resume_text, convert_to_tensor=True)
    job_embedding = model.encode(job_text, convert_to_tensor=True)
    similarity_score = util.cos_sim(resume_embedding, job_embedding)[0][0].item() * 100

    if required_skills:
        matched = len(required_skills) - len(missing_skills)
        skill_match_score = matched / len(required_skills) * 100
    else:
        skill_match_score = 0

    final_ats_score = (
        _SIMILARITY_WEIGHT * similarity_score + _SKILL_WEIGHT * skill_match_score
    )
    return final_ats_score, similarity_score, skill_match_score, missing_skills


def _read_uploaded_text(uploaded_file) -> str:
    """Extract text from an uploaded file via a private temporary file.

    The upload is written to a uniquely named temp file (keeping the original
    extension so extractor dispatch still works) and removed afterwards, so
    concurrent sessions cannot overwrite each other's files. Extraction errors
    propagate to the caller.
    """
    suffix = os.path.splitext(uploaded_file.name)[-1]
    tmp = tempfile.NamedTemporaryFile(delete=False, suffix=suffix)
    try:
        # Close the handle before extraction so other libraries can reopen it.
        with tmp:
            tmp.write(uploaded_file.getbuffer())
        return _extract_by_extension(tmp.name)
    finally:
        os.remove(tmp.name)


def _resolve_input_text(typed_text: str, uploaded_file) -> str:
    """Return the text to score; an uploaded file takes precedence over typed text."""
    if uploaded_file:
        return _read_uploaded_text(uploaded_file)
    return typed_text


# Streamlit UI
# The markdown bodies below stay flush-left inside their triple-quoted
# strings so the rendered text is exactly what the literals contain.
st.markdown(
    """

Resume ATS Scanner

""", unsafe_allow_html=True
)

col1, col2 = st.columns(2)

with col1:
    st.subheader("Resume")
    resume_input = st.text_area("Enter the text or upload a file", key="resume_input", height=150)
    resume_file = st.file_uploader("Upload Resume File", type=["txt", "pdf", "docx", "jpg", "png"], key="resume_file")

with col2:
    st.subheader("Job Description")
    jd_input = st.text_area("Enter the text or upload a file", key="jd_input", height=150)
    jd_file = st.file_uploader("Upload Job Description File", type=["txt", "pdf", "docx", "jpg", "png"], key="jd_file")

if st.button("Calculate Your ATS Score", key="calculate-btn"):
    if not (resume_input or resume_file):
        st.error("Please provide a resume!")
    elif not (jd_input or jd_file):
        st.error("Please provide a job description!")
    else:
        try:
            resume_text = _resolve_input_text(resume_input, resume_file)
            job_text = _resolve_input_text(jd_input, jd_file)
        except Exception as exc:
            # Report the failure instead of scoring an error message as if it
            # were the document's content.
            st.error(f"Error processing file: {exc}")
        else:
            ats_score, similarity_score, skill_match_score, missing_skills = calculate_ats_score(resume_text, job_text)
            st.markdown(f"""

ATS Score: {ats_score:.2f}%

Cosine Similarity: {similarity_score:.2f}%

Skill Match Score: {skill_match_score:.2f}%

""", unsafe_allow_html=True)
            if missing_skills:
                st.markdown(f"""

Missing Skills: {', '.join(missing_skills)}

Consider updating your resume to include the missing skills.

""", unsafe_allow_html=True)
            else:
                st.markdown("""

Your resume matches the job description perfectly!

""", unsafe_allow_html=True)