import streamlit as st import pickle import re import docx import PyPDF2 from sklearn.metrics.pairwise import cosine_similarity # 1. CONFIG st.set_page_config(page_title="AI Resume Screening", layout="wide") import os # def ensure_models(): # if not os.path.exists("clf.pkl") or not os.path.exists("tfidf.pkl"): # os.system("python train_model.py") # if not os.path.exists("ats_scorer.pkl"): # os.system("python train_ats_model.py") # ensure_models() # 2. LOAD RESOURCES @st.cache_resource def load_resources(): try: clf = pickle.load(open('clf.pkl', 'rb')) tfidf = pickle.load(open('tfidf.pkl', 'rb')) le = pickle.load(open('encoder.pkl', 'rb')) ats = pickle.load(open('ats_scorer.pkl', 'rb')) prototypes = pickle.load(open('prototypes.pkl', 'rb')) return clf, tfidf, le, ats, prototypes except FileNotFoundError: return None, None, None, None, None clf, tfidf, le, ats_model, prototypes = load_resources() # 3. UTILS def clean_text(txt): txt = re.sub(r'http\S+\s', ' ', txt) txt = re.sub(r'[^\w\s]', ' ', txt) return txt.lower() def extract_text(file): try: if file.name.endswith('.pdf'): reader = PyPDF2.PdfReader(file) return " ".join([page.extract_text() for page in reader.pages]) elif file.name.endswith('.docx'): doc = docx.Document(file) return " ".join([p.text for p in doc.paragraphs]) elif file.name.endswith('.txt'): return file.read().decode('utf-8') except: return "" def calculate_scores(text, category): # Retrieve the "Master Profile" for the predicted category if category not in prototypes: return 0, 0, 0 master_profile = prototypes[category] cleaned_resume = clean_text(text) # 1. Cosine Similarity vecs = tfidf.transform([cleaned_resume, master_profile]) cosine_sim = cosine_similarity(vecs[0], vecs[1])[0][0] # 2. Keyword Match res_tokens = set(cleaned_resume.split()) mp_tokens = set(master_profile.split()) keyword_match = len(res_tokens.intersection(mp_tokens)) / len(mp_tokens) if mp_tokens else 0 # 3. AI Prediction try: ml_score = ats_model.predict([[cosine_sim, keyword_match]])[0] except: ml_score = 0 # 4. Fallback Logic (Prevent 0 Scores) # If the AI predicts extremely low but similarity is okay, fallback to math if ml_score < 10: final_score = cosine_sim * 100 else: final_score = ml_score # Visual Scaling (Raw cosine sim is usually low, e.g. 0.4, we map it to 0-100 scale) if final_score < 1: # If it's 0.85 style final_score *= 100 return round(final_score, 1), round(cosine_sim*100, 1), round(keyword_match*100, 1) # 4. MAIN APP def main(): st.title("📄 AI Resume Classifier & ATS Scorer") st.markdown("Powered by `AzharAli05` (Classification) & `0xnbk` (Scoring)") if not clf: st.error("⚠️ Models missing! Run `train_model.py` then `train_ats_model.py`.") st.stop() file = st.file_uploader("Upload Resume", type=['pdf', 'docx', 'txt']) if file: text = extract_text(file) if len(text) > 20: # Predict Category clean = clean_text(text) vec = tfidf.transform([clean]) cat_id = clf.predict(vec)[0] category = le.inverse_transform([cat_id])[0] # Predict Score ats_score, raw_sim, key_match = calculate_scores(text, category) # Display st.success(f"### Predicted Role: {category}") col1, col2, col3 = st.columns(3) col1.metric("ATS Score (AI)", f"{ats_score}%") col2.metric("Content Match", f"{raw_sim}%") col3.metric("Keyword Overlap", f"{key_match}%") st.progress(min(ats_score/100, 1.0)) if ats_score > 75: st.balloons() st.info("Great match!") elif ats_score < 40: st.warning("Low match. Try adding more relevant keywords.") with st.expander("Show Extracted Text"): st.text(text) else: st.warning("Could not extract text. File might be an image/scan.") if __name__ == "__main__": main()