Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| import pickle | |
| import re | |
| import docx | |
| import PyPDF2 | |
| from sklearn.metrics.pairwise import cosine_similarity | |
| # 1. CONFIG | |
# Browser-tab title plus the wide page layout for the three-column metrics row.
st.set_page_config(
    page_title="AI Resume Screening",
    layout="wide",
)
| import os | |
| # def ensure_models(): | |
| # if not os.path.exists("clf.pkl") or not os.path.exists("tfidf.pkl"): | |
| # os.system("python train_model.py") | |
| # if not os.path.exists("ats_scorer.pkl"): | |
| # os.system("python train_ats_model.py") | |
| # ensure_models() | |
| # 2. LOAD RESOURCES | |
def load_resources():
    """Load the pickled model artifacts from the current working directory.

    The files are read in this order: ``clf.pkl``, ``tfidf.pkl``,
    ``encoder.pkl``, ``ats_scorer.pkl``, ``prototypes.pkl``.

    Returns:
        A 5-tuple ``(clf, tfidf, le, ats, prototypes)`` with the unpickled
        objects, or five ``None`` values if any of the files is missing.

    Raises:
        Any non-``FileNotFoundError`` raised while opening or unpickling
        (for example ``pickle.UnpicklingError``) is propagated unchanged.
    """
    artifact_files = (
        'clf.pkl',
        'tfidf.pkl',
        'encoder.pkl',
        'ats_scorer.pkl',
        'prototypes.pkl',
    )
    loaded = []
    try:
        for filename in artifact_files:
            # A context manager closes each handle even if unpickling fails.
            # SECURITY: pickle.load can execute arbitrary code; these files
            # must come from a trusted training run, never from user uploads.
            with open(filename, 'rb') as handle:
                loaded.append(pickle.load(handle))
    except FileNotFoundError:
        # All-or-nothing: a partially loaded set of artifacts is not usable.
        return None, None, None, None, None
    return tuple(loaded)
# Module-level handles to the model artifacts; every one of them is None
# when load_resources() could not find a pickle file.
(clf, tfidf, le, ats_model, prototypes) = load_resources()
| # 3. UTILS | |
def clean_text(txt):
    """Normalise raw text for vectorisation.

    Steps, in order:
      1. Replace every ``http...`` token (and the single whitespace character
         that follows it, if any) with one space.
      2. Replace every character that is neither a word character nor
         whitespace with a space.
      3. Lower-case the result.

    Args:
        txt: The text to clean.

    Returns:
        The cleaned, lower-cased string. Runs of spaces are not collapsed.
    """
    # `(?:\s|\Z)` also matches a URL that ends the text; requiring a trailing
    # whitespace character would leave such a URL in place and let its
    # fragments ("https", "example", "com") leak into the token set.
    txt = re.sub(r'http\S+(?:\s|\Z)', ' ', txt)
    txt = re.sub(r'[^\w\s]', ' ', txt)
    return txt.lower()
def extract_text(file):
    """Extract plain text from an uploaded PDF, DOCX or TXT file.

    The format is chosen from the file name's extension, compared
    case-insensitively so that names such as ``CV.PDF`` are handled too.

    Args:
        file: A file-like object exposing ``name`` and, for ``.txt`` files,
            ``read()`` returning UTF-8 encoded bytes.

    Returns:
        The extracted text with pages/paragraphs joined by single spaces.
        An empty string is returned when the extension is not supported or
        when reading/parsing fails, so callers always receive a ``str``.
    """
    try:
        name = file.name.lower()
        if name.endswith('.pdf'):
            reader = PyPDF2.PdfReader(file)
            # Defensive: treat a page that yields no text as empty so a single
            # such page cannot make the join fail and discard the whole file.
            return " ".join(page.extract_text() or "" for page in reader.pages)
        if name.endswith('.docx'):
            document = docx.Document(file)
            return " ".join(paragraph.text for paragraph in document.paragraphs)
        if name.endswith('.txt'):
            return file.read().decode('utf-8')
    except Exception:
        # Best-effort extraction: a corrupt or undecodable file is reported
        # to the caller as "no text" rather than crashing the app.
        return ""
    # Unsupported extension.
    return ""
def calculate_scores(text, category):
    """Score a resume against the master profile of its predicted category.

    Uses the module-level ``prototypes``, ``tfidf`` and ``ats_model`` objects.

    Args:
        text: Raw resume text; it is passed through ``clean_text`` here.
        category: Key used to look up the master profile in ``prototypes``.

    Returns:
        A tuple ``(final_score, cosine_percent, keyword_percent)``, each
        rounded to one decimal place. ``(0, 0, 0)`` is returned when
        ``category`` has no entry in ``prototypes``.
    """
    # Retrieve the "Master Profile" for the predicted category
    if category not in prototypes:
        return 0, 0, 0
    # NOTE(review): the profile is used as stored, without clean_text();
    # presumably it was cleaned when the prototypes were built - confirm.
    master_profile = prototypes[category]
    cleaned_resume = clean_text(text)

    # 1. Cosine similarity between the TF-IDF vectors of resume and profile
    vecs = tfidf.transform([cleaned_resume, master_profile])
    cosine_sim = cosine_similarity(vecs[0], vecs[1])[0][0]

    # 2. Keyword match: share of distinct profile tokens found in the resume
    res_tokens = set(cleaned_resume.split())
    mp_tokens = set(master_profile.split())
    keyword_match = len(res_tokens & mp_tokens) / len(mp_tokens) if mp_tokens else 0

    # 3. Model prediction from the two features above
    try:
        ml_score = ats_model.predict([[cosine_sim, keyword_match]])[0]
    except Exception:
        # Best-effort: a failing model falls through to the cosine fallback.
        ml_score = 0

    # 4. Fallback: a prediction below 10 is replaced by the cosine similarity
    # expressed as a percentage.
    if ml_score < 10:
        final_score = cosine_sim * 100
    else:
        final_score = ml_score
    # Deliberately no further rescaling: both branches are already on a 0-100
    # scale. Multiplying values below 1 by 100 again would turn a 0.5 %
    # similarity into a reported 50 %.

    return round(final_score, 1), round(cosine_sim * 100, 1), round(keyword_match * 100, 1)
| # 4. MAIN APP | |
def main():
    """Render the app: upload a resume, predict its role and show its scores.

    Flow:
      1. Stop with an error message when the model artifacts are not loaded.
      2. Extract text from the uploaded PDF/DOCX/TXT file.
      3. Predict the category with ``clf`` on the TF-IDF vector and decode it
         with ``le``.
      4. Show the three values returned by ``calculate_scores`` as metrics,
         a progress bar, a verdict message and the extracted text.
    """
    # NOTE(review): the first glyph of the title looks mis-encoded (probably
    # an emoji originally). It is kept verbatim because the intended character
    # cannot be determined from this file.
    st.title("๐ AI Resume Classifier & ATS Scorer")
    st.markdown("Powered by `AzharAli05` (Classification) & `0xnbk` (Scoring)")

    # load_resources() returns None for every artifact when a pickle file is
    # missing; test identity explicitly instead of relying on truthiness.
    if clf is None:
        st.error("⚠️ Models missing! Run `train_model.py` then `train_ats_model.py`.")
        st.stop()

    file = st.file_uploader("Upload Resume", type=['pdf', 'docx', 'txt'])
    if not file:
        return

    # extract_text() may hand back None (no matching extension); normalise to
    # "" so the length check below cannot raise.
    text = extract_text(file) or ""
    if len(text) <= 20:
        st.warning("Could not extract text. File might be an image/scan.")
        return

    # Predict Category
    cleaned = clean_text(text)
    features = tfidf.transform([cleaned])
    cat_id = clf.predict(features)[0]
    category = le.inverse_transform([cat_id])[0]

    # Predict Score
    ats_score, raw_sim, key_match = calculate_scores(text, category)

    # Display
    st.success(f"### Predicted Role: {category}")
    col1, col2, col3 = st.columns(3)
    col1.metric("ATS Score (AI)", f"{ats_score}%")
    col2.metric("Content Match", f"{raw_sim}%")
    col3.metric("Keyword Overlap", f"{key_match}%")

    # Keep the progress value inside [0.0, 1.0] whatever the score is.
    st.progress(min(max(ats_score / 100, 0.0), 1.0))

    if ats_score > 75:
        st.balloons()
        st.info("Great match!")
    elif ats_score < 40:
        st.warning("Low match. Try adding more relevant keywords.")

    with st.expander("Show Extracted Text"):
        st.text(text)
# Start the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()