File size: 4,549 Bytes
74f28d3 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 |
import streamlit as st
import pickle
import re
import docx
import PyPDF2
from sklearn.metrics.pairwise import cosine_similarity
# 1. CONFIG
# Set the page title to "AI Resume Screening" and use the wide layout.
# NOTE(review): Streamlit is expected to require this call before any other
# st.* command - confirm before moving it.
st.set_page_config(page_title="AI Resume Screening", layout="wide")
import os
# def ensure_models():
# if not os.path.exists("clf.pkl") or not os.path.exists("tfidf.pkl"):
# os.system("python train_model.py")
# if not os.path.exists("ats_scorer.pkl"):
# os.system("python train_ats_model.py")
# ensure_models()
# 2. LOAD RESOURCES
@st.cache_resource
def load_resources():
    """Load the pickled model artifacts from the current working directory.

    Returns:
        A 5-tuple ``(clf, tfidf, le, ats, prototypes)`` unpickled from
        ``clf.pkl``, ``tfidf.pkl``, ``encoder.pkl``, ``ats_scorer.pkl`` and
        ``prototypes.pkl`` (in that order), or a tuple of five ``None``
        values when any of those files does not exist.
    """
    # SECURITY: pickle.load can execute arbitrary code embedded in the file.
    # Only load artifacts produced by this project's own training scripts.
    artifact_files = (
        'clf.pkl',
        'tfidf.pkl',
        'encoder.pkl',
        'ats_scorer.pkl',
        'prototypes.pkl',
    )
    loaded = []
    try:
        for filename in artifact_files:
            # The context manager closes each handle even if unpickling fails;
            # the bare open() calls used previously leaked all five handles.
            with open(filename, 'rb') as handle:
                loaded.append(pickle.load(handle))
    except FileNotFoundError:
        return None, None, None, None, None
    return tuple(loaded)
# Unpack the loaded artifacts into module-level names used by the functions
# below. load_resources() returns five None values when a pickle file is
# missing, so each of these may be None.
clf, tfidf, le, ats_model, prototypes = load_resources()
# 3. UTILS
def clean_text(txt):
    """Normalize raw text for vectorization.

    Applies two regex substitutions in order, each replacing its match with
    a single space, then lower-cases the result:

    1. ``http`` followed by non-whitespace characters and exactly one
       trailing whitespace character (a URL at the very end of the text has
       no trailing whitespace and is therefore not matched by this step).
    2. Any single character that is neither a word character nor whitespace.

    Args:
        txt: The text to normalize.

    Returns:
        The normalized, lower-cased text.
    """
    patterns = (
        r'http\S+\s',
        r'[^\w\s]',
    )
    normalized = txt
    for pattern in patterns:
        normalized = re.sub(pattern, ' ', normalized)
    return normalized.lower()
def extract_text(file):
    """Extract plain text from an uploaded resume file.

    The format is chosen from the file name's extension (case-insensitive):
    ``.pdf`` is read with ``PyPDF2.PdfReader``, ``.docx`` with
    ``docx.Document``, and ``.txt`` is read and decoded as UTF-8.

    Args:
        file: A file-like object exposing a ``name`` attribute; for ``.txt``
            input its ``read()`` must return bytes.

    Returns:
        The extracted text with pages/paragraphs joined by single spaces.
        An empty string is returned when the extension is not supported or
        when reading/parsing fails, so callers can always treat the result
        as a string.
    """
    try:
        # Lower-case so "Resume.PDF" is handled the same as "resume.pdf".
        filename = file.name.lower()
        if filename.endswith('.pdf'):
            reader = PyPDF2.PdfReader(file)
            # Defensive: treat a page that yields None as empty so a single
            # page cannot make the join fail and void the whole document.
            return " ".join(page.extract_text() or "" for page in reader.pages)
        if filename.endswith('.docx'):
            document = docx.Document(file)
            return " ".join(paragraph.text for paragraph in document.paragraphs)
        if filename.endswith('.txt'):
            return file.read().decode('utf-8')
    except Exception:
        # Deliberate best-effort: any read/parse failure is reported to the
        # caller as "no text" rather than crashing the app.
        return ""
    # Unsupported extension: return a string instead of an implicit None.
    return ""
def calculate_scores(text, category):
    """Score a resume against the stored master profile of its category.

    Args:
        text: Raw resume text; it is normalized with ``clean_text`` here.
        category: Category label used as the key into ``prototypes``.

    Returns:
        A tuple ``(final_score, cosine_pct, keyword_pct)``, each rounded to
        one decimal place:

        * ``final_score`` - the ATS model's prediction when it is at least
          10, otherwise the cosine similarity as a percentage; clamped to
          the range 0-100.
        * ``cosine_pct`` - TF-IDF cosine similarity between the resume and
          the master profile, times 100.
        * ``keyword_pct`` - share of the master profile's distinct tokens
          that also occur in the resume, times 100.

        ``(0, 0, 0)`` is returned when no master profile exists for
        ``category``.
    """
    # Retrieve the "Master Profile" for the predicted category.
    if not prototypes or category not in prototypes:
        return 0, 0, 0
    master_profile = prototypes[category]
    cleaned_resume = clean_text(text)

    # 1. Cosine similarity between the two TF-IDF vectors.
    vecs = tfidf.transform([cleaned_resume, master_profile])
    cosine_sim = cosine_similarity(vecs[0], vecs[1])[0][0]

    # 2. Keyword match: overlap of distinct whitespace-separated tokens.
    res_tokens = set(cleaned_resume.split())
    mp_tokens = set(master_profile.split())
    keyword_match = len(res_tokens & mp_tokens) / len(mp_tokens) if mp_tokens else 0

    # 3. Model prediction from the two features above.
    try:
        ml_score = ats_model.predict([[cosine_sim, keyword_match]])[0]
    except Exception:
        # Best-effort: a failing model falls through to the cosine fallback.
        ml_score = 0

    # 4. Fallback: when the model predicts below 10, use the cosine
    #    similarity as a percentage instead.
    #
    # The earlier extra step "if final_score < 1: final_score *= 100" was
    # removed: a model score below 10 never reaches the output, so that step
    # could only fire on the fallback value, which is already on a 0-100
    # scale. It turned e.g. a 0.5% similarity into a displayed 50%.
    # NOTE(review): if the ATS model is meant to emit 0-1 scores, rescale
    # ml_score before the threshold check - confirm against training code.
    final_score = ml_score if ml_score >= 10 else cosine_sim * 100

    # Keep the reported percentage inside 0-100.
    final_score = max(0.0, min(float(final_score), 100.0))

    return round(final_score, 1), round(cosine_sim * 100, 1), round(keyword_match * 100, 1)
# 4. MAIN APP
def main():
    """Render the app: upload a resume, predict its role, and show its scores.

    Stops with an error message when the model artifacts were not loaded.
    After a file is uploaded, its text is extracted, classified with
    ``clf``/``tfidf``/``le``, scored with ``calculate_scores``, and the
    results are shown as metrics, a progress bar, and the extracted text.
    """
    st.title("📄 AI Resume Classifier & ATS Scorer")
    st.markdown("Powered by `AzharAli05` (Classification) & `0xnbk` (Scoring)")

    # Compare against None explicitly: truthiness of an estimator object is
    # not a reliable "was it loaded" signal.
    if clf is None:
        st.error("⚠️ Models missing! Run `train_model.py` then `train_ats_model.py`.")
        st.stop()

    file = st.file_uploader("Upload Resume", type=['pdf', 'docx', 'txt'])
    if file is None:
        return

    # Normalize a missing result to "" so the length check cannot raise.
    text = extract_text(file) or ""
    # Strip first: extractions consisting only of whitespace carry no usable
    # content and should be reported as a failed extraction.
    if len(text.strip()) <= 20:
        st.warning("Could not extract text. File might be an image/scan.")
        return

    # Predict category
    clean = clean_text(text)
    vec = tfidf.transform([clean])
    cat_id = clf.predict(vec)[0]
    category = le.inverse_transform([cat_id])[0]

    # Predict score
    ats_score, raw_sim, key_match = calculate_scores(text, category)

    # Display
    st.success(f"### Predicted Role: {category}")
    col1, col2, col3 = st.columns(3)
    col1.metric("ATS Score (AI)", f"{ats_score}%")
    col2.metric("Content Match", f"{raw_sim}%")
    col3.metric("Keyword Overlap", f"{key_match}%")

    # Clamp on both sides so an out-of-range score cannot break the bar.
    st.progress(max(0.0, min(ats_score / 100, 1.0)))

    if ats_score > 75:
        st.balloons()
        st.info("Great match!")
    elif ats_score < 40:
        st.warning("Low match. Try adding more relevant keywords.")

    with st.expander("Show Extracted Text"):
        st.text(text)
# Run the app only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
|