Spaces:

SoS13
/

resume-analyzer

Sleeping

App Files Files Community

resume-analyzer / app.py

SoS13

Upload 10 files

84610b4 verified 25 days ago

raw

history blame contribute delete

4.55 kB

	import streamlit as st
	import pickle
	import re
	import docx
	import PyPDF2
	from sklearn.metrics.pairwise import cosine_similarity

	# 1. CONFIG
	# Set the browser-tab title and switch the page to the wide layout. In the
	# visible portion of this file this is the first Streamlit call executed.
	# NOTE(review): Streamlit has historically required set_page_config to run
	# before any other st.* command -- confirm before moving this line.
	st.set_page_config(page_title="AI Resume Screening", layout="wide")
	# `os` is only referenced by the disabled bootstrap code below.
	import os

	# Disabled bootstrap step, kept for reference: it would run the training
	# scripts whenever the expected pickle files are absent.
	# def ensure_models():
	#     if not os.path.exists("clf.pkl") or not os.path.exists("tfidf.pkl"):
	#         os.system("python train_model.py")
	#     if not os.path.exists("ats_scorer.pkl"):
	#         os.system("python train_ats_model.py")

	# ensure_models()

	# 2. LOAD RESOURCES
	@st.cache_resource
	def load_resources():
	    """Load the pickled model artifacts the app depends on.

	    Reads, in order, ``clf.pkl``, ``tfidf.pkl``, ``encoder.pkl``,
	    ``ats_scorer.pkl`` and ``prototypes.pkl`` using paths relative to the
	    current working directory. The function is wrapped in
	    ``st.cache_resource``.

	    Returns:
	        A 5-tuple ``(clf, tfidf, le, ats, prototypes)`` in that order, or
	        five ``None`` values when any of the files does not exist.

	    NOTE(security): ``pickle.load`` can execute arbitrary code embedded in
	    the file. Only ship pickle files that were produced by trusted code.
	    """
	    artifact_paths = (
	        'clf.pkl',
	        'tfidf.pkl',
	        'encoder.pkl',
	        'ats_scorer.pkl',
	        'prototypes.pkl',
	    )
	    artifacts = []
	    try:
	        for path in artifact_paths:
	            # The context manager closes each handle even if unpickling fails.
	            with open(path, 'rb') as handle:
	                artifacts.append(pickle.load(handle))
	    except FileNotFoundError:
	        # A missing artifact is reported to the caller as "nothing loaded".
	        return None, None, None, None, None
	    return tuple(artifacts)

	# Load the artifacts once at import time; all five names are None when any
	# pickle file is missing.
	clf, tfidf, le, ats_model, prototypes = load_resources()

	# 3. UTILS
	def clean_text(txt):
	    """Normalise text for vectorisation.

	    Replaces each ``http...`` token that is followed by a whitespace
	    character with a single space, then replaces every character that is
	    neither a word character nor whitespace with a space, and finally
	    lower-cases the result.

	    Args:
	        txt: The string to clean.

	    Returns:
	        The cleaned, lower-cased string.
	    """
	    # Order matters: URLs are blanked out first, before the punctuation pass
	    # would break them into separate fragments.
	    for pattern in (r'http\S+\s', r'[^\w\s]'):
	        txt = re.sub(pattern, ' ', txt)
	    return txt.lower()

	def extract_text(file):
	    """Return the plain text of an uploaded resume, or "" if it cannot be read.

	    The parser is chosen from the extension of ``file.name`` (compared
	    case-insensitively): ``.pdf`` is read with ``PyPDF2.PdfReader``,
	    ``.docx`` with ``docx.Document`` and ``.txt`` is decoded as UTF-8.

	    Args:
	        file: A file-like object exposing a ``name`` attribute; for ``.txt``
	            input its ``read()`` must return bytes.

	    Returns:
	        The extracted text with pages/paragraphs joined by single spaces.
	        An empty string is returned for unsupported extensions and when
	        parsing or decoding fails, so callers can always call ``len()`` on
	        the result.
	    """
	    try:
	        filename = file.name.lower()
	        if filename.endswith('.pdf'):
	            reader = PyPDF2.PdfReader(file)
	            # Guard against pages that yield None instead of a string so a
	            # single text-less page does not discard the whole document.
	            return " ".join(page.extract_text() or "" for page in reader.pages)
	        if filename.endswith('.docx'):
	            document = docx.Document(file)
	            return " ".join(paragraph.text for paragraph in document.paragraphs)
	        if filename.endswith('.txt'):
	            return file.read().decode('utf-8')
	    except Exception:
	        # Deliberate best-effort: any parsing/decoding failure is reported to
	        # the caller as "no text" rather than crashing the page.
	        return ""
	    # Unsupported extension.
	    return ""

	def calculate_scores(text, category):
	    """Score a resume against the stored reference profile for ``category``.

	    Uses the module-level ``prototypes`` mapping, the ``tfidf`` vectoriser,
	    the ``ats_model`` regressor and ``clean_text``.

	    Args:
	        text: Raw resume text; it is passed through ``clean_text`` first.
	        category: Key looked up in ``prototypes`` to get the reference
	            ("master") profile text.

	    Returns:
	        A 3-tuple ``(final_score, similarity_pct, keyword_pct)``, each
	        rounded to one decimal place. ``similarity_pct`` and ``keyword_pct``
	        are the cosine similarity and keyword overlap scaled to percent.
	        ``(0, 0, 0)`` is returned when ``category`` has no entry in
	        ``prototypes``.
	    """
	    if category not in prototypes:
	        return 0, 0, 0

	    master_profile = prototypes[category]
	    cleaned_resume = clean_text(text)

	    # 1. Cosine similarity between the resume and the reference profile.
	    vecs = tfidf.transform([cleaned_resume, master_profile])
	    cosine_sim = cosine_similarity(vecs[0], vecs[1])[0][0]

	    # 2. Keyword match: share of the profile's distinct tokens that also
	    #    appear in the resume.
	    res_tokens = set(cleaned_resume.split())
	    mp_tokens = set(master_profile.split())
	    keyword_match = len(res_tokens & mp_tokens) / len(mp_tokens) if mp_tokens else 0

	    # 3. Model prediction. Kept best-effort: if the model call fails the
	    #    score falls through to the similarity-based fallback below.
	    try:
	        ml_score = ats_model.predict([[cosine_sim, keyword_match]])[0]
	    except Exception:
	        ml_score = 0

	    # 4. Fallback: when the model predicts below 10, report the cosine
	    #    similarity as a percentage instead. The value is already on the
	    #    0-100 scale here, so it must not be multiplied by 100 a second
	    #    time (doing so turned near-zero similarities into high scores).
	    final_score = cosine_sim * 100 if ml_score < 10 else ml_score

	    return (
	        round(final_score, 1),
	        round(cosine_sim * 100, 1),
	        round(keyword_match * 100, 1),
	    )

	# 4. MAIN APP
	def main():
	    """Render the page: upload a resume, classify it and show its scores.

	    Stops the script with an error message when the classifier was not
	    loaded. Otherwise, once a file has been uploaded, the extracted text is
	    classified with ``clf``/``tfidf``/``le``, scored with
	    ``calculate_scores`` and the results are displayed. Files yielding 20
	    characters of text or fewer produce a warning instead.
	    """
	    st.title("📄 AI Resume Classifier & ATS Scorer")
	    st.markdown("Powered by `AzharAli05` (Classification) & `0xnbk` (Scoring)")

	    # load_resources() signals missing artifacts with None; test for that
	    # sentinel explicitly instead of relying on the object's truthiness.
	    if clf is None:
	        st.error("⚠️ Models missing! Run `train_model.py` then `train_ats_model.py`.")
	        st.stop()

	    file = st.file_uploader("Upload Resume", type=['pdf', 'docx', 'txt'])
	    if not file:
	        return

	    # Normalise a missing result to "" so the length check cannot fail.
	    text = extract_text(file) or ""
	    if len(text) <= 20:
	        st.warning("Could not extract text. File might be an image/scan.")
	        return

	    # Predict category
	    clean = clean_text(text)
	    vec = tfidf.transform([clean])
	    cat_id = clf.predict(vec)[0]
	    category = le.inverse_transform([cat_id])[0]

	    # Predict score
	    ats_score, raw_sim, key_match = calculate_scores(text, category)

	    # Display
	    st.success(f"### Predicted Role: {category}")

	    col1, col2, col3 = st.columns(3)
	    col1.metric("ATS Score (AI)", f"{ats_score}%")
	    col2.metric("Content Match", f"{raw_sim}%")
	    col3.metric("Keyword Overlap", f"{key_match}%")

	    # Cap at 1.0 so a score above 100 is not passed to the progress bar.
	    st.progress(min(ats_score/100, 1.0))

	    if ats_score > 75:
	        st.balloons()
	        st.info("Great match!")
	    elif ats_score < 40:
	        st.warning("Low match. Try adding more relevant keywords.")

	    with st.expander("Show Extracted Text"):
	        st.text(text)

	if __name__ == "__main__":
	    main()