Spaces:

sid22669
/

Resume_classifier

Sleeping

App Files Files Community

Resume_classifier / app.py

sid22669

Update app.py

144cf7f verified 8 months ago

raw

history blame contribute delete

4.68 kB

	import streamlit as st
	import joblib
	import re
	import PyPDF2
	import pandas as pd
	import os
	import uuid
	from datetime import datetime
	import tempfile
	from io import BytesIO

	# Load model and vectorizer.
	# Streamlit re-executes this script from the top on every widget interaction,
	# so the artifacts are loaded through st.cache_resource: they are deserialized
	# once and the same objects are reused on later reruns instead of being read
	# from disk again each time.
	# NOTE: joblib.load unpickles the files, so they must come from a trusted source.
	@st.cache_resource
	def _load_artifacts():
	    """Return the ``(classifier, vectorizer)`` pair loaded from disk."""
	    return joblib.load('resume_classifier'), joblib.load('resume_vectorizer')


	classifier_model, resume_vectorizer = _load_artifacts()

	def transfer_tmp_logs():
	    """Fold the temporary corrections log into the main corrections log.

	    Reads ``/tmp/corrections_log.csv`` and, if ``corrections_log.csv`` already
	    exists in the working directory, appends the temporary rows to it. When the
	    same ``serial_id`` appears more than once, only the last occurrence is kept,
	    so a row from the temporary log replaces the stored one. The merged table is
	    written to ``corrections_log.csv`` and the temporary file is then deleted.

	    Does nothing when the temporary log does not exist.
	    """
	    staging_csv = "/tmp/corrections_log.csv"
	    persistent_csv = "corrections_log.csv"

	    # Nothing has been staged, so there is nothing to move.
	    if not os.path.exists(staging_csv):
	        return

	    staged_rows = pd.read_csv(staging_csv)

	    if not os.path.exists(persistent_csv):
	        # First transfer: the staged rows become the whole main log.
	        merged_rows = staged_rows
	    else:
	        stored_rows = pd.read_csv(persistent_csv)
	        # Staged rows come last in the concatenation, so keep="last" lets
	        # them win over stored rows that share a serial_id.
	        stacked = pd.concat([stored_rows, staged_rows])
	        merged_rows = stacked.drop_duplicates(subset=["serial_id"], keep="last")

	    merged_rows.to_csv(persistent_csv, index=False)

	    # The staged rows are now in the main log; drop the temporary file.
	    os.remove(staging_csv)


	def read_uploaded_file(uploaded_file):
	    """Extract the text of an uploaded ``.pdf`` or ``.txt`` file.

	    Args:
	        uploaded_file: File-like object exposing ``name`` and ``read()``; for
	            PDFs it is passed directly to ``PyPDF2.PdfReader``.

	    Returns:
	        The stripped text on success. Failures are reported in-band as a
	        string: ``"Unsupported file type."`` for any other extension, or
	        ``"Error reading file: <reason>"`` when reading or decoding raises.
	    """
	    # The extension decides the parser; comparison is case-insensitive.
	    suffix = os.path.splitext(uploaded_file.name)[1].lower()

	    try:
	        if suffix == ".txt":
	            raw_bytes = uploaded_file.read()
	            return raw_bytes.decode("utf-8").strip()

	        if suffix == ".pdf":
	            pdf = PyPDF2.PdfReader(uploaded_file)
	            page_texts = (page.extract_text() for page in pdf.pages)
	            # Pages that yield no text are skipped; the rest are newline-terminated.
	            return "".join(chunk + "\n" for chunk in page_texts if chunk).strip()

	        return "Unsupported file type."

	    except Exception as exc:
	        # Callers receive the failure as text rather than as an exception.
	        return f"Error reading file: {exc}"


	def clean_resume(text):
	    """Return *text* lower-cased, with every non ASCII-letter character replaced by a space.

	    Digits, punctuation, whitespace and non-ASCII characters each become a
	    single space, so the result has the same length as the input.
	    """
	    non_letter = re.compile(r'[^a-zA-Z]')
	    letters_and_spaces = non_letter.sub(' ', text)
	    return letters_and_spaces.lower()


	def log_or_update(serial_id, timestamp, resume_text, model_prediction, corrected_prediction):
	    """Record a prediction and its feedback in ``/tmp/corrections_log.csv``.

	    If the log already has a row with this ``serial_id``, only that row's
	    ``corrected_prediction`` is overwritten; its other columns are left as
	    first written. Otherwise a new row is appended (or the file is created).

	    Args:
	        serial_id: Identifier of the upload the feedback belongs to.
	        timestamp: Time string stored with a newly created row.
	        resume_text: Extracted resume text; only the first 500 characters
	            are stored.
	        model_prediction: Role predicted by the model.
	        corrected_prediction: Role confirmed or supplied by the user.
	    """
	    csv_path = "/tmp/corrections_log.csv"

	    # Truncate the resume text for privacy and to bound the log size.
	    record = pd.DataFrame([{
	        "serial_id": serial_id,
	        "timestamp": timestamp,
	        "resume_text": resume_text[:500],
	        "model_prediction": model_prediction,
	        "corrected_prediction": corrected_prediction,
	    }])

	    if not os.path.exists(csv_path):
	        entries = record
	    else:
	        entries = pd.read_csv(csv_path)
	        same_upload = entries["serial_id"] == serial_id
	        if same_upload.any():
	            # Known upload: refresh the correction, keep the original row.
	            entries.loc[same_upload, "corrected_prediction"] = corrected_prediction
	        else:
	            entries = pd.concat([entries, record], ignore_index=True)

	    entries.to_csv(csv_path, index=False)


	# Streamlit UI
	# Flow: upload a resume -> extract its text -> predict a role -> ask the user
	# to confirm or correct the prediction -> record the outcome in the log.
	st.title("📄 Resume Role Classifier")

	# Only offer the formats read_uploaded_file can actually parse (PDF and TXT).
	# Accepting DOC/DOCX here let users upload files that could never be read.
	uploaded_file = st.file_uploader(
	    "Upload your resume (PDF, TXT format)",
	    type=["pdf", "txt"],
	)

	if uploaded_file:
	    # Reset the file read pointer in case it was read earlier
	    uploaded_file.seek(0)

	    # Track upload session: a different file name starts a new log entry.
	    if st.session_state.get("uploaded_file_name") != uploaded_file.name:
	        st.session_state.uploaded_file_name = uploaded_file.name
	        st.session_state.serial_id = str(uuid.uuid4())
	        st.session_state.corrected_prediction = None

	    serial_id = st.session_state.serial_id
	    extracted_text = read_uploaded_file(uploaded_file)

	    # read_uploaded_file reports failures in-band as message strings. Match
	    # those exact messages: a substring test for "Error" would reject genuine
	    # resumes that merely contain the word, and would let the
	    # "Unsupported file type." message through to the classifier.
	    extraction_failed = (
	        not extracted_text.strip()
	        or extracted_text == "Unsupported file type."
	        or extracted_text.startswith("Error reading file:")
	    )

	    if extraction_failed:
	        st.warning("⚠️ Could not extract text from the uploaded file.")
	    else:
	        cleaned_text = clean_resume(extracted_text)
	        new_input = resume_vectorizer.transform([cleaned_text])
	        prediction = classifier_model.predict(new_input)[0]

	        st.write(f"Predicted Role: `{prediction}`")

	        # Widget keys include the upload's serial id so that the answer and
	        # the typed correction from a previous upload are not carried over
	        # (and logged) against a newly uploaded resume.
	        feedback = st.radio(
	            "Is this prediction correct?",
	            ("Yes", "No"),
	            key=f"feedback_radio_{serial_id}",
	        )

	        if feedback == "No":
	            # Pre-fill only with a correction the user typed earlier for this
	            # upload; `or ""` covers the None stored when the upload started.
	            typed_role = st.text_input(
	                "Please provide the correct role:",
	                value=st.session_state.get("corrected_prediction") or "",
	                key=f"correction_input_{serial_id}",
	            )
	            corrected_prediction = (typed_role or "").strip()
	            st.session_state.corrected_prediction = corrected_prediction
	        else:
	            # Confirmed prediction. The stored correction is left untouched so
	            # the rejected role is never offered as the default correction.
	            corrected_prediction = prediction

	        # Record a confirmation immediately; record a rejection only once a
	        # non-blank replacement role has been entered.
	        if feedback == "Yes" or corrected_prediction:
	            now = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
	            log_or_update(
	                serial_id=serial_id,
	                timestamp=now,
	                resume_text=extracted_text,
	                model_prediction=prediction,
	                corrected_prediction=corrected_prediction,
	            )
	            st.success(f"✅ Final role recorded: `{corrected_prediction}`")
	else:
	    st.info("📤 Please upload a supported file (PDF, TXT).")