Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| import joblib | |
| import re | |
| import PyPDF2 | |
| import pandas as pd | |
| import os | |
| import uuid | |
| from datetime import datetime | |
| import tempfile | |
| from io import BytesIO | |
# Load model and vectorizer
@st.cache_resource
def _load_artifacts():
    """Load the trained classifier and its vectorizer from disk once.

    Streamlit re-executes this script on every widget interaction; caching
    the loader keeps the artifacts in memory instead of deserializing them
    on each rerun.

    Returns:
        A ``(classifier, vectorizer)`` tuple as stored in the
        ``resume_classifier`` and ``resume_vectorizer`` files.
    """
    # NOTE: joblib.load unpickles its input and can execute arbitrary code,
    # so these files must only ever come from a trusted source.
    return joblib.load('resume_classifier'), joblib.load('resume_vectorizer')


classifier_model, resume_vectorizer = _load_artifacts()
def transfer_tmp_logs(tmp_log_path="/tmp/corrections_log.csv",
                      main_log_path="corrections_log.csv"):
    """Merge the temporary corrections log into the main log, then delete it.

    Rows are de-duplicated on ``serial_id``. When the same id is present in
    both files, the row from the temporary log wins because it is
    concatenated last and ``keep="last"`` is used.

    Args:
        tmp_log_path: CSV file holding rows that have not been transferred
            yet. If it does not exist the function does nothing.
        main_log_path: CSV file that accumulates transferred rows. It is
            created when missing.

    Returns:
        None.
    """
    if not os.path.exists(tmp_log_path):
        return  # No new logs to transfer

    try:
        tmp_df = pd.read_csv(tmp_log_path)
    except pd.errors.EmptyDataError:
        # A zero-byte file has no header to parse and carries no rows, so
        # there is nothing to merge; just clear it away.
        os.remove(tmp_log_path)
        return

    if os.path.exists(main_log_path):
        main_df = pd.read_csv(main_log_path)
        # Merge without duplicates based on serial_id
        combined_df = pd.concat([main_df, tmp_df]).drop_duplicates(
            subset=["serial_id"], keep="last"
        )
    else:
        combined_df = tmp_df

    combined_df.to_csv(main_log_path, index=False)

    # Remove the temporary file only after the main log has been written,
    # so a failed write never loses rows.
    os.remove(tmp_log_path)
def read_uploaded_file(uploaded_file):
    """Extract plain text from an uploaded ``.pdf`` or ``.txt`` file.

    The file type is chosen from the extension of ``uploaded_file.name``
    (case-insensitive).

    Args:
        uploaded_file: File-like object exposing ``name`` and ``read()``;
            for PDFs it is handed directly to ``PyPDF2.PdfReader``.

    Returns:
        The stripped text on success. Failures are reported in-band rather
        than raised: ``"Unsupported file type."`` for any other extension,
        or ``"Error reading file: <reason>"`` when reading/decoding fails.
    """
    suffix = os.path.splitext(uploaded_file.name)[1].lower()
    try:
        if suffix == ".txt":
            raw_bytes = uploaded_file.read()
            return raw_bytes.decode("utf-8").strip()

        if suffix != ".pdf":
            return "Unsupported file type."

        pdf = PyPDF2.PdfReader(uploaded_file)
        page_texts = (page.extract_text() for page in pdf.pages)
        # Pages that yield no text are skipped; each kept page is
        # terminated by a newline before the final strip.
        joined = "".join(chunk + "\n" for chunk in page_texts if chunk)
        return joined.strip()
    except Exception as e:
        return f"Error reading file: {str(e)}"
# Matches every character that is not an ASCII letter.
_NON_LETTER = re.compile(r'[^a-zA-Z]')


def clean_resume(text):
    """Normalize resume text before vectorization.

    Each character that is not an ASCII letter (digits, punctuation,
    whitespace, accented letters) is replaced by one space, then the result
    is lower-cased. The length of the string is preserved by the
    substitution.

    Args:
        text: Raw resume text.

    Returns:
        The cleaned, lower-case string.
    """
    spaced = _NON_LETTER.sub(' ', text)
    return spaced.lower()
def log_or_update(serial_id, timestamp, resume_text, model_prediction,
                  corrected_prediction, log_file="/tmp/corrections_log.csv"):
    """Record a prediction in the corrections log, or update its correction.

    If a row with ``serial_id`` already exists, only its
    ``corrected_prediction`` is overwritten; the other columns, including
    the original timestamp, are left as first written. Otherwise a new row
    is appended. The log file is created when missing.

    Args:
        serial_id: Identifier of the upload the row belongs to.
        timestamp: Time string stored with a newly created row.
        resume_text: Extracted resume text; only the first 500 characters
            are stored.
        model_prediction: Role predicted by the model.
        corrected_prediction: Role confirmed or supplied by the user.
        log_file: Path of the CSV log to write.

    Returns:
        None.
    """
    new_row = {
        "serial_id": serial_id,
        "timestamp": timestamp,
        "resume_text": resume_text[:500],  # Truncate for privacy/log size
        "model_prediction": model_prediction,
        "corrected_prediction": corrected_prediction,
    }

    existing = None
    if os.path.exists(log_file):
        try:
            existing = pd.read_csv(log_file)
        except pd.errors.EmptyDataError:
            # A zero-byte log has no header to parse; start a fresh one.
            existing = None

    if existing is None:
        df = pd.DataFrame([new_row])
    else:
        same_upload = existing["serial_id"] == serial_id
        if same_upload.any():
            existing.loc[same_upload, "corrected_prediction"] = corrected_prediction
            df = existing
        else:
            df = pd.concat([existing, pd.DataFrame([new_row])], ignore_index=True)

    df.to_csv(log_file, index=False)
# Streamlit UI
st.title("📄 Resume Role Classifier")

uploaded_file = st.file_uploader(
    "Upload your resume (PDF, TXT format)",
    # Offer only the formats read_uploaded_file can actually parse.
    type=["pdf", "txt"],
)

if uploaded_file:
    # Reset the file read pointer in case it was read earlier
    uploaded_file.seek(0)

    # Track upload session: a different file name starts a new log entry.
    if (
        "uploaded_file_name" not in st.session_state
        or st.session_state.uploaded_file_name != uploaded_file.name
    ):
        st.session_state.uploaded_file_name = uploaded_file.name
        st.session_state.serial_id = str(uuid.uuid4())
        st.session_state.corrected_prediction = None
        # Keyed widgets keep their value across reruns; clear them so the
        # previous file's feedback is not silently logged for this one.
        for widget_key in ("feedback_radio", "correction_input"):
            if widget_key in st.session_state:
                del st.session_state[widget_key]

    extracted_text = read_uploaded_file(uploaded_file)

    # read_uploaded_file reports failures in-band. Match its sentinel values
    # exactly: a substring test for "Error" would reject any resume that
    # merely mentions the word, and would miss the unsupported-type message.
    read_failed = (
        extracted_text == "Unsupported file type."
        or extracted_text.startswith("Error reading file:")
    )

    if read_failed or not extracted_text.strip():
        st.warning("⚠️ Could not extract text from the uploaded file.")
    else:
        cleaned_text = clean_resume(extracted_text)
        new_input = resume_vectorizer.transform([cleaned_text])
        prediction = classifier_model.predict(new_input)[0]
        st.write(f"**Predicted Role:** `{prediction}`")

        # NOTE(review): the radio defaults to "Yes", so every upload is
        # logged as confirmed before the user has answered — confirm that
        # this is intended.
        feedback = st.radio("Is this prediction correct?", ("Yes", "No"), key="feedback_radio")

        if feedback == "No":
            # Start from an empty box unless the user already typed a
            # correction; never prefill None or the rejected prediction,
            # which would be logged immediately as the "correction".
            previous = st.session_state.get("corrected_prediction")
            if not previous or previous == prediction:
                previous = ""
            typed_role = st.text_input(
                "Please provide the correct role:",
                value=previous,
                key="correction_input",
            )
            # Whitespace-only input is not a usable role name.
            corrected_prediction = (typed_role or "").strip()
            st.session_state.corrected_prediction = corrected_prediction
        else:
            corrected_prediction = prediction
            st.session_state.corrected_prediction = prediction

        # A "Yes" is always recorded; a "No" only once a role was supplied.
        if feedback == "Yes" or corrected_prediction:
            now = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
            log_or_update(
                serial_id=st.session_state.serial_id,
                timestamp=now,
                resume_text=extracted_text,
                model_prediction=prediction,
                corrected_prediction=corrected_prediction,
            )
            st.success(f"✅ Final role recorded: `{corrected_prediction}`")
else:
    st.info("📤 Please upload a supported file (PDF, TXT).")