Spaces:

sid22669
/

Resume_classifier

Sleeping

App Files Files Community

sid22669 committed on May 21, 2025

Commit

bb9a1f9

verified ·

1 Parent(s): 893cdba

Upload 4 files

Browse files

Files changed (5) hide show

.gitattributes +2 -0
app.py +136 -0
corrections_log.csv +19 -0
resume_classifier +3 -0
resume_vectorizer +3 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+resume_classifier filter=lfs diff=lfs merge=lfs -text
+resume_vectorizer filter=lfs diff=lfs merge=lfs -text

app.py ADDED Viewed

	@@ -0,0 +1,136 @@

+import streamlit as st
+import joblib
+import re
+import PyPDF2
+import pandas as pd
+import os
+import uuid
+from datetime import datetime
+from docx import Document
+import tempfile
# Deserialize the classifier and the vectorizer that feeds it. Both artifact
# paths are relative, so they are resolved against the working directory.
classifier_model, resume_vectorizer = (
    joblib.load(artifact_name)
    for artifact_name in ('resume_classifier', 'resume_vectorizer')
)
def read_file(file_path):
    """Extract plain text from a resume file, picking a reader by extension.

    Supported extensions (case-insensitive): ``.pdf`` (PyPDF2), ``.txt``
    and ``.doc``/``.docx`` (textract, imported lazily so it is only required
    when a Word file is actually processed).

    Args:
        file_path: Path of the file on disk.

    Returns:
        The extracted text with surrounding whitespace stripped. This function
        never raises; failures are reported through the return value instead:

        * ``"Unsupported file type."`` for any other extension,
        * ``"Error reading Word file with textract: ..."`` when textract fails,
        * ``"Error reading file: ..."`` for any other failure.
    """
    try:
        ext = os.path.splitext(file_path)[1].lower()
        if ext == ".pdf":
            with open(file_path, "rb") as file:
                reader = PyPDF2.PdfReader(file)
                # Pages must be extracted while the file handle is still open.
                page_texts = [page.extract_text() for page in reader.pages]
            # Pages that yield no text (None or "") are skipped.
            return "\n".join(text for text in page_texts if text).strip()
        elif ext == ".txt":
            # "utf-8-sig" drops a leading byte-order mark (which str.strip()
            # would not remove), and errors="replace" keeps the readable part
            # of a file that is not valid UTF-8 instead of rejecting it.
            with open(file_path, "r", encoding="utf-8-sig", errors="replace") as file:
                return file.read().strip()
        elif ext in (".doc", ".docx"):
            try:
                import textract
                text = textract.process(file_path)
                return text.decode("utf-8").strip()
            except Exception as e:
                return f"Error reading Word file with textract: {str(e)}"
        else:
            return "Unsupported file type."
    except Exception as e:
        # Deliberate catch-all: callers rely on a message string, not an exception.
        return f"Error reading file: {str(e)}"
def clean_resume(text):
    """Normalise resume text for the vectorizer.

    Every character that is not an ASCII letter is replaced by a single space
    (runs are not collapsed), and the result is then lowercased.
    """
    letters_only = re.sub(r'[^a-zA-Z]', ' ', text)
    return letters_only.lower()
def log_or_update(serial_id, timestamp, resume_text, model_prediction,
                  corrected_prediction, log_file="corrections_log.csv"):
    """Record a prediction and its user feedback in a CSV log.

    If a row with ``serial_id`` already exists, only its
    ``corrected_prediction`` is overwritten (the original timestamp and text
    are kept). Otherwise a new row is appended. The log file is created when
    it is missing or empty.

    Args:
        serial_id: Identifier of the upload; compared as a string.
        timestamp: Timestamp string stored with a newly created row.
        resume_text: Extracted resume text; only the first 500 characters are
            stored to limit log size and the amount of personal data kept.
        model_prediction: Role predicted by the model.
        corrected_prediction: Role confirmed or supplied by the user.
        log_file: Path of the CSV log. Defaults to ``corrections_log.csv``
            in the working directory.
    """
    serial_id = str(serial_id)
    new_row = {
        "serial_id": serial_id,
        "timestamp": timestamp,
        "resume_text": (resume_text or "")[:500],
        "model_prediction": model_prediction,
        "corrected_prediction": corrected_prediction,
    }

    try:
        # Read every column as text so that an all-empty column is not
        # inferred as float before a string is assigned into it.
        df = pd.read_csv(log_file, dtype=str)
    except (FileNotFoundError, pd.errors.EmptyDataError):
        # Missing file or a zero-byte file: start a fresh log.
        df = None

    if df is None or df.empty:
        df = pd.DataFrame([new_row])
    else:
        if "serial_id" in df.columns:
            already_logged = df["serial_id"] == serial_id
        else:
            already_logged = pd.Series(False, index=df.index)

        if already_logged.any():
            df.loc[already_logged, "corrected_prediction"] = corrected_prediction
        else:
            df = pd.concat([df, pd.DataFrame([new_row])], ignore_index=True)

    df.to_csv(log_file, index=False)
# Streamlit UI: upload a resume, show the predicted role, collect feedback and
# record the outcome through log_or_update().
st.title("📄 Resume Role Classifier")
uploaded_file = st.file_uploader(
    "Upload your resume (PDF, TXT, DOC, or DOCX format)",
    type=["pdf", "txt", "doc", "docx"]
)
if uploaded_file:
    # A change of file name is treated as a new upload: it gets a fresh
    # serial_id so its feedback goes to its own log row.
    if st.session_state.get("uploaded_file_name") != uploaded_file.name:
        st.session_state.uploaded_file_name = uploaded_file.name
        st.session_state.serial_id = str(uuid.uuid4())
        st.session_state.corrected_prediction = None  # Correction kept for this session
        # Drop whatever was typed for the previous file; otherwise the old
        # correction would be shown again and logged against the new serial_id.
        if "correction_input" in st.session_state:
            del st.session_state["correction_input"]

    # read_file() dispatches on the extension, so the temp copy keeps the
    # original suffix. getvalue() returns the whole upload regardless of the
    # current stream position.
    suffix = os.path.splitext(uploaded_file.name)[1]
    with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as temp_file:
        temp_file.write(uploaded_file.getvalue())
        temp_path = temp_file.name
    try:
        extracted_text = read_file(temp_path)
    finally:
        # Always remove the temp copy, even if extraction raises.
        os.remove(temp_path)

    # read_file() reports failures as message strings. Match only those exact
    # prefixes: a plain `"Error" in text` test would also reject any resume
    # that merely contains the word "Error".
    extraction_failed = (
        not extracted_text.strip()
        or extracted_text.startswith(("Error reading", "Unsupported file type."))
    )

    if extraction_failed:
        st.warning("Could not extract text from the uploaded file.")
    else:
        cleaned_text = clean_resume(extracted_text)
        new_input = resume_vectorizer.transform([cleaned_text])
        prediction = classifier_model.predict(new_input)[0]
        st.write(f"**Predicted Role:** `{prediction}`")

        # NOTE(review): the radio appears to start on its first option ("Yes"),
        # which would log the prediction as confirmed before the user has
        # answered — confirm whether that is intended.
        feedback = st.radio("Is this prediction correct?", ("Yes", "No"), key="feedback_radio")
        corrected_prediction = prediction
        if feedback == "No":
            # Prefill with the correction remembered for this session, if any.
            typed_role = st.text_input(
                "Please provide the correct role:",
                value=st.session_state.get("corrected_prediction") or "",
                key="correction_input",
            )
            # Whitespace-only input counts as "no correction given".
            corrected_prediction = (typed_role or "").strip()
            st.session_state.corrected_prediction = corrected_prediction
        else:
            st.session_state.corrected_prediction = prediction

        # Log/update on "Yes", or on "No" once a non-empty correction exists.
        if feedback == "Yes" or (feedback == "No" and corrected_prediction):
            now = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
            log_or_update(
                serial_id=st.session_state.serial_id,
                timestamp=now,
                resume_text=extracted_text,
                model_prediction=prediction,
                corrected_prediction=corrected_prediction
            )
            st.success(f"✅ Final role recorded: `{corrected_prediction}`")
else:
    st.info("📤 Please upload a supported file (PDF, TXT, DOC, DOCX).")

corrections_log.csv ADDED Viewed

	@@ -0,0 +1,19 @@

+serial_id,timestamp,resume_text,model_prediction,corrected_prediction
+a54626de-ec83-4288-90ce-09155fe0bf1d,5/21/25 14:35,"Siddharth V Professional Summary
•AI Development Expertise: Skilled in developing and fine-tuning AI models, including Generative Adversarial Networks (GANs), Large Language Models (LLMs), and Retrieval-Augmented Generation (RAG) systems.
•Programming Proficiency: Strong command of Python (Pandas, NumPy, TensorFlow, PyTorch), SQL, and data visualization libraries (Matplotlib, Seaborn).
•Machine Learning & Data Science: Experience in building and deploying machine learning models for tasks like reg",Data Science,AI Developer
+f173c716-1e8c-449d-ae23-9ecfbc4564b4,5/21/25 15:54,"YOUR NAME
+Senior Java Developer with 8+ years of experience and a history of consistently delivering impactful solutions. Led a team at Java Tech Solutions, Inc., achieving a 15% increase in overall software efficiency through the successful analysis and implementation of complex functional requirements. Recognized for mentoring and training junior developers, improving coding skills by 30% within six months, and adept at introducing Agile design processes which have reduced project timelines",Java Developer,Java Developer
+508de157-108f-45b3-a8af-7bf4bd271e61,5/21/25 15:56,"POWER BI DEVELOPER RESUME
9738 46th Ave SW, Seattle, WA 98146 •  youremail@gmail.com •  (206) 534-9039
+Results-producing Power BI Developer with 5+ years experience in designing analytical reports based on company data, translating data into knowledge, as well as developing BI and analytics solutions. Resourceful, organized, and dependable problem solver seeking a position at [Company Name] to build winning environments that consistently add value, deliver measurable results, and enhance",DevOps Engineer,Power Bi Developer

resume_classifier ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:56865c6c18c1e50111094edf98b8cd1a6a9edf1bd89560cb119fde55de0eec0e
+size 1034229

resume_vectorizer ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:970dfe0c4c9694927b5666ad3515e334663845359afc0a11cceb1b79c9c8ce18
+size 144876