sid22669 committed on
Commit
bb9a1f9
·
verified ·
1 Parent(s): 893cdba

Upload 4 files

Browse files
Files changed (5) hide show
  1. .gitattributes +2 -0
  2. app.py +136 -0
  3. corrections_log.csv +19 -0
  4. resume_classifier +3 -0
  5. resume_vectorizer +3 -0
.gitattributes CHANGED
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ resume_classifier filter=lfs diff=lfs merge=lfs -text
37
+ resume_vectorizer filter=lfs diff=lfs merge=lfs -text
app.py ADDED
@@ -0,0 +1,136 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import joblib
3
+ import re
4
+ import PyPDF2
5
+ import pandas as pd
6
+ import os
7
+ import uuid
8
+ from datetime import datetime
9
+ from docx import Document
10
+ import tempfile
11
+
12
# Load model and vectorizer.
# Streamlit re-executes this script on every widget interaction, so the
# artifacts are loaded through st.cache_resource to deserialize them once per
# server process instead of on every rerun.
@st.cache_resource
def _load_artifacts():
    """Return the (classifier, vectorizer) pair loaded from the joblib files."""
    return joblib.load('resume_classifier'), joblib.load('resume_vectorizer')


classifier_model, resume_vectorizer = _load_artifacts()
15
+
16
+
17
def read_file(file_path):
    """Extract the text of a resume file, chosen by its (case-insensitive) extension.

    Supported extensions are ``.pdf`` (via PyPDF2), ``.txt`` (read as UTF-8)
    and ``.doc``/``.docx`` (via textract, imported lazily).

    This function never raises: every failure is reported through the return
    value instead.

    Returns:
        The stripped text on success; otherwise one of the sentinel strings
        ``"Unsupported file type."``, ``"Error reading Word file with
        textract: ..."`` or ``"Error reading file: ..."``.
    """
    try:
        suffix = os.path.splitext(file_path)[1].lower()

        if suffix == ".txt":
            with open(file_path, "r", encoding="utf-8") as handle:
                contents = handle.read()
            return contents.strip()

        if suffix == ".pdf":
            with open(file_path, "rb") as handle:
                pdf = PyPDF2.PdfReader(handle)
                # Pages with no extractable text are skipped.
                extracted = (page.extract_text() for page in pdf.pages)
                chunks = [chunk + "\n" for chunk in extracted if chunk]
                return "".join(chunks).strip()

        if suffix in (".doc", ".docx"):
            try:
                # Imported here so the app still starts when textract is absent.
                import textract
                raw = textract.process(file_path)
                return raw.decode("utf-8").strip()
            except Exception as e:
                return f"Error reading Word file with textract: {str(e)}"

        return "Unsupported file type."

    except Exception as e:
        return f"Error reading file: {str(e)}"
49
+
50
+
51
def clean_resume(text):
    """Normalize resume text: non-ASCII-letter characters become spaces, then lowercase.

    Each replaced character yields exactly one space, so runs of punctuation
    or digits are not collapsed.
    """
    letters_only = re.sub(r'[^a-zA-Z]', ' ', text)
    return letters_only.lower()
53
+
54
+
55
def log_or_update(serial_id, timestamp, resume_text, model_prediction,
                  corrected_prediction, log_file="corrections_log.csv"):
    """Record a prediction (and any user correction) in the CSV feedback log.

    If a row with ``serial_id`` already exists, only its
    ``corrected_prediction`` column is overwritten; the other columns,
    including the timestamp, keep their original values. Otherwise a new
    row is appended. A missing or empty log file is treated as an empty log.

    Args:
        serial_id: Identifier of the upload this feedback belongs to.
        timestamp: Time string stored with a newly created row.
        resume_text: Extracted resume text; only the first 500 characters
            are stored.
        model_prediction: Role predicted by the classifier.
        corrected_prediction: Role confirmed or supplied by the user.
        log_file: Path of the CSV log; defaults to ``corrections_log.csv``
            in the working directory.
    """
    new_row = {
        "serial_id": serial_id,
        "timestamp": timestamp,
        "resume_text": resume_text[:500],  # Truncate for privacy/log size
        "model_prediction": model_prediction,
        "corrected_prediction": corrected_prediction,
    }

    try:
        df = pd.read_csv(log_file)
    except (FileNotFoundError, pd.errors.EmptyDataError):
        # No log yet, or a zero-byte file that read_csv cannot parse:
        # start a fresh log instead of crashing.
        df = pd.DataFrame([new_row])
    else:
        matches = df["serial_id"] == serial_id
        if matches.any():
            df.loc[matches, "corrected_prediction"] = corrected_prediction
        else:
            df = pd.concat([df, pd.DataFrame([new_row])], ignore_index=True)

    df.to_csv(log_file, index=False)
77
+
78
+
79
# Streamlit UI
st.title("📄 Resume Role Classifier")

uploaded_file = st.file_uploader(
    "Upload your resume (PDF, TXT, DOC, or DOCX format)",
    type=["pdf", "txt", "doc", "docx"]
)

if uploaded_file:
    # A change of file name is treated as a new upload: it gets a fresh
    # serial_id (one log row per upload) and any earlier correction is dropped.
    if st.session_state.get("uploaded_file_name") != uploaded_file.name:
        st.session_state.uploaded_file_name = uploaded_file.name
        st.session_state.serial_id = str(uuid.uuid4())
        st.session_state.corrected_prediction = None  # To store correction during session

    # read_file works on paths, so spill the upload to a temp file that keeps
    # the original extension. getvalue() returns the whole buffer regardless of
    # the current stream position, unlike read().
    suffix = os.path.splitext(uploaded_file.name)[1]
    with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as temp_file:
        temp_file.write(uploaded_file.getvalue())
        temp_path = temp_file.name

    try:
        extracted_text = read_file(temp_path)
    finally:
        # Always clean up the temp file, even if extraction blows up.
        os.remove(temp_path)

    # read_file reports failures as sentinel strings. Match them by prefix
    # rather than searching for "Error" anywhere in the text, which would
    # reject genuine resumes that merely mention the word.
    extraction_failed = (
        not extracted_text.strip()
        or extracted_text.startswith(("Error reading", "Unsupported file type."))
    )

    if extraction_failed:
        st.warning("Could not extract text from the uploaded file.")
    else:
        cleaned_text = clean_resume(extracted_text)
        new_input = resume_vectorizer.transform([cleaned_text])
        prediction = classifier_model.predict(new_input)[0]

        st.write(f"**Predicted Role:** `{prediction}`")

        feedback = st.radio("Is this prediction correct?", ("Yes", "No"), key="feedback_radio")

        corrected_prediction = prediction

        if feedback == "No":
            # Use session state to keep corrected prediction during session.
            # The stored value may be None right after a new upload, so fall
            # back to an empty string; whitespace-only input counts as empty.
            typed_role = st.text_input(
                "Please provide the correct role:",
                value=st.session_state.get("corrected_prediction") or "",
                key="correction_input",
            )
            corrected_prediction = (typed_role or "").strip()
            st.session_state.corrected_prediction = corrected_prediction
        else:
            st.session_state.corrected_prediction = prediction

        # Log/update only if user made a choice (Yes or No + correction if No)
        if feedback == "Yes" or corrected_prediction:
            now = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
            log_or_update(
                serial_id=st.session_state.serial_id,
                timestamp=now,
                resume_text=extracted_text,
                model_prediction=prediction,
                corrected_prediction=corrected_prediction
            )
            st.success(f"✅ Final role recorded: `{corrected_prediction}`")
else:
    st.info("📤 Please upload a supported file (PDF, TXT, DOC, DOCX).")
corrections_log.csv ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ serial_id,timestamp,resume_text,model_prediction,corrected_prediction
2
+ a54626de-ec83-4288-90ce-09155fe0bf1d,5/21/25 14:35,"Siddharth V Professional Summary
3
+ •AI Development Expertise: Skilled in developing and fine-tuning AI models, including Generative Adversarial Networks (GANs), Large Language Models (LLMs), and Retrieval-Augmented Generation (RAG) systems.
4
+ •Programming Proficiency: Strong command of Python (Pandas, NumPy, TensorFlow, PyTorch), SQL, and data visualization libraries (Matplotlib, Seaborn).
5
+ •Machine Learning & Data Science: Experience in building and deploying machine learning models for tasks like reg",Data Science,AI Developer
6
+ f173c716-1e8c-449d-ae23-9ecfbc4564b4,5/21/25 15:54,"YOUR NAME
7
+
8
+
9
+
10
+ Senior Java Developer with 8+ years of experience and a history of consistently delivering impactful solutions. Led a team at Java Tech Solutions, Inc., achieving a 15% increase in overall software efficiency through the successful analysis and implementation of complex functional requirements. Recognized for mentoring and training junior developers, improving coding skills by 30% within six months, and adept at introducing Agile design processes which have reduced project timelines",Java Developer,Java Developer
11
+ 508de157-108f-45b3-a8af-7bf4bd271e61,5/21/25 15:56,"POWER BI DEVELOPER RESUME
12
+
13
+
14
+
15
+ 9738 46th Ave SW, Seattle, WA 98146 • youremail@gmail.com • (206) 534-9039
16
+
17
+
18
+
19
+ Results-producing Power BI Developer with 5+ years experience in designing analytical reports based on company data, translating data into knowledge, as well as developing BI and analytics solutions. Resourceful, organized, and dependable problem solver seeking a position at [Company Name] to build winning environments that consistently add value, deliver measurable results, and enhance",DevOps Engineer,Power Bi Developer
resume_classifier ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:56865c6c18c1e50111094edf98b8cd1a6a9edf1bd89560cb119fde55de0eec0e
3
+ size 1034229
resume_vectorizer ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:970dfe0c4c9694927b5666ad3515e334663845359afc0a11cceb1b79c9c8ce18
3
+ size 144876