Nikhil-Murade committed on
Commit
be5fcd5
·
verified ·
1 Parent(s): 38a82b1

Upload 6 files

Browse files
clustered_job_titles.csv ADDED
The diff for this file is too large to render. See raw diff
 
kmeans_model.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4f421ac36ca2a1e639b917828b35b3b3655bb38957139462d5e08eeeaa89e3f4
3
+ size 1895411
main.py ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import joblib
3
+ import numpy as np
4
+ from preprocessing import preprocess_single_title
5
+ import logging
6
+
# Configure logging for errors
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Number of highest-weighted vocabulary terms shown for each cluster.
TOP_WORD_COUNT = 5


@st.cache_resource
def load_models():
    """Load the fitted vectorizer and KMeans model from disk.

    Streamlit re-executes this script on every widget interaction; the
    ``st.cache_resource`` decorator keeps the deserialized objects alive
    between reruns so the pickle files are read only once per server process.

    Returns:
        A ``(vectorizer, kmeans_model)`` tuple.
    """
    # NOTE(security): joblib.load unpickles arbitrary Python objects. Only
    # load artifacts that ship with this app; never user-supplied files.
    fitted_vectorizer = joblib.load('vectorizer_model.pkl')
    fitted_kmeans = joblib.load('kmeans_model.pkl')
    return fitted_vectorizer, fitted_kmeans


def get_top_words(cluster, vectorizer, kmeans_model, top_n=TOP_WORD_COUNT):
    """Return the ``top_n`` vocabulary terms with the largest centroid weight.

    Args:
        cluster: Index of the cluster whose centroid is inspected.
        vectorizer: Fitted vectorizer exposing ``get_feature_names_out()``.
        kmeans_model: Fitted model exposing ``cluster_centers_``.
        top_n: How many terms to return (default ``TOP_WORD_COUNT``).

    Returns:
        A list of feature names ordered from highest to lowest weight.
    """
    feature_names = vectorizer.get_feature_names_out()
    # argsort is ascending, so reverse it before taking the first top_n.
    top_word_indices = np.argsort(kmeans_model.cluster_centers_[cluster])[::-1][:top_n]
    return [feature_names[i] for i in top_word_indices]


# Load the pre-trained models (cached across reruns)
vectorizer, kmeans_model = load_models()

# Streamlit app title
st.title("Job Title Clustering App")

# Display the silhouette score in the sidebar. The value is a fixed string,
# not something computed at runtime.
st.sidebar.header("Insights")  # This creates a big heading in the sidebar
st.sidebar.write("Silhouette Score: 0.5840")

# Input fields for job titles; strip so whitespace-only input counts as empty
job_title_1 = st.text_input("Enter the first job title:").strip()
job_title_2 = st.text_input("Enter the second job title:").strip()


# Button to process the inputs
if st.button("Submit"):
    if not job_title_1 or not job_title_2:
        st.error("Please enter both job titles.")
    else:
        try:
            # Preprocess the input job titles
            clean_title_1 = preprocess_single_title(job_title_1)
            clean_title_2 = preprocess_single_title(job_title_2)

            # Log the preprocessed titles
            logger.info("Preprocessed Title 1: %s", clean_title_1)
            logger.info("Preprocessed Title 2: %s", clean_title_2)

            if not clean_title_1 or not clean_title_2:
                # Preprocessing removed every token (e.g. the title held only
                # location words or bracketed text). An empty string would
                # vectorize to an all-zero row, so refuse to cluster it.
                st.error(
                    "A job title was empty after cleaning. "
                    "Please enter a more descriptive job title."
                )
            else:
                # Vectorize both titles in one call and predict their clusters
                title_vectors = vectorizer.transform([clean_title_1, clean_title_2])
                cluster_1, cluster_2 = kmeans_model.predict(title_vectors)

                # Display results
                st.write(f"Cluster for '{job_title_1}': {cluster_1}")
                st.write(f"Cluster for '{job_title_2}': {cluster_2}")

                if cluster_1 == cluster_2:
                    st.success(f"The job titles '{job_title_1}' and '{job_title_2}' belong to the same cluster!")
                else:
                    st.warning(f"The job titles '{job_title_1}' and '{job_title_2}' do not belong to the same cluster.")

                # Display top words for the predicted clusters; dict.fromkeys
                # de-duplicates while keeping order, so a shared cluster is
                # listed only once.
                for cluster in dict.fromkeys([cluster_1, cluster_2]):
                    top_words = get_top_words(cluster, vectorizer, kmeans_model)
                    st.write(f"Top words in Cluster {cluster}: {', '.join(top_words)}")

        except Exception as e:
            # Top-level UI boundary: record the traceback and surface the
            # message instead of letting the app crash.
            logger.exception("Error occurred: %s", e)
            st.error(f"An error occurred: {e}")
preprocessing.py ADDED
@@ -0,0 +1,92 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+ import logging
3
+
4
+ # Configure logging
5
+ logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
6
+ logger = logging.getLogger(__name__)
7
+
class JobTitlePreprocessor():
    """Clean and standardize free-text job titles.

    Cleaning is split into two passes that callers may run separately:

    * ``remove_location_unwanted_words_brackets`` strips unwanted words,
      bracketed content, non-alphanumeric characters and numbers greater
      than 10.
    * ``preprocess`` lowercases the title, expands abbreviations, converts
      Roman numerals I-V to digits and collapses seniority + data-science
      phrases into a canonical form.
    """

    # Numbers strictly greater than 10: 11-19, 20-99, or three or more digits.
    # The value 10 itself is intentionally not matched.
    _NUMBER_ABOVE_TEN = r'\b(?:1[1-9]|[2-9][0-9]|[1-9][0-9]{2,})\b'

    # (pattern, replacement) pairs, applied in order with re.IGNORECASE.
    # For sr/jr a trailing period is consumed only when no word character
    # follows it, so "sr. engineer" -> "Senior engineer" while "sr.x" keeps
    # its dot as a separator.
    _TERM_REPLACEMENTS = [
        (r'\b(?:sr|senior)\b(?:\.(?!\w))?', 'Senior'),
        (r'\b(?:jr|junior)\b(?:\.(?!\w))?', 'Junior'),
        (r'\b(?:aiml|ml|machinelearning|machine[_\-]learning)\b', 'Machine Learning'),
        (r'\b(?:genai|generative[_\-]ai|generativeai)\b', 'Generative AI'),
        (r'\b(?:nlp|natural[_\-]language[_\-]processing|natural language processing)\b', 'NLP'),
        (r'\bi\b', '1'),
        (r'\bii\b', '2'),
        (r'\biii\b', '3'),
        (r'\biv\b', '4'),
        (r'\bv\b', '5'),
    ]

    # Everything from the seniority word up to the first following
    # "data scientist"/"data science" is replaced by the canonical phrase.
    # Applied in order; a later rule can rewrite an earlier rule's output.
    _DATA_SCIENCE_TITLES = [
        (r'\b(director|dir)\b.*?(data\s*scientist|data\s*science)', 'Director Data Scientist'),
        (r'\b(manager|mgr)\b.*?(data\s*scientist|data\s*science)', 'Manager Data Scientist'),
        (r'\b(lead)\b.*?(data\s*scientist|data\s*science)', 'Lead Data Scientist'),
        (r'\b(associates?)\b.*?(data\s*scientist|data\s*science)', 'Associate Data Scientist'),
        (r'\b(applied)\b.*?(data\s*scientist|data\s*science)', 'Applied Data Scientist'),
        (r'\b(intern|internship|trainee)\b.*?(data\s*scientist|data\s*science)', 'Intern Data Scientist'),
    ]

    def __init__(self):
        # Whole words/phrases removed from titles (matched case-insensitively).
        # 'flexible location' precedes 'location' so the phrase goes as a unit.
        self.unwanted_words = ['remote', 'hybrid', 'flexible location', 'location', 'open to work', 'role', 'job', 'level', 'remot']

    def _strip_unwanted_words(self, title: str) -> str:
        """Delete every whole-word occurrence of ``self.unwanted_words``."""
        for word in self.unwanted_words:
            pattern = r'\b{}\b'.format(re.escape(word))
            title = re.sub(pattern, '', title, flags=re.IGNORECASE)
        return title

    def remove_location_unwanted_words_brackets(self, title: str) -> str:
        """Strip noise from a title without changing its letter case.

        Removes, in order: unwanted words, content inside ``[]``/``()``/``{}``,
        every character that is not an ASCII letter, digit or whitespace, and
        standalone numbers greater than 10. Whitespace runs are then collapsed.

        Args:
            title: Raw job title.

        Returns:
            The cleaned title. Non-string input is returned unchanged, which
            mirrors ``preprocess``.
        """
        if not isinstance(title, str):
            return title

        # Remove unwanted words
        title = self._strip_unwanted_words(title)

        # Remove content within brackets (non-greedy, so each pair is separate)
        title = re.sub(r'\[.*?\]|\(.*?\)|\{.*?\}', '', title)

        # Remove any non-alphanumeric characters (keeping spaces).
        # This is ASCII-only: accented letters are dropped as well.
        title = re.sub(r'[^a-zA-Z0-9\s]', '', title)

        # Remove numbers greater than 10 (10 and below are kept)
        title = re.sub(self._NUMBER_ABOVE_TEN, '', title)

        # Clean up extra spaces
        return re.sub(r'\s+', ' ', title).strip()

    def preprocess(self, title: str) -> str:
        """Lowercase a title and standardize its terminology.

        The title is lowercased first. The replacement strings are
        capitalized, so the returned value is mixed case (for example
        ``"Senior data engineer 2"``).

        Args:
            title: Job title, raw or already passed through
                ``remove_location_unwanted_words_brackets``.

        Returns:
            The standardized title. Non-string input is returned unchanged.
        """
        if not isinstance(title, str):
            return title

        # Convert to lowercase
        title = title.lower()

        # Remove unwanted words
        title = self._strip_unwanted_words(title)

        # Replace specific terms and Roman numerals
        for pattern, replacement in self._TERM_REPLACEMENTS:
            title = re.sub(pattern, replacement, title, flags=re.IGNORECASE)

        # Handle specific Data Scientist cases
        for pattern, replacement in self._DATA_SCIENCE_TITLES:
            title = re.sub(pattern, replacement, title, flags=re.IGNORECASE)

        # Clean up extra spaces
        return re.sub(r'\s+', ' ', title).strip()
76
+
77
+
def preprocess_single_title(title: str) -> str:
    """Run both cleaning passes of ``JobTitlePreprocessor`` on one title.

    The title first goes through ``remove_location_unwanted_words_brackets``
    and the result is then fed to ``preprocess``.

    Args:
        title: Raw job title.

    Returns:
        The cleaned, standardized title.
    """
    cleaner = JobTitlePreprocessor()
    stripped = cleaner.remove_location_unwanted_words_brackets(title)
    return cleaner.preprocess(stripped)
83
+
84
+
if __name__ == "__main__":
    # Manual smoke check: clean one sample title and log it before and after.
    sample_title = "Senior Remote Machine Learning Data Scientist (Manager)"
    cleaned_sample = preprocess_single_title(sample_title)
    for message in (
        f"Original title: {sample_title}",
        f"Preprocessed title: {cleaned_sample}",
    ):
        logger.info(message)
91
+
92
+
requirements.txt ADDED
@@ -0,0 +1,427 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ aiohttp==3.9.5
2
+ aiosignal==1.3.1
3
+ alembic==1.13.2
4
+ altair==5.3.0
5
+ annotated-types==0.7.0
6
+ anyio==4.4.0
7
+ appdirs==1.4.4
8
+ apturl==0.5.2
9
+ argcomplete==1.10.3
10
+ argon2-cffi==23.1.0
11
+ argon2-cffi-bindings==21.2.0
12
+ arrow==1.3.0
13
+ arxiv==2.1.3
14
+ asgiref==3.8.1
15
+ asttokens==2.4.1
16
+ async-lru==2.0.4
17
+ async-timeout==4.0.3
18
+ attrs==23.2.0
19
+ auth0-python==4.7.1
20
+ Babel==2.15.0
21
+ backoff==2.2.1
22
+ bcrypt==4.2.0
23
+ beautifulsoup4==4.12.3
24
+ black==24.4.2
25
+ bleach==6.1.0
26
+ blinker==1.8.2
27
+ blis==0.7.11
28
+ boto3==1.35.14
29
+ botocore==1.35.14
30
+ Brlapi==0.8.3
31
+ build==1.2.2
32
+ cachetools==5.3.3
33
+ catalogue==2.0.10
34
+ certifi==2024.6.2
35
+ cffi==1.16.0
36
+ chardet==3.0.4
37
+ charset-normalizer==3.3.2
38
+ chroma-hnswlib==0.7.3
39
+ chromadb==0.4.24
40
+ click==8.1.7
41
+ cloudpathlib==0.18.1
42
+ cog==0.9.7
43
+ cohere==5.9.1
44
+ colorama==0.4.4
45
+ coloredlogs==15.0.1
46
+ comm==0.2.2
47
+ command-not-found==0.3
48
+ compressed_rtf==1.0.6
49
+ confection==0.1.5
50
+ crewai==0.55.2
51
+ crewai-tools==0.12.0
52
+ cryptography==42.0.8
53
+ cupshelpers==1.0
54
+ cymem==2.0.8
55
+ dataclasses-json==0.6.6
56
+ datasets==3.0.0
57
+ dbus-python==1.2.18
58
+ debugpy==1.8.1
59
+ decorator==5.1.1
60
+ defer==1.0.6
61
+ defusedxml==0.7.1
62
+ Deprecated==1.2.14
63
+ deprecation==2.1.0
64
+ dill==0.3.8
65
+ dirtyjson==1.0.8
66
+ distlib==0.3.8
67
+ distro==1.9.0
68
+ distro-info==1.1+ubuntu0.2
69
+ dnspython==2.6.1
70
+ docker==7.1.0
71
+ docstring_parser==0.16
72
+ docx2txt==0.8
73
+ duplicity==0.8.21
74
+ ebcdic==1.1.1
75
+ embedchain==0.1.121
76
+ entrypoints==0.4
77
+ exceptiongroup==1.2.1
78
+ executing==2.0.1
79
+ extract-msg==0.28.7
80
+ fastapi==0.114.0
81
+ fastavro==1.9.7
82
+ fasteners==0.14.1
83
+ fastjsonschema==2.19.1
84
+ fasttext==0.9.3
85
+ feedparser==6.0.11
86
+ filelock==3.14.0
87
+ fireworks-ai==0.15.1
88
+ Flask==3.0.3
89
+ flatbuffers==24.3.25
90
+ fqdn==1.5.1
91
+ frozenlist==1.4.1
92
+ fsspec==2024.6.0
93
+ future==0.18.2
94
+ gitdb==4.0.11
95
+ GitPython==3.1.43
96
+ google-api-core==2.19.2
97
+ google-auth==2.34.0
98
+ google-cloud-aiplatform==1.65.0
99
+ google-cloud-bigquery==3.25.0
100
+ google-cloud-core==2.4.1
101
+ google-cloud-resource-manager==1.12.5
102
+ google-cloud-storage==2.18.2
103
+ google-crc32c==1.6.0
104
+ google-resumable-media==2.7.2
105
+ googleapis-common-protos==1.65.0
106
+ gptcache==0.1.44
107
+ greenlet==3.0.3
108
+ grpc-google-iam-v1==0.13.1
109
+ grpcio==1.64.1
110
+ grpcio-status==1.62.3
111
+ grpcio-tools==1.62.2
112
+ h11==0.14.0
113
+ h2==4.1.0
114
+ hpack==4.0.0
115
+ httpcore==1.0.5
116
+ httplib2==0.20.2
117
+ httptools==0.6.1
118
+ httpx==0.27.0
119
+ httpx-sse==0.4.0
120
+ huggingface-hub==0.24.6
121
+ humanfriendly==10.0
122
+ hyperframe==6.0.1
123
+ idna==3.7
124
+ IMAPClient==2.1.0
125
+ importlib_metadata==7.1.0
126
+ importlib_resources==6.4.4
127
+ iniconfig==2.0.0
128
+ instructor==1.3.3
129
+ ipykernel==6.29.4
130
+ ipython==8.24.0
131
+ ipywidgets==8.1.2
132
+ isoduration==20.11.0
133
+ itsdangerous==2.2.0
134
+ jedi==0.19.1
135
+ jeepney==0.7.1
136
+ Jinja2==3.1.4
137
+ jiter==0.4.2
138
+ jmespath==1.0.1
139
+ joblib==1.4.2
140
+ json5==0.9.25
141
+ json_repair==0.25.3
142
+ jsonpatch==1.33
143
+ jsonpointer==2.4
144
+ jsonref==1.1.0
145
+ jsonschema==4.22.0
146
+ jsonschema-specifications==2023.12.1
147
+ jupyter==1.0.0
148
+ jupyter-console==6.6.3
149
+ jupyter-events==0.10.0
150
+ jupyter-lsp==2.2.5
151
+ jupyter_client==8.6.1
152
+ jupyter_core==5.7.2
153
+ jupyter_server==2.14.0
154
+ jupyter_server_terminals==0.5.3
155
+ jupyterlab==4.1.8
156
+ jupyterlab_pygments==0.3.0
157
+ jupyterlab_server==2.27.1
158
+ jupyterlab_widgets==3.0.10
159
+ kazam==1.4.5
160
+ keyring==23.5.0
161
+ kubernetes==30.1.0
162
+ lancedb==0.5.7
163
+ langchain==0.2.16
164
+ langchain-cohere==0.1.9
165
+ langchain-community==0.2.16
166
+ langchain-core==0.2.38
167
+ langchain-experimental==0.0.65
168
+ langchain-fireworks==0.1.7
169
+ langchain-mongodb==0.1.9
170
+ langchain-openai==0.1.9
171
+ langchain-text-splitters==0.2.4
172
+ langcodes==3.4.0
173
+ langsmith==0.1.120
174
+ language-selector==0.1
175
+ language_data==1.2.0
176
+ launchpadlib==1.10.16
177
+ lazr.restfulclient==0.14.4
178
+ lazr.uri==1.0.6
179
+ llama-index==0.10.43
180
+ llama-index-agent-openai==0.2.7
181
+ llama-index-cli==0.1.12
182
+ llama-index-core==0.10.43
183
+ llama-index-embeddings-openai==0.1.10
184
+ llama-index-indices-managed-llama-cloud==0.1.6
185
+ llama-index-legacy==0.9.48
186
+ llama-index-llms-openai==0.1.22
187
+ llama-index-multi-modal-llms-openai==0.1.6
188
+ llama-index-program-openai==0.1.6
189
+ llama-index-question-gen-openai==0.1.3
190
+ llama-index-readers-file==0.1.23
191
+ llama-index-readers-llama-parse==0.1.4
192
+ llama-index-vector-stores-mongodb==0.1.5
193
+ llama-index-vector-stores-qdrant==0.2.8
194
+ llama-parse==0.4.4
195
+ llamaindex-py-client==0.1.19
196
+ llmsherpa==0.1.4
197
+ load-dotenv==0.1.0
198
+ lockfile==0.12.2
199
+ loguru==0.7.2
200
+ louis==3.20.0
201
+ lxml==5.2.2
202
+ macaroonbakery==1.3.1
203
+ Mako==1.1.3
204
+ marisa-trie==1.2.0
205
+ markdown-it-py==3.0.0
206
+ MarkupSafe==2.1.5
207
+ marshmallow==3.21.2
208
+ matplotlib-inline==0.1.7
209
+ mdurl==0.1.2
210
+ mem0ai==0.0.20
211
+ mistune==3.0.2
212
+ mmh3==4.1.0
213
+ monotonic==1.6
214
+ more-itertools==8.10.0
215
+ MouseInfo==0.1.3
216
+ mpmath==1.3.0
217
+ multidict==6.0.5
218
+ multiprocess==0.70.16
219
+ murmurhash==1.0.10
220
+ mypy-extensions==1.0.0
221
+ nbclient==0.10.0
222
+ nbconvert==7.16.4
223
+ nbformat==5.10.4
224
+ nest-asyncio==1.6.0
225
+ netifaces==0.11.0
226
+ networkx==3.3
227
+ nltk==3.8.1
228
+ nodeenv==1.9.1
229
+ notebook==7.1.3
230
+ notebook_shim==0.2.4
231
+ numpy==1.26.4
232
+ oauthlib==3.2.2
233
+ olefile==0.46
234
+ onnxruntime==1.19.2
235
+ openai==1.45.0
236
+ opentelemetry-api==1.27.0
237
+ opentelemetry-exporter-otlp-proto-common==1.27.0
238
+ opentelemetry-exporter-otlp-proto-grpc==1.27.0
239
+ opentelemetry-exporter-otlp-proto-http==1.27.0
240
+ opentelemetry-instrumentation==0.48b0
241
+ opentelemetry-instrumentation-asgi==0.48b0
242
+ opentelemetry-instrumentation-fastapi==0.48b0
243
+ opentelemetry-proto==1.27.0
244
+ opentelemetry-sdk==1.27.0
245
+ opentelemetry-semantic-conventions==0.48b0
246
+ opentelemetry-util-http==0.48b0
247
+ orjson==3.10.7
248
+ outcome==1.3.0.post0
249
+ overrides==7.7.0
250
+ packaging==24.0
251
+ pandas==2.2.2
252
+ pandocfilters==1.5.1
253
+ parameterized==0.9.0
254
+ paramiko==2.9.3
255
+ parso==0.8.4
256
+ pathspec==0.12.1
257
+ pdfminer.six==20191110
258
+ pexpect==4.8.0
259
+ pillow==10.3.0
260
+ pipenv==2024.0.0
261
+ platformdirs==4.2.2
262
+ pluggy==1.5.0
263
+ portalocker==2.8.2
264
+ posthog==3.6.3
265
+ preshed==3.0.9
266
+ prometheus_client==0.20.0
267
+ prompt-toolkit==3.0.43
268
+ proto-plus==1.24.0
269
+ protobuf==4.25.3
270
+ psutil==5.9.8
271
+ ptyprocess==0.7.0
272
+ pulsar-client==3.5.0
273
+ pure-eval==0.2.2
274
+ py==1.11.0
275
+ pyarrow==16.1.0
276
+ pyasn1==0.6.0
277
+ pyasn1_modules==0.4.0
278
+ PyAutoGUI==0.9.54
279
+ pybind11==2.13.5
280
+ pycairo==1.20.1
281
+ pycparser==2.22
282
+ pycryptodome==3.20.0
283
+ pycups==2.0.1
284
+ pydantic==2.9.2
285
+ pydantic_core==2.23.4
286
+ pydeck==0.9.1
287
+ PyGetWindow==0.0.9
288
+ Pygments==2.18.0
289
+ PyGObject==3.42.1
290
+ PyJWT==2.9.0
291
+ pylance==0.9.18
292
+ pymacaroons==0.13.0
293
+ pymongo==4.7.3
294
+ Pympler==1.0.1
295
+ PyMsgBox==1.0.9
296
+ PyMuPDF==1.24.10
297
+ PyMuPDFb==1.24.10
298
+ PyNaCl==1.5.0
299
+ pyparsing==2.4.7
300
+ pypdf==4.2.0
301
+ pyperclip==1.9.0
302
+ PyPika==0.48.9
303
+ pyproject_hooks==1.1.0
304
+ PyRect==0.2.0
305
+ pyRFC3339==1.1
306
+ pyright==1.1.379
307
+ pysbd==0.3.4
308
+ PyScreeze==0.1.30
309
+ PySocks==1.7.1
310
+ pytest==8.3.2
311
+ python-apt==2.4.0+ubuntu3
312
+ python-dateutil==2.9.0.post0
313
+ python-debian==0.1.43+ubuntu1.1
314
+ python-dotenv==1.0.1
315
+ python-json-logger==2.0.7
316
+ python-pptx==0.6.23
317
+ python3-xlib==0.15
318
+ pytube==15.0.0
319
+ pytweening==1.2.0
320
+ pytz==2024.1
321
+ pyxdg==0.27
322
+ PyYAML==6.0.1
323
+ pyzmq==26.0.3
324
+ qdrant-client==1.9.1
325
+ qtconsole==5.5.2
326
+ QtPy==2.4.1
327
+ ratelimiter==1.2.0.post0
328
+ referencing==0.35.1
329
+ regex==2024.7.24
330
+ reportlab==3.6.8
331
+ requests==2.32.3
332
+ requests-oauthlib==2.0.0
333
+ retry==0.9.2
334
+ rfc3339-validator==0.1.4
335
+ rfc3986-validator==0.1.1
336
+ rich==13.7.1
337
+ rpds-py==0.18.1
338
+ rsa==4.9
339
+ s3transfer==0.10.2
340
+ schema==0.7.7
341
+ scikit-learn==1.0.2
342
+ scipy==1.13.1
343
+ SecretStorage==3.3.1
344
+ selenium==4.23.1
345
+ semver==3.0.2
346
+ Send2Trash==1.8.3
347
+ sgmllib3k==1.0.0
348
+ shapely==2.0.6
349
+ shellingham==1.5.4
350
+ six==1.16.0
351
+ smart-open==7.0.4
352
+ smmap==5.0.1
353
+ sniffio==1.3.1
354
+ sortedcontainers==2.4.0
355
+ soupsieve==2.5
356
+ spacy==3.7.5
357
+ spacy-legacy==3.0.12
358
+ spacy-loggers==1.0.5
359
+ SpeechRecognition==3.8.1
360
+ SQLAlchemy==2.0.34
361
+ srsly==2.4.8
362
+ stack-data==0.6.3
363
+ starlette==0.38.5
364
+ streamlit==1.35.0
365
+ striprtf==0.0.26
366
+ structlog==24.1.0
367
+ sympy==1.13.2
368
+ systemd-python==234
369
+ tabulate==0.9.0
370
+ tenacity==8.3.0
371
+ termcolor==2.4.0
372
+ terminado==0.18.1
373
+ textract==1.6.5
374
+ thinc==8.2.4
375
+ threadpoolctl==3.5.0
376
+ tiktoken==0.7.0
377
+ tinycss2==1.3.0
378
+ tokenizers==0.20.0
379
+ toml==0.10.2
380
+ tomli==2.0.1
381
+ toolz==0.12.1
382
+ tornado==6.4
383
+ tqdm==4.66.4
384
+ traitlets==5.14.3
385
+ trio==0.26.2
386
+ trio-websocket==0.11.1
387
+ trubrics==1.3.6
388
+ typeguard==2.13.3
389
+ typer==0.12.3
390
+ types-python-dateutil==2.9.0.20240316
391
+ types-requests==2.32.0.20240907
392
+ typing-inspect==0.9.0
393
+ typing_extensions==4.12.1
394
+ tzdata==2024.1
395
+ tzlocal==5.2
396
+ ubuntu-drivers-common==0.0.0
397
+ ubuntu-pro-client==8001
398
+ ufw==0.36.1
399
+ unattended-upgrades==0.1
400
+ uri-template==1.3.0
401
+ urllib3==2.2.1
402
+ usb-creator==0.3.7
403
+ uvicorn==0.29.0
404
+ uvloop==0.19.0
405
+ validators==0.28.3
406
+ virtualenv==20.26.1
407
+ wadllib==1.3.6
408
+ wasabi==1.1.3
409
+ watchdog==4.0.1
410
+ watchfiles==0.21.0
411
+ wcwidth==0.2.13
412
+ weasel==0.4.1
413
+ webcolors==1.13
414
+ webencodings==0.5.1
415
+ websocket-client==1.8.0
416
+ websockets==12.0
417
+ Werkzeug==3.0.4
418
+ widgetsnbextension==4.0.10
419
+ wrapt==1.16.0
420
+ wsproto==1.2.0
421
+ xdg==5
422
+ xkit==0.0.0
423
+ xlrd==1.2.0
424
+ XlsxWriter==3.2.0
425
+ xxhash==3.5.0
426
+ yarl==1.9.4
427
+ zipp==3.19.2
vectorizer_model.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8f8ad0016f36e0a63823957b2d7b4184cc1bdfe1e6b0f7b67af25f2adfbf1e40
3
+ size 143174