Divyam Sharma committed on
Commit
97364d6
·
1 Parent(s): 8dfd9f9

Text Classifier

Browse files
app.py ADDED
@@ -0,0 +1,330 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+
3
+ # def greet(name):
4
+ # return "Hello " + name + "!!"
5
+
6
+ # iface = gr.Interface(fn=greet, inputs="text", outputs="text")
7
+ # iface.launch()
8
+
9
+
10
+
11
+
12
+ ## ------------------------------------------------------------------------------------------------
13
+
14
+ # AUTOGENERATED! DO NOT EDIT! File to edit: ../Attention Classifier Pytorch Student.ipynb.
15
+
16
+ # %% auto 0
17
+ __all__ = ['tokenizer', 'vocab_size', 'embedding_size', 'num_heads', 'embeddings_fname', 'model', 's', 'pred', 'tokens', 'attn',
18
+ 'intf', 'DocumentAttentionClassifier', 'get_label_and_weights', 'visualize_attention',
19
+ 'predict_and_visualize']
20
+
21
+ # %% ../Attention Classifier Pytorch Student.ipynb 2
22
+ import numpy as np
23
+ import torch
24
+ from torch.utils.data import Dataset, DataLoader
25
+ import json
26
+
27
+ np.random.seed(42)
28
+ import torch
29
+ import torch.nn as nn
30
+ import torch.nn.functional as F
31
+ from torch.nn import init
32
+ from collections import Counter
33
+ import random
34
+ from torch import optim
35
+
36
+ import pandas as pd
37
+ import pickle
38
+
39
+ import wandb
40
+
41
+ import numpy as np
42
+ import matplotlib
43
+ import matplotlib.pyplot as plt
44
+ from sklearn.metrics import f1_score
45
+ import seaborn as sns
46
+
47
+ # Sort of smart tokenization
48
+ from nltk.tokenize import RegexpTokenizer
49
+
50
+ # Attention plotting
51
+ import matplotlib.pyplot as plt
52
+
53
+ # %% ../Attention Classifier Pytorch Student.ipynb 4
54
+ # Load the word-to-index mapping we used for word2vec and use the same type
55
+ # of tokenizer. We'll need to use this to tokenize in the same way and keep
56
+ # the same word-to-id mapping
57
+
58
# Word-level tokenizer: every maximal run of word characters is one token.
tokenizer = RegexpTokenizer(r'\w+')

# Vocabulary mappings saved as JSON. JSON object keys are always strings, so
# index_to_word is keyed by the *string* form of each id.
# The encoding is stated explicitly so decoding does not depend on the locale
# of the machine the app is deployed on.
with open('word_to_index.json', 'r', encoding='utf-8') as f:
    word_to_index = json.load(f)
with open('index_to_word.json', 'r', encoding='utf-8') as f:
    index_to_word = json.load(f)
64
+
65
+ # %% ../Attention Classifier Pytorch Student.ipynb 6
66
class DocumentAttentionClassifier(nn.Module):
    '''
    Binary document classifier built on pre-trained word embeddings and a set
    of learned attention heads.

    Each head is one row of a (num_heads, embedding_size) parameter matrix.
    A head scores every token by a dot product with the token's embedding, the
    scores are soft-maxed over the sequence, and the resulting weights are used
    to average the token embeddings. The per-head averages are laid side by
    side and passed through a single linear unit followed by a sigmoid.
    '''

    def __init__(self, vocab_size, embedding_size, num_heads, embeddings_fname):
        '''
        Creates the new classifier model.

        vocab_size:       number of rows in the embedding table
        embedding_size:   length of each word embedding
        num_heads:        number of attention heads
        embeddings_fname: file saved with torch.save holding the pre-trained
                          embedding parameters, either the bare weight tensor
                          of shape (vocab_size, embedding_size) or an
                          nn.Embedding state dict ({'weight': tensor})
        '''
        super().__init__()

        # Save the input arguments to the state
        self.vocab_size = vocab_size
        self.embedding_size = embedding_size
        self.num_heads = num_heads

        # Start from the pre-trained (word2vec) embeddings rather than from
        # scratch. map_location='cpu' lets a file that was saved on another
        # device (e.g. 'mps' or a GPU) load on a machine without that device.
        self.embeddings = nn.Embedding(vocab_size, embedding_size)
        pretrained_embeddings = torch.load(embeddings_fname, map_location='cpu')
        if isinstance(pretrained_embeddings, dict):
            # Already a state dict for the Embedding object
            self.embeddings.load_state_dict(pretrained_embeddings)
        else:
            # Bare weight tensor
            self.embeddings.load_state_dict({'weight': pretrained_embeddings})

        # Attention heads as rows of one trainable matrix, so all heads are
        # evaluated with a single matrix multiplication in forward()
        self.attention_heads = nn.Parameter(torch.randn(num_heads, embedding_size))

        # Maps the concatenated per-head vectors to one output activation
        self.output_layer = nn.Linear(num_heads * embedding_size, 1)

    def forward(self, word_ids):
        '''
        Runs the classifier on a batch of token-id sequences.

        word_ids: integer tensor of shape [batch_size, seq_len]

        Returns (prediction, attention_weights) where prediction has shape
        [batch_size] with values in (0, 1) and attention_weights has shape
        [batch_size, seq_len, num_heads]; for every head the weights sum to 1
        across the sequence.
        '''
        # [batch_size, seq_len, embedding_size]
        embeddings = self.embeddings(word_ids)

        # 'r' vectors: dot product of every head with every word embedding
        # [batch_size, seq_len, num_heads]
        attention_scores = torch.matmul(embeddings, self.attention_heads.t())

        # 'a' vectors: softmax over the sequence dimension (dim=1), giving each
        # head a probability distribution over the tokens
        attention_weights = F.softmax(attention_scores, dim=1)

        # Attention-weighted sum of the embeddings, one vector per head
        # [batch_size, num_heads, embedding_size]
        attended_embeddings = torch.matmul(attention_weights.transpose(1, 2), embeddings)

        # Lay the heads' vectors side by side as one long vector per document
        # [batch_size, num_heads * embedding_size]
        concatenated = attended_embeddings.reshape(attended_embeddings.size(0), -1)

        # [batch_size, 1] -> sigmoid -> [batch_size]
        output = self.output_layer(concatenated)
        prediction = torch.sigmoid(output).squeeze(1)

        # The attention weights are returned as well for visualization
        return prediction, attention_weights
225
+
226
+ # %% ../Attention Classifier Pytorch Student.ipynb 32
227
# Parameters for model initialization
vocab_size = len(word_to_index)  # one embedding row per vocabulary entry
embedding_size = 50  # the size used in word2vec model
num_heads = 5  # number of attention heads
embeddings_fname = 'model_weights_target.pt'
model = DocumentAttentionClassifier(vocab_size, embedding_size, num_heads, embeddings_fname)


# %% ../Attention Classifier Pytorch Student.ipynb 33
# Restore the trained classifier parameters. map_location='cpu' lets a
# checkpoint that was saved on another device (e.g. 'mps' or a GPU) load on a
# host that does not have that device.
model.load_state_dict(torch.load('attention_clf_model.pt', map_location='cpu'))
237
+
238
+
239
+ # %% ../Attention Classifier Pytorch Student.ipynb 34
240
def get_label_and_weights(text, model):
    '''
    Classifies the text (requires tokenizing, etc.) and returns
    (1) the classification label (0 or 1),
    (2) the tokenized words that are in the model's vocabulary, and
    (3) the attention weights over those in-vocab tokens as a numpy array of
        shape (num_in_vocab_tokens, num_heads).

    Out-of-vocabulary tokens are still fed to the model as <UNK>, so the
    prediction is computed from the whole text, but their rows are removed from
    the returned weights so that row i always belongs to tokens[i]. Because of
    that removal a head's returned weights may sum to less than 1.
    '''
    unk_id = word_to_index['<UNK>']

    # Tokenize the text and map every token to its id (<UNK> if unseen)
    words = tokenizer.tokenize(text.lower())
    word_ids = [word_to_index.get(word, unk_id) for word in words]

    # Use Apple's MPS backend when it exists, otherwise fall back to the CPU so
    # the function also runs on machines without it
    if torch.backends.mps.is_available():
        device = torch.device('mps')
    else:
        device = torch.device('cpu')
    model = model.to(device)

    # A batch holding one sequence. The explicit integer dtype keeps an empty
    # token list from turning into a float tensor, which the embedding lookup
    # would reject.
    token_ids_tensor = torch.tensor([word_ids], dtype=torch.long, device=device)

    # Forward pass through the model
    model.eval()
    with torch.no_grad():
        output, attention_weights = model(token_ids_tensor)

    # Convert output to label
    predicted_label = int(output.item() > 0.5)

    # Keep only the positions whose token is in the vocabulary, for both the
    # token list and the attention rows, so the two stay aligned
    in_vocab = np.array([wid != unk_id for wid in word_ids], dtype=bool)
    tokens = [index_to_word.get(str(wid)) for wid in word_ids if wid != unk_id]

    # (1, seq_len, num_heads) -> (seq_len, num_heads) -> in-vocab rows only
    attention_weights_numpy = attention_weights.cpu().numpy().squeeze(0)

    return predicted_label, tokens, attention_weights_numpy[in_vocab]
271
+
272
+ # %% ../Attention Classifier Pytorch Student.ipynb 36
273
def visualize_attention(words, attention_weights, show=True):
    '''
    Makes a heatmap figure that visualizes the attention weights for an item.
    Attention weights should be a numpy array that has the shape (num_words, num_heads)

    The heatmap has one column per word and one row per attention head. The
    figure is written to a temporary PNG file and the path of that file is
    returned, so callers that need an image file can use it directly. The file
    is not deleted by this function. If show is true the figure is also
    displayed with plt.show().
    '''
    import tempfile  # local import keeps this block self-contained

    num_heads = attention_weights.shape[1]

    fig, ax = plt.subplots()
    # Rescale image size based on the input length; matplotlib rejects a
    # zero-width figure, so never go below one inch
    fig.set_size_inches((max(len(words), 1), 4))
    im = ax.imshow(attention_weights.T)

    # One tick per word, labelled with the word and rotated so that long
    # inputs stay readable
    ax.set_xticks(np.arange(len(words)))
    ax.set_xticklabels(words, fontsize=16, rotation=45, ha="right",
                       rotation_mode="anchor")

    # One tick per head, labelled with the head index
    ax.set_yticks(np.arange(num_heads))
    ax.set_yticklabels([str(h) for h in range(num_heads)], fontsize=16)
    ax.set_ylabel('Attention Head', fontsize=16)

    # Add a color bar to show probability scaling
    cb = fig.colorbar(im, ax=ax, pad=0.01)
    cb.ax.tick_params(labelsize=16)
    cb.set_label(label='Probability', size=16)
    fig.tight_layout()

    # Reserve a file name, then let matplotlib write the PNG to it. Saving
    # happens before show() because some backends discard the figure once its
    # window is closed.
    with tempfile.NamedTemporaryFile(suffix='.png', delete=False) as tmp:
        image_path = tmp.name
    fig.savefig(image_path)

    if show:
        plt.show()

    # Release the figure so repeated calls do not accumulate open figures
    plt.close(fig)
    return image_path
303
+
304
+ # %% ../Attention Classifier Pytorch Student.ipynb 38
305
# Smoke-test cell: classify one sample review, draw its attention heatmap and
# print the predicted label.
s = (
    'Just as I remembered it, one of my favorites from childhood! '
    'Great condition, very happy to have this to share with my daughter. '
    'Packaging was so nice and was received quickly.'
)
pred, tokens, attn = get_label_and_weights(s, model)
visualize_attention(tokens, attn)
print(pred)
309
+
310
+ # %% ../Attention Classifier Pytorch Student.ipynb 39
311
def predict_and_visualize(s):
    '''
    Callback used by the Gradio interface: classifies the text s with the
    module-level model and draws its attention heatmap.

    Returns the predicted label together with whatever visualize_attention
    returns for the in-vocabulary tokens and their attention weights.
    '''
    label, words, weights = get_label_and_weights(s, model)

    # The return value of visualize_attention is forwarded unchanged as the
    # second output
    heatmap = visualize_attention(words, weights)

    return label, heatmap
318
+
319
+
320
+ # %% ../Attention Classifier Pytorch Student.ipynb 40
321
+ import gradio as gr
322
+
323
# Web UI: one text box in; the predicted label (as text) and the attention
# heatmap (as an image) out.
intf = gr.Interface(
    fn=predict_and_visualize,
    inputs="text",
    outputs=["text", "image"],
    examples=[
        "The book was amazing!",
        "Today's Weather is pretty bad!",
        "How are you feeling",
    ],
    title="Text Review Classifier with Attention Visualization",
    description="Enter a review to see the prediction and attention visualization.",
)

# Start serving the interface
intf.launch()
attention_clf_model.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cb5ba2f87a7f02517c8063b74e38e96f935c27ef23c15645d96d35ee9cdf543c
3
+ size 5847344
index_to_word.json ADDED
The diff for this file is too large to render. See raw diff
 
model_weights_target.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aaf081907d43db059b83fb65a7224233bd9077dc1c5a22ce1b05df67ec5b6180
3
+ size 5844445
requirements.txt ADDED
@@ -0,0 +1,226 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ absl-py==2.0.0
2
+ altgraph @ file:///System/Volumes/Data/SWE/Apps/DT/BuildRoots/BuildRoot7/ActiveBuildRoot/Library/Caches/com.apple.xbs/Sources/python3/python3-133.100.1.1/altgraph-0.17.2-py2.py3-none-any.whl
3
+ annotated-types==0.6.0
4
+ anyio==4.0.0
5
+ appdirs==1.4.4
6
+ appnope==0.1.3
7
+ argon2-cffi==23.1.0
8
+ argon2-cffi-bindings==21.2.0
9
+ arrow==1.2.3
10
+ asttokens==2.2.1
11
+ astunparse==1.6.3
12
+ async-lru==2.0.4
13
+ attrs==23.1.0
14
+ Babel==2.12.1
15
+ backcall==0.2.0
16
+ beautifulsoup4==4.12.2
17
+ bleach==6.0.0
18
+ blis==0.7.11
19
+ bokeh==3.3.1
20
+ cachetools==5.3.2
21
+ catalogue==2.0.10
22
+ certifi==2023.7.22
23
+ cffi==1.15.1
24
+ charset-normalizer==3.2.0
25
+ click==8.1.7
26
+ cloudpathlib==0.15.1
27
+ cloudpickle==3.0.0
28
+ comm==0.1.4
29
+ confection==0.1.3
30
+ contourpy==1.1.0
31
+ cycler==0.11.0
32
+ cymem==2.0.8
33
+ dask==2023.11.0
34
+ debugpy==1.6.7.post1
35
+ decorator==5.1.1
36
+ defusedxml==0.7.1
37
+ distributed==2023.11.0
38
+ docker-pycreds==0.4.0
39
+ docopt==0.6.2
40
+ en-core-web-sm @ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.7.0/en_core_web_sm-3.7.0-py3-none-any.whl
41
+ entrypoints==0.4
42
+ exceptiongroup==1.1.3
43
+ execnb==0.1.5
44
+ executing==1.2.0
45
+ fastcore==1.5.29
46
+ fastjsonschema==2.18.0
47
+ filelock==3.13.1
48
+ flatbuffers==23.5.26
49
+ fonttools==4.42.1
50
+ fqdn==1.5.1
51
+ fsspec==2023.10.0
52
+ future @ file:///System/Volumes/Data/SWE/Apps/DT/BuildRoots/BuildRoot7/ActiveBuildRoot/Library/Caches/com.apple.xbs/Sources/python3/python3-133.100.1.1/future-0.18.2-py3-none-any.whl
53
+ gast==0.5.4
54
+ gensim==4.3.2
55
+ ghapi==1.0.4
56
+ gitdb==4.0.11
57
+ GitPython==3.1.42
58
+ google-auth==2.23.4
59
+ google-auth-oauthlib==1.1.0
60
+ google-pasta==0.2.0
61
+ graphviz==0.20.1
62
+ grpcio==1.59.3
63
+ h5py==3.10.0
64
+ idna==3.4
65
+ imageio==2.32.0
66
+ importlib-metadata==6.8.0
67
+ importlib-resources==6.0.1
68
+ ipycytoscape==1.3.3
69
+ ipykernel==6.25.1
70
+ ipython==8.12.3
71
+ ipywidgets==8.0.4
72
+ isoduration==20.11.0
73
+ jedi==0.19.0
74
+ Jinja2==3.1.2
75
+ joblib==1.3.2
76
+ json5==0.9.14
77
+ jsonpointer==2.4
78
+ jsonschema==4.19.0
79
+ jsonschema-specifications==2023.7.1
80
+ jupyter-events==0.7.0
81
+ jupyter-lsp==2.2.0
82
+ jupyter_client==7.4.9
83
+ jupyter_core==5.3.1
84
+ jupyter_server==2.7.3
85
+ jupyter_server_terminals==0.4.4
86
+ jupyterlab==4.0.6
87
+ jupyterlab-pygments==0.2.2
88
+ jupyterlab-widgets==3.0.9
89
+ jupyterlab_server==2.25.0
90
+ keras==2.15.0
91
+ kiwisolver==1.4.5
92
+ langcodes==3.3.0
93
+ lazy_loader==0.3
94
+ libclang==16.0.6
95
+ locket==1.0.0
96
+ lxml==4.9.3
97
+ lz4==4.3.2
98
+ macholib @ file:///System/Volumes/Data/SWE/Apps/DT/BuildRoots/BuildRoot7/ActiveBuildRoot/Library/Caches/com.apple.xbs/Sources/python3/python3-133.100.1.1/macholib-1.15.2-py2.py3-none-any.whl
99
+ Markdown==3.5.1
100
+ MarkupSafe==2.1.3
101
+ matplotlib==3.7.2
102
+ matplotlib-inline==0.1.6
103
+ mistune==3.0.1
104
+ ml-dtypes==0.2.0
105
+ mpmath==1.3.0
106
+ msgpack==1.0.7
107
+ murmurhash==1.0.10
108
+ nbclient==0.8.0
109
+ nbconvert==7.16.2
110
+ nbdev==2.3.13
111
+ nbformat==5.9.2
112
+ nest-asyncio==1.5.7
113
+ networkx==3.2.1
114
+ nltk==3.8.1
115
+ notebook==7.0.3
116
+ notebook_shim==0.2.3
117
+ numpy==1.25.2
118
+ oauthlib==3.2.2
119
+ opencv-python==4.8.1.78
120
+ opt-einsum==3.3.0
121
+ overrides==7.4.0
122
+ packaging==23.1
123
+ pandas==2.1.0
124
+ pandocfilters==1.5.0
125
+ parso==0.8.3
126
+ partd==1.4.1
127
+ pathy==0.10.2
128
+ patsy==0.5.3
129
+ pexpect==4.8.0
130
+ pickleshare==0.7.5
131
+ Pillow==10.0.0
132
+ pipreqs==0.5.0
133
+ platformdirs==3.10.0
134
+ plotly==5.18.0
135
+ preshed==3.0.9
136
+ prometheus-client==0.17.1
137
+ prompt-toolkit==3.0.39
138
+ protobuf==4.23.4
139
+ psutil==5.9.5
140
+ ptyprocess==0.7.0
141
+ pure-eval==0.2.2
142
+ pyarrow==14.0.1
143
+ pyarrow-hotfix==0.5
144
+ pyasn1==0.5.1
145
+ pyasn1-modules==0.3.0
146
+ pycparser==2.21
147
+ pydantic==2.4.2
148
+ pydantic_core==2.10.1
149
+ Pygments==2.16.1
150
+ pyparsing==3.0.9
151
+ python-dateutil==2.8.2
152
+ python-json-logger==2.0.7
153
+ pytz==2023.3
154
+ PyYAML==6.0.1
155
+ pyzmq==24.0.1
156
+ referencing==0.30.2
157
+ regex==2023.8.8
158
+ requests==2.31.0
159
+ requests-oauthlib==1.3.1
160
+ rfc3339-validator==0.1.4
161
+ rfc3986-validator==0.1.1
162
+ rpds-py==0.10.3
163
+ rsa==4.9
164
+ scikit-image==0.22.0
165
+ scikit-learn==1.3.0
166
+ scipy==1.11.2
167
+ seaborn==0.12.2
168
+ Send2Trash==1.8.2
169
+ sentry-sdk==1.40.5
170
+ setproctitle==1.3.3
171
+ six @ file:///System/Volumes/Data/SWE/Apps/DT/BuildRoots/BuildRoot7/ActiveBuildRoot/Library/Caches/com.apple.xbs/Sources/python3/python3-133.100.1.1/six-1.15.0-py2.py3-none-any.whl
172
+ sklearn==0.0.post7
173
+ smart-open==6.4.0
174
+ smmap==5.0.1
175
+ sniffio==1.3.0
176
+ sortedcontainers==2.4.0
177
+ soupsieve==2.5
178
+ spacy==3.7.1
179
+ spacy-legacy==3.0.12
180
+ spacy-loggers==1.0.5
181
+ spectate==1.0.1
182
+ srsly==2.4.8
183
+ stack-data==0.6.2
184
+ statsmodels==0.14.0
185
+ sympy==1.12
186
+ tblib==3.0.0
187
+ tenacity==8.2.3
188
+ tensorboard==2.15.1
189
+ tensorboard-data-server==0.7.2
190
+ tensorflow==2.15.0
191
+ tensorflow-estimator==2.15.0
192
+ tensorflow-io-gcs-filesystem==0.34.0
193
+ tensorflow-macos==2.15.0
194
+ termcolor==2.3.0
195
+ terminado==0.17.1
196
+ thinc==8.2.1
197
+ threadpoolctl==3.2.0
198
+ tifffile==2023.9.26
199
+ tinycss2==1.2.1
200
+ tomli==2.0.1
201
+ toolz==0.12.0
202
+ torch==2.2.0
203
+ tornado==6.3.3
204
+ tqdm==4.66.1
205
+ traitlets==5.9.0
206
+ typer==0.9.0
207
+ typing_extensions==4.9.0
208
+ tzdata==2023.3
209
+ uri-template==1.3.0
210
+ urllib3==2.0.4
211
+ wandb==0.16.3
212
+ wasabi==1.1.2
213
+ watchdog==4.0.0
214
+ wcwidth==0.2.6
215
+ weasel==0.3.2
216
+ webcolors==1.13
217
+ webencodings==0.5.1
218
+ websocket-client==1.6.3
219
+ Werkzeug==3.0.1
220
+ widgetsnbextension==4.0.9
221
+ wrapt==1.14.1
222
+ xgboost==2.0.2
223
+ xyzservices==2023.10.1
224
+ yarg==0.1.9
225
+ zict==3.0.0
226
+ zipp==3.16.2
word_to_index.json ADDED
The diff for this file is too large to render. See raw diff