Spaces:

DdIiVvYyAaMm
/

TextSentenceClassifier

Sleeping

App Files Files Community

Divyam Sharma committed on Mar 10, 2024

Commit

97364d6

1 Parent(s): 8dfd9f9

Text Classifier

Browse files

Files changed (6) hide show

app.py +330 -0
attention_clf_model.pt +3 -0
index_to_word.json +0 -0
model_weights_target.pt +3 -0
requirements.txt +226 -0
word_to_index.json +0 -0

app.py ADDED Viewed

	@@ -0,0 +1,330 @@

+import gradio as gr
+# def greet(name):
+#     return "Hello " + name + "!!"
+# iface = gr.Interface(fn=greet, inputs="text", outputs="text")
+# iface.launch()
+## ------------------------------------------------------------------------------------------------
+# AUTOGENERATED! DO NOT EDIT! File to edit: ../Attention Classifier Pytorch Student.ipynb.
+# %% auto 0
+__all__ = ['tokenizer', 'vocab_size', 'embedding_size', 'num_heads', 'embeddings_fname', 'model', 's', 'pred', 'tokens', 'attn',
+           'intf', 'DocumentAttentionClassifier', 'get_label_and_weights', 'visualize_attention',
+           'predict_and_visualize']
+# %% ../Attention Classifier Pytorch Student.ipynb 2
+import numpy as np
+import torch
+from torch.utils.data import Dataset, DataLoader
+import json
+np.random.seed(42)
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from torch.nn import init
+from collections import Counter
+import random
+from torch import optim
+import pandas as pd
+import pickle
+import wandb
+import numpy as np
+import matplotlib
+import matplotlib.pyplot as plt
+from sklearn.metrics import f1_score
+import seaborn as sns
+# Sort of smart tokenization
+from nltk.tokenize import RegexpTokenizer
+# Attention plotting
+import matplotlib.pyplot as plt
+# %% ../Attention Classifier Pytorch Student.ipynb 4
+# Load the word-to-index mapping we used for word2vec and use the same type
+# of tokenizer, so that text is split the same way and every word keeps the
+# same word-to-id mapping.
+tokenizer = RegexpTokenizer(r'\w+')
+# Read the JSON files as UTF-8 explicitly instead of relying on the platform's
+# locale-dependent default encoding.
+with open('word_to_index.json', 'r', encoding='utf-8') as f:
+    word_to_index = json.load(f)
+# NOTE: JSON object keys are always strings, so index_to_word is keyed by
+# str(index) rather than by int.
+with open('index_to_word.json', 'r', encoding='utf-8') as f:
+    index_to_word = json.load(f)
+# %% ../Attention Classifier Pytorch Student.ipynb 6
+class DocumentAttentionClassifier(nn.Module):
+    '''
+    Binary document classifier built on learned attention heads.
+
+    Every attention head is a vector of length embedding_size. A head scores each
+    word embedding of a document with a dot product, the scores are normalised with
+    a softmax over the sequence, and the resulting weights are used to sum the word
+    embeddings into one vector per head. The per-head vectors are laid side by side
+    and passed through a linear layer followed by a sigmoid.
+    '''
+    def __init__(self, vocab_size, embedding_size, num_heads, embeddings_fname):
+        '''
+        Creates the new classifier model.
+
+        vocab_size: number of rows of the word embedding table.
+        embedding_size: length of each word embedding and of each attention head.
+        num_heads: number of attention heads.
+        embeddings_fname: filename of the saved pytorch weight tensor used to
+            initialize this class's word Embedding parameters. It is loaded as the
+            'weight' entry of the Embedding's state dict, so its shape has to be
+            (vocab_size, embedding_size).
+        '''
+        super().__init__()
+        # Save the input arguments to the state
+        self.vocab_size = vocab_size
+        self.embedding_size = embedding_size
+        self.num_heads = num_heads
+        # The word embeddings are not learned from scratch: create the Embedding
+        # with the same size as the saved (word2vec-based) weights and load them.
+        self.embeddings = nn.Embedding(vocab_size, embedding_size)
+        # map_location='cpu' lets weights that were saved from an accelerator
+        # (e.g. 'mps' or 'cuda') be loaded on a machine without that device;
+        # load_state_dict then copies them into the Embedding's own parameter.
+        pretrained_embeddings = torch.load(embeddings_fname, map_location='cpu')
+        self.embeddings.load_state_dict({'weight': pretrained_embeddings})
+        # The attention heads are the rows of one trainable matrix, so all heads
+        # can be applied with matrix operations instead of one lookup per head.
+        self.attention_heads = nn.Parameter(torch.randn(num_heads, embedding_size))
+        # Maps the concatenated attention heads' outputs to the single output
+        # value that is pushed through the sigmoid in forward().
+        self.output_layer = nn.Linear(num_heads * embedding_size, 1)
+    def forward(self, word_ids):
+        '''
+        Classifies a batch of documents.
+
+        word_ids: integer tensor of word ids, [batch_size, seq_len].
+
+        Returns a tuple (prediction, attention_weights):
+        prediction is the sigmoid of the output activation, [batch_size];
+        attention_weights holds each head's distribution over the tokens,
+        [batch_size, seq_len, num_heads], and sums to 1 along seq_len.
+        '''
+        # Get the word embeddings for the ids
+        word_embeds = self.embeddings(word_ids)  # [batch_size, seq_len, embedding_size]
+        # 'r' vectors: dot product of each attention head with each word embedding
+        attention_scores = torch.matmul(word_embeds, self.attention_heads.unsqueeze(0).transpose(1, 2))
+        # [batch_size, seq_len, num_heads]
+        # 'a' vectors: softmax over the seq_len dimension, so that each head gets
+        # a probability distribution over the tokens
+        attention_weights = F.softmax(attention_scores, dim=1)
+        # [batch_size, seq_len, num_heads]
+        # Re-weight the word embeddings with each head's attention weights and sum
+        # over the sequence, giving one embedding_size vector per head
+        attended_embeddings = torch.matmul(attention_weights.transpose(1, 2), word_embeds)
+        # [batch_size, num_heads, embedding_size]
+        # Lay all heads' vectors side by side as one long vector per document
+        concatenated = attended_embeddings.flatten(start_dim=1)
+        # [batch_size, num_heads * embedding_size]
+        # Pass through output layer to get the output activation
+        output = self.output_layer(concatenated)
+        # [batch_size, 1]
+        # Sigmoid for binary classification; squeeze to remove the extra dimension
+        prediction = torch.sigmoid(output).squeeze(1)
+        # [batch_size]
+        # The attention weights are returned as well; they are needed later for
+        # visualization
+        return prediction, attention_weights
+# %% ../Attention Classifier Pytorch Student.ipynb 32
+# Parameters for model initialization
+vocab_size = len(word_to_index)  # one embedding row per entry of word_to_index
+embedding_size = 50  # the size used in word2vec model
+num_heads = 5  # number of attention heads
+embeddings_fname = 'model_weights_target.pt'
+model = DocumentAttentionClassifier(vocab_size, embedding_size, num_heads, embeddings_fname)
+# %% ../Attention Classifier Pytorch Student.ipynb 33
+# Restore the saved classifier parameters. map_location='cpu' lets a checkpoint
+# that was saved from an accelerator (e.g. 'mps') load on a CPU-only machine.
+model.load_state_dict(torch.load('attention_clf_model.pt', map_location='cpu'))
+# %% ../Attention Classifier Pytorch Student.ipynb 34
+def get_label_and_weights(text, model):
+    '''
+    Classifies the text (requires tokenizing, etc.) and returns (1) the classification label,
+    (2) the tokenized words in the model's vocabulary,
+    and (3) the attention weights over the in-vocab tokens as a numpy array. Note that the
+    attention weights will be a matrix of shape (num_in_vocab_tokens, num_heads), depending
+    on how many heads were used in training.
+
+    Out-of-vocabulary words are still fed to the model as '<UNK>', but they are left out of
+    both the returned tokens and the returned attention weights, so that row i of the
+    weights always belongs to token i. Because of the removed rows a head's weights may
+    sum to less than 1.
+
+    Moves the model to the selected device and switches it to eval mode as a side effect.
+
+    Raises ValueError if the text contains no word token at all.
+    '''
+    unk_id = word_to_index['<UNK>']
+    # Tokenize the text and map every token to its id (unknown words -> <UNK>)
+    word_ids = [word_to_index.get(token, unk_id) for token in tokenizer.tokenize(text.lower())]
+    if not word_ids:
+        # An empty id list would otherwise fail inside the embedding lookup with
+        # an unrelated dtype error.
+        raise ValueError('text must contain at least one word token')
+    # Use an accelerator when one is available and fall back to the CPU otherwise;
+    # a hard-coded 'mps' fails on every machine without Apple's Metal backend.
+    mps_backend = getattr(torch.backends, 'mps', None)
+    if mps_backend is not None and mps_backend.is_available():
+        device = 'mps'
+    elif torch.cuda.is_available():
+        device = 'cuda'
+    else:
+        device = 'cpu'
+    model = model.to(device)
+    # Batch of one document: [1, seq_len]
+    token_ids_tensor = torch.tensor([word_ids], dtype=torch.long, device=device)
+    # Forward pass through the model
+    model.eval()
+    with torch.no_grad():
+        output, attention_weights = model(token_ids_tensor)
+    # Convert output to label
+    predicted_label = int(output.item() > 0.5)
+    # Keep only the in-vocab positions, using the same mask for the tokens and
+    # for the rows of the attention matrix so the two stay aligned.
+    in_vocab = np.array([word_id != unk_id for word_id in word_ids], dtype=bool)
+    tokens = [index_to_word.get(str(word_id)) for word_id in word_ids if word_id != unk_id]
+    # [1, seq_len, num_heads] -> [seq_len, num_heads] -> in-vocab rows only
+    attention_weights_numpy = attention_weights.squeeze(0).cpu().numpy()[in_vocab]
+    return predicted_label, tokens, attention_weights_numpy
+# %% ../Attention Classifier Pytorch Student.ipynb 36
+def visualize_attention(words, attention_weights):
+    '''
+    Makes a heatmap figure that visualizes the attention weights for an item,
+    saves it as a PNG file and shows it.
+    Attention weights should be a numpy array that has the shape (num_words, num_heads)
+
+    Returns the path of the saved PNG. The file is created in the system's temporary
+    directory and is not deleted by this function.
+    '''
+    import tempfile
+    num_heads = attention_weights.shape[1]
+    fig, ax = plt.subplots()
+    # Rescale image size based on the input length (a figure needs a positive width)
+    fig.set_size_inches((max(len(words), 1), 4))
+    # Transposed so that heads are the rows and words are the columns
+    im = ax.imshow(attention_weights.T)
+    ax.set_xticks(np.arange(len(words)))
+    ax.set_yticks(np.arange(num_heads))
+    # Add the words and axis labels
+    ax.set_yticklabels(labels=range(num_heads), fontsize=16)
+    ax.set_ylabel('Attention Head', fontsize=16)
+    ax.set_xticklabels(labels=words, fontsize=16)
+    # Rotate the word labels and set their alignment.
+    plt.setp(ax.get_xticklabels(), rotation=45, ha="right",
+             rotation_mode="anchor")
+    # Add a color bar to show probability scaling
+    cb = fig.colorbar(im, ax=ax, label='Probability', pad = 0.01)
+    cb.ax.tick_params(labelsize=16)
+    cb.set_label(label='Probability',size=16)
+    fig.tight_layout()
+    # Reserve a file name and write the heatmap there before showing it, so the
+    # image exists no matter what the backend does with the figure in show().
+    with tempfile.NamedTemporaryFile(suffix='.png', delete=False) as tmp:
+        image_path = tmp.name
+    fig.savefig(image_path)
+    plt.show()
+    # Close the figure so that repeated calls do not pile up open figures
+    plt.close(fig)
+    return image_path
+# %% ../Attention Classifier Pytorch Student.ipynb 38
+# Run the whole pipeline once on a sample review when the module is loaded:
+# classify it, draw its attention heatmap and print the predicted label.
+s = ('Just as I remembered it, one of my favorites from childhood! '
+     'Great condition, very happy to have this to share with my daughter. '
+     'Packaging was so nice and was received quickly.')
+pred, tokens, attn = get_label_and_weights(text=s, model=model)
+visualize_attention(words=tokens, attention_weights=attn)
+print(pred)
+# %% ../Attention Classifier Pytorch Student.ipynb 39
+def predict_and_visualize(s):
+    '''
+    Classifies the text s with the module-level model and draws its attention heatmap.
+    Used as the callback of the gradio interface.
+
+    Returns a tuple (label, image): label is the classification label from
+    get_label_and_weights and image is whatever visualize_attention returns for
+    the tokens and attention weights of s.
+    '''
+    label, words, weights = get_label_and_weights(s, model)
+    return label, visualize_attention(words, weights)
+# %% ../Attention Classifier Pytorch Student.ipynb 40
+import gradio as gr
+# Web UI: one text box in; the predicted label (as text) and the attention
+# heatmap (as an image) out. predict_and_visualize handles every request.
+intf = gr.Interface(
+    title="Text Review Classifier with Attention Visualization",
+    description="Enter a review to see the prediction and attention visualization.",
+    fn=predict_and_visualize,
+    inputs="text",
+    outputs=["text", "image"],
+    examples=[
+        "The book was amazing!",
+        "Today's Weather is pretty bad!",
+        "How are you feeling",
+    ],
+)
+# Start serving the interface
+intf.launch()

attention_clf_model.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:cb5ba2f87a7f02517c8063b74e38e96f935c27ef23c15645d96d35ee9cdf543c
+size 5847344

index_to_word.json ADDED Viewed

The diff for this file is too large to render. See raw diff

model_weights_target.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:aaf081907d43db059b83fb65a7224233bd9077dc1c5a22ce1b05df67ec5b6180
+size 5844445

requirements.txt ADDED Viewed

	@@ -0,0 +1,226 @@

+absl-py==2.0.0
+altgraph @ file:///System/Volumes/Data/SWE/Apps/DT/BuildRoots/BuildRoot7/ActiveBuildRoot/Library/Caches/com.apple.xbs/Sources/python3/python3-133.100.1.1/altgraph-0.17.2-py2.py3-none-any.whl
+annotated-types==0.6.0
+anyio==4.0.0
+appdirs==1.4.4
+appnope==0.1.3
+argon2-cffi==23.1.0
+argon2-cffi-bindings==21.2.0
+arrow==1.2.3
+asttokens==2.2.1
+astunparse==1.6.3
+async-lru==2.0.4
+attrs==23.1.0
+Babel==2.12.1
+backcall==0.2.0
+beautifulsoup4==4.12.2
+bleach==6.0.0
+blis==0.7.11
+bokeh==3.3.1
+cachetools==5.3.2
+catalogue==2.0.10
+certifi==2023.7.22
+cffi==1.15.1
+charset-normalizer==3.2.0
+click==8.1.7
+cloudpathlib==0.15.1
+cloudpickle==3.0.0
+comm==0.1.4
+confection==0.1.3
+contourpy==1.1.0
+cycler==0.11.0
+cymem==2.0.8
+dask==2023.11.0
+debugpy==1.6.7.post1
+decorator==5.1.1
+defusedxml==0.7.1
+distributed==2023.11.0
+docker-pycreds==0.4.0
+docopt==0.6.2
+en-core-web-sm @ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.7.0/en_core_web_sm-3.7.0-py3-none-any.whl
+entrypoints==0.4
+exceptiongroup==1.1.3
+execnb==0.1.5
+executing==1.2.0
+fastcore==1.5.29
+fastjsonschema==2.18.0
+filelock==3.13.1
+flatbuffers==23.5.26
+fonttools==4.42.1
+fqdn==1.5.1
+fsspec==2023.10.0
+future @ file:///System/Volumes/Data/SWE/Apps/DT/BuildRoots/BuildRoot7/ActiveBuildRoot/Library/Caches/com.apple.xbs/Sources/python3/python3-133.100.1.1/future-0.18.2-py3-none-any.whl
+gast==0.5.4
+gensim==4.3.2
+ghapi==1.0.4
+gitdb==4.0.11
+GitPython==3.1.42
+google-auth==2.23.4
+google-auth-oauthlib==1.1.0
+google-pasta==0.2.0
+graphviz==0.20.1
+grpcio==1.59.3
+h5py==3.10.0
+idna==3.4
+imageio==2.32.0
+importlib-metadata==6.8.0
+importlib-resources==6.0.1
+ipycytoscape==1.3.3
+ipykernel==6.25.1
+ipython==8.12.3
+ipywidgets==8.0.4
+isoduration==20.11.0
+jedi==0.19.0
+Jinja2==3.1.2
+joblib==1.3.2
+json5==0.9.14
+jsonpointer==2.4
+jsonschema==4.19.0
+jsonschema-specifications==2023.7.1
+jupyter-events==0.7.0
+jupyter-lsp==2.2.0
+jupyter_client==7.4.9
+jupyter_core==5.3.1
+jupyter_server==2.7.3
+jupyter_server_terminals==0.4.4
+jupyterlab==4.0.6
+jupyterlab-pygments==0.2.2
+jupyterlab-widgets==3.0.9
+jupyterlab_server==2.25.0
+keras==2.15.0
+kiwisolver==1.4.5
+langcodes==3.3.0
+lazy_loader==0.3
+libclang==16.0.6
+locket==1.0.0
+lxml==4.9.3
+lz4==4.3.2
+macholib @ file:///System/Volumes/Data/SWE/Apps/DT/BuildRoots/BuildRoot7/ActiveBuildRoot/Library/Caches/com.apple.xbs/Sources/python3/python3-133.100.1.1/macholib-1.15.2-py2.py3-none-any.whl
+Markdown==3.5.1
+MarkupSafe==2.1.3
+matplotlib==3.7.2
+matplotlib-inline==0.1.6
+mistune==3.0.1
+ml-dtypes==0.2.0
+mpmath==1.3.0
+msgpack==1.0.7
+murmurhash==1.0.10
+nbclient==0.8.0
+nbconvert==7.16.2
+nbdev==2.3.13
+nbformat==5.9.2
+nest-asyncio==1.5.7
+networkx==3.2.1
+nltk==3.8.1
+notebook==7.0.3
+notebook_shim==0.2.3
+numpy==1.25.2
+oauthlib==3.2.2
+opencv-python==4.8.1.78
+opt-einsum==3.3.0
+overrides==7.4.0
+packaging==23.1
+pandas==2.1.0
+pandocfilters==1.5.0
+parso==0.8.3
+partd==1.4.1
+pathy==0.10.2
+patsy==0.5.3
+pexpect==4.8.0
+pickleshare==0.7.5
+Pillow==10.0.0
+pipreqs==0.5.0
+platformdirs==3.10.0
+plotly==5.18.0
+preshed==3.0.9
+prometheus-client==0.17.1
+prompt-toolkit==3.0.39
+protobuf==4.23.4
+psutil==5.9.5
+ptyprocess==0.7.0
+pure-eval==0.2.2
+pyarrow==14.0.1
+pyarrow-hotfix==0.5
+pyasn1==0.5.1
+pyasn1-modules==0.3.0
+pycparser==2.21
+pydantic==2.4.2
+pydantic_core==2.10.1
+Pygments==2.16.1
+pyparsing==3.0.9
+python-dateutil==2.8.2
+python-json-logger==2.0.7
+pytz==2023.3
+PyYAML==6.0.1
+pyzmq==24.0.1
+referencing==0.30.2
+regex==2023.8.8
+requests==2.31.0
+requests-oauthlib==1.3.1
+rfc3339-validator==0.1.4
+rfc3986-validator==0.1.1
+rpds-py==0.10.3
+rsa==4.9
+scikit-image==0.22.0
+scikit-learn==1.3.0
+scipy==1.11.2
+seaborn==0.12.2
+Send2Trash==1.8.2
+sentry-sdk==1.40.5
+setproctitle==1.3.3
+six @ file:///System/Volumes/Data/SWE/Apps/DT/BuildRoots/BuildRoot7/ActiveBuildRoot/Library/Caches/com.apple.xbs/Sources/python3/python3-133.100.1.1/six-1.15.0-py2.py3-none-any.whl
+sklearn==0.0.post7
+smart-open==6.4.0
+smmap==5.0.1
+sniffio==1.3.0
+sortedcontainers==2.4.0
+soupsieve==2.5
+spacy==3.7.1
+spacy-legacy==3.0.12
+spacy-loggers==1.0.5
+spectate==1.0.1
+srsly==2.4.8
+stack-data==0.6.2
+statsmodels==0.14.0
+sympy==1.12
+tblib==3.0.0
+tenacity==8.2.3
+tensorboard==2.15.1
+tensorboard-data-server==0.7.2
+tensorflow==2.15.0
+tensorflow-estimator==2.15.0
+tensorflow-io-gcs-filesystem==0.34.0
+tensorflow-macos==2.15.0
+termcolor==2.3.0
+terminado==0.17.1
+thinc==8.2.1
+threadpoolctl==3.2.0
+tifffile==2023.9.26
+tinycss2==1.2.1
+tomli==2.0.1
+toolz==0.12.0
+torch==2.2.0
+tornado==6.3.3
+tqdm==4.66.1
+traitlets==5.9.0
+typer==0.9.0
+typing_extensions==4.9.0
+tzdata==2023.3
+uri-template==1.3.0
+urllib3==2.0.4
+wandb==0.16.3
+wasabi==1.1.2
+watchdog==4.0.0
+wcwidth==0.2.6
+weasel==0.3.2
+webcolors==1.13
+webencodings==0.5.1
+websocket-client==1.6.3
+Werkzeug==3.0.1
+widgetsnbextension==4.0.9
+wrapt==1.14.1
+xgboost==2.0.2
+xyzservices==2023.10.1
+yarg==0.1.9
+zict==3.0.0
+zipp==3.16.2

word_to_index.json ADDED Viewed

The diff for this file is too large to render. See raw diff