Spaces:

sadjava
/

sentiment-analysis

Build error

App Files Files Community

sadjava committed on Aug 13, 2023

Commit

ba4df19

1 Parent(s): 3e1f234

added app

Browse files

Files changed (4) hide show

app.py +42 -0
layer.py +129 -0
model.pt +3 -0
requirements.txt +2 -0

app.py ADDED Viewed

	@@ -0,0 +1,42 @@

+# AUTOGENERATED! DO NOT EDIT! File to edit: ../app.ipynb.
+# %% auto 0
+__all__ = ['tokenizer', 'device', 'model', 'CLASS_LABELS', 'sentence', 'label', 'examples', 'intf', 'classify_sentiment']
+# %% ../app.ipynb 2
+import gradio as gr
+import torch
+from layer import Model
+# %% ../app.ipynb 3
+from transformers import BertTokenizerFast
+# Tokenizer that turns a sentence into the input ids consumed by the model.
+tokenizer = BertTokenizerFast.from_pretrained('bert-base-cased')
+# %% ../app.ipynb 4
+# Prefer the GPU when torch can see one; fall back to the CPU otherwise.
+if torch.cuda.is_available():
+    device = "cuda"
+else:
+    device = "cpu"
+# The checkpoint is always deserialised onto the CPU first and only then moved
+# to the selected device (presumably it is a whole pickled `Model` - the
+# `from layer import Model` import has to be in scope for unpickling).
+model = torch.load('./model.pt', map_location=torch.device('cpu'))
+model = model.to(device)
+# Put the module in evaluation mode before serving predictions.
+model.eval()
+# %% ../app.ipynb 5
+# Order matters: classify_sentiment pairs these with [1 - pred, pred].
+CLASS_LABELS = ['Negative', 'Positive']
+# %% ../app.ipynb 6
+def classify_sentiment(sentence):
+    """Return the predicted sentiment probabilities for one sentence.
+
+    Args:
+        sentence: Raw text to classify.
+
+    Returns:
+        A dict that maps each entry of ``CLASS_LABELS`` to a score: the
+        model output is used for 'Positive' and its complement for
+        'Negative'.
+    """
+    input_ids = tokenizer(sentence)['input_ids']
+    batch = torch.tensor([input_ids]).to(device)
+    # The sequence length must be the number of token ids. Taking len() of the
+    # tokenizer output itself counts its fields (input_ids, attention_mask,
+    # ...), so the LSTM would only ever see a fixed-size prefix of the text.
+    lengths = [len(input_ids)]
+    # Inference only: no need to record the autograd graph.
+    with torch.no_grad():
+        pred = model(batch, lengths).item()
+    return dict(zip(CLASS_LABELS, [1 - pred, pred]))
+# %% ../app.ipynb 7
+# Components are created from the top-level gradio classes: the older
+# gr.inputs / gr.outputs namespaces are deprecated and are not available in
+# newer Gradio releases.
+sentence = gr.Textbox()
+label = gr.Label()
+# Clickable sample inputs shown under the form.
+examples = ['Movie is the best!', 'Worst movie ever.']
+intf = gr.Interface(
+    fn=classify_sentiment,
+    inputs=sentence,
+    outputs=label,
+    title='Sentiment analysis',
+    examples=examples,
+)
+intf.launch(inline=False)

layer.py ADDED Viewed

	@@ -0,0 +1,129 @@

+import numpy as np
+import torch
+from torch import nn
+import torch.nn.functional as F
+from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence
+class DynamicLayerConfig:
+    """
+        Plain container for the hyper-parameters read by the layers in this file.
+        Embedding settings (nn.Embedding):
+            vocab_size - number of distinct token ids the embedding table must cover
+            embed_size - length of the vector that represents a single token
+        Recurrent settings (forwarded to nn.LSTM):
+            hidden_size - number of features in the LSTM hidden state
+            proj_size - LSTM projection size; 0 switches projections off, in which case
+                        the attention block and the classifier size themselves from embed_size
+            num_layers - how many LSTM layers are stacked
+            dropout - dropout probability applied to the outputs of every LSTM layer
+                        except the last one; 0 disables it
+            bidirectional - True runs the LSTM in both directions
+    """
+    def __init__(
+            self,
+            vocab_size: int,
+            embed_size: int,
+            hidden_size: int,
+            proj_size: int = 0,
+            num_layers: int = 1,
+            dropout: float = 0.,
+            bidirectional: bool = False
+            ):
+        # Sizes first (same attribute order as before: embed, hidden, vocab) ...
+        self.embed_size, self.hidden_size, self.vocab_size = embed_size, hidden_size, vocab_size
+        # ... then the optional LSTM knobs.
+        self.proj_size, self.num_layers = proj_size, num_layers
+        self.dropout, self.bidirectional = dropout, bidirectional
+class DynamicLayerAttentionBlock(nn.Module):
+    """
+        Scaled dot-product self-attention over a sequence of LSTM outputs,
+        collapsed into a single vector per sequence.
+        Queries, keys and values are bias-free linear projections of the input.
+        The output width is config.proj_size (config.embed_size when proj_size is 0),
+        doubled for a bidirectional LSTM.
+        No padding mask is applied: every time step of the input takes part in the
+        softmax and in the final sum.
+    """
+    def __init__(self, config):
+        super().__init__()
+        directions = 2 if config.bidirectional else 1
+        self.hidden_size = config.hidden_size * directions
+        self.proj_size = (config.proj_size if config.proj_size != 0 else config.embed_size) * directions
+        # nn.LSTM emits proj_size features per direction when projections are on
+        # and hidden_size features otherwise, so the projections below must take
+        # that width as input (using hidden_size unconditionally breaks proj_size > 0).
+        lstm_features = config.proj_size if config.proj_size != 0 else config.hidden_size
+        self.input_size = lstm_features * directions
+        self.W_Q = nn.Linear(self.input_size, self.proj_size, bias=False)
+        self.W_K = nn.Linear(self.input_size, self.proj_size, bias=False)
+        self.W_V = nn.Linear(self.input_size, self.proj_size, bias=False)
+    def forward(self, rnn_output):
+        """
+            Arguments:
+            rnn_output - 3-D tensor; dimension 1 is the one attended over and summed away,
+                        the last dimension must equal self.input_size
+            Returns:
+            output - attention context summed over dimension 1
+            alpha_n - attention weights (softmax over the last dimension of the score matrix)
+        """
+        Q = self.W_Q(rnn_output)
+        K = self.W_K(rnn_output)
+        V = self.W_V(rnn_output)
+        # Scale by sqrt of the key width to keep the logits in a stable range.
+        d_k = K.size(-1)
+        scores = torch.matmul(Q, K.transpose(1,2)) / np.sqrt(d_k)
+        alpha_n = F.softmax(scores, dim=-1)
+        context = torch.matmul(alpha_n, V)
+        output = context.sum(1)
+        return output, alpha_n
+class DynamicLayer(nn.Module):
+    """
+        Encoder that turns variable-length token id sequences into one vector each:
+        nn.Embedding -> nn.LSTM (run on packed sequences) -> DynamicLayerAttentionBlock.
+    """
+    def __init__(self, config: DynamicLayerConfig):
+        super().__init__()
+        self.config = config
+        self.wte = nn.Embedding(self.config.vocab_size, self.config.embed_size)
+        self.lstm = nn.LSTM(
+            input_size=self.config.embed_size,
+            hidden_size=self.config.hidden_size,
+            proj_size=self.config.proj_size,
+            num_layers=self.config.num_layers,
+            dropout=self.config.dropout,
+            bidirectional=self.config.bidirectional,
+            batch_first=True,
+        )
+        self.attention = DynamicLayerAttentionBlock(self.config)
+    def forward(
+        self,
+        input_ids: torch.LongTensor,
+        input_lens: torch.LongTensor,
+    ) -> torch.FloatTensor:
+        """
+            Arguments:
+            input_ids - tensor of shape (batch_size, sequence_length). All values are in interval - [0, vocab_size).
+                        These indices are processed through nn.Embedding to obtain embeddings of shape
+                        (batch_size, sequence_length, embed_size)
+            input_lens - number of valid tokens in each sequence of the batch; handed to
+                        pack_padded_sequence, so the sequences need not be sorted by length
+            Returns:
+            the first output of the attention block (one vector per sequence)
+        """
+        input_embeds = self.wte(input_ids)
+        # Packing lets the LSTM skip the padded tail of shorter sequences.
+        input_packed = pack_padded_sequence(input_embeds, input_lens, batch_first=True, enforce_sorted=False)
+        # Only the per-step outputs are needed; the final (h, c) state is discarded.
+        lstm_output, _ = self.lstm(input_packed)
+        output_padded, _ = pad_packed_sequence(lstm_output, batch_first=True)
+        output, _ = self.attention(output_padded)
+        return output
+class Model(nn.Module):
+    """
+        Binary classifier: a DynamicLayer encoder followed by a single-output
+        linear layer and a sigmoid.
+    """
+    def __init__(self, config: DynamicLayerConfig):
+        super().__init__()
+        # Width of the encoder output: proj_size, or embed_size when projections
+        # are off, doubled for a bidirectional LSTM.
+        width = config.embed_size if config.proj_size == 0 else config.proj_size
+        if config.bidirectional:
+            width = width * 2
+        self.proj_size = width
+        self.dynamic_layer = DynamicLayer(config)
+        self.fc = nn.Linear(self.proj_size, 1)
+    def forward(
+        self,
+        input_ids: torch.LongTensor,
+        input_lens: torch.LongTensor,
+    ) -> torch.FloatTensor:
+        """
+            Arguments:
+            input_ids - token ids, forwarded unchanged to the DynamicLayer encoder
+            input_lens - per-sequence lengths, forwarded unchanged to the DynamicLayer encoder
+            Returns:
+            sigmoid of the linear layer applied to the encoder output
+        """
+        encoded = self.dynamic_layer(input_ids, input_lens)
+        logits = self.fc(encoded)
+        return torch.sigmoid(logits)

model.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f6c002c780dd99e67b019c9f68eb1f12c9801bbeb0393ae8d58c77f54ed6e6ae
+size 16041171

requirements.txt ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ torch
2	+ transformers