Spaces:

RishabA
/

sentiment-analysis

Sleeping

App Files Files Community

RishabA commited on Apr 8, 2025

Commit

6addb3e

verified ·

1 Parent(s): ff5e38b

Upload 5 files

Browse files

Files changed (5) hide show

Sentiment_Analysis_in_PyTorch.ipynb +772 -0
app.py +69 -0
model.py +292 -0
requirements.txt +5 -0
sentiment_analysis_model.pt +3 -0

Sentiment_Analysis_in_PyTorch.ipynb ADDED Viewed

	@@ -0,0 +1,772 @@

+{
+  "nbformat": 4,
+  "nbformat_minor": 0,
+  "metadata": {
+    "colab": {
+      "provenance": [],
+      "machine_shape": "hm",
+      "gpuType": "A100"
+    },
+    "kernelspec": {
+      "name": "python3",
+      "display_name": "Python 3"
+    },
+    "language_info": {
+      "name": "python"
+    },
+    "accelerator": "GPU"
+  },
+  "cells": [
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "9gYFoxi68eer"
+      },
+      "outputs": [],
+      "source": [
+        "!pip install datasets transformers"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "import pandas as pd\n",
+        "import numpy as np\n",
+        "import matplotlib.pyplot as plt\n",
+        "import os\n",
+        "import math\n",
+        "import time\n",
+        "from tqdm.notebook import trange, tqdm\n",
+        "\n",
+        "import torch\n",
+        "import torch.nn as nn\n",
+        "from torch import optim\n",
+        "from torch.utils.data import DataLoader\n",
+        "from torch import Tensor\n",
+        "from torch.utils.data.dataset import Dataset\n",
+        "import torch.nn.functional as F\n",
+        "from torch.distributions import Categorical\n",
+        "from torch.cuda.amp import autocast, GradScaler\n",
+        "\n",
+        "from datasets import load_dataset\n",
+        "from transformers import AutoTokenizer\n",
+        "\n",
+        "torch.backends.cuda.matmul.allow_tf32 = True\n",
+        "device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n",
+        "device"
+      ],
+      "metadata": {
+        "id": "rhkTsyBn8j_m"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "train_dataset = load_dataset(\"sst5\", split=\"train\")\n",
+        "test_dataset = load_dataset(\"sst5\", split=\"test\")\n",
+        "\n",
+        "print(f\"Length of train dataset: {len(train_dataset)}\")\n",
+        "print(f\"Length of test dataset: {len(test_dataset)}\")"
+      ],
+      "metadata": {
+        "id": "c0wKEehd8lfH"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "train_dataset[1][\"text\"], train_dataset[1][\"label\"]"
+      ],
+      "metadata": {
+        "id": "Oj6qWm8H8uYK"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "tokenizer = AutoTokenizer.from_pretrained(\"bert-base-uncased\")"
+      ],
+      "metadata": {
+        "id": "7wbogVwT8ulJ"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "len(tokenizer.vocab)"
+      ],
+      "metadata": {
+        "id": "tPDFZ3xK8wZb"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "tokenizer.vocab_size"
+      ],
+      "metadata": {
+        "id": "EY2TbtGZ8xdA"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "print(\"[PAD] token id:\", tokenizer.pad_token_id) # 0\n",
+        "print(\"[CLS] token id:\", tokenizer.cls_token_id) # 101\n",
+        "print(\"[SEP] token id:\", tokenizer.sep_token_id) # 102"
+      ],
+      "metadata": {
+        "id": "1_Wq4KEj81lb"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "class SST5Dataset(Dataset):\n",
+        "    def __init__(self, dataset, tokenizer, max_length=128):\n",
+        "      self.dataset = dataset\n",
+        "      self.tokenizer = tokenizer\n",
+        "      self.max_length = max_length\n",
+        "\n",
+        "    def __len__(self):\n",
+        "      return len(self.dataset)\n",
+        "\n",
+        "    def __getitem__(self, idx):\n",
+        "      sample = self.dataset[idx]\n",
+        "      text = sample[\"text\"]\n",
+        "      label = torch.tensor(sample[\"label\"])\n",
+        "\n",
+        "      encoded_text = self.tokenizer(\n",
+        "          text,\n",
+        "          truncation=True,\n",
+        "          padding=\"max_length\",\n",
+        "          max_length=self.max_length,\n",
+        "          return_tensors=\"pt\"\n",
+        "      )\n",
+        "\n",
+        "      # Remove the extra batch dimension for each item in the encoded dictionary.\n",
+        "      encoded_text = {key: val.squeeze(dim=0) for key, val in encoded_text.items()}\n",
+        "\n",
+        "      return {\n",
+        "          \"text\": encoded_text,\n",
+        "          \"label\": label\n",
+        "      }\n",
+        "\n",
+        "train_dataset = SST5Dataset(dataset=train_dataset,\n",
+        "                            tokenizer=tokenizer,\n",
+        "                            max_length=32)\n",
+        "\n",
+        "test_dataset = SST5Dataset(dataset=test_dataset,\n",
+        "                           tokenizer=tokenizer,\n",
+        "                           max_length=32)"
+      ],
+      "metadata": {
+        "id": "jQY8xfZa-ilL"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "batch_size = 128\n",
+        "num_workers = os.cpu_count()\n",
+        "\n",
+        "train_dataloader = DataLoader(train_dataset,\n",
+        "                              batch_size=batch_size,\n",
+        "                              shuffle=True,\n",
+        "                              num_workers=num_workers,\n",
+        "                              pin_memory=True)\n",
+        "\n",
+        "test_dataloader = DataLoader(test_dataset,\n",
+        "                             batch_size=batch_size,\n",
+        "                             shuffle=False,\n",
+        "                             num_workers=num_workers,\n",
+        "                             pin_memory=True)"
+      ],
+      "metadata": {
+        "id": "ItktnvlfApqz"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "test_items = next(iter(train_dataloader))\n",
+        "print(tokenizer.decode(test_items[\"text\"][\"input_ids\"][0]))"
+      ],
+      "metadata": {
+        "id": "KrroXe5aAtzs"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "class EmbeddingLayer(nn.Module):\n",
+        "  def __init__(self,\n",
+        "               vocab_size: int,\n",
+        "               d_model: int = 768):\n",
+        "    super().__init__()\n",
+        "\n",
+        "    self.d_model = d_model\n",
+        "\n",
+        "    self.lut = nn.Embedding(num_embeddings=vocab_size, embedding_dim=d_model) # (vocab_size, d_model)\n",
+        "\n",
+        "  def forward(self, x):\n",
+        "    # x shape: (batch_size, seq_len)\n",
+        "    return self.lut(x) * math.sqrt(self.d_model) # (batch_size, seq_len, d_model)"
+      ],
+      "metadata": {
+        "id": "el4Tnb37AvO7"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "class PositionalEncoding(nn.Module):\n",
+        "  def __init__(self,\n",
+        "               d_model: int = 768,\n",
+        "               dropout: float = 0.1,\n",
+        "               max_length: int = 128):\n",
+        "    super().__init__()\n",
+        "\n",
+        "    self.dropout = nn.Dropout(p=dropout)\n",
+        "\n",
+        "    pe = torch.zeros(max_length, d_model) # (max_length, d_model)\n",
+        "    # Create position column\n",
+        "    k = torch.arange(0, max_length).unsqueeze(dim=1) # (max_length, 1)\n",
+        "\n",
+        "    # Use the log version of the function for positional encodings\n",
+        "    div_term = torch.exp(torch.arange(0, d_model, 2) * -(math.log(10000.0) / d_model)) # (d_model / 2)\n",
+        "\n",
+        "    # Use sine for the even indices and cosine for the odd indices\n",
+        "    pe[:, 0::2] = torch.sin(k * div_term)\n",
+        "    pe[:, 1::2] = torch.cos(k * div_term)\n",
+        "\n",
+        "    pe = pe.unsqueeze(dim=0) # Add the batch dimension(1, max_length, d_model)\n",
+        "\n",
+        "    # We use a buffer because the positional encoding is fixed and not a model paramter that we want to be updated during backpropagation.\n",
+        "    self.register_buffer(\"pe\", pe) # Buffers are saved with the model state and are moved to the correct device\n",
+        "\n",
+        "  def forward(self, x):\n",
+        "    # x shape: (batch_size, seq_length, d_model)\n",
+        "    x += self.pe[:, :x.size(1)]\n",
+        "    return self.dropout(x)"
+      ],
+      "metadata": {
+        "id": "Qk0sNjc7A6sZ"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "class MultiHeadAttention(nn.Module):\n",
+        "  def __init__(self,\n",
+        "               d_model: int = 768,\n",
+        "               n_heads: int = 8,\n",
+        "               dropout: float = 0.1):\n",
+        "    super().__init__()\n",
+        "    assert d_model % n_heads == 0\n",
+        "\n",
+        "    self.d_model = d_model\n",
+        "    self.n_heads = n_heads\n",
+        "    self.d_key = d_model // n_heads\n",
+        "\n",
+        "    self.Wq = nn.Linear(in_features=d_model, out_features=d_model)\n",
+        "    self.Wk = nn.Linear(in_features=d_model, out_features=d_model)\n",
+        "    self.Wv = nn.Linear(in_features=d_model, out_features=d_model)\n",
+        "    self.Wo = nn.Linear(in_features=d_model, out_features=d_model)\n",
+        "\n",
+        "    self.dropout = nn.Dropout(p=dropout)\n",
+        "\n",
+        "\n",
+        "  def forward(self,\n",
+        "              query: Tensor,\n",
+        "              key: Tensor,\n",
+        "              value: Tensor,\n",
+        "              mask: Tensor = None):\n",
+        "    # input shape: (batch_size, seq_len, d_model)\n",
+        "\n",
+        "    batch_size = key.size(0)\n",
+        "\n",
+        "    Q = self.Wq(query)\n",
+        "    K = self.Wk(key)\n",
+        "    V = self.Wv(value)\n",
+        "\n",
+        "    Q = Q.view(batch_size, -1, self.n_heads, self.d_key).permute(0, 2, 1, 3) # (batch_size, n_heads, q_length, d_key)\n",
+        "    K = K.view(batch_size, -1, self.n_heads, self.d_key).permute(0, 2, 1, 3) # (batch_size, n_heads, k_length, d_key)\n",
+        "    V = V.view(batch_size, -1, self.n_heads, self.d_key).permute(0, 2, 1, 3) # (batch_size, n_heads, v_length, d_key)\n",
+        "\n",
+        "    scaled_dot_product = torch.matmul(Q, K.permute(0, 1, 3, 2)) / math.sqrt(self.d_key) # (batch_size, n_heads, q_length, k_length)\n",
+        "\n",
+        "    if mask is not None:\n",
+        "      scaled_dot_product = scaled_dot_product.masked_fill(mask == 0, float('-inf'))\n",
+        "\n",
+        "    attention_probs = torch.softmax(scaled_dot_product, dim=-1)\n",
+        "\n",
+        "    A = torch.matmul(self.dropout(attention_probs), V)  # (batch_size, n_heads, q_length, d_key)\n",
+        "\n",
+        "    A = A.permute(0, 2, 1, 3) # (batch_size, q_length, n_heads, d_key)\n",
+        "    A = A.contiguous().view(batch_size, -1, self.n_heads * self.d_key) # (batch_size, q_length, d_model)\n",
+        "\n",
+        "    output = self.Wo(A) # (batch_size, q_length, d_model)\n",
+        "\n",
+        "    return output, attention_probs"
+      ],
+      "metadata": {
+        "id": "8ugM9m7rA9zL"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "class PositionwiseFeedForward(nn.Module):\n",
+        "  def __init__(self,\n",
+        "               d_model: int = 768,\n",
+        "               dropout: float = 0.1):\n",
+        "    super().__init__()\n",
+        "\n",
+        "    self.ffn = nn.Sequential(\n",
+        "        nn.Linear(in_features=d_model, out_features=(d_model * 4)),\n",
+        "        nn.ReLU(),\n",
+        "        nn.Linear(in_features=(d_model * 4), out_features=d_model),\n",
+        "        nn.Dropout(p=dropout)\n",
+        "    )\n",
+        "\n",
+        "  def forward(self, x):\n",
+        "    # x shape: (batch_size, q_length, d_model)\n",
+        "    return self.ffn(x) # (batch_size, q_length, d_model)"
+      ],
+      "metadata": {
+        "id": "kqQGZf6rA_KL"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "class EncoderLayer(nn.Module):\n",
+        "  def __init__(self,\n",
+        "               d_model: int = 768,\n",
+        "               n_heads: int = 8,\n",
+        "               dropout: float = 0.1):\n",
+        "    super().__init__()\n",
+        "\n",
+        "    self.attention = MultiHeadAttention(d_model=d_model, n_heads=n_heads, dropout=dropout)\n",
+        "    self.attention_layer_norm = nn.LayerNorm(d_model)\n",
+        "\n",
+        "    self.position_wise_ffn = PositionwiseFeedForward(d_model=d_model, dropout=dropout)\n",
+        "    self.ffn_layer_norm = nn.LayerNorm(d_model)\n",
+        "\n",
+        "    self.dropout = nn.Dropout(p=dropout)\n",
+        "\n",
+        "  def forward(self,\n",
+        "              src: Tensor,\n",
+        "              src_mask: Tensor):\n",
+        "    _src, attention_probs = self.attention(query=src, key=src, value=src, mask=src_mask)\n",
+        "    src = self.attention_layer_norm(src + self.dropout(_src))\n",
+        "\n",
+        "    _src = self.position_wise_ffn(src)\n",
+        "    src = self.ffn_layer_norm(src + self.dropout(_src))\n",
+        "\n",
+        "    return src, attention_probs"
+      ],
+      "metadata": {
+        "id": "_jypLBCiBDb-"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "class Encoder(nn.Module):\n",
+        "  def __init__(self,\n",
+        "               d_model: int = 768,\n",
+        "               n_layers: int = 3,\n",
+        "               n_heads: int = 8,\n",
+        "               dropout: float = 0.1):\n",
+        "    super().__init__()\n",
+        "\n",
+        "    self.layers = nn.ModuleList([EncoderLayer(d_model=d_model, n_heads=n_heads, dropout=dropout) for layer in range(n_layers)])\n",
+        "    self.dropout = nn.Dropout(p=dropout)\n",
+        "\n",
+        "  def forward(self,\n",
+        "              src: Tensor,\n",
+        "              src_mask: Tensor):\n",
+        "\n",
+        "    for layer in self.layers:\n",
+        "      src, attention_probs = layer(src, src_mask)\n",
+        "\n",
+        "    self.attention_probs = attention_probs\n",
+        "\n",
+        "    # src += torch.randn_like(src) * 0.001\n",
+        "    return src"
+      ],
+      "metadata": {
+        "id": "o-cPP_YLBF8y"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "class Transformer(nn.Module):\n",
+        "  def __init__(self,\n",
+        "               encoder: Encoder,\n",
+        "               src_embed: EmbeddingLayer,\n",
+        "               src_pad_idx: int,\n",
+        "               device,\n",
+        "               d_model: int = 768,\n",
+        "               num_labels: int = 5):\n",
+        "    super().__init__()\n",
+        "\n",
+        "    self.encoder = encoder\n",
+        "    self.src_embed = src_embed\n",
+        "    self.device = device\n",
+        "    self.src_pad_idx = src_pad_idx\n",
+        "\n",
+        "    self.dropout = nn.Dropout(p=0.1)\n",
+        "    self.classifier = nn.Linear(in_features=d_model, out_features=num_labels)\n",
+        "\n",
+        "  def make_src_mask(self, src: Tensor):\n",
+        "    # Assign 1 to tokens that need attended to and 0 to padding tokens, then add 2 dimensions\n",
+        "    src_mask = (src != self.src_pad_idx).unsqueeze(1).unsqueeze(2)\n",
+        "\n",
+        "    return src_mask\n",
+        "\n",
+        "  def forward(self, src: Tensor):\n",
+        "    src_mask = self.make_src_mask(src) # (batch_size, 1, 1, src_seq_length)\n",
+        "    output = self.encoder(self.src_embed(src), src_mask)  # (batch_size, src_seq_length, d_model)\n",
+        "    output = output[:, 0, :] # Get the sos token vector representation (works sort of like a cls token in ViT) shape: (batch_size, 1, d_model)\n",
+        "    logits = self.classifier(self.dropout(output))\n",
+        "\n",
+        "    return logits"
+      ],
+      "metadata": {
+        "id": "5fcff-6oBX_w"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "def make_model(device,\n",
+        "               tokenizer,\n",
+        "               n_layers: int = 3,\n",
+        "               d_model: int = 768,\n",
+        "               num_labels: int = 5,\n",
+        "               n_heads: int = 8,\n",
+        "               dropout: float = 0.1,\n",
+        "               max_length: int = 128):\n",
+        "  encoder = Encoder(d_model=d_model,\n",
+        "                    n_layers=n_layers,\n",
+        "                    n_heads=n_heads,\n",
+        "                    dropout=dropout)\n",
+        "\n",
+        "  src_embed = EmbeddingLayer(vocab_size=tokenizer.vocab_size, d_model=d_model)\n",
+        "\n",
+        "  pos_enc = PositionalEncoding(d_model=d_model,\n",
+        "                               dropout=dropout,\n",
+        "                               max_length=max_length)\n",
+        "\n",
+        "  model = Transformer(encoder=encoder,\n",
+        "                      src_embed=nn.Sequential(src_embed, pos_enc),\n",
+        "                      src_pad_idx=tokenizer.pad_token_id,\n",
+        "                      device=device,\n",
+        "                      d_model=d_model,\n",
+        "                      num_labels=num_labels)\n",
+        "\n",
+        "  # Initialize parameters with Xaviar/Glorot\n",
+        "  # This maintains a consistent variance of activations throughout the network\n",
+        "  # Helps avoid issues like vanishing or exploding gradients.\n",
+        "  for p in model.parameters():\n",
+        "    if p.dim() > 1:\n",
+        "      nn.init.xavier_uniform_(p)\n",
+        "\n",
+        "  return model"
+      ],
+      "metadata": {
+        "id": "-7adHoyYBcqT"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "model = make_model(device=device,\n",
+        "                   tokenizer=tokenizer,\n",
+        "                   n_layers=4,\n",
+        "                   d_model=768,\n",
+        "                   num_labels=5,\n",
+        "                   n_heads=8,\n",
+        "                   dropout=0.1,\n",
+        "                   max_length=32)\n",
+        "\n",
+        "model.to(device)"
+      ],
+      "metadata": {
+        "id": "M0EbhBuQBhUK"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "print(f\"The model has {(sum(p.numel() for p in model.parameters() if p.requires_grad)):,} trainable parameters\")"
+      ],
+      "metadata": {
+        "id": "NT37aWKnBk4y"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "lr = 1e-4\n",
+        "\n",
+        "optimizer = torch.optim.Adam(params=model.parameters(),\n",
+        "                             lr=lr,\n",
+        "                             betas=(0.9, 0.999))\n",
+        "loss_fn = nn.CrossEntropyLoss()\n",
+        "scaler = GradScaler()"
+      ],
+      "metadata": {
+        "id": "hZmiAxW-BmLW"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "def train(model,\n",
+        "          iterator,\n",
+        "          optimizer,\n",
+        "          loss_fn,\n",
+        "          clip,\n",
+        "          epoch):\n",
+        "  model.train()\n",
+        "  epoch_loss = 0\n",
+        "\n",
+        "  pbar = tqdm(iterator, total=len(iterator), desc=f\"Epoch {epoch + 1} Progress\", colour=\"#005500\")\n",
+        "  for i, batch in enumerate(pbar):\n",
+        "    src = batch[\"text\"][\"input_ids\"].to(device)\n",
+        "    labels = batch[\"label\"].to(device)\n",
+        "\n",
+        "    optimizer.zero_grad()\n",
+        "    with autocast():\n",
+        "      # Forward pass\n",
+        "      logits = model(src)\n",
+        "\n",
+        "      # Calculate the loss\n",
+        "      loss = loss_fn(logits, labels)\n",
+        "\n",
+        "    scaler.scale(loss).backward()\n",
+        "    scaler.unscale_(optimizer)\n",
+        "    nn.utils.clip_grad_norm_(model.parameters(), clip)\n",
+        "    scaler.step(optimizer)\n",
+        "    scaler.update()\n",
+        "    epoch_loss += loss.item()\n",
+        "\n",
+        "    pbar.set_postfix(loss=loss.item()) # Update the loss on the tqdm progress bar\n",
+        "\n",
+        "  return (epoch_loss / len(iterator))"
+      ],
+      "metadata": {
+        "id": "WMNVjg0UBqQF"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "def evaluate(model,\n",
+        "             iterator,\n",
+        "             loss_fn):\n",
+        "  model.eval()\n",
+        "  epoch_loss = 0\n",
+        "\n",
+        "  with torch.inference_mode():\n",
+        "    for i, batch in enumerate(iterator):\n",
+        "      src = batch[\"text\"][\"input_ids\"].to(device)\n",
+        "      labels = batch[\"label\"].to(device)\n",
+        "\n",
+        "      # Forward pass\n",
+        "      logits = model(src)\n",
+        "\n",
+        "      # Calculate the loss\n",
+        "      loss = loss_fn(logits, labels)\n",
+        "      epoch_loss += loss.item()\n",
+        "\n",
+        "  return (epoch_loss / len(iterator))"
+      ],
+      "metadata": {
+        "id": "V0McrJ1FF5d3"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "def epoch_time(start_time, end_time):\n",
+        "  elapsed_time = end_time - start_time\n",
+        "  elapsed_mins = int(elapsed_time / 60)\n",
+        "  elapsed_secs = int(elapsed_time - (elapsed_mins * 60))\n",
+        "  return elapsed_mins, elapsed_secs"
+      ],
+      "metadata": {
+        "id": "rq9YQv_eF5YQ"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "epochs = 10\n",
+        "clip = 1\n",
+        "\n",
+        "best_valid_loss = float(\"inf\")\n",
+        "model_path = \"sentiment_analysis_model.pt\"\n",
+        "\n",
+        "if os.path.exists(model_path):\n",
+        "  print(f\"Loading model from {model_path}...\")\n",
+        "  model.load_state_dict(torch.load(model_path, map_location=device))"
+      ],
+      "metadata": {
+        "id": "JE6JAXM-F5Qc"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "should_train = True\n",
+        "\n",
+        "if should_train:\n",
+        "  for epoch in tqdm(range(epochs), desc=f\"Training progress\", colour=\"#00ff00\"):\n",
+        "    start_time = time.time()\n",
+        "\n",
+        "    train_loss = train(model=model,\n",
+        "                      iterator=train_dataloader,\n",
+        "                      optimizer=optimizer,\n",
+        "                      loss_fn=loss_fn,\n",
+        "                      clip=clip,\n",
+        "                      epoch=epoch)\n",
+        "\n",
+        "    end_time = time.time()\n",
+        "    epoch_mins, epoch_secs = epoch_time(start_time, end_time)\n",
+        "\n",
+        "    message = f\"Epoch: {epoch + 1} | Time: {epoch_mins}m {epoch_secs}s --> STORED\"\n",
+        "\n",
+        "    torch.save(model.state_dict(), model_path)\n",
+        "\n",
+        "    print(message)\n",
+        "    print(f\"Train Loss: {train_loss:.6f}\")"
+      ],
+      "metadata": {
+        "id": "ruWsYqeYGCi0"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "test_loss = evaluate(model=model,\n",
+        "                     iterator=test_dataloader,\n",
+        "                     loss_fn=loss_fn)\n",
+        "\n",
+        "print(f\"Test Loss: {test_loss:.6f}\")"
+      ],
+      "metadata": {
+        "id": "GNHZ-ft8GHGy"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "def get_sentiment(question, model, device, max_length: int = 32):\n",
+        "  model.eval()\n",
+        "\n",
+        "  encoded = tokenizer(question, truncation=True, max_length=max_length, return_tensors=\"pt\")\n",
+        "  src_tensor = encoded[\"input_ids\"].to(device)\n",
+        "\n",
+        "  with torch.inference_mode():\n",
+        "    # Forward pass for classification.\n",
+        "    logits = model(src_tensor) # shape: (1, num_labels)\n",
+        "\n",
+        "  # Get the predicted class (index) with the highest score.\n",
+        "  pred_index = torch.argmax(logits, dim=1).item()\n",
+        "\n",
+        "  sentiment_map = {\n",
+        "      0: \"Very Negative\",\n",
+        "      1: \"Negative\",\n",
+        "      2: \"Neutral\",\n",
+        "      3: \"Positive\",\n",
+        "      4: \"Very Positive\"\n",
+        "  }\n",
+        "  predicted_sentiment = sentiment_map.get(pred_index, \"unknown\")\n",
+        "\n",
+        "  return predicted_sentiment"
+      ],
+      "metadata": {
+        "id": "0ej2-U8dGrot"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "#@title Question Answering\n",
+        "src_sentence = \"That book was amazing!\" #@param \"\"\n",
+        "\n",
+        "predicted_sentiment = get_sentiment(src_sentence, model, device, max_length=32)\n",
+        "\n",
+        "print(predicted_sentiment)"
+      ],
+      "metadata": {
+        "id": "oCwZfvW5IpWG"
+      },
+      "execution_count": null,
+      "outputs": []
+    }
+  ]
+}

app.py ADDED Viewed

	@@ -0,0 +1,69 @@

+import os
+import time
+import torch
+import gradio as gr
+from transformers import AutoTokenizer
+from model import make_model, get_sentiment
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+# Load the tokenizer and model
+tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
+model = make_model(
+    device=device,
+    tokenizer=tokenizer,
+    n_layers=4,
+    d_model=768,
+    num_labels=5,
+    n_heads=8,
+    dropout=0.1,
+    max_length=32,
+)
+model.to(device)
+model_path = "sentiment_analysis_model.pt"
+if os.path.exists(model_path):
+    print(f"Loading model from {model_path}...")
+    model.load_state_dict(torch.load(model_path, map_location=device))
+else:
+    print("No pretrained model found. Using randomly initialized weights.")
+def predict_sentiment(text):
+    sentiment = get_sentiment(text, model, tokenizer, device, max_length=32)
+    return sentiment
+css_str = """
+body {
+    background-color: #f7f7f7;
+}
+.title {
+    font-size: 48px;
+    font-weight: bold;
+    text-align: center;
+    margin-top: 20px;
+}
+.description {
+    font-size: 20px;
+    text-align: center;
+    argin-bottom: 40px;
+}
+"""
+with gr.Blocks(css=css_str) as demo:
+    gr.Markdown("<div class='title'>Sentiment Diffusion</div>")
+    gr.Markdown(
+        "<div class='description'>Enter a sentence and see the predicted sentiment.</div>"
+    )
+    text_input = gr.Textbox(
+        label="Enter Text", lines=3, placeholder="Type your review or sentence here..."
+    )
+    predict_btn = gr.Button("Predict Sentiment")
+    output_box = gr.Textbox(label="Predicted Sentiment")
+    predict_btn.click(fn=predict_sentiment, inputs=text_input, outputs=output_box)
+if __name__ == "__main__":
+    demo.launch(share=True)

model.py ADDED Viewed

	@@ -0,0 +1,292 @@

+import math
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from torch import Tensor
+class EmbeddingLayer(nn.Module):
+    def __init__(self, vocab_size: int, d_model: int = 768):
+        super().__init__()
+        self.d_model = d_model
+        self.lut = nn.Embedding(
+            num_embeddings=vocab_size, embedding_dim=d_model
+        )  # (vocab_size, d_model)
+    def forward(self, x):
+        # x shape: (batch_size, seq_len)
+        return self.lut(x) * math.sqrt(self.d_model)  # (batch_size, seq_len, d_model)
+class PositionalEncoding(nn.Module):
+    def __init__(self, d_model: int = 768, dropout: float = 0.1, max_length: int = 128):
+        super().__init__()
+        self.dropout = nn.Dropout(p=dropout)
+        pe = torch.zeros(max_length, d_model)  # (max_length, d_model)
+        # Create position column
+        k = torch.arange(0, max_length).unsqueeze(dim=1)  # (max_length, 1)
+        # Use the log version of the function for positional encodings
+        div_term = torch.exp(
+            torch.arange(0, d_model, 2) * -(math.log(10000.0) / d_model)
+        )  # (d_model / 2)
+        # Use sine for the even indices and cosine for the odd indices
+        pe[:, 0::2] = torch.sin(k * div_term)
+        pe[:, 1::2] = torch.cos(k * div_term)
+        pe = pe.unsqueeze(dim=0)  # Add the batch dimension(1, max_length, d_model)
+        # We use a buffer because the positional encoding is fixed and not a model paramter that we want to be updated during backpropagation.
+        self.register_buffer(
+            "pe", pe
+        )  # Buffers are saved with the model state and are moved to the correct device
+    def forward(self, x):
+        # x shape: (batch_size, seq_length, d_model)
+        x += self.pe[:, : x.size(1)]
+        return self.dropout(x)
+class MultiHeadAttention(nn.Module):
+    def __init__(self, d_model: int = 768, n_heads: int = 8, dropout: float = 0.1):
+        super().__init__()
+        assert d_model % n_heads == 0
+        self.d_model = d_model
+        self.n_heads = n_heads
+        self.d_key = d_model // n_heads
+        self.Wq = nn.Linear(in_features=d_model, out_features=d_model)
+        self.Wk = nn.Linear(in_features=d_model, out_features=d_model)
+        self.Wv = nn.Linear(in_features=d_model, out_features=d_model)
+        self.Wo = nn.Linear(in_features=d_model, out_features=d_model)
+        self.dropout = nn.Dropout(p=dropout)
+    def forward(self, query: Tensor, key: Tensor, value: Tensor, mask: Tensor = None):
+        # input shape: (batch_size, seq_len, d_model)
+        batch_size = key.size(0)
+        Q = self.Wq(query)
+        K = self.Wk(key)
+        V = self.Wv(value)
+        Q = Q.view(batch_size, -1, self.n_heads, self.d_key).permute(
+            0, 2, 1, 3
+        )  # (batch_size, n_heads, q_length, d_key)
+        K = K.view(batch_size, -1, self.n_heads, self.d_key).permute(
+            0, 2, 1, 3
+        )  # (batch_size, n_heads, k_length, d_key)
+        V = V.view(batch_size, -1, self.n_heads, self.d_key).permute(
+            0, 2, 1, 3
+        )  # (batch_size, n_heads, v_length, d_key)
+        scaled_dot_product = torch.matmul(Q, K.permute(0, 1, 3, 2)) / math.sqrt(
+            self.d_key
+        )  # (batch_size, n_heads, q_length, k_length)
+        if mask is not None:
+            scaled_dot_product = scaled_dot_product.masked_fill(
+                mask == 0, float("-inf")
+            )
+        attention_probs = torch.softmax(scaled_dot_product, dim=-1)
+        A = torch.matmul(
+            self.dropout(attention_probs), V
+        )  # (batch_size, n_heads, q_length, d_key)
+        A = A.permute(0, 2, 1, 3)  # (batch_size, q_length, n_heads, d_key)
+        A = A.contiguous().view(
+            batch_size, -1, self.n_heads * self.d_key
+        )  # (batch_size, q_length, d_model)
+        output = self.Wo(A)  # (batch_size, q_length, d_model)
+        return output, attention_probs
+class PositionwiseFeedForward(nn.Module):
+    def __init__(self, d_model: int = 768, dropout: float = 0.1):
+        super().__init__()
+        self.ffn = nn.Sequential(
+            nn.Linear(in_features=d_model, out_features=(d_model * 4)),
+            nn.ReLU(),
+            nn.Linear(in_features=(d_model * 4), out_features=d_model),
+            nn.Dropout(p=dropout),
+        )
+    def forward(self, x):
+        # x shape: (batch_size, q_length, d_model)
+        return self.ffn(x)  # (batch_size, q_length, d_model)
+class EncoderLayer(nn.Module):
+    def __init__(self, d_model: int = 768, n_heads: int = 8, dropout: float = 0.1):
+        super().__init__()
+        self.attention = MultiHeadAttention(
+            d_model=d_model, n_heads=n_heads, dropout=dropout
+        )
+        self.attention_layer_norm = nn.LayerNorm(d_model)
+        self.position_wise_ffn = PositionwiseFeedForward(
+            d_model=d_model, dropout=dropout
+        )
+        self.ffn_layer_norm = nn.LayerNorm(d_model)
+        self.dropout = nn.Dropout(p=dropout)
+    def forward(self, src: Tensor, src_mask: Tensor):
+        _src, attention_probs = self.attention(
+            query=src, key=src, value=src, mask=src_mask
+        )
+        src = self.attention_layer_norm(src + self.dropout(_src))
+        _src = self.position_wise_ffn(src)
+        src = self.ffn_layer_norm(src + self.dropout(_src))
+        return src, attention_probs
+class Encoder(nn.Module):
+    def __init__(
+        self,
+        d_model: int = 768,
+        n_layers: int = 3,
+        n_heads: int = 8,
+        dropout: float = 0.1,
+    ):
+        super().__init__()
+        self.layers = nn.ModuleList(
+            [
+                EncoderLayer(d_model=d_model, n_heads=n_heads, dropout=dropout)
+                for layer in range(n_layers)
+            ]
+        )
+        self.dropout = nn.Dropout(p=dropout)
+    def forward(self, src: Tensor, src_mask: Tensor):
+        for layer in self.layers:
+            src, attention_probs = layer(src, src_mask)
+        self.attention_probs = attention_probs
+        # src += torch.randn_like(src) * 0.001
+        return src
+class Transformer(nn.Module):
+    def __init__(
+        self,
+        encoder: Encoder,
+        src_embed: EmbeddingLayer,
+        src_pad_idx: int,
+        device,
+        d_model: int = 768,
+        num_labels: int = 5,
+    ):
+        super().__init__()
+        self.encoder = encoder
+        self.src_embed = src_embed
+        self.device = device
+        self.src_pad_idx = src_pad_idx
+        self.dropout = nn.Dropout(p=0.1)
+        self.classifier = nn.Linear(in_features=d_model, out_features=num_labels)
+    def make_src_mask(self, src: Tensor):
+        # Assign 1 to tokens that need attended to and 0 to padding tokens, then add 2 dimensions
+        src_mask = (src != self.src_pad_idx).unsqueeze(1).unsqueeze(2)
+        return src_mask
+    def forward(self, src: Tensor):
+        src_mask = self.make_src_mask(src)  # (batch_size, 1, 1, src_seq_length)
+        output = self.encoder(
+            self.src_embed(src), src_mask
+        )  # (batch_size, src_seq_length, d_model)
+        output = output[
+            :, 0, :
+        ]  # Get the sos token vector representation (works sort of like a cls token in ViT) shape: (batch_size, 1, d_model)
+        logits = self.classifier(self.dropout(output))
+        return logits
+def make_model(
+    device,
+    tokenizer,
+    n_layers: int = 3,
+    d_model: int = 768,
+    num_labels: int = 5,
+    n_heads: int = 8,
+    dropout: float = 0.1,
+    max_length: int = 128,
+):
+    encoder = Encoder(
+        d_model=d_model, n_layers=n_layers, n_heads=n_heads, dropout=dropout
+    )
+    src_embed = EmbeddingLayer(vocab_size=tokenizer.vocab_size, d_model=d_model)
+    pos_enc = PositionalEncoding(
+        d_model=d_model, dropout=dropout, max_length=max_length
+    )
+    model = Transformer(
+        encoder=encoder,
+        src_embed=nn.Sequential(src_embed, pos_enc),
+        src_pad_idx=tokenizer.pad_token_id,
+        device=device,
+        d_model=d_model,
+        num_labels=num_labels,
+    )
+    # Initialize parameters with Xaviar/Glorot
+    # This maintains a consistent variance of activations throughout the network
+    # Helps avoid issues like vanishing or exploding gradients.
+    for p in model.parameters():
+        if p.dim() > 1:
+            nn.init.xavier_uniform_(p)
+    return model
+def get_sentiment(text, model, tokenizer, device, max_length: int = 32):
+    model.eval()
+    encoded = model.src_embed[0].lut.weight.new_tensor([])
+    encoded = tokenizer(
+        text,
+        truncation=True,
+        max_length=max_length,
+        padding="max_length",
+        return_tensors="pt",
+    )
+    src_tensor = encoded["input_ids"].to(device)
+    with torch.inference_mode():
+        logits = model(src_tensor)  # shape: (batch_size, num_labels)
+    pred_index = torch.argmax(logits, dim=1).item()
+    sentiment_map = {
+        0: "Very Negative",
+        1: "Negative",
+        2: "Neutral",
+        3: "Positive",
+        4: "Very Positive",
+    }
+    return sentiment_map.get(pred_index, "Unknown")

requirements.txt ADDED Viewed

	@@ -0,0 +1,5 @@

+torch
+transformers
+datasets
+gradio
+nltk

sentiment_analysis_model.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:eae4e6ac0f01d92d35262998fc93d46e976636a23dd21073867a93eb1a80a84a
+size 207310930