{ "cells": [ { "cell_type": "code", "execution_count": 3, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "NJ6MhJYYBCwu", "outputId": "85eb2778-813b-4462-9ed5-c81a8bf27474" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Thu Jun 29 12:31:21 2023 \n", "+-----------------------------------------------------------------------------+\n", "| NVIDIA-SMI 525.85.12 Driver Version: 525.85.12 CUDA Version: 12.0 |\n", "|-------------------------------+----------------------+----------------------+\n", "| GPU Name Persistence-M| Bus-Id Disp.A | Volatile Uncorr. ECC |\n", "| Fan Temp Perf Pwr:Usage/Cap| Memory-Usage | GPU-Util Compute M. |\n", "| | | MIG M. |\n", "|===============================+======================+======================|\n", "| 0 Tesla T4 Off | 00000000:00:04.0 Off | 0 |\n", "| N/A 51C P8 9W / 70W | 0MiB / 15360MiB | 0% Default |\n", "| | | N/A |\n", "+-------------------------------+----------------------+----------------------+\n", " \n", "+-----------------------------------------------------------------------------+\n", "| Processes: |\n", "| GPU GI CI PID Type Process name GPU Memory |\n", "| ID ID Usage |\n", "|=============================================================================|\n", "| No running processes found |\n", "+-----------------------------------------------------------------------------+\n" ] } ], "source": [ "!nvidia-smi" ] }, { "attachments": {}, "cell_type": "markdown", "metadata": { "id": "wmj22-TcZMef" }, "source": [ "## Setup\n", "\n", "We'll need [the Transformers library](https://huggingface.co/transformers/) by Hugging Face:" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "Kj_7Tz0-pK69", "outputId": "f8215e7a-c231-4ad9-c7fb-cf992933d875" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m55.5/55.5 
kB\u001b[0m \u001b[31m3.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.6/1.6 MB\u001b[0m \u001b[31m30.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25h" ] } ], "source": [
 "# %pip (instead of !pip) installs into the environment of the running kernel.\n",
 "%pip install -q -U watermark" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "Jjsbi1u3QFEM", "outputId": "55ef32b8-6924-434e-f353-15fac991287b" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.2/7.2 MB\u001b[0m \u001b[31m51.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m236.8/236.8 kB\u001b[0m \u001b[31m25.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.8/7.8 MB\u001b[0m \u001b[31m108.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.3/1.3 MB\u001b[0m \u001b[31m88.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25h" ] } ], "source": [
 "# NOTE(review): the install is unpinned; the stored watermark output in this notebook\n",
 "# lists transformers 4.30.2 - consider pinning if a newer release breaks the imports.\n",
 "%pip install -qq transformers" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "AJqoaFpVpoM8", "outputId": "5ac2bfed-402a-42a2-b060-8b2c8c94d5ca" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Python implementation: CPython\n", "Python version : 3.10.12\n", "IPython version : 7.34.0\n", "\n", "numpy : 1.22.4\n", "pandas : 1.5.3\n", "torch : 2.0.1+cu118\n", "transformers: 4.30.2\n", "\n" ] } ], "source": [
 "# Print the interpreter and key package versions for reproducibility.\n",
 "%reload_ext watermark\n",
 "%watermark -v -p numpy,pandas,torch,transformers" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "cellView": "form", "colab": { "base_uri": "https://localhost:8080/" }, "id": "w68CZpOwFoly", "outputId": "de8523be-0fd7-4114-fb67-08dd386bd2d8" }, "outputs": [ { "data": { "text/plain": [ "device(type='cuda', index=0)" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [
 "#@title Setup & Config\n",
 "# Imports, plotting defaults, random seeds and the torch device used by the notebook.\n",
 "import transformers\n",
 "# NOTE(review): the AdamW shipped with transformers is documented as deprecated in favour\n",
 "# of torch.optim.AdamW; left unchanged because later cells may rely on its signature.\n",
 "from transformers import BertModel, BertTokenizer, AdamW, get_linear_schedule_with_warmup\n",
 "import torch\n",
 "\n",
 "import numpy as np\n",
 "import pandas as pd\n",
 "import seaborn as sns\n",
 "import matplotlib.pyplot as plt\n",
 "# rcParams is taken directly from matplotlib instead of the discouraged pylab namespace.\n",
 "from matplotlib import rc, rcParams\n",
 "from sklearn.model_selection import train_test_split\n",
 "from sklearn.metrics import confusion_matrix, classification_report\n",
 "from collections import defaultdict\n",
 "from textwrap import wrap\n",
 "\n",
 "from torch import nn, optim\n",
 "from torch.utils.data import Dataset, DataLoader\n",
 "import torch.nn.functional as F\n",
 "\n",
 "%matplotlib inline\n",
 "%config InlineBackend.figure_format='retina'\n",
 "\n",
 "# Global seaborn style, then a custom colour cycle on top of it.\n",
 "sns.set(style='whitegrid', palette='muted', font_scale=1.2)\n",
 "\n",
 "HAPPY_COLORS_PALETTE = [\"#01BEFE\", \"#FFDD00\", \"#FF7D00\", \"#FF006D\", \"#ADFF02\", \"#8F00FF\"]\n",
 "\n",
 "sns.set_palette(sns.color_palette(HAPPY_COLORS_PALETTE))\n",
 "\n",
 "# Default figure size (width, height) in inches.\n",
 "rcParams['figure.figsize'] = 12, 8\n",
 "\n",
 "# Seed NumPy and PyTorch with the same value so runs are repeatable.\n",
 "RANDOM_SEED = 42\n",
 "np.random.seed(RANDOM_SEED)\n",
 "torch.manual_seed(RANDOM_SEED)\n",
 "\n",
 "# Use the first CUDA device when one is available, otherwise fall back to the CPU.\n",
 "device = torch.device(\"cuda:0\" if torch.cuda.is_available() else \"cpu\")\n",
 "device" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "SgPRhuMzi9ot", "outputId": "a12fde95-c69b-47b0-c59f-e2d0aa537cea" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "/usr/local/lib/python3.10/dist-packages/gdown/cli.py:121: FutureWarning: Option `--id` was deprecated in version 4.3.1 and will be removed in 5.0. 
You don't need to pass it anymore to use a file ID.\n", " warnings.warn(\n", "Downloading...\n", "From: https://drive.google.com/uc?id=1S6qMioqPJjyBLpLVz4gmRTnJHnjitnuV\n", "To: /content/apps.csv\n", "100% 134k/134k [00:00<00:00, 87.9MB/s]\n", "/usr/local/lib/python3.10/dist-packages/gdown/cli.py:121: FutureWarning: Option `--id` was deprecated in version 4.3.1 and will be removed in 5.0. You don't need to pass it anymore to use a file ID.\n", " warnings.warn(\n", "Downloading...\n", "From: https://drive.google.com/uc?id=1zdmewp7ayS4js4VtrJEHzAheSW-5NBZv\n", "To: /content/reviews.csv\n", "100% 7.17M/7.17M [00:00<00:00, 101MB/s]\n" ] } ], "source": [
 "# Download apps.csv and reviews.csv from Google Drive by file ID.\n",
 "# The ID is passed positionally: gdown deprecated the --id flag in 4.3.1 and removes it in 5.0.\n",
 "!gdown 1S6qMioqPJjyBLpLVz4gmRTnJHnjitnuV\n",
 "!gdown 1zdmewp7ayS4js4VtrJEHzAheSW-5NBZv" ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 686 }, "id": "mUKLyKc7I6Qp", "outputId": "83ce462b-7ecd-4788-99e6-ef12f3d053cd" }, "outputs": [ { "data": { "text/html": [ "\n", "
| \n", " | userName | \n", "userImage | \n", "content | \n", "score | \n", "thumbsUpCount | \n", "reviewCreatedVersion | \n", "at | \n", "replyContent | \n", "repliedAt | \n", "sortOrder | \n", "appId | \n", "
|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | \n", "Andrew Thomas | \n", "https://lh3.googleusercontent.com/a-/AOh14GiHd... | \n", "Update: After getting a response from the deve... | \n", "1 | \n", "21 | \n", "4.17.0.3 | \n", "2020-04-05 22:25:57 | \n", "According to our TOS, and the term you have ag... | \n", "2020-04-05 15:10:24 | \n", "most_relevant | \n", "com.anydo | \n", "
| 1 | \n", "Craig Haines | \n", "https://lh3.googleusercontent.com/-hoe0kwSJgPQ... | \n", "Used it for a fair amount of time without any ... | \n", "1 | \n", "11 | \n", "4.17.0.3 | \n", "2020-04-04 13:40:01 | \n", "It sounds like you logged in with a different ... | \n", "2020-04-05 15:11:35 | \n", "most_relevant | \n", "com.anydo | \n", "
| 2 | \n", "steven adkins | \n", "https://lh3.googleusercontent.com/a-/AOh14GiXw... | \n", "Your app sucks now!!!!! Used to be good but no... | \n", "1 | \n", "17 | \n", "4.17.0.3 | \n", "2020-04-01 16:18:13 | \n", "This sounds odd! We are not aware of any issue... | \n", "2020-04-02 16:05:56 | \n", "most_relevant | \n", "com.anydo | \n", "
| 3 | \n", "Lars Panzerbjørn | \n", "https://lh3.googleusercontent.com/a-/AOh14Gg-h... | \n", "It seems OK, but very basic. Recurring tasks n... | \n", "1 | \n", "192 | \n", "4.17.0.2 | \n", "2020-03-12 08:17:34 | \n", "We do offer this option as part of the Advance... | \n", "2020-03-15 06:20:13 | \n", "most_relevant | \n", "com.anydo | \n", "
| 4 | \n", "Scott Prewitt | \n", "https://lh3.googleusercontent.com/-K-X1-YsVd6U... | \n", "Absolutely worthless. This app runs a prohibit... | \n", "1 | \n", "42 | \n", "4.17.0.2 | \n", "2020-03-14 17:41:01 | \n", "We're sorry you feel this way! 90% of the app ... | \n", "2020-03-15 23:45:51 | \n", "most_relevant | \n", "com.anydo | \n", "