Add initial implementation of a Tiny NLP sentiment classification model

- Created a Jupyter notebook for a simple text classification task using TensorFlow and TensorFlow Datasets.
- Implemented data loading, preprocessing, model definition, training, and evaluation steps.
- Added functionality to convert the trained model to TensorFlow Lite format for deployment.
- Included a prediction function for testing custom input text.
- Saved the TensorFlow Lite model as 'tiny_sentiment_model_imdb.tflite'.

Files changed (3) hide show

basic-clasification.ipynb +167 -0
starter project Tiny NLP (klasifikasi teks sederhana: sentimen positif vs negatif).ipynb +688 -0
tiny_sentiment_model_imdb.tflite +0 -0

basic-clasification.ipynb ADDED Viewed

	@@ -0,0 +1,167 @@

+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "c201ca37",
+   "metadata": {},
+   "source": [
+    "# Basic Image Classification (CNN)\n",
+    "\n",
+    "Notebook ini berisi contoh paling dasar untuk klasifikasi gambar menggunakan TensorFlow/Keras.\n",
+    "\n",
+    "## Ide tugas klasifikasi\n",
+    "1. Klasifikasi wireframe: `login`, `dashboard`, `product`, `form`, `table`.\n",
+    "2. Klasifikasi style desain: `clean`, `dense`, `minimal`, `complex`.\n",
+    "3. Klasifikasi tipe komponen dominan: `card-heavy`, `table-heavy`, `form-heavy`.\n",
+    "\n",
+    "Struktur dataset yang disarankan:\n",
+    "\n",
+    "```text\n",
+    "my_dataset/\n",
+    "  train/\n",
+    "    class_a/\n",
+    "    class_b/\n",
+    "    class_c/\n",
+    "  val/\n",
+    "    class_a/\n",
+    "    class_b/\n",
+    "    class_c/\n",
+    "```"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "6067a559",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import tensorflow as tf\n",
+    "from tensorflow.keras import layers, models\n",
+    "\n",
+    "# Dataset locations -- change these to match where your data lives\n",
+    "data_dir_train = \"./my_dataset/train\"\n",
+    "data_dir_val = \"./my_dataset/val\"\n",
+    "\n",
+    "img_size = (128, 128)\n",
+    "batch_size = 32\n",
+    "\n",
+    "\n",
+    "def load_split(directory):\n",
+    "    \"\"\"Load one split from a class-per-subfolder directory, with integer labels.\"\"\"\n",
+    "    return tf.keras.utils.image_dataset_from_directory(\n",
+    "        directory,\n",
+    "        label_mode=\"int\",\n",
+    "        batch_size=batch_size,\n",
+    "        image_size=img_size,\n",
+    "    )\n",
+    "\n",
+    "\n",
+    "train_ds = load_split(data_dir_train)\n",
+    "val_ds = load_split(data_dir_val)\n",
+    "\n",
+    "# Capture the class names now, before train_ds is rebound to the shuffled/prefetched pipeline\n",
+    "class_names = train_ds.class_names\n",
+    "num_classes = len(class_names)\n",
+    "print(\"Classes:\", class_names)\n",
+    "\n",
+    "# Input pipeline tuning: shuffle the training batches and prefetch both splits\n",
+    "autotune = tf.data.AUTOTUNE\n",
+    "train_ds = train_ds.shuffle(1000).prefetch(buffer_size=autotune)\n",
+    "val_ds = val_ds.prefetch(buffer_size=autotune)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "368bd39b",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Simple CNN: three conv/pool stages followed by a small dense classification head\n",
+    "model = models.Sequential([\n",
+    "    # Declare the input explicitly; Keras 3 discourages passing input_shape to a layer\n",
+    "    layers.Input(shape=(img_size[0], img_size[1], 3)),\n",
+    "    # Pixel scaling is part of the model, so it expects raw 0-255 images as input\n",
+    "    layers.Rescaling(1.0 / 255),\n",
+    "    layers.Conv2D(32, 3, activation=\"relu\"),\n",
+    "    layers.MaxPooling2D(),\n",
+    "    layers.Conv2D(64, 3, activation=\"relu\"),\n",
+    "    layers.MaxPooling2D(),\n",
+    "    layers.Conv2D(128, 3, activation=\"relu\"),\n",
+    "    layers.MaxPooling2D(),\n",
+    "    layers.Flatten(),\n",
+    "    layers.Dense(128, activation=\"relu\"),\n",
+    "    layers.Dropout(0.3),\n",
+    "    # One softmax output per class folder found in the training directory\n",
+    "    layers.Dense(num_classes, activation=\"softmax\")\n",
+    "])\n",
+    "\n",
+    "# Sparse loss because the datasets are loaded with integer labels (label_mode=\"int\")\n",
+    "model.compile(\n",
+    "    optimizer=\"adam\",\n",
+    "    loss=\"sparse_categorical_crossentropy\",\n",
+    "    metrics=[\"accuracy\"]\n",
+    ")\n",
+    "\n",
+    "model.summary()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "b75d2ec6",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Train for a fixed number of epochs, validating on val_ds after each one\n",
+    "epochs = 10\n",
+    "history = model.fit(train_ds, epochs=epochs, validation_data=val_ds)\n",
+    "\n",
+    "# Final loss and accuracy on the validation split\n",
+    "val_metrics = model.evaluate(val_ds)\n",
+    "loss, acc = val_metrics\n",
+    "print(f\"Validation accuracy: {acc:.4f}\")\n",
+    "\n",
+    "# Persist the trained model to disk\n",
+    "model.save(\"basic_cnn_classification.h5\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "55fff896",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Predict the class of a single new image\n",
+    "import numpy as np\n",
+    "from tensorflow.keras.preprocessing import image\n",
+    "\n",
+    "img_path = \"./sample.jpg\"  # replace with the path to your own image\n",
+    "img = image.load_img(img_path, target_size=img_size)\n",
+    "arr = image.img_to_array(img)\n",
+    "# Only add the batch axis here. Do NOT divide by 255: the model already contains a\n",
+    "# Rescaling(1/255) layer, so scaling again would feed it inputs 255x smaller than\n",
+    "# the ones it was trained on and give unreliable predictions.\n",
+    "arr = np.expand_dims(arr, axis=0)\n",
+    "\n",
+    "pred = model.predict(arr)\n",
+    "# pred holds one row of class probabilities for the single image in the batch\n",
+    "pred_class = class_names[np.argmax(pred[0])]\n",
+    "print(\"Predicted class:\", pred_class)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "research",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.20"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}

starter project Tiny NLP (klasifikasi teks sederhana: sentimen positif vs negatif).ipynb ADDED Viewed

	@@ -0,0 +1,688 @@

+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "460f0c3f",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Requirement already satisfied: tensorflow in /Users/ryfazrin/miniconda3/envs/research/lib/python3.10/site-packages (2.21.0)\n",
+      "Requirement already satisfied: tensorflow-datasets in /Users/ryfazrin/miniconda3/envs/research/lib/python3.10/site-packages (4.9.9)\n",
+      "Requirement already satisfied: absl-py>=1.0.0 in /Users/ryfazrin/miniconda3/envs/research/lib/python3.10/site-packages (from tensorflow) (2.3.1)\n",
+      "Requirement already satisfied: astunparse>=1.6.0 in /Users/ryfazrin/miniconda3/envs/research/lib/python3.10/site-packages (from tensorflow) (1.6.3)\n",
+      "Requirement already satisfied: flatbuffers>=25.9.23 in /Users/ryfazrin/miniconda3/envs/research/lib/python3.10/site-packages (from tensorflow) (25.9.23)\n",
+      "Requirement already satisfied: gast!=0.5.0,!=0.5.1,!=0.5.2,>=0.2.1 in /Users/ryfazrin/miniconda3/envs/research/lib/python3.10/site-packages (from tensorflow) (0.7.0)\n",
+      "Requirement already satisfied: google_pasta>=0.1.1 in /Users/ryfazrin/miniconda3/envs/research/lib/python3.10/site-packages (from tensorflow) (0.2.0)\n",
+      "Requirement already satisfied: libclang>=13.0.0 in /Users/ryfazrin/miniconda3/envs/research/lib/python3.10/site-packages (from tensorflow) (18.1.1)\n",
+      "Requirement already satisfied: opt_einsum>=2.3.2 in /Users/ryfazrin/miniconda3/envs/research/lib/python3.10/site-packages (from tensorflow) (3.3.0)\n",
+      "Requirement already satisfied: packaging in /Users/ryfazrin/miniconda3/envs/research/lib/python3.10/site-packages (from tensorflow) (26.0)\n",
+      "Requirement already satisfied: protobuf<8.0.0,>=6.31.1 in /Users/ryfazrin/miniconda3/envs/research/lib/python3.10/site-packages (from tensorflow) (6.32.0)\n",
+      "Requirement already satisfied: requests<3,>=2.21.0 in /Users/ryfazrin/miniconda3/envs/research/lib/python3.10/site-packages (from tensorflow) (2.32.5)\n",
+      "Requirement already satisfied: setuptools in /Users/ryfazrin/miniconda3/envs/research/lib/python3.10/site-packages (from tensorflow) (80.10.2)\n",
+      "Requirement already satisfied: six>=1.12.0 in /Users/ryfazrin/miniconda3/envs/research/lib/python3.10/site-packages (from tensorflow) (1.17.0)\n",
+      "Requirement already satisfied: termcolor>=1.1.0 in /Users/ryfazrin/miniconda3/envs/research/lib/python3.10/site-packages (from tensorflow) (3.2.0)\n",
+      "Requirement already satisfied: typing_extensions>=3.6.6 in /Users/ryfazrin/miniconda3/envs/research/lib/python3.10/site-packages (from tensorflow) (4.15.0)\n",
+      "Requirement already satisfied: wrapt>=1.11.0 in /Users/ryfazrin/miniconda3/envs/research/lib/python3.10/site-packages (from tensorflow) (2.0.1)\n",
+      "Requirement already satisfied: grpcio<2.0,>=1.24.3 in /Users/ryfazrin/miniconda3/envs/research/lib/python3.10/site-packages (from tensorflow) (1.78.0)\n",
+      "Requirement already satisfied: keras>=3.12.0 in /Users/ryfazrin/miniconda3/envs/research/lib/python3.10/site-packages (from tensorflow) (3.12.1)\n",
+      "Requirement already satisfied: numpy>=1.26.0 in /Users/ryfazrin/miniconda3/envs/research/lib/python3.10/site-packages (from tensorflow) (2.2.5)\n",
+      "Requirement already satisfied: h5py<3.15.0,>=3.11.0 in /Users/ryfazrin/miniconda3/envs/research/lib/python3.10/site-packages (from tensorflow) (3.14.0)\n",
+      "Requirement already satisfied: ml_dtypes<1.0.0,>=0.5.1 in /Users/ryfazrin/miniconda3/envs/research/lib/python3.10/site-packages (from tensorflow) (0.5.4)\n",
+      "Requirement already satisfied: charset_normalizer<4,>=2 in /Users/ryfazrin/miniconda3/envs/research/lib/python3.10/site-packages (from requests<3,>=2.21.0->tensorflow) (3.4.4)\n",
+      "Requirement already satisfied: idna<4,>=2.5 in /Users/ryfazrin/miniconda3/envs/research/lib/python3.10/site-packages (from requests<3,>=2.21.0->tensorflow) (3.11)\n",
+      "Requirement already satisfied: urllib3<3,>=1.21.1 in /Users/ryfazrin/miniconda3/envs/research/lib/python3.10/site-packages (from requests<3,>=2.21.0->tensorflow) (2.6.3)\n",
+      "Requirement already satisfied: certifi>=2017.4.17 in /Users/ryfazrin/miniconda3/envs/research/lib/python3.10/site-packages (from requests<3,>=2.21.0->tensorflow) (2026.1.4)\n",
+      "Requirement already satisfied: dm-tree in /Users/ryfazrin/miniconda3/envs/research/lib/python3.10/site-packages (from tensorflow-datasets) (0.1.10)\n",
+      "Requirement already satisfied: etils>=1.6.0 in /Users/ryfazrin/miniconda3/envs/research/lib/python3.10/site-packages (from etils[edc,enp,epath,epy,etree]>=1.6.0; python_version < \"3.11\"->tensorflow-datasets) (1.13.0)\n",
+      "Requirement already satisfied: immutabledict in /Users/ryfazrin/miniconda3/envs/research/lib/python3.10/site-packages (from tensorflow-datasets) (4.3.1)\n",
+      "Requirement already satisfied: promise in /Users/ryfazrin/miniconda3/envs/research/lib/python3.10/site-packages (from tensorflow-datasets) (2.3)\n",
+      "Requirement already satisfied: psutil in /Users/ryfazrin/miniconda3/envs/research/lib/python3.10/site-packages (from tensorflow-datasets) (7.0.0)\n",
+      "Requirement already satisfied: pyarrow in /Users/ryfazrin/miniconda3/envs/research/lib/python3.10/site-packages (from tensorflow-datasets) (23.0.1)\n",
+      "Requirement already satisfied: simple_parsing in /Users/ryfazrin/miniconda3/envs/research/lib/python3.10/site-packages (from tensorflow-datasets) (0.1.8)\n",
+      "Requirement already satisfied: tensorflow-metadata in /Users/ryfazrin/miniconda3/envs/research/lib/python3.10/site-packages (from tensorflow-datasets) (1.17.3)\n",
+      "Requirement already satisfied: toml in /Users/ryfazrin/miniconda3/envs/research/lib/python3.10/site-packages (from tensorflow-datasets) (0.10.2)\n",
+      "Requirement already satisfied: tqdm in /Users/ryfazrin/miniconda3/envs/research/lib/python3.10/site-packages (from tensorflow-datasets) (4.67.3)\n",
+      "Requirement already satisfied: wheel<1.0,>=0.23.0 in /Users/ryfazrin/miniconda3/envs/research/lib/python3.10/site-packages (from astunparse>=1.6.0->tensorflow) (0.46.3)\n",
+      "Requirement already satisfied: fsspec in /Users/ryfazrin/miniconda3/envs/research/lib/python3.10/site-packages (from etils[edc,enp,epath,epy,etree]>=1.6.0; python_version < \"3.11\"->tensorflow-datasets) (2026.3.0)\n",
+      "Requirement already satisfied: importlib_resources in /Users/ryfazrin/miniconda3/envs/research/lib/python3.10/site-packages (from etils[edc,enp,epath,epy,etree]>=1.6.0; python_version < \"3.11\"->tensorflow-datasets) (6.5.2)\n",
+      "Requirement already satisfied: zipp in /Users/ryfazrin/miniconda3/envs/research/lib/python3.10/site-packages (from etils[edc,enp,epath,epy,etree]>=1.6.0; python_version < \"3.11\"->tensorflow-datasets) (3.23.0)\n",
+      "Requirement already satisfied: einops in /Users/ryfazrin/miniconda3/envs/research/lib/python3.10/site-packages (from etils[edc,enp,epath,epy,etree]>=1.6.0; python_version < \"3.11\"->tensorflow-datasets) (0.8.2)\n",
+      "Requirement already satisfied: rich in /Users/ryfazrin/miniconda3/envs/research/lib/python3.10/site-packages (from keras>=3.12.0->tensorflow) (14.2.0)\n",
+      "Requirement already satisfied: namex in /Users/ryfazrin/miniconda3/envs/research/lib/python3.10/site-packages (from keras>=3.12.0->tensorflow) (0.1.0)\n",
+      "Requirement already satisfied: optree in /Users/ryfazrin/miniconda3/envs/research/lib/python3.10/site-packages (from keras>=3.12.0->tensorflow) (0.18.0)\n",
+      "Requirement already satisfied: attrs>=18.2.0 in /Users/ryfazrin/miniconda3/envs/research/lib/python3.10/site-packages (from dm-tree->tensorflow-datasets) (26.1.0)\n",
+      "Requirement already satisfied: markdown-it-py>=2.2.0 in /Users/ryfazrin/miniconda3/envs/research/lib/python3.10/site-packages (from rich->keras>=3.12.0->tensorflow) (4.0.0)\n",
+      "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /Users/ryfazrin/miniconda3/envs/research/lib/python3.10/site-packages (from rich->keras>=3.12.0->tensorflow) (2.19.2)\n",
+      "Requirement already satisfied: mdurl~=0.1 in /Users/ryfazrin/miniconda3/envs/research/lib/python3.10/site-packages (from markdown-it-py>=2.2.0->rich->keras>=3.12.0->tensorflow) (0.1.2)\n",
+      "Requirement already satisfied: docstring-parser~=0.15 in /Users/ryfazrin/miniconda3/envs/research/lib/python3.10/site-packages (from simple_parsing->tensorflow-datasets) (0.17.0)\n",
+      "TensorFlow version: 2.21.0\n"
+     ]
+    }
+   ],
+   "source": [
+    "# =========================\n",
+    "# 1. Install & Import\n",
+    "# =========================\n",
+    "# %pip (rather than !pip) installs into the environment of the running kernel;\n",
+    "# -q keeps the long \"Requirement already satisfied\" log out of the saved output.\n",
+    "%pip install -q tensorflow tensorflow-datasets\n",
+    "\n",
+    "import tensorflow as tf\n",
+    "import tensorflow_datasets as tfds\n",
+    "import numpy as np\n",
+    "\n",
+    "print(\"TensorFlow version:\", tf.__version__)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "7078b823",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "WARNING:absl:Variant folder /Users/ryfazrin/tensorflow_datasets/imdb_reviews/plain_text/1.0.0 has no dataset_info.json\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\u001b[1mDownloading and preparing dataset Unknown size (download: Unknown size, generated: Unknown size, total: Unknown size) to /Users/ryfazrin/tensorflow_datasets/imdb_reviews/plain_text/1.0.0...\u001b[0m\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "c5199f8e459944839e9117fa4e483203",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Dl Completed...: 0 url [00:00, ? url/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "d0423f620ad44853bfc8237977ffd90e",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Dl Size...: 0 MiB [00:00, ? MiB/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "2d58dab3dcea4d89af9730a66e4fc576",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Generating splits...:   0%|          | 0/3 [00:00<?, ? splits/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "6653450aa06e4b8f9438de3fd4b49bb1",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Generating train examples...: 0 examples [00:00, ? examples/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "3c31403c17234d42b656cb3de3b02fe5",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Shuffling /Users/ryfazrin/tensorflow_datasets/imdb_reviews/plain_text/incomplete.UBUGIV_1.0.0/imdb_reviews-tra…"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "1b2c932dbb66420faa5ce931167da15f",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Generating test examples...: 0 examples [00:00, ? examples/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "03bc0b5fec7447c793f5f714b4fc3a39",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Shuffling /Users/ryfazrin/tensorflow_datasets/imdb_reviews/plain_text/incomplete.UBUGIV_1.0.0/imdb_reviews-tes…"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "1ecd1361c7a54ae7abe2de2a6d20ffad",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Generating unsupervised examples...: 0 examples [00:00, ? examples/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "dd202d09a08341e1b7b9ec0eb49dc0fe",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Shuffling /Users/ryfazrin/tensorflow_datasets/imdb_reviews/plain_text/incomplete.UBUGIV_1.0.0/imdb_reviews-uns…"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\u001b[1mDataset imdb_reviews downloaded and prepared to /Users/ryfazrin/tensorflow_datasets/imdb_reviews/plain_text/1.0.0. Subsequent calls will reuse this data.\u001b[0m\n",
+      "tfds.core.DatasetInfo(\n",
+      "    name='imdb_reviews',\n",
+      "    full_name='imdb_reviews/plain_text/1.0.0',\n",
+      "    description=\"\"\"\n",
+      "    Large Movie Review Dataset. This is a dataset for binary sentiment\n",
+      "    classification containing substantially more data than previous benchmark\n",
+      "    datasets. We provide a set of 25,000 highly polar movie reviews for training,\n",
+      "    and 25,000 for testing. There is additional unlabeled data for use as well.\n",
+      "    \"\"\",\n",
+      "    config_description=\"\"\"\n",
+      "    Plain text\n",
+      "    \"\"\",\n",
+      "    homepage='http://ai.stanford.edu/~amaas/data/sentiment/',\n",
+      "    data_dir='/Users/ryfazrin/tensorflow_datasets/imdb_reviews/plain_text/1.0.0',\n",
+      "    file_format=tfrecord,\n",
+      "    download_size=80.23 MiB,\n",
+      "    dataset_size=129.83 MiB,\n",
+      "    features=FeaturesDict({\n",
+      "        'label': ClassLabel(shape=(), dtype=int64, num_classes=2),\n",
+      "        'text': Text(shape=(), dtype=string),\n",
+      "    }),\n",
+      "    supervised_keys=('text', 'label'),\n",
+      "    disable_shuffling=False,\n",
+      "    nondeterministic_order=False,\n",
+      "    splits={\n",
+      "        'test': <SplitInfo num_examples=25000, num_shards=1>,\n",
+      "        'train': <SplitInfo num_examples=25000, num_shards=1>,\n",
+      "        'unsupervised': <SplitInfo num_examples=50000, num_shards=1>,\n",
+      "    },\n",
+      "    citation=\"\"\"@InProceedings{maas-EtAl:2011:ACL-HLT2011,\n",
+      "      author    = {Maas, Andrew L.  and  Daly, Raymond E.  and  Pham, Peter T.  and  Huang, Dan  and  Ng, Andrew Y.  and  Potts, Christopher},\n",
+      "      title     = {Learning Word Vectors for Sentiment Analysis},\n",
+      "      booktitle = {Proceedings of the 49th Annual Meeting of the Association for Computational Linguistics: Human Language Technologies},\n",
+      "      month     = {June},\n",
+      "      year      = {2011},\n",
+      "      address   = {Portland, Oregon, USA},\n",
+      "      publisher = {Association for Computational Linguistics},\n",
+      "      pages     = {142--150},\n",
+      "      url       = {http://www.aclweb.org/anthology/P11-1015}\n",
+      "    }\"\"\",\n",
+      ")\n"
+     ]
+    }
+   ],
+   "source": [
+    "# =========================\n",
+    "# 2. Load Dataset\n",
+    "# =========================\n",
+    "# as_supervised=True makes each example a (text, label) pair; with_info also returns metadata\n",
+    "dataset, info = tfds.load(\"imdb_reviews\", with_info=True, as_supervised=True)\n",
+    "\n",
+    "train_data = dataset['train']\n",
+    "test_data = dataset['test']\n",
+    "\n",
+    "print(info)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "69c4d361",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
+      "I0000 00:00:1775131893.297532   71659 tf_record_dataset_op.cc:396] The default buffer size is 262144, which is overridden by the user specified `buffer_size` of 8388608\n"
+     ]
+    }
+   ],
+   "source": [
+    "# =========================\n",
+    "# 3. Preprocessing (Tokenization)\n",
+    "# =========================\n",
+    "vocab_size = 1000  # small vocabulary -> TinyML friendly\n",
+    "max_length = 100\n",
+    "oov_tok = \"<OOV>\"\n",
+    "\n",
+    "# Pull the raw text and labels out of the training split in a single pass\n",
+    "train_pairs = [\n",
+    "    (sentence.numpy().decode('utf-8'), label.numpy())\n",
+    "    for sentence, label in train_data\n",
+    "]\n",
+    "train_sentences = [text for text, _ in train_pairs]\n",
+    "train_labels = np.array([lab for _, lab in train_pairs])\n",
+    "\n",
+    "# Fit the word index on the training text only\n",
+    "tokenizer = tf.keras.preprocessing.text.Tokenizer(\n",
+    "    num_words=vocab_size, oov_token=oov_tok\n",
+    ")\n",
+    "tokenizer.fit_on_texts(train_sentences)\n",
+    "\n",
+    "# Text -> integer ids, then pad/truncate at the end so every row has max_length ids\n",
+    "train_sequences = tokenizer.texts_to_sequences(train_sentences)\n",
+    "train_padded = tf.keras.preprocessing.sequence.pad_sequences(\n",
+    "    train_sequences, maxlen=max_length, padding='post', truncating='post'\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "aadbfb8a",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# =========================\n",
+    "# 4. Test Data Preprocessing\n",
+    "# =========================\n",
+    "# Pull the raw text and labels out of the test split in a single pass\n",
+    "test_pairs = [\n",
+    "    (sentence.numpy().decode('utf-8'), label.numpy())\n",
+    "    for sentence, label in test_data\n",
+    "]\n",
+    "test_sentences = [text for text, _ in test_pairs]\n",
+    "test_labels = np.array([lab for _, lab in test_pairs])\n",
+    "\n",
+    "# Reuse the tokenizer fitted on the training text, with the same padding settings\n",
+    "test_sequences = tokenizer.texts_to_sequences(test_sentences)\n",
+    "test_padded = tf.keras.preprocessing.sequence.pad_sequences(\n",
+    "    test_sequences, maxlen=max_length, padding='post', truncating='post'\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "id": "47c7c99d",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/Users/ryfazrin/miniconda3/envs/research/lib/python3.10/site-packages/keras/src/layers/core/embedding.py:97: UserWarning: Argument `input_length` is deprecated. Just remove it.\n",
+      "  warnings.warn(\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"font-weight: bold\">Model: \"sequential\"</span>\n",
+       "</pre>\n"
+      ],
+      "text/plain": [
+       "\u001b[1mModel: \"sequential\"\u001b[0m\n"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/html": [
+       "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓\n",
+       "┃<span style=\"font-weight: bold\"> Layer (type)                    </span>┃<span style=\"font-weight: bold\"> Output Shape           </span>┃<span style=\"font-weight: bold\">       Param # </span>┃\n",
+       "┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━���━┩\n",
+       "│ embedding (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">Embedding</span>)           │ ?                      │   <span style=\"color: #00af00; text-decoration-color: #00af00\">0</span> (unbuilt) │\n",
+       "├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
+       "│ global_average_pooling1d        │ ?                      │             <span style=\"color: #00af00; text-decoration-color: #00af00\">0</span> │\n",
+       "│ (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">GlobalAveragePooling1D</span>)        │                        │               │\n",
+       "├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
+       "│ dense (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">Dense</span>)                   │ ?                      │   <span style=\"color: #00af00; text-decoration-color: #00af00\">0</span> (unbuilt) │\n",
+       "├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
+       "│ dense_1 (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">Dense</span>)                 │ ?                      │   <span style=\"color: #00af00; text-decoration-color: #00af00\">0</span> (unbuilt) │\n",
+       "└─────────────────────────────────┴────────────────────────┴───────────────┘\n",
+       "</pre>\n"
+      ],
+      "text/plain": [
+       "┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓\n",
+       "┃\u001b[1m \u001b[0m\u001b[1mLayer (type)                   \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mOutput Shape          \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1m      Param #\u001b[0m\u001b[1m \u001b[0m┃\n",
+       "┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩\n",
+       "│ embedding (\u001b[38;5;33mEmbedding\u001b[0m)           │ ?                      │   \u001b[38;5;34m0\u001b[0m (unbuilt) │\n",
+       "├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
+       "│ global_average_pooling1d        │ ?                      │             \u001b[38;5;34m0\u001b[0m │\n",
+       "│ (\u001b[38;5;33mGlobalAveragePooling1D\u001b[0m)        │                        │               │\n",
+       "├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
+       "│ dense (\u001b[38;5;33mDense\u001b[0m)                   │ ?                      │   \u001b[38;5;34m0\u001b[0m (unbuilt) │\n",
+       "├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
+       "│ dense_1 (\u001b[38;5;33mDense\u001b[0m)                 │ ?                      │   \u001b[38;5;34m0\u001b[0m (unbuilt) │\n",
+       "└─────────────────────────────────┴────────────────────────┴───────────────┘\n"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/html": [
+       "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"font-weight: bold\"> Total params: </span><span style=\"color: #00af00; text-decoration-color: #00af00\">0</span> (0.00 B)\n",
+       "</pre>\n"
+      ],
+      "text/plain": [
+       "\u001b[1m Total params: \u001b[0m\u001b[38;5;34m0\u001b[0m (0.00 B)\n"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/html": [
+       "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"font-weight: bold\"> Trainable params: </span><span style=\"color: #00af00; text-decoration-color: #00af00\">0</span> (0.00 B)\n",
+       "</pre>\n"
+      ],
+      "text/plain": [
+       "\u001b[1m Trainable params: \u001b[0m\u001b[38;5;34m0\u001b[0m (0.00 B)\n"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/html": [
+       "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"font-weight: bold\"> Non-trainable params: </span><span style=\"color: #00af00; text-decoration-color: #00af00\">0</span> (0.00 B)\n",
+       "</pre>\n"
+      ],
+      "text/plain": [
+       "\u001b[1m Non-trainable params: \u001b[0m\u001b[38;5;34m0\u001b[0m (0.00 B)\n"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "# =========================\n",
+    "# 5. Model Tiny (Lightweight)\n",
+    "# =========================\n",
+    "model = tf.keras.Sequential([\n",
+    "    # An explicit Input replaces the deprecated Embedding(input_length=...) argument\n",
+    "    # and builds the model right away, so model.summary() reports real output shapes\n",
+    "    # and parameter counts instead of \"unbuilt\". The dtype is left at its default.\n",
+    "    tf.keras.Input(shape=(max_length,)),\n",
+    "    # vocab_size token ids, each mapped to a 16-dimensional vector\n",
+    "    tf.keras.layers.Embedding(vocab_size, 16),\n",
+    "    # Average over the sequence axis to get one fixed-size vector per review\n",
+    "    tf.keras.layers.GlobalAveragePooling1D(),\n",
+    "    tf.keras.layers.Dense(16, activation='relu'),\n",
+    "    # Single sigmoid unit for the binary sentiment label\n",
+    "    tf.keras.layers.Dense(1, activation='sigmoid')\n",
+    "])\n",
+    "\n",
+    "model.compile(\n",
+    "    loss='binary_crossentropy',\n",
+    "    optimizer='adam',\n",
+    "    metrics=['accuracy']\n",
+    ")\n",
+    "\n",
+    "model.summary()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "id": "21c6cb46",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Epoch 1/5\n",
+      "\u001b[1m782/782\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m2s\u001b[0m 1ms/step - accuracy: 0.6960 - loss: 0.5796 - val_accuracy: 0.7796 - val_loss: 0.4684\n",
+      "Epoch 2/5\n",
+      "\u001b[1m782/782\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 1ms/step - accuracy: 0.7903 - loss: 0.4446 - val_accuracy: 0.7908 - val_loss: 0.4400\n",
+      "Epoch 3/5\n",
+      "\u001b[1m782/782\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 1ms/step - accuracy: 0.8042 - loss: 0.4221 - val_accuracy: 0.7971 - val_loss: 0.4329\n",
+      "Epoch 4/5\n",
+      "\u001b[1m782/782\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 1ms/step - accuracy: 0.8093 - loss: 0.4146 - val_accuracy: 0.7968 - val_loss: 0.4296\n",
+      "Epoch 5/5\n",
+      "\u001b[1m782/782\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 1ms/step - accuracy: 0.8081 - loss: 0.4121 - val_accuracy: 0.7958 - val_loss: 0.4364\n"
+     ]
+    }
+   ],
+   "source": [
+    "# =========================\n",
+    "# 6. Training\n",
+    "# =========================\n",
+    "# NOTE(review): the test split is also passed as validation data, so the\n",
+    "# val_* numbers of the last epoch are the same figures the evaluation step\n",
+    "# reports; hold out part of the training data instead if validation results\n",
+    "# are ever used for tuning or early stopping.\n",
+    "history = model.fit(\n",
+    "    x=train_padded,\n",
+    "    y=train_labels,\n",
+    "    validation_data=(test_padded, test_labels),\n",
+    "    epochs=5,\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "id": "7c98aa50",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\u001b[1m782/782\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 454us/step - accuracy: 0.7958 - loss: 0.4364\n",
+      "Test Accuracy: 0.7957599759101868\n"
+     ]
+    }
+   ],
+   "source": [
+    "# =========================\n",
+    "# 7. Evaluation\n",
+    "# =========================\n",
+    "# Score the trained model on test_padded / test_labels; evaluate() returns\n",
+    "# the loss followed by the compiled metric (accuracy).\n",
+    "loss, acc = model.evaluate(x=test_padded, y=test_labels)\n",
+    "print(f\"Test Accuracy: {acc}\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "2e979268",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "INFO:tensorflow:Assets written to: /var/folders/d6/03s1zvdj5wg1gwyhbzrcn8gr0000gn/T/tmpfhvqfrf5/assets\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:tensorflow:Assets written to: /var/folders/d6/03s1zvdj5wg1gwyhbzrcn8gr0000gn/T/tmpfhvqfrf5/assets\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Saved artifact at '/var/folders/d6/03s1zvdj5wg1gwyhbzrcn8gr0000gn/T/tmpfhvqfrf5'. The following endpoints are available:\n",
+      "\n",
+      "* Endpoint 'serve'\n",
+      "  args_0 (POSITIONAL_ONLY): TensorSpec(shape=(None, 100), dtype=tf.float32, name='keras_tensor')\n",
+      "Output Type:\n",
+      "  TensorSpec(shape=(None, 1), dtype=tf.float32, name=None)\n",
+      "Captures:\n",
+      "  6045205776: TensorSpec(shape=(), dtype=tf.resource, name=None)\n",
+      "  13165568320: TensorSpec(shape=(), dtype=tf.resource, name=None)\n",
+      "  13127621040: TensorSpec(shape=(), dtype=tf.resource, name=None)\n",
+      "  13127623152: TensorSpec(shape=(), dtype=tf.resource, name=None)\n",
+      "  13127625440: TensorSpec(shape=(), dtype=tf.resource, name=None)\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "W0000 00:00:1775131910.106384   68307 tf_tfl_flatbuffer_helpers.cc:364] Ignored output_format.\n",
+      "W0000 00:00:1775131910.106397   68307 tf_tfl_flatbuffer_helpers.cc:367] Ignored drop_control_dependency.\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Model TFLite berhasil disimpan!\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "I0000 00:00:1775131910.106825   68307 reader.cc:83] Reading SavedModel from: /var/folders/d6/03s1zvdj5wg1gwyhbzrcn8gr0000gn/T/tmpfhvqfrf5\n",
+      "I0000 00:00:1775131910.107216   68307 reader.cc:52] Reading meta graph with tags { serve }\n",
+      "I0000 00:00:1775131910.107221   68307 reader.cc:147] Reading SavedModel debug info (if present) from: /var/folders/d6/03s1zvdj5wg1gwyhbzrcn8gr0000gn/T/tmpfhvqfrf5\n",
+      "I0000 00:00:1775131910.110084   68307 mlir_graph_optimization_pass.cc:437] MLIR V1 optimization pass is not enabled\n",
+      "I0000 00:00:1775131910.110559   68307 loader.cc:236] Restoring SavedModel bundle.\n",
+      "I0000 00:00:1775131910.128073   68307 loader.cc:220] Running initialization op on SavedModel bundle at path: /var/folders/d6/03s1zvdj5wg1gwyhbzrcn8gr0000gn/T/tmpfhvqfrf5\n",
+      "I0000 00:00:1775131910.134243   68307 loader.cc:471] SavedModel load for tags { serve }; Status: success: OK. Took 27421 microseconds.\n",
+      "I0000 00:00:1775131910.146179   68307 dump_mlir_util.cc:269] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.\n"
+     ]
+    }
+   ],
+   "source": [
+    "# =========================\n",
+    "# 8. Convert to TensorFlow Lite (TinyML step)\n",
+    "# =========================\n",
+    "tflite_path = \"tiny_sentiment_model_imdb.tflite\"\n",
+    "\n",
+    "# Build the converter straight from the in-memory Keras model.\n",
+    "converter = tf.lite.TFLiteConverter.from_keras_model(model)\n",
+    "\n",
+    "# Quantization (treated as mandatory for TinyML by this notebook).\n",
+    "converter.optimizations = [tf.lite.Optimize.DEFAULT]\n",
+    "\n",
+    "tflite_model = converter.convert()\n",
+    "\n",
+    "# Save the converted model bytes to a file in the current working directory.\n",
+    "with open(tflite_path, \"wb\") as tflite_file:\n",
+    "    tflite_file.write(tflite_model)\n",
+    "\n",
+    "print(\"Model TFLite berhasil disimpan!\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "id": "e89e8714",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 18ms/step\n",
+      "Negative\n",
+      "\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 20ms/step\n",
+      "Negative\n"
+     ]
+    }
+   ],
+   "source": [
+    "# =========================\n",
+    "# 9. Test Custom Input\n",
+    "# =========================\n",
+    "def predict(text):\n",
+    "    \"\"\"Return the sentiment label the trained model assigns to `text`.\n",
+    "\n",
+    "    The text is converted to a token-id sequence with the notebook-level\n",
+    "    `tokenizer`, post-padded to `max_length`, and scored by `model`.\n",
+    "    A score above 0.5 yields \"Positive\"; anything else yields \"Negative\".\n",
+    "    \"\"\"\n",
+    "    token_ids = tokenizer.texts_to_sequences([text])\n",
+    "    model_input = tf.keras.preprocessing.sequence.pad_sequences(\n",
+    "        token_ids, maxlen=max_length, padding='post'\n",
+    "    )\n",
+    "    # Batch of one, one output unit: pick out the single score.\n",
+    "    score = model.predict(model_input)[0][0]\n",
+    "    if score > 0.5:\n",
+    "        return \"Positive\"\n",
+    "    return \"Negative\"\n",
+    "\n",
+    "# Quick smoke test on two hand-written reviews.\n",
+    "for sample_text in (\"this movie is ugly\", \"bad film ever\"):\n",
+    "    print(predict(sample_text))"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "research",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.20"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}

tiny_sentiment_model_imdb.tflite ADDED Viewed

Binary file (20.7 kB). View file