Muhammad Pazrin Andreanor committed on
Commit
508e2b6
·
1 Parent(s): 9200f4b

Add initial implementation of a Tiny NLP sentiment classification model

Browse files

- Created a Jupyter notebook for a simple text classification task using TensorFlow and TensorFlow Datasets.
- Implemented data loading, preprocessing, model definition, training, and evaluation steps.
- Added functionality to convert the trained model to TensorFlow Lite format for deployment.
- Included a prediction function for testing custom input text.
- Saved the TensorFlow Lite model as 'tiny_sentiment_model_imdb.tflite'.

basic-clasification.ipynb ADDED
@@ -0,0 +1,167 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "markdown",
5
+ "id": "c201ca37",
6
+ "metadata": {},
7
+ "source": [
8
+ "# Basic Image Classification (CNN)\n",
9
+ "\n",
10
+ "Notebook ini berisi contoh paling dasar untuk klasifikasi gambar menggunakan TensorFlow/Keras.\n",
11
+ "\n",
12
+ "## Ide tugas klasifikasi\n",
13
+ "1. Klasifikasi wireframe: `login`, `dashboard`, `product`, `form`, `table`.\n",
14
+ "2. Klasifikasi style desain: `clean`, `dense`, `minimal`, `complex`.\n",
15
+ "3. Klasifikasi tipe komponen dominan: `card-heavy`, `table-heavy`, `form-heavy`.\n",
16
+ "\n",
17
+ "Struktur dataset yang disarankan:\n",
18
+ "\n",
19
+ "```text\n",
20
+ "my_dataset/\n",
21
+ " train/\n",
22
+ " class_a/\n",
23
+ " class_b/\n",
24
+ " class_c/\n",
25
+ " val/\n",
26
+ " class_a/\n",
27
+ " class_b/\n",
28
+ " class_c/\n",
29
+ "```"
30
+ ]
31
+ },
32
+ {
33
+ "cell_type": "code",
34
+ "execution_count": null,
35
+ "id": "6067a559",
36
+ "metadata": {},
37
+ "outputs": [],
38
+ "source": [
39
+ "import tensorflow as tf\n",
40
+ "from tensorflow.keras import layers, models\n",
41
+ "\n",
42
+ "# Ubah sesuai lokasi dataset Anda\n",
43
+ "data_dir_train = \"./my_dataset/train\"\n",
44
+ "data_dir_val = \"./my_dataset/val\"\n",
45
+ "\n",
46
+ "img_size = (128, 128)\n",
47
+ "batch_size = 32\n",
48
+ "\n",
49
+ "train_ds = tf.keras.utils.image_dataset_from_directory(\n",
50
+ " data_dir_train,\n",
51
+ " image_size=img_size,\n",
52
+ " batch_size=batch_size,\n",
53
+ " label_mode=\"int\"\n",
54
+ ")\n",
55
+ "\n",
56
+ "val_ds = tf.keras.utils.image_dataset_from_directory(\n",
57
+ " data_dir_val,\n",
58
+ " image_size=img_size,\n",
59
+ " batch_size=batch_size,\n",
60
+ " label_mode=\"int\"\n",
61
+ ")\n",
62
+ "\n",
63
+ "class_names = train_ds.class_names\n",
64
+ "num_classes = len(class_names)\n",
65
+ "print(\"Classes:\", class_names)\n",
66
+ "\n",
67
+ "# Optimasi pipeline input\n",
68
+ "autotune = tf.data.AUTOTUNE\n",
69
+ "train_ds = train_ds.shuffle(1000).prefetch(buffer_size=autotune)\n",
70
+ "val_ds = val_ds.prefetch(buffer_size=autotune)"
71
+ ]
72
+ },
73
+ {
74
+ "cell_type": "code",
75
+ "execution_count": null,
76
+ "id": "368bd39b",
77
+ "metadata": {},
78
+ "outputs": [],
79
+ "source": [
80
+ "# Model CNN sederhana\n",
81
+ "model = models.Sequential([\n",
82
+ " layers.Rescaling(1.0 / 255, input_shape=(img_size[0], img_size[1], 3)),\n",
83
+ " layers.Conv2D(32, 3, activation=\"relu\"),\n",
84
+ " layers.MaxPooling2D(),\n",
85
+ " layers.Conv2D(64, 3, activation=\"relu\"),\n",
86
+ " layers.MaxPooling2D(),\n",
87
+ " layers.Conv2D(128, 3, activation=\"relu\"),\n",
88
+ " layers.MaxPooling2D(),\n",
89
+ " layers.Flatten(),\n",
90
+ " layers.Dense(128, activation=\"relu\"),\n",
91
+ " layers.Dropout(0.3),\n",
92
+ " layers.Dense(num_classes, activation=\"softmax\")\n",
93
+ "])\n",
94
+ "\n",
95
+ "model.compile(\n",
96
+ " optimizer=\"adam\",\n",
97
+ " loss=\"sparse_categorical_crossentropy\",\n",
98
+ " metrics=[\"accuracy\"]\n",
99
+ ")\n",
100
+ "\n",
101
+ "model.summary()"
102
+ ]
103
+ },
104
+ {
105
+ "cell_type": "code",
106
+ "execution_count": null,
107
+ "id": "b75d2ec6",
108
+ "metadata": {},
109
+ "outputs": [],
110
+ "source": [
111
+ "epochs = 10\n",
112
+ "history = model.fit(\n",
113
+ " train_ds,\n",
114
+ " validation_data=val_ds,\n",
115
+ " epochs=epochs\n",
116
+ ")\n",
117
+ "\n",
118
+ "loss, acc = model.evaluate(val_ds)\n",
119
+ "print(f\"Validation accuracy: {acc:.4f}\")\n",
120
+ "\n",
121
+ "model.save(\"basic_cnn_classification.h5\")"
122
+ ]
123
+ },
124
+ {
125
+ "cell_type": "code",
126
+ "execution_count": null,
127
+ "id": "55fff896",
128
+ "metadata": {},
129
+ "outputs": [],
130
+ "source": [
131
+ "# Prediksi 1 gambar baru\n",
132
+ "import numpy as np\n",
133
+ "from tensorflow.keras.preprocessing import image\n",
134
+ "\n",
135
+ "img_path = \"./sample.jpg\" # ganti ke file gambar Anda\n",
136
+ "img = image.load_img(img_path, target_size=img_size)\n",
137
+ "arr = image.img_to_array(img)\n",
138
+ "arr = np.expand_dims(arr, axis=0) / 255.0\n",
139
+ "\n",
140
+ "pred = model.predict(arr)\n",
141
+ "pred_class = class_names[np.argmax(pred)]\n",
142
+ "print(\"Predicted class:\", pred_class)"
143
+ ]
144
+ }
145
+ ],
146
+ "metadata": {
147
+ "kernelspec": {
148
+ "display_name": "research",
149
+ "language": "python",
150
+ "name": "python3"
151
+ },
152
+ "language_info": {
153
+ "codemirror_mode": {
154
+ "name": "ipython",
155
+ "version": 3
156
+ },
157
+ "file_extension": ".py",
158
+ "mimetype": "text/x-python",
159
+ "name": "python",
160
+ "nbconvert_exporter": "python",
161
+ "pygments_lexer": "ipython3",
162
+ "version": "3.10.20"
163
+ }
164
+ },
165
+ "nbformat": 4,
166
+ "nbformat_minor": 5
167
+ }
starter project Tiny NLP (klasifikasi teks sederhana: sentimen positif vs negatif).ipynb ADDED
@@ -0,0 +1,688 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 2,
6
+ "id": "460f0c3f",
7
+ "metadata": {},
8
+ "outputs": [
9
+ {
10
+ "name": "stdout",
11
+ "output_type": "stream",
12
+ "text": [
13
+ "Requirement already satisfied: tensorflow in /Users/ryfazrin/miniconda3/envs/research/lib/python3.10/site-packages (2.21.0)\n",
14
+ "Requirement already satisfied: tensorflow-datasets in /Users/ryfazrin/miniconda3/envs/research/lib/python3.10/site-packages (4.9.9)\n",
15
+ "Requirement already satisfied: absl-py>=1.0.0 in /Users/ryfazrin/miniconda3/envs/research/lib/python3.10/site-packages (from tensorflow) (2.3.1)\n",
16
+ "Requirement already satisfied: astunparse>=1.6.0 in /Users/ryfazrin/miniconda3/envs/research/lib/python3.10/site-packages (from tensorflow) (1.6.3)\n",
17
+ "Requirement already satisfied: flatbuffers>=25.9.23 in /Users/ryfazrin/miniconda3/envs/research/lib/python3.10/site-packages (from tensorflow) (25.9.23)\n",
18
+ "Requirement already satisfied: gast!=0.5.0,!=0.5.1,!=0.5.2,>=0.2.1 in /Users/ryfazrin/miniconda3/envs/research/lib/python3.10/site-packages (from tensorflow) (0.7.0)\n",
19
+ "Requirement already satisfied: google_pasta>=0.1.1 in /Users/ryfazrin/miniconda3/envs/research/lib/python3.10/site-packages (from tensorflow) (0.2.0)\n",
20
+ "Requirement already satisfied: libclang>=13.0.0 in /Users/ryfazrin/miniconda3/envs/research/lib/python3.10/site-packages (from tensorflow) (18.1.1)\n",
21
+ "Requirement already satisfied: opt_einsum>=2.3.2 in /Users/ryfazrin/miniconda3/envs/research/lib/python3.10/site-packages (from tensorflow) (3.3.0)\n",
22
+ "Requirement already satisfied: packaging in /Users/ryfazrin/miniconda3/envs/research/lib/python3.10/site-packages (from tensorflow) (26.0)\n",
23
+ "Requirement already satisfied: protobuf<8.0.0,>=6.31.1 in /Users/ryfazrin/miniconda3/envs/research/lib/python3.10/site-packages (from tensorflow) (6.32.0)\n",
24
+ "Requirement already satisfied: requests<3,>=2.21.0 in /Users/ryfazrin/miniconda3/envs/research/lib/python3.10/site-packages (from tensorflow) (2.32.5)\n",
25
+ "Requirement already satisfied: setuptools in /Users/ryfazrin/miniconda3/envs/research/lib/python3.10/site-packages (from tensorflow) (80.10.2)\n",
26
+ "Requirement already satisfied: six>=1.12.0 in /Users/ryfazrin/miniconda3/envs/research/lib/python3.10/site-packages (from tensorflow) (1.17.0)\n",
27
+ "Requirement already satisfied: termcolor>=1.1.0 in /Users/ryfazrin/miniconda3/envs/research/lib/python3.10/site-packages (from tensorflow) (3.2.0)\n",
28
+ "Requirement already satisfied: typing_extensions>=3.6.6 in /Users/ryfazrin/miniconda3/envs/research/lib/python3.10/site-packages (from tensorflow) (4.15.0)\n",
29
+ "Requirement already satisfied: wrapt>=1.11.0 in /Users/ryfazrin/miniconda3/envs/research/lib/python3.10/site-packages (from tensorflow) (2.0.1)\n",
30
+ "Requirement already satisfied: grpcio<2.0,>=1.24.3 in /Users/ryfazrin/miniconda3/envs/research/lib/python3.10/site-packages (from tensorflow) (1.78.0)\n",
31
+ "Requirement already satisfied: keras>=3.12.0 in /Users/ryfazrin/miniconda3/envs/research/lib/python3.10/site-packages (from tensorflow) (3.12.1)\n",
32
+ "Requirement already satisfied: numpy>=1.26.0 in /Users/ryfazrin/miniconda3/envs/research/lib/python3.10/site-packages (from tensorflow) (2.2.5)\n",
33
+ "Requirement already satisfied: h5py<3.15.0,>=3.11.0 in /Users/ryfazrin/miniconda3/envs/research/lib/python3.10/site-packages (from tensorflow) (3.14.0)\n",
34
+ "Requirement already satisfied: ml_dtypes<1.0.0,>=0.5.1 in /Users/ryfazrin/miniconda3/envs/research/lib/python3.10/site-packages (from tensorflow) (0.5.4)\n",
35
+ "Requirement already satisfied: charset_normalizer<4,>=2 in /Users/ryfazrin/miniconda3/envs/research/lib/python3.10/site-packages (from requests<3,>=2.21.0->tensorflow) (3.4.4)\n",
36
+ "Requirement already satisfied: idna<4,>=2.5 in /Users/ryfazrin/miniconda3/envs/research/lib/python3.10/site-packages (from requests<3,>=2.21.0->tensorflow) (3.11)\n",
37
+ "Requirement already satisfied: urllib3<3,>=1.21.1 in /Users/ryfazrin/miniconda3/envs/research/lib/python3.10/site-packages (from requests<3,>=2.21.0->tensorflow) (2.6.3)\n",
38
+ "Requirement already satisfied: certifi>=2017.4.17 in /Users/ryfazrin/miniconda3/envs/research/lib/python3.10/site-packages (from requests<3,>=2.21.0->tensorflow) (2026.1.4)\n",
39
+ "Requirement already satisfied: dm-tree in /Users/ryfazrin/miniconda3/envs/research/lib/python3.10/site-packages (from tensorflow-datasets) (0.1.10)\n",
40
+ "Requirement already satisfied: etils>=1.6.0 in /Users/ryfazrin/miniconda3/envs/research/lib/python3.10/site-packages (from etils[edc,enp,epath,epy,etree]>=1.6.0; python_version < \"3.11\"->tensorflow-datasets) (1.13.0)\n",
41
+ "Requirement already satisfied: immutabledict in /Users/ryfazrin/miniconda3/envs/research/lib/python3.10/site-packages (from tensorflow-datasets) (4.3.1)\n",
42
+ "Requirement already satisfied: promise in /Users/ryfazrin/miniconda3/envs/research/lib/python3.10/site-packages (from tensorflow-datasets) (2.3)\n",
43
+ "Requirement already satisfied: psutil in /Users/ryfazrin/miniconda3/envs/research/lib/python3.10/site-packages (from tensorflow-datasets) (7.0.0)\n",
44
+ "Requirement already satisfied: pyarrow in /Users/ryfazrin/miniconda3/envs/research/lib/python3.10/site-packages (from tensorflow-datasets) (23.0.1)\n",
45
+ "Requirement already satisfied: simple_parsing in /Users/ryfazrin/miniconda3/envs/research/lib/python3.10/site-packages (from tensorflow-datasets) (0.1.8)\n",
46
+ "Requirement already satisfied: tensorflow-metadata in /Users/ryfazrin/miniconda3/envs/research/lib/python3.10/site-packages (from tensorflow-datasets) (1.17.3)\n",
47
+ "Requirement already satisfied: toml in /Users/ryfazrin/miniconda3/envs/research/lib/python3.10/site-packages (from tensorflow-datasets) (0.10.2)\n",
48
+ "Requirement already satisfied: tqdm in /Users/ryfazrin/miniconda3/envs/research/lib/python3.10/site-packages (from tensorflow-datasets) (4.67.3)\n",
49
+ "Requirement already satisfied: wheel<1.0,>=0.23.0 in /Users/ryfazrin/miniconda3/envs/research/lib/python3.10/site-packages (from astunparse>=1.6.0->tensorflow) (0.46.3)\n",
50
+ "Requirement already satisfied: fsspec in /Users/ryfazrin/miniconda3/envs/research/lib/python3.10/site-packages (from etils[edc,enp,epath,epy,etree]>=1.6.0; python_version < \"3.11\"->tensorflow-datasets) (2026.3.0)\n",
51
+ "Requirement already satisfied: importlib_resources in /Users/ryfazrin/miniconda3/envs/research/lib/python3.10/site-packages (from etils[edc,enp,epath,epy,etree]>=1.6.0; python_version < \"3.11\"->tensorflow-datasets) (6.5.2)\n",
52
+ "Requirement already satisfied: zipp in /Users/ryfazrin/miniconda3/envs/research/lib/python3.10/site-packages (from etils[edc,enp,epath,epy,etree]>=1.6.0; python_version < \"3.11\"->tensorflow-datasets) (3.23.0)\n",
53
+ "Requirement already satisfied: einops in /Users/ryfazrin/miniconda3/envs/research/lib/python3.10/site-packages (from etils[edc,enp,epath,epy,etree]>=1.6.0; python_version < \"3.11\"->tensorflow-datasets) (0.8.2)\n",
54
+ "Requirement already satisfied: rich in /Users/ryfazrin/miniconda3/envs/research/lib/python3.10/site-packages (from keras>=3.12.0->tensorflow) (14.2.0)\n",
55
+ "Requirement already satisfied: namex in /Users/ryfazrin/miniconda3/envs/research/lib/python3.10/site-packages (from keras>=3.12.0->tensorflow) (0.1.0)\n",
56
+ "Requirement already satisfied: optree in /Users/ryfazrin/miniconda3/envs/research/lib/python3.10/site-packages (from keras>=3.12.0->tensorflow) (0.18.0)\n",
57
+ "Requirement already satisfied: attrs>=18.2.0 in /Users/ryfazrin/miniconda3/envs/research/lib/python3.10/site-packages (from dm-tree->tensorflow-datasets) (26.1.0)\n",
58
+ "Requirement already satisfied: markdown-it-py>=2.2.0 in /Users/ryfazrin/miniconda3/envs/research/lib/python3.10/site-packages (from rich->keras>=3.12.0->tensorflow) (4.0.0)\n",
59
+ "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /Users/ryfazrin/miniconda3/envs/research/lib/python3.10/site-packages (from rich->keras>=3.12.0->tensorflow) (2.19.2)\n",
60
+ "Requirement already satisfied: mdurl~=0.1 in /Users/ryfazrin/miniconda3/envs/research/lib/python3.10/site-packages (from markdown-it-py>=2.2.0->rich->keras>=3.12.0->tensorflow) (0.1.2)\n",
61
+ "Requirement already satisfied: docstring-parser~=0.15 in /Users/ryfazrin/miniconda3/envs/research/lib/python3.10/site-packages (from simple_parsing->tensorflow-datasets) (0.17.0)\n",
62
+ "TensorFlow version: 2.21.0\n"
63
+ ]
64
+ }
65
+ ],
66
+ "source": [
67
+ "# =========================\n",
68
+ "# 1. Install & Import\n",
69
+ "# =========================\n",
70
+ "!pip install tensorflow tensorflow-datasets\n",
71
+ "\n",
72
+ "import tensorflow as tf\n",
73
+ "import tensorflow_datasets as tfds\n",
74
+ "import numpy as np\n",
75
+ "\n",
76
+ "print(\"TensorFlow version:\", tf.__version__)"
77
+ ]
78
+ },
79
+ {
80
+ "cell_type": "code",
81
+ "execution_count": 3,
82
+ "id": "7078b823",
83
+ "metadata": {},
84
+ "outputs": [
85
+ {
86
+ "name": "stderr",
87
+ "output_type": "stream",
88
+ "text": [
89
+ "WARNING:absl:Variant folder /Users/ryfazrin/tensorflow_datasets/imdb_reviews/plain_text/1.0.0 has no dataset_info.json\n"
90
+ ]
91
+ },
92
+ {
93
+ "name": "stdout",
94
+ "output_type": "stream",
95
+ "text": [
96
+ "\u001b[1mDownloading and preparing dataset Unknown size (download: Unknown size, generated: Unknown size, total: Unknown size) to /Users/ryfazrin/tensorflow_datasets/imdb_reviews/plain_text/1.0.0...\u001b[0m\n"
97
+ ]
98
+ },
99
+ {
100
+ "data": {
101
+ "application/vnd.jupyter.widget-view+json": {
102
+ "model_id": "c5199f8e459944839e9117fa4e483203",
103
+ "version_major": 2,
104
+ "version_minor": 0
105
+ },
106
+ "text/plain": [
107
+ "Dl Completed...: 0 url [00:00, ? url/s]"
108
+ ]
109
+ },
110
+ "metadata": {},
111
+ "output_type": "display_data"
112
+ },
113
+ {
114
+ "data": {
115
+ "application/vnd.jupyter.widget-view+json": {
116
+ "model_id": "d0423f620ad44853bfc8237977ffd90e",
117
+ "version_major": 2,
118
+ "version_minor": 0
119
+ },
120
+ "text/plain": [
121
+ "Dl Size...: 0 MiB [00:00, ? MiB/s]"
122
+ ]
123
+ },
124
+ "metadata": {},
125
+ "output_type": "display_data"
126
+ },
127
+ {
128
+ "data": {
129
+ "application/vnd.jupyter.widget-view+json": {
130
+ "model_id": "2d58dab3dcea4d89af9730a66e4fc576",
131
+ "version_major": 2,
132
+ "version_minor": 0
133
+ },
134
+ "text/plain": [
135
+ "Generating splits...: 0%| | 0/3 [00:00<?, ? splits/s]"
136
+ ]
137
+ },
138
+ "metadata": {},
139
+ "output_type": "display_data"
140
+ },
141
+ {
142
+ "data": {
143
+ "application/vnd.jupyter.widget-view+json": {
144
+ "model_id": "6653450aa06e4b8f9438de3fd4b49bb1",
145
+ "version_major": 2,
146
+ "version_minor": 0
147
+ },
148
+ "text/plain": [
149
+ "Generating train examples...: 0 examples [00:00, ? examples/s]"
150
+ ]
151
+ },
152
+ "metadata": {},
153
+ "output_type": "display_data"
154
+ },
155
+ {
156
+ "data": {
157
+ "application/vnd.jupyter.widget-view+json": {
158
+ "model_id": "3c31403c17234d42b656cb3de3b02fe5",
159
+ "version_major": 2,
160
+ "version_minor": 0
161
+ },
162
+ "text/plain": [
163
+ "Shuffling /Users/ryfazrin/tensorflow_datasets/imdb_reviews/plain_text/incomplete.UBUGIV_1.0.0/imdb_reviews-tra…"
164
+ ]
165
+ },
166
+ "metadata": {},
167
+ "output_type": "display_data"
168
+ },
169
+ {
170
+ "data": {
171
+ "application/vnd.jupyter.widget-view+json": {
172
+ "model_id": "1b2c932dbb66420faa5ce931167da15f",
173
+ "version_major": 2,
174
+ "version_minor": 0
175
+ },
176
+ "text/plain": [
177
+ "Generating test examples...: 0 examples [00:00, ? examples/s]"
178
+ ]
179
+ },
180
+ "metadata": {},
181
+ "output_type": "display_data"
182
+ },
183
+ {
184
+ "data": {
185
+ "application/vnd.jupyter.widget-view+json": {
186
+ "model_id": "03bc0b5fec7447c793f5f714b4fc3a39",
187
+ "version_major": 2,
188
+ "version_minor": 0
189
+ },
190
+ "text/plain": [
191
+ "Shuffling /Users/ryfazrin/tensorflow_datasets/imdb_reviews/plain_text/incomplete.UBUGIV_1.0.0/imdb_reviews-tes…"
192
+ ]
193
+ },
194
+ "metadata": {},
195
+ "output_type": "display_data"
196
+ },
197
+ {
198
+ "data": {
199
+ "application/vnd.jupyter.widget-view+json": {
200
+ "model_id": "1ecd1361c7a54ae7abe2de2a6d20ffad",
201
+ "version_major": 2,
202
+ "version_minor": 0
203
+ },
204
+ "text/plain": [
205
+ "Generating unsupervised examples...: 0 examples [00:00, ? examples/s]"
206
+ ]
207
+ },
208
+ "metadata": {},
209
+ "output_type": "display_data"
210
+ },
211
+ {
212
+ "data": {
213
+ "application/vnd.jupyter.widget-view+json": {
214
+ "model_id": "dd202d09a08341e1b7b9ec0eb49dc0fe",
215
+ "version_major": 2,
216
+ "version_minor": 0
217
+ },
218
+ "text/plain": [
219
+ "Shuffling /Users/ryfazrin/tensorflow_datasets/imdb_reviews/plain_text/incomplete.UBUGIV_1.0.0/imdb_reviews-uns…"
220
+ ]
221
+ },
222
+ "metadata": {},
223
+ "output_type": "display_data"
224
+ },
225
+ {
226
+ "name": "stdout",
227
+ "output_type": "stream",
228
+ "text": [
229
+ "\u001b[1mDataset imdb_reviews downloaded and prepared to /Users/ryfazrin/tensorflow_datasets/imdb_reviews/plain_text/1.0.0. Subsequent calls will reuse this data.\u001b[0m\n",
230
+ "tfds.core.DatasetInfo(\n",
231
+ " name='imdb_reviews',\n",
232
+ " full_name='imdb_reviews/plain_text/1.0.0',\n",
233
+ " description=\"\"\"\n",
234
+ " Large Movie Review Dataset. This is a dataset for binary sentiment\n",
235
+ " classification containing substantially more data than previous benchmark\n",
236
+ " datasets. We provide a set of 25,000 highly polar movie reviews for training,\n",
237
+ " and 25,000 for testing. There is additional unlabeled data for use as well.\n",
238
+ " \"\"\",\n",
239
+ " config_description=\"\"\"\n",
240
+ " Plain text\n",
241
+ " \"\"\",\n",
242
+ " homepage='http://ai.stanford.edu/~amaas/data/sentiment/',\n",
243
+ " data_dir='/Users/ryfazrin/tensorflow_datasets/imdb_reviews/plain_text/1.0.0',\n",
244
+ " file_format=tfrecord,\n",
245
+ " download_size=80.23 MiB,\n",
246
+ " dataset_size=129.83 MiB,\n",
247
+ " features=FeaturesDict({\n",
248
+ " 'label': ClassLabel(shape=(), dtype=int64, num_classes=2),\n",
249
+ " 'text': Text(shape=(), dtype=string),\n",
250
+ " }),\n",
251
+ " supervised_keys=('text', 'label'),\n",
252
+ " disable_shuffling=False,\n",
253
+ " nondeterministic_order=False,\n",
254
+ " splits={\n",
255
+ " 'test': <SplitInfo num_examples=25000, num_shards=1>,\n",
256
+ " 'train': <SplitInfo num_examples=25000, num_shards=1>,\n",
257
+ " 'unsupervised': <SplitInfo num_examples=50000, num_shards=1>,\n",
258
+ " },\n",
259
+ " citation=\"\"\"@InProceedings{maas-EtAl:2011:ACL-HLT2011,\n",
260
+ " author = {Maas, Andrew L. and Daly, Raymond E. and Pham, Peter T. and Huang, Dan and Ng, Andrew Y. and Potts, Christopher},\n",
261
+ " title = {Learning Word Vectors for Sentiment Analysis},\n",
262
+ " booktitle = {Proceedings of the 49th Annual Meeting of the Association for Computational Linguistics: Human Language Technologies},\n",
263
+ " month = {June},\n",
264
+ " year = {2011},\n",
265
+ " address = {Portland, Oregon, USA},\n",
266
+ " publisher = {Association for Computational Linguistics},\n",
267
+ " pages = {142--150},\n",
268
+ " url = {http://www.aclweb.org/anthology/P11-1015}\n",
269
+ " }\"\"\",\n",
270
+ ")\n"
271
+ ]
272
+ }
273
+ ],
274
+ "source": [
275
+ "# =========================\n",
276
+ "# 2. Load Dataset\n",
277
+ "# =========================\n",
278
+ "dataset, info = tfds.load(\n",
279
+ " \"imdb_reviews\",\n",
280
+ " with_info=True,\n",
281
+ " as_supervised=True\n",
282
+ ")\n",
283
+ "\n",
284
+ "train_data, test_data = dataset['train'], dataset['test']\n",
285
+ "\n",
286
+ "print(info)"
287
+ ]
288
+ },
289
+ {
290
+ "cell_type": "code",
291
+ "execution_count": 4,
292
+ "id": "69c4d361",
293
+ "metadata": {},
294
+ "outputs": [
295
+ {
296
+ "name": "stderr",
297
+ "output_type": "stream",
298
+ "text": [
299
+ "WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
300
+ "I0000 00:00:1775131893.297532 71659 tf_record_dataset_op.cc:396] The default buffer size is 262144, which is overridden by the user specified `buffer_size` of 8388608\n"
301
+ ]
302
+ }
303
+ ],
304
+ "source": [
305
+ "# =========================\n",
306
+ "# 3. Preprocessing (Tokenization)\n",
307
+ "# =========================\n",
308
+ "vocab_size = 1000 # kecil → TinyML friendly\n",
309
+ "max_length = 100\n",
310
+ "oov_tok = \"<OOV>\"\n",
311
+ "\n",
312
+ "tokenizer = tf.keras.preprocessing.text.Tokenizer(\n",
313
+ " num_words=vocab_size,\n",
314
+ " oov_token=oov_tok\n",
315
+ ")\n",
316
+ "\n",
317
+ "# ambil teks saja\n",
318
+ "train_sentences = []\n",
319
+ "train_labels = []\n",
320
+ "\n",
321
+ "for sentence, label in train_data:\n",
322
+ " train_sentences.append(sentence.numpy().decode('utf-8'))\n",
323
+ " train_labels.append(label.numpy())\n",
324
+ "\n",
325
+ "tokenizer.fit_on_texts(train_sentences)\n",
326
+ "\n",
327
+ "# convert ke sequence\n",
328
+ "train_sequences = tokenizer.texts_to_sequences(train_sentences)\n",
329
+ "train_padded = tf.keras.preprocessing.sequence.pad_sequences(\n",
330
+ " train_sequences,\n",
331
+ " maxlen=max_length,\n",
332
+ " padding='post',\n",
333
+ " truncating='post'\n",
334
+ ")\n",
335
+ "\n",
336
+ "train_labels = np.array(train_labels)"
337
+ ]
338
+ },
339
+ {
340
+ "cell_type": "code",
341
+ "execution_count": 5,
342
+ "id": "aadbfb8a",
343
+ "metadata": {},
344
+ "outputs": [],
345
+ "source": [
346
+ "# =========================\n",
347
+ "# 4. Test Data Preprocessing\n",
348
+ "# =========================\n",
349
+ "test_sentences = []\n",
350
+ "test_labels = []\n",
351
+ "\n",
352
+ "for sentence, label in test_data:\n",
353
+ " test_sentences.append(sentence.numpy().decode('utf-8'))\n",
354
+ " test_labels.append(label.numpy())\n",
355
+ "\n",
356
+ "test_sequences = tokenizer.texts_to_sequences(test_sentences)\n",
357
+ "test_padded = tf.keras.preprocessing.sequence.pad_sequences(\n",
358
+ " test_sequences,\n",
359
+ " maxlen=max_length,\n",
360
+ " padding='post',\n",
361
+ " truncating='post'\n",
362
+ ")\n",
363
+ "\n",
364
+ "test_labels = np.array(test_labels)"
365
+ ]
366
+ },
367
+ {
368
+ "cell_type": "code",
369
+ "execution_count": 6,
370
+ "id": "47c7c99d",
371
+ "metadata": {},
372
+ "outputs": [
373
+ {
374
+ "name": "stderr",
375
+ "output_type": "stream",
376
+ "text": [
377
+ "/Users/ryfazrin/miniconda3/envs/research/lib/python3.10/site-packages/keras/src/layers/core/embedding.py:97: UserWarning: Argument `input_length` is deprecated. Just remove it.\n",
378
+ " warnings.warn(\n"
379
+ ]
380
+ },
381
+ {
382
+ "data": {
383
+ "text/html": [
384
+ "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"font-weight: bold\">Model: \"sequential\"</span>\n",
385
+ "</pre>\n"
386
+ ],
387
+ "text/plain": [
388
+ "\u001b[1mModel: \"sequential\"\u001b[0m\n"
389
+ ]
390
+ },
391
+ "metadata": {},
392
+ "output_type": "display_data"
393
+ },
394
+ {
395
+ "data": {
396
+ "text/html": [
397
+ "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓\n",
398
+ "┃<span style=\"font-weight: bold\"> Layer (type) </span>┃<span style=\"font-weight: bold\"> Output Shape </span>┃<span style=\"font-weight: bold\"> Param # </span>┃\n",
399
+ "┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩\n",
400
+ "│ embedding (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">Embedding</span>) │ ? │ <span style=\"color: #00af00; text-decoration-color: #00af00\">0</span> (unbuilt) │\n",
401
+ "├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
402
+ "│ global_average_pooling1d │ ? │ <span style=\"color: #00af00; text-decoration-color: #00af00\">0</span> │\n",
403
+ "│ (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">GlobalAveragePooling1D</span>) │ │ │\n",
404
+ "├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
405
+ "│ dense (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">Dense</span>) │ ? │ <span style=\"color: #00af00; text-decoration-color: #00af00\">0</span> (unbuilt) │\n",
406
+ "├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
407
+ "│ dense_1 (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">Dense</span>) │ ? │ <span style=\"color: #00af00; text-decoration-color: #00af00\">0</span> (unbuilt) │\n",
408
+ "└─────────────────────────────────┴────────────────────────┴───────────────┘\n",
409
+ "</pre>\n"
410
+ ],
411
+ "text/plain": [
412
+ "┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓\n",
413
+ "┃\u001b[1m \u001b[0m\u001b[1mLayer (type) \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mOutput Shape \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1m Param #\u001b[0m\u001b[1m \u001b[0m┃\n",
414
+ "┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩\n",
415
+ "│ embedding (\u001b[38;5;33mEmbedding\u001b[0m) │ ? │ \u001b[38;5;34m0\u001b[0m (unbuilt) │\n",
416
+ "├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
417
+ "│ global_average_pooling1d │ ? │ \u001b[38;5;34m0\u001b[0m │\n",
418
+ "│ (\u001b[38;5;33mGlobalAveragePooling1D\u001b[0m) │ │ │\n",
419
+ "├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
420
+ "│ dense (\u001b[38;5;33mDense\u001b[0m) │ ? │ \u001b[38;5;34m0\u001b[0m (unbuilt) │\n",
421
+ "├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
422
+ "│ dense_1 (\u001b[38;5;33mDense\u001b[0m) │ ? │ \u001b[38;5;34m0\u001b[0m (unbuilt) │\n",
423
+ "└─────────────────────────────────┴────────────────────────┴───────────────┘\n"
424
+ ]
425
+ },
426
+ "metadata": {},
427
+ "output_type": "display_data"
428
+ },
429
+ {
430
+ "data": {
431
+ "text/html": [
432
+ "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"font-weight: bold\"> Total params: </span><span style=\"color: #00af00; text-decoration-color: #00af00\">0</span> (0.00 B)\n",
433
+ "</pre>\n"
434
+ ],
435
+ "text/plain": [
436
+ "\u001b[1m Total params: \u001b[0m\u001b[38;5;34m0\u001b[0m (0.00 B)\n"
437
+ ]
438
+ },
439
+ "metadata": {},
440
+ "output_type": "display_data"
441
+ },
442
+ {
443
+ "data": {
444
+ "text/html": [
445
+ "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"font-weight: bold\"> Trainable params: </span><span style=\"color: #00af00; text-decoration-color: #00af00\">0</span> (0.00 B)\n",
446
+ "</pre>\n"
447
+ ],
448
+ "text/plain": [
449
+ "\u001b[1m Trainable params: \u001b[0m\u001b[38;5;34m0\u001b[0m (0.00 B)\n"
450
+ ]
451
+ },
452
+ "metadata": {},
453
+ "output_type": "display_data"
454
+ },
455
+ {
456
+ "data": {
457
+ "text/html": [
458
+ "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"font-weight: bold\"> Non-trainable params: </span><span style=\"color: #00af00; text-decoration-color: #00af00\">0</span> (0.00 B)\n",
459
+ "</pre>\n"
460
+ ],
461
+ "text/plain": [
462
+ "\u001b[1m Non-trainable params: \u001b[0m\u001b[38;5;34m0\u001b[0m (0.00 B)\n"
463
+ ]
464
+ },
465
+ "metadata": {},
466
+ "output_type": "display_data"
467
+ }
468
+ ],
469
+ "source": [
470
+ "# =========================\n",
471
+ "# 5. Model Tiny (Lightweight)\n",
472
+ "# =========================\n",
473
+ "model = tf.keras.Sequential([\n",
474
+ " tf.keras.layers.Embedding(vocab_size, 16, input_length=max_length),\n",
475
+ " tf.keras.layers.GlobalAveragePooling1D(),\n",
476
+ " tf.keras.layers.Dense(16, activation='relu'),\n",
477
+ " tf.keras.layers.Dense(1, activation='sigmoid')\n",
478
+ "])\n",
479
+ "\n",
480
+ "model.compile(\n",
481
+ " loss='binary_crossentropy',\n",
482
+ " optimizer='adam',\n",
483
+ " metrics=['accuracy']\n",
484
+ ")\n",
485
+ "\n",
486
+ "model.summary()"
487
+ ]
488
+ },
489
+ {
490
+ "cell_type": "code",
491
+ "execution_count": 7,
492
+ "id": "21c6cb46",
493
+ "metadata": {},
494
+ "outputs": [
495
+ {
496
+ "name": "stdout",
497
+ "output_type": "stream",
498
+ "text": [
499
+ "Epoch 1/5\n",
500
+ "\u001b[1m782/782\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m2s\u001b[0m 1ms/step - accuracy: 0.6960 - loss: 0.5796 - val_accuracy: 0.7796 - val_loss: 0.4684\n",
501
+ "Epoch 2/5\n",
502
+ "\u001b[1m782/782\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 1ms/step - accuracy: 0.7903 - loss: 0.4446 - val_accuracy: 0.7908 - val_loss: 0.4400\n",
503
+ "Epoch 3/5\n",
504
+ "\u001b[1m782/782\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 1ms/step - accuracy: 0.8042 - loss: 0.4221 - val_accuracy: 0.7971 - val_loss: 0.4329\n",
505
+ "Epoch 4/5\n",
506
+ "\u001b[1m782/782\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 1ms/step - accuracy: 0.8093 - loss: 0.4146 - val_accuracy: 0.7968 - val_loss: 0.4296\n",
507
+ "Epoch 5/5\n",
508
+ "\u001b[1m782/782\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 1ms/step - accuracy: 0.8081 - loss: 0.4121 - val_accuracy: 0.7958 - val_loss: 0.4364\n"
509
+ ]
510
+ }
511
+ ],
512
+ "source": [
513
+ "# =========================\n",
514
+ "# 6. Training\n",
515
+ "# =========================\n",
516
+ "history = model.fit(\n",
517
+ " train_padded,\n",
518
+ " train_labels,\n",
519
+ " epochs=5,\n",
520
+ " validation_data=(test_padded, test_labels)\n",
521
+ ")"
522
+ ]
523
+ },
524
+ {
525
+ "cell_type": "code",
526
+ "execution_count": 8,
527
+ "id": "7c98aa50",
528
+ "metadata": {},
529
+ "outputs": [
530
+ {
531
+ "name": "stdout",
532
+ "output_type": "stream",
533
+ "text": [
534
+ "\u001b[1m782/782\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 454us/step - accuracy: 0.7958 - loss: 0.4364\n",
535
+ "Test Accuracy: 0.7957599759101868\n"
536
+ ]
537
+ }
538
+ ],
539
+ "source": [
540
+ "# =========================\n",
541
+ "# 7. Evaluation\n",
542
+ "# =========================\n",
543
+ "loss, acc = model.evaluate(test_padded, test_labels)\n",
544
+ "print(\"Test Accuracy:\", acc)"
545
+ ]
546
+ },
547
+ {
548
+ "cell_type": "code",
549
+ "execution_count": null,
550
+ "id": "2e979268",
551
+ "metadata": {},
552
+ "outputs": [
553
+ {
554
+ "name": "stdout",
555
+ "output_type": "stream",
556
+ "text": [
557
+ "INFO:tensorflow:Assets written to: /var/folders/d6/03s1zvdj5wg1gwyhbzrcn8gr0000gn/T/tmpfhvqfrf5/assets\n"
558
+ ]
559
+ },
560
+ {
561
+ "name": "stderr",
562
+ "output_type": "stream",
563
+ "text": [
564
+ "INFO:tensorflow:Assets written to: /var/folders/d6/03s1zvdj5wg1gwyhbzrcn8gr0000gn/T/tmpfhvqfrf5/assets\n"
565
+ ]
566
+ },
567
+ {
568
+ "name": "stdout",
569
+ "output_type": "stream",
570
+ "text": [
571
+ "Saved artifact at '/var/folders/d6/03s1zvdj5wg1gwyhbzrcn8gr0000gn/T/tmpfhvqfrf5'. The following endpoints are available:\n",
572
+ "\n",
573
+ "* Endpoint 'serve'\n",
574
+ " args_0 (POSITIONAL_ONLY): TensorSpec(shape=(None, 100), dtype=tf.float32, name='keras_tensor')\n",
575
+ "Output Type:\n",
576
+ " TensorSpec(shape=(None, 1), dtype=tf.float32, name=None)\n",
577
+ "Captures:\n",
578
+ " 6045205776: TensorSpec(shape=(), dtype=tf.resource, name=None)\n",
579
+ " 13165568320: TensorSpec(shape=(), dtype=tf.resource, name=None)\n",
580
+ " 13127621040: TensorSpec(shape=(), dtype=tf.resource, name=None)\n",
581
+ " 13127623152: TensorSpec(shape=(), dtype=tf.resource, name=None)\n",
582
+ " 13127625440: TensorSpec(shape=(), dtype=tf.resource, name=None)\n"
583
+ ]
584
+ },
585
+ {
586
+ "name": "stderr",
587
+ "output_type": "stream",
588
+ "text": [
589
+ "W0000 00:00:1775131910.106384 68307 tf_tfl_flatbuffer_helpers.cc:364] Ignored output_format.\n",
590
+ "W0000 00:00:1775131910.106397 68307 tf_tfl_flatbuffer_helpers.cc:367] Ignored drop_control_dependency.\n"
591
+ ]
592
+ },
593
+ {
594
+ "name": "stdout",
595
+ "output_type": "stream",
596
+ "text": [
597
+ "Model TFLite berhasil disimpan!\n"
598
+ ]
599
+ },
600
+ {
601
+ "name": "stderr",
602
+ "output_type": "stream",
603
+ "text": [
604
+ "I0000 00:00:1775131910.106825 68307 reader.cc:83] Reading SavedModel from: /var/folders/d6/03s1zvdj5wg1gwyhbzrcn8gr0000gn/T/tmpfhvqfrf5\n",
605
+ "I0000 00:00:1775131910.107216 68307 reader.cc:52] Reading meta graph with tags { serve }\n",
606
+ "I0000 00:00:1775131910.107221 68307 reader.cc:147] Reading SavedModel debug info (if present) from: /var/folders/d6/03s1zvdj5wg1gwyhbzrcn8gr0000gn/T/tmpfhvqfrf5\n",
607
+ "I0000 00:00:1775131910.110084 68307 mlir_graph_optimization_pass.cc:437] MLIR V1 optimization pass is not enabled\n",
608
+ "I0000 00:00:1775131910.110559 68307 loader.cc:236] Restoring SavedModel bundle.\n",
609
+ "I0000 00:00:1775131910.128073 68307 loader.cc:220] Running initialization op on SavedModel bundle at path: /var/folders/d6/03s1zvdj5wg1gwyhbzrcn8gr0000gn/T/tmpfhvqfrf5\n",
610
+ "I0000 00:00:1775131910.134243 68307 loader.cc:471] SavedModel load for tags { serve }; Status: success: OK. Took 27421 microseconds.\n",
611
+ "I0000 00:00:1775131910.146179 68307 dump_mlir_util.cc:269] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.\n"
612
+ ]
613
+ }
614
+ ],
615
+ "source": [
616
+ "# =========================\n",
617
+ "# 8. Convert ke TensorFlow Lite (TinyML Step)\n",
618
+ "# =========================\n",
619
+ "converter = tf.lite.TFLiteConverter.from_keras_model(model)\n",
620
+ "\n",
621
+ "# Quantization (WAJIB untuk TinyML)\n",
622
+ "converter.optimizations = [tf.lite.Optimize.DEFAULT]\n",
623
+ "\n",
624
+ "tflite_model = converter.convert()\n",
625
+ "\n",
626
+ "# simpan\n",
627
+ "with open(\"tiny_sentiment_model_imdb.tflite\", \"wb\") as f:\n",
628
+ " f.write(tflite_model)\n",
629
+ "\n",
630
+ "print(\"Model TFLite berhasil disimpan!\")"
631
+ ]
632
+ },
633
+ {
634
+ "cell_type": "code",
635
+ "execution_count": 12,
636
+ "id": "e89e8714",
637
+ "metadata": {},
638
+ "outputs": [
639
+ {
640
+ "name": "stdout",
641
+ "output_type": "stream",
642
+ "text": [
643
+ "\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 18ms/step\n",
644
+ "Negative\n",
645
+ "\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 20ms/step\n",
646
+ "Negative\n"
647
+ ]
648
+ }
649
+ ],
650
+ "source": [
651
+ "# =========================\n",
652
+ "# 9. Test Custom Input\n",
653
+ "# =========================\n",
654
+ "def predict(text):\n",
655
+ " seq = tokenizer.texts_to_sequences([text])\n",
656
+ " padded = tf.keras.preprocessing.sequence.pad_sequences(\n",
657
+ " seq, maxlen=max_length, padding='post'\n",
658
+ " )\n",
659
+ " pred = model.predict(padded)[0][0]\n",
660
+ " return \"Positive\" if pred > 0.5 else \"Negative\"\n",
661
+ "\n",
662
+ "print(predict(\"this movie is ugly\"))\n",
663
+ "print(predict(\"bad film ever\"))"
664
+ ]
665
+ }
666
+ ],
667
+ "metadata": {
668
+ "kernelspec": {
669
+ "display_name": "research",
670
+ "language": "python",
671
+ "name": "python3"
672
+ },
673
+ "language_info": {
674
+ "codemirror_mode": {
675
+ "name": "ipython",
676
+ "version": 3
677
+ },
678
+ "file_extension": ".py",
679
+ "mimetype": "text/x-python",
680
+ "name": "python",
681
+ "nbconvert_exporter": "python",
682
+ "pygments_lexer": "ipython3",
683
+ "version": "3.10.20"
684
+ }
685
+ },
686
+ "nbformat": 4,
687
+ "nbformat_minor": 5
688
+ }
tiny_sentiment_model_imdb.tflite ADDED
Binary file (20.7 kB). View file