Mr-FineTuner committed on
Commit
5d6ec7a
·
1 Parent(s): 0a2fe91

Upload Eng2Indo_Seq2Seq_Attention.ipynb

Browse files
fine tuning/Eng2Indo_Seq2Seq_Attention.ipynb ADDED
@@ -0,0 +1,1493 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "markdown",
5
+ "metadata": {
6
+ "colab_type": "text",
7
+ "id": "rycMa52tWIy4"
8
+ },
9
+ "source": [
10
+ "## English to Indonesian translation using attention"
11
+ ]
12
+ },
13
+ {
14
+ "cell_type": "markdown",
15
+ "metadata": {},
16
+ "source": [
17
+ "### References:\n",
18
+ "1. PyTorch tutorial: https://pytorch.org/tutorials/intermediate/seq2seq_translation_tutorial.html\n",
19
+ "2. Kopitiam sample: https://github.com/alvations/kopitiam/blob/master/Kopitiam%20mit%20Attention.ipynb\n",
20
+ "3. BLEU score: https://machinelearningmastery.com/calculate-bleu-score-for-text-python/"
21
+ ]
22
+ },
23
+ {
24
+ "cell_type": "markdown",
25
+ "metadata": {},
26
+ "source": [
27
+ "## Data pre-processing"
28
+ ]
29
+ },
30
+ {
31
+ "cell_type": "code",
32
+ "execution_count": 1,
33
+ "metadata": {
34
+ "colab": {},
35
+ "colab_type": "code",
36
+ "id": "l2qpBg3uWIy6"
37
+ },
38
+ "outputs": [],
39
+ "source": [
40
+ "from __future__ import unicode_literals, print_function, division\n",
41
+ "from io import open\n",
42
+ "import unicodedata\n",
43
+ "import string\n",
44
+ "import re\n",
45
+ "import random\n",
46
+ "import os\n",
47
+ "\n",
48
+ "import torch\n",
49
+ "import torch.nn as nn\n",
50
+ "from torch import optim\n",
51
+ "import torch.nn.functional as F\n",
52
+ "from torch.autograd import Variable\n",
53
+ "from nltk.translate.bleu_score import sentence_bleu"
54
+ ]
55
+ },
56
+ {
57
+ "cell_type": "code",
58
+ "execution_count": 2,
59
+ "metadata": {
60
+ "colab": {},
61
+ "colab_type": "code",
62
+ "id": "tuy6ysg_WIy-"
63
+ },
64
+ "outputs": [],
65
+ "source": [
66
+ "import torch\n",
67
+ "import torch.nn as nn\n",
68
+ "from torch import optim\n",
69
+ "import torch.nn.functional as F\n",
70
+ "from torch.autograd import Variable\n",
71
+ "\n",
72
+ "# Loss function: https://pytorch.org/docs/stable/nn.html#torch.nn.NLLLoss\n",
73
+ "\n",
74
+ "device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n",
75
+ "use_cuda = torch.cuda.is_available()"
76
+ ]
77
+ },
78
+ {
79
+ "cell_type": "code",
80
+ "execution_count": 3,
81
+ "metadata": {
82
+ "colab": {
83
+ "base_uri": "https://localhost:8080/",
84
+ "height": 102
85
+ },
86
+ "colab_type": "code",
87
+ "id": "l4LvE_r2WIzB",
88
+ "outputId": "2e8f940e-bc78-4c96-cd6e-5f0ac477df4b"
89
+ },
90
+ "outputs": [
91
+ {
92
+ "name": "stdout",
93
+ "output_type": "stream",
94
+ "text": [
95
+ "Requirement already satisfied: nltk in c:\\users\\elisa\\anaconda3\\envs\\deeplearning\\lib\\site-packages (3.7)\n",
96
+ "Requirement already satisfied: click in c:\\users\\elisa\\anaconda3\\envs\\deeplearning\\lib\\site-packages (from nltk) (8.0.4)\n",
97
+ "Requirement already satisfied: joblib in c:\\users\\elisa\\anaconda3\\envs\\deeplearning\\lib\\site-packages (from nltk) (1.1.1)\n",
98
+ "Requirement already satisfied: regex>=2021.8.3 in c:\\users\\elisa\\anaconda3\\envs\\deeplearning\\lib\\site-packages (from nltk) (2022.7.9)\n",
99
+ "Requirement already satisfied: tqdm in c:\\users\\elisa\\anaconda3\\envs\\deeplearning\\lib\\site-packages (from nltk) (4.64.1)\n",
100
+ "Requirement already satisfied: colorama in c:\\users\\elisa\\anaconda3\\envs\\deeplearning\\lib\\site-packages (from click->nltk) (0.4.6)\n"
101
+ ]
102
+ },
103
+ {
104
+ "name": "stderr",
105
+ "output_type": "stream",
106
+ "text": [
107
+ "[nltk_data] Downloading package punkt to\n",
108
+ "[nltk_data] C:\\Users\\elisa\\AppData\\Roaming\\nltk_data...\n",
109
+ "[nltk_data] Package punkt is already up-to-date!\n"
110
+ ]
111
+ },
112
+ {
113
+ "data": {
114
+ "text/plain": [
115
+ "True"
116
+ ]
117
+ },
118
+ "execution_count": 3,
119
+ "metadata": {},
120
+ "output_type": "execute_result"
121
+ }
122
+ ],
123
+ "source": [
124
+ "import pandas as pd\n",
125
+ "from gensim.corpora.dictionary import Dictionary\n",
126
+ "from nltk import word_tokenize\n",
127
+ "\n",
128
+ "!pip install nltk \n",
129
+ "import nltk\n",
130
+ "nltk.download('punkt')"
131
+ ]
132
+ },
133
+ {
134
+ "cell_type": "code",
135
+ "execution_count": 4,
136
+ "metadata": {
137
+ "colab": {
138
+ "base_uri": "https://localhost:8080/",
139
+ "height": 221
140
+ },
141
+ "colab_type": "code",
142
+ "id": "kqBsanwyWIzE",
143
+ "outputId": "428053ee-79ad-49a4-8cf5-f09fc5d74dc5"
144
+ },
145
+ "outputs": [
146
+ {
147
+ "name": "stdout",
148
+ "output_type": "stream",
149
+ "text": [
150
+ "(15531, 2)\n"
151
+ ]
152
+ },
153
+ {
154
+ "data": {
155
+ "text/html": [
156
+ "<div>\n",
157
+ "<style scoped>\n",
158
+ " .dataframe tbody tr th:only-of-type {\n",
159
+ " vertical-align: middle;\n",
160
+ " }\n",
161
+ "\n",
162
+ " .dataframe tbody tr th {\n",
163
+ " vertical-align: top;\n",
164
+ " }\n",
165
+ "\n",
166
+ " .dataframe thead th {\n",
167
+ " text-align: right;\n",
168
+ " }\n",
169
+ "</style>\n",
170
+ "<table border=\"1\" class=\"dataframe\">\n",
171
+ " <thead>\n",
172
+ " <tr style=\"text-align: right;\">\n",
173
+ " <th></th>\n",
174
+ " <th>English</th>\n",
175
+ " <th>Indonesian</th>\n",
176
+ " </tr>\n",
177
+ " </thead>\n",
178
+ " <tbody>\n",
179
+ " <tr>\n",
180
+ " <th>0</th>\n",
181
+ " <td>run !</td>\n",
182
+ " <td>lari !</td>\n",
183
+ " </tr>\n",
184
+ " <tr>\n",
185
+ " <th>1</th>\n",
186
+ " <td>who ?</td>\n",
187
+ " <td>siapa ?</td>\n",
188
+ " </tr>\n",
189
+ " <tr>\n",
190
+ " <th>2</th>\n",
191
+ " <td>wow !</td>\n",
192
+ " <td>wow !</td>\n",
193
+ " </tr>\n",
194
+ " <tr>\n",
195
+ " <th>3</th>\n",
196
+ " <td>help !</td>\n",
197
+ " <td>tolong !</td>\n",
198
+ " </tr>\n",
199
+ " <tr>\n",
200
+ " <th>4</th>\n",
201
+ " <td>jump !</td>\n",
202
+ " <td>lompat !</td>\n",
203
+ " </tr>\n",
204
+ " </tbody>\n",
205
+ "</table>\n",
206
+ "</div>"
207
+ ],
208
+ "text/plain": [
209
+ " English Indonesian\n",
210
+ "0 run ! lari !\n",
211
+ "1 who ? siapa ?\n",
212
+ "2 wow ! wow !\n",
213
+ "3 help ! tolong !\n",
214
+ "4 jump ! lompat !"
215
+ ]
216
+ },
217
+ "execution_count": 4,
218
+ "metadata": {},
219
+ "output_type": "execute_result"
220
+ }
221
+ ],
222
+ "source": [
223
+ "# Read the input file: one English/Indonesian sentence pair per line, separated by a tab\n",
224
+ "fp = open('../corpus/eng-indo-augmented.txt', 'r')\n",
225
+ "text = fp.read()\n",
226
+ "text = text.splitlines()\n",
227
+ "fp.close()\n",
228
+ "\n",
229
+ "# Turn a Unicode string to plain ASCII, thanks to\n",
230
+ "# https://stackoverflow.com/a/518232/2809427\n",
231
+ "def unicodeToAscii(s):\n",
232
+ " return ''.join(\n",
233
+ " c for c in unicodedata.normalize('NFD', s)\n",
234
+ " if unicodedata.category(c) != 'Mn'\n",
235
+ " )\n",
236
+ "\n",
237
+ "# Lowercase, trim, strip accents, put a space before . ! ? and replace every other non-letter run with a single space\n",
238
+ "\n",
239
+ "\n",
240
+ "def normalizeString(s):\n",
241
+ " s = unicodeToAscii(s.lower().strip())\n",
242
+ " s = re.sub(r\"([.!?])\", r\" \\1\", s)\n",
243
+ " s = re.sub(r\"[^a-zA-Z.!?]+\", r\" \", s)\n",
244
+ " return s\n",
245
+ "\n",
246
+ "\n",
247
+ "text_dict = {\"English\": [], \"Indonesian\": []}\n",
248
+ "for l in text:\n",
249
+ " split_text = l.split(\"\\t\")\n",
250
+ " text_dict[\"English\"].append(normalizeString(split_text[0]))\n",
251
+ " text_dict[\"Indonesian\"].append(normalizeString(split_text[1]))\n",
252
+ " \n",
253
+ "df = pd.DataFrame.from_dict(text_dict)\n",
254
+ "print(df.shape)\n",
255
+ "df.head()"
256
+ ]
257
+ },
258
+ {
259
+ "cell_type": "code",
260
+ "execution_count": 5,
261
+ "metadata": {
262
+ "colab": {},
263
+ "colab_type": "code",
264
+ "id": "O4BBhVu5WIzL"
265
+ },
266
+ "outputs": [],
267
+ "source": [
268
+ "MAX_LENGTH = 102\n",
269
+ "MIN_LENGTH = 1"
270
+ ]
271
+ },
272
+ {
273
+ "cell_type": "code",
274
+ "execution_count": 6,
275
+ "metadata": {
276
+ "colab": {
277
+ "base_uri": "https://localhost:8080/",
278
+ "height": 255
279
+ },
280
+ "colab_type": "code",
281
+ "id": "man1RTr1WIzO",
282
+ "outputId": "68996a6a-87cb-4cde-f69e-8125ad5565d7"
283
+ },
284
+ "outputs": [
285
+ {
286
+ "name": "stdout",
287
+ "output_type": "stream",
288
+ "text": [
289
+ "(15531, 3)\n",
290
+ "Current shape: (15531, 3)\n",
291
+ "New shape: (15352, 3)\n"
292
+ ]
293
+ },
294
+ {
295
+ "data": {
296
+ "text/html": [
297
+ "<div>\n",
298
+ "<style scoped>\n",
299
+ " .dataframe tbody tr th:only-of-type {\n",
300
+ " vertical-align: middle;\n",
301
+ " }\n",
302
+ "\n",
303
+ " .dataframe tbody tr th {\n",
304
+ " vertical-align: top;\n",
305
+ " }\n",
306
+ "\n",
307
+ " .dataframe thead th {\n",
308
+ " text-align: right;\n",
309
+ " }\n",
310
+ "</style>\n",
311
+ "<table border=\"1\" class=\"dataframe\">\n",
312
+ " <thead>\n",
313
+ " <tr style=\"text-align: right;\">\n",
314
+ " <th></th>\n",
315
+ " <th>index</th>\n",
316
+ " <th>English</th>\n",
317
+ " <th>Indonesian</th>\n",
318
+ " </tr>\n",
319
+ " </thead>\n",
320
+ " <tbody>\n",
321
+ " <tr>\n",
322
+ " <th>0</th>\n",
323
+ " <td>34</td>\n",
324
+ " <td>i m sad .</td>\n",
325
+ " <td>saya sedih .</td>\n",
326
+ " </tr>\n",
327
+ " <tr>\n",
328
+ " <th>1</th>\n",
329
+ " <td>35</td>\n",
330
+ " <td>it s me !</td>\n",
331
+ " <td>ini aku !</td>\n",
332
+ " </tr>\n",
333
+ " <tr>\n",
334
+ " <th>2</th>\n",
335
+ " <td>53</td>\n",
336
+ " <td>i get it .</td>\n",
337
+ " <td>aku mengerti .</td>\n",
338
+ " </tr>\n",
339
+ " <tr>\n",
340
+ " <th>3</th>\n",
341
+ " <td>54</td>\n",
342
+ " <td>i got it .</td>\n",
343
+ " <td>aku mengerti .</td>\n",
344
+ " </tr>\n",
345
+ " <tr>\n",
346
+ " <th>4</th>\n",
347
+ " <td>57</td>\n",
348
+ " <td>i m okay .</td>\n",
349
+ " <td>aku baik baik saja .</td>\n",
350
+ " </tr>\n",
351
+ " </tbody>\n",
352
+ "</table>\n",
353
+ "</div>"
354
+ ],
355
+ "text/plain": [
356
+ " index English Indonesian\n",
357
+ "0 34 i m sad . saya sedih .\n",
358
+ "1 35 it s me ! ini aku !\n",
359
+ "2 53 i get it . aku mengerti .\n",
360
+ "3 54 i got it . aku mengerti .\n",
361
+ "4 57 i m okay . aku baik baik saja ."
362
+ ]
363
+ },
364
+ "execution_count": 6,
365
+ "metadata": {},
366
+ "output_type": "execute_result"
367
+ }
368
+ ],
369
+ "source": [
370
+ "def should_keep_row(row):\n",
371
+ " \"\"\"Return True when the row's English sentence has between MIN_LENGTH and MAX_LENGTH - 2 tokens (inclusive).\"\"\"\n",
372
+ " # indo_num_words = len(word_tokenize(row[\"Indonesian\"]))\n",
373
+ " eng_num_words = len(word_tokenize(row[\"English\"]))\n",
374
+ " max_words_required = MAX_LENGTH - 2\n",
375
+ " min_words_required = MIN_LENGTH\n",
376
+ "\n",
377
+ " return min_words_required <= eng_num_words <= max_words_required\n",
378
+ "\n",
379
+ "df[\"keep_row\"] = df.apply(should_keep_row, axis=1)\n",
380
+ "print(df.shape)\n",
381
+ "df.head()\n",
382
+ "\n",
383
+ "print(\"Current shape: \" + str(df.shape))\n",
384
+ "df = df[df[\"keep_row\"]]\n",
385
+ "print(\"New shape: \" + str(df.shape))\n",
386
+ "df.head()\n",
387
+ "df = df.reset_index().drop(columns=[\"keep_row\"])\n",
388
+ "df.head()\n"
389
+ ]
390
+ },
391
+ {
392
+ "cell_type": "code",
393
+ "execution_count": 7,
394
+ "metadata": {},
395
+ "outputs": [
396
+ {
397
+ "name": "stdout",
398
+ "output_type": "stream",
399
+ "text": [
400
+ "First 10 Indonesian words in Dictionary:\n",
401
+ " [(0, '<s>'), (1, '</s>'), (2, 'UNK'), (3, '<'), (4, 's'), (5, '>'), (6, 'saya'), (7, 'sedih'), (8, '.'), (9, 'ini')]\n",
402
+ "\n",
403
+ "First 10 English words in Dictionary:\n",
404
+ " [(0, '<s>'), (1, '</s>'), (2, 'UNK'), (3, '<'), (4, 's'), (5, '>'), (6, 'i'), (7, 'm'), (8, 'sad'), (9, '.')]\n"
405
+ ]
406
+ }
407
+ ],
408
+ "source": [
409
+ "# Define unique tokens and indices\n",
410
+ "START, START_IDX = '<s>', 0\n",
411
+ "END, END_IDX = '</s>', 1\n",
412
+ "UNK, UNK_IDX = 'UNK', 2\n",
413
+ "\n",
414
+ "# Define SOS and EOS tokens\n",
415
+ "SOS_token = START_IDX\n",
416
+ "EOS_token = END_IDX\n",
417
+ "\n",
418
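+ "# Tokenize sentences. NOTE(review): START and END are concatenated onto the list as bare strings rather than added per sentence, so the vocab loops below iterate their characters ('<', 's', '>', '/')\n",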
419
+ "english_sents = [START] + [word_tokenize(sent.lower()) for sent in df['English']] + [END]\n",
420
+ "indo_sents = [START] + [word_tokenize(sent.lower()) for sent in df['Indonesian']] + [END]\n",
421
+ "\n",
422
+ "# Create dictionaries for English and Indonesian vocabularies\n",
423
+ "english_vocab = Dictionary([[START], [END], [UNK]])\n",
424
+ "english_vocab.add_documents([[word] for sentence in english_sents for word in sentence])\n",
425
+ "\n",
426
+ "indo_vocab = Dictionary([[START], [END], [UNK]])\n",
427
+ "indo_vocab.add_documents([[word] for sentence in indo_sents for word in sentence])\n",
428
+ "\n",
429
+ "# Display the first 10 words in the vocabularies\n",
430
+ "print('First 10 Indonesian words in Dictionary:\\n', sorted(indo_vocab.items())[:10])\n",
431
+ "print()\n",
432
+ "print('First 10 English words in Dictionary:\\n', sorted(english_vocab.items())[:10])"
433
+ ]
434
+ },
435
+ {
436
+ "cell_type": "markdown",
437
+ "metadata": {},
438
+ "source": [
439
+ "## Compute BLEU score"
440
+ ]
441
+ },
442
+ {
443
+ "cell_type": "code",
444
+ "execution_count": 8,
445
+ "metadata": {
446
+ "colab": {},
447
+ "colab_type": "code",
448
+ "id": "RjdR62hJWIzW"
449
+ },
450
+ "outputs": [],
451
+ "source": [
452
+ "# candidate_sent: the model's translation of the English input (val_sent_pairs[i][0])\n",
453
+ "# reference_sent: the ground-truth Indonesian sentence (val_sent_pairs[i][1])\n",
454
+ "def calculate_bleu_score(reference_sent,candidate_sent):\n",
455
+ " reference = [word_tokenize(reference_sent)]\n",
456
+ " candidate = word_tokenize(candidate_sent)\n",
457
+ " \n",
458
+ " if '<s>' in candidate:\n",
459
+ " candidate.remove('<s>')\n",
460
+ " if '</s>' in candidate:\n",
461
+ " candidate.remove('</s>') \n",
462
+ " gram_1_score = sentence_bleu(reference,candidate,weights=(1, 0, 0, 0))\n",
463
+ " gram_2_score = sentence_bleu(reference,candidate,weights=(0.5, 0.5, 0, 0))\n",
464
+ " gram_3_score = sentence_bleu(reference,candidate,weights=(0.33, 0.33, 0.33, 0))\n",
465
+ " gram_4_score = sentence_bleu(reference,candidate,weights=(0.25, 0.25, 0.25, 0.25))\n",
466
+ " blue_score = (gram_1_score+gram_2_score+gram_3_score+gram_4_score)/4\n",
467
+ " #print(blue_score)\n",
468
+ " return blue_score"
469
+ ]
470
+ },
471
+ {
472
+ "cell_type": "markdown",
473
+ "metadata": {},
474
+ "source": [
475
+ "## Utility methods"
476
+ ]
477
+ },
478
+ {
479
+ "cell_type": "code",
480
+ "execution_count": 9,
481
+ "metadata": {
482
+ "colab": {},
483
+ "colab_type": "code",
484
+ "id": "kWpJQ74mWIza"
485
+ },
486
+ "outputs": [],
487
+ "source": [
488
+ "import pickle\n",
489
+ "# Let's save our dictionaries.\n",
490
+ "with open('../evaluation/vocabs/simple_indo_vocab.Dictionary.pkl', 'wb') as fout:\n",
491
+ " pickle.dump(indo_vocab, fout)\n",
492
+ " \n",
493
+ "with open('../evaluation/vocabs/simple_english_vocab.Dictionary.pkl', 'wb') as fout:\n",
494
+ " pickle.dump(english_vocab, fout)"
495
+ ]
496
+ },
497
+ {
498
+ "cell_type": "code",
499
+ "execution_count": 10,
500
+ "metadata": {
501
+ "colab": {
502
+ "base_uri": "https://localhost:8080/",
503
+ "height": 119
504
+ },
505
+ "colab_type": "code",
506
+ "id": "aLocWKZ_WIzd",
507
+ "outputId": "2da4f69f-d088-463d-827e-64c6b8a881a5"
508
+ },
509
+ "outputs": [
510
+ {
511
+ "data": {
512
+ "text/plain": [
513
+ "tensor([[ 0],\n",
514
+ " [34],\n",
515
+ " [10],\n",
516
+ " [47],\n",
517
+ " [19],\n",
518
+ " [ 1]])"
519
+ ]
520
+ },
521
+ "execution_count": 10,
522
+ "metadata": {},
523
+ "output_type": "execute_result"
524
+ }
525
+ ],
526
+ "source": [
527
+ "# Vectorizes a sentence with a given vocab\n",
528
+ "def vectorize_sent(sent, vocab):\n",
529
+ " return vocab.doc2idx([START] + word_tokenize(sent.lower()) + [END], unknown_word_index=2)\n",
530
+ "\n",
531
+ "# Creates a PyTorch variable from a sentence against a given vocab\n",
532
+ "def variable_from_sent(sent, vocab):\n",
533
+ " vsent = vectorize_sent(sent, vocab)\n",
534
+ " #print(vsent)\n",
535
+ " result = Variable(torch.LongTensor(vsent).view(-1, 1))\n",
536
+ " #print(result)\n",
537
+ " return result.cuda() if use_cuda else result\n",
538
+ "\n",
539
+ "# Test\n",
540
+ "new_kopi = \"Is it love?\"\n",
541
+ "variable_from_sent(new_kopi, english_vocab)"
542
+ ]
543
+ },
544
+ {
545
+ "cell_type": "markdown",
546
+ "metadata": {
547
+ "colab_type": "text",
548
+ "id": "nXn7WpKGWIzg"
549
+ },
550
+ "source": [
551
+ "## Split into train and validation"
552
+ ]
553
+ },
554
+ {
555
+ "cell_type": "code",
556
+ "execution_count": 11,
557
+ "metadata": {
558
+ "colab": {
559
+ "base_uri": "https://localhost:8080/",
560
+ "height": 85
561
+ },
562
+ "colab_type": "code",
563
+ "id": "34nfNaefWIzh",
564
+ "outputId": "41f4902b-3109-4924-ac76-d316c5501a8b"
565
+ },
566
+ "outputs": [
567
+ {
568
+ "name": "stdout",
569
+ "output_type": "stream",
570
+ "text": [
571
+ "(13049, 3)\n",
572
+ "(2303, 3)\n",
573
+ "nancy mengangkat kotak dan melihat isinya .\n",
574
+ "('nancy picked the box up and looked inside .', 'nancy mengangkat kotak dan melihat isinya .')\n"
575
+ ]
576
+ }
577
+ ],
578
+ "source": [
579
+ "from sklearn.model_selection import train_test_split\n",
580
+ "df_train, df_val = train_test_split(df, test_size=0.15)\n",
581
+ "print(df_train.shape)\n",
582
+ "print(df_val.shape)\n",
583
+ "\n",
584
+ "df_train = df_train.reset_index(drop=True)\n",
585
+ "df_val = df_val.reset_index(drop=True)\n",
586
+ "df_train.head()\n",
587
+ "\n",
588
+ "indo_tensors = df_train['Indonesian'].apply(lambda s: variable_from_sent(s, indo_vocab))\n",
589
+ "print(df_train.iloc[0]['Indonesian'])\n",
590
+ "df_train\n",
591
+ "\n",
592
+ "english_tensors = df_train['English'].apply(lambda s: variable_from_sent(s, english_vocab))\n",
593
+ "#print(df_train.iloc[0]['English'])\n",
594
+ "#print(english_tensors[0])\n",
595
+ "# Now, each item in `sent_pairs` is our data point. \n",
596
+ "#print(\"############################\")\n",
597
+ "sent_pairs = list(zip(english_tensors.values, indo_tensors.values))\n",
598
+ "#print(sent_pairs[:5])\n",
599
+ "#print(\"############################\")\n",
600
+ "pairs = list(zip(df_train['English'], df_train['Indonesian']))\n",
601
+ "print(pairs[0])"
602
+ ]
603
+ },
604
+ {
605
+ "cell_type": "code",
606
+ "execution_count": 12,
607
+ "metadata": {
608
+ "colab": {
609
+ "base_uri": "https://localhost:8080/",
610
+ "height": 34
611
+ },
612
+ "colab_type": "code",
613
+ "id": "efvf_EetWIzn",
614
+ "outputId": "54523297-023a-4500-aea9-2fe33458b648"
615
+ },
616
+ "outputs": [
617
+ {
618
+ "name": "stdout",
619
+ "output_type": "stream",
620
+ "text": [
621
+ "('i know what david saw .', 'aku tahu apa yang david lihat .')\n"
622
+ ]
623
+ }
624
+ ],
625
+ "source": [
626
+ "def get_validation_pairs(df_val_in):\n",
627
+ " indo_val_tensors = df_val_in['Indonesian'].apply(lambda s: variable_from_sent(s, indo_vocab))\n",
628
+ " english_val_tensors = df_val_in['English'].apply(lambda s: variable_from_sent(s, english_vocab))\n",
629
+ " val_sent_tensor_pairs = list(zip(english_val_tensors.values, indo_val_tensors.values))\n",
630
+ " val_sent_pairs = list(zip(df_val_in['English'], df_val_in['Indonesian']))\n",
631
+ " return val_sent_pairs, val_sent_tensor_pairs\n",
632
+ "\n",
633
+ "\n",
634
+ "val_sent_pairs, val_sent_tensor_pairs = get_validation_pairs(df_val)\n",
635
+ "print(val_sent_pairs[0])"
636
+ ]
637
+ },
638
+ {
639
+ "cell_type": "markdown",
640
+ "metadata": {
641
+ "colab_type": "text",
642
+ "id": "s4gcyKGOWIz_"
643
+ },
644
+ "source": [
645
+ "## Define encoder and attention-based decoder model"
646
+ ]
647
+ },
648
+ {
649
+ "cell_type": "code",
650
+ "execution_count": 13,
651
+ "metadata": {
652
+ "colab": {},
653
+ "colab_type": "code",
654
+ "id": "nkjIwC9vWI0B"
655
+ },
656
+ "outputs": [],
657
+ "source": [
658
+ "class EncoderRNN(nn.Module):\n",
659
+ " def __init__(self, input_size, hidden_size):\n",
660
+ " super(EncoderRNN, self).__init__()\n",
661
+ " self.hidden_size = hidden_size\n",
662
+ "\n",
663
+ " self.embedding = nn.Embedding(input_size, hidden_size)\n",
664
+ " self.gru = nn.GRU(hidden_size, hidden_size)\n",
665
+ "\n",
666
+ " def forward(self, input, hidden):\n",
667
+ " embedded = self.embedding(input).view(1, 1, -1)\n",
668
+ " output = embedded\n",
669
+ " output, hidden = self.gru(output, hidden)\n",
670
+ " return output, hidden\n",
671
+ "\n",
672
+ " def initHidden(self):\n",
673
+ " return torch.zeros(1, 1, self.hidden_size, device=device)\n",
674
+ "\n",
675
+ "class AttnDecoderRNN(nn.Module):\n",
676
+ " def __init__(self, hidden_size, output_size, dropout_p=0.1, max_length=MAX_LENGTH):\n",
677
+ " super(AttnDecoderRNN, self).__init__()\n",
678
+ " self.hidden_size = hidden_size\n",
679
+ " self.output_size = output_size\n",
680
+ " self.dropout_p = dropout_p\n",
681
+ " self.max_length = max_length\n",
682
+ "\n",
683
+ " self.embedding = nn.Embedding(self.output_size, self.hidden_size)\n",
684
+ " self.attn = nn.Linear(self.hidden_size * 2, self.max_length)\n",
685
+ " self.attn_combine = nn.Linear(self.hidden_size * 2, self.hidden_size)\n",
686
+ " self.dropout = nn.Dropout(self.dropout_p)\n",
687
+ " self.gru = nn.GRU(self.hidden_size, self.hidden_size)\n",
688
+ " self.out = nn.Linear(self.hidden_size, self.output_size)\n",
689
+ "\n",
690
+ " def forward(self, input, hidden, encoder_outputs):\n",
691
+ " embedded = self.embedding(input).view(1, 1, -1)\n",
692
+ " embedded = self.dropout(embedded)\n",
693
+ "\n",
694
+ " attn_weights = F.softmax(\n",
695
+ " self.attn(torch.cat((embedded[0], hidden[0]), 1)), dim=1)\n",
696
+ " attn_applied = torch.bmm(attn_weights.unsqueeze(0),\n",
697
+ " encoder_outputs.unsqueeze(0))\n",
698
+ "\n",
699
+ " output = torch.cat((embedded[0], attn_applied[0]), 1)\n",
700
+ " output = self.attn_combine(output).unsqueeze(0)\n",
701
+ "\n",
702
+ " output = F.relu(output)\n",
703
+ " output, hidden = self.gru(output, hidden)\n",
704
+ "\n",
705
+ " output = F.log_softmax(self.out(output[0]), dim=1)\n",
706
+ " return output, hidden, attn_weights\n",
707
+ "\n",
708
+ " def initHidden(self):\n",
709
+ " return torch.zeros(1, 1, self.hidden_size, device=device)"
710
+ ]
711
+ },
712
+ {
713
+ "cell_type": "markdown",
714
+ "metadata": {
715
+ "colab_type": "text",
716
+ "id": "fy8wdwWLWI0F"
717
+ },
718
+ "source": [
719
+ "## Get training and validation set loss"
720
+ ]
721
+ },
722
+ {
723
+ "cell_type": "code",
724
+ "execution_count": 14,
725
+ "metadata": {
726
+ "colab": {},
727
+ "colab_type": "code",
728
+ "id": "nDRV7_JHWI0H"
729
+ },
730
+ "outputs": [],
731
+ "source": [
732
+ "teacher_forcing_ratio = 0.5\n",
733
+ "\n",
734
+ "\n",
735
+ "def get_train_loss(input_tensor, target_tensor, encoder, decoder, encoder_optimizer, decoder_optimizer, criterion, max_length=MAX_LENGTH):\n",
736
+ " encoder_hidden = encoder.initHidden()\n",
737
+ "\n",
738
+ " encoder_optimizer.zero_grad()\n",
739
+ " decoder_optimizer.zero_grad()\n",
740
+ "\n",
741
+ " input_length = input_tensor.size(0)\n",
742
+ " target_length = target_tensor.size(0)\n",
743
+ " \n",
744
+ " encoder_outputs = torch.zeros(max_length, encoder.hidden_size, device=device)\n",
745
+ "\n",
746
+ " loss = 0\n",
747
+ "\n",
748
+ " for ei in range(input_length):\n",
749
+ " encoder_output, encoder_hidden = encoder(\n",
750
+ " input_tensor[ei], encoder_hidden)\n",
751
+ " encoder_outputs[ei] = encoder_output[0, 0]\n",
752
+ "\n",
753
+ " decoder_input = torch.tensor([[SOS_token]], device=device)\n",
754
+ "\n",
755
+ " decoder_hidden = encoder_hidden\n",
756
+ "\n",
757
+ " use_teacher_forcing = True if random.random() < teacher_forcing_ratio else False\n",
758
+ "\n",
759
+ " if use_teacher_forcing:\n",
760
+ " # Teacher forcing: Feed the target as the next input\n",
761
+ " for di in range(target_length):\n",
762
+ " decoder_output, decoder_hidden, decoder_attention = decoder(\n",
763
+ " decoder_input, decoder_hidden, encoder_outputs)\n",
764
+ " loss += criterion(decoder_output, target_tensor[di])\n",
765
+ " decoder_input = target_tensor[di] # Teacher forcing\n",
766
+ "\n",
767
+ " else:\n",
768
+ " # Without teacher forcing: use its own predictions as the next input\n",
769
+ " for di in range(target_length):\n",
770
+ " decoder_output, decoder_hidden, decoder_attention = decoder(\n",
771
+ " decoder_input, decoder_hidden, encoder_outputs)\n",
772
+ " topv, topi = decoder_output.topk(1)\n",
773
+ " decoder_input = topi.squeeze().detach() # detach from history as input\n",
774
+ "\n",
775
+ " loss += criterion(decoder_output, target_tensor[di])\n",
776
+ " if decoder_input.item() == EOS_token:\n",
777
+ " break\n",
778
+ "\n",
779
+ " loss.backward()\n",
780
+ "\n",
781
+ " encoder_optimizer.step()\n",
782
+ " decoder_optimizer.step()\n",
783
+ "\n",
784
+ " return loss.item() / target_length\n",
785
+ "\n",
786
+ "def get_validation_loss(input_tensor, target_tensor, encoder, decoder, criterion, max_length=MAX_LENGTH):\n",
787
+ " encoder_hidden = encoder.initHidden()\n",
788
+ "\n",
789
+ " input_length = input_tensor.size(0)\n",
790
+ " target_length = target_tensor.size(0)\n",
791
+ "\n",
792
+ " encoder_outputs = torch.zeros(max_length, encoder.hidden_size, device=device)\n",
793
+ "\n",
794
+ " total_loss = 0\n",
795
+ "\n",
796
+ " for ei in range(input_length):\n",
797
+ " encoder_output, encoder_hidden = encoder(\n",
798
+ " input_tensor[ei], encoder_hidden)\n",
799
+ " encoder_outputs[ei] = encoder_output[0, 0]\n",
800
+ "\n",
801
+ " decoder_input = torch.tensor([[SOS_token]], device=device)\n",
802
+ "\n",
803
+ " decoder_hidden = encoder_hidden\n",
804
+ " \n",
805
+ " with torch.no_grad():\n",
806
+ " for di in range(target_length):\n",
807
+ " decoder_output, decoder_hidden, decoder_attention = decoder(\n",
808
+ " decoder_input, decoder_hidden, encoder_outputs)\n",
809
+ " topv, topi = decoder_output.topk(1)\n",
810
+ " decoder_input = topi.squeeze().detach() # detach from history as input\n",
811
+ "\n",
812
+ " loss = criterion(decoder_output, target_tensor[di])\n",
813
+ " total_loss += float(loss.item())\n",
814
+ " if decoder_input.item() == EOS_token:\n",
815
+ " break\n",
816
+ "\n",
817
+ " return total_loss / target_length"
818
+ ]
819
+ },
820
+ {
821
+ "cell_type": "markdown",
822
+ "metadata": {
823
+ "colab_type": "text",
824
+ "id": "ePf3q2gFWI0R"
825
+ },
826
+ "source": [
827
+ "## Utilities - required for training"
828
+ ]
829
+ },
830
+ {
831
+ "cell_type": "code",
832
+ "execution_count": 15,
833
+ "metadata": {
834
+ "colab": {},
835
+ "colab_type": "code",
836
+ "id": "9jQVkj64WI0T"
837
+ },
838
+ "outputs": [],
839
+ "source": [
840
+ "import time\n",
841
+ "import math\n",
842
+ "\n",
843
+ "\n",
844
+ "def asMinutes(s):\n",
845
+ " m = math.floor(s / 60)\n",
846
+ " s -= m * 60\n",
847
+ " return '%dm %ds' % (m, s)\n",
848
+ "\n",
849
+ "\n",
850
+ "def timeSince(since, percent):\n",
851
+ " now = time.time()\n",
852
+ " s = now - since\n",
853
+ " es = s / (percent)\n",
854
+ " rs = es - s\n",
855
+ " return '%s (- %s)' % (asMinutes(s), asMinutes(rs))\n",
856
+ "\n",
857
+ "\n",
858
+ "SAVE_PATH = 'results'\n",
859
+ "\n",
860
+ "if not os.path.exists(SAVE_PATH):\n",
861
+ " os.makedirs(SAVE_PATH)\n",
862
+ "\n",
863
+ "import matplotlib.pyplot as plt\n",
864
+ "plt.switch_backend('agg')\n",
865
+ "import matplotlib.ticker as ticker\n",
866
+ "import numpy as np\n",
867
+ "\n",
868
+ "\n",
869
+ "def showPlot(points):\n",
870
+ " plt.figure()\n",
871
+ " fig, ax = plt.subplots()\n",
872
+ " # this locator puts ticks at regular intervals\n",
873
+ " loc = ticker.MultipleLocator(base=0.2)\n",
874
+ " ax.yaxis.set_major_locator(loc)\n",
875
+ " plt.plot(points)"
876
+ ]
877
+ },
878
+ {
879
+ "cell_type": "markdown",
880
+ "metadata": {
881
+ "colab_type": "text",
882
+ "id": "xDZRZpqhWI0W"
883
+ },
884
+ "source": [
885
+ "## Training loop and get evaluation result"
886
+ ]
887
+ },
888
+ {
889
+ "cell_type": "code",
890
+ "execution_count": 16,
891
+ "metadata": {
892
+ "colab": {},
893
+ "colab_type": "code",
894
+ "id": "IJx18gLeWI0Y"
895
+ },
896
+ "outputs": [],
897
+ "source": [
898
+ "def trainIters(encoder, decoder, n_iters, batch_size = 1, print_every=1000, save_every=1000, plot_every=100, learning_rate=0.0001):\n",
899
+ " start = time.time()\n",
900
+ " plot_losses = []\n",
901
+ " val_losses = []\n",
902
+ " bleu_scores = []\n",
903
+ " \n",
904
+ " print_loss_total = 0 # Reset every print_every\n",
905
+ " plot_loss_total = 0 # Reset every plot_every\n",
906
+ "\n",
907
+ " encoder_optimizer = optim.Adam(encoder.parameters(), lr=learning_rate)\n",
908
+ " decoder_optimizer = optim.Adam(decoder.parameters(), lr=learning_rate)\n",
909
+ " #training_pairs = [sent_pairs[i] for i in range(n_iters)]\n",
910
+ " training_pairs = [random.sample(sent_pairs, batch_size) for i in range(n_iters)]\n",
911
+ "\n",
912
+ "\n",
913
+ " criterion = nn.NLLLoss()\n",
914
+ " MAX_PATIENCE = 50\n",
915
+ " patience = MAX_PATIENCE \n",
916
+ " prev_val_loss =lowest_so_far = prev_bleu = 999\n",
917
+ " highest_so_far = -np.inf # for bleu\n",
918
+ " stopping_criteria_on = True\n",
919
+ " using_bleu_stopping = False\n",
920
+ "\n",
921
+ " for iter in range(1, n_iters + 1):\n",
922
+ " training_pair = training_pairs[iter - 1]\n",
923
+ "\n",
924
+ " input_tensor = training_pair[0][0]\n",
925
+ " target_tensor = training_pair[0][1]\n",
926
+ "\n",
927
+ "\n",
928
+ " loss = get_train_loss(input_tensor, target_tensor, encoder, decoder, encoder_optimizer, decoder_optimizer, criterion)\n",
929
+ " print_loss_total += loss\n",
930
+ " plot_loss_total += loss\n",
931
+ "\n",
932
+ " stopping_delta = 0.001 # if improvement is not more than this amount after n tries, exit the loop\n",
933
+ "\n",
934
+ "\n",
935
+ "\n",
936
+ " if iter % print_every == 0:\n",
937
+ " print_loss_avg = print_loss_total / print_every\n",
938
+ " print_loss_total = 0\n",
939
+ " print('Training loss: %s (%d %d%%) %.4f' % (timeSince(start, iter / n_iters),\n",
940
+ " iter, iter / n_iters * 100, print_loss_avg))\n",
941
+ "\n",
942
+ " total_val_loss = 0\n",
943
+ " total_bleu_score = 0\n",
944
+ " total_val_pairs = len(val_sent_tensor_pairs)\n",
945
+ " \n",
946
+ " for itr in range(0, len(val_sent_tensor_pairs)):\n",
947
+ " val_input_tensor = val_sent_tensor_pairs[itr][0]\n",
948
+ " val_target_tensor = val_sent_tensor_pairs[itr][1]\n",
949
+ " reference_sent = val_sent_pairs[itr][1]\n",
950
+ " candidate_sent = translate(val_sent_pairs[itr][0], encoder, decoder)\n",
951
+ " bleu_score = calculate_bleu_score(reference_sent,candidate_sent)\n",
952
+ " total_bleu_score += bleu_score\n",
953
+ " val_loss = get_validation_loss(val_input_tensor, val_target_tensor, encoder, decoder, criterion)\n",
954
+ " total_val_loss += val_loss\n",
955
+ "\n",
956
+ " avg_val_loss = total_val_loss / total_val_pairs\n",
957
+ " val_losses.append(avg_val_loss)\n",
958
+ " avg_bleu_scores = total_bleu_score / total_val_pairs\n",
959
+ " bleu_scores.append(avg_bleu_scores)\n",
960
+ " \n",
961
+ " print('Validation loss: %s (%d %d%%) %.4f' % (timeSince(start, iter / n_iters),\n",
962
+ " iter, iter / n_iters * 100, avg_val_loss))\n",
963
+ " print('Bleu scores: %s (%d %d%%) %.8f' % (timeSince(start, iter / n_iters),\n",
964
+ " iter, iter / n_iters * 100, avg_bleu_scores))\n",
965
+ " if stopping_criteria_on:\n",
966
+ " if not using_bleu_stopping:\n",
967
+ " if (prev_val_loss - avg_val_loss) > stopping_delta and avg_val_loss < lowest_so_far:\n",
968
+ " print(f\"Improvement in validation loss, saving model. Prev {prev_val_loss} Curr {avg_val_loss}\")\n",
969
+ " lowest_so_far = avg_val_loss\n",
970
+ " encoder_save_path = '%s/%s.pth' % (SAVE_PATH, 'best_encoder')\n",
971
+ " print('save encoder weights to ', encoder_save_path)\n",
972
+ " torch.save(encoder.state_dict(), encoder_save_path)\n",
973
+ " decoder_save_path = '%s/%s.pth' % (SAVE_PATH, 'best_decoder')\n",
974
+ " print('save decoder weights to ', decoder_save_path)\n",
975
+ " torch.save(decoder.state_dict(), decoder_save_path)\n",
976
+ " patience = MAX_PATIENCE # reset to max\n",
977
+ " else:\n",
978
+ " print(f\"No improvement in validation loss, losing patience {patience}\")\n",
979
+ " patience -= 1\n",
980
+ "\n",
981
+ " if patience == 0: # break out of training\n",
982
+ " break\n",
983
+ "\n",
984
+ " prev_val_loss = avg_val_loss\n",
985
+ " else: # bleu\n",
986
+ " if (avg_bleu_scores - prev_bleu) > stopping_delta and avg_bleu_scores > highest_so_far: \n",
987
+ " print(f\"Improvement in bleu scores, saving model. Prev {prev_bleu} Curr {avg_bleu_scores}\")\n",
988
+ " highest_so_far = avg_bleu_scores\n",
989
+ " encoder_save_path = '%s/%s.pth' % (SAVE_PATH, 'best_encoder')\n",
990
+ " print('save encoder weights to ', encoder_save_path)\n",
991
+ " torch.save(encoder.state_dict(), encoder_save_path)\n",
992
+ " decoder_save_path = '%s/%s.pth' % (SAVE_PATH, 'best_decoder')\n",
993
+ " print('save decoder weights to ', decoder_save_path)\n",
994
+ " torch.save(decoder.state_dict(), decoder_save_path)\n",
995
+ " patience = MAX_PATIENCE # reset to max\n",
996
+ " else:\n",
997
+ " print(f\"No improvement in bleu scores, losing patience {patience}\")\n",
998
+ " patience -= 1\n",
999
+ "\n",
1000
+ " if patience == 0: # break out of training\n",
1001
+ " break \n",
1002
+ " \n",
1003
+ " prev_bleu = avg_bleu_scores\n",
1004
+ " \n",
1005
+ "\n",
1006
+ " print(\"##########################################################\")\n",
1007
+ "\n",
1008
+ " if iter % plot_every == 0:\n",
1009
+ " plot_loss_avg = plot_loss_total / plot_every\n",
1010
+ " plot_losses.append(plot_loss_avg)\n",
1011
+ " plot_loss_total = 0\n",
1012
+ " \n",
1013
+ " # save trained encoder and decoder\n",
1014
+ " if iter % save_every == 0:\n",
1015
+ " encoder_save_path = '%s/%s-%d.pth' % (SAVE_PATH, 'encoder', iter)\n",
1016
+ " print('save encoder weights to ', encoder_save_path)\n",
1017
+ " torch.save(encoder.state_dict(), encoder_save_path)\n",
1018
+ " decoder_save_path = '%s/%s-%d.pth' % (SAVE_PATH, 'decoder', iter)\n",
1019
+ " print('save decoder weights to ', decoder_save_path)\n",
1020
+ " torch.save(decoder.state_dict(), decoder_save_path)\n",
1021
+ "\n",
1022
+ " showPlot(plot_losses)\n",
1023
+ " showPlot(val_losses)\n",
1024
+ " showPlot(bleu_scores)\n",
1025
+ " return plot_losses, val_losses, bleu_scores\n",
1026
+ "\n",
1027
+ "\n",
1028
+ "def evaluate(encoder, decoder, sentence, max_length=MAX_LENGTH):\n",
1029
+ " with torch.no_grad():\n",
1030
+ " # input_tensor = tensorFromSentence(input_lang, sentence)\n",
1031
+ " input_tensor = variable_from_sent(sentence, english_vocab)\n",
1032
+ " input_length = input_tensor.size()[0]\n",
1033
+ " encoder_hidden = encoder.initHidden()\n",
1034
+ "\n",
1035
+ " encoder_outputs = torch.zeros(max_length, encoder.hidden_size, device=device)\n",
1036
+ "\n",
1037
+ " for ei in range(input_length):\n",
1038
+ " encoder_output, encoder_hidden = encoder(input_tensor[ei],\n",
1039
+ " encoder_hidden)\n",
1040
+ " encoder_outputs[ei] += encoder_output[0, 0]\n",
1041
+ "\n",
1042
+ " decoder_input = torch.tensor([[SOS_token]], device=device) # SOS\n",
1043
+ "\n",
1044
+ " decoder_hidden = encoder_hidden\n",
1045
+ "\n",
1046
+ " decoded_words = []\n",
1047
+ " decoder_attentions = torch.zeros(max_length, max_length)\n",
1048
+ "\n",
1049
+ " for di in range(max_length):\n",
1050
+ " decoder_output, decoder_hidden, decoder_attention = decoder(\n",
1051
+ " decoder_input, decoder_hidden, encoder_outputs)\n",
1052
+ " decoder_attentions[di] = decoder_attention.data\n",
1053
+ " topv, topi = decoder_output.data.topk(1)\n",
1054
+ " if topi.item() == EOS_token:\n",
1055
+ " decoded_words.append('</s>')\n",
1056
+ " break\n",
1057
+ " else:\n",
1058
+ " decoded_words.append(indo_vocab.id2token[topi.item()])\n",
1059
+ "\n",
1060
+ " decoder_input = topi.squeeze().detach()\n",
1061
+ "\n",
1062
+ " return decoded_words, decoder_attentions[:di + 1]\n",
1063
+ "\n",
1064
+ "def evaluateRandomly(encoder, decoder, n=10):\n",
1065
+ " for i in range(n):\n",
1066
+ " pair = random.choice(pairs)\n",
1067
+ " print('>', pair[0])\n",
1068
+ " print('=', pair[1])\n",
1069
+ " output_words, attentions = evaluate(encoder, decoder, pair[0])\n",
1070
+ " output_sentence = ' '.join(output_words)\n",
1071
+ " print('<', output_sentence)\n",
1072
+ " print('')\n",
1073
+ " \n",
1074
+ "def translate(input_sentence, enc, dec):\n",
1075
+ " output_words, attentions = evaluate(\n",
1076
+ " enc, dec, input_sentence)\n",
1077
+ " candidate = ' '.join(output_words)\n",
1078
+ " return candidate"
1079
+ ]
1080
+ },
1081
+ {
1082
+ "cell_type": "markdown",
1083
+ "metadata": {
1084
+ "colab_type": "text",
1085
+ "id": "uERT7riuWI0b"
1086
+ },
1087
+ "source": [
1088
+ "## Perform training"
1089
+ ]
1090
+ },
1091
+ {
1092
+ "cell_type": "code",
1093
+ "execution_count": 17,
1094
+ "metadata": {
1095
+ "colab": {
1096
+ "base_uri": "https://localhost:8080/",
1097
+ "height": 8466
1098
+ },
1099
+ "colab_type": "code",
1100
+ "id": "vLkx3FDdWI0c",
1101
+ "outputId": "0baaaed0-b3a4-486a-c560-f4373b38fb99"
1102
+ },
1103
+ "outputs": [
1104
+ {
1105
+ "name": "stdout",
1106
+ "output_type": "stream",
1107
+ "text": [
1108
+ "Training loss: 4m 52s (- 361m 5s) (1000 1%) 4.2309\n"
1109
+ ]
1110
+ },
1111
+ {
1112
+ "name": "stderr",
1113
+ "output_type": "stream",
1114
+ "text": [
1115
+ "c:\\Users\\elisa\\anaconda3\\envs\\deeplearning\\lib\\site-packages\\nltk\\translate\\bleu_score.py:552: UserWarning: \n",
1116
+ "The hypothesis contains 0 counts of 2-gram overlaps.\n",
1117
+ "Therefore the BLEU score evaluates to 0, independently of\n",
1118
+ "how many N-gram overlaps of lower order it contains.\n",
1119
+ "Consider using lower n-gram order or use SmoothingFunction()\n",
1120
+ " warnings.warn(_msg)\n",
1121
+ "c:\\Users\\elisa\\anaconda3\\envs\\deeplearning\\lib\\site-packages\\nltk\\translate\\bleu_score.py:552: UserWarning: \n",
1122
+ "The hypothesis contains 0 counts of 3-gram overlaps.\n",
1123
+ "Therefore the BLEU score evaluates to 0, independently of\n",
1124
+ "how many N-gram overlaps of lower order it contains.\n",
1125
+ "Consider using lower n-gram order or use SmoothingFunction()\n",
1126
+ " warnings.warn(_msg)\n",
1127
+ "c:\\Users\\elisa\\anaconda3\\envs\\deeplearning\\lib\\site-packages\\nltk\\translate\\bleu_score.py:552: UserWarning: \n",
1128
+ "The hypothesis contains 0 counts of 4-gram overlaps.\n",
1129
+ "Therefore the BLEU score evaluates to 0, independently of\n",
1130
+ "how many N-gram overlaps of lower order it contains.\n",
1131
+ "Consider using lower n-gram order or use SmoothingFunction()\n",
1132
+ " warnings.warn(_msg)\n"
1133
+ ]
1134
+ },
1135
+ {
1136
+ "name": "stdout",
1137
+ "output_type": "stream",
1138
+ "text": [
1139
+ "Validation loss: 6m 45s (- 500m 15s) (1000 1%) 4.0005\n",
1140
+ "Bleu scores: 6m 45s (- 500m 15s) (1000 1%) 0.03154827\n",
1141
+ "Improvement in validation loss, saving model. Prev 999 Curr 4.000509120991064\n",
1142
+ "save encoder weights to results/best_encoder.pth\n",
1143
+ "save decoder weights to results/best_decoder.pth\n",
1144
+ "##########################################################\n",
1145
+ "save encoder weights to results/encoder-1000.pth\n",
1146
+ "save decoder weights to results/decoder-1000.pth\n",
1147
+ "Training loss: 10m 20s (- 377m 16s) (2000 2%) 3.9650\n",
1148
+ "Validation loss: 12m 6s (- 442m 5s) (2000 2%) 3.7445\n",
1149
+ "Bleu scores: 12m 6s (- 442m 5s) (2000 2%) 0.04307411\n",
1150
+ "Improvement in validation loss, saving model. Prev 4.000509120991064 Curr 3.7444545816390606\n",
1151
+ "save encoder weights to results/best_encoder.pth\n",
1152
+ "save decoder weights to results/best_decoder.pth\n",
1153
+ "##########################################################\n",
1154
+ "save encoder weights to results/encoder-2000.pth\n",
1155
+ "save decoder weights to results/decoder-2000.pth\n",
1156
+ "Training loss: 15m 50s (- 380m 7s) (3000 4%) 3.7754\n",
1157
+ "Validation loss: 18m 1s (- 432m 45s) (3000 4%) 3.7098\n",
1158
+ "Bleu scores: 18m 1s (- 432m 45s) (3000 4%) 0.05259536\n",
1159
+ "Improvement in validation loss, saving model. Prev 3.7444545816390606 Curr 3.7098362678888783\n",
1160
+ "save encoder weights to results/best_encoder.pth\n",
1161
+ "save decoder weights to results/best_decoder.pth\n",
1162
+ "##########################################################\n",
1163
+ "save encoder weights to results/encoder-3000.pth\n",
1164
+ "save decoder weights to results/decoder-3000.pth\n",
1165
+ "Training loss: 23m 20s (- 414m 18s) (4000 5%) 3.4911\n",
1166
+ "Validation loss: 26m 9s (- 464m 17s) (4000 5%) 3.5552\n",
1167
+ "Bleu scores: 26m 9s (- 464m 17s) (4000 5%) 0.06098530\n",
1168
+ "Improvement in validation loss, saving model. Prev 3.7098362678888783 Curr 3.555194068107049\n",
1169
+ "save encoder weights to results/best_encoder.pth\n",
1170
+ "save decoder weights to results/best_decoder.pth\n",
1171
+ "##########################################################\n",
1172
+ "save encoder weights to results/encoder-4000.pth\n",
1173
+ "save decoder weights to results/decoder-4000.pth\n",
1174
+ "Training loss: 30m 51s (- 431m 59s) (5000 6%) 3.3821\n",
1175
+ "Validation loss: 33m 46s (- 472m 57s) (5000 6%) 3.3999\n",
1176
+ "Bleu scores: 33m 46s (- 472m 57s) (5000 6%) 0.06369386\n",
1177
+ "Improvement in validation loss, saving model. Prev 3.555194068107049 Curr 3.3999293611511723\n",
1178
+ "save encoder weights to results/best_encoder.pth\n",
1179
+ "save decoder weights to results/best_decoder.pth\n",
1180
+ "##########################################################\n",
1181
+ "save encoder weights to results/encoder-5000.pth\n",
1182
+ "save decoder weights to results/decoder-5000.pth\n",
1183
+ "Training loss: 38m 53s (- 447m 15s) (6000 8%) 3.2100\n",
1184
+ "Validation loss: 41m 52s (- 481m 39s) (6000 8%) 3.3394\n",
1185
+ "Bleu scores: 41m 52s (- 481m 39s) (6000 8%) 0.06984730\n",
1186
+ "Improvement in validation loss, saving model. Prev 3.3999293611511723 Curr 3.339417389060846\n",
1187
+ "save encoder weights to results/best_encoder.pth\n",
1188
+ "save decoder weights to results/best_decoder.pth\n",
1189
+ "##########################################################\n",
1190
+ "save encoder weights to results/encoder-6000.pth\n",
1191
+ "save decoder weights to results/decoder-6000.pth\n",
1192
+ "Training loss: 47m 14s (- 458m 51s) (7000 9%) 3.1873\n",
1193
+ "Validation loss: 50m 24s (- 489m 43s) (7000 9%) 3.1959\n",
1194
+ "Bleu scores: 50m 24s (- 489m 43s) (7000 9%) 0.07534899\n",
1195
+ "Improvement in validation loss, saving model. Prev 3.339417389060846 Curr 3.1959179781729814\n",
1196
+ "save encoder weights to results/best_encoder.pth\n",
1197
+ "save decoder weights to results/best_decoder.pth\n",
1198
+ "##########################################################\n",
1199
+ "save encoder weights to results/encoder-7000.pth\n",
1200
+ "save decoder weights to results/decoder-7000.pth\n",
1201
+ "Training loss: 55m 44s (- 466m 47s) (8000 10%) 2.9833\n",
1202
+ "Validation loss: 58m 38s (- 491m 10s) (8000 10%) 3.0740\n",
1203
+ "Bleu scores: 58m 38s (- 491m 10s) (8000 10%) 0.08536996\n",
1204
+ "Improvement in validation loss, saving model. Prev 3.1959179781729814 Curr 3.073970045550703\n",
1205
+ "save encoder weights to results/best_encoder.pth\n",
1206
+ "save decoder weights to results/best_decoder.pth\n",
1207
+ "##########################################################\n",
1208
+ "save encoder weights to results/encoder-8000.pth\n",
1209
+ "save decoder weights to results/decoder-8000.pth\n",
1210
+ "Training loss: 63m 56s (- 468m 52s) (9000 12%) 2.8639\n",
1211
+ "Validation loss: 66m 58s (- 491m 10s) (9000 12%) 3.0076\n",
1212
+ "Bleu scores: 66m 58s (- 491m 10s) (9000 12%) 0.08991626\n",
1213
+ "Improvement in validation loss, saving model. Prev 3.073970045550703 Curr 3.007592470861209\n",
1214
+ "save encoder weights to results/best_encoder.pth\n",
1215
+ "save decoder weights to results/best_decoder.pth\n",
1216
+ "##########################################################\n",
1217
+ "save encoder weights to results/encoder-9000.pth\n",
1218
+ "save decoder weights to results/decoder-9000.pth\n",
1219
+ "Training loss: 72m 10s (- 469m 10s) (10000 13%) 2.7221\n",
1220
+ "Validation loss: 75m 21s (- 489m 48s) (10000 13%) 2.9584\n",
1221
+ "Bleu scores: 75m 21s (- 489m 48s) (10000 13%) 0.09578874\n",
1222
+ "Improvement in validation loss, saving model. Prev 3.007592470861209 Curr 2.9583699655412103\n",
1223
+ "save encoder weights to results/best_encoder.pth\n",
1224
+ "save decoder weights to results/best_decoder.pth\n",
1225
+ "##########################################################\n",
1226
+ "save encoder weights to results/encoder-10000.pth\n",
1227
+ "save decoder weights to results/decoder-10000.pth\n",
1228
+ "Training loss: 80m 48s (- 470m 8s) (11000 14%) 2.6615\n",
1229
+ "Validation loss: 84m 2s (- 488m 59s) (11000 14%) 2.9471\n",
1230
+ "Bleu scores: 84m 2s (- 488m 59s) (11000 14%) 0.09783871\n",
1231
+ "Improvement in validation loss, saving model. Prev 2.9583699655412103 Curr 2.947079241857556\n",
1232
+ "save encoder weights to results/best_encoder.pth\n",
1233
+ "save decoder weights to results/best_decoder.pth\n",
1234
+ "##########################################################\n",
1235
+ "save encoder weights to results/encoder-11000.pth\n",
1236
+ "save decoder weights to results/decoder-11000.pth\n",
1237
+ "Training loss: 89m 24s (- 469m 25s) (12000 16%) 2.6592\n",
1238
+ "Validation loss: 92m 38s (- 486m 23s) (12000 16%) 2.7873\n",
1239
+ "Bleu scores: 92m 38s (- 486m 23s) (12000 16%) 0.10490669\n",
1240
+ "Improvement in validation loss, saving model. Prev 2.947079241857556 Curr 2.7872625701037794\n",
1241
+ "save encoder weights to results/best_encoder.pth\n",
1242
+ "save decoder weights to results/best_decoder.pth\n",
1243
+ "##########################################################\n",
1244
+ "save encoder weights to results/encoder-12000.pth\n",
1245
+ "save decoder weights to results/decoder-12000.pth\n",
1246
+ "Training loss: 97m 57s (- 467m 10s) (13000 17%) 2.5483\n",
1247
+ "Validation loss: 101m 3s (- 482m 0s) (13000 17%) 2.7313\n",
1248
+ "Bleu scores: 101m 3s (- 482m 0s) (13000 17%) 0.10702588\n",
1249
+ "Improvement in validation loss, saving model. Prev 2.7872625701037794 Curr 2.7312805779151867\n",
1250
+ "save encoder weights to results/best_encoder.pth\n",
1251
+ "save decoder weights to results/best_decoder.pth\n",
1252
+ "##########################################################\n",
1253
+ "save encoder weights to results/encoder-13000.pth\n",
1254
+ "save decoder weights to results/decoder-13000.pth\n",
1255
+ "Training loss: 106m 1s (- 461m 56s) (14000 18%) 2.4441\n",
1256
+ "Validation loss: 166m 8s (- 723m 55s) (14000 18%) 2.6903\n",
1257
+ "Bleu scores: 166m 8s (- 723m 55s) (14000 18%) 0.11510809\n",
1258
+ "Improvement in validation loss, saving model. Prev 2.7312805779151867 Curr 2.6903174619521613\n",
1259
+ "save encoder weights to results/best_encoder.pth\n",
1260
+ "save decoder weights to results/best_decoder.pth\n",
1261
+ "##########################################################\n",
1262
+ "save encoder weights to results/encoder-14000.pth\n",
1263
+ "save decoder weights to results/decoder-14000.pth\n",
1264
+ "Training loss: 170m 58s (- 683m 55s) (15000 20%) 2.2965\n",
1265
+ "Validation loss: 172m 55s (- 691m 43s) (15000 20%) 2.5950\n",
1266
+ "Bleu scores: 172m 55s (- 691m 43s) (15000 20%) 0.12714536\n",
1267
+ "Improvement in validation loss, saving model. Prev 2.6903174619521613 Curr 2.5950060204451675\n",
1268
+ "save encoder weights to results/best_encoder.pth\n",
1269
+ "save decoder weights to results/best_decoder.pth\n",
1270
+ "##########################################################\n",
1271
+ "save encoder weights to results/encoder-15000.pth\n",
1272
+ "save decoder weights to results/decoder-15000.pth\n",
1273
+ "Training loss: 176m 31s (- 650m 54s) (16000 21%) 2.2813\n",
1274
+ "Validation loss: 178m 22s (- 657m 44s) (16000 21%) 2.4976\n",
1275
+ "Bleu scores: 178m 22s (- 657m 44s) (16000 21%) 0.13238018\n",
1276
+ "Improvement in validation loss, saving model. Prev 2.5950060204451675 Curr 2.497571861260076\n",
1277
+ "save encoder weights to results/best_encoder.pth\n",
1278
+ "save decoder weights to results/best_decoder.pth\n",
1279
+ "##########################################################\n",
1280
+ "save encoder weights to results/encoder-16000.pth\n",
1281
+ "save decoder weights to results/decoder-16000.pth\n",
1282
+ "Training loss: 182m 3s (- 621m 7s) (17000 22%) 2.1507\n",
1283
+ "Validation loss: 183m 53s (- 627m 24s) (17000 22%) 2.4512\n",
1284
+ "Bleu scores: 183m 53s (- 627m 24s) (17000 22%) 0.14081074\n",
1285
+ "Improvement in validation loss, saving model. Prev 2.497571861260076 Curr 2.451238969365191\n",
1286
+ "save encoder weights to results/best_encoder.pth\n",
1287
+ "save decoder weights to results/best_decoder.pth\n",
1288
+ "##########################################################\n",
1289
+ "save encoder weights to results/encoder-17000.pth\n",
1290
+ "save decoder weights to results/decoder-17000.pth\n",
1291
+ "Training loss: 187m 26s (- 593m 33s) (18000 24%) 2.0866\n",
1292
+ "Validation loss: 189m 17s (- 599m 25s) (18000 24%) 2.3845\n",
1293
+ "Bleu scores: 189m 17s (- 599m 25s) (18000 24%) 0.15000098\n",
1294
+ "Improvement in validation loss, saving model. Prev 2.451238969365191 Curr 2.384489413921074\n",
1295
+ "save encoder weights to results/best_encoder.pth\n",
1296
+ "save decoder weights to results/best_decoder.pth\n",
1297
+ "##########################################################\n",
1298
+ "save encoder weights to results/encoder-18000.pth\n",
1299
+ "save decoder weights to results/decoder-18000.pth\n",
1300
+ "Training loss: 192m 50s (- 568m 22s) (19000 25%) 1.9876\n",
1301
+ "Validation loss: 194m 40s (- 573m 47s) (19000 25%) 2.3249\n",
1302
+ "Bleu scores: 194m 40s (- 573m 47s) (19000 25%) 0.15965307\n",
1303
+ "Improvement in validation loss, saving model. Prev 2.384489413921074 Curr 2.324945202636609\n",
1304
+ "save encoder weights to results/best_encoder.pth\n",
1305
+ "save decoder weights to results/best_decoder.pth\n",
1306
+ "##########################################################\n",
1307
+ "save encoder weights to results/encoder-19000.pth\n",
1308
+ "save decoder weights to results/decoder-19000.pth\n",
1309
+ "Training loss: 198m 15s (- 545m 13s) (20000 26%) 1.9923\n",
1310
+ "Validation loss: 200m 16s (- 550m 44s) (20000 26%) 2.2664\n",
1311
+ "Bleu scores: 200m 16s (- 550m 44s) (20000 26%) 0.16310327\n",
1312
+ "Improvement in validation loss, saving model. Prev 2.324945202636609 Curr 2.2663642665737243\n",
1313
+ "save encoder weights to results/best_encoder.pth\n",
1314
+ "save decoder weights to results/best_decoder.pth\n",
1315
+ "##########################################################\n",
1316
+ "save encoder weights to results/encoder-20000.pth\n",
1317
+ "save decoder weights to results/decoder-20000.pth\n",
1318
+ "Training loss: 203m 57s (- 524m 27s) (21000 28%) 1.8699\n",
1319
+ "Validation loss: 206m 7s (- 530m 3s) (21000 28%) 2.2703\n",
1320
+ "Bleu scores: 206m 7s (- 530m 3s) (21000 28%) 0.17257318\n",
1321
+ "No improvement in validation loss, losing patience 50\n",
1322
+ "##########################################################\n",
1323
+ "save encoder weights to results/encoder-21000.pth\n",
1324
+ "save decoder weights to results/decoder-21000.pth\n",
1325
+ "Training loss: 209m 59s (- 505m 54s) (22000 29%) 1.7955\n",
1326
+ "Validation loss: 212m 7s (- 511m 1s) (22000 29%) 2.1703\n",
1327
+ "Bleu scores: 212m 7s (- 511m 1s) (22000 29%) 0.17884532\n",
1328
+ "Improvement in validation loss, saving model. Prev 2.2702700825915545 Curr 2.1703052959574354\n",
1329
+ "save encoder weights to results/best_encoder.pth\n",
1330
+ "save decoder weights to results/best_decoder.pth\n",
1331
+ "##########################################################\n",
1332
+ "save encoder weights to results/encoder-22000.pth\n",
1333
+ "save decoder weights to results/decoder-22000.pth\n",
1334
+ "Training loss: 215m 49s (- 487m 57s) (23000 30%) 1.7531\n",
1335
+ "Validation loss: 217m 41s (- 492m 11s) (23000 30%) 2.1149\n",
1336
+ "Bleu scores: 217m 41s (- 492m 11s) (23000 30%) 0.18669578\n",
1337
+ "Improvement in validation loss, saving model. Prev 2.1703052959574354 Curr 2.11492065994964\n",
1338
+ "save encoder weights to results/best_encoder.pth\n",
1339
+ "save decoder weights to results/best_decoder.pth\n",
1340
+ "##########################################################\n",
1341
+ "save encoder weights to results/encoder-23000.pth\n",
1342
+ "save decoder weights to results/decoder-23000.pth\n",
1343
+ "Training loss: 221m 12s (- 470m 3s) (24000 32%) 1.7290\n",
1344
+ "Validation loss: 223m 10s (- 474m 14s) (24000 32%) 2.0907\n",
1345
+ "Bleu scores: 223m 10s (- 474m 14s) (24000 32%) 0.19487671\n",
1346
+ "Improvement in validation loss, saving model. Prev 2.11492065994964 Curr 2.090669993744987\n",
1347
+ "save encoder weights to results/best_encoder.pth\n",
1348
+ "save decoder weights to results/best_decoder.pth\n",
1349
+ "##########################################################\n",
1350
+ "save encoder weights to results/encoder-24000.pth\n",
1351
+ "save decoder weights to results/decoder-24000.pth\n",
1352
+ "Training loss: 226m 59s (- 453m 58s) (25000 33%) 1.6945\n",
1353
+ "Validation loss: 228m 54s (- 457m 48s) (25000 33%) 2.0511\n",
1354
+ "Bleu scores: 228m 54s (- 457m 48s) (25000 33%) 0.20555118\n",
1355
+ "Improvement in validation loss, saving model. Prev 2.090669993744987 Curr 2.0510722188946704\n",
1356
+ "save encoder weights to results/best_encoder.pth\n",
1357
+ "save decoder weights to results/best_decoder.pth\n",
1358
+ "##########################################################\n",
1359
+ "save encoder weights to results/encoder-25000.pth\n",
1360
+ "save decoder weights to results/decoder-25000.pth\n",
1361
+ "Training loss: 232m 36s (- 438m 22s) (26000 34%) 1.6333\n",
1362
+ "Validation loss: 234m 40s (- 442m 15s) (26000 34%) 2.0361\n",
1363
+ "Bleu scores: 234m 40s (- 442m 15s) (26000 34%) 0.20926462\n",
1364
+ "Improvement in validation loss, saving model. Prev 2.0510722188946704 Curr 2.036086866139529\n",
1365
+ "save encoder weights to results/best_encoder.pth\n",
1366
+ "save decoder weights to results/best_decoder.pth\n",
1367
+ "##########################################################\n",
1368
+ "save encoder weights to results/encoder-26000.pth\n",
1369
+ "save decoder weights to results/decoder-26000.pth\n",
1370
+ "Training loss: 238m 16s (- 423m 36s) (27000 36%) 1.4851\n",
1371
+ "Validation loss: 240m 14s (- 427m 5s) (27000 36%) 1.9672\n",
1372
+ "Bleu scores: 240m 14s (- 427m 5s) (27000 36%) 0.21352279\n",
1373
+ "Improvement in validation loss, saving model. Prev 2.036086866139529 Curr 1.9672220808431697\n",
1374
+ "save encoder weights to results/best_encoder.pth\n",
1375
+ "save decoder weights to results/best_decoder.pth\n",
1376
+ "##########################################################\n",
1377
+ "save encoder weights to results/encoder-27000.pth\n",
1378
+ "save decoder weights to results/decoder-27000.pth\n",
1379
+ "Training loss: 243m 44s (- 409m 8s) (28000 37%) 1.5367\n",
1380
+ "Validation loss: 245m 39s (- 412m 21s) (28000 37%) 1.9125\n",
1381
+ "Bleu scores: 245m 39s (- 412m 21s) (28000 37%) 0.22457683\n",
1382
+ "Improvement in validation loss, saving model. Prev 1.9672220808431697 Curr 1.9125249442450718\n",
1383
+ "save encoder weights to results/best_encoder.pth\n",
1384
+ "save decoder weights to results/best_decoder.pth\n",
1385
+ "##########################################################\n",
1386
+ "save encoder weights to results/encoder-28000.pth\n",
1387
+ "save decoder weights to results/decoder-28000.pth\n",
1388
+ "Training loss: 249m 15s (- 395m 21s) (29000 38%) 1.4655\n",
1389
+ "Validation loss: 251m 19s (- 398m 38s) (29000 38%) 1.8698\n",
1390
+ "Bleu scores: 251m 19s (- 398m 38s) (29000 38%) 0.23247323\n",
1391
+ "Improvement in validation loss, saving model. Prev 1.9125249442450718 Curr 1.869782677108545\n",
1392
+ "save encoder weights to results/best_encoder.pth\n",
1393
+ "save decoder weights to results/best_decoder.pth\n",
1394
+ "##########################################################\n",
1395
+ "save encoder weights to results/encoder-29000.pth\n",
1396
+ "save decoder weights to results/decoder-29000.pth\n",
1397
+ "Training loss: 254m 56s (- 382m 24s) (30000 40%) 1.4104\n",
1398
+ "Validation loss: 256m 45s (- 385m 8s) (30000 40%) 1.8300\n",
1399
+ "Bleu scores: 256m 45s (- 385m 8s) (30000 40%) 0.23475004\n",
1400
+ "Improvement in validation loss, saving model. Prev 1.869782677108545 Curr 1.8300033148270234\n",
1401
+ "save encoder weights to results/best_encoder.pth\n",
1402
+ "save decoder weights to results/best_decoder.pth\n",
1403
+ "##########################################################\n",
1404
+ "save encoder weights to results/encoder-30000.pth\n",
1405
+ "save decoder weights to results/decoder-30000.pth\n"
1406
+ ]
1407
+ }
1408
+ ],
1409
+ "source": [
1410
+ "hidden_size = 512\n",
1411
+ "encoder1 = EncoderRNN(len(english_vocab), hidden_size).to(device)\n",
1412
+ "attn_decoder1 = AttnDecoderRNN(hidden_size, len(indo_vocab), dropout_p=0.5).to(device)\n",
1413
+ "\n",
1414
+ "trainIters(encoder1, attn_decoder1, 75000, print_every=1000)\n",
1415
+ "\n",
1416
+ "evaluateRandomly(encoder1, attn_decoder1)\n",
1417
+ "\n",
1418
+ "output_words, attentions = evaluate(\n",
1419
+ " encoder1, attn_decoder1, \"do you love me?\")"
1420
+ ]
1421
+ },
1422
+ {
1423
+ "cell_type": "markdown",
1424
+ "metadata": {
1425
+ "colab_type": "text",
1426
+ "id": "Uc9fOh1SWI0i"
1427
+ },
1428
+ "source": [
1429
+ "## Check some translations (note: the sentences below do not appear in the training or validation sets)"
1430
+ ]
1431
+ },
1432
+ {
1433
+ "cell_type": "code",
1434
+ "execution_count": null,
1435
+ "metadata": {
1436
+ "colab": {},
1437
+ "colab_type": "code",
1438
+ "id": "Da2782FxWI0l"
1439
+ },
1440
+ "outputs": [],
1441
+ "source": [
1442
+ "print(translate(\"tom is playing with ball .\", encoder1, attn_decoder1))\n",
1443
+ "\n",
1444
+ "print(translate(\"she is standing there .\", encoder1, attn_decoder1))\n",
1445
+ "\n",
1446
+ "print(translate(\"he is a bad man .\", encoder1, attn_decoder1))\n",
1447
+ "\n",
1448
+ "print(translate(\"he wants to sleep .\", encoder1, attn_decoder1))\n",
1449
+ "\n",
1450
+ "print(translate(\"i can't see you crying .\", encoder1, attn_decoder1))\n",
1451
+ "\n",
1452
+ "print(translate(\"my dog is running around .\", encoder1, attn_decoder1))\n",
1453
+ "\n",
1454
+ "print(translate(\"it is very popular .\", encoder1, attn_decoder1))\n",
1455
+ "\n",
1456
+ "print(translate(\"she speaks american english to tom's father .\", encoder1, attn_decoder1))\n",
1457
+ "\n",
1458
+ "print(translate(\"please eat lunch in the afternoon .\", encoder1, attn_decoder1))\n",
1459
+ "\n",
1460
+ "print(translate(\"i see red roses in the garden .\", encoder1, attn_decoder1))"
1461
+ ]
1462
+ }
1463
+ ],
1464
+ "metadata": {
1465
+ "accelerator": "GPU",
1466
+ "colab": {
1467
+ "collapsed_sections": [],
1468
+ "name": "Eng2Indo Attention Simple corpus - working copy with stopping.ipynb",
1469
+ "provenance": [],
1470
+ "toc_visible": true,
1471
+ "version": "0.3.2"
1472
+ },
1473
+ "kernelspec": {
1474
+ "display_name": "deeplearning",
1475
+ "language": "python",
1476
+ "name": "python3"
1477
+ },
1478
+ "language_info": {
1479
+ "codemirror_mode": {
1480
+ "name": "ipython",
1481
+ "version": 3
1482
+ },
1483
+ "file_extension": ".py",
1484
+ "mimetype": "text/x-python",
1485
+ "name": "python",
1486
+ "nbconvert_exporter": "python",
1487
+ "pygments_lexer": "ipython3",
1488
+ "version": "3.10.12"
1489
+ }
1490
+ },
1491
+ "nbformat": 4,
1492
+ "nbformat_minor": 1
1493
+ }