Mr-FineTuner
/

Finetune-seq2seq

Model card Files Files and versions

xet

Community

Mr-FineTuner committed on Dec 12, 2023

Commit

5d6ec7a

1 Parent(s): 0a2fe91

Upload Eng2Indo_Seq2Seq_Attention.ipynb

Browse files

Files changed (1) hide show

fine tuning/Eng2Indo_Seq2Seq_Attention.ipynb +1493 -0

fine tuning/Eng2Indo_Seq2Seq_Attention.ipynb ADDED Viewed

	@@ -0,0 +1,1493 @@

+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text",
+    "id": "rycMa52tWIy4"
+   },
+   "source": [
+    "## English to Indonesian translation using attention"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### References:\n",
+    "1. Pytorch tutorial: https://pytorch.org/tutorials/intermediate/seq2seq_translation_tutorial.html\n",
+    "2. Kopitiam sample: https://github.com/alvations/kopitiam/blob/master/Kopitiam%20mit%20Attention.ipynb\n",
+    "3. BLEU score: https://machinelearningmastery.com/calculate-bleu-score-for-text-python/"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Data pre-processing"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {
+    "colab": {},
+    "colab_type": "code",
+    "id": "l2qpBg3uWIy6"
+   },
+   "outputs": [],
+   "source": [
+    "from __future__ import unicode_literals, print_function, division\n",
+    "from io import open\n",
+    "import unicodedata\n",
+    "import string\n",
+    "import re\n",
+    "import random\n",
+    "import os\n",
+    "\n",
+    "import torch\n",
+    "import torch.nn as nn\n",
+    "from torch import optim\n",
+    "import torch.nn.functional as F\n",
+    "from torch.autograd import Variable\n",
+    "from nltk.translate.bleu_score import sentence_bleu"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {
+    "colab": {},
+    "colab_type": "code",
+    "id": "tuy6ysg_WIy-"
+   },
+   "outputs": [],
+   "source": [
+    "import torch\n",
+    "import torch.nn as nn\n",
+    "from torch import optim\n",
+    "import torch.nn.functional as F\n",
+    "from torch.autograd import Variable\n",
+    "\n",
+    "# Loss function: https://pytorch.org/docs/stable/nn.html#torch.nn.NLLLoss\n",
+    "\n",
+    "device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n",
+    "use_cuda = torch.cuda.is_available()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/",
+     "height": 102
+    },
+    "colab_type": "code",
+    "id": "l4LvE_r2WIzB",
+    "outputId": "2e8f940e-bc78-4c96-cd6e-5f0ac477df4b"
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Requirement already satisfied: nltk in c:\\users\\elisa\\anaconda3\\envs\\deeplearning\\lib\\site-packages (3.7)\n",
+      "Requirement already satisfied: click in c:\\users\\elisa\\anaconda3\\envs\\deeplearning\\lib\\site-packages (from nltk) (8.0.4)\n",
+      "Requirement already satisfied: joblib in c:\\users\\elisa\\anaconda3\\envs\\deeplearning\\lib\\site-packages (from nltk) (1.1.1)\n",
+      "Requirement already satisfied: regex>=2021.8.3 in c:\\users\\elisa\\anaconda3\\envs\\deeplearning\\lib\\site-packages (from nltk) (2022.7.9)\n",
+      "Requirement already satisfied: tqdm in c:\\users\\elisa\\anaconda3\\envs\\deeplearning\\lib\\site-packages (from nltk) (4.64.1)\n",
+      "Requirement already satisfied: colorama in c:\\users\\elisa\\anaconda3\\envs\\deeplearning\\lib\\site-packages (from click->nltk) (0.4.6)\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "[nltk_data] Downloading package punkt to\n",
+      "[nltk_data]     C:\\Users\\elisa\\AppData\\Roaming\\nltk_data...\n",
+      "[nltk_data]   Package punkt is already up-to-date!\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "True"
+      ]
+     },
+     "execution_count": 3,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "import pandas as pd\n",
+    "from gensim.corpora.dictionary import Dictionary\n",
+    "from nltk import word_tokenize\n",
+    "\n",
+    "!pip install nltk  \n",
+    "import nltk\n",
+    "nltk.download('punkt')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/",
+     "height": 221
+    },
+    "colab_type": "code",
+    "id": "kqBsanwyWIzE",
+    "outputId": "428053ee-79ad-49a4-8cf5-f09fc5d74dc5"
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "(15531, 2)\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>English</th>\n",
+       "      <th>Indonesian</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>run !</td>\n",
+       "      <td>lari !</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>who ?</td>\n",
+       "      <td>siapa ?</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>wow !</td>\n",
+       "      <td>wow !</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>help !</td>\n",
+       "      <td>tolong !</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>jump !</td>\n",
+       "      <td>lompat !</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "  English Indonesian\n",
+       "0   run !     lari !\n",
+       "1   who ?    siapa ?\n",
+       "2   wow !      wow !\n",
+       "3  help !   tolong !\n",
+       "4  jump !   lompat !"
+      ]
+     },
+     "execution_count": 4,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Read the parallel corpus: one sentence pair per line, with the English and\n",
+    "# the Indonesian sentence separated by a tab.\n",
+    "# The encoding is pinned so decoding does not depend on the platform default\n",
+    "# (e.g. cp1252 on Windows) -- assumes the corpus file is UTF-8; TODO confirm.\n",
+    "# The context manager closes the file even if reading raises.\n",
+    "with open('../corpus/eng-indo-augmented.txt', 'r', encoding='utf-8') as fp:\n",
+    "    text = fp.read().splitlines()\n",
+    "\n",
+    "# Turn a Unicode string to plain ASCII, thanks to\n",
+    "# https://stackoverflow.com/a/518232/2809427\n",
+    "def unicodeToAscii(s):\n",
+    "    return ''.join(\n",
+    "        c for c in unicodedata.normalize('NFD', s)\n",
+    "        if unicodedata.category(c) != 'Mn'\n",
+    "    )\n",
+    "\n",
+    "\n",
+    "\n",
+    "# Lowercase, trim, and remove non-letter characters\n",
+    "def normalizeString(s):\n",
+    "    \"\"\"Return `s` lowercased, stripped, without diacritics, keeping only letters and . ! ?\"\"\"\n",
+    "    cleaned = unicodeToAscii(s.lower().strip())\n",
+    "    # Put a space in front of sentence punctuation so it becomes its own token.\n",
+    "    spaced = re.sub(r\"([.!?])\", r\" \\1\", cleaned)\n",
+    "    # Collapse every run of characters other than letters and . ! ? into one space.\n",
+    "    return re.sub(r\"[^a-zA-Z.!?]+\", r\" \", spaced)\n",
+    "\n",
+    "\n",
+    "# Build the table of sentence pairs, normalising both sides of every pair.\n",
+    "text_dict = {\"English\": [], \"Indonesian\": []}\n",
+    "for line in text:\n",
+    "    split_text = line.split(\"\\t\")\n",
+    "    # Skip blank or malformed lines (no tab separator) instead of failing with\n",
+    "    # an IndexError when the second field is read.\n",
+    "    if len(split_text) < 2:\n",
+    "        continue\n",
+    "    text_dict[\"English\"].append(normalizeString(split_text[0]))\n",
+    "    text_dict[\"Indonesian\"].append(normalizeString(split_text[1]))\n",
+    "\n",
+    "df = pd.DataFrame.from_dict(text_dict)\n",
+    "print(df.shape)\n",
+    "df.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {
+    "colab": {},
+    "colab_type": "code",
+    "id": "O4BBhVu5WIzL"
+   },
+   "outputs": [],
+   "source": [
+    "MAX_LENGTH = 102\n",
+    "MIN_LENGTH = 1"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/",
+     "height": 255
+    },
+    "colab_type": "code",
+    "id": "man1RTr1WIzO",
+    "outputId": "68996a6a-87cb-4cde-f69e-8125ad5565d7"
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "(15531, 3)\n",
+      "Current shape: (15531, 3)\n",
+      "New shape: (15352, 3)\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>index</th>\n",
+       "      <th>English</th>\n",
+       "      <th>Indonesian</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>34</td>\n",
+       "      <td>i m sad .</td>\n",
+       "      <td>saya sedih .</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>35</td>\n",
+       "      <td>it s me !</td>\n",
+       "      <td>ini aku !</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>53</td>\n",
+       "      <td>i get it .</td>\n",
+       "      <td>aku mengerti .</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>54</td>\n",
+       "      <td>i got it .</td>\n",
+       "      <td>aku mengerti .</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>57</td>\n",
+       "      <td>i m okay .</td>\n",
+       "      <td>aku baik baik saja .</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   index     English            Indonesian\n",
+       "0     34   i m sad .          saya sedih .\n",
+       "1     35   it s me !             ini aku !\n",
+       "2     53  i get it .        aku mengerti .\n",
+       "3     54  i got it .        aku mengerti .\n",
+       "4     57  i m okay .  aku baik baik saja ."
+      ]
+     },
+     "execution_count": 6,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "def should_keep_row(row):\n",
+    "    \"\"\"Return True when the row's English sentence has an acceptable token count.\n",
+    "\n",
+    "    Only the English side is measured (tokenised with `word_tokenize`). The count\n",
+    "    must lie between MIN_LENGTH and MAX_LENGTH - 2, both bounds inclusive.\n",
+    "    \"\"\"\n",
+    "    token_count = len(word_tokenize(row[\"English\"]))\n",
+    "    if token_count < MIN_LENGTH:\n",
+    "        return False\n",
+    "    return token_count <= MAX_LENGTH - 2\n",
+    "\n",
+    "df[\"keep_row\"] = df.apply(should_keep_row, axis=1)\n",
+    "print(df.shape)\n",
+    "df.head()\n",
+    "\n",
+    "print(\"Current shape: \" + str(df.shape))\n",
+    "df = df[df[\"keep_row\"]]\n",
+    "print(\"New shape: \" + str(df.shape))\n",
+    "df.head()\n",
+    "df = df.reset_index().drop(columns=[\"keep_row\"])\n",
+    "df.head()\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "First 10 Indonesian words in Dictionary:\n",
+      " [(0, '<s>'), (1, '</s>'), (2, 'UNK'), (3, '<'), (4, 's'), (5, '>'), (6, 'saya'), (7, 'sedih'), (8, '.'), (9, 'ini')]\n",
+      "\n",
+      "First 10 English words in Dictionary:\n",
+      " [(0, '<s>'), (1, '</s>'), (2, 'UNK'), (3, '<'), (4, 's'), (5, '>'), (6, 'i'), (7, 'm'), (8, 'sad'), (9, '.')]\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Define unique tokens and indices\n",
+    "START, START_IDX = '<s>', 0\n",
+    "END, END_IDX = '</s>', 1\n",
+    "UNK, UNK_IDX = 'UNK', 2\n",
+    "\n",
+    "# Define SOS and EOS tokens\n",
+    "SOS_token = START_IDX\n",
+    "EOS_token = END_IDX\n",
+    "\n",
+    "# Tokenize every sentence into a list of words.\n",
+    "# The special tokens are deliberately NOT concatenated here: they are seeded\n",
+    "# into the vocabularies below. Appending the bare strings START / END to the\n",
+    "# list of sentences made the vocabulary loop iterate over their characters,\n",
+    "# which added junk entries such as '<', 's' and '>' to both vocabularies.\n",
+    "english_sents = [word_tokenize(sent.lower()) for sent in df['English']]\n",
+    "indo_sents = [word_tokenize(sent.lower()) for sent in df['Indonesian']]\n",
+    "\n",
+    "# Create dictionaries for English and Indonesian vocabularies.\n",
+    "# Each vocabulary is seeded with the three special tokens as one-token\n",
+    "# documents, then every corpus word is added as its own one-word document.\n",
+    "english_vocab = Dictionary([[START], [END], [UNK]])\n",
+    "english_vocab.add_documents([[word] for sentence in english_sents for word in sentence])\n",
+    "\n",
+    "indo_vocab = Dictionary([[START], [END], [UNK]])\n",
+    "indo_vocab.add_documents([[word] for sentence in indo_sents for word in sentence])\n",
+    "\n",
+    "# Display the first 10 words in the vocabularies\n",
+    "print('First 10 Indonesian words in Dictionary:\\n', sorted(indo_vocab.items())[:10])\n",
+    "print()\n",
+    "print('First 10 English words in Dictionary:\\n', sorted(english_vocab.items())[:10])"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Compute BLEU score"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {
+    "colab": {},
+    "colab_type": "code",
+    "id": "RjdR62hJWIzW"
+   },
+   "outputs": [],
+   "source": [
+    "# reference_sent: the ground-truth translation (val_sent_pairs[i][1]).\n",
+    "# candidate_sent: the model's translation of the English input val_sent_pairs[i][0].\n",
+    "def calculate_bleu_score(reference_sent,candidate_sent):\n",
+    "    \"\"\"Return the mean of four sentence-level BLEU scores of the candidate.\n",
+    "\n",
+    "    The four scores use n-gram weights (1,0,0,0), (0.5,0.5,0,0),\n",
+    "    (0.33,0.33,0.33,0) and (0.25,0.25,0.25,0.25) against the single reference.\n",
+    "\n",
+    "    Args:\n",
+    "        reference_sent: reference translation as a plain string.\n",
+    "        candidate_sent: predicted translation as a plain string; any '<s>' /\n",
+    "            '</s>' sentence markers in it are ignored.\n",
+    "\n",
+    "    Returns:\n",
+    "        The arithmetic mean of the four BLEU scores, as a float.\n",
+    "    \"\"\"\n",
+    "    # Strip the sentence markers from the raw string, before tokenising: the\n",
+    "    # tokenizer breaks '<s>' and '</s>' into several pieces, so looking for them\n",
+    "    # as whole tokens afterwards never matched and the pieces stayed in the\n",
+    "    # candidate.\n",
+    "    for marker in ('<s>', '</s>'):\n",
+    "        candidate_sent = candidate_sent.replace(marker, ' ')\n",
+    "\n",
+    "    reference = [word_tokenize(reference_sent)]\n",
+    "    candidate = word_tokenize(candidate_sent)\n",
+    "\n",
+    "    gram_1_score = sentence_bleu(reference,candidate,weights=(1, 0, 0, 0))\n",
+    "    gram_2_score = sentence_bleu(reference,candidate,weights=(0.5, 0.5, 0, 0))\n",
+    "    gram_3_score = sentence_bleu(reference,candidate,weights=(0.33, 0.33, 0.33, 0))\n",
+    "    gram_4_score = sentence_bleu(reference,candidate,weights=(0.25, 0.25, 0.25, 0.25))\n",
+    "    bleu_score = (gram_1_score+gram_2_score+gram_3_score+gram_4_score)/4\n",
+    "    return bleu_score"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Utility methods"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {
+    "colab": {},
+    "colab_type": "code",
+    "id": "kWpJQ74mWIza"
+   },
+   "outputs": [],
+   "source": [
+    "import pickle\n",
+    "# Let's save our dictionaries.\n",
+    "with open('../evaluation/vocabs/simple_indo_vocab.Dictionary.pkl', 'wb') as fout:\n",
+    "   pickle.dump(indo_vocab, fout)\n",
+    "    \n",
+    "with open('../evaluation/vocabs/simple_english_vocab.Dictionary.pkl', 'wb') as fout:\n",
+    "   pickle.dump(english_vocab, fout)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/",
+     "height": 119
+    },
+    "colab_type": "code",
+    "id": "aLocWKZ_WIzd",
+    "outputId": "2da4f69f-d088-463d-827e-64c6b8a881a5"
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "tensor([[ 0],\n",
+       "        [34],\n",
+       "        [10],\n",
+       "        [47],\n",
+       "        [19],\n",
+       "        [ 1]])"
+      ]
+     },
+     "execution_count": 10,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Vectorizes a sentence with a given vocab\n",
+    "def vectorize_sent(sent, vocab):\n",
+    "    \"\"\"Return the token ids of `sent` under `vocab`, wrapped in START / END.\n",
+    "\n",
+    "    The sentence is lowercased and tokenised with `word_tokenize`; words that\n",
+    "    are missing from `vocab` map to UNK_IDX.\n",
+    "    \"\"\"\n",
+    "    tokens = [START] + word_tokenize(sent.lower()) + [END]\n",
+    "    # Use the named constant rather than a bare 2 so the unknown-word id cannot\n",
+    "    # drift away from the UNK definition.\n",
+    "    return vocab.doc2idx(tokens, unknown_word_index=UNK_IDX)\n",
+    "\n",
+    "# Creates a PyTorch tensor from a sentence against a given vocab\n",
+    "def variable_from_sent(sent, vocab):\n",
+    "    \"\"\"Return the token ids of `sent` as a LongTensor of shape (num_tokens, 1).\n",
+    "\n",
+    "    The tensor is created on the notebook-wide `device` (CUDA when available,\n",
+    "    otherwise CPU).\n",
+    "    \"\"\"\n",
+    "    token_ids = vectorize_sent(sent, vocab)\n",
+    "    # torch.autograd.Variable is a deprecated wrapper around plain tensors, so\n",
+    "    # the tensor is built directly, on the same device the models use.\n",
+    "    return torch.tensor(token_ids, dtype=torch.long, device=device).view(-1, 1)\n",
+    "\n",
+    "# Test\n",
+    "new_kopi = \"Is it love?\"\n",
+    "variable_from_sent(new_kopi, english_vocab)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text",
+    "id": "nXn7WpKGWIzg"
+   },
+   "source": [
+    "## Split into train and validation"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/",
+     "height": 85
+    },
+    "colab_type": "code",
+    "id": "34nfNaefWIzh",
+    "outputId": "41f4902b-3109-4924-ac76-d316c5501a8b"
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "(13049, 3)\n",
+      "(2303, 3)\n",
+      "nancy mengangkat kotak dan melihat isinya .\n",
+      "('nancy picked the box up and looked inside .', 'nancy mengangkat kotak dan melihat isinya .')\n"
+     ]
+    }
+   ],
+   "source": [
+    "from sklearn.model_selection import train_test_split\n",
+    "\n",
+    "# Fixed seed so the train/validation split -- and therefore the validation loss,\n",
+    "# the BLEU scores and the early-stopping decisions -- is the same on every run.\n",
+    "SPLIT_SEED = 42\n",
+    "df_train, df_val = train_test_split(df, test_size=0.15, random_state=SPLIT_SEED)\n",
+    "print(df_train.shape)\n",
+    "print(df_val.shape)\n",
+    "\n",
+    "df_train = df_train.reset_index(drop=True)\n",
+    "df_val = df_val.reset_index(drop=True)\n",
+    "\n",
+    "indo_tensors = df_train['Indonesian'].apply(lambda s: variable_from_sent(s, indo_vocab))\n",
+    "print(df_train.iloc[0]['Indonesian'])\n",
+    "\n",
+    "english_tensors = df_train['English'].apply(lambda s: variable_from_sent(s, english_vocab))\n",
+    "\n",
+    "# Each item in `sent_pairs` is one training example:\n",
+    "# (English tensor, Indonesian tensor).\n",
+    "sent_pairs = list(zip(english_tensors.values, indo_tensors.values))\n",
+    "\n",
+    "# The same training pairs as plain strings.\n",
+    "pairs = list(zip(df_train['English'], df_train['Indonesian']))\n",
+    "print(pairs[0])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/",
+     "height": 34
+    },
+    "colab_type": "code",
+    "id": "efvf_EetWIzn",
+    "outputId": "54523297-023a-4500-aea9-2fe33458b648"
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "('i know what david saw .', 'aku tahu apa yang david lihat .')\n"
+     ]
+    }
+   ],
+   "source": [
+    "def get_validation_pairs(df_val_in):\n",
+    "    \"\"\"Build the validation pairs of a frame with 'English' and 'Indonesian' columns.\n",
+    "\n",
+    "    Returns:\n",
+    "        A tuple (string_pairs, tensor_pairs). Both are lists of\n",
+    "        (English, Indonesian) tuples in row order: the first holds the raw\n",
+    "        sentences, the second the tensors produced by `variable_from_sent`.\n",
+    "    \"\"\"\n",
+    "    english_col = df_val_in['English']\n",
+    "    indo_col = df_val_in['Indonesian']\n",
+    "\n",
+    "    indo_tensor_col = indo_col.apply(lambda sentence: variable_from_sent(sentence, indo_vocab))\n",
+    "    english_tensor_col = english_col.apply(lambda sentence: variable_from_sent(sentence, english_vocab))\n",
+    "\n",
+    "    tensor_pairs = list(zip(english_tensor_col.values, indo_tensor_col.values))\n",
+    "    string_pairs = list(zip(english_col, indo_col))\n",
+    "    return string_pairs, tensor_pairs\n",
+    "\n",
+    "\n",
+    "val_sent_pairs, val_sent_tensor_pairs = get_validation_pairs(df_val)\n",
+    "print(val_sent_pairs[0])"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text",
+    "id": "s4gcyKGOWIz_"
+   },
+   "source": [
+    "## Define encoder and attention based decoder model "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "metadata": {
+    "colab": {},
+    "colab_type": "code",
+    "id": "nkjIwC9vWI0B"
+   },
+   "outputs": [],
+   "source": [
+    "class EncoderRNN(nn.Module):\n",
+    "    def __init__(self, input_size, hidden_size):\n",
+    "        super(EncoderRNN, self).__init__()\n",
+    "        self.hidden_size = hidden_size\n",
+    "\n",
+    "        self.embedding = nn.Embedding(input_size, hidden_size)\n",
+    "        self.gru = nn.GRU(hidden_size, hidden_size)\n",
+    "\n",
+    "    def forward(self, input, hidden):\n",
+    "        embedded = self.embedding(input).view(1, 1, -1)\n",
+    "        output = embedded\n",
+    "        output, hidden = self.gru(output, hidden)\n",
+    "        return output, hidden\n",
+    "\n",
+    "    def initHidden(self):\n",
+    "        return torch.zeros(1, 1, self.hidden_size, device=device)\n",
+    "\n",
+    "class AttnDecoderRNN(nn.Module):\n",
+    "    \"\"\"GRU decoder that attends over a fixed-length buffer of encoder outputs.\n",
+    "\n",
+    "    One forward call decodes one token. The attention layer emits `max_length`\n",
+    "    weights, so `encoder_outputs` must have `max_length` rows.\n",
+    "    \"\"\"\n",
+    "\n",
+    "    def __init__(self, hidden_size, output_size, dropout_p=0.1, max_length=MAX_LENGTH):\n",
+    "        \"\"\"Store the hyper-parameters and create the decoder layers.\"\"\"\n",
+    "        super().__init__()\n",
+    "        self.hidden_size = hidden_size\n",
+    "        self.output_size = output_size\n",
+    "        self.dropout_p = dropout_p\n",
+    "        self.max_length = max_length\n",
+    "\n",
+    "        self.embedding = nn.Embedding(output_size, hidden_size)\n",
+    "        # Attention scores are computed from [embedded token ; hidden state].\n",
+    "        self.attn = nn.Linear(hidden_size * 2, max_length)\n",
+    "        # Merges [embedded token ; attended context] back to hidden_size.\n",
+    "        self.attn_combine = nn.Linear(hidden_size * 2, hidden_size)\n",
+    "        self.dropout = nn.Dropout(dropout_p)\n",
+    "        self.gru = nn.GRU(hidden_size, hidden_size)\n",
+    "        self.out = nn.Linear(hidden_size, output_size)\n",
+    "\n",
+    "    def forward(self, input, hidden, encoder_outputs):\n",
+    "        \"\"\"Decode one step.\n",
+    "\n",
+    "        Returns:\n",
+    "            (log-probabilities over the output vocabulary, new hidden state,\n",
+    "            attention weights).\n",
+    "        \"\"\"\n",
+    "        embedded = self.dropout(self.embedding(input).view(1, 1, -1))\n",
+    "\n",
+    "        # Softmax-normalised attention weights over the encoder positions.\n",
+    "        attn_input = torch.cat((embedded[0], hidden[0]), 1)\n",
+    "        attn_weights = F.softmax(self.attn(attn_input), dim=1)\n",
+    "\n",
+    "        # Weighted sum of the encoder outputs.\n",
+    "        context = torch.bmm(attn_weights.unsqueeze(0),\n",
+    "                            encoder_outputs.unsqueeze(0))\n",
+    "\n",
+    "        combined = torch.cat((embedded[0], context[0]), 1)\n",
+    "        gru_input = F.relu(self.attn_combine(combined).unsqueeze(0))\n",
+    "        output, hidden = self.gru(gru_input, hidden)\n",
+    "\n",
+    "        log_probs = F.log_softmax(self.out(output[0]), dim=1)\n",
+    "        return log_probs, hidden, attn_weights\n",
+    "\n",
+    "    def initHidden(self):\n",
+    "        \"\"\"Return an all-zero hidden state of shape (1, 1, hidden_size) on `device`.\"\"\"\n",
+    "        return torch.zeros(1, 1, self.hidden_size, device=device)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text",
+    "id": "fy8wdwWLWI0F"
+   },
+   "source": [
+    "## Get training and validation set loss"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "metadata": {
+    "colab": {},
+    "colab_type": "code",
+    "id": "nDRV7_JHWI0H"
+   },
+   "outputs": [],
+   "source": [
+    "teacher_forcing_ratio = 0.5\n",
+    "\n",
+    "\n",
+    "def get_train_loss(input_tensor, target_tensor, encoder, decoder, encoder_optimizer, decoder_optimizer, criterion, max_length=MAX_LENGTH):\n",
+    "    encoder_hidden = encoder.initHidden()\n",
+    "\n",
+    "    encoder_optimizer.zero_grad()\n",
+    "    decoder_optimizer.zero_grad()\n",
+    "\n",
+    "    input_length = input_tensor.size(0)\n",
+    "    target_length = target_tensor.size(0)\n",
+    "    \n",
+    "    encoder_outputs = torch.zeros(max_length, encoder.hidden_size, device=device)\n",
+    "\n",
+    "    loss = 0\n",
+    "\n",
+    "    for ei in range(input_length):\n",
+    "        encoder_output, encoder_hidden = encoder(\n",
+    "            input_tensor[ei], encoder_hidden)\n",
+    "        encoder_outputs[ei] = encoder_output[0, 0]\n",
+    "\n",
+    "    decoder_input = torch.tensor([[SOS_token]], device=device)\n",
+    "\n",
+    "    decoder_hidden = encoder_hidden\n",
+    "\n",
+    "    use_teacher_forcing = True if random.random() < teacher_forcing_ratio else False\n",
+    "\n",
+    "    if use_teacher_forcing:\n",
+    "        # Teacher forcing: Feed the target as the next input\n",
+    "        for di in range(target_length):\n",
+    "            decoder_output, decoder_hidden, decoder_attention = decoder(\n",
+    "                decoder_input, decoder_hidden, encoder_outputs)\n",
+    "            loss += criterion(decoder_output, target_tensor[di])\n",
+    "            decoder_input = target_tensor[di]  # Teacher forcing\n",
+    "\n",
+    "    else:\n",
+    "        # Without teacher forcing: use its own predictions as the next input\n",
+    "        for di in range(target_length):\n",
+    "            decoder_output, decoder_hidden, decoder_attention = decoder(\n",
+    "                decoder_input, decoder_hidden, encoder_outputs)\n",
+    "            topv, topi = decoder_output.topk(1)\n",
+    "            decoder_input = topi.squeeze().detach()  # detach from history as input\n",
+    "\n",
+    "            loss += criterion(decoder_output, target_tensor[di])\n",
+    "            if decoder_input.item() == EOS_token:\n",
+    "                break\n",
+    "\n",
+    "    loss.backward()\n",
+    "\n",
+    "    encoder_optimizer.step()\n",
+    "    decoder_optimizer.step()\n",
+    "\n",
+    "    return loss.item() / target_length\n",
+    "\n",
+    "def get_validation_loss(input_tensor, target_tensor, encoder, decoder, criterion, max_length=MAX_LENGTH):\n",
+    "    \"\"\"Return the length-normalised loss of one validation pair, without training.\n",
+    "\n",
+    "    The decoder always feeds back its own top-1 prediction and stops once it\n",
+    "    predicts EOS_token. Both models are switched to eval mode for the duration\n",
+    "    of the call (so dropout is inactive) and their previous train/eval mode is\n",
+    "    restored afterwards. No gradients are recorded for either model.\n",
+    "\n",
+    "    Args:\n",
+    "        input_tensor: source token ids, indexed by position along dim 0. Must\n",
+    "            not be longer than `max_length`.\n",
+    "        target_tensor: target token ids, indexed the same way.\n",
+    "        encoder, decoder: the models to evaluate.\n",
+    "        criterion: loss applied to each decoder output and target token.\n",
+    "        max_length: number of rows of the encoder-output buffer.\n",
+    "\n",
+    "    Returns:\n",
+    "        The summed loss divided by the target length, as a Python float.\n",
+    "    \"\"\"\n",
+    "    input_length = input_tensor.size(0)\n",
+    "    target_length = target_tensor.size(0)\n",
+    "\n",
+    "    # Remember the current modes so training resumes exactly as it was.\n",
+    "    encoder_was_training = encoder.training\n",
+    "    decoder_was_training = decoder.training\n",
+    "    encoder.eval()\n",
+    "    decoder.eval()\n",
+    "\n",
+    "    total_loss = 0\n",
+    "\n",
+    "    try:\n",
+    "        # The encoder pass is inside no_grad as well, so no autograd graph is\n",
+    "        # built for it during validation.\n",
+    "        with torch.no_grad():\n",
+    "            encoder_hidden = encoder.initHidden()\n",
+    "            encoder_outputs = torch.zeros(max_length, encoder.hidden_size, device=device)\n",
+    "\n",
+    "            for ei in range(input_length):\n",
+    "                encoder_output, encoder_hidden = encoder(\n",
+    "                    input_tensor[ei], encoder_hidden)\n",
+    "                encoder_outputs[ei] = encoder_output[0, 0]\n",
+    "\n",
+    "            decoder_input = torch.tensor([[SOS_token]], device=device)\n",
+    "            decoder_hidden = encoder_hidden\n",
+    "\n",
+    "            for di in range(target_length):\n",
+    "                decoder_output, decoder_hidden, decoder_attention = decoder(\n",
+    "                    decoder_input, decoder_hidden, encoder_outputs)\n",
+    "                topv, topi = decoder_output.topk(1)\n",
+    "                decoder_input = topi.squeeze().detach()\n",
+    "\n",
+    "                loss = criterion(decoder_output, target_tensor[di])\n",
+    "                total_loss += float(loss.item())\n",
+    "                if decoder_input.item() == EOS_token:\n",
+    "                    break\n",
+    "    finally:\n",
+    "        encoder.train(encoder_was_training)\n",
+    "        decoder.train(decoder_was_training)\n",
+    "\n",
+    "    return total_loss / target_length"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text",
+    "id": "ePf3q2gFWI0R"
+   },
+   "source": [
+    "## Utilities - required for training"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "metadata": {
+    "colab": {},
+    "colab_type": "code",
+    "id": "9jQVkj64WI0T"
+   },
+   "outputs": [],
+   "source": [
+    "import time\n",
+    "import math\n",
+    "\n",
+    "\n",
+    "def asMinutes(s):\n",
+    "    m = math.floor(s / 60)\n",
+    "    s -= m * 60\n",
+    "    return '%dm %ds' % (m, s)\n",
+    "\n",
+    "\n",
+    "def timeSince(since, percent):\n",
+    "    \"\"\"Return 'elapsed (- remaining)' for a job started at `since`.\n",
+    "\n",
+    "    `since` is a time.time() timestamp and `percent` is the completed fraction\n",
+    "    of the job; the remaining time is extrapolated linearly from the two.\n",
+    "    \"\"\"\n",
+    "    elapsed = time.time() - since\n",
+    "    estimated_total = elapsed / (percent)\n",
+    "    remaining = estimated_total - elapsed\n",
+    "    return '%s (- %s)' % (asMinutes(elapsed), asMinutes(remaining))\n",
+    "\n",
+    "\n",
+    "SAVE_PATH = 'results'\n",
+    "\n",
+    "if not os.path.exists(SAVE_PATH):\n",
+    "  os.makedirs(SAVE_PATH)\n",
+    "\n",
+    "import matplotlib.pyplot as plt\n",
+    "plt.switch_backend('agg')\n",
+    "import matplotlib.ticker as ticker\n",
+    "import numpy as np\n",
+    "\n",
+    "\n",
+    "def showPlot(points):\n",
+    "    \"\"\"Plot `points` as a line chart with y-axis major ticks every 0.2.\"\"\"\n",
+    "    # plt.subplots() already creates the figure; the extra plt.figure() call that\n",
+    "    # used to precede it left an unused, empty figure open on every call.\n",
+    "    fig, ax = plt.subplots()\n",
+    "    # this locator puts ticks at regular intervals\n",
+    "    loc = ticker.MultipleLocator(base=0.2)\n",
+    "    ax.yaxis.set_major_locator(loc)\n",
+    "    ax.plot(points)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text",
+    "id": "xDZRZpqhWI0W"
+   },
+   "source": [
+    "## Training loop and get evaluation result"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
+   "metadata": {
+    "colab": {},
+    "colab_type": "code",
+    "id": "IJx18gLeWI0Y"
+   },
+   "outputs": [],
+   "source": [
+    "def trainIters(encoder, decoder, n_iters, batch_size = 1, print_every=1000, save_every=1000, plot_every=100, learning_rate=0.0001):\n",
+    "    start = time.time()\n",
+    "    plot_losses = []\n",
+    "    val_losses = []\n",
+    "    bleu_scores = []\n",
+    "    \n",
+    "    print_loss_total = 0  # Reset every print_every\n",
+    "    plot_loss_total = 0  # Reset every plot_every\n",
+    "\n",
+    "    encoder_optimizer = optim.Adam(encoder.parameters(), lr=learning_rate)\n",
+    "    decoder_optimizer = optim.Adam(decoder.parameters(), lr=learning_rate)\n",
+    "    #training_pairs = [sent_pairs[i] for i in range(n_iters)]\n",
+    "    training_pairs = [random.sample(sent_pairs, batch_size) for i in range(n_iters)]\n",
+    "\n",
+    "\n",
+    "    criterion = nn.NLLLoss()\n",
+    "    MAX_PATIENCE = 50\n",
+    "    patience = MAX_PATIENCE  \n",
+    "    prev_val_loss =lowest_so_far = prev_bleu =  999\n",
+    "    highest_so_far = -np.inf # for bleu\n",
+    "    stopping_criteria_on = True\n",
+    "    using_bleu_stopping = False\n",
+    "\n",
+    "    for iter in range(1, n_iters + 1):\n",
+    "        training_pair = training_pairs[iter - 1]\n",
+    "\n",
+    "        input_tensor = training_pair[0][0]\n",
+    "        target_tensor = training_pair[0][1]\n",
+    "\n",
+    "\n",
+    "        loss = get_train_loss(input_tensor, target_tensor, encoder, decoder, encoder_optimizer, decoder_optimizer, criterion)\n",
+    "        print_loss_total += loss\n",
+    "        plot_loss_total += loss\n",
+    "\n",
+    "        stopping_delta = 0.001  # if improvement is not more than this amount after n tries, exit the loop\n",
+    "\n",
+    "\n",
+    "\n",
+    "        if iter % print_every == 0:\n",
+    "            print_loss_avg = print_loss_total / print_every\n",
+    "            print_loss_total = 0\n",
+    "            print('Training loss: %s (%d %d%%) %.4f' % (timeSince(start, iter / n_iters),\n",
+    "                                         iter, iter / n_iters * 100, print_loss_avg))\n",
+    "\n",
+    "            total_val_loss = 0\n",
+    "            total_bleu_score = 0\n",
+    "            total_val_pairs = len(val_sent_tensor_pairs)\n",
+    "            \n",
+    "            for itr in range(0, len(val_sent_tensor_pairs)):\n",
+    "                val_input_tensor = val_sent_tensor_pairs[itr][0]\n",
+    "                val_target_tensor = val_sent_tensor_pairs[itr][1]\n",
+    "                reference_sent = val_sent_pairs[itr][1]\n",
+    "                candidate_sent = translate(val_sent_pairs[itr][0], encoder, decoder)\n",
+    "                bleu_score = calculate_bleu_score(reference_sent,candidate_sent)\n",
+    "                total_bleu_score += bleu_score\n",
+    "                val_loss = get_validation_loss(val_input_tensor, val_target_tensor, encoder, decoder, criterion)\n",
+    "                total_val_loss += val_loss\n",
+    "\n",
+    "            avg_val_loss = total_val_loss / total_val_pairs\n",
+    "            val_losses.append(avg_val_loss)\n",
+    "            avg_bleu_scores = total_bleu_score / total_val_pairs\n",
+    "            bleu_scores.append(avg_bleu_scores)\n",
+    "            \n",
+    "            print('Validation loss: %s (%d %d%%) %.4f' % (timeSince(start, iter / n_iters),\n",
+    "                                                          iter, iter / n_iters * 100, avg_val_loss))\n",
+    "            print('Bleu scores: %s (%d %d%%) %.8f' % (timeSince(start, iter / n_iters),\n",
+    "                                                          iter, iter / n_iters * 100, avg_bleu_scores))\n",
+    "            if  stopping_criteria_on:\n",
+    "                if not using_bleu_stopping:\n",
+    "                    if (prev_val_loss - avg_val_loss) > stopping_delta and avg_val_loss < lowest_so_far:\n",
+    "                        print(f\"Improvement in validation loss, saving model. Prev {prev_val_loss} Curr {avg_val_loss}\")\n",
+    "                        lowest_so_far = avg_val_loss\n",
+    "                        encoder_save_path = '%s/%s.pth' % (SAVE_PATH, 'best_encoder')\n",
+    "                        print('save encoder weights to ', encoder_save_path)\n",
+    "                        torch.save(encoder.state_dict(), encoder_save_path)\n",
+    "                        decoder_save_path = '%s/%s.pth' % (SAVE_PATH, 'best_decoder')\n",
+    "                        print('save decoder weights to ', decoder_save_path)\n",
+    "                        torch.save(decoder.state_dict(), decoder_save_path)\n",
+    "                        patience = MAX_PATIENCE # reset to max\n",
+    "                    else:\n",
+    "                        print(f\"No improvement in validation loss, losing patience {patience}\")\n",
+    "                        patience -= 1\n",
+    "\n",
+    "                    if patience == 0:  # break out of training\n",
+    "                        break\n",
+    "\n",
+    "                    prev_val_loss = avg_val_loss\n",
+    "                else: # bleu\n",
+    "                    if (avg_bleu_scores - prev_bleu) > stopping_delta and avg_bleu_scores > highest_so_far: \n",
+    "                        print(f\"Improvement in bleu scores, saving model. Prev {prev_bleu} Curr {avg_bleu_scores}\")\n",
+    "                        highest_so_far = avg_bleu_scores\n",
+    "                        encoder_save_path = '%s/%s.pth' % (SAVE_PATH, 'best_encoder')\n",
+    "                        print('save encoder weights to ', encoder_save_path)\n",
+    "                        torch.save(encoder.state_dict(), encoder_save_path)\n",
+    "                        decoder_save_path = '%s/%s.pth' % (SAVE_PATH, 'best_decoder')\n",
+    "                        print('save decoder weights to ', decoder_save_path)\n",
+    "                        torch.save(decoder.state_dict(), decoder_save_path)\n",
+    "                        patience = MAX_PATIENCE # reset to max\n",
+    "                    else:\n",
+    "                        print(f\"No improvement in bleu scores, losing patience {patience}\")\n",
+    "                        patience -= 1\n",
+    "\n",
+    "                    if patience == 0:  # break out of training\n",
+    "                        break \n",
+    "                    \n",
+    "                    prev_bleu = avg_bleu_scores\n",
+    "                \n",
+    "\n",
+    "            print(\"##########################################################\")\n",
+    "\n",
+    "        if iter % plot_every == 0:\n",
+    "            plot_loss_avg = plot_loss_total / plot_every\n",
+    "            plot_losses.append(plot_loss_avg)\n",
+    "            plot_loss_total = 0\n",
+    "        \n",
+    "        # save trained encoder and decoder\n",
+    "        if iter % save_every == 0:\n",
+    "            encoder_save_path = '%s/%s-%d.pth' % (SAVE_PATH, 'encoder', iter)\n",
+    "            print('save encoder weights to ', encoder_save_path)\n",
+    "            torch.save(encoder.state_dict(), encoder_save_path)\n",
+    "            decoder_save_path = '%s/%s-%d.pth' % (SAVE_PATH, 'decoder', iter)\n",
+    "            print('save decoder weights to ', decoder_save_path)\n",
+    "            torch.save(decoder.state_dict(), decoder_save_path)\n",
+    "\n",
+    "    showPlot(plot_losses)\n",
+    "    showPlot(val_losses)\n",
+    "    showPlot(bleu_scores)\n",
+    "    return plot_losses, val_losses, bleu_scores\n",
+    "\n",
+    "\n",
+    "def evaluate(encoder, decoder, sentence, max_length=MAX_LENGTH):\n",
+    "    \"\"\"Greedily decode a translation of `sentence`.\n",
+    "\n",
+    "    The sentence is converted with `variable_from_sent(sentence, english_vocab)`,\n",
+    "    fed through `encoder` one position at a time, and `decoder` is then run for\n",
+    "    at most `max_length` steps, always feeding back its top-scoring token.\n",
+    "\n",
+    "    Both networks are put in eval mode for the duration of the call so that\n",
+    "    dropout does not randomize the output (trainIters reaches this function\n",
+    "    through translate() in the middle of training, when the models may still\n",
+    "    be in training mode). Their previous train/eval state is restored before\n",
+    "    returning, so training continues unaffected.\n",
+    "\n",
+    "    Args:\n",
+    "        encoder: encoder module; must provide initHidden() and hidden_size.\n",
+    "        decoder: attention decoder module returning (output, hidden, attention).\n",
+    "        sentence: source sentence string.\n",
+    "        max_length: maximum number of decoding steps; also the number of rows\n",
+    "            allocated for the encoder outputs.\n",
+    "\n",
+    "    Returns:\n",
+    "        (decoded_words, decoder_attentions): the list of output tokens (ending\n",
+    "        with '</s>' when EOS was produced) and the attention rows for the\n",
+    "        decoding steps that were actually run.\n",
+    "    \"\"\"\n",
+    "    # Remember the current modes so a call made mid-training does not leave\n",
+    "    # the models stuck in eval mode afterwards.\n",
+    "    encoder_was_training = encoder.training\n",
+    "    decoder_was_training = decoder.training\n",
+    "    encoder.eval()\n",
+    "    decoder.eval()\n",
+    "    try:\n",
+    "        with torch.no_grad():\n",
+    "            input_tensor = variable_from_sent(sentence, english_vocab)\n",
+    "            input_length = input_tensor.size()[0]\n",
+    "            encoder_hidden = encoder.initHidden()\n",
+    "\n",
+    "            # One row per source position; rows past the input stay zero.\n",
+    "            encoder_outputs = torch.zeros(max_length, encoder.hidden_size, device=device)\n",
+    "\n",
+    "            for ei in range(input_length):\n",
+    "                encoder_output, encoder_hidden = encoder(input_tensor[ei],\n",
+    "                                                         encoder_hidden)\n",
+    "                encoder_outputs[ei] += encoder_output[0, 0]\n",
+    "\n",
+    "            decoder_input = torch.tensor([[SOS_token]], device=device)  # SOS\n",
+    "\n",
+    "            # The decoder starts from the encoder's final hidden state.\n",
+    "            decoder_hidden = encoder_hidden\n",
+    "\n",
+    "            decoded_words = []\n",
+    "            decoder_attentions = torch.zeros(max_length, max_length)\n",
+    "\n",
+    "            for di in range(max_length):\n",
+    "                decoder_output, decoder_hidden, decoder_attention = decoder(\n",
+    "                    decoder_input, decoder_hidden, encoder_outputs)\n",
+    "                decoder_attentions[di] = decoder_attention.data\n",
+    "                topv, topi = decoder_output.data.topk(1)\n",
+    "                if topi.item() == EOS_token:\n",
+    "                    decoded_words.append('</s>')\n",
+    "                    break\n",
+    "                else:\n",
+    "                    decoded_words.append(indo_vocab.id2token[topi.item()])\n",
+    "\n",
+    "                # Greedy decoding: the chosen token becomes the next input.\n",
+    "                decoder_input = topi.squeeze().detach()\n",
+    "\n",
+    "            return decoded_words, decoder_attentions[:di + 1]\n",
+    "    finally:\n",
+    "        encoder.train(encoder_was_training)\n",
+    "        decoder.train(decoder_was_training)\n",
+    "\n",
+    "def evaluateRandomly(encoder, decoder, n=10):\n",
+    "    \"\"\"Print `n` randomly drawn sentence pairs next to the model's output.\n",
+    "\n",
+    "    Each sample is picked from the module-level `pairs`; the source is shown\n",
+    "    after '>', the reference after '=', and the decoded words after '<',\n",
+    "    followed by an empty line.\n",
+    "    \"\"\"\n",
+    "    for _ in range(n):\n",
+    "        sample = random.choice(pairs)\n",
+    "        source_text = sample[0]\n",
+    "        print('>', source_text)\n",
+    "        print('=', sample[1])\n",
+    "        decoded, _attn = evaluate(encoder, decoder, source_text)\n",
+    "        print('<', ' '.join(decoded))\n",
+    "        print()\n",
+    "        \n",
+    "def translate(input_sentence, enc, dec):\n",
+    "    \"\"\"Return the model's translation of `input_sentence` as a single string.\n",
+    "\n",
+    "    Runs evaluate() with the given encoder/decoder and joins the decoded\n",
+    "    tokens with single spaces; the attention weights are discarded.\n",
+    "    \"\"\"\n",
+    "    tokens, _ = evaluate(enc, dec, input_sentence)\n",
+    "    return ' '.join(tokens)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text",
+    "id": "uERT7riuWI0b"
+   },
+   "source": [
+    "## Perform training"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 17,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/",
+     "height": 8466
+    },
+    "colab_type": "code",
+    "id": "vLkx3FDdWI0c",
+    "outputId": "0baaaed0-b3a4-486a-c560-f4373b38fb99"
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Training loss: 4m 52s (- 361m 5s) (1000 1%) 4.2309\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "c:\\Users\\elisa\\anaconda3\\envs\\deeplearning\\lib\\site-packages\\nltk\\translate\\bleu_score.py:552: UserWarning: \n",
+      "The hypothesis contains 0 counts of 2-gram overlaps.\n",
+      "Therefore the BLEU score evaluates to 0, independently of\n",
+      "how many N-gram overlaps of lower order it contains.\n",
+      "Consider using lower n-gram order or use SmoothingFunction()\n",
+      "  warnings.warn(_msg)\n",
+      "c:\\Users\\elisa\\anaconda3\\envs\\deeplearning\\lib\\site-packages\\nltk\\translate\\bleu_score.py:552: UserWarning: \n",
+      "The hypothesis contains 0 counts of 3-gram overlaps.\n",
+      "Therefore the BLEU score evaluates to 0, independently of\n",
+      "how many N-gram overlaps of lower order it contains.\n",
+      "Consider using lower n-gram order or use SmoothingFunction()\n",
+      "  warnings.warn(_msg)\n",
+      "c:\\Users\\elisa\\anaconda3\\envs\\deeplearning\\lib\\site-packages\\nltk\\translate\\bleu_score.py:552: UserWarning: \n",
+      "The hypothesis contains 0 counts of 4-gram overlaps.\n",
+      "Therefore the BLEU score evaluates to 0, independently of\n",
+      "how many N-gram overlaps of lower order it contains.\n",
+      "Consider using lower n-gram order or use SmoothingFunction()\n",
+      "  warnings.warn(_msg)\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Validation loss: 6m 45s (- 500m 15s) (1000 1%) 4.0005\n",
+      "Bleu scores: 6m 45s (- 500m 15s) (1000 1%) 0.03154827\n",
+      "Improvement in validation loss, saving model. Prev 999 Curr 4.000509120991064\n",
+      "save encoder weights to  results/best_encoder.pth\n",
+      "save decoder weights to  results/best_decoder.pth\n",
+      "##########################################################\n",
+      "save encoder weights to  results/encoder-1000.pth\n",
+      "save decoder weights to  results/decoder-1000.pth\n",
+      "Training loss: 10m 20s (- 377m 16s) (2000 2%) 3.9650\n",
+      "Validation loss: 12m 6s (- 442m 5s) (2000 2%) 3.7445\n",
+      "Bleu scores: 12m 6s (- 442m 5s) (2000 2%) 0.04307411\n",
+      "Improvement in validation loss, saving model. Prev 4.000509120991064 Curr 3.7444545816390606\n",
+      "save encoder weights to  results/best_encoder.pth\n",
+      "save decoder weights to  results/best_decoder.pth\n",
+      "##########################################################\n",
+      "save encoder weights to  results/encoder-2000.pth\n",
+      "save decoder weights to  results/decoder-2000.pth\n",
+      "Training loss: 15m 50s (- 380m 7s) (3000 4%) 3.7754\n",
+      "Validation loss: 18m 1s (- 432m 45s) (3000 4%) 3.7098\n",
+      "Bleu scores: 18m 1s (- 432m 45s) (3000 4%) 0.05259536\n",
+      "Improvement in validation loss, saving model. Prev 3.7444545816390606 Curr 3.7098362678888783\n",
+      "save encoder weights to  results/best_encoder.pth\n",
+      "save decoder weights to  results/best_decoder.pth\n",
+      "##########################################################\n",
+      "save encoder weights to  results/encoder-3000.pth\n",
+      "save decoder weights to  results/decoder-3000.pth\n",
+      "Training loss: 23m 20s (- 414m 18s) (4000 5%) 3.4911\n",
+      "Validation loss: 26m 9s (- 464m 17s) (4000 5%) 3.5552\n",
+      "Bleu scores: 26m 9s (- 464m 17s) (4000 5%) 0.06098530\n",
+      "Improvement in validation loss, saving model. Prev 3.7098362678888783 Curr 3.555194068107049\n",
+      "save encoder weights to  results/best_encoder.pth\n",
+      "save decoder weights to  results/best_decoder.pth\n",
+      "##########################################################\n",
+      "save encoder weights to  results/encoder-4000.pth\n",
+      "save decoder weights to  results/decoder-4000.pth\n",
+      "Training loss: 30m 51s (- 431m 59s) (5000 6%) 3.3821\n",
+      "Validation loss: 33m 46s (- 472m 57s) (5000 6%) 3.3999\n",
+      "Bleu scores: 33m 46s (- 472m 57s) (5000 6%) 0.06369386\n",
+      "Improvement in validation loss, saving model. Prev 3.555194068107049 Curr 3.3999293611511723\n",
+      "save encoder weights to  results/best_encoder.pth\n",
+      "save decoder weights to  results/best_decoder.pth\n",
+      "##########################################################\n",
+      "save encoder weights to  results/encoder-5000.pth\n",
+      "save decoder weights to  results/decoder-5000.pth\n",
+      "Training loss: 38m 53s (- 447m 15s) (6000 8%) 3.2100\n",
+      "Validation loss: 41m 52s (- 481m 39s) (6000 8%) 3.3394\n",
+      "Bleu scores: 41m 52s (- 481m 39s) (6000 8%) 0.06984730\n",
+      "Improvement in validation loss, saving model. Prev 3.3999293611511723 Curr 3.339417389060846\n",
+      "save encoder weights to  results/best_encoder.pth\n",
+      "save decoder weights to  results/best_decoder.pth\n",
+      "##########################################################\n",
+      "save encoder weights to  results/encoder-6000.pth\n",
+      "save decoder weights to  results/decoder-6000.pth\n",
+      "Training loss: 47m 14s (- 458m 51s) (7000 9%) 3.1873\n",
+      "Validation loss: 50m 24s (- 489m 43s) (7000 9%) 3.1959\n",
+      "Bleu scores: 50m 24s (- 489m 43s) (7000 9%) 0.07534899\n",
+      "Improvement in validation loss, saving model. Prev 3.339417389060846 Curr 3.1959179781729814\n",
+      "save encoder weights to  results/best_encoder.pth\n",
+      "save decoder weights to  results/best_decoder.pth\n",
+      "##########################################################\n",
+      "save encoder weights to  results/encoder-7000.pth\n",
+      "save decoder weights to  results/decoder-7000.pth\n",
+      "Training loss: 55m 44s (- 466m 47s) (8000 10%) 2.9833\n",
+      "Validation loss: 58m 38s (- 491m 10s) (8000 10%) 3.0740\n",
+      "Bleu scores: 58m 38s (- 491m 10s) (8000 10%) 0.08536996\n",
+      "Improvement in validation loss, saving model. Prev 3.1959179781729814 Curr 3.073970045550703\n",
+      "save encoder weights to  results/best_encoder.pth\n",
+      "save decoder weights to  results/best_decoder.pth\n",
+      "##########################################################\n",
+      "save encoder weights to  results/encoder-8000.pth\n",
+      "save decoder weights to  results/decoder-8000.pth\n",
+      "Training loss: 63m 56s (- 468m 52s) (9000 12%) 2.8639\n",
+      "Validation loss: 66m 58s (- 491m 10s) (9000 12%) 3.0076\n",
+      "Bleu scores: 66m 58s (- 491m 10s) (9000 12%) 0.08991626\n",
+      "Improvement in validation loss, saving model. Prev 3.073970045550703 Curr 3.007592470861209\n",
+      "save encoder weights to  results/best_encoder.pth\n",
+      "save decoder weights to  results/best_decoder.pth\n",
+      "##########################################################\n",
+      "save encoder weights to  results/encoder-9000.pth\n",
+      "save decoder weights to  results/decoder-9000.pth\n",
+      "Training loss: 72m 10s (- 469m 10s) (10000 13%) 2.7221\n",
+      "Validation loss: 75m 21s (- 489m 48s) (10000 13%) 2.9584\n",
+      "Bleu scores: 75m 21s (- 489m 48s) (10000 13%) 0.09578874\n",
+      "Improvement in validation loss, saving model. Prev 3.007592470861209 Curr 2.9583699655412103\n",
+      "save encoder weights to  results/best_encoder.pth\n",
+      "save decoder weights to  results/best_decoder.pth\n",
+      "##########################################################\n",
+      "save encoder weights to  results/encoder-10000.pth\n",
+      "save decoder weights to  results/decoder-10000.pth\n",
+      "Training loss: 80m 48s (- 470m 8s) (11000 14%) 2.6615\n",
+      "Validation loss: 84m 2s (- 488m 59s) (11000 14%) 2.9471\n",
+      "Bleu scores: 84m 2s (- 488m 59s) (11000 14%) 0.09783871\n",
+      "Improvement in validation loss, saving model. Prev 2.9583699655412103 Curr 2.947079241857556\n",
+      "save encoder weights to  results/best_encoder.pth\n",
+      "save decoder weights to  results/best_decoder.pth\n",
+      "##########################################################\n",
+      "save encoder weights to  results/encoder-11000.pth\n",
+      "save decoder weights to  results/decoder-11000.pth\n",
+      "Training loss: 89m 24s (- 469m 25s) (12000 16%) 2.6592\n",
+      "Validation loss: 92m 38s (- 486m 23s) (12000 16%) 2.7873\n",
+      "Bleu scores: 92m 38s (- 486m 23s) (12000 16%) 0.10490669\n",
+      "Improvement in validation loss, saving model. Prev 2.947079241857556 Curr 2.7872625701037794\n",
+      "save encoder weights to  results/best_encoder.pth\n",
+      "save decoder weights to  results/best_decoder.pth\n",
+      "##########################################################\n",
+      "save encoder weights to  results/encoder-12000.pth\n",
+      "save decoder weights to  results/decoder-12000.pth\n",
+      "Training loss: 97m 57s (- 467m 10s) (13000 17%) 2.5483\n",
+      "Validation loss: 101m 3s (- 482m 0s) (13000 17%) 2.7313\n",
+      "Bleu scores: 101m 3s (- 482m 0s) (13000 17%) 0.10702588\n",
+      "Improvement in validation loss, saving model. Prev 2.7872625701037794 Curr 2.7312805779151867\n",
+      "save encoder weights to  results/best_encoder.pth\n",
+      "save decoder weights to  results/best_decoder.pth\n",
+      "##########################################################\n",
+      "save encoder weights to  results/encoder-13000.pth\n",
+      "save decoder weights to  results/decoder-13000.pth\n",
+      "Training loss: 106m 1s (- 461m 56s) (14000 18%) 2.4441\n",
+      "Validation loss: 166m 8s (- 723m 55s) (14000 18%) 2.6903\n",
+      "Bleu scores: 166m 8s (- 723m 55s) (14000 18%) 0.11510809\n",
+      "Improvement in validation loss, saving model. Prev 2.7312805779151867 Curr 2.6903174619521613\n",
+      "save encoder weights to  results/best_encoder.pth\n",
+      "save decoder weights to  results/best_decoder.pth\n",
+      "##########################################################\n",
+      "save encoder weights to  results/encoder-14000.pth\n",
+      "save decoder weights to  results/decoder-14000.pth\n",
+      "Training loss: 170m 58s (- 683m 55s) (15000 20%) 2.2965\n",
+      "Validation loss: 172m 55s (- 691m 43s) (15000 20%) 2.5950\n",
+      "Bleu scores: 172m 55s (- 691m 43s) (15000 20%) 0.12714536\n",
+      "Improvement in validation loss, saving model. Prev 2.6903174619521613 Curr 2.5950060204451675\n",
+      "save encoder weights to  results/best_encoder.pth\n",
+      "save decoder weights to  results/best_decoder.pth\n",
+      "##########################################################\n",
+      "save encoder weights to  results/encoder-15000.pth\n",
+      "save decoder weights to  results/decoder-15000.pth\n",
+      "Training loss: 176m 31s (- 650m 54s) (16000 21%) 2.2813\n",
+      "Validation loss: 178m 22s (- 657m 44s) (16000 21%) 2.4976\n",
+      "Bleu scores: 178m 22s (- 657m 44s) (16000 21%) 0.13238018\n",
+      "Improvement in validation loss, saving model. Prev 2.5950060204451675 Curr 2.497571861260076\n",
+      "save encoder weights to  results/best_encoder.pth\n",
+      "save decoder weights to  results/best_decoder.pth\n",
+      "##########################################################\n",
+      "save encoder weights to  results/encoder-16000.pth\n",
+      "save decoder weights to  results/decoder-16000.pth\n",
+      "Training loss: 182m 3s (- 621m 7s) (17000 22%) 2.1507\n",
+      "Validation loss: 183m 53s (- 627m 24s) (17000 22%) 2.4512\n",
+      "Bleu scores: 183m 53s (- 627m 24s) (17000 22%) 0.14081074\n",
+      "Improvement in validation loss, saving model. Prev 2.497571861260076 Curr 2.451238969365191\n",
+      "save encoder weights to  results/best_encoder.pth\n",
+      "save decoder weights to  results/best_decoder.pth\n",
+      "##########################################################\n",
+      "save encoder weights to  results/encoder-17000.pth\n",
+      "save decoder weights to  results/decoder-17000.pth\n",
+      "Training loss: 187m 26s (- 593m 33s) (18000 24%) 2.0866\n",
+      "Validation loss: 189m 17s (- 599m 25s) (18000 24%) 2.3845\n",
+      "Bleu scores: 189m 17s (- 599m 25s) (18000 24%) 0.15000098\n",
+      "Improvement in validation loss, saving model. Prev 2.451238969365191 Curr 2.384489413921074\n",
+      "save encoder weights to  results/best_encoder.pth\n",
+      "save decoder weights to  results/best_decoder.pth\n",
+      "##########################################################\n",
+      "save encoder weights to  results/encoder-18000.pth\n",
+      "save decoder weights to  results/decoder-18000.pth\n",
+      "Training loss: 192m 50s (- 568m 22s) (19000 25%) 1.9876\n",
+      "Validation loss: 194m 40s (- 573m 47s) (19000 25%) 2.3249\n",
+      "Bleu scores: 194m 40s (- 573m 47s) (19000 25%) 0.15965307\n",
+      "Improvement in validation loss, saving model. Prev 2.384489413921074 Curr 2.324945202636609\n",
+      "save encoder weights to  results/best_encoder.pth\n",
+      "save decoder weights to  results/best_decoder.pth\n",
+      "##########################################################\n",
+      "save encoder weights to  results/encoder-19000.pth\n",
+      "save decoder weights to  results/decoder-19000.pth\n",
+      "Training loss: 198m 15s (- 545m 13s) (20000 26%) 1.9923\n",
+      "Validation loss: 200m 16s (- 550m 44s) (20000 26%) 2.2664\n",
+      "Bleu scores: 200m 16s (- 550m 44s) (20000 26%) 0.16310327\n",
+      "Improvement in validation loss, saving model. Prev 2.324945202636609 Curr 2.2663642665737243\n",
+      "save encoder weights to  results/best_encoder.pth\n",
+      "save decoder weights to  results/best_decoder.pth\n",
+      "##########################################################\n",
+      "save encoder weights to  results/encoder-20000.pth\n",
+      "save decoder weights to  results/decoder-20000.pth\n",
+      "Training loss: 203m 57s (- 524m 27s) (21000 28%) 1.8699\n",
+      "Validation loss: 206m 7s (- 530m 3s) (21000 28%) 2.2703\n",
+      "Bleu scores: 206m 7s (- 530m 3s) (21000 28%) 0.17257318\n",
+      "No improvement in validation loss, losing patience 50\n",
+      "##########################################################\n",
+      "save encoder weights to  results/encoder-21000.pth\n",
+      "save decoder weights to  results/decoder-21000.pth\n",
+      "Training loss: 209m 59s (- 505m 54s) (22000 29%) 1.7955\n",
+      "Validation loss: 212m 7s (- 511m 1s) (22000 29%) 2.1703\n",
+      "Bleu scores: 212m 7s (- 511m 1s) (22000 29%) 0.17884532\n",
+      "Improvement in validation loss, saving model. Prev 2.2702700825915545 Curr 2.1703052959574354\n",
+      "save encoder weights to  results/best_encoder.pth\n",
+      "save decoder weights to  results/best_decoder.pth\n",
+      "##########################################################\n",
+      "save encoder weights to  results/encoder-22000.pth\n",
+      "save decoder weights to  results/decoder-22000.pth\n",
+      "Training loss: 215m 49s (- 487m 57s) (23000 30%) 1.7531\n",
+      "Validation loss: 217m 41s (- 492m 11s) (23000 30%) 2.1149\n",
+      "Bleu scores: 217m 41s (- 492m 11s) (23000 30%) 0.18669578\n",
+      "Improvement in validation loss, saving model. Prev 2.1703052959574354 Curr 2.11492065994964\n",
+      "save encoder weights to  results/best_encoder.pth\n",
+      "save decoder weights to  results/best_decoder.pth\n",
+      "##########################################################\n",
+      "save encoder weights to  results/encoder-23000.pth\n",
+      "save decoder weights to  results/decoder-23000.pth\n",
+      "Training loss: 221m 12s (- 470m 3s) (24000 32%) 1.7290\n",
+      "Validation loss: 223m 10s (- 474m 14s) (24000 32%) 2.0907\n",
+      "Bleu scores: 223m 10s (- 474m 14s) (24000 32%) 0.19487671\n",
+      "Improvement in validation loss, saving model. Prev 2.11492065994964 Curr 2.090669993744987\n",
+      "save encoder weights to  results/best_encoder.pth\n",
+      "save decoder weights to  results/best_decoder.pth\n",
+      "##########################################################\n",
+      "save encoder weights to  results/encoder-24000.pth\n",
+      "save decoder weights to  results/decoder-24000.pth\n",
+      "Training loss: 226m 59s (- 453m 58s) (25000 33%) 1.6945\n",
+      "Validation loss: 228m 54s (- 457m 48s) (25000 33%) 2.0511\n",
+      "Bleu scores: 228m 54s (- 457m 48s) (25000 33%) 0.20555118\n",
+      "Improvement in validation loss, saving model. Prev 2.090669993744987 Curr 2.0510722188946704\n",
+      "save encoder weights to  results/best_encoder.pth\n",
+      "save decoder weights to  results/best_decoder.pth\n",
+      "##########################################################\n",
+      "save encoder weights to  results/encoder-25000.pth\n",
+      "save decoder weights to  results/decoder-25000.pth\n",
+      "Training loss: 232m 36s (- 438m 22s) (26000 34%) 1.6333\n",
+      "Validation loss: 234m 40s (- 442m 15s) (26000 34%) 2.0361\n",
+      "Bleu scores: 234m 40s (- 442m 15s) (26000 34%) 0.20926462\n",
+      "Improvement in validation loss, saving model. Prev 2.0510722188946704 Curr 2.036086866139529\n",
+      "save encoder weights to  results/best_encoder.pth\n",
+      "save decoder weights to  results/best_decoder.pth\n",
+      "##########################################################\n",
+      "save encoder weights to  results/encoder-26000.pth\n",
+      "save decoder weights to  results/decoder-26000.pth\n",
+      "Training loss: 238m 16s (- 423m 36s) (27000 36%) 1.4851\n",
+      "Validation loss: 240m 14s (- 427m 5s) (27000 36%) 1.9672\n",
+      "Bleu scores: 240m 14s (- 427m 5s) (27000 36%) 0.21352279\n",
+      "Improvement in validation loss, saving model. Prev 2.036086866139529 Curr 1.9672220808431697\n",
+      "save encoder weights to  results/best_encoder.pth\n",
+      "save decoder weights to  results/best_decoder.pth\n",
+      "##########################################################\n",
+      "save encoder weights to  results/encoder-27000.pth\n",
+      "save decoder weights to  results/decoder-27000.pth\n",
+      "Training loss: 243m 44s (- 409m 8s) (28000 37%) 1.5367\n",
+      "Validation loss: 245m 39s (- 412m 21s) (28000 37%) 1.9125\n",
+      "Bleu scores: 245m 39s (- 412m 21s) (28000 37%) 0.22457683\n",
+      "Improvement in validation loss, saving model. Prev 1.9672220808431697 Curr 1.9125249442450718\n",
+      "save encoder weights to  results/best_encoder.pth\n",
+      "save decoder weights to  results/best_decoder.pth\n",
+      "##########################################################\n",
+      "save encoder weights to  results/encoder-28000.pth\n",
+      "save decoder weights to  results/decoder-28000.pth\n",
+      "Training loss: 249m 15s (- 395m 21s) (29000 38%) 1.4655\n",
+      "Validation loss: 251m 19s (- 398m 38s) (29000 38%) 1.8698\n",
+      "Bleu scores: 251m 19s (- 398m 38s) (29000 38%) 0.23247323\n",
+      "Improvement in validation loss, saving model. Prev 1.9125249442450718 Curr 1.869782677108545\n",
+      "save encoder weights to  results/best_encoder.pth\n",
+      "save decoder weights to  results/best_decoder.pth\n",
+      "##########################################################\n",
+      "save encoder weights to  results/encoder-29000.pth\n",
+      "save decoder weights to  results/decoder-29000.pth\n",
+      "Training loss: 254m 56s (- 382m 24s) (30000 40%) 1.4104\n",
+      "Validation loss: 256m 45s (- 385m 8s) (30000 40%) 1.8300\n",
+      "Bleu scores: 256m 45s (- 385m 8s) (30000 40%) 0.23475004\n",
+      "Improvement in validation loss, saving model. Prev 1.869782677108545 Curr 1.8300033148270234\n",
+      "save encoder weights to  results/best_encoder.pth\n",
+      "save decoder weights to  results/best_decoder.pth\n",
+      "##########################################################\n",
+      "save encoder weights to  results/encoder-30000.pth\n",
+      "save decoder weights to  results/decoder-30000.pth\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Size passed as the hidden-size argument to both networks.\n",
+    "hidden_size = 512\n",
+    "\n",
+    "# Build both networks, then move each one onto the configured device.\n",
+    "encoder1 = EncoderRNN(len(english_vocab), hidden_size)\n",
+    "encoder1 = encoder1.to(device)\n",
+    "attn_decoder1 = AttnDecoderRNN(hidden_size, len(indo_vocab), dropout_p=0.5)\n",
+    "attn_decoder1 = attn_decoder1.to(device)\n",
+    "\n",
+    "# Train; training/validation metrics are reported every 1000 iterations.\n",
+    "trainIters(encoder1, attn_decoder1, 75000, print_every=1000)\n",
+    "\n",
+    "# Spot-check translations of randomly chosen sentence pairs.\n",
+    "evaluateRandomly(encoder1, attn_decoder1)\n",
+    "\n",
+    "# Decode one hand-written sentence, keeping its attention weights.\n",
+    "output_words, attentions = evaluate(encoder1, attn_decoder1, \"do you love me?\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text",
+    "id": "Uc9fOh1SWI0i"
+   },
+   "source": [
+    "## Check some translations - note that the sentences below do not appear in the training or validation sets"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {},
+    "colab_type": "code",
+    "id": "Da2782FxWI0l"
+   },
+   "outputs": [],
+   "source": [
+    "# Hand-written sentences to run through the trained encoder/decoder.\n",
+    "sample_sentences = [\n",
+    "    \"tom is playing with ball .\",\n",
+    "    \"she is standing there .\",\n",
+    "    \"he is a bad man .\",\n",
+    "    \"he wants to sleep .\",\n",
+    "    \"i can't see you crying .\",\n",
+    "    \"my dog is running around .\",\n",
+    "    \"it is very popular .\",\n",
+    "    \"she speaks american english to tom's father .\",\n",
+    "    \"please eat lunch in the afternoon .\",\n",
+    "    \"i see red roses in the garden .\",\n",
+    "]\n",
+    "\n",
+    "# Print one translation per line, in the order listed above.\n",
+    "for sample_sentence in sample_sentences:\n",
+    "    print(translate(sample_sentence, encoder1, attn_decoder1))"
+   ]
+  }
+ ],
+ "metadata": {
+  "accelerator": "GPU",
+  "colab": {
+   "collapsed_sections": [],
+   "name": "Eng2Indo Attention Simple corpus - working copy with stopping.ipynb",
+   "provenance": [],
+   "toc_visible": true,
+   "version": "0.3.2"
+  },
+  "kernelspec": {
+   "display_name": "deeplearning",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.12"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 1
+}