Mr-FineTuner committed on
Commit
9ac40d2
·
1 Parent(s): 4e5f0af

Upload Attention_Simple_Indo_Evaluation.ipynb

Browse files
fine tuning/Attention_Simple_Indo_Evaluation.ipynb ADDED
@@ -0,0 +1,587 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 1,
6
+ "metadata": {
7
+ "pycharm": {
8
+ "metadata": false,
9
+ "name": "#%%\n"
10
+ }
11
+ },
12
+ "outputs": [
13
+ {
14
+ "name": "stdout",
15
+ "output_type": "stream",
16
+ "text": [
17
+ "2.0.1\n"
18
+ ]
19
+ }
20
+ ],
21
+ "source": [
22
+ "import pickle\n",
23
+ "import torch\n",
24
+ "import torch.nn as nn\n",
25
+ "from torch import optim\n",
26
+ "import torch.nn.functional as F\n",
27
+ "from torch.autograd import Variable\n",
28
+ "from nltk import word_tokenize\n",
29
+ "\n",
30
+ "MIN_LENGTH = 4\n",
31
+ "MAX_LENGTH = 102\n",
32
+ "START, START_IDX = '<s>', 0\n",
33
+ "END, END_IDX = '</s>', 1\n",
34
+ "UNK, UNK_IDX = 'UNK', 2\n",
35
+ "\n",
36
+ "SOS_token = START_IDX\n",
37
+ "EOS_token = END_IDX\n",
38
+ "\n",
39
+ "print(torch.__version__)\n",
40
+ "\n",
41
+ "device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n",
42
+ "use_cuda = torch.cuda.is_available()\n",
43
+ "\n",
44
+ "# Let's load our dictionaries.\n",
45
+ "f_eng = open('vocabs/simple_english_vocab.Dictionary.pkl', 'rb')\n",
46
+ "english_vocab = pickle.load(f_eng)\n",
47
+ "\n",
48
+ "f_ind = open('vocabs/simple_indo_vocab.Dictionary.pkl', 'rb')\n",
49
+ "indo_vocab = pickle.load(f_ind)"
50
+ ]
51
+ },
52
+ {
53
+ "cell_type": "code",
54
+ "execution_count": 2,
55
+ "metadata": {
56
+ "pycharm": {}
57
+ },
58
+ "outputs": [],
59
+ "source": [
60
+ "class EncoderRNN(nn.Module):\n",
61
+ " def __init__(self, input_size, hidden_size):\n",
62
+ " super(EncoderRNN, self).__init__()\n",
63
+ " self.hidden_size = hidden_size\n",
64
+ "\n",
65
+ " self.embedding = nn.Embedding(input_size, hidden_size)\n",
66
+ " self.gru = nn.GRU(hidden_size, hidden_size)\n",
67
+ "\n",
68
+ " def forward(self, input, hidden):\n",
69
+ " embedded = self.embedding(input).view(1, 1, -1)\n",
70
+ " output = embedded\n",
71
+ " output, hidden = self.gru(output, hidden)\n",
72
+ " return output, hidden\n",
73
+ "\n",
74
+ " def initHidden(self):\n",
75
+ " return torch.zeros(1, 1, self.hidden_size, device=device)\n",
76
+ "\n",
77
+ "class AttnDecoderRNN(nn.Module):\n",
78
+ " def __init__(self, hidden_size, output_size, dropout_p=0.1, max_length=MAX_LENGTH): # Add max_length as a parameter\n",
79
+ " super(AttnDecoderRNN, self).__init__()\n",
80
+ " self.hidden_size = hidden_size\n",
81
+ " self.output_size = output_size\n",
82
+ " self.dropout_p = dropout_p\n",
83
+ " self.max_length = max_length # Update max_length\n",
84
+ "\n",
85
+ " self.embedding = nn.Embedding(self.output_size, self.hidden_size)\n",
86
+ " self.attn = nn.Linear(self.hidden_size * 2, self.max_length) # Update attention layer\n",
87
+ " self.attn_combine = nn.Linear(self.hidden_size * 2, self.hidden_size)\n",
88
+ " self.dropout = nn.Dropout(self.dropout_p)\n",
89
+ " self.gru = nn.GRU(self.hidden_size, self.hidden_size)\n",
90
+ " self.out = nn.Linear(self.hidden_size, self.output_size)\n",
91
+ "\n",
92
+ " def forward(self, input, hidden, encoder_outputs):\n",
93
+ " embedded = self.embedding(input).view(1, 1, -1)\n",
94
+ " embedded = self.dropout(embedded)\n",
95
+ "\n",
96
+ " attn_weights = F.softmax(\n",
97
+ " self.attn(torch.cat((embedded[0], hidden[0]), 1)), dim=1)\n",
98
+ " attn_applied = torch.bmm(attn_weights.unsqueeze(0),\n",
99
+ " encoder_outputs.unsqueeze(0))\n",
100
+ "\n",
101
+ " output = torch.cat((embedded[0], attn_applied[0]), 1)\n",
102
+ " output = self.attn_combine(output).unsqueeze(0)\n",
103
+ "\n",
104
+ " output = F.relu(output)\n",
105
+ " output, hidden = self.gru(output, hidden)\n",
106
+ "\n",
107
+ " output = F.log_softmax(self.out(output[0]), dim=1)\n",
108
+ " return output, hidden, attn_weights\n",
109
+ "\n",
110
+ " def initHidden(self):\n",
111
+ " return torch.zeros(1, 1, self.hidden_size, device=device)"
112
+ ]
113
+ },
114
+ {
115
+ "cell_type": "code",
116
+ "execution_count": 3,
117
+ "metadata": {
118
+ "pycharm": {}
119
+ },
120
+ "outputs": [],
121
+ "source": [
122
+ "# Convert a sentence into a vector of vocabulary indices, wrapped in start/end tokens\n",
123
+ "def vectorize_sent(sent, vocab):\n",
124
+ " return vocab.doc2idx([START] + word_tokenize(sent.lower()) + [END], unknown_word_index=2)\n",
125
+ "\n",
126
+ "# Convert the index vector into a PyTorch tensor to use as model input\n",
127
+ "def variable_from_sent(sent, vocab):\n",
128
+ " vsent = vectorize_sent(sent, vocab)\n",
129
+ " # print(vsent)\n",
130
+ " result = Variable(torch.LongTensor(vsent).view(-1, 1))\n",
131
+ " # print(result)\n",
132
+ " return result.cuda() if use_cuda else result"
133
+ ]
134
+ },
135
+ {
136
+ "cell_type": "code",
137
+ "execution_count": 4,
138
+ "metadata": {
139
+ "pycharm": {}
140
+ },
141
+ "outputs": [],
142
+ "source": [
143
+ "# Evaluate the model: decode a translation for one sentence and return it with the attention weights\n",
144
+ "def evaluate(encoder, decoder, sentence, max_length=MAX_LENGTH):\n",
145
+ " with torch.no_grad():\n",
146
+ " # input_tensor = tensorFromSentence(input_lang, sentence)\n",
147
+ " input_tensor = variable_from_sent(sentence, english_vocab)\n",
148
+ " input_length = input_tensor.size()[0]\n",
149
+ " encoder_hidden = encoder.initHidden()\n",
150
+ "\n",
151
+ " encoder_outputs = torch.zeros(max_length, encoder.hidden_size, device=device)\n",
152
+ "\n",
153
+ " for ei in range(input_length):\n",
154
+ " encoder_output, encoder_hidden = encoder(input_tensor[ei],\n",
155
+ " encoder_hidden)\n",
156
+ " encoder_outputs[ei] += encoder_output[0, 0]\n",
157
+ "\n",
158
+ " decoder_input = torch.tensor([[SOS_token]], device=device) # SOS\n",
159
+ "\n",
160
+ " decoder_hidden = encoder_hidden\n",
161
+ "\n",
162
+ " decoded_words = []\n",
163
+ " decoder_attentions = torch.zeros(max_length, max_length)\n",
164
+ "\n",
165
+ " for di in range(max_length):\n",
166
+ " decoder_output, decoder_hidden, decoder_attention = decoder(\n",
167
+ " decoder_input, decoder_hidden, encoder_outputs)\n",
168
+ " decoder_attentions[di] = decoder_attention.data\n",
169
+ " topv, topi = decoder_output.data.topk(1)\n",
170
+ " if topi.item() == EOS_token:\n",
171
+ " decoded_words.append('</s>')\n",
172
+ " break\n",
173
+ " else:\n",
174
+ " decoded_words.append(indo_vocab.id2token[topi.item()])\n",
175
+ "\n",
176
+ " decoder_input = topi.squeeze().detach()\n",
177
+ "\n",
178
+ " return decoded_words, decoder_attentions[:di + 1]"
179
+ ]
180
+ },
181
+ {
182
+ "cell_type": "code",
183
+ "execution_count": 5,
184
+ "metadata": {
185
+ "pycharm": {}
186
+ },
187
+ "outputs": [],
188
+ "source": [
189
+ "hidden_size = 512\n",
190
+ "encoder = EncoderRNN(len(english_vocab), hidden_size).to(device)\n",
191
+ "attn_decoder = AttnDecoderRNN(hidden_size, len(indo_vocab), dropout_p=0.5).to(device)"
192
+ ]
193
+ },
194
+ {
195
+ "cell_type": "markdown",
196
+ "metadata": {
197
+ "pycharm": {}
198
+ },
199
+ "source": [
200
+ "### Load a network weight snapshot"
201
+ ]
202
+ },
203
+ {
204
+ "cell_type": "code",
205
+ "execution_count": 6,
206
+ "metadata": {
207
+ "pycharm": {}
208
+ },
209
+ "outputs": [
210
+ {
211
+ "data": {
212
+ "text/plain": [
213
+ "EncoderRNN(\n",
214
+ " (embedding): Embedding(18990, 512)\n",
215
+ " (gru): GRU(512, 512)\n",
216
+ ")"
217
+ ]
218
+ },
219
+ "execution_count": 6,
220
+ "metadata": {},
221
+ "output_type": "execute_result"
222
+ }
223
+ ],
224
+ "source": [
225
+ "ENCODER_PATH = 'results/encoder-{}.pth'\n",
226
+ "DECODER_PATH = 'results/decoder-{}.pth'\n",
227
+ "\n",
228
+ "EPOCH_NO = 75000\n",
229
+ "\n",
230
+ "encoder.load_state_dict(torch.load(ENCODER_PATH.format(EPOCH_NO), map_location=device))\n",
231
+ "encoder.eval()"
232
+ ]
233
+ },
234
+ {
235
+ "cell_type": "code",
236
+ "execution_count": 7,
237
+ "metadata": {
238
+ "pycharm": {}
239
+ },
240
+ "outputs": [
241
+ {
242
+ "data": {
243
+ "text/plain": [
244
+ "AttnDecoderRNN(\n",
245
+ " (embedding): Embedding(18722, 512)\n",
246
+ " (attn): Linear(in_features=1024, out_features=102, bias=True)\n",
247
+ " (attn_combine): Linear(in_features=1024, out_features=512, bias=True)\n",
248
+ " (dropout): Dropout(p=0.5, inplace=False)\n",
249
+ " (gru): GRU(512, 512)\n",
250
+ " (out): Linear(in_features=512, out_features=18722, bias=True)\n",
251
+ ")"
252
+ ]
253
+ },
254
+ "execution_count": 7,
255
+ "metadata": {},
256
+ "output_type": "execute_result"
257
+ }
258
+ ],
259
+ "source": [
260
+ "attn_decoder.load_state_dict(torch.load(DECODER_PATH.format(EPOCH_NO),map_location=device))\n",
261
+ "attn_decoder.eval()"
262
+ ]
263
+ },
264
+ {
265
+ "cell_type": "code",
266
+ "execution_count": 8,
267
+ "metadata": {
268
+ "pycharm": {}
269
+ },
270
+ "outputs": [
271
+ {
272
+ "name": "stdout",
273
+ "output_type": "stream",
274
+ "text": [
275
+ "> Tom is a good man\n",
276
+ "< <s> tom adalah seorang yang baik . </s>\n"
277
+ ]
278
+ }
279
+ ],
280
+ "source": [
281
+ "input_sentence = \"Tom is a good man\"\n",
282
+ "output_words, attentions = evaluate(encoder, attn_decoder, input_sentence)\n",
283
+ "output_sentence = ' '.join(output_words)\n",
284
+ "print('>', input_sentence)\n",
285
+ "print('<', output_sentence)"
286
+ ]
287
+ },
288
+ {
289
+ "cell_type": "markdown",
290
+ "metadata": {
291
+ "pycharm": {}
292
+ },
293
+ "source": [
294
+ "### Load an array of network weight snapshots"
295
+ ]
296
+ },
297
+ {
298
+ "cell_type": "code",
299
+ "execution_count": 9,
300
+ "metadata": {},
301
+ "outputs": [
302
+ {
303
+ "name": "stdout",
304
+ "output_type": "stream",
305
+ "text": [
306
+ "[-1]\n"
307
+ ]
308
+ }
309
+ ],
310
+ "source": [
311
+ "test_id = english_vocab.doc2idx([\"liling\"])\n",
312
+ "print(test_id)"
313
+ ]
314
+ },
315
+ {
316
+ "cell_type": "code",
317
+ "execution_count": 10,
318
+ "metadata": {
319
+ "pycharm": {}
320
+ },
321
+ "outputs": [],
322
+ "source": [
323
+ "ENCODER_PATH = 'results/encoder-{}.pth'\n",
324
+ "DECODER_PATH = 'results/decoder-{}.pth'\n",
325
+ "\n",
326
+ "EPOCH_NO = [75000]\n",
327
+ "\n",
328
+ "def load_weights(encoder, decoder, epoch_no, device):\n",
329
+ " encoder.load_state_dict(torch.load(ENCODER_PATH.format(epoch_no),map_location=device))\n",
330
+ " decoder.load_state_dict(torch.load(DECODER_PATH.format(epoch_no),map_location=device))\n",
331
+ " return encoder, decoder\n",
332
+ "\n",
333
+ "\n",
334
+ "def input_validation(input_text, english_vocab):\n",
335
+ " max_words_required = MAX_LENGTH - 2\n",
336
+ " min_words_required = MIN_LENGTH\n",
337
+ " input_tokenized = word_tokenize(input_text.lower())\n",
338
+ " final_text = None\n",
339
+ " message = \"\"\n",
340
+ " if not min_words_required <= len(input_tokenized) <= max_words_required:\n",
341
+ " message = \"The input sentence should be between {} and {} words\".format(MIN_LENGTH, MAX_LENGTH - 2)\n",
342
+ " else:\n",
343
+ " input_ids = english_vocab.doc2idx(input_tokenized)\n",
344
+ " unknown_tokens = []\n",
345
+ " for key, val in enumerate(input_ids):\n",
346
+ " if val == -1:\n",
347
+ " unknown_token = input_tokenized[key]\n",
348
+ " unknown_tokens.append(unknown_token)\n",
349
+ " input_tokenized[key] = UNK.lower()\n",
350
+ " print(\"'{}' is not found in the english corpus\".format(unknown_token))\n",
351
+ " final_text = \" \".join(input_tokenized)\n",
352
+ " return final_text, message\n"
353
+ ]
354
+ },
355
+ {
356
+ "cell_type": "code",
357
+ "execution_count": 29,
358
+ "metadata": {},
359
+ "outputs": [
360
+ {
361
+ "name": "stdout",
362
+ "output_type": "stream",
363
+ "text": [
364
+ "Note: you may need to restart the kernel to use updated packages.Requirement already satisfied: ipywidgets in c:\\users\\elisa\\anaconda3\\envs\\deeplearning\\lib\\site-packages (8.0.4)\n",
365
+ "Requirement already satisfied: ipykernel>=4.5.1 in c:\\users\\elisa\\anaconda3\\envs\\deeplearning\\lib\\site-packages (from ipywidgets) (6.25.0)\n",
366
+ "Requirement already satisfied: ipython>=6.1.0 in c:\\users\\elisa\\anaconda3\\envs\\deeplearning\\lib\\site-packages (from ipywidgets) (8.12.2)\n",
367
+ "Requirement already satisfied: traitlets>=4.3.1 in c:\\users\\elisa\\anaconda3\\envs\\deeplearning\\lib\\site-packages (from ipywidgets) (5.7.1)\n",
368
+ "Requirement already satisfied: widgetsnbextension~=4.0 in c:\\users\\elisa\\anaconda3\\envs\\deeplearning\\lib\\site-packages (from ipywidgets) (4.0.5)\n",
369
+ "Requirement already satisfied: jupyterlab-widgets~=3.0 in c:\\users\\elisa\\anaconda3\\envs\\deeplearning\\lib\\site-packages (from ipywidgets) (3.0.5)\n",
370
+ "Requirement already satisfied: comm>=0.1.1 in c:\\users\\elisa\\anaconda3\\envs\\deeplearning\\lib\\site-packages (from ipykernel>=4.5.1->ipywidgets) (0.1.2)\n",
371
+ "Requirement already satisfied: debugpy>=1.6.5 in c:\\users\\elisa\\anaconda3\\envs\\deeplearning\\lib\\site-packages (from ipykernel>=4.5.1->ipywidgets) (1.6.7)\n",
372
+ "Requirement already satisfied: jupyter-client>=6.1.12 in c:\\users\\elisa\\anaconda3\\envs\\deeplearning\\lib\\site-packages (from ipykernel>=4.5.1->ipywidgets) (7.4.9)\n",
373
+ "Requirement already satisfied: jupyter-core!=5.0.*,>=4.12 in c:\\users\\elisa\\anaconda3\\envs\\deeplearning\\lib\\site-packages (from ipykernel>=4.5.1->ipywidgets) (5.3.0)\n",
374
+ "Requirement already satisfied: matplotlib-inline>=0.1 in c:\\users\\elisa\\anaconda3\\envs\\deeplearning\\lib\\site-packages (from ipykernel>=4.5.1->ipywidgets) (0.1.6)\n",
375
+ "Requirement already satisfied: nest-asyncio in c:\\users\\elisa\\anaconda3\\envs\\deeplearning\\lib\\site-packages (from ipykernel>=4.5.1->ipywidgets) (1.5.6)\n",
376
+ "Requirement already satisfied: packaging in c:\\users\\elisa\\anaconda3\\envs\\deeplearning\\lib\\site-packages (from ipykernel>=4.5.1->ipywidgets) (23.2)\n",
377
+ "Requirement already satisfied: psutil in c:\\users\\elisa\\anaconda3\\envs\\deeplearning\\lib\\site-packages (from ipykernel>=4.5.1->ipywidgets) (5.9.0)\n",
378
+ "Requirement already satisfied: pyzmq>=20 in c:\\users\\elisa\\anaconda3\\envs\\deeplearning\\lib\\site-packages (from ipykernel>=4.5.1->ipywidgets) (23.2.0)\n",
379
+ "Requirement already satisfied: tornado>=6.1 in c:\\users\\elisa\\anaconda3\\envs\\deeplearning\\lib\\site-packages (from ipykernel>=4.5.1->ipywidgets) (6.3.2)\n",
380
+ "Requirement already satisfied: backcall in c:\\users\\elisa\\anaconda3\\envs\\deeplearning\\lib\\site-packages (from ipython>=6.1.0->ipywidgets) (0.2.0)\n",
381
+ "Requirement already satisfied: decorator in c:\\users\\elisa\\anaconda3\\envs\\deeplearning\\lib\\site-packages (from ipython>=6.1.0->ipywidgets) (5.1.1)\n",
382
+ "Requirement already satisfied: jedi>=0.16 in c:\\users\\elisa\\anaconda3\\envs\\deeplearning\\lib\\site-packages (from ipython>=6.1.0->ipywidgets) (0.18.1)\n",
383
+ "Requirement already satisfied: pickleshare in c:\\users\\elisa\\anaconda3\\envs\\deeplearning\\lib\\site-packages (from ipython>=6.1.0->ipywidgets) (0.7.5)\n",
384
+ "Requirement already satisfied: prompt-toolkit!=3.0.37,<3.1.0,>=3.0.30 in c:\\users\\elisa\\anaconda3\\envs\\deeplearning\\lib\\site-packages (from ipython>=6.1.0->ipywidgets) (3.0.36)\n",
385
+ "Requirement already satisfied: pygments>=2.4.0 in c:\\users\\elisa\\anaconda3\\envs\\deeplearning\\lib\\site-packages (from ipython>=6.1.0->ipywidgets) (2.15.1)\n",
386
+ "Requirement already satisfied: stack-data in c:\\users\\elisa\\anaconda3\\envs\\deeplearning\\lib\\site-packages (from ipython>=6.1.0->ipywidgets) (0.2.0)\n",
387
+ "Requirement already satisfied: colorama in c:\\users\\elisa\\anaconda3\\envs\\deeplearning\\lib\\site-packages (from ipython>=6.1.0->ipywidgets) (0.4.6)\n",
388
+ "Requirement already satisfied: parso<0.9.0,>=0.8.0 in c:\\users\\elisa\\anaconda3\\envs\\deeplearning\\lib\\site-packages (from jedi>=0.16->ipython>=6.1.0->ipywidgets) (0.8.3)\n",
389
+ "Requirement already satisfied: entrypoints in c:\\users\\elisa\\anaconda3\\envs\\deeplearning\\lib\\site-packages (from jupyter-client>=6.1.12->ipykernel>=4.5.1->ipywidgets) (0.4)\n",
390
+ "Requirement already satisfied: python-dateutil>=2.8.2 in c:\\users\\elisa\\anaconda3\\envs\\deeplearning\\lib\\site-packages (from jupyter-client>=6.1.12->ipykernel>=4.5.1->ipywidgets) (2.8.2)\n",
391
+ "Requirement already satisfied: platformdirs>=2.5 in c:\\users\\elisa\\anaconda3\\envs\\deeplearning\\lib\\site-packages (from jupyter-core!=5.0.*,>=4.12->ipykernel>=4.5.1->ipywidgets) (3.10.0)\n",
392
+ "Requirement already satisfied: pywin32>=300 in c:\\users\\elisa\\anaconda3\\envs\\deeplearning\\lib\\site-packages (from jupyter-core!=5.0.*,>=4.12->ipykernel>=4.5.1->ipywidgets) (305.1)\n",
393
+ "Requirement already satisfied: wcwidth in c:\\users\\elisa\\anaconda3\\envs\\deeplearning\\lib\\site-packages (from prompt-toolkit!=3.0.37,<3.1.0,>=3.0.30->ipython>=6.1.0->ipywidgets) (0.2.5)\n",
394
+ "Requirement already satisfied: executing in c:\\users\\elisa\\anaconda3\\envs\\deeplearning\\lib\\site-packages (from stack-data->ipython>=6.1.0->ipywidgets) (0.8.3)\n",
395
+ "Requirement already satisfied: asttokens in c:\\users\\elisa\\anaconda3\\envs\\deeplearning\\lib\\site-packages (from stack-data->ipython>=6.1.0->ipywidgets) (2.0.5)\n",
396
+ "Requirement already satisfied: pure-eval in c:\\users\\elisa\\anaconda3\\envs\\deeplearning\\lib\\site-packages (from stack-data->ipython>=6.1.0->ipywidgets) (0.2.2)\n",
397
+ "Requirement already satisfied: six>=1.5 in c:\\users\\elisa\\anaconda3\\envs\\deeplearning\\lib\\site-packages (from python-dateutil>=2.8.2->jupyter-client>=6.1.12->ipykernel>=4.5.1->ipywidgets) (1.16.0)\n",
398
+ "\n"
399
+ ]
400
+ }
401
+ ],
402
+ "source": [
403
+ "# Install ipywidgets, which is required for the Jupyter widgets used below\n",
404
+ "%pip install ipywidgets"
405
+ ]
406
+ },
407
+ {
408
+ "cell_type": "code",
409
+ "execution_count": 11,
410
+ "metadata": {
411
+ "pycharm": {}
412
+ },
413
+ "outputs": [
414
+ {
415
+ "data": {
416
+ "application/vnd.jupyter.widget-view+json": {
417
+ "model_id": "3161da6ac5e34bc98df35cb77a0f9c3c",
418
+ "version_major": 2,
419
+ "version_minor": 0
420
+ },
421
+ "text/plain": [
422
+ "Text(value='', description='English:', placeholder='Type something')"
423
+ ]
424
+ },
425
+ "metadata": {},
426
+ "output_type": "display_data"
427
+ },
428
+ {
429
+ "data": {
430
+ "application/vnd.jupyter.widget-view+json": {
431
+ "model_id": "c32657456c0e4b258c1fb843c2821a57",
432
+ "version_major": 2,
433
+ "version_minor": 0
434
+ },
435
+ "text/plain": [
436
+ "Button(button_style='info', description='Translate', icon='check', style=ButtonStyle(), tooltip='Translate')"
437
+ ]
438
+ },
439
+ "metadata": {},
440
+ "output_type": "display_data"
441
+ }
442
+ ],
443
+ "source": [
444
+ "import ipywidgets as widgets\n",
445
+ "\n",
446
+ "input_text = widgets.Text(\n",
447
+ " value='',\n",
448
+ " placeholder='Type something',\n",
449
+ " description='English:',\n",
450
+ " disabled=False\n",
451
+ ")\n",
452
+ "\n",
453
+ "button = widgets.Button(\n",
454
+ " description='Translate',\n",
455
+ " disabled=False,\n",
456
+ " button_style='info', # 'success', 'info', 'warning', 'danger' or ''\n",
457
+ " tooltip='Translate',\n",
458
+ " icon='check'\n",
459
+ ")\n",
460
+ "\n",
461
+ "def on_button_clicked(b):\n",
462
+ " eng2indo_translation(input_text.value, encoder, attn_decoder, english_vocab)\n",
463
+ "\n",
464
+ "button.on_click(on_button_clicked)\n",
465
+ "\n",
466
+ "display(input_text)\n",
467
+ "display(button)\n",
468
+ "\n",
469
+ "def eng2indo_translation(text, encoder, decoder, english_vocab):\n",
470
+ " sentences = text.split('.') # Split input text into sentences based on \".\"\n",
471
+ " translated_sentences = []\n",
472
+ "\n",
473
+ " for sentence in sentences:\n",
474
+ " final_text, message = input_validation(sentence.strip(), english_vocab)\n",
475
+ "\n",
476
+ " if final_text:\n",
477
+ " print('>', sentence)\n",
478
+ " print('>>', final_text)\n",
479
+ "\n",
480
+ " for epoch in EPOCH_NO:\n",
481
+ " encoder, decoder = load_weights(encoder, decoder, epoch, device)\n",
482
+ " output_words, attentions = evaluate(encoder, decoder, final_text)\n",
483
+ " output_sentence = ' '.join(output_words).replace('<s>', '').replace('</s>', '')\n",
484
+ " translated_sentences.append(output_sentence)\n",
485
+ "\n",
486
+ " # Concatenate translated sentences\n",
487
+ " final_output = ' '.join(translated_sentences)\n",
488
+ " print(\"Final Translation:\", final_output)\n",
489
+ "\n",
490
+ " if message:\n",
491
+ " print(\"Validation Message:\", message)\n"
492
+ ]
493
+ },
494
+ {
495
+ "cell_type": "code",
496
+ "execution_count": 12,
497
+ "metadata": {},
498
+ "outputs": [
499
+ {
500
+ "name": "stdout",
501
+ "output_type": "stream",
502
+ "text": [
503
+ "> she is standing there \n",
504
+ ">> she is standing there\n",
505
+ "Final Translation: dia ada di sana . \n",
506
+ "Validation Message: The input sentence should be between 4 and 100 words\n",
507
+ "> he is a bad man \n",
508
+ ">> he is a bad man\n",
509
+ "Final Translation: dia adalah seorang yang yang sangat \n",
510
+ "Validation Message: The input sentence should be between 4 and 100 words\n",
511
+ "> he wants to sleep \n",
512
+ ">> he wants to sleep\n",
513
+ "Final Translation: dia ingin tidak lagi . \n",
514
+ "Validation Message: The input sentence should be between 4 and 100 words\n",
515
+ "'n't' is not found in the english corpus\n",
516
+ "> i can't see you crying \n",
517
+ ">> i ca unk see you crying\n",
518
+ "Final Translation: aku sudah makan kamu kamu . \n",
519
+ "Validation Message: The input sentence should be between 4 and 100 words\n",
520
+ "> my dog is running around \n",
521
+ ">> my dog is running around\n",
522
+ "Final Translation: saya saya berada di luar . \n",
523
+ "Validation Message: The input sentence should be between 4 and 100 words\n",
524
+ "> it is very popular \n",
525
+ ">> it is very popular\n",
526
+ "Final Translation: ini benar benar . \n",
527
+ "Validation Message: The input sentence should be between 4 and 100 words\n",
528
+ "''s' is not found in the english corpus\n",
529
+ "> she speaks american english to tom's father \n",
530
+ ">> she speaks american english to tom unk father\n",
531
+ "Final Translation: dia sudah amerika amerika amerika tom telah \n",
532
+ "Validation Message: The input sentence should be between 4 and 100 words\n",
533
+ "> please eat lunch in the afternoon \n",
534
+ ">> please eat lunch in the afternoon\n",
535
+ "Final Translation: tolong makan makan di depan . \n",
536
+ "Validation Message: The input sentence should be between 4 and 100 words\n",
537
+ "> i see red roses in the garden \n",
538
+ ">> i see red roses in the garden\n",
539
+ "Final Translation: saya melihat melihat di di taman . \n",
540
+ "Validation Message: The input sentence should be between 4 and 100 words\n"
541
+ ]
542
+ }
543
+ ],
544
+ "source": [
545
+ "eng2indo_translation(\"she is standing there .\", encoder, attn_decoder, english_vocab)\n",
546
+ "eng2indo_translation(\"he is a bad man .\", encoder, attn_decoder, english_vocab)\n",
547
+ "eng2indo_translation(\"he wants to sleep .\", encoder, attn_decoder, english_vocab)\n",
548
+ "eng2indo_translation(\"i can't see you crying .\", encoder, attn_decoder, english_vocab)\n",
549
+ "eng2indo_translation(\"my dog is running around .\", encoder, attn_decoder, english_vocab)\n",
550
+ "eng2indo_translation(\"it is very popular .\", encoder, attn_decoder, english_vocab)\n",
551
+ "eng2indo_translation(\"she speaks american english to tom's father .\", encoder, attn_decoder, english_vocab)\n",
552
+ "eng2indo_translation(\"please eat lunch in the afternoon .\", encoder, attn_decoder, english_vocab)\n",
553
+ "eng2indo_translation(\"i see red roses in the garden .\", encoder, attn_decoder, english_vocab)"
554
+ ]
555
+ }
556
+ ],
557
+ "metadata": {
558
+ "kernelspec": {
559
+ "display_name": "Python 3",
560
+ "language": "python",
561
+ "name": "python3"
562
+ },
563
+ "language_info": {
564
+ "codemirror_mode": {
565
+ "name": "ipython",
566
+ "version": 3
567
+ },
568
+ "file_extension": ".py",
569
+ "mimetype": "text/x-python",
570
+ "name": "python",
571
+ "nbconvert_exporter": "python",
572
+ "pygments_lexer": "ipython3",
573
+ "version": "3.10.12"
574
+ },
575
+ "stem_cell": {
576
+ "cell_type": "raw",
577
+ "metadata": {
578
+ "pycharm": {
579
+ "metadata": false
580
+ }
581
+ },
582
+ "source": ""
583
+ }
584
+ },
585
+ "nbformat": 4,
586
+ "nbformat_minor": 2
587
+ }