DiegoTheExplorar committed
Commit 3ee1dc9 · verified · 1 Parent(s): 0a65d6f

Upload Seq2SeqModel.py

Files changed (1)
  1. Seq2SeqModel.py +71 -0
Seq2SeqModel.py ADDED
@@ -0,0 +1,71 @@
+import torch
+import torch.nn as nn
+import random
+
+
+class Seq2SeqModel(nn.Module):
+    """
+    Ties the encoder and decoder together and receives tokenized English
+    (source) and Klingon (target) data from the tokenization process.
+    """
+
+    def __init__(self, encoder, decoder, device):
+        super().__init__()
+        self.encoder = encoder
+        self.decoder = decoder
+        self.device = device
+        # The hidden state is passed straight from the encoder to the
+        # decoder, so both must use the same hidden size and layer count.
+        assert encoder.hid_dim == decoder.hid_dim, "Hidden dimensions of encoder and decoder are not equal"
+        assert encoder.n_layers == decoder.n_layers, "Encoder and decoder layer counts are not equal"
+
+    def forward(self, src, trg, teacher_forcing_ratio):
+        """
+        Parameters
+        ----------
+        src : Tensor
+            Source tensor of token indices, shape (src_len, batch_size).
+            The tokenized English data.
+        trg : Tensor
+            Target tensor of token indices, shape (trg_len, batch_size).
+            The tokenized Klingon data.
+        teacher_forcing_ratio : float
+            Probability of feeding the ground-truth target token (rather
+            than the model's own prediction) to the decoder at each step
+            during training.
+
+        Returns
+        -------
+        outputs : Tensor
+            Predicted output tensor from the GRU decoder,
+            shape (trg_len, batch_size, output_dim).
+        """
+        batch_size = trg.shape[1]
+        trg_length = trg.shape[0]
+        trg_size = self.decoder.output_dim
+        # Buffer for the decoder outputs at every time step
+        outputs = torch.zeros(trg_length, batch_size, trg_size, device=self.device)
+        # The encoder's final hidden state is the decoder's initial context
+        hidden = self.encoder(src)
+        # The first decoder input is the start-of-sentence token, which
+        # tells the decoder when to start making predictions
+        dec_input = trg[0, :]
+        for t in range(1, trg_length):
+            # One decoding step; hidden (initially the encoder's context
+            # vector) is updated at every iteration
+            output, hidden = self.decoder(dec_input, hidden)
+            # Store the prediction made at this time step
+            outputs[t] = output
+            # Leave the use of teacher forcing at this step to chance
+            teacher_force = random.random() < teacher_forcing_ratio
+            # Highest-scoring token from the current prediction
+            highest = output.argmax(1)
+            # With teacher forcing, feed the ground-truth next token;
+            # otherwise feed the model's own prediction
+            dec_input = trg[t] if teacher_force else highest
+        return outputs
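
For reference, here is a minimal sketch of an encoder/decoder pair and a forward call that satisfy the interface Seq2SeqModel assumes: hid_dim, n_layers, and output_dim attributes, an encoder returning its final hidden state of shape (n_layers, batch_size, hid_dim), and a decoder that consumes one token per step. The Encoder and Decoder classes, vocabulary sizes, and dimensions below are illustrative placeholders, not the modules actually used in this project.

import torch
import torch.nn as nn

from Seq2SeqModel import Seq2SeqModel  # assumes the file above is on the path

class Encoder(nn.Module):
    # Hypothetical GRU encoder exposing the attributes Seq2SeqModel checks
    def __init__(self, input_dim, emb_dim, hid_dim, n_layers):
        super().__init__()
        self.hid_dim = hid_dim
        self.n_layers = n_layers
        self.embedding = nn.Embedding(input_dim, emb_dim)
        self.rnn = nn.GRU(emb_dim, hid_dim, n_layers)

    def forward(self, src):
        # src: (src_len, batch_size) -> hidden: (n_layers, batch_size, hid_dim)
        _, hidden = self.rnn(self.embedding(src))
        return hidden

class Decoder(nn.Module):
    # Hypothetical single-step GRU decoder with the (token, hidden) signature
    def __init__(self, output_dim, emb_dim, hid_dim, n_layers):
        super().__init__()
        self.output_dim = output_dim
        self.hid_dim = hid_dim
        self.n_layers = n_layers
        self.embedding = nn.Embedding(output_dim, emb_dim)
        self.rnn = nn.GRU(emb_dim, hid_dim, n_layers)
        self.fc_out = nn.Linear(hid_dim, output_dim)

    def forward(self, token, hidden):
        # token: (batch_size,) -> prediction: (batch_size, output_dim)
        embedded = self.embedding(token.unsqueeze(0))      # (1, batch, emb_dim)
        output, hidden = self.rnn(embedded, hidden)        # (1, batch, hid_dim)
        prediction = self.fc_out(output.squeeze(0))        # (batch, output_dim)
        return prediction, hidden

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
encoder = Encoder(input_dim=4000, emb_dim=256, hid_dim=512, n_layers=2)
decoder = Decoder(output_dim=4000, emb_dim=256, hid_dim=512, n_layers=2)
model = Seq2SeqModel(encoder, decoder, device).to(device)

src = torch.randint(0, 4000, (10, 32), device=device)   # (src_len, batch_size)
trg = torch.randint(0, 4000, (12, 32), device=device)   # (trg_len, batch_size)
outputs = model(src, trg, teacher_forcing_ratio=0.5)    # (12, 32, 4000)

Note that outputs[0] is left as zeros because decoding starts from the start-of-sentence token at position 0; losses are typically computed on outputs[1:] against trg[1:].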