import torch
import torch.nn as nn
import torch.nn.functional as F
import random

# Use the GPU when one is available; fall back to CPU otherwise
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'

class Encoder(nn.Module):
    def __init__(self, vocab_size, embed_dim, enc_hid_dim, dec_hid_dim, dropout_p=0.3):
        super().__init__()
        self.embed = nn.Embedding(vocab_size, embed_dim)
        self.rnn = nn.GRU(embed_dim, enc_hid_dim, batch_first=True, bidirectional=True)
        self.fc = nn.Linear(enc_hid_dim * 2, dec_hid_dim)
        self.dropout = nn.Dropout(dropout_p)

    def forward(self, x):
        # x --> (batch, src_seq_len)
        embedded = self.dropout(self.embed(x))
        # outputs --> (batch, src_seq_len, enc_hid_dim * 2)
        # hidden  --> (2, batch, enc_hid_dim): final forward and backward states
        outputs, hidden = self.rnn(embedded)
        # Project the concatenated directions down to the decoder's hidden size;
        # tanh keeps the initial state in a GRU's natural (-1, 1) range
        hidden = torch.tanh(self.fc(torch.cat((hidden[0], hidden[1]), dim=1)))
        # hidden --> (1, batch, dec_hid_dim), the decoder's initial hidden state
        hidden = hidden.unsqueeze(0)
        return outputs, hidden

class Attention(nn.Module):
    def __init__(self, enc_hid_dim, dec_hid_dim):
        super().__init__()
        self.attn = nn.Linear((enc_hid_dim * 2) + dec_hid_dim, dec_hid_dim)
        self.value = nn.Linear(dec_hid_dim, 1, bias=False)

    def forward(self, encoder_outputs, dec_hidden):
        # encoder_outputs --> (batch, seq_len, enc_hid_dim * 2)
        # dec_hidden      --> (1, batch, dec_hid_dim)
        seq_len = encoder_outputs.shape[1]
        # Repeat the decoder state across every source position
        dec_hidden = dec_hidden.permute(1, 0, 2).repeat(1, seq_len, 1)
        concatenated = torch.cat((dec_hidden, encoder_outputs), dim=2)
        # energy --> (batch, seq_len, dec_hid_dim)
        energy = F.relu(self.attn(concatenated))
        # attention --> (batch, seq_len, 1); softmax over the source positions
        attention = F.softmax(self.value(energy), dim=1)
        # attn_weights --> (batch, 1, seq_len)
        attn_weights = attention.permute(0, 2, 1)
        # context --> (batch, 1, enc_hid_dim * 2)
        context = torch.bmm(attn_weights, encoder_outputs)
        return context

class Decoder(nn.Module):
    def __init__(self, embed_dim, vocab_size, enc_hid_dim, dec_hid_dim, attn):
        super().__init__()
        self.attention = attn
        self.embed = nn.Embedding(vocab_size, embed_dim)
        # The GRU consumes the attention context concatenated with the token embedding
        self.rnn = nn.GRU((enc_hid_dim * 2) + embed_dim, dec_hid_dim, batch_first=True)
        self.fc = nn.Linear(dec_hid_dim, vocab_size)

    def forward(self, token, encoder_outputs, hidden):
        # token --> (batch, 1), a single target token per sequence
        embedding = self.embed(token)
        # context --> (batch, 1, enc_hid_dim * 2)
        context = self.attention(encoder_outputs, hidden)
        rnn_input = torch.cat((context, embedding), dim=2)
        dec_outputs, dec_hidden = self.rnn(rnn_input, hidden)
        # Drop the length-1 time dimension before projecting to the vocabulary
        dec_outputs = dec_outputs.squeeze(1)
        # predictions --> (batch, vocab_size)
        predictions = self.fc(dec_outputs)
        return predictions, dec_hidden

class Seq2Seq(nn.Module):
    def __init__(self, encoder, decoder):
        super().__init__()
        self.encoder = encoder
        self.decoder = decoder

    def forward(self, source, target, teacher_forcing_ratio=0.5):
        # source --> (batch, src_seq_len), target --> (batch, tgt_seq_len)
        batch_size = target.shape[0]
        target_len = target.shape[1]
        target_vocab_size = self.decoder.fc.out_features
        outputs = torch.zeros(batch_size, target_len, target_vocab_size).to(DEVICE)
        encoder_outputs, hidden = self.encoder(source)
        # The first decoder input is the <sos> token of the target sequence
        x = target[:, 0]
        for time_step in range(1, target_len):
            predictions, hidden = self.decoder(x.unsqueeze(1), encoder_outputs, hidden)
            outputs[:, time_step, :] = predictions
            top_1 = torch.argmax(predictions, dim=1)
            # Teacher forcing: feed the ground-truth token with some probability,
            # otherwise feed the model's own most likely prediction
            x = target[:, time_step] if random.random() < teacher_forcing_ratio else top_1
        return outputs
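

if __name__ == "__main__":
    # Minimal smoke test for the model above. The hyperparameters, batch size,
    # and sequence lengths here are illustrative assumptions, not values from
    # any particular dataset or training setup.
    SRC_VOCAB, TGT_VOCAB = 100, 120
    EMBED_DIM, ENC_HID, DEC_HID = 32, 64, 64

    attn = Attention(ENC_HID, DEC_HID)
    encoder = Encoder(SRC_VOCAB, EMBED_DIM, ENC_HID, DEC_HID)
    decoder = Decoder(EMBED_DIM, TGT_VOCAB, ENC_HID, DEC_HID, attn)
    model = Seq2Seq(encoder, decoder).to(DEVICE)

    source = torch.randint(0, SRC_VOCAB, (4, 10), device=DEVICE)  # (batch, src_seq_len)
    target = torch.randint(0, TGT_VOCAB, (4, 12), device=DEVICE)  # (batch, tgt_seq_len)
    outputs = model(source, target)
    print(outputs.shape)  # expected: torch.Size([4, 12, 120])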