Spaces:
Sleeping
Sleeping
| import torch | |
| import torch.nn as nn | |
| import torch.nn.functional as F | |
| import random | |
| DEVICE = 'cpu' | |
class Encoder(nn.Module):
    """Bidirectional GRU encoder that also builds the decoder's initial state.

    Args:
        vocab_size: Number of entries in the embedding table.
        embed_dim: Size of each token embedding.
        enc_hid_dim: Hidden size of each GRU direction.
        dec_hid_dim: Size of the hidden state handed to the decoder.
        dropout_p: Dropout probability applied to the embeddings.
    """

    def __init__(self, vocab_size, embed_dim, enc_hid_dim, dec_hid_dim, dropout_p=0.3):
        super().__init__()
        self.embed = nn.Embedding(vocab_size, embed_dim)
        self.rnn = nn.GRU(
            input_size=embed_dim,
            hidden_size=enc_hid_dim,
            batch_first=True,
            bidirectional=True,
        )
        # Maps the concatenated forward/backward final states to the
        # decoder's hidden size.
        self.fc = nn.Linear(2 * enc_hid_dim, dec_hid_dim)
        self.dropout = nn.Dropout(p=dropout_p)

    def forward(self, x):
        """Encode a batch of token ids.

        Args:
            x: Integer tensor of token ids, batch first
                (assumed (batch, seq_len) -- the GRU is batch_first).

        Returns:
            A pair ``(outputs, hidden)``: the per-step GRU outputs with
            ``2 * enc_hid_dim`` features, and the projected final state
            with a leading dimension of size 1 added for the decoder GRU.
        """
        token_vectors = self.embed(x)
        outputs, final_states = self.rnn(self.dropout(token_vectors))

        # Index 0 is the forward direction's last state, index 1 the
        # backward direction's (single-layer bidirectional GRU).
        forward_state = final_states[0]
        backward_state = final_states[1]
        merged = torch.cat((forward_state, backward_state), dim=1)

        return outputs, self.fc(merged).unsqueeze(0)
class Attention(nn.Module):
    """Additive attention over encoder outputs, queried by the decoder state.

    Args:
        enc_hid_dim: Hidden size of one encoder direction; encoder outputs
            are expected to carry ``2 * enc_hid_dim`` features.
        dec_hid_dim: Size of the decoder hidden state.
    """

    def __init__(self, enc_hid_dim, dec_hid_dim):
        super().__init__()
        self.attn = nn.Linear((enc_hid_dim * 2) + dec_hid_dim, dec_hid_dim)
        self.value = nn.Linear(dec_hid_dim, 1, bias=False)

    def forward(self, encoder_outputs, dec_hidden, mask=None):
        """Compute the attention-weighted context vector.

        Args:
            encoder_outputs: Tensor of shape (batch, seq_len, enc_hid_dim * 2).
            dec_hidden: Tensor of shape (1, batch, dec_hid_dim).
            mask: Optional (batch, seq_len) tensor; positions that are
                zero/False are excluded from the softmax (e.g. padding).
                Every row must keep at least one position, otherwise the
                softmax for that row is NaN. ``None`` attends everywhere.

        Returns:
            Context tensor of shape (batch, 1, enc_hid_dim * 2).
        """
        seq_len = encoder_outputs.shape[1]

        # (1, batch, dec_hid) -> (batch, seq_len, dec_hid). expand() creates a
        # broadcast view instead of copying the state seq_len times.
        query = dec_hidden.permute(1, 0, 2).expand(-1, seq_len, -1)

        concatenated = torch.cat((query, encoder_outputs), dim=2)
        energy = F.relu(self.attn(concatenated))

        # scores --> (batch, seq_len, 1)
        scores = self.value(energy)
        if mask is not None:
            # -inf scores become exactly zero weight after the softmax.
            scores = scores.masked_fill(~mask.bool().unsqueeze(2), float("-inf"))

        # Normalise over the sequence dimension, then move it last for bmm.
        # attn_weights --> (batch, 1, seq_len)
        attn_weights = F.softmax(scores, dim=1).permute(0, 2, 1)

        # context --> (batch, 1, enc_hid_dim * 2)
        return torch.bmm(attn_weights, encoder_outputs)
class Decoder(nn.Module):
    """Single-step GRU decoder that conditions on an attention context.

    Args:
        embed_dim: Size of each target-token embedding.
        vocab_size: Number of target tokens (embedding rows and logits).
        enc_hid_dim: Hidden size of one encoder direction; the context
            vector is expected to carry ``2 * enc_hid_dim`` features.
        dec_hid_dim: Hidden size of the decoder GRU.
        attn: Callable invoked as ``attn(encoder_outputs, hidden)`` that
            returns the context vector.
    """

    def __init__(self, embed_dim, vocab_size, enc_hid_dim, dec_hid_dim, attn):
        super().__init__()
        self.attention = attn
        self.embed = nn.Embedding(vocab_size, embed_dim)
        self.rnn = nn.GRU(
            input_size=(enc_hid_dim * 2) + embed_dim,
            hidden_size=dec_hid_dim,
            batch_first=True,
        )
        self.fc = nn.Linear(dec_hid_dim, vocab_size)

    def forward(self, token, encoder_outputs, hidden):
        """Run one decoding step.

        Args:
            token: Current input token ids, expected as (batch, 1).
            encoder_outputs: Encoder outputs passed through to the attention.
            hidden: Previous decoder hidden state, (1, batch, dec_hid_dim).

        Returns:
            A pair ``(predictions, dec_hidden)``: vocabulary logits of shape
            (batch, vocab_size) and the updated hidden state.
        """
        token_vector = self.embed(token)

        # context --> (batch, 1, enc_hid_dim * 2)
        context = self.attention(encoder_outputs, hidden)

        # The GRU consumes the context and the token embedding side by side.
        step_input = torch.cat((context, token_vector), dim=2)
        step_output, next_hidden = self.rnn(step_input, hidden)

        # Drop the length-1 time dimension before projecting to the vocabulary.
        logits = self.fc(step_output.squeeze(1))
        return logits, next_hidden
class Seq2Seq(nn.Module):
    """Encoder-decoder wrapper that decodes step by step with teacher forcing.

    Args:
        encoder: Callable returning ``(encoder_outputs, hidden)`` for a
            batch of source token ids.
        decoder: Callable invoked as ``decoder(token, encoder_outputs, hidden)``
            returning ``(predictions, hidden)``; it must expose
            ``fc.out_features`` as the target vocabulary size.
    """

    def __init__(self, encoder, decoder):
        super().__init__()
        self.encoder = encoder
        self.decoder = decoder

    def forward(self, source, target, teacher_forcing_ratio=0.5):
        """Decode ``target.shape[1] - 1`` steps and collect the logits.

        Args:
            source: Source token ids, (batch, src_seq_len).
            target: Target token ids, (batch, tgt_seq_len); column 0 is
                used as the start-of-sequence input to the decoder.
            teacher_forcing_ratio: Probability, drawn per time step, of
                feeding the ground-truth token instead of the model's own
                prediction as the next input.

        Returns:
            Tensor of shape (batch, tgt_seq_len, target_vocab_size) holding
            the logits for every step; position 0 is left as zeros because
            no prediction is made for the start token.
        """
        batch_size, target_len = target.shape[0], target.shape[1]
        target_vocab_size = self.decoder.fc.out_features

        # Allocate on the inputs' device rather than a module-level constant
        # so the model keeps working after being moved to another device.
        outputs = torch.zeros(
            batch_size, target_len, target_vocab_size, device=target.device
        )

        encoder_outputs, hidden = self.encoder(source)

        # The decoder embeds target-vocabulary ids, so the start token must
        # come from the target sequence, not from the source sequence.
        x = target[:, 0]

        for time_step in range(1, target_len):
            predictions, hidden = self.decoder(x.unsqueeze(1), encoder_outputs, hidden)
            outputs[:, time_step, :] = predictions

            top_1 = torch.argmax(predictions, dim=1)
            use_teacher = random.random() < teacher_forcing_ratio
            x = target[:, time_step] if use_teacher else top_1

        return outputs