kd13
/

nano-translate-v2

lstm_seq2seq_bahdanau_en_hi

text2text-generation

Model card Files Files and versions

kd13 committed 18 days ago

Commit

f9af18f

·

verified ·

1 Parent(s): 041c5c8

Update modeling_lstm_seq2seq_en_hi.py

Files changed (1) hide show

modeling_lstm_seq2seq_en_hi.py +16 -9

modeling_lstm_seq2seq_en_hi.py CHANGED Viewed

@@ -77,6 +77,9 @@ class Encoder(nn.Module):
             total_length=input_ids.size(1)
         )
         return LSTMEncoderOutput(
             last_hidden_state=outputs,
             hidden_state=hidden,
@@ -289,16 +292,20 @@ class Seq2SeqHFModel(PreTrainedModel):
     def _merge_bidir_state(self, state, bridge_layer):
         num_directions = 2
-        num_layers_times_dirs, batch_size, hidden_dim = state.size()
         num_layers = num_layers_times_dirs // num_directions
-        state = state.view(num_layers, num_directions, batch_size, hidden_dim)
-        forward_state = state[:, 0, :, :]
-        backward_state = state[:, 1, :, :]
-        merged = torch.cat([forward_state, backward_state], dim=-1)
-        merged = bridge_layer(merged)
-        return torch.tanh(merged)
     def forward(
         self,

             total_length=input_ids.size(1)
         )
+        hidden = hidden.transpose(0, 1).contiguous()   # (batch, layers*dirs, hidden)
+        cell = cell.transpose(0, 1).contiguous()       # (batch, layers*dirs, hidden)
         return LSTMEncoderOutput(
             last_hidden_state=outputs,
             hidden_state=hidden,
     def _merge_bidir_state(self, state, bridge_layer):
         num_directions = 2
+        batch_size, num_layers_times_dirs, hidden_dim = state.size()
         num_layers = num_layers_times_dirs // num_directions
+        state = state.view(batch_size, num_layers, num_directions, hidden_dim)
+        forward_state = state[:, :, 0, :]   # (batch, num_layers, hidden)
+        backward_state = state[:, :, 1, :]  # (batch, num_layers, hidden)
+        merged = torch.cat([forward_state, backward_state], dim=-1)   # (batch, num_layers, 2*hidden)
+        merged = bridge_layer(merged)                                 # (batch, num_layers, hidden)
+        merged = torch.tanh(merged)
+        return merged.transpose(0, 1).contiguous()   # (num_layers, batch, hidden)
     def forward(
         self,