Taykhoom
/

ERNIE-RNA

@@ -115,16 +115,15 @@ class ErnieRNAAttention(nn.Module):
         twod_bias_new = attn_weights.view(bsz, self.num_heads, tgt_len, tgt_len)
         attn_probs = F.softmax(attn_weights, dim=-1)
         attn_probs = self.dropout(attn_probs)
         out = torch.bmm(attn_probs, v)
         out = out.transpose(0, 1).contiguous().view(tgt_len, bsz, self.embed_dim)
         out = self.out_proj(out)
-        attn_weights_out = None
-        if output_attentions:
-            attn_weights_out = twod_bias_new
         return out, attn_weights_out, twod_bias_new

         twod_bias_new = attn_weights.view(bsz, self.num_heads, tgt_len, tgt_len)
         attn_probs = F.softmax(attn_weights, dim=-1)
+        attn_weights_out = None
+        if output_attentions:
+            attn_weights_out = attn_probs.view(bsz, self.num_heads, tgt_len, tgt_len)
         attn_probs = self.dropout(attn_probs)
         out = torch.bmm(attn_probs, v)
         out = out.transpose(0, 1).contiguous().view(tgt_len, bsz, self.embed_dim)
         out = self.out_proj(out)
         return out, attn_weights_out, twod_bias_new