Taykhoom committed on
Commit
9947146
·
verified ·
1 Parent(s): 25670fa

Fix output_attentions: return post-softmax probabilities, not pre-softmax 2D bias

Browse files
Files changed (1) hide show
  1. modeling_ernierna.py +3 -4
modeling_ernierna.py CHANGED
@@ -115,16 +115,15 @@ class ErnieRNAAttention(nn.Module):
115
  twod_bias_new = attn_weights.view(bsz, self.num_heads, tgt_len, tgt_len)
116
 
117
  attn_probs = F.softmax(attn_weights, dim=-1)
 
 
 
118
  attn_probs = self.dropout(attn_probs)
119
 
120
  out = torch.bmm(attn_probs, v)
121
  out = out.transpose(0, 1).contiguous().view(tgt_len, bsz, self.embed_dim)
122
  out = self.out_proj(out)
123
 
124
- attn_weights_out = None
125
- if output_attentions:
126
- attn_weights_out = twod_bias_new
127
-
128
  return out, attn_weights_out, twod_bias_new
129
 
130
 
 
115
  twod_bias_new = attn_weights.view(bsz, self.num_heads, tgt_len, tgt_len)
116
 
117
  attn_probs = F.softmax(attn_weights, dim=-1)
118
+ attn_weights_out = None
119
+ if output_attentions:
120
+ attn_weights_out = attn_probs.view(bsz, self.num_heads, tgt_len, tgt_len)
121
  attn_probs = self.dropout(attn_probs)
122
 
123
  out = torch.bmm(attn_probs, v)
124
  out = out.transpose(0, 1).contiguous().view(tgt_len, bsz, self.embed_dim)
125
  out = self.out_proj(out)
126
 
 
 
 
 
127
  return out, attn_weights_out, twod_bias_new
128
 
129