Jackmin801 committed on
Commit ·
bc43a5e
1
Parent(s): df1a7f6
allow math kernel
Browse files- modeling_bert.py +1 -1
modeling_bert.py
CHANGED
|
@@ -378,7 +378,7 @@ class JinaBertSelfAttention(nn.Module):
|
|
| 378 |
b, _, s, _ = query_layer.shape
|
| 379 |
new_bias = attention_mask + bias
|
| 380 |
attn = scaled_dot_product_attention(query_layer, key_layer, value_layer, new_bias)
|
| 381 |
-
attn = attn.permute(0, 2, 1, 3)
|
| 382 |
return (attn.view(b, s, self.all_head_size),)
|
| 383 |
|
| 384 |
# Take the dot product between "query" and "key" to get the raw attention scores.
|
|
|
|
| 378 |
b, _, s, _ = query_layer.shape
|
| 379 |
new_bias = attention_mask + bias
|
| 380 |
attn = scaled_dot_product_attention(query_layer, key_layer, value_layer, new_bias)
|
| 381 |
+
attn = attn.permute(0, 2, 1, 3).contiguous()
|
| 382 |
return (attn.view(b, s, self.all_head_size),)
|
| 383 |
|
| 384 |
# Take the dot product between "query" and "key" to get the raw attention scores.
|