Update modeling_neobert.py
Browse files — modeling_neobert.py (+2 −2)
modeling_neobert.py
CHANGED
@@ -206,7 +206,7 @@ class EncoderBlock(nn.Module):
         self.ffn = SwiGLU(config.hidden_size, intermediate_size, config.hidden_size, bias=False)

         # Layer norms
-        rms_norm_cls = nn.RMSNorm if config._attn_implementation != 'onnx_eager' else NeoBERTEagerRMSNorm
+        rms_norm_cls = nn.RMSNorm if config._attn_implementation != 'onnx_eager' and hasattr(nn, 'RMSNorm') else NeoBERTEagerRMSNorm
         self.attention_norm = rms_norm_cls(config.hidden_size, config.norm_eps)
         self.ffn_norm = rms_norm_cls(config.hidden_size, config.norm_eps)

@@ -315,7 +315,7 @@ class NeoBERT(NeoBERTPreTrainedModel):
         for _ in range(config.num_hidden_layers):
             self.transformer_encoder.append(EncoderBlock(config))

-        rms_norm_cls = nn.RMSNorm if config._attn_implementation != 'onnx_eager' else NeoBERTEagerRMSNorm
+        rms_norm_cls = nn.RMSNorm if config._attn_implementation != 'onnx_eager' and hasattr(nn, 'RMSNorm') else NeoBERTEagerRMSNorm
         self.layer_norm = rms_norm_cls(config.hidden_size, config.norm_eps)

         # Initialize weights and apply final processing