Update configuration_jeeves.py
Browse files
Files changed: configuration_jeeves.py (+5 -0)
configuration_jeeves.py
CHANGED
|
@@ -88,6 +88,11 @@ class JeevesConfig(PretrainedConfig):
|
|
| 88 |
self.head_dim = d_model // n_heads
|
| 89 |
self.hidden_size = d_model # HF convention
|
| 90 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 91 |
@property
|
| 92 |
def effective_depth(self) -> int:
|
| 93 |
if self.loop_block_idx is not None:
|
|
|
|
| 88 |
self.head_dim = d_model // n_heads
|
| 89 |
self.hidden_size = d_model # HF convention
|
| 90 |
|
| 91 |
+
# Standard HF aliases (required by GenerationMixin in transformers 5.x)
|
| 92 |
+
self.num_hidden_layers = n_layers
|
| 93 |
+
self.num_attention_heads = n_heads
|
| 94 |
+
self.num_key_value_heads = n_kv_heads
|
| 95 |
+
|
| 96 |
@property
|
| 97 |
def effective_depth(self) -> int:
|
| 98 |
if self.loop_block_idx is not None:
|