Anurich committed on
Commit
ddffa29
·
verified ·
1 Parent(s): ca73a01

Update configuration_jeeves.py

Browse files
Files changed (1) hide show
  1. configuration_jeeves.py +5 -0
configuration_jeeves.py CHANGED
@@ -88,6 +88,11 @@ class JeevesConfig(PretrainedConfig):
88
  self.head_dim = d_model // n_heads
89
  self.hidden_size = d_model # HF convention
90
 
 
 
 
 
 
91
  @property
92
  def effective_depth(self) -> int:
93
  if self.loop_block_idx is not None:
 
88
  self.head_dim = d_model // n_heads
89
  self.hidden_size = d_model # HF convention
90
 
91
+ # Standard HF aliases (required by GenerationMixin in transformers 5.x)
92
+ self.num_hidden_layers = n_layers
93
+ self.num_attention_heads = n_heads
94
+ self.num_key_value_heads = n_kv_heads
95
+
96
  @property
97
  def effective_depth(self) -> int:
98
  if self.loop_block_idx is not None: