smithblack-0 committed on
Commit
af7974e
·
verified ·
1 Parent(s): 72e7455

Update architecture and tokenizer

Browse files
Files changed (1) hide show
  1. huggingface.py +1 -1
huggingface.py CHANGED
@@ -120,7 +120,7 @@ class ShramForCausalLM(PreTrainedModel, GenerationMixin):
120
  num_local_heads=self.config.num_sliding_window_heads,
121
  local_head_dim=self.config.head_dim,
122
  num_mosrah_heads=self.config.num_mosrah_heads,
123
- mosrah_head_dim=self.config.hidden_size // self.config.num_selected_heads,
124
  batch_size=batch_size,
125
  device=device,
126
  )
 
120
  num_local_heads=self.config.num_sliding_window_heads,
121
  local_head_dim=self.config.head_dim,
122
  num_mosrah_heads=self.config.num_mosrah_heads,
123
+ mosrah_head_dim=self.config.head_dim,
124
  batch_size=batch_size,
125
  device=device,
126
  )