Update architecture and tokenizer
Browse files
- huggingface.py +1 -1
huggingface.py
CHANGED
|
@@ -120,7 +120,7 @@ class ShramForCausalLM(PreTrainedModel, GenerationMixin):
|
|
| 120 |
num_local_heads=self.config.num_sliding_window_heads,
|
| 121 |
local_head_dim=self.config.head_dim,
|
| 122 |
num_mosrah_heads=self.config.num_mosrah_heads,
|
| 123 |
-
mosrah_head_dim=self.config.
|
| 124 |
batch_size=batch_size,
|
| 125 |
device=device,
|
| 126 |
)
|
|
|
|
| 120 |
num_local_heads=self.config.num_sliding_window_heads,
|
| 121 |
local_head_dim=self.config.head_dim,
|
| 122 |
num_mosrah_heads=self.config.num_mosrah_heads,
|
| 123 |
+
mosrah_head_dim=self.config.head_dim,
|
| 124 |
batch_size=batch_size,
|
| 125 |
device=device,
|
| 126 |
)
|