Upload folder using huggingface_hub
Files changed:
- config.json: +5 -2
- modeling_shivik_m2.py: +9 -0
config.json CHANGED

```diff
@@ -1,5 +1,5 @@
 {
-  "model_type": "
+  "model_type": "shivik_m2",
   "vocab_size": 49152,
   "d_model": 2048,
   "n_layers": 24,
@@ -7,5 +7,8 @@
   "kv_heads": 4,
   "rotary_dim": 128,
   "context_length": 4096,
-  "use_cache": true
+  "use_cache": true,
+  "architectures": [
+    "ShivikM2ForCausalLM"
+  ]
 }
```
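With `model_type` set to `shivik_m2` and the model class listed under `architectures`, the Transformers auto classes can resolve this checkpoint once the custom classes are registered (see the next file). A minimal sketch of the intended loading flow, assuming `modeling_shivik_m2.py` is importable from the working directory and the checkpoint path is a placeholder:

```python
from transformers import AutoConfig, AutoModelForCausalLM

# Importing the modeling file runs the registration block defined at its
# bottom, so "shivik_m2" becomes a known model_type for the auto classes.
import modeling_shivik_m2  # noqa: F401

# AutoConfig reads "model_type": "shivik_m2" from config.json and resolves
# it to ShivikM2Config via the registered mapping.
config = AutoConfig.from_pretrained("path/to/shivik-m2-checkpoint")  # placeholder path

# AutoModelForCausalLM then maps the config to ShivikM2ForCausalLM.
model = AutoModelForCausalLM.from_config(config)
```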
modeling_shivik_m2.py CHANGED

```diff
@@ -189,3 +189,12 @@ class ShivikM2ForCausalLM(PreTrainedModel, GenerationMixin):
         self.model.lm_head.weight = self.model.embed.weight
     def forward(self, input_ids=None, past_key_values=None, **kwargs):
         return self.model(input_ids, past_key_values, use_cache=kwargs.get("use_cache", False))
+
+# === Auto-register with Transformers ===
+try:
+    from transformers.models.auto.configuration_auto import CONFIG_MAPPING
+    from transformers.models.auto.modeling_auto import MODEL_FOR_CAUSAL_LM_MAPPING
+    CONFIG_MAPPING.register("shivik_m2", ShivikM2Config)
+    MODEL_FOR_CAUSAL_LM_MAPPING.register("shivik_m2", ShivikM2ForCausalLM)
+except:
+    pass
```
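Two caveats on this registration block: it writes to internal `transformers` mappings rather than the public API, and the bare `except: pass` silently hides any registration failure. The model mapping is also registered with a string key, while the auto classes appear to look models up by config class, so that entry may never match. For comparison, a sketch of the documented registration route on recent `transformers` versions, with the class names taken from the diff above and the checkpoint path a placeholder:

```python
from transformers import AutoConfig, AutoModelForCausalLM

from modeling_shivik_m2 import ShivikM2Config, ShivikM2ForCausalLM

# Public registration API: map the model_type string to the config class,
# then map the config class to the model class.
AutoConfig.register("shivik_m2", ShivikM2Config)
AutoModelForCausalLM.register(ShivikM2Config, ShivikM2ForCausalLM)

model = AutoModelForCausalLM.from_pretrained("path/to/shivik-m2-checkpoint")
```

Unlike the `try/except` wrapper in the commit, this route raises immediately if a mapping is already taken or a class is malformed, which makes registration problems visible at import time.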