vnixxa31 committed
Commit 81accf3 · verified · 1 Parent(s): 3d1b966

(Trained with Unsloth)

Files changed (3):
  1. config.json +101 -0
  2. tokenizer.json +0 -0
  3. tokenizer_config.json +14 -0
config.json ADDED
@@ -0,0 +1,101 @@
+ {
+ "architectures": [
+ "GraniteMoeHybridForCausalLM"
+ ],
+ "attention_bias": false,
+ "attention_dropout": 0.0,
+ "attention_multiplier": 0.0078125,
+ "bos_token_id": 100257,
+ "torch_dtype": "bfloat16",
+ "embedding_multiplier": 12,
+ "eos_token_id": 100257,
+ "hidden_act": "silu",
+ "hidden_size": 1536,
+ "initializer_range": 0.1,
+ "intermediate_size": 512,
+ "layer_types": [
+ "mamba",
+ "mamba",
+ "mamba",
+ "mamba",
+ "mamba",
+ "attention",
+ "mamba",
+ "mamba",
+ "mamba",
+ "mamba",
+ "mamba",
+ "mamba",
+ "mamba",
+ "mamba",
+ "mamba",
+ "attention",
+ "mamba",
+ "mamba",
+ "mamba",
+ "mamba",
+ "mamba",
+ "mamba",
+ "mamba",
+ "mamba",
+ "mamba",
+ "attention",
+ "mamba",
+ "mamba",
+ "mamba",
+ "mamba",
+ "mamba",
+ "mamba",
+ "mamba",
+ "mamba",
+ "mamba",
+ "attention",
+ "mamba",
+ "mamba",
+ "mamba",
+ "mamba"
+ ],
+ "logits_scaling": 6,
+ "mamba_chunk_size": 256,
+ "mamba_conv_bias": true,
+ "mamba_d_conv": 4,
+ "mamba_d_head": 64,
+ "mamba_d_state": 128,
+ "mamba_expand": 2,
+ "mamba_n_groups": 1,
+ "mamba_n_heads": 48,
+ "mamba_proj_bias": false,
+ "max_position_embeddings": 131072,
+ "model_name": "unsloth/granite-4.0-h-tiny-base",
+ "model_type": "granitemoehybrid",
+ "normalization_function": "rmsnorm",
+ "num_attention_heads": 12,
+ "num_experts_per_tok": 6,
+ "num_hidden_layers": 40,
+ "num_key_value_heads": 4,
+ "num_local_experts": 64,
+ "output_router_logits": false,
+ "pad_token_id": 100256,
+ "position_embedding_type": "nope",
+ "residual_multiplier": 0.22,
+ "rms_norm_eps": 1e-05,
+ "rope_parameters": {
+ "rope_theta": 10000,
+ "rope_type": "default"
+ },
+ "router_aux_loss_coef": 0.0,
+ "shared_intermediate_size": 1024,
+ "tie_word_embeddings": true,
+ "time_step_limit": [
+ 0.0,
+ {
+ "__float__": "Infinity"
+ }
+ ],
+ "time_step_max": 0.1,
+ "time_step_min": 0.001,
+ "unsloth_fixed": true,
+ "unsloth_version": "2026.3.15",
+ "use_cache": false,
+ "vocab_size": 100352
+ }
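The config above describes a hybrid checkpoint: 40 layers of which only 4 are attention blocks (the rest are mamba), a 64-expert MoE with 6 experts routed per token, and tied embeddings. A minimal sketch of loading it, assuming the repo id matches the config's "model_name" field and a transformers release recent enough to include the granitemoehybrid architecture:

```python
import torch
from transformers import AutoConfig, AutoModelForCausalLM

# Repo id taken from the "model_name" field in config.json; adjust if the
# checkpoint is published under a different name.
model_id = "unsloth/granite-4.0-h-tiny-base"

# Inspect the hybrid layout without downloading weights.
config = AutoConfig.from_pretrained(model_id)
print(config.model_type)                      # granitemoehybrid
print(config.num_hidden_layers)               # 40
print(config.layer_types.count("attention"))  # 4 attention blocks, rest mamba

# Load in bfloat16, matching "torch_dtype" in the config.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.bfloat16,
    device_map="auto",
)
```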
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,14 @@
+ {
+ "add_prefix_space": false,
+ "backend": "tokenizers",
+ "bos_token": "<|end_of_text|>",
+ "clean_up_tokenization_spaces": false,
+ "eos_token": "<|end_of_text|>",
+ "errors": "replace",
+ "is_local": false,
+ "model_max_length": 131072,
+ "pad_token": "<|pad|>",
+ "padding_side": "left",
+ "tokenizer_class": "GPT2Tokenizer",
+ "unk_token": "<|unk|>"
+ }
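Note the tokenizer is configured with "padding_side": "left", which matters for batched generation with decoder-only models. A minimal sketch of the resulting behavior, again assuming the same repo id as above:

```python
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("unsloth/granite-4.0-h-tiny-base")

# padding_side is "left" per tokenizer_config.json, so shorter prompts are
# padded at the front with <|pad|> (id 100256, the pad_token_id in config.json),
# keeping the real tokens flush against the generation boundary.
batch = tokenizer(
    ["hi", "a much longer prompt"],
    padding=True,
    return_tensors="pt",
)
print(batch["input_ids"].shape)
print(tokenizer.decode(batch["input_ids"][0]))  # pad tokens appear first
```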