rkazants committed on
Commit
0f49643
·
verified ·
1 Parent(s): 0e429bb

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +34 -3
README.md CHANGED
@@ -1,3 +1,34 @@
1
- ---
2
- license: apache-2.0
3
- ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: apache-2.0
3
+ ---
4
+
5
+ Here is the code to create this tiny model:
6
+
7
+ ```python
8
+ import os
9
+
10
+ from transformers import MambaConfig, MambaForCausalLM, AutoTokenizer
11
+
12
+ model_dir = "state-spaces/mamba-130m-hf"
13
+ tokenizer = AutoTokenizer.from_pretrained(model_dir)
14
+
15
+ # === Step 1: Define tiny model config ===
16
+ config = MambaConfig(
17
+ d_model=64, # Smaller hidden dimension
18
+ n_layer=2, # Just two layers
19
+ d_state=16, # Minimal state size
20
+ expand=2, # Expansion factor of 2 for the intermediate dimension
21
+ conv_kernel=3, # Smallest convolution kernel
22
+ vocab_size=50280,
23
+ )
24
+
25
+ # === Step 2: Create model from config ===
26
+ model = MambaForCausalLM(config)
27
+
28
+ # === Step 3: Save model and tokenizer to disk ===
29
+ output_dir = "./tiny-mamba"
30
+ os.makedirs(output_dir, exist_ok=True)
31
+ model.save_pretrained(output_dir)
32
+ tokenizer.save_pretrained(output_dir)
33
+ print(f"Tiny Mamba model and tokenizer saved to: {output_dir}")
34
+ ```