Update README.md
README.md CHANGED

@@ -18,14 +18,17 @@ from transformers import AutoTokenizer, AutoConfig, Lfm2MoeForCausalLM
 model_id = "LiquidAI/LFM2-24B-A2B"
 config = AutoConfig.from_pretrained(model_id)
 
-config.num_hidden_layers =
+config.num_hidden_layers = 3
 config.layer_types = [
+    "full_attention",
     "full_attention",
     "conv",
 ]
 config.num_attention_heads = 4
 config.num_key_value_heads = 4
 config.hidden_size = 16
+config.num_dense_layers = 1
+config.moe_intermediate_size = 16
 
 # === Step 2: Create model from config ===
 model = Lfm2MoeForCausalLM(config)
@@ -39,5 +42,4 @@ os.makedirs(output_dir, exist_ok=True)
 model.save_pretrained(output_dir, safe_serialization=False)
 tokenizer.save_pretrained(output_dir)
 
-
 ```
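
The updated example sets `config.num_hidden_layers = 3` to match the three entries now listed in `config.layer_types`, and adds `num_dense_layers` and `moe_intermediate_size`, presumably so the MoE expert feed-forward width is shrunk along with `hidden_size`. A minimal sanity check of the resulting tiny checkpoint is sketched below; it assumes the README script has already been run and that its `output_dir` was a local directory (the name `tiny-lfm2-moe` is a placeholder, not taken from the diff). Only standard `transformers` calls are used.

```python
# Sanity-check sketch: reload the tiny randomly initialized LFM2-MoE checkpoint
# saved by the README script and run a single forward pass.
import torch
from transformers import AutoTokenizer, Lfm2MoeForCausalLM

output_dir = "tiny-lfm2-moe"  # placeholder; use the README's actual output_dir

model = Lfm2MoeForCausalLM.from_pretrained(output_dir)
tokenizer = AutoTokenizer.from_pretrained(output_dir)

print(f"parameters: {model.num_parameters():,}")

inputs = tokenizer("Hello world", return_tensors="pt")
with torch.no_grad():
    out = model(**inputs)
print(tuple(out.logits.shape))  # (1, sequence_length, vocab_size)
```

If the reload succeeds and the logits come back with shape `(1, sequence_length, vocab_size)`, the tiny config round-trips correctly through `save_pretrained` / `from_pretrained`.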