{
  "model_type": "fasih",
  "architectures": [
    "GPT"
  ],
  "sequence_len": 2048,
  "vocab_size": 65536,
  "n_layer": 24,
  "n_head": 12,
  "n_kv_head": 12,
  "n_embd": 1536,
  "window_pattern": "SSSL",
  "framework": "nanochat",
  "training": {
    "stage": "sft",
    "sft_step": 187,
    "pretrain_steps": 15000,
    "pretrain_tokens": "15.7B",
    "sft_dataset": "InstAr-500k",
    "val_bpb": 0.351,
    "benchmarks": {
      "ArabicMMLU": "31.95%",
      "ACVA": "57.62%"
    }
  }
}