{
  "architectures": [
    "MathstralNanoForCausalLM"
  ],
  "model_type": "mathstral_nano",
  "n_layer": 4,
  "n_head": 8,
  "d_model": 64,
  "d_ff": 256,
  "vocab_size": 256,
  "seq_len": 64,
  "tokenizer": "byte-level (UTF-8 bytes 0-255, no special tokens)",
  "total_params": 236928,
  "torch_dtype": "float32",
  "transformers_version": "n/a (custom numpy model)"
}