{
  "model_type": "3oc_lm",
  "architectures": [
    "ThirdOrderContinuousLM"
  ],
  "custom_inference_required": true,
  "custom_code_included": false,
  "weight_format": "safetensors",
  "hidden_size": 1024,
  "intermediate_size": 4096,
  "num_hidden_layers": 28,
  "num_attention_heads": 16,
  "num_key_value_heads": 4,
  "head_dim": 64,
  "vocab_size": 151643,
  "tokenizer_class": "Qwen2Tokenizer",
  "tokenizer_family": "Qwen2.5",
  "model_max_length": 131072,
  "positional_encoding": "RoPE",
  "normalization": "RMSNorm",
  "attention_type": "GQA",
  "mlp_activation": "SiLU",
  "torch_dtype": "bfloat16",
  "eos_token_id": 151643,
  "pad_token_id": 151643,
  "tie_word_embeddings": true,
  "parameter_count": 493140052
}