{
  "transformer_architecture": {
    "vocab_size": 128000,
    "vocab_file": "vocab.json",
    "hidden_size": 4608,
    "num_layers": 27,
    "num_attention_heads": 36,
    "num_local_attention_heads": 0,
    "local_attention_window_size": null,
    "rotary_embedding_base": 1000000,
    "rotary_percentage": 1.0,
    "sequence_length": 8192,
    "norm_type": "layernorm",
    "relative_position_embedding_type": "rotary_complex",
    "mlp_type": "default",
    "mlp_factor": 4.0,
    "attention_bias": true,
    "attention_qkv_in_one": false,
    "attention_num_kv_heads": 4,
    "attention_use_matmul": false,
    "mlp_bias": true,
    "key_query_norm": false,
    "weight_tying": false,
    "masked_softmax": {
      "kernel": "torch",
      "softmax_in_fp32": true,
      "scale": 1.0,
      "deterministic_flash_attn_bwd": false
    },
    "layernorm": {
      "optimization_type": "torch",
      "layernorm_epsilon": 1e-05
    },
    "precision": "bfloat16",
    "dropout_embedding": 0.0,
    "dropout_attention_probs": 0.0,
    "dropout_after_attention": 0.0,
    "dropout_after_mlp": 0.0,
    "finetunable_token_ids": [],
    "image_encoder": false,
    "dropout_image_encoder": 0.0,
    "lora_config": null
  }
}