{"producer": {"name": "ammo", "version": "0.7.4"}, "architecture": "MedusaForCausalLM", "dtype": "float16", "num_hidden_layers": 32, "num_attention_heads": 32, "num_key_value_heads": 32, "hidden_size": 4096, "norm_epsilon": 1e-06, "vocab_size": 32000, "max_position_embeddings": 2048, "hidden_act": "silu", "use_parallel_embedding": true, "embedding_sharding_dim": 0, "quantization": {"quant_algo": "FP8", "kv_cache_quant_algo": "FP8"}, "mapping": {"world_size": 1, "tp_size": 1, "pp_size": 1}, "head_size": 128, "intermediate_size": 11008, "position_embedding_type": "rope_gpt_neox", "rotary_base": 10000.0, "max_draft_len": 63, "num_medusa_heads": 5, "num_medusa_layers": 1}