{
  "producer": {
    "name": "ammo",
    "version": "0.7.4"
  },
  "architecture": "LlamaForCausalLM",
  "dtype": "float16",
  "num_hidden_layers": 80,
  "num_attention_heads": 64,
  "num_key_value_heads": 8,
  "hidden_size": 8192,
  "norm_epsilon": 1e-05,
  "vocab_size": 128256,
  "max_position_embeddings": 8192,
  "hidden_act": "silu",
  "use_parallel_embedding": true,
  "embedding_sharding_dim": 0,
  "quantization": {
    "quant_algo": "W4A16_AWQ",
    "kv_cache_quant_algo": "INT8",
    "group_size": 64,
    "has_zero_point": false,
    "pre_quant_scale": true,
    "exclude_modules": [
      "lm_head"
    ]
  }
}