{
  "num_layers": 16,
  "num_kv_heads": 8,
  "head_dim": 96,
  "hidden_size": 1536,
  "components": [
    "prefill_int8.onnx",
    "decode_int8.onnx"
  ],
  "kv_cache_format": "[batch, 8, seq_len, 96]",
  "position_ids_format": "[4, batch, seq_len]"
}
{
  "num_layers": 16,
  "num_kv_heads": 8,
  "head_dim": 96,
  "hidden_size": 1536,
  "components": [
    "prefill_int8.onnx",
    "decode_int8.onnx"
  ],
  "kv_cache_format": "[batch, 8, seq_len, 96]",
  "position_ids_format": "[4, batch, seq_len]"
}