glm-ocr-onnx-webgpu / kv /kv_config.json
brad-agi's picture
Upload kv/kv_config.json with huggingface_hub
7c14ef8 verified
{
"num_layers": 16,
"num_kv_heads": 8,
"head_dim": 96,
"hidden_size": 1536,
"components": [
"prefill_int8.onnx",
"decode_int8.onnx"
],
"kv_cache_format": "[batch, 8, seq_len, 96]",
"position_ids_format": "[4, batch, seq_len]"
}