glm-ocr-onnx / manifest.json
psyche's picture
Duplicate from Ji-Ha/glm-ocr-onnx
d8268c5
{
"model_id": "zai-org/GLM-OCR",
"dtype": "float16",
"opset": 18,
"static": true,
"external_data": true,
"image_size": {
"width": 840,
"height": 840
},
"max_seq_len": 2048,
"image_token_id": 59280,
"eos_token_ids": [
59246,
59253
],
"export_prompt": "Recognize the text in the image and output in Markdown format. Preserve the original layout (headings/paragraphs/tables/formulas). Do not fabricate content that does not exist in the image.",
"hidden_size": 1536,
"t_img": 900,
"export_devices": {
"vision": "cuda",
"text": "cuda"
},
"graphs": {
"vision": "fp16/glm_ocr_vision.onnx",
"embed": "fp16/glm_ocr_embed.onnx",
"rope": "fp16/glm_ocr_rope_document.onnx",
"decode_prefill_kv": "fp16/glm_ocr_decode_prefill_kv.onnx",
"decode_step_kv": "fp16/glm_ocr_decode_step_kv.onnx",
"vision_quant": "quant/glm_ocr_vision_quant.onnx"
},
"kv_cache": {
"num_layers": 16,
"num_key_value_heads": 8,
"head_dim": 96,
"max_cache_len": 2048
},
"default_profile": "document",
"prompt_profiles": {
"document": {
"prompt": "Recognize the text in the image and output in Markdown format. Preserve the original layout (headings/paragraphs/tables/formulas). Do not fabricate content that does not exist in the image.",
"rope": "fp16/glm_ocr_rope_document.onnx"
},
"text": {
"prompt": "Text Recognition:",
"rope": "fp16/glm_ocr_rope_text.onnx"
},
"table": {
"prompt": "Table Recognition:",
"rope": "fp16/glm_ocr_rope_table.onnx"
},
"formula": {
"prompt": "Formula Recognition:",
"rope": "fp16/glm_ocr_rope_formula.onnx"
}
},
"notes": [
"Vision wrapper handles packed [T,D] outputs by unsqueezing to [1,T,D].",
"Rope graphs are prompt-profile specific constants generated from get_rope_index (mRoPE).",
"Splice in JS: replace the contiguous run of t_img image_token_id tokens with image_embeds.",
"Decode outputs logits for last token only.",
"custom_w8: quantized MatMul/Gemm weights for graph 'decode_prefill_kv'.",
"dual-vision artifact: graphs.vision=fp16 and graphs.vision_quant=quantized."
],
"vision": {
"onnx": "artifact_glm_ocr_web_split/fp16/glm_ocr_vision.onnx",
"sha256": "efa009d7c358e4ebf83515b80a96fd828a05e40bd86be190dfabd9fc0f29bc73",
"bytes": 715251,
"data": "artifact_glm_ocr_web_split/fp16/glm_ocr_vision.onnx.data",
"data_sha256": "8691969b71702daabc378e031b66c62d13d054a7cfed17e2729ea707079d2b45",
"data_bytes": 14571213824
},
"embed": {
"onnx": "artifact_glm_ocr_web_split/fp16/glm_ocr_embed.onnx",
"sha256": "700497f7747ce77b2b34f519b662d2bee68f44e638de9391301a74011fd2bf20",
"bytes": 1791,
"data": "artifact_glm_ocr_web_split/fp16/glm_ocr_embed.onnx.data",
"data_sha256": "44c2c5e0a2a8d65605f06897249a03fb1d11051aee155ba598e1d1a302ababd0",
"data_bytes": 364904448
},
"rope": {
"onnx": "artifact_glm_ocr_web_split/fp16/glm_ocr_rope_document.onnx",
"sha256": "57bbac5220e5828a5ea0bf901974bc44afa190e3e7d9927bceca3ca8a63c4dc3",
"bytes": 4170,
"data": "artifact_glm_ocr_web_split/fp16/glm_ocr_rope_document.onnx.data",
"data_sha256": "cb38103e3aceeb1adeb7104611bd035656dd8433b3c03eb96ac2b5e5df9b55b4",
"data_bytes": 98304
},
"decode": {
"onnx": "artifact_glm_ocr_web_split/fp16/glm_ocr_decode.onnx",
"sha256": "6d536a5b1e671e2229bb88a050c747ad6fdc9db3292927af4552c33928a46001",
"bytes": 2429565,
"data": "artifact_glm_ocr_web_split/fp16/glm_ocr_decode.onnx.data",
"data_sha256": "6847fe2bfe59df72975856dc61730b2ecb8a0f79455a898375fc4f7c2c7cb2da",
"data_bytes": 2328431616
},
"decode_prefill_kv": {
"onnx": "artifact_glm_ocr_web_split/fp16/glm_ocr_decode_prefill_kv.onnx",
"sha256": "27974f2680f37205f1a51244dbe7fc65e0eefbcf926a3fc9e7a34a5095d4a755",
"bytes": 2547577,
"data": "artifact_glm_ocr_web_split/fp16/glm_ocr_decode_prefill_kv.onnx.data",
"data_sha256": "6847fe2bfe59df72975856dc61730b2ecb8a0f79455a898375fc4f7c2c7cb2da",
"data_bytes": 2328431616
},
"decode_step_kv": {
"onnx": "artifact_glm_ocr_web_split/fp16/glm_ocr_decode_step_kv.onnx",
"sha256": "cea69c66eee2ca5e2e1e62185fd50bae0b431735027d107d070a4d8e02f4a318",
"bytes": 4162536,
"data": "artifact_glm_ocr_web_split/fp16/glm_ocr_decode_step_kv.onnx.data",
"data_sha256": "b3a254982459a269a566bdcaa574f9edb4c77fb470c0745e32a36e6ecc1ca927",
"data_bytes": 2328349696
},
"rope_document": {
"onnx": "artifact_glm_ocr_web_split/fp16/glm_ocr_rope_document.onnx",
"sha256": "57bbac5220e5828a5ea0bf901974bc44afa190e3e7d9927bceca3ca8a63c4dc3",
"bytes": 4170,
"data": "artifact_glm_ocr_web_split/fp16/glm_ocr_rope_document.onnx.data",
"data_sha256": "cb38103e3aceeb1adeb7104611bd035656dd8433b3c03eb96ac2b5e5df9b55b4",
"data_bytes": 98304
},
"rope_text": {
"onnx": "artifact_glm_ocr_web_split/fp16/glm_ocr_rope_text.onnx",
"sha256": "b68daba410b29a635de18b135212569db1c28add28fbe7c5b63dcd0c7b3d0b63",
"bytes": 4166,
"data": "artifact_glm_ocr_web_split/fp16/glm_ocr_rope_text.onnx.data",
"data_sha256": "4147cb50328e4bb79a289496b2d0dea536c02898fc62591fad72da0b636a33ee",
"data_bytes": 98304
},
"rope_table": {
"onnx": "artifact_glm_ocr_web_split/fp16/glm_ocr_rope_table.onnx",
"sha256": "88faf6507c9baa52f4069b4a6469c5ab692038a538ef3d9df52ad2408648ba22",
"bytes": 4167,
"data": "artifact_glm_ocr_web_split/fp16/glm_ocr_rope_table.onnx.data",
"data_sha256": "4147cb50328e4bb79a289496b2d0dea536c02898fc62591fad72da0b636a33ee",
"data_bytes": 98304
},
"rope_formula": {
"onnx": "artifact_glm_ocr_web_split/fp16/glm_ocr_rope_formula.onnx",
"sha256": "de0c135e8dd941ec1a0246384d20c20984eb2bbed8fd08b43830ba40c081f232",
"bytes": 4169,
"data": "artifact_glm_ocr_web_split/fp16/glm_ocr_rope_formula.onnx.data",
"data_sha256": "2929c9761dd6510fa6734a2ded0bd07d7b8f2705072a0542e76b7ccda9c0f713",
"data_bytes": 98304
}
}