Ji-Ha committed on
Commit
e90a1ff
·
verified ·
1 Parent(s): 3ba1398

Upload 20 files

Browse files
.gitattributes CHANGED
@@ -33,3 +33,8 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ fp16/glm_ocr_decode_prefill_kv.onnx.data filter=lfs diff=lfs merge=lfs -text
37
+ fp16/glm_ocr_decode_step_kv.onnx.data filter=lfs diff=lfs merge=lfs -text
38
+ fp16/glm_ocr_embed.onnx.data filter=lfs diff=lfs merge=lfs -text
39
+ fp16/glm_ocr_vision.onnx.data filter=lfs diff=lfs merge=lfs -text
40
+ quant/glm_ocr_vision_quant.onnx.data filter=lfs diff=lfs merge=lfs -text
fp16/glm_ocr_decode_prefill_kv.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e469aac93dff5835c621d1e7a8987aa802b112d8092b780d4742a317534cd627
3
+ size 2551204
fp16/glm_ocr_decode_prefill_kv.onnx.data ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bd63f01dac7b06ee4920b7c90d38cfc081988f1eb93b04eaa684c6e923e5f360
3
+ size 846466048
fp16/glm_ocr_decode_step_kv.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cea69c66eee2ca5e2e1e62185fd50bae0b431735027d107d070a4d8e02f4a318
3
+ size 4162536
fp16/glm_ocr_decode_step_kv.onnx.data ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b3a254982459a269a566bdcaa574f9edb4c77fb470c0745e32a36e6ecc1ca927
3
+ size 2328349696
fp16/glm_ocr_embed.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:700497f7747ce77b2b34f519b662d2bee68f44e638de9391301a74011fd2bf20
3
+ size 1791
fp16/glm_ocr_embed.onnx.data ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:44c2c5e0a2a8d65605f06897249a03fb1d11051aee155ba598e1d1a302ababd0
3
+ size 364904448
fp16/glm_ocr_rope_document.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:57bbac5220e5828a5ea0bf901974bc44afa190e3e7d9927bceca3ca8a63c4dc3
3
+ size 4170
fp16/glm_ocr_rope_document.onnx.data ADDED
Binary file (98.3 kB). View file
 
fp16/glm_ocr_rope_formula.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:de0c135e8dd941ec1a0246384d20c20984eb2bbed8fd08b43830ba40c081f232
3
+ size 4169
fp16/glm_ocr_rope_formula.onnx.data ADDED
Binary file (98.3 kB). View file
 
fp16/glm_ocr_rope_table.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:88faf6507c9baa52f4069b4a6469c5ab692038a538ef3d9df52ad2408648ba22
3
+ size 4167
fp16/glm_ocr_rope_table.onnx.data ADDED
Binary file (98.3 kB). View file
 
fp16/glm_ocr_rope_text.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b68daba410b29a635de18b135212569db1c28add28fbe7c5b63dcd0c7b3d0b63
3
+ size 4166
fp16/glm_ocr_rope_text.onnx.data ADDED
Binary file (98.3 kB). View file
 
fp16/glm_ocr_vision.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:efa009d7c358e4ebf83515b80a96fd828a05e40bd86be190dfabd9fc0f29bc73
3
+ size 715251
fp16/glm_ocr_vision.onnx.data ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8691969b71702daabc378e031b66c62d13d054a7cfed17e2729ea707079d2b45
3
+ size 14571213824
manifest.json ADDED
@@ -0,0 +1,145 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_id": "zai-org/GLM-OCR",
3
+ "dtype": "float16",
4
+ "opset": 18,
5
+ "static": true,
6
+ "external_data": true,
7
+ "image_size": {
8
+ "width": 840,
9
+ "height": 840
10
+ },
11
+ "max_seq_len": 2048,
12
+ "image_token_id": 59280,
13
+ "eos_token_ids": [
14
+ 59246,
15
+ 59253
16
+ ],
17
+ "export_prompt": "Recognize the text in the image and output in Markdown format. Preserve the original layout (headings/paragraphs/tables/formulas). Do not fabricate content that does not exist in the image.",
18
+ "hidden_size": 1536,
19
+ "t_img": 900,
20
+ "export_devices": {
21
+ "vision": "cuda",
22
+ "text": "cuda"
23
+ },
24
+ "graphs": {
25
+ "vision": "fp16/glm_ocr_vision.onnx",
26
+ "embed": "fp16/glm_ocr_embed.onnx",
27
+ "rope": "fp16/glm_ocr_rope_document.onnx",
28
+ "decode_prefill_kv": "fp16/glm_ocr_decode_prefill_kv.onnx",
29
+ "decode_step_kv": "fp16/glm_ocr_decode_step_kv.onnx",
30
+ "vision_quant": "quant/glm_ocr_vision_quant.onnx"
31
+ },
32
+ "kv_cache": {
33
+ "num_layers": 16,
34
+ "num_key_value_heads": 8,
35
+ "head_dim": 96,
36
+ "max_cache_len": 2048
37
+ },
38
+ "default_profile": "document",
39
+ "prompt_profiles": {
40
+ "document": {
41
+ "prompt": "Recognize the text in the image and output in Markdown format. Preserve the original layout (headings/paragraphs/tables/formulas). Do not fabricate content that does not exist in the image.",
42
+ "rope": "fp16/glm_ocr_rope_document.onnx"
43
+ },
44
+ "text": {
45
+ "prompt": "Text Recognition:",
46
+ "rope": "fp16/glm_ocr_rope_text.onnx"
47
+ },
48
+ "table": {
49
+ "prompt": "Table Recognition:",
50
+ "rope": "fp16/glm_ocr_rope_table.onnx"
51
+ },
52
+ "formula": {
53
+ "prompt": "Formula Recognition:",
54
+ "rope": "fp16/glm_ocr_rope_formula.onnx"
55
+ }
56
+ },
57
+ "notes": [
58
+ "Vision wrapper handles packed [T,D] outputs by unsqueezing to [1,T,D].",
59
+ "Rope graphs are prompt-profile specific constants generated from get_rope_index (mRoPE).",
60
+ "Do splice in JS: replace contiguous image_token_id block of length t_img with image_embeds.",
61
+ "Decode outputs logits for last token only.",
62
+ "custom_w8: quantized MatMul/Gemm weights for graph 'decode_prefill_kv'.",
63
+ "dual-vision artifact: graphs.vision=fp16 and graphs.vision_quant=quantized."
64
+ ],
65
+ "vision": {
66
+ "onnx": "artifact_glm_ocr_web_split/fp16/glm_ocr_vision.onnx",
67
+ "sha256": "efa009d7c358e4ebf83515b80a96fd828a05e40bd86be190dfabd9fc0f29bc73",
68
+ "bytes": 715251,
69
+ "data": "artifact_glm_ocr_web_split/fp16/glm_ocr_vision.onnx.data",
70
+ "data_sha256": "8691969b71702daabc378e031b66c62d13d054a7cfed17e2729ea707079d2b45",
71
+ "data_bytes": 14571213824
72
+ },
73
+ "embed": {
74
+ "onnx": "artifact_glm_ocr_web_split/fp16/glm_ocr_embed.onnx",
75
+ "sha256": "700497f7747ce77b2b34f519b662d2bee68f44e638de9391301a74011fd2bf20",
76
+ "bytes": 1791,
77
+ "data": "artifact_glm_ocr_web_split/fp16/glm_ocr_embed.onnx.data",
78
+ "data_sha256": "44c2c5e0a2a8d65605f06897249a03fb1d11051aee155ba598e1d1a302ababd0",
79
+ "data_bytes": 364904448
80
+ },
81
+ "rope": {
82
+ "onnx": "artifact_glm_ocr_web_split/fp16/glm_ocr_rope_document.onnx",
83
+ "sha256": "57bbac5220e5828a5ea0bf901974bc44afa190e3e7d9927bceca3ca8a63c4dc3",
84
+ "bytes": 4170,
85
+ "data": "artifact_glm_ocr_web_split/fp16/glm_ocr_rope_document.onnx.data",
86
+ "data_sha256": "cb38103e3aceeb1adeb7104611bd035656dd8433b3c03eb96ac2b5e5df9b55b4",
87
+ "data_bytes": 98304
88
+ },
89
+ "decode": {
90
+ "onnx": "artifact_glm_ocr_web_split/fp16/glm_ocr_decode.onnx",
91
+ "sha256": "6d536a5b1e671e2229bb88a050c747ad6fdc9db3292927af4552c33928a46001",
92
+ "bytes": 2429565,
93
+ "data": "artifact_glm_ocr_web_split/fp16/glm_ocr_decode.onnx.data",
94
+ "data_sha256": "6847fe2bfe59df72975856dc61730b2ecb8a0f79455a898375fc4f7c2c7cb2da",
95
+ "data_bytes": 2328431616
96
+ },
97
+ "decode_prefill_kv": {
98
+ "onnx": "artifact_glm_ocr_web_split/fp16/glm_ocr_decode_prefill_kv.onnx",
99
+ "sha256": "e469aac93dff5835c621d1e7a8987aa802b112d8092b780d4742a317534cd627",
100
+ "bytes": 2551204,
101
+ "data": "artifact_glm_ocr_web_split/fp16/glm_ocr_decode_prefill_kv.onnx.data",
102
+ "data_sha256": "bd63f01dac7b06ee4920b7c90d38cfc081988f1eb93b04eaa684c6e923e5f360",
103
+ "data_bytes": 846466048
104
+ },
105
+ "decode_step_kv": {
106
+ "onnx": "artifact_glm_ocr_web_split/fp16/glm_ocr_decode_step_kv.onnx",
107
+ "sha256": "cea69c66eee2ca5e2e1e62185fd50bae0b431735027d107d070a4d8e02f4a318",
108
+ "bytes": 4162536,
109
+ "data": "artifact_glm_ocr_web_split/fp16/glm_ocr_decode_step_kv.onnx.data",
110
+ "data_sha256": "b3a254982459a269a566bdcaa574f9edb4c77fb470c0745e32a36e6ecc1ca927",
111
+ "data_bytes": 2328349696
112
+ },
113
+ "rope_document": {
114
+ "onnx": "artifact_glm_ocr_web_split/fp16/glm_ocr_rope_document.onnx",
115
+ "sha256": "57bbac5220e5828a5ea0bf901974bc44afa190e3e7d9927bceca3ca8a63c4dc3",
116
+ "bytes": 4170,
117
+ "data": "artifact_glm_ocr_web_split/fp16/glm_ocr_rope_document.onnx.data",
118
+ "data_sha256": "cb38103e3aceeb1adeb7104611bd035656dd8433b3c03eb96ac2b5e5df9b55b4",
119
+ "data_bytes": 98304
120
+ },
121
+ "rope_text": {
122
+ "onnx": "artifact_glm_ocr_web_split/fp16/glm_ocr_rope_text.onnx",
123
+ "sha256": "b68daba410b29a635de18b135212569db1c28add28fbe7c5b63dcd0c7b3d0b63",
124
+ "bytes": 4166,
125
+ "data": "artifact_glm_ocr_web_split/fp16/glm_ocr_rope_text.onnx.data",
126
+ "data_sha256": "4147cb50328e4bb79a289496b2d0dea536c02898fc62591fad72da0b636a33ee",
127
+ "data_bytes": 98304
128
+ },
129
+ "rope_table": {
130
+ "onnx": "artifact_glm_ocr_web_split/fp16/glm_ocr_rope_table.onnx",
131
+ "sha256": "88faf6507c9baa52f4069b4a6469c5ab692038a538ef3d9df52ad2408648ba22",
132
+ "bytes": 4167,
133
+ "data": "artifact_glm_ocr_web_split/fp16/glm_ocr_rope_table.onnx.data",
134
+ "data_sha256": "4147cb50328e4bb79a289496b2d0dea536c02898fc62591fad72da0b636a33ee",
135
+ "data_bytes": 98304
136
+ },
137
+ "rope_formula": {
138
+ "onnx": "artifact_glm_ocr_web_split/fp16/glm_ocr_rope_formula.onnx",
139
+ "sha256": "de0c135e8dd941ec1a0246384d20c20984eb2bbed8fd08b43830ba40c081f232",
140
+ "bytes": 4169,
141
+ "data": "artifact_glm_ocr_web_split/fp16/glm_ocr_rope_formula.onnx.data",
142
+ "data_sha256": "2929c9761dd6510fa6734a2ded0bd07d7b8f2705072a0542e76b7ccda9c0f713",
143
+ "data_bytes": 98304
144
+ }
145
+ }
manifest.web.json ADDED
@@ -0,0 +1,113 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_id": "zai-org/GLM-OCR",
3
+ "dtype": "float16",
4
+ "opset": 18,
5
+ "max_seq_len": 2048,
6
+ "image_size": {
7
+ "width": 840,
8
+ "height": 840
9
+ },
10
+ "image_token_id": 59280,
11
+ "eos_token_ids": [
12
+ 59246,
13
+ 59253
14
+ ],
15
+ "kv_cache": {
16
+ "num_layers": 16,
17
+ "num_key_value_heads": 8,
18
+ "head_dim": 96,
19
+ "max_cache_len": 2048
20
+ },
21
+ "prompt_profile": "document",
22
+ "base_url": "./",
23
+ "graphs": {
24
+ "vision": "fp16/glm_ocr_vision.onnx",
25
+ "embed": "fp16/glm_ocr_embed.onnx",
26
+ "rope": "fp16/glm_ocr_rope_document.onnx",
27
+ "decode_prefill_kv": "fp16/glm_ocr_decode_prefill_kv.onnx",
28
+ "decode_step_kv": "fp16/glm_ocr_decode_step_kv.onnx",
29
+ "vision_quant": "quant/glm_ocr_vision_quant.onnx"
30
+ },
31
+ "runtime": {
32
+ "webgpu": {
33
+ "executionProviders": [
34
+ "webgpu"
35
+ ],
36
+ "logSeverityLevel": 3
37
+ },
38
+ "wasm": {
39
+ "executionProviders": [
40
+ "wasm"
41
+ ],
42
+ "logSeverityLevel": 3
43
+ }
44
+ },
45
+ "profiles": {
46
+ "full_browser_kv": {
47
+ "description": "All sessions in browser, KV prefill+step decoding.",
48
+ "graphs": [
49
+ "vision",
50
+ "embed",
51
+ "rope",
52
+ "decode_prefill_kv",
53
+ "decode_step_kv"
54
+ ],
55
+ "requires_image_embeds_input": false,
56
+ "estimated_weight_gb": 16.874
57
+ },
58
+ "hybrid_server_vision_client_kv": {
59
+ "description": "Vision/image embedding runs on server; browser runs text decode with KV.",
60
+ "graphs": [
61
+ "embed",
62
+ "rope",
63
+ "decode_prefill_kv",
64
+ "decode_step_kv"
65
+ ],
66
+ "requires_image_embeds_input": true,
67
+ "estimated_weight_gb": 3.303
68
+ },
69
+ "hybrid_server_vision_client_prefill_only": {
70
+ "description": "Vision on server; browser uses decode_prefill_kv only (simpler, slower token loop).",
71
+ "graphs": [
72
+ "embed",
73
+ "rope",
74
+ "decode_prefill_kv"
75
+ ],
76
+ "requires_image_embeds_input": true,
77
+ "estimated_weight_gb": 1.131
78
+ }
79
+ },
80
+ "load_order": [
81
+ "vision",
82
+ "vision_quant",
83
+ "embed",
84
+ "rope",
85
+ "decode_prefill_kv",
86
+ "decode_step_kv"
87
+ ],
88
+ "memory_budget_gb": {
89
+ "full_browser_kv": 16.874,
90
+ "full_browser_prefill_only": 14.702,
91
+ "hybrid_server_vision_client_kv": 3.303,
92
+ "hybrid_server_vision_client_prefill_only": 1.131
93
+ },
94
+ "per_graph_size_gb": {
95
+ "vision": 13.571,
96
+ "embed": 0.34,
97
+ "rope_profile": 0.0,
98
+ "decode_prefill_kv": 0.791,
99
+ "decode_step_kv": 2.172,
100
+ "vision_quant": 0.458
101
+ },
102
+ "warnings": [
103
+ "vision is 13.57 GB; likely too large for direct browser startup on most clients.",
104
+ "decode_prefill_kv is 0.79 GB; likely too large for direct browser startup on most clients.",
105
+ "decode_step_kv is 2.17 GB; likely too large for direct browser startup on most clients."
106
+ ],
107
+ "notes": [
108
+ "This manifest is for ORT Web (WebGPU/WASM) session wiring.",
109
+ "Prefer lazy session creation; do not load unused graphs.",
110
+ "Use rope graph matching the exported prompt profile.",
111
+ "For browser deployments, hybrid mode is usually required at current model size."
112
+ ]
113
+ }
quant/glm_ocr_vision_quant.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3daf41d962e7878703682f8aed901bc697d1e9652e148c21decef39691750702
3
+ size 767365
quant/glm_ocr_vision_quant.onnx.data ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fdfd0d65846923674dc0ae0c18c30d09dfb4752125fb93c45b68ae6c4fb7b264
3
+ size 490746880