ykhrustalev committed on
Commit
150f978
·
verified ·
1 Parent(s): a0ebe6c

Upload README.md with huggingface_hub

Browse files
Files changed (1) hide show
  1. README.md +35 -5
README.md CHANGED
@@ -58,15 +58,29 @@ ONNX export of [LFM2.5-VL-1.6B](https://huggingface.co/LiquidAI/LFM2.5-VL-1.6B)
58
  ```
59
  onnx/
60
  ├── embed_tokens.onnx # Token embeddings (FP32)
 
61
  ├── embed_tokens_fp16.onnx # Token embeddings (FP16)
 
62
  ├── embed_images.onnx # Vision encoder (FP32)
 
63
  ├── embed_images_fp16.onnx # Vision encoder (FP16)
 
64
  ├── embed_images_q4.onnx # Vision encoder (Q4)
 
65
  ├── embed_images_q8.onnx # Vision encoder (Q8)
 
66
  ├── decoder.onnx # Language decoder (FP32)
 
67
  ├── decoder_fp16.onnx # Language decoder (FP16)
 
68
  ├── decoder_q4.onnx # Language decoder (Q4)
69
- └── decoder_q8.onnx # Language decoder (Q8)
 
 
 
 
 
 
70
  ```
71
 
72
  ## Python
@@ -94,6 +108,16 @@ embed_tokens_path = hf_hub_download(model_id, "onnx/embed_tokens_fp16.onnx")
94
  embed_images_path = hf_hub_download(model_id, "onnx/embed_images_fp16.onnx")
95
  decoder_path = hf_hub_download(model_id, "onnx/decoder_q4.onnx")
96
 
 
 
 
 
 
 
 
 
 
 
97
  # Load ONNX sessions
98
  embed_tokens = ort.InferenceSession(embed_tokens_path)
99
  embed_images = ort.InferenceSession(embed_images_path)
@@ -220,12 +244,18 @@ const modelBase = `https://huggingface.co/${modelId}/resolve/main`;
220
  const tokenizer = await AutoTokenizer.from_pretrained(modelId);
221
 
222
  // Load ONNX sessions with external data
223
- async function loadSession(name) {
 
224
  const onnxPath = `${modelBase}/onnx/${name}.onnx`;
225
- const dataPath = `${modelBase}/onnx/${name}.onnx_data`;
 
 
 
 
 
226
  return ort.InferenceSession.create(onnxPath, {
227
  executionProviders: ["webgpu"],
228
- externalData: [{ path: `${name}.onnx_data`, data: dataPath }],
229
  });
230
  }
231
 
@@ -310,7 +340,7 @@ console.log(tokenizer.decode(generatedTokens, { skip_special_tokens: true }));
310
  - Recommended: `embed_images_fp16.onnx` + `decoder_q4.onnx`
311
  - For higher quality: `embed_images_fp16.onnx` + `decoder_fp16.onnx`
312
  - Image preprocessing requires tiling (512×512), patch extraction (16×16), and normalization
313
- - Models use external data files (`.onnx_data`) that are loaded automatically
314
  - int64 tensors require `BigInt64Array`
315
 
316
  ## License
 
58
  ```
59
  onnx/
60
  ├── embed_tokens.onnx # Token embeddings (FP32)
61
+ ├── embed_tokens.onnx_data
62
  ├── embed_tokens_fp16.onnx # Token embeddings (FP16)
63
+ ├── embed_tokens_fp16.onnx_data
64
  ├── embed_images.onnx # Vision encoder (FP32)
65
+ ├── embed_images.onnx_data*
66
  ├── embed_images_fp16.onnx # Vision encoder (FP16)
67
+ ├── embed_images_fp16.onnx_data*
68
  ├── embed_images_q4.onnx # Vision encoder (Q4)
69
+ ├── embed_images_q4.onnx_data
70
  ├── embed_images_q8.onnx # Vision encoder (Q8)
71
+ ├── embed_images_q8.onnx_data
72
  ├── decoder.onnx # Language decoder (FP32)
73
+ ├── decoder.onnx_data*
74
  ├── decoder_fp16.onnx # Language decoder (FP16)
75
+ ├── decoder_fp16.onnx_data*
76
  ├── decoder_q4.onnx # Language decoder (Q4)
77
+ ├── decoder_q4.onnx_data
78
+ ├── decoder_q8.onnx # Language decoder (Q8)
79
+ └── decoder_q8.onnx_data
80
+
81
+ * Large models (>2GB) split weights across multiple files:
82
+ decoder.onnx_data, decoder.onnx_data_1, decoder.onnx_data_2, etc.
83
+ All data files must be in the same directory as the .onnx file.
84
  ```
85
 
86
  ## Python
 
108
  embed_images_path = hf_hub_download(model_id, "onnx/embed_images_fp16.onnx")
109
  decoder_path = hf_hub_download(model_id, "onnx/decoder_q4.onnx")
110
 
111
+ # Download all data files (handles multiple splits for large models)
112
+ from huggingface_hub import list_repo_files
113
+ for f in list_repo_files(model_id):
114
+ if any(f.startswith(f"onnx/{name}") for name in [
115
+ "embed_tokens_fp16.onnx_data",
116
+ "embed_images_fp16.onnx_data",
117
+ "decoder_q4.onnx_data"
118
+ ]):
119
+ hf_hub_download(model_id, f)
120
+
121
  # Load ONNX sessions
122
  embed_tokens = ort.InferenceSession(embed_tokens_path)
123
  embed_images = ort.InferenceSession(embed_images_path)
 
244
  const tokenizer = await AutoTokenizer.from_pretrained(modelId);
245
 
246
  // Load ONNX sessions with external data
247
+ // For models with multiple data files (>2GB), add additional entries to externalData array
248
+ async function loadSession(name, dataFiles = 1) {
249
  const onnxPath = `${modelBase}/onnx/${name}.onnx`;
250
+ const externalData = [];
251
+ for (let i = 0; i < dataFiles; i++) {
252
+ const suffix = i === 0 ? "" : `_${i}`;
253
+ const fileName = `${name}.onnx_data${suffix}`;
254
+ externalData.push({ path: fileName, data: `${modelBase}/onnx/${fileName}` });
255
+ }
256
  return ort.InferenceSession.create(onnxPath, {
257
  executionProviders: ["webgpu"],
258
+ externalData,
259
  });
260
  }
261
 
 
340
  - Recommended: `embed_images_fp16.onnx` + `decoder_q4.onnx`
341
  - For higher quality: `embed_images_fp16.onnx` + `decoder_fp16.onnx`
342
  - Image preprocessing requires tiling (512×512), patch extraction (16×16), and normalization
343
+ - Large models (>2GB) split weights across multiple files (e.g., `.onnx_data`, `.onnx_data_1`). Use `loadSession(name, dataFiles)` with the number of data files
344
  - int64 tensors require `BigInt64Array`
345
 
346
  ## License