Upload README.md with huggingface_hub
Browse files
README.md
CHANGED
|
@@ -58,15 +58,29 @@ ONNX export of [LFM2.5-VL-1.6B](https://huggingface.co/LiquidAI/LFM2.5-VL-1.6B)
|
|
| 58 |
```
|
| 59 |
onnx/
|
| 60 |
├── embed_tokens.onnx # Token embeddings (FP32)
|
|
|
|
| 61 |
├── embed_tokens_fp16.onnx # Token embeddings (FP16)
|
|
|
|
| 62 |
├── embed_images.onnx # Vision encoder (FP32)
|
|
|
|
| 63 |
├── embed_images_fp16.onnx # Vision encoder (FP16)
|
|
|
|
| 64 |
├── embed_images_q4.onnx # Vision encoder (Q4)
|
|
|
|
| 65 |
├── embed_images_q8.onnx # Vision encoder (Q8)
|
|
|
|
| 66 |
├── decoder.onnx # Language decoder (FP32)
|
|
|
|
| 67 |
├── decoder_fp16.onnx # Language decoder (FP16)
|
|
|
|
| 68 |
└── decoder_q4.onnx # Language decoder (Q4)
|
| 69 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 70 |
```
|
| 71 |
|
| 72 |
## Python
|
|
@@ -94,6 +108,16 @@ embed_tokens_path = hf_hub_download(model_id, "onnx/embed_tokens_fp16.onnx")
|
|
| 94 |
embed_images_path = hf_hub_download(model_id, "onnx/embed_images_fp16.onnx")
|
| 95 |
decoder_path = hf_hub_download(model_id, "onnx/decoder_q4.onnx")
|
| 96 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 97 |
# Load ONNX sessions
|
| 98 |
embed_tokens = ort.InferenceSession(embed_tokens_path)
|
| 99 |
embed_images = ort.InferenceSession(embed_images_path)
|
|
@@ -220,12 +244,18 @@ const modelBase = `https://huggingface.co/${modelId}/resolve/main`;
|
|
| 220 |
const tokenizer = await AutoTokenizer.from_pretrained(modelId);
|
| 221 |
|
| 222 |
// Load ONNX sessions with external data
|
| 223 |
-
|
|
|
|
| 224 |
const onnxPath = `${modelBase}/onnx/${name}.onnx`;
|
| 225 |
-
const
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 226 |
return ort.InferenceSession.create(onnxPath, {
|
| 227 |
executionProviders: ["webgpu"],
|
| 228 |
-
externalData
|
| 229 |
});
|
| 230 |
}
|
| 231 |
|
|
@@ -310,7 +340,7 @@ console.log(tokenizer.decode(generatedTokens, { skip_special_tokens: true }));
|
|
| 310 |
- Recommended: `embed_images_fp16.onnx` + `decoder_q4.onnx`
|
| 311 |
- For higher quality: `embed_images_fp16.onnx` + `decoder_fp16.onnx`
|
| 312 |
- Image preprocessing requires tiling (512×512), patch extraction (16×16), and normalization
|
| 313 |
-
-
|
| 314 |
- int64 tensors require `BigInt64Array`
|
| 315 |
|
| 316 |
## License
|
|
|
|
| 58 |
```
|
| 59 |
onnx/
|
| 60 |
├── embed_tokens.onnx # Token embeddings (FP32)
|
| 61 |
+
├── embed_tokens.onnx_data
|
| 62 |
├── embed_tokens_fp16.onnx # Token embeddings (FP16)
|
| 63 |
+
├── embed_tokens_fp16.onnx_data
|
| 64 |
├── embed_images.onnx # Vision encoder (FP32)
|
| 65 |
+
├── embed_images.onnx_data*
|
| 66 |
├── embed_images_fp16.onnx # Vision encoder (FP16)
|
| 67 |
+
├── embed_images_fp16.onnx_data*
|
| 68 |
├── embed_images_q4.onnx # Vision encoder (Q4)
|
| 69 |
+
├── embed_images_q4.onnx_data
|
| 70 |
├── embed_images_q8.onnx # Vision encoder (Q8)
|
| 71 |
+
├── embed_images_q8.onnx_data
|
| 72 |
├── decoder.onnx # Language decoder (FP32)
|
| 73 |
+
├── decoder.onnx_data*
|
| 74 |
├── decoder_fp16.onnx # Language decoder (FP16)
|
| 75 |
+
├── decoder_fp16.onnx_data*
|
| 76 |
├── decoder_q4.onnx # Language decoder (Q4)
|
| 77 |
+
├── decoder_q4.onnx_data
|
| 78 |
+
├── decoder_q8.onnx # Language decoder (Q8)
|
| 79 |
+
└── decoder_q8.onnx_data
|
| 80 |
+
|
| 81 |
+
* Large models (>2GB) split weights across multiple files:
|
| 82 |
+
decoder.onnx_data, decoder.onnx_data_1, decoder.onnx_data_2, etc.
|
| 83 |
+
All data files must be in the same directory as the .onnx file.
|
| 84 |
```
|
| 85 |
|
| 86 |
## Python
|
|
|
|
| 108 |
embed_images_path = hf_hub_download(model_id, "onnx/embed_images_fp16.onnx")
|
| 109 |
decoder_path = hf_hub_download(model_id, "onnx/decoder_q4.onnx")
|
| 110 |
|
| 111 |
+
# Download all data files (handles multiple splits for large models)
|
| 112 |
+
from huggingface_hub import list_repo_files
|
| 113 |
+
for f in list_repo_files(model_id):
|
| 114 |
+
if any(f.startswith(f"onnx/{name}") for name in [
|
| 115 |
+
"embed_tokens_fp16.onnx_data",
|
| 116 |
+
"embed_images_fp16.onnx_data",
|
| 117 |
+
"decoder_q4.onnx_data"
|
| 118 |
+
]):
|
| 119 |
+
hf_hub_download(model_id, f)
|
| 120 |
+
|
| 121 |
# Load ONNX sessions
|
| 122 |
embed_tokens = ort.InferenceSession(embed_tokens_path)
|
| 123 |
embed_images = ort.InferenceSession(embed_images_path)
|
|
|
|
| 244 |
const tokenizer = await AutoTokenizer.from_pretrained(modelId);
|
| 245 |
|
| 246 |
// Load ONNX sessions with external data
|
| 247 |
+
// For models with multiple data files (>2GB), add additional entries to externalData array
|
| 248 |
+
async function loadSession(name, dataFiles = 1) {
|
| 249 |
const onnxPath = `${modelBase}/onnx/${name}.onnx`;
|
| 250 |
+
const externalData = [];
|
| 251 |
+
for (let i = 0; i < dataFiles; i++) {
|
| 252 |
+
const suffix = i === 0 ? "" : `_${i}`;
|
| 253 |
+
const fileName = `${name}.onnx_data${suffix}`;
|
| 254 |
+
externalData.push({ path: fileName, data: `${modelBase}/onnx/${fileName}` });
|
| 255 |
+
}
|
| 256 |
return ort.InferenceSession.create(onnxPath, {
|
| 257 |
executionProviders: ["webgpu"],
|
| 258 |
+
externalData,
|
| 259 |
});
|
| 260 |
}
|
| 261 |
|
|
|
|
| 340 |
- Recommended: `embed_images_fp16.onnx` + `decoder_q4.onnx`
|
| 341 |
- For higher quality: `embed_images_fp16.onnx` + `decoder_fp16.onnx`
|
| 342 |
- Image preprocessing requires tiling (512×512), patch extraction (16×16), and normalization
|
| 343 |
+
- Large models (>2GB) split weights across multiple files (e.g., `.onnx_data`, `.onnx_data_1`). Use `loadSession(name, dataFiles)` with the number of data files
|
| 344 |
- int64 tensors require `BigInt64Array`
|
| 345 |
|
| 346 |
## License
|