Upload README.md with huggingface_hub
Browse files
README.md
CHANGED
|
@@ -58,15 +58,29 @@ ONNX export of [LFM2.5-VL-1.6B](https://huggingface.co/LiquidAI/LFM2.5-VL-1.6B)
|
|
| 58 |
```
|
| 59 |
onnx/
|
| 60 |
├── embed_tokens.onnx # Token embeddings (FP32)
|
|
|
|
| 61 |
├── embed_tokens_fp16.onnx # Token embeddings (FP16)
|
|
|
|
| 62 |
├── embed_images.onnx # Vision encoder (FP32)
|
|
|
|
| 63 |
├── embed_images_fp16.onnx # Vision encoder (FP16)
|
|
|
|
| 64 |
├── embed_images_q4.onnx # Vision encoder (Q4)
|
|
|
|
| 65 |
├── embed_images_q8.onnx # Vision encoder (Q8)
|
|
|
|
| 66 |
├── decoder.onnx # Language decoder (FP32)
|
|
|
|
| 67 |
├── decoder_fp16.onnx # Language decoder (FP16)
|
|
|
|
| 68 |
└── decoder_q4.onnx # Language decoder (Q4)
|
| 69 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 70 |
```
|
| 71 |
|
| 72 |
## Python
|
|
@@ -94,6 +108,16 @@ embed_tokens_path = hf_hub_download(model_id, "onnx/embed_tokens_fp16.onnx")
|
|
| 94 |
embed_images_path = hf_hub_download(model_id, "onnx/embed_images_fp16.onnx")
|
| 95 |
decoder_path = hf_hub_download(model_id, "onnx/decoder_q4.onnx")
|
| 96 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 97 |
# Load ONNX sessions
|
| 98 |
embed_tokens = ort.InferenceSession(embed_tokens_path)
|
| 99 |
embed_images = ort.InferenceSession(embed_images_path)
|
|
@@ -220,12 +244,18 @@ const modelBase = `https://huggingface.co/${modelId}/resolve/main`;
|
|
| 220 |
const tokenizer = await AutoTokenizer.from_pretrained(modelId);
|
| 221 |
|
| 222 |
// Load ONNX sessions with external data
|
| 223 |
-
|
|
|
|
| 224 |
const onnxPath = `${modelBase}/onnx/${name}.onnx`;
|
| 225 |
-
const
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 226 |
return ort.InferenceSession.create(onnxPath, {
|
| 227 |
executionProviders: ["webgpu"],
|
| 228 |
-
externalData
|
| 229 |
});
|
| 230 |
}
|
| 231 |
|
|
@@ -310,7 +340,7 @@ console.log(tokenizer.decode(generatedTokens, { skip_special_tokens: true }));
|
|
| 310 |
- Recommended: `embed_images_fp16.onnx` + `decoder_q4.onnx`
|
| 311 |
- For higher quality: `embed_images_fp16.onnx` + `decoder_fp16.onnx`
|
| 312 |
- Image preprocessing requires tiling (512×512), patch extraction (16×16), and normalization
|
| 313 |
-
-
|
| 314 |
- int64 tensors require `BigInt64Array`
|
| 315 |
|
| 316 |
## License
|
|
|
|
| 58 |
```
|
| 59 |
onnx/
|
| 60 |
├── embed_tokens.onnx # Token embeddings (FP32)
|
| 61 |
+
├── embed_tokens.onnx_data
|
| 62 |
├── embed_tokens_fp16.onnx # Token embeddings (FP16)
|
| 63 |
+
├── embed_tokens_fp16.onnx_data
|
| 64 |
├── embed_images.onnx # Vision encoder (FP32)
|
| 65 |
+
├── embed_images.onnx_data*
|
| 66 |
├── embed_images_fp16.onnx # Vision encoder (FP16)
|
| 67 |
+
├── embed_images_fp16.onnx_data*
|
| 68 |
├── embed_images_q4.onnx # Vision encoder (Q4)
|
| 69 |
+
├── embed_images_q4.onnx_data
|
| 70 |
├── embed_images_q8.onnx # Vision encoder (Q8)
|
| 71 |
+
├── embed_images_q8.onnx_data
|
| 72 |
├── decoder.onnx # Language decoder (FP32)
|
| 73 |
+
├── decoder.onnx_data*
|
| 74 |
├── decoder_fp16.onnx # Language decoder (FP16)
|
| 75 |
+
├── decoder_fp16.onnx_data*
|
| 76 |
├── decoder_q4.onnx # Language decoder (Q4)
|
| 77 |
+
├── decoder_q4.onnx_data
|
| 78 |
+
├── decoder_q8.onnx # Language decoder (Q8)
|
| 79 |
+
└── decoder_q8.onnx_data
|
| 80 |
+
|
| 81 |
+
* Large models (>2GB) split weights across multiple files:
|
| 82 |
+
decoder.onnx_data, decoder.onnx_data_1, decoder.onnx_data_2, etc.
|
| 83 |
+
All data files must be in the same directory as the .onnx file.
|
| 84 |
```
|
| 85 |
|
| 86 |
## Python
|
|
|
|
| 108 |
embed_images_path = hf_hub_download(model_id, "onnx/embed_images_fp16.onnx")
|
| 109 |
decoder_path = hf_hub_download(model_id, "onnx/decoder_q4.onnx")
|
| 110 |
|
| 111 |
+
# Download all data files (handles multiple splits for large models)
|
| 112 |
+
from huggingface_hub import list_repo_files
|
| 113 |
+
for f in list_repo_files(model_id):
|
| 114 |
+
if any(f.startswith(f"onnx/{name}") for name in [
|
| 115 |
+
"embed_tokens_fp16.onnx_data",
|
| 116 |
+
"embed_images_fp16.onnx_data",
|
| 117 |
+
"decoder_q4.onnx_data"
|
| 118 |
+
]):
|
| 119 |
+
hf_hub_download(model_id, f)
|
| 120 |
+
|
| 121 |
# Load ONNX sessions
|
| 122 |
embed_tokens = ort.InferenceSession(embed_tokens_path)
|
| 123 |
embed_images = ort.InferenceSession(embed_images_path)
|
|
|
|
| 244 |
const tokenizer = await AutoTokenizer.from_pretrained(modelId);
|
| 245 |
|
| 246 |
// Load ONNX sessions with external data
|
| 247 |
+
// For models with multiple data files (>2GB), add additional entries to externalData array
|
| 248 |
+
async function loadSession(name, dataFiles = 1) {
|
| 249 |
const onnxPath = `${modelBase}/onnx/${name}.onnx`;
|
| 250 |
+
const externalData = [];
|
| 251 |
+
for (let i = 0; i < dataFiles; i++) {
|
| 252 |
+
const suffix = i === 0 ? "" : `_${i}`;
|
| 253 |
+
const fileName = `${name}.onnx_data${suffix}`;
|
| 254 |
+
externalData.push({ path: fileName, data: `${modelBase}/onnx/${fileName}` });
|
| 255 |
+
}
|
| 256 |
return ort.InferenceSession.create(onnxPath, {
|
| 257 |
executionProviders: ["webgpu"],
|
| 258 |
+
externalData,
|
| 259 |
});
|
| 260 |
}
|
| 261 |
|
|
|
|
| 340 |
- Recommended: `embed_images_fp16.onnx` + `decoder_q4.onnx`
|
| 341 |
- For higher quality: `embed_images_fp16.onnx` + `decoder_fp16.onnx`
|
| 342 |
- Image preprocessing requires tiling (512×512), patch extraction (16×16), and normalization
|
| 343 |
+
- Large models (>2GB) split weights across multiple files (e.g., `.onnx_data`, `.onnx_data_1`). Use `loadSession(name, dataFiles)` with the number of data files
|
| 344 |
- int64 tensors require `BigInt64Array`
|
| 345 |
|
| 346 |
## License
|