Upload README.md with huggingface_hub
Browse files
README.md
CHANGED
|
@@ -59,10 +59,18 @@ LFM2.5 is a hybrid architecture combining multiplicative gates and short convolu
|
|
| 59 |
|
| 60 |
```
|
| 61 |
onnx/
|
| 62 |
-
├── model.onnx # FP32
|
| 63 |
-
├──
|
| 64 |
-
├──
|
| 65 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 66 |
```
|
| 67 |
|
| 68 |
## Python
|
|
@@ -86,7 +94,12 @@ from transformers import AutoTokenizer
|
|
| 86 |
# Download model (Q4 recommended)
|
| 87 |
model_id = "LiquidAI/LFM2.5-1.2B-Instruct-ONNX"
|
| 88 |
model_path = hf_hub_download(model_id, "onnx/model_q4.onnx")
|
| 89 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 90 |
|
| 91 |
# Load model and tokenizer
|
| 92 |
session = ort.InferenceSession(model_path)
|
|
|
|
| 59 |
|
| 60 |
```
|
| 61 |
onnx/
|
| 62 |
+
├── model.onnx # FP32 model graph
|
| 63 |
+
├── model.onnx_data* # FP32 weights
|
| 64 |
+
├── model_fp16.onnx # FP16 model graph
|
| 65 |
+
├── model_fp16.onnx_data* # FP16 weights
|
| 66 |
+
├── model_q4.onnx # Q4 model graph (recommended)
|
| 67 |
+
├── model_q4.onnx_data # Q4 weights
|
| 68 |
+
├── model_q8.onnx # Q8 model graph
|
| 69 |
+
└── model_q8.onnx_data # Q8 weights
|
| 70 |
+
|
| 71 |
+
* Large models (>2GB) split weights across multiple files:
|
| 72 |
+
model.onnx_data, model.onnx_data_1, model.onnx_data_2, etc.
|
| 73 |
+
All data files must be in the same directory as the .onnx file.
|
| 74 |
```
|
| 75 |
|
| 76 |
## Python
|
|
|
|
| 94 |
# Download model (Q4 recommended)
|
| 95 |
model_id = "LiquidAI/LFM2.5-1.2B-Instruct-ONNX"
|
| 96 |
model_path = hf_hub_download(model_id, "onnx/model_q4.onnx")
|
| 97 |
+
|
| 98 |
+
# Download all data files (handles multiple splits for large models)
|
| 99 |
+
from huggingface_hub import list_repo_files
|
| 100 |
+
for f in list_repo_files(model_id):
|
| 101 |
+
if f.startswith("onnx/model_q4.onnx_data"):
|
| 102 |
+
hf_hub_download(model_id, f)
|
| 103 |
|
| 104 |
# Load model and tokenizer
|
| 105 |
session = ort.InferenceSession(model_path)
|