ykhrustalev committed on
Commit
76e0e7c
·
verified ·
1 Parent(s): 4bb28dd

Upload README.md with huggingface_hub

Browse files
Files changed (1) hide show
  1. README.md +18 -5
README.md CHANGED
@@ -59,10 +59,18 @@ LFM2.5 is a hybrid architecture combining multiplicative gates and short convolu
59
 
60
  ```
61
  onnx/
62
- ├── model.onnx # FP32
63
- ├── model_fp16.onnx # FP16
64
- ├── model_q4.onnx # Q4 (recommended)
65
- └── model_q8.onnx # Q8
 
 
 
 
 
 
 
 
66
  ```
67
 
68
  ## Python
@@ -86,7 +94,12 @@ from transformers import AutoTokenizer
86
  # Download model (Q4 recommended)
87
  model_id = "LiquidAI/LFM2.5-1.2B-Instruct-ONNX"
88
  model_path = hf_hub_download(model_id, "onnx/model_q4.onnx")
89
- data_path = hf_hub_download(model_id, "onnx/model_q4.onnx_data")
 
 
 
 
 
90
 
91
  # Load model and tokenizer
92
  session = ort.InferenceSession(model_path)
 
59
 
60
  ```
61
  onnx/
62
+ ├── model.onnx # FP32 model graph
63
+ ├── model.onnx_data* # FP32 weights
64
+ ├── model_fp16.onnx # FP16 model graph
65
+ ├── model_fp16.onnx_data* # FP16 weights
66
+ ├── model_q4.onnx # Q4 model graph (recommended)
67
+ ├── model_q4.onnx_data # Q4 weights
68
+ ├── model_q8.onnx # Q8 model graph
69
+ └── model_q8.onnx_data # Q8 weights
70
+
71
+ * Large models (>2GB) split weights across multiple files:
72
+ model.onnx_data, model.onnx_data_1, model.onnx_data_2, etc.
73
+ All data files must be in the same directory as the .onnx file.
74
  ```
75
 
76
  ## Python
 
94
  # Download model (Q4 recommended)
95
  model_id = "LiquidAI/LFM2.5-1.2B-Instruct-ONNX"
96
  model_path = hf_hub_download(model_id, "onnx/model_q4.onnx")
97
+
98
+ # Download all data files (handles multiple splits for large models)
99
+ from huggingface_hub import list_repo_files
100
+ for f in list_repo_files(model_id):
101
+ if f.startswith("onnx/model_q4.onnx_data"):
102
+ hf_hub_download(model_id, f)
103
 
104
  # Load model and tokenizer
105
  session = ort.InferenceSession(model_path)