Upload folder using huggingface_hub
- .gitattributes +4 -0
- README.md +257 -0
- chat_template.jinja +64 -0
- config.json +66 -0
- generation_config.json +7 -0
- onnx/model.onnx +3 -0
- onnx/model.onnx_data +3 -0
- onnx/model_fp16.onnx +3 -0
- onnx/model_fp16.onnx_data +3 -0
- onnx/model_q4.onnx +3 -0
- onnx/model_q4.onnx_data +3 -0
- onnx/model_q4f16.onnx +3 -0
- onnx/model_q4f16.onnx_data +3 -0
- tokenizer.json +0 -0
- tokenizer_config.json +22 -0
.gitattributes CHANGED
@@ -33,3 +33,7 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+onnx/model.onnx_data filter=lfs diff=lfs merge=lfs -text
+onnx/model_fp16.onnx_data filter=lfs diff=lfs merge=lfs -text
+onnx/model_q4.onnx_data filter=lfs diff=lfs merge=lfs -text
+onnx/model_q4f16.onnx_data filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,257 @@
---
license: other
license_name: lfm1.0
license_link: LICENSE
language:
- en
- ja
- ko
- fr
- es
- de
- it
- pt
- ar
- zh
pipeline_tag: text-generation
tags:
- liquid
- edge
- lfm2.5
- onnx
- onnxruntime
- webgpu
base_model:
- LiquidAI/LFM2.5-350M
---

[**Try LFM**](https://playground.liquid.ai/) • [**Documentation**](https://docs.liquid.ai/lfm) • [**LEAP**](https://leap.liquid.ai/) • [**Blog**](https://www.liquid.ai/blog/)

## LFM2.5-350M-ONNX

ONNX export of [LFM2.5-350M](https://huggingface.co/LiquidAI/LFM2.5-350M) for cross-platform inference.

## Variants

| Variant | Size | Description |
|---------|------|-------------|
| FP32 | ~1384MB | All weights in FP32 |
| FP16 | ~692MB | All weights in FP16 |
| Q4 | ~298MB | INT4 MatMul weights (including lm_head), FP16 embeddings and norms |
| Q4F16 | ~298MB | INT4 MatMul weights, FP16 lm_head, embeddings, and norms |

Q4 and Q4F16 both use symmetric block-wise quantization (block_size=32) via MatMulNBits. The difference is that Q4 also quantizes the lm_head projection to INT4, while Q4F16 keeps it in FP16. Both store the token embedding (Gather) and normalization weights in FP16.
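
As a rough illustration of how such a variant can be produced (not necessarily the exact export pipeline used for this repo), recent onnxruntime versions ship a `MatMul4BitsQuantizer` that rewrites MatMul nodes into MatMulNBits with block-wise INT4 weights; the output filename below is made up:

```python
import onnx
from onnxruntime.quantization.matmul_4bits_quantizer import MatMul4BitsQuantizer

# Load the full-precision graph (external data is resolved next to the model file).
model = onnx.load("onnx/model.onnx")

# block_size=32 with symmetric quantization matches the settings described above.
quantizer = MatMul4BitsQuantizer(model, block_size=32, is_symmetric=True)
quantizer.process()

# Hypothetical output path; weights stay in an external data file.
quantizer.model.save_model_to_file("onnx/model_q4_custom.onnx", use_external_data_format=True)
```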

## Model Files

```
onnx/
├── model.onnx             # FP32 graph
├── model.onnx_data        # FP32 weights (external data)
├── model_fp16.onnx        # FP16 graph
├── model_fp16.onnx_data   # FP16 weights
├── model_q4.onnx          # Q4 graph
├── model_q4.onnx_data     # Q4 weights
├── model_q4f16.onnx       # Q4F16 graph
└── model_q4f16.onnx_data  # Q4F16 weights
```
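
One way to fetch a single variant together with its `.onnx_data` companion is `snapshot_download` with a glob pattern (the examples below use `hf_hub_download` for the same effect):

```python
from huggingface_hub import snapshot_download

# Downloads onnx/model_q4.onnx and onnx/model_q4.onnx_data into the local cache.
local_dir = snapshot_download(
    "LiquidAI/LFM2.5-350M-ONNX",
    allow_patterns=["onnx/model_q4.onnx*"],
)
print(local_dir)
```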

## Python

### Installation

```bash
pip install onnxruntime transformers numpy huggingface_hub
# or with GPU support:
pip install onnxruntime-gpu transformers numpy huggingface_hub
```
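
To confirm which execution providers your install exposes (CUDA only appears with `onnxruntime-gpu`):

```python
import onnxruntime as ort

# e.g. ["CUDAExecutionProvider", "CPUExecutionProvider"] with onnxruntime-gpu
print(ort.get_available_providers())

# To target the GPU, pass providers explicitly when creating the session:
# ort.InferenceSession(model_path, providers=["CUDAExecutionProvider", "CPUExecutionProvider"])
```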

### Inference

```python
import numpy as np
import onnxruntime as ort
from huggingface_hub import hf_hub_download
from transformers import AutoTokenizer

# Download model (the .onnx_data file must sit next to the .onnx graph;
# hf_hub_download places both in the same cache directory)
model_id = "LiquidAI/LFM2.5-350M-ONNX"
model_path = hf_hub_download(model_id, "onnx/model_q4.onnx")
data_path = hf_hub_download(model_id, "onnx/model_q4.onnx_data")

# Load model and tokenizer
session = ort.InferenceSession(model_path)
tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)

# Prepare chat input
messages = [{"role": "user", "content": "What is the capital of France?"}]
prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
input_ids = np.array([tokenizer.encode(prompt, add_special_tokens=False)], dtype=np.int64)

# Initialize KV/conv caches: dynamic sequence dims start at 0, other dynamic dims at 1
ONNX_DTYPE = {"tensor(float)": np.float32, "tensor(float16)": np.float16, "tensor(int64)": np.int64}
cache = {}
for inp in session.get_inputs():
    if inp.name in {"input_ids", "attention_mask", "position_ids"}:
        continue
    shape = [d if isinstance(d, int) else 1 for d in inp.shape]
    for i, d in enumerate(inp.shape):
        if isinstance(d, str) and "sequence" in d.lower():
            shape[i] = 0
    cache[inp.name] = np.zeros(shape, dtype=ONNX_DTYPE.get(inp.type, np.float32))

# Check if model uses position_ids
input_names = {inp.name for inp in session.get_inputs()}
use_position_ids = "position_ids" in input_names

# Generate tokens (greedy decoding)
seq_len = input_ids.shape[1]
generated_tokens = []

for step in range(512):  # max new tokens
    if step == 0:
        ids = input_ids
        pos = np.arange(seq_len, dtype=np.int64).reshape(1, -1)
    else:
        ids = np.array([[generated_tokens[-1]]], dtype=np.int64)
        pos = np.array([[seq_len + len(generated_tokens) - 1]], dtype=np.int64)

    attn_mask = np.ones((1, seq_len + len(generated_tokens)), dtype=np.int64)
    feed = {"input_ids": ids, "attention_mask": attn_mask, **cache}
    if use_position_ids:
        feed["position_ids"] = pos

    outputs = session.run(None, feed)
    next_token = int(np.argmax(outputs[0][0, -1]))
    generated_tokens.append(next_token)

    # Update cache: map present_* outputs back to their past_* inputs
    for i, out in enumerate(session.get_outputs()[1:], 1):
        name = out.name.replace("present_conv", "past_conv").replace("present.", "past_key_values.")
        if name in cache:
            cache[name] = outputs[i]

    if next_token == tokenizer.eos_token_id:
        break

print(tokenizer.decode(generated_tokens, skip_special_tokens=True))
```
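
The loop above decodes greedily. Swapping the argmax for temperature plus top-k sampling is a small change; a sketch that reuses the names from the loop above:

```python
import numpy as np

def sample_next_token(logits: np.ndarray, temperature: float = 0.8, top_k: int = 50) -> int:
    """Temperature + top-k sampling over the last-position logits."""
    logits = logits.astype(np.float64) / max(temperature, 1e-6)
    top = np.argpartition(logits, -top_k)[-top_k:]   # indices of the k largest logits
    probs = np.exp(logits[top] - logits[top].max())  # softmax over the top-k, numerically stable
    probs /= probs.sum()
    return int(np.random.choice(top, p=probs))

# In the generation loop, replace the argmax line with:
# next_token = sample_next_token(outputs[0][0, -1])
```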

## WebGPU (Browser)

### Installation

```bash
npm install onnxruntime-web @huggingface/transformers
```

### Enable WebGPU

WebGPU is required for browser inference. To enable it:

1. **Chrome/Edge**: Navigate to `chrome://flags/#enable-unsafe-webgpu`, enable the flag, and restart
2. **Verify**: Check `chrome://gpu` for "WebGPU" status
3. **Test**: Run `navigator.gpu.requestAdapter()` in the DevTools console

### Inference

```javascript
import * as ort from "onnxruntime-web/webgpu";
import { AutoTokenizer } from "@huggingface/transformers";

// Check WebGPU availability
if (!navigator.gpu) {
  throw new Error("WebGPU not available. Enable at chrome://flags/#enable-unsafe-webgpu");
}
const adapter = await navigator.gpu.requestAdapter();
if (!adapter) {
  throw new Error("WebGPU adapter not found. Check chrome://gpu for status.");
}

ort.env.wasm.numThreads = 1;

const modelId = "LiquidAI/LFM2.5-350M-ONNX";
const modelBase = `https://huggingface.co/${modelId}/resolve/main`;

// Load tokenizer
const tokenizer = await AutoTokenizer.from_pretrained(modelId);

// Load ONNX session with external data
const onnxPath = `${modelBase}/onnx/model_q4.onnx`;
const dataPath = `${modelBase}/onnx/model_q4.onnx_data`;
const session = await ort.InferenceSession.create(onnxPath, {
  executionProviders: ["webgpu"],
  externalData: [{ path: "model_q4.onnx_data", data: dataPath }],
});

// Model config (from config.json)
const hiddenSize = 1024;
const numKVHeads = 8;
const headDim = 64;

// Initialize KV cache: conv caches are fixed-size, attention caches start empty
function initCache() {
  const cache = {};
  for (const name of session.inputNames) {
    if (name.startsWith("past_conv")) {
      cache[name] = new ort.Tensor("float32", new Float32Array(hiddenSize * 3), [1, hiddenSize, 3]);
    } else if (name.startsWith("past_key_values")) {
      cache[name] = new ort.Tensor("float32", new Float32Array(0), [1, numKVHeads, 0, headDim]);
    }
  }
  return cache;
}

// Update cache from outputs
function updateCache(cache, outputs) {
  for (const [name, tensor] of Object.entries(outputs)) {
    if (name.startsWith("present_conv")) {
      cache[name.replace("present_conv", "past_conv")] = tensor;
    } else if (name.startsWith("present.")) {
      cache[name.replace("present.", "past_key_values.")] = tensor;
    }
  }
}

// Build prompt and tokenize
const messages = [{ role: "user", content: "What is the capital of France?" }];
const prompt = tokenizer.apply_chat_template(messages, { add_generation_prompt: true, tokenize: false });
const inputIds = tokenizer.encode(prompt);

// Generation loop
const cache = initCache();
const eosTokenId = tokenizer.eos_token_id;
const generatedTokens = [];
let curLen = inputIds.length;
let ids = inputIds;

for (let step = 0; step < 512; step++) {
  const inputIdsTensor = new ort.Tensor("int64", new BigInt64Array(ids.map(BigInt)), [1, ids.length]);
  const attentionMask = new ort.Tensor("int64", new BigInt64Array(curLen).fill(1n), [1, curLen]);

  const outputs = await session.run({ input_ids: inputIdsTensor, attention_mask: attentionMask, ...cache });

  // Greedy decode: argmax of last-token logits. A loop avoids the engine
  // argument-count limit that Math.max(...arr) can hit on a 65k-entry vocab.
  const logits = outputs.logits;
  const vocabSize = logits.dims[2];
  const lastLogits = logits.data.slice((logits.dims[1] - 1) * vocabSize);
  let nextToken = 0;
  for (let i = 1; i < lastLogits.length; i++) {
    if (lastLogits[i] > lastLogits[nextToken]) nextToken = i;
  }

  generatedTokens.push(nextToken);
  if (nextToken === eosTokenId) break;

  updateCache(cache, outputs);
  ids = [nextToken];
  curLen++;
}

console.log(tokenizer.decode(generatedTokens, { skip_special_tokens: true }));
```
+
|
| 250 |
+
### WebGPU Notes
|
| 251 |
+
|
| 252 |
+
* Models use external data files (`.onnx_data`) that are loaded automatically
|
| 253 |
+
* int64 tensors require `BigInt64Array`
|
| 254 |
+
|
| 255 |
+
## License
|
| 256 |
+
|
| 257 |
+
This model is released under the [LFM 1.0 License](LICENSE).
|
chat_template.jinja ADDED
@@ -0,0 +1,64 @@
{{- bos_token -}}
{%- set keep_past_thinking = keep_past_thinking | default(false) -%}
{%- set ns = namespace(system_prompt="") -%}
{%- if messages[0]["role"] == "system" -%}
    {%- set sys_content = messages[0]["content"] -%}
    {%- if sys_content is not string -%}
        {%- for item in sys_content -%}
            {%- if item["type"] == "text" -%}
                {%- set ns.system_prompt = ns.system_prompt + item["text"] -%}
            {%- endif -%}
        {%- endfor -%}
    {%- else -%}
        {%- set ns.system_prompt = sys_content -%}
    {%- endif -%}
    {%- set messages = messages[1:] -%}
{%- endif -%}
{%- if tools -%}
    {%- set ns.system_prompt = ns.system_prompt + ("\n" if ns.system_prompt else "") + "List of tools: [" -%}
    {%- for tool in tools -%}
        {%- if tool is not string -%}
            {%- set tool = tool | tojson -%}
        {%- endif -%}
        {%- set ns.system_prompt = ns.system_prompt + tool -%}
        {%- if not loop.last -%}
            {%- set ns.system_prompt = ns.system_prompt + ", " -%}
        {%- endif -%}
    {%- endfor -%}
    {%- set ns.system_prompt = ns.system_prompt + "]" -%}
{%- endif -%}
{%- if ns.system_prompt -%}
    {{- "<|im_start|>system\n" + ns.system_prompt + "<|im_end|>\n" -}}
{%- endif -%}
{%- set ns.last_assistant_index = -1 -%}
{%- for message in messages -%}
    {%- if message["role"] == "assistant" -%}
        {%- set ns.last_assistant_index = loop.index0 -%}
    {%- endif -%}
{%- endfor -%}
{%- for message in messages -%}
    {{- "<|im_start|>" + message["role"] + "\n" -}}
    {%- set content = message["content"] -%}
    {%- if content is not string -%}
        {%- set ns.content = "" -%}
        {%- for item in content -%}
            {%- if item["type"] == "image" -%}
                {%- set ns.content = ns.content + "<image>" -%}
            {%- elif item["type"] == "text" -%}
                {%- set ns.content = ns.content + item["text"] -%}
            {%- else -%}
                {%- set ns.content = ns.content + item | tojson -%}
            {%- endif -%}
        {%- endfor -%}
        {%- set content = ns.content -%}
    {%- endif -%}
    {%- if message["role"] == "assistant" and not keep_past_thinking and loop.index0 != ns.last_assistant_index -%}
        {%- if "</think>" in content -%}
            {%- set content = content.split("</think>")[-1] | trim -%}
        {%- endif -%}
    {%- endif -%}
    {{- content + "<|im_end|>\n" -}}
{%- endfor -%}
{%- if add_generation_prompt -%}
    {{- "<|im_start|>assistant\n" -}}
{%- endif -%}
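
A detail worth noting: the template drops `<think>...</think>` reasoning from every assistant turn except the most recent one, unless `keep_past_thinking` is set. Recent `transformers` versions forward extra keyword arguments of `apply_chat_template` to the template, so the flag can be toggled as in this sketch (messages are made up):

```python
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("LiquidAI/LFM2.5-350M-ONNX")

messages = [
    {"role": "user", "content": "2+2?"},
    {"role": "assistant", "content": "<think>easy</think>4"},
    {"role": "user", "content": "And 3+3?"},
    {"role": "assistant", "content": "<think>also easy</think>6"},
]

# Default: the <think> block of the earlier assistant turn is stripped;
# the final assistant turn keeps its reasoning.
print(tok.apply_chat_template(messages, tokenize=False))

# keep_past_thinking=True preserves reasoning in all turns.
print(tok.apply_chat_template(messages, tokenize=False, keep_past_thinking=True))
```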
config.json ADDED
@@ -0,0 +1,66 @@
{
  "architectures": [
    "Lfm2ForCausalLM"
  ],
  "block_auto_adjust_ff_dim": true,
  "block_dim": 1024,
  "block_ff_dim": 6656,
  "block_ffn_dim_multiplier": 1.0,
  "block_mlp_init_scale": 1.0,
  "block_multiple_of": 256,
  "block_norm_eps": 1e-05,
  "block_out_init_scale": 1.0,
  "block_use_swiglu": true,
  "block_use_xavier_init": true,
  "bos_token_id": 1,
  "conv_L_cache": 3,
  "conv_bias": false,
  "conv_dim": 1024,
  "conv_use_xavier_init": true,
  "dtype": "bfloat16",
  "eos_token_id": 7,
  "hidden_size": 1024,
  "initializer_range": 0.02,
  "intermediate_size": 6656,
  "layer_types": [
    "conv",
    "conv",
    "full_attention",
    "conv",
    "conv",
    "full_attention",
    "conv",
    "conv",
    "full_attention",
    "conv",
    "full_attention",
    "conv",
    "full_attention",
    "conv",
    "full_attention",
    "conv"
  ],
  "max_position_embeddings": 128000,
  "model_type": "lfm2",
  "norm_eps": 1e-05,
  "num_attention_heads": 16,
  "num_heads": 16,
  "num_hidden_layers": 16,
  "num_key_value_heads": 8,
  "pad_token_id": 0,
  "rope_parameters": {
    "rope_theta": 1000000.0,
    "rope_type": "default"
  },
  "tie_embedding": true,
  "transformers_version": "5.0.0.dev0",
  "use_cache": true,
  "use_pos_enc": true,
  "vocab_size": 65536,
  "transformers.js_config": {
    "kv_cache_dtype": {
      "fp32": "float32"
    },
    "use_external_data_format": true
  }
}
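
The cache shapes used in the inference examples follow directly from this config: conv layers cache the last `conv_L_cache` inputs per channel, attention layers cache K/V per KV head. A small sketch deriving them (input names assumed to follow the `past_conv.*` / `past_key_values.*` pattern seen in the examples):

```python
import json

cfg = json.load(open("config.json"))
head_dim = cfg["hidden_size"] // cfg["num_attention_heads"]  # 1024 / 16 = 64

for i, kind in enumerate(cfg["layer_types"]):
    if kind == "conv":
        # short-conv layers: fixed-size cache [batch, conv_dim, conv_L_cache]
        print(f"layer {i:2d}: past_conv shape [1, {cfg['conv_dim']}, {cfg['conv_L_cache']}]")
    else:
        # attention layers: K/V cache grows with sequence length
        print(f"layer {i:2d}: past K/V shape [1, {cfg['num_key_value_heads']}, seq, {head_dim}]")
```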
generation_config.json ADDED
@@ -0,0 +1,7 @@
{
  "_from_model_config": true,
  "bos_token_id": 1,
  "eos_token_id": 7,
  "pad_token_id": 0,
  "transformers_version": "4.54.0"
}
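
These ids line up with the special tokens in tokenizer_config.json below; a quick way to verify, assuming the tokenizer from this repo:

```python
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("LiquidAI/LFM2.5-350M-ONNX")
# bos=1 -> <|startoftext|>, eos=7 -> <|im_end|>, pad=0 -> <|pad|>
print(tok.convert_ids_to_tokens([1, 7, 0]))
```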
onnx/model.onnx ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:6c43efd6be4b93b69676b8d6c9699042cd0aef76329eea1934636111103880ff
size 145288

onnx/model.onnx_data ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:00ec28c8f2c6e5cbca8c5e6b15d6b51cf849a91b5f51a4f495f7fbc3dc6ca0dc
size 1450700800

onnx/model_fp16.onnx ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:ea5f5fa192a5c3dae08ffbcaa3c8eeeeffb417aa3ac7d9e51ae71a8c5b108f64
size 151040

onnx/model_fp16.onnx_data ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:fdca4e312c904c91527ad62299d4c17e5e9b699938cd1fb18c8a7d90c8468ddc
size 725350400

onnx/model_q4.onnx ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:5542e4d0c077cbe9537d78ac1ad7d03d7da79a1baa26dc3ac338fdf9059adc33
size 177899

onnx/model_q4.onnx_data ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:15e418caccb304d414625da177ad412792332f54078532924c1fda5a98cbb484
size 312342528

onnx/model_q4f16.onnx ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:c73ea401925f1325119f1c1e24b5015c43d9ed7393283ec45dcc7ce376683e39
size 178589

onnx/model_q4f16.onnx_data ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:15e418caccb304d414625da177ad412792332f54078532924c1fda5a98cbb484
size 312342528
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff.
tokenizer_config.json ADDED
@@ -0,0 +1,22 @@
{
  "additional_special_tokens": null,
  "backend": "tokenizers",
  "bos_token": "<|startoftext|>",
  "clean_up_tokenization_spaces": false,
  "eos_token": "<|im_end|>",
  "is_local": false,
  "legacy": false,
  "model_input_names": [
    "input_ids",
    "attention_mask"
  ],
  "model_max_length": 1000000000000000019884624838656,
  "model_specific_special_tokens": {},
  "pad_token": "<|pad|>",
  "sp_model_kwargs": {},
  "spaces_between_special_tokens": false,
  "tokenizer_class": "TokenizersBackend",
  "use_default_system_prompt": false,
  "use_fast": true,
  "chat_template": "{{- bos_token -}}\n{%- set keep_past_thinking = keep_past_thinking | default(false) -%}\n{%- set ns = namespace(system_prompt=\"\") -%}\n{%- if messages[0][\"role\"] == \"system\" -%}\n    {%- set sys_content = messages[0][\"content\"] -%}\n    {%- if sys_content is not string -%}\n        {%- for item in sys_content -%}\n            {%- if item[\"type\"] == \"text\" -%}\n                {%- set ns.system_prompt = ns.system_prompt + item[\"text\"] -%}\n            {%- endif -%}\n        {%- endfor -%}\n    {%- else -%}\n        {%- set ns.system_prompt = sys_content -%}\n    {%- endif -%}\n    {%- set messages = messages[1:] -%}\n{%- endif -%}\n{%- if tools -%}\n    {%- set ns.system_prompt = ns.system_prompt + (\"\\n\" if ns.system_prompt else \"\") + \"List of tools: [\" -%}\n    {%- for tool in tools -%}\n        {%- if tool is not string -%}\n            {%- set tool = tool | tojson -%}\n        {%- endif -%}\n        {%- set ns.system_prompt = ns.system_prompt + tool -%}\n        {%- if not loop.last -%}\n            {%- set ns.system_prompt = ns.system_prompt + \", \" -%}\n        {%- endif -%}\n    {%- endfor -%}\n    {%- set ns.system_prompt = ns.system_prompt + \"]\" -%}\n{%- endif -%}\n{%- if ns.system_prompt -%}\n    {{- \"<|im_start|>system\\n\" + ns.system_prompt + \"<|im_end|>\\n\" -}}\n{%- endif -%}\n{%- set ns.last_assistant_index = -1 -%}\n{%- for message in messages -%}\n    {%- if message[\"role\"] == \"assistant\" -%}\n        {%- set ns.last_assistant_index = loop.index0 -%}\n    {%- endif -%}\n{%- endfor -%}\n{%- for message in messages -%}\n    {{- \"<|im_start|>\" + message[\"role\"] + \"\\n\" -}}\n    {%- set content = message[\"content\"] -%}\n    {%- if content is not string -%}\n        {%- set ns.content = \"\" -%}\n        {%- for item in content -%}\n            {%- if item[\"type\"] == \"image\" -%}\n                {%- set ns.content = ns.content + \"<image>\" -%}\n            {%- elif item[\"type\"] == \"text\" -%}\n                {%- set ns.content = ns.content + item[\"text\"] -%}\n            {%- else -%}\n                {%- set ns.content = ns.content + item | tojson -%}\n            {%- endif -%}\n        {%- endfor -%}\n        {%- set content = ns.content -%}\n    {%- endif -%}\n    {%- if message[\"role\"] == \"assistant\" and not keep_past_thinking and loop.index0 != ns.last_assistant_index -%}\n        {%- if \"</think>\" in content -%}\n            {%- set content = content.split(\"</think>\")[-1] | trim -%}\n        {%- endif -%}\n    {%- endif -%}\n    {{- content + \"<|im_end|>\\n\" -}}\n{%- endfor -%}\n{%- if add_generation_prompt -%}\n    {{- \"<|im_start|>assistant\\n\" -}}\n{%- endif -%}"
}