Upload folder using huggingface_hub
Browse files
- README.md +3 -20
- processor_config.json +3 -2
- tokenizer_config.json +3 -2
README.md
CHANGED
|
@@ -54,29 +54,12 @@ It supports OCR, document comprehension, multilingual vision understanding, boun
|
|
| 54 |
| Vision Encoder | SigLIP2 NaFlex (86M) |
|
| 55 |
| Native Resolution | up to 512x512 |
|
| 56 |
|
| 57 |
-
##
|
| 58 |
|
| 59 |
```bash
|
| 60 |
-
pip install mlx-vlm
|
| 61 |
```
|
| 62 |
|
| 63 |
-
### CLI
|
| 64 |
-
|
| 65 |
-
The CLI applies the chat template automatically:
|
| 66 |
-
|
| 67 |
-
```bash
|
| 68 |
-
python -m mlx_vlm generate \
|
| 69 |
-
--model LiquidAI/LFM2.5-VL-450M-MLX-6bit \
|
| 70 |
-
--max-tokens 256 \
|
| 71 |
-
--temperature 0.1 \
|
| 72 |
-
--prompt "What do you see in this image?" \
|
| 73 |
-
--image photo.jpg
|
| 74 |
-
```
|
| 75 |
-
|
| 76 |
-
### Python
|
| 77 |
-
|
| 78 |
-
In Python you must apply the chat template before calling `generate`:
|
| 79 |
-
|
| 80 |
```python
|
| 81 |
from mlx_vlm import load, generate
|
| 82 |
from mlx_vlm.utils import load_image
|
|
@@ -85,7 +68,7 @@ model, processor = load("LiquidAI/LFM2.5-VL-450M-MLX-6bit")
|
|
| 85 |
|
| 86 |
image = load_image("photo.jpg")
|
| 87 |
|
| 88 |
-
# Apply chat template (required)
|
| 89 |
messages = [{"role": "user", "content": [
|
| 90 |
{"type": "image"},
|
| 91 |
{"type": "text", "text": "What do you see in this image?"},
|
|
|
|
| 54 |
| Vision Encoder | SigLIP2 NaFlex (86M) |
|
| 55 |
| Native Resolution | up to 512x512 |
|
| 56 |
|
| 57 |
+
## Quickstart
|
| 58 |
|
| 59 |
```bash
|
| 60 |
+
uv pip install 'mlx-vlm==0.3.9'
|
| 61 |
```
|
| 62 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 63 |
```python
|
| 64 |
from mlx_vlm import load, generate
|
| 65 |
from mlx_vlm.utils import load_image
|
|
|
|
| 68 |
|
| 69 |
image = load_image("photo.jpg")
|
| 70 |
|
| 71 |
+
# Apply chat template (required for LFM2.5-VL)
|
| 72 |
messages = [{"role": "user", "content": [
|
| 73 |
{"type": "image"},
|
| 74 |
{"type": "text", "text": "What do you see in this image?"},
|
processor_config.json
CHANGED
|
@@ -1,4 +1,5 @@
|
|
| 1 |
{
|
| 2 |
"processor_class": "Lfm2VlProcessor",
|
| 3 |
-
"use_image_special_tokens": true
|
| 4 |
-
}
|
|
|
|
|
|
| 1 |
{
|
| 2 |
"processor_class": "Lfm2VlProcessor",
|
| 3 |
+
"use_image_special_tokens": true,
|
| 4 |
+
"chat_template": "{{- bos_token -}}\n{%- set keep_past_thinking = keep_past_thinking | default(false) -%}\n\n{%- macro format_arg_value(arg_value) -%}\n {%- if arg_value is string -%}\n {{- '\"' + arg_value + '\"' -}}\n {%- elif arg_value is mapping -%}\n {{- arg_value | tojson -}}\n {%- else -%}\n {{- arg_value | string -}}\n {%- endif -%}\n{%- endmacro -%}\n\n{%- macro parse_content(content) -%}\n {%- if content is string -%}\n {{- content -}}\n {%- else -%}\n {%- set _ns = namespace(result=\"\") -%}\n {%- for item in content -%}\n {%- if item.type == \"image\" -%}\n {%- set _ns.result = _ns.result + \"<image>\" -%}\n {%- elif item.type == \"text\" -%}\n {%- set _ns.result = _ns.result + item.text -%}\n {%- else -%}\n {%- set _ns.result = _ns.result + item | tojson -%}\n {%- endif -%}\n {%- endfor -%}\n {{- _ns.result -}}\n {%- endif -%}\n{%- endmacro -%}\n\n{%- macro render_tool_calls(tool_calls) -%}\n {%- set tool_calls_ns = namespace(tool_calls=[]) -%}\n {%- for tool_call in tool_calls -%}\n {%- set func_name = tool_call.function.name -%}\n {%- set func_args = tool_call.function.arguments -%}\n {%- set args_ns = namespace(arg_strings=[]) -%}\n {%- for arg_name, arg_value in func_args.items() -%}\n {%- set args_ns.arg_strings = args_ns.arg_strings + [arg_name + \"=\" + format_arg_value(arg_value)] -%}\n {%- endfor -%}\n {%- set tool_calls_ns.tool_calls = tool_calls_ns.tool_calls + [func_name + \"(\" + (args_ns.arg_strings | join(\", \")) + \")\"] -%}\n {%- endfor -%}\n {{- \"<|tool_call_start|>[\" + (tool_calls_ns.tool_calls | join(\", \")) + \"]<|tool_call_end|>\" -}}\n{%- endmacro -%}\n\n{%- set ns = namespace(system_prompt=\"\", last_assistant_index=-1) -%}\n{%- if messages[0].role == \"system\" -%}\n {%- if messages[0].content is defined -%}\n {%- set ns.system_prompt = parse_content(messages[0].content) -%}\n {%- endif -%}\n {%- set messages = messages[1:] -%}\n{%- endif -%}\n{%- if tools -%}\n {%- set ns.system_prompt = ns.system_prompt + (\"\\n\\n\" if 
ns.system_prompt else \"\") + \"Today's date: \" + strftime_now(\"%Y-%m-%d\") + \"\\n\\nList of tools: \" + (tools | tojson) -%}\n{%- endif -%}\n{%- if ns.system_prompt -%}\n {{- \"<|im_start|>system\\n\" + ns.system_prompt + \"<|im_end|>\\n\" -}}\n{%- endif -%}\n{%- for message in messages -%}\n {%- if message.role == \"assistant\" -%}\n {%- set ns.last_assistant_index = loop.index0 -%}\n {%- endif -%}\n{%- endfor -%}\n{%- for message in messages -%}\n {{- \"<|im_start|>\" + message.role + \"\\n\" -}}\n {%- if message.role == \"assistant\" -%}\n {%- generation -%}\n {%- if message.thinking is defined and (keep_past_thinking or loop.index0 == ns.last_assistant_index) -%}\n {{- \"<think>\" + message.thinking + \"</think>\" -}}\n {%- endif -%}\n {%- if message.tool_calls is defined -%}\n {{- render_tool_calls(message.tool_calls) -}}\n {%- endif -%}\n {%- if message.content is defined -%}\n {%- set content = parse_content(message.content) -%}\n {%- if not keep_past_thinking and loop.index0 != ns.last_assistant_index -%}\n {%- if \"</think>\" in content -%}\n {%- set content = content.split(\"</think>\")[-1] | trim -%}\n {%- endif -%}\n {%- endif -%}\n {{- content + (\"\" if (continue_final_message and loop.last) else \"<|im_end|>\\n\") -}}\n {%- endif -%}\n {%- endgeneration -%}\n {%- else %}\n {%- if message.content is defined -%}\n {{- parse_content(message.content) + \"<|im_end|>\\n\" -}}\n {%- endif -%}\n {%- endif %}\n{%- endfor -%}\n{%- if add_generation_prompt -%}\n {{- \"<|im_start|>assistant\\n\" -}}\n{%- endif -%}"
|
| 5 |
+
}
|
tokenizer_config.json
CHANGED
|
@@ -4081,5 +4081,6 @@
|
|
| 4081 |
"spaces_between_special_tokens": false,
|
| 4082 |
"tokenizer_class": "PreTrainedTokenizerFast",
|
| 4083 |
"use_default_system_prompt": false,
|
| 4084 |
-
"use_fast": true
|
| 4085 |
-
}
|
|
|
|
|
|
| 4081 |
"spaces_between_special_tokens": false,
|
| 4082 |
"tokenizer_class": "PreTrainedTokenizerFast",
|
| 4083 |
"use_default_system_prompt": false,
|
| 4084 |
+
"use_fast": true,
|
| 4085 |
+
"chat_template": "{{- bos_token -}}\n{%- set keep_past_thinking = keep_past_thinking | default(false) -%}\n\n{%- macro format_arg_value(arg_value) -%}\n {%- if arg_value is string -%}\n {{- '\"' + arg_value + '\"' -}}\n {%- elif arg_value is mapping -%}\n {{- arg_value | tojson -}}\n {%- else -%}\n {{- arg_value | string -}}\n {%- endif -%}\n{%- endmacro -%}\n\n{%- macro parse_content(content) -%}\n {%- if content is string -%}\n {{- content -}}\n {%- else -%}\n {%- set _ns = namespace(result=\"\") -%}\n {%- for item in content -%}\n {%- if item.type == \"image\" -%}\n {%- set _ns.result = _ns.result + \"<image>\" -%}\n {%- elif item.type == \"text\" -%}\n {%- set _ns.result = _ns.result + item.text -%}\n {%- else -%}\n {%- set _ns.result = _ns.result + item | tojson -%}\n {%- endif -%}\n {%- endfor -%}\n {{- _ns.result -}}\n {%- endif -%}\n{%- endmacro -%}\n\n{%- macro render_tool_calls(tool_calls) -%}\n {%- set tool_calls_ns = namespace(tool_calls=[]) -%}\n {%- for tool_call in tool_calls -%}\n {%- set func_name = tool_call.function.name -%}\n {%- set func_args = tool_call.function.arguments -%}\n {%- set args_ns = namespace(arg_strings=[]) -%}\n {%- for arg_name, arg_value in func_args.items() -%}\n {%- set args_ns.arg_strings = args_ns.arg_strings + [arg_name + \"=\" + format_arg_value(arg_value)] -%}\n {%- endfor -%}\n {%- set tool_calls_ns.tool_calls = tool_calls_ns.tool_calls + [func_name + \"(\" + (args_ns.arg_strings | join(\", \")) + \")\"] -%}\n {%- endfor -%}\n {{- \"<|tool_call_start|>[\" + (tool_calls_ns.tool_calls | join(\", \")) + \"]<|tool_call_end|>\" -}}\n{%- endmacro -%}\n\n{%- set ns = namespace(system_prompt=\"\", last_assistant_index=-1) -%}\n{%- if messages[0].role == \"system\" -%}\n {%- if messages[0].content is defined -%}\n {%- set ns.system_prompt = parse_content(messages[0].content) -%}\n {%- endif -%}\n {%- set messages = messages[1:] -%}\n{%- endif -%}\n{%- if tools -%}\n {%- set ns.system_prompt = ns.system_prompt + (\"\\n\\n\" if 
ns.system_prompt else \"\") + \"Today's date: \" + strftime_now(\"%Y-%m-%d\") + \"\\n\\nList of tools: \" + (tools | tojson) -%}\n{%- endif -%}\n{%- if ns.system_prompt -%}\n {{- \"<|im_start|>system\\n\" + ns.system_prompt + \"<|im_end|>\\n\" -}}\n{%- endif -%}\n{%- for message in messages -%}\n {%- if message.role == \"assistant\" -%}\n {%- set ns.last_assistant_index = loop.index0 -%}\n {%- endif -%}\n{%- endfor -%}\n{%- for message in messages -%}\n {{- \"<|im_start|>\" + message.role + \"\\n\" -}}\n {%- if message.role == \"assistant\" -%}\n {%- generation -%}\n {%- if message.thinking is defined and (keep_past_thinking or loop.index0 == ns.last_assistant_index) -%}\n {{- \"<think>\" + message.thinking + \"</think>\" -}}\n {%- endif -%}\n {%- if message.tool_calls is defined -%}\n {{- render_tool_calls(message.tool_calls) -}}\n {%- endif -%}\n {%- if message.content is defined -%}\n {%- set content = parse_content(message.content) -%}\n {%- if not keep_past_thinking and loop.index0 != ns.last_assistant_index -%}\n {%- if \"</think>\" in content -%}\n {%- set content = content.split(\"</think>\")[-1] | trim -%}\n {%- endif -%}\n {%- endif -%}\n {{- content + (\"\" if (continue_final_message and loop.last) else \"<|im_end|>\\n\") -}}\n {%- endif -%}\n {%- endgeneration -%}\n {%- else %}\n {%- if message.content is defined -%}\n {{- parse_content(message.content) + \"<|im_end|>\\n\" -}}\n {%- endif -%}\n {%- endif %}\n{%- endfor -%}\n{%- if add_generation_prompt -%}\n {{- \"<|im_start|>assistant\\n\" -}}\n{%- endif -%}"
|
| 4086 |
+
}
|