Image-Text-to-Text
Transformers
Safetensors
English
lfm2_vl
liquid
lfm2.5
lfm2
edge
vision
conversational
Instructions to use LiquidAI/LFM2.5-VL-450M-Extract with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use LiquidAI/LFM2.5-VL-450M-Extract with Transformers:
# Use a pipeline as a high-level helper from transformers import pipeline pipe = pipeline("image-text-to-text", model="LiquidAI/LFM2.5-VL-450M-Extract") messages = [ { "role": "user", "content": [ {"type": "image", "url": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/p-blog/candy.JPG"}, {"type": "text", "text": "What animal is on the candy?"} ] }, ] pipe(text=messages)# Load model directly from transformers import AutoProcessor, AutoModelForImageTextToText processor = AutoProcessor.from_pretrained("LiquidAI/LFM2.5-VL-450M-Extract") model = AutoModelForImageTextToText.from_pretrained("LiquidAI/LFM2.5-VL-450M-Extract") messages = [ { "role": "user", "content": [ {"type": "image", "url": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/p-blog/candy.JPG"}, {"type": "text", "text": "What animal is on the candy?"} ] }, ] inputs = processor.apply_chat_template( messages, add_generation_prompt=True, tokenize=True, return_dict=True, return_tensors="pt", ).to(model.device) outputs = model.generate(**inputs, max_new_tokens=40) print(processor.decode(outputs[0][inputs["input_ids"].shape[-1]:])) - Notebooks
- Google Colab
- Kaggle
- Local Apps Settings
- vLLM
How to use LiquidAI/LFM2.5-VL-450M-Extract with vLLM:
Install from pip and serve model
# Install vLLM from pip: pip install vllm # Start the vLLM server: vllm serve "LiquidAI/LFM2.5-VL-450M-Extract" # Call the server using curl (OpenAI-compatible API): curl -X POST "http://localhost:8000/v1/chat/completions" \ -H "Content-Type: application/json" \ --data '{ "model": "LiquidAI/LFM2.5-VL-450M-Extract", "messages": [ { "role": "user", "content": [ { "type": "text", "text": "Describe this image in one sentence." }, { "type": "image_url", "image_url": { "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg" } } ] } ] }'Use Docker
docker model run hf.co/LiquidAI/LFM2.5-VL-450M-Extract
- SGLang
How to use LiquidAI/LFM2.5-VL-450M-Extract with SGLang:
Install from pip and serve model
# Install SGLang from pip: pip install sglang # Start the SGLang server: python3 -m sglang.launch_server \ --model-path "LiquidAI/LFM2.5-VL-450M-Extract" \ --host 0.0.0.0 \ --port 30000 # Call the server using curl (OpenAI-compatible API): curl -X POST "http://localhost:30000/v1/chat/completions" \ -H "Content-Type: application/json" \ --data '{ "model": "LiquidAI/LFM2.5-VL-450M-Extract", "messages": [ { "role": "user", "content": [ { "type": "text", "text": "Describe this image in one sentence." }, { "type": "image_url", "image_url": { "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg" } } ] } ] }'Use Docker images
docker run --gpus all \ --shm-size 32g \ -p 30000:30000 \ -v ~/.cache/huggingface:/root/.cache/huggingface \ --env "HF_TOKEN=<secret>" \ --ipc=host \ lmsysorg/sglang:latest \ python3 -m sglang.launch_server \ --model-path "LiquidAI/LFM2.5-VL-450M-Extract" \ --host 0.0.0.0 \ --port 30000 # Call the server using curl (OpenAI-compatible API): curl -X POST "http://localhost:30000/v1/chat/completions" \ -H "Content-Type: application/json" \ --data '{ "model": "LiquidAI/LFM2.5-VL-450M-Extract", "messages": [ { "role": "user", "content": [ { "type": "text", "text": "Describe this image in one sentence." }, { "type": "image_url", "image_url": { "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg" } } ] } ] }' - Docker Model Runner
How to use LiquidAI/LFM2.5-VL-450M-Extract with Docker Model Runner:
docker model run hf.co/LiquidAI/LFM2.5-VL-450M-Extract
| """OpenRouter-backed VLM judge. | |
| One prompt per sample (batched-over-keys in one JSON answer), run | |
| concurrently via asyncio. The VLM judge scores every key against the | |
| image and is the only quality signal we ship. | |
| Requires `OPENROUTER_API_KEY` in the environment. OpenRouter is | |
| OpenAI-compatible, so we point the `openai` SDK's `base_url` at it. | |
| """ | |
| from __future__ import annotations | |
| import asyncio | |
| import base64 | |
| import json | |
| import logging | |
| import os | |
| import re | |
| from pathlib import Path | |
| from string import Template | |
| from typing import Any | |
| from openai import AsyncOpenAI | |
| from tqdm.asyncio import tqdm_asyncio | |
| logger = logging.getLogger(__name__) | |
| _PROMPT_DIR = Path(__file__).resolve().parent / "prompts" | |
| _VLM_JUDGE_TPL = Template((_PROMPT_DIR / "vlm_judge_batch.txt").read_text(encoding="utf-8")) | |
| OPENROUTER_BASE_URL = "https://openrouter.ai/api/v1" | |
| # Reasoning suppression for the VLM judge. Default model | |
| # (`qwen/qwen3.5-35b-a3b`) requires `enable_thinking=False` to avoid | |
| # burning the full token budget on internal thought and returning | |
| # `finish_reason=length` with no visible output. Both layers (top-level | |
| # `reasoning` + `chat_template_kwargs`) are sent so whichever the | |
| # underlying provider honors gets used; the other is ignored. | |
| _VLM_JUDGE_REASONING = { | |
| "reasoning": {"enabled": False}, | |
| "chat_template_kwargs": {"enable_thinking": False}, | |
| } | |
| # βββ per-key pre-classification ββββββββββββββββββββββββββββββββββββββββββββ | |
| def normalize_bool(gt_val: object, pred_val: object) -> object: | |
| """If GT is bool and pred is a yes/no/true/false string, coerce.""" | |
| if isinstance(gt_val, bool) and isinstance(pred_val, str): | |
| lower = pred_val.strip().lower() | |
| if lower in ("yes", "true"): | |
| return True | |
| if lower in ("no", "false"): | |
| return False | |
| return pred_val | |
| def initialize_per_key_evals(records: list[dict[str, Any]]) -> None: | |
| """Build a `per_key` mapping `{key: {gt, pred}}` for the VLM judge to score.""" | |
| for rec in records: | |
| per_key: dict[str, dict[str, Any]] = {} | |
| gt = rec["ground_truth"] | |
| pred = rec["prediction_json"] | |
| for key, gt_val in gt.items(): | |
| pred_val = pred.get(key) | |
| if pred_val is not None: | |
| pred_val = normalize_bool(gt_val, pred_val) | |
| per_key[key] = {"gt": gt_val, "pred": pred_val} | |
| rec["per_key"] = per_key | |
| # βββ prompt building βββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| def _vlm_attr_block(entries: list[tuple[str, str, object]]) -> str: | |
| lines: list[str] = [] | |
| for key, desc, pred in entries: | |
| lines.append(f'- "{key}": {desc}\n predicted: {json.dumps(pred, ensure_ascii=False)}') | |
| return "\n".join(lines) | |
| def build_vlm_judge_prompt(entries: list[tuple[str, str, object]]) -> str: | |
| return _VLM_JUDGE_TPL.substitute(attributes=_vlm_attr_block(entries)) | |
| # βββ response parsing ββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| _NUM_RE = re.compile(r"(\d+\.?\d*)") | |
| def _clamp(x: float) -> float: | |
| return 0.0 if x < 0.0 else (1.0 if x > 1.0 else x) | |
| def parse_score(text: str | None) -> float: | |
| if not text: | |
| return 0.0 | |
| text = text.strip() | |
| try: | |
| return _clamp(float(text)) | |
| except ValueError: | |
| m = _NUM_RE.search(text) | |
| if not m: | |
| return 0.0 | |
| try: | |
| return _clamp(float(m.group(1))) | |
| except ValueError: | |
| return 0.0 | |
| def parse_batch_scores(text: str | None, expected_keys: list[str]) -> dict[str, float]: | |
| """Parse `{key: score}` from judge output; missing keys default to 0.0.""" | |
| result = {k: 0.0 for k in expected_keys} | |
| if not text: | |
| return result | |
| # Reuse the JSON extractor from extract.py β same logic. | |
| from extract import extract_json_strict_first | |
| parsed, _ = extract_json_strict_first(text) | |
| if not isinstance(parsed, dict): | |
| return result | |
| for k in expected_keys: | |
| v = parsed.get(k) | |
| if isinstance(v, bool): | |
| # Defensive: bool is a subclass of int. Don't accept it as a score. | |
| continue | |
| if isinstance(v, (int, float)): | |
| result[k] = _clamp(float(v)) | |
| elif isinstance(v, str): | |
| # Judge sometimes returns string-formatted numbers like "0.8"; | |
| # parse_score extracts the leading numeric. | |
| result[k] = parse_score(v) | |
| return result | |
| def per_sample_judge_avg(per_key: dict[str, dict[str, Any]], score_field: str) -> float | None: | |
| """Average a judge score across all keys of one sample. None if no scores.""" | |
| scores = [ | |
| float(v.get(score_field)) | |
| for v in per_key.values() | |
| if isinstance(v.get(score_field), (int, float)) and not isinstance(v.get(score_field), bool) | |
| ] | |
| return sum(scores) / len(scores) if scores else None | |
| # βββ OpenRouter client + concurrent dispatch βββββββββββββββββββββββββββββββ | |
| def _img_to_data_url(img_bytes: bytes) -> str: | |
| return f"data:image/jpeg;base64,{base64.b64encode(img_bytes).decode('ascii')}" | |
| def _make_client(api_key: str | None) -> AsyncOpenAI: | |
| key = api_key or os.environ.get("OPENROUTER_API_KEY") | |
| if not key: | |
| raise RuntimeError( | |
| "OPENROUTER_API_KEY is not set. Get a key at https://openrouter.ai/keys " | |
| "and `export OPENROUTER_API_KEY=...` before running." | |
| ) | |
| return AsyncOpenAI(base_url=OPENROUTER_BASE_URL, api_key=key, max_retries=3, timeout=120.0) | |
| async def _one_chat( | |
| client: AsyncOpenAI, | |
| *, | |
| model: str, | |
| system: str, | |
| user_text: str, | |
| image_bytes: bytes | None, | |
| max_tokens: int, | |
| semaphore: asyncio.Semaphore, | |
| extra_body: dict[str, Any] | None = None, | |
| max_retries_on_empty: int = 3, | |
| ) -> str: | |
| """Single OpenRouter chat call, rate-limited by `semaphore`. | |
| Some OpenRouter providers occasionally return HTTP 200 with empty content | |
| (no model output). Treating that as success silently fails the per-key | |
| JSON parse β so we retry up to `max_retries_on_empty` times before | |
| giving up. | |
| """ | |
| user_content: list[dict[str, Any]] = [{"type": "text", "text": user_text}] | |
| if image_bytes: | |
| user_content.insert(0, {"type": "image_url", "image_url": {"url": _img_to_data_url(image_bytes)}}) | |
| messages = [ | |
| {"role": "system", "content": system}, | |
| {"role": "user", "content": user_content}, | |
| ] | |
| kwargs: dict[str, Any] = { | |
| "model": model, | |
| "messages": messages, | |
| "max_tokens": max_tokens, | |
| "temperature": 0.0, | |
| } | |
| if extra_body: | |
| kwargs["extra_body"] = extra_body | |
| async with semaphore: | |
| for attempt in range(max_retries_on_empty + 1): | |
| try: | |
| resp = await client.chat.completions.create(**kwargs) | |
| text = resp.choices[0].message.content or "" | |
| if text.strip(): | |
| return text | |
| finish = getattr(resp.choices[0], "finish_reason", None) | |
| if attempt < max_retries_on_empty: | |
| logger.warning( | |
| "Empty response from %s (finish_reason=%s); retrying %d/%d.", | |
| model, finish, attempt + 1, max_retries_on_empty, | |
| ) | |
| continue | |
| logger.warning( | |
| "Empty response from %s after %d retries (finish_reason=%s); giving up.", | |
| model, max_retries_on_empty, finish, | |
| ) | |
| return "" | |
| except Exception as e: | |
| logger.warning("OpenRouter call failed (%s); returning empty string.", e) | |
| return "" | |
| return "" | |
| async def _run_concurrent( | |
| coros: list[Any], | |
| *, | |
| desc: str, | |
| ) -> list[str]: | |
| """Run coroutines concurrently with a tqdm progress bar.""" | |
| return await tqdm_asyncio.gather(*coros, desc=desc) | |
| # βββ VLM judge βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| def run_vlm_judge( | |
| records: list[dict[str, Any]], | |
| *, | |
| sample_images: dict[str, bytes], | |
| model: str, | |
| max_tokens: int = 1024, | |
| concurrency: int = 16, | |
| api_key: str | None = None, | |
| ) -> None: | |
| """Score every key against the image via an OpenRouter VLM.""" | |
| plans: list[dict[str, Any]] = [] | |
| prompts: list[str] = [] | |
| imgs: list[bytes] = [] | |
| for rec in records: | |
| per_key = rec.get("per_key", {}) | |
| if not per_key: | |
| continue | |
| keys = list(per_key.keys()) | |
| entries = [(k, rec["schema"].get(k, ""), per_key[k].get("pred")) for k in keys] | |
| img = sample_images.get(rec["key"], b"") | |
| if not img: | |
| logger.warning("VLM judge: missing image for sample %s", rec["key"]) | |
| continue | |
| prompts.append(build_vlm_judge_prompt(entries)) | |
| imgs.append(img) | |
| plans.append({"record": rec, "keys": keys}) | |
| if not plans: | |
| for rec in records: | |
| rec["vlm_judge_avg"] = None | |
| return | |
| logger.info("VLM judge: scoring %d sample(s) Γ all keys via %s.", len(plans), model) | |
| client = _make_client(api_key) | |
| sem = asyncio.Semaphore(concurrency) | |
| coros = [ | |
| _one_chat( | |
| client, | |
| model=model, | |
| system="You are a meticulous visual evaluator.", | |
| user_text=p, | |
| image_bytes=img, | |
| max_tokens=max_tokens, | |
| semaphore=sem, | |
| extra_body=_VLM_JUDGE_REASONING, | |
| ) | |
| for p, img in zip(prompts, imgs) | |
| ] | |
| raw_outputs = asyncio.run(_run_concurrent(coros, desc="VLM judge")) | |
| for plan, text in zip(plans, raw_outputs): | |
| scores = parse_batch_scores(text, plan["keys"]) | |
| rec = plan["record"] | |
| rec["vlm_judge_raw"] = text | |
| for k in plan["keys"]: | |
| rec["per_key"][k]["vlm_score"] = scores.get(k, 0.0) | |
| for rec in records: | |
| rec["vlm_judge_avg"] = per_sample_judge_avg(rec["per_key"], "vlm_score") | |