Instructions to use EvidenceAIResearch/VReason-QwenVL with libraries, inference providers, notebooks, and local apps. Follow these links to get started.

Libraries

How to use EvidenceAIResearch/VReason-QwenVL with Transformers:

# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("image-text-to-text", model="EvidenceAIResearch/VReason-QwenVL", trust_remote_code=True)
messages = [
    {
        "role": "user",
        "content": [
            {"type": "image", "url": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/p-blog/candy.JPG"},
            {"type": "text", "text": "What animal is on the candy?"}
        ]
    },
]
pipe(text=messages)

# Load model directly
from transformers import AutoProcessor, AutoModelForVision2Seq

processor = AutoProcessor.from_pretrained("EvidenceAIResearch/VReason-QwenVL", trust_remote_code=True)
model = AutoModelForVision2Seq.from_pretrained("EvidenceAIResearch/VReason-QwenVL", trust_remote_code=True)
messages = [
    {
        "role": "user",
        "content": [
            {"type": "image", "url": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/p-blog/candy.JPG"},
            {"type": "text", "text": "What animal is on the candy?"}
        ]
    },
]
inputs = processor.apply_chat_template(
	messages,
	add_generation_prompt=True,
	tokenize=True,
	return_dict=True,
	return_tensors="pt",
).to(model.device)

outputs = model.generate(**inputs, max_new_tokens=40)
print(processor.decode(outputs[0][inputs["input_ids"].shape[-1]:]))

Notebooks
Google Colab
Kaggle
Local Apps Settings

vLLM

How to use EvidenceAIResearch/VReason-QwenVL with vLLM:

Install from pip and serve model

# Install vLLM from pip:
pip install vllm
# Start the vLLM server:
vllm serve "EvidenceAIResearch/VReason-QwenVL"
# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:8000/v1/chat/completions" \
	-H "Content-Type: application/json" \
	--data '{
		"model": "EvidenceAIResearch/VReason-QwenVL",
		"messages": [
			{
				"role": "user",
				"content": [
					{
						"type": "text",
						"text": "Describe this image in one sentence."
					},
					{
						"type": "image_url",
						"image_url": {
							"url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
						}
					}
				]
			}
		]
	}'

Use Docker

docker model run hf.co/EvidenceAIResearch/VReason-QwenVL

SGLang

How to use EvidenceAIResearch/VReason-QwenVL with SGLang:

Install from pip and serve model

# Install SGLang from pip:
pip install sglang
# Start the SGLang server:
python3 -m sglang.launch_server \
    --model-path "EvidenceAIResearch/VReason-QwenVL" \
    --host 0.0.0.0 \
    --port 30000
# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
	-H "Content-Type: application/json" \
	--data '{
		"model": "EvidenceAIResearch/VReason-QwenVL",
		"messages": [
			{
				"role": "user",
				"content": [
					{
						"type": "text",
						"text": "Describe this image in one sentence."
					},
					{
						"type": "image_url",
						"image_url": {
							"url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
						}
					}
				]
			}
		]
	}'

Use Docker images

docker run --gpus all \
    --shm-size 32g \
    -p 30000:30000 \
    -v ~/.cache/huggingface:/root/.cache/huggingface \
    --env "HF_TOKEN=<secret>" \
    --ipc=host \
    lmsysorg/sglang:latest \
    python3 -m sglang.launch_server \
        --model-path "EvidenceAIResearch/VReason-QwenVL" \
        --host 0.0.0.0 \
        --port 30000
# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
	-H "Content-Type: application/json" \
	--data '{
		"model": "EvidenceAIResearch/VReason-QwenVL",
		"messages": [
			{
				"role": "user",
				"content": [
					{
						"type": "text",
						"text": "Describe this image in one sentence."
					},
					{
						"type": "image_url",
						"image_url": {
							"url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
						}
					}
				]
			}
		]
	}'

Docker Model Runner
How to use EvidenceAIResearch/VReason-QwenVL with Docker Model Runner:
```
docker model run hf.co/EvidenceAIResearch/VReason-QwenVL
```

VReason-QwenVL / modeling_vreason.py

EvidenceAIResearch

Upload vreason-huatuo model artifacts

35beca7 verified 3 months ago

Raw

History Blame Contribute Delete

14.6 kB

	from __future__ import annotations

	import ast
	import json
	import re
	from pathlib import Path
	from typing import Any, Optional

	import numpy as np

	from transformers.models.qwen2_5_vl.modeling_qwen2_5_vl import (
	Qwen2_5_VLForConditionalGeneration,
	)


	_TOOL_RE = re.compile(
	r'<tool\s+type="(?P<tool_type>[^"]+)"\s+label=(?P<labels>\[[^\]]\])\s>\s*<image>',
	re.IGNORECASE,
	)
	_TOKEN_RE = re.compile(
	r'Reviewing\s+.+?\.\.\.\|Inspecting\s+.+?\.\.\.\|<tool\s+type="[^"]+"\s+label=\[[^\]]\]\s>\s*<image>',
	re.IGNORECASE \| re.DOTALL,
	)


	class VReasonQwen2_5_VLForConditionalGeneration(Qwen2_5_VLForConditionalGeneration):
	"""Qwen2.5-VL model with VReason helper methods.

	Loaded via:
	AutoModelForVision2Seq.from_pretrained(..., trust_remote_code=True)

	Extra method:
	model.visual_reason(...)
	"""

	@staticmethod
	def _parse_labels(raw: str) -> list[str]:
	try:
	parsed = json.loads(raw)
	if isinstance(parsed, list):
	return [str(x).strip() for x in parsed if str(x).strip()]
	except Exception:
	pass

	try:
	parsed = ast.literal_eval(raw)
	if isinstance(parsed, list):
	return [str(x).strip() for x in parsed if str(x).strip()]
	except Exception:
	pass

	return [x.strip().strip('"').strip("'") for x in raw.strip("[]").split(",") if x.strip()]

	@staticmethod
	def _extract_tag(text: str, tag: str) -> str:
	m = re.search(rf"<{tag}>(.*?)</{tag}>", text, flags=re.IGNORECASE \| re.DOTALL)
	return m.group(1).strip() if m else ""

	@classmethod
	def _parse_reasoning(cls, model_output: str) -> list[dict[str, Any]]:
	interpret = cls._extract_tag(model_output, "interpret")
	if not interpret:
	return []

	regions: list[dict[str, Any]] = []
	current_region: Optional[dict[str, Any]] = None
	current_sub: Optional[dict[str, Any]] = None

	cursor = 0
	for tok in _TOKEN_RE.finditer(interpret):
	between = interpret[cursor:tok.start()].strip()
	if between and current_sub is not None:
	current_sub["reason"] = (current_sub["reason"] + " " + between).strip()

	token = tok.group(0)
	low = token.lower()

	if low.startswith("reviewing "):
	region = token[len("Reviewing ") :].strip()
	if region.endswith("..."):
	region = region[:-3].strip()
	current_region = {
	"region": region,
	"labels": [],
	"path": None,
	"pathological": [],
	}
	regions.append(current_region)
	current_sub = None

	elif low.startswith("inspecting ") and current_region is not None:
	anatomy = token[len("Inspecting ") :].strip()
	if anatomy.endswith("..."):
	anatomy = anatomy[:-3].strip()
	current_sub = {
	"anatomies": anatomy,
	"labels": [],
	"reason": "",
	"path": None,
	}
	current_region["pathological"].append(current_sub)

	else:
	m = _TOOL_RE.search(token)
	if m:
	labels = cls._parse_labels(m.group("labels"))
	tool_type = m.group("tool_type").lower()
	if tool_type == "anatomical_roi" and current_region is not None:
	current_region["labels"] = labels
	elif tool_type == "pathological_roi" and current_sub is not None:
	current_sub["labels"] = labels

	cursor = tok.end()

	trailing = interpret[cursor:].strip()
	if trailing and current_sub is not None:
	current_sub["reason"] = (current_sub["reason"] + " " + trailing).strip()

	for r in regions:
	for p in r["pathological"]:
	p["reason"] = re.sub(r"\s+", " ", p["reason"]).strip()
	return regions

	@staticmethod
	def _ids_for_many(names: list[str], name2id: dict[str, list[int]]) -> list[int]:
	out: list[int] = []
	seen = set()
	for name in names:
	key = str(name).strip().lower()
	if key in name2id:
	for idx in name2id[key]:
	idx = int(idx)
	if idx not in seen:
	seen.add(idx)
	out.append(idx)
	return out

	@staticmethod
	def _mask_union(mask_array: np.ndarray, indices: list[int]) -> Optional[np.ndarray]:
	if mask_array is None or not indices:
	return None
	safe = [i for i in indices if 0 <= i < mask_array.shape[0]]
	if not safe:
	return None
	return mask_array[safe].any(axis=0)

	@staticmethod
	def _bbox_from_mask(mask: Optional[np.ndarray], width: int, height: int, pad: int = 0) -> tuple[int, int, int, int]:
	if mask is None or not mask.any():
	return 0, width, 0, height
	ys, xs = np.where(mask)
	x0 = max(0, int(xs.min()) - pad)
	x1 = min(width, int(xs.max()) + pad)
	y0 = max(0, int(ys.min()) - pad)
	y1 = min(height, int(ys.max()) + pad)
	return x0, x1, y0, y1

	@staticmethod
	def _to_alpha(mask: Optional[np.ndarray], invert: bool, feather: int, size_wh: tuple[int, int]):
	from PIL import Image
	import cv2

	if mask is None:
	return Image.new("L", size_wh, color=255 if invert else 0)
	selected = (~mask if invert else mask).astype(np.uint8) * 255
	if size_wh != (mask.shape[1], mask.shape[0]):
	selected = cv2.resize(selected, size_wh, interpolation=cv2.INTER_NEAREST)
	if feather > 0:
	selected = cv2.GaussianBlur(selected, ksize=(0, 0), sigmaX=feather, sigmaY=feather)
	return Image.fromarray(selected, mode="L")

	@staticmethod
	def _save_viz(
	img_base,
	mask: Optional[np.ndarray],
	out_path: Path,
	mode: str,
	blur_radius: int,
	feather: int,
	ring: int,
	roi_wh: Optional[tuple[int, int]],
	) -> None:
	import cv2
	from PIL import Image, ImageFilter

	base = img_base.convert("RGB")
	w, h = base.size

	if mask is not None and (mask.shape[1], mask.shape[0]) != (w, h):
	mask = cv2.resize(mask.astype(np.uint8), (w, h), interpolation=cv2.INTER_NEAREST).astype(bool)

	if mode == "blur":
	blurred = base.filter(ImageFilter.GaussianBlur(radius=blur_radius))
	alpha = VReasonQwen2_5_VLForConditionalGeneration._to_alpha(mask, invert=True, feather=feather, size_wh=base.size)
	out = Image.composite(blurred, base, alpha)

	elif mode == "crop":
	x0, x1, y0, y1 = VReasonQwen2_5_VLForConditionalGeneration._bbox_from_mask(mask, w, h, pad=ring)
	out = base.crop((x0, y0, x1, y1))

	else:
	if mask is None:
	out = base
	else:
	x0, x1, y0, y1 = VReasonQwen2_5_VLForConditionalGeneration._bbox_from_mask(mask, w, h, pad=ring)
	crop = base.crop((x0, y0, x1, y1))
	crop_mask = mask[y0:y1, x0:x1]
	blurred = crop.filter(ImageFilter.GaussianBlur(radius=blur_radius))
	alpha = VReasonQwen2_5_VLForConditionalGeneration._to_alpha(crop_mask, invert=True, feather=feather, size_wh=crop.size)
	out = Image.composite(blurred, crop, alpha)

	if roi_wh:
	out = out.resize((int(roi_wh[0]), int(roi_wh[1])), Image.BICUBIC)

	out_path.parent.mkdir(parents=True, exist_ok=True)
	out.save(out_path, format="JPEG", quality=95, subsampling=1, optimize=True)

	def visual_reason(
	self,
	*,
	processor,
	image,
	prompt_text: str = "Based on the provided chest radiograph, explain your diagnosis procedure and write a report.",
	model_output_text: Optional[str] = None,
	messages: Optional[list[dict[str, Any]]] = None,
	max_new_tokens: int = 1024,
	generation_kwargs: Optional[dict[str, Any]] = None,
	skip_special_tokens: bool = False,
	output_dir: Optional[str] = None,
	generate_roi: bool = False,
	mask_npy: Optional[str] = None,
	cxas_gpus: str = "0",
	viz_mode: str = "blurcrop",
	context_ring: int = 8,
	blur_radius: int = 31,
	feather: int = 6,
	resize_roi_to: Optional[tuple[int, int]] = None,
	) -> dict[str, Any]:
	"""Generate and parse VReason output, optionally producing ROI image artifacts.

	Args:
	processor: HF processor matching this model.
	image: Path or PIL image for frontal CXR.
	prompt_text: Default user prompt if `messages` is not provided.
	model_output_text: If provided, skips generation and parses this text directly.
	messages: Optional fully custom chat messages.
	output_dir: Directory for `reasoning.json` and ROI images.
	generate_roi: Whether to render ROI images using CXAS masks.
	mask_npy: Optional precomputed mask array path ([C,H,W]).
	"""
	import torch
	from PIL import Image

	if model_output_text is None:
	if messages is None:
	msg_image = image
	if isinstance(image, Path):
	msg_image = str(image)
	messages = [
	{
	"role": "user",
	"content": [
	{"type": "image", "image": msg_image},
	{"type": "text", "text": prompt_text},
	],
	}
	]

	prompt = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)

	images_for_proc = []
	for msg in messages:
	for part in msg.get("content", []):
	if part.get("type") != "image":
	continue
	img_obj = part.get("image")
	if isinstance(img_obj, Image.Image):
	images_for_proc.append(img_obj.convert("RGB"))
	else:
	images_for_proc.append(Image.open(str(img_obj)).convert("RGB"))

	inputs = processor(text=[prompt], images=[images_for_proc], return_tensors="pt").to(self.device)

	kwargs = dict(max_new_tokens=max_new_tokens)
	if generation_kwargs:
	kwargs.update(generation_kwargs)

	with torch.no_grad():
	output_ids = self.generate(inputs, kwargs)

	model_output_text = processor.batch_decode(output_ids, skip_special_tokens=skip_special_tokens)[0]

	assert model_output_text is not None

	regions = self._parse_reasoning(model_output_text)
	result: dict[str, Any] = {
	"text": model_output_text,
	"reasoning": regions,
	"finding": self._extract_tag(model_output_text, "finding"),
	"impression": self._extract_tag(model_output_text, "impression"),
	"report": self._extract_tag(model_output_text, "report"),
	"viz_mode": viz_mode,
	}

	if generate_roi:
	if output_dir is None:
	raise ValueError("output_dir is required when generate_roi=True")

	out_dir = Path(output_dir).resolve()
	out_dir.mkdir(parents=True, exist_ok=True)

	if isinstance(image, Image.Image):
	base_img = image.convert("RGB")
	input_image_name = "input.jpg"
	else:
	image_path = Path(str(image)).resolve()
	base_img = Image.open(image_path).convert("RGB")
	input_image_name = image_path.name

	if mask_npy:
	mask_array = np.load(mask_npy).astype(bool)
	else:
	try:
	import cxas_vreason as cxas # type: ignore
	except Exception:
	import cxas # type: ignore
	mask_array = np.asarray(cxas.CXAS(gpus=cxas_gpus).eval().seg(str(image)), dtype=bool)

	try:
	from cxas_vreason.label_mapper import name2id as name2id # type: ignore
	except Exception:
	from cxas.label_mapper import name2id as name2id # type: ignore

	for i, region in enumerate(regions):
	region_labels = region.get("labels") or [region.get("region", "")]
	region_idx = self._ids_for_many(region_labels, name2id)
	region_mask = self._mask_union(mask_array, region_idx)

	region_name = f"anatomy_{i:03d}.jpg"
	region_path = out_dir / region_name
	self._save_viz(base_img, region_mask, region_path, viz_mode, blur_radius, feather, context_ring, resize_roi_to)
	region["path"] = region_name

	for j, sub in enumerate(region.get("pathological", [])):
	sub_labels = sub.get("labels") or [sub.get("anatomies", "")]
	sub_idx = self._ids_for_many(sub_labels, name2id)
	sub_mask = self._mask_union(mask_array, sub_idx)

	sub_name = f"pathology_{i:03d}_{j:03d}.jpg"
	sub_path = out_dir / sub_name
	self._save_viz(base_img, sub_mask, sub_path, viz_mode, blur_radius, feather, context_ring, resize_roi_to)
	sub["path"] = sub_name

	reasoning_json_path = out_dir / "reasoning.json"
	reasoning_json_path.write_text(
	json.dumps(
	{
	"input_image": input_image_name,
	"viz_mode": viz_mode,
	"reasoning": regions,
	"finding": result["finding"],
	"impression": result["impression"],
	"report": result["report"],
	},
	ensure_ascii=False,
	indent=2,
	),
	encoding="utf-8",
	)
	result["reasoning_json"] = str(reasoning_json_path)

	return result