SynthVision
Collection
Medical VQA datasets and fine-tuned models from the SynthVision pipeline. • 8 items • Updated • 2
Qwen3.5-2B fine-tuned on ~200K medical VQA records from the SynthVision pipeline. Best overall model in the SynthVision family.
Note: Requires `transformers>=5.3.0`. Do NOT install `fla` (it crashes on Python 3.10).
| Split | VQA-RAD | PathVQA | SLAKE | Avg EM |
|---|---|---|---|---|
| Base (Qwen3.5-2B) | 0.5477 | 0.3822 | 0.5617 | 0.4972 |
| Fine-tuned | 0.5521 | 0.4748 | 0.6880 | 0.5716 |
| Delta | +0.8% | +24.2% | +22.5% | +15.0% |
from transformers import AutoProcessor, AutoModelForImageTextToText

# Requires transformers>=5.3.0
model_id = "OpenMed/Qwen3.5-2B-MedVL"

# Processor handles both image preprocessing and chat-template tokenization.
processor = AutoProcessor.from_pretrained(model_id)
# NOTE: `torch_dtype` was deprecated in transformers 4.56 and removed in v5;
# since this model requires transformers>=5.3.0, use `dtype` instead.
model = AutoModelForImageTextToText.from_pretrained(model_id, dtype="auto", device_map="auto")

messages = [
    {
        "role": "user",
        "content": [
            {"type": "image", "url": "https://example.com/xray.jpg"},
            {"type": "text", "text": "What are the key findings in this chest X-ray?"},
        ],
    }
]

# Apply the chat template and tokenize in one step; move tensors to the model's device.
inputs = processor.apply_chat_template(messages, add_generation_prompt=True, tokenize=True, return_dict=True, return_tensors="pt").to(model.device)
output = model.generate(**inputs, max_new_tokens=512)
# Slice off the prompt tokens so only the newly generated answer is decoded.
print(processor.decode(output[0][inputs["input_ids"].shape[-1]:], skip_special_tokens=True))
from vllm import LLM, SamplingParams

# Initialize the vLLM engine: 4K context window, at most one image per prompt.
engine = LLM(model="OpenMed/Qwen3.5-2B-MedVL", max_model_len=4096, limit_mm_per_prompt={"image": 1})

chat_messages = [
    {
        "role": "user",
        "content": [
            {"type": "image_url", "image_url": {"url": "https://example.com/xray.jpg"}},
            {"type": "text", "text": "What are the key findings in this chest X-ray?"},
        ],
    }
]

# Greedy decoding (temperature=0), generating up to 512 tokens.
params = SamplingParams(temperature=0, max_tokens=512)
results = engine.chat(chat_messages, params)
print(results[0].outputs[0].text)
# Launch an OpenAI-compatible SGLang inference server for the model on port 8000
python -m sglang.launch_server --model-path OpenMed/Qwen3.5-2B-MedVL --port 8000
from openai import OpenAI

# Point the OpenAI SDK at the local SGLang server; the key is unused locally.
client = OpenAI(base_url="http://localhost:8000/v1", api_key="EMPTY")

chat_messages = [
    {
        "role": "user",
        "content": [
            {"type": "image_url", "image_url": {"url": "https://example.com/xray.jpg"}},
            {"type": "text", "text": "What are the key findings in this chest X-ray?"},
        ],
    }
]

# Standard chat-completions call; the server handles image fetching and templating.
completion = client.chat.completions.create(
    model="OpenMed/Qwen3.5-2B-MedVL",
    messages=chat_messages,
    max_tokens=512,
)
print(completion.choices[0].message.content)