llm-pushback / test_stream.py
Sahil Seemant
Simplified project to local MLX only
06e7bdc
raw
history blame contribute delete
410 Bytes
from mlx_vlm import load
from mlx_vlm.generate import stream_generate
# Smoke check for streaming generation: load the model named below with
# adapter_path="adapters", then print the type and text of every item that
# stream_generate yields for a single chat-formatted "Hello" prompt.
model, processor = load(
    "mlx-community/Qwen3.5-4B-MLX-4bit",
    adapter_path="adapters",
)
prompt = "<|im_start|>user\nHello<|im_end|>\n<|im_start|>assistant\n"

# Generation is capped at 10 tokens to keep the run short.
stream = stream_generate(model, processor, prompt, max_tokens=10)
for count, result in enumerate(stream):
    # Items without a `text` attribute are shown via their str() form instead.
    shown = getattr(result, "text", str(result))
    print(f"Yield {count}: {type(result)} -> {shown!r}")