"""Chat rendering helpers for text-only Qwen/Qwen-VL control prompts."""

from __future__ import annotations

from typing import Any, Dict, List


def render_no_think_chat(
    tokenizer: Any,
    messages: List[Dict[str, str]],
    *,
    add_generation_prompt: bool,
) -> str:
    """Render a chat prompt with Qwen thinking disabled when supported.

    Qwen3-family templates expose ``enable_thinking`` as a Jinja variable.
    The template is first rendered with ``enable_thinking=False``.  If the
    tokenizer rejects that keyword with a ``TypeError`` that names it, the
    prompt is rendered again without it, so training does not fail for
    non-Qwen or older tokenizer builds.

    Args:
        tokenizer: Object exposing ``apply_chat_template(messages, **kwargs)``.
        messages: Chat messages passed through to the template unchanged.
        add_generation_prompt: Forwarded to ``apply_chat_template``.

    Returns:
        Whatever ``apply_chat_template`` returns with ``tokenize=False``
        (the rendered prompt).

    Raises:
        TypeError: If ``apply_chat_template`` raises a ``TypeError`` whose
            message does not mention ``enable_thinking``.
    """
    base_kwargs = {
        "tokenize": False,
        "add_generation_prompt": add_generation_prompt,
    }
    try:
        return tokenizer.apply_chat_template(
            messages, enable_thinking=False, **base_kwargs
        )
    except TypeError as exc:
        # Only a complaint about the thinking switch itself triggers the
        # fallback; any other TypeError is a real error and must surface.
        if "enable_thinking" not in str(exc):
            raise
    # Retry outside the handler so that a failure here is reported on its
    # own rather than chained to the expected ``enable_thinking`` TypeError.
    return tokenizer.apply_chat_template(messages, **base_kwargs)


def tokenize_text_only(tokenizer: Any, input_text: str, device: Any) -> Any:
    """Tokenize a rendered text prompt without invoking VL image loading.

    Some Qwen-VL processors route the first positional argument to
    ``images``. Passing the transcript through the explicit ``text=`` keyword
    keeps the prompt on the text path and avoids PIL trying to parse chat
    text as an image.

    Args:
        tokenizer: Callable tokenizer or processor.
        input_text: The already-rendered prompt text.
        device: Passed to the ``.to(...)`` method of the tokenizer output.

    Returns:
        The result of calling ``.to(device)`` on the tokenizer output.

    Raises:
        ValueError: If the first call raises a ``ValueError`` whose message
            does not contain ``"Incorrect image source"``.
    """
    try:
        inputs = tokenizer(text=input_text, return_tensors="pt")
    except ValueError as exc:
        if "Incorrect image source" not in str(exc):
            raise
        # The prompt was still treated as an image source: retry with a
        # one-element batch and an explicit ``images=None``.
        inputs = tokenizer(text=[input_text], images=None, return_tensors="pt")
    except TypeError:
        # Fall back to a positional call; presumably this covers callables
        # that do not accept a ``text=`` keyword.
        inputs = tokenizer(input_text, return_tensors="pt")
    return inputs.to(device)