AntiAtropos / training /chat_utils.py
div18
utils
ba5f05d
"""Chat rendering helpers for text-only Qwen/Qwen-VL control prompts."""
from __future__ import annotations
from typing import Any, Dict, List
def render_no_think_chat(
    tokenizer: Any,
    messages: List[Dict[str, str]],
    *,
    add_generation_prompt: bool,
) -> str:
    """Render ``messages`` to prompt text, asking the template to skip thinking.

    The first attempt passes ``enable_thinking=False`` to
    ``tokenizer.apply_chat_template`` (Qwen3-family templates are expected to
    read it as a Jinja variable).  If that call raises a ``TypeError`` whose
    message names ``enable_thinking``, the call is repeated without the flag
    so tokenizers that reject the keyword still produce a prompt.

    Args:
        tokenizer: Object exposing ``apply_chat_template(messages, **kwargs)``.
        messages: Chat turns handed to the template unchanged.
        add_generation_prompt: Forwarded verbatim to the template call.

    Returns:
        Whatever ``apply_chat_template`` returns with ``tokenize=False``
        (expected to be the rendered prompt string).

    Raises:
        TypeError: Re-raised untouched when the failure does not mention
            ``enable_thinking``.
    """
    # Options every tokenizer build is expected to understand.
    base_options = {
        "tokenize": False,
        "add_generation_prompt": add_generation_prompt,
    }
    try:
        return tokenizer.apply_chat_template(
            messages, **base_options, enable_thinking=False
        )
    except TypeError as err:
        if "enable_thinking" in str(err):
            # Only the thinking switch was rejected; retry without it.
            return tokenizer.apply_chat_template(messages, **base_options)
        # Any other TypeError is a genuine problem and must surface.
        raise
def tokenize_text_only(tokenizer: Any, input_text: str, device: Any):
    """Encode an already-rendered prompt as text and move it to ``device``.

    The prompt is passed through the explicit ``text=`` keyword because some
    Qwen-VL processors reportedly bind the first positional argument to
    ``images``, which would make PIL try to open the chat transcript as an
    image.  Two fallbacks are attempted, in this order of handlers:

    * ``ValueError`` containing ``"Incorrect image source"``: retry with the
      prompt wrapped in a list and ``images=None`` given explicitly.
    * ``TypeError`` from the first call: retry with the prompt as a plain
      positional argument.

    Args:
        tokenizer: Callable tokenizer or processor.
        input_text: Rendered prompt text.
        device: Target passed to the encoding's ``.to()`` method.

    Returns:
        The result of calling ``.to(device)`` on the tokenizer output.

    Raises:
        ValueError: Re-raised untouched when the message does not contain
            ``"Incorrect image source"``.
    """
    try:
        encoded = tokenizer(text=input_text, return_tensors="pt")
    except ValueError as err:
        if "Incorrect image source" in str(err):
            # The processor still tried the image path; rule images out
            # explicitly and hand the text over as a batch of one.
            encoded = tokenizer(text=[input_text], images=None, return_tensors="pt")
        else:
            raise
    except TypeError:
        # The first call (using ``text=``) raised TypeError; pass the prompt
        # positionally instead.
        encoded = tokenizer(input_text, return_tensors="pt")
    return encoded.to(device)