Spaces:

trl-lib
/

chat-template-inspector

Sleeping

Quentin Gallouédec

Add "Sign in with Hugging Face" OAuth so visitors access their own gated repos

cb002d0 about 1 month ago

11.2 kB

	"""Chat Template Inspector — a Gradio Space for analyzing chat templates."""

	from __future__ import annotations

	import ast
	import traceback
	from typing import Any

	import gradio as gr
	from transformers import AutoProcessor, AutoTokenizer

	from inspector.checks import ALL_CHECKS, DESCRIPTIONS, CheckResult, Context
	from inspector.format import format_template


	# ─── Tokenizer cache ───────────────────────────────────────────────────────────
	#
	# `from_pretrained` is slow (download + parse). We load each model once per
	# (model_id, token) pair so visitors with different Hub access don't share
	# cached objects. We also stash the originals of the four special tokens so an
	# empty override field truly resets them.

	# Maps (stripped model_id, token or "") -> (loaded object, is_processor flag,
	# dict of the original bos/eos/pad/unk token values captured at load time).
	_CACHE: dict[tuple[str, str], tuple[Any, bool, dict]] = {}


	# Chat templates exactly as they were when the model was loaded, keyed like
	# `_CACHE`. The cached objects get their `chat_template` overwritten every time
	# a template is inspected, so the pristine value is remembered separately;
	# otherwise "Load" would hand back the last edited template instead of the
	# one from the Hub.
	_ORIGINAL_TEMPLATES: dict[tuple[str, str], Any] = {}


	def _cache_key(model_id: str, token: str | None) -> tuple[str, str]:
	    """Return the cache key for a model id and an optional Hub token."""
	    return (model_id.strip(), token or "")


	def _load(model_id: str, token: str | None = None) -> tuple[Any, bool, dict]:
	    """Load (or fetch from cache) the processor/tokenizer for `model_id`.

	    Args:
	        model_id: Hub repository id; surrounding whitespace is ignored.
	        token: Optional Hub access token forwarded to `from_pretrained`.

	    Returns:
	        A `(obj, is_processor, originals)` tuple where `obj` is the loaded
	        processor or tokenizer, `is_processor` tells whether `obj` exposes an
	        `image_processor` or `feature_extractor`, and `originals` maps the four
	        special-token attribute names to their values at load time.

	    Raises:
	        gr.Error: If `model_id` is empty.
	        Exception: Whatever `AutoTokenizer.from_pretrained` raises when the
	            tokenizer cannot be loaded.
	    """
	    key = _cache_key(model_id, token)
	    model_id = key[0]
	    if not model_id:
	        raise gr.Error("Please enter a model id.")
	    if key in _CACHE:
	        return _CACHE[key]

	    # Only the processor attempt may fail quietly: many repos ship no processor
	    # at all. Keeping the tokenizer load outside this `try` avoids running a
	    # failing `AutoTokenizer.from_pretrained` a second time in the fallback.
	    try:
	        proc = AutoProcessor.from_pretrained(model_id, trust_remote_code=False, token=token)
	    except Exception:
	        proc = None
	    is_processor = proc is not None and (
	        hasattr(proc, "image_processor") or hasattr(proc, "feature_extractor")
	    )
	    if is_processor:
	        obj = proc
	    else:
	        obj = AutoTokenizer.from_pretrained(model_id, trust_remote_code=False, token=token)

	    tok = obj.tokenizer if is_processor else obj
	    originals = {attr: getattr(tok, attr, None) for attr in ("bos_token", "eos_token", "pad_token", "unk_token")}

	    # Snapshot the template before anything gets a chance to overwrite it.
	    template = getattr(obj, "chat_template", None)
	    if is_processor and not template:
	        template = getattr(tok, "chat_template", None)
	    _ORIGINAL_TEMPLATES[key] = template

	    _CACHE[key] = (obj, is_processor, originals)
	    return _CACHE[key]


	def load_from_hub(model_id: str, oauth_token: gr.OAuthToken | None = None):
	    """Fetch the chat template and special tokens of `model_id` for the UI.

	    Args:
	        model_id: Hub repository id typed by the visitor.
	        oauth_token: OAuth token of the signed-in visitor, or None when the
	            visitor is anonymous.

	    Returns:
	        A 6-tuple `(template, bos, eos, pad, unk, model_id)`; special tokens
	        that are unset are returned as empty strings.

	    Raises:
	        gr.Error: If the model id is empty, the model cannot be loaded, or it
	            has no chat template.
	    """
	    token = oauth_token.token if oauth_token is not None else None
	    try:
	        _, _, originals = _load(model_id, token)
	    except gr.Error:
	        # Already a user-facing message (e.g. empty model id); don't re-wrap it.
	        raise
	    except Exception as e:
	        raise gr.Error(f"Failed to load `{model_id}`: {type(e).__name__}: {e}") from e

	    # Read the snapshot rather than the cached object's attribute, which may
	    # currently hold a template edited in the UI.
	    template = _ORIGINAL_TEMPLATES.get(_cache_key(model_id, token))
	    if not template:
	        raise gr.Error(f"`{model_id}` has no chat_template.")
	    return (
	        template,
	        originals.get("bos_token") or "",
	        originals.get("eos_token") or "",
	        originals.get("pad_token") or "",
	        originals.get("unk_token") or "",
	        model_id,
	    )


	# ─── Checks runner ─────────────────────────────────────────────────────────────

	def _build_context(
	    model_id: str, template_source: str, bos: str, eos: str, pad: str, unk: str,
	    token: str | None = None,
	) -> Context:
	    """Prepare the cached tokenizer/processor for `model_id` and wrap it in a Context.

	    The special tokens are first reset to the values captured at load time,
	    then every non-empty override is applied, and finally the chat template is
	    replaced by `template_source`.

	    Args:
	        model_id: Hub repository id.
	        template_source: Chat template text to install on the object.
	        bos, eos, pad, unk: Special-token overrides; an empty string keeps the
	            original value.
	        token: Optional Hub access token used when the model must be loaded.

	    Returns:
	        A `Context` referring to the (mutated) cached object.

	    NOTE(review): the object returned by `_load` is shared through the cache
	    and is modified in place here, so the changes are visible to every later
	    user of the same cache entry.
	    """
	    obj, is_processor, originals = _load(model_id, token)
	    tok = obj.tokenizer if is_processor else obj

	    # Reset to original tokens, then apply overrides where the user provided one.
	    for attr, original in originals.items():
	        setattr(tok, attr, original)
	    overrides = {"bos_token": bos, "eos_token": eos, "pad_token": pad, "unk_token": unk}
	    for attr, value in overrides.items():
	        if value:
	            setattr(tok, attr, value)

	    # A processor carries its own template next to the tokenizer's; set both.
	    if is_processor:
	        obj.tokenizer.chat_template = template_source
	    obj.chat_template = template_source
	    return Context(obj=obj, is_processor=is_processor, template_source=template_source, model_id=model_id)


	# Emoji shown in front of each check name in the report, keyed by the result's
	# status; a status missing from this table is rendered as "?".
	_STATUS_EMOJI = {"pass": "✅", "fail": "❌", "warning": "⚠️", "na": "➖"}


	def _format_report(results) -> str:
	    """Turn check results into a Markdown/HTML report grouped by category.

	    Categories appear in the order they are first seen. A result with a
	    description, message, details or reproducer becomes a collapsible
	    `<details>` element; a result with none of them is a single summary line.
	    """
	    grouped: dict[str, list] = {}
	    for result in results:
	        grouped.setdefault(result.category, []).append(result)

	    out: list[str] = []
	    for category, members in grouped.items():
	        out.append(f"### {category}")
	        for result in members:
	            icon = _STATUS_EMOJI.get(result.status, "?")
	            header = f"{icon} <b>{result.name}</b>"
	            blurb = DESCRIPTIONS.get(result.name, "")
	            # One slot per optional section; None marks a section left out.
	            slots = (
	                f"_{blurb}_" if blurb else None,
	                f"{result.message}" if result.message else None,
	                "```\n" + result.details + "\n```" if result.details else None,
	                "```python\n" + result.reproducer + "\n```" if result.reproducer else None,
	            )
	            sections = [slot for slot in slots if slot is not None]
	            if not sections:
	                out.append(header + "<br>")
	                continue
	            out.extend((
	                f"<details>\n<summary>{header}</summary>\n",
	                "\n\n".join(sections),
	                "\n</details>",
	            ))
	        # Blank line closes each category section.
	        out.append("")
	    return "\n".join(out)


	# Initial contents of the playground editor: a Python-literal `messages` list.
	_PLAYGROUND_DEFAULT = '''\
	[
	{"role": "system", "content": "You are a helpful assistant."},
	{"role": "user", "content": "What's the capital of France?"},
	]
	'''


	def render_playground(
	    model_id: str, template_source: str, bos: str, eos: str, pad: str, unk: str,
	    messages_src: str, add_gen_prompt: bool,
	    oauth_token: gr.OAuthToken | None = None,
	) -> str:
	    """Parse the user's `messages` literal and render it with apply_chat_template.

	    Uses `ast.literal_eval` so only Python literals are accepted — no function
	    calls or attribute access. Every failure is reported as a `# ...` comment
	    line in the returned text instead of being raised.

	    Args:
	        model_id: Hub repository id.
	        template_source: Chat template text to render with.
	        bos, eos, pad, unk: Special-token overrides (empty keeps the original).
	        messages_src: Source text of the `messages` list literal.
	        add_gen_prompt: Forwarded as `add_generation_prompt`.
	        oauth_token: OAuth token of the signed-in visitor, or None.

	    Returns:
	        The rendered prompt, an empty string when there is nothing to parse,
	        or a `# parse error: ...` / `# render error: ...` diagnostic.
	    """
	    if not messages_src or not messages_src.strip():
	        return ""
	    try:
	        messages = ast.literal_eval(messages_src)
	    except (ValueError, TypeError, SyntaxError, MemoryError, RecursionError) as e:
	        # literal_eval raises more than ValueError/SyntaxError on malformed
	        # input: e.g. `{[]: 1}` is a TypeError, very deep nesting a
	        # RecursionError.
	        return f"# parse error: {type(e).__name__}: {e}"
	    if not isinstance(messages, list):
	        return "# expected a list of messages at the top level"
	    token = oauth_token.token if oauth_token is not None else None
	    try:
	        ctx = _build_context(model_id, template_source, bos, eos, pad, unk, token=token)
	        return ctx.obj.apply_chat_template(
	            messages, add_generation_prompt=add_gen_prompt, tokenize=False,
	        )
	    except Exception as e:
	        # UI boundary: show the problem in the output pane rather than crash.
	        return f"# render error: {type(e).__name__}: {e}"


	def run_inspection(
	    model_id: str, template_source: str, bos: str, eos: str, pad: str, unk: str,
	    oauth_token: gr.OAuthToken | None = None,
	):
	    """Run every check in `ALL_CHECKS` against the template and return the report.

	    Args:
	        model_id: Hub repository id.
	        template_source: Chat template text to inspect.
	        bos, eos, pad, unk: Special-token overrides (empty keeps the original).
	        oauth_token: OAuth token of the signed-in visitor, or None.

	    Returns:
	        Markdown text: a hint when the model id or template is blank, a
	        failure message with traceback when the context cannot be built, or
	        the formatted report otherwise.
	    """
	    if not model_id.strip() or not template_source.strip():
	        return "_Enter a model id and a template to inspect._"
	    token = oauth_token.token if oauth_token is not None else None
	    try:
	        ctx = _build_context(model_id, template_source, bos, eos, pad, unk, token=token)
	    except Exception as e:
	        return f"Failed to build context: `{type(e).__name__}: {e}`\n\n```\n{traceback.format_exc()}\n```"

	    results: list[CheckResult] = []
	    for check in ALL_CHECKS:
	        try:
	            results.append(check(ctx))
	        except Exception as e:
	            # One crashing check must not hide the others: record it as a
	            # result built from the check's name, "Internal" and "fail".
	            # Not every callable has `__name__` (e.g. functools.partial).
	            check_name = getattr(check, "__name__", None) or repr(check)
	            results.append(CheckResult(
	                check_name, "Internal", "fail",
	                f"Check raised an unexpected exception: {type(e).__name__}: {e}",
	            ))
	    return _format_report(results)


	# ─── UI ────────────────────────────────────────────────────────────────────────

	# Model whose template pre-fills the UI. The fetch happens at import time and
	# is best-effort: on any failure the editor and token boxes start empty and
	# only the model id is pre-filled.
	DEFAULT_MODEL = "Qwen/Qwen3-8B"
	try:
	    (_d_template, _d_bos, _d_eos, _d_pad, _d_unk, _d_id) = load_from_hub(DEFAULT_MODEL)
	except Exception:
	    _d_template, _d_bos, _d_eos, _d_pad, _d_unk, _d_id = "", "", "", "", "", DEFAULT_MODEL


	# Build the UI: title + login button, the model/template/token editors, the
	# inspection report and the playground, followed by the event wiring.
	with gr.Blocks(title="Chat Template Inspector", fill_width=True) as demo:
	with gr.Row():
	gr.Markdown("## Chat Template Inspector", elem_id="title")
	# "Sign in with Hugging Face" button; handlers that declare a
	# `gr.OAuthToken | None` parameter receive the visitor's token.
	login_btn = gr.LoginButton(size="sm")
	with gr.Row():
	with gr.Column():
	with gr.Row():
	# Model id entry plus the Load / Format buttons.
	model_id_box = gr.Textbox(
	value=_d_id, placeholder="Qwen/Qwen3-8B",
	scale=5, show_label=False, container=False,
	)
	load_btn = gr.Button("📥 Load", scale=1, variant="secondary")
	format_btn = gr.Button("✨ Format", scale=1, variant="secondary")
	# Editable chat template, pre-filled with the default model's template.
	template_editor = gr.Code(
	value=_d_template, language="jinja2", show_label=False,
	lines=20, max_lines=20,
	)
	with gr.Row():
	# Special-token override fields, pre-filled with the default model's tokens.
	bos_box = gr.Textbox(value=_d_bos, label="bos_token")
	eos_box = gr.Textbox(value=_d_eos, label="eos_token")
	pad_box = gr.Textbox(value=_d_pad, label="pad_token")
	unk_box = gr.Textbox(value=_d_unk, label="unk_token")

	with gr.Column():
	# Inspection report; the placeholder is replaced by the load event below.
	report_md = gr.Markdown("_Loading…_")
	gr.Markdown("### Playground")
	gr.Markdown(
	"Define a `messages` list (Python literal — only `dict`, `list`, "
	"`str`, etc., no function calls) and see it rendered with the "
	"current template."
	)
	playground_input = gr.Code(
	value=_PLAYGROUND_DEFAULT, language="python", show_label=False,
	lines=8, max_lines=8,
	)
	playground_gen_prompt = gr.Checkbox(label="add_generation_prompt", value=True)
	# Read-only pane showing the rendered prompt or a `# ... error` line.
	playground_output = gr.Code(
	value="", language="markdown", show_label=False,
	lines=8, max_lines=8, interactive=False,
	)

	# Input lists shared by the handlers; the order must match the positional
	# parameters of `run_inspection` / `render_playground`.
	inspection_inputs = [model_id_box, template_editor, bos_box, eos_box, pad_box, unk_box]
	playground_inputs = inspection_inputs + [playground_input, playground_gen_prompt]

	# Initial render on page load.
	demo.load(run_inspection, inputs=inspection_inputs, outputs=[report_md])
	demo.load(render_playground, inputs=playground_inputs, outputs=[playground_output])

	# Re-run whenever the template or any special token changes.
	# NOTE(review): no `cancels=` is passed here, so an in-flight call is not
	# cancelled by a newer event. Per the Gradio docs, `.change` listeners default
	# to trigger_mode="always_last", which runs at most one follow-up call after
	# the pending one completes — confirm for the Gradio version in use.
	for component in (template_editor, bos_box, eos_box, pad_box, unk_box):
	component.change(
	run_inspection, inputs=inspection_inputs, outputs=[report_md],
	show_progress="hidden",
	)
	component.change(
	render_playground, inputs=playground_inputs, outputs=[playground_output],
	show_progress="hidden",
	)

	# Playground re-renders on its own input changes too.
	for component in (playground_input, playground_gen_prompt):
	component.change(
	render_playground, inputs=playground_inputs, outputs=[playground_output],
	show_progress="hidden",
	)

	# Loading from the Hub overwrites the editor + token boxes (which fires
	# their .change events and triggers a fresh inspection automatically).
	load_btn.click(
	load_from_hub,
	inputs=[model_id_box],
	outputs=[template_editor, bos_box, eos_box, pad_box, unk_box, model_id_box],
	)
	# Format rewrites the editor contents in place via `format_template`.
	format_btn.click(format_template, inputs=[template_editor], outputs=[template_editor])


	# Start the app with the Soft theme when this file is executed as a script.
	if __name__ == "__main__":
	demo.launch(theme=gr.themes.Soft())