"""Pretty-print a Jinja chat template using djlint. djlint is HTML-aware — without precautions its parser inserts spaces inside tokens like ``<|im_start|>`` (treating them as malformed HTML tags). We mask all ``<|...|>`` (and similar angle-bracket pseudo-tokens) before formatting and unmask after, so the special tokens come back byte-identical. """ from __future__ import annotations import re import subprocess # Matches typical chat-template special tokens: <|something|>, <|tool_call|>, # etc. Conservative: only chars that aren't '<', '>', or '|' between the bars. _SPECIAL_RE = re.compile(r"<\|[^|<>]+?\|>") _PLACEHOLDER = "\x01SPECIAL{}\x01" _PLACEHOLDER_RE = re.compile(r"\x01SPECIAL(\d+)\x01") def format_template(source: str) -> str: """Return ``source`` reformatted with djlint, or the original on failure.""" if not source.strip(): return source specials: list[str] = [] def _mask(m: re.Match) -> str: specials.append(m.group(0)) return _PLACEHOLDER.format(len(specials) - 1) masked = _SPECIAL_RE.sub(_mask, source) try: result = subprocess.run( ["djlint", "-", "--reformat", "--profile=jinja", "--indent=2"], input=masked, capture_output=True, text=True, timeout=10, ) except (FileNotFoundError, subprocess.TimeoutExpired): return source # djlint exits non-zero when it had to reformat — that's fine, we still # want the stdout. Only treat empty stdout as a real failure. if not result.stdout.strip(): return source formatted = _PLACEHOLDER_RE.sub(lambda m: specials[int(m.group(1))], result.stdout) return formatted.rstrip() + "\n"