"""Small shared helpers with zero heavy dependencies (safe in mock mode).""" from __future__ import annotations import contextlib import os import re import sys from collections.abc import Generator _THINK_RE = re.compile(r".*?", re.DOTALL) def strip_think(text: str) -> str: """Remove Qwen3 ```` blocks from a completion. Closed blocks are removed wholesale. If an *unclosed* ```` remains (the generation was truncated mid-thought), everything from that tag onward is dropped so thinking text never leaks into game state. """ text = _THINK_RE.sub("", text) open_idx = text.find("") if open_idx != -1: text = text[:open_idx] return text.strip() def clip_words(text: str, n: int) -> str: """Return at most *n* words from *text* (space-split, no tokenizer needed).""" words = text.split() return " ".join(words[:n]) if len(words) > n else text def close_truncated_json(text: str) -> str: """Best-effort repair of JSON cut off mid-generation (max_tokens hit). Closes an unterminated string, drops a dangling backslash/comma, completes a dangling ``"key":`` with null, then closes every open bracket. The result is parseable for the common truncation shapes; pathological cuts still raise in json.loads and are handled by the caller's fallback. """ stack: list[str] = [] in_str = False esc = False for ch in text: if in_str: if esc: esc = False elif ch == "\\": esc = True elif ch == '"': in_str = False elif ch == '"': in_str = True elif ch in "{[": stack.append(ch) elif ch in "}]": if stack: stack.pop() out = text if esc: out = out[:-1] # drop the dangling escape so the closing quote is valid if in_str: out += '"' stripped = out.rstrip() if stripped.endswith(":"): stripped += " null" elif stripped.endswith(","): stripped = stripped[:-1] return stripped + "".join("}" if ch == "{" else "]" for ch in reversed(stack)) def _safe_print(msg: str) -> None: """Print to stdout, replacing unencodable chars (cp1252 on Windows terminals).""" print( msg.encode(sys.stdout.encoding or "utf-8", errors="replace").decode( sys.stdout.encoding or "utf-8", errors="replace" ) ) @contextlib.contextmanager def _quiet_stderr() -> Generator[None, None, None]: """Redirect the C-level stderr fd to /dev/null for the duration of the block. Catches messages that bypass Python's sys.stderr redirect (e.g. llama.cpp's 'n_ctx_seq < n_ctx_train', diffusers/transformers load warnings). No-op on Windows: os.dup2 on Windows can swallow native crash messages (CUDA, DLL errors) making failures completely silent and impossible to diagnose. """ if sys.platform == "win32": yield return sys.stderr.flush() devnull_fd = os.open(os.devnull, os.O_WRONLY) saved_fd = os.dup(2) try: os.dup2(devnull_fd, 2) yield finally: sys.stderr.flush() os.dup2(saved_fd, 2) os.close(saved_fd) os.close(devnull_fd)