Spaces:
Running on Zero
Running on Zero
| """Small shared helpers with zero heavy dependencies (safe in mock mode).""" | |
| from __future__ import annotations | |
| import contextlib | |
| import os | |
| import re | |
| import sys | |
| from collections.abc import Generator | |
_THINK_RE = re.compile(r"<think>.*?</think>", re.DOTALL)


def strip_think(text: str) -> str:
    """Remove Qwen3 ``<think>…</think>`` blocks from a completion.

    Three shapes are handled, in this order:

    * Closed ``<think>…</think>`` blocks are removed wholesale.
    * An *orphaned* ``</think>`` (no opening tag left in the text, e.g. because
      the opening tag was not part of the completion itself) marks everything
      before it as thinking text: the text up to and including the last such
      tag is dropped.
    * An *unclosed* ``<think>`` (the generation was truncated mid-thought)
      drops everything from that tag onward.

    The result is whitespace-stripped, so thinking text never leaks into game
    state regardless of which tag is missing.
    """
    text = _THINK_RE.sub("", text)

    # Any closing tag that survived the regex has no matching opener before
    # it, so whatever precedes it is reasoning, not answer.
    _, close_tag, after_close = text.rpartition("</think>")
    if close_tag:
        text = after_close

    # partition() returns the whole text as the head when the tag is absent.
    before_open, _, _ = text.partition("<think>")
    return before_open.strip()
def clip_words(text: str, n: int) -> str:
    """Return at most *n* words from *text* (whitespace-split, no tokenizer needed).

    If *text* already has *n* words or fewer it is returned unchanged,
    original spacing included. Otherwise the first *n* words are re-joined
    with single spaces. A negative *n* is treated as 0 so the result never
    exceeds the requested budget.
    """
    words = text.split()
    # Clamp: a negative slice bound would keep "all but the last |n|" words.
    limit = max(n, 0)
    if len(words) <= limit:
        return text
    return " ".join(words[:limit])
def close_truncated_json(text: str) -> str:
    """Best-effort repair of JSON that was cut off mid-generation.

    A single pass over *text* tracks whether the cut happened inside a string
    (and right after a backslash) and which ``{`` / ``[`` are still open. The
    tail is then patched: a dangling backslash is removed, an open string is
    terminated, a trailing ``"key":`` gets ``null``, a trailing comma is
    dropped, and finally every open bracket is closed innermost-first.

    Common truncation shapes become parseable; anything more pathological
    still fails in ``json.loads`` and is left to the caller's fallback.
    """
    closer_for = {"{": "}", "[": "]"}
    pending: list[str] = []  # closers still owed, innermost last
    inside_string = False
    escaped = False

    for char in text:
        if not inside_string:
            if char == '"':
                inside_string = True
            elif char in closer_for:
                pending.append(closer_for[char])
            elif char in ("}", "]") and pending:
                # Closers are not matched against the opener type.
                pending.pop()
            continue

        # Inside a string: only escapes and the closing quote matter.
        if escaped:
            escaped = False
        elif char == "\\":
            escaped = True
        elif char == '"':
            inside_string = False

    # A trailing backslash would escape the quote we are about to add.
    repaired = text[:-1] if escaped else text
    if inside_string:
        repaired += '"'

    repaired = repaired.rstrip()
    if repaired.endswith(":"):
        repaired += " null"
    elif repaired.endswith(","):
        repaired = repaired[:-1]

    return repaired + "".join(reversed(pending))
def _safe_print(msg: str) -> None:
    """Print to stdout, replacing unencodable chars (cp1252 on Windows terminals).

    The message is round-tripped through stdout's encoding with
    ``errors="replace"`` so characters the terminal cannot represent become
    ``?`` instead of raising ``UnicodeEncodeError``. Falls back to UTF-8 when
    stdout reports no encoding, and also when ``sys.stdout`` itself is ``None``
    (console-less processes), in which case ``print`` is a silent no-op.
    """
    # getattr tolerates sys.stdout being None or a stream without .encoding.
    encoding = getattr(sys.stdout, "encoding", None) or "utf-8"
    printable = msg.encode(encoding, errors="replace").decode(
        encoding, errors="replace"
    )
    print(printable)
@contextlib.contextmanager
def _quiet_stderr() -> Generator[None, None, None]:
    """Redirect the C-level stderr fd to /dev/null for the duration of the block.

    Use as ``with _quiet_stderr(): ...``. Catches messages that bypass
    Python's sys.stderr redirect (e.g. llama.cpp's 'n_ctx_seq < n_ctx_train',
    diffusers/transformers load warnings).

    No-op on Windows: os.dup2 on Windows can swallow native crash messages (CUDA,
    DLL errors) making failures completely silent and impossible to diagnose.

    File descriptor 2 is always restored and both temporary descriptors are
    closed, even if the body raises or a flush fails.
    """
    if sys.platform == "win32":
        yield
        return

    # Push out anything Python has buffered so it is not silenced by mistake.
    sys.stderr.flush()
    devnull_fd = os.open(os.devnull, os.O_WRONLY)
    try:
        saved_fd = os.dup(2)
        try:
            os.dup2(devnull_fd, 2)
            yield
        finally:
            try:
                # Flush while fd 2 still points at /dev/null so text buffered
                # inside the block does not surface after the restore.
                sys.stderr.flush()
            finally:
                os.dup2(saved_fd, 2)
                os.close(saved_fd)
    finally:
        os.close(devnull_fd)