"""Codex inference backend for SupraDashboard — LOCAL computing via the Codex CLI. Mirror of :mod:`inference.openrouter` (same ``run_inference`` / ``available_models`` / ``default_model`` / ``healthcheck`` surface) but backed by ``codex exec``, billed to the ChatGPT **subscription** the local ``codex`` CLI is logged into (no API key needed). Intended for LOCAL runs only — a Hugging Face Space cannot hold codex's interactive OAuth and the codex CLI isn't installed there, so :mod:`inference` forces OpenRouter whenever it detects a Space. Adapted from Supra-R1 ``supra_r1/inference/codex.py``. Closed-book sandbox per call: ``codex exec -s read-only --skip-git-repo-check --ephemeral -C --json``. The working root is an empty temp dir (NOT this repo), so the model can't read our code/data; ``--json`` carries the final agent message. CAVEAT: Codex is an agent, not a bare LLM — it prepends a large system prompt, so token counts and latency are higher than a raw API call. Cost is ChatGPT-subscription quota; when exhausted, ``codex exec`` exits 1 and emits a JSON ``error`` event on STDOUT (empty stderr), which is surfaced here as a RuntimeError so the UI shows a readable message. Config (env; see .env.example): CODEX_MODEL default model selection (e.g. gpt-5.5) CODEX_MODELS comma-separated allowlist for the UI model picker CODEX_REASONING_EFFORT minimal|low|medium|high (default high) CODEX_TIMEOUT per-call seconds (default 900) CODEX_BIN codex executable name/path (default "codex") Requires the Codex CLI installed and logged in (``codex login``; ``codex login status``). """ from __future__ import annotations import json import os import shutil import subprocess import tempfile _DEFAULT_MODEL = "gpt-5.5" _DEFAULT_MODELS = "gpt-5.5,gpt-5" def _codex_bin() -> str: return os.environ.get("CODEX_BIN", "codex") def available_models() -> list[str]: """Models offered in the UI selector (env CODEX_MODELS allowlist).""" raw = os.environ.get("CODEX_MODELS", _DEFAULT_MODELS) models = [m.strip() for m in raw.split(",") if m.strip()] return models or [_DEFAULT_MODEL] def default_model() -> str: """Default UI selection: CODEX_MODEL if it's in the allowlist, else the first.""" avail = available_models() env = os.environ.get("CODEX_MODEL") return env if env in avail else avail[0] def run_inference( system_prompt: str, user_prompt: str, *, model: str | None = None, timeout: float | None = None ) -> str: """Return the model completion text for one host-guest prediction, via ``codex exec``. Codex has no system role, so the system text is prepended to the user prompt. Raises RuntimeError (with the codex error message) on timeout, missing CLI, non-zero exit, or empty output, so the UI surfaces a readable message instead of a silent blank. """ model = model or default_model() effort = os.environ.get("CODEX_REASONING_EFFORT", "high") if timeout is None: timeout = float(os.environ.get("CODEX_TIMEOUT", "900")) prompt = f"{system_prompt}\n\n{user_prompt}".strip() if system_prompt else user_prompt workdir = tempfile.mkdtemp(prefix="codex_cb_") # empty root, not this repo last_path = os.path.join(workdir, "_last.txt") cmd = [ _codex_bin(), "exec", "--json", "--skip-git-repo-check", "--ephemeral", "-s", "read-only", "-m", model, "-C", workdir, "-c", f'model_reasoning_effort="{effort}"', "-o", last_path, "-", ] # Force ChatGPT-subscription auth: strip API keys from the child env so codex can NEVER # silently fall back to metered API billing (it uses the OAuth tokens in ~/.codex/auth.json). env = {k: v for k, v in os.environ.items() if k not in ("OPENAI_API_KEY", "CODEX_API_KEY")} try: proc = subprocess.run( cmd, input=prompt, capture_output=True, text=True, timeout=timeout, env=env ) except subprocess.TimeoutExpired as exc: shutil.rmtree(workdir, ignore_errors=True) raise RuntimeError(f"codex exec timed out after {timeout}s") from exc except FileNotFoundError as exc: shutil.rmtree(workdir, ignore_errors=True) raise RuntimeError( f"codex CLI not found ({_codex_bin()}); install Codex and run `codex login`" ) from exc content, codex_err = "", "" for line in (proc.stdout or "").splitlines(): line = line.strip() if not line: continue try: ev = json.loads(line) except json.JSONDecodeError: continue etype = ev.get("type") if etype == "item.completed": item = ev.get("item") or {} if item.get("type") == "agent_message" and item.get("text"): content = item["text"] elif etype in ("error", "turn.failed"): codex_err = ev.get("message") or (ev.get("error") or {}).get("message") or codex_err if os.path.exists(last_path): txt = open(last_path).read().strip() if txt: content = txt shutil.rmtree(workdir, ignore_errors=True) if proc.returncode != 0: detail = codex_err or (proc.stderr or "")[-400:].strip() or f"exit {proc.returncode}" raise RuntimeError(f"codex exec failed: {detail}") if not content: raise RuntimeError("codex exec returned empty output" + (f": {codex_err}" if codex_err else "")) return content def healthcheck() -> tuple[bool, str]: """Lightweight readiness probe used by the UI; never raises.""" if not shutil.which(_codex_bin()): return False, f"codex CLI not found ({_codex_bin()}); install Codex and run `codex login`" try: r = subprocess.run( [_codex_bin(), "login", "status"], capture_output=True, text=True, timeout=10 ) except Exception as exc: # noqa: BLE001 — surfaced to the UI as a string return False, f"codex status check failed: {exc}" if r.returncode == 0 and "logged in" in (r.stdout + r.stderr).lower(): return True, f"Codex configured (model={default_model()})" return False, "codex not logged in; run `codex login`"