Spaces:
Running
Running
| """Codex inference backend for SupraDashboard — LOCAL computing via the Codex CLI. | |
| Mirror of :mod:`inference.openrouter` (same ``run_inference`` / ``available_models`` / | |
| ``default_model`` / ``healthcheck`` surface) but backed by ``codex exec``, billed to the | |
| ChatGPT **subscription** the local ``codex`` CLI is logged into (no API key needed). | |
| Intended for LOCAL runs only — a Hugging Face Space cannot hold codex's interactive | |
| OAuth and the codex CLI isn't installed there, so :mod:`inference` forces OpenRouter | |
| whenever it detects a Space. Adapted from Supra-R1 ``supra_r1/inference/codex.py``. | |
| Closed-book sandbox per call: ``codex exec -s read-only --skip-git-repo-check | |
| --ephemeral -C <fresh temp dir> --json``. The working root is an empty temp dir (NOT this | |
| repo), so the model can't read our code/data; ``--json`` carries the final agent message. | |
| CAVEAT: Codex is an agent, not a bare LLM — it prepends a large system prompt, so token | |
| counts and latency are higher than a raw API call. Cost is ChatGPT-subscription quota; when | |
| exhausted, ``codex exec`` exits 1 and emits a JSON ``error`` event on STDOUT (empty stderr), | |
| which is surfaced here as a RuntimeError so the UI shows a readable message. | |
| Config (env; see .env.example): | |
| CODEX_MODEL default model selection (e.g. gpt-5.5) | |
| CODEX_MODELS comma-separated allowlist for the UI model picker | |
| CODEX_REASONING_EFFORT minimal|low|medium|high (default high) | |
| CODEX_TIMEOUT per-call seconds (default 900) | |
| CODEX_BIN codex executable name/path (default "codex") | |
| Requires the Codex CLI installed and logged in (``codex login``; ``codex login status``). | |
| """ | |
| from __future__ import annotations | |
| import json | |
| import os | |
| import shutil | |
| import subprocess | |
| import tempfile | |
| _DEFAULT_MODEL = "gpt-5.5" | |
| _DEFAULT_MODELS = "gpt-5.5,gpt-5" | |
| def _codex_bin() -> str: | |
| return os.environ.get("CODEX_BIN", "codex") | |
def available_models() -> list[str]:
    """Return the model names offered in the UI model picker.

    The list comes from the comma-separated ``CODEX_MODELS`` env allowlist (the built-in
    allowlist when the variable is unset). Entries are whitespace-trimmed and blank entries
    are dropped; if nothing remains, the single built-in default model is returned.
    """
    allowlist = os.environ.get("CODEX_MODELS", _DEFAULT_MODELS)
    names = []
    for entry in allowlist.split(","):
        entry = entry.strip()
        if entry:
            names.append(entry)
    if not names:
        return [_DEFAULT_MODEL]
    return names
def default_model() -> str:
    """Return the model the UI should preselect.

    ``CODEX_MODEL`` wins when it names an entry of the allowlist; otherwise the first
    allowlisted model is used.
    """
    choices = available_models()
    preferred = os.environ.get("CODEX_MODEL")
    if preferred in choices:
        return preferred
    return choices[0]
def _parse_codex_events(stdout: str) -> tuple[str, str]:
    """Scan ``codex exec --json`` output and return ``(agent_message, error_message)``.

    Each non-blank line is expected to be one JSON event. The text of the last completed
    ``agent_message`` item and the last ``error`` / ``turn.failed`` message are kept; lines
    that are not JSON objects, or events with an unexpected shape, are skipped instead of
    raising.
    """
    content, codex_err = "", ""
    for line in stdout.splitlines():
        line = line.strip()
        if not line:
            continue
        try:
            ev = json.loads(line)
        except json.JSONDecodeError:
            continue
        if not isinstance(ev, dict):  # e.g. a bare number/string/list line
            continue
        etype = ev.get("type")
        if etype == "item.completed":
            item = ev.get("item")
            if isinstance(item, dict) and item.get("type") == "agent_message" and item.get("text"):
                content = item["text"]
        elif etype in ("error", "turn.failed"):
            err = ev.get("error")
            # ``error`` may be an object carrying ``message`` or a plain string.
            nested = err.get("message") if isinstance(err, dict) else err
            codex_err = ev.get("message") or nested or codex_err
    return content, codex_err


def run_inference(
    system_prompt: str, user_prompt: str, *, model: str | None = None, timeout: float | None = None
) -> str:
    """Return the model completion text for one host-guest prediction, via ``codex exec``.

    Codex has no system role, so the system text is prepended to the user prompt.

    Args:
        system_prompt: Instructions prepended to the prompt (skipped when empty).
        user_prompt: The user-facing prompt text.
        model: Model name passed to ``codex exec -m``; defaults to :func:`default_model`.
        timeout: Per-call limit in seconds; defaults to ``CODEX_TIMEOUT`` (900).

    Returns:
        The final agent message: the ``-o`` last-message file when it is non-empty,
        otherwise the last ``agent_message`` found in the ``--json`` event stream.

    Raises:
        RuntimeError: On an invalid ``CODEX_TIMEOUT``, timeout, missing or unlaunchable
            CLI, non-zero exit (with the codex error message when available), or empty
            output, so the UI surfaces a readable message instead of a silent blank.
    """
    model = model or default_model()
    # Blank env values (e.g. unfilled .env entries) are treated as unset.
    effort = (os.environ.get("CODEX_REASONING_EFFORT") or "").strip() or "high"
    if timeout is None:
        raw_timeout = (os.environ.get("CODEX_TIMEOUT") or "").strip() or "900"
        try:
            timeout = float(raw_timeout)
        except ValueError as exc:
            raise RuntimeError(
                f"invalid CODEX_TIMEOUT {raw_timeout!r}; expected a number of seconds"
            ) from exc
    prompt = f"{system_prompt}\n\n{user_prompt}".strip() if system_prompt else user_prompt
    codex_bin = _codex_bin()
    workdir = tempfile.mkdtemp(prefix="codex_cb_")  # empty root, not this repo
    try:
        last_path = os.path.join(workdir, "_last.txt")
        cmd = [
            codex_bin, "exec", "--json", "--skip-git-repo-check", "--ephemeral",
            "-s", "read-only", "-m", model, "-C", workdir,
            "-c", f'model_reasoning_effort="{effort}"',
            "-o", last_path, "-",
        ]
        # Force ChatGPT-subscription auth: strip API keys from the child env so codex can
        # NEVER silently fall back to metered API billing (it uses the OAuth tokens in
        # ~/.codex/auth.json).
        env = {
            k: v for k, v in os.environ.items() if k not in ("OPENAI_API_KEY", "CODEX_API_KEY")
        }
        try:
            # Explicit UTF-8 (not the locale codec) so non-ASCII prompts/answers round-trip;
            # undecodable bytes are replaced rather than aborting the call.
            proc = subprocess.run(
                cmd, input=prompt, capture_output=True, encoding="utf-8", errors="replace",
                timeout=timeout, env=env,
            )
        except subprocess.TimeoutExpired as exc:
            raise RuntimeError(f"codex exec timed out after {timeout}s") from exc
        except FileNotFoundError as exc:
            raise RuntimeError(
                f"codex CLI not found ({codex_bin}); install Codex and run `codex login`"
            ) from exc
        except OSError as exc:  # e.g. CODEX_BIN points at a non-executable file
            raise RuntimeError(f"codex exec could not be started: {exc}") from exc
        content, codex_err = _parse_codex_events(proc.stdout or "")
        # The -o file, when present and non-empty, takes precedence over the event stream.
        try:
            with open(last_path, encoding="utf-8", errors="replace") as fh:
                last_message = fh.read().strip()
        except OSError:
            last_message = ""
        if last_message:
            content = last_message
    finally:
        # Always remove the sandbox dir, whichever path (success or any exception) we leave by.
        shutil.rmtree(workdir, ignore_errors=True)
    if proc.returncode != 0:
        detail = codex_err or (proc.stderr or "")[-400:].strip() or f"exit {proc.returncode}"
        raise RuntimeError(f"codex exec failed: {detail}")
    if not content:
        raise RuntimeError(
            "codex exec returned empty output" + (f": {codex_err}" if codex_err else "")
        )
    return content
def healthcheck() -> tuple[bool, str]:
    """Lightweight readiness probe used by the UI; never raises.

    Returns:
        ``(ok, message)``. ``ok`` is True only when the codex executable is on PATH and
        ``codex login status`` exits 0 while reporting a logged-in session; ``message`` is
        a human-readable status or remediation hint.
    """
    codex_bin = _codex_bin()  # resolve once so the probe and the messages agree
    if not shutil.which(codex_bin):
        return False, f"codex CLI not found ({codex_bin}); install Codex and run `codex login`"
    try:
        r = subprocess.run(
            [codex_bin, "login", "status"], capture_output=True,
            encoding="utf-8", errors="replace", timeout=10,
        )
    except Exception as exc:  # noqa: BLE001 — surfaced to the UI as a string
        return False, f"codex status check failed: {exc}"
    # Keep the streams separated so text cannot match across the stdout/stderr boundary.
    report = f"{r.stdout or ''}\n{r.stderr or ''}".lower()
    # "not logged in" contains "logged in", so it has to be excluded explicitly.
    if r.returncode == 0 and "logged in" in report and "not logged in" not in report:
        return True, f"Codex configured (model={default_model()})"
    return False, "codex not logged in; run `codex login`"