# Tianyi-Billy-Ma
# Deploy: simplify codebase (dead-code removal, behavior-preserving)
# 1727091
# Raw
# History Blame Contribute Delete
# 6.24 kB
"""Codex inference backend for SupraDashboard — LOCAL computing via the Codex CLI.
Mirror of :mod:`inference.openrouter` (same ``run_inference`` / ``available_models`` /
``default_model`` / ``healthcheck`` surface) but backed by ``codex exec``, billed to the
ChatGPT **subscription** the local ``codex`` CLI is logged into (no API key needed).
Intended for LOCAL runs only — a Hugging Face Space cannot hold codex's interactive
OAuth and the codex CLI isn't installed there, so :mod:`inference` forces OpenRouter
whenever it detects a Space. Adapted from Supra-R1 ``supra_r1/inference/codex.py``.
Closed-book sandbox per call: ``codex exec -s read-only --skip-git-repo-check
--ephemeral -C <fresh temp dir> --json``. The working root is an empty temp dir (NOT this
repo), so the model can't read our code/data; ``--json`` carries the final agent message.
CAVEAT: Codex is an agent, not a bare LLM — it prepends a large system prompt, so token
counts and latency are higher than a raw API call. Cost is ChatGPT-subscription quota; when
exhausted, ``codex exec`` exits 1 and emits a JSON ``error`` event on STDOUT (empty stderr),
which is surfaced here as a RuntimeError so the UI shows a readable message.
Config (env; see .env.example):
    CODEX_MODEL             default model selection (e.g. gpt-5.5)
    CODEX_MODELS            comma-separated allowlist for the UI model picker
    CODEX_REASONING_EFFORT  minimal|low|medium|high (default high)
    CODEX_TIMEOUT           per-call seconds (default 900)
    CODEX_BIN               codex executable name/path (default "codex")
Requires the Codex CLI installed and logged in (``codex login``; ``codex login status``).
"""
from __future__ import annotations
import json
import os
import shutil
import subprocess
import tempfile
# Built-in fallbacks for the model picker.
# Single model returned by available_models() when the allowlist parses to nothing.
_DEFAULT_MODEL = "gpt-5.5"
# Comma-separated allowlist used when the CODEX_MODELS env var is not set.
_DEFAULT_MODELS = "gpt-5.5,gpt-5"
def _codex_bin() -> str:
    """Return the codex executable name/path to invoke.

    Reads the ``CODEX_BIN`` environment variable. An unset, empty, or
    whitespace-only value falls back to ``"codex"`` so that a blank
    ``CODEX_BIN=`` entry (e.g. left over from a copied ``.env`` template) does
    not turn into an attempt to execute the empty string.
    """
    configured = os.environ.get("CODEX_BIN", "").strip()
    return configured or "codex"
def available_models() -> list[str]:
    """Return the model names the UI selector should offer.

    The comma-separated ``CODEX_MODELS`` env var is the allowlist (the built-in
    default list is used when it is unset). Entries are trimmed and blank ones
    dropped; if nothing usable remains, the single built-in default model is
    returned so the result is never empty.
    """
    allowlist = os.environ.get("CODEX_MODELS", _DEFAULT_MODELS)
    names = []
    for entry in allowlist.split(","):
        name = entry.strip()
        if name:
            names.append(name)
    if not names:
        return [_DEFAULT_MODEL]
    return names
def default_model() -> str:
    """Pick the model the UI should preselect.

    ``CODEX_MODEL`` wins when it names an entry of the allowlist returned by
    :func:`available_models`; when it is unset or not allowlisted, the first
    allowlisted model is used instead.
    """
    choices = available_models()
    preferred = os.environ.get("CODEX_MODEL")
    if preferred in choices:
        return preferred
    return choices[0]
def run_inference(
    system_prompt: str, user_prompt: str, *, model: str | None = None, timeout: float | None = None
) -> str:
    """Return the model completion text for one host-guest prediction, via ``codex exec``.

    Codex has no system role, so the system text is prepended to the user prompt.
    Each call runs in a fresh, empty temp directory that is always removed again,
    including when the call fails.

    Args:
        system_prompt: Instruction text; prepended to ``user_prompt`` when non-empty.
        user_prompt: The actual query.
        model: Model name passed to ``-m``; defaults to :func:`default_model`.
        timeout: Per-call limit in seconds; defaults to ``CODEX_TIMEOUT`` (900).

    Returns:
        The final agent message: the contents of the ``-o`` output file when it is
        non-empty, otherwise the last ``agent_message`` item seen in the JSON stream.

    Raises:
        RuntimeError: on an invalid ``CODEX_TIMEOUT``, timeout, missing CLI,
            non-zero exit, or empty output (carrying the codex error message when
            one was emitted), so the UI surfaces a readable message instead of a
            silent blank.
    """
    model = model or default_model()
    # Blank env values are treated as unset rather than forwarded verbatim.
    effort = os.environ.get("CODEX_REASONING_EFFORT", "").strip() or "high"
    if timeout is None:
        raw_timeout = os.environ.get("CODEX_TIMEOUT", "").strip() or "900"
        try:
            timeout = float(raw_timeout)
        except ValueError as exc:
            # Keep the documented contract: configuration problems are RuntimeErrors too.
            raise RuntimeError(
                f"invalid CODEX_TIMEOUT {raw_timeout!r}; expected a number of seconds"
            ) from exc
    prompt = f"{system_prompt}\n\n{user_prompt}".strip() if system_prompt else user_prompt

    # Force ChatGPT-subscription auth: strip API keys from the child env so codex can NEVER
    # silently fall back to metered API billing (it uses the OAuth tokens in ~/.codex/auth.json).
    env = {k: v for k, v in os.environ.items() if k not in ("OPENAI_API_KEY", "CODEX_API_KEY")}

    codex_bin = _codex_bin()
    workdir = tempfile.mkdtemp(prefix="codex_cb_")  # empty root, not this repo
    try:
        last_path = os.path.join(workdir, "_last.txt")
        cmd = [
            codex_bin, "exec", "--json", "--skip-git-repo-check", "--ephemeral",
            "-s", "read-only", "-m", model, "-C", workdir,
            "-c", f'model_reasoning_effort="{effort}"',
            "-o", last_path, "-",
        ]
        try:
            # Pin UTF-8 on the pipes instead of relying on the locale encoding, so
            # non-ASCII prompts/answers survive on non-UTF-8 locales (assumes codex
            # reads and writes UTF-8 — TODO confirm for exotic platforms).
            proc = subprocess.run(
                cmd, input=prompt, capture_output=True, text=True,
                encoding="utf-8", errors="replace", timeout=timeout, env=env,
            )
        except subprocess.TimeoutExpired as exc:
            raise RuntimeError(f"codex exec timed out after {timeout}s") from exc
        except FileNotFoundError as exc:
            raise RuntimeError(
                f"codex CLI not found ({codex_bin}); install Codex and run `codex login`"
            ) from exc

        content, codex_err = _parse_codex_events(proc.stdout or "")

        # The -o file, when present and non-empty, takes precedence over the stream.
        try:
            with open(last_path, encoding="utf-8", errors="replace") as fh:
                last_message = fh.read().strip()
        except FileNotFoundError:
            last_message = ""
        if last_message:
            content = last_message
    finally:
        # Always drop the sandbox dir, whatever happened above.
        shutil.rmtree(workdir, ignore_errors=True)

    if proc.returncode != 0:
        detail = codex_err or (proc.stderr or "")[-400:].strip() or f"exit {proc.returncode}"
        raise RuntimeError(f"codex exec failed: {detail}")
    if not content:
        raise RuntimeError("codex exec returned empty output" + (f": {codex_err}" if codex_err else ""))
    return content


def _parse_codex_events(stdout: str) -> tuple[str, str]:
    """Scan ``codex exec --json`` output; return ``(last agent message, last error message)``.

    Lines that are blank, not valid JSON, or not JSON objects are skipped. Either
    element of the result is ``""`` when no matching event was seen.
    """
    content, codex_err = "", ""
    for raw_line in stdout.splitlines():
        line = raw_line.strip()
        if not line:
            continue
        try:
            event = json.loads(line)
        except json.JSONDecodeError:
            continue
        if not isinstance(event, dict):
            continue  # e.g. a bare number or string that happens to be valid JSON
        etype = event.get("type")
        if etype == "item.completed":
            item = event.get("item")
            if isinstance(item, dict) and item.get("type") == "agent_message" and item.get("text"):
                content = item["text"]
        elif etype in ("error", "turn.failed"):
            nested = event.get("error")
            if isinstance(nested, dict):
                nested_msg = nested.get("message")
            elif isinstance(nested, str):
                nested_msg = nested  # tolerate a plain-string error payload
            else:
                nested_msg = None
            codex_err = event.get("message") or nested_msg or codex_err
    return content, codex_err
def healthcheck() -> tuple[bool, str]:
    """Lightweight readiness probe used by the UI; never raises.

    Checks that the codex executable is on ``PATH`` and that ``codex login status``
    exits 0 and reports a logged-in state.

    Returns:
        ``(ok, message)`` where ``message`` is a human-readable status line.
    """
    codex_bin = _codex_bin()
    if not shutil.which(codex_bin):
        return False, f"codex CLI not found ({codex_bin}); install Codex and run `codex login`"
    # Probe with the same environment run_inference() gives the child process (API
    # keys stripped), so readiness is not reported on the strength of credentials
    # that inference will never use.
    env = {k: v for k, v in os.environ.items() if k not in ("OPENAI_API_KEY", "CODEX_API_KEY")}
    try:
        r = subprocess.run(
            [codex_bin, "login", "status"], capture_output=True, text=True, timeout=10, env=env
        )
    except Exception as exc:  # noqa: BLE001 — surfaced to the UI as a string
        return False, f"codex status check failed: {exc}"
    report = ((r.stdout or "") + (r.stderr or "")).lower()
    # "not logged in" contains "logged in", so rule the negative phrasing out explicitly.
    if r.returncode == 0 and "logged in" in report and "not logged in" not in report:
        return True, f"Codex configured (model={default_model()})"
    return False, "codex not logged in; run `codex login`"