Spaces:

SupraBench
/

SupraDashboard

Running

SupraDashboard / src /inference /codex.py

Tianyi-Billy-Ma

Deploy: simplify codebase (dead-code removal, behavior-preserving)

1727091 10 days ago

6.24 kB

	"""Codex inference backend for SupraDashboard — LOCAL computing via the Codex CLI.

	Mirror of :mod:`inference.openrouter` (same ``run_inference`` / ``available_models`` /
	``default_model`` / ``healthcheck`` surface) but backed by ``codex exec``, billed to the
	ChatGPT subscription the local ``codex`` CLI is logged into (no API key needed).

	Intended for LOCAL runs only — a Hugging Face Space cannot hold codex's interactive
	OAuth and the codex CLI isn't installed there, so :mod:`inference` forces OpenRouter
	whenever it detects a Space. Adapted from Supra-R1 ``supra_r1/inference/codex.py``.

	Closed-book sandbox per call: ``codex exec -s read-only --skip-git-repo-check
	--ephemeral -C <fresh temp dir> --json``. The working root is an empty temp dir (NOT this
	repo), so the model can't read our code/data; ``--json`` carries the final agent message.

	CAVEAT: Codex is an agent, not a bare LLM — it prepends a large system prompt, so token
	counts and latency are higher than a raw API call. Cost is ChatGPT-subscription quota; when
	exhausted, ``codex exec`` exits 1 and emits a JSON ``error`` event on STDOUT (empty stderr),
	which is surfaced here as a RuntimeError so the UI shows a readable message.

	Config (env; see .env.example):
	  CODEX_MODEL             default model selection (e.g. gpt-5.5)
	  CODEX_MODELS            comma-separated allowlist for the UI model picker
	  CODEX_REASONING_EFFORT  minimal|low|medium|high (default high)
	  CODEX_TIMEOUT           per-call seconds (default 900)
	  CODEX_BIN               codex executable name/path (default "codex")

	Requires the Codex CLI installed and logged in (``codex login``; ``codex login status``).
	"""
	from __future__ import annotations

	import json
	import os
	import shutil
	import subprocess
	import tempfile

	# Single fallback model id, returned by available_models() when the allowlist parses to nothing.
	_DEFAULT_MODEL = "gpt-5.5"
	# Built-in comma-separated allowlist, used when the CODEX_MODELS env var is not set.
	_DEFAULT_MODELS = "gpt-5.5,gpt-5"


	def _codex_bin() -> str:
	    """Return the Codex executable name/path from ``CODEX_BIN`` (default ``"codex"``).

	    A blank or whitespace-only value (e.g. ``CODEX_BIN=`` left empty in a ``.env`` file) is
	    treated as unset, so callers never try to execute an empty program name.
	    """
	    configured = (os.environ.get("CODEX_BIN") or "").strip()
	    return configured or "codex"


	def available_models() -> list[str]:
	    """Return the model names shown in the UI selector.

	    Reads the comma-separated ``CODEX_MODELS`` allowlist (falling back to the built-in list
	    when the variable is unset), trims each entry and drops blanks. If nothing usable is
	    left, the single built-in default model is returned instead.
	    """
	    allowlist = os.environ.get("CODEX_MODELS", _DEFAULT_MODELS)
	    names = []
	    for piece in allowlist.split(","):
	        cleaned = piece.strip()
	        if cleaned:
	            names.append(cleaned)
	    if not names:
	        return [_DEFAULT_MODEL]
	    return names


	def default_model() -> str:
	    """Return the model preselected in the UI.

	    ``CODEX_MODEL`` wins when it names an entry of the allowlist; otherwise the first
	    allowlisted model is used.
	    """
	    choices = available_models()
	    preferred = os.environ.get("CODEX_MODEL")
	    if preferred in choices:
	        return preferred
	    return choices[0]


	def _parse_codex_events(stdout: str) -> tuple[str, str]:
	    """Scan ``codex exec --json`` output and return ``(agent_message, error_message)``.

	    Each non-blank line is tried as JSON. The text of the last ``item.completed`` event whose
	    item is an ``agent_message`` becomes the agent message; the last ``error`` / ``turn.failed``
	    event that carries a message becomes the error message. Lines that are not JSON objects
	    are skipped rather than crashing the parse.
	    """
	    content, codex_err = "", ""
	    for raw in stdout.splitlines():
	        raw = raw.strip()
	        if not raw:
	            continue
	        try:
	            ev = json.loads(raw)
	        except json.JSONDecodeError:
	            continue
	        if not isinstance(ev, dict):
	            # Valid JSON but not an event object (e.g. a bare number or string): ignore.
	            continue
	        etype = ev.get("type")
	        if etype == "item.completed":
	            item = ev.get("item")
	            if isinstance(item, dict) and item.get("type") == "agent_message" and item.get("text"):
	                content = item["text"]
	        elif etype in ("error", "turn.failed"):
	            err = ev.get("error")
	            if isinstance(err, dict):
	                nested = err.get("message")
	            elif isinstance(err, str):
	                # Tolerate a plain-string "error" field instead of raising AttributeError.
	                nested = err
	            else:
	                nested = None
	            codex_err = ev.get("message") or nested or codex_err
	    return content, codex_err


	def run_inference(
	    system_prompt: str, user_prompt: str, *, model: str | None = None, timeout: float | None = None
	) -> str:
	    """Return the model completion text for one host-guest prediction, via ``codex exec``.

	    Codex has no system role, so the system text is prepended to the user prompt. The prompt
	    is fed on stdin; the answer is taken from the ``-o`` last-message file when it has
	    content, otherwise from the final ``agent_message`` event in the ``--json`` stream.

	    ``model`` defaults to :func:`default_model`; ``timeout`` (seconds) defaults to
	    ``CODEX_TIMEOUT`` or 900. Blank ``CODEX_TIMEOUT`` / ``CODEX_REASONING_EFFORT`` values are
	    treated as unset.

	    Raises RuntimeError (with the codex error message where available) on an invalid
	    ``CODEX_TIMEOUT``, timeout, missing or unlaunchable CLI, non-zero exit, or empty output,
	    so the UI surfaces a readable message instead of a silent blank.
	    """
	    model = model or default_model()
	    effort = os.environ.get("CODEX_REASONING_EFFORT") or "high"
	    if timeout is None:
	        raw_timeout = os.environ.get("CODEX_TIMEOUT") or "900"
	        try:
	            timeout = float(raw_timeout)
	        except ValueError as exc:
	            raise RuntimeError(
	                f"invalid CODEX_TIMEOUT {raw_timeout!r}; expected a number of seconds"
	            ) from exc
	    prompt = f"{system_prompt}\n\n{user_prompt}".strip() if system_prompt else user_prompt

	    bin_path = _codex_bin()
	    # Force ChatGPT-subscription auth: strip API keys from the child env so codex can NEVER
	    # silently fall back to metered API billing (it uses the OAuth tokens in ~/.codex/auth.json).
	    env = {k: v for k, v in os.environ.items() if k not in ("OPENAI_API_KEY", "CODEX_API_KEY")}

	    workdir = tempfile.mkdtemp(prefix="codex_cb_")  # empty root, not this repo
	    try:
	        last_path = os.path.join(workdir, "_last.txt")
	        cmd = [
	            bin_path, "exec", "--json", "--skip-git-repo-check", "--ephemeral",
	            "-s", "read-only", "-m", model, "-C", workdir,
	            "-c", f'model_reasoning_effort="{effort}"',
	            "-o", last_path, "-",
	        ]
	        try:
	            # Explicit UTF-8 so prompt/answer text does not depend on the platform locale.
	            proc = subprocess.run(
	                cmd, input=prompt, capture_output=True, text=True,
	                encoding="utf-8", errors="replace", timeout=timeout, env=env,
	            )
	        except subprocess.TimeoutExpired as exc:
	            raise RuntimeError(f"codex exec timed out after {timeout}s") from exc
	        except FileNotFoundError as exc:
	            raise RuntimeError(
	                f"codex CLI not found ({bin_path}); install Codex and run `codex login`"
	            ) from exc
	        except OSError as exc:
	            # e.g. PermissionError when CODEX_BIN points at a non-executable file.
	            raise RuntimeError(f"codex exec could not be started ({bin_path}): {exc}") from exc

	        content, codex_err = _parse_codex_events(proc.stdout or "")

	        # The -o file, when present and non-empty, takes precedence over the streamed message.
	        # Reading it is best-effort: a missing/unreadable file leaves the streamed content.
	        try:
	            with open(last_path, encoding="utf-8", errors="replace") as fh:
	                last_msg = fh.read().strip()
	        except OSError:
	            last_msg = ""
	        if last_msg:
	            content = last_msg
	    finally:
	        # Single cleanup point: the temp dir is removed on every exit path.
	        shutil.rmtree(workdir, ignore_errors=True)

	    if proc.returncode != 0:
	        detail = codex_err or (proc.stderr or "")[-400:].strip() or f"exit {proc.returncode}"
	        raise RuntimeError(f"codex exec failed: {detail}")
	    if not content:
	        raise RuntimeError("codex exec returned empty output" + (f": {codex_err}" if codex_err else ""))
	    return content


	def healthcheck() -> tuple[bool, str]:
	    """Lightweight readiness probe used by the UI; never raises.

	    Returns ``(ok, message)``. ``ok`` is True only when the configured codex executable is
	    on PATH and ``codex login status`` exits 0 with output that says "logged in" without
	    also saying "not logged in". Any failure to run the status command is reported as a
	    message rather than propagated.
	    """
	    bin_path = _codex_bin()
	    if not shutil.which(bin_path):
	        return False, f"codex CLI not found ({bin_path}); install Codex and run `codex login`"
	    # Probe under the same key-stripped environment run_inference uses, so the result is not
	    # influenced by API-key variables that the real call removes.
	    # NOTE(review): whether `codex login status` consults these variables is not visible
	    # here; stripping them is harmless either way.
	    env = {k: v for k, v in os.environ.items() if k not in ("OPENAI_API_KEY", "CODEX_API_KEY")}
	    try:
	        r = subprocess.run(
	            [bin_path, "login", "status"], capture_output=True, text=True, timeout=10, env=env
	        )
	    except Exception as exc:  # noqa: BLE001 — surfaced to the UI as a string
	        return False, f"codex status check failed: {exc}"
	    report = ((r.stdout or "") + (r.stderr or "")).lower()
	    # "not logged in" contains the substring "logged in", so rule it out explicitly.
	    if r.returncode == 0 and "logged in" in report and "not logged in" not in report:
	        return True, f"Codex configured (model={default_model()})"
	    return False, "codex not logged in; run `codex login`"