kaiju-coder-7-opencode / scripts /run_kaiju_public_demo_pack.py
restokes92's picture
Upload Kaiju Coder 7 OpenCode helper package
1fe486d verified
#!/usr/bin/env python3
"""Run the public Kaiju Coder 7 business-owner demo pack.
The demo pack exercises the release path customers will actually use: a compact
model-planned prompt where useful, deterministic harness rendering, and static
verification before any public claim is refreshed.
"""
from __future__ import annotations
import argparse
import datetime as dt
import json
import sys
import time
from dataclasses import asdict, dataclass
from pathlib import Path
# Directory one level above the directory that contains this script
# (parents[0] is the script's own directory, parents[1] is its parent).
ROOT = Path(__file__).resolve().parents[1]
# Put ROOT at the front of the import path before the `kaiju_harness` import
# that follows. NOTE(review): presumably the package lives directly under
# ROOT - confirm against the repository layout.
sys.path.insert(0, str(ROOT))
from kaiju_harness.router import result_to_json, run_task
@dataclass
class DemoTask:
    """A single prompt of the demo pack, together with its routing kind."""

    # Stable identifier; the runner uses it as the per-task output directory
    # name and as the task label in the written summary.
    task_id: str
    # Forwarded to the harness as the ``kind=`` keyword argument.
    kind: str
    # Prompt text handed to the harness as its first positional argument.
    prompt: str
@dataclass
class DemoResult:
    """Flat, JSON-serialisable record of how one demo task ended."""

    # Identifier and kind copied from the DemoTask that was run.
    task_id: str
    kind: str
    # Elapsed time of the task in seconds (the runner rounds it to 3 places).
    seconds: float
    # Task and artifact type as reported by the harness result; for a task
    # that raised, the runner stores the task kind and "error" instead.
    task_type: str
    artifact_type: str
    # Stringified locations from the harness result, or None when absent.
    artifact_path: str | None
    project_dir: str | None
    # Number of entries in the harness result's changed-file collection.
    changed_files: int
    # Error messages; an empty list is what marks the task as passed.
    errors: list[str]
# Tasks executed by main(), in list order. Each task_id also becomes the name
# of that task's output directory inside the run directory, so ids must stay
# unique. The prompt fragments are concatenated verbatim and sent to the
# harness - edit them only when the demo content itself should change.
DEMO_TASKS = [
    # One-page marketing website.
    DemoTask(
        task_id="service-website",
        kind="website",
        prompt=(
            "Build a premium one-page website for Harborline Bookkeeping in "
            "Savannah. Include trust-focused copy, clear services, pricing "
            "signals, FAQ, and the CTA Book a Cleanup Call."
        ),
    ),
    # Multi-part business operating pack.
    DemoTask(
        task_id="owner-ai-company-pack",
        kind="business_suite",
        prompt=(
            "Build the owner-ready AI company operating pack for Harborline "
            "Bookkeeping with launch kit, connector pack, intake CRM, "
            "reporting agent, lead generator, sales closer, ROI dashboard, "
            "operator training, and teach-once Workshop handoff."
        ),
    ),
    # Written plan / business document.
    DemoTask(
        task_id="stripe-safety-plan",
        kind="business_document",
        prompt=(
            "Write a practical Stripe checkout and webhook safety plan for a "
            "local service business selling paid AI setup calls. Include key "
            "states, failure handling, refund/debit rules, and launch checks."
        ),
    ),
    # Coding task.
    DemoTask(
        task_id="csv-parser",
        kind="coding",
        prompt=(
            "Write a safe Node.js CSV parser utility for business-owner lead "
            "imports. Include validation rules, typed output shape, example "
            "usage, and a small test plan."
        ),
    ),
]
def utc_stamp() -> str:
    """Return the current UTC time as a compact ``YYYYMMDDTHHMMSSZ`` stamp.

    The stamp has one-second resolution and contains only digits plus the
    ``T`` separator and ``Z`` suffix, so it is safe to use as a directory name.

    ``dt.timezone.utc`` is used rather than the ``dt.UTC`` alias because the
    alias was only added in Python 3.11 and raises ``AttributeError`` on older
    interpreters; both name the same tzinfo object, so the output is unchanged.
    """
    return dt.datetime.now(dt.timezone.utc).strftime("%Y%m%dT%H%M%SZ")
def write_summary(run_dir: Path, results: list[DemoResult], manifests: list[dict]) -> None:
    """Write ``results.json`` and ``summary.md`` for one demo run into *run_dir*.

    Args:
        run_dir: Existing directory that receives both files.
        results: One record per task; a record with a non-empty ``errors``
            list counts as failed, any other record as passed.
        manifests: Already-decoded manifest dicts, embedded unchanged in the
            JSON report.

    ``results.json`` holds product metadata, aggregate counts, every result
    as a dict, and the manifests. ``summary.md`` holds the same aggregate
    counts followed by a Markdown table with one row per result.
    """
    failed_count = sum(1 for entry in results if entry.errors)
    totals = {
        "tasks": len(results),
        "passed": len(results) - failed_count,
        "failed": failed_count,
        "total_seconds": round(sum(entry.seconds for entry in results), 3),
    }

    report = {
        "product": "Kaiju Coder 7",
        "model_id": "kaiju-coder-7",
        "created_at": utc_stamp(),
        "summary": totals,
        "results": [asdict(entry) for entry in results],
        "manifests": manifests,
    }
    json_path = run_dir / "results.json"
    json_path.write_text(json.dumps(report, indent=2) + "\n", encoding="utf-8")

    def table_row(entry: DemoResult) -> str:
        # Show the artifact path when there is one, else the project
        # directory, else leave the cell empty.
        location = entry.artifact_path or entry.project_dir or ""
        verdict = "fail" if entry.errors else "pass"
        return (
            f"| `{entry.task_id}` | `{entry.kind}` | {verdict} | "
            f"{entry.seconds:.2f} | {entry.changed_files} | `{location}` |"
        )

    markdown = [
        "# Kaiju Coder 7 Public Demo Pack",
        "",
        f"- Run dir: `{run_dir}`",
        f"- Tasks: `{totals['tasks']}`",
        f"- Passed: `{totals['passed']}`",
        f"- Failed: `{totals['failed']}`",
        f"- Total seconds: `{totals['total_seconds']}`",
        "",
        "| Task | Kind | Result | Seconds | Changed files | Artifact |",
        "|---|---|---:|---:|---:|---|",
    ]
    markdown.extend(table_row(entry) for entry in results)

    md_path = run_dir / "summary.md"
    md_path.write_text("\n".join(markdown) + "\n", encoding="utf-8")
def main() -> int:
    """Run every task in ``DEMO_TASKS`` and write the run summary.

    Command-line options:
        --out-dir: Parent directory for runs (default ``ROOT/runs/public-demo-pack``);
            each run gets its own sub-directory named after the UTC start stamp.
        --openai-base-url, --model, --planner-timeout: Forwarded unchanged to
            ``run_task``.
        --api-key-env: Name of an environment variable, forwarded to
            ``run_task`` as ``api_key_env``.

    A task that raises does not abort the run: it is recorded as a failed
    ``DemoResult`` and the remaining tasks still execute.

    Returns:
        ``1`` if any task raised or reported errors, otherwise ``0``.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument("--out-dir", type=Path, default=ROOT / "runs/public-demo-pack")
    parser.add_argument("--openai-base-url", default="http://127.0.0.1:18181/v1")
    parser.add_argument("--model", default="kaiju-coder-7")
    parser.add_argument("--api-key-env", default="KAIJU_EVAL_API_KEY")
    parser.add_argument("--planner-timeout", type=int, default=120)
    args = parser.parse_args()

    run_dir = args.out_dir / utc_stamp()
    run_dir.mkdir(parents=True, exist_ok=True)

    results: list[DemoResult] = []
    manifests: list[dict] = []
    for task in DEMO_TASKS:
        # perf_counter() is monotonic, so the measured duration cannot be
        # skewed (or go negative) when the system clock is adjusted mid-run.
        started = time.perf_counter()
        try:
            result = run_task(
                task.prompt,
                run_dir / task.task_id,
                kind=task.kind,
                openai_base_url=args.openai_base_url,
                model=args.model,
                api_key_env=args.api_key_env,
                planner_timeout=args.planner_timeout,
            )
            seconds = time.perf_counter() - started
            # Build both records before touching the shared lists, so a
            # failure while converting the result cannot leave a manifest
            # behind for a task that ends up recorded as an error.
            manifest = json.loads(result_to_json(result))
            outcome = DemoResult(
                task_id=task.task_id,
                kind=task.kind,
                seconds=round(seconds, 3),
                task_type=result.task_type,
                artifact_type=result.artifact_type,
                artifact_path=str(result.artifact_path) if result.artifact_path else None,
                project_dir=str(result.project_dir) if result.project_dir else None,
                changed_files=len(result.changed_files),
                # Copy so the record does not alias the harness result's list.
                errors=list(result.errors or []),
            )
        except Exception as exc:
            # Deliberate best-effort boundary: one broken task must not stop
            # the rest of the pack. The exception type is included because
            # str(exc) alone can be empty or ambiguous (e.g. a bare KeyError).
            outcome = DemoResult(
                task_id=task.task_id,
                kind=task.kind,
                seconds=round(time.perf_counter() - started, 3),
                task_type=task.kind,
                artifact_type="error",
                artifact_path=None,
                project_dir=None,
                changed_files=0,
                errors=[f"{type(exc).__name__}: {exc}"],
            )
        else:
            manifests.append(manifest)
        results.append(outcome)

    write_summary(run_dir, results, manifests)
    print(f"Demo summary: {run_dir / 'summary.md'}")
    return 1 if any(outcome.errors for outcome in results) else 0
if __name__ == "__main__":
raise SystemExit(main())