| |
| """Run the public Kaiju Coder 7 business-owner demo pack. |
| |
| The demo pack exercises the release path customers will actually use: a compact |
| model-planned prompt where useful, deterministic harness rendering, and static |
| verification before any public claim is refreshed. |
| """ |
|
|
| from __future__ import annotations |
|
|
| import argparse |
| import datetime as dt |
| import json |
| import sys |
| import time |
| from dataclasses import asdict, dataclass |
| from pathlib import Path |
|
|
| ROOT = Path(__file__).resolve().parents[1] |
| sys.path.insert(0, str(ROOT)) |
|
|
| from kaiju_harness.router import result_to_json, run_task |
|
|
|
|
@dataclass
class DemoTask:
    """A single prompt in the public demo pack.

    ``main`` runs one harness task per instance and stores its output in a
    sub-directory named after ``task_id``.
    """

    # Short slug identifying the task; also used as the per-task directory name.
    task_id: str
    # Task category passed to the harness as ``kind``.
    kind: str
    # Natural-language request passed to the harness.
    prompt: str
|
|
|
|
@dataclass
class DemoResult:
    """Outcome of one demo task as recorded in ``results.json`` and ``summary.md``.

    A result counts as passed when ``errors`` is empty and as failed otherwise.
    """

    # Identity of the task this result belongs to (copied from ``DemoTask``).
    task_id: str
    kind: str
    # Elapsed time for the task in seconds.
    seconds: float
    # Classification reported for the run; ``main`` falls back to the task's
    # ``kind`` / ``"error"`` when the run raised.
    task_type: str
    artifact_type: str
    # Stringified output locations, or ``None`` when there is none.
    artifact_path: str | None
    project_dir: str | None
    # Number of files reported as changed.
    changed_files: int
    # Error messages; any entry marks the task as failed.
    errors: list[str]
|
|
|
|
# The fixed set of prompts that make up the public demo pack, in run order.
# Each triple is (task_id, kind, prompt).
DEMO_TASKS: list[DemoTask] = [
    DemoTask(task_id=task_id, kind=kind, prompt=prompt)
    for task_id, kind, prompt in (
        # Marketing website.
        (
            "service-website",
            "website",
            "Build a premium one-page website for Harborline Bookkeeping in "
            "Savannah. Include trust-focused copy, clear services, pricing "
            "signals, FAQ, and the CTA Book a Cleanup Call.",
        ),
        # Multi-part business operating pack.
        (
            "owner-ai-company-pack",
            "business_suite",
            "Build the owner-ready AI company operating pack for Harborline "
            "Bookkeeping with launch kit, connector pack, intake CRM, "
            "reporting agent, lead generator, sales closer, ROI dashboard, "
            "operator training, and teach-once Workshop handoff.",
        ),
        # Written business document.
        (
            "stripe-safety-plan",
            "business_document",
            "Write a practical Stripe checkout and webhook safety plan for a "
            "local service business selling paid AI setup calls. Include key "
            "states, failure handling, refund/debit rules, and launch checks.",
        ),
        # Coding task.
        (
            "csv-parser",
            "coding",
            "Write a safe Node.js CSV parser utility for business-owner lead "
            "imports. Include validation rules, typed output shape, example "
            "usage, and a small test plan.",
        ),
    )
]
|
|
|
|
def utc_stamp() -> str:
    """Return the current UTC time as a compact ``YYYYMMDDTHHMMSSZ`` string.

    The value has one-second resolution and contains only digits, ``T`` and
    ``Z``, so it can be used directly as a directory name (as ``main`` does).

    Returns:
        The formatted timestamp, for example ``20240131T235959Z``.
    """
    # ``dt.timezone.utc`` is the same object that ``dt.UTC`` aliases, but the
    # alias only exists on Python 3.11+; this spelling works on older
    # interpreters too without changing the result.
    return dt.datetime.now(tz=dt.timezone.utc).strftime("%Y%m%dT%H%M%SZ")
|
|
|
|
def write_summary(run_dir: Path, results: list[DemoResult], manifests: list[dict]) -> None:
    """Write ``results.json`` and ``summary.md`` for a demo run into *run_dir*.

    ``results.json`` holds the product identifiers, a creation timestamp, the
    pass/fail totals, every result as a plain dict and the raw manifests.
    ``summary.md`` holds the same totals plus one Markdown table row per task.

    Args:
        run_dir: Directory that receives both files; it is not created here.
        results: Per-task outcomes; a result with a non-empty ``errors`` list
            counts as failed.
        manifests: JSON-serialisable manifest dicts, written out unchanged.
    """
    created_at = utc_stamp()
    passed_count = sum(1 for item in results if not item.errors)
    totals = {
        "tasks": len(results),
        "passed": passed_count,
        # Every result is either passed or failed, so the rest are failures.
        "failed": len(results) - passed_count,
        "total_seconds": round(sum(item.seconds for item in results), 3),
    }
    report = {
        "product": "Kaiju Coder 7",
        "model_id": "kaiju-coder-7",
        "created_at": created_at,
        "summary": totals,
        "results": [asdict(item) for item in results],
        "manifests": manifests,
    }
    json_path = run_dir / "results.json"
    json_path.write_text(json.dumps(report, indent=2) + "\n", encoding="utf-8")

    def render_row(item: DemoResult) -> str:
        """Format one result as a Markdown table row."""
        status = "fail" if item.errors else "pass"
        # Prefer the artifact path, then the project directory, else blank.
        location = item.artifact_path or item.project_dir or ""
        return (
            f"| `{item.task_id}` | `{item.kind}` | {status} | "
            f"{item.seconds:.2f} | {item.changed_files} | `{location}` |"
        )

    markdown = [
        "# Kaiju Coder 7 Public Demo Pack",
        "",
        f"- Run dir: `{run_dir}`",
        f"- Tasks: `{totals['tasks']}`",
        f"- Passed: `{totals['passed']}`",
        f"- Failed: `{totals['failed']}`",
        f"- Total seconds: `{totals['total_seconds']}`",
        "",
        "| Task | Kind | Result | Seconds | Changed files | Artifact |",
        "|---|---|---:|---:|---:|---|",
    ]
    markdown.extend(render_row(item) for item in results)
    md_path = run_dir / "summary.md"
    md_path.write_text("\n".join(markdown) + "\n", encoding="utf-8")
|
|
|
|
def main(argv: list[str] | None = None) -> int:
    """Run every task in ``DEMO_TASKS`` and write the run summary.

    A timestamped run directory is created under ``--out-dir``; each task gets
    its own sub-directory named after its ``task_id``. A task that raises is
    recorded as a failed result instead of aborting the remaining tasks.

    Args:
        argv: Command-line arguments to parse. ``None`` (the default) makes
            argparse read ``sys.argv[1:]``, exactly as before.

    Returns:
        ``0`` when no result carries errors, ``1`` otherwise.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument("--out-dir", type=Path, default=ROOT / "runs/public-demo-pack")
    parser.add_argument("--openai-base-url", default="http://127.0.0.1:18181/v1")
    parser.add_argument("--model", default="kaiju-coder-7")
    parser.add_argument("--api-key-env", default="KAIJU_EVAL_API_KEY")
    parser.add_argument("--planner-timeout", type=int, default=120)
    args = parser.parse_args(argv)

    run_dir = args.out_dir / utc_stamp()
    run_dir.mkdir(parents=True, exist_ok=True)
    results: list[DemoResult] = []
    manifests: list[dict] = []

    for task in DEMO_TASKS:
        # perf_counter is monotonic, so the measured duration cannot be
        # skewed (or go negative) when the wall clock is adjusted mid-run.
        started = time.perf_counter()
        task_dir = run_dir / task.task_id
        try:
            result = run_task(
                task.prompt,
                task_dir,
                kind=task.kind,
                openai_base_url=args.openai_base_url,
                model=args.model,
                api_key_env=args.api_key_env,
                planner_timeout=args.planner_timeout,
            )
            seconds = time.perf_counter() - started
            manifest = json.loads(result_to_json(result))
            record = DemoResult(
                task_id=task.task_id,
                kind=task.kind,
                seconds=round(seconds, 3),
                task_type=result.task_type,
                artifact_type=result.artifact_type,
                artifact_path=str(result.artifact_path) if result.artifact_path else None,
                project_dir=str(result.project_dir) if result.project_dir else None,
                changed_files=len(result.changed_files),
                errors=result.errors,
            )
        except Exception as exc:
            # Deliberately broad: one broken task must not stop the rest of
            # the pack. The failure is kept as this task's result instead.
            seconds = time.perf_counter() - started
            results.append(
                DemoResult(
                    task_id=task.task_id,
                    kind=task.kind,
                    seconds=round(seconds, 3),
                    task_type=task.kind,
                    artifact_type="error",
                    artifact_path=None,
                    project_dir=None,
                    changed_files=0,
                    # Include the exception type: str(exc) alone can be empty
                    # or ambiguous (for example a bare key for KeyError).
                    errors=[f"{type(exc).__name__}: {exc}"],
                )
            )
        else:
            # Append only once both pieces were built, so a failure while
            # building the result cannot leave a manifest behind for a task
            # that is reported as an error.
            manifests.append(manifest)
            results.append(record)

    write_summary(run_dir, results, manifests)
    print(f"Demo summary: {run_dir / 'summary.md'}")
    return 1 if any(item.errors for item in results) else 0
|
|
|
|
# Script entry point: exit the process with the status code returned by main().
if __name__ == "__main__":
    sys.exit(main())
|
|