| |
| """Deterministic verification checks for Kaiju router outputs.""" |
|
|
| from __future__ import annotations |
|
|
| import json |
| from pathlib import Path |
| from typing import Any |
|
|
|
|
# Substrings treated as evidence of a hard-coded provider secret.
# no_forbidden_tokens() compares them case-insensitively.
FORBIDDEN_TOKENS = [
    "sk_live_",
    "sk_test_",
    "rk_live_",
    "pplx-",
    "AIza",
    "anthropic_api_key",
]
|
|
|
|
def check(name: str, ok: bool, detail: str) -> dict[str, Any]:
    """Build one check-result record.

    ``ok`` is coerced to a real ``bool`` so truthy and falsy inputs are stored
    uniformly under the ``"ok"`` key.
    """
    passed = True if ok else False
    return dict(name=name, ok=passed, detail=detail)
|
|
|
|
def read_text(path: Path | None) -> str:
    """Return the text of ``path``, or ``""`` when there is nothing to read.

    Args:
        path: File to read; ``None`` is allowed.

    Returns:
        The file contents decoded as UTF-8. An empty string is returned when
        ``path`` is ``None`` or does not point at a regular file.
    """
    # is_file() is already False for a missing path, so no separate exists() test.
    if path is None or not path.is_file():
        return ""
    # Undecodable bytes become U+FFFD instead of raising UnicodeDecodeError, so
    # a badly encoded artifact fails individual checks rather than aborting.
    return path.read_text(encoding="utf-8", errors="replace")
|
|
|
|
def read_project_files(project_dir: Path | None) -> dict[str, str]:
    """Load every small file under ``project_dir`` into memory.

    Args:
        project_dir: Root directory to scan recursively; ``None`` is allowed.

    Returns:
        A mapping from each file's path relative to ``project_dir`` (always
        written with ``/`` separators) to its text decoded as UTF-8. Files of
        500,000 bytes or more are left out, as are entries that cannot be
        read. The mapping is empty when ``project_dir`` is ``None`` or is not
        a directory.
    """
    if project_dir is None or not project_dir.is_dir():
        return {}
    files: dict[str, str] = {}
    # Sorted so the mapping (and anything joined from it) has a stable order
    # regardless of how the filesystem enumerates entries.
    for path in sorted(project_dir.rglob("*")):
        try:
            if not path.is_file() or path.stat().st_size >= 500_000:
                continue
            # errors="replace" keeps binary or badly encoded files from
            # aborting the whole scan with UnicodeDecodeError.
            text = path.read_text(encoding="utf-8", errors="replace")
        except OSError:
            # Unreadable entry (permissions, removed mid-scan): skip it.
            continue
        # POSIX-style keys so lookups written with "/" also match on Windows.
        files[path.relative_to(project_dir).as_posix()] = text
    return files
|
|
|
|
def no_forbidden_tokens(text: str) -> bool:
    """Report whether ``text`` is free of every entry in ``FORBIDDEN_TOKENS``.

    Both the text and each token are lowercased before the substring test.
    """
    haystack = text.lower()
    for token in FORBIDDEN_TOKENS:
        if token.lower() in haystack:
            return False
    return True
|
|
|
|
def package_has_scripts(package_text: str, scripts: list[str]) -> bool:
    """Check that a ``package.json`` document defines every named script.

    Args:
        package_text: Raw JSON text; empty text is treated as ``{}``.
        scripts: Script names that must be keys of the ``"scripts"`` object.

    Returns:
        ``True`` when every name in ``scripts`` is a key of the document's
        ``"scripts"`` object. ``False`` when the text is not valid JSON or a
        name is missing.
    """
    try:
        package = json.loads(package_text or "{}")
    except json.JSONDecodeError:
        return False
    package_scripts = package.get("scripts") if isinstance(package, dict) else None
    # Only a JSON object can define scripts. Anything else (null, a string, a
    # list) counts as "no scripts"; otherwise ``in`` would raise TypeError on
    # null or silently do substring matching on a string.
    if not isinstance(package_scripts, dict):
        package_scripts = {}
    return all(script in package_scripts for script in scripts)
|
|
|
|
# Files every "business_suite" project must contain.
_SUITE_REQUIRED_FILES = frozenset(
    {
        "README.md",
        "01-launch-kit/index.html",
        "02-content-engine/content-calendar.csv",
        "02-content-engine/voice-and-posts.md",
        "03-connector-pack/connector-checklist.md",
        "04-intake-crm/intake-form.html",
        "04-intake-crm/schema.sql",
        "05-reporting-agent/money-momentum-report.md",
        "06-agent-lab/automations.md",
        "07-operator-training/OPERATOR_HANDBOOK.md",
        "08-lead-generator/prospects.csv",
        "09-sales-closer/pipeline.csv",
        "09-sales-closer/proposal.md",
        "09-sales-closer/follow-up-sequence.md",
        "10-roi-dashboard/dashboard.html",
        "10-roi-dashboard/roi-summary.md",
        "11-the-workshop/taught-skill.md",
        "kaiju-change-summary.md",
    }
)

# Files a "code_project" must contain when spec["project_type"] is "cloudflare_worker".
_WORKER_REQUIRED_FILES = frozenset(
    {"package.json", "wrangler.toml", "src/index.ts", "tests/worker.test.ts", "README.md", "kaiju-change-summary.md", "kaiju.patch"}
)

# Files a "code_project" must contain for every other project type.
_DEFAULT_PROJECT_REQUIRED_FILES = frozenset(
    {"package.json", "src/app/page.tsx", "src/app/globals.css", "tests/smoke.test.ts", "README.md", "kaiju-change-summary.md", "kaiju.patch"}
)


def _contains_all(text: str, needles: list[str]) -> bool:
    """Return True when every needle occurs somewhere in ``text``."""
    return all(needle in text for needle in needles)


def _website_checks(lower_artifact: str) -> list[dict[str, Any]]:
    """Checks for the "website" task type, run on the lowercased artifact text."""
    return [
        check("complete_html", _contains_all(lower_artifact, ["<!doctype html", "<html", "</html>"]), "HTML document is complete"),
        check("required_sections", _contains_all(lower_artifact, ['id="services"', 'id="pricing"', 'id="hours"', 'id="contact"']), "required business sections exist"),
        check("external_images", "<img " in lower_artifact and "https://images.unsplash.com/" in lower_artifact, "real external images are present"),
        check("responsive_css", "viewport" in lower_artifact and "@media" in lower_artifact, "mobile viewport and responsive CSS exist"),
    ]


def _business_document_checks(artifact_text: str, lower_artifact: str) -> list[dict[str, Any]]:
    """Checks for the "business_document" task type."""
    return [
        check("markdown_title", artifact_text.lstrip().startswith("# "), "document starts with a Markdown title"),
        check("no_placeholders", "{{" not in artifact_text and "}}" not in artifact_text and "[insert" not in lower_artifact, "no obvious template placeholders"),
        check("business_next_step", any(term in lower_artifact for term in ["approval", "payment", "next step", "reply", "due"]), "document includes a concrete next step"),
    ]


def _business_suite_checks(project_files: dict[str, str]) -> list[dict[str, Any]]:
    """Checks for the "business_suite" task type, run on the project file map."""

    def lowered(relative_path: str) -> str:
        # Missing files read as empty text, which simply fails the content checks.
        return project_files.get(relative_path, "").lower()

    readme = lowered("README.md")
    connectors = lowered("03-connector-pack/connector-checklist.md")
    roi_summary = lowered("10-roi-dashboard/roi-summary.md")
    workshop = lowered("11-the-workshop/taught-skill.md")
    html_pages = [
        lowered("01-launch-kit/index.html"),
        lowered("04-intake-crm/intake-form.html"),
        lowered("10-roi-dashboard/dashboard.html"),
    ]
    html_tokens = ["<!doctype html", "<html", "</html>", "viewport"]
    return [
        check("suite_required_files", _SUITE_REQUIRED_FILES.issubset(project_files), "all Kiyomi-style module artifacts exist"),
        # NOTE(review): "/kiyomi" is a prefix of "/kiyomi-do", so in practice this
        # only requires the latter -- confirm whether a standalone command is meant.
        check("owner_daily_surface", "/kiyomi" in readme and "/kiyomi-do" in readme, "owner daily commands are documented"),
        check("verified_connector_gate", "connected and verified live" in connectors and "not-connected" in connectors, "connector states include verified and not-connected gates"),
        check("roi_audit_gate", "automation savings are n/a until the post-launch time audit is complete" in roi_summary, "ROI savings are gated by audit status"),
        check("workshop_golden_run", "does this look exactly right" in workshop and "never send" in workshop, "Workshop proves one item before batching"),
        # Every page has to pass on its own: testing the three pages
        # concatenated would let one complete page mask an incomplete or
        # missing one.
        check("suite_html_complete", all(_contains_all(page, html_tokens) for page in html_pages), "HTML artifacts are complete and responsive"),
        check("growth_artifacts", "score,company" in lowered("08-lead-generator/prospects.csv") and "stage,lead" in lowered("09-sales-closer/pipeline.csv"), "lead and sales CSV artifacts exist"),
        check("no_owner_developer_setup", "open a terminal" not in readme and "create an oauth app" not in readme, "owner-facing docs avoid developer setup"),
    ]


def _coding_checks(artifact_text: str, lower_artifact: str) -> list[dict[str, Any]]:
    """Checks for the "coding" task type."""
    return [
        check("markdown_title", artifact_text.lstrip().startswith("# "), "coding artifact starts with a Markdown title"),
        check("code_blocks", "```ts" in artifact_text or "```typescript" in lower_artifact, "TypeScript code block exists"),
        check("tests_present", "describe(" in artifact_text and "expect(" in artifact_text, "Vitest-style tests exist"),
        check("state_config_safety", _contains_all(lower_artifact, ["state", "config", "safety", "verification"]), "state/config/safety/verification sections exist"),
    ]


def _app_checks(lower_artifact: str) -> list[dict[str, Any]]:
    """Checks for the "app" task type, run on the lowercased artifact text."""
    return [
        check("complete_html", _contains_all(lower_artifact, ["<!doctype html", "<html", "</html>"]), "app HTML document is complete"),
        check("interactive_form", "<form" in lower_artifact and "<input" in lower_artifact, "interactive form exists"),
        check("local_storage", "localstorage" in lower_artifact, "localStorage persistence exists"),
        check("csv_export", "export csv" in lower_artifact and "text/csv" in lower_artifact, "CSV export exists"),
    ]


def _code_project_checks(
    project_files: dict[str, str],
    spec: dict[str, Any],
    combined: str,
    lower_combined: str,
) -> list[dict[str, Any]]:
    """Checks for the "code_project" task type, plus Worker-specific extras."""
    is_worker = str(spec.get("project_type", "")) == "cloudflare_worker"
    required = _WORKER_REQUIRED_FILES if is_worker else _DEFAULT_PROJECT_REQUIRED_FILES
    patch_text = project_files.get("kaiju.patch", "")
    results = [
        check("project_required_files", required.issubset(project_files), "required project files exist"),
        check("package_scripts", package_has_scripts(project_files.get("package.json", ""), ["dev", "build", "lint", "test"]), "package scripts exist"),
        check("tests_present", any(path.startswith("tests/") for path in project_files), "test files exist"),
        check("unified_diff", "--- a/" in patch_text and "+++ b/" in patch_text, "patch file has unified diff markers"),
    ]
    if is_worker:
        results.extend(
            [
                check("worker_entrypoint", "export default" in project_files.get("src/index.ts", ""), "Worker fetch export exists"),
                check("worker_routes", "/health" in combined and "/leads" in combined, "health and lead intake routes exist"),
                check("worker_cors", "access-control-allow-origin" in lower_combined, "CORS handling exists"),
            ]
        )
    return results


def _repo_patch_checks(project_files: dict[str, str], changed_files: list[str]) -> list[dict[str, Any]]:
    """Checks for the "repo_patch" task type."""
    patch_text = project_files.get("kaiju-repo.patch", "")
    # A missing summary reads as "", which cannot contain the heading, so no
    # separate existence test is needed.
    summary_text = project_files.get("kaiju-repo-patch-summary.md", "")
    # The manifest does not count toward the size of the patch.
    reviewable = [path for path in changed_files if path != "kaiju-manifest.json"]
    return [
        check("patch_summary", "## Verification" in summary_text, "repo patch summary exists"),
        check("unified_diff", "--- a/" in patch_text and "+++ b/" in patch_text, "repo patch has unified diff markers"),
        check("tests_touched", any(path.startswith("tests/") for path in changed_files), "patch includes tests"),
        check("scoped_patch", 3 <= len(reviewable) <= 12, "patch has a reviewable number of changed files"),
    ]


def verify_output(
    *,
    task_type: str,
    artifact_path: Path | None,
    project_dir: Path | None,
    changed_files: list[str],
    response_text: str,
    spec: dict[str, Any],
) -> list[dict[str, Any]]:
    """Run the deterministic checks that apply to one router output.

    Args:
        task_type: Selects the task-specific checks. Recognized values are
            "website", "business_document", "business_suite", "coding",
            "app", "code_project" and "repo_patch"; any other value adds a
            failing "known_task_type" check.
        artifact_path: Single-file artifact, loaded with ``read_text``.
        project_dir: Project directory, loaded with ``read_project_files``.
        changed_files: Paths reported as changed.
        response_text: Response text; scanned together with the artifact, the
            spec and the project files for secrets and "lorem ipsum".
        spec: Task spec. It is serialized with ``json.dumps`` for scanning,
            and ``spec["project_type"]`` is read for "code_project" tasks.

    Returns:
        Check records as produced by ``check``, in this order: three general
        checks, the task-specific checks, then "no_lorem_ipsum".
    """
    artifact_text = read_text(artifact_path)
    project_files = read_project_files(project_dir)
    combined = "\n".join([artifact_text, response_text, json.dumps(spec, ensure_ascii=False), *project_files.values()])
    lower_artifact = artifact_text.lower()
    lower_combined = combined.lower()
    results: list[dict[str, Any]] = [
        check("artifact_or_project_exists", bool(artifact_text or project_files), "artifact file or project/repo files exist"),
        check("changed_files_present", len(changed_files) > 0, "changed files were reported"),
        check("no_hardcoded_secrets", no_forbidden_tokens(combined), "no obvious provider secret tokens found"),
    ]

    if task_type == "website":
        results.extend(_website_checks(lower_artifact))
    elif task_type == "business_document":
        results.extend(_business_document_checks(artifact_text, lower_artifact))
    elif task_type == "business_suite":
        results.extend(_business_suite_checks(project_files))
    elif task_type == "coding":
        results.extend(_coding_checks(artifact_text, lower_artifact))
    elif task_type == "app":
        results.extend(_app_checks(lower_artifact))
    elif task_type == "code_project":
        results.extend(_code_project_checks(project_files, spec, combined, lower_combined))
    elif task_type == "repo_patch":
        results.extend(_repo_patch_checks(project_files, changed_files))
    else:
        results.append(check("known_task_type", False, f"unknown task type: {task_type}"))

    has_lorem = "lorem ipsum" in lower_combined
    results.append(check("no_lorem_ipsum", not has_lorem, "lorem ipsum was found" if has_lorem else "no lorem ipsum found"))
    return results
|
|
|
|
def failed_checks(results: list[dict[str, Any]]) -> list[str]:
    """Summarize the failing checks as ``"name: detail"`` strings.

    A result counts as failed when its ``"ok"`` entry is missing or falsy.
    Input order is preserved.
    """
    summaries: list[str] = []
    for result in results:
        if result.get("ok"):
            continue
        label = result["name"]
        reason = result["detail"]
        summaries.append(f"{label}: {reason}")
    return summaries
|
|