#!/usr/bin/env python3
"""Deterministic verification checks for Kaiju router outputs."""
from __future__ import annotations
import json
from pathlib import Path
from typing import Any
# Substrings treated as evidence of a hard-coded provider secret.
# no_forbidden_tokens() compares them case-insensitively against the scanned text.
FORBIDDEN_TOKENS = ["sk_live_", "sk_test_", "rk_live_", "pplx-", "AIza", "anthropic_api_key"]
def check(name: str, ok: bool, detail: str) -> dict[str, Any]:
    """Build one verification result record.

    Args:
        name: Identifier of the check.
        ok: Outcome of the check; any value is coerced to a strict ``bool``.
        detail: Human-readable description stored alongside the outcome.

    Returns:
        A dict with the keys ``"name"``, ``"ok"`` and ``"detail"``.
    """
    passed = True if ok else False
    return dict(name=name, ok=passed, detail=detail)
def read_text(path: Path | None) -> str:
if path is None or not path.exists() or not path.is_file():
return ""
return path.read_text(encoding="utf-8")
def read_project_files(project_dir: Path | None) -> dict[str, str]:
if project_dir is None or not project_dir.exists():
return {}
files: dict[str, str] = {}
for path in project_dir.rglob("*"):
if path.is_file() and path.stat().st_size < 500_000:
files[str(path.relative_to(project_dir))] = path.read_text(encoding="utf-8")
return files
def no_forbidden_tokens(text: str) -> bool:
    """Report whether *text* is free of every entry in ``FORBIDDEN_TOKENS``.

    The comparison is case-insensitive: both the text and each token are
    lower-cased before the substring test.

    Args:
        text: Text to scan.

    Returns:
        ``True`` when no forbidden token occurs in *text*, ``False`` otherwise.
    """
    haystack = text.lower()
    for token in FORBIDDEN_TOKENS:
        if token.lower() in haystack:
            return False
    return True
def package_has_scripts(package_text: str, scripts: list[str]) -> bool:
    """Check that a ``package.json`` document defines every named script.

    Args:
        package_text: Raw JSON text of the package file. An empty string is
            treated as an empty JSON object.
        scripts: Script names that must be keys of the ``"scripts"`` object.

    Returns:
        ``True`` when every name in *scripts* is a key of the ``"scripts"``
        object. ``False`` when the text is not valid JSON or a script is
        missing.
    """
    try:
        package = json.loads(package_text or "{}")
    except json.JSONDecodeError:
        return False
    package_scripts = package.get("scripts") if isinstance(package, dict) else None
    if not isinstance(package_scripts, dict):
        # A null, string or list "scripts" value defines no scripts. Using
        # `in` on it would raise (None) or match substrings/elements.
        package_scripts = {}
    return all(script in package_scripts for script in scripts)
def verify_output(
    *,
    task_type: str,
    artifact_path: Path | None,
    project_dir: Path | None,
    changed_files: list[str],
    response_text: str,
    spec: dict[str, Any],
) -> list[dict[str, Any]]:
    """Run every deterministic check that applies to one router output.

    Three baseline checks always run first, then the checks specific to
    ``task_type``, and finally a lorem-ipsum check. An unrecognised
    ``task_type`` contributes a single failing ``known_task_type`` result.

    Args:
        task_type: One of ``"website"``, ``"business_document"``,
            ``"business_suite"``, ``"coding"``, ``"app"``, ``"code_project"``
            or ``"repo_patch"``.
        artifact_path: Single-file artifact to inspect, loaded via
            ``read_text``.
        project_dir: Root of a project whose files are inspected, loaded via
            ``read_project_files``.
        changed_files: Paths reported as changed.
        response_text: Response text; it is only scanned as part of the
            combined text.
        spec: Task specification. It is serialised into the combined text
            and supplies ``project_type`` for ``code_project`` tasks.

    Returns:
        The ``check`` result dicts, in the order the checks ran.
    """
    artifact_text = read_text(artifact_path)
    project_files = read_project_files(project_dir)
    spec_json = json.dumps(spec, ensure_ascii=False)
    combined = "\n".join([artifact_text, response_text, spec_json, *project_files.values()])
    lower_artifact = artifact_text.lower()
    lower_combined = combined.lower()
    starts_with_title = artifact_text.lstrip().startswith("# ")

    def artifact_mentions(*needles: str) -> bool:
        # True when every needle occurs in the lower-cased artifact.
        return all(needle in lower_artifact for needle in needles)

    def project_text(relative_path: str) -> str:
        # Contents of a project file, or "" when it was not loaded.
        return project_files.get(relative_path, "")

    def has_files(names: tuple[str, ...]) -> bool:
        # True when every listed relative path was loaded from the project.
        return all(name in project_files for name in names)

    def is_unified_diff(text: str) -> bool:
        return "--- a/" in text and "+++ b/" in text

    def touches_tests(paths: Any) -> bool:
        return any(path.startswith("tests/") for path in paths)

    # Baseline checks that apply to every task type.
    results: list[dict[str, Any]] = []
    results.append(check("artifact_or_project_exists", bool(artifact_text or project_files), "artifact file or project/repo files exist"))
    results.append(check("changed_files_present", len(changed_files) > 0, "changed files were reported"))
    results.append(check("no_hardcoded_secrets", no_forbidden_tokens(combined), "no obvious provider secret tokens found"))

    if task_type == "website":
        results += [
            check("complete_html", artifact_mentions("<!doctype html", "<html", "</html>"), "HTML document is complete"),
            check("required_sections", artifact_mentions('id="services"', 'id="pricing"', 'id="hours"', 'id="contact"'), "required business sections exist"),
            check("external_images", artifact_mentions("<img ", "https://images.unsplash.com/"), "real external images are present"),
            check("responsive_css", artifact_mentions("viewport", "@media"), "mobile viewport and responsive CSS exist"),
        ]
    elif task_type == "business_document":
        has_placeholder = "{{" in artifact_text or "}}" in artifact_text or "[insert" in lower_artifact
        next_step_terms = ("approval", "payment", "next step", "reply", "due")
        results += [
            check("markdown_title", starts_with_title, "document starts with a Markdown title"),
            check("no_placeholders", not has_placeholder, "no obvious template placeholders"),
            check("business_next_step", any(term in lower_artifact for term in next_step_terms), "document includes a concrete next step"),
        ]
    elif task_type == "business_suite":
        suite_files = (
            "README.md",
            "01-launch-kit/index.html",
            "02-content-engine/content-calendar.csv",
            "02-content-engine/voice-and-posts.md",
            "03-connector-pack/connector-checklist.md",
            "04-intake-crm/intake-form.html",
            "04-intake-crm/schema.sql",
            "05-reporting-agent/money-momentum-report.md",
            "06-agent-lab/automations.md",
            "07-operator-training/OPERATOR_HANDBOOK.md",
            "08-lead-generator/prospects.csv",
            "09-sales-closer/pipeline.csv",
            "09-sales-closer/proposal.md",
            "09-sales-closer/follow-up-sequence.md",
            "10-roi-dashboard/dashboard.html",
            "10-roi-dashboard/roi-summary.md",
            "11-the-workshop/taught-skill.md",
            "kaiju-change-summary.md",
        )

        def lowered(relative_path: str) -> str:
            return project_text(relative_path).lower()

        readme = lowered("README.md")
        connectors = lowered("03-connector-pack/connector-checklist.md")
        roi_summary = lowered("10-roi-dashboard/roi-summary.md")
        workshop = lowered("11-the-workshop/taught-skill.md")
        prospects = lowered("08-lead-generator/prospects.csv")
        pipeline = lowered("09-sales-closer/pipeline.csv")
        # NOTE(review): the three HTML files are tested as one concatenated
        # string, so a marker found in any one of them satisfies the check.
        # Confirm whether each file was meant to be checked on its own.
        suite_html = "".join(
            lowered(html_path)
            for html_path in (
                "01-launch-kit/index.html",
                "04-intake-crm/intake-form.html",
                "10-roi-dashboard/dashboard.html",
            )
        )
        html_markers = ("<!doctype html", "<html", "</html>", "viewport")
        mentions_dev_setup = "open a terminal" in readme or "create an oauth app" in readme
        results += [
            check("suite_required_files", has_files(suite_files), "all Kiyomi-style module artifacts exist"),
            check("owner_daily_surface", "/kiyomi" in readme and "/kiyomi-do" in readme, "owner daily commands are documented"),
            check("verified_connector_gate", "connected and verified live" in connectors and "not-connected" in connectors, "connector states include verified and not-connected gates"),
            check("roi_audit_gate", "automation savings are n/a until the post-launch time audit is complete" in roi_summary, "ROI savings are gated by audit status"),
            check("workshop_golden_run", "does this look exactly right" in workshop and "never send" in workshop, "Workshop proves one item before batching"),
            check("suite_html_complete", all(marker in suite_html for marker in html_markers), "HTML artifacts are complete and responsive"),
            check("growth_artifacts", "score,company" in prospects and "stage,lead" in pipeline, "lead and sales CSV artifacts exist"),
            check("no_owner_developer_setup", not mentions_dev_setup, "owner-facing docs avoid developer setup"),
        ]
    elif task_type == "coding":
        has_ts_block = "```ts" in artifact_text or "```typescript" in lower_artifact
        results += [
            check("markdown_title", starts_with_title, "coding artifact starts with a Markdown title"),
            check("code_blocks", has_ts_block, "TypeScript code block exists"),
            check("tests_present", "describe(" in artifact_text and "expect(" in artifact_text, "Vitest-style tests exist"),
            check("state_config_safety", artifact_mentions("state", "config", "safety", "verification"), "state/config/safety/verification sections exist"),
        ]
    elif task_type == "app":
        results += [
            check("complete_html", artifact_mentions("<!doctype html", "<html", "</html>"), "app HTML document is complete"),
            check("interactive_form", artifact_mentions("<form", "<input"), "interactive form exists"),
            check("local_storage", artifact_mentions("localstorage"), "localStorage persistence exists"),
            check("csv_export", artifact_mentions("export csv", "text/csv"), "CSV export exists"),
        ]
    elif task_type == "code_project":
        project_type = str(spec.get("project_type", ""))
        is_worker = project_type == "cloudflare_worker"
        # Files every code project needs, plus the project-type-specific ones.
        shared_files = ("package.json", "README.md", "kaiju-change-summary.md", "kaiju.patch")
        if is_worker:
            specific_files = ("wrangler.toml", "src/index.ts", "tests/worker.test.ts")
        else:
            specific_files = ("src/app/page.tsx", "src/app/globals.css", "tests/smoke.test.ts")
        results += [
            check("project_required_files", has_files(shared_files + specific_files), "required project files exist"),
            check("package_scripts", package_has_scripts(project_text("package.json"), ["dev", "build", "lint", "test"]), "package scripts exist"),
            check("tests_present", touches_tests(project_files), "test files exist"),
            check("unified_diff", is_unified_diff(project_text("kaiju.patch")), "patch file has unified diff markers"),
        ]
        if is_worker:
            results += [
                check("worker_entrypoint", "export default" in project_text("src/index.ts"), "Worker fetch export exists"),
                check("worker_routes", "/health" in combined and "/leads" in combined, "health and lead intake routes exist"),
                check("worker_cors", "access-control-allow-origin" in lower_combined, "CORS handling exists"),
            ]
    elif task_type == "repo_patch":
        summary_name = "kaiju-repo-patch-summary.md"
        has_summary = summary_name in project_files and "## Verification" in project_text(summary_name)
        # The manifest itself does not count towards the size of the patch.
        patch_size = sum(1 for path in changed_files if path != "kaiju-manifest.json")
        results += [
            check("patch_summary", has_summary, "repo patch summary exists"),
            check("unified_diff", is_unified_diff(project_text("kaiju-repo.patch")), "repo patch has unified diff markers"),
            check("tests_touched", touches_tests(changed_files), "patch includes tests"),
            check("scoped_patch", 3 <= patch_size <= 12, "patch has a reviewable number of changed files"),
        ]
    else:
        results.append(check("known_task_type", False, f"unknown task type: {task_type}"))

    # Final check: filler text anywhere in the combined text is a failure.
    found_lorem = "lorem ipsum" in lower_combined
    lorem_detail = "lorem ipsum was found" if found_lorem else "no lorem ipsum found"
    results.append(check("no_lorem_ipsum", not found_lorem, lorem_detail))
    return results
def failed_checks(results: list[dict[str, Any]]) -> list[str]:
    """Summarise the checks that did not pass.

    Args:
        results: Result dicts carrying ``"name"`` and ``"detail"`` keys. An
            entry whose ``"ok"`` value is falsy or absent counts as failed.

    Returns:
        One ``"<name>: <detail>"`` string per failed check, in input order.
    """
    failures: list[str] = []
    for item in results:
        if item.get("ok"):
            continue
        failures.append(f"{item['name']}: {item['detail']}")
    return failures
|