File size: 10,760 Bytes

c75f885

#!/usr/bin/env python3
"""Deterministic verification checks for Kaiju router outputs."""

from __future__ import annotations

import json
from pathlib import Path
from typing import Any


# Substrings whose presence is treated as an obvious hard-coded provider
# secret. no_forbidden_tokens() lowercases both sides before comparing, so
# the casing used here does not affect matching.
FORBIDDEN_TOKENS = [
    "sk_live_",
    "sk_test_",
    "rk_live_",
    "pplx-",
    "AIza",
    "anthropic_api_key",
]


def check(name: str, ok: bool, detail: str) -> dict[str, Any]:
    """Build a single verification result record.

    Args:
        name: Identifier of the check.
        ok: Outcome of the check; any truthy/falsy value is normalised to a
            strict ``bool``.
        detail: Human-readable description stored alongside the outcome.

    Returns:
        A dict with the keys ``name``, ``ok`` and ``detail``.
    """
    passed = True if ok else False
    return dict(name=name, ok=passed, detail=detail)


def read_text(path: Path | None) -> str:
    """Return the contents of *path* decoded as UTF-8, or ``""`` if unusable.

    Args:
        path: File to read. ``None``, a missing path, or a path that is not a
            regular file all yield the empty string.

    Returns:
        The decoded file contents. Bytes that are not valid UTF-8 are replaced
        with U+FFFD instead of raising.
    """
    # is_file() is already False for paths that do not exist.
    if path is None or not path.is_file():
        return ""
    # errors="replace": a badly encoded artifact should surface as failed
    # content checks, not abort verification with UnicodeDecodeError.
    return path.read_text(encoding="utf-8", errors="replace")


def read_project_files(project_dir: Path | None) -> dict[str, str]:
    """Load every small file under *project_dir* into memory as text.

    Args:
        project_dir: Root directory to scan recursively. ``None`` or a path
            that is not an existing directory yields an empty mapping.

    Returns:
        Mapping of each file's path relative to *project_dir* to its decoded
        contents. Keys always use forward slashes. Files of 500,000 bytes or
        more are skipped.
    """
    if project_dir is None or not project_dir.is_dir():
        return {}
    files: dict[str, str] = {}
    # sorted(): rglob order depends on the filesystem; sorting keeps the
    # mapping order the same from run to run.
    for path in sorted(project_dir.rglob("*")):
        if not path.is_file() or path.stat().st_size >= 500_000:
            continue
        # as_posix(): callers look files up with "dir/file" style keys, which
        # str() would not produce on Windows.
        key = path.relative_to(project_dir).as_posix()
        # errors="replace": a binary or badly encoded file must not abort the
        # whole scan with UnicodeDecodeError.
        files[key] = path.read_text(encoding="utf-8", errors="replace")
    return files


def no_forbidden_tokens(text: str) -> bool:
    """Return ``True`` when *text* contains none of ``FORBIDDEN_TOKENS``.

    The comparison is case-insensitive: the text and each token are
    lowercased before the substring test.
    """
    haystack = text.lower()
    for token in FORBIDDEN_TOKENS:
        if token.lower() in haystack:
            return False
    return True


def package_has_scripts(package_text: str, scripts: list[str]) -> bool:
    """Report whether a ``package.json`` document defines all given scripts.

    Args:
        package_text: Raw JSON text of the package file. An empty string is
            treated as an empty JSON object.
        scripts: Script names that must appear as keys of the ``"scripts"``
            object.

    Returns:
        ``False`` when the text is not valid JSON. Otherwise ``True`` only if
        every requested name is a key of the ``"scripts"`` object. A document
        that is not a JSON object, or whose ``"scripts"`` value is not an
        object, is treated as defining no scripts.
    """
    try:
        package = json.loads(package_text or "{}")
    except json.JSONDecodeError:
        return False
    package_scripts = package.get("scripts") if isinstance(package, dict) else None
    # Only a JSON object counts: ``null`` would raise TypeError on ``in`` and
    # a string would match script names by substring.
    if not isinstance(package_scripts, dict):
        package_scripts = {}
    return all(script in package_scripts for script in scripts)


def verify_output(
    *,
    task_type: str,
    artifact_path: Path | None,
    project_dir: Path | None,
    changed_files: list[str],
    response_text: str,
    spec: dict[str, Any],
) -> list[dict[str, Any]]:
    """Run the deterministic checks for one router output.

    Three baseline checks always run first (something was produced, changed
    files were reported, no forbidden secret tokens). They are followed by the
    checks specific to ``task_type`` and a final lorem-ipsum check.

    Args:
        task_type: One of ``website``, ``business_document``,
            ``business_suite``, ``coding``, ``app``, ``code_project`` or
            ``repo_patch``. Any other value adds a failing
            ``known_task_type`` check.
        artifact_path: Single-file artifact to inspect, if any.
        project_dir: Directory of project/repo files to inspect, if any.
        changed_files: Paths reported as changed.
        response_text: Response text; only scanned as part of the combined
            text (secrets, lorem ipsum, Worker route/CORS checks).
        spec: Task spec. Its JSON dump is part of the combined text, and
            ``project_type`` is read for ``code_project`` tasks.

    Returns:
        The check records (see ``check``) in the order they were run.
    """
    artifact_text = read_text(artifact_path)
    project_files = read_project_files(project_dir)
    # Everything the run produced or was given, joined for whole-output scans.
    combined = "\n".join([artifact_text, response_text, json.dumps(spec, ensure_ascii=False), *project_files.values()])
    lower_artifact = artifact_text.lower()
    lower_combined = combined.lower()
    results: list[dict[str, Any]] = [
        check("artifact_or_project_exists", bool(artifact_text or project_files), "artifact file or project/repo files exist"),
        check("changed_files_present", len(changed_files) > 0, "changed files were reported"),
        check("no_hardcoded_secrets", no_forbidden_tokens(combined), "no obvious provider secret tokens found"),
    ]

    if task_type == "website":
        results.extend(
            [
                check("complete_html", all(token in lower_artifact for token in ["<!doctype html", "<html", "</html>"]), "HTML document is complete"),
                check("required_sections", all(token in lower_artifact for token in ['id="services"', 'id="pricing"', 'id="hours"', 'id="contact"']), "required business sections exist"),
                check("external_images", "<img " in lower_artifact and "https://images.unsplash.com/" in lower_artifact, "real external images are present"),
                check("responsive_css", "viewport" in lower_artifact and "@media" in lower_artifact, "mobile viewport and responsive CSS exist"),
            ]
        )
    elif task_type == "business_document":
        results.extend(
            [
                check("markdown_title", artifact_text.lstrip().startswith("# "), "document starts with a Markdown title"),
                check("no_placeholders", "{{" not in artifact_text and "}}" not in artifact_text and "[insert" not in lower_artifact, "no obvious template placeholders"),
                check("business_next_step", any(term in lower_artifact for term in ["approval", "payment", "next step", "reply", "due"]), "document includes a concrete next step"),
            ]
        )
    elif task_type == "business_suite":
        required = {
            "README.md",
            "01-launch-kit/index.html",
            "02-content-engine/content-calendar.csv",
            "02-content-engine/voice-and-posts.md",
            "03-connector-pack/connector-checklist.md",
            "04-intake-crm/intake-form.html",
            "04-intake-crm/schema.sql",
            "05-reporting-agent/money-momentum-report.md",
            "06-agent-lab/automations.md",
            "07-operator-training/OPERATOR_HANDBOOK.md",
            "08-lead-generator/prospects.csv",
            "09-sales-closer/pipeline.csv",
            "09-sales-closer/proposal.md",
            "09-sales-closer/follow-up-sequence.md",
            "10-roi-dashboard/dashboard.html",
            "10-roi-dashboard/roi-summary.md",
            "11-the-workshop/taught-skill.md",
            "kaiju-change-summary.md",
        }
        launch_html = project_files.get("01-launch-kit/index.html", "").lower()
        intake_html = project_files.get("04-intake-crm/intake-form.html", "").lower()
        roi_html = project_files.get("10-roi-dashboard/dashboard.html", "").lower()
        readme = project_files.get("README.md", "").lower()
        connectors = project_files.get("03-connector-pack/connector-checklist.md", "").lower()
        roi_summary = project_files.get("10-roi-dashboard/roi-summary.md", "").lower()
        workshop = project_files.get("11-the-workshop/taught-skill.md", "").lower()
        # Each HTML page is tested on its own. Testing the concatenation would
        # let one complete page mask incomplete siblings and could match a
        # token across a file boundary.
        suite_html_pages = (launch_html, intake_html, roi_html)
        suite_html_tokens = ["<!doctype html", "<html", "</html>", "viewport"]
        results.extend(
            [
                check("suite_required_files", required.issubset(project_files), "all Kiyomi-style module artifacts exist"),
                # NOTE(review): "/kiyomi" is a prefix of "/kiyomi-do", so the
                # first test is implied by the second.
                check("owner_daily_surface", "/kiyomi" in readme and "/kiyomi-do" in readme, "owner daily commands are documented"),
                check("verified_connector_gate", "connected and verified live" in connectors and "not-connected" in connectors, "connector states include verified and not-connected gates"),
                check("roi_audit_gate", "automation savings are n/a until the post-launch time audit is complete" in roi_summary, "ROI savings are gated by audit status"),
                check("workshop_golden_run", "does this look exactly right" in workshop and "never send" in workshop, "Workshop proves one item before batching"),
                check("suite_html_complete", all(token in page for page in suite_html_pages for token in suite_html_tokens), "HTML artifacts are complete and responsive"),
                check("growth_artifacts", "score,company" in project_files.get("08-lead-generator/prospects.csv", "").lower() and "stage,lead" in project_files.get("09-sales-closer/pipeline.csv", "").lower(), "lead and sales CSV artifacts exist"),
                check("no_owner_developer_setup", "open a terminal" not in readme and "create an oauth app" not in readme, "owner-facing docs avoid developer setup"),
            ]
        )
    elif task_type == "coding":
        results.extend(
            [
                check("markdown_title", artifact_text.lstrip().startswith("# "), "coding artifact starts with a Markdown title"),
                check("code_blocks", "```ts" in artifact_text or "```typescript" in lower_artifact, "TypeScript code block exists"),
                check("tests_present", "describe(" in artifact_text and "expect(" in artifact_text, "Vitest-style tests exist"),
                check("state_config_safety", all(term in lower_artifact for term in ["state", "config", "safety", "verification"]), "state/config/safety/verification sections exist"),
            ]
        )
    elif task_type == "app":
        results.extend(
            [
                check("complete_html", all(token in lower_artifact for token in ["<!doctype html", "<html", "</html>"]), "app HTML document is complete"),
                check("interactive_form", "<form" in lower_artifact and "<input" in lower_artifact, "interactive form exists"),
                check("local_storage", "localstorage" in lower_artifact, "localStorage persistence exists"),
                check("csv_export", "export csv" in lower_artifact and "text/csv" in lower_artifact, "CSV export exists"),
            ]
        )
    elif task_type == "code_project":
        project_type = str(spec.get("project_type", ""))
        is_worker = project_type == "cloudflare_worker"
        if is_worker:
            required = {"package.json", "wrangler.toml", "src/index.ts", "tests/worker.test.ts", "README.md", "kaiju-change-summary.md", "kaiju.patch"}
        else:
            required = {"package.json", "src/app/page.tsx", "src/app/globals.css", "tests/smoke.test.ts", "README.md", "kaiju-change-summary.md", "kaiju.patch"}
        project_patch = project_files.get("kaiju.patch", "")
        results.extend(
            [
                check("project_required_files", required.issubset(project_files), "required project files exist"),
                check("package_scripts", package_has_scripts(project_files.get("package.json", ""), ["dev", "build", "lint", "test"]), "package scripts exist"),
                check("tests_present", any(path.startswith("tests/") for path in project_files), "test files exist"),
                check("unified_diff", "--- a/" in project_patch and "+++ b/" in project_patch, "patch file has unified diff markers"),
            ]
        )
        if is_worker:
            results.extend(
                [
                    check("worker_entrypoint", "export default" in project_files.get("src/index.ts", ""), "Worker fetch export exists"),
                    # These two scan the combined text, which also contains
                    # the response text and the spec dump.
                    check("worker_routes", "/health" in combined and "/leads" in combined, "health and lead intake routes exist"),
                    check("worker_cors", "access-control-allow-origin" in lower_combined, "CORS handling exists"),
                ]
            )
    elif task_type == "repo_patch":
        patch_text = project_files.get("kaiju-repo.patch", "")
        results.extend(
            [
                check("patch_summary", "kaiju-repo-patch-summary.md" in project_files and "## Verification" in project_files.get("kaiju-repo-patch-summary.md", ""), "repo patch summary exists"),
                check("unified_diff", "--- a/" in patch_text and "+++ b/" in patch_text, "repo patch has unified diff markers"),
                check("tests_touched", any(path.startswith("tests/") for path in changed_files), "patch includes tests"),
                # The manifest file is not counted towards the patch size.
                check("scoped_patch", 3 <= len([path for path in changed_files if path != "kaiju-manifest.json"]) <= 12, "patch has a reviewable number of changed files"),
            ]
        )
    else:
        results.append(check("known_task_type", False, f"unknown task type: {task_type}"))

    has_lorem = "lorem ipsum" in lower_combined
    results.append(check("no_lorem_ipsum", not has_lorem, "lorem ipsum was found" if has_lorem else "no lorem ipsum found"))
    return results


def failed_checks(results: list[dict[str, Any]]) -> list[str]:
    """Summarise every failing check as ``"<name>: <detail>"``.

    A record counts as failed when its ``ok`` value is falsy or missing.
    Order follows *results*.
    """
    failures: list[str] = []
    for record in results:
        if record.get("ok"):
            continue
        failures.append("{}: {}".format(record["name"], record["detail"]))
    return failures