browser-speak / tools /audit-validation.mjs
Mike0021's picture
Add worker network telemetry to browser evidence
d2ae80e verified
Raw
History Blame Contribute Delete
17.9 kB
#!/usr/bin/env node
import { readFile, writeFile, access } from "node:fs/promises";
import { constants } from "node:fs";
import { tmpdir } from "node:os";
import { dirname, resolve } from "node:path";
import { mkdir } from "node:fs/promises";
import { sourceFingerprint } from "./source-fingerprint.mjs";
/**
 * Resolves the location of a JSON artifact: the named environment variable
 * wins when set, otherwise the file is looked up in the OS temp directory.
 */
const artifactPath = (envName, fileName) => resolve(process.env[envName] ?? `${tmpdir()}/${fileName}`);

// JSON artifacts this audit reads, keyed by the check that consumes them.
const paths = {
  clientSide: artifactPath("BROWSER_SPEAK_CLIENT_SIDE_JSON", "browser-speak-client-side-smoke.json"),
  loopback: artifactPath("BROWSER_SPEAK_LOOPBACK_JSON", "browser-speak-loopback-series.json"),
  fakeMic: artifactPath("BROWSER_SPEAK_FAKE_MIC_JSON", "browser-speak-fake-mic-results.json"),
  realMic: artifactPath("BROWSER_SPEAK_REAL_MIC_JSON", "browser-speak-real-mic-series.json"),
  webgpu: artifactPath("BROWSER_SPEAK_WEBGPU_JSON", "browser-speak-webgpu-results.json"),
  ui: artifactPath("BROWSER_SPEAK_UI_JSON", "browser-speak-ui-smoke.json"),
  evidenceExport: artifactPath("BROWSER_SPEAK_EVIDENCE_EXPORT_JSON", "browser-speak-evidence-export-smoke.json"),
};

// Where the audit report itself is written.
const resultPath = artifactPath("BROWSER_SPEAK_AUDIT_JSON", "browser-speak-validation-audit.json");

// Soft mode: a failed audit is still reported but does not set a failing exit code.
const soft = process.env.BROWSER_SPEAK_AUDIT_SOFT === "true";

// Both requirements are on unless the variable is exactly "false".
const requireRealMic = process.env.BROWSER_SPEAK_AUDIT_REQUIRE_REAL_MIC !== "false";
const requireHardwareWebgpu = process.env.BROWSER_SPEAK_AUDIT_REQUIRE_HARDWARE_WEBGPU !== "false";

// Upper bounds (inclusive) for the median word error rate gates.
const realMicMaxWer = Number(process.env.BROWSER_SPEAK_AUDIT_REAL_MIC_MAX_WER ?? 0.25);
const loopbackMaxMedianWer = Number(process.env.BROWSER_SPEAK_AUDIT_LOOPBACK_MAX_MEDIAN_WER ?? 0);

// Filled in by main() before any check runs; read by the freshness helpers.
let currentSourceFingerprint = null;
/**
 * Runs the whole audit: fingerprints the current sources, loads every JSON
 * artifact, evaluates all checks, writes the report to `resultPath` and prints
 * one line per check.
 *
 * The audit passes when every check marked `required` has status "pass".
 * A failed audit sets `process.exitCode` to 1 unless soft mode is enabled.
 */
async function main() {
  currentSourceFingerprint = await sourceFingerprint();

  // Load all artifacts in parallel; readJson never rejects, it reports failures in-band.
  const loaded = await Promise.all(
    Object.entries(paths).map(async ([key, path]) => {
      const artifact = await readJson(path);
      return [key, artifact];
    }),
  );
  const artifacts = Object.fromEntries(loaded);

  const staticFiles = await checkStaticFiles();
  const checks = [
    staticFiles,
    checkUi(artifacts.ui),
    checkEvidenceExport(artifacts.evidenceExport),
    checkClientSide(artifacts.clientSide),
    checkLoopback(artifacts.loopback),
    checkFirstTtsChunkSafety(artifacts),
    checkFakeMic(artifacts.fakeMic),
    checkRealMic(artifacts.realMic),
    checkHardwareWebgpu(artifacts.webgpu),
  ];

  // Supporting (non-required) checks never influence the verdict.
  const passed = checks.every((check) => !check.required || check.status === "pass");

  const payload = {
    generatedAt: new Date().toISOString(),
    sourceFingerprint: currentSourceFingerprint,
    passed,
    paths,
    checks,
    nextActions: nextActions(checks),
  };

  await mkdir(dirname(resultPath), { recursive: true });
  await writeFile(resultPath, `${JSON.stringify(payload, null, 2)}\n`);
  console.log(`Wrote validation audit JSON: ${resultPath}`);

  checks.forEach((check) => {
    const tier = check.required ? "required" : "supporting";
    console.log(`${tier} ${check.status}: ${check.name} - ${check.message}`);
  });

  if (!(passed || soft)) process.exitCode = 1;
}
/**
 * Required check: verifies that the static demo files exist.
 *
 * Paths are relative and are passed to `exists` unchanged.
 *
 * @returns {Promise<object>} Check result whose evidence lists every probed
 *   file and the subset that was not found (in probe order).
 */
async function checkStaticFiles() {
  const files = [
    "index.html",
    "styles.css",
    "app.js",
    "workers/asr-worker.js",
    "workers/llm-worker.js",
    "workers/tts-worker.js",
  ];

  const probes = await Promise.all(files.map(async (file) => ({ file, present: await exists(file) })));
  const absent = probes.filter((probe) => !probe.present).map((probe) => probe.file);
  const allPresent = absent.length === 0;

  return {
    name: "demo files",
    required: true,
    status: allPresent ? "pass" : "fail",
    message: allPresent ? "Static demo files are present." : `Missing files: ${absent.join(", ")}.`,
    evidence: { files, missing: absent },
  };
}
/**
 * Required check: the UI smoke artifact must be readable, match the current
 * source fingerprint, and carry `passed === true`.
 *
 * @param {object} artifact Result of `readJson` for the UI smoke file.
 * @returns {object} Check result (missing / stale / pass / fail).
 */
function checkUi(artifact) {
  const name = "UI smoke";
  if (!artifact.ok) {
    return missing(name, true, artifact, "UI smoke JSON is missing or unreadable.");
  }

  const { ok: fresh, message: freshnessMessage } = sourceFreshness(artifact);
  if (!fresh) {
    return stale(name, true, artifact, freshnessMessage);
  }

  let status = "fail";
  let message = "Desktop/mobile UI smoke has not passed.";
  if (artifact.value?.passed === true) {
    status = "pass";
    message = "Desktop/mobile UI smoke passed.";
  }

  return {
    name,
    required: true,
    status,
    message,
    evidence: pick(artifact.value, ["passed", "config", "errors"]),
  };
}
/**
 * Required check: the evidence-export smoke must show that saved rows were
 * restored and exported, that a `browser-speak-evidence-*.json` download was
 * offered through a blob URL, and that Clear left no rows behind.
 *
 * @param {object} artifact Result of `readJson` for the evidence export smoke file.
 * @returns {object} Check result (missing / stale / pass / fail).
 */
function checkEvidenceExport(artifact) {
  const name = "evidence export smoke";
  if (!artifact.ok) return missing(name, true, artifact, "Evidence export smoke JSON is missing.");

  const freshness = sourceFreshness(artifact);
  if (!freshness.ok) return stale(name, true, artifact, freshness.message);

  const report = artifact.value;
  const summary = report?.summary ?? {};

  // Every gate has to hold; a missing summary field fails its comparison.
  const gates = [
    report?.passed === true,
    summary.restoredRows >= 1,
    summary.exportedRows >= 1,
    /^browser-speak-evidence-.*\.json$/.test(summary.downloadName ?? ""),
    summary.downloadHrefScheme === "blob",
    summary.clearedRows === 0,
    summary.persistedAfterClear === null,
  ];
  const passed = gates.every(Boolean);

  const message = passed
    ? "Autosaved rows restore, evidence JSON download is requested, and Clear removes saved rows."
    : "Evidence export, autosave restore, or Clear behavior did not pass.";

  return {
    name,
    required: true,
    status: passed ? "pass" : "fail",
    message,
    evidence: pick(report, ["passed", "summary", "error", "config"]),
  };
}
/**
 * Required check: the client-side smoke must report zero benchmark-phase
 * requests, zero server-inference suspects, zero row errors and an empty
 * `missingTasks` array.
 *
 * @param {object} artifact Result of `readJson` for the client-side smoke file.
 * @returns {object} Check result (missing / stale / pass / fail).
 */
function checkClientSide(artifact) {
  const name = "client-side/no-server smoke";
  if (!artifact.ok) return missing(name, true, artifact, "Client-side smoke JSON is missing.");

  const freshness = sourceFreshness(artifact);
  if (!freshness.ok) return stale(name, true, artifact, freshness.message);

  const report = artifact.value;
  const summary = report?.summary ?? {};

  const gates = [
    report?.passed === true,
    summary.benchmarkRequests === 0,
    summary.serverInferenceSuspects === 0,
    summary.benchmarkErrors === 0,
    // Must be an actual empty array; an absent field does not count as "nothing missing".
    Array.isArray(summary.missingTasks) && summary.missingTasks.length === 0,
  ];
  const passed = gates.every(Boolean);

  const message = passed
    ? "Benchmark phase had no network requests, server-inference suspects, row errors, or missing tasks."
    : "Client-side smoke did not prove the no-server benchmark phase.";

  return {
    name,
    required: true,
    status: passed ? "pass" : "fail",
    message,
    evidence: pick(report, ["passed", "summary", "error", "config"]),
  };
}
/**
 * Required check: the synthetic loopback series must have completed at least
 * three runs without errors, with at least half of them transcribed exactly,
 * identity passes covering the exact transcripts, a finite median WER no
 * greater than `loopbackMaxMedianWer`, and a finite median
 * speech-end-to-first-audio latency.
 *
 * @param {object} artifact Result of `readJson` for the loopback series file.
 * @returns {object} Check result (missing / stale / pass / fail).
 */
function checkLoopback(artifact) {
  if (!artifact.ok) return missing("loopback stability", true, artifact, "Loopback series JSON is missing.");
  const freshness = sourceFreshness(artifact);
  if (!freshness.ok) return stale("loopback stability", true, artifact, freshness.message);
  const summary = artifact.value?.summary ?? {};
  const passed =
    artifact.value?.passed === true &&
    summary.completedRuns >= 3 &&
    summary.errorRuns === 0 &&
    summary.exactTranscriptRuns >= Math.ceil(summary.completedRuns / 2) &&
    summary.identityPasses >= summary.exactTranscriptRuns &&
    Number.isFinite(summary.medianWer) &&
    summary.medianWer <= loopbackMaxMedianWer &&
    Number.isFinite(summary.medianSpeechEndToFirstAudioMs);
  return {
    name: "loopback stability",
    required: true,
    status: passed ? "pass" : "fail",
    // Report the actual run count: the gate is ">= 3", so a longer series must
    // not be described as "3 rows".
    message: passed
      ? `Synthetic loopback completed ${summary.completedRuns} rows with median WER ${formatPercent(
          summary.medianWer,
        )} and ${summary.exactTranscriptRuns}/${summary.completedRuns} exact transcripts.`
      : "Loopback stability evidence is missing or below threshold.",
    evidence: pick(artifact.value, ["passed", "summary", "config", "error"]),
  };
}
/**
 * Required check: across the client-side, loopback, fake-mic and real-mic
 * artifacts, no error-free row with first-TTS-chunk text may be flagged by
 * `unsafeFirstTtsChunk`.
 *
 * Fails when no such rows exist at all. Evidence carries at most the first
 * eight offending rows.
 *
 * @param {object} artifacts Loaded artifacts keyed as in `paths`.
 * @returns {object} Check result (pass / fail).
 */
function checkFirstTtsChunkSafety(artifacts) {
  const name = "first TTS chunk safety";
  const sources = [
    [artifacts.clientSide, "client-side smoke"],
    [artifacts.loopback, "loopback series"],
    [artifacts.fakeMic, "fake microphone"],
    [artifacts.realMic, "real microphone"],
  ];

  // Only completed rows that actually recorded a first chunk are judged.
  const candidates = sources
    .flatMap(([artifact, label]) => rowsFromArtifact(artifact, label))
    .filter(({ row }) => !row.error && row.firstTtsText);

  if (candidates.length === 0) {
    return {
      name,
      required: true,
      status: "fail",
      message: "No completed benchmark rows with first TTS chunk text were available.",
      evidence: {},
    };
  }

  const offenders = candidates.filter(({ row }) => unsafeFirstTtsChunk(row));
  const clean = offenders.length === 0;
  const message = clean
    ? `${candidates.length} first TTS chunk(s) ended at a safe boundary.`
    : `${offenders.length}/${candidates.length} first TTS chunk(s) appear to split a word.`;

  const sample = offenders.slice(0, 8).map(({ source, row }) => ({
    source,
    kind: row.kind,
    firstTtsText: row.firstTtsText,
    output: row.output,
    firstTtsBoundaryKind: row.firstTtsBoundaryKind ?? null,
    firstTtsWordBoundarySafe: row.firstTtsWordBoundarySafe ?? null,
  }));

  return {
    name,
    required: true,
    status: clean ? "pass" : "fail",
    message,
    evidence: { checkedRows: candidates.length, unsafe: sample },
  };
}
/**
 * Supporting (non-required) check: the fake-microphone regression should be
 * fresh, contain at least three mic runs, have a median WER of exactly 0 and a
 * finite median speech-end-to-first-audio latency.
 *
 * The summary is read from `summary.current`, falling back to `summary.all`.
 * Anything short of a pass is reported as "warn", never "fail".
 *
 * @param {object} artifact Result of `readJson` for the fake mic results file.
 * @returns {object} Check result (missing / pass / warn).
 */
function checkFakeMic(artifact) {
  if (!artifact.ok) return missing("fake microphone regression", false, artifact, "Fake mic JSON is missing.");
  const freshness = sourceFreshness(artifact);
  const summary = artifact.value?.summary?.current ?? artifact.value?.summary?.all ?? {};
  const passed =
    freshness.ok &&
    summary.micRuns >= 3 &&
    summary.micMedianWer === 0 &&
    Number.isFinite(summary.micMedianSpeechEndToFirstAudioMs);

  let message;
  if (passed) {
    // Report the real run count: the gate is ">= 3", not "== 3".
    message = `Fake mic browser-capture regression has ${summary.micRuns} rows and 0% median WER.`;
  } else if (freshness.ok) {
    message = "Fake mic regression is unavailable or below the expected threshold.";
  } else {
    // A stale artifact is surfaced with the freshness explanation instead.
    message = freshness.message;
  }

  return {
    name: "fake microphone regression",
    required: false,
    status: passed ? "pass" : "warn",
    message,
    evidence: { summary, freshness },
  };
}
/**
 * Real-microphone gate. Skipped when the requirement is disabled; reported as
 * "missing" when the artifact is unreadable or is only a dry run; otherwise
 * passes only when no fake-capture Chrome flags or `allowFakeCapture` were
 * used and the row count, error count, identity passes, latency and median
 * WER (at most `realMicMaxWer`) gates all hold.
 *
 * The expected row count comes from `config.count`, defaulting to 3.
 *
 * @param {object} artifact Result of `readJson` for the real mic series file.
 * @returns {object} Check result (skip / missing / stale / pass / fail).
 */
function checkRealMic(artifact) {
  const name = "real microphone validation";
  if (!requireRealMic) return skipped(name, "Real mic requirement disabled by environment.");
  if (!artifact.ok) return missing(name, true, artifact, "Real human microphone JSON is missing.");

  const report = artifact.value;
  if (report?.dryRun) {
    return {
      name,
      required: true,
      status: "missing",
      message: "Only a real-mic dry-run artifact exists; no human speech rows were collected.",
      evidence: pick(report, ["dryRun", "config", "preflight"]),
    };
  }

  const freshness = sourceFreshness(artifact);
  if (!freshness.ok) return stale(name, true, artifact, freshness.message);

  const summary = report?.summary ?? {};
  const config = report?.config ?? {};
  const expectedRows = config.count ?? 3;

  // Evidence recorded with Chrome's fake capture devices is not human speech.
  const chromeArgs = Array.isArray(config.extraChromeArgs) ? config.extraChromeArgs : [];
  const fakeCaptureArgs = chromeArgs.filter(isFakeCaptureArg);
  const fakeCaptureAllowed = config.allowFakeCapture === true;

  const gates = [
    report?.passed === true,
    fakeCaptureArgs.length === 0,
    !fakeCaptureAllowed,
    summary.completedRows >= expectedRows,
    summary.errorRows === 0,
    summary.identityPasses >= expectedRows,
    Number.isFinite(summary.medianSpeechEndToFirstAudioMs),
    Number.isFinite(summary.medianWer),
    summary.medianWer <= realMicMaxWer,
  ];
  const passed = gates.every(Boolean);

  return {
    name,
    required: true,
    status: passed ? "pass" : "fail",
    message: realMicMessage(passed, fakeCaptureArgs, fakeCaptureAllowed),
    evidence: { ...pick(report, ["passed", "summary", "config", "error"]), fakeCaptureArgs },
  };
}
/**
 * Hardware WebGPU gate. Skipped when the requirement is disabled; otherwise
 * passes only when the benchmark was not skipped, WebGPU was available on a
 * non-software adapter, and at least one candidate has status "complete".
 *
 * A non-passing benchmark is reported as "missing" and uses the artifact's own
 * `reason` as the message when one is present.
 *
 * @param {object} artifact Result of `readJson` for the WebGPU results file.
 * @returns {object} Check result (skip / missing / stale / pass).
 */
function checkHardwareWebgpu(artifact) {
  const name = "hardware WebGPU benchmark";
  if (!requireHardwareWebgpu) return skipped(name, "Hardware WebGPU requirement disabled by environment.");
  if (!artifact.ok) return missing(name, true, artifact, "WebGPU benchmark JSON is missing.");

  const freshness = sourceFreshness(artifact);
  if (!freshness.ok) return stale(name, true, artifact, freshness.message);

  const report = artifact.value;
  const adapter = report?.webgpu ?? {};
  const finished = (report?.candidates ?? []).filter((candidate) => candidate.status === "complete");

  const gates = [
    report?.skipped !== true,
    adapter.available === true,
    adapter.softwareAdapter !== true,
    finished.length > 0,
  ];
  const passed = gates.every(Boolean);

  let message = "Hardware WebGPU benchmark completed at least one candidate.";
  if (!passed) {
    message = report?.reason ?? "Hardware WebGPU benchmark has not completed on a real adapter.";
  }

  return {
    name,
    required: true,
    status: passed ? "pass" : "missing",
    message,
    evidence: pick(report, ["skipped", "reason", "webgpu", "candidates", "summary"]),
  };
}
/**
 * Builds the result for a check that was disabled: never required, status
 * "skip", no evidence.
 *
 * @param {string} name Check name.
 * @param {string} message Why the check was skipped.
 * @returns {object} Check result.
 */
function skipped(name, message) {
  const result = {
    name,
    required: false,
    status: "skip",
    message,
    evidence: {},
  };
  return result;
}
/**
 * Builds the result for a check whose artifact could not be loaded.
 *
 * @param {string} name Check name.
 * @param {boolean} required Whether the check counts toward the verdict.
 * @param {object} artifact Failed `readJson` result; its `path` and `error` become the evidence.
 * @param {string} message Human-readable explanation.
 * @returns {object} Check result with status "missing".
 */
function missing(name, required, artifact, message) {
  const { path, error } = artifact;
  const evidence = { path, error };
  return { name, required, status: "missing", message, evidence };
}
/**
 * Builds the result for a check whose artifact failed the source-freshness
 * test. Evidence records both the artifact's fingerprint hash and the current
 * one (each `null` when unavailable).
 *
 * @param {string} name Check name.
 * @param {boolean} required Whether the check counts toward the verdict.
 * @param {object} artifact Loaded artifact.
 * @param {string} message Freshness explanation.
 * @returns {object} Check result with status "stale".
 */
function stale(name, required, artifact, message) {
  const artifactHash = artifact.value?.sourceFingerprint?.hash ?? null;
  const currentHash = currentSourceFingerprint?.hash ?? null;
  const evidence = { path: artifact.path, artifactHash, currentHash };
  return { name, required, status: "stale", message, evidence };
}
/**
 * Derives the list of follow-up actions from the checks that are required but
 * did not pass.
 *
 * The hosted-capture action comes first whenever the real-microphone or
 * hardware-WebGPU check is unresolved, followed by one action per unresolved
 * check in check order. Duplicate actions are emitted once.
 *
 * Artifact locations in the hints come from `paths`, so they match the files
 * the audit actually read (environment overrides and the platform temp
 * directory included) instead of assuming `/tmp`.
 *
 * @param {Array<{name: string, required: boolean, status: string}>} checks All check results.
 * @returns {string[]} Ordered, de-duplicated action hints.
 */
function nextActions(checks) {
  const unresolved = checks.filter((check) => check.required && check.status !== "pass");

  const remedies = new Map([
    ["real microphone validation", "Run node tools/run-real-mic-series.mjs on a machine with a real microphone."],
    [
      "hardware WebGPU benchmark",
      "Run node tools/run-webgpu-benchmark.mjs in a Chrome session exposing a hardware WebGPU adapter.",
    ],
    ["client-side/no-server smoke", `Run node tools/run-client-side-smoke.mjs and inspect ${paths.clientSide}.`],
    ["evidence export smoke", `Run node tools/run-evidence-export-smoke.mjs and inspect ${paths.evidenceExport}.`],
    ["loopback stability", `Run node tools/run-loopback-series.mjs and inspect ${paths.loopback}.`],
    [
      "first TTS chunk safety",
      "Run the client-side smoke and inspect firstTtsText / firstTtsWordBoundarySafe in benchmark rows.",
    ],
    ["UI smoke", "Run node tools/run-ui-smoke.mjs."],
  ]);

  // A Set keeps insertion order and drops repeated hints.
  const actions = new Set();

  const needsHostedCapture = unresolved.some(
    ({ name }) => name === "real microphone validation" || name === "hardware WebGPU benchmark",
  );
  if (needsHostedCapture) {
    actions.add(
      "Run node tools/run-hosted-evidence-capture.mjs from a desktop Chrome session with a real microphone and hardware WebGPU to collect hosted browser evidence.",
    );
  }

  for (const { name } of unresolved) {
    actions.add(remedies.get(name) ?? `Resolve failed check: ${name}.`);
  }

  return [...actions];
}
/**
 * Reads and parses a UTF-8 JSON file without ever rejecting.
 *
 * @param {string} path File to load.
 * @returns {Promise<object>} `{ ok: true, path, value }` on success, or
 *   `{ ok: false, path, error }` carrying the message of the read or parse error.
 */
async function readJson(path) {
  try {
    const text = await readFile(path, "utf8");
    const value = JSON.parse(text);
    return { ok: true, path, value };
  } catch (failure) {
    return { ok: false, path, error: failure.message };
  }
}
/**
 * Tells whether a filesystem entry is visible at `path`.
 *
 * @param {string} path Path to probe.
 * @returns {Promise<boolean>} `true` when the access check succeeds, `false`
 *   on any failure.
 */
async function exists(path) {
  return access(path, constants.F_OK).then(
    () => true,
    () => false,
  );
}
/**
 * Copies the listed keys that are present on `value` into a new object, in
 * the order given by `keys`.
 *
 * @param {*} value Source; anything that is not a non-null object yields `{}`.
 * @param {string[]} keys Keys to copy when present (checked with `in`).
 * @returns {object} New object containing only the keys that were found.
 */
function pick(value, keys) {
  const isObject = Boolean(value) && typeof value === "object";
  if (!isObject) return {};

  const entries = [];
  for (const key of keys) {
    if (key in value) entries.push([key, value[key]]);
  }
  return Object.fromEntries(entries);
}
/**
 * Picks the message for the real-microphone check. Priority: success, then
 * fake-capture Chrome args, then explicitly allowed fake capture, then the
 * generic below-threshold message.
 *
 * @param {boolean} passed Whether all gates held.
 * @param {string[]} fakeCaptureArgs Fake-capture Chrome args found in the config.
 * @param {boolean} fakeCaptureAllowed Whether the config allowed fake capture.
 * @returns {string} Message for the check result.
 */
function realMicMessage(passed, fakeCaptureArgs, fakeCaptureAllowed) {
  if (passed) {
    return "Real human microphone rows meet count, WER, latency, and identity gates.";
  }
  const usedFakeArgs = fakeCaptureArgs.length > 0;
  if (usedFakeArgs) {
    const listed = fakeCaptureArgs.join(", ");
    return `Real microphone evidence used fake-capture Chrome args: ${listed}.`;
  }
  return fakeCaptureAllowed
    ? "Real microphone evidence was collected with fake capture explicitly allowed."
    : "Real human microphone rows are missing or below threshold.";
}
/**
 * Compares the fingerprint hash stored in an artifact with the hash of the
 * current sources.
 *
 * @param {object} artifact Loaded artifact; the hash is read from `value.sourceFingerprint.hash`.
 * @returns {{ok: boolean, message: string}} `ok` only when a hash is present
 *   and equals the current one.
 */
function sourceFreshness(artifact) {
  const recordedHash = artifact.value?.sourceFingerprint?.hash;

  let ok = false;
  let message;
  if (!recordedHash) {
    message = "Artifact is missing source fingerprint metadata; rerun the harness.";
  } else if (recordedHash !== currentSourceFingerprint?.hash) {
    message = "Artifact source fingerprint is stale; rerun the harness.";
  } else {
    ok = true;
    message = "Artifact source fingerprint matches current files.";
  }
  return { ok, message };
}
/**
 * Collects benchmark rows from an artifact, tagging each with its source label.
 *
 * Rows are gathered from the `benchmarkResults`, `rows` and `results` arrays
 * (in that order) when present. Entries that are not non-null objects are
 * skipped: artifact files are external JSON, and callers read properties off
 * every row, which would throw on a `null` entry.
 *
 * @param {object} artifact Result of `readJson`.
 * @param {string} source Label attached to every returned row.
 * @returns {Array<{source: string, row: object}>} Tagged rows; empty when the
 *   artifact is unreadable or has no value.
 */
function rowsFromArtifact(artifact, source) {
  if (!artifact.ok || !artifact.value) return [];
  const rows = [];
  for (const key of ["benchmarkResults", "rows", "results"]) {
    const candidates = artifact.value[key];
    if (!Array.isArray(candidates)) continue;
    // Plain loop rather than push(...spread): no argument-count limit on large series.
    for (const row of candidates) {
      if (row !== null && typeof row === "object") rows.push({ source, row });
    }
  }
  return rows;
}
/**
 * A first TTS chunk is unsafe when the row itself says so
 * (`firstTtsWordBoundarySafe === false`) or when the text comparison in
 * `firstTtsLooksMidWord` flags it.
 *
 * @param {object} row Benchmark row.
 * @returns {boolean} Whether the first chunk should be flagged.
 */
function unsafeFirstTtsChunk(row) {
  if (row.firstTtsWordBoundarySafe === false) return true;
  return firstTtsLooksMidWord(row);
}
/**
 * Heuristic: does the first TTS chunk stop in the middle of a word?
 *
 * Only decidable when the trimmed chunk is a non-empty prefix of the trimmed
 * output. In that case the chunk is mid-word when both its last character and
 * the output character right after it are word-like.
 *
 * @param {object} row Benchmark row with `firstTtsText` and `output`.
 * @returns {boolean} `true` when the split falls between two word-like characters.
 */
function firstTtsLooksMidWord(row) {
  const spoken = String(row.firstTtsText ?? "").trim();
  const full = String(row.output ?? "").trim();

  const comparable = spoken.length > 0 && full.startsWith(spoken);
  if (!comparable) return false;

  // charAt yields "" past the end, i.e. when the chunk is the entire output.
  const lastSpoken = spoken.charAt(spoken.length - 1);
  const nextInOutput = full.charAt(spoken.length);
  return isWordLikeChar(lastSpoken) && isWordLikeChar(nextInOutput);
}
/**
 * Recognizes Chrome command-line switches that replace real capture devices
 * with fake ones.
 *
 * Matches `--use-fake-device-for-media-stream` both bare and with an
 * `=<config>` value (Chrome accepts an optional configuration string on that
 * switch), plus the `--use-file-for-fake-audio-capture=` and
 * `--use-file-for-fake-video-capture=` switches.
 *
 * Args come from artifact JSON, so non-string entries are tolerated and
 * treated as "not a fake-capture arg" instead of throwing.
 *
 * @param {*} arg One entry of `config.extraChromeArgs`.
 * @returns {boolean} Whether the entry enables fake capture.
 */
function isFakeCaptureArg(arg) {
  if (typeof arg !== "string") return false;
  return (
    arg === "--use-fake-device-for-media-stream" ||
    arg.startsWith("--use-fake-device-for-media-stream=") ||
    arg.startsWith("--use-file-for-fake-audio-capture=") ||
    arg.startsWith("--use-file-for-fake-video-capture=")
  );
}
/**
 * Tells whether a character can be part of a word for the mid-word split
 * heuristic: any Unicode letter or number, the ASCII apostrophe, or the
 * typographic apostrophe (U+2019).
 *
 * Unicode classes are used instead of an ASCII range so that splits inside
 * words with accented or non-Latin letters are detected too.
 *
 * @param {string} char Character to classify; the empty string is never word-like.
 * @returns {boolean} Whether the input contains a word-like character.
 */
function isWordLikeChar(char) {
  return /[\p{L}\p{N}'\u2019]/u.test(char);
}
/**
 * Formats a ratio as a whole-number percentage.
 *
 * @param {*} value Ratio such as 0.25; anything that is not a finite number yields "-".
 * @returns {string} For example "25%", or "-" when the value is not a finite number.
 */
function formatPercent(value) {
  return Number.isFinite(value) ? `${Math.round(value * 100)}%` : "-";
}
// Script entry point: run the audit. If main() rejects, print the stack (or
// the message when no stack is available) and mark the process as failed.
main().catch((error) => {
console.error(error.stack ?? error.message);
process.exitCode = 1;
});