Spaces:

Mike0021
/

browser-speak

Configuration error

App Files Files Community

browser-speak / tools /audit-validation.mjs

Mike0021's picture

Add worker network telemetry to browser evidence

d2ae80e verified about 1 month ago

History Blame Contribute Delete

17.9 kB

	#!/usr/bin/env node
	import { readFile, writeFile, access } from "node:fs/promises";
	import { constants } from "node:fs";
	import { tmpdir } from "node:os";
	import { dirname, resolve } from "node:path";
	import { mkdir } from "node:fs/promises";
	import { sourceFingerprint } from "./source-fingerprint.mjs";

	// Resolves an artifact location: the environment override when set, otherwise a file in the OS temp directory.
	const envPath = (envName, fileName) => resolve(process.env[envName] ?? `${tmpdir()}/${fileName}`);
	// Reads a numeric threshold from the environment; a non-numeric override becomes NaN.
	const envNumber = (envName, fallback) => Number(process.env[envName] ?? fallback);

	// Absolute paths of the evidence JSON artifacts the audit reads, keyed by artifact name.
	const paths = {
	  clientSide: envPath("BROWSER_SPEAK_CLIENT_SIDE_JSON", "browser-speak-client-side-smoke.json"),
	  loopback: envPath("BROWSER_SPEAK_LOOPBACK_JSON", "browser-speak-loopback-series.json"),
	  fakeMic: envPath("BROWSER_SPEAK_FAKE_MIC_JSON", "browser-speak-fake-mic-results.json"),
	  realMic: envPath("BROWSER_SPEAK_REAL_MIC_JSON", "browser-speak-real-mic-series.json"),
	  webgpu: envPath("BROWSER_SPEAK_WEBGPU_JSON", "browser-speak-webgpu-results.json"),
	  ui: envPath("BROWSER_SPEAK_UI_JSON", "browser-speak-ui-smoke.json"),
	  evidenceExport: envPath("BROWSER_SPEAK_EVIDENCE_EXPORT_JSON", "browser-speak-evidence-export-smoke.json"),
	};
	// Where the audit report itself is written.
	const resultPath = envPath("BROWSER_SPEAK_AUDIT_JSON", "browser-speak-validation-audit.json");
	// Soft mode is opt-in: only the exact string "true" enables it.
	const soft = process.env.BROWSER_SPEAK_AUDIT_SOFT === "true";
	// Both hardware requirements are opt-out: only the exact string "false" disables them.
	const requireRealMic = process.env.BROWSER_SPEAK_AUDIT_REQUIRE_REAL_MIC !== "false";
	const requireHardwareWebgpu = process.env.BROWSER_SPEAK_AUDIT_REQUIRE_HARDWARE_WEBGPU !== "false";
	// Word-error-rate ceilings applied by the real-mic and loopback checks.
	const realMicMaxWer = envNumber("BROWSER_SPEAK_AUDIT_REAL_MIC_MAX_WER", 0.25);
	const loopbackMaxMedianWer = envNumber("BROWSER_SPEAK_AUDIT_LOOPBACK_MAX_MEDIAN_WER", 0);
	// Assigned by main() before any check runs; compared against each artifact's recorded fingerprint.
	let currentSourceFingerprint = null;

	/**
	 * Runs the whole audit: fingerprints the current sources, loads every
	 * artifact, evaluates all checks, writes the JSON report to `resultPath`,
	 * and prints one line per check.
	 *
	 * Sets `process.exitCode` to 1 when a required check did not pass, unless
	 * soft mode is enabled.
	 *
	 * @returns {Promise<void>}
	 */
	async function main() {
	  currentSourceFingerprint = await sourceFingerprint();

	  // Load all artifacts concurrently; readJson reports failures as { ok: false } instead of rejecting.
	  const loaded = await Promise.all(
	    Object.entries(paths).map(async ([key, path]) => [key, await readJson(path)]),
	  );
	  const artifacts = Object.fromEntries(loaded);

	  const staticFilesCheck = await checkStaticFiles();
	  const checks = [
	    staticFilesCheck,
	    checkUi(artifacts.ui),
	    checkEvidenceExport(artifacts.evidenceExport),
	    checkClientSide(artifacts.clientSide),
	    checkLoopback(artifacts.loopback),
	    checkFirstTtsChunkSafety(artifacts),
	    checkFakeMic(artifacts.fakeMic),
	    checkRealMic(artifacts.realMic),
	    checkHardwareWebgpu(artifacts.webgpu),
	  ];

	  // Only required checks gate the verdict; supporting checks are informational.
	  const passed = checks.every((check) => !check.required || check.status === "pass");

	  const payload = {
	    generatedAt: new Date().toISOString(),
	    sourceFingerprint: currentSourceFingerprint,
	    passed,
	    paths,
	    checks,
	    nextActions: nextActions(checks),
	  };
	  const serialized = `${JSON.stringify(payload, null, 2)}\n`;
	  await mkdir(dirname(resultPath), { recursive: true });
	  await writeFile(resultPath, serialized);

	  console.log(`Wrote validation audit JSON: ${resultPath}`);
	  for (const { required, status, name, message } of checks) {
	    const tier = required ? "required" : "supporting";
	    console.log(`${tier} ${status}: ${name} - ${message}`);
	  }

	  if (!passed && !soft) process.exitCode = 1;
	}

	/**
	 * Verifies that the static demo entry points and worker scripts exist.
	 * The paths are relative, so they are looked up from the process working directory.
	 *
	 * @returns {Promise<object>} Required check result; `evidence` lists the files probed and those not found.
	 */
	async function checkStaticFiles() {
	  const files = [
	    "index.html",
	    "styles.css",
	    "app.js",
	    "workers/asr-worker.js",
	    "workers/llm-worker.js",
	    "workers/tts-worker.js",
	  ];

	  // Probe one file at a time so the report keeps the declaration order.
	  const absent = [];
	  for (const file of files) {
	    const present = await exists(file);
	    if (!present) absent.push(file);
	  }

	  const allPresent = absent.length === 0;
	  return {
	    name: "demo files",
	    required: true,
	    status: allPresent ? "pass" : "fail",
	    message: allPresent ? "Static demo files are present." : `Missing files: ${absent.join(", ")}.`,
	    evidence: { files, missing: absent },
	  };
	}

	/**
	 * Audits the UI smoke artifact: it must be readable, match the current
	 * source fingerprint, and report `passed === true`.
	 *
	 * @param {{ok: boolean, path: string, value?: any, error?: string}} artifact Result of readJson for the UI smoke file.
	 * @returns {object} Required check result.
	 */
	function checkUi(artifact) {
	  const name = "UI smoke";
	  if (!artifact.ok) return missing(name, true, artifact, "UI smoke JSON is missing or unreadable.");

	  const { ok: fresh, message: staleReason } = sourceFreshness(artifact);
	  if (!fresh) return stale(name, true, artifact, staleReason);

	  const evidence = pick(artifact.value, ["passed", "config", "errors"]);
	  if (artifact.value?.passed === true) {
	    return { name, required: true, status: "pass", message: "Desktop/mobile UI smoke passed.", evidence };
	  }
	  return { name, required: true, status: "fail", message: "Desktop/mobile UI smoke has not passed.", evidence };
	}

	/**
	 * Audits the evidence export smoke artifact: rows must restore and export,
	 * the download must be a blob-backed `browser-speak-evidence-*.json`, and
	 * Clear must leave no rows and nothing persisted.
	 *
	 * @param {{ok: boolean, path: string, value?: any, error?: string}} artifact Result of readJson for the export smoke file.
	 * @returns {object} Required check result.
	 */
	function checkEvidenceExport(artifact) {
	  const name = "evidence export smoke";
	  if (!artifact.ok) return missing(name, true, artifact, "Evidence export smoke JSON is missing.");

	  const freshness = sourceFreshness(artifact);
	  if (!freshness.ok) return stale(name, true, artifact, freshness.message);

	  const report = artifact.value;
	  const summary = report?.summary ?? {};
	  // Every gate must hold; none of these expressions can throw on a sparse summary.
	  const gates = [
	    report?.passed === true,
	    summary.restoredRows >= 1,
	    summary.exportedRows >= 1,
	    /^browser-speak-evidence-.*\.json$/.test(summary.downloadName ?? ""),
	    summary.downloadHrefScheme === "blob",
	    summary.clearedRows === 0,
	    summary.persistedAfterClear === null,
	  ];
	  const passed = gates.every(Boolean);

	  let message = "Evidence export, autosave restore, or Clear behavior did not pass.";
	  if (passed) {
	    message = "Autosaved rows restore, evidence JSON download is requested, and Clear removes saved rows.";
	  }
	  return {
	    name,
	    required: true,
	    status: passed ? "pass" : "fail",
	    message,
	    evidence: pick(report, ["passed", "summary", "error", "config"]),
	  };
	}

	/**
	 * Audits the client-side smoke artifact: the benchmark phase must show zero
	 * network requests, zero server-inference suspects, zero row errors, and an
	 * empty `missingTasks` array.
	 *
	 * @param {{ok: boolean, path: string, value?: any, error?: string}} artifact Result of readJson for the client-side smoke file.
	 * @returns {object} Required check result.
	 */
	function checkClientSide(artifact) {
	  const name = "client-side/no-server smoke";
	  if (!artifact.ok) return missing(name, true, artifact, "Client-side smoke JSON is missing.");

	  const freshness = sourceFreshness(artifact);
	  if (!freshness.ok) return stale(name, true, artifact, freshness.message);

	  const report = artifact.value;
	  const summary = report?.summary ?? {};
	  // An absent missingTasks list counts as a failure, not as "nothing missing".
	  const noMissingTasks = Array.isArray(summary.missingTasks) && summary.missingTasks.length === 0;
	  const zeroCounters = ["benchmarkRequests", "serverInferenceSuspects", "benchmarkErrors"].every(
	    (counter) => summary[counter] === 0,
	  );
	  const passed = report?.passed === true && zeroCounters && noMissingTasks;

	  let message = "Client-side smoke did not prove the no-server benchmark phase.";
	  if (passed) {
	    message = "Benchmark phase had no network requests, server-inference suspects, row errors, or missing tasks.";
	  }
	  return {
	    name,
	    required: true,
	    status: passed ? "pass" : "fail",
	    message,
	    evidence: pick(report, ["passed", "summary", "error", "config"]),
	  };
	}

	/**
	 * Audits the synthetic loopback series artifact.
	 *
	 * Passes when the artifact is readable and fresh, reports `passed === true`,
	 * has at least 3 completed runs with no error runs, at least half of the
	 * runs produced an exact transcript, identity passes cover the exact
	 * transcripts, the median WER is finite and within `loopbackMaxMedianWer`,
	 * and a finite median speech-end-to-first-audio latency was recorded.
	 *
	 * @param {{ok: boolean, path: string, value?: any, error?: string}} artifact Result of readJson for the loopback series file.
	 * @returns {object} Required check result.
	 */
	function checkLoopback(artifact) {
	  if (!artifact.ok) return missing("loopback stability", true, artifact, "Loopback series JSON is missing.");
	  const freshness = sourceFreshness(artifact);
	  if (!freshness.ok) return stale("loopback stability", true, artifact, freshness.message);
	  const summary = artifact.value?.summary ?? {};
	  const passed =
	    artifact.value?.passed === true &&
	    summary.completedRuns >= 3 &&
	    summary.errorRuns === 0 &&
	    summary.exactTranscriptRuns >= Math.ceil(summary.completedRuns / 2) &&
	    summary.identityPasses >= summary.exactTranscriptRuns &&
	    Number.isFinite(summary.medianWer) &&
	    summary.medianWer <= loopbackMaxMedianWer &&
	    Number.isFinite(summary.medianSpeechEndToFirstAudioMs);
	  return {
	    name: "loopback stability",
	    required: true,
	    status: passed ? "pass" : "fail",
	    // Report the real run count: the gate is ">= 3", so a literal "3" would misstate longer series.
	    message: passed
	      ? `Synthetic loopback completed ${summary.completedRuns} rows with median WER ${formatPercent(
	          summary.medianWer,
	        )} and ${summary.exactTranscriptRuns}/${summary.completedRuns} exact transcripts.`
	      : "Loopback stability evidence is missing or below threshold.",
	    evidence: pick(artifact.value, ["passed", "summary", "config", "error"]),
	  };
	}

	/**
	 * Checks that the first TTS chunk of every completed benchmark row, across
	 * the client-side, loopback, fake-mic, and real-mic artifacts, does not
	 * split a word.
	 *
	 * Rows with an `error` or without `firstTtsText` are ignored. The check
	 * fails when no usable rows remain or when any row is flagged unsafe.
	 *
	 * @param {object} artifacts Map of artifact name to readJson result.
	 * @returns {object} Required check result; `evidence.unsafe` holds at most 8 offending rows.
	 */
	function checkFirstTtsChunkSafety(artifacts) {
	  const name = "first TTS chunk safety";
	  const sources = [
	    [artifacts.clientSide, "client-side smoke"],
	    [artifacts.loopback, "loopback series"],
	    [artifacts.fakeMic, "fake microphone"],
	    [artifacts.realMic, "real microphone"],
	  ];
	  const rows = sources
	    .flatMap(([artifact, label]) => rowsFromArtifact(artifact, label))
	    .filter(({ row }) => !row.error && row.firstTtsText);

	  if (rows.length === 0) {
	    return {
	      name,
	      required: true,
	      status: "fail",
	      message: "No completed benchmark rows with first TTS chunk text were available.",
	      evidence: {},
	    };
	  }

	  const unsafe = rows.filter(({ row }) => unsafeFirstTtsChunk(row));
	  const allSafe = unsafe.length === 0;
	  // Cap the sample so a systematic failure does not bloat the report.
	  const unsafeSample = unsafe.slice(0, 8).map(({ source, row }) => ({
	    source,
	    kind: row.kind,
	    firstTtsText: row.firstTtsText,
	    output: row.output,
	    firstTtsBoundaryKind: row.firstTtsBoundaryKind ?? null,
	    firstTtsWordBoundarySafe: row.firstTtsWordBoundarySafe ?? null,
	  }));

	  return {
	    name,
	    required: true,
	    status: allSafe ? "pass" : "fail",
	    message: allSafe
	      ? `${rows.length} first TTS chunk(s) ended at a safe boundary.`
	      : `${unsafe.length}/${rows.length} first TTS chunk(s) appear to split a word.`,
	    evidence: { checkedRows: rows.length, unsafe: unsafeSample },
	  };
	}

	/**
	 * Audits the fake-microphone regression artifact. This is a supporting
	 * (non-required) check: it yields "pass" or "warn", never "fail".
	 *
	 * Passes when the artifact is fresh, has at least 3 mic runs, a median WER
	 * of exactly 0, and a finite median speech-end-to-first-audio latency. The
	 * summary is read from `summary.current`, falling back to `summary.all`.
	 *
	 * @param {{ok: boolean, path: string, value?: any, error?: string}} artifact Result of readJson for the fake-mic results file.
	 * @returns {object} Supporting check result.
	 */
	function checkFakeMic(artifact) {
	  if (!artifact.ok) return missing("fake microphone regression", false, artifact, "Fake mic JSON is missing.");
	  const freshness = sourceFreshness(artifact);
	  const summary = artifact.value?.summary?.current ?? artifact.value?.summary?.all ?? {};
	  const passed =
	    freshness.ok &&
	    summary.micRuns >= 3 &&
	    summary.micMedianWer === 0 &&
	    Number.isFinite(summary.micMedianSpeechEndToFirstAudioMs);
	  return {
	    name: "fake microphone regression",
	    required: false,
	    status: passed ? "pass" : "warn",
	    // Report the actual run count: the gate is ">= 3", so a literal "3" would be wrong for longer series.
	    message: passed
	      ? `Fake mic browser-capture regression has ${summary.micRuns} rows and 0% median WER.`
	      : freshness.ok
	        ? "Fake mic regression is unavailable or below the expected threshold."
	        : freshness.message,
	    evidence: { summary, freshness },
	  };
	}

	/**
	 * Audits the real (human) microphone series artifact.
	 *
	 * Skipped when `requireRealMic` is off. A dry-run artifact counts as
	 * "missing". Otherwise the artifact must be fresh, must not have been
	 * captured with fake-capture Chrome args or with `allowFakeCapture`, and
	 * must meet the row-count, error, identity, latency, and WER gates.
	 *
	 * @param {{ok: boolean, path: string, value?: any, error?: string}} artifact Result of readJson for the real-mic series file.
	 * @returns {object} Check result (required unless skipped).
	 */
	function checkRealMic(artifact) {
	  const name = "real microphone validation";
	  if (!requireRealMic) return skipped(name, "Real mic requirement disabled by environment.");
	  if (!artifact.ok) return missing(name, true, artifact, "Real human microphone JSON is missing.");

	  const report = artifact.value;
	  // The dry-run test comes before the freshness test, as a dry run carries no rows either way.
	  if (report?.dryRun) {
	    return {
	      name,
	      required: true,
	      status: "missing",
	      message: "Only a real-mic dry-run artifact exists; no human speech rows were collected.",
	      evidence: pick(report, ["dryRun", "config", "preflight"]),
	    };
	  }

	  const freshness = sourceFreshness(artifact);
	  if (!freshness.ok) return stale(name, true, artifact, freshness.message);

	  const summary = report?.summary ?? {};
	  const config = report?.config ?? {};
	  const expected = config.count ?? 3;
	  const chromeArgs = Array.isArray(config.extraChromeArgs) ? config.extraChromeArgs : [];
	  const fakeCaptureArgs = chromeArgs.filter((arg) => isFakeCaptureArg(arg));
	  const fakeCaptureAllowed = config.allowFakeCapture === true;

	  const gates = [
	    report?.passed === true,
	    fakeCaptureArgs.length === 0,
	    !fakeCaptureAllowed,
	    summary.completedRows >= expected,
	    summary.errorRows === 0,
	    summary.identityPasses >= expected,
	    Number.isFinite(summary.medianSpeechEndToFirstAudioMs),
	    Number.isFinite(summary.medianWer),
	    summary.medianWer <= realMicMaxWer,
	  ];
	  const passed = gates.every(Boolean);

	  return {
	    name,
	    required: true,
	    status: passed ? "pass" : "fail",
	    message: realMicMessage(passed, fakeCaptureArgs, fakeCaptureAllowed),
	    evidence: { ...pick(report, ["passed", "summary", "config", "error"]), fakeCaptureArgs },
	  };
	}

	/**
	 * Audits the hardware WebGPU benchmark artifact.
	 *
	 * Skipped when `requireHardwareWebgpu` is off. Passes when the run was not
	 * skipped, WebGPU was available on a non-software adapter, and at least one
	 * candidate has status "complete". A non-passing run is reported as
	 * "missing" rather than "fail".
	 *
	 * @param {{ok: boolean, path: string, value?: any, error?: string}} artifact Result of readJson for the WebGPU results file.
	 * @returns {object} Check result (required unless skipped).
	 */
	function checkHardwareWebgpu(artifact) {
	  const name = "hardware WebGPU benchmark";
	  if (!requireHardwareWebgpu) return skipped(name, "Hardware WebGPU requirement disabled by environment.");
	  if (!artifact.ok) return missing(name, true, artifact, "WebGPU benchmark JSON is missing.");

	  const freshness = sourceFreshness(artifact);
	  if (!freshness.ok) return stale(name, true, artifact, freshness.message);

	  const report = artifact.value;
	  const adapter = report?.webgpu ?? {};
	  const finished = (report?.candidates ?? []).filter((candidate) => candidate.status === "complete");
	  const gates = [
	    report?.skipped !== true,
	    adapter.available === true,
	    adapter.softwareAdapter !== true,
	    finished.length > 0,
	  ];
	  const passed = gates.every(Boolean);

	  // Prefer the harness-supplied reason when explaining a non-passing run.
	  let message = "Hardware WebGPU benchmark completed at least one candidate.";
	  if (!passed) {
	    message = report?.reason ?? "Hardware WebGPU benchmark has not completed on a real adapter.";
	  }
	  return {
	    name,
	    required: true,
	    status: passed ? "pass" : "missing",
	    message,
	    evidence: pick(report, ["skipped", "reason", "webgpu", "candidates", "summary"]),
	  };
	}

	/**
	 * Builds the result for a check that was disabled by configuration.
	 * Skipped checks are never required, so they cannot affect the verdict.
	 *
	 * @param {string} name Check name.
	 * @param {string} message Why the check was skipped.
	 * @returns {{name: string, required: false, status: "skip", message: string, evidence: object}}
	 */
	function skipped(name, message) {
	  return {
	    name,
	    required: false,
	    status: "skip",
	    message,
	    evidence: {},
	  };
	}

	/**
	 * Builds the result for a check whose artifact could not be loaded.
	 *
	 * @param {string} name Check name.
	 * @param {boolean} required Whether the check gates the overall verdict.
	 * @param {{path: string, error?: string}} artifact Failed readJson result; its path and error become the evidence.
	 * @param {string} message Human-readable explanation.
	 * @returns {object} Check result with status "missing".
	 */
	function missing(name, required, artifact, message) {
	  const { path, error } = artifact;
	  return { name, required, status: "missing", message, evidence: { path, error } };
	}

	/**
	 * Builds the result for a check whose artifact was produced from different
	 * sources than the ones currently on disk.
	 *
	 * @param {string} name Check name.
	 * @param {boolean} required Whether the check gates the overall verdict.
	 * @param {{path: string, value?: any}} artifact Loaded artifact; its recorded hash is reported next to the current one.
	 * @param {string} message Human-readable explanation.
	 * @returns {object} Check result with status "stale"; absent hashes are reported as null.
	 */
	function stale(name, required, artifact, message) {
	  const evidence = {
	    path: artifact.path,
	    artifactHash: artifact.value?.sourceFingerprint?.hash ?? null,
	    currentHash: currentSourceFingerprint?.hash ?? null,
	  };
	  return { name, required, status: "stale", message, evidence };
	}

	/**
	 * Derives the list of follow-up actions from the required checks that did not pass.
	 *
	 * When the real-mic or hardware-WebGPU check is outstanding, the hosted
	 * evidence capture hint is listed first. Each outstanding check then
	 * contributes its own remedy, in check order, with duplicates removed.
	 *
	 * @param {Array<{name: string, required: boolean, status: string}>} checks All check results.
	 * @param {Record<string, string>} [artifactPaths] Artifact locations quoted in the hints. Defaults to
	 *   the module's configured `paths`, so hints stay correct when the temp directory is not `/tmp` or
	 *   a location was overridden through the environment.
	 * @returns {string[]} Ordered, de-duplicated action descriptions.
	 */
	function nextActions(checks, artifactPaths = paths) {
	  // Fall back to the historical /tmp location when a path is not supplied.
	  const located = (key, legacyDefault) => artifactPaths?.[key] ?? legacyDefault;
	  const remedies = new Map([
	    ["real microphone validation", "Run node tools/run-real-mic-series.mjs on a machine with a real microphone."],
	    [
	      "hardware WebGPU benchmark",
	      "Run node tools/run-webgpu-benchmark.mjs in a Chrome session exposing a hardware WebGPU adapter.",
	    ],
	    [
	      "client-side/no-server smoke",
	      `Run node tools/run-client-side-smoke.mjs and inspect ${located("clientSide", "/tmp/browser-speak-client-side-smoke.json")}.`,
	    ],
	    [
	      "evidence export smoke",
	      `Run node tools/run-evidence-export-smoke.mjs and inspect ${located("evidenceExport", "/tmp/browser-speak-evidence-export-smoke.json")}.`,
	    ],
	    [
	      "loopback stability",
	      `Run node tools/run-loopback-series.mjs and inspect ${located("loopback", "/tmp/browser-speak-loopback-series.json")}.`,
	    ],
	    [
	      "first TTS chunk safety",
	      "Run the client-side smoke and inspect firstTtsText / firstTtsWordBoundarySafe in benchmark rows.",
	    ],
	    ["UI smoke", "Run node tools/run-ui-smoke.mjs."],
	  ]);

	  const outstanding = checks
	    .filter((check) => check.required && check.status !== "pass")
	    .map((check) => check.name);

	  const actions = [];
	  if (outstanding.includes("real microphone validation") || outstanding.includes("hardware WebGPU benchmark")) {
	    actions.push(
	      "Run node tools/run-hosted-evidence-capture.mjs from a desktop Chrome session with a real microphone and hardware WebGPU to collect hosted browser evidence.",
	    );
	  }
	  for (const name of outstanding) {
	    actions.push(remedies.get(name) ?? `Resolve failed check: ${name}.`);
	  }
	  // A Set keeps first-seen order while dropping repeated hints.
	  return [...new Set(actions)];
	}

	/**
	 * Loads and parses a JSON file without ever rejecting.
	 *
	 * @param {string} path File to read as UTF-8.
	 * @returns {Promise<{ok: true, path: string, value: any} | {ok: false, path: string, error: string}>}
	 *   The parsed value on success, or the read/parse error message on failure.
	 */
	async function readJson(path) {
	  try {
	    const raw = await readFile(path, "utf8");
	    const value = JSON.parse(raw);
	    return { ok: true, path, value };
	  } catch (failure) {
	    // Read errors and syntax errors are reported the same way; callers treat both as "missing".
	    return { ok: false, path, error: failure.message };
	  }
	}

	/**
	 * Reports whether a filesystem entry is visible to this process.
	 *
	 * @param {string} path Path to probe.
	 * @returns {Promise<boolean>} True when `access` succeeds; false on any access error.
	 */
	async function exists(path) {
	  let reachable = true;
	  try {
	    await access(path, constants.F_OK);
	  } catch {
	    // Any failure (not found, permission, bad path) is treated as "does not exist".
	    reachable = false;
	  }
	  return reachable;
	}

	/**
	 * Copies the listed keys that are present on `value` into a new object.
	 *
	 * @param {any} value Source object; anything falsy or non-object yields `{}`.
	 * @param {string[]} keys Keys to copy, in output order.
	 * @returns {object} New object holding only the keys found on `value`.
	 */
	function pick(value, keys) {
	  // Guard first: the `in` operator throws on primitives and null.
	  if (!value || typeof value !== "object") return {};
	  return Object.fromEntries(keys.filter((key) => key in value).map((key) => [key, value[key]]));
	}

	/**
	 * Chooses the message for the real-microphone check, naming the most
	 * specific reason for a failure.
	 *
	 * @param {boolean} passed Whether every real-mic gate held.
	 * @param {string[]} fakeCaptureArgs Fake-capture Chrome args found in the run config.
	 * @param {boolean} fakeCaptureAllowed Whether the run explicitly allowed fake capture.
	 * @returns {string} Message for the check result.
	 */
	function realMicMessage(passed, fakeCaptureArgs, fakeCaptureAllowed) {
	  if (passed) {
	    return "Real human microphone rows meet count, WER, latency, and identity gates.";
	  }
	  // Fake-capture args take precedence over the allow flag because they name the exact cause.
	  const usedFakeArgs = fakeCaptureArgs.length > 0;
	  if (usedFakeArgs) {
	    const listed = fakeCaptureArgs.join(", ");
	    return `Real microphone evidence used fake-capture Chrome args: ${listed}.`;
	  }
	  return fakeCaptureAllowed
	    ? "Real microphone evidence was collected with fake capture explicitly allowed."
	    : "Real human microphone rows are missing or below threshold.";
	}

	/**
	 * Compares the source fingerprint recorded in an artifact with the
	 * fingerprint of the current sources.
	 *
	 * @param {{value?: any}} artifact Loaded artifact.
	 * @returns {{ok: boolean, message: string}} `ok` is true only when a hash is recorded and equals the current hash.
	 */
	function sourceFreshness(artifact) {
	  const recordedHash = artifact.value?.sourceFingerprint?.hash;
	  // An artifact without a recorded hash can never be proven fresh.
	  if (!recordedHash) {
	    return { ok: false, message: "Artifact is missing source fingerprint metadata; rerun the harness." };
	  }
	  const upToDate = recordedHash === currentSourceFingerprint?.hash;
	  return upToDate
	    ? { ok: true, message: "Artifact source fingerprint matches current files." }
	    : { ok: false, message: "Artifact source fingerprint is stale; rerun the harness." };
	}

	/**
	 * Collects benchmark rows from an artifact, tagging each with its source label.
	 *
	 * Rows are gathered from the `benchmarkResults`, `rows`, and `results`
	 * arrays, in that order; any of them that is absent or not an array is
	 * ignored. Entries that are not objects (for example `null`) are dropped
	 * so that callers can read row properties without crashing the audit.
	 *
	 * @param {{ok: boolean, value?: any}} artifact Result of readJson.
	 * @param {string} source Label stored alongside every row.
	 * @returns {Array<{source: string, row: object}>} Tagged rows; empty when the artifact is unusable.
	 */
	function rowsFromArtifact(artifact, source) {
	  if (!artifact?.ok || !artifact.value) return [];
	  const rows = [];
	  for (const key of ["benchmarkResults", "rows", "results"]) {
	    const candidates = artifact.value[key];
	    if (!Array.isArray(candidates)) continue;
	    for (const row of candidates) {
	      if (row !== null && typeof row === "object") rows.push({ source, row });
	    }
	  }
	  return rows;
	}

	/**
	 * Decides whether a row's first TTS chunk should be treated as unsafe.
	 *
	 * A row is unsafe when the harness explicitly recorded
	 * `firstTtsWordBoundarySafe === false`, or when the chunk text itself
	 * appears to stop in the middle of a word.
	 *
	 * @param {object} row Benchmark row.
	 * @returns {boolean} True when the first chunk is considered unsafe.
	 */
	function unsafeFirstTtsChunk(row) {
	  return row.firstTtsWordBoundarySafe === false || firstTtsLooksMidWord(row);
	}

	/**
	 * Heuristically detects a first TTS chunk that was cut inside a word.
	 *
	 * Both texts are trimmed. The chunk must be a prefix of the full output;
	 * otherwise nothing can be inferred and the result is false. The chunk is
	 * flagged when its last character and the output character right after it
	 * are both word-like.
	 *
	 * @param {{firstTtsText?: any, output?: any}} row Benchmark row.
	 * @returns {boolean} True when the chunk boundary falls between two word-like characters.
	 */
	function firstTtsLooksMidWord(row) {
	  const chunk = String(row.firstTtsText ?? "").trim();
	  const output = String(row.output ?? "").trim();
	  if (!chunk || !output.startsWith(chunk)) return false;
	  const before = chunk.at(-1) ?? "";
	  // Empty when the chunk is the whole output, which is never mid-word.
	  const after = output[chunk.length] ?? "";
	  return isWordLikeChar(before) && isWordLikeChar(after);
	}

	/**
	 * Recognizes Chrome command-line arguments that replace real capture
	 * devices with fake ones.
	 *
	 * @param {any} arg One entry of the run's extra Chrome args; non-string entries are never a match.
	 * @returns {boolean} True for the fake-device switch or a fake audio/video capture file switch.
	 */
	function isFakeCaptureArg(arg) {
	  // Config comes from JSON, so an entry may not be a string; avoid throwing on `.startsWith`.
	  if (typeof arg !== "string") return false;
	  return (
	    arg === "--use-fake-device-for-media-stream" ||
	    arg.startsWith("--use-file-for-fake-audio-capture=") ||
	    arg.startsWith("--use-file-for-fake-video-capture=")
	  );
	}

	/**
	 * Tests whether the given text contains an ASCII letter, digit, or apostrophe.
	 * Callers pass a single character (or an empty string).
	 *
	 * @param {string} char Character to classify.
	 * @returns {boolean} True when a word-like character is present.
	 */
	function isWordLikeChar(char) {
	  const wordLike = /[A-Za-z0-9']/;
	  return wordLike.test(char);
	}

	/**
	 * Formats a ratio as a whole-number percentage.
	 *
	 * @param {any} value Ratio such as 0.25; anything that is not a finite number renders as "-".
	 * @returns {string} For example "25%", or "-" when no finite value is available.
	 */
	function formatPercent(value) {
	  return Number.isFinite(value) ? `${Math.round(value * 100)}%` : "-";
	}

	// Script entry: run the audit and turn any unexpected rejection into a non-zero exit code.
	main().catch((failure) => {
	  console.error(failure.stack ?? failure.message);
	  process.exitCode = 1;
	});