Spaces:
Configuration error
Configuration error
| import { readFile, writeFile, access } from "node:fs/promises"; | |
| import { constants } from "node:fs"; | |
| import { tmpdir } from "node:os"; | |
| import { dirname, resolve } from "node:path"; | |
| import { mkdir } from "node:fs/promises"; | |
| import { sourceFingerprint } from "./source-fingerprint.mjs"; | |
/**
 * Builds an absolute artifact path: the named environment variable wins when it is set,
 * otherwise the given file name under the OS temp directory is used.
 */
function artifactLocation(envName, defaultFileName) {
  return resolve(process.env[envName] ?? `${tmpdir()}/${defaultFileName}`);
}

// Input artifacts read by the audit, keyed by the name each check looks up.
const paths = {
  clientSide: artifactLocation("BROWSER_SPEAK_CLIENT_SIDE_JSON", "browser-speak-client-side-smoke.json"),
  loopback: artifactLocation("BROWSER_SPEAK_LOOPBACK_JSON", "browser-speak-loopback-series.json"),
  fakeMic: artifactLocation("BROWSER_SPEAK_FAKE_MIC_JSON", "browser-speak-fake-mic-results.json"),
  realMic: artifactLocation("BROWSER_SPEAK_REAL_MIC_JSON", "browser-speak-real-mic-series.json"),
  webgpu: artifactLocation("BROWSER_SPEAK_WEBGPU_JSON", "browser-speak-webgpu-results.json"),
  ui: artifactLocation("BROWSER_SPEAK_UI_JSON", "browser-speak-ui-smoke.json"),
  evidenceExport: artifactLocation(
    "BROWSER_SPEAK_EVIDENCE_EXPORT_JSON",
    "browser-speak-evidence-export-smoke.json",
  ),
};

// Where the combined audit report is written.
const resultPath = artifactLocation("BROWSER_SPEAK_AUDIT_JSON", "browser-speak-validation-audit.json");

// Soft mode is opt-in: only the literal string "true" enables it.
const soft = process.env.BROWSER_SPEAK_AUDIT_SOFT === "true";

// Both requirements are on by default: only the literal string "false" disables them.
const requireRealMic = process.env.BROWSER_SPEAK_AUDIT_REQUIRE_REAL_MIC !== "false";
const requireHardwareWebgpu = process.env.BROWSER_SPEAK_AUDIT_REQUIRE_HARDWARE_WEBGPU !== "false";

// Word-error-rate ceilings used by the real-mic and loopback checks.
const realMicMaxWer = Number(process.env.BROWSER_SPEAK_AUDIT_REAL_MIC_MAX_WER ?? 0.25);
const loopbackMaxMedianWer = Number(process.env.BROWSER_SPEAK_AUDIT_LOOPBACK_MAX_MEDIAN_WER ?? 0);

// Assigned by main() before any check runs; read by sourceFreshness() and stale().
let currentSourceFingerprint = null;
/**
 * Entry point of the audit.
 *
 * Computes the current source fingerprint, loads every artifact listed in `paths`,
 * runs all checks, writes the combined report to `resultPath`, and logs one line
 * per check. The process exit code is set to 1 when a required check has not
 * passed, unless soft mode is enabled.
 */
async function main() {
  currentSourceFingerprint = await sourceFingerprint();

  // Load all artifacts concurrently, keeping the key each check looks up.
  const loadedEntries = await Promise.all(
    Object.entries(paths).map(async ([key, path]) => {
      const artifact = await readJson(path);
      return [key, artifact];
    }),
  );
  const artifacts = Object.fromEntries(loadedEntries);

  // The static-file check is the only asynchronous check; it stays first in the report.
  const staticFiles = await checkStaticFiles();
  const checks = [
    staticFiles,
    checkUi(artifacts.ui),
    checkEvidenceExport(artifacts.evidenceExport),
    checkClientSide(artifacts.clientSide),
    checkLoopback(artifacts.loopback),
    checkFirstTtsChunkSafety(artifacts),
    checkFakeMic(artifacts.fakeMic),
    checkRealMic(artifacts.realMic),
    checkHardwareWebgpu(artifacts.webgpu),
  ];

  // Only required checks gate the overall verdict.
  const passed = checks.every((check) => !check.required || check.status === "pass");

  const payload = {
    generatedAt: new Date().toISOString(),
    sourceFingerprint: currentSourceFingerprint,
    passed,
    paths,
    checks,
    nextActions: nextActions(checks),
  };

  await mkdir(dirname(resultPath), { recursive: true });
  const serialized = JSON.stringify(payload, null, 2);
  await writeFile(resultPath, `${serialized}\n`);
  console.log(`Wrote validation audit JSON: ${resultPath}`);

  for (const { required, status, name, message } of checks) {
    const tier = required ? "required" : "supporting";
    console.log(`${tier} ${status}: ${name} - ${message}`);
  }

  if (!(passed || soft)) process.exitCode = 1;
}
/**
 * Verifies that the static demo files can be accessed.
 *
 * Paths are relative, so they are resolved against the process working directory.
 *
 * @returns {Promise<object>} A required check result; `evidence.missing` lists absent files.
 */
async function checkStaticFiles() {
  const files = [
    "index.html",
    "styles.css",
    "app.js",
    "workers/asr-worker.js",
    "workers/llm-worker.js",
    "workers/tts-worker.js",
  ];

  // Probe one file at a time so `absent` keeps the order of `files`.
  const absent = [];
  for (const file of files) {
    const present = await exists(file);
    if (!present) absent.push(file);
  }

  const allPresent = absent.length === 0;
  const message = allPresent ? "Static demo files are present." : `Missing files: ${absent.join(", ")}.`;
  return {
    name: "demo files",
    required: true,
    status: allPresent ? "pass" : "fail",
    message,
    evidence: { files, missing: absent },
  };
}
/**
 * Required check: the UI smoke artifact must be readable, fresh, and report `passed === true`.
 *
 * @param {object} artifact - Result of readJson() for the UI smoke JSON.
 * @returns {object} Check result with status "missing", "stale", "pass", or "fail".
 */
function checkUi(artifact) {
  const name = "UI smoke";
  if (!artifact.ok) {
    return missing(name, true, artifact, "UI smoke JSON is missing or unreadable.");
  }

  const { ok: isFresh, message: freshnessMessage } = sourceFreshness(artifact);
  if (!isFresh) {
    return stale(name, true, artifact, freshnessMessage);
  }

  const smokePassed = artifact.value?.passed === true;
  let status = "fail";
  let message = "Desktop/mobile UI smoke has not passed.";
  if (smokePassed) {
    status = "pass";
    message = "Desktop/mobile UI smoke passed.";
  }
  return {
    name,
    required: true,
    status,
    message,
    evidence: pick(artifact.value, ["passed", "config", "errors"]),
  };
}
/**
 * Required check: the evidence-export smoke artifact must be readable, fresh, and
 * its summary must satisfy every restore / export / clear gate listed below.
 *
 * @param {object} artifact - Result of readJson() for the evidence-export smoke JSON.
 * @returns {object} Check result with status "missing", "stale", "pass", or "fail".
 */
function checkEvidenceExport(artifact) {
  const name = "evidence export smoke";
  if (!artifact.ok) {
    return missing(name, true, artifact, "Evidence export smoke JSON is missing.");
  }

  const { ok: isFresh, message: freshnessMessage } = sourceFreshness(artifact);
  if (!isFresh) {
    return stale(name, true, artifact, freshnessMessage);
  }

  const summary = artifact.value?.summary ?? {};
  // Each gate is a side-effect-free comparison, so evaluating all of them up front is safe.
  const gates = [
    artifact.value?.passed === true,
    summary.restoredRows >= 1,
    summary.exportedRows >= 1,
    /^browser-speak-evidence-.*\.json$/.test(summary.downloadName ?? ""),
    summary.downloadHrefScheme === "blob",
    summary.clearedRows === 0,
    summary.persistedAfterClear === null,
  ];
  const passed = gates.every(Boolean);

  const message = passed
    ? "Autosaved rows restore, evidence JSON download is requested, and Clear removes saved rows."
    : "Evidence export, autosave restore, or Clear behavior did not pass.";
  return {
    name,
    required: true,
    status: passed ? "pass" : "fail",
    message,
    evidence: pick(artifact.value, ["passed", "summary", "error", "config"]),
  };
}
/**
 * Required check: the client-side smoke artifact must be readable, fresh, and its
 * summary must report zero benchmark requests, zero server-inference suspects,
 * zero benchmark errors, and an empty `missingTasks` array.
 *
 * @param {object} artifact - Result of readJson() for the client-side smoke JSON.
 * @returns {object} Check result with status "missing", "stale", "pass", or "fail".
 */
function checkClientSide(artifact) {
  const name = "client-side/no-server smoke";
  if (!artifact.ok) {
    return missing(name, true, artifact, "Client-side smoke JSON is missing.");
  }

  const { ok: isFresh, message: freshnessMessage } = sourceFreshness(artifact);
  if (!isFresh) {
    return stale(name, true, artifact, freshnessMessage);
  }

  const summary = artifact.value?.summary ?? {};
  const { missingTasks } = summary;
  const noMissingTasks = Array.isArray(missingTasks) && missingTasks.length === 0;
  const countersAreZero =
    summary.benchmarkRequests === 0 && summary.serverInferenceSuspects === 0 && summary.benchmarkErrors === 0;
  const passed = artifact.value?.passed === true && countersAreZero && noMissingTasks;

  const message = passed
    ? "Benchmark phase had no network requests, server-inference suspects, row errors, or missing tasks."
    : "Client-side smoke did not prove the no-server benchmark phase.";
  return {
    name,
    required: true,
    status: passed ? "pass" : "fail",
    message,
    evidence: pick(artifact.value, ["passed", "summary", "error", "config"]),
  };
}
/**
 * Required check: the synthetic loopback series must be readable, fresh, and stable.
 *
 * Passing requires `passed === true`, at least 3 completed runs, no error runs,
 * exact transcripts in at least half of the completed runs (rounded up), at least
 * as many identity passes as exact transcripts, a finite median WER no greater
 * than `loopbackMaxMedianWer`, and a finite median speech-end-to-first-audio latency.
 *
 * @param {object} artifact - Result of readJson() for the loopback series JSON.
 * @returns {object} Check result with status "missing", "stale", "pass", or "fail".
 */
function checkLoopback(artifact) {
  const name = "loopback stability";
  if (!artifact.ok) return missing(name, true, artifact, "Loopback series JSON is missing.");
  const freshness = sourceFreshness(artifact);
  if (!freshness.ok) return stale(name, true, artifact, freshness.message);
  const summary = artifact.value?.summary ?? {};
  const passed =
    artifact.value?.passed === true &&
    summary.completedRuns >= 3 &&
    summary.errorRuns === 0 &&
    summary.exactTranscriptRuns >= Math.ceil(summary.completedRuns / 2) &&
    summary.identityPasses >= summary.exactTranscriptRuns &&
    Number.isFinite(summary.medianWer) &&
    summary.medianWer <= loopbackMaxMedianWer &&
    Number.isFinite(summary.medianSpeechEndToFirstAudioMs);
  return {
    name,
    required: true,
    status: passed ? "pass" : "fail",
    // Report the actual run count: the gate is ">= 3", so a fixed "3" would misreport longer series.
    message: passed
      ? `Synthetic loopback completed ${summary.completedRuns} rows with median WER ${formatPercent(
          summary.medianWer,
        )} and ${summary.exactTranscriptRuns}/${summary.completedRuns} exact transcripts.`
      : "Loopback stability evidence is missing or below threshold.",
    evidence: pick(artifact.value, ["passed", "summary", "config", "error"]),
  };
}
/**
 * Required check: no first TTS chunk may end in the middle of a word.
 *
 * Gathers benchmark rows from the client-side, loopback, fake-mic and real-mic
 * artifacts, keeps error-free rows that carry `firstTtsText`, and fails when no
 * such row exists or when any row is flagged by unsafeFirstTtsChunk().
 *
 * @param {object} artifacts - Map of artifact key to readJson() result.
 * @returns {object} Check result with status "pass" or "fail"; evidence holds up to 8 unsafe rows.
 */
function checkFirstTtsChunkSafety(artifacts) {
  const name = "first TTS chunk safety";
  const labelledArtifacts = [
    [artifacts.clientSide, "client-side smoke"],
    [artifacts.loopback, "loopback series"],
    [artifacts.fakeMic, "fake microphone"],
    [artifacts.realMic, "real microphone"],
  ];
  const rows = labelledArtifacts
    .flatMap(([artifact, label]) => rowsFromArtifact(artifact, label))
    .filter(({ row }) => !row.error && row.firstTtsText);

  if (rows.length === 0) {
    return {
      name,
      required: true,
      status: "fail",
      message: "No completed benchmark rows with first TTS chunk text were available.",
      evidence: {},
    };
  }

  const unsafe = rows.filter(({ row }) => unsafeFirstTtsChunk(row));
  const allSafe = unsafe.length === 0;
  const message = allSafe
    ? `${rows.length} first TTS chunk(s) ended at a safe boundary.`
    : `${unsafe.length}/${rows.length} first TTS chunk(s) appear to split a word.`;

  // Cap the evidence at 8 rows to keep the report small.
  const unsafeSample = unsafe.slice(0, 8).map(({ source, row }) => ({
    source,
    kind: row.kind,
    firstTtsText: row.firstTtsText,
    output: row.output,
    firstTtsBoundaryKind: row.firstTtsBoundaryKind ?? null,
    firstTtsWordBoundarySafe: row.firstTtsWordBoundarySafe ?? null,
  }));

  return {
    name,
    required: true,
    status: allSafe ? "pass" : "fail",
    message,
    evidence: { checkedRows: rows.length, unsafe: unsafeSample },
  };
}
/**
 * Supporting (non-required) check: fake-microphone browser-capture regression.
 *
 * Reads `summary.current`, falling back to `summary.all`. Passes when the artifact
 * is fresh, has at least 3 mic runs, a median WER of exactly 0, and a finite
 * median speech-end-to-first-audio latency. Anything else is a warning, never a failure.
 *
 * @param {object} artifact - Result of readJson() for the fake-mic results JSON.
 * @returns {object} Check result with status "missing", "pass", or "warn".
 */
function checkFakeMic(artifact) {
  const name = "fake microphone regression";
  if (!artifact.ok) return missing(name, false, artifact, "Fake mic JSON is missing.");
  const freshness = sourceFreshness(artifact);
  const summary = artifact.value?.summary?.current ?? artifact.value?.summary?.all ?? {};
  const passed =
    freshness.ok &&
    summary.micRuns >= 3 &&
    summary.micMedianWer === 0 &&
    Number.isFinite(summary.micMedianSpeechEndToFirstAudioMs);

  let message;
  if (passed) {
    // Report the actual run count: the gate is ">= 3", so a fixed "3" would misreport longer series.
    message = `Fake mic browser-capture regression has ${summary.micRuns} rows and 0% median WER.`;
  } else if (freshness.ok) {
    message = "Fake mic regression is unavailable or below the expected threshold.";
  } else {
    // A stale or unfingerprinted artifact is reported with the freshness explanation.
    message = freshness.message;
  }

  return {
    name,
    required: false,
    status: passed ? "pass" : "warn",
    message,
    evidence: { summary, freshness },
  };
}
/**
 * Real human microphone validation.
 *
 * Skipped when `requireRealMic` is off. Otherwise the artifact must be readable,
 * must not be a dry run, must be fresh, must not have used or allowed fake capture,
 * and must meet the row-count, error, identity, latency and WER gates.
 *
 * @param {object} artifact - Result of readJson() for the real-mic series JSON.
 * @returns {object} Check result with status "skip", "missing", "stale", "pass", or "fail".
 */
function checkRealMic(artifact) {
  const name = "real microphone validation";
  if (!requireRealMic) {
    return skipped(name, "Real mic requirement disabled by environment.");
  }
  if (!artifact.ok) {
    return missing(name, true, artifact, "Real human microphone JSON is missing.");
  }

  // The dry-run test deliberately comes before the freshness test.
  if (artifact.value?.dryRun) {
    return {
      name,
      required: true,
      status: "missing",
      message: "Only a real-mic dry-run artifact exists; no human speech rows were collected.",
      evidence: pick(artifact.value, ["dryRun", "config", "preflight"]),
    };
  }

  const { ok: isFresh, message: freshnessMessage } = sourceFreshness(artifact);
  if (!isFresh) {
    return stale(name, true, artifact, freshnessMessage);
  }

  const summary = artifact.value?.summary ?? {};
  const config = artifact.value?.config ?? {};
  // Expected number of rows defaults to 3 when the config does not state a count.
  const expected = config.count ?? 3;

  const chromeArgs = Array.isArray(config.extraChromeArgs) ? config.extraChromeArgs : [];
  const fakeCaptureArgs = chromeArgs.filter(isFakeCaptureArg);
  const fakeCaptureAllowed = config.allowFakeCapture === true;

  const usedRealCapture = fakeCaptureArgs.length === 0 && !fakeCaptureAllowed;
  const rowsComplete =
    summary.completedRows >= expected && summary.errorRows === 0 && summary.identityPasses >= expected;
  const metricsOk =
    Number.isFinite(summary.medianSpeechEndToFirstAudioMs) &&
    Number.isFinite(summary.medianWer) &&
    summary.medianWer <= realMicMaxWer;
  const passed = artifact.value?.passed === true && usedRealCapture && rowsComplete && metricsOk;

  const evidence = { ...pick(artifact.value, ["passed", "summary", "config", "error"]), fakeCaptureArgs };
  return {
    name,
    required: true,
    status: passed ? "pass" : "fail",
    message: realMicMessage(passed, fakeCaptureArgs, fakeCaptureAllowed),
    evidence,
  };
}
/**
 * Hardware WebGPU benchmark validation.
 *
 * Skipped when `requireHardwareWebgpu` is off. Otherwise the artifact must be
 * readable and fresh, must not be marked skipped, must report WebGPU as available
 * on a non-software adapter, and must contain at least one candidate whose
 * status is "complete".
 *
 * @param {object} artifact - Result of readJson() for the WebGPU benchmark JSON.
 * @returns {object} Check result with status "skip", "missing", "stale", or "pass".
 *   Note that an unmet gate is also reported as "missing", not "fail".
 */
function checkHardwareWebgpu(artifact) {
  const name = "hardware WebGPU benchmark";
  if (!requireHardwareWebgpu) return skipped(name, "Hardware WebGPU requirement disabled by environment.");
  if (!artifact.ok) return missing(name, true, artifact, "WebGPU benchmark JSON is missing.");
  const freshness = sourceFreshness(artifact);
  if (!freshness.ok) return stale(name, true, artifact, freshness.message);
  const webgpu = artifact.value?.webgpu ?? {};
  // The artifact is untrusted JSON: a non-array `candidates` or a null entry must
  // count as "no completed candidate" instead of throwing and aborting the audit.
  const rawCandidates = artifact.value?.candidates;
  const candidates = Array.isArray(rawCandidates) ? rawCandidates : [];
  const completeCandidates = candidates.filter((candidate) => candidate?.status === "complete");
  const passed =
    artifact.value?.skipped !== true &&
    webgpu.available === true &&
    webgpu.softwareAdapter !== true &&
    completeCandidates.length > 0;
  return {
    name,
    required: true,
    status: passed ? "pass" : "missing",
    // Prefer the reason recorded by the harness when the gate is not met.
    message: passed
      ? "Hardware WebGPU benchmark completed at least one candidate."
      : artifact.value?.reason ?? "Hardware WebGPU benchmark has not completed on a real adapter.",
    evidence: pick(artifact.value, ["skipped", "reason", "webgpu", "candidates", "summary"]),
  };
}
/**
 * Builds the result for a check that was disabled: non-required, status "skip", empty evidence.
 *
 * @param {string} name - Check name.
 * @param {string} message - Why the check was skipped.
 * @returns {object} Check result.
 */
function skipped(name, message) {
  const evidence = {};
  const outcome = { name, required: false, status: "skip", message, evidence };
  return outcome;
}
/**
 * Builds the result for a check whose artifact could not be loaded.
 *
 * @param {string} name - Check name.
 * @param {boolean} required - Whether the check gates the overall verdict.
 * @param {object} artifact - Failed readJson() result; its `path` and `error` become the evidence.
 * @param {string} message - Human-readable explanation.
 * @returns {object} Check result with status "missing".
 */
function missing(name, required, artifact, message) {
  const { path, error } = artifact;
  const evidence = { path, error };
  return { name, required, status: "missing", message, evidence };
}
/**
 * Builds the result for a check whose artifact failed the source-freshness test.
 *
 * @param {string} name - Check name.
 * @param {boolean} required - Whether the check gates the overall verdict.
 * @param {object} artifact - readJson() result; its path and recorded hash become evidence.
 * @param {string} message - Explanation produced by the freshness test.
 * @returns {object} Check result with status "stale"; absent hashes are reported as null.
 */
function stale(name, required, artifact, message) {
  const recordedHash = artifact.value?.sourceFingerprint?.hash ?? null;
  const liveHash = currentSourceFingerprint?.hash ?? null;
  const evidence = {
    path: artifact.path,
    artifactHash: recordedHash,
    currentHash: liveHash,
  };
  return { name, required, status: "stale", message, evidence };
}
/**
 * Turns the required checks that did not pass into a de-duplicated list of follow-up commands.
 *
 * When the real-microphone or hardware-WebGPU check is outstanding, a hosted
 * evidence-capture hint is listed first. After that comes one action per failing
 * required check, in check order. Checks without dedicated advice get a generic
 * "Resolve failed check" entry.
 *
 * @param {object[]} checks - Check results carrying `name`, `required` and `status`.
 * @param {object} [artifactPaths=paths] - Artifact locations quoted in the advice. Defaults
 *   to the configured module-level `paths`, so the advice names the file that was actually
 *   read instead of assuming the temp directory is /tmp.
 * @returns {string[]} Unique action strings.
 */
function nextActions(checks, artifactPaths = paths) {
  const failing = checks.filter((check) => check.required && check.status !== "pass");
  const failingNames = new Set(failing.map((check) => check.name));

  // A Map keeps check names from colliding with Object.prototype keys.
  const adviceByCheck = new Map([
    ["real microphone validation", "Run node tools/run-real-mic-series.mjs on a machine with a real microphone."],
    [
      "hardware WebGPU benchmark",
      "Run node tools/run-webgpu-benchmark.mjs in a Chrome session exposing a hardware WebGPU adapter.",
    ],
    [
      "client-side/no-server smoke",
      `Run node tools/run-client-side-smoke.mjs and inspect ${artifactPaths.clientSide}.`,
    ],
    [
      "evidence export smoke",
      `Run node tools/run-evidence-export-smoke.mjs and inspect ${artifactPaths.evidenceExport}.`,
    ],
    ["loopback stability", `Run node tools/run-loopback-series.mjs and inspect ${artifactPaths.loopback}.`],
    [
      "first TTS chunk safety",
      "Run the client-side smoke and inspect firstTtsText / firstTtsWordBoundarySafe in benchmark rows.",
    ],
    ["UI smoke", "Run node tools/run-ui-smoke.mjs."],
  ]);

  const actions = [];
  if (failingNames.has("real microphone validation") || failingNames.has("hardware WebGPU benchmark")) {
    actions.push(
      "Run node tools/run-hosted-evidence-capture.mjs from a desktop Chrome session with a real microphone and hardware WebGPU to collect hosted browser evidence.",
    );
  }

  for (const { name } of failing) {
    const action = adviceByCheck.get(name) ?? `Resolve failed check: ${name}.`;
    // Keep first-seen order and drop repeats.
    if (!actions.includes(action)) actions.push(action);
  }
  return actions;
}
/**
 * Reads and parses a JSON file without ever rejecting.
 *
 * @param {string} path - File to read as UTF-8.
 * @returns {Promise<object>} `{ ok: true, path, value }` on success, or
 *   `{ ok: false, path, error }` carrying the message of the read or parse failure.
 */
async function readJson(path) {
  try {
    const text = await readFile(path, "utf8");
    const value = JSON.parse(text);
    return { ok: true, path, value };
  } catch (failure) {
    const { message } = failure;
    return { ok: false, path, error: message };
  }
}
/**
 * Reports whether a path is visible to this process.
 *
 * @param {string} path - Path to probe with an F_OK access check.
 * @returns {Promise<boolean>} `true` when the access check succeeds, `false` on any failure.
 */
async function exists(path) {
  let found = true;
  try {
    await access(path, constants.F_OK);
  } catch {
    // Any access failure is treated as "not there".
    found = false;
  }
  return found;
}
/**
 * Copies the listed keys that are present on `value` into a new object.
 *
 * @param {*} value - Source; anything that is falsy or not an object yields `{}`.
 * @param {string[]} keys - Keys to copy, in output order.
 * @returns {object} New object holding only the keys found via the `in` operator.
 */
function pick(value, keys) {
  const isObject = Boolean(value) && typeof value === "object";
  if (!isObject) return {};

  const entries = [];
  for (const key of keys) {
    if (key in value) entries.push([key, value[key]]);
  }
  return Object.fromEntries(entries);
}
/**
 * Chooses the message for the real-microphone check.
 *
 * Precedence: success, then fake-capture Chrome args, then explicitly allowed
 * fake capture, then the generic "missing or below threshold" message.
 *
 * @param {boolean} passed - Whether every real-mic gate was met.
 * @param {string[]} fakeCaptureArgs - Fake-capture Chrome args found in the artifact config.
 * @param {boolean} fakeCaptureAllowed - Whether the config explicitly allowed fake capture.
 * @returns {string} Message text.
 */
function realMicMessage(passed, fakeCaptureArgs, fakeCaptureAllowed) {
  let message;
  if (passed) {
    message = "Real human microphone rows meet count, WER, latency, and identity gates.";
  } else if (fakeCaptureArgs.length > 0) {
    const listed = fakeCaptureArgs.join(", ");
    message = `Real microphone evidence used fake-capture Chrome args: ${listed}.`;
  } else if (fakeCaptureAllowed) {
    message = "Real microphone evidence was collected with fake capture explicitly allowed.";
  } else {
    message = "Real human microphone rows are missing or below threshold.";
  }
  return message;
}
/**
 * Compares the fingerprint hash recorded in an artifact with the current source fingerprint.
 *
 * @param {object} artifact - readJson() result; the hash is read from `value.sourceFingerprint.hash`.
 * @returns {{ok: boolean, message: string}} `ok` is false when the artifact carries no hash
 *   or its hash differs from the current one.
 */
function sourceFreshness(artifact) {
  const recordedHash = artifact.value?.sourceFingerprint?.hash;
  const liveHash = currentSourceFingerprint?.hash;

  let problem = null;
  if (!recordedHash) {
    problem = "Artifact is missing source fingerprint metadata; rerun the harness.";
  } else if (recordedHash !== liveHash) {
    problem = "Artifact source fingerprint is stale; rerun the harness.";
  }

  return problem === null
    ? { ok: true, message: "Artifact source fingerprint matches current files." }
    : { ok: false, message: problem };
}
/**
 * Collects benchmark rows from an artifact, tagging each with a source label.
 *
 * Rows are read from the `benchmarkResults`, `rows` and `results` arrays, in that
 * order. Entries that are not objects (for example `null`) are dropped, because the
 * caller reads properties on every row and a null entry would throw a TypeError.
 *
 * @param {object} artifact - readJson() result; unreadable or empty artifacts yield `[]`.
 * @param {string} source - Label attached to every returned row.
 * @returns {Array<{source: string, row: object}>} Labelled rows in artifact order.
 */
function rowsFromArtifact(artifact, source) {
  if (!artifact?.ok || !artifact.value) return [];
  const rows = [];
  for (const key of ["benchmarkResults", "rows", "results"]) {
    const candidates = artifact.value[key];
    if (!Array.isArray(candidates)) continue;
    // Push one row at a time rather than spreading a potentially large array into push().
    for (const row of candidates) {
      if (row !== null && typeof row === "object") rows.push({ source, row });
    }
  }
  return rows;
}
/**
 * Decides whether a row's first TTS chunk is unsafe.
 *
 * @param {object} row - Benchmark row.
 * @returns {boolean} `true` when the row explicitly records `firstTtsWordBoundarySafe === false`,
 *   otherwise whatever the mid-word heuristic reports.
 */
function unsafeFirstTtsChunk(row) {
  if (row.firstTtsWordBoundarySafe === false) return true;
  return firstTtsLooksMidWord(row);
}
/**
 * Heuristic: does the first TTS chunk stop in the middle of a word?
 *
 * Applies only when the trimmed chunk is a non-empty prefix of the trimmed output.
 * It then checks whether both the chunk's last character and the output character
 * right after the chunk are word-like.
 *
 * @param {object} row - Benchmark row with `firstTtsText` and `output`.
 * @returns {boolean} `true` when the chunk boundary has word-like characters on both sides.
 */
function firstTtsLooksMidWord(row) {
  const spoken = String(row.firstTtsText ?? "").trim();
  const fullOutput = String(row.output ?? "").trim();

  if (spoken === "") return false;
  if (!fullOutput.startsWith(spoken)) return false;

  const lastSpokenChar = spoken.at(-1) ?? "";
  // charAt() yields "" past the end, i.e. when the chunk is the entire output.
  const nextOutputChar = fullOutput.charAt(spoken.length);
  return isWordLikeChar(lastSpokenChar) && isWordLikeChar(nextOutputChar);
}
/**
 * Recognizes Chrome command-line switches that replace real capture devices with fake ones.
 *
 * @param {*} arg - One entry of the artifact's `extraChromeArgs`. The list comes from JSON,
 *   so entries may not be strings; non-strings are never fake-capture args.
 * @returns {boolean} `true` for `--use-fake-device-for-media-stream` and for the
 *   `--use-file-for-fake-audio-capture=` / `--use-file-for-fake-video-capture=` prefixes.
 */
function isFakeCaptureArg(arg) {
  // Guard first: calling startsWith on a number or null would throw and abort the audit.
  if (typeof arg !== "string") return false;
  return (
    arg === "--use-fake-device-for-media-stream" ||
    arg.startsWith("--use-file-for-fake-audio-capture=") ||
    arg.startsWith("--use-file-for-fake-video-capture=")
  );
}
/**
 * Tests whether a character can be part of a word.
 *
 * Word-like means any Unicode letter, combining mark or number, plus the ASCII and
 * typographic apostrophes. The previous ASCII-only class missed accented and
 * non-Latin letters, so a chunk split inside such a word was not detected.
 *
 * @param {string} char - A single character, or "" when there is none.
 * @returns {boolean} `true` when the string contains a word-like character.
 */
function isWordLikeChar(char) {
  return /[\p{L}\p{M}\p{N}'\u2019]/u.test(char);
}
/**
 * Formats a ratio as a whole-number percentage.
 *
 * @param {*} value - Ratio such as 0.25.
 * @returns {string} Rounded percentage like "25%", or "-" when `value` is not a finite number.
 */
function formatPercent(value) {
  return Number.isFinite(value) ? `${Math.round(value * 100)}%` : "-";
}
// Run the audit. An unexpected rejection is printed (stack when available)
// and reported through a non-zero exit code.
main().catch((failure) => {
  const detail = failure.stack ?? failure.message;
  console.error(detail);
  process.exitCode = 1;
});