/**
 * Live Transcript MCP App
 *
 * Simple speech-to-text transcription using Web Speech API.
 * Transcribed text can be sent to the host via ui/message.
 */
import {
  App,
  type McpUiHostContext,
  applyDocumentTheme,
} from "@modelcontextprotocol/ext-apps";
import "./global.css";
import "./mcp-app.css";

/** Console logger that prefixes every message with "[Transcript]". */
const log = {
  info: console.log.bind(console, "[Transcript]"),
  warn: console.warn.bind(console, "[Transcript]"),
  error: console.error.bind(console, "[Transcript]"),
};

// ============================================================================
// DOM Elements
// ============================================================================

// NOTE(review): these lookups assume the host HTML always provides the
// elements; a missing element surfaces later as a TypeError on first use.
const mainEl = document.querySelector(".transcript-app") as HTMLElement;
const levelBarEl = document.getElementById("level-bar")!;
const micLevelEl = document.getElementById("mic-level")!;
const timerEl = document.getElementById("timer")!;
const transcriptEl = document.getElementById("transcript")!;
const startBtn = document.getElementById("start-btn")!;
const copyBtn = document.getElementById("copy-btn")!;
const clearBtn = document.getElementById("clear-btn")!;
const sendBtn = document.getElementById("send-btn") as HTMLButtonElement;

// ============================================================================
// State
// ============================================================================

let isListening = false;
let lastSentIndex = 0; // Track how many entries have been sent

// Timer
let timerStart: number | null = null;
let timerInterval: number | null = null;

// Audio
let audioContext: AudioContext | null = null;
let micStream: MediaStream | null = null;
let micAnalyser: AnalyserNode | null = null;
let animationFrame: number | null = null;

// Speech Recognition
let recognition: SpeechRecognition | null = null;

// ============================================================================
// MCP App Setup
// ============================================================================

const app = new App({ name: "Live Transcript", version: "1.0.0" });

// Release the microphone and recognizer when the host tears the app down.
app.onteardown = async () => {
  log.info("App teardown");
  stopListening();
  return {};
};

app.onerror = log.error;

// Mirror host-provided safe-area insets and theme onto the document.
app.onhostcontextchanged = (ctx: McpUiHostContext) => {
  if (ctx.safeAreaInsets) {
    mainEl.style.paddingTop = `${ctx.safeAreaInsets.top}px`;
    mainEl.style.paddingRight = `${ctx.safeAreaInsets.right}px`;
    mainEl.style.paddingBottom = `${ctx.safeAreaInsets.bottom}px`;
    mainEl.style.paddingLeft = `${ctx.safeAreaInsets.left}px`;
  }
  if (ctx.theme) {
    applyDocumentTheme(ctx.theme);
  }
};

// ============================================================================
// Audio Capture
// ============================================================================

/**
 * Requests microphone access and wires the stream into an analyser that
 * drives the level meter.
 *
 * On failure the partially initialised state (`audioContext`, `micStream`)
 * is left for `stopAudioCapture()` to release.
 *
 * @returns `true` once the level meter loop is running; `false` when the
 *   microphone could not be opened, or when capture was stopped or restarted
 *   while the permission request was still pending.
 */
async function startAudioCapture(): Promise<boolean> {
  try {
    // Created before the await so construction happens synchronously in the
    // caller's turn, exactly as before.
    const context = new AudioContext();
    audioContext = context;

    const stream = await navigator.mediaDevices.getUserMedia({ audio: true });

    // stopAudioCapture() or a newer startAudioCapture() may have run while the
    // permission prompt was open. In that case this context is closed or
    // replaced, so release the stream we just obtained instead of leaking it.
    if (audioContext !== context) {
      stream.getTracks().forEach((track) => track.stop());
      log.warn("Audio capture was stopped before microphone access resolved");
      return false;
    }

    micStream = stream;
    const source = context.createMediaStreamSource(stream);
    const analyser = context.createAnalyser();
    analyser.fftSize = 256;
    source.connect(analyser);
    micAnalyser = analyser;

    // Never run two meter loops at once.
    if (animationFrame !== null) {
      cancelAnimationFrame(animationFrame);
      animationFrame = null;
    }
    updateAudioLevels();
    log.info("Audio capture started");
    return true;
  } catch (e) {
    log.error("Failed to start audio capture:", e);
    return false;
  }
}

/**
 * Animation-frame loop that paints the microphone level bar.
 *
 * While listening, the bar width is the mean of the analyser's byte frequency
 * data scaled so that an average of 128 maps to 100% (clamped). Otherwise the
 * bar is reset to 0%. The loop reschedules itself until `stopAudioCapture()`
 * cancels the pending frame.
 */
function updateAudioLevels() {
  if (micAnalyser && isListening) {
    const bins = new Uint8Array(micAnalyser.frequencyBinCount);
    micAnalyser.getByteFrequencyData(bins);
    const average = bins.reduce((sum, value) => sum + value, 0) / bins.length;
    const level = Math.min(100, (average / 128) * 100);
    micLevelEl.style.width = `${level}%`;
  } else {
    micLevelEl.style.width = "0%";
  }
  animationFrame = requestAnimationFrame(updateAudioLevels);
}

/**
 * Stops the level meter loop, releases the microphone tracks, closes the
 * audio context and resets the level bar. Safe to call repeatedly.
 */
function stopAudioCapture() {
  if (animationFrame !== null) {
    cancelAnimationFrame(animationFrame);
    animationFrame = null;
  }
  if (micStream) {
    micStream.getTracks().forEach((track) => track.stop());
    micStream = null;
  }
  micAnalyser = null;
  if (audioContext) {
    // close() returns a promise; surface a rejection instead of leaving it
    // unhandled.
    audioContext.close().catch((e: unknown) => {
      log.warn("Failed to close audio context:", e);
    });
    audioContext = null;
  }
  micLevelEl.style.width = "0%";
}

//
// ============================================================================
// Speech Recognition
// ============================================================================

/**
 * Creates and starts a continuous, interim-results speech recognition session
 * for "en-US".
 *
 * Final results are appended to the transcript (and the send button and model
 * context are refreshed); interim results update the single interim entry.
 * When a session ends while the app is still listening, that same session is
 * restarted.
 *
 * @returns `true` if recognition was started, `false` if the API is
 *   unavailable or `start()` threw.
 */
function startSpeechRecognition(): boolean {
  const SpeechRecognitionCtor =
    window.SpeechRecognition ?? window.webkitSpeechRecognition;
  if (!SpeechRecognitionCtor) {
    log.warn("Speech recognition not supported");
    return false;
  }

  // Handlers close over this local instance rather than the mutable global,
  // so a late event from an old session can never act on a newer one.
  const session = new SpeechRecognitionCtor();
  recognition = session;
  session.continuous = true;
  session.interimResults = true;
  session.lang = "en-US";

  session.onstart = () => {
    log.info("Speech recognition started");
  };

  session.onresult = (event) => {
    const e = event as SpeechRecognitionEvent;
    for (let i = e.resultIndex; i < e.results.length; i++) {
      const result = e.results[i];
      const transcript = result?.[0]?.transcript;
      if (!result || transcript === undefined) continue;

      if (result.isFinal) {
        addTranscriptEntry(transcript, true);
        updateSendButton();
        updateModelContext();
      } else {
        updateInterimTranscript(transcript);
      }
    }
  };

  session.onerror = (event) => {
    const e = event as SpeechRecognitionErrorEvent;
    log.error("Speech recognition error:", e.error);
    // Permission-style errors cannot be fixed by restarting. Stop listening so
    // the onend handler does not restart the session in an endless loop.
    if (e.error === "not-allowed") {
      addTranscriptEntry("Microphone access denied", true);
      stopListening();
    } else if (e.error === "service-not-allowed") {
      addTranscriptEntry("Speech recognition not available", true);
      stopListening();
    }
  };

  session.onend = () => {
    log.info("Speech recognition ended");
    // Restart only if we are still supposed to be listening AND this is still
    // the active session.
    if (!isListening || recognition !== session) return;
    try {
      session.start();
    } catch (e) {
      log.warn("Failed to restart speech recognition:", e);
    }
  };

  try {
    session.start();
    return true;
  } catch (e) {
    log.error("Failed to start speech recognition:", e);
    return false;
  }
}

/**
 * Stops the active recognition session, if any, and forgets it.
 * Events already queued by the session may still be delivered afterwards.
 */
function stopSpeechRecognition() {
  const session = recognition;
  if (!session) return;

  // Clear the global first so the session's onend handler sees that it is no
  // longer active.
  recognition = null;
  try {
    session.stop();
  } catch (e) {
    // Best effort: there is nothing more to do than report it.
    log.warn("Failed to stop speech recognition:", e);
  }
}

// ============================================================================
// UI Helpers
// ============================================================================

/** Removes the placeholder element from the transcript, if present. */
function clearTranscriptPlaceholder() {
  transcriptEl.querySelector(".transcript-placeholder")?.remove();
}
/**
 * Formats a duration as `m:ss`.
 *
 * @param seconds Elapsed seconds; negative or fractional values are clamped
 *   to a non-negative whole number.
 */
function formatTime(seconds: number): string {
  const whole = Math.max(0, Math.floor(seconds));
  const mins = Math.floor(whole / 60);
  const secs = whole % 60;
  return `${mins}:${secs.toString().padStart(2, "0")}`;
}

/** Resets the timer display to 0:00 and starts updating it once per second. */
function startTimer() {
  // Drop any interval left over from a previous start so it cannot leak.
  if (timerInterval !== null) {
    clearInterval(timerInterval);
  }
  const startedAt = Date.now();
  timerStart = startedAt;
  timerEl.textContent = "0:00";
  timerEl.classList.add("active");
  timerInterval = window.setInterval(() => {
    if (timerStart !== null) {
      const elapsed = Math.floor((Date.now() - timerStart) / 1000);
      timerEl.textContent = formatTime(elapsed);
    }
  }, 1000);
}

/** Stops the timer updates; the last displayed value is left in place. */
function stopTimer() {
  if (timerInterval !== null) {
    clearInterval(timerInterval);
    timerInterval = null;
  }
  timerEl.classList.remove("active");
}

/**
 * Builds the inner markup shared by final and interim transcript entries.
 * The timestamp lives in its own `.timestamp` element because `formatEntry`
 * looks it up by that class.
 */
function buildEntryHtml(text: string): string {
  const timestamp = new Date().toLocaleTimeString();
  return `<span class="timestamp">${escapeHtml(timestamp)}</span> ${escapeHtml(text)}`;
}

/**
 * Appends an entry to the transcript, replacing any current interim entry.
 *
 * @param text Entry text; whitespace-only text is ignored.
 * @param isFinal When `false` the entry is tagged with the `interim` class.
 */
function addTranscriptEntry(text: string, isFinal: boolean) {
  // Skip empty entries
  if (!text.trim()) return;

  clearTranscriptPlaceholder();

  // Remove interim entry
  transcriptEl.querySelector(".transcript-entry.interim")?.remove();

  const entry = document.createElement("p");
  entry.className = `transcript-entry${isFinal ? "" : " interim"}`;
  entry.innerHTML = buildEntryHtml(text);
  transcriptEl.appendChild(entry);
}

/** Creates the interim entry if needed and rewrites it with `text`. */
function updateInterimTranscript(text: string) {
  clearTranscriptPlaceholder();

  let interim = transcriptEl.querySelector<HTMLElement>(
    ".transcript-entry.interim",
  );
  if (!interim) {
    interim = document.createElement("p");
    interim.className = "transcript-entry interim";
    transcriptEl.appendChild(interim);
  }
  interim.innerHTML = buildEntryHtml(text);
}

/** Escapes `text` for use in HTML text content by round-tripping a detached element. */
function escapeHtml(text: string): string {
  const div = document.createElement("div");
  div.textContent = text;
  return div.innerHTML;
}

/**
 * Renders one entry as `[timestamp] text`, or just `text` when the entry has
 * no `.timestamp` element. Returns "" when the entry has no text.
 */
function formatEntry(entry: HTMLElement): string {
  const timestamp = entry.querySelector(".timestamp")?.textContent?.trim();
  // Work on a copy so stripping the timestamp does not touch the live DOM.
  const clone = entry.cloneNode(true) as HTMLElement;
  clone.querySelector(".timestamp")?.remove();
  const text = clone.textContent?.trim() || "";
  if (!text) return "";
  return timestamp ? `[${timestamp}] ${text}` : text;
}

/** Joins the non-empty formatted entries with newlines. */
function formatEntries(entries: HTMLElement[]): string {
  return entries.map(formatEntry).filter(Boolean).join("\n");
}

/** All finalised (non-interim) transcript entries in document order. */
function getAllEntries(): HTMLElement[] {
  return Array.from(
    transcriptEl.querySelectorAll<HTMLElement>(
      ".transcript-entry:not(.interim)",
    ),
  );
}

/** Finalised entries that come after the last one sent to the host. */
function getUnsentEntries(): HTMLElement[] {
  return getAllEntries().slice(lastSentIndex);
}

function getAllTranscriptText(): string {
  return formatEntries(getAllEntries());
}

function getUnsentText(): string {
  return formatEntries(getUnsentEntries());
}

/** Enables the send button only while there are unsent entries. */
function updateSendButton() {
  sendBtn.disabled = getUnsentEntries().length === 0;
}

/**
 * Pushes the unsent transcript text to the host's model context, or an empty
 * content list when nothing is unsent. Does nothing if the host does not
 * advertise the `updateModelContext` capability.
 */
function updateModelContext() {
  const caps = app.getHostCapabilities();
  if (!caps?.updateModelContext) return;

  const text = getUnsentText();
  log.info("Updating model context:", text || "(empty)");

  app
    .updateModelContext({
      content: text
        ? [{ type: "text", text: `[Live transcript]: ${text}` }]
        : [],
    })
    .catch((e: unknown) => {
      log.warn("Failed to update model context:", e);
    });
}

// ============================================================================
// Controls
// ============================================================================

// True while startListening() is waiting on microphone permission, so that a
// second start cannot run concurrently with the first.
let startInFlight = false;

/**
 * Switches the UI into recording mode, then starts audio capture and speech
 * recognition. If either cannot start, a message is added to the transcript
 * and listening is stopped again.
 */
async function startListening() {
  if (startInFlight) return;
  startInFlight = true;
  try {
    isListening = true;
    startBtn.innerHTML = ` Stop `;
    startBtn.classList.add("recording");
    levelBarEl.classList.add("active");
    startTimer();

    const micOk = await startAudioCapture();

    // The user may have pressed Stop while the permission prompt was open.
    // Release whatever the capture attempt acquired and stay stopped.
    if (!isListening) {
      stopAudioCapture();
      return;
    }

    if (!micOk) {
      addTranscriptEntry("Microphone access denied", true);
      stopListening();
      return;
    }

    if (!startSpeechRecognition()) {
      addTranscriptEntry("Speech recognition not available", true);
      stopListening();
    }
  } finally {
    startInFlight = false;
  }
}

/** Returns the UI to idle and releases the timer, recognizer and microphone. */
function stopListening() {
  isListening = false;
  startBtn.innerHTML = ` Start `;
  startBtn.classList.remove("recording");
  levelBarEl.classList.remove("active");
  stopTimer();
  stopSpeechRecognition();
  stopAudioCapture();
}

startBtn.addEventListener("click", () => {
  if (isListening) {
    stopListening();
  } else {
    startListening().catch((e: unknown) => {
      log.error("Failed to start listening:", e);
      stopListening();
    });
  }
});

copyBtn.addEventListener("click", async () => {
  const text = getAllTranscriptText();
  if (!text) return;

  try {
    await navigator.clipboard.writeText(text);
    // Brief visual feedback
    copyBtn.classList.add("copied");
    setTimeout(() => copyBtn.classList.remove("copied"), 1000);
    log.info("Transcript copied to clipboard");
  } catch (e) {
    log.error("Failed to copy:", e);
  }
});

clearBtn.addEventListener("click", () => {
  // The placeholder carries the class that clearTranscriptPlaceholder()
  // removes once speech arrives.
  transcriptEl.innerHTML =
    '<p class="transcript-placeholder">Your speech will appear here...</p>';
  lastSentIndex = 0;
  updateSendButton();
  updateModelContext();
});

sendBtn.addEventListener("click", async () => {
  // Snapshot the entries being sent; everything below refers to this snapshot
  // so entries finalised during the request are not mistaken for sent ones.
  const unsentEntries = getUnsentEntries();
  const lastEntry = unsentEntries[unsentEntries.length - 1];
  if (!lastEntry) return;

  const transcriptText = formatEntries(unsentEntries);
  if (!transcriptText) return;

  log.info("Sending transcript:", transcriptText);

  // Block duplicate sends while the request is in flight.
  sendBtn.disabled = true;
  try {
    const { isError } = await app.sendMessage({
      role: "user",
      content: [{ type: "text", text: transcriptText }],
    });

    if (isError) {
      log.warn("Message was rejected");
      return;
    }
    log.info("Message sent successfully");

    // The transcript may have been cleared while we were waiting; in that
    // case there is nothing left to mark and lastSentIndex is already reset.
    if (!transcriptEl.contains(lastEntry)) return;

    // Mark entries as sent
    unsentEntries.forEach((entry) => entry.classList.add("sent"));

    // Replace any existing divider with one after the last sent entry
    transcriptEl.querySelector(".sent-divider")?.remove();
    const divider = document.createElement("div");
    divider.className = "sent-divider";
    divider.textContent = `sent ${new Date().toLocaleTimeString()}`;
    lastEntry.insertAdjacentElement("afterend", divider);

    // Everything up to and including the last sent entry is now sent.
    lastSentIndex = getAllEntries().indexOf(lastEntry) + 1;
    updateModelContext(); // Refresh context: only still-unsent text remains
  } catch (e) {
    log.error("Failed to send message:", e);
  } finally {
    updateSendButton();
  }
});

// ============================================================================
// Initialize
// ============================================================================

app
  .connect()
  .then(() => {
    log.info("Connected to host");
    // Apply the initial host context through the same handler used for changes.
    const ctx = app.getHostContext();
    if (ctx) {
      app.onhostcontextchanged?.(ctx);
    }
  })
  .catch((e: unknown) => {
    log.error("Failed to connect to host:", e);
  });