| | |
| | |
| | |
| | |
| | |
| | |
| | import { |
| | App, |
| | type McpUiHostContext, |
| | applyDocumentTheme, |
| | } from "@modelcontextprotocol/ext-apps"; |
| | import "./global.css"; |
| | import "./mcp-app.css"; |
| |
|
| | const log = { |
| | info: console.log.bind(console, "[Transcript]"), |
| | warn: console.warn.bind(console, "[Transcript]"), |
| | error: console.error.bind(console, "[Transcript]"), |
| | }; |
| |
|
| | |
| | |
| | |
| |
|
| | const mainEl = document.querySelector(".transcript-app") as HTMLElement; |
| | const levelBarEl = document.getElementById("level-bar")!; |
| | const micLevelEl = document.getElementById("mic-level")!; |
| | const timerEl = document.getElementById("timer")!; |
| | const transcriptEl = document.getElementById("transcript")!; |
| | const startBtn = document.getElementById("start-btn")!; |
| | const copyBtn = document.getElementById("copy-btn")!; |
| | const clearBtn = document.getElementById("clear-btn")!; |
| | const sendBtn = document.getElementById("send-btn") as HTMLButtonElement; |
| |
|
| | |
| | |
| | |
| |
|
// --- Mutable session state ---

// True while a capture session is active (mic meter + recognition running).
let isListening = false;
// Number of finalized transcript entries already sent via sendMessage.
let lastSentIndex = 0;

// Session timer: epoch ms when listening started, and its interval handle.
let timerStart: number | null = null;
let timerInterval: number | null = null;

// Web Audio plumbing used only to drive the mic level meter.
let audioContext: AudioContext | null = null;
let micStream: MediaStream | null = null;
let micAnalyser: AnalyserNode | null = null;
let animationFrame: number | null = null;

// Active Web Speech API recognizer; null when not listening.
let recognition: SpeechRecognition | null = null;
| |
|
| | |
| | |
| | |
| |
|
// MCP ext-apps client; identifies this app to the host.
const app = new App({ name: "Live Transcript", version: "1.0.0" });

// Host asked us to shut down: release mic/recognition resources first.
app.onteardown = async () => {
  log.info("App teardown");
  stopListening();
  return {};
};

// Surface SDK-level errors in the console with the app prefix.
app.onerror = log.error;
| |
|
| | app.onhostcontextchanged = (ctx: McpUiHostContext) => { |
| | if (ctx.safeAreaInsets) { |
| | mainEl.style.paddingTop = `${ctx.safeAreaInsets.top}px`; |
| | mainEl.style.paddingRight = `${ctx.safeAreaInsets.right}px`; |
| | mainEl.style.paddingBottom = `${ctx.safeAreaInsets.bottom}px`; |
| | mainEl.style.paddingLeft = `${ctx.safeAreaInsets.left}px`; |
| | } |
| | if (ctx.theme) { |
| | applyDocumentTheme(ctx.theme); |
| | } |
| | }; |
| |
|
| | |
| | |
| | |
| |
|
| | async function startAudioCapture(): Promise<boolean> { |
| | try { |
| | audioContext = new AudioContext(); |
| | micStream = await navigator.mediaDevices.getUserMedia({ audio: true }); |
| |
|
| | const source = audioContext.createMediaStreamSource(micStream); |
| | micAnalyser = audioContext.createAnalyser(); |
| | micAnalyser.fftSize = 256; |
| | source.connect(micAnalyser); |
| |
|
| | updateAudioLevels(); |
| | log.info("Audio capture started"); |
| | return true; |
| | } catch (e) { |
| | log.error("Failed to start audio capture:", e); |
| | return false; |
| | } |
| | } |
| |
|
| | function updateAudioLevels() { |
| | if (micAnalyser && isListening) { |
| | const dataArray = new Uint8Array(micAnalyser.frequencyBinCount); |
| | micAnalyser.getByteFrequencyData(dataArray); |
| |
|
| | const average = dataArray.reduce((a, b) => a + b, 0) / dataArray.length; |
| | const level = Math.min(100, (average / 128) * 100); |
| | micLevelEl.style.width = `${level}%`; |
| | } else { |
| | micLevelEl.style.width = "0%"; |
| | } |
| |
|
| | animationFrame = requestAnimationFrame(updateAudioLevels); |
| | } |
| |
|
| | function stopAudioCapture() { |
| | if (animationFrame) { |
| | cancelAnimationFrame(animationFrame); |
| | animationFrame = null; |
| | } |
| |
|
| | if (micStream) { |
| | micStream.getTracks().forEach((track) => track.stop()); |
| | micStream = null; |
| | } |
| |
|
| | if (audioContext) { |
| | audioContext.close(); |
| | audioContext = null; |
| | } |
| |
|
| | micLevelEl.style.width = "0%"; |
| | } |
| |
|
| | |
| | |
| | |
| |
|
| | function startSpeechRecognition(): boolean { |
| | const SpeechRecognitionCtor = |
| | window.SpeechRecognition || window.webkitSpeechRecognition; |
| |
|
| | if (!SpeechRecognitionCtor) { |
| | log.warn("Speech recognition not supported"); |
| | return false; |
| | } |
| |
|
| | recognition = new SpeechRecognitionCtor(); |
| | recognition.continuous = true; |
| | recognition.interimResults = true; |
| | recognition.lang = "en-US"; |
| |
|
| | recognition.onstart = () => { |
| | log.info("Speech recognition started"); |
| | }; |
| |
|
| | recognition.onresult = (event) => { |
| | const e = event as SpeechRecognitionEvent; |
| | for (let i = e.resultIndex; i < e.results.length; i++) { |
| | const result = e.results[i]; |
| | const transcript = result[0].transcript; |
| |
|
| | if (result.isFinal) { |
| | addTranscriptEntry(transcript, true); |
| | updateSendButton(); |
| | updateModelContext(); |
| | } else { |
| | updateInterimTranscript(transcript); |
| | } |
| | } |
| | }; |
| |
|
| | recognition.onerror = (event) => { |
| | const e = event as SpeechRecognitionErrorEvent; |
| | log.error("Speech recognition error:", e.error); |
| | if (e.error === "not-allowed") { |
| | addTranscriptEntry("Microphone access denied", true); |
| | stopListening(); |
| | } |
| | }; |
| |
|
| | recognition.onend = () => { |
| | log.info("Speech recognition ended"); |
| | if (isListening) { |
| | |
| | try { |
| | recognition?.start(); |
| | } catch (e) { |
| | |
| | } |
| | } |
| | }; |
| |
|
| | try { |
| | recognition.start(); |
| | return true; |
| | } catch (e) { |
| | log.error("Failed to start speech recognition:", e); |
| | return false; |
| | } |
| | } |
| |
|
| | function stopSpeechRecognition() { |
| | if (recognition) { |
| | try { |
| | recognition.stop(); |
| | } catch (e) { |
| | |
| | } |
| | recognition = null; |
| | } |
| | } |
| |
|
| | |
| | |
| | |
| |
|
| | function clearTranscriptPlaceholder() { |
| | const placeholder = transcriptEl.querySelector(".transcript-placeholder"); |
| | if (placeholder) { |
| | placeholder.remove(); |
| | } |
| | } |
| |
|
| | function formatTime(seconds: number): string { |
| | const mins = Math.floor(seconds / 60); |
| | const secs = seconds % 60; |
| | return `${mins}:${secs.toString().padStart(2, "0")}`; |
| | } |
| |
|
| | function startTimer() { |
| | timerStart = Date.now(); |
| | timerEl.textContent = "0:00"; |
| | timerEl.classList.add("active"); |
| | timerInterval = window.setInterval(() => { |
| | if (timerStart) { |
| | const elapsed = Math.floor((Date.now() - timerStart) / 1000); |
| | timerEl.textContent = formatTime(elapsed); |
| | } |
| | }, 1000); |
| | } |
| |
|
| | function stopTimer() { |
| | if (timerInterval) { |
| | clearInterval(timerInterval); |
| | timerInterval = null; |
| | } |
| | timerEl.classList.remove("active"); |
| | } |
| |
|
| | function addTranscriptEntry(text: string, isFinal: boolean) { |
| | |
| | if (!text.trim()) return; |
| |
|
| | clearTranscriptPlaceholder(); |
| |
|
| | |
| | const interim = transcriptEl.querySelector(".transcript-entry.interim"); |
| | if (interim) { |
| | interim.remove(); |
| | } |
| |
|
| | const timestamp = new Date().toLocaleTimeString(); |
| |
|
| | const entry = document.createElement("p"); |
| | entry.className = `transcript-entry${isFinal ? "" : " interim"}`; |
| | entry.innerHTML = `<div class="timestamp">${timestamp}</div>${escapeHtml(text)}`; |
| | transcriptEl.appendChild(entry); |
| | } |
| |
|
| | function updateInterimTranscript(text: string) { |
| | clearTranscriptPlaceholder(); |
| |
|
| | let interim = transcriptEl.querySelector( |
| | ".transcript-entry.interim", |
| | ) as HTMLElement; |
| | if (!interim) { |
| | interim = document.createElement("p"); |
| | interim.className = "transcript-entry interim"; |
| | transcriptEl.appendChild(interim); |
| | } |
| |
|
| | const timestamp = new Date().toLocaleTimeString(); |
| | interim.innerHTML = `<div class="timestamp">${timestamp}</div>${escapeHtml(text)}`; |
| | } |
| |
|
| | function escapeHtml(text: string): string { |
| | const div = document.createElement("div"); |
| | div.textContent = text; |
| | return div.innerHTML; |
| | } |
| |
|
| | function formatEntry(entry: HTMLElement): string { |
| | const timestamp = entry.querySelector(".timestamp")?.textContent?.trim(); |
| | const clone = entry.cloneNode(true) as HTMLElement; |
| | clone.querySelector(".timestamp")?.remove(); |
| | const text = clone.textContent?.trim() || ""; |
| | if (!text) return ""; |
| | return timestamp ? `[${timestamp}] ${text}` : text; |
| | } |
| |
|
| | function formatEntries(entries: HTMLElement[]): string { |
| | return entries.map(formatEntry).filter(Boolean).join("\n"); |
| | } |
| |
|
| | function getAllEntries(): HTMLElement[] { |
| | return Array.from( |
| | transcriptEl.querySelectorAll(".transcript-entry:not(.interim)"), |
| | ) as HTMLElement[]; |
| | } |
| |
|
| | function getUnsentEntries(): HTMLElement[] { |
| | return getAllEntries().slice(lastSentIndex); |
| | } |
| |
|
| | function getAllTranscriptText(): string { |
| | return formatEntries(getAllEntries()); |
| | } |
| |
|
| | function getUnsentText(): string { |
| | return formatEntries(getUnsentEntries()); |
| | } |
| |
|
| | function updateSendButton() { |
| | const unsentEntries = getUnsentEntries(); |
| | sendBtn.disabled = unsentEntries.length === 0; |
| | } |
| |
|
| | function updateModelContext() { |
| | const caps = app.getHostCapabilities(); |
| | if (!caps?.updateModelContext) return; |
| |
|
| | const text = getUnsentText(); |
| | log.info("Updating model context:", text || "(empty)"); |
| |
|
| | app |
| | .updateModelContext({ |
| | content: text |
| | ? [{ type: "text", text: `[Live transcript]: ${text}` }] |
| | : [], |
| | }) |
| | .catch((e: unknown) => { |
| | log.warn("Failed to update model context:", e); |
| | }); |
| | } |
| |
|
| | |
| | |
| | |
| |
|
| | async function startListening() { |
| | isListening = true; |
| | startBtn.innerHTML = ` |
| | <svg class="btn-icon" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2"> |
| | <rect x="6" y="4" width="4" height="16"/> |
| | <rect x="14" y="4" width="4" height="16"/> |
| | </svg> |
| | Stop |
| | `; |
| | startBtn.classList.add("recording"); |
| | levelBarEl.classList.add("active"); |
| | startTimer(); |
| |
|
| | const micOk = await startAudioCapture(); |
| | if (!micOk) { |
| | addTranscriptEntry("Microphone access denied", true); |
| | stopListening(); |
| | return; |
| | } |
| |
|
| | if (!startSpeechRecognition()) { |
| | addTranscriptEntry("Speech recognition not available", true); |
| | stopListening(); |
| | } |
| | } |
| |
|
| | function stopListening() { |
| | isListening = false; |
| | startBtn.innerHTML = ` |
| | <svg class="btn-icon" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2"> |
| | <polygon points="5 3 19 12 5 21 5 3"/> |
| | </svg> |
| | Start |
| | `; |
| | startBtn.classList.remove("recording"); |
| | levelBarEl.classList.remove("active"); |
| | stopTimer(); |
| |
|
| | stopSpeechRecognition(); |
| | stopAudioCapture(); |
| | } |
| |
|
| | startBtn.addEventListener("click", () => { |
| | if (isListening) { |
| | stopListening(); |
| | } else { |
| | startListening(); |
| | } |
| | }); |
| |
|
| | copyBtn.addEventListener("click", async () => { |
| | const text = getAllTranscriptText(); |
| | if (!text) return; |
| |
|
| | try { |
| | await navigator.clipboard.writeText(text); |
| | |
| | copyBtn.classList.add("copied"); |
| | setTimeout(() => copyBtn.classList.remove("copied"), 1000); |
| | log.info("Transcript copied to clipboard"); |
| | } catch (e) { |
| | log.error("Failed to copy:", e); |
| | } |
| | }); |
| |
|
| | clearBtn.addEventListener("click", () => { |
| | transcriptEl.innerHTML = |
| | '<p class="transcript-placeholder">Your speech will appear here...</p>'; |
| | lastSentIndex = 0; |
| | updateSendButton(); |
| | updateModelContext(); |
| | }); |
| |
|
| | sendBtn.addEventListener("click", async () => { |
| | const unsentEntries = getUnsentEntries(); |
| | if (unsentEntries.length === 0) return; |
| |
|
| | const transcriptText = getUnsentText(); |
| | if (!transcriptText) return; |
| |
|
| | log.info("Sending transcript:", transcriptText); |
| |
|
| | try { |
| | const { isError } = await app.sendMessage({ |
| | role: "user", |
| | content: [{ type: "text", text: transcriptText }], |
| | }); |
| |
|
| | if (isError) { |
| | log.warn("Message was rejected"); |
| | } else { |
| | log.info("Message sent successfully"); |
| |
|
| | |
| | unsentEntries.forEach((entry) => entry.classList.add("sent")); |
| |
|
| | |
| | transcriptEl.querySelector(".sent-divider")?.remove(); |
| |
|
| | |
| | const lastEntry = unsentEntries[unsentEntries.length - 1]; |
| | const divider = document.createElement("div"); |
| | divider.className = "sent-divider"; |
| | divider.innerHTML = `<span>sent ${new Date().toLocaleTimeString()}</span>`; |
| | lastEntry.insertAdjacentElement("afterend", divider); |
| |
|
| | |
| | const allEntries = transcriptEl.querySelectorAll( |
| | ".transcript-entry:not(.interim)", |
| | ); |
| | lastSentIndex = allEntries.length; |
| |
|
| | updateSendButton(); |
| | updateModelContext(); |
| | } |
| | } catch (e) { |
| | log.error("Failed to send message:", e); |
| | } |
| | }); |
| |
|
| | |
| | |
| | |
| |
|
| | app.connect().then(() => { |
| | log.info("Connected to host"); |
| | const ctx = app.getHostContext(); |
| | if (ctx) { |
| | app.onhostcontextchanged?.(ctx); |
| | } |
| | }); |
| |
|