Upload folder using huggingface_hub

e1cc3bc verified about 1 month ago

13.5 kB

	/**
	* Live Transcript MCP App
	*
	* Simple speech-to-text transcription using Web Speech API.
	* Transcribed text can be sent to the host via ui/message.
	*/
	import {
	App,
	type McpUiHostContext,
	applyDocumentTheme,
	} from "@modelcontextprotocol/ext-apps";
	import "./global.css";
	import "./mcp-app.css";

	// Tag prepended to every console message emitted by this app.
	const LOG_TAG = "[Transcript]";

	/** Console loggers pre-bound so each call is prefixed with the app tag. */
	const log = {
	  info: console.log.bind(console, LOG_TAG),
	  warn: console.warn.bind(console, LOG_TAG),
	  error: console.error.bind(console, LOG_TAG),
	};

	// ============================================================================
	// DOM Elements
	// ============================================================================

	// Root container; host safe-area insets are applied to it as padding.
	const mainEl = document.querySelector<HTMLElement>(".transcript-app")!;

	// Microphone level meter: outer bar and the inner fill whose width tracks volume.
	const levelBarEl = document.getElementById("level-bar")!;
	const micLevelEl = document.getElementById("mic-level")!;

	// Elapsed-time readout and the container that holds transcript entries.
	const timerEl = document.getElementById("timer")!;
	const transcriptEl = document.getElementById("transcript")!;

	// Control buttons. Only the send button needs the button-specific type,
	// because its `disabled` property is toggled.
	const startBtn = document.getElementById("start-btn")!;
	const copyBtn = document.getElementById("copy-btn")!;
	const clearBtn = document.getElementById("clear-btn")!;
	const sendBtn = document.getElementById("send-btn") as HTMLButtonElement;

	// ============================================================================
	// State
	// ============================================================================

	/** True between a Start press and the matching Stop (or a fatal error). */
	let isListening = false;
	/** Number of finalized transcript entries already sent to the host. */
	let lastSentIndex = 0;

	// Timer
	/** `Date.now()` value captured when the current session started. */
	let timerStart: number | null = null;
	/** Handle of the interval that refreshes the timer display. */
	let timerInterval: number | null = null;

	// Audio
	let audioContext: AudioContext | null = null;
	let micStream: MediaStream | null = null;
	let micAnalyser: AnalyserNode | null = null;
	/** Handle of the pending `requestAnimationFrame` for the level meter. */
	let animationFrame: number | null = null;

	// Speech Recognition
	let recognition: SpeechRecognition | null = null;

	// ============================================================================
	// MCP App Setup
	// ============================================================================

	// App instance identifying this UI to the host.
	const app = new App({
	  name: "Live Transcript",
	  version: "1.0.0",
	});

	// On teardown, stop the current listening session and return an empty result.
	app.onteardown = async () => {
	  log.info("App teardown");
	  stopListening();
	  return {};
	};

	// Route app-level errors through the prefixed error logger.
	app.onerror = log.error;

	// Apply host-provided layout insets and theme whenever the host context changes.
	app.onhostcontextchanged = (ctx: McpUiHostContext) => {
	  const { safeAreaInsets: insets, theme } = ctx;

	  if (insets) {
	    // Mirror each inset as padding on the root container.
	    const { style } = mainEl;
	    style.paddingTop = `${insets.top}px`;
	    style.paddingRight = `${insets.right}px`;
	    style.paddingBottom = `${insets.bottom}px`;
	    style.paddingLeft = `${insets.left}px`;
	  }

	  if (theme) {
	    applyDocumentTheme(theme);
	  }
	};

	// ============================================================================
	// Audio Capture
	// ============================================================================

	/**
	 * Opens the microphone, connects it to an analyser node and starts the
	 * level-meter loop.
	 *
	 * @returns `true` once capture is running, `false` if any step threw
	 *   (the error is logged, not rethrown).
	 */
	async function startAudioCapture(): Promise<boolean> {
	  try {
	    audioContext = new AudioContext();
	    micStream = await navigator.mediaDevices.getUserMedia({ audio: true });

	    const micSource = audioContext.createMediaStreamSource(micStream);
	    const analyser = audioContext.createAnalyser();
	    analyser.fftSize = 256;
	    micAnalyser = analyser;
	    micSource.connect(analyser);

	    updateAudioLevels();
	    log.info("Audio capture started");
	    return true;
	  } catch (err) {
	    log.error("Failed to start audio capture:", err);
	    return false;
	  }
	}

	/**
	 * Paints one frame of the microphone level meter and schedules the next.
	 * The meter shows 0% whenever there is no analyser or listening is off.
	 */
	function updateAudioLevels() {
	  if (micAnalyser && isListening) {
	    const bins = new Uint8Array(micAnalyser.frequencyBinCount);
	    micAnalyser.getByteFrequencyData(bins);

	    let total = 0;
	    for (const value of bins) {
	      total += value;
	    }
	    const mean = total / bins.length;

	    // A mean of 128 (half of the byte range) or more fills the bar.
	    const percent = Math.min(100, (mean / 128) * 100);
	    micLevelEl.style.width = `${percent}%`;
	  } else {
	    micLevelEl.style.width = "0%";
	  }

	  animationFrame = requestAnimationFrame(updateAudioLevels);
	}

	/**
	 * Cancels the level-meter loop, stops every microphone track, closes the
	 * audio context and resets the meter to 0%.
	 */
	function stopAudioCapture() {
	  if (animationFrame) {
	    cancelAnimationFrame(animationFrame);
	    animationFrame = null;
	  }

	  if (micStream) {
	    for (const track of micStream.getTracks()) {
	      track.stop();
	    }
	    micStream = null;
	  }

	  audioContext?.close();
	  audioContext = null;

	  micLevelEl.style.width = "0%";
	}

	// ============================================================================
	// Speech Recognition
	// ============================================================================

	/**
	 * Creates a speech recognizer, wires its event handlers and starts it.
	 *
	 * Final results are appended to the transcript (and the send button and
	 * model context are refreshed); interim results update the single interim
	 * entry. While `isListening` is true the recognizer is restarted whenever
	 * it ends, unless a permanent error stopped the session first.
	 *
	 * @returns `true` if recognition was started, `false` if the API is
	 *   unavailable or `start()` threw.
	 */
	function startSpeechRecognition(): boolean {
	  const SpeechRecognitionCtor =
	    window.SpeechRecognition || window.webkitSpeechRecognition;

	  if (!SpeechRecognitionCtor) {
	    log.warn("Speech recognition not supported");
	    return false;
	  }

	  // Handlers close over this instance rather than the module-level
	  // `recognition`, so a stale recognizer never acts on a newer one.
	  const instance: SpeechRecognition = new SpeechRecognitionCtor();
	  recognition = instance;
	  instance.continuous = true;
	  instance.interimResults = true;
	  instance.lang = "en-US";

	  instance.onstart = () => {
	    log.info("Speech recognition started");
	  };

	  instance.onresult = (event) => {
	    const e = event as SpeechRecognitionEvent;
	    // Only results from resultIndex onward changed in this event.
	    for (let i = e.resultIndex; i < e.results.length; i++) {
	      const result = e.results[i];
	      const transcript = result[0].transcript;

	      if (result.isFinal) {
	        addTranscriptEntry(transcript, true);
	        updateSendButton();
	        updateModelContext();
	      } else {
	        updateInterimTranscript(transcript);
	      }
	    }
	  };

	  instance.onerror = (event) => {
	    const e = event as SpeechRecognitionErrorEvent;
	    log.error("Speech recognition error:", e.error);
	    if (e.error === "not-allowed") {
	      addTranscriptEntry("Microphone access denied", true);
	      stopListening();
	    } else if (
	      e.error === "service-not-allowed" ||
	      e.error === "audio-capture" ||
	      e.error === "language-not-supported"
	    ) {
	      // These will not resolve by retrying; without stopping here the
	      // onend handler would restart the recognizer in an endless loop.
	      addTranscriptEntry("Speech recognition not available", true);
	      stopListening();
	    }
	  };

	  instance.onend = () => {
	    log.info("Speech recognition ended");
	    // Restart only if still listening and this is still the active recognizer.
	    if (isListening && recognition === instance) {
	      try {
	        instance.start();
	      } catch (e) {
	        // Best effort: a failed restart must not break the page.
	        log.warn("Failed to restart speech recognition:", e);
	      }
	    }
	  };

	  try {
	    instance.start();
	    return true;
	  } catch (e) {
	    log.error("Failed to start speech recognition:", e);
	    return false;
	  }
	}

	/** Stops the active recognizer, if any, and drops the reference to it. */
	function stopSpeechRecognition() {
	  if (!recognition) return;

	  try {
	    recognition.stop();
	  } catch (err) {
	    // Ignore: stopping is best effort.
	  }
	  recognition = null;
	}

	// ============================================================================
	// UI Helpers
	// ============================================================================

	/** Removes the placeholder from the transcript, if it is still present. */
	function clearTranscriptPlaceholder() {
	  transcriptEl.querySelector(".transcript-placeholder")?.remove();
	}

	/**
	 * Formats a duration as `m:ss` (minutes are not capped at 59).
	 *
	 * @param seconds Duration in seconds. Fractions are truncated; negative
	 *   or non-finite values are treated as 0.
	 * @returns For example `"0:05"`, `"1:05"`, `"60:00"`.
	 */
	function formatTime(seconds: number): string {
	  const total = Number.isFinite(seconds) ? Math.max(0, Math.floor(seconds)) : 0;
	  const mins = Math.floor(total / 60);
	  const secs = total % 60;
	  return `${mins}:${String(secs).padStart(2, "0")}`;
	}

	/**
	 * Resets the timer display to 0:00, marks it active and refreshes it once
	 * per second with the time elapsed since this call.
	 */
	function startTimer() {
	  timerStart = Date.now();
	  timerEl.textContent = "0:00";
	  timerEl.classList.add("active");

	  const tick = () => {
	    if (!timerStart) return;
	    const elapsedSeconds = Math.floor((Date.now() - timerStart) / 1000);
	    timerEl.textContent = formatTime(elapsedSeconds);
	  };
	  timerInterval = window.setInterval(tick, 1000);
	}

	/** Stops the timer refresh and removes the active styling; the last value stays shown. */
	function stopTimer() {
	  const handle = timerInterval;
	  if (handle) {
	    clearInterval(handle);
	    timerInterval = null;
	  }
	  timerEl.classList.remove("active");
	}

	/**
	 * Appends a timestamped entry to the transcript, replacing any interim entry.
	 *
	 * @param text Entry text; whitespace-only text is ignored.
	 * @param isFinal When false the entry is tagged with the `interim` class.
	 */
	function addTranscriptEntry(text: string, isFinal: boolean) {
	  // Skip empty entries
	  if (text.trim() === "") return;

	  clearTranscriptPlaceholder();

	  // The new entry supersedes whatever interim text was being shown.
	  transcriptEl.querySelector(".transcript-entry.interim")?.remove();

	  const stamp = new Date().toLocaleTimeString();

	  const entryEl = document.createElement("p");
	  entryEl.className = isFinal
	    ? "transcript-entry"
	    : "transcript-entry interim";
	  entryEl.innerHTML = `<div class="timestamp">${stamp}</div>${escapeHtml(text)}`;
	  transcriptEl.appendChild(entryEl);
	}

	/**
	 * Shows `text` in the single interim entry, creating that entry at the end
	 * of the transcript if none exists, and refreshes its timestamp.
	 */
	function updateInterimTranscript(text: string) {
	  clearTranscriptPlaceholder();

	  let interimEl = transcriptEl.querySelector<HTMLElement>(
	    ".transcript-entry.interim",
	  );
	  if (interimEl === null) {
	    interimEl = document.createElement("p");
	    interimEl.className = "transcript-entry interim";
	    transcriptEl.appendChild(interimEl);
	  }

	  const stamp = new Date().toLocaleTimeString();
	  interimEl.innerHTML = `<div class="timestamp">${stamp}</div>${escapeHtml(text)}`;
	}

	/**
	 * Escapes text for insertion as HTML element content.
	 *
	 * Produces the same output as assigning `text` to a node's `textContent`
	 * and reading back `innerHTML` (`&`, no-break space, `<` and `>` are
	 * escaped; quotes are left alone), but without creating a DOM element on
	 * every call. Not suitable for attribute values.
	 */
	function escapeHtml(text: string): string {
	  return text
	    .replace(/&/g, "&amp;") // must run first so later entities are not re-escaped
	    .replace(/\u00a0/g, "&nbsp;")
	    .replace(/</g, "&lt;")
	    .replace(/>/g, "&gt;");
	}

	/**
	 * Renders one transcript entry as plain text.
	 *
	 * @param entry A transcript entry element, optionally containing a
	 *   `.timestamp` child.
	 * @returns `"[timestamp] text"`, or just `text` when there is no
	 *   timestamp, or `""` when the entry has no text besides the timestamp.
	 */
	function formatEntry(entry: HTMLElement): string {
	  const timestamp = entry.querySelector(".timestamp")?.textContent?.trim();

	  // Work on a copy so the timestamp can be stripped without touching the page.
	  const clone = entry.cloneNode(true) as HTMLElement;
	  clone.querySelector(".timestamp")?.remove();

	  const text = clone.textContent?.trim() ?? "";
	  if (!text) return "";
	  return timestamp ? `[${timestamp}] ${text}` : text;
	}

	/** Formats each entry, drops the ones that render empty and joins the rest with newlines. */
	function formatEntries(entries: HTMLElement[]): string {
	  const lines: string[] = [];
	  for (const entry of entries) {
	    const line = formatEntry(entry);
	    if (line) {
	      lines.push(line);
	    }
	  }
	  return lines.join("\n");
	}

	/** Returns every non-interim transcript entry, in document order. */
	function getAllEntries(): HTMLElement[] {
	  const finalized = transcriptEl.querySelectorAll<HTMLElement>(
	    ".transcript-entry:not(.interim)",
	  );
	  return Array.from(finalized);
	}

	/** Returns the non-interim entries at positions `lastSentIndex` and beyond. */
	function getUnsentEntries(): HTMLElement[] {
	  const finalized = getAllEntries();
	  return finalized.slice(lastSentIndex);
	}

	/** Plain-text rendering of every non-interim entry, one per line. */
	function getAllTranscriptText(): string {
	  const finalized = getAllEntries();
	  return formatEntries(finalized);
	}

	/** Plain-text rendering of the entries not yet sent, one per line. */
	function getUnsentText(): string {
	  const pending = getUnsentEntries();
	  return formatEntries(pending);
	}

	/** Enables the send button only while there is at least one unsent entry. */
	function updateSendButton() {
	  const hasUnsent = getUnsentEntries().length > 0;
	  sendBtn.disabled = !hasUnsent;
	}

	/**
	 * Pushes the unsent transcript text to the host as model context, or an
	 * empty content list when nothing is unsent. Does nothing if the host does
	 * not advertise the `updateModelContext` capability. Failures are logged,
	 * not thrown.
	 */
	function updateModelContext() {
	  const caps = app.getHostCapabilities();
	  if (!caps?.updateModelContext) return;

	  const text = getUnsentText();
	  // `||` on purpose: an empty string should be logged as "(empty)".
	  log.info("Updating model context:", text || "(empty)");

	  app
	    .updateModelContext({
	      content: text
	        ? [{ type: "text", text: `[Live transcript]: ${text}` }]
	        : [],
	    })
	    .catch((e: unknown) => {
	      log.warn("Failed to update model context:", e);
	    });
	}

	// ============================================================================
	// Controls
	// ============================================================================

	/**
	 * Starts a listening session: switches the button to its "Stop" state,
	 * starts the timer, opens the microphone and starts speech recognition.
	 *
	 * If the microphone or recognizer cannot be started, a message is added to
	 * the transcript and the session is stopped. If the user presses Stop
	 * while the microphone request is still pending, the session is abandoned
	 * quietly.
	 */
	async function startListening() {
	  isListening = true;
	  startBtn.innerHTML = `
	<svg class="btn-icon" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
	<rect x="6" y="4" width="4" height="16"/>
	<rect x="14" y="4" width="4" height="16"/>
	</svg>
	Stop
	`;
	  startBtn.classList.add("recording");
	  levelBarEl.classList.add("active");
	  startTimer();

	  const micOk = await startAudioCapture();

	  // Stop may have been pressed while the permission prompt was open.
	  // Run the stop path again to release anything acquired since, and do
	  // not report an error or start recognition for a cancelled session.
	  if (!isListening) {
	    stopListening();
	    return;
	  }

	  if (!micOk) {
	    addTranscriptEntry("Microphone access denied", true);
	    stopListening();
	    return;
	  }

	  if (!startSpeechRecognition()) {
	    addTranscriptEntry("Speech recognition not available", true);
	    stopListening();
	  }
	}

	/**
	 * Ends the listening session: restores the "Start" button, stops the timer,
	 * then shuts down speech recognition and audio capture.
	 */
	function stopListening() {
	  isListening = false;

	  startBtn.classList.remove("recording");
	  levelBarEl.classList.remove("active");
	  startBtn.innerHTML = `
	<svg class="btn-icon" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
	<polygon points="5 3 19 12 5 21 5 3"/>
	</svg>
	Start
	`;

	  stopTimer();
	  stopSpeechRecognition();
	  stopAudioCapture();
	}

	// Start/Stop toggle.
	startBtn.addEventListener("click", () => {
	  if (!isListening) {
	    // Fire and forget: startListening reports its own failures.
	    void startListening();
	    return;
	  }
	  stopListening();
	});

	// Copy the whole finalized transcript to the clipboard.
	copyBtn.addEventListener("click", async () => {
	  const transcript = getAllTranscriptText();
	  if (transcript === "") return;

	  try {
	    await navigator.clipboard.writeText(transcript);

	    // Flash the "copied" state for one second.
	    copyBtn.classList.add("copied");
	    setTimeout(() => {
	      copyBtn.classList.remove("copied");
	    }, 1000);
	    log.info("Transcript copied to clipboard");
	  } catch (err) {
	    log.error("Failed to copy:", err);
	  }
	});

	// Reset the transcript to its placeholder and forget what was sent.
	clearBtn.addEventListener("click", () => {
	  lastSentIndex = 0;
	  transcriptEl.innerHTML =
	    '<p class="transcript-placeholder">Your speech will appear here...</p>';

	  updateSendButton();
	  updateModelContext();
	});

	// True while a send is awaiting the host's reply, so a second click cannot
	// send the same entries again.
	let sendInFlight = false;

	// Send the unsent entries to the host as a user message. On success, mark
	// them as sent and move the "sent" divider after the last one.
	sendBtn.addEventListener("click", async () => {
	  if (sendInFlight) return;

	  const unsentEntries = getUnsentEntries();
	  if (unsentEntries.length === 0) return;

	  const transcriptText = getUnsentText();
	  if (!transcriptText) return;

	  log.info("Sending transcript:", transcriptText);

	  sendInFlight = true;
	  try {
	    const { isError } = await app.sendMessage({
	      role: "user",
	      content: [{ type: "text", text: transcriptText }],
	    });

	    if (isError) {
	      log.warn("Message was rejected");
	      return;
	    }

	    log.info("Message sent successfully");

	    // Locate the last entry that was actually in the message. Entries
	    // finalized during the await come after it and must stay unsent.
	    const lastEntry = unsentEntries[unsentEntries.length - 1];
	    const finalized = Array.from(
	      transcriptEl.querySelectorAll(".transcript-entry:not(.interim)"),
	    );
	    const lastPos = finalized.indexOf(lastEntry);
	    if (lastPos === -1) {
	      // The transcript was cleared while sending; nothing is left to mark.
	      return;
	    }

	    // Mark entries as sent
	    unsentEntries.forEach((entry) => entry.classList.add("sent"));

	    // Remove any existing divider
	    transcriptEl.querySelector(".sent-divider")?.remove();

	    // Add divider after the last sent entry
	    const divider = document.createElement("div");
	    divider.className = "sent-divider";
	    divider.innerHTML = `<span>sent ${new Date().toLocaleTimeString()}</span>`;
	    lastEntry.insertAdjacentElement("afterend", divider);

	    // Update sent index
	    lastSentIndex = lastPos + 1;

	    updateSendButton();
	    updateModelContext(); // Context now holds only what is still unsent
	  } catch (e) {
	    log.error("Failed to send message:", e);
	  } finally {
	    sendInFlight = false;
	  }
	});

	// ============================================================================
	// Initialize
	// ============================================================================

	// Connect to the host, then apply whatever host context is already available
	// (safe-area insets, theme) by running the change handler once.
	app
	  .connect()
	  .then(() => {
	    log.info("Connected to host");
	    const ctx = app.getHostContext();
	    if (ctx) {
	      app.onhostcontextchanged?.(ctx);
	    }
	  })
	  .catch((e: unknown) => {
	    // Without this a failed connection is an unhandled promise rejection.
	    log.error("Failed to connect to host:", e);
	  });