import { useState, useEffect, useRef } from "react";
import { Zap, AlignLeft, Quote, Type, FileText, Check, X, Dices } from "lucide-react";
import { useTTS } from "./components/TTSContext";
import { TTSProvider } from "./components/TTSProvider";
import { streamTTS, createAudioBlob } from "./tts";
import { SAMPLE_RATE, EXAMPLE_SENTENCES } from "./constants";
import { AudioResult } from "./components/AudioResult";
import { Controls } from "./components/Controls";

// In-browser streaming TTS demo: pulls audio chunks from streamTTS and
// schedules them back-to-back on a Web Audio AudioContext while tracking
// latency/throughput stats for the UI.
const AppContent = () => {
  const [text, setText] = useState(
    "Introducing Supertonic WebGPU: blazingly fast text-to-speech running 100% locally in your browser.",
  );
  const [activeTab, setActiveTab] = useState("Freeform");
  const [isGenerating, setIsGenerating] = useState(false);
  const [showResults, setShowResults] = useState(false);
  const [quality, setQuality] = useState(5);
  const [speed, setSpeed] = useState(1.0);
  const [voice, setVoice] = useState("Female");
  const { pipelineReady, tts, speakerEmbeddings, downloadProgress } = useTTS();
  // Generation/playback metrics surfaced in the results panel. Durations are
  // in seconds; firstLatency stays null until the first audio chunk arrives.
  const [stats, setStats] = useState({
    firstLatency: null as number | null,
    processingTime: 0,
    charsPerSec: 0,
    rtf: 0,
    totalDuration: 0,
    currentDuration: 0,
  });
  const [generationProgress, setGenerationProgress] = useState(0);
  const [isPlaying, setIsPlaying] = useState(false);
  // Web Audio plumbing lives in refs so chunk scheduling never re-renders.
  const audioContextRef = useRef(null);
  const nextPlayTimeRef = useRef(0); // AudioContext time the next chunk should start at
  const fullAudioBufferRef = useRef([]); // raw sample chunks, retained for seek/download
  const playbackStartTimeRef = useRef(0); // AudioContext time corresponding to audio position 0
  const playbackAnimationFrameRef = useRef(0);
  const activeSourceNodesRef = useRef([]); // source nodes currently scheduled/playing
  const isPlaybackInterruptedRef = useRef(false); // set when the user seeks mid-generation
  const stopGenerationRef = useRef(false); // cooperative cancel flag for the stream loop
  // NOTE(review): the type argument of this useState call appears to have been
  // lost in extraction ("useState>(") — restore it from the original file.
  const [exampleTexts, setExampleTexts] = useState>(EXAMPLE_SENTENCES);

  // Fetch the long-form example text once and add it to the example map.
  useEffect(() => {
    fetch("/the-great-gatsby.txt")
      .then((res) => res.text())
      .then((text) => {
        setExampleTexts((prev) => ({ ...prev, "Full story": text }));
      })
      .catch((e) => console.error("Failed to load story", e));
  }, []);

  // Release the AudioContext and any pending animation frame on unmount.
  useEffect(() => {
    return () => {
      if (audioContextRef.current) {
        audioContextRef.current.close();
      }
      cancelAnimationFrame(playbackAnimationFrameRef.current);
    };
  }, []);

  // Drive the playhead UI from the AudioContext clock while playing.
  useEffect(() => {
    const updatePlaybackUI = () => {
      if (isPlaying && audioContextRef.current) {
        const ctx = audioContextRef.current;
        const elapsed = ctx.currentTime - playbackStartTimeRef.current;
        // If reached end of current known duration
        if (elapsed >= stats.totalDuration && !isGenerating && stats.totalDuration > 0) {
          setIsPlaying(false);
          setStats((prev) => ({
            ...prev,
            currentDuration: prev.totalDuration,
          })); // Snap to end
          return;
        }
        setStats((prev) => ({
          ...prev,
          currentDuration: Math.min(elapsed, prev.totalDuration),
        }));
        playbackAnimationFrameRef.current = requestAnimationFrame(updatePlaybackUI);
      }
    };
    if (isPlaying) {
      playbackAnimationFrameRef.current = requestAnimationFrame(updatePlaybackUI);
    } else {
      cancelAnimationFrame(playbackAnimationFrameRef.current);
    }
  }, [isPlaying, isGenerating, stats.totalDuration]);

  // Load an example into the textarea; array entries pick a random sentence,
  // string entries (e.g. "Full story") load verbatim.
  const handleExampleClick = (type: string) => {
    setActiveTab(type);
    let selection = exampleTexts[type];
    if (Array.isArray(selection)) {
      setText(selection[Math.floor(Math.random() * selection.length)]);
      return;
    }
    setText(selection);
  };

  // Stop every scheduled source node. stop() throws on a node that was never
  // started (or already stopped), hence the deliberately swallowed exception.
  const stopAllAudio = () => {
    activeSourceNodesRef.current.forEach((node) => {
      try {
        node.stop();
      } catch (e) {}
    });
    activeSourceNodesRef.current = [];
  };

  // Ask the generation loop to exit at the next chunk boundary.
  const handleStop = () => {
    stopGenerationRef.current = true;
  };

  // Stream TTS for the current text: reset stats, then for each yielded chunk
  // schedule gapless playback (unless the user has seeked) and update
  // latency / RTF / chars-per-second / progress metrics.
  const handleGenerate = async () => {
    if (isGenerating) return;
    stopAllAudio();
    setShowResults(true);
    setIsGenerating(true);
    setGenerationProgress(0);
    stopGenerationRef.current = false;
    setStats({
      firstLatency: null,
      processingTime: 0,
      charsPerSec: 0,
      rtf: 0,
      totalDuration: 0,
      currentDuration: 0,
    });
    fullAudioBufferRef.current = [];
    isPlaybackInterruptedRef.current = false;
    // Lazily create the AudioContext (webkit prefix for older Safari).
    if (!audioContextRef.current) {
      audioContextRef.current = new (window.AudioContext || (window as any).webkitAudioContext)();
    }
    const ctx = audioContextRef.current;
    if (ctx.state === "suspended") {
      await ctx.resume();
    }
    nextPlayTimeRef.current = ctx.currentTime + 0.1; // small lead-in before the first chunk
    playbackStartTimeRef.current = nextPlayTimeRef.current;
    setIsPlaying(true);
    const startTime = performance.now();
    let processedChars = 0;
    let generatedAudioSeconds = 0;
    try {
      if (!tts.current || !speakerEmbeddings.current) throw new Error("TTS pipeline not ready");
      const selectedEmbedding = speakerEmbeddings.current[voice];
      for await (const result of streamTTS(text, tts.current, selectedEmbedding, quality, speed)) {
        if (stopGenerationRef.current) {
          break;
        }
        const now = performance.now();
        const elapsedSec = (now - startTime) / 1000;
        setStats((prev) => ({
          ...prev,
          firstLatency: prev.firstLatency === null ? elapsedSec : prev.firstLatency,
          processingTime: elapsedSec,
        }));
        const chunkDuration = result.audio.audio.length / result.audio.sampling_rate;
        generatedAudioSeconds += chunkDuration;
        fullAudioBufferRef.current.push(result.audio.audio);
        // Only schedule streaming playback if user hasn't interrupted
        if (!isPlaybackInterruptedRef.current) {
          const buffer = ctx.createBuffer(1, result.audio.audio.length, result.audio.sampling_rate);
          buffer.copyToChannel(result.audio.audio as any, 0);
          const source = ctx.createBufferSource();
          source.buffer = buffer;
          source.connect(ctx.destination);
          source.start(nextPlayTimeRef.current);
          activeSourceNodesRef.current.push(source);
          source.onended = () => {
            const idx = activeSourceNodesRef.current.indexOf(source);
            if (idx > -1) activeSourceNodesRef.current.splice(idx, 1);
          };
          nextPlayTimeRef.current += buffer.duration;
        }
        processedChars += result.text.length;
        // RTF < 1 means generation is faster than real time.
        const currentRtf = elapsedSec / generatedAudioSeconds;
        const currentCharsPerSec = processedChars / elapsedSec;
        setStats((prev) => ({
          ...prev,
          charsPerSec: currentCharsPerSec,
          rtf: currentRtf,
          totalDuration: generatedAudioSeconds,
        }));
        setGenerationProgress((result.index / result.total) * 100);
      }
    } catch (e) {
      console.error("Generation failed", e);
    } finally {
      setIsGenerating(false);
      isPlaybackInterruptedRef.current = false; // Reset after completion
    }
  };

  // Jump playback to a fraction [0, 1] of the generated audio: kill the
  // current sources, then re-schedule every chunk that overlaps the seek
  // point, offsetting into the first one.
  const handleSeek = (percentage: number) => {
    if (!audioContextRef.current || fullAudioBufferRef.current.length === 0) return;
    const ctx = audioContextRef.current;
    isPlaybackInterruptedRef.current = true;
    stopAllAudio();
    const seekTime = stats.totalDuration * percentage;
    let currentTimeInAudio = 0;
    let nextPlayTime = ctx.currentTime;
    // Reset startTime such that (currentTime - startTime) = seekTime
    playbackStartTimeRef.current = ctx.currentTime - seekTime;
    for (const chunk of fullAudioBufferRef.current) {
      const chunkDuration = chunk.length / SAMPLE_RATE;
      const chunkEndTime = currentTimeInAudio + chunkDuration;
      if (chunkEndTime > seekTime) {
        // This chunk needs to be played
        const offsetInChunk = Math.max(0, seekTime - currentTimeInAudio);
        const durationToPlay = chunkDuration - offsetInChunk;
        const buffer = ctx.createBuffer(1, chunk.length, SAMPLE_RATE);
        buffer.copyToChannel(chunk as any, 0);
        const source = ctx.createBufferSource();
        source.buffer = buffer;
        source.connect(ctx.destination);
        source.start(nextPlayTime, offsetInChunk);
        activeSourceNodesRef.current.push(source);
        source.onended = () => {
          const idx = activeSourceNodesRef.current.indexOf(source);
          if (idx > -1) activeSourceNodesRef.current.splice(idx, 1);
        };
        nextPlayTime += durationToPlay;
      }
      currentTimeInAudio += chunkDuration;
    }
    if (ctx.state === "suspended") ctx.resume();
    setIsPlaying(true);
  };

  // Package all generated chunks into a WAV blob and trigger a download.
  const handleDownload = () => {
    if (fullAudioBufferRef.current.length === 0) return;
    const blob = createAudioBlob(fullAudioBufferRef.current, SAMPLE_RATE);
    const url = URL.createObjectURL(blob);
    const a = document.createElement("a");
    a.href = url;
    a.download = "audio.wav";
    a.click();
    URL.revokeObjectURL(url);
  };

  // Pause/resume via AudioContext suspend/resume; when resuming after the
  // end was reached (or after sources were stopped), re-seek so that source
  // nodes get rescheduled.
  const togglePlay = async () => {
    if (!audioContextRef.current) return;
    if (isPlaying) {
      setIsPlaying(false);
      audioContextRef.current.suspend();
    } else {
      setIsPlaying(true);
      audioContextRef.current.resume();
      // If we finished playing and hit play again, replay from start
      if (!isGenerating && stats.currentDuration >= stats.totalDuration) {
        handleSeek(0);
      } else if (!isGenerating && fullAudioBufferRef.current.length > 0 && activeSourceNodesRef.current.length === 0) {
        // This handles the case where we paused/stopped but haven't technically reached "end" OR we are resuming replay
        const currentProgress = stats.totalDuration > 0 ? stats.currentDuration / stats.totalDuration : 0;
        handleSeek(currentProgress);
      }
    }
  };

  // Minimum 10 characters and a loaded pipeline before Generate is enabled.
  const canGenerate = text.length >= 10 && pipelineReady;
  return (

Supertonic WebGPU

Generate speech directly in your browser

Text
Speech