import { useState, useEffect, useRef, useCallback } from "react";
import { useModel } from "./hooks/useModel.js";
import InputBar from "./components/InputBar.jsx";
import MessageList from "./components/MessageList.jsx";
import LoadingBar from "./components/LoadingBar.jsx";
import OrbitalHero from "./components/OrbitalHero.jsx";
// System prompt prepended to every conversation sent to the model.
const SYSTEM_PROMPT = "You are a helpful assistant. When given images, describe and analyze them. When given audio, transcribe or describe it. Be concise and helpful.";
// Demo media used by the starter prompts below.
const EXAMPLE_IMAGE_URL = "https://upload.wikimedia.org/wikipedia/commons/thumb/1/16/Artemis_II_patch.svg/500px-Artemis_II_patch.svg.png";
const EXAMPLE_AUDIO_URL = "/neil-armstrong.oga";
// Must be https: the previous http:// URL would be blocked as mixed content
// when the app itself is served over https (required for WebGPU pages).
const EXAMPLE_VIDEO_URL = "https://images-assets.nasa.gov/video/One_Small_Step_Comparison_720p/One_Small_Step_Comparison_720p~small.mp4";
// One-click example prompts shown on the empty-chat landing screen.
// Icons were mojibake in the previous revision; restored to the intended
// emoji (camera/mic/clapper/bulb) — TODO confirm against the design.
const STARTER_PROMPTS = [
{ label: "Describe this patch", text: "What do you see in this image? Describe it in detail.", icon: "📷", imageUrl: EXAMPLE_IMAGE_URL },
{ label: "Transcribe audio", text: "Transcribe this audio recording.", icon: "🎤", audioUrl: EXAMPLE_AUDIO_URL },
{ label: "Analyze video*", text: "Describe what is happening in this video.", icon: "🎬", videoUrl: EXAMPLE_VIDEO_URL },
{ label: "Explain a concept", text: "Explain quantum entanglement in simple terms.", icon: "💡" },
];
// Model card link for the header, and the brand gradient utility classes.
const HF_MODEL_URL = "https://huggingface.co/onnx-community/gemma-4-E2B-it-ONNX";
const GEMMA_GRADIENT = "bg-gradient-to-br from-[#3186FF] to-[#4FA0FF]";
/**
 * Rough tokens-per-second estimate for streamed generation text.
 *
 * @param {string} text - Accumulated generated text so far.
 * @param {number} startTime - `performance.now()` timestamp at generation start.
 * @returns {?number} Rate rounded to one decimal place, or `null` while less
 *   than 500 ms has elapsed (too little data for a stable estimate).
 */
function calcTokPerSec(text, startTime) {
// Whitespace-delimited word count as a cheap token proxy. filter(Boolean)
// drops empty fragments so "" counts as 0 tokens (the bare split returned
// [""] → 1) and leading whitespace doesn't add a phantom token.
const tokens = text.split(/\s+/).filter(Boolean).length;
const elapsed = (performance.now() - startTime) / 1000;
return elapsed > 0.5 ? Math.round(tokens / elapsed * 10) / 10 : null;
}
// Full-screen status shell: shows the app title with arbitrary status
// content (error text, loading UI) rendered below it via `children`.
// NOTE(review): the JSX element tags in this return appear to have been
// stripped during extraction — only text/expression children remain.
// Left byte-identical; recover the original markup from version control.
function StatusScreen({ children }) {
return (
Gemma 4 WebGPU
{children}
);
}
/**
 * Root component: drives model loading via useModel(), maintains the chat
 * transcript and streaming state, and renders either a loading/landing
 * screen or the chat UI.
 *
 * NOTE(review): the JSX in the return statements below appears to have been
 * stripped of element tags during extraction — only text and expression
 * children remain. The markup is left byte-identical pending recovery of
 * the original from version control.
 */
export default function App() {
const { status, loadProgress, error, checkWebGPU, loadModel, generate } = useModel();
// Chat transcript: [{ role, content, imageUrl?, audioUrl?, videoUrl? }, ...]
const [messages, setMessages] = useState([]);
// Partial assistant text while a generation streams in.
const [streamingText, setStreamingText] = useState("");
const [isStreaming, setIsStreaming] = useState(false);
const [processingStep, setProcessingStep] = useState(null); // null | "extracting frames" | "decoding audio" | "generating"
// Tokens-per-second readout computed by calcTokPerSec during streaming.
const [tokPerSec, setTokPerSec] = useState(null);
// Persisted flag: the model finished loading at least once on this browser.
const [isCached, setIsCached] = useState(() => localStorage.getItem("gemma4-cached") === "true");
const [enableThinking, setEnableThinking] = useState(false);
// "system" | "dark" | "light"; persisted across sessions.
const [theme, setTheme] = useState(() => localStorage.getItem("gemma4-theme") || "system");
// Apply the theme by toggling a class on <html>; "system" adds no class.
useEffect(() => {
const root = document.documentElement;
root.classList.remove("dark", "light");
if (theme !== "system") root.classList.add(theme);
localStorage.setItem("gemma4-theme", theme);
}, [theme]);
const messagesEndRef = useRef(null);
// performance.now() timestamp captured when the current generation starts.
const genStartRef = useRef(0);
const scrollRafRef = useRef(0);
// Probe WebGPU support once on mount.
useEffect(() => {
checkWebGPU();
}, [checkWebGPU]);
// Throttled scroll-to-bottom via rAF
// NOTE(review): no cleanup is returned, so a pending frame can fire after
// unmount; consider cancelling scrollRafRef.current in an effect cleanup.
useEffect(() => {
cancelAnimationFrame(scrollRafRef.current);
scrollRafRef.current = requestAnimationFrame(() => {
messagesEndRef.current?.scrollIntoView({ behavior: "smooth" });
});
}, [messages, streamingText]);
// Once the model reaches "ready", remember that its weights are cached.
useEffect(() => {
if (status === "ready") {
setIsCached(true);
localStorage.setItem("gemma4-cached", "true");
}
}, [status]);
// Build the multimodal user message, append it to the transcript, and kick
// off a streaming generation; media URLs travel alongside the messages.
const handleSubmit = useCallback(async ({ imageUrl, audioUrl, videoUrl, text }) => {
const userContent = [];
if (imageUrl) userContent.push({ type: "image" });
// Each video frame needs its own image token in the template
if (videoUrl) for (let i = 0; i < 4; i++) userContent.push({ type: "image" });
if (audioUrl) userContent.push({ type: "audio" });
userContent.push({ type: "text", text: text || "Describe this." });
const userMsg = { role: "user", content: userContent, imageUrl, audioUrl, videoUrl };
const newMessages = [...messages, userMsg];
setMessages(newMessages);
// The model sees the system prompt plus role/content only (no media URLs).
const apiMessages = [
{ role: "system", content: SYSTEM_PROMPT },
...newMessages.map((m) => ({ role: m.role, content: m.content })),
];
setStreamingText("");
setIsStreaming(true);
setTokPerSec(null);
setProcessingStep(videoUrl ? "extracting frames" : audioUrl ? "decoding audio" : "generating");
genStartRef.current = performance.now();
generate({
messages: apiMessages,
imageUrl,
videoUrl,
audioUrl,
enableThinking,
// Invoked with the accumulated text on each streamed chunk.
onUpdate: (text) => {
setProcessingStep(null);
const tps = calcTokPerSec(text, genStartRef.current);
if (tps !== null) setTokPerSec(tps);
setStreamingText(text);
},
// Invoked once at the end; on success the full text becomes a message.
onComplete: (text, err) => {
setProcessingStep(null);
if (!err && text) {
setTokPerSec(calcTokPerSec(text, genStartRef.current));
setMessages((prev) => [...prev, { role: "assistant", content: text }]);
}
setStreamingText("");
setIsStreaming(false);
},
});
}, [messages, generate, enableThinking]);
if (status === "webgpu-unavailable") {
return (
WebGPU is required. Use Chrome 113+ or Edge 113+.
);
}
if (error) {
return (
Error: {error}
);
}
const isLoading = status === "idle" || status === "webgpu-available" || status === "loading";
// NOTE(review): element tags below appear stripped (see header note);
// left byte-identical pending recovery of the original JSX.
return (
G
Gemma 4
{messages.length > 0 && !isStreaming && (
)}
{!isLoading && (
)}
{tokPerSec != null && (
{tokPerSec} tok/s
)}
{isCached && !isLoading && (
Cached
)}
In-Browser ยท WebGPU
{isLoading ? (
Multimodal AI running entirely in your browser via WebGPU
{status === "loading" ? (
) : (
)}
) : (
<>
{messages.length === 0 && !isStreaming ? (
G
How can I help?
Send text, images, audio, or video โ all processed locally.
{STARTER_PROMPTS.map((p) => (
))}
) : (
)}
{isCached &&
Cached ยท }
Powered by
Transformers.js
*Video analyzes 4 sampled frames โ a tradeoff between memory and processing speed
>
)}
);
}