import { useState, useEffect, useRef, useCallback } from "react";
import { useModel } from "./hooks/useModel.js";
import InputBar from "./components/InputBar.jsx";
import MessageList from "./components/MessageList.jsx";
import LoadingBar from "./components/LoadingBar.jsx";
import OrbitalHero from "./components/OrbitalHero.jsx";
// System prompt prepended to every conversation sent to the model.
const SYSTEM_PROMPT = "You are a helpful assistant. When given images, describe and analyze them. When given audio, transcribe or describe it. Be concise and helpful.";
// Demo media used by the starter prompts below.
const EXAMPLE_IMAGE_URL = "https://upload.wikimedia.org/wikipedia/commons/thumb/1/16/Artemis_II_patch.svg/500px-Artemis_II_patch.svg.png";
const EXAMPLE_AUDIO_URL = "/neil-armstrong.oga";
// Must be https: the previous http:// URL would be blocked as mixed content
// when the app itself is served over https (required for WebGPU pages).
const EXAMPLE_VIDEO_URL = "https://images-assets.nasa.gov/video/One_Small_Step_Comparison_720p/One_Small_Step_Comparison_720p~small.mp4";
// One-click example prompts shown on the empty-chat landing screen.
// Icons were mojibake in the previous revision; restored to the intended
// emoji (camera/mic/clapper/bulb) — TODO confirm against the design.
const STARTER_PROMPTS = [
{ label: "Describe this patch", text: "What do you see in this image? Describe it in detail.", icon: "📷", imageUrl: EXAMPLE_IMAGE_URL },
{ label: "Transcribe audio", text: "Transcribe this audio recording.", icon: "🎤", audioUrl: EXAMPLE_AUDIO_URL },
{ label: "Analyze video*", text: "Describe what is happening in this video.", icon: "🎬", videoUrl: EXAMPLE_VIDEO_URL },
{ label: "Explain a concept", text: "Explain quantum entanglement in simple terms.", icon: "💡" },
];
// Model card link for the header, and the brand gradient utility classes.
const HF_MODEL_URL = "https://huggingface.co/onnx-community/gemma-4-E2B-it-ONNX";
const GEMMA_GRADIENT = "bg-gradient-to-br from-[#3186FF] to-[#4FA0FF]";
/**
 * Rough tokens-per-second estimate for streamed generation text.
 *
 * @param {string} text - Accumulated generated text so far.
 * @param {number} startTime - `performance.now()` timestamp at generation start.
 * @returns {?number} Rate rounded to one decimal place, or `null` while less
 *   than 500 ms has elapsed (too little data for a stable estimate).
 */
function calcTokPerSec(text, startTime) {
// Whitespace-delimited word count as a cheap token proxy. filter(Boolean)
// drops empty fragments so "" counts as 0 tokens (the bare split returned
// [""] → 1) and leading whitespace doesn't add a phantom token.
const tokens = text.split(/\s+/).filter(Boolean).length;
const elapsed = (performance.now() - startTime) / 1000;
return elapsed > 0.5 ? Math.round(tokens / elapsed * 10) / 10 : null;
}
// Full-screen status shell: shows the app title with arbitrary status
// content (error text, loading UI) rendered below it via `children`.
// NOTE(review): the JSX element tags in this return appear to have been
// stripped during extraction — only text/expression children remain.
// Left byte-identical; recover the original markup from version control.
function StatusScreen({ children }) {
return (
Gemma 4 WebGPU
{children}
);
}
/**
 * Root component: drives model loading via useModel(), maintains the chat
 * transcript and streaming state, and renders either a loading/landing
 * screen or the chat UI.
 *
 * NOTE(review): the JSX in the return statements below appears to have been
 * stripped of element tags during extraction — only text and expression
 * children remain. The markup is left byte-identical pending recovery of
 * the original from version control.
 */
export default function App() {
const { status, loadProgress, error, checkWebGPU, loadModel, generate } = useModel();
// Chat transcript: [{ role, content, imageUrl?, audioUrl?, videoUrl? }, ...]
const [messages, setMessages] = useState([]);
// Partial assistant text while a generation streams in.
const [streamingText, setStreamingText] = useState("");
const [isStreaming, setIsStreaming] = useState(false);
const [processingStep, setProcessingStep] = useState(null); // null | "extracting frames" | "decoding audio" | "generating"
// Tokens-per-second readout computed by calcTokPerSec during streaming.
const [tokPerSec, setTokPerSec] = useState(null);
// Persisted flag: the model finished loading at least once on this browser.
const [isCached, setIsCached] = useState(() => localStorage.getItem("gemma4-cached") === "true");
const [enableThinking, setEnableThinking] = useState(false);
// "system" | "dark" | "light"; persisted across sessions.
const [theme, setTheme] = useState(() => localStorage.getItem("gemma4-theme") || "system");
// Apply the theme by toggling a class on <html>; "system" adds no class.
useEffect(() => {
const root = document.documentElement;
root.classList.remove("dark", "light");
if (theme !== "system") root.classList.add(theme);
localStorage.setItem("gemma4-theme", theme);
}, [theme]);
const messagesEndRef = useRef(null);
// performance.now() timestamp captured when the current generation starts.
const genStartRef = useRef(0);
const scrollRafRef = useRef(0);
// Probe WebGPU support once on mount.
useEffect(() => {
checkWebGPU();
}, [checkWebGPU]);
// Throttled scroll-to-bottom via rAF
// NOTE(review): no cleanup is returned, so a pending frame can fire after
// unmount; consider cancelling scrollRafRef.current in an effect cleanup.
useEffect(() => {
cancelAnimationFrame(scrollRafRef.current);
scrollRafRef.current = requestAnimationFrame(() => {
messagesEndRef.current?.scrollIntoView({ behavior: "smooth" });
});
}, [messages, streamingText]);
// Once the model reaches "ready", remember that its weights are cached.
useEffect(() => {
if (status === "ready") {
setIsCached(true);
localStorage.setItem("gemma4-cached", "true");
}
}, [status]);
// Build the multimodal user message, append it to the transcript, and kick
// off a streaming generation; media URLs travel alongside the messages.
const handleSubmit = useCallback(async ({ imageUrl, audioUrl, videoUrl, text }) => {
const userContent = [];
if (imageUrl) userContent.push({ type: "image" });
// Each video frame needs its own image token in the template
if (videoUrl) for (let i = 0; i < 4; i++) userContent.push({ type: "image" });
if (audioUrl) userContent.push({ type: "audio" });
userContent.push({ type: "text", text: text || "Describe this." });
const userMsg = { role: "user", content: userContent, imageUrl, audioUrl, videoUrl };
const newMessages = [...messages, userMsg];
setMessages(newMessages);
// The model sees the system prompt plus role/content only (no media URLs).
const apiMessages = [
{ role: "system", content: SYSTEM_PROMPT },
...newMessages.map((m) => ({ role: m.role, content: m.content })),
];
setStreamingText("");
setIsStreaming(true);
setTokPerSec(null);
setProcessingStep(videoUrl ? "extracting frames" : audioUrl ? "decoding audio" : "generating");
genStartRef.current = performance.now();
generate({
messages: apiMessages,
imageUrl,
videoUrl,
audioUrl,
enableThinking,
// Invoked with the accumulated text on each streamed chunk.
onUpdate: (text) => {
setProcessingStep(null);
const tps = calcTokPerSec(text, genStartRef.current);
if (tps !== null) setTokPerSec(tps);
setStreamingText(text);
},
// Invoked once at the end; on success the full text becomes a message.
onComplete: (text, err) => {
setProcessingStep(null);
if (!err && text) {
setTokPerSec(calcTokPerSec(text, genStartRef.current));
setMessages((prev) => [...prev, { role: "assistant", content: text }]);
}
setStreamingText("");
setIsStreaming(false);
},
});
}, [messages, generate, enableThinking]);
if (status === "webgpu-unavailable") {
return (
WebGPU is required. Use Chrome 113+ or Edge 113+.
);
}
if (error) {
return (
Error: {error}
);
}
const isLoading = status === "idle" || status === "webgpu-available" || status === "loading";
// NOTE(review): element tags below appear stripped (see header note);
// left byte-identical pending recovery of the original JSX.
return (
G
Gemma 4
{messages.length > 0 && !isStreaming && (
)}
{!isLoading && (
)}
{tokPerSec != null && (
{tokPerSec} tok/s
)}
{isCached && !isLoading && (
Cached
)}
In-Browser ยท WebGPU
{isLoading ? (
Multimodal AI running entirely in your browser via WebGPU
{status === "loading" ? (
) : (
)}
) : (
<>
{messages.length === 0 && !isStreaming ? (
G
How can I help?
Send text, images, audio, or video โ all processed locally.
{STARTER_PROMPTS.map((p) => (
))}
) : (
)}
{isCached &&
Cached ยท }
Powered by
Transformers.js
*Video analyzes 4 sampled frames โ a tradeoff between memory and processing speed
>
)}
);
}