/**
 * Application entry module.
 *
 * Defines the preset prompts, two small helpers (releasing a capture source
 * and normalising an unknown error into a message), the `AppContent`
 * component that switches between the "landing", "loading", "source" and
 * "capture" scenes, and the default-exported `App` root component.
 *
 * NOTE(review): in this view of the file the JSX element tags are absent
 * (for example the branches of the scene ternaries are empty and `App`
 * returns an empty pair of parentheses); only JSX text and `{...}`
 * expression containers remain. Confirm against the original file before
 * relying on the render output shown here.
 */
import { startTransition, useEffect, useRef, useState } from "react";
import { Camera, Film } from "lucide-react";
import { BrandMark } from "./components/BrandMark";
import { CaptureScene, type CaptureSource } from "./components/CaptureScene";
import { FluidBackdrop } from "./components/FluidBackdrop";
import { HfIcon } from "./components/HfIcon";
import { VLMProvider } from "./context/VLMProvider";
import { useVLM } from "./context/VLMContext";

/**
 * Preset prompt options. The first entry's `prompt` seeds the initial prompt
 * state in `AppContent`, and the whole list is handed on through the
 * `promptPresets` prop in the capture-scene markup.
 * `display` is presumably the label shown to the user - confirm in the
 * consuming component.
 */
const PROMPT_PRESETS = [
  { display: "Describe the scene", prompt: "Describe the scene in one sentence.", },
  { display: "What color shirt am I wearing?", prompt: "What color shirt am I wearing?", },
  { display: "What am I holding?", prompt: "What am I holding?", },
  { display: "How old do I look?", prompt: "How old do I look?", },
] as const;

/**
 * The four UI stages `AppContent` moves between.
 * NOTE(review): no visible code references this alias (the `useState` calls
 * below carry no type arguments in this view) - confirm whether generics
 * were lost together with the JSX tags.
 */
type Scene = "landing" | "loading" | "source" | "capture";

/**
 * Releases the resources held by a capture source.
 *
 * - `null`: nothing to do.
 * - `kind === "webcam"`: stops every track of `source.stream`.
 * - any other kind: revokes the object URL stored in `source.url`.
 *
 * @param source The source to release, or `null`.
 */
function disposeSource(source: CaptureSource | null) {
  if (!source) { return; }
  if (source.kind === "webcam") {
    source.stream.getTracks().forEach((track) => track.stop());
    return;
  }
  URL.revokeObjectURL(source.url);
}

/**
 * Turns an unknown thrown value into a displayable message.
 *
 * @param error Any caught value.
 * @returns `error.message` when `error` is an `Error` instance, otherwise the
 *   generic fallback string.
 */
function getErrorMessage(error: unknown) {
  if (error instanceof Error) { return error.message; }
  return "Something went wrong.";
}

/**
 * Main UI component: owns the current scene, the active capture source, the
 * current prompt and the last media error, and reads model state from
 * `useVLM()`.
 */
function AppContent() {
  const [scene, setScene] = useState("landing");
  const [source, setSource] = useState(null);
  const [prompt, setPrompt] = useState(PROMPT_PRESETS[0].prompt);
  const [mediaError, setMediaError] = useState(null);
  const fileInputRef = useRef(null);
  const sourceRef = useRef(null);
  const { error, loadModel, message, progress, status } = useVLM();

  // Mirror the latest `source` into a ref so the unmount cleanup below, which
  // is registered once with an empty dependency list, can still read the
  // current value.
  useEffect(() => { sourceRef.current = source; }, [source]);

  // On unmount, release whatever source the ref currently holds.
  useEffect(() => { return () => { disposeSource(sourceRef.current); }; }, []);

  // While the scene is "loading" and the model is not yet "ready", start
  // loading the model and switch to the "source" scene once it resolves.
  // `cancelled` is set by the cleanup so a resolution that arrives after the
  // effect was torn down does not change the scene.
  // A rejection of `loadModel()` is swallowed at this call site; `error` from
  // `useVLM()` is read above, so the failure is presumably surfaced through
  // it - confirm.
  // NOTE(review): when `status` is already "ready" at the moment the scene
  // becomes "loading", this effect returns early and nothing in it advances
  // the scene - confirm that combination cannot occur.
  useEffect(() => {
    if (scene !== "loading" || status === "ready") { return; }
    let cancelled = false;
    void loadModel()
      .then(() => {
        if (cancelled) { return; }
        startTransition(() => { setScene("source"); });
      })
      .catch(() => undefined);
    return () => { cancelled = true; };
  }, [loadModel, scene, status]);

  // Moves from the landing scene to the loading scene.
  const beginExperience = () => { startTransition(() => { setScene("loading"); }); };

  // Releases the current source, clears any media error, stores the new
  // source and switches to the capture scene.
  // NOTE(review): `source` here is the value captured by this render's
  // closure; when this runs after the `await` in `handleUseWebcam` it may be
  // older than `sourceRef.current` - confirm this is acceptable.
  const replaceSource = (nextSource: CaptureSource) => {
    disposeSource(source);
    setMediaError(null);
    setSource(nextSource);
    startTransition(() => { setScene("capture"); });
  };

  // Requests a video-only stream (facingMode "user", ideal 1280x720) and uses
  // it as the capture source. A missing `getUserMedia` API or a rejected
  // request ends up as a message in `mediaError`.
  const handleUseWebcam = async () => {
    try {
      if (!navigator.mediaDevices?.getUserMedia) {
        throw new Error("Camera access is not available in this browser.");
      }
      const stream = await navigator.mediaDevices.getUserMedia({
        audio: false,
        video: { facingMode: "user", width: { ideal: 1280 }, height: { ideal: 720 }, },
      });
      replaceSource({ kind: "webcam", label: "Live camera", stream, });
    } catch (cameraError) {
      setMediaError(getErrorMessage(cameraError));
    }
  };

  // Programmatically clicks the element held in `fileInputRef`, if any.
  const openVideoPicker = () => { fileInputRef.current?.click(); };

  // Takes the first selected file, wraps it in an object URL and uses it as
  // the capture source. The input value is cleared before the early return -
  // presumably so that picking the same file again still fires a change
  // event (confirm). The object URL is later revoked by `disposeSource`.
  const handleVideoSelection = (event: React.ChangeEvent) => {
    const file = event.target.files?.[0];
    event.target.value = "";
    if (!file) { return; }
    replaceSource({ kind: "file", label: file.name, url: URL.createObjectURL(file), });
  };

  // Releases the current source, clears source and media error state, and
  // returns to the source-selection scene.
  const exitCapture = () => {
    disposeSource(source);
    setSource(null);
    setMediaError(null);
    startTransition(() => { setScene("source"); });
  };

  // The backdrop branch is taken in every scene except "capture".
  const showBackdrop = scene !== "capture";

  // NOTE(review): the element tags of the markup below are not present in
  // this view; the remaining text and expression containers are left exactly
  // as found.
  return ( <> {showBackdrop ? : null} {scene === "landing" ? ( ) : null} {scene === "loading" ? (
Loading Model

{message}

) : null} {scene === "source" ? (
Choose Input

Caption a live camera or a local video file.

The model is ready. Pick a source and we'll start captioning each frame as quickly as the browser can process it.

{mediaError ? (
{mediaError}
) : null}
) : null} {scene === "capture" && source ? ( setMediaError(null)} onExit={exitCapture} onPromptChange={setPrompt} prompt={prompt} promptPresets={PROMPT_PRESETS} source={source} /> ) : null} ); }

/**
 * Root component and default export.
 * NOTE(review): the returned markup is not visible in this view. Given the
 * imports, it presumably renders `AppContent` inside `VLMProvider` -
 * confirm against the original file.
 */
function App() { return ( ); }

export default App;