Spaces:
Running
Running
| import { startTransition, useEffect, useRef, useState } from "react"; | |
| import { Camera, Film } from "lucide-react"; | |
| import { BrandMark } from "./components/BrandMark"; | |
| import { CaptureScene, type CaptureSource } from "./components/CaptureScene"; | |
| import { FluidBackdrop } from "./components/FluidBackdrop"; | |
| import { HfIcon } from "./components/HfIcon"; | |
| import { VLMProvider } from "./context/VLMProvider"; | |
| import { useVLM } from "./context/VLMContext"; | |
/**
 * Built-in questions offered to the user.
 *
 * `display` is the short label for a preset and `prompt` is the text used as
 * the prompt value; the first entry's `prompt` seeds the initial prompt state.
 */
const PROMPT_PRESETS = [
  {
    display: "Describe the scene",
    prompt: "Describe the scene in one sentence.",
  },
  { display: "What color shirt am I wearing?", prompt: "What color shirt am I wearing?" },
  { display: "What am I holding?", prompt: "What am I holding?" },
  { display: "How old do I look?", prompt: "How old do I look?" },
] as const;
/**
 * The screens rendered by `AppContent`; exactly one is shown at a time.
 */
type Scene =
  | "landing"
  | "loading"
  | "source"
  | "capture";
/**
 * Releases the browser resources held by a capture source.
 *
 * - `webcam` sources: stops every track of the media stream.
 * - any other source: revokes the object URL stored in `url`.
 *
 * Passing `null` is a no-op.
 */
function disposeSource(source: CaptureSource | null) {
  if (source) {
    if (source.kind !== "webcam") {
      URL.revokeObjectURL(source.url);
    } else {
      for (const track of source.stream.getTracks()) {
        track.stop();
      }
    }
  }
}
/**
 * Converts an unknown thrown value into text suitable for an error banner.
 *
 * @param error - Whatever was caught in a `catch` clause.
 * @returns The error's own message when it is an `Error` with a non-blank
 *   message; otherwise a generic fallback. The result is never empty, so a
 *   caller that shows the banner only for truthy text cannot silently hide a
 *   failure.
 */
function getErrorMessage(error: unknown): string {
  // An Error with an empty/whitespace message would otherwise produce an
  // empty (falsy) banner string and the failure would be invisible.
  if (error instanceof Error && error.message.trim() !== "") {
    return error.message;
  }
  return "Something went wrong.";
}
/**
 * Main screen controller for the demo.
 *
 * Owns the current `Scene`, the active capture source, the prompt text and any
 * media error, and renders exactly one scene at a time:
 * landing -> loading (model load) -> source (pick input) -> capture.
 *
 * Must be rendered inside the provider that backs `useVLM()`.
 */
function AppContent() {
  const [scene, setScene] = useState<Scene>("landing");
  const [source, setSource] = useState<CaptureSource | null>(null);
  const [prompt, setPrompt] = useState<string>(PROMPT_PRESETS[0].prompt);
  const [mediaError, setMediaError] = useState<string | null>(null);
  const fileInputRef = useRef<HTMLInputElement>(null);
  // Always holds the source that currently owns browser resources. It is
  // written synchronously (see `commitSource`) so that async handlers never
  // dispose a stale value captured by an older render.
  const sourceRef = useRef<CaptureSource | null>(null);
  const { error, loadModel, message, progress, status } = useVLM();

  // Release the active source when the component goes away.
  useEffect(() => {
    return () => {
      disposeSource(sourceRef.current);
      sourceRef.current = null;
    };
  }, []);

  // Drive the "loading" scene: start the model load and move on when ready.
  useEffect(() => {
    if (scene !== "loading") {
      return;
    }
    if (status === "ready") {
      // The model is already usable. Advance here as well as in the promise
      // callback below: this effect re-runs (and cancels the pending callback)
      // whenever `status` changes, so relying on the callback alone could
      // leave the user stuck on the loading screen.
      startTransition(() => {
        setScene("source");
      });
      return;
    }
    if (error) {
      // A failure is being shown together with the "Retry loading" button.
      // Without this guard every status change after a failure would re-run
      // this effect and call loadModel again on its own.
      return;
    }
    let cancelled = false;
    void loadModel()
      .then(() => {
        if (cancelled) {
          return;
        }
        startTransition(() => {
          setScene("source");
        });
      })
      // Failures are rendered from the `error` value returned by useVLM().
      .catch(() => undefined);
    return () => {
      cancelled = true;
    };
  }, [error, loadModel, scene, status]);

  const beginExperience = () => {
    startTransition(() => {
      setScene("loading");
    });
  };

  /** Makes `nextSource` the active source and disposes the one it replaces. */
  const commitSource = (nextSource: CaptureSource | null) => {
    const previousSource = sourceRef.current;
    sourceRef.current = nextSource;
    if (previousSource !== nextSource) {
      disposeSource(previousSource);
    }
    setSource(nextSource);
  };

  const replaceSource = (nextSource: CaptureSource) => {
    commitSource(nextSource);
    setMediaError(null);
    startTransition(() => {
      setScene("capture");
    });
  };

  const handleUseWebcam = async () => {
    try {
      if (!navigator.mediaDevices?.getUserMedia) {
        throw new Error("Camera access is not available in this browser.");
      }
      const stream = await navigator.mediaDevices.getUserMedia({
        audio: false,
        video: {
          facingMode: "user",
          width: { ideal: 1280 },
          height: { ideal: 720 },
        },
      });
      // `replaceSource` disposes whatever is active *now* (via sourceRef), so
      // a source that was selected while the permission prompt was open, or a
      // stream from an earlier click, is still released.
      replaceSource({
        kind: "webcam",
        label: "Live camera",
        stream,
      });
    } catch (cameraError) {
      setMediaError(getErrorMessage(cameraError));
    }
  };

  const openVideoPicker = () => {
    fileInputRef.current?.click();
  };

  const handleVideoSelection = (event: React.ChangeEvent<HTMLInputElement>) => {
    const file = event.target.files?.[0];
    // Reset the input so picking the same file again still fires `change`.
    event.target.value = "";
    if (!file) {
      return;
    }
    replaceSource({
      kind: "file",
      label: file.name,
      url: URL.createObjectURL(file),
    });
  };

  const exitCapture = () => {
    commitSource(null);
    setMediaError(null);
    startTransition(() => {
      setScene("source");
    });
  };

  const retryLoadModel = () => {
    // The rejection is already surfaced through `error`; swallow it here so a
    // failed retry does not become an unhandled promise rejection.
    void loadModel().catch(() => undefined);
  };

  const showBackdrop = scene !== "capture";

  return (
    <>
      {showBackdrop ? <FluidBackdrop subdued={scene === "loading"} /> : null}
      <input
        ref={fileInputRef}
        accept="video/*"
        className="hidden-file-input"
        onChange={handleVideoSelection}
        type="file"
      />
      {scene === "landing" ? (
        <button
          className="landing-scene"
          onClick={beginExperience}
          type="button"
        >
          <div className="landing-inner">
            <BrandMark />
            <div className="hero-copy">
              <h1>LFM2-VL WebGPU</h1>
              <p>
                Real-time video captioning in your browser,
                <br />
                powered by
                <HfIcon className="hero-inline-icon" />
                <span className="hero-inline-wordmark">Transformers.js</span>
              </p>
            </div>
            <div className="begin-prompt">Click anywhere to begin</div>
          </div>
        </button>
      ) : null}
      {scene === "loading" ? (
        <main className="scene-shell scene-shell--centered">
          <BrandMark />
          <section className="loading-card">
            <span className="eyebrow">Loading Model</span>
            <h2>{message}</h2>
            <div aria-hidden="true" className="progress-track">
              <div
                className="progress-fill"
                style={{
                  width: `${Math.max(progress, status === "ready" ? 100 : 6)}%`,
                }}
              />
            </div>
            <p>{Math.round(progress)}%</p>
            {error ? (
              <>
                <div className="error-banner" role="alert">
                  {error}
                </div>
                <button
                  className="primary-button"
                  onClick={retryLoadModel}
                  type="button"
                >
                  Retry loading
                </button>
              </>
            ) : null}
          </section>
        </main>
      ) : null}
      {scene === "source" ? (
        <main className="scene-shell">
          <div className="scene-header">
            <BrandMark />
          </div>
          <section className="source-card">
            <span className="eyebrow">Choose Input</span>
            <h2>Caption a live camera or a local video file.</h2>
            <p>
              The model is ready. Pick a source and we'll start captioning
              each frame as quickly as the browser can process it.
            </p>
            <div className="source-grid">
              <button
                className="source-option"
                onClick={() => void handleUseWebcam()}
                type="button"
              >
                <div className="source-option__header">
                  <Camera
                    className="source-option__icon"
                    size={28}
                    strokeWidth={1.9}
                  />
                  <strong>Webcam</strong>
                </div>
                <span>
                  Start a live camera stream and caption it in real time.
                </span>
              </button>
              <button
                className="source-option"
                onClick={openVideoPicker}
                type="button"
              >
                <div className="source-option__header">
                  <Film
                    className="source-option__icon"
                    size={28}
                    strokeWidth={1.9}
                  />
                  <strong>File</strong>
                </div>
                <span>
                  Upload a local clip and run the same caption loop against it.
                </span>
              </button>
            </div>
            {mediaError ? (
              <div className="error-banner" role="alert">
                {mediaError}
              </div>
            ) : null}
          </section>
        </main>
      ) : null}
      {scene === "capture" && source ? (
        <CaptureScene
          mediaError={mediaError}
          onChooseVideo={openVideoPicker}
          onChooseWebcam={handleUseWebcam}
          onDismissMediaError={() => setMediaError(null)}
          onExit={exitCapture}
          onPromptChange={setPrompt}
          prompt={prompt}
          promptPresets={PROMPT_PRESETS}
          source={source}
        />
      ) : null}
    </>
  );
}
/**
 * Application root: renders `AppContent` inside `VLMProvider` so that the
 * `useVLM()` call in `AppContent` has a provider above it.
 */
function App() {
  const content = <AppContent />;
  return <VLMProvider>{content}</VLMProvider>;
}

export default App;