// Repository path: LFM2-VL-WebGPU / src / App.tsx
// Commit 01488bc (mlabonne): "upload demo files (#1)"
import { startTransition, useEffect, useRef, useState } from "react";
import { Camera, Film } from "lucide-react";
import { BrandMark } from "./components/BrandMark";
import { CaptureScene, type CaptureSource } from "./components/CaptureScene";
import { FluidBackdrop } from "./components/FluidBackdrop";
import { HfIcon } from "./components/HfIcon";
import { VLMProvider } from "./context/VLMProvider";
import { useVLM } from "./context/VLMContext";
/**
 * Built-in prompt shortcuts.
 *
 * Each entry pairs a `display` string with the `prompt` string it stands for.
 * The first entry's `prompt` seeds the initial prompt state in `AppContent`,
 * and the whole list is handed to `CaptureScene` as `promptPresets`.
 */
const PROMPT_PRESETS = [
  { display: "Describe the scene", prompt: "Describe the scene in one sentence." },
  { display: "What color shirt am I wearing?", prompt: "What color shirt am I wearing?" },
  { display: "What am I holding?", prompt: "What am I holding?" },
  { display: "How old do I look?", prompt: "How old do I look?" },
] as const;
/** The screen currently shown by `AppContent`; exactly one is rendered at a time. */
type Scene =
  | "landing" // click-to-begin splash
  | "loading" // model load progress / error + retry
  | "source" // choose webcam or local video file
  | "capture"; // CaptureScene running against the chosen source
/**
 * Releases whatever resource a capture source holds.
 *
 * - `null` (or any falsy value): nothing to do.
 * - `kind === "webcam"`: stops every track of the source's stream.
 * - any other kind: revokes the source's object URL.
 *
 * @param source The source to release, or `null` when none is active.
 */
function disposeSource(source: CaptureSource | null) {
  if (!source) {
    return;
  }

  if (source.kind !== "webcam") {
    URL.revokeObjectURL(source.url);
    return;
  }

  for (const track of source.stream.getTracks()) {
    track.stop();
  }
}
/**
 * Turns an arbitrary thrown value into text suitable for display.
 *
 * @param error Anything caught from a `try`/`catch`.
 * @returns The `message` of an `Error` instance, otherwise a generic fallback.
 */
function getErrorMessage(error: unknown): string {
  return error instanceof Error ? error.message : "Something went wrong.";
}
/**
 * Drives the whole experience: landing → model loading → source selection →
 * capture.
 *
 * Owns the active capture source (a webcam stream or an object URL for a local
 * video file) and releases it whenever it is replaced, dismissed, or the
 * component unmounts. Model loading state (`status`, `progress`, `message`,
 * `error`) and the `loadModel` action come from `useVLM()`.
 */
function AppContent() {
  const [scene, setScene] = useState<Scene>("landing");
  const [source, setSource] = useState<CaptureSource | null>(null);
  const [prompt, setPrompt] = useState<string>(PROMPT_PRESETS[0].prompt);
  const [mediaError, setMediaError] = useState<string | null>(null);
  const fileInputRef = useRef<HTMLInputElement>(null);
  // Authoritative handle on the source that currently holds a live resource.
  // It is written synchronously in `swapSource`, so async handlers (which close
  // over a possibly outdated `source` state value) and the unmount cleanup
  // always dispose the right object.
  const sourceRef = useRef<CaptureSource | null>(null);
  const { error, loadModel, message, progress, status } = useVLM();

  // Release the active source when the component goes away.
  useEffect(() => {
    return () => {
      disposeSource(sourceRef.current);
    };
  }, []);

  // While the loading scene is shown, make sure the model gets loaded and move
  // on to source selection once it is available.
  useEffect(() => {
    if (scene !== "loading") {
      return undefined;
    }

    if (status === "ready") {
      // The model is already usable. This covers the case where this effect
      // re-runs because `status` changed before the `.then` below fired: the
      // cleanup has cancelled that continuation, so advance from here instead
      // of leaving the loading screen up indefinitely.
      startTransition(() => {
        setScene("source");
      });
      return undefined;
    }

    let cancelled = false;

    void loadModel()
      .then(() => {
        if (cancelled) {
          return;
        }

        startTransition(() => {
          setScene("source");
        });
      })
      // Nothing to do on rejection here; the failure is rendered from the
      // `error` value provided by `useVLM()`.
      .catch(() => undefined);

    return () => {
      cancelled = true;
    };
  }, [loadModel, scene, status]);

  /** Leaves the landing screen and starts loading the model. */
  const beginExperience = () => {
    startTransition(() => {
      setScene("loading");
    });
  };

  /** Manual retry from the loading screen's error state. */
  const retryLoad = () => {
    // Attach a rejection handler so a second failure does not surface as an
    // unhandled promise rejection; the loading effect treats `loadModel()` as
    // able to reject in the same way.
    void loadModel().catch(() => undefined);
  };

  /**
   * Single entry point for changing the active source: disposes whichever
   * source is live right now, then records and publishes the next one.
   */
  const swapSource = (nextSource: CaptureSource | null) => {
    disposeSource(sourceRef.current);
    sourceRef.current = nextSource;
    setSource(nextSource);
  };

  /** Activates a freshly acquired source and switches to the capture scene. */
  const replaceSource = (nextSource: CaptureSource) => {
    swapSource(nextSource);
    setMediaError(null);
    startTransition(() => {
      setScene("capture");
    });
  };

  /** Requests the user's camera and, on success, starts capturing from it. */
  const handleUseWebcam = async () => {
    try {
      if (!navigator.mediaDevices?.getUserMedia) {
        throw new Error("Camera access is not available in this browser.");
      }

      const stream = await navigator.mediaDevices.getUserMedia({
        audio: false,
        video: {
          facingMode: "user",
          width: { ideal: 1280 },
          height: { ideal: 720 },
        },
      });

      replaceSource({
        kind: "webcam",
        label: "Live camera",
        stream,
      });
    } catch (cameraError) {
      setMediaError(getErrorMessage(cameraError));
    }
  };

  /** Opens the native file chooser through the hidden `<input type="file">`. */
  const openVideoPicker = () => {
    fileInputRef.current?.click();
  };

  /** Starts capturing from the video file the user just picked, if any. */
  const handleVideoSelection = (event: React.ChangeEvent<HTMLInputElement>) => {
    const file = event.target.files?.[0];
    // Clear the input so choosing the same file again still fires `change`.
    event.target.value = "";

    if (!file) {
      return;
    }

    replaceSource({
      kind: "file",
      label: file.name,
      url: URL.createObjectURL(file),
    });
  };

  /** Releases the active source and returns to source selection. */
  const exitCapture = () => {
    swapSource(null);
    setMediaError(null);
    startTransition(() => {
      setScene("source");
    });
  };

  const showBackdrop = scene !== "capture";

  return (
    <>
      {showBackdrop ? <FluidBackdrop subdued={scene === "loading"} /> : null}

      {/* Rendered in every scene: both the source screen and CaptureScene open it. */}
      <input
        ref={fileInputRef}
        accept="video/*"
        className="hidden-file-input"
        onChange={handleVideoSelection}
        type="file"
      />

      {scene === "landing" ? (
        <button
          className="landing-scene"
          onClick={beginExperience}
          type="button"
        >
          <div className="landing-inner">
            <BrandMark />
            <div className="hero-copy">
              <h1>LFM2-VL WebGPU</h1>
              <p>
                Real-time video captioning in your browser,
                <br />
                powered by
                <HfIcon className="hero-inline-icon" />
                <span className="hero-inline-wordmark">Transformers.js</span>
              </p>
            </div>
            <div className="begin-prompt">Click anywhere to begin</div>
          </div>
        </button>
      ) : null}

      {scene === "loading" ? (
        <main className="scene-shell scene-shell--centered">
          <BrandMark />
          <section className="loading-card">
            <span className="eyebrow">Loading Model</span>
            <h2>{message}</h2>
            <div aria-hidden="true" className="progress-track">
              <div
                className="progress-fill"
                style={{
                  // Keep a sliver visible (6%) before progress is reported;
                  // force a full bar once the model is ready.
                  width: `${Math.max(progress, status === "ready" ? 100 : 6)}%`,
                }}
              />
            </div>
            <p>{Math.round(progress)}%</p>
            {error ? (
              <>
                <div className="error-banner" role="alert">
                  {error}
                </div>
                <button
                  className="primary-button"
                  onClick={retryLoad}
                  type="button"
                >
                  Retry loading
                </button>
              </>
            ) : null}
          </section>
        </main>
      ) : null}

      {scene === "source" ? (
        <main className="scene-shell">
          <div className="scene-header">
            <BrandMark />
          </div>
          <section className="source-card">
            <span className="eyebrow">Choose Input</span>
            <h2>Caption a live camera or a local video file.</h2>
            <p>
              The model is ready. Pick a source and we&apos;ll start captioning
              each frame as quickly as the browser can process it.
            </p>
            <div className="source-grid">
              <button
                className="source-option"
                onClick={() => void handleUseWebcam()}
                type="button"
              >
                <div className="source-option__header">
                  <Camera
                    className="source-option__icon"
                    size={28}
                    strokeWidth={1.9}
                  />
                  <strong>Webcam</strong>
                </div>
                <span>
                  Start a live camera stream and caption it in real time.
                </span>
              </button>
              <button
                className="source-option"
                onClick={openVideoPicker}
                type="button"
              >
                <div className="source-option__header">
                  <Film
                    className="source-option__icon"
                    size={28}
                    strokeWidth={1.9}
                  />
                  <strong>File</strong>
                </div>
                <span>
                  Upload a local clip and run the same caption loop against it.
                </span>
              </button>
            </div>
            {mediaError ? (
              <div className="error-banner" role="alert">
                {mediaError}
              </div>
            ) : null}
          </section>
        </main>
      ) : null}

      {scene === "capture" && source ? (
        <CaptureScene
          mediaError={mediaError}
          onChooseVideo={openVideoPicker}
          onChooseWebcam={handleUseWebcam}
          onDismissMediaError={() => setMediaError(null)}
          onExit={exitCapture}
          onPromptChange={setPrompt}
          prompt={prompt}
          promptPresets={PROMPT_PRESETS}
          source={source}
        />
      ) : null}
    </>
  );
}
/**
 * Application root (the module's default export): renders `AppContent` inside
 * `VLMProvider`, presumably so that the `useVLM()` call in `AppContent` has a
 * provider above it.
 */
export default function App() {
  return (
    <VLMProvider>
      <AppContent />
    </VLMProvider>
  );
}