Spaces:
Running
Running
| import { | |
| startTransition, | |
| useDeferredValue, | |
| useEffect, | |
| useEffectEvent, | |
| useRef, | |
| useState, | |
| } from "react"; | |
| import { ArrowLeft, Camera, Film, Pause, Play } from "lucide-react"; | |
| import { BrandMark } from "./BrandMark"; | |
| import { useVLM } from "../context/VLMContext"; | |
/** Live camera input: the scene attaches `stream` to the video element. */
type WebcamCaptureSource = {
  kind: "webcam";
  label: string;
  stream: MediaStream;
};

/** Pre-recorded input: the scene assigns `url` to the video element's `src`. */
type FileCaptureSource = {
  kind: "file";
  label: string;
  url: string;
};

/**
 * The media being captioned, discriminated by `kind`.
 *
 * `label` is the human-readable name of the source; the scene shows it in the
 * status pill for file sources.
 */
export type CaptureSource = WebcamCaptureSource | FileCaptureSource;
/** A finished caption kept in the on-screen history. */
type CaptionEntry = {
  /** Unique identifier, used as the React list key when rendering. */
  id: string;
  /** Whitespace-normalized caption text. */
  text: string;
};
/** A one-click prompt shortcut rendered as a chip above the prompt input. */
type PromptPreset = {
  /** Text shown on the chip (also used as its React key). */
  display: string;
  /** Prompt handed to `onPromptChange` when the chip is clicked. */
  prompt: string;
};

/** Props accepted by {@link CaptureScene}. */
type CaptureSceneProps = {
  /** Media to play and caption. */
  source: CaptureSource;

  /** Current prompt text (controlled value of the prompt textarea). */
  prompt: string;
  /** Preset prompts; the first one is used when `prompt` is blank. */
  promptPresets: readonly PromptPreset[];
  /** Called with the new prompt when the user types or picks a preset. */
  onPromptChange: (prompt: string) => void;

  /** Media error to show in a dismissible alert, or `null` for none. */
  mediaError: string | null;
  /** Called when the user presses "Dismiss" on the media error alert. */
  onDismissMediaError: () => void;

  /** Called when the user presses the "Video file" button. */
  onChooseVideo: () => void;
  /** Called when the user presses the "Webcam" button. */
  onChooseWebcam: () => Promise<void>;
  /** Called when the user presses the "Back" button. */
  onExit: () => void;
};
/** Maximum number of finished captions retained in the caption history. */
const CAPTION_LIMIT = 4;
/**
 * Resolves after the given delay.
 *
 * @param milliseconds Delay passed to `window.setTimeout`.
 * @returns A promise that resolves (with no value) once the timer fires.
 */
function wait(milliseconds: number) {
  return new Promise<void>((settle) => {
    window.setTimeout(() => settle(), milliseconds);
  });
}
/**
 * Produces an identifier for a caption history entry.
 *
 * Uses `crypto.randomUUID()` when the runtime provides it; otherwise builds an
 * id from the current timestamp and a random number.
 */
function createCaptionId() {
  const uuid = globalThis.crypto?.randomUUID?.();
  if (uuid != null) {
    return uuid;
  }

  return `caption-${Date.now()}-${Math.random()}`;
}
/**
 * Collapses every run of whitespace into a single space and strips leading and
 * trailing whitespace.
 *
 * @param text Raw text (a prompt or a generated caption).
 * @returns The normalized text; `""` when `text` is empty or only whitespace.
 */
function normalizePrompt(text: string) {
  const words = text.split(/\s+/).filter((word) => word.length > 0);
  return words.join(" ");
}
/**
 * Converts a caught value into text suitable for the runtime error alert.
 *
 * @param error Whatever was thrown.
 * @returns `error.message` for `Error` instances, otherwise a generic message.
 */
function getErrorMessage(error: unknown) {
  const genericMessage =
    "Something went wrong while captioning the current frame.";

  return error instanceof Error ? error.message : genericMessage;
}
/**
 * Capture view: plays `source` in a `<video>`, repeatedly grabs the current
 * frame, sends it with the prompt to `generateCaption` (from `useVLM`), and
 * renders the streamed "live" caption plus a short history of finished ones.
 *
 * Behaviour worth knowing:
 * - A blank `prompt` falls back to the first preset's prompt, or to `""` when
 *   `promptPresets` is empty.
 * - Changing `source` resets captions/pause state and restarts the caption
 *   loop; results from the previous loop are ignored via `loopIdRef`.
 * - While paused no new pass is started and streamed text from a pass that is
 *   still in flight is not shown.
 */
export function CaptureScene({
  mediaError,
  onChooseVideo,
  onChooseWebcam,
  onDismissMediaError,
  onExit,
  onPromptChange,
  prompt,
  promptPresets,
  source,
}: CaptureSceneProps) {
  const { generateCaption } = useVLM();
  const videoRef = useRef<HTMLVideoElement>(null);
  const canvasRef = useRef<HTMLCanvasElement>(null);
  // Incremented whenever the caption loop starts or stops; async work compares
  // its own id against this to detect that it has become stale.
  const loopIdRef = useRef(0);
  const [activeCaption, setActiveCaption] = useState("");
  const [captionHistory, setCaptionHistory] = useState<CaptionEntry[]>([]);
  const [isGenerating, setIsGenerating] = useState(false);
  const [isPaused, setIsPaused] = useState(false);
  const [runtimeError, setRuntimeError] = useState<string | null>(null);
  const [videoReady, setVideoReady] = useState(false);

  // Guarded access: an empty preset list must not crash rendering.
  const fallbackPrompt = promptPresets[0]?.prompt ?? "";
  const deferredPrompt = useDeferredValue(
    normalizePrompt(prompt) || fallbackPrompt,
  );

  // Attach the current source to the <video> element and detach it on change.
  useEffect(() => {
    const video = videoRef.current;
    if (!video) {
      return;
    }

    setVideoReady(false);
    setRuntimeError(null);

    if (source.kind === "webcam") {
      video.srcObject = source.stream;
      video.removeAttribute("src");
      // Best effort: a rejected play() is deliberately ignored.
      void video.play().catch(() => undefined);

      return () => {
        video.pause();
        video.srcObject = null;
      };
    }

    video.srcObject = null;
    video.src = source.url;
    video.load();
    // Best effort: a rejected play() is deliberately ignored.
    void video.play().catch(() => undefined);

    return () => {
      video.pause();
      video.removeAttribute("src");
      video.load();
    };
  }, [source]);

  // A new source starts with a clean caption state and un-paused.
  useEffect(() => {
    setCaptionHistory([]);
    setActiveCaption("");
    setIsGenerating(false);
    setIsPaused(false);
  }, [source]);

  // Pausing hides the live caption bubble immediately.
  useEffect(() => {
    if (!isPaused) {
      return;
    }

    setActiveCaption("");
    setIsGenerating(false);
  }, [isPaused]);

  const handleCanPlay = () => {
    setVideoReady(true);
    void videoRef.current?.play().catch(() => undefined);
  };

  // Draws the current video frame onto the canvas, scaled so its longest side
  // is at most 960px, and returns the pixels; returns null when no frame is
  // available (video not ready, paused, ended, or without dimensions).
  const captureFrame = useEffectEvent(() => {
    const video = videoRef.current;
    const canvas = canvasRef.current;

    if (
      !video ||
      !canvas ||
      !videoReady ||
      video.paused ||
      video.ended ||
      video.readyState < HTMLMediaElement.HAVE_CURRENT_DATA ||
      video.videoWidth === 0 ||
      video.videoHeight === 0
    ) {
      return null;
    }

    const maxDimension = 960;
    const scale = Math.min(
      1,
      maxDimension / Math.max(video.videoWidth, video.videoHeight),
    );
    const width = Math.max(1, Math.round(video.videoWidth * scale));
    const height = Math.max(1, Math.round(video.videoHeight * scale));

    // Assigning canvas dimensions clears the canvas, so only do it on change.
    if (canvas.width !== width) {
      canvas.width = width;
    }
    if (canvas.height !== height) {
      canvas.height = height;
    }

    const context = canvas.getContext("2d", { willReadFrequently: true });
    if (!context) {
      return null;
    }

    context.drawImage(video, 0, 0, width, height);
    return context.getImageData(0, 0, width, height);
  });

  // Publishes streamed caption text. Being an Effect Event it sees the latest
  // `isPaused`, so a generation that was already running when the user paused
  // cannot bring the live bubble back after the pause effect cleared it.
  const showStreamedCaption = useEffectEvent((loopId: number, text: string) => {
    if (loopIdRef.current !== loopId || isPaused) {
      return;
    }
    setActiveCaption(text);
  });

  // Runs one capture -> caption cycle for the loop identified by `loopId`.
  const runCaptionPass = useEffectEvent(async (loopId: number) => {
    if (isPaused) {
      await wait(120);
      return;
    }

    const frame = captureFrame();
    if (!frame) {
      await wait(120);
      return;
    }

    setRuntimeError(null);
    setIsGenerating(true);
    setActiveCaption("");

    try {
      const finalCaption = await generateCaption({
        frame,
        onStream: (text) => {
          showStreamedCaption(loopId, text);
        },
        prompt: deferredPrompt,
      });

      // The loop was replaced while we were waiting: drop the result.
      if (loopIdRef.current !== loopId) {
        return;
      }

      const normalizedCaption = normalizePrompt(finalCaption);
      if (normalizedCaption.length === 0) {
        return;
      }

      startTransition(() => {
        setCaptionHistory((current) => {
          // Skip a caption identical to the most recent entry.
          if (current[0]?.text === normalizedCaption) {
            return current;
          }

          return [
            { id: createCaptionId(), text: normalizedCaption },
            ...current,
          ].slice(0, CAPTION_LIMIT);
        });
      });
    } catch (error) {
      if (loopIdRef.current !== loopId) {
        return;
      }

      setRuntimeError(getErrorMessage(error));
      // Back off briefly before the loop retries.
      await wait(240);
    } finally {
      if (loopIdRef.current === loopId) {
        setActiveCaption("");
        setIsGenerating(false);
      }
    }
  });

  // Caption loop: one per source. Cleanup bumps the id so any pass still in
  // flight recognises itself as stale.
  useEffect(() => {
    loopIdRef.current += 1;
    const currentLoopId = loopIdRef.current;
    let mounted = true;

    const loop = async () => {
      while (mounted && loopIdRef.current === currentLoopId) {
        await runCaptionPass(currentLoopId);
        await wait(72);
      }
    };

    void loop();

    return () => {
      mounted = false;
      loopIdRef.current += 1;
    };
  }, [source]);

  // History is stored newest-first; render oldest-first so the newest entry
  // sits closest to the live bubble.
  const displayedHistory = [...captionHistory].reverse();

  return (
    <main className="capture-scene">
      <video
        ref={videoRef}
        autoPlay
        className="capture-video"
        loop={source.kind === "file"}
        muted
        onCanPlay={handleCanPlay}
        playsInline
      />
      <canvas ref={canvasRef} className="capture-canvas" />
      <div className="capture-scrim" />

      <header className="capture-toolbar">
        <div className="capture-toolbar__left">
          <BrandMark />
          <div className="status-pill">
            <span className={`status-dot ${videoReady ? "is-live" : ""}`} />
            {source.kind === "webcam" ? "Webcam" : source.label}
          </div>
        </div>
      </header>

      {mediaError ? (
        <div className="floating-alert" role="alert">
          <span>{mediaError}</span>
          <button
            className="ghost-button ghost-button--small"
            onClick={onDismissMediaError}
            type="button"
          >
            Dismiss
          </button>
        </div>
      ) : null}

      {runtimeError ? (
        <div className="floating-alert floating-alert--secondary" role="alert">
          <span>{runtimeError}</span>
        </div>
      ) : null}

      <section className="prompt-dock">
        <span className="dock-label">Prompt</span>
        <div className="prompt-chip-row">
          {promptPresets.map((preset) => (
            <button
              key={preset.display}
              className={`prompt-chip ${prompt === preset.prompt ? "is-active" : ""}`}
              onClick={() => onPromptChange(preset.prompt)}
              type="button"
            >
              {preset.display}
            </button>
          ))}
        </div>
        <textarea
          className="prompt-input"
          onChange={(event) => onPromptChange(event.target.value)}
          placeholder="Ask the model anything about the current frame."
          rows={3}
          spellCheck={false}
          value={prompt}
        />
      </section>

      <section className="capture-side-rail">
        <div className="capture-actions">
          <button
            className="ghost-button"
            onClick={() => setIsPaused((current) => !current)}
            type="button"
          >
            {isPaused ? (
              <Play className="button-icon" size={16} strokeWidth={1.8} />
            ) : (
              <Pause className="button-icon" size={16} strokeWidth={1.8} />
            )}
            {isPaused ? "Resume" : "Pause"}
          </button>
          <button
            className="ghost-button"
            onClick={() => void onChooseWebcam()}
            type="button"
          >
            <Camera className="button-icon" size={16} strokeWidth={1.8} />
            Webcam
          </button>
          <button
            className="ghost-button"
            onClick={onChooseVideo}
            type="button"
          >
            <Film className="button-icon" size={16} strokeWidth={1.8} />
            Video file
          </button>
          <button className="ghost-button" onClick={onExit} type="button">
            <ArrowLeft className="button-icon" size={16} strokeWidth={1.8} />
            Back
          </button>
        </div>

        <section className="caption-dock">
          {displayedHistory.map((caption, index) => {
            // Older entries (larger depth) are fainter, smaller and shifted up.
            const depth = displayedHistory.length - index;
            const opacity = Math.max(0.18, 1 - depth * 0.18);
            const scale = 1 - depth * 0.04;

            return (
              <article
                key={caption.id}
                className="caption-bubble caption-bubble--history"
                style={{
                  opacity,
                  transform: `translateY(${-depth * 8}px) scale(${scale})`,
                }}
              >
                {caption.text}
              </article>
            );
          })}

          {activeCaption || isGenerating ? (
            <article className="caption-bubble caption-bubble--active">
              <div className="caption-meta">Live caption</div>
              {activeCaption || (
                <span className="caption-placeholder">
                  Scanning current frame...
                </span>
              )}
            </article>
          ) : null}
        </section>
      </section>
    </main>
  );
}