Spaces:
Running
Running
| import { useState, useRef, useEffect, useCallback } from "react"; | |
| import WebcamCapture from "./WebcamCapture"; | |
| import PromptInput from "./PromptInput"; | |
| import LiveCaption, { type HistoryEntry } from "./LiveCaption"; | |
| import { useVLMContext } from "../context/useVLMContext"; | |
| import { PROMPTS, TIMING, THEME } from "../constants"; | |
| import type { VideoSource } from "../types"; | |
/** Attributes shared by every 16px outline icon in {@link SOURCE_ICONS}. */
const ICON_SVG_PROPS = {
  className: "w-4 h-4",
  fill: "none",
  viewBox: "0 0 24 24",
  stroke: "currentColor",
  strokeWidth: 2,
} as const;

/** Stroke attributes shared by every `<path>` in {@link SOURCE_ICONS}. */
const ICON_PATH_PROPS = {
  strokeLinecap: "round",
  strokeLinejoin: "round",
} as const;

/**
 * Icon element for each video source type, keyed by the source's `type`
 * string. Looking up a key that is not listed yields `undefined`, which
 * React renders as nothing.
 */
const SOURCE_ICONS: Record<string, React.ReactNode> = {
  webcam: (
    <svg {...ICON_SVG_PROPS}>
      <path
        {...ICON_PATH_PROPS}
        d="M15.75 10.5l4.72-4.72a.75.75 0 011.28.53v11.38a.75.75 0 01-1.28.53l-4.72-4.72M4.5 18.75h9a2.25 2.25 0 002.25-2.25v-9a2.25 2.25 0 00-2.25-2.25h-9A2.25 2.25 0 002.25 7.5v9a2.25 2.25 0 002.25 2.25z"
      />
    </svg>
  ),
  screen: (
    <svg {...ICON_SVG_PROPS}>
      <path
        {...ICON_PATH_PROPS}
        d="M9 17.25v1.007a3 3 0 01-.879 2.122L7.5 21h9l-.621-.621A3 3 0 0115 18.257V17.25m6-12V15a2.25 2.25 0 01-2.25 2.25H5.25A2.25 2.25 0 013 15V5.25m18 0A2.25 2.25 0 0018.75 3H5.25A2.25 2.25 0 003 5.25m18 0V12a2.25 2.25 0 01-2.25 2.25H5.25A2.25 2.25 0 013 12V5.25"
      />
    </svg>
  ),
  upload: (
    <svg {...ICON_SVG_PROPS}>
      <path
        {...ICON_PATH_PROPS}
        d="M3 16.5v2.25A2.25 2.25 0 005.25 21h13.5A2.25 2.25 0 0021 18.75V16.5m-13.5-9L12 3m0 0l4.5 4.5M12 3v13.5"
      />
    </svg>
  ),
  example: (
    <svg {...ICON_SVG_PROPS}>
      <path {...ICON_PATH_PROPS} d="M21 12a9 9 0 11-18 0 9 9 0 0118 0z" />
      <path
        {...ICON_PATH_PROPS}
        d="M15.91 11.672a.375.375 0 010 .656l-5.603 3.113a.375.375 0 01-.557-.328V8.887c0-.286.307-.466.557-.327l5.603 3.112z"
      />
    </svg>
  ),
};
/**
 * Human-readable label for each video source type, keyed by the source's
 * `type` string. Callers are expected to supply their own fallback for keys
 * that are not listed here.
 */
const SOURCE_LABELS: Record<string, string> = Object.fromEntries<string>([
  ["webcam", "Webcam"],
  ["screen", "Screen"],
  ["upload", "Upload"],
  ["example", "Example"],
]);
/** Props accepted by the `CaptioningView` component. */
interface CaptioningViewProps {
  /** Ref to the video element whose frames are handed to the captioning loop. */
  videoRef: React.RefObject<HTMLVideoElement | null>;
  /**
   * Currently selected video source. Used to pick the icon and label of the
   * source indicator; the indicator is hidden when this is missing.
   */
  videoSource?: VideoSource | null;
  /**
   * Invoked when the user clicks the source indicator. The indicator is only
   * rendered when both this callback and `videoSource` are provided.
   */
  onChangeSource?: () => void;
}
/**
 * Drives a continuous "grab frame → run inference → report caption" loop
 * for as long as `isRunning` is true and the model from the VLM context
 * reports `isLoaded`.
 *
 * On each iteration the current video element is handed to `runInference`
 * together with the latest prompt, provided the element has data
 * (`readyState >= 2`), is not paused and has a non-zero width. The loop then
 * waits `TIMING.FRAME_CAPTURE_DELAY` ms before the next attempt.
 *
 * The loop is torn down (via an `AbortController`) whenever `isRunning`,
 * `isLoaded` or `runInference` change, and on unmount. Results and errors
 * that arrive after the teardown are discarded.
 *
 * @param videoRef Ref to the video element to caption; re-read on every
 *   iteration so an element attached or swapped later is picked up.
 * @param isRunning Whether the loop should be active.
 * @param promptRef Ref holding the prompt text; read at the start of every
 *   inference so prompt edits never restart the loop.
 * @param onCaptionUpdate Receives caption text while the loop is active —
 *   both the values `runInference` reports through its progress callback
 *   (presumably partial/streaming text — confirm against `runInference`)
 *   and the final result.
 * @param onError Receives the error message when an inference call throws.
 * @param onGenerationComplete Receives the final caption of each successful
 *   inference.
 * @param onStatsUpdate Receives the stats objects reported by `runInference`.
 */
function useCaptioningLoop(
  videoRef: React.RefObject<HTMLVideoElement | null>,
  isRunning: boolean,
  promptRef: React.RefObject<string>,
  onCaptionUpdate: (caption: string) => void,
  onError: (error: string) => void,
  onGenerationComplete: (caption: string) => void,
  onStatsUpdate: (stats: { tps?: number; ttft?: number }) => void,
): void {
  const { isLoaded, runInference } = useVLMContext();

  // Mirror every callback into a ref so that a new callback identity is
  // picked up by the running loop without restarting it.
  const onCaptionUpdateRef = useRef(onCaptionUpdate);
  const onErrorRef = useRef(onError);
  const onGenerationCompleteRef = useRef(onGenerationComplete);
  const onStatsUpdateRef = useRef(onStatsUpdate);

  useEffect(() => {
    onCaptionUpdateRef.current = onCaptionUpdate;
    onErrorRef.current = onError;
    onGenerationCompleteRef.current = onGenerationComplete;
    onStatsUpdateRef.current = onStatsUpdate;
  }, [onCaptionUpdate, onError, onGenerationComplete, onStatsUpdate]);

  useEffect(() => {
    if (!isRunning || !isLoaded) return;

    // One controller per effect run: the cleanup below always aborts exactly
    // the loop that this run started.
    const controller = new AbortController();
    const { signal } = controller;

    const captureLoop = async (): Promise<void> => {
      while (!signal.aborted) {
        // Re-read the ref each time: the element may be attached or replaced
        // after the loop has started, and the ref object itself never changes.
        const video = videoRef.current;

        if (
          video &&
          video.readyState >= 2 &&
          !video.paused &&
          video.videoWidth > 0
        ) {
          try {
            const currentPrompt = promptRef.current ?? "";
            const result = await runInference(
              video,
              currentPrompt,
              // Go through the ref so the latest callback is used, and stay
              // silent once this loop has been stopped.
              (text: string) => {
                if (!signal.aborted) onCaptionUpdateRef.current(text);
              },
              (stats) => onStatsUpdateRef.current(stats),
            );

            if (result && !signal.aborted) {
              onCaptionUpdateRef.current(result);
              onGenerationCompleteRef.current(result);
            }
          } catch (error) {
            // Failures of a loop that was already stopped are not reported.
            if (!signal.aborted) {
              const message =
                error instanceof Error ? error.message : String(error);
              onErrorRef.current(message);
              console.error("Error processing frame:", error);
            }
          }
        }

        if (signal.aborted) break;

        await new Promise((resolve) =>
          setTimeout(resolve, TIMING.FRAME_CAPTURE_DELAY),
        );
      }
    };

    // NB: Start on the next tick so the cleanup can run before the loop does.
    // This is necessary for React's strict mode, which calls effects twice in
    // development; clearing the timer means the discarded run never starts.
    const startTimer = setTimeout(() => {
      void captureLoop();
    }, 0);

    return () => {
      clearTimeout(startTimer);
      controller.abort();
    };
  }, [isRunning, isLoaded, runInference, promptRef, videoRef]);
}
/**
 * Captioning screen: renders the capture controls with three overlays on top
 * of them — a source indicator (top left), the prompt input (bottom left) and
 * the live caption panel (bottom right) — and owns the state fed by
 * `useCaptioningLoop`.
 *
 * The loop starts in the running state. Completed captions are kept in a
 * newest-first history capped at 50 entries.
 */
export default function CaptioningView({ videoRef, videoSource, onChangeSource }: CaptioningViewProps) {
  const { imageSize, setImageSize } = useVLMContext();

  const [caption, setCaption] = useState("");
  const [isLoopRunning, setIsLoopRunning] = useState(true);
  const [currentPrompt, setCurrentPrompt] = useState<string>(PROMPTS.default);
  const [error, setError] = useState<string | null>(null);
  const [history, setHistory] = useState<HistoryEntry[]>([]);
  const [stats, setStats] = useState<{ tps?: number; ttft?: number }>({});

  // The loop reads the prompt through a ref so that editing the prompt does
  // not restart it.
  const promptRef = useRef<string>(currentPrompt);
  useEffect(() => {
    promptRef.current = currentPrompt;
  }, [currentPrompt]);

  // A fresh caption replaces whatever error was being shown.
  const showCaption = useCallback((nextCaption: string) => {
    setCaption(nextCaption);
    setError(null);
  }, []);

  // Errors are surfaced both through `error` and in the caption text itself.
  const showError = useCallback((errorMessage: string) => {
    setError(errorMessage);
    setCaption(`Error: ${errorMessage}`);
  }, []);

  // Prepend the finished caption with a 24-hour HH:MM:SS timestamp and keep
  // only the 50 most recent entries.
  const recordCaption = useCallback((text: string) => {
    const timestamp = new Date().toLocaleTimeString("en-US", {
      hour12: false,
      hour: "2-digit",
      minute: "2-digit",
      second: "2-digit",
    });
    const entry = { timestamp, text };
    setHistory((previous) => [entry, ...previous].slice(0, 50));
  }, []);

  // Stats arrive piecemeal; merge them into what is already known.
  const mergeStats = useCallback(
    (incoming: { tps?: number; ttft?: number }) => {
      setStats((previous) => ({ ...previous, ...incoming }));
    },
    [],
  );

  useCaptioningLoop(
    videoRef,
    isLoopRunning,
    promptRef,
    showCaption,
    showError,
    recordCaption,
    mergeStats,
  );

  const changePrompt = useCallback((prompt: string) => {
    setCurrentPrompt(prompt);
    setError(null);
  }, []);

  const toggleLoop = useCallback(() => {
    setIsLoopRunning((running) => !running);
    if (error) setError(null);
  }, [error]);

  // Label preference: explicit source name, then the label for its type,
  // then the generic "Video".
  const sourceLabel =
    videoSource?.name ||
    (videoSource && SOURCE_LABELS[videoSource.type]) ||
    "Video";

  const chevronIcon = (
    <svg
      className="w-4 h-4 text-white/50 group-hover:text-white/80 transition-colors"
      fill="none"
      viewBox="0 0 24 24"
      stroke="currentColor"
      strokeWidth={2}
    >
      <path
        strokeLinecap="round"
        strokeLinejoin="round"
        d="M8.25 15L12 18.75 15.75 15m-7.5-6L12 5.25 15.75 9"
      />
    </svg>
  );

  // Source Indicator - Top Left (only when there is a source AND a way to change it)
  const sourceIndicator =
    videoSource && onChangeSource ? (
      <div className="absolute top-5 left-5 z-30">
        <button
          onClick={onChangeSource}
          className="group flex items-center gap-3 px-4 py-2.5 bg-black/60 backdrop-blur-md border border-white/10 hover:border-white/30 transition-all hover:bg-black/70"
          title="Change video source"
        >
          <div
            className="flex items-center justify-center"
            style={{ color: THEME.mistralOrange }}
          >
            {SOURCE_ICONS[videoSource.type]}
          </div>
          <span className="text-sm font-medium text-white/90 max-w-[200px] truncate">
            {sourceLabel}
          </span>
          {chevronIcon}
        </button>
      </div>
    ) : null;

  return (
    <div className="absolute inset-0 text-white">
      <div className="relative w-full h-full">
        <WebcamCapture
          isRunning={isLoopRunning}
          onToggleRunning={toggleLoop}
          error={error}
          imageSize={imageSize}
          onImageSizeChange={setImageSize}
        />

        {sourceIndicator}

        {/* Prompt Input - Bottom Left */}
        <div className="absolute bottom-5 left-5 z-30 w-[540px]">
          <PromptInput onPromptChange={changePrompt} />
        </div>

        {/* Live Caption - Bottom Right */}
        <div className="absolute bottom-5 right-5 z-30 w-[720px]">
          <LiveCaption
            caption={caption}
            isRunning={isLoopRunning}
            error={error}
            history={history}
            stats={stats}
          />
        </div>
      </div>
    </div>
  );
}