| | import { useState, useRef, useEffect, useCallback } from "react"; |
| | import WebcamCapture from "./WebcamCapture"; |
| | import PromptInput from "./PromptInput"; |
| | import LiveCaption, { type HistoryEntry } from "./LiveCaption"; |
| | import { useVLMContext } from "../context/useVLMContext"; |
| | import { PROMPTS, TIMING } from "../constants"; |
| |
|
/** Props accepted by the `CaptioningView` component. */
interface CaptioningViewProps {
  /**
   * Ref to the `<video>` element that is handed to the captioning loop.
   * `current` is `null` whenever no element is attached.
   */
  videoRef: React.RefObject<HTMLVideoElement | null>;
}
| |
|
/**
 * Runs a repeating captioning loop while `isRunning` is true and the VLM
 * context reports `isLoaded`.
 *
 * On every iteration the hook looks up the current video element and, if it
 * has frame data, is not paused and has a non-zero width, awaits
 * `runInference(video, prompt, captionCallback, statsCallback)`. A truthy
 * result is forwarded to `onCaptionUpdate` and then `onGenerationComplete`.
 * A thrown error is reported through `onError` and logged. Iterations are
 * separated by `TIMING.FRAME_CAPTURE_DELAY` milliseconds.
 *
 * The loop is cancelled (and, if the conditions still hold, restarted)
 * whenever a dependency of the effect changes, and on unmount. After
 * cancellation no callback is invoked on behalf of the cancelled loop, even
 * if its `runInference` call is still in flight.
 *
 * @param videoRef - Ref to the video element; re-read on every iteration.
 * @param isRunning - Whether the loop should currently be active.
 * @param promptRef - Ref holding the prompt; read at the start of each inference.
 * @param onCaptionUpdate - Receives text passed to the caption callback by
 *   `runInference` (presumably partial output while generating — confirm
 *   against the context implementation) and the final result.
 * @param onError - Receives the message of an error thrown by `runInference`.
 * @param onGenerationComplete - Receives the final result of each inference.
 * @param onStatsUpdate - Receives stats objects emitted by `runInference`.
 */
function useCaptioningLoop(
  videoRef: React.RefObject<HTMLVideoElement | null>,
  isRunning: boolean,
  promptRef: React.RefObject<string>,
  onCaptionUpdate: (caption: string) => void,
  onError: (error: string) => void,
  onGenerationComplete: (caption: string) => void,
  onStatsUpdate: (stats: { tps?: number; ttft?: number }) => void,
): void {
  const { isLoaded, runInference } = useVLMContext();
  const abortControllerRef = useRef<AbortController | null>(null);

  // Each callback is mirrored into a ref so the loop can always call the
  // latest version without the callbacks being dependencies of the loop
  // effect (which would restart the loop whenever a callback identity changes).
  const onCaptionUpdateRef = useRef(onCaptionUpdate);
  const onErrorRef = useRef(onError);
  const onGenerationCompleteRef = useRef(onGenerationComplete);
  const onStatsUpdateRef = useRef(onStatsUpdate);

  useEffect(() => {
    onCaptionUpdateRef.current = onCaptionUpdate;
  }, [onCaptionUpdate]);

  useEffect(() => {
    onErrorRef.current = onError;
  }, [onError]);

  useEffect(() => {
    onGenerationCompleteRef.current = onGenerationComplete;
  }, [onGenerationComplete]);

  useEffect(() => {
    onStatsUpdateRef.current = onStatsUpdate;
  }, [onStatsUpdate]);

  useEffect(() => {
    // Defensive: stop any loop that is still registered before deciding
    // whether to start a new one.
    abortControllerRef.current?.abort();
    if (!isRunning || !isLoaded) return;

    const controller = new AbortController();
    abortControllerRef.current = controller;
    const { signal } = controller;

    // Forwarders resolve the callback at call time (so a callback swapped in
    // mid-inference is honoured) and go silent once this loop is cancelled,
    // so a stale in-flight inference cannot write into the UI.
    const emitCaption = (text: string) => {
      if (!signal.aborted) onCaptionUpdateRef.current(text);
    };
    const emitStats = (stats: { tps?: number; ttft?: number }) => {
      if (!signal.aborted) onStatsUpdateRef.current(stats);
    };

    const captureLoop = async () => {
      while (!signal.aborted) {
        // Re-read the ref every iteration: the element may be attached or
        // replaced after this effect started.
        const video = videoRef.current;
        const frameAvailable =
          video !== null &&
          video.readyState >= 2 && // 2 === HTMLMediaElement.HAVE_CURRENT_DATA
          !video.paused &&
          video.videoWidth > 0;

        if (video && frameAvailable) {
          try {
            const result = await runInference(
              video,
              promptRef.current ?? "",
              emitCaption,
              emitStats,
            );
            if (result && !signal.aborted) {
              onCaptionUpdateRef.current(result);
              onGenerationCompleteRef.current(result);
            }
          } catch (error) {
            // Failures that surface after cancellation are ignored.
            if (!signal.aborted) {
              const message =
                error instanceof Error ? error.message : String(error);
              onErrorRef.current(message);
              console.error("Error processing frame:", error);
            }
          }
        }

        if (signal.aborted) break;
        await new Promise<void>((resolve) => {
          setTimeout(resolve, TIMING.FRAME_CAPTURE_DELAY);
        });
      }
    };

    // Start on a later macrotask instead of synchronously inside the effect.
    // The timer is cleared in cleanup, so an effect that is torn down
    // immediately never starts its loop at all.
    const startTimer = setTimeout(() => {
      void captureLoop();
    }, 0);

    return () => {
      clearTimeout(startTimer);
      // Abort the controller created by *this* effect run rather than
      // whatever the shared ref happens to hold at cleanup time.
      controller.abort();
    };
  }, [isRunning, isLoaded, runInference, promptRef, videoRef]);
}
| |
|
/** Maximum number of completed captions kept in the history list. */
const MAX_HISTORY_ENTRIES = 50;

/**
 * Formats a date as a 24-hour `HH:MM:SS` string for a history entry.
 *
 * `hourCycle: "h23"` is used instead of `hour12: false` because, for the
 * `en-US` locale, some engines resolve `hour12: false` to the 1–24 cycle and
 * print midnight as `24:xx:xx`.
 */
function formatHistoryTimestamp(date: Date): string {
  return date.toLocaleTimeString("en-US", {
    hourCycle: "h23",
    hour: "2-digit",
    minute: "2-digit",
    second: "2-digit",
  });
}

/**
 * Full-screen captioning view: renders the webcam controls, the prompt input
 * and the live caption panel, and runs the captioning loop against the
 * supplied video element.
 *
 * State owned here: the current caption text, whether the loop is running,
 * the active prompt, the last error message, a capped history of completed
 * captions (newest first) and the latest inference stats.
 *
 * @param props.videoRef - Ref to the video element passed to the captioning loop.
 */
export default function CaptioningView({ videoRef }: CaptioningViewProps) {
  const { imageSize, setImageSize } = useVLMContext();
  const [caption, setCaption] = useState("");
  const [isLoopRunning, setIsLoopRunning] = useState(true);
  const [currentPrompt, setCurrentPrompt] = useState<string>(PROMPTS.default);
  const [error, setError] = useState<string | null>(null);
  const [history, setHistory] = useState<HistoryEntry[]>([]);
  const [stats, setStats] = useState<{ tps?: number; ttft?: number }>({});

  // The loop reads the prompt through a ref so that editing the prompt does
  // not restart the loop; the ref is synced after each prompt change.
  const promptRef = useRef<string>(currentPrompt);

  useEffect(() => {
    promptRef.current = currentPrompt;
  }, [currentPrompt]);

  // Any caption text arriving from the loop also clears a previous error.
  const handleCaptionUpdate = useCallback((newCaption: string) => {
    setCaption(newCaption);
    setError(null);
  }, []);

  // Errors are stored and also surfaced in the caption area.
  const handleError = useCallback((errorMessage: string) => {
    setError(errorMessage);
    setCaption(`Error: ${errorMessage}`);
  }, []);

  // Prepend the finished caption and drop anything beyond the cap.
  const handleGenerationComplete = useCallback((text: string) => {
    const entry = { timestamp: formatHistoryTimestamp(new Date()), text };
    setHistory((prev) => [entry, ...prev].slice(0, MAX_HISTORY_ENTRIES));
  }, []);

  // Stats are merged, so an update that carries only one field leaves the
  // other field at its previous value.
  const handleStatsUpdate = useCallback(
    (newStats: { tps?: number; ttft?: number }) => {
      setStats((prev) => ({ ...prev, ...newStats }));
    },
    [],
  );

  useCaptioningLoop(
    videoRef,
    isLoopRunning,
    promptRef,
    handleCaptionUpdate,
    handleError,
    handleGenerationComplete,
    handleStatsUpdate,
  );

  const handlePromptChange = useCallback((prompt: string) => {
    setCurrentPrompt(prompt);
    setError(null);
  }, []);

  // Clearing the error unconditionally is a no-op when it is already null,
  // and keeps this callback's identity stable across error changes.
  const handleToggleLoop = useCallback(() => {
    setIsLoopRunning((running) => !running);
    setError(null);
  }, []);

  return (
    <div className="absolute inset-0 text-white">
      <div className="relative w-full h-full">
        <WebcamCapture
          isRunning={isLoopRunning}
          onToggleRunning={handleToggleLoop}
          error={error}
          imageSize={imageSize}
          onImageSizeChange={setImageSize}
        />
        {/* Prompt Input - Bottom Left */}
        <div className="absolute bottom-5 left-5 z-30 w-[540px]">
          <PromptInput onPromptChange={handlePromptChange} />
        </div>
        {/* Live Caption - Bottom Right */}
        <div className="absolute bottom-5 right-5 z-30 w-[720px]">
          <LiveCaption
            caption={caption}
            isRunning={isLoopRunning}
            error={error}
            history={history}
            stats={stats}
          />
        </div>
      </div>
    </div>
  );
}