// Source: Hugging Face Space "LiquidAI/LFM2-VL-WebGPU" (status: Running).
// File size: 8,423 bytes · revision 01488bc

import { startTransition, useEffect, useRef, useState } from "react";
import { Camera, Film } from "lucide-react";
import { BrandMark } from "./components/BrandMark";
import { CaptureScene, type CaptureSource } from "./components/CaptureScene";
import { FluidBackdrop } from "./components/FluidBackdrop";
import { HfIcon } from "./components/HfIcon";
import { VLMProvider } from "./context/VLMProvider";
import { useVLM } from "./context/VLMContext";

// Built-in prompt choices. The first entry's `prompt` seeds the initial prompt
// state in `AppContent`, and the whole list is handed to `CaptureScene` as
// `promptPresets`.
// NOTE(review): `display` is presumably the short label shown in the UI and
// `prompt` the text actually sent to the model — confirm against CaptureScene.
// `as const` keeps the entries readonly with literal string types.
const PROMPT_PRESETS = [
  {
    display: "Describe the scene",
    prompt: "Describe the scene in one sentence.",
  },
  {
    display: "What color shirt am I wearing?",
    prompt: "What color shirt am I wearing?",
  },
  {
    display: "What am I holding?",
    prompt: "What am I holding?",
  },
  {
    display: "How old do I look?",
    prompt: "How old do I look?",
  },
] as const;

/**
 * Which top-level screen `AppContent` renders:
 * - "landing": the full-screen "Click anywhere to begin" button
 * - "loading": the model-loading card with progress and retry
 * - "source":  the webcam / file input chooser
 * - "capture": the `CaptureScene` running against the selected source
 */
type Scene = "landing" | "loading" | "source" | "capture";

/**
 * Releases whatever a capture source is holding on to.
 *
 * A webcam source has every track of its stream stopped; any other source has
 * its object URL revoked. A missing source is ignored.
 *
 * @param source - The source to release, or `null` when there is none.
 */
function disposeSource(source: CaptureSource | null) {
  if (source) {
    if (source.kind !== "webcam") {
      URL.revokeObjectURL(source.url);
    } else {
      for (const mediaTrack of source.stream.getTracks()) {
        mediaTrack.stop();
      }
    }
  }
}

/**
 * Turns an arbitrary thrown value into text that is safe to show to the user.
 *
 * The result is never empty: callers render the message conditionally
 * (`mediaError ? … : null`), so an empty string would hide the failure.
 *
 * @param error - Whatever was caught; may be an `Error`, a string, or anything else.
 * @returns The error's own message (or the thrown string) when it has visible
 *   text, otherwise the generic "Something went wrong." fallback.
 */
function getErrorMessage(error: unknown): string {
  const fallback = "Something went wrong.";

  let message = "";
  if (error instanceof Error) {
    message = error.message;
  } else if (typeof error === "string") {
    message = error;
  }

  // Blank or whitespace-only messages carry no information; use the fallback.
  return message.trim() === "" ? fallback : message;
}

/**
 * Drives the screen flow of the app: landing → loading → source → capture.
 *
 * Owns the selected capture source (webcam stream or local video object URL),
 * the current prompt, and media-related error text. Model state (`error`,
 * `loadModel`, `message`, `progress`, `status`) comes from `useVLM()`.
 *
 * Resource ownership: `sourceRef` always points at the source whose resources
 * are currently live. It is updated synchronously wherever the source changes,
 * so disposal never depends on a possibly stale render closure.
 */
function AppContent() {
  const [scene, setScene] = useState<Scene>("landing");
  const [source, setSource] = useState<CaptureSource | null>(null);
  const [prompt, setPrompt] = useState<string>(PROMPT_PRESETS[0].prompt);
  const [mediaError, setMediaError] = useState<string | null>(null);
  const fileInputRef = useRef<HTMLInputElement>(null);
  // Single source of truth for "which source must be released next".
  const sourceRef = useRef<CaptureSource | null>(null);
  // Lets async handlers detect that the component unmounted while they waited.
  const isMountedRef = useRef(false);

  const { error, loadModel, message, progress, status } = useVLM();

  useEffect(() => {
    // Set inside the effect (not at declaration) so a re-run of this effect
    // after a cleanup marks the component as mounted again.
    isMountedRef.current = true;

    return () => {
      isMountedRef.current = false;
      disposeSource(sourceRef.current);
    };
  }, []);

  useEffect(() => {
    if (scene !== "loading") {
      return;
    }

    if (status === "ready") {
      // The model is ready while the loading screen is still showing (for
      // example a load started by the Retry button, or a status update that
      // landed before the promise callback below ran). Move on instead of
      // leaving the user stuck on the loading card.
      startTransition(() => {
        setScene("source");
      });
      return;
    }

    let cancelled = false;

    void loadModel()
      .then(() => {
        if (cancelled) {
          return;
        }

        startTransition(() => {
          setScene("source");
        });
      })
      // The rejection is intentionally not rethrown; the loading card renders
      // the context's `error` value. NOTE(review): assumes the provider
      // populates `error` when `loadModel` rejects — confirm in VLMProvider.
      .catch(() => undefined);

    return () => {
      cancelled = true;
    };
  }, [loadModel, scene, status]);

  const beginExperience = () => {
    startTransition(() => {
      setScene("loading");
    });
  };

  // Retry handler for the loading card. Mirrors the effect's `.catch` so a
  // failed retry does not surface as an unhandled promise rejection.
  const retryLoad = () => {
    void loadModel().catch(() => undefined);
  };

  // Swaps in a new capture source, releasing the one that is live right now.
  const replaceSource = (nextSource: CaptureSource) => {
    // Read the ref, not the `source` state captured by this closure: callers
    // such as `handleUseWebcam` reach this point after an `await`, by which
    // time the captured state may no longer be the live source.
    disposeSource(sourceRef.current);
    sourceRef.current = nextSource;
    setMediaError(null);
    setSource(nextSource);

    startTransition(() => {
      setScene("capture");
    });
  };

  // Requests a front-facing camera stream and makes it the active source.
  // Failures are reported through `mediaError` rather than thrown.
  const handleUseWebcam = async () => {
    try {
      if (!navigator.mediaDevices?.getUserMedia) {
        throw new Error("Camera access is not available in this browser.");
      }

      const stream = await navigator.mediaDevices.getUserMedia({
        audio: false,
        video: {
          facingMode: "user",
          width: { ideal: 1280 },
          height: { ideal: 720 },
        },
      });

      if (!isMountedRef.current) {
        // Unmounted while waiting for the stream: nothing will ever own it,
        // so stop the tracks here instead of leaving the camera running.
        stream.getTracks().forEach((track) => track.stop());
        return;
      }

      replaceSource({
        kind: "webcam",
        label: "Live camera",
        stream,
      });
    } catch (cameraError) {
      setMediaError(getErrorMessage(cameraError));
    }
  };

  const openVideoPicker = () => {
    fileInputRef.current?.click();
  };

  // Turns the picked file into an object-URL-backed source.
  const handleVideoSelection = (event: React.ChangeEvent<HTMLInputElement>) => {
    const file = event.target.files?.[0];
    // Reset the input so picking the same file again still fires `change`.
    event.target.value = "";

    if (!file) {
      return;
    }

    replaceSource({
      kind: "file",
      label: file.name,
      url: URL.createObjectURL(file),
    });
  };

  // Leaves the capture scene and releases the live source.
  const exitCapture = () => {
    disposeSource(sourceRef.current);
    sourceRef.current = null;
    setSource(null);
    setMediaError(null);

    startTransition(() => {
      setScene("source");
    });
  };

  const showBackdrop = scene !== "capture";

  return (
    <>
      {showBackdrop ? <FluidBackdrop subdued={scene === "loading"} /> : null}

      <input
        ref={fileInputRef}
        accept="video/*"
        className="hidden-file-input"
        onChange={handleVideoSelection}
        type="file"
      />

      {scene === "landing" ? (
        <button
          className="landing-scene"
          onClick={beginExperience}
          type="button"
        >
          <div className="landing-inner">
            <BrandMark />

            <div className="hero-copy">
              <h1>LFM2-VL WebGPU</h1>
              <p>
                Real-time video captioning in your browser,
                <br />
                powered by
                <HfIcon className="hero-inline-icon" />
                <span className="hero-inline-wordmark">Transformers.js</span>
              </p>
            </div>

            <div className="begin-prompt">Click anywhere to begin</div>
          </div>
        </button>
      ) : null}

      {scene === "loading" ? (
        <main className="scene-shell scene-shell--centered">
          <BrandMark />

          <section className="loading-card">
            <span className="eyebrow">Loading Model</span>
            <h2>{message}</h2>
            <div aria-hidden="true" className="progress-track">
              <div
                className="progress-fill"
                style={{
                  // Keep a sliver of the bar visible (6%) until real progress
                  // exceeds it; show a full bar once the model is ready.
                  width: `${Math.max(progress, status === "ready" ? 100 : 6)}%`,
                }}
              />
            </div>
            <p>{Math.round(progress)}%</p>

            {error ? (
              <>
                <div className="error-banner" role="alert">
                  {error}
                </div>
                <button
                  className="primary-button"
                  onClick={retryLoad}
                  type="button"
                >
                  Retry loading
                </button>
              </>
            ) : null}
          </section>
        </main>
      ) : null}

      {scene === "source" ? (
        <main className="scene-shell">
          <div className="scene-header">
            <BrandMark />
          </div>

          <section className="source-card">
            <span className="eyebrow">Choose Input</span>
            <h2>Caption a live camera or a local video file.</h2>
            <p>
              The model is ready. Pick a source and we&apos;ll start captioning
              each frame as quickly as the browser can process it.
            </p>

            <div className="source-grid">
              <button
                className="source-option"
                onClick={() => void handleUseWebcam()}
                type="button"
              >
                <div className="source-option__header">
                  <Camera
                    className="source-option__icon"
                    size={28}
                    strokeWidth={1.9}
                  />
                  <strong>Webcam</strong>
                </div>
                <span>
                  Start a live camera stream and caption it in real time.
                </span>
              </button>

              <button
                className="source-option"
                onClick={openVideoPicker}
                type="button"
              >
                <div className="source-option__header">
                  <Film
                    className="source-option__icon"
                    size={28}
                    strokeWidth={1.9}
                  />
                  <strong>File</strong>
                </div>
                <span>
                  Upload a local clip and run the same caption loop against it.
                </span>
              </button>
            </div>

            {mediaError ? (
              <div className="error-banner" role="alert">
                {mediaError}
              </div>
            ) : null}
          </section>
        </main>
      ) : null}

      {scene === "capture" && source ? (
        <CaptureScene
          mediaError={mediaError}
          onChooseVideo={openVideoPicker}
          onChooseWebcam={handleUseWebcam}
          onDismissMediaError={() => setMediaError(null)}
          onExit={exitCapture}
          onPromptChange={setPrompt}
          prompt={prompt}
          promptPresets={PROMPT_PRESETS}
          source={source}
        />
      ) : null}
    </>
  );
}

/**
 * Application root: renders `AppContent` inside `VLMProvider`, which supplies
 * the context that `AppContent` reads through `useVLM()`.
 */
function App() {
  const content = <AppContent />;

  return <VLMProvider>{content}</VLMProvider>;
}

export default App;