// conversational-webgpu

// Running

// File size: 16,723 Bytes

// src/App.jsx (or similar path)

import { useEffect, useState, useRef } from "react";
import { Mic, PhoneOff, ChevronDown } from "lucide-react";
import { INPUT_SAMPLE_RATE } from "./constants";

import WORKLET from "./play-worklet.js"; // Assuming this is correctly resolved

export default function App() {
  // States and refs remain the same
  const [callStartTime, setCallStartTime] = useState(null);
  const [callStarted, setCallStarted] = useState(false);
  const [playing, setPlaying] = useState(false);

  const [voice, setVoice] = useState("af_heart");
  const [voices, setVoices] = useState([]);

  const [isListening, setIsListening] = useState(false);
  const [isSpeaking, setIsSpeaking] = useState(false);
  const [listeningScale, setListeningScale] = useState(1);
  const [speakingScale, setSpeakingScale] = useState(1);
  const [ripples, setRipples] = useState([]);

  const [ready, setReady] = useState(false);
  const [error, setError] = useState(null);
  const [elapsedTime, setElapsedTime] = useState("00:00");
  const worker = useRef(null);

  const micStreamRef = useRef(null);
  const node = useRef(null);

  // useEffect hooks remain the same
  useEffect(() => {
    worker.current?.postMessage({
      type: "set_voice",
      voice,
    });
  }, [voice]);

  useEffect(() => {
    if (!callStarted) {
      worker.current?.postMessage({
        type: "end_call",
      });
    }
  }, [callStarted]);

  useEffect(() => {
    if (callStarted && callStartTime) {
      const interval = setInterval(() => {
        const diff = Math.floor((Date.now() - callStartTime) / 1000);
        const minutes = String(Math.floor(diff / 60)).padStart(2, "0");
        const seconds = String(diff % 60).padStart(2, "0");
        setElapsedTime(`${minutes}:${seconds}`);
      }, 1000);
      return () => clearInterval(interval);
    } else {
      setElapsedTime("00:00");
    }
  }, [callStarted, callStartTime]);

  useEffect(() => {
    worker.current ??= new Worker(new URL("./worker.js", import.meta.url), { // Ensure worker.js is present
      type: "module",
    });

    const onMessage = ({ data }) => {
      if (data.error) {
        return onError(data.error);
      }

      switch (data.type) {
        case "status":
          if (data.status === "recording_start") {
            setIsListening(true);
            setIsSpeaking(false);
          } else if (data.status === "recording_end") {
            setIsListening(false);
          } else if (data.status === "ready") {
            setVoices(data.voices);
            setReady(true);
          }
          break;
        case "output":
          if (!playing) {
            node.current?.port.postMessage(data.result.audio);
            setPlaying(true);
            setIsSpeaking(true);
            setIsListening(false);
          }
          break;
      }
    };
    const onError = (err) => setError(err.message);

    worker.current.addEventListener("message", onMessage);
    worker.current.addEventListener("error", onError);

    return () => {
      worker.current.removeEventListener("message", onMessage);
      worker.current.removeEventListener("error", onError);
    };
  }, []); // Removed 'playing' from dependencies as it causes re-subscriptions

  useEffect(() => {
    if (!callStarted) return;

    let worklet;
    let inputAudioContext;
    let source;
    let ignore = false;

    let outputAudioContext;
    const audioStreamPromise = Promise.resolve(micStreamRef.current);

    audioStreamPromise
      .then(async (stream) => {
        if (ignore) return;

        inputAudioContext = new (window.AudioContext ||
          window.webkitAudioContext)({
          sampleRate: INPUT_SAMPLE_RATE,
        });

        const analyser = inputAudioContext.createAnalyser();
        analyser.fftSize = 256;
        source = inputAudioContext.createMediaStreamSource(stream);
        source.connect(analyser);

        const inputDataArray = new Uint8Array(analyser.frequencyBinCount);

        function calculateRMS(array) {
          let sum = 0;
          for (let i = 0; i < array.length; ++i) {
            const normalized = array[i] / 128 - 1;
            sum += normalized * normalized;
          }
          const rms = Math.sqrt(sum / array.length);
          return rms;
        }

        // Ensure vad-processor.js is present
        await inputAudioContext.audioWorklet.addModule(
          new URL("./vad-processor.js", import.meta.url),
        );
        worklet = new AudioWorkletNode(inputAudioContext, "vad-processor", {
          numberOfInputs: 1,
          numberOfOutputs: 0,
          channelCount: 1,
          channelCountMode: "explicit",
          channelInterpretation: "discrete",
        });

        source.connect(worklet);
        worklet.port.onmessage = (event) => {
          const { buffer } = event.data;
          worker.current?.postMessage({ type: "audio", buffer });
        };

        outputAudioContext = new AudioContext({
          sampleRate: 24000,
        });
        outputAudioContext.resume();

        const blob = new Blob([`(${WORKLET.toString()})()`], {
          type: "application/javascript",
        });
        const url = URL.createObjectURL(blob);
        await outputAudioContext.audioWorklet.addModule(url);
        URL.revokeObjectURL(url);

        node.current = new AudioWorkletNode(
          outputAudioContext,
          "buffered-audio-worklet-processor",
        );

        node.current.port.onmessage = (event) => {
          if (event.data.type === "playback_ended") {
            setPlaying(false);
            setIsSpeaking(false);
            worker.current?.postMessage({ type: "playback_ended" });
          }
        };

        const outputAnalyser = outputAudioContext.createAnalyser();
        outputAnalyser.fftSize = 256;

        node.current.connect(outputAnalyser);
        outputAnalyser.connect(outputAudioContext.destination);

        const outputDataArray = new Uint8Array(
          outputAnalyser.frequencyBinCount,
        );

        function updateVisualizers() {
          if (!inputAudioContext || inputAudioContext.state === 'closed') return; // Prevent errors if context is closed
          analyser.getByteTimeDomainData(inputDataArray);
          const rms = calculateRMS(inputDataArray);
          const targetScale = 1 + Math.min(1.25 * rms, 0.25);
          setListeningScale((prev) => prev + (targetScale - prev) * 0.25);

          if (!outputAudioContext || outputAudioContext.state === 'closed') return; // Prevent errors
          outputAnalyser.getByteTimeDomainData(outputDataArray);
          const outputRMS = calculateRMS(outputDataArray);
          const targetOutputScale = 1 + Math.min(1.25 * outputRMS, 0.25);
          setSpeakingScale((prev) => prev + (targetOutputScale - prev) * 0.25);

          requestAnimationFrame(updateVisualizers);
        }
        updateVisualizers();
      })
      .catch((err) => {
        setError(err.message);
        console.error(err);
      });

    return () => {
      ignore = true;
      audioStreamPromise.then((s) => s?.getTracks().forEach((t) => t.stop())); // Optional chaining for s
      source?.disconnect();
      worklet?.disconnect();
      inputAudioContext?.close().catch(console.error); // Catch potential errors on close

      outputAudioContext?.close().catch(console.error); // Catch potential errors on close
    };
  }, [callStarted]); // Removed dependencies that might cause frequent re-runs like 'playing'

  useEffect(() => {
    if (!callStarted) return;
    const interval = setInterval(() => {
      const id = Date.now();
      setRipples((prev) => [...prev, id]);
      setTimeout(() => {
        setRipples((prev) => prev.filter((r) => r !== id));
      }, 1500); // Duration of the ripple animation
    }, 1000); // Interval for adding new ripples
    return () => clearInterval(interval);
  }, [callStarted]);


  const handleStartCall = async () => {
    try {
      const stream = await navigator.mediaDevices.getUserMedia({
        audio: {
          channelCount: 1,
          echoCancellation: true,
          autoGainControl: true,
          noiseSuppression: true,
          sampleRate: INPUT_SAMPLE_RATE,
        },
      });
      micStreamRef.current = stream;

      setCallStartTime(Date.now());
      setCallStarted(true);
      setError(null); // Clear previous errors
      worker.current?.postMessage({ type: "start_call" });
    } catch (err) {
      setError(err.message);
      console.error(err);
    }
  };

  // Main component rendering
  return (
    // Overall page container
    // Light: gray-50 background
    // Dark: slate-900 background
    <div className="h-screen min-h-[240px] flex items-center justify-center bg-gray-50 dark:bg-slate-900 p-4 relative">
      {/* Main content card */}
      {/* Light: white background */}
      {/* Dark: slate-800 background */}
      <div className="h-full max-h-[320px] w-[640px] bg-white dark:bg-slate-800 rounded-xl shadow-lg p-8 flex items-center justify-between space-x-16">
        {/* Left section: Voice selection and timer */}
        {/* Light: green-700 text */}
        {/* Dark: green-400 text */}
        <div className="text-green-700 dark:text-green-400 w-[140px]">
          <div className="text-xl font-bold flex justify-between">
            {voices?.[voice]?.name}
            {/* Elapsed time */}
            {/* Light: gray-500 text */}
            {/* Dark: slate-400 text */}
            <span className="font-normal text-gray-500 dark:text-slate-400">{elapsedTime}</span>
          </div>
          <div className="text-base relative">
            {/* Custom select button */}
            <button
              type="button"
              disabled={!ready}
              // Conditional styling based on 'ready' state
              className={`w-full flex items-center justify-between border rounded-md transition-colors 
                ${ ready
                  ? "bg-transparent border-gray-300 dark:border-gray-600 hover:border-gray-400 dark:hover:border-gray-500 text-gray-700 dark:text-gray-300" // Text color for button
                  : "bg-gray-100 dark:bg-slate-700 border-gray-300 dark:border-gray-600 opacity-50 cursor-not-allowed text-gray-400 dark:text-gray-500" // Text color for disabled
                }`
              }
            >
              <span className="px-2 py-1">Select voice</span>
              {/* Chevron icon, color will be inherited or can be set explicitly if needed */}
              <ChevronDown className="absolute right-2" />
            </button>
            {/* Hidden native select element */}
            {/* Basic styling for accessibility, actual appearance handled by custom button */}
            <select
              value={voice}
              onChange={(e) => setVoice(e.target.value)}
              className="absolute inset-0 opacity-0 cursor-pointer"
              disabled={!ready}
            >
              {Object.entries(voices).map(([key, v]) => (
                // Option text color in dropdown might be browser-dependent for native select.
                // For fully styled dropdown, custom component is needed.
                <option key={key} value={key}>
                  {`${v.name} (${
                    v.language === "en-us" ? "American" : v.language
                  } ${v.gender})`}
                </option>
              ))}
            </select>
          </div>
        </div>

        {/* Center section: Visualizer */}
        <div className="relative flex items-center justify-center w-32 h-32 flex-shrink-0 aspect-square">
          {callStarted &&
            ripples.map((id) => (
              <div
                key={id}
                // Ripple animation border
                // Light: border-green-200
                // Dark: border-green-600/700 (adjust for visibility)
                className="absolute inset-0 rounded-full border-2 border-green-200 dark:border-green-700 pointer-events-none"
                style={{ animation: "ripple 1.5s ease-out forwards" }}
              />
            ))}
          {/* Pulsing loader while initializing */}
          <div
            // Background for pulsing loader
            // Light: green-200 (normal), red-200 (error)
            // Dark: green-700 (normal), red-700 (error)
            className={`absolute w-32 h-32 rounded-full ${
              error ? "bg-red-200 dark:bg-red-700" : "bg-green-200 dark:bg-green-700"
            } ${!ready ? "animate-ping opacity-75" : ""}`}
            style={{ animationDuration: "1.5s" }}
          />
          {/* Main rings for visualizer */}
          <div
            // Speaking scale ring
            // Light: green-300 (normal), red-300 (error)
            // Dark: green-600 (normal), red-600 (error)
            className={`absolute w-32 h-32 rounded-full shadow-inner transition-transform duration-300 ease-out ${
              error ? "bg-red-300 dark:bg-red-600" : "bg-green-300 dark:bg-green-600"
            } ${!ready ? "opacity-0" : ""}`}
            style={{ transform: `scale(${speakingScale})` }}
          />
          <div
            // Listening scale ring
            // Light: green-200 (normal), red-200 (error)
            // Dark: green-700 (normal), red-700 (error) - same as pulse for consistency
            className={`absolute w-32 h-32 rounded-full shadow-inner transition-transform duration-300 ease-out ${
              error ? "bg-red-200 dark:bg-red-700" : "bg-green-200 dark:bg-green-700"
            } ${!ready ? "opacity-0" : ""}`}
            style={{ transform: `scale(${listeningScale})` }}
          />
          {/* Center text: status or error message */}
          <div
            // Text color for status messages
            // Light: gray-700 (normal), red-700 (error)
            // Dark: slate-300 (normal), red-400 (error)
            className={`absolute z-10 text-lg text-center ${
              error ? "text-red-700 dark:text-red-400" : "text-gray-700 dark:text-slate-300"
            }`}
          >
            {error ? (
              error
            ) : (
              <>
                {!ready && "Loading..."}
                {isListening && "Listening..."}
                {isSpeaking && "Speaking..."}
              </>
            )}
          </div>
        </div>

        {/* Right section: Call control buttons */}
        <div className="space-y-4 w-[140px]">
          {callStarted ? (
            // End call button
            <button
              // Light: bg-red-100, text-red-700, hover:bg-red-200
              // Dark: bg-red-700, text-red-100, hover:bg-red-600
              className="flex items-center space-x-2 px-4 py-2 bg-red-100 text-red-700 rounded-md hover:bg-red-200 dark:bg-red-700 dark:text-red-100 dark:hover:bg-red-600"
              onClick={() => {
                setCallStarted(false);
                setCallStartTime(null);
                setPlaying(false);
                setIsListening(false);
                setIsSpeaking(false);
                // Consider stopping mic tracks here explicitly if not handled by useEffect cleanup quickly enough
                micStreamRef.current?.getTracks().forEach(track => track.stop());
                micStreamRef.current = null;
              }}
            >
              <PhoneOff className="w-5 h-5" />
              <span>End call</span>
            </button>
          ) : (
            // Start call button
            <button
              // Conditional styling for 'ready' state
              className={`flex items-center space-x-2 px-4 py-2 rounded-md ${
                ready
                  ? "bg-blue-100 text-blue-700 hover:bg-blue-200 dark:bg-blue-600 dark:text-blue-100 dark:hover:bg-blue-500" // Enabled state
                  : "bg-blue-100 text-blue-700 opacity-50 cursor-not-allowed dark:bg-blue-800 dark:text-blue-400" // Disabled state
              }`}
              onClick={handleStartCall}
              disabled={!ready}
            >
              <Mic className="w-5 h-5" /> {/* Added Mic icon for Start Call */}
              <span>Start call</span>
            </button>
          )}
        </div>
      </div>

      {/* Footer link */}
      <div className="absolute bottom-4 text-sm text-gray-600 dark:text-gray-400"> {/* Text color for footer */}
        Built with{" "}
        <a
          href="https://github.com/huggingface/transformers.js"
          rel="noopener noreferrer"
          target="_blank"
          // Light: text-blue-600
          // Dark: text-blue-400
          className="text-blue-600 hover:underline dark:text-blue-400"
        >
          🤗 Transformers.js
        </a>
      </div>
    </div>
  );
}