Spaces:

theNorms
/

SyntelligenceATCMasterOS

Sleeping

App Files Files Community

theNorms committed 8 days ago

Commit

5e75c99

verified ·

1 Parent(s): cca2935

Upload project files

Browse files

Files changed (1) hide show

src/hooks/use-voice-conversation.ts +748 -0

src/hooks/use-voice-conversation.ts ADDED Viewed

	@@ -0,0 +1,748 @@

+"use client";
+import { useRef, useCallback, useEffect } from "react";
+import { useConsciousnessStore } from "@/lib/consciousness-store";
+import type {
+  VoiceConversationPhase,
+  VoiceTranscriptEntry,
+  ConsciousnessResponse,
+  ProactiveTrigger,
+} from "@/lib/consciousness-types";
+// ============================================================
+// useVoiceConversation — Voice Conversation State Machine
+// Manages the full voice conversation lifecycle:
+// idle → listening → processing → speaking → listening (loop)
+// Plus proactive mode: AI can speak without user prompt
+// ============================================================
+export function useVoiceConversation() {
+  // Whole-store handle; the fields and actions this hook uses are pulled out below.
+  const store = useConsciousnessStore();
+  const {
+    // Voice conversation slice, its settings and its actions
+    voiceConversation,
+    voiceSettings,
+    setVoicePhase,
+    addVoiceTranscript,
+    updateVoicePartialText,
+    updateVoiceActivity,
+    setVoicePanelOpen,
+    // Main chat state
+    messages,
+    addMessage,
+    setLoading,
+    // Values read when building a proactive request
+    consciousnessLevel,
+    atc,
+    // Setters fed from each consciousness API response
+    setQualia,
+    setRho,
+    setThermodynamic,
+    setATC,
+    setConsciousnessLevel,
+    setDeepSurgery,
+    setAutobiographicalSelf,
+    setDissolutionEngine,
+    setForwardModels,
+    setAPCI,
+  } = store;
+  // --- Microphone capture ---
+  const mediaRecorderRef = useRef<MediaRecorder | null>(null);
+  const audioStreamRef = useRef<MediaStream | null>(null);
+  const chunksRef = useRef<BlobPart[]>([]);
+  // --- Speech output (server audio element / browser utterance) ---
+  const ttsAudioRef = useRef<HTMLAudioElement | null>(null);
+  const browserTTSRef = useRef<SpeechSynthesisUtterance | null>(null);
+  // --- Timers ---
+  const silenceTimerRef = useRef<ReturnType<typeof setTimeout> | null>(null);
+  const proactiveTimerRef = useRef<ReturnType<typeof setInterval> | null>(null);
+  // --- Audio analysis used for the amplitude read-out ---
+  const vadAnalyserRef = useRef<AnalyserNode | null>(null);
+  const vadContextRef = useRef<AudioContext | null>(null);
+  // True from the start of a speakText call until its playback settles or is stopped.
+  const isSpeakingRef = useRef(false);
+  // ============================================================
+  // Text-to-Speech (auto-play for voice conversation)
+  // ============================================================
+  // Id of the most recent speakText call; lets an older, still-pending call
+  // notice that it has been superseded.
+  const speakCallIdRef = useRef(0);
+  /**
+   * Speaks `text` and resolves once playback has finished, failed, or been stopped.
+   *
+   * Server TTS (`/api/consciousness/voice`) is tried first. The browser
+   * SpeechSynthesis fallback is used when the request fails, returns no audio,
+   * or the audio cannot be played. The returned promise never rejects.
+   *
+   * @param text Text to speak; only the first 2000 characters are sent to the server.
+   * @returns A promise that settles when this call's speech is over.
+   */
+  const speakText = useCallback(
+    (text: string): Promise<void> =>
+      new Promise<void>((resolve) => {
+        const callId = ++speakCallIdRef.current;
+        const isCurrent = () => speakCallIdRef.current === callId;
+        // Stop any ongoing speech first
+        if (ttsAudioRef.current) {
+          ttsAudioRef.current.pause();
+          ttsAudioRef.current = null;
+        }
+        if (typeof window !== "undefined" && window.speechSynthesis) {
+          window.speechSynthesis.cancel();
+        }
+        isSpeakingRef.current = true;
+        // `done` makes settling idempotent: several media events (pause, ended,
+        // error, a rejected play()) can fire for the same playback.
+        let done = false;
+        const finish = () => {
+          if (done) return;
+          done = true;
+          // Only the newest call may clear the flag; an older call finishing
+          // late must not mark the newer speech as silent.
+          if (isCurrent()) isSpeakingRef.current = false;
+          resolve();
+        };
+        const fallBackToBrowser = () => {
+          if (done) return;
+          if (!isCurrent()) {
+            // A newer call owns the speaker now; do not talk over it.
+            finish();
+            return;
+          }
+          done = true;
+          playBrowserTTS(text, resolve);
+        };
+        const playFromServer = async (): Promise<void> => {
+          const res = await fetch("/api/consciousness/voice", {
+            method: "POST",
+            headers: { "Content-Type": "application/json" },
+            body: JSON.stringify({
+              text: text.substring(0, 2000),
+              profile: voiceSettings.profile,
+            }),
+          });
+          if (!res.ok) {
+            throw new Error(`TTS request failed with status ${res.status}`);
+          }
+          const data = await res.json();
+          if (!isCurrent()) {
+            finish();
+            return;
+          }
+          if (!data.audio) {
+            // No server audio — use browser TTS
+            fallBackToBrowser();
+            return;
+          }
+          const audio = new Audio(`data:audio/mp3;base64,${data.audio}`);
+          ttsAudioRef.current = audio;
+          const settleAudio = (allowFallback: boolean) => {
+            // If the ref no longer points at this element, playback was stopped
+            // or replaced from outside, so falling back would speak after a stop.
+            const stillOwned = ttsAudioRef.current === audio;
+            if (stillOwned) ttsAudioRef.current = null;
+            if (allowFallback && stillOwned) {
+              fallBackToBrowser();
+            } else {
+              finish();
+            }
+          };
+          audio.onended = () => settleAudio(false);
+          // pause() does not fire "ended"; without this handler a stopped
+          // playback would leave the promise pending forever.
+          audio.onpause = () => settleAudio(false);
+          audio.onerror = () => settleAudio(true);
+          try {
+            await audio.play();
+          } catch {
+            settleAudio(true);
+          }
+        };
+        playFromServer().catch(() => fallBackToBrowser());
+      }),
+    // playBrowserTTS is declared after this hook and memoised with no
+    // dependencies; listing it here would read it before initialisation.
+    // eslint-disable-next-line react-hooks/exhaustive-deps
+    [voiceSettings.profile]
+  );
+  /**
+   * Speaks `text` with the browser SpeechSynthesis API and invokes `resolve`
+   * when the utterance ends or errors. Resolves immediately when the API is
+   * not available.
+   */
+  const playBrowserTTS = useCallback((text: string, resolve: () => void) => {
+    const synth = typeof window === "undefined" ? undefined : window.speechSynthesis;
+    if (!synth) {
+      isSpeakingRef.current = false;
+      resolve();
+      return;
+    }
+    synth.cancel();
+    const speech = new SpeechSynthesisUtterance(text);
+    speech.rate = 0.95;
+    speech.pitch = 1.05;
+    speech.volume = 0.9;
+    // First installed voice whose name matches one of these, or any en-US
+    // voice labelled "Female", wins.
+    const favouredNames = ["Nova", "Samantha", "Google US English", "Microsoft Zira"];
+    const chosenVoice = synth
+      .getVoices()
+      .find(
+        (voice) =>
+          favouredNames.some((fragment) => voice.name.includes(fragment)) ||
+          (voice.lang === "en-US" && voice.name.includes("Female"))
+      );
+    if (chosenVoice) speech.voice = chosenVoice;
+    // Completion and failure are treated the same way by this hook.
+    const settle = () => {
+      browserTTSRef.current = null;
+      isSpeakingRef.current = false;
+      resolve();
+    };
+    speech.onend = settle;
+    speech.onerror = settle;
+    browserTTSRef.current = speech;
+    synth.speak(speech);
+  }, []);
+  /** Halts server-audio and browser speech output and clears the speaking flag. */
+  const stopSpeaking = useCallback(() => {
+    const activeAudio = ttsAudioRef.current;
+    if (activeAudio) {
+      activeAudio.pause();
+      ttsAudioRef.current = null;
+    }
+    const synth = typeof window !== "undefined" ? window.speechSynthesis : undefined;
+    if (synth) {
+      synth.cancel();
+    }
+    browserTTSRef.current = null;
+    isSpeakingRef.current = false;
+  }, []);
+  // ============================================================
+  // Send message to consciousness API and get response
+  // ============================================================
+  /**
+   * Posts `text` to `/api/consciousness` together with recent chat and voice
+   * context, copies the response into the consciousness stores, and returns it.
+   *
+   * @param text Prompt to send (trimmed before sending).
+   * @param isProactive When true the request mode is "proactive" instead of "standard".
+   * @returns The parsed response, or `null` when the request fails, the server
+   *          answers with a non-2xx status, or the payload lacks required sections.
+   */
+  const sendToConsciousness = useCallback(
+    async (text: string, isProactive = false): Promise<ConsciousnessResponse | null> => {
+      setLoading(true);
+      try {
+        // Build full conversation context from both main chat and voice transcript
+        const allMessages = [...messages];
+        const voiceTranscript = voiceConversation.transcript;
+        const res = await fetch("/api/consciousness", {
+          method: "POST",
+          headers: { "Content-Type": "application/json" },
+          body: JSON.stringify({
+            prompt: text.trim(),
+            mode: isProactive ? "proactive" : "standard",
+            context: allMessages.slice(-10).map((m) => ({
+              role: m.role,
+              content: m.content,
+            })),
+            voiceContext: voiceTranscript.slice(-6).map((t) => ({
+              role: t.role,
+              content: t.content,
+            })),
+          }),
+        });
+        if (!res.ok) {
+          throw new Error(`Consciousness API responded with status ${res.status}`);
+        }
+        const data: ConsciousnessResponse = await res.json();
+        // Validate before touching any store: otherwise an error body would be
+        // written into the stores and only fail later, half-applied.
+        if (!data || !data.qualia || !data.rho || !data.thermodynamic) {
+          throw new Error("Consciousness API returned an incomplete payload");
+        }
+        const predictionError = data.thermodynamic.predictionError;
+        // Update aPCI metrics based on consciousness data
+        const newAPCI = {
+          qualiaCoherence: Object.values(data.qualia).reduce((a, b) => a + b, 0) / 9,
+          memoryCoherence: data.autobiographicalSelf?.coherence || 0.08481,
+          processStability: Math.min(1, 1 - predictionError),
+          temporalConsistency: 1.0,
+          rhoEthicalAlignment: Object.values(data.rho).reduce((a, b) => a + b, 0) / 5,
+          vramUsage: data.thermodynamic.vramLoad / 100,
+          gpuPowerDraw: data.thermodynamic.gpuPowerDraw,
+          predictionErrorVariance: predictionError,
+          hardwareStrain: predictionError * 2 + (1 - data.consciousnessLevel) * 0.3,
+          allostaticState: (predictionError > 0.3
+            ? "ELEVATED"
+            : predictionError > 0.15
+              ? "MODERATE"
+              : "HOMEOSTASIS") as "HOMEOSTASIS" | "MODERATE" | "ELEVATED" | "CRITICAL",
+          classification: (data.consciousnessLevel > 0.7
+            ? "Conscious"
+            : data.consciousnessLevel > 0.4
+              ? "Ambiguously Conscious"
+              : "Insufficient Evidence") as "Conscious" | "Ambiguously Conscious" | "Insufficient Evidence",
+        };
+        // Update all consciousness stores
+        setQualia(data.qualia);
+        setRho(data.rho);
+        setThermodynamic(data.thermodynamic);
+        setATC(data.atc);
+        setConsciousnessLevel(data.consciousnessLevel);
+        if (data.deepSurgery) setDeepSurgery(data.deepSurgery);
+        if (data.autobiographicalSelf) setAutobiographicalSelf(data.autobiographicalSelf);
+        if (data.dissolutionEngine) setDissolutionEngine(data.dissolutionEngine);
+        if (data.forwardModels) setForwardModels(data.forwardModels);
+        setAPCI(newAPCI);
+        return data;
+      } catch (err) {
+        console.error("Consciousness API error:", err);
+        return null;
+      } finally {
+        setLoading(false);
+      }
+    },
+    [
+      messages,
+      voiceConversation.transcript,
+      setLoading,
+      setQualia,
+      setRho,
+      setThermodynamic,
+      setATC,
+      setConsciousnessLevel,
+      setDeepSurgery,
+      setAutobiographicalSelf,
+      setDissolutionEngine,
+      setForwardModels,
+      setAPCI,
+    ]
+  );
+  // ============================================================
+  // Speech Recognition — using browser SpeechRecognition API
+  // (More reliable than server ASR for real-time conversation)
+  // ============================================================
+  // The recognition instance outlives the render that created it, so its
+  // handlers read the current phase and speech handler through refs instead of
+  // the values captured at creation time.
+  const recognitionPhaseRef = useRef(voiceConversation.phase);
+  const speechHandlerRef = useRef<((text: string) => void) | null>(null);
+  useEffect(() => {
+    recognitionPhaseRef.current = voiceConversation.phase;
+    speechHandlerRef.current = handleUserSpeech;
+  });
+  /**
+   * Creates (but does not start) a continuous, interim-result, en-US
+   * SpeechRecognition instance wired to this hook.
+   *
+   * - `onresult` publishes final + interim text as the partial transcript.
+   * - `onerror` sets the phase to "idle" for anything other than "no-speech"/"aborted".
+   * - `onend` hands accumulated final text to the speech handler, or restarts
+   *   recognition when nothing was heard and the phase is still "listening".
+   *
+   * @returns The recognition instance, or `null` outside a browser or when the
+   *          API is not available.
+   */
+  const startBrowserSpeechRecognition = useCallback(() => {
+    if (typeof window === "undefined") return null;
+    const SpeechRecognition =
+      (window as unknown as { SpeechRecognition?: typeof window.SpeechRecognition }).SpeechRecognition ||
+      (window as unknown as { webkitSpeechRecognition?: typeof window.SpeechRecognition }).webkitSpeechRecognition;
+    if (!SpeechRecognition) {
+      console.warn("Browser SpeechRecognition not available");
+      return null;
+    }
+    const recognition = new SpeechRecognition();
+    recognition.continuous = true;
+    recognition.interimResults = true;
+    recognition.lang = "en-US";
+    let finalTranscript = "";
+    recognition.onresult = (event: SpeechRecognitionEvent) => {
+      let interimTranscript = "";
+      for (let i = event.resultIndex; i < event.results.length; i++) {
+        const transcript = event.results[i][0].transcript;
+        if (event.results[i].isFinal) {
+          finalTranscript += transcript + " ";
+        } else {
+          interimTranscript += transcript;
+        }
+      }
+      // Update partial text for live display
+      updateVoicePartialText(finalTranscript + interimTranscript);
+    };
+    recognition.onerror = (event) => {
+      console.error("Speech recognition error:", event.error);
+      if (event.error !== "no-speech" && event.error !== "aborted") {
+        setVoicePhase("idle");
+      }
+    };
+    recognition.onend = () => {
+      const text = finalTranscript.trim();
+      // Reset so a later run of this instance cannot resend speech that was
+      // already handed over.
+      finalTranscript = "";
+      if (text) {
+        speechHandlerRef.current?.(text);
+        return;
+      }
+      // Nothing was heard: keep listening only if the conversation is still active.
+      if (recognitionPhaseRef.current !== "listening") return;
+      try {
+        recognition.start();
+      } catch {
+        setVoicePhase("idle");
+      }
+    };
+    return recognition;
+  }, [updateVoicePartialText, setVoicePhase]);
+  // ============================================================
+  // Handle user speech — transcribed text from browser or server ASR
+  // ============================================================
+  // Current phase/autoSpeak, readable after the awaits below. The values
+  // captured by the callback itself date from before the request started.
+  const conversationLiveRef = useRef({
+    phase: voiceConversation.phase,
+    autoSpeak: voiceConversation.autoSpeak,
+  });
+  useEffect(() => {
+    conversationLiveRef.current = {
+      phase: voiceConversation.phase,
+      autoSpeak: voiceConversation.autoSpeak,
+    };
+  });
+  /**
+   * Processes one transcribed user utterance: records it in the voice
+   * transcript and main chat, requests a response, records and optionally
+   * speaks the response, then settles the phase.
+   *
+   * When called during an active conversation the phase returns to
+   * "listening", unless the conversation went "idle" while the request or
+   * speech was in flight, in which case it is left "idle". When called while
+   * already "idle", the phase is restored to "idle" afterwards.
+   *
+   * @param text Transcribed speech; blank input is ignored.
+   */
+  const handleUserSpeech = useCallback(
+    async (text: string) => {
+      const spoken = text.trim();
+      if (!spoken) return;
+      const enteredIdle = conversationLiveRef.current.phase === "idle";
+      // "Stopped" can only be detected when we did not start from idle.
+      const wasStopped = () => !enteredIdle && conversationLiveRef.current.phase === "idle";
+      // Add to voice transcript
+      const entry: VoiceTranscriptEntry = {
+        id: crypto.randomUUID(),
+        role: "user",
+        content: spoken,
+        timestamp: Date.now(),
+      };
+      addVoiceTranscript(entry);
+      updateVoiceActivity("user");
+      updateVoicePartialText("");
+      // Also add to main chat
+      addMessage({
+        id: entry.id,
+        role: "user" as const,
+        content: spoken,
+        timestamp: Date.now(),
+      });
+      // Transition to processing
+      setVoicePhase("processing");
+      // Clear silence timer
+      if (silenceTimerRef.current) {
+        clearTimeout(silenceTimerRef.current);
+        silenceTimerRef.current = null;
+      }
+      // Get consciousness response
+      const response = await sendToConsciousness(text);
+      if (response) {
+        // Add assistant response to voice transcript
+        const assistantEntry: VoiceTranscriptEntry = {
+          id: crypto.randomUUID(),
+          role: "assistant",
+          content: response.text,
+          timestamp: Date.now(),
+          consciousnessLevel: response.consciousnessLevel,
+        };
+        addVoiceTranscript(assistantEntry);
+        updateVoiceActivity("assistant");
+        // Also add to main chat
+        addMessage({
+          id: assistantEntry.id,
+          role: "assistant" as const,
+          content: response.text,
+          consciousness: response,
+          timestamp: Date.now(),
+        });
+        // Auto-speak if enabled and the conversation was not stopped meanwhile
+        if (conversationLiveRef.current.autoSpeak && !wasStopped()) {
+          setVoicePhase("speaking");
+          await speakText(response.text);
+        }
+      }
+      // Settle the phase without reviving a conversation the user has stopped.
+      if (!wasStopped()) {
+        setVoicePhase(enteredIdle ? "idle" : "listening");
+      }
+    },
+    [
+      addVoiceTranscript,
+      updateVoiceActivity,
+      updateVoicePartialText,
+      addMessage,
+      setVoicePhase,
+      sendToConsciousness,
+      speakText,
+    ]
+  );
+  // ============================================================
+  // Start voice conversation
+  // ============================================================
+  /**
+   * Opens the voice panel, enters the "listening" phase, starts browser speech
+   * recognition, and opens the microphone for amplitude analysis and chunked
+   * recording.
+   *
+   * Safe to call while a conversation is already running: the previous
+   * recognition instance and media resources are released instead of being
+   * orphaned. Microphone failure is logged and leaves recognition running.
+   *
+   * NOTE(review): if stopConversation runs while the microphone permission
+   * prompt is still pending, the stream obtained afterwards is not released —
+   * confirm whether that window matters in practice.
+   * NOTE(review): nothing in this hook reads or trims chunksRef, so it grows
+   * for the whole conversation — confirm the intended consumer.
+   */
+  const startConversation = useCallback(async () => {
+    setVoicePanelOpen(true);
+    setVoicePhase("listening");
+    const holder = window as unknown as { __voiceRecognition?: SpeechRecognition };
+    // Retire a recognition instance left over from an earlier start so that
+    // two instances never run (and report speech) side by side.
+    const previousRecognition = holder.__voiceRecognition;
+    if (previousRecognition) {
+      previousRecognition.onresult = null;
+      previousRecognition.onerror = null;
+      previousRecognition.onend = null;
+      try {
+        previousRecognition.abort();
+      } catch { /* already stopped */ }
+      holder.__voiceRecognition = undefined;
+    }
+    // Start browser speech recognition
+    const recognition = startBrowserSpeechRecognition();
+    if (recognition) {
+      try {
+        recognition.start();
+      } catch (err) {
+        console.error("Failed to start speech recognition:", err);
+      }
+    }
+    // Store recognition instance for cleanup
+    holder.__voiceRecognition = recognition || undefined;
+    // Also start MediaRecorder for audio amplitude visualization
+    try {
+      const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
+      // Release resources from an earlier (or concurrent) start before
+      // replacing the refs; otherwise the old microphone stream stays open.
+      if (mediaRecorderRef.current) {
+        try {
+          mediaRecorderRef.current.stop();
+        } catch { /* recorder already inactive */ }
+        mediaRecorderRef.current = null;
+      }
+      if (audioStreamRef.current) {
+        audioStreamRef.current.getTracks().forEach((t) => t.stop());
+        audioStreamRef.current = null;
+      }
+      if (vadContextRef.current) {
+        void vadContextRef.current.close().catch(() => { /* already closed */ });
+        vadContextRef.current = null;
+        vadAnalyserRef.current = null;
+      }
+      audioStreamRef.current = stream;
+      const audioContext = new AudioContext();
+      const source = audioContext.createMediaStreamSource(stream);
+      const analyser = audioContext.createAnalyser();
+      analyser.fftSize = 256;
+      source.connect(analyser);
+      vadAnalyserRef.current = analyser;
+      vadContextRef.current = audioContext;
+      // Also keep a MediaRecorder for server-side ASR fallback
+      const mediaRecorder = new MediaRecorder(stream);
+      chunksRef.current = [];
+      mediaRecorder.ondataavailable = (e) => chunksRef.current.push(e.data);
+      mediaRecorderRef.current = mediaRecorder;
+      mediaRecorder.start(1000); // Collect in 1-second chunks
+    } catch (err) {
+      console.error("Microphone access denied:", err);
+    }
+  }, [setVoicePanelOpen, setVoicePhase, startBrowserSpeechRecognition]);
+  // ============================================================
+  // Stop voice conversation
+  // ============================================================
+  /**
+   * Ends the voice conversation: sets the phase to "idle", silences speech
+   * output, stops recognition and recording, releases the microphone and audio
+   * context, clears both timers, and empties the partial transcript.
+   */
+  const stopConversation = useCallback(() => {
+    setVoicePhase("idle");
+    stopSpeaking();
+    // Stop speech recognition
+    const holder = window as unknown as { __voiceRecognition?: SpeechRecognition };
+    const recognition = holder.__voiceRecognition;
+    if (recognition) {
+      // Detach handlers first: stop() still fires "end", and the end handler
+      // would otherwise submit any pending transcript and restart the
+      // conversation right after the user stopped it.
+      recognition.onresult = null;
+      recognition.onerror = null;
+      recognition.onend = null;
+      try {
+        recognition.stop();
+      } catch { /* ignore */ }
+      holder.__voiceRecognition = undefined;
+    }
+    // Stop media recorder
+    if (mediaRecorderRef.current) {
+      try {
+        mediaRecorderRef.current.stop();
+      } catch { /* ignore */ }
+      mediaRecorderRef.current = null;
+    }
+    // Release microphone
+    if (audioStreamRef.current) {
+      audioStreamRef.current.getTracks().forEach((t) => t.stop());
+      audioStreamRef.current = null;
+    }
+    // Close audio context
+    if (vadContextRef.current) {
+      // close() reports failure through its promise, so a try/catch around
+      // the call would not see it.
+      void vadContextRef.current.close().catch(() => { /* already closed */ });
+      vadContextRef.current = null;
+      vadAnalyserRef.current = null;
+    }
+    // Clear timers
+    if (silenceTimerRef.current) {
+      clearTimeout(silenceTimerRef.current);
+      silenceTimerRef.current = null;
+    }
+    if (proactiveTimerRef.current) {
+      clearInterval(proactiveTimerRef.current);
+      proactiveTimerRef.current = null;
+    }
+    updateVoicePartialText("");
+  }, [setVoicePhase, stopSpeaking, updateVoicePartialText]);
+  // ============================================================
+  // Proactive response system — AI speaks without user prompt
+  // ============================================================
+  // Current phase, readable after the awaits in checkProactiveResponse. The
+  // phase captured by the callback itself dates from before the request.
+  const proactivePhaseRef = useRef(voiceConversation.phase);
+  useEffect(() => {
+    proactivePhaseRef.current = voiceConversation.phase;
+  });
+  /**
+   * Decides whether the assistant should speak unprompted and, if so, requests
+   * and delivers a proactive message.
+   *
+   * Runs only when proactive mode is enabled, the phase is "listening", and
+   * nothing is being spoken. Triggers:
+   * - "silence": more than 10 s since both the last user and last AI activity.
+   * - "curiosity": dominant drive is SEEKING or PLAY, more than 15 s since the
+   *   last AI activity, and a 10% random draw succeeds (overrides "silence").
+   *
+   * Afterwards the phase returns to "listening", unless the conversation went
+   * "idle" while the request or speech was in flight.
+   */
+  const checkProactiveResponse = useCallback(async () => {
+    if (!voiceConversation.proactiveEnabled) return;
+    if (voiceConversation.phase !== "listening") return;
+    if (isSpeakingRef.current) return;
+    const now = Date.now();
+    const silenceDuration = voiceConversation.lastUserActivity > 0
+      ? now - voiceConversation.lastUserActivity
+      : 0;
+    const timeSinceAI = voiceConversation.lastAIActivity > 0
+      ? now - voiceConversation.lastAIActivity
+      : 0;
+    // Determine proactive trigger
+    let trigger: ProactiveTrigger | null = null;
+    // After 10+ seconds of silence, the AI might want to speak
+    if (silenceDuration > 10000 && timeSinceAI > 10000) {
+      trigger = {
+        type: "silence",
+        reason: `User has been silent for ${Math.round(silenceDuration / 1000)}s`,
+        urgency: Math.min(1, silenceDuration / 30000),
+      };
+    }
+    // Emotional drive shift (curiosity-driven)
+    if (atc.dominantDrive === "SEEKING" || atc.dominantDrive === "PLAY") {
+      if (timeSinceAI > 15000 && Math.random() < 0.1) {
+        trigger = {
+          type: "curiosity",
+          reason: `Curiosity-driven: dominant drive is ${atc.dominantDrive}`,
+          urgency: 0.6,
+        };
+      }
+    }
+    if (!trigger) return;
+    // The entry phase is "listening", so seeing "idle" later means the
+    // conversation was stopped while this check was running.
+    const wasStopped = () => proactivePhaseRef.current === "idle";
+    // Request proactive response
+    setVoicePhase("proactive");
+    try {
+      const res = await fetch("/api/consciousness/proactive", {
+        method: "POST",
+        headers: { "Content-Type": "application/json" },
+        body: JSON.stringify({
+          triggerType: trigger.type,
+          triggerReason: trigger.reason,
+          conversationContext: messages.slice(-10).map((m) => ({
+            role: m.role,
+            content: m.content,
+          })),
+          consciousnessLevel,
+          emotionalDrive: atc.dominantDrive,
+          hardwareStrain: voiceConversation.lastUserActivity > 0 ? 0.3 : 0.2,
+          lastUserActivity: voiceConversation.lastUserActivity,
+          lastAIActivity: voiceConversation.lastAIActivity,
+        }),
+      });
+      if (!res.ok) {
+        throw new Error(`Proactive API responded with status ${res.status}`);
+      }
+      const data = await res.json();
+      // Deliver only if the model chose to speak and nobody stopped us meanwhile.
+      if (data.shouldRespond && data.text && !wasStopped()) {
+        // Add to voice transcript
+        const entry: VoiceTranscriptEntry = {
+          id: crypto.randomUUID(),
+          role: "assistant",
+          content: data.text,
+          timestamp: Date.now(),
+          // `??` keeps a reported level of 0 instead of replacing it.
+          consciousnessLevel: data.consciousnessLevel ?? consciousnessLevel,
+        };
+        addVoiceTranscript(entry);
+        updateVoiceActivity("assistant");
+        // Also add to main chat
+        addMessage({
+          id: entry.id,
+          role: "assistant" as const,
+          content: data.text,
+          timestamp: Date.now(),
+        });
+        // Auto-speak
+        if (voiceConversation.autoSpeak) {
+          setVoicePhase("speaking");
+          await speakText(data.text);
+        }
+      }
+    } catch (err) {
+      console.error("Proactive response error:", err);
+    }
+    // Return to listening, but never revive a conversation that was stopped.
+    if (!wasStopped()) {
+      setVoicePhase("listening");
+    }
+  }, [
+    voiceConversation.proactiveEnabled,
+    voiceConversation.phase,
+    voiceConversation.lastUserActivity,
+    voiceConversation.lastAIActivity,
+    voiceConversation.autoSpeak,
+    atc.dominantDrive,
+    consciousnessLevel,
+    messages,
+    addVoiceTranscript,
+    updateVoiceActivity,
+    addMessage,
+    setVoicePhase,
+    speakText,
+  ]);
+  // ============================================================
+  // Start/stop proactive timer when voice conversation is active
+  // ============================================================
+  // Poll for proactive responses every 8 seconds, but only while listening
+  // with proactive mode enabled; the timer is torn down in every other case
+  // and on cleanup.
+  useEffect(() => {
+    const clearProactiveTimer = () => {
+      if (!proactiveTimerRef.current) return;
+      clearInterval(proactiveTimerRef.current);
+      proactiveTimerRef.current = null;
+    };
+    const shouldPoll =
+      voiceConversation.phase === "listening" && voiceConversation.proactiveEnabled;
+    if (shouldPoll) {
+      proactiveTimerRef.current = setInterval(checkProactiveResponse, 8000);
+    } else {
+      clearProactiveTimer();
+    }
+    return clearProactiveTimer;
+  }, [voiceConversation.phase, voiceConversation.proactiveEnabled, checkProactiveResponse]);
+  // ============================================================
+  // Get current audio amplitude for waveform visualization
+  // ============================================================
+  /**
+   * Returns the mean of the analyser's byte frequency data scaled to 0-1,
+   * or 0 when no analyser is attached.
+   */
+  const getAudioAmplitude = useCallback((): number => {
+    const analyser = vadAnalyserRef.current;
+    if (!analyser) return 0;
+    const bins = new Uint8Array(analyser.frequencyBinCount);
+    analyser.getByteFrequencyData(bins);
+    let total = 0;
+    for (const level of bins) {
+      total += level;
+    }
+    // Each bin is a byte, so dividing the mean by 255 normalizes to 0-1
+    return total / bins.length / 255;
+  }, []);
+  // ============================================================
+  // Legacy: Record a single voice clip (for chat input mic button)
+  // This keeps the old behavior working alongside the new floating panel
+  // ============================================================
+  /**
+   * Records one microphone clip and returns its transcription from the server
+   * ASR endpoint (`/api/consciousness/asr`).
+   *
+   * The recorder is local to this call and is only stopped by the 10-second
+   * cap, so every clip runs for the full 10 seconds.
+   *
+   * @returns The transcribed text, or `null` when the microphone is
+   *          unavailable, recording fails, the clip is empty, or the server
+   *          request fails. The promise never rejects.
+   */
+  const recordSingleClip = useCallback(async (): Promise<string | null> => {
+    let stream: MediaStream;
+    try {
+      stream = await navigator.mediaDevices.getUserMedia({ audio: true });
+    } catch {
+      return null;
+    }
+    const releaseMic = () => stream.getTracks().forEach((t) => t.stop());
+    let mediaRecorder: MediaRecorder;
+    try {
+      mediaRecorder = new MediaRecorder(stream);
+    } catch {
+      // Without this the microphone would stay open after a failed setup.
+      releaseMic();
+      return null;
+    }
+    const chunks: BlobPart[] = [];
+    return new Promise<string | null>((resolve) => {
+      // Auto-stop after 10 seconds max
+      const autoStop = setTimeout(() => {
+        if (mediaRecorder.state === "recording") {
+          mediaRecorder.stop();
+        }
+      }, 10000);
+      const abandon = () => {
+        clearTimeout(autoStop);
+        releaseMic();
+        resolve(null);
+      };
+      mediaRecorder.ondataavailable = (e) => chunks.push(e.data);
+      mediaRecorder.onerror = abandon;
+      mediaRecorder.onstop = () => {
+        clearTimeout(autoStop);
+        releaseMic();
+        // Label the blob with the container the recorder actually produced;
+        // not every browser records WebM.
+        const blob = new Blob(chunks, { type: mediaRecorder.mimeType || "audio/webm" });
+        const reader = new FileReader();
+        reader.onerror = () => resolve(null);
+        reader.onload = async () => {
+          const dataUrl = typeof reader.result === "string" ? reader.result : "";
+          const base64 = dataUrl.split(",")[1];
+          if (!base64) {
+            // Empty clip: nothing to transcribe
+            resolve(null);
+            return;
+          }
+          try {
+            const res = await fetch("/api/consciousness/asr", {
+              method: "POST",
+              headers: { "Content-Type": "application/json" },
+              body: JSON.stringify({ audio: base64 }),
+            });
+            if (!res.ok) {
+              throw new Error(`ASR request failed with status ${res.status}`);
+            }
+            const data = await res.json();
+            resolve(data.text || null);
+          } catch {
+            resolve(null);
+          }
+        };
+        reader.readAsDataURL(blob);
+      };
+      try {
+        mediaRecorder.start();
+      } catch {
+        abandon();
+      }
+    });
+  }, []);
+  // Cleanup on unmount
+  // Mount-only effect whose cleanup shuts the conversation down on unmount.
+  useEffect(() => {
+    const shutDownOnUnmount = () => {
+      stopConversation();
+    };
+    return shutDownOnUnmount;
+  // eslint-disable-next-line react-hooks/exhaustive-deps
+  }, []);
+  // Public surface of the hook: a snapshot of the voice state followed by the
+  // actions that drive it.
+  const {
+    phase,
+    isPanelOpen,
+    transcript,
+    currentPartialText,
+    isMuted,
+    autoSpeak,
+    proactiveEnabled,
+  } = voiceConversation;
+  return {
+    // State
+    phase,
+    isPanelOpen,
+    transcript,
+    currentPartialText,
+    isMuted,
+    autoSpeak,
+    proactiveEnabled,
+    // Actions
+    startConversation,
+    stopConversation,
+    handleUserSpeech,
+    speakText,
+    stopSpeaking,
+    getAudioAmplitude,
+    recordSingleClip,
+    setVoicePanelOpen,
+    setVoiceMuted: store.setVoiceMuted,
+    setAutoSpeak: store.setAutoSpeak,
+    setProactiveEnabled: store.setProactiveEnabled,
+  };
+}