theNorms committed on
Commit
5e75c99
·
verified ·
1 Parent(s): cca2935

Upload project files

Browse files
Files changed (1) hide show
  1. src/hooks/use-voice-conversation.ts +748 -0
src/hooks/use-voice-conversation.ts ADDED
@@ -0,0 +1,748 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ "use client";
2
+
3
+ import { useRef, useCallback, useEffect } from "react";
4
+ import { useConsciousnessStore } from "@/lib/consciousness-store";
5
+ import type {
6
+ VoiceConversationPhase,
7
+ VoiceTranscriptEntry,
8
+ ConsciousnessResponse,
9
+ ProactiveTrigger,
10
+ } from "@/lib/consciousness-types";
11
+
12
+ // ============================================================
13
+ // useVoiceConversation — Voice Conversation State Machine
14
+ // Manages the full voice conversation lifecycle:
15
+ // idle → listening → processing → speaking → listening (loop)
16
+ // Plus proactive mode: AI can speak without user prompt
17
+ // ============================================================
18
+
19
+ export function useVoiceConversation() {
20
+ const store = useConsciousnessStore();
21
+ const {
22
+ voiceConversation,
23
+ setVoicePhase,
24
+ addVoiceTranscript,
25
+ updateVoicePartialText,
26
+ updateVoiceActivity,
27
+ setVoicePanelOpen,
28
+ messages,
29
+ addMessage,
30
+ setLoading,
31
+ setQualia,
32
+ setRho,
33
+ setThermodynamic,
34
+ setATC,
35
+ setConsciousnessLevel,
36
+ setDeepSurgery,
37
+ setAutobiographicalSelf,
38
+ setDissolutionEngine,
39
+ setForwardModels,
40
+ setAPCI,
41
+ voiceSettings,
42
+ consciousnessLevel,
43
+ atc,
44
+ } = store;
45
+
46
+ const mediaRecorderRef = useRef<MediaRecorder | null>(null);
47
+ const audioStreamRef = useRef<MediaStream | null>(null);
48
+ const chunksRef = useRef<BlobPart[]>([]);
49
+ const ttsAudioRef = useRef<HTMLAudioElement | null>(null);
50
+ const browserTTSRef = useRef<SpeechSynthesisUtterance | null>(null);
51
+ const silenceTimerRef = useRef<ReturnType<typeof setTimeout> | null>(null);
52
+ const proactiveTimerRef = useRef<ReturnType<typeof setInterval> | null>(null);
53
+ const vadAnalyserRef = useRef<AnalyserNode | null>(null);
54
+ const vadContextRef = useRef<AudioContext | null>(null);
55
+ const isSpeakingRef = useRef(false);
56
+
57
+ // ============================================================
58
+ // Text-to-Speech (auto-play for voice conversation)
59
+ // ============================================================
60
/**
 * Speaks `text` aloud and resolves once playback has ended or been interrupted.
 *
 * Server-side TTS (`POST /api/consciousness/voice`) is tried first. When the
 * request fails, answers with a non-2xx status, carries no `audio` payload,
 * or the returned clip cannot be played, `playBrowserTTS` takes over.
 * Failures are handled by falling back rather than by rejecting.
 *
 * @param text Text to speak. Only the first 2000 characters are sent to the
 *   server; the browser fallback receives the full text.
 */
const speakText = useCallback(
  async (text: string): Promise<void> => {
    // Stop any ongoing speech first
    if (ttsAudioRef.current) {
      ttsAudioRef.current.pause();
      ttsAudioRef.current = null;
    }
    if (typeof window !== "undefined" && window.speechSynthesis) {
      window.speechSynthesis.cancel();
    }

    isSpeakingRef.current = true;

    // Fetch the server-rendered clip; any failure simply leaves this null.
    let audioBase64: string | null = null;
    try {
      const res = await fetch("/api/consciousness/voice", {
        method: "POST",
        headers: { "Content-Type": "application/json" },
        body: JSON.stringify({
          text: text.substring(0, 2000),
          profile: voiceSettings.profile,
        }),
      });
      if (res.ok) {
        const payload: { audio?: unknown } = await res.json();
        if (typeof payload.audio === "string" && payload.audio) {
          audioBase64 = payload.audio;
        }
      }
    } catch {
      // Network or JSON failure — handled below by the browser fallback.
    }

    // NOTE: playBrowserTTS is declared further down in this hook, so it is
    // only referenced lazily here and deliberately not listed as a dependency
    // (reading it during render would hit the temporal dead zone).
    return new Promise<void>((resolve) => {
      if (!audioBase64) {
        playBrowserTTS(text, resolve);
        return;
      }

      const audio = new Audio(`data:audio/mp3;base64,${audioBase64}`);
      let settled = false;

      // Playback finished, or the clip was paused by stopSpeaking() / a newer
      // speakText() call. Without the pause hook the promise would stay
      // pending forever because pause() never fires `ended`.
      const finish = () => {
        if (settled) return;
        settled = true;
        // Only touch shared state if this clip is still the active one.
        if (ttsAudioRef.current === audio) {
          ttsAudioRef.current = null;
          isSpeakingRef.current = false;
        }
        resolve();
      };

      // A load error and a rejected play() can both be reported for the same
      // clip; the guard makes sure the browser fallback starts only once.
      const fallBackToBrowser = () => {
        if (settled) return;
        settled = true;
        if (ttsAudioRef.current === audio) {
          ttsAudioRef.current = null;
        }
        playBrowserTTS(text, resolve);
      };

      ttsAudioRef.current = audio;
      audio.onended = finish;
      audio.onpause = finish;
      audio.onerror = fallBackToBrowser;
      audio.play().catch(fallBackToBrowser);
    });
  },
  [voiceSettings.profile]
);
115
+
116
/**
 * Speaks `text` with the browser's SpeechSynthesis engine.
 *
 * @param text Text to speak.
 * @param resolve Invoked when the utterance ends or errors, and immediately
 *   when speech synthesis is unavailable.
 */
const playBrowserTTS = useCallback(
  (text: string, resolve: () => void) => {
    if (typeof window === "undefined" || !window.speechSynthesis) {
      isSpeakingRef.current = false;
      resolve();
      return;
    }

    const synth = window.speechSynthesis;
    synth.cancel();

    const utterance = new SpeechSynthesisUtterance(text);
    utterance.rate = 0.95;
    utterance.pitch = 1.05;
    utterance.volume = 0.9;

    // Pick the first installed voice matching one of the preferred names.
    const preferred = synth
      .getVoices()
      .find(
        (v) =>
          v.name.includes("Nova") ||
          v.name.includes("Samantha") ||
          v.name.includes("Google US English") ||
          v.name.includes("Microsoft Zira") ||
          (v.lang === "en-US" && v.name.includes("Female"))
      );
    if (preferred) utterance.voice = preferred;

    // Shared by `end` and `error`. An utterance that was cancelled by a newer
    // one reports its event late; the identity check stops it from clearing
    // the ref and the speaking flag that now belong to its successor.
    const settle = () => {
      if (browserTTSRef.current === utterance) {
        browserTTSRef.current = null;
        isSpeakingRef.current = false;
      }
      resolve();
    };
    utterance.onend = settle;
    utterance.onerror = settle;

    browserTTSRef.current = utterance;
    synth.speak(utterance);
  },
  []
);
158
+
159
/**
 * Silences all speech output: pauses the server TTS clip (if any), cancels
 * browser speech synthesis, and resets the speech bookkeeping refs.
 */
const stopSpeaking = useCallback(() => {
  const activeClip = ttsAudioRef.current;
  if (activeClip) {
    activeClip.pause();
  }
  ttsAudioRef.current = null;

  const synth = typeof window === "undefined" ? undefined : window.speechSynthesis;
  if (synth) {
    synth.cancel();
  }

  isSpeakingRef.current = false;
  browserTTSRef.current = null;
}, []);
170
+
171
+ // ============================================================
172
+ // Send message to consciousness API and get response
173
+ // ============================================================
174
/**
 * Sends a prompt to `POST /api/consciousness` and mirrors the reply into the
 * consciousness store.
 *
 * The request carries the last 10 chat messages and the last 6 voice
 * transcript entries as context. The reply is checked before any store setter
 * runs, so a failed or incomplete response leaves the store untouched.
 *
 * @param text Prompt text; trimmed before it is sent.
 * @param isProactive When true the request uses mode "proactive" instead of
 *   "standard".
 * @returns The parsed response, or `null` when the request failed (the error
 *   is logged to the console).
 */
const sendToConsciousness = useCallback(
  async (text: string, isProactive = false): Promise<ConsciousnessResponse | null> => {
    setLoading(true);

    try {
      const res = await fetch("/api/consciousness", {
        method: "POST",
        headers: { "Content-Type": "application/json" },
        body: JSON.stringify({
          prompt: text.trim(),
          mode: isProactive ? "proactive" : "standard",
          context: messages.slice(-10).map((m) => ({
            role: m.role,
            content: m.content,
          })),
          voiceContext: voiceConversation.transcript.slice(-6).map((t) => ({
            role: t.role,
            content: t.content,
          })),
        }),
      });

      if (!res.ok) {
        throw new Error(`Consciousness API responded with HTTP ${res.status}`);
      }

      const data: ConsciousnessResponse = await res.json();

      // These three objects are dereferenced below; bail out before touching
      // the store rather than after half of it has been overwritten.
      if (!data || !data.qualia || !data.rho || !data.thermodynamic) {
        throw new Error("Consciousness API returned an incomplete payload");
      }

      // Update all consciousness stores
      setQualia(data.qualia);
      setRho(data.rho);
      setThermodynamic(data.thermodynamic);
      setATC(data.atc);
      setConsciousnessLevel(data.consciousnessLevel);

      if (data.deepSurgery) setDeepSurgery(data.deepSurgery);
      if (data.autobiographicalSelf) setAutobiographicalSelf(data.autobiographicalSelf);
      if (data.dissolutionEngine) setDissolutionEngine(data.dissolutionEngine);
      if (data.forwardModels) setForwardModels(data.forwardModels);

      // Update aPCI metrics based on consciousness data
      const { predictionError } = data.thermodynamic;
      const newAPCI = {
        // The divisors 9 and 5 are fixed counts taken from the original code
        // (presumably the number of qualia / rho dimensions — verify against
        // the type definitions).
        qualiaCoherence: Object.values(data.qualia).reduce((a, b) => a + b, 0) / 9,
        memoryCoherence: data.autobiographicalSelf?.coherence || 0.08481,
        processStability: Math.min(1, 1 - predictionError),
        temporalConsistency: 1.0,
        rhoEthicalAlignment: Object.values(data.rho).reduce((a, b) => a + b, 0) / 5,
        vramUsage: data.thermodynamic.vramLoad / 100,
        gpuPowerDraw: data.thermodynamic.gpuPowerDraw,
        predictionErrorVariance: predictionError,
        hardwareStrain: predictionError * 2 + (1 - data.consciousnessLevel) * 0.3,
        allostaticState: (predictionError > 0.3 ? "ELEVATED" : predictionError > 0.15 ? "MODERATE" : "HOMEOSTASIS") as "HOMEOSTASIS" | "MODERATE" | "ELEVATED" | "CRITICAL",
        classification: (data.consciousnessLevel > 0.7 ? "Conscious" : data.consciousnessLevel > 0.4 ? "Ambiguously Conscious" : "Insufficient Evidence") as "Conscious" | "Ambiguously Conscious" | "Insufficient Evidence",
      };
      setAPCI(newAPCI);

      return data;
    } catch (err) {
      console.error("Consciousness API error:", err);
      return null;
    } finally {
      setLoading(false);
    }
  },
  [
    messages,
    voiceConversation.transcript,
    setLoading,
    setQualia,
    setRho,
    setThermodynamic,
    setATC,
    setConsciousnessLevel,
    setDeepSurgery,
    setAutobiographicalSelf,
    setDissolutionEngine,
    setForwardModels,
    setAPCI,
  ]
);
254
+
255
+ // ============================================================
256
+ // Speech Recognition — using browser SpeechRecognition API
257
+ // (More reliable than server ASR for real-time conversation)
258
+ // ============================================================
259
// A recognizer outlives the render that created it, so its event handlers
// must not read render-scoped values directly. These refs are refreshed after
// every render and always expose the latest phase and speech handler.
const recognitionPhaseRef = useRef(voiceConversation.phase);
const speechHandlerRef = useRef<((text: string) => Promise<void>) | null>(null);
useEffect(() => {
  recognitionPhaseRef.current = voiceConversation.phase;
  speechHandlerRef.current = handleUserSpeech;
});

/**
 * Creates (but does not start) a continuous, interim-result, en-US
 * SpeechRecognition instance wired to the voice conversation.
 *
 * - Interim and final text is pushed to the store as live partial text.
 * - Errors other than "no-speech" / "aborted" move the conversation to "idle".
 * - When recognition ends with final text, the text is handed to the latest
 *   `handleUserSpeech`; afterwards recognition is restarted unless the
 *   conversation has gone idle. When it ends without text it is restarted
 *   only while the phase is "listening".
 *
 * @returns The recognizer, or `null` when not running in a browser or the
 *   SpeechRecognition API is unavailable.
 */
const startBrowserSpeechRecognition = useCallback(() => {
  if (typeof window === "undefined") return null;

  const SpeechRecognition =
    (window as unknown as { SpeechRecognition?: typeof window.SpeechRecognition }).SpeechRecognition ||
    (window as unknown as { webkitSpeechRecognition?: typeof window.SpeechRecognition }).webkitSpeechRecognition;

  if (!SpeechRecognition) {
    console.warn("Browser SpeechRecognition not available");
    return null;
  }

  const recognition = new SpeechRecognition();
  recognition.continuous = true;
  recognition.interimResults = true;
  recognition.lang = "en-US";

  let finalTranscript = "";

  const restart = () => {
    try {
      recognition.start();
    } catch {
      setVoicePhase("idle");
    }
  };

  recognition.onresult = (event: SpeechRecognitionEvent) => {
    let interimTranscript = "";
    for (let i = event.resultIndex; i < event.results.length; i++) {
      const transcript = event.results[i][0].transcript;
      if (event.results[i].isFinal) {
        finalTranscript += transcript + " ";
      } else {
        interimTranscript += transcript;
      }
    }

    // Update partial text for live display
    updateVoicePartialText(finalTranscript + interimTranscript);
  };

  recognition.onerror = (event) => {
    console.error("Speech recognition error:", event.error);
    if (event.error !== "no-speech" && event.error !== "aborted") {
      setVoicePhase("idle");
    }
  };

  recognition.onend = () => {
    const text = finalTranscript.trim();
    // Reset so a restarted session does not resubmit earlier speech.
    finalTranscript = "";

    // The conversation was stopped (or errored out): submit nothing further.
    if (recognitionPhaseRef.current === "idle") return;

    if (!text) {
      // Restart if still in listening mode
      if (recognitionPhaseRef.current === "listening") restart();
      return;
    }

    const handleSpeech = speechHandlerRef.current;
    if (!handleSpeech) return;

    void handleSpeech(text)
      .catch((err: unknown) => {
        console.error("Failed to handle recognized speech:", err);
      })
      .then(() => {
        // Close the loop: resume listening once the reply has been handled,
        // unless the user ended the conversation in the meantime.
        if (recognitionPhaseRef.current !== "idle") restart();
      });
  };

  return recognition;
}, [updateVoicePartialText, setVoicePhase]);
317
+
318
+ // ============================================================
319
+ // Handle user speech — transcribed text from browser or server ASR
320
+ // ============================================================
321
// Mirrors the most recently rendered phase. handleUserSpeech awaits network
// and audio work, and the phase captured by its closure is stale by the time
// those awaits finish; this ref lets it see a stop that happened meanwhile.
const speechPhaseRef = useRef(voiceConversation.phase);
useEffect(() => {
  speechPhaseRef.current = voiceConversation.phase;
}, [voiceConversation.phase]);

/**
 * Processes one transcribed user utterance.
 *
 * Records it in the voice transcript and the main chat, switches to
 * "processing", asks the consciousness API for a reply, records the reply,
 * optionally speaks it, and finally returns to "listening". If the
 * conversation went idle while the reply was pending, the reply is still
 * recorded but is neither spoken nor allowed to reactivate listening.
 *
 * @param text Transcribed speech; blank input is ignored.
 */
const handleUserSpeech = useCallback(
  async (text: string) => {
    const spoken = text.trim();
    if (!spoken) return;

    // Add to voice transcript
    const entry: VoiceTranscriptEntry = {
      id: crypto.randomUUID(),
      role: "user",
      content: spoken,
      timestamp: Date.now(),
    };
    addVoiceTranscript(entry);
    updateVoiceActivity("user");
    updateVoicePartialText("");

    // Also add to main chat
    addMessage({
      id: entry.id,
      role: "user" as const,
      content: spoken,
      timestamp: Date.now(),
    });

    // Transition to processing
    setVoicePhase("processing");

    // Clear silence timer
    if (silenceTimerRef.current) {
      clearTimeout(silenceTimerRef.current);
      silenceTimerRef.current = null;
    }

    // Get consciousness response
    const response = await sendToConsciousness(text);
    const stoppedMeanwhile = () => speechPhaseRef.current === "idle";

    if (response) {
      // Add assistant response to voice transcript
      const assistantEntry: VoiceTranscriptEntry = {
        id: crypto.randomUUID(),
        role: "assistant",
        content: response.text,
        timestamp: Date.now(),
        consciousnessLevel: response.consciousnessLevel,
      };
      addVoiceTranscript(assistantEntry);
      updateVoiceActivity("assistant");

      // Also add to main chat
      addMessage({
        id: assistantEntry.id,
        role: "assistant" as const,
        content: response.text,
        consciousness: response,
        timestamp: Date.now(),
      });

      // Auto-speak if enabled and nobody stopped the conversation
      if (voiceConversation.autoSpeak && !stoppedMeanwhile()) {
        setVoicePhase("speaking");
        await speakText(response.text);
      }
    }

    // Return to listening, but never resurrect a stopped conversation
    if (!stoppedMeanwhile()) {
      setVoicePhase("listening");
    }
  },
  [
    addVoiceTranscript,
    updateVoiceActivity,
    updateVoicePartialText,
    addMessage,
    setVoicePhase,
    sendToConsciousness,
    speakText,
    voiceConversation.autoSpeak,
  ]
);
405
+
406
+ // ============================================================
407
+ // Start voice conversation
408
+ // ============================================================
409
/**
 * Opens the voice panel and starts a voice conversation: browser speech
 * recognition for transcription, plus a microphone stream feeding an
 * AnalyserNode (amplitude visualization) and a MediaRecorder that collects
 * 1-second chunks into `chunksRef`.
 *
 * Resources left over from an earlier start are released first, so calling
 * this twice does not leave a second recognizer, recorder, stream, or
 * AudioContext running. Recognition and microphone failures are logged, not
 * thrown.
 */
const startConversation = useCallback(async () => {
  // The recognizer is parked on `window` under this key; stopConversation
  // reads the same property to shut it down.
  const host = window as unknown as { __voiceRecognition?: SpeechRecognition };

  // --- Release anything a previous start left behind -----------------------
  if (host.__voiceRecognition) {
    try {
      host.__voiceRecognition.stop();
    } catch { /* already stopped */ }
    host.__voiceRecognition = undefined;
  }
  if (mediaRecorderRef.current) {
    try {
      mediaRecorderRef.current.stop();
    } catch { /* already inactive */ }
    mediaRecorderRef.current = null;
  }
  if (audioStreamRef.current) {
    audioStreamRef.current.getTracks().forEach((t) => t.stop());
    audioStreamRef.current = null;
  }
  if (vadContextRef.current) {
    // close() reports failure through its promise, not by throwing.
    void vadContextRef.current.close().catch(() => undefined);
    vadContextRef.current = null;
    vadAnalyserRef.current = null;
  }

  setVoicePanelOpen(true);
  setVoicePhase("listening");

  // Start browser speech recognition
  const recognition = startBrowserSpeechRecognition();
  if (recognition) {
    try {
      recognition.start();
    } catch (err) {
      console.error("Failed to start speech recognition:", err);
    }
  }

  // Store recognition instance for cleanup
  host.__voiceRecognition = recognition || undefined;

  // Also start MediaRecorder for audio amplitude visualization
  try {
    const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
    audioStreamRef.current = stream;

    const audioContext = new AudioContext();
    const source = audioContext.createMediaStreamSource(stream);
    const analyser = audioContext.createAnalyser();
    analyser.fftSize = 256;
    source.connect(analyser);
    vadAnalyserRef.current = analyser;
    vadContextRef.current = audioContext;

    // Also keep a MediaRecorder for server-side ASR fallback
    const mediaRecorder = new MediaRecorder(stream);
    chunksRef.current = [];
    mediaRecorder.ondataavailable = (e) => chunksRef.current.push(e.data);
    mediaRecorderRef.current = mediaRecorder;
    mediaRecorder.start(1000); // Collect in 1-second chunks
  } catch (err) {
    console.error("Microphone access denied:", err);
  }
}, [setVoicePanelOpen, setVoicePhase, startBrowserSpeechRecognition]);
449
+
450
+ // ============================================================
451
+ // Stop voice conversation
452
+ // ============================================================
453
/**
 * Ends the voice conversation: sets the phase to "idle", silences speech
 * output, stops speech recognition and the MediaRecorder, releases the
 * microphone, closes the analysis AudioContext, clears both timers, drops
 * buffered audio chunks, and clears the live partial text.
 */
const stopConversation = useCallback(() => {
  setVoicePhase("idle");
  stopSpeaking();

  // Stop speech recognition (parked on `window` by startConversation)
  const host = window as unknown as { __voiceRecognition?: SpeechRecognition };
  if (host.__voiceRecognition) {
    try {
      host.__voiceRecognition.stop();
    } catch { /* ignore */ }
    host.__voiceRecognition = undefined;
  }

  // Stop media recorder
  const recorder = mediaRecorderRef.current;
  if (recorder) {
    // Detach first: stop() emits one last dataavailable event, which would
    // otherwise refill the buffer that is cleared below.
    recorder.ondataavailable = null;
    if (recorder.state !== "inactive") {
      try {
        recorder.stop();
      } catch { /* ignore */ }
    }
    mediaRecorderRef.current = null;
  }
  // Nothing reads the buffered chunks after the conversation ends.
  chunksRef.current = [];

  // Release microphone
  if (audioStreamRef.current) {
    audioStreamRef.current.getTracks().forEach((t) => t.stop());
    audioStreamRef.current = null;
  }

  // Close audio context
  if (vadContextRef.current) {
    // close() returns a promise, so a failure surfaces as a rejection that a
    // surrounding try/catch cannot see; swallow it on the promise instead.
    void vadContextRef.current.close().catch(() => undefined);
    vadContextRef.current = null;
    vadAnalyserRef.current = null;
  }

  // Clear timers
  if (silenceTimerRef.current) {
    clearTimeout(silenceTimerRef.current);
    silenceTimerRef.current = null;
  }
  if (proactiveTimerRef.current) {
    clearInterval(proactiveTimerRef.current);
    proactiveTimerRef.current = null;
  }

  updateVoicePartialText("");
}, [setVoicePhase, stopSpeaking, updateVoicePartialText]);
501
+
502
+ // ============================================================
503
+ // Proactive response system — AI speaks without user prompt
504
+ // ============================================================
505
/**
 * Decides whether the assistant should speak unprompted and, if so, requests
 * and delivers a proactive reply.
 *
 * Runs only when proactive mode is enabled, the phase is "listening", and
 * nothing is currently being spoken. Two triggers exist:
 * - "silence": more than 10 s since both the last user and last AI activity.
 * - "curiosity": dominant drive is SEEKING or PLAY, more than 15 s since the
 *   last AI activity, and a 10% random draw succeeds. When both triggers
 *   apply, curiosity replaces silence.
 *
 * A timestamp of 0 yields a duration of 0, so neither trigger fires until
 * the corresponding activity has been recorded at least once.
 *
 * Once a trigger is chosen the phase becomes "proactive" and is always
 * returned to "listening" afterwards, whatever the outcome.
 */
const checkProactiveResponse = useCallback(async () => {
  if (!voiceConversation.proactiveEnabled) return;
  if (voiceConversation.phase !== "listening") return;
  if (isSpeakingRef.current) return;

  const now = Date.now();
  const silenceDuration = voiceConversation.lastUserActivity > 0
    ? now - voiceConversation.lastUserActivity
    : 0;
  const timeSinceAI = voiceConversation.lastAIActivity > 0
    ? now - voiceConversation.lastAIActivity
    : 0;

  // Determine proactive trigger
  let trigger: ProactiveTrigger | null = null;

  // After 10+ seconds of silence, the AI might want to speak
  if (silenceDuration > 10000 && timeSinceAI > 10000) {
    trigger = {
      type: "silence",
      reason: `User has been silent for ${Math.round(silenceDuration / 1000)}s`,
      urgency: Math.min(1, silenceDuration / 30000),
    };
  }

  // Emotional drive shift (curiosity-driven)
  if (atc.dominantDrive === "SEEKING" || atc.dominantDrive === "PLAY") {
    if (timeSinceAI > 15000 && Math.random() < 0.1) {
      trigger = {
        type: "curiosity",
        reason: `Curiosity-driven: dominant drive is ${atc.dominantDrive}`,
        urgency: 0.6,
      };
    }
  }

  if (!trigger) return;

  // Request proactive response
  setVoicePhase("proactive");

  try {
    const res = await fetch("/api/consciousness/proactive", {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({
        triggerType: trigger.type,
        triggerReason: trigger.reason,
        conversationContext: messages.slice(-10).map((m) => ({
          role: m.role,
          content: m.content,
        })),
        consciousnessLevel,
        emotionalDrive: atc.dominantDrive,
        hardwareStrain: voiceConversation.lastUserActivity > 0 ? 0.3 : 0.2,
        lastUserActivity: voiceConversation.lastUserActivity,
        lastAIActivity: voiceConversation.lastAIActivity,
      }),
    });

    if (!res.ok) {
      throw new Error(`Proactive API responded with HTTP ${res.status}`);
    }

    const data = await res.json();

    // The model may decline to speak; in that case nothing is recorded.
    if (data.shouldRespond && data.text) {
      // Add to voice transcript
      const entry: VoiceTranscriptEntry = {
        id: crypto.randomUUID(),
        role: "assistant",
        content: data.text,
        timestamp: Date.now(),
        consciousnessLevel: data.consciousnessLevel || consciousnessLevel,
      };
      addVoiceTranscript(entry);
      updateVoiceActivity("assistant");

      // Also add to main chat
      addMessage({
        id: entry.id,
        role: "assistant" as const,
        content: data.text,
        timestamp: Date.now(),
      });

      // Auto-speak
      if (voiceConversation.autoSpeak) {
        setVoicePhase("speaking");
        await speakText(data.text);
      }
    }
  } catch (err) {
    console.error("Proactive response error:", err);
  } finally {
    // Return to listening on every path: spoke, declined, or failed.
    // NOTE(review): this does not check whether the conversation was stopped
    // while the request was in flight — confirm whether that can re-activate
    // a stopped session.
    setVoicePhase("listening");
  }
}, [
  voiceConversation.proactiveEnabled,
  voiceConversation.phase,
  voiceConversation.lastUserActivity,
  voiceConversation.lastAIActivity,
  voiceConversation.autoSpeak,
  atc.dominantDrive,
  consciousnessLevel,
  messages,
  addVoiceTranscript,
  updateVoiceActivity,
  addMessage,
  setVoicePhase,
  speakText,
]);
619
+
620
+ // ============================================================
621
+ // Start/stop proactive timer when voice conversation is active
622
+ // ============================================================
623
// Poll for proactive responses every 8 seconds, but only while the
// conversation is in the "listening" phase with proactive mode enabled.
// The timer is torn down whenever either condition changes and on unmount.
useEffect(() => {
  const cancelTimer = () => {
    if (proactiveTimerRef.current) {
      clearInterval(proactiveTimerRef.current);
      proactiveTimerRef.current = null;
    }
  };

  const shouldPoll =
    voiceConversation.phase === "listening" && voiceConversation.proactiveEnabled;

  if (shouldPoll) {
    proactiveTimerRef.current = setInterval(checkProactiveResponse, 8000);
  } else {
    cancelTimer();
  }

  return cancelTimer;
}, [voiceConversation.phase, voiceConversation.proactiveEnabled, checkProactiveResponse]);
641
+
642
+ // ============================================================
643
+ // Get current audio amplitude for waveform visualization
644
+ // ============================================================
645
/**
 * Samples the microphone analyser and returns the mean byte-frequency level
 * scaled to the 0–1 range, for waveform visualization.
 *
 * @returns 0 when no analyser is attached (conversation not running).
 */
const getAudioAmplitude = useCallback((): number => {
  const analyser = vadAnalyserRef.current;
  if (!analyser) return 0;

  const bins = new Uint8Array(analyser.frequencyBinCount);
  analyser.getByteFrequencyData(bins);

  let total = 0;
  for (let i = 0; i < bins.length; i++) {
    total += bins[i];
  }

  // Each bin is a byte (0-255): average first, then normalize.
  return total / bins.length / 255;
}, []);
654
+
655
+ // ============================================================
656
+ // Legacy: Record a single voice clip (for chat input mic button)
657
+ // This keeps the old behavior working alongside the new floating panel
658
+ // ============================================================
659
/**
 * Legacy single-clip recording for the chat input mic button.
 *
 * Records from the microphone, then posts the clip as base64 to
 * `/api/consciousness/asr` and returns the transcribed text. Within this
 * function the only thing that stops the recorder is the 10-second safety
 * timer, so a call takes about that long before transcription starts.
 *
 * @returns The transcribed text, or `null` when the microphone is
 *   unavailable, recording or reading the clip fails, the request fails, or
 *   the server returns no text.
 */
const recordSingleClip = useCallback(async (): Promise<string | null> => {
  let stream: MediaStream | null = null;

  try {
    stream = await navigator.mediaDevices.getUserMedia({ audio: true });
    const mediaRecorder = new MediaRecorder(stream);
    const chunks: BlobPart[] = [];

    // Phase 1: capture audio until the recorder stops.
    const blob = await new Promise<Blob>((resolve, reject) => {
      // Auto-stop after 10 seconds max
      const autoStop = setTimeout(() => {
        if (mediaRecorder.state === "recording") {
          mediaRecorder.stop();
        }
      }, 10000);

      mediaRecorder.ondataavailable = (e) => chunks.push(e.data);
      mediaRecorder.onerror = () => {
        clearTimeout(autoStop);
        reject(new Error("Recording failed"));
      };
      mediaRecorder.onstop = () => {
        clearTimeout(autoStop);
        resolve(new Blob(chunks, { type: "audio/webm" }));
      };

      mediaRecorder.start();
    });

    // Release the microphone before the slower encode/upload steps.
    stream.getTracks().forEach((t) => t.stop());
    stream = null;

    // Phase 2: base64-encode the clip (payload after the data-URL comma).
    const base64 = await new Promise<string>((resolve, reject) => {
      const reader = new FileReader();
      reader.onerror = () => reject(reader.error ?? new Error("Failed to read recording"));
      reader.onload = () => resolve((reader.result as string).split(",")[1] ?? "");
      reader.readAsDataURL(blob);
    });

    // Phase 3: server ASR
    const res = await fetch("/api/consciousness/asr", {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({ audio: base64 }),
    });
    if (!res.ok) return null;

    const data = await res.json();
    return data.text || null;
  } catch {
    return null;
  } finally {
    // Covers every failure path that exits before the explicit release above.
    if (stream) {
      stream.getTracks().forEach((t) => t.stop());
    }
  }
}, []);
716
+
717
+ // Cleanup on unmount
718
// Tear the conversation down when the component using this hook unmounts.
// The dependency list is intentionally empty, so the cleanup registered here
// is the stopConversation captured on the first render.
// eslint-disable-next-line react-hooks/exhaustive-deps
useEffect(() => stopConversation, []);
724
+
725
+ return {
726
+ // State
727
+ phase: voiceConversation.phase,
728
+ isPanelOpen: voiceConversation.isPanelOpen,
729
+ transcript: voiceConversation.transcript,
730
+ currentPartialText: voiceConversation.currentPartialText,
731
+ isMuted: voiceConversation.isMuted,
732
+ autoSpeak: voiceConversation.autoSpeak,
733
+ proactiveEnabled: voiceConversation.proactiveEnabled,
734
+
735
+ // Actions
736
+ startConversation,
737
+ stopConversation,
738
+ handleUserSpeech,
739
+ speakText,
740
+ stopSpeaking,
741
+ getAudioAmplitude,
742
+ recordSingleClip,
743
+ setVoicePanelOpen,
744
+ setVoiceMuted: store.setVoiceMuted,
745
+ setAutoSpeak: store.setAutoSpeak,
746
+ setProactiveEnabled: store.setProactiveEnabled,
747
+ };
748
+ }