varshini-nandula committed on
Commit
0666849
·
1 Parent(s): 758f79c

feat(ui): Implement Speech-to-Text Dictation

Browse files
frontend/src/components/chat/ChatPanel.tsx CHANGED
@@ -9,7 +9,42 @@ import { Button } from "@/components/ui/button";
9
  import { Textarea } from "@/components/ui/textarea";
10
  import MessageBubble from "./MessageBubble";
11
  import SourceCard from "./SourceCard";
12
- import { Send, Loader2, Trash2, MessageSquare, Download } from "lucide-react";
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
 
14
  interface Props {
15
  activeDoc: DocInfo | null;
@@ -17,7 +52,7 @@ interface Props {
17
  }
18
 
19
  export default function ChatPanel({ activeDoc, onCitationClick }: Props) {
20
- const { t } = useTranslation();
21
  const messages = useChatStore((state) => state.messages);
22
  const input = useChatStore((state) => state.input);
23
  const streaming = useChatStore((state) => state.streaming);
@@ -28,6 +63,10 @@ export default function ChatPanel({ activeDoc, onCitationClick }: Props) {
28
  const setIsTyping = useChatStore((state) => state.setIsTyping);
29
  const resetChat = useChatStore((state) => state.resetChat);
30
  const [showExportMenu, setShowExportMenu] = useState(false);
 
 
 
 
31
  const textareaRef = useRef<HTMLTextAreaElement>(null);
32
  const bottomRef = useRef<HTMLDivElement>(null);
33
  const prevDocId = useRef<string | null>(null);
@@ -237,6 +276,109 @@ export default function ChatPanel({ activeDoc, onCitationClick }: Props) {
237
  return () => document.removeEventListener("mousedown", handleClickOutside);
238
  }, [showExportMenu]);
239
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
240
  const handleKeyDown = (e: React.KeyboardEvent) => {
241
  if (e.key === "Enter" && !e.shiftKey) {
242
  e.preventDefault();
@@ -287,24 +429,86 @@ export default function ChatPanel({ activeDoc, onCitationClick }: Props) {
287
  </div>
288
 
289
  {/* ── Input Area ─────────────────────────────── */}
290
- <div className="border-t border-border/50 p-4 bg-card/30 backdrop-blur-sm">
291
- <div className="max-w-3xl mx-auto flex gap-2 items-end">
292
- <Textarea
293
- ref={textareaRef}
294
- id="chat-input"
295
- value={input}
296
- onChange={(e) => setInput(e.target.value)}
297
- onKeyDown={handleKeyDown}
298
- placeholder={
299
- activeDoc
300
- ? t("chat.askPlaceholder", { name: activeDoc.original_name })
301
- : t("chat.selectPlaceholder")
302
- }
303
- disabled={streaming}
304
- className="min-h-[44px] max-h-32 resize-none bg-background/50 border-border/50"
305
- rows={1}
306
- />
307
- <div className="flex gap-1.5 shrink-0">
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
308
  <Button
309
  id="send-btn"
310
  size="icon"
@@ -376,5 +580,6 @@ export default function ChatPanel({ activeDoc, onCitationClick }: Props) {
376
  </div>
377
  </div>
378
  </div>
 
379
  );
380
  }
 
9
  import { Textarea } from "@/components/ui/textarea";
10
  import MessageBubble from "./MessageBubble";
11
  import SourceCard from "./SourceCard";
12
+ import { Send, Loader2, Trash2, MessageSquare, Download, Mic, MicOff } from "lucide-react";
13
+ import { cn } from "@/lib/utils";
14
+
15
+ interface ISpeechRecognitionEvent {
16
+ resultIndex: number;
17
+ results: {
18
+ length: number;
19
+ [index: number]: {
20
+ [index: number]: {
21
+ transcript: string;
22
+ };
23
+ isFinal: boolean;
24
+ };
25
+ };
26
+ }
27
+
28
+ interface ISpeechRecognitionErrorEvent {
29
+ error: string;
30
+ message: string;
31
+ }
32
+
33
+ interface ISpeechRecognition {
34
+ continuous: boolean;
35
+ interimResults: boolean;
36
+ lang: string;
37
+ onresult: ((event: ISpeechRecognitionEvent) => void) | null;
38
+ onerror: ((event: ISpeechRecognitionErrorEvent) => void) | null;
39
+ onend: (() => void) | null;
40
+ start: () => void;
41
+ stop: () => void;
42
+ }
43
+
44
+ interface WindowWithSpeech extends Window {
45
+ SpeechRecognition?: new () => ISpeechRecognition;
46
+ webkitSpeechRecognition?: new () => ISpeechRecognition;
47
+ }
48
 
49
  interface Props {
50
  activeDoc: DocInfo | null;
 
52
  }
53
 
54
  export default function ChatPanel({ activeDoc, onCitationClick }: Props) {
55
+ const { t, i18n } = useTranslation();
56
  const messages = useChatStore((state) => state.messages);
57
  const input = useChatStore((state) => state.input);
58
  const streaming = useChatStore((state) => state.streaming);
 
63
  const setIsTyping = useChatStore((state) => state.setIsTyping);
64
  const resetChat = useChatStore((state) => state.resetChat);
65
  const [showExportMenu, setShowExportMenu] = useState(false);
66
+ const [isRecording, setIsRecording] = useState(false);
67
+ const [speechError, setSpeechError] = useState<string | null>(null);
68
+ const recognitionRef = useRef<ISpeechRecognition | null>(null);
69
+ const initialInputRef = useRef<string>("");
70
  const textareaRef = useRef<HTMLTextAreaElement>(null);
71
  const bottomRef = useRef<HTMLDivElement>(null);
72
  const prevDocId = useRef<string | null>(null);
 
276
  return () => document.removeEventListener("mousedown", handleClickOutside);
277
  }, [showExportMenu]);
278
 
279
// Tear down any live speech-recognition session when the panel unmounts.
useEffect(() => {
  return () => {
    // Read the ref at cleanup time on purpose: it holds whichever session is
    // current when the component goes away, not the one from first render.
    const recognition = recognitionRef.current;
    if (!recognition) return;

    // Detach the handlers before stopping so that events delivered after
    // stop() (a final result, an error, or the end event) cannot run state
    // updates for a component that no longer exists.
    recognition.onresult = null;
    recognition.onerror = null;
    recognition.onend = null;
    recognition.stop();
    recognitionRef.current = null;
  };
}, []);
287
+
288
/**
 * Starts a dictation session using the browser's speech-recognition API.
 *
 * The constructor is looked up on `window` as `SpeechRecognition` or the
 * `webkit`-prefixed variant. When neither exists, a "not supported" message
 * is stored in `speechError` and nothing is started.
 *
 * The text present in the input when dictation begins is kept as a prefix;
 * every result event rewrites the input as that prefix followed by the
 * transcript accumulated so far in this session.
 *
 * Failures (constructor/start exceptions and recognition error events) are
 * surfaced through `speechError` and reset `isRecording`; nothing is thrown.
 */
const startRecording = () => {
  const speechWindow =
    typeof window !== "undefined" ? (window as unknown as WindowWithSpeech) : null;
  const SpeechRecognitionAPI =
    speechWindow?.SpeechRecognition ?? speechWindow?.webkitSpeechRecognition;

  if (!SpeechRecognitionAPI) {
    setSpeechError(
      t("chat.speechNotSupported", {
        defaultValue: "Speech recognition is not supported in this browser.",
      })
    );
    return;
  }

  try {
    const recognition = new SpeechRecognitionAPI();
    recognition.continuous = true;
    recognition.interimResults = true;

    // Map the UI language to a recognition locale. The exact value is tried
    // first, then its base language, so regional variants such as "es-MX"
    // or "hi_IN" still resolve to a supported locale instead of English.
    const langMap: Record<string, string> = {
      en: "en-US",
      hi: "hi-IN",
      es: "es-ES",
      fr: "fr-FR",
    };
    const uiLanguage = i18n.language || "en";
    const baseLanguage = (uiLanguage.split(/[-_]/)[0] ?? uiLanguage).toLowerCase();
    recognition.lang = langMap[uiLanguage] ?? langMap[baseLanguage] ?? "en-US";

    // Snapshot the current input so dictated text is appended to it rather
    // than replacing it.
    initialInputRef.current = input;
    setSpeechError(null);
    setIsRecording(true);

    recognition.onresult = (event: ISpeechRecognitionEvent) => {
      // Rebuild the whole session transcript on every event: interim results
      // are revised in place, so appending deltas would duplicate text.
      let sessionTranscript = "";
      for (let i = 0; i < event.results.length; ++i) {
        sessionTranscript += event.results[i][0].transcript;
      }
      // Join only the non-empty parts so an empty transcript does not leave
      // a dangling space after the existing text.
      setInput(
        [initialInputRef.current, sessionTranscript.trim()]
          .filter((part) => part.length > 0)
          .join(" ")
      );
    };

    recognition.onerror = (event: ISpeechRecognitionErrorEvent) => {
      const errorCode = event.error;
      if (errorCode === "aborted") return; // ignore manual aborts

      let msg: string;
      switch (errorCode) {
        case "not-allowed":
          msg = t("chat.micPermissionDenied", {
            defaultValue: "Microphone access denied. Please enable permissions in settings.",
          });
          break;
        case "no-speech":
          msg = t("chat.noSpeechDetected", {
            defaultValue: "No speech was detected. Please try again.",
          });
          break;
        case "audio-capture":
          msg = t("chat.audioCaptureError", {
            defaultValue: "No microphone found or microphone is not working.",
          });
          break;
        case "network":
          msg = t("chat.networkError", {
            defaultValue: "Network error occurred during speech recognition.",
          });
          break;
        default:
          // The translated string interpolates {{message}}, so the error code
          // must be passed under that name or it never reaches the user.
          msg = t("chat.speechError", {
            message: errorCode,
            defaultValue: "Speech recognition error: {{message}}",
          });
      }
      setSpeechError(msg);
      setIsRecording(false);
    };

    recognition.onend = () => {
      setIsRecording(false);
      // Release the finished session, but only if a newer one has not
      // already replaced it.
      if (recognitionRef.current === recognition) {
        recognitionRef.current = null;
      }
    };

    recognitionRef.current = recognition;
    recognition.start();
  } catch (err) {
    // Construction or start() failed: drop the unusable instance and report.
    recognitionRef.current = null;
    setSpeechError(err instanceof Error ? err.message : "Failed to start speech recognition.");
    setIsRecording(false);
  }
};
367
+
368
/** Asks the current speech-recognition session, if there is one, to stop. */
const stopRecording = () => {
  recognitionRef.current?.stop();
};
373
+
374
/** Mic-button handler: stops dictation while recording, otherwise starts it. */
const toggleRecording = () => {
  const action = isRecording ? stopRecording : startRecording;
  action();
};
381
+
382
  const handleKeyDown = (e: React.KeyboardEvent) => {
383
  if (e.key === "Enter" && !e.shiftKey) {
384
  e.preventDefault();
 
429
  </div>
430
 
431
  {/* ── Input Area ─────────────────────────────── */}
432
+ <div className="border-t border-border/50 p-4 bg-card/30 backdrop-blur-sm relative">
433
+ <div className="max-w-3xl mx-auto relative">
434
+ {/* Status / Error Message Area */}
435
+ {(isRecording || speechError) && (
436
+ <div className="absolute bottom-full mb-2 left-0 right-0 flex items-center justify-between bg-card border border-border/80 shadow-md rounded-lg px-3 py-1.5 text-xs animate-in fade-in slide-in-from-bottom-1 z-40 max-w-3xl mx-auto">
437
+ <div className="flex items-center gap-2">
438
+ {isRecording ? (
439
+ <>
440
+ <span className="relative flex h-2 w-2">
441
+ <span className="animate-ping absolute inline-flex h-full w-full rounded-full bg-red-400 opacity-75"></span>
442
+ <span className="relative inline-flex rounded-full h-2 w-2 bg-red-500"></span>
443
+ </span>
444
+ <span className="font-medium text-muted-foreground">
445
+ {t("chat.listening", { defaultValue: "Listening... Speak now." })}
446
+ </span>
447
+ </>
448
+ ) : (
449
+ <span className="text-destructive font-medium">{speechError}</span>
450
+ )}
451
+ </div>
452
+ <button
453
+ type="button"
454
+ onClick={() => {
455
+ if (isRecording) {
456
+ stopRecording();
457
+ } else {
458
+ setSpeechError(null);
459
+ }
460
+ }}
461
+ className="text-muted-foreground hover:text-foreground font-semibold px-1.5 py-0.5 rounded hover:bg-muted transition-colors"
462
+ >
463
+ {isRecording ? t("chat.stop", { defaultValue: "Stop" }) : "✕"}
464
+ </button>
465
+ </div>
466
+ )}
467
+
468
+ <div className="flex gap-2 items-end">
469
+ <div className="relative flex-1 flex items-center">
470
+ <Textarea
471
+ ref={textareaRef}
472
+ id="chat-input"
473
+ value={input}
474
+ onChange={(e) => setInput(e.target.value)}
475
+ onKeyDown={handleKeyDown}
476
+ placeholder={
477
+ activeDoc
478
+ ? t("chat.askPlaceholder", { name: activeDoc.original_name })
479
+ : t("chat.selectPlaceholder")
480
+ }
481
+ disabled={streaming}
482
+ className="min-h-[44px] max-h-32 resize-none bg-background/50 border-border/50 pr-10"
483
+ rows={1}
484
+ />
485
+ <Button
486
+ id="mic-btn"
487
+ type="button"
488
+ variant="ghost"
489
+ size="icon"
490
+ disabled={streaming}
491
+ onClick={toggleRecording}
492
+ className={cn(
493
+ "absolute right-2 bottom-1.5 h-7 w-7 rounded-md text-muted-foreground transition-all duration-200",
494
+ isRecording
495
+ ? "bg-red-500/20 text-red-500 hover:bg-red-500/30 hover:text-red-600 animate-pulse"
496
+ : "hover:text-primary hover:bg-accent"
497
+ )}
498
+ title={
499
+ isRecording
500
+ ? t("chat.stopRecording", { defaultValue: "Stop recording" })
501
+ : t("chat.startRecording", { defaultValue: "Start recording" })
502
+ }
503
+ >
504
+ {isRecording ? (
505
+ <MicOff className="h-4 w-4" />
506
+ ) : (
507
+ <Mic className="h-4 w-4" />
508
+ )}
509
+ </Button>
510
+ </div>
511
+ <div className="flex gap-1.5 shrink-0">
512
  <Button
513
  id="send-btn"
514
  size="icon"
 
580
  </div>
581
  </div>
582
  </div>
583
+ </div>
584
  );
585
  }
frontend/src/lib/i18n.ts CHANGED
@@ -59,6 +59,16 @@ const resources = {
59
  markdown: "Markdown (.md)",
60
  plainText: "Plain Text (.txt)",
61
  pdf: "PDF (.pdf)",
 
 
 
 
 
 
 
 
 
 
62
  },
63
  documents: {
64
  uploadFailed: "Upload failed",
@@ -131,6 +141,16 @@ const resources = {
131
  markdown: "मार्कडाउन (.md)",
132
  plainText: "सादा पाठ (.txt)",
133
  pdf: "पीडीएफ (.pdf)",
 
 
 
 
 
 
 
 
 
 
134
  },
135
  documents: {
136
  uploadFailed: "अपलोड विफल",
@@ -203,6 +223,16 @@ const resources = {
203
  markdown: "Markdown (.md)",
204
  plainText: "Texto plano (.txt)",
205
  pdf: "PDF (.pdf)",
 
 
 
 
 
 
 
 
 
 
206
  },
207
  documents: {
208
  uploadFailed: "Error de carga",
@@ -275,6 +305,16 @@ const resources = {
275
  markdown: "Markdown (.md)",
276
  plainText: "Texte brut (.txt)",
277
  pdf: "PDF (.pdf)",
 
 
 
 
 
 
 
 
 
 
278
  },
279
  documents: {
280
  uploadFailed: "Échec de l'envoi",
 
59
  markdown: "Markdown (.md)",
60
  plainText: "Plain Text (.txt)",
61
  pdf: "PDF (.pdf)",
62
+ startRecording: "Start recording",
63
+ stopRecording: "Stop recording",
64
+ listening: "Listening... Speak now.",
65
+ stop: "Stop",
66
+ speechNotSupported: "Speech recognition is not supported in this browser.",
67
+ micPermissionDenied: "Microphone access denied. Please enable permissions in settings.",
68
+ noSpeechDetected: "No speech was detected. Please try again.",
69
+ audioCaptureError: "No microphone found or microphone is not working.",
70
+ networkError: "Network error occurred during speech recognition.",
71
+ speechError: "Speech recognition error: {{message}}",
72
  },
73
  documents: {
74
  uploadFailed: "Upload failed",
 
141
  markdown: "मार्कडाउन (.md)",
142
  plainText: "सादा पाठ (.txt)",
143
  pdf: "पीडीएफ (.pdf)",
144
+ startRecording: "रिकॉर्डिंग शुरू करें",
145
+ stopRecording: "रिकॉर्डिंग बंद करें",
146
+ listening: "सुन रहा हूँ... अब बोलिए।",
147
+ stop: "रोकें",
148
+ speechNotSupported: "इस ब्राउज़र में स्पीच रिकग्निशन समर्थित नहीं है।",
149
+ micPermissionDenied: "माइक्रोफ़ोन एक्सेस अस्वीकृत। कृपया सेटिंग में अनुमति चालू करें।",
150
+ noSpeechDetected: "कोई आवाज़ नहीं सुनी गई। कृपया पुनः प्रयास करें।",
151
+ audioCaptureError: "कोई माइक्रोफ़ोन नहीं मिला या माइक्रोफ़ोन काम नहीं कर रहा है।",
152
+ networkError: "स्पीच रिकग्निशन के दौरान नेटवर्क त्रुटि हुई।",
153
+ speechError: "स्पीच रिकग्निशन त्रुटि: {{message}}",
154
  },
155
  documents: {
156
  uploadFailed: "अपलोड विफल",
 
223
  markdown: "Markdown (.md)",
224
  plainText: "Texto plano (.txt)",
225
  pdf: "PDF (.pdf)",
226
+ startRecording: "Iniciar grabación",
227
+ stopRecording: "Detener grabación",
228
+ listening: "Escuchando... Hable ahora.",
229
+ stop: "Detener",
230
+ speechNotSupported: "El reconocimiento de voz no es compatible con este navegador.",
231
+ micPermissionDenied: "Acceso al micrófono denegado. Habilite los permisos en la configuración.",
232
+ noSpeechDetected: "No se detectó voz. Por favor, inténtelo de nuevo.",
233
+ audioCaptureError: "No se encontró ningún micrófono o no está funcionando.",
234
+ networkError: "Ocurrió un error de red durante el reconocimiento de voz.",
235
+ speechError: "Error de reconocimiento de voz: {{message}}",
236
  },
237
  documents: {
238
  uploadFailed: "Error de carga",
 
305
  markdown: "Markdown (.md)",
306
  plainText: "Texte brut (.txt)",
307
  pdf: "PDF (.pdf)",
308
+ startRecording: "Démarrer l'enregistrement",
309
+ stopRecording: "Arrêter l'enregistrement",
310
+ listening: "Écoute en cours... Parlez maintenant.",
311
+ stop: "Arrêter",
312
+ speechNotSupported: "La reconnaissance vocale n'est pas prise en charge par ce navigateur.",
313
+ micPermissionDenied: "Accès au microphone refusé. Veuillez activer les autorisations dans les paramètres.",
314
+ noSpeechDetected: "Aucune parole n'a été détectée. Veuillez réessayer.",
315
+ audioCaptureError: "Aucun microphone trouvé ou le microphone ne fonctionne pas.",
316
+ networkError: "Une erreur réseau s'est produite lors de la reconnaissance vocale.",
317
+ speechError: "Erreur de reconnaissance vocale : {{message}}",
318
  },
319
  documents: {
320
  uploadFailed: "Échec de l'envoi",