Paramjit Singh committed on
Commit
793ad4f
·
unverified ·
2 Parent(s): c66ac650666849

Merge pull request #254 from varshini-nandula/feature/speech-to-text-dictation

Browse files
frontend/src/components/chat/ChatPanel.tsx CHANGED
@@ -9,7 +9,42 @@ import { Button } from "@/components/ui/button";
9
  import { Textarea } from "@/components/ui/textarea";
10
  import MessageBubble from "./MessageBubble";
11
  import SourceCard from "./SourceCard";
12
- import { Send, Loader2, Trash2, MessageSquare, Download } from "lucide-react";
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
 
14
  interface Props {
15
  activeDoc: DocInfo | null;
@@ -17,7 +52,7 @@ interface Props {
17
  }
18
 
19
  export default function ChatPanel({ activeDoc, onCitationClick }: Props) {
20
- const { t } = useTranslation();
21
  const messages = useChatStore((state) => state.messages);
22
  const input = useChatStore((state) => state.input);
23
  const streaming = useChatStore((state) => state.streaming);
@@ -30,6 +65,10 @@ export default function ChatPanel({ activeDoc, onCitationClick }: Props) {
30
  const resetChat = useChatStore((state) => state.resetChat);
31
  const fetchSessionHistory = useChatStore((state) => state.fetchSessionHistory);
32
  const [showExportMenu, setShowExportMenu] = useState(false);
 
 
 
 
33
  const textareaRef = useRef<HTMLTextAreaElement>(null);
34
  const bottomRef = useRef<HTMLDivElement>(null);
35
  const prevDocId = useRef<string | null>(null);
@@ -245,6 +284,109 @@ export default function ChatPanel({ activeDoc, onCitationClick }: Props) {
245
  return () => document.removeEventListener("mousedown", handleClickOutside);
246
  }, [showExportMenu]);
247
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
248
  const handleKeyDown = (e: React.KeyboardEvent) => {
249
  if (e.key === "Enter" && !e.shiftKey) {
250
  e.preventDefault();
@@ -295,24 +437,86 @@ export default function ChatPanel({ activeDoc, onCitationClick }: Props) {
295
  </div>
296
 
297
  {/* ── Input Area ─────────────────────────────── */}
298
- <div className="border-t border-border/50 p-4 bg-card/30 backdrop-blur-sm">
299
- <div className="max-w-3xl mx-auto flex gap-2 items-end">
300
- <Textarea
301
- ref={textareaRef}
302
- id="chat-input"
303
- value={input}
304
- onChange={(e) => setInput(e.target.value)}
305
- onKeyDown={handleKeyDown}
306
- placeholder={
307
- activeDoc
308
- ? t("chat.askPlaceholder", { name: activeDoc.original_name })
309
- : t("chat.selectPlaceholder")
310
- }
311
- disabled={streaming}
312
- className="min-h-[44px] max-h-32 resize-none bg-background/50 border-border/50"
313
- rows={1}
314
- />
315
- <div className="flex gap-1.5 shrink-0">
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
316
  <Button
317
  id="send-btn"
318
  size="icon"
@@ -384,5 +588,6 @@ export default function ChatPanel({ activeDoc, onCitationClick }: Props) {
384
  </div>
385
  </div>
386
  </div>
 
387
  );
388
  }
 
9
  import { Textarea } from "@/components/ui/textarea";
10
  import MessageBubble from "./MessageBubble";
11
  import SourceCard from "./SourceCard";
12
+ import { Send, Loader2, Trash2, MessageSquare, Download, Mic, MicOff } from "lucide-react";
13
+ import { cn } from "@/lib/utils";
14
+
15
/**
 * Structural subset of the browser's speech-recognition result event, limited
 * to the fields this component reads.
 */
interface ISpeechRecognitionEvent {
  /** Array-like list of results; each result is an array-like list of alternatives. */
  results: {
    [index: number]: {
      isFinal: boolean;
      [index: number]: {
        transcript: string;
      };
    };
    length: number;
  };
  resultIndex: number;
}
27
+
28
/**
 * Structural subset of the browser's speech-recognition error event.
 * `error` carries the error code that the component branches on
 * (e.g. "not-allowed", "no-speech").
 */
interface ISpeechRecognitionErrorEvent {
  message: string;
  error: string;
}
32
+
33
/**
 * Structural subset of a speech-recognition instance: the configuration
 * fields, event handlers and lifecycle methods used by this component.
 */
interface ISpeechRecognition {
  // Configuration
  lang: string;
  continuous: boolean;
  interimResults: boolean;

  // Event handlers (null when detached)
  onend: (() => void) | null;
  onerror: ((event: ISpeechRecognitionErrorEvent) => void) | null;
  onresult: ((event: ISpeechRecognitionEvent) => void) | null;

  // Lifecycle
  stop: () => void;
  start: () => void;
}
43
+
44
/**
 * `Window` augmented with the optional speech-recognition constructors, in
 * both the unprefixed and the `webkit`-prefixed form.
 */
interface WindowWithSpeech extends Window {
  webkitSpeechRecognition?: new () => ISpeechRecognition;
  SpeechRecognition?: new () => ISpeechRecognition;
}
48
 
49
  interface Props {
50
  activeDoc: DocInfo | null;
 
52
  }
53
 
54
  export default function ChatPanel({ activeDoc, onCitationClick }: Props) {
55
+ const { t, i18n } = useTranslation();
56
  const messages = useChatStore((state) => state.messages);
57
  const input = useChatStore((state) => state.input);
58
  const streaming = useChatStore((state) => state.streaming);
 
65
  const resetChat = useChatStore((state) => state.resetChat);
66
  const fetchSessionHistory = useChatStore((state) => state.fetchSessionHistory);
67
  const [showExportMenu, setShowExportMenu] = useState(false);
68
+ const [isRecording, setIsRecording] = useState(false);
69
+ const [speechError, setSpeechError] = useState<string | null>(null);
70
+ const recognitionRef = useRef<ISpeechRecognition | null>(null);
71
+ const initialInputRef = useRef<string>("");
72
  const textareaRef = useRef<HTMLTextAreaElement>(null);
73
  const bottomRef = useRef<HTMLDivElement>(null);
74
  const prevDocId = useRef<string | null>(null);
 
284
  return () => document.removeEventListener("mousedown", handleClickOutside);
285
  }, [showExportMenu]);
286
 
287
// Cleanup speech recognition on unmount.
// Handlers are detached before stopping: stop() may still deliver a final
// result / error / end event, and those callbacks would otherwise write to the
// chat input and to component state after the component is gone.
useEffect(() => {
  return () => {
    const recognition = recognitionRef.current;
    if (!recognition) return;

    recognition.onresult = null;
    recognition.onerror = null;
    recognition.onend = null;
    recognition.stop();
    recognitionRef.current = null;
  };
}, []);
295
+
296
/**
 * Starts a dictation session with the browser's speech-recognition API
 * (unprefixed or `webkit`-prefixed) and streams the transcript into the chat
 * input.
 *
 * - If the API is missing, or starting it throws, the problem is reported via
 *   `setSpeechError` and no session is left marked as recording.
 * - The recognition language follows the active i18n language. Regional tags
 *   such as "fr-CA" are matched on their base language ("fr") before falling
 *   back to "en-US".
 * - Text already present in the input when recording starts is kept as a
 *   prefix; the session transcript is appended after it.
 */
const startRecording = () => {
  const SpeechRecognitionAPI =
    typeof window !== "undefined"
      ? (window as unknown as WindowWithSpeech).SpeechRecognition ||
        (window as unknown as WindowWithSpeech).webkitSpeechRecognition
      : null;

  if (!SpeechRecognitionAPI) {
    setSpeechError(
      t("chat.speechNotSupported", {
        defaultValue: "Speech recognition is not supported in this browser.",
      })
    );
    return;
  }

  try {
    const recognition = new SpeechRecognitionAPI();
    recognition.continuous = true;
    recognition.interimResults = true;

    const langMap: Record<string, string> = {
      en: "en-US",
      hi: "hi-IN",
      es: "es-ES",
      fr: "fr-FR",
    };
    const currentLang = i18n.language || "en";
    // The active language may be a regional tag ("en-GB", "fr_CA"); try the
    // exact tag first, then its base language, so such users are not silently
    // switched to English recognition.
    const baseLang = currentLang.split(/[-_]/)[0].toLowerCase();
    recognition.lang = langMap[currentLang] || langMap[baseLang] || "en-US";

    // Snapshot the text typed so far; every result event rebuilds the input
    // as "<snapshot> <full session transcript>".
    initialInputRef.current = input;
    setSpeechError(null);
    setIsRecording(true);

    recognition.onresult = (event: ISpeechRecognitionEvent) => {
      // In continuous mode `results` accumulates for the whole session, so
      // concatenate from index 0 rather than from `resultIndex`.
      let sessionTranscript = "";
      for (let i = 0; i < event.results.length; ++i) {
        sessionTranscript += event.results[i][0].transcript;
      }
      setInput(
        initialInputRef.current +
          (initialInputRef.current ? " " : "") +
          sessionTranscript.trim()
      );
    };

    recognition.onerror = (event: ISpeechRecognitionErrorEvent) => {
      const errorCode = event.error;
      if (errorCode === "aborted") return; // ignore manual aborts

      // `message` fills the {{message}} placeholder used by the translated
      // "chat.speechError" strings; without it the placeholder is never filled.
      let msg = t("chat.speechError", {
        message: errorCode,
        defaultValue: `Speech recognition error: ${errorCode}`,
      });
      if (errorCode === "not-allowed") {
        msg = t("chat.micPermissionDenied", {
          defaultValue: "Microphone access denied. Please enable permissions in settings.",
        });
      } else if (errorCode === "no-speech") {
        msg = t("chat.noSpeechDetected", {
          defaultValue: "No speech was detected. Please try again.",
        });
      } else if (errorCode === "audio-capture") {
        msg = t("chat.audioCaptureError", {
          defaultValue: "No microphone found or microphone is not working.",
        });
      } else if (errorCode === "network") {
        msg = t("chat.networkError", {
          defaultValue: "Network error occurred during speech recognition.",
        });
      }
      setSpeechError(msg);
      setIsRecording(false);
    };

    recognition.onend = () => {
      setIsRecording(false);
    };

    recognitionRef.current = recognition;
    recognition.start();
  } catch (err) {
    setSpeechError(err instanceof Error ? err.message : "Failed to start speech recognition.");
    setIsRecording(false);
  }
};
375
+
376
/** Asks the active recognition instance, if any, to stop listening. */
const stopRecording = () => {
  const active = recognitionRef.current;
  if (!active) return;
  active.stop();
};
381
+
382
/** Mic-button handler: stops dictation while recording, otherwise starts it. */
const toggleRecording = () => {
  const action = isRecording ? stopRecording : startRecording;
  action();
};
389
+
390
  const handleKeyDown = (e: React.KeyboardEvent) => {
391
  if (e.key === "Enter" && !e.shiftKey) {
392
  e.preventDefault();
 
437
  </div>
438
 
439
  {/* ── Input Area ─────────────────────────────── */}
440
+ <div className="border-t border-border/50 p-4 bg-card/30 backdrop-blur-sm relative">
441
+ <div className="max-w-3xl mx-auto relative">
442
+ {/* Status / Error Message Area */}
443
+ {(isRecording || speechError) && (
444
+ <div className="absolute bottom-full mb-2 left-0 right-0 flex items-center justify-between bg-card border border-border/80 shadow-md rounded-lg px-3 py-1.5 text-xs animate-in fade-in slide-in-from-bottom-1 z-40 max-w-3xl mx-auto">
445
+ <div className="flex items-center gap-2">
446
+ {isRecording ? (
447
+ <>
448
+ <span className="relative flex h-2 w-2">
449
+ <span className="animate-ping absolute inline-flex h-full w-full rounded-full bg-red-400 opacity-75"></span>
450
+ <span className="relative inline-flex rounded-full h-2 w-2 bg-red-500"></span>
451
+ </span>
452
+ <span className="font-medium text-muted-foreground">
453
+ {t("chat.listening", { defaultValue: "Listening... Speak now." })}
454
+ </span>
455
+ </>
456
+ ) : (
457
+ <span className="text-destructive font-medium">{speechError}</span>
458
+ )}
459
+ </div>
460
+ <button
461
+ type="button"
462
+ onClick={() => {
463
+ if (isRecording) {
464
+ stopRecording();
465
+ } else {
466
+ setSpeechError(null);
467
+ }
468
+ }}
469
+ className="text-muted-foreground hover:text-foreground font-semibold px-1.5 py-0.5 rounded hover:bg-muted transition-colors"
470
+ >
471
+ {isRecording ? t("chat.stop", { defaultValue: "Stop" }) : "✕"}
472
+ </button>
473
+ </div>
474
+ )}
475
+
476
+ <div className="flex gap-2 items-end">
477
+ <div className="relative flex-1 flex items-center">
478
+ <Textarea
479
+ ref={textareaRef}
480
+ id="chat-input"
481
+ value={input}
482
+ onChange={(e) => setInput(e.target.value)}
483
+ onKeyDown={handleKeyDown}
484
+ placeholder={
485
+ activeDoc
486
+ ? t("chat.askPlaceholder", { name: activeDoc.original_name })
487
+ : t("chat.selectPlaceholder")
488
+ }
489
+ disabled={streaming}
490
+ className="min-h-[44px] max-h-32 resize-none bg-background/50 border-border/50 pr-10"
491
+ rows={1}
492
+ />
493
+ <Button
494
+ id="mic-btn"
495
+ type="button"
496
+ variant="ghost"
497
+ size="icon"
498
+ disabled={streaming}
499
+ onClick={toggleRecording}
500
+ className={cn(
501
+ "absolute right-2 bottom-1.5 h-7 w-7 rounded-md text-muted-foreground transition-all duration-200",
502
+ isRecording
503
+ ? "bg-red-500/20 text-red-500 hover:bg-red-500/30 hover:text-red-600 animate-pulse"
504
+ : "hover:text-primary hover:bg-accent"
505
+ )}
506
+ title={
507
+ isRecording
508
+ ? t("chat.stopRecording", { defaultValue: "Stop recording" })
509
+ : t("chat.startRecording", { defaultValue: "Start recording" })
510
+ }
511
+ >
512
+ {isRecording ? (
513
+ <MicOff className="h-4 w-4" />
514
+ ) : (
515
+ <Mic className="h-4 w-4" />
516
+ )}
517
+ </Button>
518
+ </div>
519
+ <div className="flex gap-1.5 shrink-0">
520
  <Button
521
  id="send-btn"
522
  size="icon"
 
588
  </div>
589
  </div>
590
  </div>
591
+ </div>
592
  );
593
  }
frontend/src/lib/i18n.ts CHANGED
@@ -59,6 +59,16 @@ const resources = {
59
  markdown: "Markdown (.md)",
60
  plainText: "Plain Text (.txt)",
61
  pdf: "PDF (.pdf)",
 
 
 
 
 
 
 
 
 
 
62
  },
63
  documents: {
64
  uploadFailed: "Upload failed",
@@ -131,6 +141,16 @@ const resources = {
131
  markdown: "मार्कडाउन (.md)",
132
  plainText: "सादा पाठ (.txt)",
133
  pdf: "पीडीएफ (.pdf)",
 
 
 
 
 
 
 
 
 
 
134
  },
135
  documents: {
136
  uploadFailed: "अपलोड विफल",
@@ -203,6 +223,16 @@ const resources = {
203
  markdown: "Markdown (.md)",
204
  plainText: "Texto plano (.txt)",
205
  pdf: "PDF (.pdf)",
 
 
 
 
 
 
 
 
 
 
206
  },
207
  documents: {
208
  uploadFailed: "Error de carga",
@@ -275,6 +305,16 @@ const resources = {
275
  markdown: "Markdown (.md)",
276
  plainText: "Texte brut (.txt)",
277
  pdf: "PDF (.pdf)",
 
 
 
 
 
 
 
 
 
 
278
  },
279
  documents: {
280
  uploadFailed: "Échec de l'envoi",
 
59
  markdown: "Markdown (.md)",
60
  plainText: "Plain Text (.txt)",
61
  pdf: "PDF (.pdf)",
62
+ startRecording: "Start recording",
63
+ stopRecording: "Stop recording",
64
+ listening: "Listening... Speak now.",
65
+ stop: "Stop",
66
+ speechNotSupported: "Speech recognition is not supported in this browser.",
67
+ micPermissionDenied: "Microphone access denied. Please enable permissions in settings.",
68
+ noSpeechDetected: "No speech was detected. Please try again.",
69
+ audioCaptureError: "No microphone found or microphone is not working.",
70
+ networkError: "Network error occurred during speech recognition.",
71
+ speechError: "Speech recognition error: {{message}}",
72
  },
73
  documents: {
74
  uploadFailed: "Upload failed",
 
141
  markdown: "मार्कडाउन (.md)",
142
  plainText: "सादा पाठ (.txt)",
143
  pdf: "पीडीएफ (.pdf)",
144
+ startRecording: "रिकॉर्डिंग शुरू करें",
145
+ stopRecording: "रिकॉर्डिंग बंद करें",
146
+ listening: "सुन रहा हूँ... अब बोलिए।",
147
+ stop: "रोकें",
148
+ speechNotSupported: "इस ब्राउज़र में स्पीच रिकग्निशन समर्थित नहीं है।",
149
+ micPermissionDenied: "माइक्रोफ़ोन एक्सेस अस्वीकृत। कृपया सेटिंग में अनुमति चालू करें।",
150
+ noSpeechDetected: "कोई आवाज़ नहीं सुनी गई। कृपया पुनः प्रयास करें।",
151
+ audioCaptureError: "कोई माइक्रोफ़ोन नहीं मिला या माइक्रोफ़ोन काम नहीं कर रहा है।",
152
+ networkError: "स्पीच रिकग्निशन के दौरान नेटवर्क त्रुटि हुई।",
153
+ speechError: "स्पीच रिकग्निशन त्रुटि: {{message}}",
154
  },
155
  documents: {
156
  uploadFailed: "अपलोड विफल",
 
223
  markdown: "Markdown (.md)",
224
  plainText: "Texto plano (.txt)",
225
  pdf: "PDF (.pdf)",
226
+ startRecording: "Iniciar grabación",
227
+ stopRecording: "Detener grabación",
228
+ listening: "Escuchando... Hable ahora.",
229
+ stop: "Detener",
230
+ speechNotSupported: "El reconocimiento de voz no es compatible con este navegador.",
231
+ micPermissionDenied: "Acceso al micrófono denegado. Habilite los permisos en la configuración.",
232
+ noSpeechDetected: "No se detectó voz. Por favor, inténtelo de nuevo.",
233
+ audioCaptureError: "No se encontró ningún micrófono o no está funcionando.",
234
+ networkError: "Ocurrió un error de red durante el reconocimiento de voz.",
235
+ speechError: "Error de reconocimiento de voz: {{message}}",
236
  },
237
  documents: {
238
  uploadFailed: "Error de carga",
 
305
  markdown: "Markdown (.md)",
306
  plainText: "Texte brut (.txt)",
307
  pdf: "PDF (.pdf)",
308
+ startRecording: "Démarrer l'enregistrement",
309
+ stopRecording: "Arrêter l'enregistrement",
310
+ listening: "Écoute en cours... Parlez maintenant.",
311
+ stop: "Arrêter",
312
+ speechNotSupported: "La reconnaissance vocale n'est pas prise en charge par ce navigateur.",
313
+ micPermissionDenied: "Accès au microphone refusé. Veuillez activer les autorisations dans les paramètres.",
314
+ noSpeechDetected: "Aucune parole n'a été détectée. Veuillez réessayer.",
315
+ audioCaptureError: "Aucun microphone trouvé ou le microphone ne fonctionne pas.",
316
+ networkError: "Une erreur réseau s'est produite lors de la reconnaissance vocale.",
317
+ speechError: "Erreur de reconnaissance vocale : {{message}}",
318
  },
319
  documents: {
320
  uploadFailed: "Échec de l'envoi",