Spaces:

Param20h
/

PDF-Assit_RAG

Running

App Files Files Community

Paramjit Singh commited on 4 days ago

Commit

793ad4f

unverified ·

2 Parent(s): c66ac65 0666849

Merge pull request #254 from varshini-nandula/feature/speech-to-text-dictation

Browse files

Files changed (2) hide show

frontend/src/components/chat/ChatPanel.tsx +225 -20
frontend/src/lib/i18n.ts +40 -0

frontend/src/components/chat/ChatPanel.tsx CHANGED Viewed

@@ -9,7 +9,42 @@ import { Button } from "@/components/ui/button";
 import { Textarea } from "@/components/ui/textarea";
 import MessageBubble from "./MessageBubble";
 import SourceCard from "./SourceCard";
-import { Send, Loader2, Trash2, MessageSquare, Download } from "lucide-react";
 interface Props {
   activeDoc: DocInfo | null;
@@ -17,7 +52,7 @@ interface Props {
 }
 export default function ChatPanel({ activeDoc, onCitationClick }: Props) {
-  const { t } = useTranslation();
   const messages = useChatStore((state) => state.messages);
   const input = useChatStore((state) => state.input);
   const streaming = useChatStore((state) => state.streaming);
@@ -30,6 +65,10 @@ export default function ChatPanel({ activeDoc, onCitationClick }: Props) {
   const resetChat = useChatStore((state) => state.resetChat);
   const fetchSessionHistory = useChatStore((state) => state.fetchSessionHistory);
   const [showExportMenu, setShowExportMenu] = useState(false);
   const textareaRef = useRef<HTMLTextAreaElement>(null);
   const bottomRef = useRef<HTMLDivElement>(null);
   const prevDocId = useRef<string | null>(null);
@@ -245,6 +284,109 @@ export default function ChatPanel({ activeDoc, onCitationClick }: Props) {
     return () => document.removeEventListener("mousedown", handleClickOutside);
   }, [showExportMenu]);
   const handleKeyDown = (e: React.KeyboardEvent) => {
     if (e.key === "Enter" && !e.shiftKey) {
       e.preventDefault();
@@ -295,24 +437,86 @@ export default function ChatPanel({ activeDoc, onCitationClick }: Props) {
       </div>
       {/* ── Input Area ─────────────────────────────── */}
-      <div className="border-t border-border/50 p-4 bg-card/30 backdrop-blur-sm">
-        <div className="max-w-3xl mx-auto flex gap-2 items-end">
-          <Textarea
-            ref={textareaRef}
-            id="chat-input"
-            value={input}
-            onChange={(e) => setInput(e.target.value)}
-            onKeyDown={handleKeyDown}
-            placeholder={
-              activeDoc
-                ? t("chat.askPlaceholder", { name: activeDoc.original_name })
-                : t("chat.selectPlaceholder")
-            }
-            disabled={streaming}
-            className="min-h-[44px] max-h-32 resize-none bg-background/50 border-border/50"
-            rows={1}
-          />
-          <div className="flex gap-1.5 shrink-0">
             <Button
               id="send-btn"
               size="icon"
@@ -384,5 +588,6 @@ export default function ChatPanel({ activeDoc, onCitationClick }: Props) {
         </div>
       </div>
     </div>
   );
 }

 import { Textarea } from "@/components/ui/textarea";
 import MessageBubble from "./MessageBubble";
 import SourceCard from "./SourceCard";
+import { Send, Loader2, Trash2, MessageSquare, Download, Mic, MicOff } from "lucide-react";
+import { cn } from "@/lib/utils";
+interface ISpeechRecognitionEvent {
+  resultIndex: number;
+  results: {
+    length: number;
+    [index: number]: {
+      [index: number]: {
+        transcript: string;
+      };
+      isFinal: boolean;
+    };
+  };
+}
+interface ISpeechRecognitionErrorEvent {
+  error: string;
+  message: string;
+}
+interface ISpeechRecognition {
+  continuous: boolean;
+  interimResults: boolean;
+  lang: string;
+  onresult: ((event: ISpeechRecognitionEvent) => void) | null;
+  onerror: ((event: ISpeechRecognitionErrorEvent) => void) | null;
+  onend: (() => void) | null;
+  start: () => void;
+  stop: () => void;
+}
+interface WindowWithSpeech extends Window {
+  SpeechRecognition?: new () => ISpeechRecognition;
+  webkitSpeechRecognition?: new () => ISpeechRecognition;
+}
 interface Props {
   activeDoc: DocInfo | null;
 }
 export default function ChatPanel({ activeDoc, onCitationClick }: Props) {
+  const { t, i18n } = useTranslation();
   const messages = useChatStore((state) => state.messages);
   const input = useChatStore((state) => state.input);
   const streaming = useChatStore((state) => state.streaming);
   const resetChat = useChatStore((state) => state.resetChat);
   const fetchSessionHistory = useChatStore((state) => state.fetchSessionHistory);
   const [showExportMenu, setShowExportMenu] = useState(false);
+  const [isRecording, setIsRecording] = useState(false);
+  const [speechError, setSpeechError] = useState<string | null>(null);
+  const recognitionRef = useRef<ISpeechRecognition | null>(null);
+  const initialInputRef = useRef<string>("");
   const textareaRef = useRef<HTMLTextAreaElement>(null);
   const bottomRef = useRef<HTMLDivElement>(null);
   const prevDocId = useRef<string | null>(null);
     return () => document.removeEventListener("mousedown", handleClickOutside);
   }, [showExportMenu]);
+  // Cleanup speech recognition on unmount
+  useEffect(() => {
+    return () => {
+      if (recognitionRef.current) {
+        recognitionRef.current.stop();
+      }
+    };
+  }, []);
+  const startRecording = () => {
+    const SpeechRecognitionAPI =
+      typeof window !== "undefined"
+        ? (window as unknown as WindowWithSpeech).SpeechRecognition ||
+          (window as unknown as WindowWithSpeech).webkitSpeechRecognition
+        : null;
+    if (!SpeechRecognitionAPI) {
+      setSpeechError(t("chat.speechNotSupported", { defaultValue: "Speech recognition is not supported in this browser." }));
+      return;
+    }
+    try {
+      const recognition = new SpeechRecognitionAPI();
+      recognition.continuous = true;
+      recognition.interimResults = true;
+      const currentLang = i18n.language || "en";
+      const langMap: Record<string, string> = {
+        en: "en-US",
+        hi: "hi-IN",
+        es: "es-ES",
+        fr: "fr-FR",
+      };
+      recognition.lang = langMap[currentLang] || "en-US";
+      initialInputRef.current = input;
+      setSpeechError(null);
+      setIsRecording(true);
+      recognition.onresult = (event: ISpeechRecognitionEvent) => {
+        let sessionTranscript = "";
+        for (let i = 0; i < event.results.length; ++i) {
+          sessionTranscript += event.results[i][0].transcript;
+        }
+        setInput(
+          initialInputRef.current +
+            (initialInputRef.current ? " " : "") +
+            sessionTranscript.trim()
+        );
+      };
+      recognition.onerror = (event: ISpeechRecognitionErrorEvent) => {
+        const errorCode = event.error;
+        if (errorCode === "aborted") return; // ignore manual aborts
+        let msg = t("chat.speechError", { defaultValue: `Speech recognition error: ${errorCode}` });
+        if (errorCode === "not-allowed") {
+          msg = t("chat.micPermissionDenied", {
+            defaultValue: "Microphone access denied. Please enable permissions in settings.",
+          });
+        } else if (errorCode === "no-speech") {
+          msg = t("chat.noSpeechDetected", {
+            defaultValue: "No speech was detected. Please try again.",
+          });
+        } else if (errorCode === "audio-capture") {
+          msg = t("chat.audioCaptureError", {
+            defaultValue: "No microphone found or microphone is not working.",
+          });
+        } else if (errorCode === "network") {
+          msg = t("chat.networkError", {
+            defaultValue: "Network error occurred during speech recognition.",
+          });
+        }
+        setSpeechError(msg);
+        setIsRecording(false);
+      };
+      recognition.onend = () => {
+        setIsRecording(false);
+      };
+      recognitionRef.current = recognition;
+      recognition.start();
+    } catch (err) {
+      setSpeechError(err instanceof Error ? err.message : "Failed to start speech recognition.");
+      setIsRecording(false);
+    }
+  };
+  const stopRecording = () => {
+    if (recognitionRef.current) {
+      recognitionRef.current.stop();
+    }
+  };
+  const toggleRecording = () => {
+    if (isRecording) {
+      stopRecording();
+    } else {
+      startRecording();
+    }
+  };
   const handleKeyDown = (e: React.KeyboardEvent) => {
     if (e.key === "Enter" && !e.shiftKey) {
       e.preventDefault();
       </div>
       {/* ── Input Area ─────────────────────────────── */}
+      <div className="border-t border-border/50 p-4 bg-card/30 backdrop-blur-sm relative">
+        <div className="max-w-3xl mx-auto relative">
+          {/* Status / Error Message Area */}
+          {(isRecording || speechError) && (
+            <div className="absolute bottom-full mb-2 left-0 right-0 flex items-center justify-between bg-card border border-border/80 shadow-md rounded-lg px-3 py-1.5 text-xs animate-in fade-in slide-in-from-bottom-1 z-40 max-w-3xl mx-auto">
+              <div className="flex items-center gap-2">
+                {isRecording ? (
+                  <>
+                    <span className="relative flex h-2 w-2">
+                      <span className="animate-ping absolute inline-flex h-full w-full rounded-full bg-red-400 opacity-75"></span>
+                      <span className="relative inline-flex rounded-full h-2 w-2 bg-red-500"></span>
+                    </span>
+                    <span className="font-medium text-muted-foreground">
+                      {t("chat.listening", { defaultValue: "Listening... Speak now." })}
+                    </span>
+                  </>
+                ) : (
+                  <span className="text-destructive font-medium">{speechError}</span>
+                )}
+              </div>
+              <button
+                type="button"
+                onClick={() => {
+                  if (isRecording) {
+                    stopRecording();
+                  } else {
+                    setSpeechError(null);
+                  }
+                }}
+                className="text-muted-foreground hover:text-foreground font-semibold px-1.5 py-0.5 rounded hover:bg-muted transition-colors"
+              >
+                {isRecording ? t("chat.stop", { defaultValue: "Stop" }) : "✕"}
+              </button>
+            </div>
+          )}
+          <div className="flex gap-2 items-end">
+            <div className="relative flex-1 flex items-center">
+              <Textarea
+                ref={textareaRef}
+                id="chat-input"
+                value={input}
+                onChange={(e) => setInput(e.target.value)}
+                onKeyDown={handleKeyDown}
+                placeholder={
+                  activeDoc
+                    ? t("chat.askPlaceholder", { name: activeDoc.original_name })
+                    : t("chat.selectPlaceholder")
+                }
+                disabled={streaming}
+                className="min-h-[44px] max-h-32 resize-none bg-background/50 border-border/50 pr-10"
+                rows={1}
+              />
+              <Button
+                id="mic-btn"
+                type="button"
+                variant="ghost"
+                size="icon"
+                disabled={streaming}
+                onClick={toggleRecording}
+                className={cn(
+                  "absolute right-2 bottom-1.5 h-7 w-7 rounded-md text-muted-foreground transition-all duration-200",
+                  isRecording
+                    ? "bg-red-500/20 text-red-500 hover:bg-red-500/30 hover:text-red-600 animate-pulse"
+                    : "hover:text-primary hover:bg-accent"
+                )}
+                title={
+                  isRecording
+                    ? t("chat.stopRecording", { defaultValue: "Stop recording" })
+                    : t("chat.startRecording", { defaultValue: "Start recording" })
+                }
+              >
+                {isRecording ? (
+                  <MicOff className="h-4 w-4" />
+                ) : (
+                  <Mic className="h-4 w-4" />
+                )}
+              </Button>
+            </div>
+            <div className="flex gap-1.5 shrink-0">
             <Button
               id="send-btn"
               size="icon"
         </div>
       </div>
     </div>
+  </div>
   );
 }

frontend/src/lib/i18n.ts CHANGED Viewed

@@ -59,6 +59,16 @@ const resources = {
         markdown: "Markdown (.md)",
         plainText: "Plain Text (.txt)",
         pdf: "PDF (.pdf)",
       },
       documents: {
         uploadFailed: "Upload failed",
@@ -131,6 +141,16 @@ const resources = {
         markdown: "मार्कडाउन (.md)",
         plainText: "सादा पाठ (.txt)",
         pdf: "पीडीएफ (.pdf)",
       },
       documents: {
         uploadFailed: "अपलोड विफल",
@@ -203,6 +223,16 @@ const resources = {
         markdown: "Markdown (.md)",
         plainText: "Texto plano (.txt)",
         pdf: "PDF (.pdf)",
       },
       documents: {
         uploadFailed: "Error de carga",
@@ -275,6 +305,16 @@ const resources = {
         markdown: "Markdown (.md)",
         plainText: "Texte brut (.txt)",
         pdf: "PDF (.pdf)",
       },
       documents: {
         uploadFailed: "Échec de l'envoi",

         markdown: "Markdown (.md)",
         plainText: "Plain Text (.txt)",
         pdf: "PDF (.pdf)",
+        startRecording: "Start recording",
+        stopRecording: "Stop recording",
+        listening: "Listening... Speak now.",
+        stop: "Stop",
+        speechNotSupported: "Speech recognition is not supported in this browser.",
+        micPermissionDenied: "Microphone access denied. Please enable permissions in settings.",
+        noSpeechDetected: "No speech was detected. Please try again.",
+        audioCaptureError: "No microphone found or microphone is not working.",
+        networkError: "Network error occurred during speech recognition.",
+        speechError: "Speech recognition error: {{message}}",
       },
       documents: {
         uploadFailed: "Upload failed",
         markdown: "मार्कडाउन (.md)",
         plainText: "सादा पाठ (.txt)",
         pdf: "पीडीएफ (.pdf)",
+        startRecording: "रिकॉर्डिंग शुरू करें",
+        stopRecording: "रिकॉर्डिंग बंद करें",
+        listening: "सुन रहा हूँ... अब बोलिए।",
+        stop: "रोकें",
+        speechNotSupported: "इस ब्राउज़र में स्पीच रिकग्निशन समर्थित नहीं है।",
+        micPermissionDenied: "माइक्रोफ़ोन एक्सेस अस्वीकृत। कृपया सेटिंग में अनुमति चालू करें।",
+        noSpeechDetected: "कोई आवाज़ नहीं सुनी गई। कृपया पुनः प्रयास करें।",
+        audioCaptureError: "कोई माइक्रोफ़ोन नहीं मिला या माइक्रोफ़ोन काम नहीं कर रहा है।",
+        networkError: "स्पीच रिकग्निशन के दौरान नेटवर्क त्रुटि हुई।",
+        speechError: "स्पीच रिकग्निशन त्रुटि: {{message}}",
       },
       documents: {
         uploadFailed: "अपलोड विफल",
         markdown: "Markdown (.md)",
         plainText: "Texto plano (.txt)",
         pdf: "PDF (.pdf)",
+        startRecording: "Iniciar grabación",
+        stopRecording: "Detener grabación",
+        listening: "Escuchando... Hable ahora.",
+        stop: "Detener",
+        speechNotSupported: "El reconocimiento de voz no es compatible con este navegador.",
+        micPermissionDenied: "Acceso al micrófono denegado. Habilite los permisos en la configuración.",
+        noSpeechDetected: "No se detectó voz. Por favor, inténtelo de nuevo.",
+        audioCaptureError: "No se encontró ningún micrófono o no está funcionando.",
+        networkError: "Ocurrió un error de red durante el reconocimiento de voz.",
+        speechError: "Error de reconocimiento de voz: {{message}}",
       },
       documents: {
         uploadFailed: "Error de carga",
         markdown: "Markdown (.md)",
         plainText: "Texte brut (.txt)",
         pdf: "PDF (.pdf)",
+        startRecording: "Démarrer l'enregistrement",
+        stopRecording: "Arrêter l'enregistrement",
+        listening: "Écoute en cours... Parlez maintenant.",
+        stop: "Arrêter",
+        speechNotSupported: "La reconnaissance vocale n'est pas prise en charge par ce navigateur.",
+        micPermissionDenied: "Accès au microphone refusé. Veuillez activer les autorisations dans les paramètres.",
+        noSpeechDetected: "Aucune parole n'a été détectée. Veuillez réessayer.",
+        audioCaptureError: "Aucun microphone trouvé ou le microphone ne fonctionne pas.",
+        networkError: "Une erreur réseau s'est produite lors de la reconnaissance vocale.",
+        speechError: "Erreur de reconnaissance vocale : {{message}}",
       },
       documents: {
         uploadFailed: "Échec de l'envoi",