Spaces:

Abdalkaderdev
/

ORA

Paused

App Files Files Community

Abdalkaderdev committed on Jan 12

Commit

3688b19

1 Parent(s): 5306cf5

Update frontend with Whisper STT and prepare for vision features

Browse files

Files changed (1) hide show

frontend/app/voice/page.tsx +60 -23

frontend/app/voice/page.tsx CHANGED Viewed

@@ -7,37 +7,74 @@ export default function OraVoice() {
     const [transcript, setTranscript] = useState("");
     const [response, setResponse] = useState("");
     const [history, setHistory] = useState<any[]>([]);
     const recognitionRef = useRef<any>(null);
     useEffect(() => {
-        // Initialize Speech Recognition
-        if (typeof window !== "undefined") {
-            const SpeechRecognition = (window as any).SpeechRecognition || (window as any).webkitSpeechRecognition;
-            if (SpeechRecognition) {
-                const recognition = new SpeechRecognition();
-                recognition.continuous = false;
-                recognition.lang = "en-US";
-                recognition.interimResults = false;
-                recognition.onstart = () => setState("LISTENING");
-                recognition.onresult = (event: any) => {
-                    const text = event.results[0][0].transcript;
-                    setTranscript(text);
-                    handleSend(text);
-                };
-                recognition.onend = () => {
-                    // If we didn't get a result and state is still listening, go back to idle
-                    // But if we got a result, handleSend changes state to THINKING
-                    if (state === "LISTENING") setState("IDLE");
                 };
-                recognitionRef.current = recognition;
-            }
         }
-    }, []);
     const handleSend = async (text: string) => {
         setState("THINKING");

     const [transcript, setTranscript] = useState("");
     const [response, setResponse] = useState("");
     const [history, setHistory] = useState<any[]>([]);
+    const [emotion, setEmotion] = useState<string | null>(null);
+    const [uploadedImage, setUploadedImage] = useState<string | null>(null);
     const recognitionRef = useRef<any>(null);
+    const mediaRecorderRef = useRef<MediaRecorder | null>(null);
+    const audioChunksRef = useRef<Blob[]>([]);
     useEffect(() => {
+        // Whisper-based recording will replace browser STT
+        // No need for webkitSpeechRecognition anymore
+    }, []);
+    /**
+     * Records microphone audio and sends it to the transcription endpoint.
+     *
+     * Flow: switch the UI to LISTENING, capture audio with MediaRecorder, and
+     * when the recorder stops (manually or via the auto-stop timeout) POST the
+     * base64-encoded audio to /api/transcribe, then pass the returned text to
+     * handleSend. Any failure along the way is logged and returns the UI to
+     * IDLE instead of leaving it stuck in LISTENING.
+     */
+    const startWhisperRecording = async () => {
+        // Ignore repeated calls while a capture is already running, otherwise
+        // two recorders would share (and reset) the same chunk buffer.
+        if (mediaRecorderRef.current?.state === "recording") {
+            return;
+        }
+        const MAX_RECORDING_MS = 10000;
+        let stream: MediaStream | undefined;
+        let autoStopTimer: ReturnType<typeof setTimeout> | undefined;
+        // Stops every track so the browser releases the microphone.
+        const releaseMicrophone = () => {
+            stream?.getTracks().forEach(track => track.stop());
+        };
+        try {
+            setState("LISTENING");
+            stream = await navigator.mediaDevices.getUserMedia({ audio: true });
+            const recorder = new MediaRecorder(stream);
+            audioChunksRef.current = [];
+            recorder.ondataavailable = (e) => {
+                // Zero-length chunks carry no audio; keep them out of the blob.
+                if (e.data.size > 0) {
+                    audioChunksRef.current.push(e.data);
+                }
+            };
+            recorder.onstop = () => {
+                // A manual stop must cancel the pending auto-stop timer.
+                clearTimeout(autoStopTimer);
+                releaseMicrophone();
+                // Label the blob with the container the recorder actually used
+                // rather than claiming WAV. Only the base64 payload is sent, so
+                // the request body is unaffected.
+                const audioBlob = new Blob(audioChunksRef.current, { type: recorder.mimeType });
+                const reader = new FileReader();
+                reader.onloadend = async () => {
+                    try {
+                        // onloadend also fires on a failed read, in which case
+                        // result is not a data URL string.
+                        if (typeof reader.result !== "string") {
+                            throw new Error("Could not read recorded audio");
+                        }
+                        const base64Audio = reader.result.split(',')[1];
+                        if (!base64Audio) {
+                            // Nothing was captured; there is nothing to transcribe.
+                            setState("IDLE");
+                            return;
+                        }
+                        // Send to Whisper for transcription
+                        const res = await fetch("/api/transcribe", {
+                            method: "POST",
+                            headers: { "Content-Type": "application/json" },
+                            body: JSON.stringify({ audio_data: base64Audio }),
+                        });
+                        if (!res.ok) {
+                            throw new Error(`Transcription request failed with status ${res.status}`);
+                        }
+                        const data = (await res.json()) as { text?: unknown };
+                        const text = data.text;
+                        if (typeof text !== "string" || text.trim() === "") {
+                            // No usable transcript: go back to IDLE rather than
+                            // forwarding an empty or undefined message.
+                            setState("IDLE");
+                            return;
+                        }
+                        setTranscript(text);
+                        // Fire-and-forget, as before; handleSend drives the state from here.
+                        void handleSend(text);
+                    } catch (error) {
+                        console.error("Transcription error:", error);
+                        setState("IDLE");
+                    }
+                };
+                reader.readAsDataURL(audioBlob);
+            };
+            recorder.start();
+            mediaRecorderRef.current = recorder;
+            // Auto-stop after 10 seconds. The timer checks this call's own
+            // recorder, so a stale timer can never cut a later recording short.
+            autoStopTimer = setTimeout(() => {
+                if (recorder.state === "recording") {
+                    recorder.stop();
+                }
+            }, MAX_RECORDING_MS);
+        } catch (error) {
+            console.error("Recording error:", error);
+            // The stream may already be open if MediaRecorder setup failed.
+            releaseMicrophone();
+            setState("IDLE");
+        }
+    };
+    /**
+     * Stops the recorder held in mediaRecorderRef, but only while it is
+     * actively recording; otherwise this is a no-op.
+     */
+    const stopWhisperRecording = () => {
+        const activeRecorder = mediaRecorderRef.current;
+        if (!activeRecorder || activeRecorder.state !== "recording") {
+            return;
+        }
+        activeRecorder.stop();
+    };
     const handleSend = async (text: string) => {
         setState("THINKING");