Abdalkaderdev committed
Commit 3688b19
1 Parent(s): 5306cf5

Update frontend with Whisper STT and prepare for vision features

Files changed (1)
  1. frontend/app/voice/page.tsx +60 -23
frontend/app/voice/page.tsx CHANGED
@@ -7,37 +7,74 @@ export default function OraVoice() {
   const [transcript, setTranscript] = useState("");
   const [response, setResponse] = useState("");
   const [history, setHistory] = useState<any[]>([]);
+  const [emotion, setEmotion] = useState<string | null>(null);
+  const [uploadedImage, setUploadedImage] = useState<string | null>(null);

   const recognitionRef = useRef<any>(null);
+  const mediaRecorderRef = useRef<MediaRecorder | null>(null);
+  const audioChunksRef = useRef<Blob[]>([]);

   useEffect(() => {
-    // Initialize Speech Recognition
-    if (typeof window !== "undefined") {
-      const SpeechRecognition = (window as any).SpeechRecognition || (window as any).webkitSpeechRecognition;
-      if (SpeechRecognition) {
-        const recognition = new SpeechRecognition();
-        recognition.continuous = false;
-        recognition.lang = "en-US";
-        recognition.interimResults = false;
-
-        recognition.onstart = () => setState("LISTENING");
-
-        recognition.onresult = (event: any) => {
-          const text = event.results[0][0].transcript;
-          setTranscript(text);
-          handleSend(text);
-        };
-
-        recognition.onend = () => {
-          // If we didn't get a result and state is still listening, go back to idle
-          // But if we got a result, handleSend changes state to THINKING
-          if (state === "LISTENING") setState("IDLE");
-        };
-
-        recognitionRef.current = recognition;
-      }
-    }
-  }, []);
+    // Whisper-based recording will replace browser STT
+    // No need for webkitSpeechRecognition anymore
+  }, []);
+
+  const startWhisperRecording = async () => {
+    try {
+      setState("LISTENING");
+      const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
+      const recorder = new MediaRecorder(stream);
+
+      audioChunksRef.current = [];
+
+      recorder.ondataavailable = (e) => {
+        audioChunksRef.current.push(e.data);
+      };
+
+      recorder.onstop = async () => {
+        const audioBlob = new Blob(audioChunksRef.current, { type: 'audio/wav' });
+        const reader = new FileReader();
+
+        reader.onloadend = async () => {
+          const base64Audio = (reader.result as string).split(',')[1];
+
+          // Send to Whisper for transcription
+          const res = await fetch("/api/transcribe", {
+            method: "POST",
+            headers: { "Content-Type": "application/json" },
+            body: JSON.stringify({ audio_data: base64Audio }),
+          });
+
+          const data = await res.json();
+          setTranscript(data.text);
+          handleSend(data.text);
+        };
+
+        reader.readAsDataURL(audioBlob);
+        stream.getTracks().forEach(track => track.stop());
+      };
+
+      recorder.start();
+      mediaRecorderRef.current = recorder;
+
+      // Auto-stop after 10 seconds
+      setTimeout(() => {
+        if (mediaRecorderRef.current?.state === "recording") {
+          mediaRecorderRef.current.stop();
+        }
+      }, 10000);
+
+    } catch (error) {
+      console.error("Recording error:", error);
+      setState("IDLE");
+    }
+  };
+
+  const stopWhisperRecording = () => {
+    if (mediaRecorderRef.current?.state === "recording") {
+      mediaRecorderRef.current.stop();
+    }
+  };

   const handleSend = async (text: string) => {
     setState("THINKING");
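
Note: the page now posts the recorded audio to /api/transcribe and reads data.text from the response, but the handler behind that route is not part of this commit. As a minimal sketch only, assuming a Next.js App Router route that forwards the audio to OpenAI's hosted Whisper API (model "whisper-1") using an OPENAI_API_KEY environment variable — the file path, model choice, and response handling below are illustrative, not taken from this repository:

// app/api/transcribe/route.ts — hypothetical handler, not part of this diff
import { NextResponse } from "next/server";

export async function POST(req: Request) {
  // The page sends { audio_data: <base64 string> }
  const { audio_data } = await req.json();
  const audioBytes = Buffer.from(audio_data, "base64");

  // Forward the audio to OpenAI's transcription endpoint.
  // MediaRecorder usually emits audio/webm rather than true WAV, and
  // Whisper accepts webm, so the MIME type and filename are best-effort labels.
  const form = new FormData();
  form.append("file", new Blob([audioBytes], { type: "audio/webm" }), "recording.webm");
  form.append("model", "whisper-1");

  const whisperRes = await fetch("https://api.openai.com/v1/audio/transcriptions", {
    method: "POST",
    headers: { Authorization: `Bearer ${process.env.OPENAI_API_KEY}` },
    body: form,
  });

  const result = await whisperRes.json();
  // Return the shape the page expects: { text }
  return NextResponse.json({ text: result.text ?? "" });
}

One caveat on the frontend change itself: the blob is labeled 'audio/wav', but most browsers' MediaRecorder produces webm/opus (or ogg) rather than WAV, so whatever sits behind /api/transcribe has to accept the actual container regardless of that label.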