Spaces:
Sleeping
Sleeping
Commit
·
3688b19
1
Parent(s):
5306cf5
Update frontend with Whisper STT and prepare for vision features
Browse files- frontend/app/voice/page.tsx +60 -23
frontend/app/voice/page.tsx
CHANGED
|
@@ -7,37 +7,74 @@ export default function OraVoice() {
|
|
| 7 |
const [transcript, setTranscript] = useState("");
|
| 8 |
const [response, setResponse] = useState("");
|
| 9 |
const [history, setHistory] = useState<any[]>([]);
|
|
|
|
|
|
|
| 10 |
|
| 11 |
const recognitionRef = useRef<any>(null);
|
|
|
|
|
|
|
| 12 |
|
| 13 |
useEffect(() => {
|
| 14 |
-
//
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
recognition.onresult = (event: any) => {
|
| 26 |
-
const text = event.results[0][0].transcript;
|
| 27 |
-
setTranscript(text);
|
| 28 |
-
handleSend(text);
|
| 29 |
-
};
|
| 30 |
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 35 |
};
|
| 36 |
|
| 37 |
-
|
| 38 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 39 |
}
|
| 40 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 41 |
|
| 42 |
const handleSend = async (text: string) => {
|
| 43 |
setState("THINKING");
|
|
|
|
| 7 |
// Text of the most recent transcription; written after /api/transcribe responds.
const [transcript, setTranscript] = useState("");
|
| 8 |
// NOTE(review): presumably the assistant's latest reply — not used in the visible code, confirm.
const [response, setResponse] = useState("");
|
| 9 |
// NOTE(review): presumably the conversation log; element shape is untyped (any[]) — confirm.
const [history, setHistory] = useState<any[]>([]);
|
| 10 |
+
// NOTE(review): not read or written in the visible code; starts as null — confirm intended use.
const [emotion, setEmotion] = useState<string | null>(null);
|
| 11 |
+
// NOTE(review): not read or written in the visible code; starts as null — confirm intended use.
const [uploadedImage, setUploadedImage] = useState<string | null>(null);
|
| 12 |
|
| 13 |
// NOTE(review): looks like a leftover from the browser speech-recognition path — confirm it is still needed.
const recognitionRef = useRef<any>(null);
|
| 14 |
+
// The MediaRecorder of the current recording, kept so it can be stopped later.
const mediaRecorderRef = useRef<MediaRecorder | null>(null);
|
| 15 |
+
// Audio chunks pushed by the recorder's dataavailable handler; reset when a recording starts.
const audioChunksRef = useRef<Blob[]>([]);
|
| 16 |
|
| 17 |
// Mount-time effect that is intentionally empty (no setup, no cleanup).
useEffect(() => {
|
| 18 |
+
// Whisper-based recording will replace browser STT
|
| 19 |
+
// No need for webkitSpeechRecognition anymore, so nothing is initialised here.
|
| 20 |
+
}, []);
|
| 21 |
+
|
| 22 |
+
/**
 * Records microphone audio and sends it to the Whisper transcription endpoint.
 *
 * Flow: request microphone access, record with a MediaRecorder (auto-stopping
 * after 10 seconds), then — once the recorder stops — base64-encode the audio,
 * POST it to `/api/transcribe`, store the returned text as the transcript and
 * forward it to `handleSend`.
 *
 * Any failure (permission denied, recorder error, read error, network error,
 * non-2xx response) is logged and the state is reset to "IDLE". An empty
 * transcription is not forwarded to `handleSend`.
 */
const startWhisperRecording = async () => {
  const MAX_RECORDING_MS = 10000;

  // Ignore a second start while a recording is in progress; otherwise the
  // first recorder would be orphaned with its microphone stream still open.
  if (mediaRecorderRef.current?.state === "recording") {
    return;
  }

  let stream: MediaStream | undefined;

  try {
    setState("LISTENING");
    const micStream = await navigator.mediaDevices.getUserMedia({ audio: true });
    stream = micStream;
    const recorder = new MediaRecorder(micStream);
    let autoStopTimer: ReturnType<typeof setTimeout> | undefined;

    audioChunksRef.current = [];

    recorder.ondataavailable = (e) => {
      // Skip empty chunks; they carry no audio.
      if (e.data.size > 0) {
        audioChunksRef.current.push(e.data);
      }
    };

    recorder.onstop = () => {
      // The timer belongs to this recording only; cancel it so it can never
      // fire against a later recording.
      clearTimeout(autoStopTimer);
      micStream.getTracks().forEach((track) => track.stop());

      // Label the blob with the container the recorder actually produced.
      // Only the base64 payload is sent, so this does not change the request.
      const audioBlob = new Blob(audioChunksRef.current, {
        type: recorder.mimeType || "audio/webm",
      });
      const reader = new FileReader();

      reader.onerror = () => {
        console.error("Recording error:", reader.error);
        setState("IDLE");
      };

      // `onload` fires only on a successful read, so `reader.result` is the data URL.
      reader.onload = async () => {
        try {
          const base64Audio = (reader.result as string).split(",")[1] ?? "";

          // Send to Whisper for transcription
          const res = await fetch("/api/transcribe", {
            method: "POST",
            headers: { "Content-Type": "application/json" },
            body: JSON.stringify({ audio_data: base64Audio }),
          });

          if (!res.ok) {
            throw new Error(`Transcription request failed with status ${res.status}`);
          }

          const data = await res.json();
          const text = typeof data?.text === "string" ? data.text : "";
          setTranscript(text);

          if (text.trim().length > 0) {
            // Fire-and-forget, as before: handleSend drives the state from here.
            void handleSend(text);
          } else {
            // Nothing was understood; return to idle instead of sending an empty prompt.
            setState("IDLE");
          }
        } catch (error) {
          console.error("Transcription error:", error);
          setState("IDLE");
        }
      };

      reader.readAsDataURL(audioBlob);
    };

    recorder.start();
    mediaRecorderRef.current = recorder;

    // Auto-stop after 10 seconds. The timer targets this recorder instance
    // rather than whatever the ref points at when it fires.
    autoStopTimer = setTimeout(() => {
      if (recorder.state === "recording") {
        recorder.stop();
      }
    }, MAX_RECORDING_MS);
  } catch (error) {
    console.error("Recording error:", error);
    // Release the microphone if it was acquired before the failure.
    stream?.getTracks().forEach((track) => track.stop());
    setState("IDLE");
  }
};
|
| 72 |
+
|
| 73 |
+
/**
 * Stops the current recording, if there is one.
 * Does nothing when no recorder exists or it is not in the "recording" state.
 */
const stopWhisperRecording = () => {
  const activeRecorder = mediaRecorderRef.current;
  if (!activeRecorder || activeRecorder.state !== "recording") {
    return;
  }
  activeRecorder.stop();
};
|
| 78 |
|
| 79 |
const handleSend = async (text: string) => {
|
| 80 |
setState("THINKING");
|