matt HOFFNER
add isComplete value to hook for easier auto-sending of messages once transcription is complete
862ccf9
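
The component below consumes a new isComplete flag from useTranscriber. The hook itself is not part of this diff, so the following is only a minimal sketch of how it might expose that flag; the state and worker-message names are assumptions, not the actual implementation:

// Hypothetical excerpt from hooks/useTranscriber.ts -- a sketch only;
// the real hook is not shown in this commit.
import { useState, useCallback } from "react";

export function useTranscriber() {
  const [isComplete, setIsComplete] = useState(false);

  const start = useCallback((audioData?: AudioBuffer) => {
    setIsComplete(false); // reset before each transcription run
    // ...post audioData to the transcription worker here...
  }, []);

  // In the worker's message handler, something like:
  //   if (message.status === "complete") setIsComplete(true);

  return { start, isComplete /* plus output, isBusy, progressItems, ... */ };
}
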
import React, { useState, useEffect, useRef, useCallback } from 'react';
import styles from './page.module.css';
import { useMicVAD } from "@ricky0123/vad-react";
import * as ort from "onnxruntime-web";
import MicIcon from '@mui/icons-material/Mic';
import StopIcon from '@mui/icons-material/Stop';
import { webmFixDuration } from './BlobFix';
import Progress from './progress';
import { useTranscriber } from "./hooks/useTranscriber";

// Serve the onnxruntime-web WASM binaries from Next.js static assets
ort.env.wasm.wasmPaths = "/_next/static/chunks/";

interface VoiceInputFormProps {
  handleSubmit: any;
  input: string;
  setInput: React.Dispatch<React.SetStateAction<string>>;
}

// Return the first recording MIME type this browser supports,
// or undefined to let MediaRecorder pick its default.
function getMimeType() {
  const types = [
    "audio/webm",
    "audio/mp4",
    "audio/ogg",
    "audio/wav",
    "audio/aac",
  ];
  return types.find((type) => MediaRecorder.isTypeSupported(type));
}

// Decode a recorded Blob into an AudioBuffer for the transcriber.
const convertBlobToAudioBuffer = async (blob: Blob): Promise<AudioBuffer> => {
  const audioContext = new AudioContext();
  const arrayBuffer = await blob.arrayBuffer();
  const audioBuffer = await audioContext.decodeAudioData(arrayBuffer);
  await audioContext.close(); // release the AudioContext once decoding is done
  return audioBuffer;
};

const VoiceInputForm: React.FC<VoiceInputFormProps> = ({ handleSubmit, input, setInput }) => {
  const [recording, setRecording] = useState(false);
  const [duration, setDuration] = useState(0);
  const [recordedBlob, setRecordedBlob] = useState<Blob | null>(null);
  const [recognizedText, setRecognizedText] = useState('');

  const streamRef = useRef<MediaStream | null>(null);
  const mediaRecorderRef = useRef<MediaRecorder | null>(null);
  const chunksRef = useRef<Blob[]>([]);

  const transcriber = useTranscriber();

  const onFormSubmit = (e: React.FormEvent<HTMLFormElement>) => {
    e.preventDefault();
    handleSubmit(input); // handleSubmit is assumed to take the input text as an argument
  };

  const startListening = useCallback((audioData: any) => {
    transcriber.start(audioData);
  }, [transcriber]);

  useEffect(() => {
    if (transcriber.output) {
      setRecognizedText(transcriber.output.text);
    }
  }, [transcriber.output, transcriber.isBusy]);

  const handleTranscriptionComplete = () => {
    // Build a synthetic event so handleSubmit can be invoked outside a real form submit
    const syntheticEvent = {
      preventDefault: () => {},
      target: {
        // Mimic the structure of the form's event.target:
        // the form exposes a single field named 'input'
        elements: {
          input: {
            value: recognizedText
          }
        }
      }
    };
    handleSubmit(syntheticEvent);
  };

  // Auto-send the message as soon as the transcriber reports completion
  useEffect(() => {
    if (transcriber.isComplete) {
      handleTranscriptionComplete();
    }
  }, [transcriber.isComplete]);

  useEffect(() => {
    if (recognizedText) {
      setInput(recognizedText);
    }
  }, [recognizedText, setInput]);

  useEffect(() => {
    const processRecording = async () => {
      if (recordedBlob) {
        // Process the blob for transcription
        const audioBuffer = await convertBlobToAudioBuffer(recordedBlob);
        startListening(audioBuffer); // start the transcription process
        // Reset the blob state to prepare for a new recording
        setRecordedBlob(null);
      }
    };
    processRecording();
  }, [recordedBlob, startListening]);

  const vad = useMicVAD({
    modelURL: "/_next/static/chunks/silero_vad.onnx",
    workletURL: "/_next/static/chunks/vad.worklet.bundle.min.js",
    startOnLoad: false,
    onSpeechEnd: () => {
      if (recording) {
        stopRecording(); // stopRecording already resets the recording state
      }
    },
  });

  const stopRecording = () => {
    if (
      mediaRecorderRef.current &&
      mediaRecorderRef.current.state === "recording"
    ) {
      mediaRecorderRef.current.stop(); // fires a final dataavailable event, then goes inactive
      setDuration(0);
      setRecording(false);
    }
  };

  const startRecording = async () => {
    // Reset any previous recording
    setRecordedBlob(null);
    // @ts-ignore -- transcriber.start() is typed to take audio data; called here without it
    transcriber.start();

    const startTime = Date.now();

    try {
      if (!streamRef.current) {
        streamRef.current = await navigator.mediaDevices.getUserMedia({
          audio: true,
        });
      }

      const mimeType = getMimeType();
      const mediaRecorder = new MediaRecorder(streamRef.current, {
        mimeType,
      });
      mediaRecorderRef.current = mediaRecorder;

      mediaRecorder.addEventListener("dataavailable", async (event) => {
        if (event.data.size > 0) {
          chunksRef.current.push(event.data);
        }
        if (mediaRecorder.state === "inactive") {
          // Received a stop event; assemble the chunks into a single blob
          const duration = Date.now() - startTime;
          let blob = new Blob(chunksRef.current, { type: mimeType });
          if (mimeType === "audio/webm") {
            // MediaRecorder's webm output often lacks a duration header; patch it in
            blob = await webmFixDuration(blob, duration, blob.type);
          }
          setRecordedBlob(blob);
          chunksRef.current = [];
        }
      });

      mediaRecorder.start();
      setRecording(true);
    } catch (error) {
      console.error("Error accessing microphone:", error);
    }
  };

  useEffect(() => {
    if (!recording) return;
    // Tick the elapsed-time counter once per second while recording
    const timer = setInterval(() => {
      setDuration((prevDuration) => prevDuration + 1);
    }, 1000);
    return () => {
      clearInterval(timer);
    };
  }, [recording]);

  const handleToggleRecording = () => {
    if (recording) {
      vad.pause(); // stop voice-activity detection alongside the recorder
      stopRecording();
    } else {
      vad.start(); // let onSpeechEnd auto-stop the recording on silence
      startRecording();
    }
  };

  return (
    <div>
      {transcriber.progressItems.length > 0 && (
        <div>
          <label>
            Loading model files... (runs only once)
          </label>
          {transcriber.progressItems.map((data) => (
            <div key={data.file}>
              <Progress
                text={data.file}
                percentage={data.progress}
              />
            </div>
          ))}
        </div>
      )}
      <form onSubmit={onFormSubmit} className={styles.form}>
        <input
          type="text"
          value={input}
          className={styles.input}
          onChange={(e) => setInput(e.target.value)}
          placeholder="Speak or type..."
        />
      </form>
      <button
        type='button'
        className={styles.button}
        onClick={handleToggleRecording}
      >
        {recording ? <StopIcon /> : <MicIcon />}
      </button>
    </div>
  );
};

export default VoiceInputForm;
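
For reference, one way a parent page might wire this component up. This is a sketch only; the parent's handleSubmit and state handling are assumptions, not part of this commit:

// Hypothetical parent page -- illustrative, not from this diff.
import React, { useState } from 'react';
import VoiceInputForm from './VoiceInputForm';

export default function ChatPage() {
  const [input, setInput] = useState('');

  const handleSubmit = (e: any) => {
    // The component calls this with either the raw input string (form submit)
    // or the synthetic event built by handleTranscriptionComplete, so
    // tolerate both shapes.
    e?.preventDefault?.();
    // ...send `input` to the chat backend here...
    setInput('');
  };

  return (
    <VoiceInputForm handleSubmit={handleSubmit} input={input} setInput={setInput} />
  );
}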