"""Streamlit app that records microphone audio and transcribes it to text.

Two daemon threads do the work: ``audio_stream`` captures microphone audio and
queues it in multi-second segments, and ``recognize_worker`` sends each segment
to the Google Web Speech API and queues the recognized text.

Streamlit re-executes this script from the top on every user interaction, so
the queues, the stop event and the "recording" flag are kept in
``st.session_state``; otherwise each rerun would create fresh objects that the
already-running threads know nothing about.

Note: the transcript is refreshed only when the script reruns (i.e. on the next
user interaction); nothing here triggers a rerun automatically.
"""

import queue
import threading

import pyaudio
import speech_recognition as sr
import streamlit as st

FORMAT = pyaudio.paInt16
CHANNELS = 1
RATE = 44100
CHUNK = 1024
SAMPLE_WIDTH = 2  # bytes per sample; matches the 16-bit FORMAT above
# A single CHUNK is only ~23 ms of audio (1024 / 44100 s), far too short to
# contain recognizable speech, so chunks are grouped into segments this long
# before being handed to the recognizer. Fixed-length segments can still cut a
# word in half at the boundary.
SEGMENT_SECONDS = 3

_RECORDING_KEY = '_stt_recording'


def _session_resource(key, factory):
    """Return the object stored under *key* in session state, creating it once."""
    if key not in st.session_state:
        st.session_state[key] = factory()
    return st.session_state[key]


# Bound at module level so the thread targets below can reach them as globals.
# Threads started by an earlier run keep that run's globals, which refer to
# these same session-scoped objects.
audio_queue = _session_resource('_stt_audio_queue', queue.Queue)
text_queue = _session_resource('_stt_text_queue', queue.Queue)
stop_flag = _session_resource('_stt_stop_flag', threading.Event)


def recognize_worker():
    """Transcribe queued audio segments until a ``None`` sentinel arrives.

    Each recognized phrase is put on ``text_queue``. Segments with no
    intelligible speech are skipped; request failures are reported on
    ``text_queue`` so they are visible in the UI instead of silently killing
    the thread.
    """
    recognizer = sr.Recognizer()
    while True:
        audio_data = audio_queue.get()
        if audio_data is None:
            break
        try:
            text = recognizer.recognize_google(audio_data)
        except sr.UnknownValueError:
            # Nothing intelligible in this segment (silence, noise, ...).
            continue
        except sr.RequestError as exc:
            text_queue.put(f'[speech recognition request failed: {exc}]')
            continue
        text_queue.put(text)


def audio_stream():
    """Capture microphone audio until ``stop_flag`` is set.

    Raw chunks are accumulated into segments of about ``SEGMENT_SECONDS`` and
    put on ``audio_queue`` as ``sr.AudioData``. When capture ends, for any
    reason, a ``None`` sentinel is queued after the last segment so that
    ``recognize_worker`` finishes the pending audio and then exits.
    """
    frames_per_segment = RATE * SEGMENT_SECONDS
    bytes_per_frame = SAMPLE_WIDTH * CHANNELS
    p = None
    stream = None
    try:
        p = pyaudio.PyAudio()
        stream = p.open(format=FORMAT,
                        channels=CHANNELS,
                        rate=RATE,
                        input=True,
                        frames_per_buffer=CHUNK)
        pending = []
        pending_frames = 0
        while not stop_flag.is_set():
            data = stream.read(CHUNK, exception_on_overflow=False)
            if not data:
                break
            pending.append(data)
            pending_frames += len(data) // bytes_per_frame
            if pending_frames >= frames_per_segment:
                audio_queue.put(
                    sr.AudioData(b''.join(pending), RATE, SAMPLE_WIDTH))
                pending = []
                pending_frames = 0
        if pending:
            # Flush the partial segment captured just before stopping.
            audio_queue.put(sr.AudioData(b''.join(pending), RATE, SAMPLE_WIDTH))
    finally:
        # Queue the sentinel first so the worker is released even if closing
        # the audio device raises.
        audio_queue.put(None)
        if stream is not None:
            stream.stop_stream()
            stream.close()
        if p is not None:
            p.terminate()


st.title('Real-time Speech to Text')

if st.button('Start Recording'):
    if st.session_state.get(_RECORDING_KEY):
        # Avoid spawning a second pair of threads on the same microphone.
        st.write('Recording already in progress.')
    else:
        stop_flag.clear()
        threading.Thread(target=audio_stream, daemon=True).start()
        threading.Thread(target=recognize_worker, daemon=True).start()
        st.session_state[_RECORDING_KEY] = True
        st.write('Recording started...')

if st.button('Stop Recording'):
    if st.session_state.get(_RECORDING_KEY):
        # audio_stream queues the worker's sentinel itself once it has flushed
        # its last segment, so only the flag needs to be raised here.
        stop_flag.set()
        st.session_state[_RECORDING_KEY] = False
    st.write('Recording stopped.')

# The value of a keyed widget may only be changed through session state
# BEFORE the widget is created in the current run, so pending transcriptions
# are merged in first.
if 'text_area' not in st.session_state:
    st.session_state['text_area'] = ''

new_lines = []
while True:
    try:
        new_lines.append(text_queue.get_nowait())
    except queue.Empty:
        break
if new_lines:
    st.session_state['text_area'] += ''.join(f'{line}\n' for line in new_lines)

st.text_area('Text:', value='', key='text_area')