File size: 1,560 Bytes
436ea00
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
import streamlit as st
import speech_recognition as sr
import pyaudio
import threading
import queue

# Microphone capture settings handed to PyAudio when the input stream is opened.
FORMAT = pyaudio.paInt16  # sample format constant passed to p.open()
CHANNELS = 1              # number of input channels requested
RATE = 44100              # sample rate; also given to sr.AudioData
CHUNK = 1024              # frames per buffer and per stream.read() call

# Hand-off queues between the background threads and the UI script:
# audio_queue carries captured audio to the recognizer, text_queue carries
# recognized text back to the page.
audio_queue, text_queue = queue.Queue(), queue.Queue()


def recognize_worker():
    """Transcribe queued audio and publish the text.

    Blocks on ``audio_queue`` and sends every item to
    ``Recognizer.recognize_google``; each successful transcript is put on
    ``text_queue``. The loop ends when a ``None`` sentinel is taken from
    ``audio_queue``.

    Segments that cannot be understood (``sr.UnknownValueError``) are
    skipped. Failed recognition requests (``sr.RequestError``) are logged
    and skipped so that one failed request does not terminate the worker
    thread.
    """
    # Function-scope import keeps this block self-contained; the module's
    # import section does not bring in logging.
    import logging

    logger = logging.getLogger(__name__)
    recognizer = sr.Recognizer()

    while True:
        audio_data = audio_queue.get()
        if audio_data is None:
            # Sentinel queued by the UI when recording is stopped.
            break

        try:
            text = recognizer.recognize_google(audio_data)
        except sr.UnknownValueError:
            # No intelligible speech in this piece of audio.
            continue
        except sr.RequestError as exc:
            # The recognition request itself failed; keep the worker alive so
            # later audio can still be transcribed.
            logger.warning('Speech recognition request failed: %s', exc)
            continue

        text_queue.put(text)


def audio_stream():
    """Capture microphone audio and queue it for recognition.

    Opens a PyAudio input stream using the module-level ``FORMAT``,
    ``CHANNELS``, ``RATE`` and ``CHUNK`` settings and reads from it until the
    module-level ``stop_flag`` event is set or a read returns no data.

    Reads are accumulated into segments of roughly ``segment_seconds``
    seconds before being wrapped in ``sr.AudioData`` and put on
    ``audio_queue``; a single ``CHUNK``-sized read covers only a few
    hundredths of a second, which is too short to contain recognizable
    speech. Any partial segment left when the loop ends is queued as well.

    The stream and the PyAudio instance are always released, even if opening
    or reading raises.
    """
    segment_seconds = 3
    # Number of CHUNK-sized reads that make up one segment (at least one).
    chunks_per_segment = max(1, int(RATE * segment_seconds / CHUNK))

    p = pyaudio.PyAudio()
    stream = None
    try:
        stream = p.open(
            format=FORMAT,
            channels=CHANNELS,
            rate=RATE,
            input=True,
            frames_per_buffer=CHUNK,
        )
        # Bytes per sample for FORMAT, instead of a hard-coded width.
        sample_width = p.get_sample_size(FORMAT)

        frames = []
        while not stop_flag.is_set():
            data = stream.read(CHUNK, exception_on_overflow=False)
            if not data:
                break
            frames.append(data)
            if len(frames) >= chunks_per_segment:
                audio_queue.put(sr.AudioData(b''.join(frames), RATE, sample_width))
                frames = []

        if frames:
            # Queue whatever was captured since the last full segment.
            audio_queue.put(sr.AudioData(b''.join(frames), RATE, sample_width))
    finally:
        if stream is not None:
            stream.stop_stream()
            stream.close()
        p.terminate()


st.title('Real-time Speech to Text')

# Streamlit re-runs this script from the top on every interaction. Objects
# shared with the background threads are therefore kept in st.session_state so
# that every rerun talks to the same event and queues the threads are using,
# rather than to freshly created ones.
state = st.session_state
if 'stop_flag' not in state:
    state.stop_flag = threading.Event()
    state.audio_queue = queue.Queue()
    state.text_queue = queue.Queue()
    state.recording = False
if 'text_area' not in state:
    state.text_area = ''

# Bind the module-level names that audio_stream() and recognize_worker() look
# up to the persisted objects before any thread is started.
stop_flag = state.stop_flag
audio_queue = state.audio_queue
text_queue = state.text_queue

if st.button('Start Recording'):
    if state.recording:
        # Avoid opening the microphone twice and starting duplicate workers.
        st.write('Recording already in progress.')
    else:
        stop_flag.clear()
        threading.Thread(target=audio_stream, daemon=True).start()
        threading.Thread(target=recognize_worker, daemon=True).start()
        state.recording = True
        st.write('Recording started...')

if st.button('Stop Recording'):
    if state.recording:
        stop_flag.set()
        # Sentinel that tells recognize_worker() to exit; only sent while a
        # worker is running so a stale sentinel cannot stop the next one.
        audio_queue.put(None)
        state.recording = False
    st.write('Recording stopped.')

# Move all pending transcripts into the widget's state BEFORE the widget is
# created: Streamlit does not allow a widget's session_state entry to be
# modified once that widget has been instantiated in the current run.
while True:
    try:
        state.text_area += text_queue.get_nowait() + '\n'
    except queue.Empty:
        break

# The displayed value comes from st.session_state['text_area']. New text
# appears on the next rerun of the script (i.e. the next interaction).
st.text_area('Text:', key='text_area')