# fast_rep_voice / stt_module.py
# Krish-05's picture
# Update stt_module.py
# a9ca228 verified
import threading
import pydub
import av
import streamlit as st # Only imported for st.session_state access in recv method
from streamlit_webrtc import AudioProcessorBase
class AudioBufferProcessor(AudioProcessorBase):
    """
    An audio processor that buffers incoming WebRTC audio frames.

    Audio is accumulated only while recording is active and is stored as a
    single mono, 16 kHz ``pydub.AudioSegment``.

    Recording state is resolved in this order:

    1. ``self.is_recording`` when it is not ``None``. This is a plain
       attribute on the processor, so the Streamlit script can set it
       directly (e.g. ``ctx.audio_processor.is_recording = True``).
    2. ``st.session_state.is_recording`` otherwise. The frame callbacks are
       expected to run outside the Streamlit script thread, where session
       state is typically not available; in that case a warning is issued
       once and the frame is not recorded (instead of raising inside the
       audio pipeline).
    """

    # Format of the buffered audio.
    TARGET_FRAME_RATE = 16000
    TARGET_CHANNELS = 1

    def __init__(self) -> None:
        super().__init__()
        self._audio_buffer = pydub.AudioSegment.empty()
        # Guards _audio_buffer, which is written from the frame callbacks and
        # read/cleared from get_and_clear_buffered_audio().
        self._lock = threading.Lock()
        # Tri-state override: True/False force the recording state,
        # None defers to st.session_state.is_recording.
        self.is_recording = None

    def _recording_active(self) -> bool:
        """Return True if incoming frames should currently be buffered."""
        override = self.is_recording
        if override is not None:
            return bool(override)
        try:
            return bool(st.session_state.is_recording)
        except (AttributeError, KeyError):
            # Session state is unavailable here (or the key was never set).
            # Skip the frame rather than break the audio track.
            import warnings

            warnings.warn(
                "st.session_state.is_recording is not accessible from the "
                "audio processing thread; set `is_recording` on the "
                "processor instance instead. Audio is not being recorded.",
                RuntimeWarning,
                stacklevel=2,
            )
            return False

    def _append_frame(self, frame: av.AudioFrame) -> None:
        """Convert one frame to mono 16 kHz and append it to the buffer."""
        samples = frame.to_ndarray()
        if frame.format.is_planar:
            # Planar layouts yield one row per channel; pydub expects
            # interleaved samples, so put the channel axis last before
            # serializing.
            samples = samples.T
        sound = pydub.AudioSegment(
            data=samples.tobytes(),
            sample_width=frame.format.bytes,
            frame_rate=frame.sample_rate,
            channels=len(frame.layout.channels),
        )
        sound = sound.set_channels(self.TARGET_CHANNELS).set_frame_rate(
            self.TARGET_FRAME_RATE
        )
        with self._lock:
            self._audio_buffer += sound

    def recv(self, frame: av.AudioFrame) -> av.AudioFrame:
        """
        Receives a single audio frame from the WebRTC stream.

        If recording is active, the frame is appended to the internal buffer.
        The frame is returned unchanged because the processing pipeline
        expects a frame back from this method.
        """
        if self._recording_active():
            self._append_frame(frame)
        return frame

    async def recv_queued(self, frames: "list[av.AudioFrame]") -> "list[av.AudioFrame]":
        """
        Receives every frame queued since the previous call (async mode).

        Implemented so that no frame is missing from the recording when
        several frames arrive between two processing steps. The frames are
        returned unchanged.
        """
        if self._recording_active():
            for frame in frames:
                self._append_frame(frame)
        return frames

    def get_and_clear_buffered_audio(self) -> pydub.AudioSegment:
        """
        Retrieves the accumulated audio and clears the buffer.

        Intended to be called when recording stops. Returns an empty segment
        if nothing was recorded.
        """
        with self._lock:
            recorded_audio = self._audio_buffer
            self._audio_buffer = pydub.AudioSegment.empty()  # Clear the buffer
        return recorded_audio