from io import BytesIO
from typing import Optional

import librosa
import numpy as np
import soundfile as sf


class AudioBuffer:
    """Accumulate streaming audio and hand it out in fixed-size chunks.

    Incoming payloads are decoded, down-mixed to mono, resampled to
    ``sample_rate`` and appended to an internal float32 sample buffer. Once
    the buffer holds at least ``target_chunk_size`` samples, the oldest
    ``target_chunk_size`` samples are returned and removed from the buffer.

    Attributes:
        chunk_duration: Length of one output chunk, in seconds.
        sample_rate: Sample rate (Hz) that all buffered audio is converted to.
        buffer: 1-D float32 array of samples not yet returned as a chunk.
        target_chunk_size: Number of samples in one output chunk,
            ``int(sample_rate * chunk_duration)``.
    """

    def __init__(self, chunk_duration: float = 30.0, sample_rate: int = 16000):
        """Create an empty buffer.

        Args:
            chunk_duration: Length of each emitted chunk in seconds.
            sample_rate: Target sample rate in Hz.

        Raises:
            ValueError: If either argument is not positive, or if together
                they describe a chunk shorter than one sample.
        """
        if chunk_duration <= 0:
            raise ValueError(
                f"chunk_duration must be positive, got {chunk_duration!r}"
            )
        if sample_rate <= 0:
            raise ValueError(f"sample_rate must be positive, got {sample_rate!r}")

        self.chunk_duration = chunk_duration
        self.sample_rate = sample_rate
        self.buffer = np.array([], dtype=np.float32)
        self.target_chunk_size = int(sample_rate * chunk_duration)

        # A zero-sample chunk would make the "chunk ready" test in add_data
        # always true, so every call would return an empty array.
        if self.target_chunk_size < 1:
            raise ValueError(
                "chunk_duration * sample_rate must cover at least one sample"
            )

    def add_data(self, audio_data: bytes) -> Optional[np.ndarray]:
        """Append encoded audio to the buffer and return a chunk if one is ready.

        Args:
            audio_data: Encoded audio that ``soundfile.read`` can decode from
                an in-memory file without extra format hints (no sample rate
                or channel count is passed to the decoder here). An empty
                payload adds nothing, but the buffer is still checked for a
                ready chunk.

        Returns:
            A 1-D float32 array of exactly ``target_chunk_size`` samples when
            enough audio has accumulated, otherwise ``None``. At most one
            chunk is returned per call; any surplus stays buffered and is
            returned by later calls.

        Raises:
            Whatever ``soundfile.read`` or ``librosa.resample`` raise for
            undecodable or otherwise invalid input; errors are not caught
            here.
        """
        if audio_data:
            samples, source_rate = sf.read(BytesIO(audio_data))

            # Multi-channel data arrives as (frames, channels); average the
            # channels to get a mono signal.
            if samples.ndim > 1:
                samples = samples.mean(axis=1)

            if source_rate != self.sample_rate:
                samples = librosa.resample(
                    samples, orig_sr=source_rate, target_sr=self.sample_rate
                )

            self.buffer = np.concatenate(
                [self.buffer, samples.astype(np.float32)]
            )

        if len(self.buffer) < self.target_chunk_size:
            return None

        # Split off the oldest samples; the rest waits for the next call.
        chunk = self.buffer[:self.target_chunk_size]
        self.buffer = self.buffer[self.target_chunk_size:]
        return chunk

    def get_remaining(self) -> np.ndarray:
        """Return the samples that have not yet been emitted as a chunk.

        The buffer is not cleared, and the returned array is the internal
        buffer itself rather than a copy.
        """
        return self.buffer