Spaces:
Sleeping
Sleeping
| """Module for handling audio input through Gradio interface.""" | |
| from typing import Callable | |
| import numpy as np | |
| from scipy import signal | |
| from improvisation_lab.infrastructure.audio.audio_processor import \ | |
| AudioProcessor | |
| class WebAudioProcessor(AudioProcessor): | |
| """Handle audio input from Gradio interface.""" | |
| def __init__( | |
| self, | |
| sample_rate: int, | |
| callback: Callable[[np.ndarray], None] | None = None, | |
| buffer_duration: float = 0.3, | |
| ): | |
| """Initialize GradioAudioInput. | |
| Args: | |
| sample_rate: Audio sample rate in Hz | |
| callback: Optional callback function to process audio data | |
| buffer_duration: Duration of audio buffer in seconds | |
| """ | |
| super().__init__(sample_rate, callback, buffer_duration) | |
| def _resample_audio( | |
| self, audio_data: np.ndarray, original_sr: int, target_sr: int | |
| ) -> np.ndarray: | |
| """Resample audio data to target sample rate. | |
| In the case of Gradio, | |
| the sample rate of the audio data may not match the target sample rate. | |
| Args: | |
| audio_data: numpy array of audio samples | |
| original_sr: Original sample rate in Hz | |
| target_sr: Target sample rate in Hz | |
| Returns: | |
| Resampled audio data with target sample rate | |
| """ | |
| number_of_samples = round(len(audio_data) * float(target_sr) / original_sr) | |
| resampled_data = signal.resample(audio_data, number_of_samples) | |
| return resampled_data | |
| def _normalize_audio(self, audio_data: np.ndarray) -> np.ndarray: | |
| """Normalize audio data to range [-1, 1] by dividing by maximum absolute value. | |
| Args: | |
| audio_data: numpy array of audio samples | |
| Returns: | |
| Normalized audio data with values between -1 and 1 | |
| """ | |
| if len(audio_data) == 0: | |
| return audio_data | |
| max_abs = np.max(np.abs(audio_data)) | |
| return audio_data if max_abs == 0 else audio_data / max_abs | |
| def _remove_low_amplitude_noise(self, audio_data: np.ndarray) -> np.ndarray: | |
| """Remove low amplitude noise from audio data. | |
| Applies a threshold to remove low amplitude signals that are likely noise. | |
| Args: | |
| audio_data: Audio data as numpy array | |
| Returns: | |
| Audio data with low amplitude noise removed | |
| """ | |
| # [TODO] Set appropriate threshold | |
| threshold = 20.0 | |
| audio_data[np.abs(audio_data) < threshold] = 0 | |
| return audio_data | |
| def process_audio(self, audio_input: tuple[int, np.ndarray]) -> None: | |
| """Process incoming audio data from Gradio. | |
| Args: | |
| audio_input: Tuple of (sample_rate, audio_data) | |
| where audio_data is a (samples, channels) array | |
| """ | |
| if not self.is_recording: | |
| return | |
| input_sample_rate, audio_data = audio_input | |
| if input_sample_rate != self.sample_rate: | |
| audio_data = self._resample_audio( | |
| audio_data, input_sample_rate, self.sample_rate | |
| ) | |
| audio_data = self._remove_low_amplitude_noise(audio_data) | |
| audio_data = self._normalize_audio(audio_data) | |
| self._append_to_buffer(audio_data) | |
| self._process_buffer() | |
| def start_recording(self): | |
| """Start accepting audio input from Gradio.""" | |
| if self.is_recording: | |
| raise RuntimeError("Recording is already in progress") | |
| self.is_recording = True | |
| def stop_recording(self): | |
| """Stop accepting audio input from Gradio.""" | |
| if not self.is_recording: | |
| raise RuntimeError("Recording is not in progress") | |
| self.is_recording = False | |
| self._buffer = np.array([], dtype=np.float32) | |