Spaces:
Runtime error
Runtime error
| import numpy as np | |
| import wave | |
| import io | |
| import logging | |
| from typing import Literal, Optional | |
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Names accepted for ``noise_type`` arguments.
# NOTE: 'speech' is declared here, but NoiseGenerator has no generator for it
# and rejects it as an unsupported noise type.
NoiseType = Literal[
    'white',
    'pink',
    'brown',
    'gaussian',
    'background',
    'speech',
]
class NoiseGenerator:
    """
    Audio noise generator for robustness testing.

    Produces synthetic noise signals (white, pink, brown, Gaussian and a mixed
    "background" profile) as float32 numpy arrays, and can either mix them
    into existing 16-bit PCM WAV data or render them as standalone mono WAV
    files. Supports various types of noise injection for testing digit
    recognition.
    """

    def __init__(self):
        self.logger = logging.getLogger(__name__)

    # ------------------------------------------------------------------
    # Private helpers (operate on exact sample counts, not durations)
    # ------------------------------------------------------------------

    @staticmethod
    def _rescale(signal: np.ndarray, amplitude: float) -> np.ndarray:
        """Scale ``signal`` so its standard deviation equals ``amplitude``.

        Signals without any spread (empty, single sample, constant) cannot be
        normalised; zeros of the same shape are returned instead of NaN.
        """
        spread = float(np.std(signal)) if signal.size else 0.0
        if not spread > 0.0:  # also catches NaN
            return np.zeros(signal.shape, dtype=np.float32)
        return (signal / spread * amplitude).astype(np.float32)

    @staticmethod
    def _gaussian_samples(samples: int, amplitude: float) -> np.ndarray:
        """Return ``samples`` zero-mean normal values with std ``amplitude``."""
        return np.random.normal(0, amplitude, samples).astype(np.float32)

    def _pink_samples(self, samples: int, sample_rate: int,
                      amplitude: float) -> np.ndarray:
        """Return ``samples`` values of 1/f-shaped noise with std ``amplitude``."""
        if samples < 2:
            # With fewer than two samples there is no non-DC frequency bin to
            # shape, and the FFT frequency grid is undefined for zero samples.
            return np.zeros(samples, dtype=np.float32)
        white = np.random.randn(samples)
        freqs = np.abs(np.fft.fftfreq(samples, 1 / sample_rate))
        freqs[0] = 1.0  # placeholder so the DC bin does not divide by zero
        # Amplitude ~ 1/sqrt(f) gives power ~ 1/f.
        shaping = 1.0 / np.sqrt(freqs)
        shaping[0] = 0.0  # remove the DC component entirely
        pink = np.real(np.fft.ifft(np.fft.fft(white) * shaping))
        return self._rescale(pink, amplitude)

    def _brown_samples(self, samples: int, amplitude: float) -> np.ndarray:
        """Return ``samples`` values of integrated white noise (random walk)."""
        walk = np.cumsum(np.random.randn(samples))
        if walk.size:
            # A random walk drifts away from zero; remove the offset so the
            # scaled signal is centred and does not clip on one side.
            walk = walk - walk.mean()
        return self._rescale(walk, amplitude)

    def _background_samples(self, samples: int, sample_rate: int,
                            amplitude: float) -> np.ndarray:
        """Return a white + pink + 60 Hz hum mixture of ``samples`` values."""
        white = self._gaussian_samples(samples, amplitude * 0.3)
        pink = self._pink_samples(samples, sample_rate, amplitude * 0.5)
        # Sample instants spaced exactly 1/sample_rate apart keep the hum at
        # 60 Hz even when the requested duration is not a whole sample count.
        t = np.arange(samples) / sample_rate
        rumble = amplitude * 0.2 * np.sin(2 * np.pi * 60 * t)  # 60 Hz hum
        return (white + pink + rumble).astype(np.float32)

    def _synthesize(self, noise_type: "NoiseType", samples: int,
                    sample_rate: int, amplitude: float) -> np.ndarray:
        """Generate ``samples`` values of the requested noise type.

        Raises:
            ValueError: If ``noise_type`` has no generator.
        """
        if noise_type in ('white', 'gaussian'):
            # Both names map to i.i.d. zero-mean normal samples.
            return self._gaussian_samples(samples, amplitude)
        if noise_type == 'pink':
            return self._pink_samples(samples, sample_rate, amplitude)
        if noise_type == 'brown':
            return self._brown_samples(samples, amplitude)
        if noise_type == 'background':
            return self._background_samples(samples, sample_rate, amplitude)
        raise ValueError(f"Unsupported noise type: {noise_type}")

    # ------------------------------------------------------------------
    # Public generators
    # ------------------------------------------------------------------

    def generate_white_noise(self, duration: float, sample_rate: int = 16000,
                             amplitude: float = 0.1) -> np.ndarray:
        """
        Generate white noise signal.

        Args:
            duration: Duration in seconds
            sample_rate: Sample rate in Hz
            amplitude: Noise amplitude (standard deviation of the samples)

        Returns:
            float32 numpy array of ``int(duration * sample_rate)`` samples
        """
        return self._gaussian_samples(int(duration * sample_rate), amplitude)

    def generate_pink_noise(self, duration: float, sample_rate: int = 16000,
                            amplitude: float = 0.1) -> np.ndarray:
        """
        Generate pink noise (1/f noise).

        White noise is shaped in the frequency domain and rescaled so its
        standard deviation equals ``amplitude``. Requests shorter than two
        samples return zeros.

        Args:
            duration: Duration in seconds
            sample_rate: Sample rate in Hz
            amplitude: Noise amplitude (standard deviation of the samples)

        Returns:
            float32 numpy array of ``int(duration * sample_rate)`` samples
        """
        return self._pink_samples(int(duration * sample_rate), sample_rate,
                                  amplitude)

    def generate_brown_noise(self, duration: float, sample_rate: int = 16000,
                             amplitude: float = 0.1) -> np.ndarray:
        """
        Generate brown noise (1/f^2 noise).

        White noise is integrated (cumulative sum), centred on zero and
        rescaled so its standard deviation equals ``amplitude``. Requests
        shorter than two samples return zeros.

        Args:
            duration: Duration in seconds
            sample_rate: Sample rate in Hz
            amplitude: Noise amplitude (standard deviation of the samples)

        Returns:
            float32 numpy array of ``int(duration * sample_rate)`` samples
        """
        return self._brown_samples(int(duration * sample_rate), amplitude)

    def generate_gaussian_noise(self, duration: float, sample_rate: int = 16000,
                                amplitude: float = 0.1) -> np.ndarray:
        """
        Generate Gaussian (normal distribution) noise.

        Args:
            duration: Duration in seconds
            sample_rate: Sample rate in Hz
            amplitude: Noise amplitude (standard deviation)

        Returns:
            float32 numpy array of ``int(duration * sample_rate)`` samples
        """
        return self._gaussian_samples(int(duration * sample_rate), amplitude)

    def generate_background_noise(self, duration: float, sample_rate: int = 16000,
                                  amplitude: float = 0.05) -> np.ndarray:
        """
        Generate realistic background noise (mixture of different noise types).

        The mixture is white noise at 0.3x, pink noise at 0.5x and a 60 Hz
        sine hum at 0.2x of ``amplitude``.

        Args:
            duration: Duration in seconds
            sample_rate: Sample rate in Hz
            amplitude: Overall noise amplitude

        Returns:
            float32 numpy array of ``int(duration * sample_rate)`` samples
        """
        return self._background_samples(int(duration * sample_rate),
                                        sample_rate, amplitude)

    # ------------------------------------------------------------------
    # WAV-level operations
    # ------------------------------------------------------------------

    def inject_noise(self, audio_data: bytes, noise_type: "NoiseType",
                     noise_level: float = 0.1) -> bytes:
        """
        Inject noise into existing audio data.

        Each channel receives its own independently generated noise, added to
        the signal on a [-1.0, 1.0] full-scale range; the sum is clipped to
        that range before conversion back to 16-bit PCM.

        Args:
            audio_data: Original audio bytes (WAV format, 16-bit samples)
            noise_type: Type of noise to inject
            noise_level: Noise amplitude on the [-1.0, 1.0] full-scale range
                (it is not scaled by the level of the input signal)

        Returns:
            Audio bytes (WAV) with noise injected; channel count, sample
            width and sample rate match the input

        Raises:
            Exception: If noise injection fails (unreadable WAV, sample width
                other than 2 bytes, unsupported noise type, ...)
        """
        try:
            with wave.open(io.BytesIO(audio_data), 'rb') as wav_file:
                frames = wav_file.readframes(wav_file.getnframes())
                sample_rate = wav_file.getframerate()
                channels = wav_file.getnchannels()
                sample_width = wav_file.getsampwidth()

            if sample_width != 2:
                raise Exception(f"Unsupported sample width: {sample_width}")

            # int16 PCM -> float on a [-1, 1] scale, one column per channel
            # (WAV frames are interleaved, so a row is one frame).
            audio = np.frombuffer(frames, dtype=np.int16).astype(np.float32) / 32767.0
            audio = audio.reshape(-1, channels)
            frame_count = audio.shape[0]

            for ch in range(channels):
                # Generate from the frame count directly: going through a
                # duration in seconds and back can lose a sample to rounding.
                noise = self._synthesize(noise_type, frame_count, sample_rate,
                                         noise_level)
                audio[:, ch] += noise

            # Clip to prevent overflow, then round (not truncate) to int16.
            clipped = np.clip(audio, -1.0, 1.0)
            audio_int16 = np.rint(clipped * 32767).astype(np.int16)

            output = io.BytesIO()
            with wave.open(output, 'wb') as output_wav:
                output_wav.setnchannels(channels)
                output_wav.setsampwidth(sample_width)
                output_wav.setframerate(sample_rate)
                output_wav.writeframes(audio_int16.tobytes())

            self.logger.debug("Injected %s noise at level %s", noise_type, noise_level)
            return output.getvalue()
        except Exception as e:
            self.logger.error("Noise injection failed: %s", e)
            raise Exception(f"Failed to inject noise: {str(e)}") from e

    def create_pure_noise(self, noise_type: "NoiseType", duration: float = 1.0,
                          sample_rate: int = 16000, amplitude: float = 0.3) -> bytes:
        """
        Create pure noise audio file for testing.

        Args:
            noise_type: Type of noise to generate
            duration: Duration in seconds
            sample_rate: Sample rate in Hz
            amplitude: Noise amplitude on the [-1.0, 1.0] full-scale range

        Returns:
            WAV audio bytes (mono, 16-bit) containing pure noise

        Raises:
            Exception: If generation fails (e.g. unsupported noise type)
        """
        try:
            noise = self._synthesize(noise_type, int(duration * sample_rate),
                                     sample_rate, amplitude)

            # Clip to full scale, then round (not truncate) to int16.
            noise_int16 = np.rint(np.clip(noise, -1.0, 1.0) * 32767).astype(np.int16)

            output = io.BytesIO()
            with wave.open(output, 'wb') as wav_file:
                wav_file.setnchannels(1)  # Mono
                wav_file.setsampwidth(2)  # 16-bit
                wav_file.setframerate(sample_rate)
                wav_file.writeframes(noise_int16.tobytes())
            return output.getvalue()
        except Exception as e:
            self.logger.error("Pure noise generation failed: %s", e)
            raise Exception(f"Failed to create pure noise: {str(e)}") from e
# Global noise generator instance: a single NoiseGenerator created when this
# module is imported and exposed under the module-level name `noise_generator`.
noise_generator = NoiseGenerator()