atsushieee's picture
Upload folder using huggingface_hub
c1e08a0 verified
"""Module for handling microphone input and audio processing.
This module provides functionality for real-time audio capture from a microphone,
with support for buffering and callback-based processing of audio data.
"""
from typing import Callable
import numpy as np
import pyaudio
from improvisation_lab.infrastructure.audio.audio_processor import \
AudioProcessor
class DirectAudioProcessor(AudioProcessor):
    """Handle real-time audio input from microphone.

    This class provides functionality to:
        1. Capture audio from the default microphone
        2. Buffer the incoming audio data
        3. Process the buffered data through a user-provided callback function

    The audio processing is done in chunks, with the chunk size determined by
    the buffer_duration parameter. This allows for efficient real-time
    processing of audio data, such as pitch detection.
    """

    def __init__(
        self,
        sample_rate: int,
        callback: Callable[[np.ndarray], None] | None = None,
        buffer_duration: float = 0.2,
    ):
        """Initialize DirectAudioProcessor.

        Args:
            sample_rate: Audio sample rate in Hz
            callback: Optional callback function to process audio data
            buffer_duration: Duration of audio buffer in seconds before processing
        """
        super().__init__(sample_rate, callback, buffer_duration)
        # Both handles only exist while a recording is in progress; they are
        # created in start_recording() and cleared in stop_recording().
        self.audio = None
        self._stream = None

    def _audio_callback(
        self, in_data: bytes, frame_count: int, time_info: dict, status: int
    ) -> tuple[bytes, int]:
        """Process incoming audio data.

        This callback is automatically called by PyAudio
        when new audio data is available.
        The audio data is converted to a numpy array and:
            1. Stored in the internal buffer
            2. Passed on for buffer processing (which invokes the
               user-provided callback function if one exists)

        Note:
            This method follows PyAudio's callback function specification.
            It must accept four arguments (in_data, frame_count, time_info, status)
            and return a tuple of (bytes, status_flag).
            These arguments are automatically provided by PyAudio
            when calling this callback.

        Args:
            in_data: Raw audio input data as bytes
            frame_count: Number of frames in the input
            time_info: Dictionary with timing information
            status: Stream status flag

        Returns:
            Tuple of (input_data, pyaudio.paContinue)
        """
        # Interpret the raw bytes as float32 samples, matching the
        # pyaudio.paFloat32 format the stream is opened with.
        audio_data = np.frombuffer(in_data, dtype=np.float32)
        self._append_to_buffer(audio_data)
        self._process_buffer()
        return (in_data, pyaudio.paContinue)

    def start_recording(self):
        """Start recording from microphone.

        Raises:
            RuntimeError: If a recording is already in progress.
        """
        if self.is_recording:
            raise RuntimeError("Recording is already in progress")

        audio = pyaudio.PyAudio()
        try:
            stream = audio.open(
                format=pyaudio.paFloat32,
                channels=1,
                rate=self.sample_rate,
                input=True,
                stream_callback=self._audio_callback,
            )
        except Exception:
            # Opening the stream failed (e.g. no usable input device), so
            # release the PyAudio instance instead of leaking it, and leave
            # the processor in its idle state.
            audio.terminate()
            raise

        # Only publish the handles once both were acquired successfully.
        self.audio = audio
        self._stream = stream
        self.is_recording = True

    def stop_recording(self):
        """Stop recording from microphone.

        Every teardown step is attempted and the internal state is reset even
        if one of the steps raises; the exception then propagates to the caller.

        Raises:
            RuntimeError: If no recording is in progress.
        """
        if not self.is_recording:
            raise RuntimeError("Recording is not in progress")

        try:
            self._stream.stop_stream()
        finally:
            try:
                self._stream.close()
            finally:
                try:
                    self.audio.terminate()
                finally:
                    # Always return to the idle state so that a failed
                    # teardown does not block a later start_recording().
                    self.is_recording = False
                    self._stream = None
                    self.audio = None