Spaces:

atsushieee
/

improvisation-lab

Sleeping

File size: 3,582 Bytes

c1e08a0

"""Module for handling microphone input and audio processing.

This module provides functionality for real-time audio capture from a microphone,
with support for buffering and callback-based processing of audio data.
"""

from typing import Callable

import numpy as np
import pyaudio

from improvisation_lab.infrastructure.audio.audio_processor import \
    AudioProcessor


class DirectAudioProcessor(AudioProcessor):
    """Handle real-time audio input from microphone.

    This class provides functionality to:
    1. Capture audio from the default microphone
    2. Buffer the incoming audio data
    3. Process the buffered data through a user-provided callback function

    The audio processing is done in chunks, with the chunk size determined by
    the buffer_duration parameter. This allows for efficient real-time
    processing of audio data, such as pitch detection.

    Buffering and callback dispatch are delegated to the ``AudioProcessor``
    base class (``_append_to_buffer`` / ``_process_buffer``); this class only
    owns the PyAudio session and the input stream.
    """

    def __init__(
        self,
        sample_rate: int,
        callback: Callable[[np.ndarray], None] | None = None,
        buffer_duration: float = 0.2,
    ):
        """Initialize DirectAudioProcessor.

        No audio device is opened here; that happens in ``start_recording``.

        Args:
            sample_rate: Audio sample rate in Hz
            callback: Optional callback function to process audio data
            buffer_duration: Duration of audio buffer in seconds before processing
        """
        super().__init__(sample_rate, callback, buffer_duration)
        # Both handles are None whenever no recording is in progress.
        self.audio = None
        self._stream = None

    def _audio_callback(
        self, in_data: bytes, frame_count: int, time_info: dict, status: int
    ) -> tuple[bytes, int]:
        """Process incoming audio data.

        This callback is automatically called by PyAudio
        when new audio data is available.
        The audio data is converted to a numpy array and:
        1. Appended to the internal buffer via ``_append_to_buffer``
        2. Handed to ``_process_buffer``, which is expected to invoke the
           user-provided callback (implemented in the base class)

        Note:
            This method follows PyAudio's callback function specification.
            It must accept four arguments (in_data, frame_count, time_info, status)
            and return a tuple of (bytes, status_flag).
            These arguments are automatically provided by PyAudio
            when calling this callback.

        Args:
            in_data: Raw audio input data as bytes
            frame_count: Number of frames in the input
            time_info: Dictionary with timing information
            status: Stream status flag

        Returns:
            Tuple of (input_data, pyaudio.paContinue)
        """
        # The stream is opened with paFloat32, so the raw bytes are
        # reinterpreted as float32 samples (no copy is made here).
        audio_data = np.frombuffer(in_data, dtype=np.float32)
        self._append_to_buffer(audio_data)
        self._process_buffer()
        return (in_data, pyaudio.paContinue)

    def start_recording(self) -> None:
        """Start recording from microphone.

        Opens a mono float32 input stream at ``self.sample_rate`` whose data
        is delivered to ``_audio_callback``.

        Raises:
            RuntimeError: If recording is already in progress.
            Exception: Whatever PyAudio raises when the stream cannot be
                opened; the PyAudio session is terminated before re-raising.
        """
        if self.is_recording:
            raise RuntimeError("Recording is already in progress")

        audio = pyaudio.PyAudio()
        try:
            stream = audio.open(
                format=pyaudio.paFloat32,
                channels=1,
                rate=self.sample_rate,
                input=True,
                stream_callback=self._audio_callback,
            )
        except Exception:
            # Opening can fail (e.g. no input device); release the PyAudio
            # session instead of leaking it, and leave the instance state
            # untouched so a later start_recording() can retry cleanly.
            audio.terminate()
            raise

        # Publish the handles only once everything succeeded.
        self.audio = audio
        self._stream = stream
        self.is_recording = True

    def stop_recording(self) -> None:
        """Stop recording from microphone.

        The PyAudio session is always terminated and the recording state is
        always reset, even if stopping or closing the stream fails.

        Raises:
            RuntimeError: If recording is not in progress.
            Exception: Whatever PyAudio raises while stopping, closing or
                terminating; the instance is reset to "not recording" first.
        """
        if not self.is_recording:
            raise RuntimeError("Recording is not in progress")

        try:
            self._stream.stop_stream()
            self._stream.close()
        finally:
            try:
                self.audio.terminate()
            finally:
                # Reset unconditionally so a failed shutdown cannot leave the
                # processor stuck in the "recording" state.
                self.is_recording = False
                self._stream = None
                self.audio = None