Spaces:

atsushieee
/

improvisation-lab

Sleeping

App Files Files Community

improvisation-lab / improvisation_lab /infrastructure /audio /direct_processor.py

atsushieee

Upload folder using huggingface_hub

c1e08a0 verified about 1 year ago

raw

history blame contribute delete

3.58 kB

	"""Module for handling microphone input and audio processing.

	This module provides functionality for real-time audio capture from a microphone,
	with support for buffering and callback-based processing of audio data.
	"""

	from typing import Callable

	import numpy as np
	import pyaudio

	from improvisation_lab.infrastructure.audio.audio_processor import \
	AudioProcessor


	class DirectAudioProcessor(AudioProcessor):
	    """Handle real-time audio input from a microphone via PyAudio.

	    This class provides functionality to:
	    1. Capture audio from the default input device (no device index is
	       passed to PyAudio, so its default is used)
	    2. Hand each incoming chunk to the buffering helpers inherited from
	       ``AudioProcessor`` (``_append_to_buffer`` / ``_process_buffer``)
	    3. Let those helpers run the user-provided callback on buffered data

	    The buffering and callback dispatch themselves are implemented in the
	    base class, not here; this class only owns the PyAudio session and the
	    input stream.
	    """

	    def __init__(
	        self,
	        sample_rate: int,
	        callback: Callable[[np.ndarray], None] | None = None,
	        buffer_duration: float = 0.2,
	    ):
	        """Initialize DirectAudioProcessor.

	        Args:
	            sample_rate: Audio sample rate in Hz
	            callback: Optional callback function to process audio data
	            buffer_duration: Duration of audio buffer in seconds before
	                processing
	        """
	        super().__init__(sample_rate, callback, buffer_duration)
	        # PyAudio session; created in start_recording, released in
	        # stop_recording. None while no recording is in progress.
	        self.audio = None
	        # Open input stream belonging to ``self.audio``; None when idle.
	        self._stream = None

	    def _audio_callback(
	        self, in_data: bytes, frame_count: int, time_info: dict, status: int
	    ) -> tuple[bytes, int]:
	        """Process incoming audio data.

	        PyAudio invokes this callback whenever a new chunk of input is
	        available. The raw bytes are reinterpreted as float32 samples
	        (matching the ``paFloat32`` format requested in
	        ``start_recording``), appended via the inherited
	        ``_append_to_buffer`` and then ``_process_buffer`` is called.

	        Note:
	            The signature follows PyAudio's stream-callback contract: four
	            positional arguments in, a ``(data, flag)`` tuple out.
	            ``frame_count``, ``time_info`` and ``status`` are accepted but
	            not used here.

	        Args:
	            in_data: Raw audio input data as bytes
	            frame_count: Number of frames in the input
	            time_info: Dictionary with timing information
	            status: Stream status flag

	        Returns:
	            Tuple of (in_data, pyaudio.paContinue)
	        """
	        # Zero-copy view over the incoming bytes as float32 samples.
	        audio_data = np.frombuffer(in_data, dtype=np.float32)
	        self._append_to_buffer(audio_data)
	        self._process_buffer()
	        return (in_data, pyaudio.paContinue)

	    def start_recording(self) -> None:
	        """Start recording from microphone.

	        Opens a mono float32 input stream at ``self.sample_rate`` with
	        ``_audio_callback`` registered as the stream callback.

	        Raises:
	            RuntimeError: If recording is already in progress.
	            Exception: Whatever PyAudio raises when the stream cannot be
	                opened; the PyAudio session is released before re-raising.
	        """
	        if self.is_recording:
	            raise RuntimeError("Recording is already in progress")

	        self.audio = pyaudio.PyAudio()
	        try:
	            self._stream = self.audio.open(
	                format=pyaudio.paFloat32,
	                channels=1,
	                rate=self.sample_rate,
	                input=True,
	                stream_callback=self._audio_callback,
	            )
	        except Exception:
	            # Opening failed: release the PortAudio session instead of
	            # leaking it, and leave the object in its idle state.
	            self.audio.terminate()
	            self.audio = None
	            raise
	        self.is_recording = True

	    def stop_recording(self) -> None:
	        """Stop recording from microphone.

	        Stops and closes the stream, then terminates the PyAudio session.
	        Every cleanup step is attempted even if an earlier one raises, and
	        the object is always returned to its idle state; the error (if any)
	        still propagates to the caller.

	        Raises:
	            RuntimeError: If recording is not in progress.
	        """
	        if not self.is_recording:
	            raise RuntimeError("Recording is not in progress")

	        try:
	            try:
	                self._stream.stop_stream()
	            finally:
	                self._stream.close()
	        finally:
	            try:
	                self.audio.terminate()
	            finally:
	                # Reset state last so a failed teardown cannot leave the
	                # processor permanently flagged as recording.
	                self.is_recording = False
	                self._stream = None
	                self.audio = None