# File: test_ui/src/open_llm_vtuber/utils/stream_audio.py
# Hosting-page metadata: uploaded by britto224 ("Upload 130 files"), commit 5669b22 (verified).
import base64
from typing import Any

from pydub import AudioSegment
from pydub.utils import make_chunks

from ..agent.output_types import Actions
from ..agent.output_types import DisplayText
def _get_volume_by_chunks(audio: AudioSegment, chunk_length_ms: int) -> list:
"""
Calculate the normalized volume (RMS) for each chunk of the audio.
Parameters:
audio (AudioSegment): The audio segment to process.
chunk_length_ms (int): The length of each audio chunk in milliseconds.
Returns:
list: Normalized volumes for each chunk.
"""
chunks = make_chunks(audio, chunk_length_ms)
volumes = [chunk.rms for chunk in chunks]
max_volume = max(volumes)
if max_volume == 0:
raise ValueError("Audio is empty or all zero.")
return [volume / max_volume for volume in volumes]
def prepare_audio_payload(
    audio_path: str | None,
    chunk_length_ms: int = 20,
    display_text: DisplayText | None = None,
    actions: Actions | None = None,
    forwarded: bool = False,
) -> dict[str, Any]:
    """
    Prepares the audio payload for sending to a broadcast endpoint.

    If audio_path is None (or empty), returns a payload with audio=None and an
    empty volume list for silent display. Otherwise the file is loaded,
    converted to wav, base64-encoded, and accompanied by per-chunk normalized
    volumes.

    Parameters:
        audio_path (str | None): The path to the audio file to be processed, or None for silent display
        chunk_length_ms (int): The length of each audio chunk in milliseconds
        display_text (DisplayText, optional): Text to be displayed with the audio.
            A DisplayText instance is converted with to_dict(); any other value
            is placed in the payload unchanged.
        actions (Actions, optional): Actions associated with the audio
        forwarded (bool): Value copied verbatim into the payload's "forwarded" field

    Returns:
        dict: The audio payload to be sent, with the keys "type", "audio",
        "volumes", "slice_length", "display_text", "actions" and "forwarded".

    Raises:
        ValueError: If the audio file cannot be loaded or converted to wav
            (the original error is attached as the cause), or if
            _get_volume_by_chunks rejects the audio.
    """
    if isinstance(display_text, DisplayText):
        display_text = display_text.to_dict()

    # Silent-display defaults; overwritten below when an audio file is given.
    audio_base64 = None
    volumes = []

    if audio_path:
        try:
            audio = AudioSegment.from_file(audio_path)
            # export() without a target hands back an open file object;
            # the with-block makes sure it is closed once the bytes are read.
            with audio.export(format="wav") as wav_file:
                audio_bytes = wav_file.read()
        except Exception as e:
            # Chain the original exception so the root cause stays visible
            # in the traceback.
            raise ValueError(
                f"Error loading or converting generated audio file to wav file '{audio_path}': {e}"
            ) from e

        audio_base64 = base64.b64encode(audio_bytes).decode("utf-8")
        volumes = _get_volume_by_chunks(audio, chunk_length_ms)

    return {
        "type": "audio",
        "audio": audio_base64,
        "volumes": volumes,
        "slice_length": chunk_length_ms,
        "display_text": display_text,
        "actions": actions.to_dict() if actions else None,
        "forwarded": forwarded,
    }
# Example usage:
# payload = prepare_audio_payload("path/to/audio.mp3", display_text="Hello")