File size: 2,963 Bytes
5669b22 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 | import base64
from typing import Any

from pydub import AudioSegment
from pydub.utils import make_chunks

from ..agent.output_types import Actions
from ..agent.output_types import DisplayText
def _get_volume_by_chunks(audio: AudioSegment, chunk_length_ms: int) -> list[float]:
    """
    Calculate the normalized volume (RMS) for each chunk of the audio.

    Each chunk's RMS is divided by the largest RMS found among all chunks,
    so the loudest chunk maps to 1.0.

    Parameters:
        audio (AudioSegment): The audio segment to process.
        chunk_length_ms (int): The length of each audio chunk in milliseconds.

    Returns:
        list: Normalized volumes for each chunk.

    Raises:
        ValueError: If the audio yields no chunks or every chunk has an RMS of 0.
    """
    volumes = [chunk.rms for chunk in make_chunks(audio, chunk_length_ms)]

    # default=0 routes an empty chunk list into the same descriptive error as
    # all-silent audio, instead of max()'s own "empty sequence" ValueError.
    max_volume = max(volumes, default=0)
    if max_volume == 0:
        raise ValueError("Audio is empty or all zero.")

    return [volume / max_volume for volume in volumes]
def prepare_audio_payload(
    audio_path: str | None,
    chunk_length_ms: int = 20,
    display_text: DisplayText | None = None,
    actions: Actions | None = None,
    forwarded: bool = False,
) -> dict[str, Any]:
    """
    Prepares the audio payload for sending to a broadcast endpoint.

    If audio_path is None (or empty), returns a payload with audio=None and an
    empty volume list for silent display.

    Parameters:
        audio_path (str | None): The path to the audio file to be processed, or None for silent display
        chunk_length_ms (int): The length of each audio chunk in milliseconds
        display_text (DisplayText, optional): Text to be displayed with the audio.
            A DisplayText instance is converted with to_dict(); any other value
            is placed in the payload unchanged.
        actions (Actions, optional): Actions associated with the audio
        forwarded (bool): Flag copied verbatim into the payload's "forwarded" field

    Returns:
        dict: The audio payload to be sent, with keys "type", "audio" (base64
        encoded wav data or None), "volumes", "slice_length", "display_text",
        "actions" and "forwarded".

    Raises:
        ValueError: If the audio file cannot be loaded or converted to wav, or
            if the volume calculation rejects the audio as empty/silent.
    """
    if isinstance(display_text, DisplayText):
        display_text = display_text.to_dict()

    # Defaults describe the silent-display payload; they are overwritten below
    # only when there is an audio file to process.
    audio_base64 = None
    volumes = []

    if audio_path:
        try:
            audio = AudioSegment.from_file(audio_path)
            # export() hands back an open file handle; the with-block makes
            # sure it is closed once the wav bytes have been read.
            with audio.export(format="wav") as wav_file:
                audio_bytes = wav_file.read()
        except Exception as e:
            raise ValueError(
                f"Error loading or converting generated audio file to wav file '{audio_path}': {e}"
            ) from e

        audio_base64 = base64.b64encode(audio_bytes).decode("utf-8")
        volumes = _get_volume_by_chunks(audio, chunk_length_ms)

    return {
        "type": "audio",
        "audio": audio_base64,
        "volumes": volumes,
        "slice_length": chunk_length_ms,
        "display_text": display_text,
        "actions": actions.to_dict() if actions else None,
        "forwarded": forwarded,
    }
# Example usage (returns a single payload dict):
# payload = prepare_audio_payload("path/to/audio.mp3", chunk_length_ms=20)
|