|
|
"""Basic data types for multimodal ChatML format.""" |
|
|
|
|
|
from dataclasses import dataclass |
|
|
from typing import Dict, List, Optional, Union |
|
|
|
|
|
|
|
|
@dataclass |
|
|
class AudioContent: |
|
|
audio_url: str |
|
|
|
|
|
raw_audio: Optional[str] = None |
|
|
offset: Optional[float] = None |
|
|
duration: Optional[float] = None |
|
|
row_id: Optional[int] = None |
|
|
type: str = "audio" |
|
|
|
|
|
|
|
|
@dataclass |
|
|
class TextContent: |
|
|
text: str |
|
|
type: str = "text" |
|
|
|
|
|
|
|
|
@dataclass |
|
|
class Message: |
|
|
role: str |
|
|
content: Union[str, AudioContent, TextContent, List[Union[str, AudioContent, TextContent]]] |
|
|
recipient: Optional[str] = None |
|
|
|
|
|
|
|
|
@dataclass |
|
|
class ChatMLSample: |
|
|
"""Dataclass to hold multimodal ChatML data.""" |
|
|
|
|
|
messages: List[Message] |
|
|
start_index: Optional[int] = None |
|
|
misc: Optional[Dict] = None |
|
|
speaker: Optional[str] = None |
|
|
|