voice-tools / src /models /speaker_profile.py
jcudit's picture
jcudit HF Staff
fix: correct gitignore to only exclude root-level models directory, not src/models package
0456b70
"""
Speaker Profile Model
Represents a speaker's voice characteristics extracted from audio,
used for identification and matching.
"""
from dataclasses import dataclass
from typing import Optional
import numpy as np
@dataclass
class SpeakerProfile:
    """
    Speaker profile containing voice embeddings and metadata.

    Attributes:
        speaker_id: Unique identifier (e.g., "SPEAKER_00", "SPEAKER_01")
        embedding: Numerical representation of voice characteristics
        source_audio: Path to the audio file this profile was extracted from
        start_time: Start time in source audio (seconds)
        end_time: End time in source audio (seconds)
        confidence: Quality/reliability metric (0.0-1.0)

    Raises:
        ValueError: On construction, if the embedding is None or empty, if
            end_time is not after start_time, if confidence is outside
            [0.0, 1.0], or if the segment is shorter than
            MIN_DURATION_SECONDS.
    """

    # Deliberately unannotated: dataclasses only turn *annotated* names into
    # fields, so these stay plain class-level constants.
    MIN_DURATION_SECONDS = 3.0
    # Slack for float rounding, e.g. 4.1 - 1.1 == 2.9999999999999996.
    _DURATION_TOLERANCE = 1e-9

    speaker_id: str
    embedding: np.ndarray
    source_audio: str
    start_time: float
    end_time: float
    confidence: float = 1.0

    def __post_init__(self):
        """
        Validate speaker profile data.

        Raises:
            ValueError: If any field violates the constraints described in
                the class docstring.
        """
        if self.embedding is None or len(self.embedding) == 0:
            raise ValueError("Embedding vector cannot be empty")

        if self.end_time <= self.start_time:
            raise ValueError(
                f"End time ({self.end_time}) must be after start time ({self.start_time})"
            )

        if not 0.0 <= self.confidence <= 1.0:
            raise ValueError(f"Confidence must be between 0.0 and 1.0, got {self.confidence}")

        duration = self.duration
        # Compare with a tolerance so a segment that is nominally exactly the
        # minimum length is not rejected because of binary float rounding.
        if duration < self.MIN_DURATION_SECONDS - self._DURATION_TOLERANCE:
            raise ValueError(
                f"Duration ({duration}s) is too short "
                f"(minimum {self.MIN_DURATION_SECONDS:g} seconds recommended)"
            )

    def __eq__(self, other: object) -> bool:
        """
        Compare two profiles field by field.

        The dataclass-generated ``__eq__`` compares fields as tuples, which
        raises ``ValueError`` for multi-element numpy arrays (the truth value
        of an element-wise comparison is ambiguous). The embedding is
        therefore compared with ``np.array_equal``.

        Returns:
            True if ``other`` is the same class and all fields match;
            ``NotImplemented`` if ``other`` is a different class.
        """
        if other.__class__ is not self.__class__:
            return NotImplemented
        # Cheap scalar comparisons first; the array comparison runs last.
        return (
            self.speaker_id == other.speaker_id
            and self.source_audio == other.source_audio
            and self.start_time == other.start_time
            and self.end_time == other.end_time
            and self.confidence == other.confidence
            and bool(np.array_equal(self.embedding, other.embedding))
        )

    @property
    def duration(self) -> float:
        """Calculate duration of the profile in seconds."""
        return self.end_time - self.start_time

    def __repr__(self) -> str:
        """Return a compact summary that omits the embedding array."""
        return (
            f"SpeakerProfile(speaker_id='{self.speaker_id}', "
            f"duration={self.duration:.2f}s, "
            f"confidence={self.confidence:.2f})"
        )