"""
VoiceProfile data model: Reference embedding and speaker identification.

Represents a target voice profile extracted from a reference audio clip.
"""

from dataclasses import dataclass, field
from typing import Optional

import numpy as np


@dataclass(eq=False)
class VoiceProfile:
    """
    Voice profile representing a target speaker.

    Contains the embedding vector and metadata used for voice identification.

    Equality is defined explicitly (see ``__eq__``) because the generated
    dataclass comparison cannot compare numpy arrays element-wise.
    """

    # Core identification
    speaker_id: str
    embedding: np.ndarray  # 512-dimensional vector from pyannote

    # Source information
    reference_file: str
    reference_duration: float  # seconds

    # Quality metrics
    embedding_quality: float = 1.0  # 0-1 score indicating embedding confidence
    num_speech_segments: int = 0

    # Metadata
    sample_rate: int = 16000
    created_at: Optional[str] = None

    def __post_init__(self):
        """
        Validate voice profile after initialization.

        The embedding is coerced with ``np.asarray`` so that plain sequences
        (e.g. lists) are accepted as well as arrays.

        Raises:
            ValueError: If the embedding is not 1-dimensional, or the
                embedding quality is outside [0, 1] (NaN included).
        """
        self.embedding = np.asarray(self.embedding)
        if self.embedding.ndim != 1:
            raise ValueError("Embedding must be 1-dimensional vector")
        # Written as a positive range check so that NaN is rejected too
        # (NaN fails every comparison, so "< 0 or > 1" would let it through).
        if not (0 <= self.embedding_quality <= 1):
            raise ValueError("Embedding quality must be between 0 and 1")

    def __eq__(self, other: object) -> bool:
        """
        Compare two profiles field by field.

        The embedding is compared with ``np.array_equal``; a plain ``==`` on
        arrays yields an array whose truth value is ambiguous.
        """
        if other.__class__ is not self.__class__:
            return NotImplemented
        return (
            self.speaker_id == other.speaker_id
            and np.array_equal(self.embedding, other.embedding)
            and self.reference_file == other.reference_file
            and self.reference_duration == other.reference_duration
            and self.embedding_quality == other.embedding_quality
            and self.num_speech_segments == other.num_speech_segments
            and self.sample_rate == other.sample_rate
            and self.created_at == other.created_at
        )

    # Defining __eq__ makes instances unhashable; stated explicitly because
    # the profile is mutable and holds an array.
    __hash__ = None

    def similarity(self, other_embedding: np.ndarray) -> float:
        """
        Calculate cosine similarity with another embedding.

        Args:
            other_embedding: Another voice embedding vector

        Returns:
            Similarity score (0-1, higher = more similar). Negative cosine
            values are clamped to 0. If either vector has zero norm or
            contains non-finite values the similarity is undefined and 0.0
            is returned, so a degenerate embedding never counts as a match.

        Raises:
            ValueError: If ``other_embedding`` is not a vector or its length
                differs from this profile's embedding.
        """
        reference = np.asarray(self.embedding, dtype=np.float64)
        # Squeeze so that shapes like (1, N) are accepted as vectors.
        candidate = np.atleast_1d(
            np.squeeze(np.asarray(other_embedding, dtype=np.float64))
        )
        if candidate.ndim != 1:
            raise ValueError("Input vector should be 1-D.")

        # np.dot raises ValueError on a length mismatch.
        dot = float(np.dot(reference, candidate))
        norm_product = float(
            np.linalg.norm(reference) * np.linalg.norm(candidate)
        )
        if norm_product == 0.0 or not np.isfinite(norm_product):
            return 0.0

        score = dot / norm_product
        if not np.isfinite(score):
            return 0.0
        return max(0.0, min(1.0, score))  # Clamp to [0, 1]

    def matches(self, other_embedding: np.ndarray, threshold: float = 0.7) -> bool:
        """
        Check if another embedding matches this voice profile.

        Args:
            other_embedding: Voice embedding to compare
            threshold: Similarity threshold for match (default: 0.7)

        Returns:
            True if the similarity is at or above the threshold
        """
        return self.similarity(other_embedding) >= threshold

    def to_dict(self) -> dict:
        """
        Convert voice profile to dictionary.

        Returns:
            Dictionary representation; the embedding is converted to a
            plain list via ``ndarray.tolist()``.
        """
        return {
            "speaker_id": self.speaker_id,
            "embedding": self.embedding.tolist(),
            "reference_file": self.reference_file,
            "reference_duration": self.reference_duration,
            "embedding_quality": self.embedding_quality,
            "num_speech_segments": self.num_speech_segments,
            "sample_rate": self.sample_rate,
            "created_at": self.created_at,
        }

    @classmethod
    def from_dict(cls, data: dict) -> "VoiceProfile":
        """
        Create voice profile from dictionary.

        Args:
            data: Dictionary representation (as produced by ``to_dict``).
                The input dictionary is not modified.

        Returns:
            VoiceProfile instance

        Raises:
            KeyError: If ``data`` has no ``"embedding"`` entry.
            TypeError: If ``data`` contains keys that are not fields.
        """
        fields = dict(data)  # shallow copy so the caller's dict is untouched
        fields["embedding"] = np.array(fields["embedding"])
        return cls(**fields)

    def save(self, file_path: str):
        """
        Save voice profile to file.

        Args:
            file_path: Output file path (.npz format). ``np.savez`` appends
                the ``.npz`` extension when the path does not already end
                with it.
        """
        np.savez(
            file_path,
            speaker_id=self.speaker_id,
            embedding=self.embedding,
            reference_file=self.reference_file,
            reference_duration=self.reference_duration,
            embedding_quality=self.embedding_quality,
            num_speech_segments=self.num_speech_segments,
            sample_rate=self.sample_rate,
            # None cannot be stored as a plain array; "" marks "not set".
            created_at=self.created_at or "",
        )

    @classmethod
    def load(cls, file_path: str) -> "VoiceProfile":
        """
        Load voice profile from file.

        Args:
            file_path: Input file path (.npz format). If the path does not
                exist but ``file_path + ".npz"`` does, the latter is loaded,
                so the path given to ``save`` can be reused unchanged.

        Returns:
            VoiceProfile instance
        """
        import os

        path = file_path
        if isinstance(file_path, (str, os.PathLike)):
            candidate = f"{os.fspath(file_path)}.npz"
            if not os.path.exists(file_path) and os.path.exists(candidate):
                path = candidate

        # SECURITY NOTE(review): allow_pickle=True lets a crafted .npz file
        # execute arbitrary code while loading. It is kept for compatibility
        # with existing files; only load profiles from trusted sources, and
        # consider switching to allow_pickle=False (files written by save()
        # with str/number fields do not need pickling).
        with np.load(path, allow_pickle=True) as data:
            # Convert to str before the emptiness test rather than relying
            # on the truth value of a 0-d numpy string array.
            created_at = str(data["created_at"])
            return cls(
                speaker_id=str(data["speaker_id"]),
                embedding=np.asarray(data["embedding"]),
                reference_file=str(data["reference_file"]),
                reference_duration=float(data["reference_duration"]),
                embedding_quality=float(data["embedding_quality"]),
                num_speech_segments=int(data["num_speech_segments"]),
                sample_rate=int(data["sample_rate"]),
                created_at=created_at or None,
            )