"""
Speaker Profile Model

Represents a speaker's voice characteristics extracted from audio,
used for identification and matching.
"""

from dataclasses import dataclass
from typing import Optional

import numpy as np


@dataclass
class SpeakerProfile:
    """
    Speaker profile containing voice embeddings and metadata.

    The profile is validated on construction; see ``__post_init__`` for the
    rules. Two profiles compare equal when they are of the same class and all
    fields match, with ``embedding`` compared element-wise via
    ``np.array_equal``. Instances are not hashable.

    Attributes:
        speaker_id: Unique identifier (e.g., "SPEAKER_00", "SPEAKER_01")
        embedding: Numerical representation of voice characteristics
        source_audio: Path to the audio file this profile was extracted from
        start_time: Start time in source audio (seconds)
        end_time: End time in source audio (seconds)
        confidence: Quality/reliability metric (0.0-1.0)
    """

    # Shortest accepted segment length, in seconds. Left unannotated on
    # purpose: the dataclass decorator only turns *annotated* names into
    # fields, so this stays a plain class constant and out of __init__.
    MIN_DURATION_SECONDS = 3.0

    speaker_id: str
    embedding: np.ndarray
    source_audio: str
    start_time: float
    end_time: float
    confidence: float = 1.0

    def __post_init__(self) -> None:
        """
        Validate speaker profile data.

        Raises:
            ValueError: If the embedding is ``None``, a scalar (0-d), or holds
                no elements; if ``end_time`` is not strictly greater than
                ``start_time`` (this includes NaN timestamps); if
                ``confidence`` lies outside ``[0.0, 1.0]``; or if the segment
                is shorter than ``MIN_DURATION_SECONDS``.
        """
        # np.ndim/np.size instead of len(): len() raises TypeError on 0-d
        # arrays and reports a non-zero length for shapes such as (n, 0)
        # that contain no values at all.
        if (
            self.embedding is None
            or np.ndim(self.embedding) == 0
            or np.size(self.embedding) == 0
        ):
            raise ValueError("Embedding vector cannot be empty")

        # Negated positive comparison so NaN timestamps are rejected: every
        # comparison with NaN is False, which would let `end <= start` pass.
        if not self.end_time > self.start_time:
            raise ValueError(
                f"End time ({self.end_time}) must be after start time ({self.start_time})"
            )

        if not 0.0 <= self.confidence <= 1.0:
            raise ValueError(f"Confidence must be between 0.0 and 1.0, got {self.confidence}")

        duration = self.duration
        if not duration >= self.MIN_DURATION_SECONDS:
            raise ValueError(
                f"Duration ({duration}s) is too short "
                f"(minimum {self.MIN_DURATION_SECONDS:g} seconds recommended)"
            )

    @property
    def duration(self) -> float:
        """Calculate duration of the profile in seconds."""
        return self.end_time - self.start_time

    def __eq__(self, other: object):
        """
        Compare two profiles field by field.

        Defined explicitly because the dataclass-generated ``__eq__`` compares
        ``embedding`` with ``==``, which yields an array and raises
        ``ValueError`` when that array is evaluated as a boolean.

        Returns:
            ``NotImplemented`` when ``other`` is not of exactly the same
            class; otherwise ``True`` if every field matches (embeddings must
            have the same shape and elements), else ``False``.
        """
        if other.__class__ is not self.__class__:
            return NotImplemented
        # Cheap scalar fields first; the array comparison runs only if they
        # all match.
        return (
            self.speaker_id == other.speaker_id
            and self.source_audio == other.source_audio
            and self.start_time == other.start_time
            and self.end_time == other.end_time
            and self.confidence == other.confidence
            and bool(np.array_equal(self.embedding, other.embedding))
        )

    def __repr__(self) -> str:
        """Return a compact summary that omits the embedding values."""
        return (
            f"SpeakerProfile(speaker_id='{self.speaker_id}', "
            f"duration={self.duration:.2f}s, "
            f"confidence={self.confidence:.2f})"
        )