Spaces:
Paused
Paused
File size: 4,569 Bytes
0456b70 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 | """
VoiceProfile data model: Reference embedding and speaker identification.
Represents a target voice profile extracted from a reference audio clip.
"""
from dataclasses import dataclass, field
from typing import Optional
import numpy as np
@dataclass
class VoiceProfile:
    """
    Voice profile representing a target speaker.

    Holds a 1-D speaker embedding together with metadata about the reference
    audio it came from, and offers cosine-similarity matching plus dict and
    ``.npz`` (de)serialization.

    Raises:
        ValueError: On construction, if the embedding is not 1-dimensional or
            ``embedding_quality`` is outside [0, 1] (NaN included).
    """

    # Core identification
    speaker_id: str
    embedding: np.ndarray  # 1-D vector (512-dimensional from pyannote; length is not enforced)

    # Source information
    reference_file: str
    reference_duration: float  # seconds

    # Quality metrics
    embedding_quality: float = 1.0  # 0-1 score indicating embedding confidence
    num_speech_segments: int = 0

    # Metadata
    sample_rate: int = 16000
    created_at: Optional[str] = None

    def __post_init__(self):
        """Validate voice profile after initialization."""
        # Accept any array-like (e.g. a plain list decoded from JSON);
        # an existing ndarray is kept as-is, without copying.
        self.embedding = np.asarray(self.embedding)
        if self.embedding.ndim != 1:
            raise ValueError("Embedding must be 1-dimensional vector")
        # Written as a negated range check so that NaN is rejected too
        # (both `nan < 0` and `nan > 1` are False).
        if not (0 <= self.embedding_quality <= 1):
            raise ValueError("Embedding quality must be between 0 and 1")

    def similarity(self, other_embedding: np.ndarray) -> float:
        """
        Calculate cosine similarity with another embedding.

        Args:
            other_embedding: Another voice embedding vector. Singleton
                dimensions are squeezed away, so a (1, N) array is accepted.

        Returns:
            Similarity score clamped to [0, 1] (higher = more similar).
            Returns 0.0 when either vector has zero or non-finite norm, since
            cosine similarity is undefined there and must not count as a match.

        Raises:
            ValueError: If the vectors do not have the same 1-D shape.
        """
        own = np.asarray(self.embedding, dtype=float)
        other = np.atleast_1d(np.squeeze(np.asarray(other_embedding, dtype=float)))
        if other.ndim != 1 or own.shape != other.shape:
            raise ValueError(
                f"Embedding shape mismatch: expected {own.shape}, got {other.shape}"
            )

        norm_product = float(np.linalg.norm(own) * np.linalg.norm(other))
        # A NaN here would slip through the clamp below as 1.0, so degenerate
        # inputs are handled explicitly.
        if norm_product == 0.0 or not np.isfinite(norm_product):
            return 0.0

        score = float(np.dot(own, other)) / norm_product
        if score != score:  # NaN produced by NaN components
            return 0.0
        return max(0.0, min(1.0, score))  # Clamp to [0, 1]

    def matches(self, other_embedding: np.ndarray, threshold: float = 0.7) -> bool:
        """
        Check if another embedding matches this voice profile.

        Args:
            other_embedding: Voice embedding to compare
            threshold: Similarity threshold for match (default: 0.7)

        Returns:
            True if the similarity is at or above the threshold
        """
        return self.similarity(other_embedding) >= threshold

    def to_dict(self) -> dict:
        """
        Convert voice profile to dictionary.

        Returns:
            Dictionary representation; the embedding is converted to a plain
            list via ``ndarray.tolist()``.
        """
        return {
            "speaker_id": self.speaker_id,
            "embedding": self.embedding.tolist(),
            "reference_file": self.reference_file,
            "reference_duration": self.reference_duration,
            "embedding_quality": self.embedding_quality,
            "num_speech_segments": self.num_speech_segments,
            "sample_rate": self.sample_rate,
            "created_at": self.created_at,
        }

    @classmethod
    def from_dict(cls, data: dict) -> "VoiceProfile":
        """
        Create voice profile from dictionary.

        Args:
            data: Dictionary representation, as produced by ``to_dict``.
                The input is not modified.

        Returns:
            VoiceProfile instance
        """
        fields_copy = dict(data)  # shallow copy so the caller's dict is untouched
        fields_copy["embedding"] = np.asarray(fields_copy["embedding"])
        return cls(**fields_copy)

    def save(self, file_path: str):
        """
        Save voice profile to file.

        Args:
            file_path: Output file path (.npz format). ``np.savez`` appends
                ``.npz`` when the given name does not already end with it.
        """
        np.savez(
            file_path,
            # str() keeps these as plain unicode arrays (never pickled object
            # arrays), so the file can be read back with allow_pickle=False.
            speaker_id=str(self.speaker_id),
            embedding=self.embedding,
            reference_file=str(self.reference_file),
            reference_duration=self.reference_duration,
            embedding_quality=self.embedding_quality,
            num_speech_segments=self.num_speech_segments,
            sample_rate=self.sample_rate,
            # None cannot be stored without pickling; "" stands in for it.
            created_at=self.created_at or "",
        )

    @classmethod
    def load(cls, file_path: str) -> "VoiceProfile":
        """
        Load voice profile from file.

        Args:
            file_path: Input file path (.npz format). If the path does not
                exist but ``file_path + ".npz"`` does, the latter is used,
                mirroring the suffix ``save`` may have appended.

        Returns:
            VoiceProfile instance
        """
        import os

        path = file_path
        if isinstance(file_path, (str, os.PathLike)):
            candidate = os.fspath(file_path)
            if (
                isinstance(candidate, str)
                and not candidate.endswith(".npz")
                and not os.path.exists(candidate)
                and os.path.exists(candidate + ".npz")
            ):
                path = candidate + ".npz"

        # SECURITY: pickle is deliberately disabled. Unpickling can execute
        # arbitrary code from a crafted file, and files written by `save`
        # contain only numeric and unicode arrays.
        with np.load(path, allow_pickle=False) as data:
            created_at = str(data["created_at"])
            return cls(
                speaker_id=str(data["speaker_id"]),
                embedding=np.array(data["embedding"]),
                reference_file=str(data["reference_file"]),
                reference_duration=float(data["reference_duration"]),
                embedding_quality=float(data["embedding_quality"]),
                num_speech_segments=int(data["num_speech_segments"]),
                sample_rate=int(data["sample_rate"]),
                # save() stores None as "", so map it back.
                created_at=created_at or None,
            )
|