jcudit HF Staff commited on
Commit
0456b70
·
1 Parent(s): ff074e4

fix: correct gitignore to only exclude root-level models directory, not src/models package

Browse files
.gitignore CHANGED
@@ -41,8 +41,8 @@ env/
41
  .env.*
42
  !.env.example
43
 
44
- # Models directory (HuggingFace cache)
45
- models/
46
  *.pt
47
  *.pth
48
  *.bin
@@ -79,4 +79,4 @@ tmp/
79
  # Planning
80
  specs
81
  .specify
82
- CLAUDE.md
 
41
  .env.*
42
  !.env.example
43
 
44
+ # Models directory (HuggingFace cache) - only at root level
45
+ /models/
46
  *.pt
47
  *.pth
48
  *.bin
 
79
  # Planning
80
  specs
81
  .specify
82
+ CLAUDE.md
src/models/__init__.py ADDED
File without changes
src/models/audio_segment.py ADDED
@@ -0,0 +1,130 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Audio Segment Model
3
+
4
+ Represents a contiguous portion of audio with speaker and timing information.
5
+ """
6
+
7
+ from dataclasses import dataclass
8
+ from enum import Enum
9
+ from typing import List, Optional
10
+
11
+
12
class SegmentType(Enum):
    """Classification of audio segment types.

    Values are lowercase strings so they serialize cleanly via ``.value``
    (used, e.g., by AudioSegment.__repr__).
    """

    SPEECH = "speech"
    NONVERBAL = "nonverbal"
    SILENCE = "silence"
    OVERLAP = "overlap"  # Multiple speakers talking simultaneously
19
+
20
+
21
@dataclass
class AudioSegment:
    """
    Audio segment with time range and speaker information.

    Attributes:
        start_time: Beginning timestamp in seconds
        end_time: Ending timestamp in seconds
        speaker_id: Identifier of the speaker in this segment
        confidence: Certainty of speaker identification (0.0-1.0)
        segment_type: Classification of the segment
        audio_file: Path to the source audio file (optional)
    """

    start_time: float
    end_time: float
    speaker_id: str
    confidence: float = 1.0
    segment_type: SegmentType = SegmentType.SPEECH
    audio_file: Optional[str] = None

    def __post_init__(self):
        """Reject segments with invalid times or out-of-range confidence."""
        if self.start_time < 0:
            raise ValueError(f"Start time cannot be negative: {self.start_time}")
        if self.end_time <= self.start_time:
            raise ValueError(
                f"End time ({self.end_time}) must be after start time ({self.start_time})"
            )
        if not (0.0 <= self.confidence <= 1.0):
            raise ValueError(f"Confidence must be between 0.0 and 1.0, got {self.confidence}")

    @property
    def duration(self) -> float:
        """Length of the segment in seconds."""
        return self.end_time - self.start_time

    def overlaps_with(self, other: "AudioSegment") -> bool:
        """Check if this segment overlaps with another segment."""
        # Equivalent (De Morgan) form: they overlap exactly when each one
        # starts strictly before the other one ends.
        return self.start_time < other.end_time and other.start_time < self.end_time

    def contains_time(self, time: float) -> bool:
        """Check if a timestamp falls within this segment (boundaries inclusive)."""
        return self.start_time <= time <= self.end_time

    def __repr__(self) -> str:
        return (
            f"AudioSegment(speaker='{self.speaker_id}', "
            f"time={self.start_time:.2f}-{self.end_time:.2f}s, "
            f"duration={self.duration:.2f}s, "
            f"confidence={self.confidence:.2f}, "
            f"type={self.segment_type.value})"
        )
77
+
78
+
79
class SegmentCollection:
    """
    Collection of audio segments with utility methods.

    Provides methods for filtering, sorting, and analyzing groups of segments.
    Filter/sort methods return new collections and never mutate this one.
    """

    def __init__(self, segments: List[AudioSegment]):
        """Initialize collection with segments (the list is stored, not copied)."""
        self.segments = segments

    def __len__(self) -> int:
        """Return number of segments."""
        return len(self.segments)

    def __iter__(self):
        """Iterate over segments."""
        return iter(self.segments)

    def __getitem__(self, index):
        """Get segment by index (slices delegate to the underlying list)."""
        return self.segments[index]

    @property
    def total_duration(self) -> float:
        """Total duration of all segments in seconds."""
        return sum(seg.duration for seg in self.segments)

    def filter_by_speaker(self, speaker_id: str) -> "SegmentCollection":
        """Return a new collection containing only the given speaker's segments."""
        filtered = [seg for seg in self.segments if seg.speaker_id == speaker_id]
        return SegmentCollection(filtered)

    def filter_by_type(self, segment_type: SegmentType) -> "SegmentCollection":
        """Return a new collection containing only segments of the given type."""
        filtered = [seg for seg in self.segments if seg.segment_type == segment_type]
        return SegmentCollection(filtered)

    def sort_by_time(self) -> "SegmentCollection":
        """Return a new collection sorted by segment start time."""
        sorted_segments = sorted(self.segments, key=lambda s: s.start_time)
        return SegmentCollection(sorted_segments)

    def get_speakers(self) -> List[str]:
        """Get unique speaker IDs, in order of first appearance.

        Fix: the previous implementation built the list from a ``set``, so
        ordering varied between runs; ``dict.fromkeys`` dedupes while keeping
        a deterministic, first-seen order.
        """
        return list(dict.fromkeys(seg.speaker_id for seg in self.segments))

    def average_confidence(self) -> float:
        """Average confidence across segments (0.0 for an empty collection)."""
        if not self.segments:
            return 0.0
        return sum(seg.confidence for seg in self.segments) / len(self.segments)
src/models/processing_job.py ADDED
@@ -0,0 +1,320 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ ProcessingJob data model: Batch configuration and execution tracking.
3
+
4
+ Represents a voice extraction job with configuration and state.
5
+ """
6
+
7
+ from dataclasses import dataclass, field
8
+ from datetime import datetime
9
+ from enum import Enum
10
+ from typing import List, Literal, Optional
11
+
12
+
13
class ExtractionMode(Enum):
    """Extraction mode for audio processing.

    Selects what to extract from the input audio — speech, non-verbal
    content, or both. Values are lowercase strings so they serialize
    cleanly (ProcessingJob.to_dict/get_summary store ``.value``).
    """

    SPEECH = "speech"
    NONVERBAL = "nonverbal"
    BOTH = "both"
19
+
20
+
21
class JobStatus(Enum):
    """Processing job status.

    Lifecycle: PENDING -> RUNNING -> one of COMPLETED / FAILED / CANCELLED
    (the three terminal states checked by ProcessingJob.is_complete).
    """

    PENDING = "pending"
    RUNNING = "running"
    COMPLETED = "completed"
    FAILED = "failed"
    CANCELLED = "cancelled"
29
+
30
+
31
@dataclass
class ProcessingJob:
    """
    Voice extraction processing job.

    Represents a batch processing job with configuration, state tracking,
    and results collection. Per-file outcomes are recorded via
    ``add_success``/``add_failure``; terminal states are entered via
    ``complete()``/``fail()``/``cancel()``.
    """

    # Input configuration
    reference_file: str  # path to the reference voice clip
    input_files: List[str]  # audio files to process
    output_dir: str  # directory that receives extracted output

    # Processing options
    extraction_mode: ExtractionMode = ExtractionMode.SPEECH
    apply_denoising: bool = False
    vad_threshold: float = 0.5  # voice-activity-detection threshold
    quality_threshold_enabled: bool = True

    # Job state (timestamps are ISO-8601 strings from datetime.isoformat)
    status: JobStatus = JobStatus.PENDING
    job_id: Optional[str] = None  # auto-generated in __post_init__ when omitted
    created_at: Optional[str] = None
    started_at: Optional[str] = None
    completed_at: Optional[str] = None

    # Progress tracking
    total_files: int = 0  # derived from len(input_files) in __post_init__
    files_processed: int = 0  # includes failed files (see add_failure)
    files_failed: int = 0
    current_file: Optional[str] = None

    # Results
    output_files: List[str] = field(default_factory=list)
    failed_files: List[dict] = field(default_factory=list)  # {file, error}

    # Statistics (seconds)
    total_input_duration: float = 0.0
    total_extracted_duration: float = 0.0
    total_processing_time: float = 0.0

    def __post_init__(self):
        """Fill in generated fields: job_id, created_at, total_files."""
        if self.job_id is None:
            # Generate job ID from timestamp
            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            self.job_id = f"job_{timestamp}"

        if self.created_at is None:
            self.created_at = datetime.now().isoformat()

        self.total_files = len(self.input_files)

    @property
    def progress_percentage(self) -> float:
        """
        Get job progress as percentage.

        Returns:
            Progress percentage (0-100); 0.0 when there are no files.
        """
        if self.total_files == 0:
            return 0.0

        return (self.files_processed / self.total_files) * 100

    @property
    def success_rate(self) -> float:
        """
        Get success rate for processed files.

        Returns:
            Success rate as percentage (0-100); 0.0 when nothing processed yet.
        """
        processed = self.files_processed
        if processed == 0:
            return 0.0

        succeeded = processed - self.files_failed
        return (succeeded / processed) * 100

    @property
    def extraction_yield(self) -> float:
        """
        Get extraction yield percentage.

        Returns:
            Extracted duration as a percentage of input duration (0-100).
        """
        if self.total_input_duration == 0:
            return 0.0

        return (self.total_extracted_duration / self.total_input_duration) * 100

    @property
    def is_complete(self) -> bool:
        """Check if job reached a terminal state (completed/failed/cancelled)."""
        return self.status in (JobStatus.COMPLETED, JobStatus.FAILED, JobStatus.CANCELLED)

    @property
    def is_running(self) -> bool:
        """Check if job is currently running."""
        return self.status == JobStatus.RUNNING

    def start(self):
        """Mark job as started and stamp started_at."""
        self.status = JobStatus.RUNNING
        self.started_at = datetime.now().isoformat()

    def _finalize(self, status: JobStatus):
        """Enter a terminal state: set status, stamp completed_at, and record
        total elapsed processing time if the job was ever started."""
        self.status = status
        self.completed_at = datetime.now().isoformat()

        if self.started_at and self.completed_at:
            start = datetime.fromisoformat(self.started_at)
            end = datetime.fromisoformat(self.completed_at)
            self.total_processing_time = (end - start).total_seconds()

    def complete(self):
        """Mark job as completed."""
        self._finalize(JobStatus.COMPLETED)

    def fail(self, error: str):
        """Mark job as failed, recording a job-level error entry.

        Fix: fail() (and cancel()) previously stamped completed_at but left
        total_processing_time at 0.0, unlike complete(); elapsed time is now
        recorded for every terminal transition via _finalize().
        """
        # Add general error to failed files
        self.failed_files.append(
            {
                "file": "JOB",
                "error": error,
            }
        )
        self._finalize(JobStatus.FAILED)

    def cancel(self):
        """Mark job as cancelled."""
        self._finalize(JobStatus.CANCELLED)

    def add_success(
        self, input_file: str, output_file: str, input_duration: float, extracted_duration: float
    ):
        """
        Record successful file processing.

        Args:
            input_file: Input file path
            output_file: Output file path
            input_duration: Input file duration in seconds
            extracted_duration: Extracted audio duration in seconds
        """
        self.files_processed += 1
        self.output_files.append(output_file)
        self.total_input_duration += input_duration
        self.total_extracted_duration += extracted_duration

    def add_failure(self, input_file: str, error: str):
        """
        Record failed file processing.

        Also counts toward files_processed so progress keeps advancing.

        Args:
            input_file: Input file path that failed
            error: Error message
        """
        self.files_processed += 1
        self.files_failed += 1
        self.failed_files.append(
            {
                "file": input_file,
                "error": error,
            }
        )

    def update_progress(self, current_file: str):
        """
        Update current processing file.

        Args:
            current_file: Currently processing file path
        """
        self.current_file = current_file

    def get_summary(self) -> dict:
        """
        Get job summary statistics.

        Returns:
            Dictionary with summary information (enums rendered as strings).
        """
        return {
            "job_id": self.job_id,
            "status": self.status.value,
            "extraction_mode": self.extraction_mode.value,
            "apply_denoising": self.apply_denoising,
            "total_files": self.total_files,
            "files_processed": self.files_processed,
            "files_succeeded": self.files_processed - self.files_failed,
            "files_failed": self.files_failed,
            "progress_percentage": self.progress_percentage,
            "success_rate": self.success_rate,
            "total_input_duration": self.total_input_duration,
            "total_extracted_duration": self.total_extracted_duration,
            "extraction_yield": self.extraction_yield,
            "total_processing_time": self.total_processing_time,
            "created_at": self.created_at,
            "started_at": self.started_at,
            "completed_at": self.completed_at,
        }

    def to_dict(self) -> dict:
        """Convert job to dictionary (round-trippable via from_dict)."""
        return {
            "job_id": self.job_id,
            "reference_file": self.reference_file,
            "input_files": self.input_files,
            "output_dir": self.output_dir,
            "extraction_mode": self.extraction_mode.value,
            "apply_denoising": self.apply_denoising,
            "vad_threshold": self.vad_threshold,
            "quality_threshold_enabled": self.quality_threshold_enabled,
            "status": self.status.value,
            "created_at": self.created_at,
            "started_at": self.started_at,
            "completed_at": self.completed_at,
            "total_files": self.total_files,
            "files_processed": self.files_processed,
            "files_failed": self.files_failed,
            "current_file": self.current_file,
            "output_files": self.output_files,
            "failed_files": self.failed_files,
            "total_input_duration": self.total_input_duration,
            "total_extracted_duration": self.total_extracted_duration,
            "total_processing_time": self.total_processing_time,
            "summary": self.get_summary(),
        }

    @classmethod
    def from_dict(cls, data: dict) -> "ProcessingJob":
        """Create job from dictionary (inverse of to_dict)."""
        data = data.copy()

        # Convert enum strings back to enum members
        if isinstance(data.get("extraction_mode"), str):
            data["extraction_mode"] = ExtractionMode(data["extraction_mode"])

        if isinstance(data.get("status"), str):
            data["status"] = JobStatus(data["status"])

        # Remove computed properties (not constructor fields)
        data.pop("summary", None)

        return cls(**data)

    def generate_report(self) -> str:
        """
        Generate human-readable job report.

        Returns:
            Formatted report string
        """
        report = ["=== Voice Extraction Job Report ===", ""]

        report.append(f"Job ID: {self.job_id}")
        report.append(f"Status: {self.status.value.upper()}")
        report.append(f"Mode: {self.extraction_mode.value}")
        report.append(f"Denoising: {'Enabled' if self.apply_denoising else 'Disabled'}")
        report.append("")

        report.append(f"Files Processed: {self.files_processed}/{self.total_files}")
        report.append(f"Success Rate: {self.success_rate:.1f}%")
        report.append(f"Progress: {self.progress_percentage:.1f}%")
        report.append("")

        report.append(f"Input Duration: {self.total_input_duration / 60:.1f} minutes")
        report.append(f"Extracted Duration: {self.total_extracted_duration / 60:.1f} minutes")
        report.append(f"Extraction Yield: {self.extraction_yield:.1f}%")

        if self.total_processing_time > 0:
            report.append(f"Processing Time: {self.total_processing_time / 60:.1f} minutes")

        if self.files_failed > 0:
            report.append("")
            report.append(f"Failed Files ({self.files_failed}):")
            for failure in self.failed_files[:5]:  # Show first 5
                report.append(f"  - {failure['file']}: {failure['error']}")

            if len(self.failed_files) > 5:
                report.append(f"  ... and {len(self.failed_files) - 5} more")

        return "\n".join(report)
src/models/speaker_profile.py ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Speaker Profile Model
3
+
4
+ Represents a speaker's voice characteristics extracted from audio,
5
+ used for identification and matching.
6
+ """
7
+
8
+ from dataclasses import dataclass
9
+ from typing import Optional
10
+
11
+ import numpy as np
12
+
13
+
14
@dataclass
class SpeakerProfile:
    """
    Speaker profile containing voice embeddings and metadata.

    Attributes:
        speaker_id: Unique identifier (e.g., "SPEAKER_00", "SPEAKER_01")
        embedding: Numerical representation of voice characteristics
        source_audio: Path to the audio file this profile was extracted from
        start_time: Start time in source audio (seconds)
        end_time: End time in source audio (seconds)
        confidence: Quality/reliability metric (0.0-1.0)
    """

    speaker_id: str
    embedding: np.ndarray
    source_audio: str
    start_time: float
    end_time: float
    confidence: float = 1.0

    def __post_init__(self):
        """Reject empty embeddings, inverted time ranges, out-of-range
        confidence, and segments shorter than the 3-second minimum."""
        if self.embedding is None or len(self.embedding) == 0:
            raise ValueError("Embedding vector cannot be empty")

        if self.end_time <= self.start_time:
            raise ValueError(
                f"End time ({self.end_time}) must be after start time ({self.start_time})"
            )

        if not 0.0 <= self.confidence <= 1.0:
            raise ValueError(f"Confidence must be between 0.0 and 1.0, got {self.confidence}")

        span = self.end_time - self.start_time
        if span < 3.0:
            raise ValueError(f"Duration ({span}s) is too short (minimum 3 seconds recommended)")

    @property
    def duration(self) -> float:
        """Length of the profiled region in seconds."""
        return self.end_time - self.start_time

    def __repr__(self) -> str:
        return "SpeakerProfile(speaker_id='{}', duration={:.2f}s, confidence={:.2f})".format(
            self.speaker_id, self.duration, self.confidence
        )
src/models/voice_profile.py ADDED
@@ -0,0 +1,152 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ VoiceProfile data model: Reference embedding and speaker identification.
3
+
4
+ Represents a target voice profile extracted from reference audio clip.
5
+ """
6
+
7
+ from dataclasses import dataclass, field
8
+ from typing import Optional
9
+
10
+ import numpy as np
11
+
12
+
13
@dataclass
class VoiceProfile:
    """
    Voice profile representing a target speaker.

    Contains embedding vectors and metadata for voice identification.
    """

    # Core identification
    speaker_id: str
    embedding: np.ndarray  # 512-dimensional vector from pyannote

    # Source information
    reference_file: str
    reference_duration: float  # seconds

    # Quality metrics
    embedding_quality: float = 1.0  # 0-1 score indicating embedding confidence
    num_speech_segments: int = 0

    # Metadata
    sample_rate: int = 16000
    created_at: Optional[str] = None

    def __post_init__(self):
        """Validate voice profile after initialization."""
        if self.embedding.ndim != 1:
            raise ValueError("Embedding must be 1-dimensional vector")

        # Robustness fix: an empty vector would slip past the ndim check and
        # make similarity() return NaN (division by zero norm).
        if self.embedding.size == 0:
            raise ValueError("Embedding vector cannot be empty")

        if self.embedding_quality < 0 or self.embedding_quality > 1:
            raise ValueError("Embedding quality must be between 0 and 1")

    def similarity(self, other_embedding: np.ndarray) -> float:
        """
        Calculate cosine similarity with another embedding.

        Args:
            other_embedding: Another voice embedding vector

        Returns:
            Similarity score (0-1, higher = more similar)
        """
        # Imported lazily so scipy is only required when similarity is used.
        from scipy.spatial.distance import cosine

        # Cosine similarity = 1 - cosine distance
        similarity = 1 - cosine(self.embedding, other_embedding)
        return max(0.0, min(1.0, similarity))  # Clamp to [0, 1]

    def matches(self, other_embedding: np.ndarray, threshold: float = 0.7) -> bool:
        """
        Check if another embedding matches this voice profile.

        Args:
            other_embedding: Voice embedding to compare
            threshold: Similarity threshold for match (default: 0.7)

        Returns:
            True if embeddings match above threshold
        """
        return self.similarity(other_embedding) >= threshold

    def to_dict(self) -> dict:
        """
        Convert voice profile to dictionary.

        Returns:
            Dictionary representation (embedding as a plain list)
        """
        return {
            "speaker_id": self.speaker_id,
            "embedding": self.embedding.tolist(),
            "reference_file": self.reference_file,
            "reference_duration": self.reference_duration,
            "embedding_quality": self.embedding_quality,
            "num_speech_segments": self.num_speech_segments,
            "sample_rate": self.sample_rate,
            "created_at": self.created_at,
        }

    @classmethod
    def from_dict(cls, data: dict) -> "VoiceProfile":
        """
        Create voice profile from dictionary.

        Args:
            data: Dictionary representation (as produced by to_dict)

        Returns:
            VoiceProfile instance
        """
        data = data.copy()
        data["embedding"] = np.array(data["embedding"])
        return cls(**data)

    def save(self, file_path: str):
        """
        Save voice profile to file.

        Args:
            file_path: Output file path (.npz format)
        """
        # Fix: removed a redundant function-local `import numpy as np`;
        # numpy is already imported at module level.
        np.savez(
            file_path,
            speaker_id=self.speaker_id,
            embedding=self.embedding,
            reference_file=self.reference_file,
            reference_duration=self.reference_duration,
            embedding_quality=self.embedding_quality,
            num_speech_segments=self.num_speech_segments,
            sample_rate=self.sample_rate,
            created_at=self.created_at or "",  # np.savez cannot store None
        )

    @classmethod
    def load(cls, file_path: str) -> "VoiceProfile":
        """
        Load voice profile from file.

        Args:
            file_path: Input file path (.npz format)

        Returns:
            VoiceProfile instance
        """
        # NOTE(review): allow_pickle=True can execute arbitrary code when
        # loading untrusted files — only load profiles you created.
        data = np.load(file_path, allow_pickle=True)

        return cls(
            speaker_id=str(data["speaker_id"]),
            embedding=data["embedding"],
            reference_file=str(data["reference_file"]),
            reference_duration=float(data["reference_duration"]),
            embedding_quality=float(data["embedding_quality"]),
            num_speech_segments=int(data["num_speech_segments"]),
            sample_rate=int(data["sample_rate"]),
            created_at=str(data["created_at"]) if data["created_at"] else None,
        )
+ )