Spaces:
Running
Running
| """ | |
| BarVox Audio Processing API - Pydantic Data Models | |
| """ | |
| from pydantic import BaseModel | |
| from typing import Optional, List, Dict, Any | |
# One recording belonging to a dictionary entry.
# Documented with `#` comments (not a docstring) so the generated model schema
# is left exactly as it was.
class DictionaryRecording(BaseModel):
    # Free-form feature payload for this recording, keyed by feature name.
    # NOTE(review): the expected keys/value shapes are not visible in this
    # module -- confirm against whatever produces the features.
    features: Dict[str, Any]
# A single dictionary word: its identity, its reference recordings, and the
# optional per-word score floors.
# Documented with `#` comments (not a docstring) so the generated model schema
# is left exactly as it was.
class DictionaryEntry(BaseModel):
    # Identifier of the entry.
    id: str
    # Label associated with the entry.
    label: str
    # Reference recordings that belong to this entry.
    recordings: List[DictionaryRecording]

    # --- Per-word floors (all computed at bank load time; None when unset) ---
    # Per-word minimum z-score.
    z_floor: Optional[float] = None
    # Per-word minimum z-score in DTW space.
    dtw_z_floor: Optional[float] = None
    # Per-word whitened cosine floor.
    cosine_floor: Optional[float] = None
# Request payload naming a bank to extract, with optional Silero parameters.
# Documented with `#` comments (not a docstring) so the generated model schema
# is left exactly as it was.
class ExtractBankRequest(BaseModel):
    # Name of the bank to process.
    bank_name: str
    # Optional parameter overrides; omitted (None) by default.
    # NOTE(review): presumably forwarded to Silero VAD -- the accepted keys are
    # not visible in this module, confirm against the consumer.
    silero_params: Optional[Dict[str, Any]] = None
# Request payload for scoring test features against a set of dictionary
# entries, including the knobs that control "unknown" rejection.
# Documented with `#` comments (not a docstring) so the generated model schema
# is left exactly as it was.
class SimilarityRequest(BaseModel):
    # Features of the sample under test.
    # NOTE(review): expected keys/value shapes are not visible in this module.
    test_features: Dict[str, Any]
    # Dictionary entries the test sample is compared against.
    dictionary_entries: List[DictionaryEntry]
    # Optional DTW parameter overrides; None when not supplied.
    dtw_params: Optional[Dict[str, Any]] = None
    # Scoring mode: "mean", "dtw", or "hybrid" (mean filter → DTW re-rank).
    similarity_mode: Optional[str] = "mean"
    # Number of top candidates to re-rank with DTW in hybrid mode
    # (this default was previously 5).
    hybrid_top_n: Optional[int] = 8

    # --- Unknown rejection ---
    # Better to say "unknown" than give a wrong prediction.
    # Cosine mean-score floor (from bank cosine self-similarity).
    unknown_threshold: Optional[float] = None
    # DTW score floor (from bank DTW self-similarity).
    dtw_calibration_threshold: Optional[float] = None
    # Minimum DTW gap between the 1st and 2nd candidates (0 = disabled).
    unknown_min_gap: Optional[float] = None
    # Z-score threshold: the top word must be this many standard deviations
    # above the mean.
    unknown_z_threshold: Optional[float] = 2.0

    # --- Embedding whitening ---
    # Breaks HuBERT anisotropy so cosine scores have real range.
    # Global mean of all bank HuBERT embeddings (from /extract_bank).
    global_mean_embedding: Optional[List[float]] = None

    # --- CTC entropy ---
    # Detects English words that HuBERT recognizes confidently.
    # Reject if entropy < this AND DTW is below its floor
    # (original tuning note: "start: 1.0").
    ctc_entropy_threshold: Optional[float] = None