Spaces:

ronima12
/

barvox-backend

Running

RonenShilchikov

Restructure: move Python backend into backend/ directory

423bed8 about 2 months ago

2.03 kB

	"""
	BarVox Audio Processing API - Pydantic Data Models
	"""

	from pydantic import BaseModel
	from typing import Optional, List, Dict, Any

	class DictionaryRecording(BaseModel):
	    """A single recording, described by a free-form mapping of features."""

	    # Feature name -> value. Values are typed as Any, so this model does not
	    # constrain what each feature contains.
	    features: Dict[str, Any]

	class DictionaryEntry(BaseModel):
	    """A labelled dictionary word with its recordings and optional per-word floors."""

	    id: str
	    label: str
	    recordings: List[DictionaryRecording]

	    # The floors below are optional and default to None when not supplied.
	    # Per-word minimum z-score (computed at bank load time)
	    z_floor: Optional[float] = None
	    # Per-word minimum z-score in DTW space (computed at bank load time)
	    dtw_z_floor: Optional[float] = None
	    # Per-word whitened cosine floor (computed at bank load time)
	    cosine_floor: Optional[float] = None

	class ExtractBankRequest(BaseModel):
	    """Request payload naming a bank, with optional Silero parameters."""

	    bank_name: str
	    # Free-form parameter mapping; None when the caller omits it.
	    # NOTE(review): presumably forwarded to the Silero VAD by the handler — confirm.
	    silero_params: Optional[Dict[str, Any]] = None


	class SimilarityRequest(BaseModel):
	    """Request payload for scoring test features against dictionary entries.

	    Only ``test_features`` and ``dictionary_entries`` are required. Every
	    other field is optional tuning: the similarity mode, DTW parameters,
	    unknown-rejection thresholds, the whitening mean embedding, and the
	    CTC entropy threshold.
	    """

	    test_features: Dict[str, Any]
	    dictionary_entries: List[DictionaryEntry]
	    dtw_params: Optional[Dict[str, Any]] = None
	    # "mean", "dtw", or "hybrid" (mean filter → DTW re-rank).
	    # The annotation is a plain string, so this model does not itself
	    # restrict the value to those three modes.
	    similarity_mode: Optional[str] = "mean"
	    # Number of top candidates to re-rank with DTW in hybrid mode (increased from 5)
	    hybrid_top_n: Optional[int] = 8

	    # Unknown rejection: better to say "unknown" than give a wrong prediction
	    # Cosine mean-score floor (from bank cosine self-similarity)
	    unknown_threshold: Optional[float] = None
	    # DTW score floor (from bank DTW self-similarity)
	    dtw_calibration_threshold: Optional[float] = None
	    # Min DTW gap between 1st and 2nd (0 = disabled)
	    unknown_min_gap: Optional[float] = None
	    # Z-score threshold: top word must be this many std devs above mean
	    unknown_z_threshold: Optional[float] = 2.0

	    # Embedding whitening: breaks HuBERT anisotropy so cosine scores have real range
	    # Global mean of all bank HuBERT embeddings (from /extract_bank)
	    global_mean_embedding: Optional[List[float]] = None

	    # CTC entropy: detects English words that HuBERT recognizes confidently
	    # Reject if entropy < this AND DTW below floor (start: 1.0)
	    ctc_entropy_threshold: Optional[float] = None