Spaces:
Running
Running
| # Model Parameter Counts Dictionary for PazaBench Leaderboard | |
| # Used as bubble sizes in the speed vs accuracy tradeoff chart | |
# Mapping of model identifier ("<org>/<name>") -> total parameter count (int).
# Entries whose comment starts with "~" are rounded estimates rather than
# exact counts; the OmniASR entries carry exact counts.
#
# NOTE(review): several keys (e.g. "ibm/granite-granite-speech-3.3-2b",
# "efficient/speech-lite-whisper-large-v3", "distil/whisper-distil-large-v2",
# "paza/microsoft-paza-...") do not match the upstream Hugging Face repo ids.
# Presumably they mirror how the leaderboard derives model names from its
# result files -- confirm against the lookup code before "correcting" them.
MODEL_PARAMETER_COUNTS = {
    # Facebook Data2Vec family
    "facebook/data2vec-audio-base-960h": 94_400_000,  # 94.4M
    "facebook/data2vec-audio-large-960h": 315_000_000,  # ~315M (large architecture)
    # Facebook MMS family
    "facebook/mms-1b-all": 1_000_000_000,  # 1B params
    "facebook/mms-1b-fl102": 1_000_000_000,  # 1B params
    # Facebook Wav2Vec2 family
    "facebook/wav2vec2-base-960h": 94_400_000,  # 94.4M
    "facebook/wav2vec2-large-960h": 315_000_000,  # ~315M (large)
    "facebook/wav2vec2-large-960h-lv60-self": 315_000_000,  # ~315M (large)
    "facebook/wav2vec2-large-robust-ft-libri-960h": 315_000_000,  # ~315M
    # Facebook Wav2Vec2 Conformer family
    "facebook/wav2vec2-conformer-rel-pos-large-960h-ft": 600_000_000,  # 600M
    "facebook/wav2vec2-conformer-rope-large-960h-ft": 600_000_000,  # 600M
    # Facebook HuBERT family
    "facebook/hubert-large-ls960-ft": 315_000_000,  # ~315M (large)
    "facebook/hubert-xlarge-ls960-ft": 1_000_000_000,  # 1B params
    # IBM Granite Speech family
    # Total params include speech encoder + LLM backbone
    "ibm/granite-granite-speech-3.3-2b": 3_000_000_000,  # 3B params
    "ibm/granite-granite-speech-3.3-8b": 9_000_000_000,  # 9B params
    # Kyutai family
    "kyutai/stt-2.6b-en": 2_600_000_000,  # 2.6B params
    # Lite ASR / EfficientSpeech family
    "efficient/speech-lite-whisper-large-v3": 1_000_000_000,  # ~1B (large-v3 compression)
    "efficient/speech-lite-whisper-large-v3-acc": 1_000_000_000,  # ~1B (accuracy-optimized)
    "efficient/speech-lite-whisper-large-v3-fast": 1_000_000_000,  # ~1B (speed-optimized)
    "efficient/speech-lite-whisper-large-v3-turbo": 600_000_000,  # ~0.6B (turbo compression)
    "efficient/speech-lite-whisper-large-v3-turbo-acc": 600_000_000,  # ~0.6B (turbo accuracy)
    "efficient/speech-lite-whisper-large-v3-turbo-fast": 600_000_000,  # ~0.6B (turbo fast)
    # Moonshine family (Useful Sensors)
    "usefulsensors/moonshine-tiny": 27_100_000,  # 27.1M params
    "usefulsensors/moonshine-base": 61_500_000,  # 61.5M params
    # OpenAI Whisper family
    "openai/whisper-tiny.en": 37_800_000,  # 37.8M
    "openai/whisper-base.en": 72_600_000,  # 72.6M
    "openai/whisper-small.en": 244_000_000,  # 244M
    "openai/whisper-medium.en": 769_000_000,  # 769M
    "openai/whisper-large": 1_550_000_000,  # 1550M
    "openai/whisper-large-v2": 1_550_000_000,  # 1550M
    "openai/whisper-large-v3": 1_550_000_000,  # 1550M
    "openai/whisper-large-v3-turbo": 809_000_000,  # 809M
    # Distil-Whisper family
    "distil/whisper-distil-large-v2": 756_000_000,  # 756M
    "distil/whisper-distil-large-v3": 756_000_000,  # 756M
    "distil/whisper-distil-medium.en": 394_000_000,  # 394M
    # Paza family
    "paza/microsoft-paza-Phi-4-multimodal-instruct": 5_600_000_000,  # 5.6B
    "paza/microsoft-paza-mms-1b-all": 1_000_000_000,  # 1B
    "paza/microsoft-paza-whisper-large-v3-turbo": 809_000_000,  # 809M
    # Qwen2 Audio family
    "Qwen/Qwen2-Audio-7B": 8_000_000_000,  # 8B params
    "Qwen/Qwen2-Audio-7B-Instruct": 8_000_000_000,  # 8B params
    # OmniASR family - CTC models
    "facebook/omniASR-CTC-300M": 325_494_996,  # 325M
    "facebook/omniASR-CTC-1B": 975_065_300,  # 975M
    "facebook/omniASR-CTC-3B": 3_080_423_636,  # 3.08B
    "facebook/omniASR-CTC-7B": 6_504_786_132,  # 6.5B
    # OmniASR family - LLM models
    "facebook/omniASR-LLM-300M": 1_627_603_584,  # 1.6B
    "facebook/omniASR-LLM-1B": 2_275_710_592,  # 2.3B
    "facebook/omniASR-LLM-3B": 4_376_679_040,  # 4.4B
    "facebook/omniASR-LLM-7B": 7_801_041_536,  # 7.8B
    # OmniASR family - Zero-shot model
    "facebook/omniASR-LLM-7B-ZS": 7_810_900_608,  # 7.8B
    # Microsoft Phi-4 family
    "microsoft/Phi-4-multimodal-instruct": 5_600_000_000,  # 5.6B params
    # NVIDIA NeMo ASR family
    "nvidia/canary-1b": 1_000_000_000,  # ~1B (FastConformer encoder-decoder)
    "nvidia/canary-1b-v2": 1_000_000_000,  # ~1B params
    "nvidia/canary-qwen-2.5b": 2_500_000_000,  # 2.5B params
    "nvidia/parakeet-tdt-0.6b-v2": 600_000_000,  # 600M
    "nvidia/parakeet-tdt-0.6b-v3": 600_000_000,  # 600M params
}