# Model Parameter Counts Dictionary for PazaBench Leaderboard
# Used as bubble sizes in the speech vs accuracy tradeoff chart
# Maps model identifier (as used on the leaderboard) -> total parameter count.
# Exact counts are used where published; "~" comments mark architecture-based
# estimates. Values are plain ints so they can be scaled directly for plotting.
MODEL_PARAMETER_COUNTS = {
    # Facebook Data2Vec family
    "facebook/data2vec-audio-base-960h": 94_400_000,  # 94.4M
    "facebook/data2vec-audio-large-960h": 315_000_000,  # ~315M (large architecture)
    # Facebook MMS family
    "facebook/mms-1b-all": 1_000_000_000,  # 1B params
    "facebook/mms-1b-fl102": 1_000_000_000,  # 1B params
    # Facebook Wav2Vec2 family
    "facebook/wav2vec2-base-960h": 94_400_000,  # 94.4M
    "facebook/wav2vec2-large-960h": 315_000_000,  # ~315M (large)
    "facebook/wav2vec2-large-960h-lv60-self": 315_000_000,  # ~315M (large)
    "facebook/wav2vec2-large-robust-ft-libri-960h": 315_000_000,  # ~315M
    # Facebook Wav2Vec2 Conformer family
    "facebook/wav2vec2-conformer-rel-pos-large-960h-ft": 600_000_000,  # 600M
    "facebook/wav2vec2-conformer-rope-large-960h-ft": 600_000_000,  # 600M
    # Facebook HuBERT family
    "facebook/hubert-large-ls960-ft": 315_000_000,  # ~315M (large)
    "facebook/hubert-xlarge-ls960-ft": 1_000_000_000,  # 1B params
    # IBM Granite Speech family
    # Total params include speech encoder + LLM backbone
    "ibm/granite-granite-speech-3.3-2b": 3_000_000_000,  # 3B params
    "ibm/granite-granite-speech-3.3-8b": 9_000_000_000,  # 9B params
    # Kyutai family
    "kyutai/stt-2.6b-en": 2_600_000_000,  # 2.6B params
    # Lite ASR / EfficientSpeech family
    "efficient/speech-lite-whisper-large-v3": 1_000_000_000,  # ~1B (large-v3 compression)
    "efficient/speech-lite-whisper-large-v3-acc": 1_000_000_000,  # ~1B (accuracy-optimized)
    "efficient/speech-lite-whisper-large-v3-fast": 1_000_000_000,  # ~1B (speed-optimized)
    "efficient/speech-lite-whisper-large-v3-turbo": 600_000_000,  # ~0.6B (turbo compression)
    "efficient/speech-lite-whisper-large-v3-turbo-acc": 600_000_000,  # ~0.6B (turbo accuracy)
    "efficient/speech-lite-whisper-large-v3-turbo-fast": 600_000_000,  # ~0.6B (turbo fast)
    # Moonshine family (Useful Sensors)
    "usefulsensors/moonshine-tiny": 27_100_000,  # 27.1M params
    "usefulsensors/moonshine-base": 61_500_000,  # 61.5M params
    # OpenAI Whisper family
    "openai/whisper-tiny.en": 37_800_000,  # 37.8M
    "openai/whisper-base.en": 72_600_000,  # 72.6M
    "openai/whisper-small.en": 244_000_000,  # 244M
    "openai/whisper-medium.en": 769_000_000,  # 769M
    "openai/whisper-large": 1_550_000_000,  # 1550M
    "openai/whisper-large-v2": 1_550_000_000,  # 1550M
    "openai/whisper-large-v3": 1_550_000_000,  # 1550M
    "openai/whisper-large-v3-turbo": 809_000_000,  # 809M
    # Distil-Whisper family
    "distil/whisper-distil-large-v2": 756_000_000,  # 756M
    "distil/whisper-distil-large-v3": 756_000_000,  # 756M
    "distil/whisper-distil-medium.en": 394_000_000,  # 394M
    # Paza family
    "paza/microsoft-paza-Phi-4-multimodal-instruct": 5_600_000_000,  # 5.6B
    "paza/microsoft-paza-mms-1b-all": 1_000_000_000,  # 1B
    "paza/microsoft-paza-whisper-large-v3-turbo": 809_000_000,  # 809M
    # Qwen2 Audio family
    "Qwen/Qwen2-Audio-7B": 8_000_000_000,  # 8B params
    "Qwen/Qwen2-Audio-7B-Instruct": 8_000_000_000,  # 8B params
    # OmniASR family - CTC models
    "facebook/omniASR-CTC-300M": 325_494_996,  # 325M
    "facebook/omniASR-CTC-1B": 975_065_300,  # 975M
    "facebook/omniASR-CTC-3B": 3_080_423_636,  # 3.08B
    "facebook/omniASR-CTC-7B": 6_504_786_132,  # 6.5B
    # OmniASR family - LLM models
    "facebook/omniASR-LLM-300M": 1_627_603_584,  # 1.6B
    "facebook/omniASR-LLM-1B": 2_275_710_592,  # 2.3B
    "facebook/omniASR-LLM-3B": 4_376_679_040,  # 4.4B
    "facebook/omniASR-LLM-7B": 7_801_041_536,  # 7.8B
    # OmniASR family - Zero-shot model
    "facebook/omniASR-LLM-7B-ZS": 7_810_900_608,  # 7.8B
    # Microsoft Phi-4 family
    "microsoft/Phi-4-multimodal-instruct": 5_600_000_000,  # 5.6B params
    # NVIDIA NeMo ASR family
    "nvidia/canary-1b": 1_000_000_000,  # ~1B (FastConformer encoder-decoder)
    "nvidia/canary-1b-v2": 1_000_000_000,  # ~1B params
    "nvidia/canary-qwen-2.5b": 2_500_000_000,  # 2.5B params
    "nvidia/parakeet-tdt-0.6b-v2": 600_000_000,  # 600M
    "nvidia/parakeet-tdt-0.6b-v3": 600_000_000,  # 600M params
}