| { |
| "repo_name": "AIST-95M", |
| "legacy_aliases": [ |
| "TE-86M Dual Audio" |
| ], |
| "checkpoint": "checkpoints/wordnet_validaudio_real_mn20_whisper_wavcaps_speech150k_d2_retrieval_20260418T004617Z/best_model.pt", |
| "config": "configs/te_mn20_whisper_d2_validaudio.yaml", |
| "counts": { |
| "text_encoder_mdbr_leaf_ir": 22861056, |
| "image_encoder_mobilenetv4_conv_medium": 8434512, |
| "audio_encoder_mn20_as_full": 17909287, |
| "audio_encoder_whisper_tiny_encoder": 8208384, |
| "image_projection_head": 12306560, |
| "audio_projection_head": 14272640, |
| "text_projection_head": 11323520 |
| }, |
| "totals": { |
| "exact_loaded_total": 95315959, |
| "without_mn20_classifier_head": 89048552, |
| "trainable_projection_checkpoint_weights": 37902720 |
| }, |
| "notes": [ |
| "exact_loaded_total counts the deployed dual-audio evaluation path as loaded by triembed", |
| "without_mn20_classifier_head excludes the EfficientAT classifier parameters for historical comparison", |
| "projection_state_dict duplicates the per-head state dicts inside the checkpoint and is not double-counted" |
| ] |
| } |
|
|