{ "repo_name": "AIST-95M", "legacy_aliases": [ "TE-86M Dual Audio" ], "checkpoint": "checkpoints/wordnet_validaudio_real_mn20_whisper_wavcaps_speech150k_d2_retrieval_20260418T004617Z/best_model.pt", "config": "configs/te_mn20_whisper_d2_validaudio.yaml", "counts": { "text_encoder_mdbr_leaf_ir": 22861056, "image_encoder_mobilenetv4_conv_medium": 8434512, "audio_encoder_mn20_as_full": 17909287, "audio_encoder_whisper_tiny_encoder": 8208384, "image_projection_head": 12306560, "audio_projection_head": 14272640, "text_projection_head": 11323520 }, "totals": { "exact_loaded_total": 95315959, "without_mn20_classifier_head": 89048552, "trainable_projection_checkpoint_weights": 37902720 }, "notes": [ "exact_loaded_total counts the deployed dual-audio evaluation path as loaded by triembed", "without_mn20_classifier_head excludes the EfficientAT classifier parameters for historical comparison", "projection_state_dict duplicates the per-head state dicts inside the checkpoint and is not double-counted" ] }