{
  "repo_name": "AIST-95M",
  "legacy_aliases": [
    "TE-86M Dual Audio"
  ],
  "checkpoint": "checkpoints/wordnet_validaudio_real_mn20_whisper_wavcaps_speech150k_d2_retrieval_20260418T004617Z/best_model.pt",
  "config": "configs/te_mn20_whisper_d2_validaudio.yaml",
  "counts": {
    "text_encoder_mdbr_leaf_ir": 22861056,
    "image_encoder_mobilenetv4_conv_medium": 8434512,
    "audio_encoder_mn20_as_full": 17909287,
    "audio_encoder_whisper_tiny_encoder": 8208384,
    "image_projection_head": 12306560,
    "audio_projection_head": 14272640,
    "text_projection_head": 11323520
  },
  "totals": {
    "exact_loaded_total": 95315959,
    "without_mn20_classifier_head": 89048552,
    "trainable_projection_checkpoint_weights": 37902720
  },
  "notes": [
    "exact_loaded_total counts the deployed dual-audio evaluation path as loaded by triembed",
    "without_mn20_classifier_head excludes the EfficientAT classifier parameters for historical comparison",
    "projection_state_dict duplicates the per-head state dicts inside the checkpoint and is not double-counted"
  ]
}