augmem
/

AIST-95M

Feature Extraction

image-text-audio

Model card Files Files and versions

AIST-95M / parameter_breakdown.json

gcoderw's picture

Publish AIST-95M

789accf verified 15 days ago

history blame contribute delete

1.09 kB

	{
	"repo_name": "AIST-95M",
	"legacy_aliases": [
	"TE-86M Dual Audio"
	],
	"checkpoint": "checkpoints/wordnet_validaudio_real_mn20_whisper_wavcaps_speech150k_d2_retrieval_20260418T004617Z/best_model.pt",
	"config": "configs/te_mn20_whisper_d2_validaudio.yaml",
	"counts": {
	"text_encoder_mdbr_leaf_ir": 22861056,
	"image_encoder_mobilenetv4_conv_medium": 8434512,
	"audio_encoder_mn20_as_full": 17909287,
	"audio_encoder_whisper_tiny_encoder": 8208384,
	"image_projection_head": 12306560,
	"audio_projection_head": 14272640,
	"text_projection_head": 11323520
	},
	"totals": {
	"exact_loaded_total": 95315959,
	"without_mn20_classifier_head": 89048552,
	"trainable_projection_checkpoint_weights": 37902720
	},
	"notes": [
	"exact_loaded_total counts the deployed dual-audio evaluation path as loaded by triembed",
	"without_mn20_classifier_head excludes the EfficientAT classifier parameters for historical comparison",
	"projection_state_dict duplicates the per-head state dicts inside the checkpoint and is not double-counted"
	]
	}