{ "tasks": [ "SprintDuplicateQuestions", "STSBenchmark", "Flickr30kT2IRetrieval", "Flickr30kI2TRetrieval", "CommonVoiceMini21T2ARetrieval", "MACST2ARetrieval", "UrbanSound8KT2ARetrieval", "ClothoT2ARetrieval" ], "primary_metric_policy": { "text": "main_score", "image_text_retrieval": "ndcg_at_10", "audio_text_retrieval": "ndcg_at_10" }, "runs": [ { "label": "AIST-87M 1280", "dimension": 1280, "results_dir": "/shared/augmem/triembed/results/aist81m_raw1280_mn20_merged_teacher_20260503T0125Z_memory_slice_default/dim1280/results/triembed__te-1280d/best_model", "completed_tasks": 8, "missing_tasks": [], "overall_mean": 0.3491338344017094, "family_means": { "Audio recall": 0.1041809188034188, "Image recall": 0.424995, "Text continuity": 0.7631785 }, "rows": [ { "label": "AIST-87M 1280", "dimension": 1280, "task": "SprintDuplicateQuestions", "family": "Text continuity", "primary_metric": "main_score", "primary": 0.875145, "metrics": { "main_score": 0.875145 }, "subsets": 1 }, { "label": "AIST-87M 1280", "dimension": 1280, "task": "STSBenchmark", "family": "Text continuity", "primary_metric": "main_score", "primary": 0.651212, "metrics": { "main_score": 0.651212, "cosine_spearman": 0.651212, "spearman": 0.651212 }, "subsets": 1 }, { "label": "AIST-87M 1280", "dimension": 1280, "task": "Flickr30kT2IRetrieval", "family": "Image recall", "primary_metric": "ndcg_at_10", "primary": 0.4685, "metrics": { "main_score": 0.4685, "ndcg_at_10": 0.4685, "recall_at_1": 0.2956, "recall_at_10": 0.6718, "mrr_at_10": 0.405197 }, "subsets": 1 }, { "label": "AIST-87M 1280", "dimension": 1280, "task": "Flickr30kI2TRetrieval", "family": "Image recall", "primary_metric": "ndcg_at_10", "primary": 0.38149, "metrics": { "main_score": 0.38149, "ndcg_at_10": 0.38149, "recall_at_1": 0.0816, "recall_at_10": 0.4072, "mrr_at_10": 0.533862 }, "subsets": 1 }, { "label": "AIST-87M 1280", "dimension": 1280, "task": "CommonVoiceMini21T2ARetrieval", "family": "Audio recall", "primary_metric": "ndcg_at_10", "primary": 0.028403675213675213, "metrics": { "main_score": 0.03276290598290598, "ndcg_at_10": 0.028403675213675213, "recall_at_1": 0.005908376068376069, "recall_at_10": 0.061962393162393166, "mrr_at_10": 0.01842434188034188 }, "subsets": 117 }, { "label": "AIST-87M 1280", "dimension": 1280, "task": "MACST2ARetrieval", "family": "Audio recall", "primary_metric": "ndcg_at_10", "primary": 0.11037, "metrics": { "main_score": 0.13995, "ndcg_at_10": 0.11037, "recall_at_1": 0.03308, "recall_at_10": 0.21374, "mrr_at_10": 0.079078 }, "subsets": 1 }, { "label": "AIST-87M 1280", "dimension": 1280, "task": "UrbanSound8KT2ARetrieval", "family": "Audio recall", "primary_metric": "ndcg_at_10", "primary": 0.00851, "metrics": { "main_score": 0.00963, "ndcg_at_10": 0.00851, "recall_at_1": 0.00196, "recall_at_10": 0.01847, "mrr_at_10": 0.00556 }, "subsets": 1 }, { "label": "AIST-87M 1280", "dimension": 1280, "task": "ClothoT2ARetrieval", "family": "Audio recall", "primary_metric": "ndcg_at_10", "primary": 0.26944, "metrics": { "main_score": 0.3325, "ndcg_at_10": 0.26944, "recall_at_1": 0.1282, "recall_at_10": 0.44315, "mrr_at_10": 0.215861 }, "subsets": 1 } ], "source_result_dirs": [ "/shared/augmem/triembed/results/aist81m_raw1280_mn20_merged_teacher_20260503T0125Z_memory_slice_default/dim1280/results/triembed__te-1280d/best_model" ] }, { "label": "AIST-87M 768", "dimension": 768, "results_dir": "/shared/augmem/triembed/results/aist81m_raw1280_mn20_merged_teacher_20260503T0125Z_memory_slice_default/dim768/results/triembed__te-768d/best_model", "completed_tasks": 8, "missing_tasks": [], "overall_mean": 0.34871195512820513, "family_means": { "Audio recall": 0.10426891025641025, "Image recall": 0.423815, "Text continuity": 0.7624949999999999 }, "rows": [ { "label": "AIST-87M 768", "dimension": 768, "task": "SprintDuplicateQuestions", "family": "Text continuity", "primary_metric": "main_score", "primary": 0.874231, "metrics": { "main_score": 0.874231 }, "subsets": 1 }, { "label": "AIST-87M 768", "dimension": 768, "task": "STSBenchmark", "family": "Text continuity", "primary_metric": "main_score", "primary": 0.650759, "metrics": { "main_score": 0.650759, "cosine_spearman": 0.650759, "spearman": 0.650759 }, "subsets": 1 }, { "label": "AIST-87M 768", "dimension": 768, "task": "Flickr30kT2IRetrieval", "family": "Image recall", "primary_metric": "ndcg_at_10", "primary": 0.46701, "metrics": { "main_score": 0.46701, "ndcg_at_10": 0.46701, "recall_at_1": 0.2922, "recall_at_10": 0.6712, "mrr_at_10": 0.403385 }, "subsets": 1 }, { "label": "AIST-87M 768", "dimension": 768, "task": "Flickr30kI2TRetrieval", "family": "Image recall", "primary_metric": "ndcg_at_10", "primary": 0.38062, "metrics": { "main_score": 0.38062, "ndcg_at_10": 0.38062, "recall_at_1": 0.0814, "recall_at_10": 0.4058, "mrr_at_10": 0.532687 }, "subsets": 1 }, { "label": "AIST-87M 768", "dimension": 768, "task": "CommonVoiceMini21T2ARetrieval", "family": "Audio recall", "primary_metric": "ndcg_at_10", "primary": 0.028395641025641027, "metrics": { "main_score": 0.03299991452991453, "ndcg_at_10": 0.028395641025641027, "recall_at_1": 0.005907350427350427, "recall_at_10": 0.062035897435897436, "mrr_at_10": 0.01839460683760684 }, "subsets": 117 }, { "label": "AIST-87M 768", "dimension": 768, "task": "MACST2ARetrieval", "family": "Audio recall", "primary_metric": "ndcg_at_10", "primary": 0.11149, "metrics": { "main_score": 0.14249, "ndcg_at_10": 0.11149, "recall_at_1": 0.03308, "recall_at_10": 0.21628, "mrr_at_10": 0.079723 }, "subsets": 1 }, { "label": "AIST-87M 768", "dimension": 768, "task": "UrbanSound8KT2ARetrieval", "family": "Audio recall", "primary_metric": "ndcg_at_10", "primary": 0.00851, "metrics": { "main_score": 0.00963, "ndcg_at_10": 0.00851, "recall_at_1": 0.00196, "recall_at_10": 0.01847, "mrr_at_10": 0.005562 }, "subsets": 1 }, { "label": "AIST-87M 768", "dimension": 768, "task": "ClothoT2ARetrieval", "family": "Audio recall", "primary_metric": "ndcg_at_10", "primary": 0.26868, "metrics": { "main_score": 0.33178, "ndcg_at_10": 0.26868, "recall_at_1": 0.12695, "recall_at_10": 0.44208, "mrr_at_10": 0.21516 }, "subsets": 1 } ], "source_result_dirs": [ "/shared/augmem/triembed/results/aist81m_raw1280_mn20_merged_teacher_20260503T0125Z_memory_slice_default/dim768/results/triembed__te-768d/best_model", "/shared/augmem/triembed/results/aist81m_raw1280_mn20_merged_teacher_20260503T0125Z_memory_slice_dim768_fill/dim768/results/triembed__te-768d/best_model" ] }, { "label": "AIST-87M 512", "dimension": 512, "results_dir": "/shared/augmem/triembed/results/aist81m_raw1280_mn20_merged_teacher_20260503T0125Z_memory_slice_dim512/dim512/results/triembed__te-512d/best_model", "completed_tasks": 8, "missing_tasks": [], "overall_mean": 0.3488224732905983, "family_means": { "Audio recall": 0.10438869658119658, "Image recall": 0.42417499999999997, "Text continuity": 0.7623375 }, "rows": [ { "label": "AIST-87M 512", "dimension": 512, "task": "SprintDuplicateQuestions", "family": "Text continuity", "primary_metric": "main_score", "primary": 0.873508, "metrics": { "main_score": 0.873508 }, "subsets": 1 }, { "label": "AIST-87M 512", "dimension": 512, "task": "STSBenchmark", "family": "Text continuity", "primary_metric": "main_score", "primary": 0.651167, "metrics": { "main_score": 0.651167, "cosine_spearman": 0.651167, "spearman": 0.651167 }, "subsets": 1 }, { "label": "AIST-87M 512", "dimension": 512, "task": "Flickr30kT2IRetrieval", "family": "Image recall", "primary_metric": "ndcg_at_10", "primary": 0.4676, "metrics": { "main_score": 0.4676, "ndcg_at_10": 0.4676, "recall_at_1": 0.2954, "recall_at_10": 0.6702, "mrr_at_10": 0.404515 }, "subsets": 1 }, { "label": "AIST-87M 512", "dimension": 512, "task": "Flickr30kI2TRetrieval", "family": "Image recall", "primary_metric": "ndcg_at_10", "primary": 0.38075, "metrics": { "main_score": 0.38075, "ndcg_at_10": 0.38075, "recall_at_1": 0.0824, "recall_at_10": 0.4052, "mrr_at_10": 0.535146 }, "subsets": 1 }, { "label": "AIST-87M 512", "dimension": 512, "task": "CommonVoiceMini21T2ARetrieval", "family": "Audio recall", "primary_metric": "ndcg_at_10", "primary": 0.028264786324786326, "metrics": { "main_score": 0.03229504273504274, "ndcg_at_10": 0.028264786324786326, "recall_at_1": 0.006467948717948718, "recall_at_10": 0.060837521367521366, "mrr_at_10": 0.018573598290598292 }, "subsets": 117 }, { "label": "AIST-87M 512", "dimension": 512, "task": "MACST2ARetrieval", "family": "Audio recall", "primary_metric": "ndcg_at_10", "primary": 0.11287, "metrics": { "main_score": 0.13486, "ndcg_at_10": 0.11287, "recall_at_1": 0.03308, "recall_at_10": 0.22137, "mrr_at_10": 0.080181 }, "subsets": 1 }, { "label": "AIST-87M 512", "dimension": 512, "task": "UrbanSound8KT2ARetrieval", "family": "Audio recall", "primary_metric": "ndcg_at_10", "primary": 0.0085, "metrics": { "main_score": 0.00923, "ndcg_at_10": 0.0085, "recall_at_1": 0.00196, "recall_at_10": 0.01847, "mrr_at_10": 0.005544 }, "subsets": 1 }, { "label": "AIST-87M 512", "dimension": 512, "task": "ClothoT2ARetrieval", "family": "Audio recall", "primary_metric": "ndcg_at_10", "primary": 0.26792, "metrics": { "main_score": 0.33107, "ndcg_at_10": 0.26792, "recall_at_1": 0.1248, "recall_at_10": 0.44261, "mrr_at_10": 0.213985 }, "subsets": 1 } ], "source_result_dirs": [ "/shared/augmem/triembed/results/aist81m_raw1280_mn20_merged_teacher_20260503T0125Z_memory_slice_dim512/dim512/results/triembed__te-512d/best_model" ] }, { "label": "AIST-95M 1280 Flickr", "dimension": 1280, "results_dir": "/shared/augmem/triembed/results/aist95m_1280_mieb_flickr_20260502T0217Z/dim1280/results/triembed__te-1280d/best_model", "completed_tasks": 2, "missing_tasks": [ "ClothoT2ARetrieval", "CommonVoiceMini21T2ARetrieval", "MACST2ARetrieval", "STSBenchmark", "SprintDuplicateQuestions", "UrbanSound8KT2ARetrieval" ], "overall_mean": 0.485, "family_means": { "Image recall": 0.485 }, "rows": [ { "label": "AIST-95M 1280 Flickr", "dimension": 1280, "task": "Flickr30kT2IRetrieval", "family": "Image recall", "primary_metric": "ndcg_at_10", "primary": 0.50216, "metrics": { "main_score": 0.50216, "ndcg_at_10": 0.50216, "recall_at_1": 0.3254, "recall_at_10": 0.7004, "mrr_at_10": 0.439975 }, "subsets": 1 }, { "label": "AIST-95M 1280 Flickr", "dimension": 1280, "task": "Flickr30kI2TRetrieval", "family": "Image recall", "primary_metric": "ndcg_at_10", "primary": 0.46784, "metrics": { "main_score": 0.46784, "ndcg_at_10": 0.46784, "recall_at_1": 0.0958, "recall_at_10": 0.5034, "mrr_at_10": 0.598869 }, "subsets": 1 } ], "source_result_dirs": [ "/shared/augmem/triembed/results/aist95m_1280_mieb_flickr_20260502T0217Z/dim1280/results/triembed__te-1280d/best_model" ] }, { "label": "Native mn20 audio 768", "dimension": 768, "results_dir": "/shared/augmem/triembed/results/es_aist_memory_audio_native_default_20260501T1835Z/dim768/results/triembed__native-efficientat-768d/latest_model", "completed_tasks": 4, "missing_tasks": [ "Flickr30kI2TRetrieval", "Flickr30kT2IRetrieval", "STSBenchmark", "SprintDuplicateQuestions" ], "overall_mean": 0.11513626068376069, "family_means": { "Audio recall": 0.11513626068376069 }, "rows": [ { "label": "Native mn20 audio 768", "dimension": 768, "task": "CommonVoiceMini21T2ARetrieval", "family": "Audio recall", "primary_metric": "ndcg_at_10", "primary": 0.035825042735042736, "metrics": { "main_score": 0.04166820512820513, "ndcg_at_10": 0.035825042735042736, "recall_at_1": 0.009125726495726495, "recall_at_10": 0.07585017094017094, "mrr_at_10": 0.023907692307692307 }, "subsets": 117 }, { "label": "Native mn20 audio 768", "dimension": 768, "task": "MACST2ARetrieval", "family": "Audio recall", "primary_metric": "ndcg_at_10", "primary": 0.12746, "metrics": { "main_score": 0.13995, "ndcg_at_10": 0.12746, "recall_at_1": 0.05852, "recall_at_10": 0.22392, "mrr_at_10": 0.098715 }, "subsets": 1 }, { "label": "Native mn20 audio 768", "dimension": 768, "task": "UrbanSound8KT2ARetrieval", "family": "Audio recall", "primary_metric": "ndcg_at_10", "primary": 0.00849, "metrics": { "main_score": 0.00923, "ndcg_at_10": 0.00849, "recall_at_1": 0.00196, "recall_at_10": 0.01866, "mrr_at_10": 0.005487 }, "subsets": 1 }, { "label": "Native mn20 audio 768", "dimension": 768, "task": "ClothoT2ARetrieval", "family": "Audio recall", "primary_metric": "ndcg_at_10", "primary": 0.28877, "metrics": { "main_score": 0.3581, "ndcg_at_10": 0.28877, "recall_at_1": 0.14414, "recall_at_10": 0.4641, "mrr_at_10": 0.234475 }, "subsets": 1 } ], "source_result_dirs": [ "/shared/augmem/triembed/results/es_aist_memory_audio_native_default_20260501T1835Z/dim768/results/triembed__native-efficientat-768d/latest_model" ] }, { "label": "Dual-audio tower 1280", "dimension": 1280, "results_dir": "/shared/augmem/triembed/results/aist86m_full_mteb_mieb_maeb_1280_768_512_20260502T070609Z/dim1280/results/triembed__te-1280d/TE-86M-dual-audio-best_model", "completed_tasks": 8, "missing_tasks": [], "overall_mean": 0.3973782852564103, "family_means": { "Audio recall": 0.11287532051282051, "Image recall": 0.485, "Text continuity": 0.8787625 }, "rows": [ { "label": "Dual-audio tower 1280", "dimension": 1280, "task": "SprintDuplicateQuestions", "family": "Text continuity", "primary_metric": "main_score", "primary": 0.953368, "metrics": { "main_score": 0.953368 }, "subsets": 1 }, { "label": "Dual-audio tower 1280", "dimension": 1280, "task": "STSBenchmark", "family": "Text continuity", "primary_metric": "main_score", "primary": 0.804157, "metrics": { "main_score": 0.804157, "cosine_spearman": 0.804157, "spearman": 0.804154 }, "subsets": 1 }, { "label": "Dual-audio tower 1280", "dimension": 1280, "task": "Flickr30kT2IRetrieval", "family": "Image recall", "primary_metric": "ndcg_at_10", "primary": 0.50216, "metrics": { "main_score": 0.50216, "ndcg_at_10": 0.50216, "recall_at_1": 0.3254, "recall_at_10": 0.7004, "mrr_at_10": 0.439975 }, "subsets": 1 }, { "label": "Dual-audio tower 1280", "dimension": 1280, "task": "Flickr30kI2TRetrieval", "family": "Image recall", "primary_metric": "ndcg_at_10", "primary": 0.46784, "metrics": { "main_score": 0.46784, "ndcg_at_10": 0.46784, "recall_at_1": 0.0958, "recall_at_10": 0.5034, "mrr_at_10": 0.598869 }, "subsets": 1 }, { "label": "Dual-audio tower 1280", "dimension": 1280, "task": "CommonVoiceMini21T2ARetrieval", "family": "Audio recall", "primary_metric": "ndcg_at_10", "primary": 0.03849128205128205, "metrics": { "main_score": 0.04426282051282051, "ndcg_at_10": 0.03849128205128205, "recall_at_1": 0.00971991452991453, "recall_at_10": 0.08076905982905982, "mrr_at_10": 0.02587371794871795 }, "subsets": 117 }, { "label": "Dual-audio tower 1280", "dimension": 1280, "task": "MACST2ARetrieval", "family": "Audio recall", "primary_metric": "ndcg_at_10", "primary": 0.10964, "metrics": { "main_score": 0.15522, "ndcg_at_10": 0.10964, "recall_at_1": 0.04326, "recall_at_10": 0.19338, "mrr_at_10": 0.083683 }, "subsets": 1 }, { "label": "Dual-audio tower 1280", "dimension": 1280, "task": "UrbanSound8KT2ARetrieval", "family": "Audio recall", "primary_metric": "ndcg_at_10", "primary": 0.00823, "metrics": { "main_score": 0.00904, "ndcg_at_10": 0.00823, "recall_at_1": 0.00177, "recall_at_10": 0.01807, "mrr_at_10": 0.00531 }, "subsets": 1 }, { "label": "Dual-audio tower 1280", "dimension": 1280, "task": "ClothoT2ARetrieval", "family": "Audio recall", "primary_metric": "ndcg_at_10", "primary": 0.29514, "metrics": { "main_score": 0.36043, "ndcg_at_10": 0.29514, "recall_at_1": 0.14861, "recall_at_10": 0.47395, "mrr_at_10": 0.239903 }, "subsets": 1 } ], "source_result_dirs": [ "/shared/augmem/triembed/results/aist86m_full_mteb_mieb_maeb_1280_768_512_20260502T070609Z/dim1280/results/triembed__te-1280d/TE-86M-dual-audio-best_model" ] }, { "label": "Dual-audio tower 768", "dimension": 768, "results_dir": "/shared/augmem/triembed/results/aist86m_full_mteb_mieb_maeb_1280_768_512_20260502T070609Z/dim768/results/triembed__te-768d/TE-86M-dual-audio-best_model", "completed_tasks": 6, "missing_tasks": [ "MACST2ARetrieval", "UrbanSound8KT2ARetrieval" ], "overall_mean": 0.5098147193732193, "family_means": { "Audio recall": 0.16678465811965812, "Image recall": 0.48403999999999997, "Text continuity": 0.8786195 }, "rows": [ { "label": "Dual-audio tower 768", "dimension": 768, "task": "SprintDuplicateQuestions", "family": "Text continuity", "primary_metric": "main_score", "primary": 0.953072, "metrics": { "main_score": 0.953072 }, "subsets": 1 }, { "label": "Dual-audio tower 768", "dimension": 768, "task": "STSBenchmark", "family": "Text continuity", "primary_metric": "main_score", "primary": 0.804167, "metrics": { "main_score": 0.804167, "cosine_spearman": 0.804167, "spearman": 0.804167 }, "subsets": 1 }, { "label": "Dual-audio tower 768", "dimension": 768, "task": "Flickr30kT2IRetrieval", "family": "Image recall", "primary_metric": "ndcg_at_10", "primary": 0.50179, "metrics": { "main_score": 0.50179, "ndcg_at_10": 0.50179, "recall_at_1": 0.3254, "recall_at_10": 0.698, "mrr_at_10": 0.440147 }, "subsets": 1 }, { "label": "Dual-audio tower 768", "dimension": 768, "task": "Flickr30kI2TRetrieval", "family": "Image recall", "primary_metric": "ndcg_at_10", "primary": 0.46629, "metrics": { "main_score": 0.46629, "ndcg_at_10": 0.46629, "recall_at_1": 0.0956, "recall_at_10": 0.5022, "mrr_at_10": 0.597365 }, "subsets": 1 }, { "label": "Dual-audio tower 768", "dimension": 768, "task": "CommonVoiceMini21T2ARetrieval", "family": "Audio recall", "primary_metric": "ndcg_at_10", "primary": 0.03849931623931624, "metrics": { "main_score": 0.04466316239316239, "ndcg_at_10": 0.03849931623931624, "recall_at_1": 0.009814871794871794, "recall_at_10": 0.08058384615384616, "mrr_at_10": 0.025928871794871796 }, "subsets": 117 }, { "label": "Dual-audio tower 768", "dimension": 768, "task": "ClothoT2ARetrieval", "family": "Audio recall", "primary_metric": "ndcg_at_10", "primary": 0.29507, "metrics": { "main_score": 0.3615, "ndcg_at_10": 0.29507, "recall_at_1": 0.14861, "recall_at_10": 0.47359, "mrr_at_10": 0.239883 }, "subsets": 1 } ], "source_result_dirs": [ "/shared/augmem/triembed/results/aist86m_full_mteb_mieb_maeb_1280_768_512_20260502T070609Z/dim768/results/triembed__te-768d/TE-86M-dual-audio-best_model" ] }, { "label": "Dual-audio tower 512", "dimension": 512, "results_dir": "/shared/augmem/triembed/results/aist86m_full_mteb_mieb_maeb_1280_768_512_20260502T070609Z/dim512/results/triembed__te-512d/TE-86M-dual-audio-best_model", "completed_tasks": 4, "missing_tasks": [ "Flickr30kI2TRetrieval", "Flickr30kT2IRetrieval", "MACST2ARetrieval", "UrbanSound8KT2ARetrieval" ], "overall_mean": 0.5228179594017094, "family_means": { "Audio recall": 0.16697341880341882, "Text continuity": 0.8786625 }, "rows": [ { "label": "Dual-audio tower 512", "dimension": 512, "task": "SprintDuplicateQuestions", "family": "Text continuity", "primary_metric": "main_score", "primary": 0.952893, "metrics": { "main_score": 0.952893 }, "subsets": 1 }, { "label": "Dual-audio tower 512", "dimension": 512, "task": "STSBenchmark", "family": "Text continuity", "primary_metric": "main_score", "primary": 0.804432, "metrics": { "main_score": 0.804432, "cosine_spearman": 0.804432, "spearman": 0.804432 }, "subsets": 1 }, { "label": "Dual-audio tower 512", "dimension": 512, "task": "CommonVoiceMini21T2ARetrieval", "family": "Audio recall", "primary_metric": "ndcg_at_10", "primary": 0.03858683760683761, "metrics": { "main_score": 0.04408854700854701, "ndcg_at_10": 0.03858683760683761, "recall_at_1": 0.00959076923076923, "recall_at_10": 0.08129623931623932, "mrr_at_10": 0.025843299145299144 }, "subsets": 117 }, { "label": "Dual-audio tower 512", "dimension": 512, "task": "ClothoT2ARetrieval", "family": "Audio recall", "primary_metric": "ndcg_at_10", "primary": 0.29536, "metrics": { "main_score": 0.35882, "ndcg_at_10": 0.29536, "recall_at_1": 0.1513, "recall_at_10": 0.47162, "mrr_at_10": 0.240905 }, "subsets": 1 } ], "source_result_dirs": [ "/shared/augmem/triembed/results/aist86m_full_mteb_mieb_maeb_1280_768_512_20260502T070609Z/dim512/results/triembed__te-512d/TE-86M-dual-audio-best_model" ] } ], "comparisons": [ { "baseline": "Native mn20 audio 768", "target": "AIST-87M 768", "paired_tasks": 4, "mean_absolute_delta": -0.010867350427350436, "rows": [ { "task": "CommonVoiceMini21T2ARetrieval", "dimension": 768, "family": "Audio recall", "baseline": "Native mn20 audio 768", "baseline_primary": 0.035825042735042736, "target": "AIST-87M 768", "target_primary": 0.028395641025641027, "absolute_delta": -0.00742940170940171, "relative_delta_pct": -20.73801213399403 }, { "task": "MACST2ARetrieval", "dimension": 768, "family": "Audio recall", "baseline": "Native mn20 audio 768", "baseline_primary": 0.12746, "target": "AIST-87M 768", "target_primary": 0.11149, "absolute_delta": -0.015969999999999984, "relative_delta_pct": -12.529420994821894 }, { "task": "UrbanSound8KT2ARetrieval", "dimension": 768, "family": "Audio recall", "baseline": "Native mn20 audio 768", "baseline_primary": 0.00849, "target": "AIST-87M 768", "target_primary": 0.00851, "absolute_delta": 0.00002000000000000092, "relative_delta_pct": 0.2355712603062535 }, { "task": "ClothoT2ARetrieval", "dimension": 768, "family": "Audio recall", "baseline": "Native mn20 audio 768", "baseline_primary": 0.28877, "target": "AIST-87M 768", "target_primary": 0.26868, "absolute_delta": -0.020090000000000052, "relative_delta_pct": -6.9570938809433285 } ] }, { "baseline": "Dual-audio tower 768", "target": "AIST-87M 768", "paired_tasks": 6, "mean_absolute_delta": -0.06486544586894587, "rows": [ { "task": "SprintDuplicateQuestions", "dimension": 768, "family": "Text continuity", "baseline": "Dual-audio tower 768", "baseline_primary": 0.953072, "target": "AIST-87M 768", "target_primary": 0.874231, "absolute_delta": -0.07884100000000005, "relative_delta_pct": -8.272302617220948 }, { "task": "STSBenchmark", "dimension": 768, "family": "Text continuity", "baseline": "Dual-audio tower 768", "baseline_primary": 0.804167, "target": "AIST-87M 768", "target_primary": 0.650759, "absolute_delta": -0.153408, "relative_delta_pct": -19.076634579633335 }, { "task": "Flickr30kT2IRetrieval", "dimension": 768, "family": "Image recall", "baseline": "Dual-audio tower 768", "baseline_primary": 0.50179, "target": "AIST-87M 768", "target_primary": 0.46701, "absolute_delta": -0.03477999999999998, "relative_delta_pct": -6.931186352856769 }, { "task": "Flickr30kI2TRetrieval", "dimension": 768, "family": "Image recall", "baseline": "Dual-audio tower 768", "baseline_primary": 0.46629, "target": "AIST-87M 768", "target_primary": 0.38062, "absolute_delta": -0.08566999999999997, "relative_delta_pct": -18.372686525552762 }, { "task": "CommonVoiceMini21T2ARetrieval", "dimension": 768, "family": "Audio recall", "baseline": "Dual-audio tower 768", "baseline_primary": 0.03849931623931624, "target": "AIST-87M 768", "target_primary": 0.028395641025641027, "absolute_delta": -0.010103675213675212, "relative_delta_pct": -26.243778333281533 }, { "task": "ClothoT2ARetrieval", "dimension": 768, "family": "Audio recall", "baseline": "Dual-audio tower 768", "baseline_primary": 0.29507, "target": "AIST-87M 768", "target_primary": 0.26868, "absolute_delta": -0.026390000000000025, "relative_delta_pct": -8.943640492086633 } ] } ] }