| { |
| "tasks": [ |
| "SprintDuplicateQuestions", |
| "STSBenchmark", |
| "Flickr30kT2IRetrieval", |
| "Flickr30kI2TRetrieval", |
| "CommonVoiceMini21T2ARetrieval", |
| "MACST2ARetrieval", |
| "UrbanSound8KT2ARetrieval", |
| "ClothoT2ARetrieval" |
| ], |
| "primary_metric_policy": { |
| "text": "main_score", |
| "image_text_retrieval": "ndcg_at_10", |
| "audio_text_retrieval": "ndcg_at_10" |
| }, |
| "runs": [ |
| { |
| "label": "AIST-87M 1280", |
| "dimension": 1280, |
| "results_dir": "/shared/augmem/triembed/results/aist81m_raw1280_mn20_merged_teacher_20260503T0125Z_memory_slice_default/dim1280/results/triembed__te-1280d/best_model", |
| "completed_tasks": 8, |
| "missing_tasks": [], |
| "overall_mean": 0.3491338344017094, |
| "family_means": { |
| "Audio recall": 0.1041809188034188, |
| "Image recall": 0.424995, |
| "Text continuity": 0.7631785 |
| }, |
| "rows": [ |
| { |
| "label": "AIST-87M 1280", |
| "dimension": 1280, |
| "task": "SprintDuplicateQuestions", |
| "family": "Text continuity", |
| "primary_metric": "main_score", |
| "primary": 0.875145, |
| "metrics": { |
| "main_score": 0.875145 |
| }, |
| "subsets": 1 |
| }, |
| { |
| "label": "AIST-87M 1280", |
| "dimension": 1280, |
| "task": "STSBenchmark", |
| "family": "Text continuity", |
| "primary_metric": "main_score", |
| "primary": 0.651212, |
| "metrics": { |
| "main_score": 0.651212, |
| "cosine_spearman": 0.651212, |
| "spearman": 0.651212 |
| }, |
| "subsets": 1 |
| }, |
| { |
| "label": "AIST-87M 1280", |
| "dimension": 1280, |
| "task": "Flickr30kT2IRetrieval", |
| "family": "Image recall", |
| "primary_metric": "ndcg_at_10", |
| "primary": 0.4685, |
| "metrics": { |
| "main_score": 0.4685, |
| "ndcg_at_10": 0.4685, |
| "recall_at_1": 0.2956, |
| "recall_at_10": 0.6718, |
| "mrr_at_10": 0.405197 |
| }, |
| "subsets": 1 |
| }, |
| { |
| "label": "AIST-87M 1280", |
| "dimension": 1280, |
| "task": "Flickr30kI2TRetrieval", |
| "family": "Image recall", |
| "primary_metric": "ndcg_at_10", |
| "primary": 0.38149, |
| "metrics": { |
| "main_score": 0.38149, |
| "ndcg_at_10": 0.38149, |
| "recall_at_1": 0.0816, |
| "recall_at_10": 0.4072, |
| "mrr_at_10": 0.533862 |
| }, |
| "subsets": 1 |
| }, |
| { |
| "label": "AIST-87M 1280", |
| "dimension": 1280, |
| "task": "CommonVoiceMini21T2ARetrieval", |
| "family": "Audio recall", |
| "primary_metric": "ndcg_at_10", |
| "primary": 0.028403675213675213, |
| "metrics": { |
| "main_score": 0.03276290598290598, |
| "ndcg_at_10": 0.028403675213675213, |
| "recall_at_1": 0.005908376068376069, |
| "recall_at_10": 0.061962393162393166, |
| "mrr_at_10": 0.01842434188034188 |
| }, |
| "subsets": 117 |
| }, |
| { |
| "label": "AIST-87M 1280", |
| "dimension": 1280, |
| "task": "MACST2ARetrieval", |
| "family": "Audio recall", |
| "primary_metric": "ndcg_at_10", |
| "primary": 0.11037, |
| "metrics": { |
| "main_score": 0.13995, |
| "ndcg_at_10": 0.11037, |
| "recall_at_1": 0.03308, |
| "recall_at_10": 0.21374, |
| "mrr_at_10": 0.079078 |
| }, |
| "subsets": 1 |
| }, |
| { |
| "label": "AIST-87M 1280", |
| "dimension": 1280, |
| "task": "UrbanSound8KT2ARetrieval", |
| "family": "Audio recall", |
| "primary_metric": "ndcg_at_10", |
| "primary": 0.00851, |
| "metrics": { |
| "main_score": 0.00963, |
| "ndcg_at_10": 0.00851, |
| "recall_at_1": 0.00196, |
| "recall_at_10": 0.01847, |
| "mrr_at_10": 0.00556 |
| }, |
| "subsets": 1 |
| }, |
| { |
| "label": "AIST-87M 1280", |
| "dimension": 1280, |
| "task": "ClothoT2ARetrieval", |
| "family": "Audio recall", |
| "primary_metric": "ndcg_at_10", |
| "primary": 0.26944, |
| "metrics": { |
| "main_score": 0.3325, |
| "ndcg_at_10": 0.26944, |
| "recall_at_1": 0.1282, |
| "recall_at_10": 0.44315, |
| "mrr_at_10": 0.215861 |
| }, |
| "subsets": 1 |
| } |
| ], |
| "source_result_dirs": [ |
| "/shared/augmem/triembed/results/aist81m_raw1280_mn20_merged_teacher_20260503T0125Z_memory_slice_default/dim1280/results/triembed__te-1280d/best_model" |
| ] |
| }, |
| { |
| "label": "AIST-87M 768", |
| "dimension": 768, |
| "results_dir": "/shared/augmem/triembed/results/aist81m_raw1280_mn20_merged_teacher_20260503T0125Z_memory_slice_default/dim768/results/triembed__te-768d/best_model", |
| "completed_tasks": 8, |
| "missing_tasks": [], |
| "overall_mean": 0.34871195512820513, |
| "family_means": { |
| "Audio recall": 0.10426891025641025, |
| "Image recall": 0.423815, |
| "Text continuity": 0.7624949999999999 |
| }, |
| "rows": [ |
| { |
| "label": "AIST-87M 768", |
| "dimension": 768, |
| "task": "SprintDuplicateQuestions", |
| "family": "Text continuity", |
| "primary_metric": "main_score", |
| "primary": 0.874231, |
| "metrics": { |
| "main_score": 0.874231 |
| }, |
| "subsets": 1 |
| }, |
| { |
| "label": "AIST-87M 768", |
| "dimension": 768, |
| "task": "STSBenchmark", |
| "family": "Text continuity", |
| "primary_metric": "main_score", |
| "primary": 0.650759, |
| "metrics": { |
| "main_score": 0.650759, |
| "cosine_spearman": 0.650759, |
| "spearman": 0.650759 |
| }, |
| "subsets": 1 |
| }, |
| { |
| "label": "AIST-87M 768", |
| "dimension": 768, |
| "task": "Flickr30kT2IRetrieval", |
| "family": "Image recall", |
| "primary_metric": "ndcg_at_10", |
| "primary": 0.46701, |
| "metrics": { |
| "main_score": 0.46701, |
| "ndcg_at_10": 0.46701, |
| "recall_at_1": 0.2922, |
| "recall_at_10": 0.6712, |
| "mrr_at_10": 0.403385 |
| }, |
| "subsets": 1 |
| }, |
| { |
| "label": "AIST-87M 768", |
| "dimension": 768, |
| "task": "Flickr30kI2TRetrieval", |
| "family": "Image recall", |
| "primary_metric": "ndcg_at_10", |
| "primary": 0.38062, |
| "metrics": { |
| "main_score": 0.38062, |
| "ndcg_at_10": 0.38062, |
| "recall_at_1": 0.0814, |
| "recall_at_10": 0.4058, |
| "mrr_at_10": 0.532687 |
| }, |
| "subsets": 1 |
| }, |
| { |
| "label": "AIST-87M 768", |
| "dimension": 768, |
| "task": "CommonVoiceMini21T2ARetrieval", |
| "family": "Audio recall", |
| "primary_metric": "ndcg_at_10", |
| "primary": 0.028395641025641027, |
| "metrics": { |
| "main_score": 0.03299991452991453, |
| "ndcg_at_10": 0.028395641025641027, |
| "recall_at_1": 0.005907350427350427, |
| "recall_at_10": 0.062035897435897436, |
| "mrr_at_10": 0.01839460683760684 |
| }, |
| "subsets": 117 |
| }, |
| { |
| "label": "AIST-87M 768", |
| "dimension": 768, |
| "task": "MACST2ARetrieval", |
| "family": "Audio recall", |
| "primary_metric": "ndcg_at_10", |
| "primary": 0.11149, |
| "metrics": { |
| "main_score": 0.14249, |
| "ndcg_at_10": 0.11149, |
| "recall_at_1": 0.03308, |
| "recall_at_10": 0.21628, |
| "mrr_at_10": 0.079723 |
| }, |
| "subsets": 1 |
| }, |
| { |
| "label": "AIST-87M 768", |
| "dimension": 768, |
| "task": "UrbanSound8KT2ARetrieval", |
| "family": "Audio recall", |
| "primary_metric": "ndcg_at_10", |
| "primary": 0.00851, |
| "metrics": { |
| "main_score": 0.00963, |
| "ndcg_at_10": 0.00851, |
| "recall_at_1": 0.00196, |
| "recall_at_10": 0.01847, |
| "mrr_at_10": 0.005562 |
| }, |
| "subsets": 1 |
| }, |
| { |
| "label": "AIST-87M 768", |
| "dimension": 768, |
| "task": "ClothoT2ARetrieval", |
| "family": "Audio recall", |
| "primary_metric": "ndcg_at_10", |
| "primary": 0.26868, |
| "metrics": { |
| "main_score": 0.33178, |
| "ndcg_at_10": 0.26868, |
| "recall_at_1": 0.12695, |
| "recall_at_10": 0.44208, |
| "mrr_at_10": 0.21516 |
| }, |
| "subsets": 1 |
| } |
| ], |
| "source_result_dirs": [ |
| "/shared/augmem/triembed/results/aist81m_raw1280_mn20_merged_teacher_20260503T0125Z_memory_slice_default/dim768/results/triembed__te-768d/best_model", |
| "/shared/augmem/triembed/results/aist81m_raw1280_mn20_merged_teacher_20260503T0125Z_memory_slice_dim768_fill/dim768/results/triembed__te-768d/best_model" |
| ] |
| }, |
| { |
| "label": "AIST-87M 512", |
| "dimension": 512, |
| "results_dir": "/shared/augmem/triembed/results/aist81m_raw1280_mn20_merged_teacher_20260503T0125Z_memory_slice_dim512/dim512/results/triembed__te-512d/best_model", |
| "completed_tasks": 8, |
| "missing_tasks": [], |
| "overall_mean": 0.3488224732905983, |
| "family_means": { |
| "Audio recall": 0.10438869658119658, |
| "Image recall": 0.42417499999999997, |
| "Text continuity": 0.7623375 |
| }, |
| "rows": [ |
| { |
| "label": "AIST-87M 512", |
| "dimension": 512, |
| "task": "SprintDuplicateQuestions", |
| "family": "Text continuity", |
| "primary_metric": "main_score", |
| "primary": 0.873508, |
| "metrics": { |
| "main_score": 0.873508 |
| }, |
| "subsets": 1 |
| }, |
| { |
| "label": "AIST-87M 512", |
| "dimension": 512, |
| "task": "STSBenchmark", |
| "family": "Text continuity", |
| "primary_metric": "main_score", |
| "primary": 0.651167, |
| "metrics": { |
| "main_score": 0.651167, |
| "cosine_spearman": 0.651167, |
| "spearman": 0.651167 |
| }, |
| "subsets": 1 |
| }, |
| { |
| "label": "AIST-87M 512", |
| "dimension": 512, |
| "task": "Flickr30kT2IRetrieval", |
| "family": "Image recall", |
| "primary_metric": "ndcg_at_10", |
| "primary": 0.4676, |
| "metrics": { |
| "main_score": 0.4676, |
| "ndcg_at_10": 0.4676, |
| "recall_at_1": 0.2954, |
| "recall_at_10": 0.6702, |
| "mrr_at_10": 0.404515 |
| }, |
| "subsets": 1 |
| }, |
| { |
| "label": "AIST-87M 512", |
| "dimension": 512, |
| "task": "Flickr30kI2TRetrieval", |
| "family": "Image recall", |
| "primary_metric": "ndcg_at_10", |
| "primary": 0.38075, |
| "metrics": { |
| "main_score": 0.38075, |
| "ndcg_at_10": 0.38075, |
| "recall_at_1": 0.0824, |
| "recall_at_10": 0.4052, |
| "mrr_at_10": 0.535146 |
| }, |
| "subsets": 1 |
| }, |
| { |
| "label": "AIST-87M 512", |
| "dimension": 512, |
| "task": "CommonVoiceMini21T2ARetrieval", |
| "family": "Audio recall", |
| "primary_metric": "ndcg_at_10", |
| "primary": 0.028264786324786326, |
| "metrics": { |
| "main_score": 0.03229504273504274, |
| "ndcg_at_10": 0.028264786324786326, |
| "recall_at_1": 0.006467948717948718, |
| "recall_at_10": 0.060837521367521366, |
| "mrr_at_10": 0.018573598290598292 |
| }, |
| "subsets": 117 |
| }, |
| { |
| "label": "AIST-87M 512", |
| "dimension": 512, |
| "task": "MACST2ARetrieval", |
| "family": "Audio recall", |
| "primary_metric": "ndcg_at_10", |
| "primary": 0.11287, |
| "metrics": { |
| "main_score": 0.13486, |
| "ndcg_at_10": 0.11287, |
| "recall_at_1": 0.03308, |
| "recall_at_10": 0.22137, |
| "mrr_at_10": 0.080181 |
| }, |
| "subsets": 1 |
| }, |
| { |
| "label": "AIST-87M 512", |
| "dimension": 512, |
| "task": "UrbanSound8KT2ARetrieval", |
| "family": "Audio recall", |
| "primary_metric": "ndcg_at_10", |
| "primary": 0.0085, |
| "metrics": { |
| "main_score": 0.00923, |
| "ndcg_at_10": 0.0085, |
| "recall_at_1": 0.00196, |
| "recall_at_10": 0.01847, |
| "mrr_at_10": 0.005544 |
| }, |
| "subsets": 1 |
| }, |
| { |
| "label": "AIST-87M 512", |
| "dimension": 512, |
| "task": "ClothoT2ARetrieval", |
| "family": "Audio recall", |
| "primary_metric": "ndcg_at_10", |
| "primary": 0.26792, |
| "metrics": { |
| "main_score": 0.33107, |
| "ndcg_at_10": 0.26792, |
| "recall_at_1": 0.1248, |
| "recall_at_10": 0.44261, |
| "mrr_at_10": 0.213985 |
| }, |
| "subsets": 1 |
| } |
| ], |
| "source_result_dirs": [ |
| "/shared/augmem/triembed/results/aist81m_raw1280_mn20_merged_teacher_20260503T0125Z_memory_slice_dim512/dim512/results/triembed__te-512d/best_model" |
| ] |
| }, |
| { |
| "label": "AIST-95M 1280 Flickr", |
| "dimension": 1280, |
| "results_dir": "/shared/augmem/triembed/results/aist95m_1280_mieb_flickr_20260502T0217Z/dim1280/results/triembed__te-1280d/best_model", |
| "completed_tasks": 2, |
| "missing_tasks": [ |
| "ClothoT2ARetrieval", |
| "CommonVoiceMini21T2ARetrieval", |
| "MACST2ARetrieval", |
| "STSBenchmark", |
| "SprintDuplicateQuestions", |
| "UrbanSound8KT2ARetrieval" |
| ], |
| "overall_mean": 0.485, |
| "family_means": { |
| "Image recall": 0.485 |
| }, |
| "rows": [ |
| { |
| "label": "AIST-95M 1280 Flickr", |
| "dimension": 1280, |
| "task": "Flickr30kT2IRetrieval", |
| "family": "Image recall", |
| "primary_metric": "ndcg_at_10", |
| "primary": 0.50216, |
| "metrics": { |
| "main_score": 0.50216, |
| "ndcg_at_10": 0.50216, |
| "recall_at_1": 0.3254, |
| "recall_at_10": 0.7004, |
| "mrr_at_10": 0.439975 |
| }, |
| "subsets": 1 |
| }, |
| { |
| "label": "AIST-95M 1280 Flickr", |
| "dimension": 1280, |
| "task": "Flickr30kI2TRetrieval", |
| "family": "Image recall", |
| "primary_metric": "ndcg_at_10", |
| "primary": 0.46784, |
| "metrics": { |
| "main_score": 0.46784, |
| "ndcg_at_10": 0.46784, |
| "recall_at_1": 0.0958, |
| "recall_at_10": 0.5034, |
| "mrr_at_10": 0.598869 |
| }, |
| "subsets": 1 |
| } |
| ], |
| "source_result_dirs": [ |
| "/shared/augmem/triembed/results/aist95m_1280_mieb_flickr_20260502T0217Z/dim1280/results/triembed__te-1280d/best_model" |
| ] |
| }, |
| { |
| "label": "Native mn20 audio 768", |
| "dimension": 768, |
| "results_dir": "/shared/augmem/triembed/results/es_aist_memory_audio_native_default_20260501T1835Z/dim768/results/triembed__native-efficientat-768d/latest_model", |
| "completed_tasks": 4, |
| "missing_tasks": [ |
| "Flickr30kI2TRetrieval", |
| "Flickr30kT2IRetrieval", |
| "STSBenchmark", |
| "SprintDuplicateQuestions" |
| ], |
| "overall_mean": 0.11513626068376069, |
| "family_means": { |
| "Audio recall": 0.11513626068376069 |
| }, |
| "rows": [ |
| { |
| "label": "Native mn20 audio 768", |
| "dimension": 768, |
| "task": "CommonVoiceMini21T2ARetrieval", |
| "family": "Audio recall", |
| "primary_metric": "ndcg_at_10", |
| "primary": 0.035825042735042736, |
| "metrics": { |
| "main_score": 0.04166820512820513, |
| "ndcg_at_10": 0.035825042735042736, |
| "recall_at_1": 0.009125726495726495, |
| "recall_at_10": 0.07585017094017094, |
| "mrr_at_10": 0.023907692307692307 |
| }, |
| "subsets": 117 |
| }, |
| { |
| "label": "Native mn20 audio 768", |
| "dimension": 768, |
| "task": "MACST2ARetrieval", |
| "family": "Audio recall", |
| "primary_metric": "ndcg_at_10", |
| "primary": 0.12746, |
| "metrics": { |
| "main_score": 0.13995, |
| "ndcg_at_10": 0.12746, |
| "recall_at_1": 0.05852, |
| "recall_at_10": 0.22392, |
| "mrr_at_10": 0.098715 |
| }, |
| "subsets": 1 |
| }, |
| { |
| "label": "Native mn20 audio 768", |
| "dimension": 768, |
| "task": "UrbanSound8KT2ARetrieval", |
| "family": "Audio recall", |
| "primary_metric": "ndcg_at_10", |
| "primary": 0.00849, |
| "metrics": { |
| "main_score": 0.00923, |
| "ndcg_at_10": 0.00849, |
| "recall_at_1": 0.00196, |
| "recall_at_10": 0.01866, |
| "mrr_at_10": 0.005487 |
| }, |
| "subsets": 1 |
| }, |
| { |
| "label": "Native mn20 audio 768", |
| "dimension": 768, |
| "task": "ClothoT2ARetrieval", |
| "family": "Audio recall", |
| "primary_metric": "ndcg_at_10", |
| "primary": 0.28877, |
| "metrics": { |
| "main_score": 0.3581, |
| "ndcg_at_10": 0.28877, |
| "recall_at_1": 0.14414, |
| "recall_at_10": 0.4641, |
| "mrr_at_10": 0.234475 |
| }, |
| "subsets": 1 |
| } |
| ], |
| "source_result_dirs": [ |
| "/shared/augmem/triembed/results/es_aist_memory_audio_native_default_20260501T1835Z/dim768/results/triembed__native-efficientat-768d/latest_model" |
| ] |
| }, |
| { |
| "label": "Dual-audio tower 1280", |
| "dimension": 1280, |
| "results_dir": "/shared/augmem/triembed/results/aist86m_full_mteb_mieb_maeb_1280_768_512_20260502T070609Z/dim1280/results/triembed__te-1280d/TE-86M-dual-audio-best_model", |
| "completed_tasks": 8, |
| "missing_tasks": [], |
| "overall_mean": 0.3973782852564103, |
| "family_means": { |
| "Audio recall": 0.11287532051282051, |
| "Image recall": 0.485, |
| "Text continuity": 0.8787625 |
| }, |
| "rows": [ |
| { |
| "label": "Dual-audio tower 1280", |
| "dimension": 1280, |
| "task": "SprintDuplicateQuestions", |
| "family": "Text continuity", |
| "primary_metric": "main_score", |
| "primary": 0.953368, |
| "metrics": { |
| "main_score": 0.953368 |
| }, |
| "subsets": 1 |
| }, |
| { |
| "label": "Dual-audio tower 1280", |
| "dimension": 1280, |
| "task": "STSBenchmark", |
| "family": "Text continuity", |
| "primary_metric": "main_score", |
| "primary": 0.804157, |
| "metrics": { |
| "main_score": 0.804157, |
| "cosine_spearman": 0.804157, |
| "spearman": 0.804154 |
| }, |
| "subsets": 1 |
| }, |
| { |
| "label": "Dual-audio tower 1280", |
| "dimension": 1280, |
| "task": "Flickr30kT2IRetrieval", |
| "family": "Image recall", |
| "primary_metric": "ndcg_at_10", |
| "primary": 0.50216, |
| "metrics": { |
| "main_score": 0.50216, |
| "ndcg_at_10": 0.50216, |
| "recall_at_1": 0.3254, |
| "recall_at_10": 0.7004, |
| "mrr_at_10": 0.439975 |
| }, |
| "subsets": 1 |
| }, |
| { |
| "label": "Dual-audio tower 1280", |
| "dimension": 1280, |
| "task": "Flickr30kI2TRetrieval", |
| "family": "Image recall", |
| "primary_metric": "ndcg_at_10", |
| "primary": 0.46784, |
| "metrics": { |
| "main_score": 0.46784, |
| "ndcg_at_10": 0.46784, |
| "recall_at_1": 0.0958, |
| "recall_at_10": 0.5034, |
| "mrr_at_10": 0.598869 |
| }, |
| "subsets": 1 |
| }, |
| { |
| "label": "Dual-audio tower 1280", |
| "dimension": 1280, |
| "task": "CommonVoiceMini21T2ARetrieval", |
| "family": "Audio recall", |
| "primary_metric": "ndcg_at_10", |
| "primary": 0.03849128205128205, |
| "metrics": { |
| "main_score": 0.04426282051282051, |
| "ndcg_at_10": 0.03849128205128205, |
| "recall_at_1": 0.00971991452991453, |
| "recall_at_10": 0.08076905982905982, |
| "mrr_at_10": 0.02587371794871795 |
| }, |
| "subsets": 117 |
| }, |
| { |
| "label": "Dual-audio tower 1280", |
| "dimension": 1280, |
| "task": "MACST2ARetrieval", |
| "family": "Audio recall", |
| "primary_metric": "ndcg_at_10", |
| "primary": 0.10964, |
| "metrics": { |
| "main_score": 0.15522, |
| "ndcg_at_10": 0.10964, |
| "recall_at_1": 0.04326, |
| "recall_at_10": 0.19338, |
| "mrr_at_10": 0.083683 |
| }, |
| "subsets": 1 |
| }, |
| { |
| "label": "Dual-audio tower 1280", |
| "dimension": 1280, |
| "task": "UrbanSound8KT2ARetrieval", |
| "family": "Audio recall", |
| "primary_metric": "ndcg_at_10", |
| "primary": 0.00823, |
| "metrics": { |
| "main_score": 0.00904, |
| "ndcg_at_10": 0.00823, |
| "recall_at_1": 0.00177, |
| "recall_at_10": 0.01807, |
| "mrr_at_10": 0.00531 |
| }, |
| "subsets": 1 |
| }, |
| { |
| "label": "Dual-audio tower 1280", |
| "dimension": 1280, |
| "task": "ClothoT2ARetrieval", |
| "family": "Audio recall", |
| "primary_metric": "ndcg_at_10", |
| "primary": 0.29514, |
| "metrics": { |
| "main_score": 0.36043, |
| "ndcg_at_10": 0.29514, |
| "recall_at_1": 0.14861, |
| "recall_at_10": 0.47395, |
| "mrr_at_10": 0.239903 |
| }, |
| "subsets": 1 |
| } |
| ], |
| "source_result_dirs": [ |
| "/shared/augmem/triembed/results/aist86m_full_mteb_mieb_maeb_1280_768_512_20260502T070609Z/dim1280/results/triembed__te-1280d/TE-86M-dual-audio-best_model" |
| ] |
| }, |
| { |
| "label": "Dual-audio tower 768", |
| "dimension": 768, |
| "results_dir": "/shared/augmem/triembed/results/aist86m_full_mteb_mieb_maeb_1280_768_512_20260502T070609Z/dim768/results/triembed__te-768d/TE-86M-dual-audio-best_model", |
| "completed_tasks": 6, |
| "missing_tasks": [ |
| "MACST2ARetrieval", |
| "UrbanSound8KT2ARetrieval" |
| ], |
| "overall_mean": 0.5098147193732193, |
| "family_means": { |
| "Audio recall": 0.16678465811965812, |
| "Image recall": 0.48403999999999997, |
| "Text continuity": 0.8786195 |
| }, |
| "rows": [ |
| { |
| "label": "Dual-audio tower 768", |
| "dimension": 768, |
| "task": "SprintDuplicateQuestions", |
| "family": "Text continuity", |
| "primary_metric": "main_score", |
| "primary": 0.953072, |
| "metrics": { |
| "main_score": 0.953072 |
| }, |
| "subsets": 1 |
| }, |
| { |
| "label": "Dual-audio tower 768", |
| "dimension": 768, |
| "task": "STSBenchmark", |
| "family": "Text continuity", |
| "primary_metric": "main_score", |
| "primary": 0.804167, |
| "metrics": { |
| "main_score": 0.804167, |
| "cosine_spearman": 0.804167, |
| "spearman": 0.804167 |
| }, |
| "subsets": 1 |
| }, |
| { |
| "label": "Dual-audio tower 768", |
| "dimension": 768, |
| "task": "Flickr30kT2IRetrieval", |
| "family": "Image recall", |
| "primary_metric": "ndcg_at_10", |
| "primary": 0.50179, |
| "metrics": { |
| "main_score": 0.50179, |
| "ndcg_at_10": 0.50179, |
| "recall_at_1": 0.3254, |
| "recall_at_10": 0.698, |
| "mrr_at_10": 0.440147 |
| }, |
| "subsets": 1 |
| }, |
| { |
| "label": "Dual-audio tower 768", |
| "dimension": 768, |
| "task": "Flickr30kI2TRetrieval", |
| "family": "Image recall", |
| "primary_metric": "ndcg_at_10", |
| "primary": 0.46629, |
| "metrics": { |
| "main_score": 0.46629, |
| "ndcg_at_10": 0.46629, |
| "recall_at_1": 0.0956, |
| "recall_at_10": 0.5022, |
| "mrr_at_10": 0.597365 |
| }, |
| "subsets": 1 |
| }, |
| { |
| "label": "Dual-audio tower 768", |
| "dimension": 768, |
| "task": "CommonVoiceMini21T2ARetrieval", |
| "family": "Audio recall", |
| "primary_metric": "ndcg_at_10", |
| "primary": 0.03849931623931624, |
| "metrics": { |
| "main_score": 0.04466316239316239, |
| "ndcg_at_10": 0.03849931623931624, |
| "recall_at_1": 0.009814871794871794, |
| "recall_at_10": 0.08058384615384616, |
| "mrr_at_10": 0.025928871794871796 |
| }, |
| "subsets": 117 |
| }, |
| { |
| "label": "Dual-audio tower 768", |
| "dimension": 768, |
| "task": "ClothoT2ARetrieval", |
| "family": "Audio recall", |
| "primary_metric": "ndcg_at_10", |
| "primary": 0.29507, |
| "metrics": { |
| "main_score": 0.3615, |
| "ndcg_at_10": 0.29507, |
| "recall_at_1": 0.14861, |
| "recall_at_10": 0.47359, |
| "mrr_at_10": 0.239883 |
| }, |
| "subsets": 1 |
| } |
| ], |
| "source_result_dirs": [ |
| "/shared/augmem/triembed/results/aist86m_full_mteb_mieb_maeb_1280_768_512_20260502T070609Z/dim768/results/triembed__te-768d/TE-86M-dual-audio-best_model" |
| ] |
| }, |
| { |
| "label": "Dual-audio tower 512", |
| "dimension": 512, |
| "results_dir": "/shared/augmem/triembed/results/aist86m_full_mteb_mieb_maeb_1280_768_512_20260502T070609Z/dim512/results/triembed__te-512d/TE-86M-dual-audio-best_model", |
| "completed_tasks": 4, |
| "missing_tasks": [ |
| "Flickr30kI2TRetrieval", |
| "Flickr30kT2IRetrieval", |
| "MACST2ARetrieval", |
| "UrbanSound8KT2ARetrieval" |
| ], |
| "overall_mean": 0.5228179594017094, |
| "family_means": { |
| "Audio recall": 0.16697341880341882, |
| "Text continuity": 0.8786625 |
| }, |
| "rows": [ |
| { |
| "label": "Dual-audio tower 512", |
| "dimension": 512, |
| "task": "SprintDuplicateQuestions", |
| "family": "Text continuity", |
| "primary_metric": "main_score", |
| "primary": 0.952893, |
| "metrics": { |
| "main_score": 0.952893 |
| }, |
| "subsets": 1 |
| }, |
| { |
| "label": "Dual-audio tower 512", |
| "dimension": 512, |
| "task": "STSBenchmark", |
| "family": "Text continuity", |
| "primary_metric": "main_score", |
| "primary": 0.804432, |
| "metrics": { |
| "main_score": 0.804432, |
| "cosine_spearman": 0.804432, |
| "spearman": 0.804432 |
| }, |
| "subsets": 1 |
| }, |
| { |
| "label": "Dual-audio tower 512", |
| "dimension": 512, |
| "task": "CommonVoiceMini21T2ARetrieval", |
| "family": "Audio recall", |
| "primary_metric": "ndcg_at_10", |
| "primary": 0.03858683760683761, |
| "metrics": { |
| "main_score": 0.04408854700854701, |
| "ndcg_at_10": 0.03858683760683761, |
| "recall_at_1": 0.00959076923076923, |
| "recall_at_10": 0.08129623931623932, |
| "mrr_at_10": 0.025843299145299144 |
| }, |
| "subsets": 117 |
| }, |
| { |
| "label": "Dual-audio tower 512", |
| "dimension": 512, |
| "task": "ClothoT2ARetrieval", |
| "family": "Audio recall", |
| "primary_metric": "ndcg_at_10", |
| "primary": 0.29536, |
| "metrics": { |
| "main_score": 0.35882, |
| "ndcg_at_10": 0.29536, |
| "recall_at_1": 0.1513, |
| "recall_at_10": 0.47162, |
| "mrr_at_10": 0.240905 |
| }, |
| "subsets": 1 |
| } |
| ], |
| "source_result_dirs": [ |
| "/shared/augmem/triembed/results/aist86m_full_mteb_mieb_maeb_1280_768_512_20260502T070609Z/dim512/results/triembed__te-512d/TE-86M-dual-audio-best_model" |
| ] |
| } |
| ], |
| "comparisons": [ |
| { |
| "baseline": "Native mn20 audio 768", |
| "target": "AIST-87M 768", |
| "paired_tasks": 4, |
| "mean_absolute_delta": -0.010867350427350436, |
| "rows": [ |
| { |
| "task": "CommonVoiceMini21T2ARetrieval", |
| "dimension": 768, |
| "family": "Audio recall", |
| "baseline": "Native mn20 audio 768", |
| "baseline_primary": 0.035825042735042736, |
| "target": "AIST-87M 768", |
| "target_primary": 0.028395641025641027, |
| "absolute_delta": -0.00742940170940171, |
| "relative_delta_pct": -20.73801213399403 |
| }, |
| { |
| "task": "MACST2ARetrieval", |
| "dimension": 768, |
| "family": "Audio recall", |
| "baseline": "Native mn20 audio 768", |
| "baseline_primary": 0.12746, |
| "target": "AIST-87M 768", |
| "target_primary": 0.11149, |
| "absolute_delta": -0.015969999999999984, |
| "relative_delta_pct": -12.529420994821894 |
| }, |
| { |
| "task": "UrbanSound8KT2ARetrieval", |
| "dimension": 768, |
| "family": "Audio recall", |
| "baseline": "Native mn20 audio 768", |
| "baseline_primary": 0.00849, |
| "target": "AIST-87M 768", |
| "target_primary": 0.00851, |
| "absolute_delta": 0.00002000000000000092, |
| "relative_delta_pct": 0.2355712603062535 |
| }, |
| { |
| "task": "ClothoT2ARetrieval", |
| "dimension": 768, |
| "family": "Audio recall", |
| "baseline": "Native mn20 audio 768", |
| "baseline_primary": 0.28877, |
| "target": "AIST-87M 768", |
| "target_primary": 0.26868, |
| "absolute_delta": -0.020090000000000052, |
| "relative_delta_pct": -6.9570938809433285 |
| } |
| ] |
| }, |
| { |
| "baseline": "Dual-audio tower 768", |
| "target": "AIST-87M 768", |
| "paired_tasks": 6, |
| "mean_absolute_delta": -0.06486544586894587, |
| "rows": [ |
| { |
| "task": "SprintDuplicateQuestions", |
| "dimension": 768, |
| "family": "Text continuity", |
| "baseline": "Dual-audio tower 768", |
| "baseline_primary": 0.953072, |
| "target": "AIST-87M 768", |
| "target_primary": 0.874231, |
| "absolute_delta": -0.07884100000000005, |
| "relative_delta_pct": -8.272302617220948 |
| }, |
| { |
| "task": "STSBenchmark", |
| "dimension": 768, |
| "family": "Text continuity", |
| "baseline": "Dual-audio tower 768", |
| "baseline_primary": 0.804167, |
| "target": "AIST-87M 768", |
| "target_primary": 0.650759, |
| "absolute_delta": -0.153408, |
| "relative_delta_pct": -19.076634579633335 |
| }, |
| { |
| "task": "Flickr30kT2IRetrieval", |
| "dimension": 768, |
| "family": "Image recall", |
| "baseline": "Dual-audio tower 768", |
| "baseline_primary": 0.50179, |
| "target": "AIST-87M 768", |
| "target_primary": 0.46701, |
| "absolute_delta": -0.03477999999999998, |
| "relative_delta_pct": -6.931186352856769 |
| }, |
| { |
| "task": "Flickr30kI2TRetrieval", |
| "dimension": 768, |
| "family": "Image recall", |
| "baseline": "Dual-audio tower 768", |
| "baseline_primary": 0.46629, |
| "target": "AIST-87M 768", |
| "target_primary": 0.38062, |
| "absolute_delta": -0.08566999999999997, |
| "relative_delta_pct": -18.372686525552762 |
| }, |
| { |
| "task": "CommonVoiceMini21T2ARetrieval", |
| "dimension": 768, |
| "family": "Audio recall", |
| "baseline": "Dual-audio tower 768", |
| "baseline_primary": 0.03849931623931624, |
| "target": "AIST-87M 768", |
| "target_primary": 0.028395641025641027, |
| "absolute_delta": -0.010103675213675212, |
| "relative_delta_pct": -26.243778333281533 |
| }, |
| { |
| "task": "ClothoT2ARetrieval", |
| "dimension": 768, |
| "family": "Audio recall", |
| "baseline": "Dual-audio tower 768", |
| "baseline_primary": 0.29507, |
| "target": "AIST-87M 768", |
| "target_primary": 0.26868, |
| "absolute_delta": -0.026390000000000025, |
| "relative_delta_pct": -8.943640492086633 |
| } |
| ] |
| } |
| ] |
| } |
|
|