diff --git a/.gitattributes b/.gitattributes index db21ce325cb4103afe69ff3a339e23330e53d6e1..0869f52539562b5fe4e664392fee470c645e1fbc 100644 --- a/.gitattributes +++ b/.gitattributes @@ -36,3 +36,9 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text SFR-Embedding-Code-2B_R\#0.8\#0.6\#dinov2-large\#0.0\#0.2\#rouge_0.2\#top_8\#inter_0.4/clusters/image_uniform.jsonl filter=lfs diff=lfs merge=lfs -text SFR-Embedding-Code-2B_R\#0.8\#0.6\#dinov2-large\#0.0\#0.2\#rouge_0.2\#top_8\#inter_0.4/clusters/image_closest.jsonl filter=lfs diff=lfs merge=lfs -text SFR-Embedding-Code-2B_R\#0.8\#0.6\#dinov2-large\#0.0\#0.2\#rouge_0.2\#top_8\#inter_0.4/clusters/text_closest.jsonl filter=lfs diff=lfs merge=lfs -text +SFR-Embedding-Code-2B_R\#0.8\#0.6\#dinov2-large\#0.0\#0.2\#rouge_0.2\#top_8\#inter_0.4/clusters/text_uniform.jsonl filter=lfs diff=lfs merge=lfs -text +SFR-Embedding-Code-2B_R\#0.8\#0.6\#dinov2-large\#0.0\#0.2\#rouge_0.2\#top_8\#inter_0.4/clusters/image_farest.jsonl filter=lfs diff=lfs merge=lfs -text +SFR-Embedding-Code-2B_R\#0.8\#0.6\#dinov2-large\#0.0\#0.2\#rouge_0.2\#top_8\#inter_0.4/clusters/text_farest.jsonl filter=lfs diff=lfs merge=lfs -text +siglip-so400m-patch14-384\#0.8\#0.6\#siglip-so400m-patch14-384\#0.0\#0.2\#rouge_0.2\#top_8\#inter_0.4/clusters/text_closest.jsonl filter=lfs diff=lfs merge=lfs -text +siglip-so400m-patch14-384\#0.8\#0.6\#siglip-so400m-patch14-384\#0.0\#0.2\#rouge_0.2\#top_8\#inter_0.4/clusters/text_uniform.jsonl filter=lfs diff=lfs merge=lfs -text +captions.tsv filter=lfs diff=lfs merge=lfs -text diff --git a/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/keep_image_farest/tensorboard/events.out.tfevents.1745688045.g12.2586430.0 b/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/keep_image_farest/tensorboard/events.out.tfevents.1745688045.g12.2586430.0 new file mode 100644 index 0000000000000000000000000000000000000000..bd684562eebaa04fca3c189f6f7c2e0d75169750 --- /dev/null +++ b/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/keep_image_farest/tensorboard/events.out.tfevents.1745688045.g12.2586430.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:72c633d46b0620be846fb5b5afdb9b32eb3b3f502669dd38665578d573a7a097 +size 19936 diff --git a/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/keep_image_uniform/benchmark_caltech101_epoch_10.pt_ViT-B-16_en_zeroshot_classification.json b/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/keep_image_uniform/benchmark_caltech101_epoch_10.pt_ViT-B-16_en_zeroshot_classification.json new file mode 100644 index 0000000000000000000000000000000000000000..7c5a04ec7767b63fe9b298f471ceb9db47b9e5b1 --- /dev/null +++ b/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/keep_image_uniform/benchmark_caltech101_epoch_10.pt_ViT-B-16_en_zeroshot_classification.json @@ -0,0 +1 @@ +{"dataset": "caltech101", "model": "ViT-B-16", "pretrained": "/mnt/personal/zhudongy/rdc9m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/keep_image_uniform/checkpoints/epoch_10.pt", "task": "zeroshot_classification", "metrics": {"acc1": 0.8613298337707787, "acc5": 0.9757217847769029, "mean_per_class_recall": 0.9243666325650888}, "language": "en"} \ No newline at end of file diff --git a/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/keep_image_uniform/benchmark_cars_epoch_10.pt_ViT-B-16_en_zeroshot_classification.json b/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/keep_image_uniform/benchmark_cars_epoch_10.pt_ViT-B-16_en_zeroshot_classification.json new file mode 100644 index 0000000000000000000000000000000000000000..d11c46ccba017f14c802996453f69e46de009e7c --- /dev/null +++ b/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/keep_image_uniform/benchmark_cars_epoch_10.pt_ViT-B-16_en_zeroshot_classification.json @@ -0,0 +1 @@ +{"dataset": "cars", "model": "ViT-B-16", "pretrained": "/mnt/personal/zhudongy/rdc9m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/keep_image_uniform/checkpoints/epoch_10.pt", "task": "zeroshot_classification", "metrics": {"acc1": 0.8311155328939187, "acc5": 0.9896779007586121, "mean_per_class_recall": 0.8300859230489636}, "language": "en"} \ No newline at end of file diff --git a/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/keep_image_uniform/benchmark_cifar100_epoch_10.pt_ViT-B-16_en_zeroshot_classification.json b/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/keep_image_uniform/benchmark_cifar100_epoch_10.pt_ViT-B-16_en_zeroshot_classification.json new file mode 100644 index 0000000000000000000000000000000000000000..3fd62045954268d86e43d96b581137b81352ab17 --- /dev/null +++ b/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/keep_image_uniform/benchmark_cifar100_epoch_10.pt_ViT-B-16_en_zeroshot_classification.json @@ -0,0 +1 @@ +{"dataset": "cifar100", "model": "ViT-B-16", "pretrained": "/mnt/personal/zhudongy/rdc9m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/keep_image_uniform/checkpoints/epoch_10.pt", "task": "zeroshot_classification", "metrics": {"acc1": 0.7575, "acc5": 0.9361, "mean_per_class_recall": 0.7575}, "language": "en"} \ No newline at end of file diff --git a/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/keep_image_uniform/benchmark_cifar10_epoch_10.pt_ViT-B-16_en_zeroshot_classification.json b/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/keep_image_uniform/benchmark_cifar10_epoch_10.pt_ViT-B-16_en_zeroshot_classification.json new file mode 100644 index 0000000000000000000000000000000000000000..3e3af446b357add7a925c8668adf0ae14b8ee047 --- /dev/null +++ b/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/keep_image_uniform/benchmark_cifar10_epoch_10.pt_ViT-B-16_en_zeroshot_classification.json @@ -0,0 +1 @@ +{"dataset": "cifar10", "model": "ViT-B-16", "pretrained": "/mnt/personal/zhudongy/rdc9m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/keep_image_uniform/checkpoints/epoch_10.pt", "task": "zeroshot_classification", "metrics": {"acc1": 0.9358, "acc5": 0.9976, "mean_per_class_recall": 0.9359000000000002}, "language": "en"} \ No newline at end of file diff --git a/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/keep_image_uniform/benchmark_country211_epoch_10.pt_ViT-B-16_en_zeroshot_classification.json b/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/keep_image_uniform/benchmark_country211_epoch_10.pt_ViT-B-16_en_zeroshot_classification.json new file mode 100644 index 0000000000000000000000000000000000000000..7729871a7d65906c8a897f54be392a28f8e3a654 --- /dev/null +++ b/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/keep_image_uniform/benchmark_country211_epoch_10.pt_ViT-B-16_en_zeroshot_classification.json @@ -0,0 +1 @@ +{"dataset": "country211", "model": "ViT-B-16", "pretrained": "/mnt/personal/zhudongy/rdc9m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/keep_image_uniform/checkpoints/epoch_10.pt", "task": "zeroshot_classification", "metrics": {"acc1": 0.19450236966824644, "acc5": 0.42265402843601896, "mean_per_class_recall": 0.19445497630331754}, "language": "en"} \ No newline at end of file diff --git a/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/keep_image_uniform/benchmark_dtd_epoch_10.pt_ViT-B-16_en_zeroshot_classification.json b/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/keep_image_uniform/benchmark_dtd_epoch_10.pt_ViT-B-16_en_zeroshot_classification.json new file mode 100644 index 0000000000000000000000000000000000000000..aca4552749338d6c0f9f3069dd06c2a691843c88 --- /dev/null +++ b/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/keep_image_uniform/benchmark_dtd_epoch_10.pt_ViT-B-16_en_zeroshot_classification.json @@ -0,0 +1 @@ +{"dataset": "dtd", "model": "ViT-B-16", "pretrained": "/mnt/personal/zhudongy/rdc9m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/keep_image_uniform/checkpoints/epoch_10.pt", "task": "zeroshot_classification", "metrics": {"acc1": 0.5531914893617021, "acc5": 0.8308510638297872, "mean_per_class_recall": 0.5531914893617021}, "language": "en"} \ No newline at end of file diff --git a/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/keep_image_uniform/benchmark_eurosat_epoch_10.pt_ViT-B-16_en_zeroshot_classification.json b/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/keep_image_uniform/benchmark_eurosat_epoch_10.pt_ViT-B-16_en_zeroshot_classification.json new file mode 100644 index 0000000000000000000000000000000000000000..14b2492cb080b11b548ed04eb8fecafa9c2936e7 --- /dev/null +++ b/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/keep_image_uniform/benchmark_eurosat_epoch_10.pt_ViT-B-16_en_zeroshot_classification.json @@ -0,0 +1 @@ +{"dataset": "eurosat", "model": "ViT-B-16", "pretrained": "/mnt/personal/zhudongy/rdc9m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/keep_image_uniform/checkpoints/epoch_10.pt", "task": "zeroshot_classification", "metrics": {"acc1": 0.5212962962962963, "acc5": 0.8868148148148148, "mean_per_class_recall": 0.5254933333333334}, "language": "en"} \ No newline at end of file diff --git a/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/keep_image_uniform/benchmark_fgvc_aircraft_epoch_10.pt_ViT-B-16_en_zeroshot_classification.json b/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/keep_image_uniform/benchmark_fgvc_aircraft_epoch_10.pt_ViT-B-16_en_zeroshot_classification.json new file mode 100644 index 0000000000000000000000000000000000000000..4f82f1dfaa36d91aa1298cbd35e86758498c5142 --- /dev/null +++ b/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/keep_image_uniform/benchmark_fgvc_aircraft_epoch_10.pt_ViT-B-16_en_zeroshot_classification.json @@ -0,0 +1 @@ +{"dataset": "fgvc_aircraft", "model": "ViT-B-16", "pretrained": "/mnt/personal/zhudongy/rdc9m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/keep_image_uniform/checkpoints/epoch_10.pt", "task": "zeroshot_classification", "metrics": {"acc1": 0.2652265226522652, "acc5": 0.6126612661266126, "mean_per_class_recall": 0.26398395721925133}, "language": "en"} \ No newline at end of file diff --git a/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/keep_image_uniform/benchmark_flickr30k_epoch_10.pt_ViT-B-16_en_zeroshot_retrieval.json b/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/keep_image_uniform/benchmark_flickr30k_epoch_10.pt_ViT-B-16_en_zeroshot_retrieval.json new file mode 100644 index 0000000000000000000000000000000000000000..c3ca7ddada4f3b98bc0a1037fbcf763ee17f1935 --- /dev/null +++ b/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/keep_image_uniform/benchmark_flickr30k_epoch_10.pt_ViT-B-16_en_zeroshot_retrieval.json @@ -0,0 +1 @@ +{"dataset": "flickr30k", "model": "ViT-B-16", "pretrained": "/mnt/personal/zhudongy/rdc9m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/keep_image_uniform/checkpoints/epoch_10.pt", "task": "zeroshot_retrieval", "metrics": {"image_retrieval_recall@1": 0.6549999713897705, "text_retrieval_recall@1": 0.8149999976158142, "image_retrieval_recall@5": 0.8682000041007996, "text_retrieval_recall@5": 0.9520000219345093, "image_retrieval_recall@10": 0.921999990940094, "text_retrieval_recall@10": 0.9810000061988831}, "language": "en"} \ No newline at end of file diff --git a/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/keep_image_uniform/benchmark_flowers_epoch_10.pt_ViT-B-16_en_zeroshot_classification.json b/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/keep_image_uniform/benchmark_flowers_epoch_10.pt_ViT-B-16_en_zeroshot_classification.json new file mode 100644 index 0000000000000000000000000000000000000000..ad08ffd2d9f50db3c40d78580a84acc7ae14bacf --- /dev/null +++ b/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/keep_image_uniform/benchmark_flowers_epoch_10.pt_ViT-B-16_en_zeroshot_classification.json @@ -0,0 +1 @@ +{"dataset": "flowers", "model": "ViT-B-16", "pretrained": "/mnt/personal/zhudongy/rdc9m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/keep_image_uniform/checkpoints/epoch_10.pt", "task": "zeroshot_classification", "metrics": {"acc1": 0.7440234184420231, "acc5": 0.9011221336802732, "mean_per_class_recall": 0.7362584000701315}, "language": "en"} \ No newline at end of file diff --git a/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/keep_image_uniform/benchmark_food101_epoch_10.pt_ViT-B-16_en_zeroshot_classification.json b/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/keep_image_uniform/benchmark_food101_epoch_10.pt_ViT-B-16_en_zeroshot_classification.json new file mode 100644 index 0000000000000000000000000000000000000000..b3a06ff4fffee7fa66d8004509436b0fc9e316ad --- /dev/null +++ b/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/keep_image_uniform/benchmark_food101_epoch_10.pt_ViT-B-16_en_zeroshot_classification.json @@ -0,0 +1 @@ +{"dataset": "food101", "model": "ViT-B-16", "pretrained": "/mnt/personal/zhudongy/rdc9m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/keep_image_uniform/checkpoints/epoch_10.pt", "task": "zeroshot_classification", "metrics": {"acc1": 0.867049504950495, "acc5": 0.9799603960396039, "mean_per_class_recall": 0.867009900990099}, "language": "en"} \ No newline at end of file diff --git a/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/keep_image_uniform/benchmark_gtsrb_epoch_10.pt_ViT-B-16_en_zeroshot_classification.json b/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/keep_image_uniform/benchmark_gtsrb_epoch_10.pt_ViT-B-16_en_zeroshot_classification.json new file mode 100644 index 0000000000000000000000000000000000000000..dd7a040cb263fd9168ea29c6b8744430e6495a8c --- /dev/null +++ b/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/keep_image_uniform/benchmark_gtsrb_epoch_10.pt_ViT-B-16_en_zeroshot_classification.json @@ -0,0 +1 @@ +{"dataset": "gtsrb", "model": "ViT-B-16", "pretrained": "/mnt/personal/zhudongy/rdc9m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/keep_image_uniform/checkpoints/epoch_10.pt", "task": "zeroshot_classification", "metrics": {"acc1": 0.5332541567695962, "acc5": 0.7577988915281076, "mean_per_class_recall": 0.4841045961397327}, "language": "en"} \ No newline at end of file diff --git a/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/keep_image_uniform/benchmark_imagenet1k_epoch_10.pt_ViT-B-16_en_zeroshot_classification.json b/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/keep_image_uniform/benchmark_imagenet1k_epoch_10.pt_ViT-B-16_en_zeroshot_classification.json new file mode 100644 index 0000000000000000000000000000000000000000..00fa6ccdf4ce75ef6a9218b3428941fbfaffa3db --- /dev/null +++ b/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/keep_image_uniform/benchmark_imagenet1k_epoch_10.pt_ViT-B-16_en_zeroshot_classification.json @@ -0,0 +1 @@ +{"dataset": "imagenet1k", "model": "ViT-B-16", "pretrained": "/mnt/personal/zhudongy/rdc9m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/keep_image_uniform/checkpoints/epoch_10.pt", "task": "zeroshot_classification", "metrics": {"acc1": 0.68376, "acc5": 0.90904, "mean_per_class_recall": 0.68382}, "language": "en"} \ No newline at end of file diff --git a/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/keep_image_uniform/benchmark_mscoco_captions_epoch_10.pt_ViT-B-16_en_zeroshot_retrieval.json b/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/keep_image_uniform/benchmark_mscoco_captions_epoch_10.pt_ViT-B-16_en_zeroshot_retrieval.json new file mode 100644 index 0000000000000000000000000000000000000000..18ae9f858576d6cc8cea111b4b1e8db7a8ada8e6 --- /dev/null +++ b/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/keep_image_uniform/benchmark_mscoco_captions_epoch_10.pt_ViT-B-16_en_zeroshot_retrieval.json @@ -0,0 +1 @@ +{"dataset": "mscoco_captions", "model": "ViT-B-16", "pretrained": "/mnt/personal/zhudongy/rdc9m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/keep_image_uniform/checkpoints/epoch_10.pt", "task": "zeroshot_retrieval", "metrics": {"image_retrieval_recall@1": 0.38504597544670105, "text_retrieval_recall@1": 0.5383999943733215, "image_retrieval_recall@5": 0.6431427597999573, "text_retrieval_recall@5": 0.7778000235557556, "image_retrieval_recall@10": 0.7439424395561218, "text_retrieval_recall@10": 0.8586000204086304}, "language": "en"} \ No newline at end of file diff --git a/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/keep_image_uniform/benchmark_pets_epoch_10.pt_ViT-B-16_en_zeroshot_classification.json b/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/keep_image_uniform/benchmark_pets_epoch_10.pt_ViT-B-16_en_zeroshot_classification.json new file mode 100644 index 0000000000000000000000000000000000000000..2f6881e3b13ae2b6ec14032f3297bd4fe529e7a1 --- /dev/null +++ b/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/keep_image_uniform/benchmark_pets_epoch_10.pt_ViT-B-16_en_zeroshot_classification.json @@ -0,0 +1 @@ +{"dataset": "pets", "model": "ViT-B-16", "pretrained": "/mnt/personal/zhudongy/rdc9m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/keep_image_uniform/checkpoints/epoch_10.pt", "task": "zeroshot_classification", "metrics": {"acc1": 0.9078768056691197, "acc5": 0.9961842463886618, "mean_per_class_recall": 0.9070997688018754}, "language": "en"} \ No newline at end of file diff --git a/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/keep_image_uniform/benchmark_stl10_epoch_10.pt_ViT-B-16_en_zeroshot_classification.json b/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/keep_image_uniform/benchmark_stl10_epoch_10.pt_ViT-B-16_en_zeroshot_classification.json new file mode 100644 index 0000000000000000000000000000000000000000..b5c64b999571c3e263aeb185769eef41e964f8fd --- /dev/null +++ b/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/keep_image_uniform/benchmark_stl10_epoch_10.pt_ViT-B-16_en_zeroshot_classification.json @@ -0,0 +1 @@ +{"dataset": "stl10", "model": "ViT-B-16", "pretrained": "/mnt/personal/zhudongy/rdc9m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/keep_image_uniform/checkpoints/epoch_10.pt", "task": "zeroshot_classification", "metrics": {"acc1": 0.973375, "acc5": 1.0, "mean_per_class_recall": 0.9733749999999999}, "language": "en"} \ No newline at end of file diff --git a/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/keep_image_uniform/benchmark_sun397_epoch_10.pt_ViT-B-16_en_zeroshot_classification.json b/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/keep_image_uniform/benchmark_sun397_epoch_10.pt_ViT-B-16_en_zeroshot_classification.json new file mode 100644 index 0000000000000000000000000000000000000000..366d760b7ad3c11a8c4584522e931571bde3ba68 --- /dev/null +++ b/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/keep_image_uniform/benchmark_sun397_epoch_10.pt_ViT-B-16_en_zeroshot_classification.json @@ -0,0 +1 @@ +{"dataset": "sun397", "model": "ViT-B-16", "pretrained": "/mnt/personal/zhudongy/rdc9m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/keep_image_uniform/checkpoints/epoch_10.pt", "task": "zeroshot_classification", "metrics": {"acc1": 0.6725453776412822, "acc5": 0.9295841992018684, "mean_per_class_recall": 0.6641095198145022}, "language": "en"} \ No newline at end of file diff --git a/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/keep_image_uniform/benchmark_vtab_resisc45_epoch_10.pt_ViT-B-16_en_zeroshot_classification.json b/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/keep_image_uniform/benchmark_vtab_resisc45_epoch_10.pt_ViT-B-16_en_zeroshot_classification.json new file mode 100644 index 0000000000000000000000000000000000000000..5a488f25b924672c46ddec4495a431be697a714c --- /dev/null +++ b/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/keep_image_uniform/benchmark_vtab_resisc45_epoch_10.pt_ViT-B-16_en_zeroshot_classification.json @@ -0,0 +1 @@ +{"dataset": "vtab/resisc45", "model": "ViT-B-16", "pretrained": "/mnt/personal/zhudongy/rdc9m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/keep_image_uniform/checkpoints/epoch_10.pt", "task": "zeroshot_classification", "metrics": {"acc1": 0.6007936507936508, "acc5": 0.9041269841269841, "mean_per_class_recall": 0.6084713941217861}, "language": "en"} \ No newline at end of file diff --git a/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/keep_image_uniform/out.log b/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/keep_image_uniform/out.log new file mode 100644 index 0000000000000000000000000000000000000000..4b529e00c9b8017bff5bf4082eb3ff094cb7db77 --- /dev/null +++ b/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/keep_image_uniform/out.log @@ -0,0 +1,195 @@ +2025-04-27,07:54:14 | INFO | Running in distributed mode with multiple processes. Device: cuda:0.Process (global: 0, local 0), total 2. +2025-04-27,07:54:14 | INFO | Loaded ViT-B-16 model config. +2025-04-27,07:54:15 | INFO | Loading pretrained ViT-B-16 weights (datacomp_xl_s13b_b90k). +2025-04-27,07:54:15 | INFO | Model: +2025-04-27,07:54:15 | INFO | CLIP( + (visual): VisionTransformer( + (conv1): Conv2d(3, 768, kernel_size=(16, 16), stride=(16, 16), bias=False) + (patch_dropout): Identity() + (ln_pre): LayerNorm((768,), eps=1e-05, elementwise_affine=True) + (transformer): Transformer( + (resblocks): ModuleList( + (0-11): 12 x ResidualAttentionBlock( + (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True) + (attn): MultiheadAttention( + (out_proj): NonDynamicallyQuantizableLinear(in_features=768, out_features=768, bias=True) + ) + (ls_1): Identity() + (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True) + (mlp): Sequential( + (c_fc): Linear(in_features=768, out_features=3072, bias=True) + (gelu): GELU(approximate='none') + (c_proj): Linear(in_features=3072, out_features=768, bias=True) + ) + (ls_2): Identity() + ) + ) + ) + (ln_post): LayerNorm((768,), eps=1e-05, elementwise_affine=True) + ) + (transformer): Transformer( + (resblocks): ModuleList( + (0-11): 12 x ResidualAttentionBlock( + (ln_1): LayerNorm((512,), eps=1e-05, elementwise_affine=True) + (attn): MultiheadAttention( + (out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True) + ) + (ls_1): Identity() + (ln_2): LayerNorm((512,), eps=1e-05, elementwise_affine=True) + (mlp): Sequential( + (c_fc): Linear(in_features=512, out_features=2048, bias=True) + (gelu): GELU(approximate='none') + (c_proj): Linear(in_features=2048, out_features=512, bias=True) + ) + (ls_2): Identity() + ) + ) + ) + (token_embedding): Embedding(49408, 512) + (ln_final): LayerNorm((512,), eps=1e-05, elementwise_affine=True) +) +2025-04-27,07:54:15 | INFO | Params: +2025-04-27,07:54:15 | INFO | accum_freq: 2 +2025-04-27,07:54:15 | INFO | aug_cfg: {} +2025-04-27,07:54:15 | INFO | batch_size: 2048 +2025-04-27,07:54:15 | INFO | beta1: 0.9 +2025-04-27,07:54:15 | INFO | beta2: 0.98 +2025-04-27,07:54:15 | INFO | cache_dir: None +2025-04-27,07:54:15 | INFO | caption_ratio: 0.1 +2025-04-27,07:54:15 | INFO | checkpoint_path: /mnt/personal/zhudongy/rdc9m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/keep_image_uniform/checkpoints +2025-04-27,07:54:15 | INFO | coca_caption_loss_weight: 2.0 +2025-04-27,07:54:15 | INFO | coca_contrastive_loss_weight: 1.0 +2025-04-27,07:54:15 | INFO | copy_codebase: False +2025-04-27,07:54:15 | INFO | csv_caption_key: title +2025-04-27,07:54:15 | INFO | csv_img_key: filepath +2025-04-27,07:54:15 | INFO | csv_separator: +2025-04-27,07:54:15 | INFO | dataset_resampled: False +2025-04-27,07:54:15 | INFO | dataset_type: synthetic +2025-04-27,07:54:15 | INFO | ddp_static_graph: False +2025-04-27,07:54:15 | INFO | debug: False +2025-04-27,07:54:15 | INFO | delete_previous_checkpoint: False +2025-04-27,07:54:15 | INFO | device: cuda:0 +2025-04-27,07:54:15 | INFO | dist_backend: None +2025-04-27,07:54:15 | INFO | dist_url: None +2025-04-27,07:54:15 | INFO | distill: False +2025-04-27,07:54:15 | INFO | distill_model: None +2025-04-27,07:54:15 | INFO | distill_pretrained: None +2025-04-27,07:54:15 | INFO | distributed: True +2025-04-27,07:54:15 | INFO | epochs: 10 +2025-04-27,07:54:15 | INFO | epochs_cooldown: None +2025-04-27,07:54:15 | INFO | eps: 1e-08 +2025-04-27,07:54:15 | INFO | force_custom_text: False +2025-04-27,07:54:15 | INFO | force_image_size: None +2025-04-27,07:54:15 | INFO | force_patch_dropout: None +2025-04-27,07:54:15 | INFO | force_quick_gelu: False +2025-04-27,07:54:15 | INFO | gather_with_grad: True +2025-04-27,07:54:15 | INFO | grad_checkpointing: True +2025-04-27,07:54:15 | INFO | grad_clip_norm: None +2025-04-27,07:54:15 | INFO | horovod: False +2025-04-27,07:54:15 | INFO | image_interpolation: None +2025-04-27,07:54:15 | INFO | image_mean: None +2025-04-27,07:54:15 | INFO | image_resize_mode: None +2025-04-27,07:54:15 | INFO | image_std: None +2025-04-27,07:54:15 | INFO | imagenet_v2: None +2025-04-27,07:54:15 | INFO | imagenet_val: None +2025-04-27,07:54:15 | INFO | keep_func_name: keep_image_uniform +2025-04-27,07:54:15 | INFO | local_loss: False +2025-04-27,07:54:15 | INFO | local_rank: 0 +2025-04-27,07:54:15 | INFO | lock_image: False +2025-04-27,07:54:15 | INFO | lock_image_freeze_bn_stats: False +2025-04-27,07:54:15 | INFO | lock_image_unlocked_groups: 0 +2025-04-27,07:54:15 | INFO | lock_text: True +2025-04-27,07:54:15 | INFO | lock_text_freeze_layer_norm: False +2025-04-27,07:54:15 | INFO | lock_text_unlocked_layers: 0 +2025-04-27,07:54:15 | INFO | log_every_n_steps: 100 +2025-04-27,07:54:15 | INFO | log_level: 20 +2025-04-27,07:54:15 | INFO | log_local: False +2025-04-27,07:54:15 | INFO | log_path: /mnt/personal/zhudongy/rdc9m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/keep_image_uniform/out.log +2025-04-27,07:54:15 | INFO | logs: /mnt/personal/zhudongy/rdc9m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text +2025-04-27,07:54:15 | INFO | loss_dist_impl: None +2025-04-27,07:54:15 | INFO | lr: 4e-05 +2025-04-27,07:54:15 | INFO | lr_cooldown_end: 0.0 +2025-04-27,07:54:15 | INFO | lr_cooldown_power: 1.0 +2025-04-27,07:54:15 | INFO | lr_scheduler: cosine +2025-04-27,07:54:15 | INFO | map_func_name: use_all +2025-04-27,07:54:15 | INFO | model: ViT-B-16 +2025-04-27,07:54:15 | INFO | momentum: None +2025-04-27,07:54:15 | INFO | name: keep_image_uniform +2025-04-27,07:54:15 | INFO | no_set_device_rank: False +2025-04-27,07:54:15 | INFO | opt: adamw +2025-04-27,07:54:15 | INFO | precision: amp +2025-04-27,07:54:15 | INFO | pretrained: datacomp_xl_s13b_b90k +2025-04-27,07:54:15 | INFO | pretrained_image: False +2025-04-27,07:54:15 | INFO | rank: 0 +2025-04-27,07:54:15 | INFO | remote_sync: None +2025-04-27,07:54:15 | INFO | remote_sync_frequency: 300 +2025-04-27,07:54:15 | INFO | remote_sync_protocol: s3 +2025-04-27,07:54:15 | INFO | report_to: tensorboard,wandb +2025-04-27,07:54:15 | INFO | resume: None +2025-04-27,07:54:15 | INFO | save_frequency: 10 +2025-04-27,07:54:15 | INFO | save_most_recent: False +2025-04-27,07:54:15 | INFO | seed: 0 +2025-04-27,07:54:15 | INFO | siglip: False +2025-04-27,07:54:15 | INFO | skip_scheduler: False +2025-04-27,07:54:15 | INFO | tensorboard: True +2025-04-27,07:54:15 | INFO | tensorboard_path: /mnt/personal/zhudongy/rdc9m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/keep_image_uniform/tensorboard +2025-04-27,07:54:15 | INFO | torchcompile: False +2025-04-27,07:54:15 | INFO | torchscript: False +2025-04-27,07:54:15 | INFO | trace: False +2025-04-27,07:54:15 | INFO | train_data: /mnt/personal/zhudongy/recap-datacomp-3m-wds/{00376..01507}.tar +2025-04-27,07:54:15 | INFO | train_data_upsampling_factors: None +2025-04-27,07:54:15 | INFO | train_num_samples: 9011874 +2025-04-27,07:54:15 | INFO | use_bn_sync: False +2025-04-27,07:54:15 | INFO | use_bnb_linear: None +2025-04-27,07:54:15 | INFO | val_data: None +2025-04-27,07:54:15 | INFO | val_frequency: 1 +2025-04-27,07:54:15 | INFO | val_num_samples: None +2025-04-27,07:54:15 | INFO | wandb: True +2025-04-27,07:54:15 | INFO | wandb_notes: +2025-04-27,07:54:15 | INFO | wandb_project_name: open-clip +2025-04-27,07:54:15 | INFO | warmup: 110 +2025-04-27,07:54:15 | INFO | wd: 0.5 +2025-04-27,07:54:15 | INFO | workers: 16 +2025-04-27,07:54:15 | INFO | world_size: 2 +2025-04-27,07:54:15 | INFO | zeroshot_frequency: 2 +2025-04-27,07:54:16 | INFO | Created AdamW (adamw) optimizer: lr: 4e-05, betas: (0.9, 0.98), eps: 1e-08, weight_decay: 0.5, amsgrad: False, foreach: None, maximize: False, capturable: False, differentiable: False, fused: None +2025-04-27,07:54:35 | INFO | Start epoch 0 +2025-04-27,07:55:28 | INFO | Train Epoch: 0 [ 8192/917504 (1%)] Data (t): 43.129 Batch (t): 52.904, 154.846/s, 77.4229/s/gpu LR: 0.000000 Logit Scale: 100.000 Imm_image: 28.541 (28.541) Imm_text: 28.541 (28.541) Isd_image: 3.2232 (3.2232) Isd_text: 3.2232 (3.2232) Contrastive_loss: 1.4352 (1.4352) Loss: 1.4352 (1.4352) +2025-04-27,08:10:27 | INFO | Train Epoch: 0 [827392/917504 (90%)] Data (t): 0.991 Batch (t): 8.995, 919.591/s, 459.795/s/gpu LR: 0.000037 Logit Scale: 99.933 Imm_image: 28.938 (28.739) Imm_text: 28.938 (28.739) Isd_image: 5.8971 (4.5602) Isd_text: 5.8971 (4.5602) Contrastive_loss: 0.73869 (1.0870) Loss: 0.73869 (1.0870) +2025-04-27,08:12:05 | INFO | Train Epoch: 0 [917504/917504 (100%)] Data (t): 0.934 Batch (t): 8.914, 916.232/s, 458.116/s/gpu LR: 0.000040 Logit Scale: 99.925 Imm_image: 29.008 (28.829) Imm_text: 29.008 (28.829) Isd_image: 5.3500 (4.8234) Isd_text: 5.3500 (4.8234) Contrastive_loss: 0.70851 (0.96081) Loss: 0.70851 (0.96081) +2025-04-27,08:12:05 | INFO | Start epoch 1 +2025-04-27,08:12:51 | INFO | Train Epoch: 1 [ 8192/917504 (1%)] Data (t): 37.414 Batch (t): 45.501, 180.041/s, 90.0203/s/gpu LR: 0.000040 Logit Scale: 99.924 Imm_image: 29.122 (29.122) Imm_text: 29.122 (29.122) Isd_image: 5.4319 (5.4319) Isd_text: 5.4319 (5.4319) Contrastive_loss: 0.63045 (0.63045) Loss: 0.63045 (0.63045) +2025-04-27,08:27:54 | INFO | Train Epoch: 1 [827392/917504 (90%)] Data (t): 1.012 Batch (t): 9.027, 920.444/s, 460.222/s/gpu LR: 0.000039 Logit Scale: 99.879 Imm_image: 29.092 (29.107) Imm_text: 29.092 (29.107) Isd_image: 4.1551 (4.7935) Isd_text: 4.1551 (4.7935) Contrastive_loss: 0.62187 (0.62616) Loss: 0.62187 (0.62616) +2025-04-27,08:29:32 | INFO | Train Epoch: 1 [917504/917504 (100%)] Data (t): 0.942 Batch (t): 8.924, 919.590/s, 459.795/s/gpu LR: 0.000039 Logit Scale: 99.882 Imm_image: 29.122 (29.112) Imm_text: 29.122 (29.112) Isd_image: 3.7083 (4.4318) Isd_text: 3.7083 (4.4318) Contrastive_loss: 0.52793 (0.59342) Loss: 0.52793 (0.59342) +2025-04-27,08:29:32 | INFO | Start epoch 2 +2025-04-27,08:30:16 | INFO | Train Epoch: 2 [ 8192/917504 (1%)] Data (t): 34.892 Batch (t): 43.754, 187.231/s, 93.6153/s/gpu LR: 0.000039 Logit Scale: 99.884 Imm_image: 29.246 (29.246) Imm_text: 29.246 (29.246) Isd_image: 3.5515 (3.5515) Isd_text: 3.5515 (3.5515) Contrastive_loss: 0.50764 (0.50764) Loss: 0.50764 (0.50764) +2025-04-27,08:45:16 | INFO | Train Epoch: 2 [827392/917504 (90%)] Data (t): 0.995 Batch (t): 9.003, 918.046/s, 459.023/s/gpu LR: 0.000036 Logit Scale: 100.000 Imm_image: 29.356 (29.301) Imm_text: 29.356 (29.301) Isd_image: 2.5848 (3.0682) Isd_text: 2.5848 (3.0682) Contrastive_loss: 0.47861 (0.49313) Loss: 0.47861 (0.49313) +2025-04-27,08:46:54 | INFO | Train Epoch: 2 [917504/917504 (100%)] Data (t): 0.955 Batch (t): 8.942, 913.227/s, 456.613/s/gpu LR: 0.000035 Logit Scale: 100.000 Imm_image: 29.512 (29.371) Imm_text: 29.512 (29.371) Isd_image: 2.4185 (2.8516) Isd_text: 2.4185 (2.8516) Contrastive_loss: 0.40780 (0.46468) Loss: 0.40780 (0.46468) +2025-04-27,08:46:55 | INFO | Start epoch 3 +2025-04-27,08:47:39 | INFO | Train Epoch: 3 [ 8192/917504 (1%)] Data (t): 36.440 Batch (t): 44.632, 183.546/s, 91.7728/s/gpu LR: 0.000035 Logit Scale: 100.000 Imm_image: 29.499 (29.499) Imm_text: 29.499 (29.499) Isd_image: 2.4591 (2.4591) Isd_text: 2.4591 (2.4591) Contrastive_loss: 0.39220 (0.39220) Loss: 0.39220 (0.39220) +2025-04-27,09:02:42 | INFO | Train Epoch: 3 [827392/917504 (90%)] Data (t): 1.015 Batch (t): 9.030, 917.801/s, 458.901/s/gpu LR: 0.000031 Logit Scale: 100.000 Imm_image: 29.522 (29.510) Imm_text: 29.522 (29.510) Isd_image: 1.6210 (2.0401) Isd_text: 1.6210 (2.0401) Contrastive_loss: 0.41654 (0.40437) Loss: 0.41654 (0.40437) +2025-04-27,09:04:21 | INFO | Train Epoch: 3 [917504/917504 (100%)] Data (t): 0.950 Batch (t): 8.931, 914.190/s, 457.095/s/gpu LR: 0.000030 Logit Scale: 100.000 Imm_image: 29.597 (29.539) Imm_text: 29.597 (29.539) Isd_image: 1.4161 (1.8321) Isd_text: 1.4161 (1.8321) Contrastive_loss: 0.36959 (0.39277) Loss: 0.36959 (0.39277) +2025-04-27,09:04:21 | INFO | Start epoch 4 +2025-04-27,09:05:04 | INFO | Train Epoch: 4 [ 8192/917504 (1%)] Data (t): 35.511 Batch (t): 43.616, 187.822/s, 93.9110/s/gpu LR: 0.000030 Logit Scale: 100.000 Imm_image: 29.727 (29.727) Imm_text: 29.727 (29.727) Isd_image: 1.5082 (1.5082) Isd_text: 1.5082 (1.5082) Contrastive_loss: 0.34478 (0.34478) Loss: 0.34478 (0.34478) +2025-04-27,09:20:06 | INFO | Train Epoch: 4 [827392/917504 (90%)] Data (t): 1.004 Batch (t): 9.014, 916.202/s, 458.101/s/gpu LR: 0.000024 Logit Scale: 100.000 Imm_image: 29.862 (29.794) Imm_text: 29.862 (29.794) Isd_image: 0.93800 (1.2231) Isd_text: 0.93800 (1.2231) Contrastive_loss: 0.30456 (0.32467) Loss: 0.30456 (0.32467) +2025-04-27,09:21:44 | INFO | Train Epoch: 4 [917504/917504 (100%)] Data (t): 0.954 Batch (t): 8.936, 915.496/s, 457.748/s/gpu LR: 0.000023 Logit Scale: 100.000 Imm_image: 29.837 (29.809) Imm_text: 29.837 (29.809) Isd_image: 1.1397 (1.1953) Isd_text: 1.1397 (1.1953) Contrastive_loss: 0.28918 (0.31284) Loss: 0.28918 (0.31284) +2025-04-27,09:21:44 | INFO | Start epoch 5 +2025-04-27,09:22:29 | INFO | Train Epoch: 5 [ 8192/917504 (1%)] Data (t): 33.353 Batch (t): 44.506, 184.065/s, 92.0325/s/gpu LR: 0.000023 Logit Scale: 100.000 Imm_image: 29.815 (29.815) Imm_text: 29.815 (29.815) Isd_image: 1.1481 (1.1481) Isd_text: 1.1481 (1.1481) Contrastive_loss: 0.30516 (0.30516) Loss: 0.30516 (0.30516) +2025-04-27,09:37:30 | INFO | Train Epoch: 5 [827392/917504 (90%)] Data (t): 1.007 Batch (t): 9.014, 918.615/s, 459.307/s/gpu LR: 0.000017 Logit Scale: 100.000 Imm_image: 29.975 (29.895) Imm_text: 29.975 (29.895) Isd_image: 0.74998 (0.94903) Isd_text: 0.74998 (0.94903) Contrastive_loss: 0.28071 (0.29293) Loss: 0.28071 (0.29293) +2025-04-27,09:39:09 | INFO | Train Epoch: 5 [917504/917504 (100%)] Data (t): 0.950 Batch (t): 8.933, 914.077/s, 457.039/s/gpu LR: 0.000017 Logit Scale: 100.000 Imm_image: 30.192 (29.994) Imm_text: 30.192 (29.994) Isd_image: 0.69866 (0.86557) Isd_text: 0.69866 (0.86557) Contrastive_loss: 0.22392 (0.26993) Loss: 0.22392 (0.26993) +2025-04-27,09:39:09 | INFO | Start epoch 6 +2025-04-27,09:39:54 | INFO | Train Epoch: 6 [ 8192/917504 (1%)] Data (t): 35.636 Batch (t): 45.368, 180.569/s, 90.2846/s/gpu LR: 0.000016 Logit Scale: 100.000 Imm_image: 30.142 (30.142) Imm_text: 30.142 (30.142) Isd_image: 0.83195 (0.83195) Isd_text: 0.83195 (0.83195) Contrastive_loss: 0.24683 (0.24683) Loss: 0.24683 (0.24683) +2025-04-27,09:54:56 | INFO | Train Epoch: 6 [827392/917504 (90%)] Data (t): 1.009 Batch (t): 9.016, 917.514/s, 458.757/s/gpu LR: 0.000011 Logit Scale: 100.000 Imm_image: 30.182 (30.162) Imm_text: 30.182 (30.162) Isd_image: 0.66375 (0.74785) Isd_text: 0.66375 (0.74785) Contrastive_loss: 0.25392 (0.25038) Loss: 0.25392 (0.25038) +2025-04-27,09:56:34 | INFO | Train Epoch: 6 [917504/917504 (100%)] Data (t): 0.951 Batch (t): 8.934, 914.233/s, 457.117/s/gpu LR: 0.000010 Logit Scale: 100.000 Imm_image: 30.260 (30.194) Imm_text: 30.260 (30.194) Isd_image: 0.58846 (0.69472) Isd_text: 0.58846 (0.69472) Contrastive_loss: 0.21603 (0.23893) Loss: 0.21603 (0.23893) +2025-04-27,09:56:34 | INFO | Start epoch 7 +2025-04-27,09:57:20 | INFO | Train Epoch: 7 [ 8192/917504 (1%)] Data (t): 36.683 Batch (t): 45.466, 180.179/s, 90.0895/s/gpu LR: 0.000010 Logit Scale: 100.000 Imm_image: 30.232 (30.232) Imm_text: 30.232 (30.232) Isd_image: 0.56494 (0.56494) Isd_text: 0.56494 (0.56494) Contrastive_loss: 0.21279 (0.21279) Loss: 0.21279 (0.21279) +2025-04-27,10:12:22 | INFO | Train Epoch: 7 [827392/917504 (90%)] Data (t): 1.006 Batch (t): 9.018, 918.150/s, 459.075/s/gpu LR: 0.000005 Logit Scale: 100.000 Imm_image: 30.290 (30.261) Imm_text: 30.290 (30.261) Isd_image: 0.56392 (0.56443) Isd_text: 0.56392 (0.56443) Contrastive_loss: 0.24375 (0.22827) Loss: 0.24375 (0.22827) +2025-04-27,10:14:03 | INFO | Train Epoch: 7 [917504/917504 (100%)] Data (t): 1.142 Batch (t): 9.181, 904.205/s, 452.103/s/gpu LR: 0.000005 Logit Scale: 100.000 Imm_image: 30.414 (30.312) Imm_text: 30.414 (30.312) Isd_image: 0.57138 (0.56675) Isd_text: 0.57138 (0.56675) Contrastive_loss: 0.21433 (0.22362) Loss: 0.21433 (0.22362) +2025-04-27,10:14:03 | INFO | Start epoch 8 +2025-04-27,10:14:46 | INFO | Train Epoch: 8 [ 8192/917504 (1%)] Data (t): 34.882 Batch (t): 43.296, 189.209/s, 94.6043/s/gpu LR: 0.000005 Logit Scale: 100.000 Imm_image: 30.263 (30.263) Imm_text: 30.263 (30.263) Isd_image: 0.63063 (0.63063) Isd_text: 0.63063 (0.63063) Contrastive_loss: 0.21246 (0.21246) Loss: 0.21246 (0.21246) +2025-04-27,10:29:43 | INFO | Train Epoch: 8 [827392/917504 (90%)] Data (t): 0.969 Batch (t): 8.967, 920.783/s, 460.391/s/gpu LR: 0.000001 Logit Scale: 100.000 Imm_image: 30.228 (30.245) Imm_text: 30.228 (30.245) Isd_image: 0.59837 (0.61450) Isd_text: 0.59837 (0.61450) Contrastive_loss: 0.24290 (0.22768) Loss: 0.24290 (0.22768) +2025-04-27,10:31:21 | INFO | Train Epoch: 8 [917504/917504 (100%)] Data (t): 0.919 Batch (t): 8.886, 919.279/s, 459.640/s/gpu LR: 0.000001 Logit Scale: 100.000 Imm_image: 30.411 (30.301) Imm_text: 30.411 (30.301) Isd_image: 0.56638 (0.59846) Isd_text: 0.56638 (0.59846) Contrastive_loss: 0.22206 (0.22581) Loss: 0.22206 (0.22581) +2025-04-27,10:31:21 | INFO | Start epoch 9 +2025-04-27,10:32:04 | INFO | Train Epoch: 9 [ 8192/917504 (1%)] Data (t): 35.369 Batch (t): 43.449, 188.542/s, 94.2708/s/gpu LR: 0.000001 Logit Scale: 100.000 Imm_image: 30.428 (30.428) Imm_text: 30.428 (30.428) Isd_image: 0.43297 (0.43297) Isd_text: 0.43297 (0.43297) Contrastive_loss: 0.18715 (0.18715) Loss: 0.18715 (0.18715) +2025-04-27,10:47:03 | INFO | Train Epoch: 9 [827392/917504 (90%)] Data (t): 0.984 Batch (t): 8.989, 919.087/s, 459.543/s/gpu LR: 0.000000 Logit Scale: 100.000 Imm_image: 30.323 (30.375) Imm_text: 30.323 (30.375) Isd_image: 0.59950 (0.51624) Isd_text: 0.59950 (0.51624) Contrastive_loss: 0.22314 (0.20515) Loss: 0.22314 (0.20515) +2025-04-27,10:48:41 | INFO | Train Epoch: 9 [917504/917504 (100%)] Data (t): 0.937 Batch (t): 8.914, 919.208/s, 459.604/s/gpu LR: 0.000000 Logit Scale: 100.000 Imm_image: 30.394 (30.382) Imm_text: 30.394 (30.382) Isd_image: 0.49815 (0.51021) Isd_text: 0.49815 (0.51021) Contrastive_loss: 0.20625 (0.20551) Loss: 0.20625 (0.20551) diff --git a/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/keep_image_uniform/params.txt b/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/keep_image_uniform/params.txt new file mode 100644 index 0000000000000000000000000000000000000000..a59239535b33a1c440b17004d3759ccf3467dd92 --- /dev/null +++ b/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/keep_image_uniform/params.txt @@ -0,0 +1,103 @@ +accum_freq: 2 +aug_cfg: {} +batch_size: 2048 +beta1: 0.9 +beta2: 0.98 +cache_dir: None +caption_ratio: 0.1 +checkpoint_path: /mnt/personal/zhudongy/rdc9m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/keep_image_uniform/checkpoints +coca_caption_loss_weight: 2.0 +coca_contrastive_loss_weight: 1.0 +copy_codebase: False +csv_caption_key: title +csv_img_key: filepath +csv_separator: +dataset_resampled: False +dataset_type: synthetic +ddp_static_graph: False +debug: False +delete_previous_checkpoint: False +device: cuda:0 +dist_backend: None +dist_url: None +distill: False +distill_model: None +distill_pretrained: None +distributed: True +epochs: 10 +epochs_cooldown: None +eps: 1e-08 +force_custom_text: False +force_image_size: None +force_patch_dropout: None +force_quick_gelu: False +gather_with_grad: True +grad_checkpointing: True +grad_clip_norm: None +horovod: False +image_interpolation: None +image_mean: None +image_resize_mode: None +image_std: None +imagenet_v2: None +imagenet_val: None +keep_func_name: keep_image_uniform +local_loss: False +local_rank: 0 +lock_image: False +lock_image_freeze_bn_stats: False +lock_image_unlocked_groups: 0 +lock_text: True +lock_text_freeze_layer_norm: False +lock_text_unlocked_layers: 0 +log_every_n_steps: 100 +log_level: 20 +log_local: False +log_path: /mnt/personal/zhudongy/rdc9m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/keep_image_uniform/out.log +logs: /mnt/personal/zhudongy/rdc9m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text +loss_dist_impl: None +lr: 4e-05 +lr_cooldown_end: 0.0 +lr_cooldown_power: 1.0 +lr_scheduler: cosine +map_func_name: use_all +model: ViT-B-16 +momentum: None +name: keep_image_uniform +no_set_device_rank: False +opt: adamw +precision: amp +pretrained: datacomp_xl_s13b_b90k +pretrained_image: False +rank: 0 +remote_sync: None +remote_sync_frequency: 300 +remote_sync_protocol: s3 +report_to: tensorboard,wandb +resume: None +save_frequency: 10 +save_most_recent: False +seed: 0 +siglip: False +skip_scheduler: False +tensorboard: True +tensorboard_path: /mnt/personal/zhudongy/rdc9m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/keep_image_uniform/tensorboard +torchcompile: False +torchscript: False +trace: False +train_data: /mnt/personal/zhudongy/recap-datacomp-3m-wds/{00376..01507}.tar +train_data_upsampling_factors: None +train_num_samples: 9011874 +use_bn_sync: False +use_bnb_linear: None +val_data: None +val_frequency: 1 +val_num_samples: None +wandb: True +wandb_notes: +wandb_project_name: open-clip +warmup: 110 +wd: 0.5 +workers: 16 +world_size: 2 +zeroshot_frequency: 2 diff --git a/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/keep_image_uniform/tensorboard/events.out.tfevents.1745733263.g12.2655775.0 b/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/keep_image_uniform/tensorboard/events.out.tfevents.1745733263.g12.2655775.0 new file mode 100644 index 0000000000000000000000000000000000000000..7d5a2c415a97eb80682aa8e0f459259e76e6dd16 --- /dev/null +++ b/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/keep_image_uniform/tensorboard/events.out.tfevents.1745733263.g12.2655775.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7d083cee4ab2f2fb5312adf93f6583505476bcc1b1f2fc69ab2462355cb6aec4 +size 19936 diff --git a/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/keep_random/tensorboard/events.out.tfevents.1745710659.g12.2628499.0 b/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/keep_random/tensorboard/events.out.tfevents.1745710659.g12.2628499.0 new file mode 100644 index 0000000000000000000000000000000000000000..6e7c17112b1ed21be1d5ab05a210a2a2ae717dcb --- /dev/null +++ b/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/keep_random/tensorboard/events.out.tfevents.1745710659.g12.2628499.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f17f7467451bdea01cb70bda9f19c3d10fc6fa0852805af8a7ec1333a87945a +size 19936 diff --git a/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/keep_text_closest_image_closest/tensorboard/events.out.tfevents.1745663928.g12.2393971.0 b/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/keep_text_closest_image_closest/tensorboard/events.out.tfevents.1745663928.g12.2393971.0 new file mode 100644 index 0000000000000000000000000000000000000000..358b12387ebe53e2bb5aefa321620dc329538189 --- /dev/null +++ b/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/keep_text_closest_image_closest/tensorboard/events.out.tfevents.1745663928.g12.2393971.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7461fb18c1b53d1ed330cdf217a4ce913fd2348163ec9e311e36493041bd03db +size 19936 diff --git a/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/keep_text_farest_image_farest/tensorboard/events.out.tfevents.1745755854.g12.2682891.0 b/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/keep_text_farest_image_farest/tensorboard/events.out.tfevents.1745755854.g12.2682891.0 new file mode 100644 index 0000000000000000000000000000000000000000..49e244278dd13111c65852ac500100051a5f5e56 --- /dev/null +++ b/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/keep_text_farest_image_farest/tensorboard/events.out.tfevents.1745755854.g12.2682891.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3498cb2e8566f663433bd058a744f1565c5d2ba308c6658b9965266bc5cd1f4d +size 19936 diff --git a/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/low_inter_only/benchmark_caltech101_epoch_10.pt_ViT-B-16_en_zeroshot_classification.json b/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/low_inter_only/benchmark_caltech101_epoch_10.pt_ViT-B-16_en_zeroshot_classification.json new file mode 100644 index 0000000000000000000000000000000000000000..bc784e484042609a31f20a51b52c6a4b717c8243 --- /dev/null +++ b/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/low_inter_only/benchmark_caltech101_epoch_10.pt_ViT-B-16_en_zeroshot_classification.json @@ -0,0 +1 @@ +{"dataset": "caltech101", "model": "ViT-B-16", "pretrained": "/mnt/personal/zhudongy/rdc9m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/low_inter_only/checkpoints/epoch_10.pt", "task": "zeroshot_classification", "metrics": {"acc1": 0.8611111111111112, "acc5": 0.9732064741907261, "mean_per_class_recall": 0.9224088045513618}, "language": "en"} \ No newline at end of file diff --git a/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/low_inter_only/benchmark_cars_epoch_10.pt_ViT-B-16_en_zeroshot_classification.json b/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/low_inter_only/benchmark_cars_epoch_10.pt_ViT-B-16_en_zeroshot_classification.json new file mode 100644 index 0000000000000000000000000000000000000000..21247b667626c202054e5a52b2f4a29289d8183f --- /dev/null +++ b/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/low_inter_only/benchmark_cars_epoch_10.pt_ViT-B-16_en_zeroshot_classification.json @@ -0,0 +1 @@ +{"dataset": "cars", "model": "ViT-B-16", "pretrained": "/mnt/personal/zhudongy/rdc9m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/low_inter_only/checkpoints/epoch_10.pt", "task": "zeroshot_classification", "metrics": {"acc1": 0.8303693570451436, "acc5": 0.988931724909837, "mean_per_class_recall": 0.8305479995448934}, "language": "en"} \ No newline at end of file diff --git a/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/low_inter_only/benchmark_cifar100_epoch_10.pt_ViT-B-16_en_zeroshot_classification.json b/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/low_inter_only/benchmark_cifar100_epoch_10.pt_ViT-B-16_en_zeroshot_classification.json new file mode 100644 index 0000000000000000000000000000000000000000..de4a79ae43d8000b1c3c3f168d185a53bd0e1cdd --- /dev/null +++ b/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/low_inter_only/benchmark_cifar100_epoch_10.pt_ViT-B-16_en_zeroshot_classification.json @@ -0,0 +1 @@ +{"dataset": "cifar100", "model": "ViT-B-16", "pretrained": "/mnt/personal/zhudongy/rdc9m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/low_inter_only/checkpoints/epoch_10.pt", "task": "zeroshot_classification", "metrics": {"acc1": 0.7475, "acc5": 0.9333, "mean_per_class_recall": 0.7476000000000002}, "language": "en"} \ No newline at end of file diff --git a/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/low_inter_only/benchmark_cifar10_epoch_10.pt_ViT-B-16_en_zeroshot_classification.json b/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/low_inter_only/benchmark_cifar10_epoch_10.pt_ViT-B-16_en_zeroshot_classification.json new file mode 100644 index 0000000000000000000000000000000000000000..9f8b2ae2300bee76bda837db2e6d20672e9af69b --- /dev/null +++ b/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/low_inter_only/benchmark_cifar10_epoch_10.pt_ViT-B-16_en_zeroshot_classification.json @@ -0,0 +1 @@ +{"dataset": "cifar10", "model": "ViT-B-16", "pretrained": "/mnt/personal/zhudongy/rdc9m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/low_inter_only/checkpoints/epoch_10.pt", "task": "zeroshot_classification", "metrics": {"acc1": 0.9345, "acc5": 0.999, "mean_per_class_recall": 0.9345000000000001}, "language": "en"} \ No newline at end of file diff --git a/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/low_inter_only/benchmark_country211_epoch_10.pt_ViT-B-16_en_zeroshot_classification.json b/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/low_inter_only/benchmark_country211_epoch_10.pt_ViT-B-16_en_zeroshot_classification.json new file mode 100644 index 0000000000000000000000000000000000000000..4c4cd3032dec59552e11198e8432c62e40ef0fef --- /dev/null +++ b/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/low_inter_only/benchmark_country211_epoch_10.pt_ViT-B-16_en_zeroshot_classification.json @@ -0,0 +1 @@ +{"dataset": "country211", "model": "ViT-B-16", "pretrained": "/mnt/personal/zhudongy/rdc9m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/low_inter_only/checkpoints/epoch_10.pt", "task": "zeroshot_classification", "metrics": {"acc1": 0.19374407582938388, "acc5": 0.4218483412322275, "mean_per_class_recall": 0.1937914691943128}, "language": "en"} \ No newline at end of file diff --git a/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/low_inter_only/benchmark_dtd_epoch_10.pt_ViT-B-16_en_zeroshot_classification.json b/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/low_inter_only/benchmark_dtd_epoch_10.pt_ViT-B-16_en_zeroshot_classification.json new file mode 100644 index 0000000000000000000000000000000000000000..77f6eb87c12abdb09178e8d9e1655b4f11c325f0 --- /dev/null +++ b/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/low_inter_only/benchmark_dtd_epoch_10.pt_ViT-B-16_en_zeroshot_classification.json @@ -0,0 +1 @@ +{"dataset": "dtd", "model": "ViT-B-16", "pretrained": "/mnt/personal/zhudongy/rdc9m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/low_inter_only/checkpoints/epoch_10.pt", "task": "zeroshot_classification", "metrics": {"acc1": 0.5340425531914894, "acc5": 0.8228723404255319, "mean_per_class_recall": 0.5329787234042552}, "language": "en"} \ No newline at end of file diff --git a/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/low_inter_only/benchmark_eurosat_epoch_10.pt_ViT-B-16_en_zeroshot_classification.json b/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/low_inter_only/benchmark_eurosat_epoch_10.pt_ViT-B-16_en_zeroshot_classification.json new file mode 100644 index 0000000000000000000000000000000000000000..58f8a12675a0bb03ff99a3458faa428cf0ecd865 --- /dev/null +++ b/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/low_inter_only/benchmark_eurosat_epoch_10.pt_ViT-B-16_en_zeroshot_classification.json @@ -0,0 +1 @@ +{"dataset": "eurosat", "model": "ViT-B-16", "pretrained": "/mnt/personal/zhudongy/rdc9m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/low_inter_only/checkpoints/epoch_10.pt", "task": "zeroshot_classification", "metrics": {"acc1": 0.4602222222222222, "acc5": 0.8801111111111111, "mean_per_class_recall": 0.47197999999999996}, "language": "en"} \ No newline at end of file diff --git a/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/low_inter_only/benchmark_flickr30k_epoch_10.pt_ViT-B-16_en_zeroshot_retrieval.json b/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/low_inter_only/benchmark_flickr30k_epoch_10.pt_ViT-B-16_en_zeroshot_retrieval.json new file mode 100644 index 0000000000000000000000000000000000000000..ee7b040313ecb2900dbedb08bdb4bb6dbe57b8c3 --- /dev/null +++ b/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/low_inter_only/benchmark_flickr30k_epoch_10.pt_ViT-B-16_en_zeroshot_retrieval.json @@ -0,0 +1 @@ +{"dataset": "flickr30k", "model": "ViT-B-16", "pretrained": "/mnt/personal/zhudongy/rdc9m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/low_inter_only/checkpoints/epoch_10.pt", "task": "zeroshot_retrieval", "metrics": {"image_retrieval_recall@1": 0.6621999740600586, "text_retrieval_recall@1": 0.8259999752044678, "image_retrieval_recall@5": 0.8751999735832214, "text_retrieval_recall@5": 0.9580000042915344, "image_retrieval_recall@10": 0.9215999841690063, "text_retrieval_recall@10": 0.9800000190734863}, "language": "en"} \ No newline at end of file diff --git a/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/low_inter_only/benchmark_flowers_epoch_10.pt_ViT-B-16_en_zeroshot_classification.json b/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/low_inter_only/benchmark_flowers_epoch_10.pt_ViT-B-16_en_zeroshot_classification.json new file mode 100644 index 0000000000000000000000000000000000000000..0cc7fdf11b1ad2386671988a0a0a0718652a63f8 --- /dev/null +++ b/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/low_inter_only/benchmark_flowers_epoch_10.pt_ViT-B-16_en_zeroshot_classification.json @@ -0,0 +1 @@ +{"dataset": "flowers", "model": "ViT-B-16", "pretrained": "/mnt/personal/zhudongy/rdc9m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/low_inter_only/checkpoints/epoch_10.pt", "task": "zeroshot_classification", "metrics": {"acc1": 0.7518295657830542, "acc5": 0.9006342494714588, "mean_per_class_recall": 0.7590609398935276}, "language": "en"} \ No newline at end of file diff --git a/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/low_inter_only/benchmark_food101_epoch_10.pt_ViT-B-16_en_zeroshot_classification.json b/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/low_inter_only/benchmark_food101_epoch_10.pt_ViT-B-16_en_zeroshot_classification.json new file mode 100644 index 0000000000000000000000000000000000000000..449dbc9de2b34064078f8de605e7c7b3fae9be21 --- /dev/null +++ b/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/low_inter_only/benchmark_food101_epoch_10.pt_ViT-B-16_en_zeroshot_classification.json @@ -0,0 +1 @@ +{"dataset": "food101", "model": "ViT-B-16", "pretrained": "/mnt/personal/zhudongy/rdc9m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/low_inter_only/checkpoints/epoch_10.pt", "task": "zeroshot_classification", "metrics": {"acc1": 0.8712475247524752, "acc5": 0.9801584158415841, "mean_per_class_recall": 0.8710891089108911}, "language": "en"} \ No newline at end of file diff --git a/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/low_inter_only/benchmark_gtsrb_epoch_10.pt_ViT-B-16_en_zeroshot_classification.json b/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/low_inter_only/benchmark_gtsrb_epoch_10.pt_ViT-B-16_en_zeroshot_classification.json new file mode 100644 index 0000000000000000000000000000000000000000..5c848a24840fc0fc367415911ee862328199b5be --- /dev/null +++ b/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/low_inter_only/benchmark_gtsrb_epoch_10.pt_ViT-B-16_en_zeroshot_classification.json @@ -0,0 +1 @@ +{"dataset": "gtsrb", "model": "ViT-B-16", "pretrained": "/mnt/personal/zhudongy/rdc9m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/low_inter_only/checkpoints/epoch_10.pt", "task": "zeroshot_classification", "metrics": {"acc1": 0.5334916864608076, "acc5": 0.7653206650831353, "mean_per_class_recall": 0.49353806950268025}, "language": "en"} \ No newline at end of file diff --git a/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/low_inter_only/benchmark_imagenet1k_epoch_10.pt_ViT-B-16_en_zeroshot_classification.json b/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/low_inter_only/benchmark_imagenet1k_epoch_10.pt_ViT-B-16_en_zeroshot_classification.json new file mode 100644 index 0000000000000000000000000000000000000000..d8c80cc1c058ac01b9024465d108ce1868a503db --- /dev/null +++ b/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/low_inter_only/benchmark_imagenet1k_epoch_10.pt_ViT-B-16_en_zeroshot_classification.json @@ -0,0 +1 @@ +{"dataset": "imagenet1k", "model": "ViT-B-16", "pretrained": "/mnt/personal/zhudongy/rdc9m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/low_inter_only/checkpoints/epoch_10.pt", "task": "zeroshot_classification", "metrics": {"acc1": 0.68076, "acc5": 0.90792, "mean_per_class_recall": 0.68056}, "language": "en"} \ No newline at end of file diff --git a/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/low_inter_only/benchmark_mscoco_captions_epoch_10.pt_ViT-B-16_en_zeroshot_retrieval.json b/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/low_inter_only/benchmark_mscoco_captions_epoch_10.pt_ViT-B-16_en_zeroshot_retrieval.json new file mode 100644 index 0000000000000000000000000000000000000000..a891e6f2635e33fc3d7b3cbc4aad2594ac7a86a3 --- /dev/null +++ b/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/low_inter_only/benchmark_mscoco_captions_epoch_10.pt_ViT-B-16_en_zeroshot_retrieval.json @@ -0,0 +1 @@ +{"dataset": "mscoco_captions", "model": "ViT-B-16", "pretrained": "/mnt/personal/zhudongy/rdc9m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/low_inter_only/checkpoints/epoch_10.pt", "task": "zeroshot_retrieval", "metrics": {"image_retrieval_recall@1": 0.3862854838371277, "text_retrieval_recall@1": 0.5411999821662903, "image_retrieval_recall@5": 0.6445421576499939, "text_retrieval_recall@5": 0.7820000052452087, "image_retrieval_recall@10": 0.745781660079956, "text_retrieval_recall@10": 0.8583999872207642}, "language": "en"} \ No newline at end of file diff --git a/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/low_inter_only/benchmark_pets_epoch_10.pt_ViT-B-16_en_zeroshot_classification.json b/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/low_inter_only/benchmark_pets_epoch_10.pt_ViT-B-16_en_zeroshot_classification.json new file mode 100644 index 0000000000000000000000000000000000000000..fba129355cd3c610113fc2474f83bcd154e0f3ed --- /dev/null +++ b/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/low_inter_only/benchmark_pets_epoch_10.pt_ViT-B-16_en_zeroshot_classification.json @@ -0,0 +1 @@ +{"dataset": "pets", "model": "ViT-B-16", "pretrained": "/mnt/personal/zhudongy/rdc9m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/low_inter_only/checkpoints/epoch_10.pt", "task": "zeroshot_classification", "metrics": {"acc1": 0.8997001907876806, "acc5": 0.9967293540474244, "mean_per_class_recall": 0.8993521290462428}, "language": "en"} \ No newline at end of file diff --git a/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/low_inter_only/benchmark_stl10_epoch_10.pt_ViT-B-16_en_zeroshot_classification.json b/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/low_inter_only/benchmark_stl10_epoch_10.pt_ViT-B-16_en_zeroshot_classification.json new file mode 100644 index 0000000000000000000000000000000000000000..a5e9d96820d071e8fa966195d56088324e49a9cf --- /dev/null +++ b/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/low_inter_only/benchmark_stl10_epoch_10.pt_ViT-B-16_en_zeroshot_classification.json @@ -0,0 +1 @@ +{"dataset": "stl10", "model": "ViT-B-16", "pretrained": "/mnt/personal/zhudongy/rdc9m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/low_inter_only/checkpoints/epoch_10.pt", "task": "zeroshot_classification", "metrics": {"acc1": 0.977, "acc5": 1.0, "mean_per_class_recall": 0.977}, "language": "en"} \ No newline at end of file diff --git a/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/low_inter_only/benchmark_sun397_epoch_10.pt_ViT-B-16_en_zeroshot_classification.json b/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/low_inter_only/benchmark_sun397_epoch_10.pt_ViT-B-16_en_zeroshot_classification.json new file mode 100644 index 0000000000000000000000000000000000000000..e540691076b1ed4c04a09a0792994ceeea32ed12 --- /dev/null +++ b/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/low_inter_only/benchmark_sun397_epoch_10.pt_ViT-B-16_en_zeroshot_classification.json @@ -0,0 +1 @@ +{"dataset": "sun397", "model": "ViT-B-16", "pretrained": "/mnt/personal/zhudongy/rdc9m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/low_inter_only/checkpoints/epoch_10.pt", "task": "zeroshot_classification", "metrics": {"acc1": 0.6715707008477849, "acc5": 0.9273681887562756, "mean_per_class_recall": 0.6589770233905725}, "language": "en"} \ No newline at end of file diff --git a/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/low_inter_only/benchmark_vtab_resisc45_epoch_10.pt_ViT-B-16_en_zeroshot_classification.json b/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/low_inter_only/benchmark_vtab_resisc45_epoch_10.pt_ViT-B-16_en_zeroshot_classification.json new file mode 100644 index 0000000000000000000000000000000000000000..1e691818fd189c09932f18a47eb4c1be20d5acc1 --- /dev/null +++ b/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/low_inter_only/benchmark_vtab_resisc45_epoch_10.pt_ViT-B-16_en_zeroshot_classification.json @@ -0,0 +1 @@ +{"dataset": "vtab/resisc45", "model": "ViT-B-16", "pretrained": "/mnt/personal/zhudongy/rdc9m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/low_inter_only/checkpoints/epoch_10.pt", "task": "zeroshot_classification", "metrics": {"acc1": 0.6215873015873016, "acc5": 0.9153968253968254, "mean_per_class_recall": 0.6280168420204852}, "language": "en"} \ No newline at end of file diff --git a/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/low_inter_only/out.log b/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/low_inter_only/out.log new file mode 100644 index 0000000000000000000000000000000000000000..76bf58d96520ee942faf7dbfee62c251c576beb0 --- /dev/null +++ b/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/low_inter_only/out.log @@ -0,0 +1,195 @@ +2025-04-27,20:53:01 | INFO | Running in distributed mode with multiple processes. Device: cuda:0.Process (global: 0, local 0), total 2. +2025-04-27,20:53:01 | INFO | Loaded ViT-B-16 model config. +2025-04-27,20:53:02 | INFO | Loading pretrained ViT-B-16 weights (datacomp_xl_s13b_b90k). +2025-04-27,20:53:03 | INFO | Model: +2025-04-27,20:53:03 | INFO | CLIP( + (visual): VisionTransformer( + (conv1): Conv2d(3, 768, kernel_size=(16, 16), stride=(16, 16), bias=False) + (patch_dropout): Identity() + (ln_pre): LayerNorm((768,), eps=1e-05, elementwise_affine=True) + (transformer): Transformer( + (resblocks): ModuleList( + (0-11): 12 x ResidualAttentionBlock( + (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True) + (attn): MultiheadAttention( + (out_proj): NonDynamicallyQuantizableLinear(in_features=768, out_features=768, bias=True) + ) + (ls_1): Identity() + (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True) + (mlp): Sequential( + (c_fc): Linear(in_features=768, out_features=3072, bias=True) + (gelu): GELU(approximate='none') + (c_proj): Linear(in_features=3072, out_features=768, bias=True) + ) + (ls_2): Identity() + ) + ) + ) + (ln_post): LayerNorm((768,), eps=1e-05, elementwise_affine=True) + ) + (transformer): Transformer( + (resblocks): ModuleList( + (0-11): 12 x ResidualAttentionBlock( + (ln_1): LayerNorm((512,), eps=1e-05, elementwise_affine=True) + (attn): MultiheadAttention( + (out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True) + ) + (ls_1): Identity() + (ln_2): LayerNorm((512,), eps=1e-05, elementwise_affine=True) + (mlp): Sequential( + (c_fc): Linear(in_features=512, out_features=2048, bias=True) + (gelu): GELU(approximate='none') + (c_proj): Linear(in_features=2048, out_features=512, bias=True) + ) + (ls_2): Identity() + ) + ) + ) + (token_embedding): Embedding(49408, 512) + (ln_final): LayerNorm((512,), eps=1e-05, elementwise_affine=True) +) +2025-04-27,20:53:03 | INFO | Params: +2025-04-27,20:53:03 | INFO | accum_freq: 2 +2025-04-27,20:53:03 | INFO | aug_cfg: {} +2025-04-27,20:53:03 | INFO | batch_size: 2048 +2025-04-27,20:53:03 | INFO | beta1: 0.9 +2025-04-27,20:53:03 | INFO | beta2: 0.98 +2025-04-27,20:53:03 | INFO | cache_dir: None +2025-04-27,20:53:03 | INFO | caption_ratio: 0.1 +2025-04-27,20:53:03 | INFO | checkpoint_path: /mnt/personal/zhudongy/rdc9m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/low_inter_only/checkpoints +2025-04-27,20:53:03 | INFO | coca_caption_loss_weight: 2.0 +2025-04-27,20:53:03 | INFO | coca_contrastive_loss_weight: 1.0 +2025-04-27,20:53:03 | INFO | copy_codebase: False +2025-04-27,20:53:03 | INFO | csv_caption_key: title +2025-04-27,20:53:03 | INFO | csv_img_key: filepath +2025-04-27,20:53:03 | INFO | csv_separator: +2025-04-27,20:53:03 | INFO | dataset_resampled: False +2025-04-27,20:53:03 | INFO | dataset_type: synthetic +2025-04-27,20:53:03 | INFO | ddp_static_graph: False +2025-04-27,20:53:03 | INFO | debug: False +2025-04-27,20:53:03 | INFO | delete_previous_checkpoint: False +2025-04-27,20:53:03 | INFO | device: cuda:0 +2025-04-27,20:53:03 | INFO | dist_backend: None +2025-04-27,20:53:03 | INFO | dist_url: None +2025-04-27,20:53:03 | INFO | distill: False +2025-04-27,20:53:03 | INFO | distill_model: None +2025-04-27,20:53:03 | INFO | distill_pretrained: None +2025-04-27,20:53:03 | INFO | distributed: True +2025-04-27,20:53:03 | INFO | epochs: 10 +2025-04-27,20:53:03 | INFO | epochs_cooldown: None +2025-04-27,20:53:03 | INFO | eps: 1e-08 +2025-04-27,20:53:03 | INFO | force_custom_text: False +2025-04-27,20:53:03 | INFO | force_image_size: None +2025-04-27,20:53:03 | INFO | force_patch_dropout: None +2025-04-27,20:53:03 | INFO | force_quick_gelu: False +2025-04-27,20:53:03 | INFO | gather_with_grad: True +2025-04-27,20:53:03 | INFO | grad_checkpointing: True +2025-04-27,20:53:03 | INFO | grad_clip_norm: None +2025-04-27,20:53:03 | INFO | horovod: False +2025-04-27,20:53:03 | INFO | image_interpolation: None +2025-04-27,20:53:03 | INFO | image_mean: None +2025-04-27,20:53:03 | INFO | image_resize_mode: None +2025-04-27,20:53:03 | INFO | image_std: None +2025-04-27,20:53:03 | INFO | imagenet_v2: None +2025-04-27,20:53:03 | INFO | imagenet_val: None +2025-04-27,20:53:03 | INFO | keep_func_name: low_inter_only +2025-04-27,20:53:03 | INFO | local_loss: False +2025-04-27,20:53:03 | INFO | local_rank: 0 +2025-04-27,20:53:03 | INFO | lock_image: False +2025-04-27,20:53:03 | INFO | lock_image_freeze_bn_stats: False +2025-04-27,20:53:03 | INFO | lock_image_unlocked_groups: 0 +2025-04-27,20:53:03 | INFO | lock_text: True +2025-04-27,20:53:03 | INFO | lock_text_freeze_layer_norm: False +2025-04-27,20:53:03 | INFO | lock_text_unlocked_layers: 0 +2025-04-27,20:53:03 | INFO | log_every_n_steps: 100 +2025-04-27,20:53:03 | INFO | log_level: 20 +2025-04-27,20:53:03 | INFO | log_local: False +2025-04-27,20:53:03 | INFO | log_path: /mnt/personal/zhudongy/rdc9m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/low_inter_only/out.log +2025-04-27,20:53:03 | INFO | logs: /mnt/personal/zhudongy/rdc9m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text +2025-04-27,20:53:03 | INFO | loss_dist_impl: None +2025-04-27,20:53:03 | INFO | lr: 4e-05 +2025-04-27,20:53:03 | INFO | lr_cooldown_end: 0.0 +2025-04-27,20:53:03 | INFO | lr_cooldown_power: 1.0 +2025-04-27,20:53:03 | INFO | lr_scheduler: cosine +2025-04-27,20:53:03 | INFO | map_func_name: use_all +2025-04-27,20:53:03 | INFO | model: ViT-B-16 +2025-04-27,20:53:03 | INFO | momentum: None +2025-04-27,20:53:03 | INFO | name: low_inter_only +2025-04-27,20:53:03 | INFO | no_set_device_rank: False +2025-04-27,20:53:03 | INFO | opt: adamw +2025-04-27,20:53:03 | INFO | precision: amp +2025-04-27,20:53:03 | INFO | pretrained: datacomp_xl_s13b_b90k +2025-04-27,20:53:03 | INFO | pretrained_image: False +2025-04-27,20:53:03 | INFO | rank: 0 +2025-04-27,20:53:03 | INFO | remote_sync: None +2025-04-27,20:53:03 | INFO | remote_sync_frequency: 300 +2025-04-27,20:53:03 | INFO | remote_sync_protocol: s3 +2025-04-27,20:53:03 | INFO | report_to: tensorboard,wandb +2025-04-27,20:53:03 | INFO | resume: None +2025-04-27,20:53:03 | INFO | save_frequency: 10 +2025-04-27,20:53:03 | INFO | save_most_recent: False +2025-04-27,20:53:03 | INFO | seed: 0 +2025-04-27,20:53:03 | INFO | siglip: False +2025-04-27,20:53:03 | INFO | skip_scheduler: False +2025-04-27,20:53:03 | INFO | tensorboard: True +2025-04-27,20:53:03 | INFO | tensorboard_path: /mnt/personal/zhudongy/rdc9m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/low_inter_only/tensorboard +2025-04-27,20:53:03 | INFO | torchcompile: False +2025-04-27,20:53:03 | INFO | torchscript: False +2025-04-27,20:53:03 | INFO | trace: False +2025-04-27,20:53:03 | INFO | train_data: /mnt/personal/zhudongy/recap-datacomp-3m-wds/{00376..01507}.tar +2025-04-27,20:53:03 | INFO | train_data_upsampling_factors: None +2025-04-27,20:53:03 | INFO | train_num_samples: 9011874 +2025-04-27,20:53:03 | INFO | use_bn_sync: False +2025-04-27,20:53:03 | INFO | use_bnb_linear: None +2025-04-27,20:53:03 | INFO | val_data: None +2025-04-27,20:53:03 | INFO | val_frequency: 1 +2025-04-27,20:53:03 | INFO | val_num_samples: None +2025-04-27,20:53:03 | INFO | wandb: True +2025-04-27,20:53:03 | INFO | wandb_notes: +2025-04-27,20:53:03 | INFO | wandb_project_name: open-clip +2025-04-27,20:53:03 | INFO | warmup: 110 +2025-04-27,20:53:03 | INFO | wd: 0.5 +2025-04-27,20:53:03 | INFO | workers: 16 +2025-04-27,20:53:03 | INFO | world_size: 2 +2025-04-27,20:53:03 | INFO | zeroshot_frequency: 2 +2025-04-27,20:53:03 | INFO | Created AdamW (adamw) optimizer: lr: 4e-05, betas: (0.9, 0.98), eps: 1e-08, weight_decay: 0.5, amsgrad: False, foreach: None, maximize: False, capturable: False, differentiable: False, fused: None +2025-04-27,20:53:45 | INFO | Start epoch 0 +2025-04-27,20:54:39 | INFO | Train Epoch: 0 [ 8192/917504 (1%)] Data (t): 44.740 Batch (t): 54.300, 150.864/s, 75.4322/s/gpu LR: 0.000000 Logit Scale: 100.000 Imm_image: 28.004 (28.004) Imm_text: 28.004 (28.004) Isd_image: 0.48300 (0.48300) Isd_text: 0.48300 (0.48300) Contrastive_loss: 1.3863 (1.3863) Loss: 1.3863 (1.3863) +2025-04-27,21:09:40 | INFO | Train Epoch: 0 [827392/917504 (90%)] Data (t): 1.004 Batch (t): 9.011, 913.635/s, 456.818/s/gpu LR: 0.000037 Logit Scale: 99.932 Imm_image: 28.809 (28.407) Imm_text: 28.809 (28.407) Isd_image: 4.0077 (2.2454) Isd_text: 4.0077 (2.2454) Contrastive_loss: 0.64345 (1.0149) Loss: 0.64345 (1.0149) +2025-04-27,21:11:19 | INFO | Train Epoch: 0 [917504/917504 (100%)] Data (t): 0.966 Batch (t): 8.956, 911.469/s, 455.734/s/gpu LR: 0.000040 Logit Scale: 99.925 Imm_image: 28.850 (28.554) Imm_text: 28.850 (28.554) Isd_image: 3.6929 (2.7279) Isd_text: 3.6929 (2.7279) Contrastive_loss: 0.61486 (0.88152) Loss: 0.61486 (0.88152) +2025-04-27,21:11:19 | INFO | Start epoch 1 +2025-04-27,21:12:04 | INFO | Train Epoch: 1 [ 8192/917504 (1%)] Data (t): 37.221 Batch (t): 45.300, 180.838/s, 90.4192/s/gpu LR: 0.000040 Logit Scale: 99.924 Imm_image: 28.894 (28.894) Imm_text: 28.894 (28.894) Isd_image: 3.6459 (3.6459) Isd_text: 3.6459 (3.6459) Contrastive_loss: 0.56427 (0.56427) Loss: 0.56427 (0.56427) +2025-04-27,21:27:11 | INFO | Train Epoch: 1 [827392/917504 (90%)] Data (t): 1.045 Batch (t): 9.067, 913.108/s, 456.554/s/gpu LR: 0.000039 Logit Scale: 99.876 Imm_image: 28.777 (28.835) Imm_text: 28.777 (28.835) Isd_image: 2.0186 (2.8323) Isd_text: 2.0186 (2.8323) Contrastive_loss: 0.54482 (0.55454) Loss: 0.54482 (0.55454) +2025-04-27,21:28:50 | INFO | Train Epoch: 1 [917504/917504 (100%)] Data (t): 0.990 Batch (t): 8.986, 910.140/s, 455.070/s/gpu LR: 0.000039 Logit Scale: 99.875 Imm_image: 28.908 (28.860) Imm_text: 28.908 (28.860) Isd_image: 1.5264 (2.3970) Isd_text: 1.5264 (2.3970) Contrastive_loss: 0.44167 (0.51692) Loss: 0.44167 (0.51692) +2025-04-27,21:28:50 | INFO | Start epoch 2 +2025-04-27,21:29:35 | INFO | Train Epoch: 2 [ 8192/917504 (1%)] Data (t): 36.369 Batch (t): 44.535, 183.945/s, 91.9726/s/gpu LR: 0.000039 Logit Scale: 99.876 Imm_image: 28.971 (28.971) Imm_text: 28.971 (28.971) Isd_image: 1.5085 (1.5085) Isd_text: 1.5085 (1.5085) Contrastive_loss: 0.46002 (0.46002) Loss: 0.46002 (0.46002) +2025-04-27,21:44:31 | INFO | Train Epoch: 2 [827392/917504 (90%)] Data (t): 0.966 Batch (t): 8.965, 921.609/s, 460.804/s/gpu LR: 0.000036 Logit Scale: 99.957 Imm_image: 29.224 (29.097) Imm_text: 29.224 (29.097) Isd_image: 0.16710 (0.83778) Isd_text: 0.16710 (0.83778) Contrastive_loss: 0.42271 (0.44137) Loss: 0.42271 (0.44137) +2025-04-27,21:46:09 | INFO | Train Epoch: 2 [917504/917504 (100%)] Data (t): 0.923 Batch (t): 8.896, 916.982/s, 458.491/s/gpu LR: 0.000035 Logit Scale: 99.971 Imm_image: 29.325 (29.173) Imm_text: 29.325 (29.173) Isd_image: 0.038107 (0.57122) Isd_text: 0.038107 (0.57122) Contrastive_loss: 0.36076 (0.41450) Loss: 0.36076 (0.41450) +2025-04-27,21:46:09 | INFO | Start epoch 3 +2025-04-27,21:46:54 | INFO | Train Epoch: 3 [ 8192/917504 (1%)] Data (t): 36.596 Batch (t): 44.687, 183.320/s, 91.6598/s/gpu LR: 0.000035 Logit Scale: 99.975 Imm_image: 29.349 (29.349) Imm_text: 29.349 (29.349) Isd_image: -0.042467 (-0.042467) Isd_text: -0.042467 (-0.042467) Contrastive_loss: 0.37044 (0.37044) Loss: 0.37044 (0.37044) +2025-04-27,22:01:54 | INFO | Train Epoch: 3 [827392/917504 (90%)] Data (t): 0.992 Batch (t): 8.996, 919.788/s, 459.894/s/gpu LR: 0.000031 Logit Scale: 100.000 Imm_image: 29.476 (29.412) Imm_text: 29.476 (29.412) Isd_image: -0.75747 (-0.39997) Isd_text: -0.75747 (-0.39997) Contrastive_loss: 0.37558 (0.37301) Loss: 0.37558 (0.37301) +2025-04-27,22:03:32 | INFO | Train Epoch: 3 [917504/917504 (100%)] Data (t): 0.935 Batch (t): 8.913, 917.124/s, 458.562/s/gpu LR: 0.000030 Logit Scale: 100.000 Imm_image: 29.571 (29.465) Imm_text: 29.571 (29.465) Isd_image: -0.97516 (-0.59170) Isd_text: -0.97516 (-0.59170) Contrastive_loss: 0.31964 (0.35522) Loss: 0.31964 (0.35522) +2025-04-27,22:03:32 | INFO | Start epoch 4 +2025-04-27,22:04:17 | INFO | Train Epoch: 4 [ 8192/917504 (1%)] Data (t): 36.657 Batch (t): 44.719, 183.187/s, 91.5936/s/gpu LR: 0.000030 Logit Scale: 100.000 Imm_image: 29.600 (29.600) Imm_text: 29.600 (29.600) Isd_image: -0.96800 (-0.96800) Isd_text: -0.96800 (-0.96800) Contrastive_loss: 0.31170 (0.31170) Loss: 0.31170 (0.31170) +2025-04-27,22:19:17 | INFO | Train Epoch: 4 [827392/917504 (90%)] Data (t): 0.995 Batch (t): 9.002, 919.149/s, 459.574/s/gpu LR: 0.000024 Logit Scale: 100.000 Imm_image: 29.778 (29.689) Imm_text: 29.778 (29.689) Isd_image: -1.3826 (-1.1753) Isd_text: -1.3826 (-1.1753) Contrastive_loss: 0.30417 (0.30793) Loss: 0.30417 (0.30793) +2025-04-27,22:20:55 | INFO | Train Epoch: 4 [917504/917504 (100%)] Data (t): 0.948 Batch (t): 8.931, 914.345/s, 457.172/s/gpu LR: 0.000023 Logit Scale: 100.000 Imm_image: 29.832 (29.737) Imm_text: 29.832 (29.737) Isd_image: -1.6278 (-1.3261) Isd_text: -1.6278 (-1.3261) Contrastive_loss: 0.25928 (0.29172) Loss: 0.25928 (0.29172) +2025-04-27,22:20:55 | INFO | Start epoch 5 +2025-04-27,22:21:40 | INFO | Train Epoch: 5 [ 8192/917504 (1%)] Data (t): 35.569 Batch (t): 44.337, 184.767/s, 92.3833/s/gpu LR: 0.000023 Logit Scale: 100.000 Imm_image: 29.864 (29.864) Imm_text: 29.864 (29.864) Isd_image: -1.5245 (-1.5245) Isd_text: -1.5245 (-1.5245) Contrastive_loss: 0.28583 (0.28583) Loss: 0.28583 (0.28583) +2025-04-27,22:36:41 | INFO | Train Epoch: 5 [827392/917504 (90%)] Data (t): 1.005 Batch (t): 9.014, 915.904/s, 457.952/s/gpu LR: 0.000017 Logit Scale: 100.000 Imm_image: 30.039 (29.951) Imm_text: 30.039 (29.951) Isd_image: -1.7178 (-1.6211) Isd_text: -1.7178 (-1.6211) Contrastive_loss: 0.26470 (0.27526) Loss: 0.26470 (0.27526) +2025-04-27,22:38:19 | INFO | Train Epoch: 5 [917504/917504 (100%)] Data (t): 0.956 Batch (t): 8.938, 913.227/s, 456.613/s/gpu LR: 0.000017 Logit Scale: 100.000 Imm_image: 29.983 (29.962) Imm_text: 29.983 (29.962) Isd_image: -1.5348 (-1.5924) Isd_text: -1.5348 (-1.5924) Contrastive_loss: 0.25894 (0.26982) Loss: 0.25894 (0.26982) +2025-04-27,22:38:20 | INFO | Start epoch 6 +2025-04-27,22:39:05 | INFO | Train Epoch: 6 [ 8192/917504 (1%)] Data (t): 36.913 Batch (t): 45.213, 181.186/s, 90.5929/s/gpu LR: 0.000016 Logit Scale: 100.000 Imm_image: 30.205 (30.205) Imm_text: 30.205 (30.205) Isd_image: -1.5840 (-1.5840) Isd_text: -1.5840 (-1.5840) Contrastive_loss: 0.21889 (0.21889) Loss: 0.21889 (0.21889) +2025-04-27,22:54:06 | INFO | Train Epoch: 6 [827392/917504 (90%)] Data (t): 1.001 Batch (t): 9.008, 918.420/s, 459.210/s/gpu LR: 0.000011 Logit Scale: 100.000 Imm_image: 30.129 (30.167) Imm_text: 30.129 (30.167) Isd_image: -1.8268 (-1.7054) Isd_text: -1.8268 (-1.7054) Contrastive_loss: 0.24040 (0.22965) Loss: 0.24040 (0.22965) +2025-04-27,22:55:44 | INFO | Train Epoch: 6 [917504/917504 (100%)] Data (t): 0.956 Batch (t): 8.938, 914.360/s, 457.180/s/gpu LR: 0.000010 Logit Scale: 100.000 Imm_image: 30.250 (30.195) Imm_text: 30.250 (30.195) Isd_image: -1.7475 (-1.7195) Isd_text: -1.7475 (-1.7195) Contrastive_loss: 0.22036 (0.22655) Loss: 0.22036 (0.22655) +2025-04-27,22:55:44 | INFO | Start epoch 7 +2025-04-27,22:56:29 | INFO | Train Epoch: 7 [ 8192/917504 (1%)] Data (t): 36.050 Batch (t): 44.469, 184.219/s, 92.1094/s/gpu LR: 0.000010 Logit Scale: 100.000 Imm_image: 30.307 (30.307) Imm_text: 30.307 (30.307) Isd_image: -1.7967 (-1.7967) Isd_text: -1.7967 (-1.7967) Contrastive_loss: 0.22148 (0.22148) Loss: 0.22148 (0.22148) +2025-04-27,23:11:28 | INFO | Train Epoch: 7 [827392/917504 (90%)] Data (t): 0.991 Batch (t): 8.997, 918.481/s, 459.241/s/gpu LR: 0.000005 Logit Scale: 100.000 Imm_image: 30.379 (30.343) Imm_text: 30.379 (30.343) Isd_image: -1.9151 (-1.8559) Isd_text: -1.9151 (-1.8559) Contrastive_loss: 0.21597 (0.21872) Loss: 0.21597 (0.21872) +2025-04-27,23:13:07 | INFO | Train Epoch: 7 [917504/917504 (100%)] Data (t): 0.950 Batch (t): 8.932, 914.317/s, 457.159/s/gpu LR: 0.000005 Logit Scale: 100.000 Imm_image: 30.325 (30.337) Imm_text: 30.325 (30.337) Isd_image: -1.7899 (-1.8339) Isd_text: -1.7899 (-1.8339) Contrastive_loss: 0.21086 (0.21610) Loss: 0.21086 (0.21610) +2025-04-27,23:13:07 | INFO | Start epoch 8 +2025-04-27,23:13:52 | INFO | Train Epoch: 8 [ 8192/917504 (1%)] Data (t): 36.134 Batch (t): 44.652, 183.463/s, 91.7315/s/gpu LR: 0.000005 Logit Scale: 100.000 Imm_image: 30.386 (30.386) Imm_text: 30.386 (30.386) Isd_image: -1.8206 (-1.8206) Isd_text: -1.8206 (-1.8206) Contrastive_loss: 0.21730 (0.21730) Loss: 0.21730 (0.21730) +2025-04-27,23:28:51 | INFO | Train Epoch: 8 [827392/917504 (90%)] Data (t): 0.992 Batch (t): 8.997, 919.420/s, 459.710/s/gpu LR: 0.000001 Logit Scale: 100.000 Imm_image: 30.363 (30.374) Imm_text: 30.363 (30.374) Isd_image: -1.8669 (-1.8437) Isd_text: -1.8669 (-1.8437) Contrastive_loss: 0.22544 (0.22137) Loss: 0.22544 (0.22137) +2025-04-27,23:30:29 | INFO | Train Epoch: 8 [917504/917504 (100%)] Data (t): 0.946 Batch (t): 8.925, 915.526/s, 457.763/s/gpu LR: 0.000001 Logit Scale: 100.000 Imm_image: 30.382 (30.377) Imm_text: 30.382 (30.377) Isd_image: -1.8523 (-1.8466) Isd_text: -1.8523 (-1.8466) Contrastive_loss: 0.20832 (0.21702) Loss: 0.20832 (0.21702) +2025-04-27,23:30:30 | INFO | Start epoch 9 +2025-04-27,23:31:14 | INFO | Train Epoch: 9 [ 8192/917504 (1%)] Data (t): 36.011 Batch (t): 44.621, 183.592/s, 91.7960/s/gpu LR: 0.000001 Logit Scale: 100.000 Imm_image: 30.424 (30.424) Imm_text: 30.424 (30.424) Isd_image: -1.8386 (-1.8386) Isd_text: -1.8386 (-1.8386) Contrastive_loss: 0.20019 (0.20019) Loss: 0.20019 (0.20019) +2025-04-27,23:46:16 | INFO | Train Epoch: 9 [827392/917504 (90%)] Data (t): 1.009 Batch (t): 9.018, 919.090/s, 459.545/s/gpu LR: 0.000000 Logit Scale: 100.000 Imm_image: 30.359 (30.392) Imm_text: 30.359 (30.392) Isd_image: -1.8356 (-1.8371) Isd_text: -1.8356 (-1.8371) Contrastive_loss: 0.22642 (0.21331) Loss: 0.22642 (0.21331) +2025-04-27,23:47:54 | INFO | Train Epoch: 9 [917504/917504 (100%)] Data (t): 0.949 Batch (t): 8.931, 917.092/s, 458.546/s/gpu LR: 0.000000 Logit Scale: 100.000 Imm_image: 30.506 (30.430) Imm_text: 30.506 (30.430) Isd_image: -1.8817 (-1.8520) Isd_text: -1.8817 (-1.8520) Contrastive_loss: 0.18029 (0.20230) Loss: 0.18029 (0.20230) diff --git a/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/low_inter_only/tensorboard/events.out.tfevents.1745780012.g12.2713681.0 b/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/low_inter_only/tensorboard/events.out.tfevents.1745780012.g12.2713681.0 new file mode 100644 index 0000000000000000000000000000000000000000..9878f9f7973c10d842b6262ec7f9764f3e15c218 --- /dev/null +++ b/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/low_inter_only/tensorboard/events.out.tfevents.1745780012.g12.2713681.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:265d4b9d7c4756414318fe99f7568a80b7462cd2872802cdd17f1c9d6f497c9a +size 19936 diff --git a/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/clusters/image_I_closest_0.1_SFR-Embedding-Code-2B_R_dinov2-large.npy b/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/clusters/image_I_closest_0.1_SFR-Embedding-Code-2B_R_dinov2-large.npy new file mode 100644 index 0000000000000000000000000000000000000000..2b2cbfe1e83beda243a8f180f525e79be8b9fbf9 --- /dev/null +++ b/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/clusters/image_I_closest_0.1_SFR-Embedding-Code-2B_R_dinov2-large.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca2641da262316bb2b895abae3224ab6cc166b82a2dd4f65b1fe1f67cb907350 +size 228262016 diff --git a/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/clusters/image_farest.jsonl b/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/clusters/image_farest.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..0c80a7d7d364b804fef26df117ab736148b597ef --- /dev/null +++ b/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/clusters/image_farest.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b60d6b08a008e194de90c00df9913a0203d8898fd75763bf2d6a84840c64192e +size 124009441 diff --git a/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/clusters/text_farest.jsonl b/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/clusters/text_farest.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..01493d75b88aea6a3a5f9fa2b8a1e35b44b1e757 --- /dev/null +++ b/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/clusters/text_farest.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:51013bcfe4a79c5ee87068f55c96d5a0cb0bfb0c1899f48a634c42240d5a2e2f +size 124084962 diff --git a/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/clusters/text_uniform.jsonl b/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/clusters/text_uniform.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..6d86b7756cc34acc3e455bb497bca3eff841ab94 --- /dev/null +++ b/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/clusters/text_uniform.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7c39234fd0396c1f512ea29d8efbb4e717734421d7132a2dcf073af3dabb23ce +size 124061847 diff --git a/captions.tsv b/captions.tsv new file mode 100644 index 0000000000000000000000000000000000000000..ba55829051e0c0033d996fb92e5890b8e2a8b5ff --- /dev/null +++ b/captions.tsv @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38b710ad65f51fd80e92d8ea2e1aabf1df625f9720d7626d9724e48d4d09b3d8 +size 771831271 diff --git a/siglip-so400m-patch14-384#0.8#0.6#siglip-so400m-patch14-384#0.0#0.2#rouge_0.2#top_8#inter_0.4/clusters/text_D_closest_0.1_siglip-so400m-patch14-384_siglip-so400m-patch14-384.npy b/siglip-so400m-patch14-384#0.8#0.6#siglip-so400m-patch14-384#0.0#0.2#rouge_0.2#top_8#inter_0.4/clusters/text_D_closest_0.1_siglip-so400m-patch14-384_siglip-so400m-patch14-384.npy new file mode 100644 index 0000000000000000000000000000000000000000..0183c5501576cb1b63e529844feb986335a0c754 --- /dev/null +++ b/siglip-so400m-patch14-384#0.8#0.6#siglip-so400m-patch14-384#0.0#0.2#rouge_0.2#top_8#inter_0.4/clusters/text_D_closest_0.1_siglip-so400m-patch14-384_siglip-so400m-patch14-384.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:432e421a1993a7207b19e8c18ee618b6a1c6cbd58fec61f47e398637f8945adb +size 13443200 diff --git a/siglip-so400m-patch14-384#0.8#0.6#siglip-so400m-patch14-384#0.0#0.2#rouge_0.2#top_8#inter_0.4/clusters/text_D_labels_0.1_siglip-so400m-patch14-384_siglip-so400m-patch14-384.npy b/siglip-so400m-patch14-384#0.8#0.6#siglip-so400m-patch14-384#0.0#0.2#rouge_0.2#top_8#inter_0.4/clusters/text_D_labels_0.1_siglip-so400m-patch14-384_siglip-so400m-patch14-384.npy new file mode 100644 index 0000000000000000000000000000000000000000..430e9e2fc2e50ae6f92e976a99a28b2619804f7e --- /dev/null +++ b/siglip-so400m-patch14-384#0.8#0.6#siglip-so400m-patch14-384#0.0#0.2#rouge_0.2#top_8#inter_0.4/clusters/text_D_labels_0.1_siglip-so400m-patch14-384_siglip-so400m-patch14-384.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0bfa489d5b040b0cbcb7de858ecd70b85d3566ea9e313c76186fee55c1d58582 +size 36047624 diff --git a/siglip-so400m-patch14-384#0.8#0.6#siglip-so400m-patch14-384#0.0#0.2#rouge_0.2#top_8#inter_0.4/clusters/text_I_closest_0.1_siglip-so400m-patch14-384_siglip-so400m-patch14-384.npy b/siglip-so400m-patch14-384#0.8#0.6#siglip-so400m-patch14-384#0.0#0.2#rouge_0.2#top_8#inter_0.4/clusters/text_I_closest_0.1_siglip-so400m-patch14-384_siglip-so400m-patch14-384.npy new file mode 100644 index 0000000000000000000000000000000000000000..eb2df2232162835a1239ff34652f99574f9e5384 --- /dev/null +++ b/siglip-so400m-patch14-384#0.8#0.6#siglip-so400m-patch14-384#0.0#0.2#rouge_0.2#top_8#inter_0.4/clusters/text_I_closest_0.1_siglip-so400m-patch14-384_siglip-so400m-patch14-384.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:028bc140b511c9100272fd7255eb7e6140fc43bfefef17a2edb0cf53aa775d8b +size 26886272 diff --git a/siglip-so400m-patch14-384#0.8#0.6#siglip-so400m-patch14-384#0.0#0.2#rouge_0.2#top_8#inter_0.4/clusters/text_I_labels_0.1_siglip-so400m-patch14-384_siglip-so400m-patch14-384.npy b/siglip-so400m-patch14-384#0.8#0.6#siglip-so400m-patch14-384#0.0#0.2#rouge_0.2#top_8#inter_0.4/clusters/text_I_labels_0.1_siglip-so400m-patch14-384_siglip-so400m-patch14-384.npy new file mode 100644 index 0000000000000000000000000000000000000000..313e13c06f085b5ff2d7d68a4039c414f51e5be2 --- /dev/null +++ b/siglip-so400m-patch14-384#0.8#0.6#siglip-so400m-patch14-384#0.0#0.2#rouge_0.2#top_8#inter_0.4/clusters/text_I_labels_0.1_siglip-so400m-patch14-384_siglip-so400m-patch14-384.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:271461cc2827f9085988564f4a3bcd662d456783139be2aaae53c4a5cf2ec6eb +size 72095120 diff --git a/siglip-so400m-patch14-384#0.8#0.6#siglip-so400m-patch14-384#0.0#0.2#rouge_0.2#top_8#inter_0.4/clusters/text_closest.jsonl b/siglip-so400m-patch14-384#0.8#0.6#siglip-so400m-patch14-384#0.0#0.2#rouge_0.2#top_8#inter_0.4/clusters/text_closest.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..a11eccd857291a35539371e3dc6f26503462bd7d --- /dev/null +++ b/siglip-so400m-patch14-384#0.8#0.6#siglip-so400m-patch14-384#0.0#0.2#rouge_0.2#top_8#inter_0.4/clusters/text_closest.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:43929019ebe64344d02dbcf54723e15aafc3cdb9a1520532b096b448d6e3d2b1 +size 118342815 diff --git a/siglip-so400m-patch14-384#0.8#0.6#siglip-so400m-patch14-384#0.0#0.2#rouge_0.2#top_8#inter_0.4/clusters/text_uniform.jsonl b/siglip-so400m-patch14-384#0.8#0.6#siglip-so400m-patch14-384#0.0#0.2#rouge_0.2#top_8#inter_0.4/clusters/text_uniform.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..6e11db3f878b5d7014b975759b2d646930eafa97 --- /dev/null +++ b/siglip-so400m-patch14-384#0.8#0.6#siglip-so400m-patch14-384#0.0#0.2#rouge_0.2#top_8#inter_0.4/clusters/text_uniform.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd20c1cdfb2c5d0ce3d9318867a286a9d002a920fa2f2ce1857e410c275a7550 +size 124135450