Add files using upload-large-folder tool
Browse filesThis view is limited to 50 files because it contains too many changes. See raw diff
- .gitattributes +2 -0
- SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/0filter_ambiguous/intra_fixed.jsonl +3 -0
- SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-32-laion2b_s34b_b79k-010-1e5-recaption-finetune-lock-text/keep_image_closest/checkpoints/epoch_1.pt +3 -0
- SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-32-laion2b_s34b_b79k-010-1e5-recaption-finetune-lock-text/keep_image_farest/checkpoints/epoch_1.pt +3 -0
- SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-32-laion2b_s34b_b79k-010-1e5-recaption-finetune-lock-text/keep_image_uniform/checkpoints/epoch_1.pt +3 -0
- SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-32-laion2b_s34b_b79k-010-1e5-recaption-finetune-lock-text/keep_random/checkpoints/epoch_1.pt +3 -0
- SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-32-laion2b_s34b_b79k-010-1e5-recaption-finetune-lock-text/keep_text_closest/checkpoints/epoch_1.pt +3 -0
- SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-32-laion2b_s34b_b79k-010-1e5-recaption-finetune-lock-text/keep_text_closest_image_closest/checkpoints/epoch_1.pt +3 -0
- SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-32-laion2b_s34b_b79k-010-1e5-recaption-finetune-lock-text/keep_text_farest/checkpoints/epoch_1.pt +3 -0
- SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-32-laion2b_s34b_b79k-010-1e5-recaption-finetune-lock-text/keep_text_farest_image_farest/checkpoints/epoch_1.pt +3 -0
- SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-32-laion2b_s34b_b79k-010-1e5-recaption-finetune-lock-text/keep_text_uniform/checkpoints/epoch_1.pt +3 -0
- SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-32-laion2b_s34b_b79k-010-1e5-recaption-finetune-lock-text/low_inter_only/checkpoints/epoch_1.pt +3 -0
- SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-32-laion2b_s34b_b79k-010-1e5-recaption-finetune/keep_image_farest/benchmark_caltech101_epoch_1.pt_ViT-B-32_en_zeroshot_classification.json +1 -0
- SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-32-laion2b_s34b_b79k-010-1e5-recaption-finetune/keep_image_farest/benchmark_cars_epoch_1.pt_ViT-B-32_en_zeroshot_classification.json +1 -0
- SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-32-laion2b_s34b_b79k-010-1e5-recaption-finetune/keep_image_farest/benchmark_cifar100_epoch_1.pt_ViT-B-32_en_zeroshot_classification.json +1 -0
- SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-32-laion2b_s34b_b79k-010-1e5-recaption-finetune/keep_image_farest/benchmark_cifar10_epoch_1.pt_ViT-B-32_en_zeroshot_classification.json +1 -0
- SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-32-laion2b_s34b_b79k-010-1e5-recaption-finetune/keep_image_farest/benchmark_country211_epoch_1.pt_ViT-B-32_en_zeroshot_classification.json +1 -0
- SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-32-laion2b_s34b_b79k-010-1e5-recaption-finetune/keep_image_farest/benchmark_dtd_epoch_1.pt_ViT-B-32_en_zeroshot_classification.json +1 -0
- SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-32-laion2b_s34b_b79k-010-1e5-recaption-finetune/keep_image_farest/benchmark_eurosat_epoch_1.pt_ViT-B-32_en_zeroshot_classification.json +1 -0
- SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-32-laion2b_s34b_b79k-010-1e5-recaption-finetune/keep_image_farest/benchmark_fgvc_aircraft_epoch_1.pt_ViT-B-32_en_zeroshot_classification.json +1 -0
- SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-32-laion2b_s34b_b79k-010-1e5-recaption-finetune/keep_image_farest/benchmark_flickr30k_epoch_1.pt_ViT-B-32_en_zeroshot_retrieval.json +1 -0
- SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-32-laion2b_s34b_b79k-010-1e5-recaption-finetune/keep_image_farest/benchmark_flowers_epoch_1.pt_ViT-B-32_en_zeroshot_classification.json +1 -0
- SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-32-laion2b_s34b_b79k-010-1e5-recaption-finetune/keep_image_farest/benchmark_food101_epoch_1.pt_ViT-B-32_en_zeroshot_classification.json +1 -0
- SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-32-laion2b_s34b_b79k-010-1e5-recaption-finetune/keep_image_farest/benchmark_gtsrb_epoch_1.pt_ViT-B-32_en_zeroshot_classification.json +1 -0
- SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-32-laion2b_s34b_b79k-010-1e5-recaption-finetune/keep_image_farest/benchmark_imagenet1k_epoch_1.pt_ViT-B-32_en_zeroshot_classification.json +1 -0
- SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-32-laion2b_s34b_b79k-010-1e5-recaption-finetune/keep_image_farest/benchmark_mscoco_captions_epoch_1.pt_ViT-B-32_en_zeroshot_retrieval.json +1 -0
- SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-32-laion2b_s34b_b79k-010-1e5-recaption-finetune/keep_image_farest/benchmark_pets_epoch_1.pt_ViT-B-32_en_zeroshot_classification.json +1 -0
- SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-32-laion2b_s34b_b79k-010-1e5-recaption-finetune/keep_image_farest/benchmark_stl10_epoch_1.pt_ViT-B-32_en_zeroshot_classification.json +1 -0
- SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-32-laion2b_s34b_b79k-010-1e5-recaption-finetune/keep_image_farest/benchmark_sun397_epoch_1.pt_ViT-B-32_en_zeroshot_classification.json +1 -0
- SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-32-laion2b_s34b_b79k-010-1e5-recaption-finetune/keep_image_farest/benchmark_vtab_resisc45_epoch_1.pt_ViT-B-32_en_zeroshot_classification.json +1 -0
- SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-32-laion2b_s34b_b79k-010-1e5-recaption-finetune/keep_image_farest/out.log +159 -0
- SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-32-laion2b_s34b_b79k-010-1e5-recaption-finetune/keep_image_farest/params.txt +103 -0
- SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-32-laion2b_s34b_b79k-010-1e5-recaption-finetune/keep_image_uniform/benchmark_caltech101_epoch_1.pt_ViT-B-32_en_zeroshot_classification.json +1 -0
- SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-32-laion2b_s34b_b79k-010-1e5-recaption-finetune/keep_image_uniform/benchmark_cars_epoch_1.pt_ViT-B-32_en_zeroshot_classification.json +1 -0
- SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-32-laion2b_s34b_b79k-010-1e5-recaption-finetune/keep_image_uniform/benchmark_cifar100_epoch_1.pt_ViT-B-32_en_zeroshot_classification.json +1 -0
- SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-32-laion2b_s34b_b79k-010-1e5-recaption-finetune/keep_image_uniform/benchmark_cifar10_epoch_1.pt_ViT-B-32_en_zeroshot_classification.json +1 -0
- SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-32-laion2b_s34b_b79k-010-1e5-recaption-finetune/keep_image_uniform/benchmark_country211_epoch_1.pt_ViT-B-32_en_zeroshot_classification.json +1 -0
- SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-32-laion2b_s34b_b79k-010-1e5-recaption-finetune/keep_image_uniform/benchmark_dtd_epoch_1.pt_ViT-B-32_en_zeroshot_classification.json +1 -0
- SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-32-laion2b_s34b_b79k-010-1e5-recaption-finetune/keep_image_uniform/benchmark_eurosat_epoch_1.pt_ViT-B-32_en_zeroshot_classification.json +1 -0
- SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-32-laion2b_s34b_b79k-010-1e5-recaption-finetune/keep_image_uniform/benchmark_fgvc_aircraft_epoch_1.pt_ViT-B-32_en_zeroshot_classification.json +1 -0
- SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-32-laion2b_s34b_b79k-010-1e5-recaption-finetune/keep_image_uniform/benchmark_flickr30k_epoch_1.pt_ViT-B-32_en_zeroshot_retrieval.json +1 -0
- SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-32-laion2b_s34b_b79k-010-1e5-recaption-finetune/keep_image_uniform/benchmark_flowers_epoch_1.pt_ViT-B-32_en_zeroshot_classification.json +1 -0
- SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-32-laion2b_s34b_b79k-010-1e5-recaption-finetune/keep_image_uniform/benchmark_food101_epoch_1.pt_ViT-B-32_en_zeroshot_classification.json +1 -0
- SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-32-laion2b_s34b_b79k-010-1e5-recaption-finetune/keep_image_uniform/benchmark_gtsrb_epoch_1.pt_ViT-B-32_en_zeroshot_classification.json +1 -0
- SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-32-laion2b_s34b_b79k-010-1e5-recaption-finetune/keep_image_uniform/benchmark_imagenet1k_epoch_1.pt_ViT-B-32_en_zeroshot_classification.json +1 -0
- SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-32-laion2b_s34b_b79k-010-1e5-recaption-finetune/keep_image_uniform/benchmark_mscoco_captions_epoch_1.pt_ViT-B-32_en_zeroshot_retrieval.json +1 -0
- SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-32-laion2b_s34b_b79k-010-1e5-recaption-finetune/keep_image_uniform/benchmark_pets_epoch_1.pt_ViT-B-32_en_zeroshot_classification.json +1 -0
- SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-32-laion2b_s34b_b79k-010-1e5-recaption-finetune/keep_image_uniform/benchmark_stl10_epoch_1.pt_ViT-B-32_en_zeroshot_classification.json +1 -0
- SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-32-laion2b_s34b_b79k-010-1e5-recaption-finetune/keep_image_uniform/benchmark_sun397_epoch_1.pt_ViT-B-32_en_zeroshot_classification.json +1 -0
- SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-32-laion2b_s34b_b79k-010-1e5-recaption-finetune/keep_image_uniform/benchmark_vtab_resisc45_epoch_1.pt_ViT-B-32_en_zeroshot_classification.json +1 -0
.gitattributes
CHANGED
|
@@ -47,3 +47,5 @@ SFR-Embedding-Code-2B_R\#0.8\#0.6\#dinov2-large\#0.0\#0.2\#rouge_0.2\#top_8\#int
|
|
| 47 |
SFR-Embedding-Code-2B_R\#0.8\#0.6\#dinov2-large\#0.0\#0.2\#rouge_0.2\#top_8\#inter_0.4/0filter_ambiguous/largest_intra_inter.jsonl filter=lfs diff=lfs merge=lfs -text
|
| 48 |
SFR-Embedding-Code-2B_R\#0.8\#0.6\#dinov2-large\#0.0\#0.2\#rouge_0.2\#top_8\#inter_0.4/filter_ambiguous/largest_intra_inter.jsonl filter=lfs diff=lfs merge=lfs -text
|
| 49 |
SFR-Embedding-Code-2B_R\#0.8\#0.6\#dinov2-large\#0.0\#0.2\#rouge_0.2\#top_8\#inter_0.4/0filter_ambiguous/smallest_intra_inter.jsonl filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
| 47 |
SFR-Embedding-Code-2B_R\#0.8\#0.6\#dinov2-large\#0.0\#0.2\#rouge_0.2\#top_8\#inter_0.4/0filter_ambiguous/largest_intra_inter.jsonl filter=lfs diff=lfs merge=lfs -text
|
| 48 |
SFR-Embedding-Code-2B_R\#0.8\#0.6\#dinov2-large\#0.0\#0.2\#rouge_0.2\#top_8\#inter_0.4/filter_ambiguous/largest_intra_inter.jsonl filter=lfs diff=lfs merge=lfs -text
|
| 49 |
SFR-Embedding-Code-2B_R\#0.8\#0.6\#dinov2-large\#0.0\#0.2\#rouge_0.2\#top_8\#inter_0.4/0filter_ambiguous/smallest_intra_inter.jsonl filter=lfs diff=lfs merge=lfs -text
|
| 50 |
+
SFR-Embedding-Code-2B_R\#0.8\#0.6\#dinov2-large\#0.0\#0.2\#rouge_0.2\#top_8\#inter_0.4/different_text_inter.jsonl filter=lfs diff=lfs merge=lfs -text
|
| 51 |
+
SFR-Embedding-Code-2B_R\#0.8\#0.6\#dinov2-large\#0.0\#0.2\#rouge_0.2\#top_8\#inter_0.4/0filter_ambiguous/intra_fixed.jsonl filter=lfs diff=lfs merge=lfs -text
|
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/0filter_ambiguous/intra_fixed.jsonl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:af3768b624539180f9f3f96bced76935478a013682b7e60514a31fc956398fa8
|
| 3 |
+
size 1670544155
|
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-32-laion2b_s34b_b79k-010-1e5-recaption-finetune-lock-text/keep_image_closest/checkpoints/epoch_1.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cb7bd3d8472707c27dd3f5bc4244ef688a24f686b1712ae6ae55bcff9a353e6f
|
| 3 |
+
size 1512951522
|
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-32-laion2b_s34b_b79k-010-1e5-recaption-finetune-lock-text/keep_image_farest/checkpoints/epoch_1.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0f50c62b1be871d2b358015a319f9092c70b18ebb03e76de2e514102feca377b
|
| 3 |
+
size 1512951522
|
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-32-laion2b_s34b_b79k-010-1e5-recaption-finetune-lock-text/keep_image_uniform/checkpoints/epoch_1.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6d8cc209e213f198adfdf3e6d15537489975651102fa5f34e0bfefe0a6fc1b1c
|
| 3 |
+
size 1512951522
|
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-32-laion2b_s34b_b79k-010-1e5-recaption-finetune-lock-text/keep_random/checkpoints/epoch_1.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3d8e8f114c1eabd176a00d35a285561e88e30d27421f4388dbc6f335f4f4a5ae
|
| 3 |
+
size 1512951522
|
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-32-laion2b_s34b_b79k-010-1e5-recaption-finetune-lock-text/keep_text_closest/checkpoints/epoch_1.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a7d428a17bb5367923de68261f218c7bdb6d491f89944f31773e2803eceed47b
|
| 3 |
+
size 1512951522
|
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-32-laion2b_s34b_b79k-010-1e5-recaption-finetune-lock-text/keep_text_closest_image_closest/checkpoints/epoch_1.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d5b12cdf9e2909e257e5c914c893518ed2433c3ae34e4a14286dae2582ed8f36
|
| 3 |
+
size 1512951586
|
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-32-laion2b_s34b_b79k-010-1e5-recaption-finetune-lock-text/keep_text_farest/checkpoints/epoch_1.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:eea37af0b6e97b8efe426718b0dc9c64dd4b330a7811f53e46f91a30b8b6e4fd
|
| 3 |
+
size 1512951522
|
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-32-laion2b_s34b_b79k-010-1e5-recaption-finetune-lock-text/keep_text_farest_image_farest/checkpoints/epoch_1.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:62067d0f84aa2205f6ca5ed384827e08e801e5bb08e0a6f604e005fc98abae38
|
| 3 |
+
size 1512951586
|
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-32-laion2b_s34b_b79k-010-1e5-recaption-finetune-lock-text/keep_text_uniform/checkpoints/epoch_1.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9acf98a396f6f9f7f7776c77d26a33005f516a7a1ce067f613a21ffac15a2613
|
| 3 |
+
size 1512951522
|
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-32-laion2b_s34b_b79k-010-1e5-recaption-finetune-lock-text/low_inter_only/checkpoints/epoch_1.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:774f38dcf66e7a359d55b7c6a93322e54a0002c54177eec4670866fc59015cc5
|
| 3 |
+
size 1512951522
|
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-32-laion2b_s34b_b79k-010-1e5-recaption-finetune/keep_image_farest/benchmark_caltech101_epoch_1.pt_ViT-B-32_en_zeroshot_classification.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"dataset": "caltech101", "model": "ViT-B-32", "pretrained": "/mnt/personal/zhudongy/capsfusion10m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-32-laion2b_s34b_b79k-finetune-dc10m-010-1e5-recaption-keep_image_farest/checkpoints/epoch_1.pt", "task": "zeroshot_classification", "metrics": {"acc1": 0.8611111111111112, "acc5": 0.9646762904636921, "mean_per_class_recall": 0.9117574143020897}, "language": "en"}
|
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-32-laion2b_s34b_b79k-010-1e5-recaption-finetune/keep_image_farest/benchmark_cars_epoch_1.pt_ViT-B-32_en_zeroshot_classification.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"dataset": "cars", "model": "ViT-B-32", "pretrained": "/mnt/personal/zhudongy/capsfusion10m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-32-laion2b_s34b_b79k-finetune-dc10m-010-1e5-recaption-keep_image_farest/checkpoints/epoch_1.pt", "task": "zeroshot_classification", "metrics": {"acc1": 0.8604651162790697, "acc5": 0.9906728018903121, "mean_per_class_recall": 0.8603007472001762}, "language": "en"}
|
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-32-laion2b_s34b_b79k-010-1e5-recaption-finetune/keep_image_farest/benchmark_cifar100_epoch_1.pt_ViT-B-32_en_zeroshot_classification.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"dataset": "cifar100", "model": "ViT-B-32", "pretrained": "/mnt/personal/zhudongy/capsfusion10m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-32-laion2b_s34b_b79k-finetune-dc10m-010-1e5-recaption-keep_image_farest/checkpoints/epoch_1.pt", "task": "zeroshot_classification", "metrics": {"acc1": 0.7329, "acc5": 0.9282, "mean_per_class_recall": 0.7332000000000002}, "language": "en"}
|
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-32-laion2b_s34b_b79k-010-1e5-recaption-finetune/keep_image_farest/benchmark_cifar10_epoch_1.pt_ViT-B-32_en_zeroshot_classification.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"dataset": "cifar10", "model": "ViT-B-32", "pretrained": "/mnt/personal/zhudongy/capsfusion10m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-32-laion2b_s34b_b79k-finetune-dc10m-010-1e5-recaption-keep_image_farest/checkpoints/epoch_1.pt", "task": "zeroshot_classification", "metrics": {"acc1": 0.9239, "acc5": 0.9975, "mean_per_class_recall": 0.9239}, "language": "en"}
|
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-32-laion2b_s34b_b79k-010-1e5-recaption-finetune/keep_image_farest/benchmark_country211_epoch_1.pt_ViT-B-32_en_zeroshot_classification.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"dataset": "country211", "model": "ViT-B-32", "pretrained": "/mnt/personal/zhudongy/capsfusion10m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-32-laion2b_s34b_b79k-finetune-dc10m-010-1e5-recaption-keep_image_farest/checkpoints/epoch_1.pt", "task": "zeroshot_classification", "metrics": {"acc1": 0.1642654028436019, "acc5": 0.37867298578199055, "mean_per_class_recall": 0.1641706161137441}, "language": "en"}
|
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-32-laion2b_s34b_b79k-010-1e5-recaption-finetune/keep_image_farest/benchmark_dtd_epoch_1.pt_ViT-B-32_en_zeroshot_classification.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"dataset": "dtd", "model": "ViT-B-32", "pretrained": "/mnt/personal/zhudongy/capsfusion10m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-32-laion2b_s34b_b79k-finetune-dc10m-010-1e5-recaption-keep_image_farest/checkpoints/epoch_1.pt", "task": "zeroshot_classification", "metrics": {"acc1": 0.5569148936170213, "acc5": 0.851063829787234, "mean_per_class_recall": 0.5558510638297871}, "language": "en"}
|
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-32-laion2b_s34b_b79k-010-1e5-recaption-finetune/keep_image_farest/benchmark_eurosat_epoch_1.pt_ViT-B-32_en_zeroshot_classification.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"dataset": "eurosat", "model": "ViT-B-32", "pretrained": "/mnt/personal/zhudongy/capsfusion10m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-32-laion2b_s34b_b79k-finetune-dc10m-010-1e5-recaption-keep_image_farest/checkpoints/epoch_1.pt", "task": "zeroshot_classification", "metrics": {"acc1": 0.5003333333333333, "acc5": 0.9582962962962963, "mean_per_class_recall": 0.5124266666666667}, "language": "en"}
|
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-32-laion2b_s34b_b79k-010-1e5-recaption-finetune/keep_image_farest/benchmark_fgvc_aircraft_epoch_1.pt_ViT-B-32_en_zeroshot_classification.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"dataset": "fgvc_aircraft", "model": "ViT-B-32", "pretrained": "/mnt/personal/zhudongy/capsfusion10m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-32-laion2b_s34b_b79k-finetune-dc10m-010-1e5-recaption-keep_image_farest/checkpoints/epoch_1.pt", "task": "zeroshot_classification", "metrics": {"acc1": 0.23492349234923493, "acc5": 0.5643564356435643, "mean_per_class_recall": 0.23520499108734402}, "language": "en"}
|
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-32-laion2b_s34b_b79k-010-1e5-recaption-finetune/keep_image_farest/benchmark_flickr30k_epoch_1.pt_ViT-B-32_en_zeroshot_retrieval.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"dataset": "flickr30k", "model": "ViT-B-32", "pretrained": "/mnt/personal/zhudongy/capsfusion10m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-32-laion2b_s34b_b79k-finetune-dc10m-010-1e5-recaption-keep_image_farest/checkpoints/epoch_1.pt", "task": "zeroshot_retrieval", "metrics": {"image_retrieval_recall@1": 0.6736000180244446, "text_retrieval_recall@1": 0.8360000252723694, "image_retrieval_recall@5": 0.8812000155448914, "text_retrieval_recall@5": 0.9599999785423279, "image_retrieval_recall@10": 0.932200014591217, "text_retrieval_recall@10": 0.984000027179718}, "language": "en"}
|
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-32-laion2b_s34b_b79k-010-1e5-recaption-finetune/keep_image_farest/benchmark_flowers_epoch_1.pt_ViT-B-32_en_zeroshot_classification.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"dataset": "flowers", "model": "ViT-B-32", "pretrained": "/mnt/personal/zhudongy/capsfusion10m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-32-laion2b_s34b_b79k-finetune-dc10m-010-1e5-recaption-keep_image_farest/checkpoints/epoch_1.pt", "task": "zeroshot_classification", "metrics": {"acc1": 0.7049926817368678, "acc5": 0.8697349162465442, "mean_per_class_recall": 0.7014735954769374}, "language": "en"}
|
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-32-laion2b_s34b_b79k-010-1e5-recaption-finetune/keep_image_farest/benchmark_food101_epoch_1.pt_ViT-B-32_en_zeroshot_classification.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"dataset": "food101", "model": "ViT-B-32", "pretrained": "/mnt/personal/zhudongy/capsfusion10m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-32-laion2b_s34b_b79k-finetune-dc10m-010-1e5-recaption-keep_image_farest/checkpoints/epoch_1.pt", "task": "zeroshot_classification", "metrics": {"acc1": 0.819009900990099, "acc5": 0.9664950495049505, "mean_per_class_recall": 0.8192475247524755}, "language": "en"}
|
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-32-laion2b_s34b_b79k-010-1e5-recaption-finetune/keep_image_farest/benchmark_gtsrb_epoch_1.pt_ViT-B-32_en_zeroshot_classification.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"dataset": "gtsrb", "model": "ViT-B-32", "pretrained": "/mnt/personal/zhudongy/capsfusion10m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-32-laion2b_s34b_b79k-finetune-dc10m-010-1e5-recaption-keep_image_farest/checkpoints/epoch_1.pt", "task": "zeroshot_classification", "metrics": {"acc1": 0.5064133016627078, "acc5": 0.7681710213776722, "mean_per_class_recall": 0.4390340643422039}, "language": "en"}
|
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-32-laion2b_s34b_b79k-010-1e5-recaption-finetune/keep_image_farest/benchmark_imagenet1k_epoch_1.pt_ViT-B-32_en_zeroshot_classification.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"dataset": "imagenet1k", "model": "ViT-B-32", "pretrained": "/mnt/personal/zhudongy/capsfusion10m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-32-laion2b_s34b_b79k-finetune-dc10m-010-1e5-recaption-keep_image_farest/checkpoints/epoch_1.pt", "task": "zeroshot_classification", "metrics": {"acc1": 0.65884, "acc5": 0.89428, "mean_per_class_recall": 0.6587000000000001}, "language": "en"}
|
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-32-laion2b_s34b_b79k-010-1e5-recaption-finetune/keep_image_farest/benchmark_mscoco_captions_epoch_1.pt_ViT-B-32_en_zeroshot_retrieval.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"dataset": "mscoco_captions", "model": "ViT-B-32", "pretrained": "/mnt/personal/zhudongy/capsfusion10m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-32-laion2b_s34b_b79k-finetune-dc10m-010-1e5-recaption-keep_image_farest/checkpoints/epoch_1.pt", "task": "zeroshot_retrieval", "metrics": {"image_retrieval_recall@1": 0.39972010254859924, "text_retrieval_recall@1": 0.5735999941825867, "image_retrieval_recall@5": 0.6586565375328064, "text_retrieval_recall@5": 0.8055999875068665, "image_retrieval_recall@10": 0.759576141834259, "text_retrieval_recall@10": 0.878000020980835}, "language": "en"}
|
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-32-laion2b_s34b_b79k-010-1e5-recaption-finetune/keep_image_farest/benchmark_pets_epoch_1.pt_ViT-B-32_en_zeroshot_classification.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"dataset": "pets", "model": "ViT-B-32", "pretrained": "/mnt/personal/zhudongy/capsfusion10m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-32-laion2b_s34b_b79k-finetune-dc10m-010-1e5-recaption-keep_image_farest/checkpoints/epoch_1.pt", "task": "zeroshot_classification", "metrics": {"acc1": 0.9067865903515945, "acc5": 0.9942763695829926, "mean_per_class_recall": 0.9051993440098055}, "language": "en"}
|
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-32-laion2b_s34b_b79k-010-1e5-recaption-finetune/keep_image_farest/benchmark_stl10_epoch_1.pt_ViT-B-32_en_zeroshot_classification.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"dataset": "stl10", "model": "ViT-B-32", "pretrained": "/mnt/personal/zhudongy/capsfusion10m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-32-laion2b_s34b_b79k-finetune-dc10m-010-1e5-recaption-keep_image_farest/checkpoints/epoch_1.pt", "task": "zeroshot_classification", "metrics": {"acc1": 0.96425, "acc5": 0.99975, "mean_per_class_recall": 0.96425}, "language": "en"}
|
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-32-laion2b_s34b_b79k-010-1e5-recaption-finetune/keep_image_farest/benchmark_sun397_epoch_1.pt_ViT-B-32_en_zeroshot_classification.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"dataset": "sun397", "model": "ViT-B-32", "pretrained": "/mnt/personal/zhudongy/capsfusion10m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-32-laion2b_s34b_b79k-finetune-dc10m-010-1e5-recaption-keep_image_farest/checkpoints/epoch_1.pt", "task": "zeroshot_classification", "metrics": {"acc1": 0.6866689960829027, "acc5": 0.9371057616271585, "mean_per_class_recall": 0.6837170321383007}, "language": "en"}
|
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-32-laion2b_s34b_b79k-010-1e5-recaption-finetune/keep_image_farest/benchmark_vtab_resisc45_epoch_1.pt_ViT-B-32_en_zeroshot_classification.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"dataset": "vtab/resisc45", "model": "ViT-B-32", "pretrained": "/mnt/personal/zhudongy/capsfusion10m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-32-laion2b_s34b_b79k-finetune-dc10m-010-1e5-recaption-keep_image_farest/checkpoints/epoch_1.pt", "task": "zeroshot_classification", "metrics": {"acc1": 0.6, "acc5": 0.9061904761904762, "mean_per_class_recall": 0.6079051385663736}, "language": "en"}
|
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-32-laion2b_s34b_b79k-010-1e5-recaption-finetune/keep_image_farest/out.log
ADDED
|
@@ -0,0 +1,159 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
2025-04-24,00:12:07 | INFO | Running with a single process. Device cuda.
|
| 2 |
+
2025-04-24,00:12:07 | INFO | Loaded ViT-B-32 model config.
|
| 3 |
+
2025-04-24,00:12:09 | INFO | Loading pretrained ViT-B-32 weights (laion2b_s34b_b79k).
|
| 4 |
+
2025-04-24,00:12:09 | INFO | Model:
|
| 5 |
+
2025-04-24,00:12:09 | INFO | CLIP(
|
| 6 |
+
(visual): VisionTransformer(
|
| 7 |
+
(conv1): Conv2d(3, 768, kernel_size=(32, 32), stride=(32, 32), bias=False)
|
| 8 |
+
(patch_dropout): Identity()
|
| 9 |
+
(ln_pre): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 10 |
+
(transformer): Transformer(
|
| 11 |
+
(resblocks): ModuleList(
|
| 12 |
+
(0-11): 12 x ResidualAttentionBlock(
|
| 13 |
+
(ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 14 |
+
(attn): MultiheadAttention(
|
| 15 |
+
(out_proj): NonDynamicallyQuantizableLinear(in_features=768, out_features=768, bias=True)
|
| 16 |
+
)
|
| 17 |
+
(ls_1): Identity()
|
| 18 |
+
(ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 19 |
+
(mlp): Sequential(
|
| 20 |
+
(c_fc): Linear(in_features=768, out_features=3072, bias=True)
|
| 21 |
+
(gelu): GELU(approximate='none')
|
| 22 |
+
(c_proj): Linear(in_features=3072, out_features=768, bias=True)
|
| 23 |
+
)
|
| 24 |
+
(ls_2): Identity()
|
| 25 |
+
)
|
| 26 |
+
)
|
| 27 |
+
)
|
| 28 |
+
(ln_post): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 29 |
+
)
|
| 30 |
+
(transformer): Transformer(
|
| 31 |
+
(resblocks): ModuleList(
|
| 32 |
+
(0-11): 12 x ResidualAttentionBlock(
|
| 33 |
+
(ln_1): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
| 34 |
+
(attn): MultiheadAttention(
|
| 35 |
+
(out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)
|
| 36 |
+
)
|
| 37 |
+
(ls_1): Identity()
|
| 38 |
+
(ln_2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
| 39 |
+
(mlp): Sequential(
|
| 40 |
+
(c_fc): Linear(in_features=512, out_features=2048, bias=True)
|
| 41 |
+
(gelu): GELU(approximate='none')
|
| 42 |
+
(c_proj): Linear(in_features=2048, out_features=512, bias=True)
|
| 43 |
+
)
|
| 44 |
+
(ls_2): Identity()
|
| 45 |
+
)
|
| 46 |
+
)
|
| 47 |
+
)
|
| 48 |
+
(token_embedding): Embedding(49408, 512)
|
| 49 |
+
(ln_final): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
| 50 |
+
)
|
| 51 |
+
2025-04-24,00:12:09 | INFO | Params:
|
| 52 |
+
2025-04-24,00:12:09 | INFO | accum_freq: 4
|
| 53 |
+
2025-04-24,00:12:09 | INFO | aug_cfg: {}
|
| 54 |
+
2025-04-24,00:12:09 | INFO | batch_size: 2048
|
| 55 |
+
2025-04-24,00:12:09 | INFO | beta1: 0.9
|
| 56 |
+
2025-04-24,00:12:09 | INFO | beta2: 0.98
|
| 57 |
+
2025-04-24,00:12:09 | INFO | cache_dir: None
|
| 58 |
+
2025-04-24,00:12:09 | INFO | caption_ratio: 0.1
|
| 59 |
+
2025-04-24,00:12:09 | INFO | checkpoint_path: /mnt/personal/zhudongy/capsfusion10m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-32-laion2b_s34b_b79k-finetune-dc10m-010-1e5-recaption-keep_image_farest/checkpoints
|
| 60 |
+
2025-04-24,00:12:09 | INFO | coca_caption_loss_weight: 2.0
|
| 61 |
+
2025-04-24,00:12:09 | INFO | coca_contrastive_loss_weight: 1.0
|
| 62 |
+
2025-04-24,00:12:09 | INFO | copy_codebase: False
|
| 63 |
+
2025-04-24,00:12:09 | INFO | csv_caption_key: title
|
| 64 |
+
2025-04-24,00:12:09 | INFO | csv_img_key: filepath
|
| 65 |
+
2025-04-24,00:12:09 | INFO | csv_separator:
|
| 66 |
+
2025-04-24,00:12:09 | INFO | dataset_resampled: False
|
| 67 |
+
2025-04-24,00:12:09 | INFO | dataset_type: synthetic
|
| 68 |
+
2025-04-24,00:12:09 | INFO | ddp_static_graph: False
|
| 69 |
+
2025-04-24,00:12:09 | INFO | debug: False
|
| 70 |
+
2025-04-24,00:12:09 | INFO | delete_previous_checkpoint: False
|
| 71 |
+
2025-04-24,00:12:09 | INFO | device: cuda
|
| 72 |
+
2025-04-24,00:12:09 | INFO | dist_backend: None
|
| 73 |
+
2025-04-24,00:12:09 | INFO | dist_url: None
|
| 74 |
+
2025-04-24,00:12:09 | INFO | distill: False
|
| 75 |
+
2025-04-24,00:12:09 | INFO | distill_model: None
|
| 76 |
+
2025-04-24,00:12:09 | INFO | distill_pretrained: None
|
| 77 |
+
2025-04-24,00:12:09 | INFO | distributed: False
|
| 78 |
+
2025-04-24,00:12:09 | INFO | epochs: 1
|
| 79 |
+
2025-04-24,00:12:09 | INFO | epochs_cooldown: None
|
| 80 |
+
2025-04-24,00:12:09 | INFO | eps: 1e-08
|
| 81 |
+
2025-04-24,00:12:09 | INFO | force_custom_text: False
|
| 82 |
+
2025-04-24,00:12:09 | INFO | force_image_size: None
|
| 83 |
+
2025-04-24,00:12:09 | INFO | force_patch_dropout: None
|
| 84 |
+
2025-04-24,00:12:09 | INFO | force_quick_gelu: False
|
| 85 |
+
2025-04-24,00:12:09 | INFO | gather_with_grad: True
|
| 86 |
+
2025-04-24,00:12:09 | INFO | grad_checkpointing: True
|
| 87 |
+
2025-04-24,00:12:09 | INFO | grad_clip_norm: None
|
| 88 |
+
2025-04-24,00:12:09 | INFO | horovod: False
|
| 89 |
+
2025-04-24,00:12:09 | INFO | image_interpolation: None
|
| 90 |
+
2025-04-24,00:12:09 | INFO | image_mean: None
|
| 91 |
+
2025-04-24,00:12:09 | INFO | image_resize_mode: None
|
| 92 |
+
2025-04-24,00:12:09 | INFO | image_std: None
|
| 93 |
+
2025-04-24,00:12:09 | INFO | imagenet_v2: None
|
| 94 |
+
2025-04-24,00:12:09 | INFO | imagenet_val: None
|
| 95 |
+
2025-04-24,00:12:09 | INFO | keep_func_name: keep_image_farest
|
| 96 |
+
2025-04-24,00:12:09 | INFO | local_loss: False
|
| 97 |
+
2025-04-24,00:12:09 | INFO | local_rank: 0
|
| 98 |
+
2025-04-24,00:12:09 | INFO | lock_image: False
|
| 99 |
+
2025-04-24,00:12:09 | INFO | lock_image_freeze_bn_stats: False
|
| 100 |
+
2025-04-24,00:12:09 | INFO | lock_image_unlocked_groups: 0
|
| 101 |
+
2025-04-24,00:12:09 | INFO | lock_text: False
|
| 102 |
+
2025-04-24,00:12:09 | INFO | lock_text_freeze_layer_norm: False
|
| 103 |
+
2025-04-24,00:12:09 | INFO | lock_text_unlocked_layers: 0
|
| 104 |
+
2025-04-24,00:12:09 | INFO | log_every_n_steps: 100
|
| 105 |
+
2025-04-24,00:12:09 | INFO | log_level: 20
|
| 106 |
+
2025-04-24,00:12:09 | INFO | log_local: False
|
| 107 |
+
2025-04-24,00:12:09 | INFO | log_path: /mnt/personal/zhudongy/capsfusion10m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-32-laion2b_s34b_b79k-finetune-dc10m-010-1e5-recaption-keep_image_farest/out.log
|
| 108 |
+
2025-04-24,00:12:09 | INFO | logs: /mnt/personal/zhudongy/capsfusion10m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs
|
| 109 |
+
2025-04-24,00:12:09 | INFO | loss_dist_impl: None
|
| 110 |
+
2025-04-24,00:12:09 | INFO | lr: 1e-05
|
| 111 |
+
2025-04-24,00:12:09 | INFO | lr_cooldown_end: 0.0
|
| 112 |
+
2025-04-24,00:12:09 | INFO | lr_cooldown_power: 1.0
|
| 113 |
+
2025-04-24,00:12:09 | INFO | lr_scheduler: cosine
|
| 114 |
+
2025-04-24,00:12:09 | INFO | map_func_name: use_all
|
| 115 |
+
2025-04-24,00:12:09 | INFO | model: ViT-B-32
|
| 116 |
+
2025-04-24,00:12:09 | INFO | momentum: None
|
| 117 |
+
2025-04-24,00:12:09 | INFO | name: ViT-B-32-laion2b_s34b_b79k-finetune-dc10m-010-1e5-recaption-keep_image_farest
|
| 118 |
+
2025-04-24,00:12:09 | INFO | no_set_device_rank: False
|
| 119 |
+
2025-04-24,00:12:09 | INFO | opt: adamw
|
| 120 |
+
2025-04-24,00:12:09 | INFO | precision: amp
|
| 121 |
+
2025-04-24,00:12:09 | INFO | pretrained: laion2b_s34b_b79k
|
| 122 |
+
2025-04-24,00:12:09 | INFO | pretrained_image: False
|
| 123 |
+
2025-04-24,00:12:09 | INFO | rank: 0
|
| 124 |
+
2025-04-24,00:12:09 | INFO | remote_sync: None
|
| 125 |
+
2025-04-24,00:12:09 | INFO | remote_sync_frequency: 300
|
| 126 |
+
2025-04-24,00:12:09 | INFO | remote_sync_protocol: s3
|
| 127 |
+
2025-04-24,00:12:09 | INFO | report_to: wandb
|
| 128 |
+
2025-04-24,00:12:09 | INFO | resume: None
|
| 129 |
+
2025-04-24,00:12:09 | INFO | save_frequency: 1
|
| 130 |
+
2025-04-24,00:12:09 | INFO | save_most_recent: False
|
| 131 |
+
2025-04-24,00:12:09 | INFO | seed: 0
|
| 132 |
+
2025-04-24,00:12:09 | INFO | siglip: False
|
| 133 |
+
2025-04-24,00:12:09 | INFO | skip_scheduler: False
|
| 134 |
+
2025-04-24,00:12:09 | INFO | tensorboard: False
|
| 135 |
+
2025-04-24,00:12:09 | INFO | tensorboard_path:
|
| 136 |
+
2025-04-24,00:12:09 | INFO | torchcompile: False
|
| 137 |
+
2025-04-24,00:12:09 | INFO | torchscript: False
|
| 138 |
+
2025-04-24,00:12:09 | INFO | trace: False
|
| 139 |
+
2025-04-24,00:12:09 | INFO | train_data: /mnt/personal/zhudongy/CapsFusion120M-wds/{00000..01426}.tar
|
| 140 |
+
2025-04-24,00:12:09 | INFO | train_data_upsampling_factors: None
|
| 141 |
+
2025-04-24,00:12:09 | INFO | train_num_samples: 10006295
|
| 142 |
+
2025-04-24,00:12:09 | INFO | use_bn_sync: False
|
| 143 |
+
2025-04-24,00:12:09 | INFO | use_bnb_linear: None
|
| 144 |
+
2025-04-24,00:12:09 | INFO | val_data: None
|
| 145 |
+
2025-04-24,00:12:09 | INFO | val_frequency: 1
|
| 146 |
+
2025-04-24,00:12:09 | INFO | val_num_samples: None
|
| 147 |
+
2025-04-24,00:12:09 | INFO | wandb: True
|
| 148 |
+
2025-04-24,00:12:09 | INFO | wandb_notes:
|
| 149 |
+
2025-04-24,00:12:09 | INFO | wandb_project_name: open-clip
|
| 150 |
+
2025-04-24,00:12:09 | INFO | warmup: 122
|
| 151 |
+
2025-04-24,00:12:09 | INFO | wd: 0.5
|
| 152 |
+
2025-04-24,00:12:09 | INFO | workers: 16
|
| 153 |
+
2025-04-24,00:12:09 | INFO | world_size: 1
|
| 154 |
+
2025-04-24,00:12:09 | INFO | zeroshot_frequency: 2
|
| 155 |
+
2025-04-24,00:12:09 | INFO | Created AdamW (adamw) optimizer: lr: 1e-05, betas: (0.9, 0.98), eps: 1e-08, weight_decay: 0.5, amsgrad: False, foreach: None, maximize: False, capturable: False, differentiable: False, fused: None
|
| 156 |
+
2025-04-24,00:15:17 | INFO | Start epoch 0
|
| 157 |
+
2025-04-24,00:15:49 | INFO | Train Epoch: 0 [ 8192/1015808 (1%)] Data (t): 23.724 Batch (t): 31.607, 259.181/s, 259.181/s/gpu LR: 0.000000 Logit Scale: 100.000 Imm_image: 32.957 (32.957) Imm_text: 32.957 (32.957) Isd_image: 4.5893 (4.5893) Isd_text: 4.5893 (4.5893) Contrastive_loss: 0.61915 (0.61915) Loss: 0.61915 (0.61915)
|
| 158 |
+
2025-04-24,00:28:52 | INFO | Train Epoch: 0 [ 827392/1015808 (81%)] Data (t): 1.129 Batch (t): 7.828, 1053.99/s, 1053.99/s/gpu LR: 0.000008 Logit Scale: 99.989 Imm_image: 33.944 (33.451) Imm_text: 33.944 (33.451) Isd_image: 5.0255 (4.8074) Isd_text: 5.0255 (4.8074) Contrastive_loss: 0.37668 (0.49792) Loss: 0.37668 (0.49792)
|
| 159 |
+
2025-04-24,00:31:57 | INFO | Train Epoch: 0 [1015808/1015808 (100%)] Data (t): 1.280 Batch (t): 8.069, 1047.99/s, 1047.99/s/gpu LR: 0.000005 Logit Scale: 99.987 Imm_image: 34.089 (33.663) Imm_text: 34.089 (33.663) Isd_image: 4.6706 (4.7618) Isd_text: 4.6706 (4.7618) Contrastive_loss: 0.36459 (0.45347) Loss: 0.36459 (0.45347)
|
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-32-laion2b_s34b_b79k-010-1e5-recaption-finetune/keep_image_farest/params.txt
ADDED
|
@@ -0,0 +1,103 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
accum_freq: 4
|
| 2 |
+
aug_cfg: {}
|
| 3 |
+
batch_size: 2048
|
| 4 |
+
beta1: 0.9
|
| 5 |
+
beta2: 0.98
|
| 6 |
+
cache_dir: None
|
| 7 |
+
caption_ratio: 0.1
|
| 8 |
+
checkpoint_path: /mnt/personal/zhudongy/capsfusion10m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-32-laion2b_s34b_b79k-finetune-dc10m-010-1e5-recaption-keep_image_farest/checkpoints
|
| 9 |
+
coca_caption_loss_weight: 2.0
|
| 10 |
+
coca_contrastive_loss_weight: 1.0
|
| 11 |
+
copy_codebase: False
|
| 12 |
+
csv_caption_key: title
|
| 13 |
+
csv_img_key: filepath
|
| 14 |
+
csv_separator:
|
| 15 |
+
dataset_resampled: False
|
| 16 |
+
dataset_type: synthetic
|
| 17 |
+
ddp_static_graph: False
|
| 18 |
+
debug: False
|
| 19 |
+
delete_previous_checkpoint: False
|
| 20 |
+
device: cuda
|
| 21 |
+
dist_backend: None
|
| 22 |
+
dist_url: None
|
| 23 |
+
distill: False
|
| 24 |
+
distill_model: None
|
| 25 |
+
distill_pretrained: None
|
| 26 |
+
distributed: False
|
| 27 |
+
epochs: 1
|
| 28 |
+
epochs_cooldown: None
|
| 29 |
+
eps: 1e-08
|
| 30 |
+
force_custom_text: False
|
| 31 |
+
force_image_size: None
|
| 32 |
+
force_patch_dropout: None
|
| 33 |
+
force_quick_gelu: False
|
| 34 |
+
gather_with_grad: True
|
| 35 |
+
grad_checkpointing: True
|
| 36 |
+
grad_clip_norm: None
|
| 37 |
+
horovod: False
|
| 38 |
+
image_interpolation: None
|
| 39 |
+
image_mean: None
|
| 40 |
+
image_resize_mode: None
|
| 41 |
+
image_std: None
|
| 42 |
+
imagenet_v2: None
|
| 43 |
+
imagenet_val: None
|
| 44 |
+
keep_func_name: keep_image_farest
|
| 45 |
+
local_loss: False
|
| 46 |
+
local_rank: 0
|
| 47 |
+
lock_image: False
|
| 48 |
+
lock_image_freeze_bn_stats: False
|
| 49 |
+
lock_image_unlocked_groups: 0
|
| 50 |
+
lock_text: False
|
| 51 |
+
lock_text_freeze_layer_norm: False
|
| 52 |
+
lock_text_unlocked_layers: 0
|
| 53 |
+
log_every_n_steps: 100
|
| 54 |
+
log_level: 20
|
| 55 |
+
log_local: False
|
| 56 |
+
log_path: /mnt/personal/zhudongy/capsfusion10m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-32-laion2b_s34b_b79k-finetune-dc10m-010-1e5-recaption-keep_image_farest/out.log
|
| 57 |
+
logs: /mnt/personal/zhudongy/capsfusion10m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs
|
| 58 |
+
loss_dist_impl: None
|
| 59 |
+
lr: 1e-05
|
| 60 |
+
lr_cooldown_end: 0.0
|
| 61 |
+
lr_cooldown_power: 1.0
|
| 62 |
+
lr_scheduler: cosine
|
| 63 |
+
map_func_name: use_all
|
| 64 |
+
model: ViT-B-32
|
| 65 |
+
momentum: None
|
| 66 |
+
name: ViT-B-32-laion2b_s34b_b79k-finetune-dc10m-010-1e5-recaption-keep_image_farest
|
| 67 |
+
no_set_device_rank: False
|
| 68 |
+
opt: adamw
|
| 69 |
+
precision: amp
|
| 70 |
+
pretrained: laion2b_s34b_b79k
|
| 71 |
+
pretrained_image: False
|
| 72 |
+
rank: 0
|
| 73 |
+
remote_sync: None
|
| 74 |
+
remote_sync_frequency: 300
|
| 75 |
+
remote_sync_protocol: s3
|
| 76 |
+
report_to: wandb
|
| 77 |
+
resume: None
|
| 78 |
+
save_frequency: 1
|
| 79 |
+
save_most_recent: False
|
| 80 |
+
seed: 0
|
| 81 |
+
siglip: False
|
| 82 |
+
skip_scheduler: False
|
| 83 |
+
tensorboard: False
|
| 84 |
+
tensorboard_path:
|
| 85 |
+
torchcompile: False
|
| 86 |
+
torchscript: False
|
| 87 |
+
trace: False
|
| 88 |
+
train_data: /mnt/personal/zhudongy/CapsFusion120M-wds/{00000..01426}.tar
|
| 89 |
+
train_data_upsampling_factors: None
|
| 90 |
+
train_num_samples: 10006295
|
| 91 |
+
use_bn_sync: False
|
| 92 |
+
use_bnb_linear: None
|
| 93 |
+
val_data: None
|
| 94 |
+
val_frequency: 1
|
| 95 |
+
val_num_samples: None
|
| 96 |
+
wandb: True
|
| 97 |
+
wandb_notes:
|
| 98 |
+
wandb_project_name: open-clip
|
| 99 |
+
warmup: 122
|
| 100 |
+
wd: 0.5
|
| 101 |
+
workers: 16
|
| 102 |
+
world_size: 1
|
| 103 |
+
zeroshot_frequency: 2
|
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-32-laion2b_s34b_b79k-010-1e5-recaption-finetune/keep_image_uniform/benchmark_caltech101_epoch_1.pt_ViT-B-32_en_zeroshot_classification.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"dataset": "caltech101", "model": "ViT-B-32", "pretrained": "/mnt/personal/zhudongy/capsfusion10m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-32-laion2b_s34b_b79k-finetune-dc10m-010-1e5-recaption-keep_image_uniform/checkpoints/epoch_1.pt", "task": "zeroshot_classification", "metrics": {"acc1": 0.8632983377077865, "acc5": 0.9641294838145232, "mean_per_class_recall": 0.9132225597261372}, "language": "en"}
|
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-32-laion2b_s34b_b79k-010-1e5-recaption-finetune/keep_image_uniform/benchmark_cars_epoch_1.pt_ViT-B-32_en_zeroshot_classification.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"dataset": "cars", "model": "ViT-B-32", "pretrained": "/mnt/personal/zhudongy/capsfusion10m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-32-laion2b_s34b_b79k-finetune-dc10m-010-1e5-recaption-keep_image_uniform/checkpoints/epoch_1.pt", "task": "zeroshot_classification", "metrics": {"acc1": 0.8614600174107698, "acc5": 0.9898022634000746, "mean_per_class_recall": 0.862527756274282}, "language": "en"}
|
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-32-laion2b_s34b_b79k-010-1e5-recaption-finetune/keep_image_uniform/benchmark_cifar100_epoch_1.pt_ViT-B-32_en_zeroshot_classification.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"dataset": "cifar100", "model": "ViT-B-32", "pretrained": "/mnt/personal/zhudongy/capsfusion10m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-32-laion2b_s34b_b79k-finetune-dc10m-010-1e5-recaption-keep_image_uniform/checkpoints/epoch_1.pt", "task": "zeroshot_classification", "metrics": {"acc1": 0.7413, "acc5": 0.9325, "mean_per_class_recall": 0.7419}, "language": "en"}
|
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-32-laion2b_s34b_b79k-010-1e5-recaption-finetune/keep_image_uniform/benchmark_cifar10_epoch_1.pt_ViT-B-32_en_zeroshot_classification.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"dataset": "cifar10", "model": "ViT-B-32", "pretrained": "/mnt/personal/zhudongy/capsfusion10m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-32-laion2b_s34b_b79k-finetune-dc10m-010-1e5-recaption-keep_image_uniform/checkpoints/epoch_1.pt", "task": "zeroshot_classification", "metrics": {"acc1": 0.928, "acc5": 0.9973, "mean_per_class_recall": 0.9279}, "language": "en"}
|
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-32-laion2b_s34b_b79k-010-1e5-recaption-finetune/keep_image_uniform/benchmark_country211_epoch_1.pt_ViT-B-32_en_zeroshot_classification.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"dataset": "country211", "model": "ViT-B-32", "pretrained": "/mnt/personal/zhudongy/capsfusion10m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-32-laion2b_s34b_b79k-finetune-dc10m-010-1e5-recaption-keep_image_uniform/checkpoints/epoch_1.pt", "task": "zeroshot_classification", "metrics": {"acc1": 0.1638388625592417, "acc5": 0.37668246445497633, "mean_per_class_recall": 0.16421800947867302}, "language": "en"}
|
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-32-laion2b_s34b_b79k-010-1e5-recaption-finetune/keep_image_uniform/benchmark_dtd_epoch_1.pt_ViT-B-32_en_zeroshot_classification.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"dataset": "dtd", "model": "ViT-B-32", "pretrained": "/mnt/personal/zhudongy/capsfusion10m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-32-laion2b_s34b_b79k-finetune-dc10m-010-1e5-recaption-keep_image_uniform/checkpoints/epoch_1.pt", "task": "zeroshot_classification", "metrics": {"acc1": 0.576063829787234, "acc5": 0.8563829787234043, "mean_per_class_recall": 0.5771276595744682}, "language": "en"}
|
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-32-laion2b_s34b_b79k-010-1e5-recaption-finetune/keep_image_uniform/benchmark_eurosat_epoch_1.pt_ViT-B-32_en_zeroshot_classification.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"dataset": "eurosat", "model": "ViT-B-32", "pretrained": "/mnt/personal/zhudongy/capsfusion10m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-32-laion2b_s34b_b79k-finetune-dc10m-010-1e5-recaption-keep_image_uniform/checkpoints/epoch_1.pt", "task": "zeroshot_classification", "metrics": {"acc1": 0.4793703703703704, "acc5": 0.9591481481481482, "mean_per_class_recall": 0.49027666666666664}, "language": "en"}
|
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-32-laion2b_s34b_b79k-010-1e5-recaption-finetune/keep_image_uniform/benchmark_fgvc_aircraft_epoch_1.pt_ViT-B-32_en_zeroshot_classification.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"dataset": "fgvc_aircraft", "model": "ViT-B-32", "pretrained": "/mnt/personal/zhudongy/capsfusion10m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-32-laion2b_s34b_b79k-finetune-dc10m-010-1e5-recaption-keep_image_uniform/checkpoints/epoch_1.pt", "task": "zeroshot_classification", "metrics": {"acc1": 0.24002400240024002, "acc5": 0.5553555355535553, "mean_per_class_recall": 0.23985739750445628}, "language": "en"}
|
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-32-laion2b_s34b_b79k-010-1e5-recaption-finetune/keep_image_uniform/benchmark_flickr30k_epoch_1.pt_ViT-B-32_en_zeroshot_retrieval.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"dataset": "flickr30k", "model": "ViT-B-32", "pretrained": "/mnt/personal/zhudongy/capsfusion10m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-32-laion2b_s34b_b79k-finetune-dc10m-010-1e5-recaption-keep_image_uniform/checkpoints/epoch_1.pt", "task": "zeroshot_retrieval", "metrics": {"image_retrieval_recall@1": 0.6711999773979187, "text_retrieval_recall@1": 0.8299999833106995, "image_retrieval_recall@5": 0.8858000040054321, "text_retrieval_recall@5": 0.9570000171661377, "image_retrieval_recall@10": 0.9312000274658203, "text_retrieval_recall@10": 0.9810000061988831}, "language": "en"}
|
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-32-laion2b_s34b_b79k-010-1e5-recaption-finetune/keep_image_uniform/benchmark_flowers_epoch_1.pt_ViT-B-32_en_zeroshot_classification.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"dataset": "flowers", "model": "ViT-B-32", "pretrained": "/mnt/personal/zhudongy/capsfusion10m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-32-laion2b_s34b_b79k-finetune-dc10m-010-1e5-recaption-keep_image_uniform/checkpoints/epoch_1.pt", "task": "zeroshot_classification", "metrics": {"acc1": 0.7088957554073834, "acc5": 0.8736379899170597, "mean_per_class_recall": 0.6985781923439345}, "language": "en"}
|
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-32-laion2b_s34b_b79k-010-1e5-recaption-finetune/keep_image_uniform/benchmark_food101_epoch_1.pt_ViT-B-32_en_zeroshot_classification.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"dataset": "food101", "model": "ViT-B-32", "pretrained": "/mnt/personal/zhudongy/capsfusion10m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-32-laion2b_s34b_b79k-finetune-dc10m-010-1e5-recaption-keep_image_uniform/checkpoints/epoch_1.pt", "task": "zeroshot_classification", "metrics": {"acc1": 0.8213069306930693, "acc5": 0.9667326732673267, "mean_per_class_recall": 0.821069306930693}, "language": "en"}
|
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-32-laion2b_s34b_b79k-010-1e5-recaption-finetune/keep_image_uniform/benchmark_gtsrb_epoch_1.pt_ViT-B-32_en_zeroshot_classification.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"dataset": "gtsrb", "model": "ViT-B-32", "pretrained": "/mnt/personal/zhudongy/capsfusion10m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-32-laion2b_s34b_b79k-finetune-dc10m-010-1e5-recaption-keep_image_uniform/checkpoints/epoch_1.pt", "task": "zeroshot_classification", "metrics": {"acc1": 0.498812351543943, "acc5": 0.7555027711797307, "mean_per_class_recall": 0.42383596334759127}, "language": "en"}
|
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-32-laion2b_s34b_b79k-010-1e5-recaption-finetune/keep_image_uniform/benchmark_imagenet1k_epoch_1.pt_ViT-B-32_en_zeroshot_classification.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"dataset": "imagenet1k", "model": "ViT-B-32", "pretrained": "/mnt/personal/zhudongy/capsfusion10m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-32-laion2b_s34b_b79k-finetune-dc10m-010-1e5-recaption-keep_image_uniform/checkpoints/epoch_1.pt", "task": "zeroshot_classification", "metrics": {"acc1": 0.66026, "acc5": 0.89522, "mean_per_class_recall": 0.6604000000000001}, "language": "en"}
|
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-32-laion2b_s34b_b79k-010-1e5-recaption-finetune/keep_image_uniform/benchmark_mscoco_captions_epoch_1.pt_ViT-B-32_en_zeroshot_retrieval.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"dataset": "mscoco_captions", "model": "ViT-B-32", "pretrained": "/mnt/personal/zhudongy/capsfusion10m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-32-laion2b_s34b_b79k-finetune-dc10m-010-1e5-recaption-keep_image_uniform/checkpoints/epoch_1.pt", "task": "zeroshot_retrieval", "metrics": {"image_retrieval_recall@1": 0.400479793548584, "text_retrieval_recall@1": 0.5802000164985657, "image_retrieval_recall@5": 0.6611355543136597, "text_retrieval_recall@5": 0.8051999807357788, "image_retrieval_recall@10": 0.7623750567436218, "text_retrieval_recall@10": 0.8772000074386597}, "language": "en"}
|
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-32-laion2b_s34b_b79k-010-1e5-recaption-finetune/keep_image_uniform/benchmark_pets_epoch_1.pt_ViT-B-32_en_zeroshot_classification.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"dataset": "pets", "model": "ViT-B-32", "pretrained": "/mnt/personal/zhudongy/capsfusion10m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-32-laion2b_s34b_b79k-finetune-dc10m-010-1e5-recaption-keep_image_uniform/checkpoints/epoch_1.pt", "task": "zeroshot_classification", "metrics": {"acc1": 0.9051512673753066, "acc5": 0.9953665849005179, "mean_per_class_recall": 0.9041328844971016}, "language": "en"}
|
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-32-laion2b_s34b_b79k-010-1e5-recaption-finetune/keep_image_uniform/benchmark_stl10_epoch_1.pt_ViT-B-32_en_zeroshot_classification.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"dataset": "stl10", "model": "ViT-B-32", "pretrained": "/mnt/personal/zhudongy/capsfusion10m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-32-laion2b_s34b_b79k-finetune-dc10m-010-1e5-recaption-keep_image_uniform/checkpoints/epoch_1.pt", "task": "zeroshot_classification", "metrics": {"acc1": 0.96075, "acc5": 0.999625, "mean_per_class_recall": 0.960625}, "language": "en"}
|
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-32-laion2b_s34b_b79k-010-1e5-recaption-finetune/keep_image_uniform/benchmark_sun397_epoch_1.pt_ViT-B-32_en_zeroshot_classification.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"dataset": "sun397", "model": "ViT-B-32", "pretrained": "/mnt/personal/zhudongy/capsfusion10m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-32-laion2b_s34b_b79k-finetune-dc10m-010-1e5-recaption-keep_image_uniform/checkpoints/epoch_1.pt", "task": "zeroshot_classification", "metrics": {"acc1": 0.6863011935193188, "acc5": 0.9371057616271585, "mean_per_class_recall": 0.6836321109384585}, "language": "en"}
|
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-32-laion2b_s34b_b79k-010-1e5-recaption-finetune/keep_image_uniform/benchmark_vtab_resisc45_epoch_1.pt_ViT-B-32_en_zeroshot_classification.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"dataset": "vtab/resisc45", "model": "ViT-B-32", "pretrained": "/mnt/personal/zhudongy/capsfusion10m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-32-laion2b_s34b_b79k-finetune-dc10m-010-1e5-recaption-keep_image_uniform/checkpoints/epoch_1.pt", "task": "zeroshot_classification", "metrics": {"acc1": 0.5992063492063492, "acc5": 0.9063492063492063, "mean_per_class_recall": 0.60694300093902}, "language": "en"}
|