cornuHGF commited on
Commit
9771349
·
verified ·
1 Parent(s): b9f3b3c

Add files using upload-large-folder tool

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +2 -0
  2. SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/0filter_ambiguous/intra_fixed.jsonl +3 -0
  3. SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-32-laion2b_s34b_b79k-010-1e5-recaption-finetune-lock-text/keep_image_closest/checkpoints/epoch_1.pt +3 -0
  4. SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-32-laion2b_s34b_b79k-010-1e5-recaption-finetune-lock-text/keep_image_farest/checkpoints/epoch_1.pt +3 -0
  5. SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-32-laion2b_s34b_b79k-010-1e5-recaption-finetune-lock-text/keep_image_uniform/checkpoints/epoch_1.pt +3 -0
  6. SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-32-laion2b_s34b_b79k-010-1e5-recaption-finetune-lock-text/keep_random/checkpoints/epoch_1.pt +3 -0
  7. SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-32-laion2b_s34b_b79k-010-1e5-recaption-finetune-lock-text/keep_text_closest/checkpoints/epoch_1.pt +3 -0
  8. SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-32-laion2b_s34b_b79k-010-1e5-recaption-finetune-lock-text/keep_text_closest_image_closest/checkpoints/epoch_1.pt +3 -0
  9. SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-32-laion2b_s34b_b79k-010-1e5-recaption-finetune-lock-text/keep_text_farest/checkpoints/epoch_1.pt +3 -0
  10. SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-32-laion2b_s34b_b79k-010-1e5-recaption-finetune-lock-text/keep_text_farest_image_farest/checkpoints/epoch_1.pt +3 -0
  11. SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-32-laion2b_s34b_b79k-010-1e5-recaption-finetune-lock-text/keep_text_uniform/checkpoints/epoch_1.pt +3 -0
  12. SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-32-laion2b_s34b_b79k-010-1e5-recaption-finetune-lock-text/low_inter_only/checkpoints/epoch_1.pt +3 -0
  13. SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-32-laion2b_s34b_b79k-010-1e5-recaption-finetune/keep_image_farest/benchmark_caltech101_epoch_1.pt_ViT-B-32_en_zeroshot_classification.json +1 -0
  14. SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-32-laion2b_s34b_b79k-010-1e5-recaption-finetune/keep_image_farest/benchmark_cars_epoch_1.pt_ViT-B-32_en_zeroshot_classification.json +1 -0
  15. SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-32-laion2b_s34b_b79k-010-1e5-recaption-finetune/keep_image_farest/benchmark_cifar100_epoch_1.pt_ViT-B-32_en_zeroshot_classification.json +1 -0
  16. SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-32-laion2b_s34b_b79k-010-1e5-recaption-finetune/keep_image_farest/benchmark_cifar10_epoch_1.pt_ViT-B-32_en_zeroshot_classification.json +1 -0
  17. SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-32-laion2b_s34b_b79k-010-1e5-recaption-finetune/keep_image_farest/benchmark_country211_epoch_1.pt_ViT-B-32_en_zeroshot_classification.json +1 -0
  18. SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-32-laion2b_s34b_b79k-010-1e5-recaption-finetune/keep_image_farest/benchmark_dtd_epoch_1.pt_ViT-B-32_en_zeroshot_classification.json +1 -0
  19. SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-32-laion2b_s34b_b79k-010-1e5-recaption-finetune/keep_image_farest/benchmark_eurosat_epoch_1.pt_ViT-B-32_en_zeroshot_classification.json +1 -0
  20. SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-32-laion2b_s34b_b79k-010-1e5-recaption-finetune/keep_image_farest/benchmark_fgvc_aircraft_epoch_1.pt_ViT-B-32_en_zeroshot_classification.json +1 -0
  21. SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-32-laion2b_s34b_b79k-010-1e5-recaption-finetune/keep_image_farest/benchmark_flickr30k_epoch_1.pt_ViT-B-32_en_zeroshot_retrieval.json +1 -0
  22. SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-32-laion2b_s34b_b79k-010-1e5-recaption-finetune/keep_image_farest/benchmark_flowers_epoch_1.pt_ViT-B-32_en_zeroshot_classification.json +1 -0
  23. SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-32-laion2b_s34b_b79k-010-1e5-recaption-finetune/keep_image_farest/benchmark_food101_epoch_1.pt_ViT-B-32_en_zeroshot_classification.json +1 -0
  24. SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-32-laion2b_s34b_b79k-010-1e5-recaption-finetune/keep_image_farest/benchmark_gtsrb_epoch_1.pt_ViT-B-32_en_zeroshot_classification.json +1 -0
  25. SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-32-laion2b_s34b_b79k-010-1e5-recaption-finetune/keep_image_farest/benchmark_imagenet1k_epoch_1.pt_ViT-B-32_en_zeroshot_classification.json +1 -0
  26. SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-32-laion2b_s34b_b79k-010-1e5-recaption-finetune/keep_image_farest/benchmark_mscoco_captions_epoch_1.pt_ViT-B-32_en_zeroshot_retrieval.json +1 -0
  27. SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-32-laion2b_s34b_b79k-010-1e5-recaption-finetune/keep_image_farest/benchmark_pets_epoch_1.pt_ViT-B-32_en_zeroshot_classification.json +1 -0
  28. SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-32-laion2b_s34b_b79k-010-1e5-recaption-finetune/keep_image_farest/benchmark_stl10_epoch_1.pt_ViT-B-32_en_zeroshot_classification.json +1 -0
  29. SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-32-laion2b_s34b_b79k-010-1e5-recaption-finetune/keep_image_farest/benchmark_sun397_epoch_1.pt_ViT-B-32_en_zeroshot_classification.json +1 -0
  30. SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-32-laion2b_s34b_b79k-010-1e5-recaption-finetune/keep_image_farest/benchmark_vtab_resisc45_epoch_1.pt_ViT-B-32_en_zeroshot_classification.json +1 -0
  31. SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-32-laion2b_s34b_b79k-010-1e5-recaption-finetune/keep_image_farest/out.log +159 -0
  32. SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-32-laion2b_s34b_b79k-010-1e5-recaption-finetune/keep_image_farest/params.txt +103 -0
  33. SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-32-laion2b_s34b_b79k-010-1e5-recaption-finetune/keep_image_uniform/benchmark_caltech101_epoch_1.pt_ViT-B-32_en_zeroshot_classification.json +1 -0
  34. SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-32-laion2b_s34b_b79k-010-1e5-recaption-finetune/keep_image_uniform/benchmark_cars_epoch_1.pt_ViT-B-32_en_zeroshot_classification.json +1 -0
  35. SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-32-laion2b_s34b_b79k-010-1e5-recaption-finetune/keep_image_uniform/benchmark_cifar100_epoch_1.pt_ViT-B-32_en_zeroshot_classification.json +1 -0
  36. SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-32-laion2b_s34b_b79k-010-1e5-recaption-finetune/keep_image_uniform/benchmark_cifar10_epoch_1.pt_ViT-B-32_en_zeroshot_classification.json +1 -0
  37. SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-32-laion2b_s34b_b79k-010-1e5-recaption-finetune/keep_image_uniform/benchmark_country211_epoch_1.pt_ViT-B-32_en_zeroshot_classification.json +1 -0
  38. SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-32-laion2b_s34b_b79k-010-1e5-recaption-finetune/keep_image_uniform/benchmark_dtd_epoch_1.pt_ViT-B-32_en_zeroshot_classification.json +1 -0
  39. SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-32-laion2b_s34b_b79k-010-1e5-recaption-finetune/keep_image_uniform/benchmark_eurosat_epoch_1.pt_ViT-B-32_en_zeroshot_classification.json +1 -0
  40. SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-32-laion2b_s34b_b79k-010-1e5-recaption-finetune/keep_image_uniform/benchmark_fgvc_aircraft_epoch_1.pt_ViT-B-32_en_zeroshot_classification.json +1 -0
  41. SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-32-laion2b_s34b_b79k-010-1e5-recaption-finetune/keep_image_uniform/benchmark_flickr30k_epoch_1.pt_ViT-B-32_en_zeroshot_retrieval.json +1 -0
  42. SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-32-laion2b_s34b_b79k-010-1e5-recaption-finetune/keep_image_uniform/benchmark_flowers_epoch_1.pt_ViT-B-32_en_zeroshot_classification.json +1 -0
  43. SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-32-laion2b_s34b_b79k-010-1e5-recaption-finetune/keep_image_uniform/benchmark_food101_epoch_1.pt_ViT-B-32_en_zeroshot_classification.json +1 -0
  44. SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-32-laion2b_s34b_b79k-010-1e5-recaption-finetune/keep_image_uniform/benchmark_gtsrb_epoch_1.pt_ViT-B-32_en_zeroshot_classification.json +1 -0
  45. SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-32-laion2b_s34b_b79k-010-1e5-recaption-finetune/keep_image_uniform/benchmark_imagenet1k_epoch_1.pt_ViT-B-32_en_zeroshot_classification.json +1 -0
  46. SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-32-laion2b_s34b_b79k-010-1e5-recaption-finetune/keep_image_uniform/benchmark_mscoco_captions_epoch_1.pt_ViT-B-32_en_zeroshot_retrieval.json +1 -0
  47. SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-32-laion2b_s34b_b79k-010-1e5-recaption-finetune/keep_image_uniform/benchmark_pets_epoch_1.pt_ViT-B-32_en_zeroshot_classification.json +1 -0
  48. SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-32-laion2b_s34b_b79k-010-1e5-recaption-finetune/keep_image_uniform/benchmark_stl10_epoch_1.pt_ViT-B-32_en_zeroshot_classification.json +1 -0
  49. SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-32-laion2b_s34b_b79k-010-1e5-recaption-finetune/keep_image_uniform/benchmark_sun397_epoch_1.pt_ViT-B-32_en_zeroshot_classification.json +1 -0
  50. SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-32-laion2b_s34b_b79k-010-1e5-recaption-finetune/keep_image_uniform/benchmark_vtab_resisc45_epoch_1.pt_ViT-B-32_en_zeroshot_classification.json +1 -0
.gitattributes CHANGED
@@ -47,3 +47,5 @@ SFR-Embedding-Code-2B_R\#0.8\#0.6\#dinov2-large\#0.0\#0.2\#rouge_0.2\#top_8\#int
47
  SFR-Embedding-Code-2B_R\#0.8\#0.6\#dinov2-large\#0.0\#0.2\#rouge_0.2\#top_8\#inter_0.4/0filter_ambiguous/largest_intra_inter.jsonl filter=lfs diff=lfs merge=lfs -text
48
  SFR-Embedding-Code-2B_R\#0.8\#0.6\#dinov2-large\#0.0\#0.2\#rouge_0.2\#top_8\#inter_0.4/filter_ambiguous/largest_intra_inter.jsonl filter=lfs diff=lfs merge=lfs -text
49
  SFR-Embedding-Code-2B_R\#0.8\#0.6\#dinov2-large\#0.0\#0.2\#rouge_0.2\#top_8\#inter_0.4/0filter_ambiguous/smallest_intra_inter.jsonl filter=lfs diff=lfs merge=lfs -text
 
 
 
47
  SFR-Embedding-Code-2B_R\#0.8\#0.6\#dinov2-large\#0.0\#0.2\#rouge_0.2\#top_8\#inter_0.4/0filter_ambiguous/largest_intra_inter.jsonl filter=lfs diff=lfs merge=lfs -text
48
  SFR-Embedding-Code-2B_R\#0.8\#0.6\#dinov2-large\#0.0\#0.2\#rouge_0.2\#top_8\#inter_0.4/filter_ambiguous/largest_intra_inter.jsonl filter=lfs diff=lfs merge=lfs -text
49
  SFR-Embedding-Code-2B_R\#0.8\#0.6\#dinov2-large\#0.0\#0.2\#rouge_0.2\#top_8\#inter_0.4/0filter_ambiguous/smallest_intra_inter.jsonl filter=lfs diff=lfs merge=lfs -text
50
+ SFR-Embedding-Code-2B_R\#0.8\#0.6\#dinov2-large\#0.0\#0.2\#rouge_0.2\#top_8\#inter_0.4/different_text_inter.jsonl filter=lfs diff=lfs merge=lfs -text
51
+ SFR-Embedding-Code-2B_R\#0.8\#0.6\#dinov2-large\#0.0\#0.2\#rouge_0.2\#top_8\#inter_0.4/0filter_ambiguous/intra_fixed.jsonl filter=lfs diff=lfs merge=lfs -text
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/0filter_ambiguous/intra_fixed.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:af3768b624539180f9f3f96bced76935478a013682b7e60514a31fc956398fa8
3
+ size 1670544155
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-32-laion2b_s34b_b79k-010-1e5-recaption-finetune-lock-text/keep_image_closest/checkpoints/epoch_1.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cb7bd3d8472707c27dd3f5bc4244ef688a24f686b1712ae6ae55bcff9a353e6f
3
+ size 1512951522
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-32-laion2b_s34b_b79k-010-1e5-recaption-finetune-lock-text/keep_image_farest/checkpoints/epoch_1.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0f50c62b1be871d2b358015a319f9092c70b18ebb03e76de2e514102feca377b
3
+ size 1512951522
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-32-laion2b_s34b_b79k-010-1e5-recaption-finetune-lock-text/keep_image_uniform/checkpoints/epoch_1.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6d8cc209e213f198adfdf3e6d15537489975651102fa5f34e0bfefe0a6fc1b1c
3
+ size 1512951522
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-32-laion2b_s34b_b79k-010-1e5-recaption-finetune-lock-text/keep_random/checkpoints/epoch_1.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3d8e8f114c1eabd176a00d35a285561e88e30d27421f4388dbc6f335f4f4a5ae
3
+ size 1512951522
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-32-laion2b_s34b_b79k-010-1e5-recaption-finetune-lock-text/keep_text_closest/checkpoints/epoch_1.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a7d428a17bb5367923de68261f218c7bdb6d491f89944f31773e2803eceed47b
3
+ size 1512951522
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-32-laion2b_s34b_b79k-010-1e5-recaption-finetune-lock-text/keep_text_closest_image_closest/checkpoints/epoch_1.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d5b12cdf9e2909e257e5c914c893518ed2433c3ae34e4a14286dae2582ed8f36
3
+ size 1512951586
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-32-laion2b_s34b_b79k-010-1e5-recaption-finetune-lock-text/keep_text_farest/checkpoints/epoch_1.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eea37af0b6e97b8efe426718b0dc9c64dd4b330a7811f53e46f91a30b8b6e4fd
3
+ size 1512951522
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-32-laion2b_s34b_b79k-010-1e5-recaption-finetune-lock-text/keep_text_farest_image_farest/checkpoints/epoch_1.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:62067d0f84aa2205f6ca5ed384827e08e801e5bb08e0a6f604e005fc98abae38
3
+ size 1512951586
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-32-laion2b_s34b_b79k-010-1e5-recaption-finetune-lock-text/keep_text_uniform/checkpoints/epoch_1.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9acf98a396f6f9f7f7776c77d26a33005f516a7a1ce067f613a21ffac15a2613
3
+ size 1512951522
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-32-laion2b_s34b_b79k-010-1e5-recaption-finetune-lock-text/low_inter_only/checkpoints/epoch_1.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:774f38dcf66e7a359d55b7c6a93322e54a0002c54177eec4670866fc59015cc5
3
+ size 1512951522
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-32-laion2b_s34b_b79k-010-1e5-recaption-finetune/keep_image_farest/benchmark_caltech101_epoch_1.pt_ViT-B-32_en_zeroshot_classification.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"dataset": "caltech101", "model": "ViT-B-32", "pretrained": "/mnt/personal/zhudongy/capsfusion10m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-32-laion2b_s34b_b79k-finetune-dc10m-010-1e5-recaption-keep_image_farest/checkpoints/epoch_1.pt", "task": "zeroshot_classification", "metrics": {"acc1": 0.8611111111111112, "acc5": 0.9646762904636921, "mean_per_class_recall": 0.9117574143020897}, "language": "en"}
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-32-laion2b_s34b_b79k-010-1e5-recaption-finetune/keep_image_farest/benchmark_cars_epoch_1.pt_ViT-B-32_en_zeroshot_classification.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"dataset": "cars", "model": "ViT-B-32", "pretrained": "/mnt/personal/zhudongy/capsfusion10m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-32-laion2b_s34b_b79k-finetune-dc10m-010-1e5-recaption-keep_image_farest/checkpoints/epoch_1.pt", "task": "zeroshot_classification", "metrics": {"acc1": 0.8604651162790697, "acc5": 0.9906728018903121, "mean_per_class_recall": 0.8603007472001762}, "language": "en"}
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-32-laion2b_s34b_b79k-010-1e5-recaption-finetune/keep_image_farest/benchmark_cifar100_epoch_1.pt_ViT-B-32_en_zeroshot_classification.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"dataset": "cifar100", "model": "ViT-B-32", "pretrained": "/mnt/personal/zhudongy/capsfusion10m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-32-laion2b_s34b_b79k-finetune-dc10m-010-1e5-recaption-keep_image_farest/checkpoints/epoch_1.pt", "task": "zeroshot_classification", "metrics": {"acc1": 0.7329, "acc5": 0.9282, "mean_per_class_recall": 0.7332000000000002}, "language": "en"}
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-32-laion2b_s34b_b79k-010-1e5-recaption-finetune/keep_image_farest/benchmark_cifar10_epoch_1.pt_ViT-B-32_en_zeroshot_classification.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"dataset": "cifar10", "model": "ViT-B-32", "pretrained": "/mnt/personal/zhudongy/capsfusion10m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-32-laion2b_s34b_b79k-finetune-dc10m-010-1e5-recaption-keep_image_farest/checkpoints/epoch_1.pt", "task": "zeroshot_classification", "metrics": {"acc1": 0.9239, "acc5": 0.9975, "mean_per_class_recall": 0.9239}, "language": "en"}
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-32-laion2b_s34b_b79k-010-1e5-recaption-finetune/keep_image_farest/benchmark_country211_epoch_1.pt_ViT-B-32_en_zeroshot_classification.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"dataset": "country211", "model": "ViT-B-32", "pretrained": "/mnt/personal/zhudongy/capsfusion10m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-32-laion2b_s34b_b79k-finetune-dc10m-010-1e5-recaption-keep_image_farest/checkpoints/epoch_1.pt", "task": "zeroshot_classification", "metrics": {"acc1": 0.1642654028436019, "acc5": 0.37867298578199055, "mean_per_class_recall": 0.1641706161137441}, "language": "en"}
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-32-laion2b_s34b_b79k-010-1e5-recaption-finetune/keep_image_farest/benchmark_dtd_epoch_1.pt_ViT-B-32_en_zeroshot_classification.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"dataset": "dtd", "model": "ViT-B-32", "pretrained": "/mnt/personal/zhudongy/capsfusion10m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-32-laion2b_s34b_b79k-finetune-dc10m-010-1e5-recaption-keep_image_farest/checkpoints/epoch_1.pt", "task": "zeroshot_classification", "metrics": {"acc1": 0.5569148936170213, "acc5": 0.851063829787234, "mean_per_class_recall": 0.5558510638297871}, "language": "en"}
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-32-laion2b_s34b_b79k-010-1e5-recaption-finetune/keep_image_farest/benchmark_eurosat_epoch_1.pt_ViT-B-32_en_zeroshot_classification.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"dataset": "eurosat", "model": "ViT-B-32", "pretrained": "/mnt/personal/zhudongy/capsfusion10m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-32-laion2b_s34b_b79k-finetune-dc10m-010-1e5-recaption-keep_image_farest/checkpoints/epoch_1.pt", "task": "zeroshot_classification", "metrics": {"acc1": 0.5003333333333333, "acc5": 0.9582962962962963, "mean_per_class_recall": 0.5124266666666667}, "language": "en"}
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-32-laion2b_s34b_b79k-010-1e5-recaption-finetune/keep_image_farest/benchmark_fgvc_aircraft_epoch_1.pt_ViT-B-32_en_zeroshot_classification.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"dataset": "fgvc_aircraft", "model": "ViT-B-32", "pretrained": "/mnt/personal/zhudongy/capsfusion10m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-32-laion2b_s34b_b79k-finetune-dc10m-010-1e5-recaption-keep_image_farest/checkpoints/epoch_1.pt", "task": "zeroshot_classification", "metrics": {"acc1": 0.23492349234923493, "acc5": 0.5643564356435643, "mean_per_class_recall": 0.23520499108734402}, "language": "en"}
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-32-laion2b_s34b_b79k-010-1e5-recaption-finetune/keep_image_farest/benchmark_flickr30k_epoch_1.pt_ViT-B-32_en_zeroshot_retrieval.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"dataset": "flickr30k", "model": "ViT-B-32", "pretrained": "/mnt/personal/zhudongy/capsfusion10m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-32-laion2b_s34b_b79k-finetune-dc10m-010-1e5-recaption-keep_image_farest/checkpoints/epoch_1.pt", "task": "zeroshot_retrieval", "metrics": {"image_retrieval_recall@1": 0.6736000180244446, "text_retrieval_recall@1": 0.8360000252723694, "image_retrieval_recall@5": 0.8812000155448914, "text_retrieval_recall@5": 0.9599999785423279, "image_retrieval_recall@10": 0.932200014591217, "text_retrieval_recall@10": 0.984000027179718}, "language": "en"}
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-32-laion2b_s34b_b79k-010-1e5-recaption-finetune/keep_image_farest/benchmark_flowers_epoch_1.pt_ViT-B-32_en_zeroshot_classification.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"dataset": "flowers", "model": "ViT-B-32", "pretrained": "/mnt/personal/zhudongy/capsfusion10m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-32-laion2b_s34b_b79k-finetune-dc10m-010-1e5-recaption-keep_image_farest/checkpoints/epoch_1.pt", "task": "zeroshot_classification", "metrics": {"acc1": 0.7049926817368678, "acc5": 0.8697349162465442, "mean_per_class_recall": 0.7014735954769374}, "language": "en"}
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-32-laion2b_s34b_b79k-010-1e5-recaption-finetune/keep_image_farest/benchmark_food101_epoch_1.pt_ViT-B-32_en_zeroshot_classification.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"dataset": "food101", "model": "ViT-B-32", "pretrained": "/mnt/personal/zhudongy/capsfusion10m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-32-laion2b_s34b_b79k-finetune-dc10m-010-1e5-recaption-keep_image_farest/checkpoints/epoch_1.pt", "task": "zeroshot_classification", "metrics": {"acc1": 0.819009900990099, "acc5": 0.9664950495049505, "mean_per_class_recall": 0.8192475247524755}, "language": "en"}
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-32-laion2b_s34b_b79k-010-1e5-recaption-finetune/keep_image_farest/benchmark_gtsrb_epoch_1.pt_ViT-B-32_en_zeroshot_classification.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"dataset": "gtsrb", "model": "ViT-B-32", "pretrained": "/mnt/personal/zhudongy/capsfusion10m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-32-laion2b_s34b_b79k-finetune-dc10m-010-1e5-recaption-keep_image_farest/checkpoints/epoch_1.pt", "task": "zeroshot_classification", "metrics": {"acc1": 0.5064133016627078, "acc5": 0.7681710213776722, "mean_per_class_recall": 0.4390340643422039}, "language": "en"}
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-32-laion2b_s34b_b79k-010-1e5-recaption-finetune/keep_image_farest/benchmark_imagenet1k_epoch_1.pt_ViT-B-32_en_zeroshot_classification.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"dataset": "imagenet1k", "model": "ViT-B-32", "pretrained": "/mnt/personal/zhudongy/capsfusion10m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-32-laion2b_s34b_b79k-finetune-dc10m-010-1e5-recaption-keep_image_farest/checkpoints/epoch_1.pt", "task": "zeroshot_classification", "metrics": {"acc1": 0.65884, "acc5": 0.89428, "mean_per_class_recall": 0.6587000000000001}, "language": "en"}
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-32-laion2b_s34b_b79k-010-1e5-recaption-finetune/keep_image_farest/benchmark_mscoco_captions_epoch_1.pt_ViT-B-32_en_zeroshot_retrieval.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"dataset": "mscoco_captions", "model": "ViT-B-32", "pretrained": "/mnt/personal/zhudongy/capsfusion10m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-32-laion2b_s34b_b79k-finetune-dc10m-010-1e5-recaption-keep_image_farest/checkpoints/epoch_1.pt", "task": "zeroshot_retrieval", "metrics": {"image_retrieval_recall@1": 0.39972010254859924, "text_retrieval_recall@1": 0.5735999941825867, "image_retrieval_recall@5": 0.6586565375328064, "text_retrieval_recall@5": 0.8055999875068665, "image_retrieval_recall@10": 0.759576141834259, "text_retrieval_recall@10": 0.878000020980835}, "language": "en"}
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-32-laion2b_s34b_b79k-010-1e5-recaption-finetune/keep_image_farest/benchmark_pets_epoch_1.pt_ViT-B-32_en_zeroshot_classification.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"dataset": "pets", "model": "ViT-B-32", "pretrained": "/mnt/personal/zhudongy/capsfusion10m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-32-laion2b_s34b_b79k-finetune-dc10m-010-1e5-recaption-keep_image_farest/checkpoints/epoch_1.pt", "task": "zeroshot_classification", "metrics": {"acc1": 0.9067865903515945, "acc5": 0.9942763695829926, "mean_per_class_recall": 0.9051993440098055}, "language": "en"}
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-32-laion2b_s34b_b79k-010-1e5-recaption-finetune/keep_image_farest/benchmark_stl10_epoch_1.pt_ViT-B-32_en_zeroshot_classification.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"dataset": "stl10", "model": "ViT-B-32", "pretrained": "/mnt/personal/zhudongy/capsfusion10m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-32-laion2b_s34b_b79k-finetune-dc10m-010-1e5-recaption-keep_image_farest/checkpoints/epoch_1.pt", "task": "zeroshot_classification", "metrics": {"acc1": 0.96425, "acc5": 0.99975, "mean_per_class_recall": 0.96425}, "language": "en"}
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-32-laion2b_s34b_b79k-010-1e5-recaption-finetune/keep_image_farest/benchmark_sun397_epoch_1.pt_ViT-B-32_en_zeroshot_classification.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"dataset": "sun397", "model": "ViT-B-32", "pretrained": "/mnt/personal/zhudongy/capsfusion10m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-32-laion2b_s34b_b79k-finetune-dc10m-010-1e5-recaption-keep_image_farest/checkpoints/epoch_1.pt", "task": "zeroshot_classification", "metrics": {"acc1": 0.6866689960829027, "acc5": 0.9371057616271585, "mean_per_class_recall": 0.6837170321383007}, "language": "en"}
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-32-laion2b_s34b_b79k-010-1e5-recaption-finetune/keep_image_farest/benchmark_vtab_resisc45_epoch_1.pt_ViT-B-32_en_zeroshot_classification.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"dataset": "vtab/resisc45", "model": "ViT-B-32", "pretrained": "/mnt/personal/zhudongy/capsfusion10m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-32-laion2b_s34b_b79k-finetune-dc10m-010-1e5-recaption-keep_image_farest/checkpoints/epoch_1.pt", "task": "zeroshot_classification", "metrics": {"acc1": 0.6, "acc5": 0.9061904761904762, "mean_per_class_recall": 0.6079051385663736}, "language": "en"}
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-32-laion2b_s34b_b79k-010-1e5-recaption-finetune/keep_image_farest/out.log ADDED
@@ -0,0 +1,159 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2025-04-24,00:12:07 | INFO | Running with a single process. Device cuda.
2
+ 2025-04-24,00:12:07 | INFO | Loaded ViT-B-32 model config.
3
+ 2025-04-24,00:12:09 | INFO | Loading pretrained ViT-B-32 weights (laion2b_s34b_b79k).
4
+ 2025-04-24,00:12:09 | INFO | Model:
5
+ 2025-04-24,00:12:09 | INFO | CLIP(
6
+ (visual): VisionTransformer(
7
+ (conv1): Conv2d(3, 768, kernel_size=(32, 32), stride=(32, 32), bias=False)
8
+ (patch_dropout): Identity()
9
+ (ln_pre): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
10
+ (transformer): Transformer(
11
+ (resblocks): ModuleList(
12
+ (0-11): 12 x ResidualAttentionBlock(
13
+ (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
14
+ (attn): MultiheadAttention(
15
+ (out_proj): NonDynamicallyQuantizableLinear(in_features=768, out_features=768, bias=True)
16
+ )
17
+ (ls_1): Identity()
18
+ (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
19
+ (mlp): Sequential(
20
+ (c_fc): Linear(in_features=768, out_features=3072, bias=True)
21
+ (gelu): GELU(approximate='none')
22
+ (c_proj): Linear(in_features=3072, out_features=768, bias=True)
23
+ )
24
+ (ls_2): Identity()
25
+ )
26
+ )
27
+ )
28
+ (ln_post): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
29
+ )
30
+ (transformer): Transformer(
31
+ (resblocks): ModuleList(
32
+ (0-11): 12 x ResidualAttentionBlock(
33
+ (ln_1): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
34
+ (attn): MultiheadAttention(
35
+ (out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)
36
+ )
37
+ (ls_1): Identity()
38
+ (ln_2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
39
+ (mlp): Sequential(
40
+ (c_fc): Linear(in_features=512, out_features=2048, bias=True)
41
+ (gelu): GELU(approximate='none')
42
+ (c_proj): Linear(in_features=2048, out_features=512, bias=True)
43
+ )
44
+ (ls_2): Identity()
45
+ )
46
+ )
47
+ )
48
+ (token_embedding): Embedding(49408, 512)
49
+ (ln_final): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
50
+ )
51
+ 2025-04-24,00:12:09 | INFO | Params:
52
+ 2025-04-24,00:12:09 | INFO | accum_freq: 4
53
+ 2025-04-24,00:12:09 | INFO | aug_cfg: {}
54
+ 2025-04-24,00:12:09 | INFO | batch_size: 2048
55
+ 2025-04-24,00:12:09 | INFO | beta1: 0.9
56
+ 2025-04-24,00:12:09 | INFO | beta2: 0.98
57
+ 2025-04-24,00:12:09 | INFO | cache_dir: None
58
+ 2025-04-24,00:12:09 | INFO | caption_ratio: 0.1
59
+ 2025-04-24,00:12:09 | INFO | checkpoint_path: /mnt/personal/zhudongy/capsfusion10m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-32-laion2b_s34b_b79k-finetune-dc10m-010-1e5-recaption-keep_image_farest/checkpoints
60
+ 2025-04-24,00:12:09 | INFO | coca_caption_loss_weight: 2.0
61
+ 2025-04-24,00:12:09 | INFO | coca_contrastive_loss_weight: 1.0
62
+ 2025-04-24,00:12:09 | INFO | copy_codebase: False
63
+ 2025-04-24,00:12:09 | INFO | csv_caption_key: title
64
+ 2025-04-24,00:12:09 | INFO | csv_img_key: filepath
65
+ 2025-04-24,00:12:09 | INFO | csv_separator:
66
+ 2025-04-24,00:12:09 | INFO | dataset_resampled: False
67
+ 2025-04-24,00:12:09 | INFO | dataset_type: synthetic
68
+ 2025-04-24,00:12:09 | INFO | ddp_static_graph: False
69
+ 2025-04-24,00:12:09 | INFO | debug: False
70
+ 2025-04-24,00:12:09 | INFO | delete_previous_checkpoint: False
71
+ 2025-04-24,00:12:09 | INFO | device: cuda
72
+ 2025-04-24,00:12:09 | INFO | dist_backend: None
73
+ 2025-04-24,00:12:09 | INFO | dist_url: None
74
+ 2025-04-24,00:12:09 | INFO | distill: False
75
+ 2025-04-24,00:12:09 | INFO | distill_model: None
76
+ 2025-04-24,00:12:09 | INFO | distill_pretrained: None
77
+ 2025-04-24,00:12:09 | INFO | distributed: False
78
+ 2025-04-24,00:12:09 | INFO | epochs: 1
79
+ 2025-04-24,00:12:09 | INFO | epochs_cooldown: None
80
+ 2025-04-24,00:12:09 | INFO | eps: 1e-08
81
+ 2025-04-24,00:12:09 | INFO | force_custom_text: False
82
+ 2025-04-24,00:12:09 | INFO | force_image_size: None
83
+ 2025-04-24,00:12:09 | INFO | force_patch_dropout: None
84
+ 2025-04-24,00:12:09 | INFO | force_quick_gelu: False
85
+ 2025-04-24,00:12:09 | INFO | gather_with_grad: True
86
+ 2025-04-24,00:12:09 | INFO | grad_checkpointing: True
87
+ 2025-04-24,00:12:09 | INFO | grad_clip_norm: None
88
+ 2025-04-24,00:12:09 | INFO | horovod: False
89
+ 2025-04-24,00:12:09 | INFO | image_interpolation: None
90
+ 2025-04-24,00:12:09 | INFO | image_mean: None
91
+ 2025-04-24,00:12:09 | INFO | image_resize_mode: None
92
+ 2025-04-24,00:12:09 | INFO | image_std: None
93
+ 2025-04-24,00:12:09 | INFO | imagenet_v2: None
94
+ 2025-04-24,00:12:09 | INFO | imagenet_val: None
95
+ 2025-04-24,00:12:09 | INFO | keep_func_name: keep_image_farest
96
+ 2025-04-24,00:12:09 | INFO | local_loss: False
97
+ 2025-04-24,00:12:09 | INFO | local_rank: 0
98
+ 2025-04-24,00:12:09 | INFO | lock_image: False
99
+ 2025-04-24,00:12:09 | INFO | lock_image_freeze_bn_stats: False
100
+ 2025-04-24,00:12:09 | INFO | lock_image_unlocked_groups: 0
101
+ 2025-04-24,00:12:09 | INFO | lock_text: False
102
+ 2025-04-24,00:12:09 | INFO | lock_text_freeze_layer_norm: False
103
+ 2025-04-24,00:12:09 | INFO | lock_text_unlocked_layers: 0
104
+ 2025-04-24,00:12:09 | INFO | log_every_n_steps: 100
105
+ 2025-04-24,00:12:09 | INFO | log_level: 20
106
+ 2025-04-24,00:12:09 | INFO | log_local: False
107
+ 2025-04-24,00:12:09 | INFO | log_path: /mnt/personal/zhudongy/capsfusion10m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-32-laion2b_s34b_b79k-finetune-dc10m-010-1e5-recaption-keep_image_farest/out.log
108
+ 2025-04-24,00:12:09 | INFO | logs: /mnt/personal/zhudongy/capsfusion10m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs
109
+ 2025-04-24,00:12:09 | INFO | loss_dist_impl: None
110
+ 2025-04-24,00:12:09 | INFO | lr: 1e-05
111
+ 2025-04-24,00:12:09 | INFO | lr_cooldown_end: 0.0
112
+ 2025-04-24,00:12:09 | INFO | lr_cooldown_power: 1.0
113
+ 2025-04-24,00:12:09 | INFO | lr_scheduler: cosine
114
+ 2025-04-24,00:12:09 | INFO | map_func_name: use_all
115
+ 2025-04-24,00:12:09 | INFO | model: ViT-B-32
116
+ 2025-04-24,00:12:09 | INFO | momentum: None
117
+ 2025-04-24,00:12:09 | INFO | name: ViT-B-32-laion2b_s34b_b79k-finetune-dc10m-010-1e5-recaption-keep_image_farest
118
+ 2025-04-24,00:12:09 | INFO | no_set_device_rank: False
119
+ 2025-04-24,00:12:09 | INFO | opt: adamw
120
+ 2025-04-24,00:12:09 | INFO | precision: amp
121
+ 2025-04-24,00:12:09 | INFO | pretrained: laion2b_s34b_b79k
122
+ 2025-04-24,00:12:09 | INFO | pretrained_image: False
123
+ 2025-04-24,00:12:09 | INFO | rank: 0
124
+ 2025-04-24,00:12:09 | INFO | remote_sync: None
125
+ 2025-04-24,00:12:09 | INFO | remote_sync_frequency: 300
126
+ 2025-04-24,00:12:09 | INFO | remote_sync_protocol: s3
127
+ 2025-04-24,00:12:09 | INFO | report_to: wandb
128
+ 2025-04-24,00:12:09 | INFO | resume: None
129
+ 2025-04-24,00:12:09 | INFO | save_frequency: 1
130
+ 2025-04-24,00:12:09 | INFO | save_most_recent: False
131
+ 2025-04-24,00:12:09 | INFO | seed: 0
132
+ 2025-04-24,00:12:09 | INFO | siglip: False
133
+ 2025-04-24,00:12:09 | INFO | skip_scheduler: False
134
+ 2025-04-24,00:12:09 | INFO | tensorboard: False
135
+ 2025-04-24,00:12:09 | INFO | tensorboard_path:
136
+ 2025-04-24,00:12:09 | INFO | torchcompile: False
137
+ 2025-04-24,00:12:09 | INFO | torchscript: False
138
+ 2025-04-24,00:12:09 | INFO | trace: False
139
+ 2025-04-24,00:12:09 | INFO | train_data: /mnt/personal/zhudongy/CapsFusion120M-wds/{00000..01426}.tar
140
+ 2025-04-24,00:12:09 | INFO | train_data_upsampling_factors: None
141
+ 2025-04-24,00:12:09 | INFO | train_num_samples: 10006295
142
+ 2025-04-24,00:12:09 | INFO | use_bn_sync: False
143
+ 2025-04-24,00:12:09 | INFO | use_bnb_linear: None
144
+ 2025-04-24,00:12:09 | INFO | val_data: None
145
+ 2025-04-24,00:12:09 | INFO | val_frequency: 1
146
+ 2025-04-24,00:12:09 | INFO | val_num_samples: None
147
+ 2025-04-24,00:12:09 | INFO | wandb: True
148
+ 2025-04-24,00:12:09 | INFO | wandb_notes:
149
+ 2025-04-24,00:12:09 | INFO | wandb_project_name: open-clip
150
+ 2025-04-24,00:12:09 | INFO | warmup: 122
151
+ 2025-04-24,00:12:09 | INFO | wd: 0.5
152
+ 2025-04-24,00:12:09 | INFO | workers: 16
153
+ 2025-04-24,00:12:09 | INFO | world_size: 1
154
+ 2025-04-24,00:12:09 | INFO | zeroshot_frequency: 2
155
+ 2025-04-24,00:12:09 | INFO | Created AdamW (adamw) optimizer: lr: 1e-05, betas: (0.9, 0.98), eps: 1e-08, weight_decay: 0.5, amsgrad: False, foreach: None, maximize: False, capturable: False, differentiable: False, fused: None
156
+ 2025-04-24,00:15:17 | INFO | Start epoch 0
157
+ 2025-04-24,00:15:49 | INFO | Train Epoch: 0 [ 8192/1015808 (1%)] Data (t): 23.724 Batch (t): 31.607, 259.181/s, 259.181/s/gpu LR: 0.000000 Logit Scale: 100.000 Imm_image: 32.957 (32.957) Imm_text: 32.957 (32.957) Isd_image: 4.5893 (4.5893) Isd_text: 4.5893 (4.5893) Contrastive_loss: 0.61915 (0.61915) Loss: 0.61915 (0.61915)
158
+ 2025-04-24,00:28:52 | INFO | Train Epoch: 0 [ 827392/1015808 (81%)] Data (t): 1.129 Batch (t): 7.828, 1053.99/s, 1053.99/s/gpu LR: 0.000008 Logit Scale: 99.989 Imm_image: 33.944 (33.451) Imm_text: 33.944 (33.451) Isd_image: 5.0255 (4.8074) Isd_text: 5.0255 (4.8074) Contrastive_loss: 0.37668 (0.49792) Loss: 0.37668 (0.49792)
159
+ 2025-04-24,00:31:57 | INFO | Train Epoch: 0 [1015808/1015808 (100%)] Data (t): 1.280 Batch (t): 8.069, 1047.99/s, 1047.99/s/gpu LR: 0.000005 Logit Scale: 99.987 Imm_image: 34.089 (33.663) Imm_text: 34.089 (33.663) Isd_image: 4.6706 (4.7618) Isd_text: 4.6706 (4.7618) Contrastive_loss: 0.36459 (0.45347) Loss: 0.36459 (0.45347)
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-32-laion2b_s34b_b79k-010-1e5-recaption-finetune/keep_image_farest/params.txt ADDED
@@ -0,0 +1,103 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ accum_freq: 4
2
+ aug_cfg: {}
3
+ batch_size: 2048
4
+ beta1: 0.9
5
+ beta2: 0.98
6
+ cache_dir: None
7
+ caption_ratio: 0.1
8
+ checkpoint_path: /mnt/personal/zhudongy/capsfusion10m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-32-laion2b_s34b_b79k-finetune-dc10m-010-1e5-recaption-keep_image_farest/checkpoints
9
+ coca_caption_loss_weight: 2.0
10
+ coca_contrastive_loss_weight: 1.0
11
+ copy_codebase: False
12
+ csv_caption_key: title
13
+ csv_img_key: filepath
14
+ csv_separator:
15
+ dataset_resampled: False
16
+ dataset_type: synthetic
17
+ ddp_static_graph: False
18
+ debug: False
19
+ delete_previous_checkpoint: False
20
+ device: cuda
21
+ dist_backend: None
22
+ dist_url: None
23
+ distill: False
24
+ distill_model: None
25
+ distill_pretrained: None
26
+ distributed: False
27
+ epochs: 1
28
+ epochs_cooldown: None
29
+ eps: 1e-08
30
+ force_custom_text: False
31
+ force_image_size: None
32
+ force_patch_dropout: None
33
+ force_quick_gelu: False
34
+ gather_with_grad: True
35
+ grad_checkpointing: True
36
+ grad_clip_norm: None
37
+ horovod: False
38
+ image_interpolation: None
39
+ image_mean: None
40
+ image_resize_mode: None
41
+ image_std: None
42
+ imagenet_v2: None
43
+ imagenet_val: None
44
+ keep_func_name: keep_image_farest
45
+ local_loss: False
46
+ local_rank: 0
47
+ lock_image: False
48
+ lock_image_freeze_bn_stats: False
49
+ lock_image_unlocked_groups: 0
50
+ lock_text: False
51
+ lock_text_freeze_layer_norm: False
52
+ lock_text_unlocked_layers: 0
53
+ log_every_n_steps: 100
54
+ log_level: 20
55
+ log_local: False
56
+ log_path: /mnt/personal/zhudongy/capsfusion10m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-32-laion2b_s34b_b79k-finetune-dc10m-010-1e5-recaption-keep_image_farest/out.log
57
+ logs: /mnt/personal/zhudongy/capsfusion10m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs
58
+ loss_dist_impl: None
59
+ lr: 1e-05
60
+ lr_cooldown_end: 0.0
61
+ lr_cooldown_power: 1.0
62
+ lr_scheduler: cosine
63
+ map_func_name: use_all
64
+ model: ViT-B-32
65
+ momentum: None
66
+ name: ViT-B-32-laion2b_s34b_b79k-finetune-dc10m-010-1e5-recaption-keep_image_farest
67
+ no_set_device_rank: False
68
+ opt: adamw
69
+ precision: amp
70
+ pretrained: laion2b_s34b_b79k
71
+ pretrained_image: False
72
+ rank: 0
73
+ remote_sync: None
74
+ remote_sync_frequency: 300
75
+ remote_sync_protocol: s3
76
+ report_to: wandb
77
+ resume: None
78
+ save_frequency: 1
79
+ save_most_recent: False
80
+ seed: 0
81
+ siglip: False
82
+ skip_scheduler: False
83
+ tensorboard: False
84
+ tensorboard_path:
85
+ torchcompile: False
86
+ torchscript: False
87
+ trace: False
88
+ train_data: /mnt/personal/zhudongy/CapsFusion120M-wds/{00000..01426}.tar
89
+ train_data_upsampling_factors: None
90
+ train_num_samples: 10006295
91
+ use_bn_sync: False
92
+ use_bnb_linear: None
93
+ val_data: None
94
+ val_frequency: 1
95
+ val_num_samples: None
96
+ wandb: True
97
+ wandb_notes:
98
+ wandb_project_name: open-clip
99
+ warmup: 122
100
+ wd: 0.5
101
+ workers: 16
102
+ world_size: 1
103
+ zeroshot_frequency: 2
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-32-laion2b_s34b_b79k-010-1e5-recaption-finetune/keep_image_uniform/benchmark_caltech101_epoch_1.pt_ViT-B-32_en_zeroshot_classification.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"dataset": "caltech101", "model": "ViT-B-32", "pretrained": "/mnt/personal/zhudongy/capsfusion10m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-32-laion2b_s34b_b79k-finetune-dc10m-010-1e5-recaption-keep_image_uniform/checkpoints/epoch_1.pt", "task": "zeroshot_classification", "metrics": {"acc1": 0.8632983377077865, "acc5": 0.9641294838145232, "mean_per_class_recall": 0.9132225597261372}, "language": "en"}
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-32-laion2b_s34b_b79k-010-1e5-recaption-finetune/keep_image_uniform/benchmark_cars_epoch_1.pt_ViT-B-32_en_zeroshot_classification.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"dataset": "cars", "model": "ViT-B-32", "pretrained": "/mnt/personal/zhudongy/capsfusion10m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-32-laion2b_s34b_b79k-finetune-dc10m-010-1e5-recaption-keep_image_uniform/checkpoints/epoch_1.pt", "task": "zeroshot_classification", "metrics": {"acc1": 0.8614600174107698, "acc5": 0.9898022634000746, "mean_per_class_recall": 0.862527756274282}, "language": "en"}
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-32-laion2b_s34b_b79k-010-1e5-recaption-finetune/keep_image_uniform/benchmark_cifar100_epoch_1.pt_ViT-B-32_en_zeroshot_classification.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"dataset": "cifar100", "model": "ViT-B-32", "pretrained": "/mnt/personal/zhudongy/capsfusion10m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-32-laion2b_s34b_b79k-finetune-dc10m-010-1e5-recaption-keep_image_uniform/checkpoints/epoch_1.pt", "task": "zeroshot_classification", "metrics": {"acc1": 0.7413, "acc5": 0.9325, "mean_per_class_recall": 0.7419}, "language": "en"}
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-32-laion2b_s34b_b79k-010-1e5-recaption-finetune/keep_image_uniform/benchmark_cifar10_epoch_1.pt_ViT-B-32_en_zeroshot_classification.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"dataset": "cifar10", "model": "ViT-B-32", "pretrained": "/mnt/personal/zhudongy/capsfusion10m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-32-laion2b_s34b_b79k-finetune-dc10m-010-1e5-recaption-keep_image_uniform/checkpoints/epoch_1.pt", "task": "zeroshot_classification", "metrics": {"acc1": 0.928, "acc5": 0.9973, "mean_per_class_recall": 0.9279}, "language": "en"}
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-32-laion2b_s34b_b79k-010-1e5-recaption-finetune/keep_image_uniform/benchmark_country211_epoch_1.pt_ViT-B-32_en_zeroshot_classification.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"dataset": "country211", "model": "ViT-B-32", "pretrained": "/mnt/personal/zhudongy/capsfusion10m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-32-laion2b_s34b_b79k-finetune-dc10m-010-1e5-recaption-keep_image_uniform/checkpoints/epoch_1.pt", "task": "zeroshot_classification", "metrics": {"acc1": 0.1638388625592417, "acc5": 0.37668246445497633, "mean_per_class_recall": 0.16421800947867302}, "language": "en"}
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-32-laion2b_s34b_b79k-010-1e5-recaption-finetune/keep_image_uniform/benchmark_dtd_epoch_1.pt_ViT-B-32_en_zeroshot_classification.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"dataset": "dtd", "model": "ViT-B-32", "pretrained": "/mnt/personal/zhudongy/capsfusion10m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-32-laion2b_s34b_b79k-finetune-dc10m-010-1e5-recaption-keep_image_uniform/checkpoints/epoch_1.pt", "task": "zeroshot_classification", "metrics": {"acc1": 0.576063829787234, "acc5": 0.8563829787234043, "mean_per_class_recall": 0.5771276595744682}, "language": "en"}
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-32-laion2b_s34b_b79k-010-1e5-recaption-finetune/keep_image_uniform/benchmark_eurosat_epoch_1.pt_ViT-B-32_en_zeroshot_classification.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"dataset": "eurosat", "model": "ViT-B-32", "pretrained": "/mnt/personal/zhudongy/capsfusion10m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-32-laion2b_s34b_b79k-finetune-dc10m-010-1e5-recaption-keep_image_uniform/checkpoints/epoch_1.pt", "task": "zeroshot_classification", "metrics": {"acc1": 0.4793703703703704, "acc5": 0.9591481481481482, "mean_per_class_recall": 0.49027666666666664}, "language": "en"}
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-32-laion2b_s34b_b79k-010-1e5-recaption-finetune/keep_image_uniform/benchmark_fgvc_aircraft_epoch_1.pt_ViT-B-32_en_zeroshot_classification.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"dataset": "fgvc_aircraft", "model": "ViT-B-32", "pretrained": "/mnt/personal/zhudongy/capsfusion10m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-32-laion2b_s34b_b79k-finetune-dc10m-010-1e5-recaption-keep_image_uniform/checkpoints/epoch_1.pt", "task": "zeroshot_classification", "metrics": {"acc1": 0.24002400240024002, "acc5": 0.5553555355535553, "mean_per_class_recall": 0.23985739750445628}, "language": "en"}
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-32-laion2b_s34b_b79k-010-1e5-recaption-finetune/keep_image_uniform/benchmark_flickr30k_epoch_1.pt_ViT-B-32_en_zeroshot_retrieval.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"dataset": "flickr30k", "model": "ViT-B-32", "pretrained": "/mnt/personal/zhudongy/capsfusion10m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-32-laion2b_s34b_b79k-finetune-dc10m-010-1e5-recaption-keep_image_uniform/checkpoints/epoch_1.pt", "task": "zeroshot_retrieval", "metrics": {"image_retrieval_recall@1": 0.6711999773979187, "text_retrieval_recall@1": 0.8299999833106995, "image_retrieval_recall@5": 0.8858000040054321, "text_retrieval_recall@5": 0.9570000171661377, "image_retrieval_recall@10": 0.9312000274658203, "text_retrieval_recall@10": 0.9810000061988831}, "language": "en"}
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-32-laion2b_s34b_b79k-010-1e5-recaption-finetune/keep_image_uniform/benchmark_flowers_epoch_1.pt_ViT-B-32_en_zeroshot_classification.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"dataset": "flowers", "model": "ViT-B-32", "pretrained": "/mnt/personal/zhudongy/capsfusion10m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-32-laion2b_s34b_b79k-finetune-dc10m-010-1e5-recaption-keep_image_uniform/checkpoints/epoch_1.pt", "task": "zeroshot_classification", "metrics": {"acc1": 0.7088957554073834, "acc5": 0.8736379899170597, "mean_per_class_recall": 0.6985781923439345}, "language": "en"}
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-32-laion2b_s34b_b79k-010-1e5-recaption-finetune/keep_image_uniform/benchmark_food101_epoch_1.pt_ViT-B-32_en_zeroshot_classification.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"dataset": "food101", "model": "ViT-B-32", "pretrained": "/mnt/personal/zhudongy/capsfusion10m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-32-laion2b_s34b_b79k-finetune-dc10m-010-1e5-recaption-keep_image_uniform/checkpoints/epoch_1.pt", "task": "zeroshot_classification", "metrics": {"acc1": 0.8213069306930693, "acc5": 0.9667326732673267, "mean_per_class_recall": 0.821069306930693}, "language": "en"}
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-32-laion2b_s34b_b79k-010-1e5-recaption-finetune/keep_image_uniform/benchmark_gtsrb_epoch_1.pt_ViT-B-32_en_zeroshot_classification.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"dataset": "gtsrb", "model": "ViT-B-32", "pretrained": "/mnt/personal/zhudongy/capsfusion10m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-32-laion2b_s34b_b79k-finetune-dc10m-010-1e5-recaption-keep_image_uniform/checkpoints/epoch_1.pt", "task": "zeroshot_classification", "metrics": {"acc1": 0.498812351543943, "acc5": 0.7555027711797307, "mean_per_class_recall": 0.42383596334759127}, "language": "en"}
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-32-laion2b_s34b_b79k-010-1e5-recaption-finetune/keep_image_uniform/benchmark_imagenet1k_epoch_1.pt_ViT-B-32_en_zeroshot_classification.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"dataset": "imagenet1k", "model": "ViT-B-32", "pretrained": "/mnt/personal/zhudongy/capsfusion10m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-32-laion2b_s34b_b79k-finetune-dc10m-010-1e5-recaption-keep_image_uniform/checkpoints/epoch_1.pt", "task": "zeroshot_classification", "metrics": {"acc1": 0.66026, "acc5": 0.89522, "mean_per_class_recall": 0.6604000000000001}, "language": "en"}
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-32-laion2b_s34b_b79k-010-1e5-recaption-finetune/keep_image_uniform/benchmark_mscoco_captions_epoch_1.pt_ViT-B-32_en_zeroshot_retrieval.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"dataset": "mscoco_captions", "model": "ViT-B-32", "pretrained": "/mnt/personal/zhudongy/capsfusion10m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-32-laion2b_s34b_b79k-finetune-dc10m-010-1e5-recaption-keep_image_uniform/checkpoints/epoch_1.pt", "task": "zeroshot_retrieval", "metrics": {"image_retrieval_recall@1": 0.400479793548584, "text_retrieval_recall@1": 0.5802000164985657, "image_retrieval_recall@5": 0.6611355543136597, "text_retrieval_recall@5": 0.8051999807357788, "image_retrieval_recall@10": 0.7623750567436218, "text_retrieval_recall@10": 0.8772000074386597}, "language": "en"}
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-32-laion2b_s34b_b79k-010-1e5-recaption-finetune/keep_image_uniform/benchmark_pets_epoch_1.pt_ViT-B-32_en_zeroshot_classification.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"dataset": "pets", "model": "ViT-B-32", "pretrained": "/mnt/personal/zhudongy/capsfusion10m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-32-laion2b_s34b_b79k-finetune-dc10m-010-1e5-recaption-keep_image_uniform/checkpoints/epoch_1.pt", "task": "zeroshot_classification", "metrics": {"acc1": 0.9051512673753066, "acc5": 0.9953665849005179, "mean_per_class_recall": 0.9041328844971016}, "language": "en"}
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-32-laion2b_s34b_b79k-010-1e5-recaption-finetune/keep_image_uniform/benchmark_stl10_epoch_1.pt_ViT-B-32_en_zeroshot_classification.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"dataset": "stl10", "model": "ViT-B-32", "pretrained": "/mnt/personal/zhudongy/capsfusion10m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-32-laion2b_s34b_b79k-finetune-dc10m-010-1e5-recaption-keep_image_uniform/checkpoints/epoch_1.pt", "task": "zeroshot_classification", "metrics": {"acc1": 0.96075, "acc5": 0.999625, "mean_per_class_recall": 0.960625}, "language": "en"}
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-32-laion2b_s34b_b79k-010-1e5-recaption-finetune/keep_image_uniform/benchmark_sun397_epoch_1.pt_ViT-B-32_en_zeroshot_classification.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"dataset": "sun397", "model": "ViT-B-32", "pretrained": "/mnt/personal/zhudongy/capsfusion10m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-32-laion2b_s34b_b79k-finetune-dc10m-010-1e5-recaption-keep_image_uniform/checkpoints/epoch_1.pt", "task": "zeroshot_classification", "metrics": {"acc1": 0.6863011935193188, "acc5": 0.9371057616271585, "mean_per_class_recall": 0.6836321109384585}, "language": "en"}
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-32-laion2b_s34b_b79k-010-1e5-recaption-finetune/keep_image_uniform/benchmark_vtab_resisc45_epoch_1.pt_ViT-B-32_en_zeroshot_classification.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"dataset": "vtab/resisc45", "model": "ViT-B-32", "pretrained": "/mnt/personal/zhudongy/capsfusion10m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-32-laion2b_s34b_b79k-finetune-dc10m-010-1e5-recaption-keep_image_uniform/checkpoints/epoch_1.pt", "task": "zeroshot_classification", "metrics": {"acc1": 0.5992063492063492, "acc5": 0.9063492063492063, "mean_per_class_recall": 0.60694300093902}, "language": "en"}