cornuHGF commited on
Commit
82ca9dc
·
verified ·
1 Parent(s): f9a2f4e

Add files using upload-large-folder tool

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +6 -0
  2. SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/keep_image_farest/tensorboard/events.out.tfevents.1745688045.g12.2586430.0 +3 -0
  3. SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/keep_image_uniform/benchmark_caltech101_epoch_10.pt_ViT-B-16_en_zeroshot_classification.json +1 -0
  4. SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/keep_image_uniform/benchmark_cars_epoch_10.pt_ViT-B-16_en_zeroshot_classification.json +1 -0
  5. SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/keep_image_uniform/benchmark_cifar100_epoch_10.pt_ViT-B-16_en_zeroshot_classification.json +1 -0
  6. SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/keep_image_uniform/benchmark_cifar10_epoch_10.pt_ViT-B-16_en_zeroshot_classification.json +1 -0
  7. SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/keep_image_uniform/benchmark_country211_epoch_10.pt_ViT-B-16_en_zeroshot_classification.json +1 -0
  8. SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/keep_image_uniform/benchmark_dtd_epoch_10.pt_ViT-B-16_en_zeroshot_classification.json +1 -0
  9. SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/keep_image_uniform/benchmark_eurosat_epoch_10.pt_ViT-B-16_en_zeroshot_classification.json +1 -0
  10. SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/keep_image_uniform/benchmark_fgvc_aircraft_epoch_10.pt_ViT-B-16_en_zeroshot_classification.json +1 -0
  11. SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/keep_image_uniform/benchmark_flickr30k_epoch_10.pt_ViT-B-16_en_zeroshot_retrieval.json +1 -0
  12. SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/keep_image_uniform/benchmark_flowers_epoch_10.pt_ViT-B-16_en_zeroshot_classification.json +1 -0
  13. SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/keep_image_uniform/benchmark_food101_epoch_10.pt_ViT-B-16_en_zeroshot_classification.json +1 -0
  14. SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/keep_image_uniform/benchmark_gtsrb_epoch_10.pt_ViT-B-16_en_zeroshot_classification.json +1 -0
  15. SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/keep_image_uniform/benchmark_imagenet1k_epoch_10.pt_ViT-B-16_en_zeroshot_classification.json +1 -0
  16. SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/keep_image_uniform/benchmark_mscoco_captions_epoch_10.pt_ViT-B-16_en_zeroshot_retrieval.json +1 -0
  17. SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/keep_image_uniform/benchmark_pets_epoch_10.pt_ViT-B-16_en_zeroshot_classification.json +1 -0
  18. SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/keep_image_uniform/benchmark_stl10_epoch_10.pt_ViT-B-16_en_zeroshot_classification.json +1 -0
  19. SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/keep_image_uniform/benchmark_sun397_epoch_10.pt_ViT-B-16_en_zeroshot_classification.json +1 -0
  20. SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/keep_image_uniform/benchmark_vtab_resisc45_epoch_10.pt_ViT-B-16_en_zeroshot_classification.json +1 -0
  21. SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/keep_image_uniform/out.log +195 -0
  22. SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/keep_image_uniform/params.txt +103 -0
  23. SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/keep_image_uniform/tensorboard/events.out.tfevents.1745733263.g12.2655775.0 +3 -0
  24. SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/keep_random/tensorboard/events.out.tfevents.1745710659.g12.2628499.0 +3 -0
  25. SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/keep_text_closest_image_closest/tensorboard/events.out.tfevents.1745663928.g12.2393971.0 +3 -0
  26. SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/keep_text_farest_image_farest/tensorboard/events.out.tfevents.1745755854.g12.2682891.0 +3 -0
  27. SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/low_inter_only/benchmark_caltech101_epoch_10.pt_ViT-B-16_en_zeroshot_classification.json +1 -0
  28. SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/low_inter_only/benchmark_cars_epoch_10.pt_ViT-B-16_en_zeroshot_classification.json +1 -0
  29. SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/low_inter_only/benchmark_cifar100_epoch_10.pt_ViT-B-16_en_zeroshot_classification.json +1 -0
  30. SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/low_inter_only/benchmark_cifar10_epoch_10.pt_ViT-B-16_en_zeroshot_classification.json +1 -0
  31. SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/low_inter_only/benchmark_country211_epoch_10.pt_ViT-B-16_en_zeroshot_classification.json +1 -0
  32. SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/low_inter_only/benchmark_dtd_epoch_10.pt_ViT-B-16_en_zeroshot_classification.json +1 -0
  33. SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/low_inter_only/benchmark_eurosat_epoch_10.pt_ViT-B-16_en_zeroshot_classification.json +1 -0
  34. SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/low_inter_only/benchmark_flickr30k_epoch_10.pt_ViT-B-16_en_zeroshot_retrieval.json +1 -0
  35. SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/low_inter_only/benchmark_flowers_epoch_10.pt_ViT-B-16_en_zeroshot_classification.json +1 -0
  36. SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/low_inter_only/benchmark_food101_epoch_10.pt_ViT-B-16_en_zeroshot_classification.json +1 -0
  37. SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/low_inter_only/benchmark_gtsrb_epoch_10.pt_ViT-B-16_en_zeroshot_classification.json +1 -0
  38. SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/low_inter_only/benchmark_imagenet1k_epoch_10.pt_ViT-B-16_en_zeroshot_classification.json +1 -0
  39. SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/low_inter_only/benchmark_mscoco_captions_epoch_10.pt_ViT-B-16_en_zeroshot_retrieval.json +1 -0
  40. SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/low_inter_only/benchmark_pets_epoch_10.pt_ViT-B-16_en_zeroshot_classification.json +1 -0
  41. SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/low_inter_only/benchmark_stl10_epoch_10.pt_ViT-B-16_en_zeroshot_classification.json +1 -0
  42. SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/low_inter_only/benchmark_sun397_epoch_10.pt_ViT-B-16_en_zeroshot_classification.json +1 -0
  43. SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/low_inter_only/benchmark_vtab_resisc45_epoch_10.pt_ViT-B-16_en_zeroshot_classification.json +1 -0
  44. SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/low_inter_only/out.log +195 -0
  45. SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/low_inter_only/tensorboard/events.out.tfevents.1745780012.g12.2713681.0 +3 -0
  46. SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/clusters/image_I_closest_0.1_SFR-Embedding-Code-2B_R_dinov2-large.npy +3 -0
  47. SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/clusters/image_farest.jsonl +3 -0
  48. SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/clusters/text_farest.jsonl +3 -0
  49. SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/clusters/text_uniform.jsonl +3 -0
  50. captions.tsv +3 -0
.gitattributes CHANGED
@@ -36,3 +36,9 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
36
  SFR-Embedding-Code-2B_R\#0.8\#0.6\#dinov2-large\#0.0\#0.2\#rouge_0.2\#top_8\#inter_0.4/clusters/image_uniform.jsonl filter=lfs diff=lfs merge=lfs -text
37
  SFR-Embedding-Code-2B_R\#0.8\#0.6\#dinov2-large\#0.0\#0.2\#rouge_0.2\#top_8\#inter_0.4/clusters/image_closest.jsonl filter=lfs diff=lfs merge=lfs -text
38
  SFR-Embedding-Code-2B_R\#0.8\#0.6\#dinov2-large\#0.0\#0.2\#rouge_0.2\#top_8\#inter_0.4/clusters/text_closest.jsonl filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
36
  SFR-Embedding-Code-2B_R\#0.8\#0.6\#dinov2-large\#0.0\#0.2\#rouge_0.2\#top_8\#inter_0.4/clusters/image_uniform.jsonl filter=lfs diff=lfs merge=lfs -text
37
  SFR-Embedding-Code-2B_R\#0.8\#0.6\#dinov2-large\#0.0\#0.2\#rouge_0.2\#top_8\#inter_0.4/clusters/image_closest.jsonl filter=lfs diff=lfs merge=lfs -text
38
  SFR-Embedding-Code-2B_R\#0.8\#0.6\#dinov2-large\#0.0\#0.2\#rouge_0.2\#top_8\#inter_0.4/clusters/text_closest.jsonl filter=lfs diff=lfs merge=lfs -text
39
+ SFR-Embedding-Code-2B_R\#0.8\#0.6\#dinov2-large\#0.0\#0.2\#rouge_0.2\#top_8\#inter_0.4/clusters/text_uniform.jsonl filter=lfs diff=lfs merge=lfs -text
40
+ SFR-Embedding-Code-2B_R\#0.8\#0.6\#dinov2-large\#0.0\#0.2\#rouge_0.2\#top_8\#inter_0.4/clusters/image_farest.jsonl filter=lfs diff=lfs merge=lfs -text
41
+ SFR-Embedding-Code-2B_R\#0.8\#0.6\#dinov2-large\#0.0\#0.2\#rouge_0.2\#top_8\#inter_0.4/clusters/text_farest.jsonl filter=lfs diff=lfs merge=lfs -text
42
+ siglip-so400m-patch14-384\#0.8\#0.6\#siglip-so400m-patch14-384\#0.0\#0.2\#rouge_0.2\#top_8\#inter_0.4/clusters/text_closest.jsonl filter=lfs diff=lfs merge=lfs -text
43
+ siglip-so400m-patch14-384\#0.8\#0.6\#siglip-so400m-patch14-384\#0.0\#0.2\#rouge_0.2\#top_8\#inter_0.4/clusters/text_uniform.jsonl filter=lfs diff=lfs merge=lfs -text
44
+ captions.tsv filter=lfs diff=lfs merge=lfs -text
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/keep_image_farest/tensorboard/events.out.tfevents.1745688045.g12.2586430.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:72c633d46b0620be846fb5b5afdb9b32eb3b3f502669dd38665578d573a7a097
3
+ size 19936
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/keep_image_uniform/benchmark_caltech101_epoch_10.pt_ViT-B-16_en_zeroshot_classification.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"dataset": "caltech101", "model": "ViT-B-16", "pretrained": "/mnt/personal/zhudongy/rdc9m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/keep_image_uniform/checkpoints/epoch_10.pt", "task": "zeroshot_classification", "metrics": {"acc1": 0.8613298337707787, "acc5": 0.9757217847769029, "mean_per_class_recall": 0.9243666325650888}, "language": "en"}
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/keep_image_uniform/benchmark_cars_epoch_10.pt_ViT-B-16_en_zeroshot_classification.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"dataset": "cars", "model": "ViT-B-16", "pretrained": "/mnt/personal/zhudongy/rdc9m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/keep_image_uniform/checkpoints/epoch_10.pt", "task": "zeroshot_classification", "metrics": {"acc1": 0.8311155328939187, "acc5": 0.9896779007586121, "mean_per_class_recall": 0.8300859230489636}, "language": "en"}
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/keep_image_uniform/benchmark_cifar100_epoch_10.pt_ViT-B-16_en_zeroshot_classification.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"dataset": "cifar100", "model": "ViT-B-16", "pretrained": "/mnt/personal/zhudongy/rdc9m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/keep_image_uniform/checkpoints/epoch_10.pt", "task": "zeroshot_classification", "metrics": {"acc1": 0.7575, "acc5": 0.9361, "mean_per_class_recall": 0.7575}, "language": "en"}
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/keep_image_uniform/benchmark_cifar10_epoch_10.pt_ViT-B-16_en_zeroshot_classification.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"dataset": "cifar10", "model": "ViT-B-16", "pretrained": "/mnt/personal/zhudongy/rdc9m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/keep_image_uniform/checkpoints/epoch_10.pt", "task": "zeroshot_classification", "metrics": {"acc1": 0.9358, "acc5": 0.9976, "mean_per_class_recall": 0.9359000000000002}, "language": "en"}
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/keep_image_uniform/benchmark_country211_epoch_10.pt_ViT-B-16_en_zeroshot_classification.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"dataset": "country211", "model": "ViT-B-16", "pretrained": "/mnt/personal/zhudongy/rdc9m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/keep_image_uniform/checkpoints/epoch_10.pt", "task": "zeroshot_classification", "metrics": {"acc1": 0.19450236966824644, "acc5": 0.42265402843601896, "mean_per_class_recall": 0.19445497630331754}, "language": "en"}
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/keep_image_uniform/benchmark_dtd_epoch_10.pt_ViT-B-16_en_zeroshot_classification.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"dataset": "dtd", "model": "ViT-B-16", "pretrained": "/mnt/personal/zhudongy/rdc9m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/keep_image_uniform/checkpoints/epoch_10.pt", "task": "zeroshot_classification", "metrics": {"acc1": 0.5531914893617021, "acc5": 0.8308510638297872, "mean_per_class_recall": 0.5531914893617021}, "language": "en"}
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/keep_image_uniform/benchmark_eurosat_epoch_10.pt_ViT-B-16_en_zeroshot_classification.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"dataset": "eurosat", "model": "ViT-B-16", "pretrained": "/mnt/personal/zhudongy/rdc9m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/keep_image_uniform/checkpoints/epoch_10.pt", "task": "zeroshot_classification", "metrics": {"acc1": 0.5212962962962963, "acc5": 0.8868148148148148, "mean_per_class_recall": 0.5254933333333334}, "language": "en"}
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/keep_image_uniform/benchmark_fgvc_aircraft_epoch_10.pt_ViT-B-16_en_zeroshot_classification.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"dataset": "fgvc_aircraft", "model": "ViT-B-16", "pretrained": "/mnt/personal/zhudongy/rdc9m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/keep_image_uniform/checkpoints/epoch_10.pt", "task": "zeroshot_classification", "metrics": {"acc1": 0.2652265226522652, "acc5": 0.6126612661266126, "mean_per_class_recall": 0.26398395721925133}, "language": "en"}
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/keep_image_uniform/benchmark_flickr30k_epoch_10.pt_ViT-B-16_en_zeroshot_retrieval.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"dataset": "flickr30k", "model": "ViT-B-16", "pretrained": "/mnt/personal/zhudongy/rdc9m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/keep_image_uniform/checkpoints/epoch_10.pt", "task": "zeroshot_retrieval", "metrics": {"image_retrieval_recall@1": 0.6549999713897705, "text_retrieval_recall@1": 0.8149999976158142, "image_retrieval_recall@5": 0.8682000041007996, "text_retrieval_recall@5": 0.9520000219345093, "image_retrieval_recall@10": 0.921999990940094, "text_retrieval_recall@10": 0.9810000061988831}, "language": "en"}
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/keep_image_uniform/benchmark_flowers_epoch_10.pt_ViT-B-16_en_zeroshot_classification.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"dataset": "flowers", "model": "ViT-B-16", "pretrained": "/mnt/personal/zhudongy/rdc9m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/keep_image_uniform/checkpoints/epoch_10.pt", "task": "zeroshot_classification", "metrics": {"acc1": 0.7440234184420231, "acc5": 0.9011221336802732, "mean_per_class_recall": 0.7362584000701315}, "language": "en"}
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/keep_image_uniform/benchmark_food101_epoch_10.pt_ViT-B-16_en_zeroshot_classification.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"dataset": "food101", "model": "ViT-B-16", "pretrained": "/mnt/personal/zhudongy/rdc9m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/keep_image_uniform/checkpoints/epoch_10.pt", "task": "zeroshot_classification", "metrics": {"acc1": 0.867049504950495, "acc5": 0.9799603960396039, "mean_per_class_recall": 0.867009900990099}, "language": "en"}
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/keep_image_uniform/benchmark_gtsrb_epoch_10.pt_ViT-B-16_en_zeroshot_classification.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"dataset": "gtsrb", "model": "ViT-B-16", "pretrained": "/mnt/personal/zhudongy/rdc9m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/keep_image_uniform/checkpoints/epoch_10.pt", "task": "zeroshot_classification", "metrics": {"acc1": 0.5332541567695962, "acc5": 0.7577988915281076, "mean_per_class_recall": 0.4841045961397327}, "language": "en"}
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/keep_image_uniform/benchmark_imagenet1k_epoch_10.pt_ViT-B-16_en_zeroshot_classification.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"dataset": "imagenet1k", "model": "ViT-B-16", "pretrained": "/mnt/personal/zhudongy/rdc9m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/keep_image_uniform/checkpoints/epoch_10.pt", "task": "zeroshot_classification", "metrics": {"acc1": 0.68376, "acc5": 0.90904, "mean_per_class_recall": 0.68382}, "language": "en"}
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/keep_image_uniform/benchmark_mscoco_captions_epoch_10.pt_ViT-B-16_en_zeroshot_retrieval.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"dataset": "mscoco_captions", "model": "ViT-B-16", "pretrained": "/mnt/personal/zhudongy/rdc9m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/keep_image_uniform/checkpoints/epoch_10.pt", "task": "zeroshot_retrieval", "metrics": {"image_retrieval_recall@1": 0.38504597544670105, "text_retrieval_recall@1": 0.5383999943733215, "image_retrieval_recall@5": 0.6431427597999573, "text_retrieval_recall@5": 0.7778000235557556, "image_retrieval_recall@10": 0.7439424395561218, "text_retrieval_recall@10": 0.8586000204086304}, "language": "en"}
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/keep_image_uniform/benchmark_pets_epoch_10.pt_ViT-B-16_en_zeroshot_classification.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"dataset": "pets", "model": "ViT-B-16", "pretrained": "/mnt/personal/zhudongy/rdc9m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/keep_image_uniform/checkpoints/epoch_10.pt", "task": "zeroshot_classification", "metrics": {"acc1": 0.9078768056691197, "acc5": 0.9961842463886618, "mean_per_class_recall": 0.9070997688018754}, "language": "en"}
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/keep_image_uniform/benchmark_stl10_epoch_10.pt_ViT-B-16_en_zeroshot_classification.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"dataset": "stl10", "model": "ViT-B-16", "pretrained": "/mnt/personal/zhudongy/rdc9m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/keep_image_uniform/checkpoints/epoch_10.pt", "task": "zeroshot_classification", "metrics": {"acc1": 0.973375, "acc5": 1.0, "mean_per_class_recall": 0.9733749999999999}, "language": "en"}
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/keep_image_uniform/benchmark_sun397_epoch_10.pt_ViT-B-16_en_zeroshot_classification.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"dataset": "sun397", "model": "ViT-B-16", "pretrained": "/mnt/personal/zhudongy/rdc9m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/keep_image_uniform/checkpoints/epoch_10.pt", "task": "zeroshot_classification", "metrics": {"acc1": 0.6725453776412822, "acc5": 0.9295841992018684, "mean_per_class_recall": 0.6641095198145022}, "language": "en"}
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/keep_image_uniform/benchmark_vtab_resisc45_epoch_10.pt_ViT-B-16_en_zeroshot_classification.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"dataset": "vtab/resisc45", "model": "ViT-B-16", "pretrained": "/mnt/personal/zhudongy/rdc9m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/keep_image_uniform/checkpoints/epoch_10.pt", "task": "zeroshot_classification", "metrics": {"acc1": 0.6007936507936508, "acc5": 0.9041269841269841, "mean_per_class_recall": 0.6084713941217861}, "language": "en"}
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/keep_image_uniform/out.log ADDED
@@ -0,0 +1,195 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2025-04-27,07:54:14 | INFO | Running in distributed mode with multiple processes. Device: cuda:0.Process (global: 0, local 0), total 2.
2
+ 2025-04-27,07:54:14 | INFO | Loaded ViT-B-16 model config.
3
+ 2025-04-27,07:54:15 | INFO | Loading pretrained ViT-B-16 weights (datacomp_xl_s13b_b90k).
4
+ 2025-04-27,07:54:15 | INFO | Model:
5
+ 2025-04-27,07:54:15 | INFO | CLIP(
6
+ (visual): VisionTransformer(
7
+ (conv1): Conv2d(3, 768, kernel_size=(16, 16), stride=(16, 16), bias=False)
8
+ (patch_dropout): Identity()
9
+ (ln_pre): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
10
+ (transformer): Transformer(
11
+ (resblocks): ModuleList(
12
+ (0-11): 12 x ResidualAttentionBlock(
13
+ (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
14
+ (attn): MultiheadAttention(
15
+ (out_proj): NonDynamicallyQuantizableLinear(in_features=768, out_features=768, bias=True)
16
+ )
17
+ (ls_1): Identity()
18
+ (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
19
+ (mlp): Sequential(
20
+ (c_fc): Linear(in_features=768, out_features=3072, bias=True)
21
+ (gelu): GELU(approximate='none')
22
+ (c_proj): Linear(in_features=3072, out_features=768, bias=True)
23
+ )
24
+ (ls_2): Identity()
25
+ )
26
+ )
27
+ )
28
+ (ln_post): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
29
+ )
30
+ (transformer): Transformer(
31
+ (resblocks): ModuleList(
32
+ (0-11): 12 x ResidualAttentionBlock(
33
+ (ln_1): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
34
+ (attn): MultiheadAttention(
35
+ (out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)
36
+ )
37
+ (ls_1): Identity()
38
+ (ln_2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
39
+ (mlp): Sequential(
40
+ (c_fc): Linear(in_features=512, out_features=2048, bias=True)
41
+ (gelu): GELU(approximate='none')
42
+ (c_proj): Linear(in_features=2048, out_features=512, bias=True)
43
+ )
44
+ (ls_2): Identity()
45
+ )
46
+ )
47
+ )
48
+ (token_embedding): Embedding(49408, 512)
49
+ (ln_final): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
50
+ )
51
+ 2025-04-27,07:54:15 | INFO | Params:
52
+ 2025-04-27,07:54:15 | INFO | accum_freq: 2
53
+ 2025-04-27,07:54:15 | INFO | aug_cfg: {}
54
+ 2025-04-27,07:54:15 | INFO | batch_size: 2048
55
+ 2025-04-27,07:54:15 | INFO | beta1: 0.9
56
+ 2025-04-27,07:54:15 | INFO | beta2: 0.98
57
+ 2025-04-27,07:54:15 | INFO | cache_dir: None
58
+ 2025-04-27,07:54:15 | INFO | caption_ratio: 0.1
59
+ 2025-04-27,07:54:15 | INFO | checkpoint_path: /mnt/personal/zhudongy/rdc9m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/keep_image_uniform/checkpoints
60
+ 2025-04-27,07:54:15 | INFO | coca_caption_loss_weight: 2.0
61
+ 2025-04-27,07:54:15 | INFO | coca_contrastive_loss_weight: 1.0
62
+ 2025-04-27,07:54:15 | INFO | copy_codebase: False
63
+ 2025-04-27,07:54:15 | INFO | csv_caption_key: title
64
+ 2025-04-27,07:54:15 | INFO | csv_img_key: filepath
65
+ 2025-04-27,07:54:15 | INFO | csv_separator:
66
+ 2025-04-27,07:54:15 | INFO | dataset_resampled: False
67
+ 2025-04-27,07:54:15 | INFO | dataset_type: synthetic
68
+ 2025-04-27,07:54:15 | INFO | ddp_static_graph: False
69
+ 2025-04-27,07:54:15 | INFO | debug: False
70
+ 2025-04-27,07:54:15 | INFO | delete_previous_checkpoint: False
71
+ 2025-04-27,07:54:15 | INFO | device: cuda:0
72
+ 2025-04-27,07:54:15 | INFO | dist_backend: None
73
+ 2025-04-27,07:54:15 | INFO | dist_url: None
74
+ 2025-04-27,07:54:15 | INFO | distill: False
75
+ 2025-04-27,07:54:15 | INFO | distill_model: None
76
+ 2025-04-27,07:54:15 | INFO | distill_pretrained: None
77
+ 2025-04-27,07:54:15 | INFO | distributed: True
78
+ 2025-04-27,07:54:15 | INFO | epochs: 10
79
+ 2025-04-27,07:54:15 | INFO | epochs_cooldown: None
80
+ 2025-04-27,07:54:15 | INFO | eps: 1e-08
81
+ 2025-04-27,07:54:15 | INFO | force_custom_text: False
82
+ 2025-04-27,07:54:15 | INFO | force_image_size: None
83
+ 2025-04-27,07:54:15 | INFO | force_patch_dropout: None
84
+ 2025-04-27,07:54:15 | INFO | force_quick_gelu: False
85
+ 2025-04-27,07:54:15 | INFO | gather_with_grad: True
86
+ 2025-04-27,07:54:15 | INFO | grad_checkpointing: True
87
+ 2025-04-27,07:54:15 | INFO | grad_clip_norm: None
88
+ 2025-04-27,07:54:15 | INFO | horovod: False
89
+ 2025-04-27,07:54:15 | INFO | image_interpolation: None
90
+ 2025-04-27,07:54:15 | INFO | image_mean: None
91
+ 2025-04-27,07:54:15 | INFO | image_resize_mode: None
92
+ 2025-04-27,07:54:15 | INFO | image_std: None
93
+ 2025-04-27,07:54:15 | INFO | imagenet_v2: None
94
+ 2025-04-27,07:54:15 | INFO | imagenet_val: None
95
+ 2025-04-27,07:54:15 | INFO | keep_func_name: keep_image_uniform
96
+ 2025-04-27,07:54:15 | INFO | local_loss: False
97
+ 2025-04-27,07:54:15 | INFO | local_rank: 0
98
+ 2025-04-27,07:54:15 | INFO | lock_image: False
99
+ 2025-04-27,07:54:15 | INFO | lock_image_freeze_bn_stats: False
100
+ 2025-04-27,07:54:15 | INFO | lock_image_unlocked_groups: 0
101
+ 2025-04-27,07:54:15 | INFO | lock_text: True
102
+ 2025-04-27,07:54:15 | INFO | lock_text_freeze_layer_norm: False
103
+ 2025-04-27,07:54:15 | INFO | lock_text_unlocked_layers: 0
104
+ 2025-04-27,07:54:15 | INFO | log_every_n_steps: 100
105
+ 2025-04-27,07:54:15 | INFO | log_level: 20
106
+ 2025-04-27,07:54:15 | INFO | log_local: False
107
+ 2025-04-27,07:54:15 | INFO | log_path: /mnt/personal/zhudongy/rdc9m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/keep_image_uniform/out.log
108
+ 2025-04-27,07:54:15 | INFO | logs: /mnt/personal/zhudongy/rdc9m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text
109
+ 2025-04-27,07:54:15 | INFO | loss_dist_impl: None
110
+ 2025-04-27,07:54:15 | INFO | lr: 4e-05
111
+ 2025-04-27,07:54:15 | INFO | lr_cooldown_end: 0.0
112
+ 2025-04-27,07:54:15 | INFO | lr_cooldown_power: 1.0
113
+ 2025-04-27,07:54:15 | INFO | lr_scheduler: cosine
114
+ 2025-04-27,07:54:15 | INFO | map_func_name: use_all
115
+ 2025-04-27,07:54:15 | INFO | model: ViT-B-16
116
+ 2025-04-27,07:54:15 | INFO | momentum: None
117
+ 2025-04-27,07:54:15 | INFO | name: keep_image_uniform
118
+ 2025-04-27,07:54:15 | INFO | no_set_device_rank: False
119
+ 2025-04-27,07:54:15 | INFO | opt: adamw
120
+ 2025-04-27,07:54:15 | INFO | precision: amp
121
+ 2025-04-27,07:54:15 | INFO | pretrained: datacomp_xl_s13b_b90k
122
+ 2025-04-27,07:54:15 | INFO | pretrained_image: False
123
+ 2025-04-27,07:54:15 | INFO | rank: 0
124
+ 2025-04-27,07:54:15 | INFO | remote_sync: None
125
+ 2025-04-27,07:54:15 | INFO | remote_sync_frequency: 300
126
+ 2025-04-27,07:54:15 | INFO | remote_sync_protocol: s3
127
+ 2025-04-27,07:54:15 | INFO | report_to: tensorboard,wandb
128
+ 2025-04-27,07:54:15 | INFO | resume: None
129
+ 2025-04-27,07:54:15 | INFO | save_frequency: 10
130
+ 2025-04-27,07:54:15 | INFO | save_most_recent: False
131
+ 2025-04-27,07:54:15 | INFO | seed: 0
132
+ 2025-04-27,07:54:15 | INFO | siglip: False
133
+ 2025-04-27,07:54:15 | INFO | skip_scheduler: False
134
+ 2025-04-27,07:54:15 | INFO | tensorboard: True
135
+ 2025-04-27,07:54:15 | INFO | tensorboard_path: /mnt/personal/zhudongy/rdc9m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/keep_image_uniform/tensorboard
136
+ 2025-04-27,07:54:15 | INFO | torchcompile: False
137
+ 2025-04-27,07:54:15 | INFO | torchscript: False
138
+ 2025-04-27,07:54:15 | INFO | trace: False
139
+ 2025-04-27,07:54:15 | INFO | train_data: /mnt/personal/zhudongy/recap-datacomp-3m-wds/{00376..01507}.tar
140
+ 2025-04-27,07:54:15 | INFO | train_data_upsampling_factors: None
141
+ 2025-04-27,07:54:15 | INFO | train_num_samples: 9011874
142
+ 2025-04-27,07:54:15 | INFO | use_bn_sync: False
143
+ 2025-04-27,07:54:15 | INFO | use_bnb_linear: None
144
+ 2025-04-27,07:54:15 | INFO | val_data: None
145
+ 2025-04-27,07:54:15 | INFO | val_frequency: 1
146
+ 2025-04-27,07:54:15 | INFO | val_num_samples: None
147
+ 2025-04-27,07:54:15 | INFO | wandb: True
148
+ 2025-04-27,07:54:15 | INFO | wandb_notes:
149
+ 2025-04-27,07:54:15 | INFO | wandb_project_name: open-clip
150
+ 2025-04-27,07:54:15 | INFO | warmup: 110
151
+ 2025-04-27,07:54:15 | INFO | wd: 0.5
152
+ 2025-04-27,07:54:15 | INFO | workers: 16
153
+ 2025-04-27,07:54:15 | INFO | world_size: 2
154
+ 2025-04-27,07:54:15 | INFO | zeroshot_frequency: 2
155
+ 2025-04-27,07:54:16 | INFO | Created AdamW (adamw) optimizer: lr: 4e-05, betas: (0.9, 0.98), eps: 1e-08, weight_decay: 0.5, amsgrad: False, foreach: None, maximize: False, capturable: False, differentiable: False, fused: None
156
+ 2025-04-27,07:54:35 | INFO | Start epoch 0
157
+ 2025-04-27,07:55:28 | INFO | Train Epoch: 0 [ 8192/917504 (1%)] Data (t): 43.129 Batch (t): 52.904, 154.846/s, 77.4229/s/gpu LR: 0.000000 Logit Scale: 100.000 Imm_image: 28.541 (28.541) Imm_text: 28.541 (28.541) Isd_image: 3.2232 (3.2232) Isd_text: 3.2232 (3.2232) Contrastive_loss: 1.4352 (1.4352) Loss: 1.4352 (1.4352)
158
+ 2025-04-27,08:10:27 | INFO | Train Epoch: 0 [827392/917504 (90%)] Data (t): 0.991 Batch (t): 8.995, 919.591/s, 459.795/s/gpu LR: 0.000037 Logit Scale: 99.933 Imm_image: 28.938 (28.739) Imm_text: 28.938 (28.739) Isd_image: 5.8971 (4.5602) Isd_text: 5.8971 (4.5602) Contrastive_loss: 0.73869 (1.0870) Loss: 0.73869 (1.0870)
159
+ 2025-04-27,08:12:05 | INFO | Train Epoch: 0 [917504/917504 (100%)] Data (t): 0.934 Batch (t): 8.914, 916.232/s, 458.116/s/gpu LR: 0.000040 Logit Scale: 99.925 Imm_image: 29.008 (28.829) Imm_text: 29.008 (28.829) Isd_image: 5.3500 (4.8234) Isd_text: 5.3500 (4.8234) Contrastive_loss: 0.70851 (0.96081) Loss: 0.70851 (0.96081)
160
+ 2025-04-27,08:12:05 | INFO | Start epoch 1
161
+ 2025-04-27,08:12:51 | INFO | Train Epoch: 1 [ 8192/917504 (1%)] Data (t): 37.414 Batch (t): 45.501, 180.041/s, 90.0203/s/gpu LR: 0.000040 Logit Scale: 99.924 Imm_image: 29.122 (29.122) Imm_text: 29.122 (29.122) Isd_image: 5.4319 (5.4319) Isd_text: 5.4319 (5.4319) Contrastive_loss: 0.63045 (0.63045) Loss: 0.63045 (0.63045)
162
+ 2025-04-27,08:27:54 | INFO | Train Epoch: 1 [827392/917504 (90%)] Data (t): 1.012 Batch (t): 9.027, 920.444/s, 460.222/s/gpu LR: 0.000039 Logit Scale: 99.879 Imm_image: 29.092 (29.107) Imm_text: 29.092 (29.107) Isd_image: 4.1551 (4.7935) Isd_text: 4.1551 (4.7935) Contrastive_loss: 0.62187 (0.62616) Loss: 0.62187 (0.62616)
163
+ 2025-04-27,08:29:32 | INFO | Train Epoch: 1 [917504/917504 (100%)] Data (t): 0.942 Batch (t): 8.924, 919.590/s, 459.795/s/gpu LR: 0.000039 Logit Scale: 99.882 Imm_image: 29.122 (29.112) Imm_text: 29.122 (29.112) Isd_image: 3.7083 (4.4318) Isd_text: 3.7083 (4.4318) Contrastive_loss: 0.52793 (0.59342) Loss: 0.52793 (0.59342)
164
+ 2025-04-27,08:29:32 | INFO | Start epoch 2
165
+ 2025-04-27,08:30:16 | INFO | Train Epoch: 2 [ 8192/917504 (1%)] Data (t): 34.892 Batch (t): 43.754, 187.231/s, 93.6153/s/gpu LR: 0.000039 Logit Scale: 99.884 Imm_image: 29.246 (29.246) Imm_text: 29.246 (29.246) Isd_image: 3.5515 (3.5515) Isd_text: 3.5515 (3.5515) Contrastive_loss: 0.50764 (0.50764) Loss: 0.50764 (0.50764)
166
+ 2025-04-27,08:45:16 | INFO | Train Epoch: 2 [827392/917504 (90%)] Data (t): 0.995 Batch (t): 9.003, 918.046/s, 459.023/s/gpu LR: 0.000036 Logit Scale: 100.000 Imm_image: 29.356 (29.301) Imm_text: 29.356 (29.301) Isd_image: 2.5848 (3.0682) Isd_text: 2.5848 (3.0682) Contrastive_loss: 0.47861 (0.49313) Loss: 0.47861 (0.49313)
167
+ 2025-04-27,08:46:54 | INFO | Train Epoch: 2 [917504/917504 (100%)] Data (t): 0.955 Batch (t): 8.942, 913.227/s, 456.613/s/gpu LR: 0.000035 Logit Scale: 100.000 Imm_image: 29.512 (29.371) Imm_text: 29.512 (29.371) Isd_image: 2.4185 (2.8516) Isd_text: 2.4185 (2.8516) Contrastive_loss: 0.40780 (0.46468) Loss: 0.40780 (0.46468)
168
+ 2025-04-27,08:46:55 | INFO | Start epoch 3
169
+ 2025-04-27,08:47:39 | INFO | Train Epoch: 3 [ 8192/917504 (1%)] Data (t): 36.440 Batch (t): 44.632, 183.546/s, 91.7728/s/gpu LR: 0.000035 Logit Scale: 100.000 Imm_image: 29.499 (29.499) Imm_text: 29.499 (29.499) Isd_image: 2.4591 (2.4591) Isd_text: 2.4591 (2.4591) Contrastive_loss: 0.39220 (0.39220) Loss: 0.39220 (0.39220)
170
+ 2025-04-27,09:02:42 | INFO | Train Epoch: 3 [827392/917504 (90%)] Data (t): 1.015 Batch (t): 9.030, 917.801/s, 458.901/s/gpu LR: 0.000031 Logit Scale: 100.000 Imm_image: 29.522 (29.510) Imm_text: 29.522 (29.510) Isd_image: 1.6210 (2.0401) Isd_text: 1.6210 (2.0401) Contrastive_loss: 0.41654 (0.40437) Loss: 0.41654 (0.40437)
171
+ 2025-04-27,09:04:21 | INFO | Train Epoch: 3 [917504/917504 (100%)] Data (t): 0.950 Batch (t): 8.931, 914.190/s, 457.095/s/gpu LR: 0.000030 Logit Scale: 100.000 Imm_image: 29.597 (29.539) Imm_text: 29.597 (29.539) Isd_image: 1.4161 (1.8321) Isd_text: 1.4161 (1.8321) Contrastive_loss: 0.36959 (0.39277) Loss: 0.36959 (0.39277)
172
+ 2025-04-27,09:04:21 | INFO | Start epoch 4
173
+ 2025-04-27,09:05:04 | INFO | Train Epoch: 4 [ 8192/917504 (1%)] Data (t): 35.511 Batch (t): 43.616, 187.822/s, 93.9110/s/gpu LR: 0.000030 Logit Scale: 100.000 Imm_image: 29.727 (29.727) Imm_text: 29.727 (29.727) Isd_image: 1.5082 (1.5082) Isd_text: 1.5082 (1.5082) Contrastive_loss: 0.34478 (0.34478) Loss: 0.34478 (0.34478)
174
+ 2025-04-27,09:20:06 | INFO | Train Epoch: 4 [827392/917504 (90%)] Data (t): 1.004 Batch (t): 9.014, 916.202/s, 458.101/s/gpu LR: 0.000024 Logit Scale: 100.000 Imm_image: 29.862 (29.794) Imm_text: 29.862 (29.794) Isd_image: 0.93800 (1.2231) Isd_text: 0.93800 (1.2231) Contrastive_loss: 0.30456 (0.32467) Loss: 0.30456 (0.32467)
175
+ 2025-04-27,09:21:44 | INFO | Train Epoch: 4 [917504/917504 (100%)] Data (t): 0.954 Batch (t): 8.936, 915.496/s, 457.748/s/gpu LR: 0.000023 Logit Scale: 100.000 Imm_image: 29.837 (29.809) Imm_text: 29.837 (29.809) Isd_image: 1.1397 (1.1953) Isd_text: 1.1397 (1.1953) Contrastive_loss: 0.28918 (0.31284) Loss: 0.28918 (0.31284)
176
+ 2025-04-27,09:21:44 | INFO | Start epoch 5
177
+ 2025-04-27,09:22:29 | INFO | Train Epoch: 5 [ 8192/917504 (1%)] Data (t): 33.353 Batch (t): 44.506, 184.065/s, 92.0325/s/gpu LR: 0.000023 Logit Scale: 100.000 Imm_image: 29.815 (29.815) Imm_text: 29.815 (29.815) Isd_image: 1.1481 (1.1481) Isd_text: 1.1481 (1.1481) Contrastive_loss: 0.30516 (0.30516) Loss: 0.30516 (0.30516)
178
+ 2025-04-27,09:37:30 | INFO | Train Epoch: 5 [827392/917504 (90%)] Data (t): 1.007 Batch (t): 9.014, 918.615/s, 459.307/s/gpu LR: 0.000017 Logit Scale: 100.000 Imm_image: 29.975 (29.895) Imm_text: 29.975 (29.895) Isd_image: 0.74998 (0.94903) Isd_text: 0.74998 (0.94903) Contrastive_loss: 0.28071 (0.29293) Loss: 0.28071 (0.29293)
179
+ 2025-04-27,09:39:09 | INFO | Train Epoch: 5 [917504/917504 (100%)] Data (t): 0.950 Batch (t): 8.933, 914.077/s, 457.039/s/gpu LR: 0.000017 Logit Scale: 100.000 Imm_image: 30.192 (29.994) Imm_text: 30.192 (29.994) Isd_image: 0.69866 (0.86557) Isd_text: 0.69866 (0.86557) Contrastive_loss: 0.22392 (0.26993) Loss: 0.22392 (0.26993)
180
+ 2025-04-27,09:39:09 | INFO | Start epoch 6
181
+ 2025-04-27,09:39:54 | INFO | Train Epoch: 6 [ 8192/917504 (1%)] Data (t): 35.636 Batch (t): 45.368, 180.569/s, 90.2846/s/gpu LR: 0.000016 Logit Scale: 100.000 Imm_image: 30.142 (30.142) Imm_text: 30.142 (30.142) Isd_image: 0.83195 (0.83195) Isd_text: 0.83195 (0.83195) Contrastive_loss: 0.24683 (0.24683) Loss: 0.24683 (0.24683)
182
+ 2025-04-27,09:54:56 | INFO | Train Epoch: 6 [827392/917504 (90%)] Data (t): 1.009 Batch (t): 9.016, 917.514/s, 458.757/s/gpu LR: 0.000011 Logit Scale: 100.000 Imm_image: 30.182 (30.162) Imm_text: 30.182 (30.162) Isd_image: 0.66375 (0.74785) Isd_text: 0.66375 (0.74785) Contrastive_loss: 0.25392 (0.25038) Loss: 0.25392 (0.25038)
183
+ 2025-04-27,09:56:34 | INFO | Train Epoch: 6 [917504/917504 (100%)] Data (t): 0.951 Batch (t): 8.934, 914.233/s, 457.117/s/gpu LR: 0.000010 Logit Scale: 100.000 Imm_image: 30.260 (30.194) Imm_text: 30.260 (30.194) Isd_image: 0.58846 (0.69472) Isd_text: 0.58846 (0.69472) Contrastive_loss: 0.21603 (0.23893) Loss: 0.21603 (0.23893)
184
+ 2025-04-27,09:56:34 | INFO | Start epoch 7
185
+ 2025-04-27,09:57:20 | INFO | Train Epoch: 7 [ 8192/917504 (1%)] Data (t): 36.683 Batch (t): 45.466, 180.179/s, 90.0895/s/gpu LR: 0.000010 Logit Scale: 100.000 Imm_image: 30.232 (30.232) Imm_text: 30.232 (30.232) Isd_image: 0.56494 (0.56494) Isd_text: 0.56494 (0.56494) Contrastive_loss: 0.21279 (0.21279) Loss: 0.21279 (0.21279)
186
+ 2025-04-27,10:12:22 | INFO | Train Epoch: 7 [827392/917504 (90%)] Data (t): 1.006 Batch (t): 9.018, 918.150/s, 459.075/s/gpu LR: 0.000005 Logit Scale: 100.000 Imm_image: 30.290 (30.261) Imm_text: 30.290 (30.261) Isd_image: 0.56392 (0.56443) Isd_text: 0.56392 (0.56443) Contrastive_loss: 0.24375 (0.22827) Loss: 0.24375 (0.22827)
187
+ 2025-04-27,10:14:03 | INFO | Train Epoch: 7 [917504/917504 (100%)] Data (t): 1.142 Batch (t): 9.181, 904.205/s, 452.103/s/gpu LR: 0.000005 Logit Scale: 100.000 Imm_image: 30.414 (30.312) Imm_text: 30.414 (30.312) Isd_image: 0.57138 (0.56675) Isd_text: 0.57138 (0.56675) Contrastive_loss: 0.21433 (0.22362) Loss: 0.21433 (0.22362)
188
+ 2025-04-27,10:14:03 | INFO | Start epoch 8
189
+ 2025-04-27,10:14:46 | INFO | Train Epoch: 8 [ 8192/917504 (1%)] Data (t): 34.882 Batch (t): 43.296, 189.209/s, 94.6043/s/gpu LR: 0.000005 Logit Scale: 100.000 Imm_image: 30.263 (30.263) Imm_text: 30.263 (30.263) Isd_image: 0.63063 (0.63063) Isd_text: 0.63063 (0.63063) Contrastive_loss: 0.21246 (0.21246) Loss: 0.21246 (0.21246)
190
+ 2025-04-27,10:29:43 | INFO | Train Epoch: 8 [827392/917504 (90%)] Data (t): 0.969 Batch (t): 8.967, 920.783/s, 460.391/s/gpu LR: 0.000001 Logit Scale: 100.000 Imm_image: 30.228 (30.245) Imm_text: 30.228 (30.245) Isd_image: 0.59837 (0.61450) Isd_text: 0.59837 (0.61450) Contrastive_loss: 0.24290 (0.22768) Loss: 0.24290 (0.22768)
191
+ 2025-04-27,10:31:21 | INFO | Train Epoch: 8 [917504/917504 (100%)] Data (t): 0.919 Batch (t): 8.886, 919.279/s, 459.640/s/gpu LR: 0.000001 Logit Scale: 100.000 Imm_image: 30.411 (30.301) Imm_text: 30.411 (30.301) Isd_image: 0.56638 (0.59846) Isd_text: 0.56638 (0.59846) Contrastive_loss: 0.22206 (0.22581) Loss: 0.22206 (0.22581)
192
+ 2025-04-27,10:31:21 | INFO | Start epoch 9
193
+ 2025-04-27,10:32:04 | INFO | Train Epoch: 9 [ 8192/917504 (1%)] Data (t): 35.369 Batch (t): 43.449, 188.542/s, 94.2708/s/gpu LR: 0.000001 Logit Scale: 100.000 Imm_image: 30.428 (30.428) Imm_text: 30.428 (30.428) Isd_image: 0.43297 (0.43297) Isd_text: 0.43297 (0.43297) Contrastive_loss: 0.18715 (0.18715) Loss: 0.18715 (0.18715)
194
+ 2025-04-27,10:47:03 | INFO | Train Epoch: 9 [827392/917504 (90%)] Data (t): 0.984 Batch (t): 8.989, 919.087/s, 459.543/s/gpu LR: 0.000000 Logit Scale: 100.000 Imm_image: 30.323 (30.375) Imm_text: 30.323 (30.375) Isd_image: 0.59950 (0.51624) Isd_text: 0.59950 (0.51624) Contrastive_loss: 0.22314 (0.20515) Loss: 0.22314 (0.20515)
195
+ 2025-04-27,10:48:41 | INFO | Train Epoch: 9 [917504/917504 (100%)] Data (t): 0.937 Batch (t): 8.914, 919.208/s, 459.604/s/gpu LR: 0.000000 Logit Scale: 100.000 Imm_image: 30.394 (30.382) Imm_text: 30.394 (30.382) Isd_image: 0.49815 (0.51021) Isd_text: 0.49815 (0.51021) Contrastive_loss: 0.20625 (0.20551) Loss: 0.20625 (0.20551)
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/keep_image_uniform/params.txt ADDED
@@ -0,0 +1,103 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ accum_freq: 2
2
+ aug_cfg: {}
3
+ batch_size: 2048
4
+ beta1: 0.9
5
+ beta2: 0.98
6
+ cache_dir: None
7
+ caption_ratio: 0.1
8
+ checkpoint_path: /mnt/personal/zhudongy/rdc9m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/keep_image_uniform/checkpoints
9
+ coca_caption_loss_weight: 2.0
10
+ coca_contrastive_loss_weight: 1.0
11
+ copy_codebase: False
12
+ csv_caption_key: title
13
+ csv_img_key: filepath
14
+ csv_separator:
15
+ dataset_resampled: False
16
+ dataset_type: synthetic
17
+ ddp_static_graph: False
18
+ debug: False
19
+ delete_previous_checkpoint: False
20
+ device: cuda:0
21
+ dist_backend: None
22
+ dist_url: None
23
+ distill: False
24
+ distill_model: None
25
+ distill_pretrained: None
26
+ distributed: True
27
+ epochs: 10
28
+ epochs_cooldown: None
29
+ eps: 1e-08
30
+ force_custom_text: False
31
+ force_image_size: None
32
+ force_patch_dropout: None
33
+ force_quick_gelu: False
34
+ gather_with_grad: True
35
+ grad_checkpointing: True
36
+ grad_clip_norm: None
37
+ horovod: False
38
+ image_interpolation: None
39
+ image_mean: None
40
+ image_resize_mode: None
41
+ image_std: None
42
+ imagenet_v2: None
43
+ imagenet_val: None
44
+ keep_func_name: keep_image_uniform
45
+ local_loss: False
46
+ local_rank: 0
47
+ lock_image: False
48
+ lock_image_freeze_bn_stats: False
49
+ lock_image_unlocked_groups: 0
50
+ lock_text: True
51
+ lock_text_freeze_layer_norm: False
52
+ lock_text_unlocked_layers: 0
53
+ log_every_n_steps: 100
54
+ log_level: 20
55
+ log_local: False
56
+ log_path: /mnt/personal/zhudongy/rdc9m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/keep_image_uniform/out.log
57
+ logs: /mnt/personal/zhudongy/rdc9m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text
58
+ loss_dist_impl: None
59
+ lr: 4e-05
60
+ lr_cooldown_end: 0.0
61
+ lr_cooldown_power: 1.0
62
+ lr_scheduler: cosine
63
+ map_func_name: use_all
64
+ model: ViT-B-16
65
+ momentum: None
66
+ name: keep_image_uniform
67
+ no_set_device_rank: False
68
+ opt: adamw
69
+ precision: amp
70
+ pretrained: datacomp_xl_s13b_b90k
71
+ pretrained_image: False
72
+ rank: 0
73
+ remote_sync: None
74
+ remote_sync_frequency: 300
75
+ remote_sync_protocol: s3
76
+ report_to: tensorboard,wandb
77
+ resume: None
78
+ save_frequency: 10
79
+ save_most_recent: False
80
+ seed: 0
81
+ siglip: False
82
+ skip_scheduler: False
83
+ tensorboard: True
84
+ tensorboard_path: /mnt/personal/zhudongy/rdc9m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/keep_image_uniform/tensorboard
85
+ torchcompile: False
86
+ torchscript: False
87
+ trace: False
88
+ train_data: /mnt/personal/zhudongy/recap-datacomp-3m-wds/{00376..01507}.tar
89
+ train_data_upsampling_factors: None
90
+ train_num_samples: 9011874
91
+ use_bn_sync: False
92
+ use_bnb_linear: None
93
+ val_data: None
94
+ val_frequency: 1
95
+ val_num_samples: None
96
+ wandb: True
97
+ wandb_notes:
98
+ wandb_project_name: open-clip
99
+ warmup: 110
100
+ wd: 0.5
101
+ workers: 16
102
+ world_size: 2
103
+ zeroshot_frequency: 2
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/keep_image_uniform/tensorboard/events.out.tfevents.1745733263.g12.2655775.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7d083cee4ab2f2fb5312adf93f6583505476bcc1b1f2fc69ab2462355cb6aec4
3
+ size 19936
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/keep_random/tensorboard/events.out.tfevents.1745710659.g12.2628499.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5f17f7467451bdea01cb70bda9f19c3d10fc6fa0852805af8a7ec1333a87945a
3
+ size 19936
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/keep_text_closest_image_closest/tensorboard/events.out.tfevents.1745663928.g12.2393971.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7461fb18c1b53d1ed330cdf217a4ce913fd2348163ec9e311e36493041bd03db
3
+ size 19936
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/keep_text_farest_image_farest/tensorboard/events.out.tfevents.1745755854.g12.2682891.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3498cb2e8566f663433bd058a744f1565c5d2ba308c6658b9965266bc5cd1f4d
3
+ size 19936
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/low_inter_only/benchmark_caltech101_epoch_10.pt_ViT-B-16_en_zeroshot_classification.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"dataset": "caltech101", "model": "ViT-B-16", "pretrained": "/mnt/personal/zhudongy/rdc9m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/low_inter_only/checkpoints/epoch_10.pt", "task": "zeroshot_classification", "metrics": {"acc1": 0.8611111111111112, "acc5": 0.9732064741907261, "mean_per_class_recall": 0.9224088045513618}, "language": "en"}
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/low_inter_only/benchmark_cars_epoch_10.pt_ViT-B-16_en_zeroshot_classification.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"dataset": "cars", "model": "ViT-B-16", "pretrained": "/mnt/personal/zhudongy/rdc9m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/low_inter_only/checkpoints/epoch_10.pt", "task": "zeroshot_classification", "metrics": {"acc1": 0.8303693570451436, "acc5": 0.988931724909837, "mean_per_class_recall": 0.8305479995448934}, "language": "en"}
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/low_inter_only/benchmark_cifar100_epoch_10.pt_ViT-B-16_en_zeroshot_classification.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"dataset": "cifar100", "model": "ViT-B-16", "pretrained": "/mnt/personal/zhudongy/rdc9m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/low_inter_only/checkpoints/epoch_10.pt", "task": "zeroshot_classification", "metrics": {"acc1": 0.7475, "acc5": 0.9333, "mean_per_class_recall": 0.7476000000000002}, "language": "en"}
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/low_inter_only/benchmark_cifar10_epoch_10.pt_ViT-B-16_en_zeroshot_classification.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"dataset": "cifar10", "model": "ViT-B-16", "pretrained": "/mnt/personal/zhudongy/rdc9m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/low_inter_only/checkpoints/epoch_10.pt", "task": "zeroshot_classification", "metrics": {"acc1": 0.9345, "acc5": 0.999, "mean_per_class_recall": 0.9345000000000001}, "language": "en"}
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/low_inter_only/benchmark_country211_epoch_10.pt_ViT-B-16_en_zeroshot_classification.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"dataset": "country211", "model": "ViT-B-16", "pretrained": "/mnt/personal/zhudongy/rdc9m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/low_inter_only/checkpoints/epoch_10.pt", "task": "zeroshot_classification", "metrics": {"acc1": 0.19374407582938388, "acc5": 0.4218483412322275, "mean_per_class_recall": 0.1937914691943128}, "language": "en"}
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/low_inter_only/benchmark_dtd_epoch_10.pt_ViT-B-16_en_zeroshot_classification.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"dataset": "dtd", "model": "ViT-B-16", "pretrained": "/mnt/personal/zhudongy/rdc9m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/low_inter_only/checkpoints/epoch_10.pt", "task": "zeroshot_classification", "metrics": {"acc1": 0.5340425531914894, "acc5": 0.8228723404255319, "mean_per_class_recall": 0.5329787234042552}, "language": "en"}
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/low_inter_only/benchmark_eurosat_epoch_10.pt_ViT-B-16_en_zeroshot_classification.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"dataset": "eurosat", "model": "ViT-B-16", "pretrained": "/mnt/personal/zhudongy/rdc9m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/low_inter_only/checkpoints/epoch_10.pt", "task": "zeroshot_classification", "metrics": {"acc1": 0.4602222222222222, "acc5": 0.8801111111111111, "mean_per_class_recall": 0.47197999999999996}, "language": "en"}
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/low_inter_only/benchmark_flickr30k_epoch_10.pt_ViT-B-16_en_zeroshot_retrieval.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"dataset": "flickr30k", "model": "ViT-B-16", "pretrained": "/mnt/personal/zhudongy/rdc9m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/low_inter_only/checkpoints/epoch_10.pt", "task": "zeroshot_retrieval", "metrics": {"image_retrieval_recall@1": 0.6621999740600586, "text_retrieval_recall@1": 0.8259999752044678, "image_retrieval_recall@5": 0.8751999735832214, "text_retrieval_recall@5": 0.9580000042915344, "image_retrieval_recall@10": 0.9215999841690063, "text_retrieval_recall@10": 0.9800000190734863}, "language": "en"}
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/low_inter_only/benchmark_flowers_epoch_10.pt_ViT-B-16_en_zeroshot_classification.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"dataset": "flowers", "model": "ViT-B-16", "pretrained": "/mnt/personal/zhudongy/rdc9m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/low_inter_only/checkpoints/epoch_10.pt", "task": "zeroshot_classification", "metrics": {"acc1": 0.7518295657830542, "acc5": 0.9006342494714588, "mean_per_class_recall": 0.7590609398935276}, "language": "en"}
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/low_inter_only/benchmark_food101_epoch_10.pt_ViT-B-16_en_zeroshot_classification.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"dataset": "food101", "model": "ViT-B-16", "pretrained": "/mnt/personal/zhudongy/rdc9m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/low_inter_only/checkpoints/epoch_10.pt", "task": "zeroshot_classification", "metrics": {"acc1": 0.8712475247524752, "acc5": 0.9801584158415841, "mean_per_class_recall": 0.8710891089108911}, "language": "en"}
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/low_inter_only/benchmark_gtsrb_epoch_10.pt_ViT-B-16_en_zeroshot_classification.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"dataset": "gtsrb", "model": "ViT-B-16", "pretrained": "/mnt/personal/zhudongy/rdc9m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/low_inter_only/checkpoints/epoch_10.pt", "task": "zeroshot_classification", "metrics": {"acc1": 0.5334916864608076, "acc5": 0.7653206650831353, "mean_per_class_recall": 0.49353806950268025}, "language": "en"}
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/low_inter_only/benchmark_imagenet1k_epoch_10.pt_ViT-B-16_en_zeroshot_classification.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"dataset": "imagenet1k", "model": "ViT-B-16", "pretrained": "/mnt/personal/zhudongy/rdc9m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/low_inter_only/checkpoints/epoch_10.pt", "task": "zeroshot_classification", "metrics": {"acc1": 0.68076, "acc5": 0.90792, "mean_per_class_recall": 0.68056}, "language": "en"}
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/low_inter_only/benchmark_mscoco_captions_epoch_10.pt_ViT-B-16_en_zeroshot_retrieval.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"dataset": "mscoco_captions", "model": "ViT-B-16", "pretrained": "/mnt/personal/zhudongy/rdc9m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/low_inter_only/checkpoints/epoch_10.pt", "task": "zeroshot_retrieval", "metrics": {"image_retrieval_recall@1": 0.3862854838371277, "text_retrieval_recall@1": 0.5411999821662903, "image_retrieval_recall@5": 0.6445421576499939, "text_retrieval_recall@5": 0.7820000052452087, "image_retrieval_recall@10": 0.745781660079956, "text_retrieval_recall@10": 0.8583999872207642}, "language": "en"}
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/low_inter_only/benchmark_pets_epoch_10.pt_ViT-B-16_en_zeroshot_classification.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"dataset": "pets", "model": "ViT-B-16", "pretrained": "/mnt/personal/zhudongy/rdc9m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/low_inter_only/checkpoints/epoch_10.pt", "task": "zeroshot_classification", "metrics": {"acc1": 0.8997001907876806, "acc5": 0.9967293540474244, "mean_per_class_recall": 0.8993521290462428}, "language": "en"}
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/low_inter_only/benchmark_stl10_epoch_10.pt_ViT-B-16_en_zeroshot_classification.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"dataset": "stl10", "model": "ViT-B-16", "pretrained": "/mnt/personal/zhudongy/rdc9m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/low_inter_only/checkpoints/epoch_10.pt", "task": "zeroshot_classification", "metrics": {"acc1": 0.977, "acc5": 1.0, "mean_per_class_recall": 0.977}, "language": "en"}
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/low_inter_only/benchmark_sun397_epoch_10.pt_ViT-B-16_en_zeroshot_classification.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"dataset": "sun397", "model": "ViT-B-16", "pretrained": "/mnt/personal/zhudongy/rdc9m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/low_inter_only/checkpoints/epoch_10.pt", "task": "zeroshot_classification", "metrics": {"acc1": 0.6715707008477849, "acc5": 0.9273681887562756, "mean_per_class_recall": 0.6589770233905725}, "language": "en"}
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/low_inter_only/benchmark_vtab_resisc45_epoch_10.pt_ViT-B-16_en_zeroshot_classification.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"dataset": "vtab/resisc45", "model": "ViT-B-16", "pretrained": "/mnt/personal/zhudongy/rdc9m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/low_inter_only/checkpoints/epoch_10.pt", "task": "zeroshot_classification", "metrics": {"acc1": 0.6215873015873016, "acc5": 0.9153968253968254, "mean_per_class_recall": 0.6280168420204852}, "language": "en"}
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/low_inter_only/out.log ADDED
@@ -0,0 +1,195 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2025-04-27,20:53:01 | INFO | Running in distributed mode with multiple processes. Device: cuda:0.Process (global: 0, local 0), total 2.
2
+ 2025-04-27,20:53:01 | INFO | Loaded ViT-B-16 model config.
3
+ 2025-04-27,20:53:02 | INFO | Loading pretrained ViT-B-16 weights (datacomp_xl_s13b_b90k).
4
+ 2025-04-27,20:53:03 | INFO | Model:
5
+ 2025-04-27,20:53:03 | INFO | CLIP(
6
+ (visual): VisionTransformer(
7
+ (conv1): Conv2d(3, 768, kernel_size=(16, 16), stride=(16, 16), bias=False)
8
+ (patch_dropout): Identity()
9
+ (ln_pre): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
10
+ (transformer): Transformer(
11
+ (resblocks): ModuleList(
12
+ (0-11): 12 x ResidualAttentionBlock(
13
+ (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
14
+ (attn): MultiheadAttention(
15
+ (out_proj): NonDynamicallyQuantizableLinear(in_features=768, out_features=768, bias=True)
16
+ )
17
+ (ls_1): Identity()
18
+ (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
19
+ (mlp): Sequential(
20
+ (c_fc): Linear(in_features=768, out_features=3072, bias=True)
21
+ (gelu): GELU(approximate='none')
22
+ (c_proj): Linear(in_features=3072, out_features=768, bias=True)
23
+ )
24
+ (ls_2): Identity()
25
+ )
26
+ )
27
+ )
28
+ (ln_post): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
29
+ )
30
+ (transformer): Transformer(
31
+ (resblocks): ModuleList(
32
+ (0-11): 12 x ResidualAttentionBlock(
33
+ (ln_1): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
34
+ (attn): MultiheadAttention(
35
+ (out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)
36
+ )
37
+ (ls_1): Identity()
38
+ (ln_2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
39
+ (mlp): Sequential(
40
+ (c_fc): Linear(in_features=512, out_features=2048, bias=True)
41
+ (gelu): GELU(approximate='none')
42
+ (c_proj): Linear(in_features=2048, out_features=512, bias=True)
43
+ )
44
+ (ls_2): Identity()
45
+ )
46
+ )
47
+ )
48
+ (token_embedding): Embedding(49408, 512)
49
+ (ln_final): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
50
+ )
51
+ 2025-04-27,20:53:03 | INFO | Params:
52
+ 2025-04-27,20:53:03 | INFO | accum_freq: 2
53
+ 2025-04-27,20:53:03 | INFO | aug_cfg: {}
54
+ 2025-04-27,20:53:03 | INFO | batch_size: 2048
55
+ 2025-04-27,20:53:03 | INFO | beta1: 0.9
56
+ 2025-04-27,20:53:03 | INFO | beta2: 0.98
57
+ 2025-04-27,20:53:03 | INFO | cache_dir: None
58
+ 2025-04-27,20:53:03 | INFO | caption_ratio: 0.1
59
+ 2025-04-27,20:53:03 | INFO | checkpoint_path: /mnt/personal/zhudongy/rdc9m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/low_inter_only/checkpoints
60
+ 2025-04-27,20:53:03 | INFO | coca_caption_loss_weight: 2.0
61
+ 2025-04-27,20:53:03 | INFO | coca_contrastive_loss_weight: 1.0
62
+ 2025-04-27,20:53:03 | INFO | copy_codebase: False
63
+ 2025-04-27,20:53:03 | INFO | csv_caption_key: title
64
+ 2025-04-27,20:53:03 | INFO | csv_img_key: filepath
65
+ 2025-04-27,20:53:03 | INFO | csv_separator:
66
+ 2025-04-27,20:53:03 | INFO | dataset_resampled: False
67
+ 2025-04-27,20:53:03 | INFO | dataset_type: synthetic
68
+ 2025-04-27,20:53:03 | INFO | ddp_static_graph: False
69
+ 2025-04-27,20:53:03 | INFO | debug: False
70
+ 2025-04-27,20:53:03 | INFO | delete_previous_checkpoint: False
71
+ 2025-04-27,20:53:03 | INFO | device: cuda:0
72
+ 2025-04-27,20:53:03 | INFO | dist_backend: None
73
+ 2025-04-27,20:53:03 | INFO | dist_url: None
74
+ 2025-04-27,20:53:03 | INFO | distill: False
75
+ 2025-04-27,20:53:03 | INFO | distill_model: None
76
+ 2025-04-27,20:53:03 | INFO | distill_pretrained: None
77
+ 2025-04-27,20:53:03 | INFO | distributed: True
78
+ 2025-04-27,20:53:03 | INFO | epochs: 10
79
+ 2025-04-27,20:53:03 | INFO | epochs_cooldown: None
80
+ 2025-04-27,20:53:03 | INFO | eps: 1e-08
81
+ 2025-04-27,20:53:03 | INFO | force_custom_text: False
82
+ 2025-04-27,20:53:03 | INFO | force_image_size: None
83
+ 2025-04-27,20:53:03 | INFO | force_patch_dropout: None
84
+ 2025-04-27,20:53:03 | INFO | force_quick_gelu: False
85
+ 2025-04-27,20:53:03 | INFO | gather_with_grad: True
86
+ 2025-04-27,20:53:03 | INFO | grad_checkpointing: True
87
+ 2025-04-27,20:53:03 | INFO | grad_clip_norm: None
88
+ 2025-04-27,20:53:03 | INFO | horovod: False
89
+ 2025-04-27,20:53:03 | INFO | image_interpolation: None
90
+ 2025-04-27,20:53:03 | INFO | image_mean: None
91
+ 2025-04-27,20:53:03 | INFO | image_resize_mode: None
92
+ 2025-04-27,20:53:03 | INFO | image_std: None
93
+ 2025-04-27,20:53:03 | INFO | imagenet_v2: None
94
+ 2025-04-27,20:53:03 | INFO | imagenet_val: None
95
+ 2025-04-27,20:53:03 | INFO | keep_func_name: low_inter_only
96
+ 2025-04-27,20:53:03 | INFO | local_loss: False
97
+ 2025-04-27,20:53:03 | INFO | local_rank: 0
98
+ 2025-04-27,20:53:03 | INFO | lock_image: False
99
+ 2025-04-27,20:53:03 | INFO | lock_image_freeze_bn_stats: False
100
+ 2025-04-27,20:53:03 | INFO | lock_image_unlocked_groups: 0
101
+ 2025-04-27,20:53:03 | INFO | lock_text: True
102
+ 2025-04-27,20:53:03 | INFO | lock_text_freeze_layer_norm: False
103
+ 2025-04-27,20:53:03 | INFO | lock_text_unlocked_layers: 0
104
+ 2025-04-27,20:53:03 | INFO | log_every_n_steps: 100
105
+ 2025-04-27,20:53:03 | INFO | log_level: 20
106
+ 2025-04-27,20:53:03 | INFO | log_local: False
107
+ 2025-04-27,20:53:03 | INFO | log_path: /mnt/personal/zhudongy/rdc9m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/low_inter_only/out.log
108
+ 2025-04-27,20:53:03 | INFO | logs: /mnt/personal/zhudongy/rdc9m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text
109
+ 2025-04-27,20:53:03 | INFO | loss_dist_impl: None
110
+ 2025-04-27,20:53:03 | INFO | lr: 4e-05
111
+ 2025-04-27,20:53:03 | INFO | lr_cooldown_end: 0.0
112
+ 2025-04-27,20:53:03 | INFO | lr_cooldown_power: 1.0
113
+ 2025-04-27,20:53:03 | INFO | lr_scheduler: cosine
114
+ 2025-04-27,20:53:03 | INFO | map_func_name: use_all
115
+ 2025-04-27,20:53:03 | INFO | model: ViT-B-16
116
+ 2025-04-27,20:53:03 | INFO | momentum: None
117
+ 2025-04-27,20:53:03 | INFO | name: low_inter_only
118
+ 2025-04-27,20:53:03 | INFO | no_set_device_rank: False
119
+ 2025-04-27,20:53:03 | INFO | opt: adamw
120
+ 2025-04-27,20:53:03 | INFO | precision: amp
121
+ 2025-04-27,20:53:03 | INFO | pretrained: datacomp_xl_s13b_b90k
122
+ 2025-04-27,20:53:03 | INFO | pretrained_image: False
123
+ 2025-04-27,20:53:03 | INFO | rank: 0
124
+ 2025-04-27,20:53:03 | INFO | remote_sync: None
125
+ 2025-04-27,20:53:03 | INFO | remote_sync_frequency: 300
126
+ 2025-04-27,20:53:03 | INFO | remote_sync_protocol: s3
127
+ 2025-04-27,20:53:03 | INFO | report_to: tensorboard,wandb
128
+ 2025-04-27,20:53:03 | INFO | resume: None
129
+ 2025-04-27,20:53:03 | INFO | save_frequency: 10
130
+ 2025-04-27,20:53:03 | INFO | save_most_recent: False
131
+ 2025-04-27,20:53:03 | INFO | seed: 0
132
+ 2025-04-27,20:53:03 | INFO | siglip: False
133
+ 2025-04-27,20:53:03 | INFO | skip_scheduler: False
134
+ 2025-04-27,20:53:03 | INFO | tensorboard: True
135
+ 2025-04-27,20:53:03 | INFO | tensorboard_path: /mnt/personal/zhudongy/rdc9m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/low_inter_only/tensorboard
136
+ 2025-04-27,20:53:03 | INFO | torchcompile: False
137
+ 2025-04-27,20:53:03 | INFO | torchscript: False
138
+ 2025-04-27,20:53:03 | INFO | trace: False
139
+ 2025-04-27,20:53:03 | INFO | train_data: /mnt/personal/zhudongy/recap-datacomp-3m-wds/{00376..01507}.tar
140
+ 2025-04-27,20:53:03 | INFO | train_data_upsampling_factors: None
141
+ 2025-04-27,20:53:03 | INFO | train_num_samples: 9011874
142
+ 2025-04-27,20:53:03 | INFO | use_bn_sync: False
143
+ 2025-04-27,20:53:03 | INFO | use_bnb_linear: None
144
+ 2025-04-27,20:53:03 | INFO | val_data: None
145
+ 2025-04-27,20:53:03 | INFO | val_frequency: 1
146
+ 2025-04-27,20:53:03 | INFO | val_num_samples: None
147
+ 2025-04-27,20:53:03 | INFO | wandb: True
148
+ 2025-04-27,20:53:03 | INFO | wandb_notes:
149
+ 2025-04-27,20:53:03 | INFO | wandb_project_name: open-clip
150
+ 2025-04-27,20:53:03 | INFO | warmup: 110
151
+ 2025-04-27,20:53:03 | INFO | wd: 0.5
152
+ 2025-04-27,20:53:03 | INFO | workers: 16
153
+ 2025-04-27,20:53:03 | INFO | world_size: 2
154
+ 2025-04-27,20:53:03 | INFO | zeroshot_frequency: 2
155
+ 2025-04-27,20:53:03 | INFO | Created AdamW (adamw) optimizer: lr: 4e-05, betas: (0.9, 0.98), eps: 1e-08, weight_decay: 0.5, amsgrad: False, foreach: None, maximize: False, capturable: False, differentiable: False, fused: None
156
+ 2025-04-27,20:53:45 | INFO | Start epoch 0
157
+ 2025-04-27,20:54:39 | INFO | Train Epoch: 0 [ 8192/917504 (1%)] Data (t): 44.740 Batch (t): 54.300, 150.864/s, 75.4322/s/gpu LR: 0.000000 Logit Scale: 100.000 Imm_image: 28.004 (28.004) Imm_text: 28.004 (28.004) Isd_image: 0.48300 (0.48300) Isd_text: 0.48300 (0.48300) Contrastive_loss: 1.3863 (1.3863) Loss: 1.3863 (1.3863)
158
+ 2025-04-27,21:09:40 | INFO | Train Epoch: 0 [827392/917504 (90%)] Data (t): 1.004 Batch (t): 9.011, 913.635/s, 456.818/s/gpu LR: 0.000037 Logit Scale: 99.932 Imm_image: 28.809 (28.407) Imm_text: 28.809 (28.407) Isd_image: 4.0077 (2.2454) Isd_text: 4.0077 (2.2454) Contrastive_loss: 0.64345 (1.0149) Loss: 0.64345 (1.0149)
159
+ 2025-04-27,21:11:19 | INFO | Train Epoch: 0 [917504/917504 (100%)] Data (t): 0.966 Batch (t): 8.956, 911.469/s, 455.734/s/gpu LR: 0.000040 Logit Scale: 99.925 Imm_image: 28.850 (28.554) Imm_text: 28.850 (28.554) Isd_image: 3.6929 (2.7279) Isd_text: 3.6929 (2.7279) Contrastive_loss: 0.61486 (0.88152) Loss: 0.61486 (0.88152)
160
+ 2025-04-27,21:11:19 | INFO | Start epoch 1
161
+ 2025-04-27,21:12:04 | INFO | Train Epoch: 1 [ 8192/917504 (1%)] Data (t): 37.221 Batch (t): 45.300, 180.838/s, 90.4192/s/gpu LR: 0.000040 Logit Scale: 99.924 Imm_image: 28.894 (28.894) Imm_text: 28.894 (28.894) Isd_image: 3.6459 (3.6459) Isd_text: 3.6459 (3.6459) Contrastive_loss: 0.56427 (0.56427) Loss: 0.56427 (0.56427)
162
+ 2025-04-27,21:27:11 | INFO | Train Epoch: 1 [827392/917504 (90%)] Data (t): 1.045 Batch (t): 9.067, 913.108/s, 456.554/s/gpu LR: 0.000039 Logit Scale: 99.876 Imm_image: 28.777 (28.835) Imm_text: 28.777 (28.835) Isd_image: 2.0186 (2.8323) Isd_text: 2.0186 (2.8323) Contrastive_loss: 0.54482 (0.55454) Loss: 0.54482 (0.55454)
163
+ 2025-04-27,21:28:50 | INFO | Train Epoch: 1 [917504/917504 (100%)] Data (t): 0.990 Batch (t): 8.986, 910.140/s, 455.070/s/gpu LR: 0.000039 Logit Scale: 99.875 Imm_image: 28.908 (28.860) Imm_text: 28.908 (28.860) Isd_image: 1.5264 (2.3970) Isd_text: 1.5264 (2.3970) Contrastive_loss: 0.44167 (0.51692) Loss: 0.44167 (0.51692)
164
+ 2025-04-27,21:28:50 | INFO | Start epoch 2
165
+ 2025-04-27,21:29:35 | INFO | Train Epoch: 2 [ 8192/917504 (1%)] Data (t): 36.369 Batch (t): 44.535, 183.945/s, 91.9726/s/gpu LR: 0.000039 Logit Scale: 99.876 Imm_image: 28.971 (28.971) Imm_text: 28.971 (28.971) Isd_image: 1.5085 (1.5085) Isd_text: 1.5085 (1.5085) Contrastive_loss: 0.46002 (0.46002) Loss: 0.46002 (0.46002)
166
+ 2025-04-27,21:44:31 | INFO | Train Epoch: 2 [827392/917504 (90%)] Data (t): 0.966 Batch (t): 8.965, 921.609/s, 460.804/s/gpu LR: 0.000036 Logit Scale: 99.957 Imm_image: 29.224 (29.097) Imm_text: 29.224 (29.097) Isd_image: 0.16710 (0.83778) Isd_text: 0.16710 (0.83778) Contrastive_loss: 0.42271 (0.44137) Loss: 0.42271 (0.44137)
167
+ 2025-04-27,21:46:09 | INFO | Train Epoch: 2 [917504/917504 (100%)] Data (t): 0.923 Batch (t): 8.896, 916.982/s, 458.491/s/gpu LR: 0.000035 Logit Scale: 99.971 Imm_image: 29.325 (29.173) Imm_text: 29.325 (29.173) Isd_image: 0.038107 (0.57122) Isd_text: 0.038107 (0.57122) Contrastive_loss: 0.36076 (0.41450) Loss: 0.36076 (0.41450)
168
+ 2025-04-27,21:46:09 | INFO | Start epoch 3
169
+ 2025-04-27,21:46:54 | INFO | Train Epoch: 3 [ 8192/917504 (1%)] Data (t): 36.596 Batch (t): 44.687, 183.320/s, 91.6598/s/gpu LR: 0.000035 Logit Scale: 99.975 Imm_image: 29.349 (29.349) Imm_text: 29.349 (29.349) Isd_image: -0.042467 (-0.042467) Isd_text: -0.042467 (-0.042467) Contrastive_loss: 0.37044 (0.37044) Loss: 0.37044 (0.37044)
170
+ 2025-04-27,22:01:54 | INFO | Train Epoch: 3 [827392/917504 (90%)] Data (t): 0.992 Batch (t): 8.996, 919.788/s, 459.894/s/gpu LR: 0.000031 Logit Scale: 100.000 Imm_image: 29.476 (29.412) Imm_text: 29.476 (29.412) Isd_image: -0.75747 (-0.39997) Isd_text: -0.75747 (-0.39997) Contrastive_loss: 0.37558 (0.37301) Loss: 0.37558 (0.37301)
171
+ 2025-04-27,22:03:32 | INFO | Train Epoch: 3 [917504/917504 (100%)] Data (t): 0.935 Batch (t): 8.913, 917.124/s, 458.562/s/gpu LR: 0.000030 Logit Scale: 100.000 Imm_image: 29.571 (29.465) Imm_text: 29.571 (29.465) Isd_image: -0.97516 (-0.59170) Isd_text: -0.97516 (-0.59170) Contrastive_loss: 0.31964 (0.35522) Loss: 0.31964 (0.35522)
172
+ 2025-04-27,22:03:32 | INFO | Start epoch 4
173
+ 2025-04-27,22:04:17 | INFO | Train Epoch: 4 [ 8192/917504 (1%)] Data (t): 36.657 Batch (t): 44.719, 183.187/s, 91.5936/s/gpu LR: 0.000030 Logit Scale: 100.000 Imm_image: 29.600 (29.600) Imm_text: 29.600 (29.600) Isd_image: -0.96800 (-0.96800) Isd_text: -0.96800 (-0.96800) Contrastive_loss: 0.31170 (0.31170) Loss: 0.31170 (0.31170)
174
+ 2025-04-27,22:19:17 | INFO | Train Epoch: 4 [827392/917504 (90%)] Data (t): 0.995 Batch (t): 9.002, 919.149/s, 459.574/s/gpu LR: 0.000024 Logit Scale: 100.000 Imm_image: 29.778 (29.689) Imm_text: 29.778 (29.689) Isd_image: -1.3826 (-1.1753) Isd_text: -1.3826 (-1.1753) Contrastive_loss: 0.30417 (0.30793) Loss: 0.30417 (0.30793)
175
+ 2025-04-27,22:20:55 | INFO | Train Epoch: 4 [917504/917504 (100%)] Data (t): 0.948 Batch (t): 8.931, 914.345/s, 457.172/s/gpu LR: 0.000023 Logit Scale: 100.000 Imm_image: 29.832 (29.737) Imm_text: 29.832 (29.737) Isd_image: -1.6278 (-1.3261) Isd_text: -1.6278 (-1.3261) Contrastive_loss: 0.25928 (0.29172) Loss: 0.25928 (0.29172)
176
+ 2025-04-27,22:20:55 | INFO | Start epoch 5
177
+ 2025-04-27,22:21:40 | INFO | Train Epoch: 5 [ 8192/917504 (1%)] Data (t): 35.569 Batch (t): 44.337, 184.767/s, 92.3833/s/gpu LR: 0.000023 Logit Scale: 100.000 Imm_image: 29.864 (29.864) Imm_text: 29.864 (29.864) Isd_image: -1.5245 (-1.5245) Isd_text: -1.5245 (-1.5245) Contrastive_loss: 0.28583 (0.28583) Loss: 0.28583 (0.28583)
178
+ 2025-04-27,22:36:41 | INFO | Train Epoch: 5 [827392/917504 (90%)] Data (t): 1.005 Batch (t): 9.014, 915.904/s, 457.952/s/gpu LR: 0.000017 Logit Scale: 100.000 Imm_image: 30.039 (29.951) Imm_text: 30.039 (29.951) Isd_image: -1.7178 (-1.6211) Isd_text: -1.7178 (-1.6211) Contrastive_loss: 0.26470 (0.27526) Loss: 0.26470 (0.27526)
179
+ 2025-04-27,22:38:19 | INFO | Train Epoch: 5 [917504/917504 (100%)] Data (t): 0.956 Batch (t): 8.938, 913.227/s, 456.613/s/gpu LR: 0.000017 Logit Scale: 100.000 Imm_image: 29.983 (29.962) Imm_text: 29.983 (29.962) Isd_image: -1.5348 (-1.5924) Isd_text: -1.5348 (-1.5924) Contrastive_loss: 0.25894 (0.26982) Loss: 0.25894 (0.26982)
180
+ 2025-04-27,22:38:20 | INFO | Start epoch 6
181
+ 2025-04-27,22:39:05 | INFO | Train Epoch: 6 [ 8192/917504 (1%)] Data (t): 36.913 Batch (t): 45.213, 181.186/s, 90.5929/s/gpu LR: 0.000016 Logit Scale: 100.000 Imm_image: 30.205 (30.205) Imm_text: 30.205 (30.205) Isd_image: -1.5840 (-1.5840) Isd_text: -1.5840 (-1.5840) Contrastive_loss: 0.21889 (0.21889) Loss: 0.21889 (0.21889)
182
+ 2025-04-27,22:54:06 | INFO | Train Epoch: 6 [827392/917504 (90%)] Data (t): 1.001 Batch (t): 9.008, 918.420/s, 459.210/s/gpu LR: 0.000011 Logit Scale: 100.000 Imm_image: 30.129 (30.167) Imm_text: 30.129 (30.167) Isd_image: -1.8268 (-1.7054) Isd_text: -1.8268 (-1.7054) Contrastive_loss: 0.24040 (0.22965) Loss: 0.24040 (0.22965)
183
+ 2025-04-27,22:55:44 | INFO | Train Epoch: 6 [917504/917504 (100%)] Data (t): 0.956 Batch (t): 8.938, 914.360/s, 457.180/s/gpu LR: 0.000010 Logit Scale: 100.000 Imm_image: 30.250 (30.195) Imm_text: 30.250 (30.195) Isd_image: -1.7475 (-1.7195) Isd_text: -1.7475 (-1.7195) Contrastive_loss: 0.22036 (0.22655) Loss: 0.22036 (0.22655)
184
+ 2025-04-27,22:55:44 | INFO | Start epoch 7
185
+ 2025-04-27,22:56:29 | INFO | Train Epoch: 7 [ 8192/917504 (1%)] Data (t): 36.050 Batch (t): 44.469, 184.219/s, 92.1094/s/gpu LR: 0.000010 Logit Scale: 100.000 Imm_image: 30.307 (30.307) Imm_text: 30.307 (30.307) Isd_image: -1.7967 (-1.7967) Isd_text: -1.7967 (-1.7967) Contrastive_loss: 0.22148 (0.22148) Loss: 0.22148 (0.22148)
186
+ 2025-04-27,23:11:28 | INFO | Train Epoch: 7 [827392/917504 (90%)] Data (t): 0.991 Batch (t): 8.997, 918.481/s, 459.241/s/gpu LR: 0.000005 Logit Scale: 100.000 Imm_image: 30.379 (30.343) Imm_text: 30.379 (30.343) Isd_image: -1.9151 (-1.8559) Isd_text: -1.9151 (-1.8559) Contrastive_loss: 0.21597 (0.21872) Loss: 0.21597 (0.21872)
187
+ 2025-04-27,23:13:07 | INFO | Train Epoch: 7 [917504/917504 (100%)] Data (t): 0.950 Batch (t): 8.932, 914.317/s, 457.159/s/gpu LR: 0.000005 Logit Scale: 100.000 Imm_image: 30.325 (30.337) Imm_text: 30.325 (30.337) Isd_image: -1.7899 (-1.8339) Isd_text: -1.7899 (-1.8339) Contrastive_loss: 0.21086 (0.21610) Loss: 0.21086 (0.21610)
188
+ 2025-04-27,23:13:07 | INFO | Start epoch 8
189
+ 2025-04-27,23:13:52 | INFO | Train Epoch: 8 [ 8192/917504 (1%)] Data (t): 36.134 Batch (t): 44.652, 183.463/s, 91.7315/s/gpu LR: 0.000005 Logit Scale: 100.000 Imm_image: 30.386 (30.386) Imm_text: 30.386 (30.386) Isd_image: -1.8206 (-1.8206) Isd_text: -1.8206 (-1.8206) Contrastive_loss: 0.21730 (0.21730) Loss: 0.21730 (0.21730)
190
+ 2025-04-27,23:28:51 | INFO | Train Epoch: 8 [827392/917504 (90%)] Data (t): 0.992 Batch (t): 8.997, 919.420/s, 459.710/s/gpu LR: 0.000001 Logit Scale: 100.000 Imm_image: 30.363 (30.374) Imm_text: 30.363 (30.374) Isd_image: -1.8669 (-1.8437) Isd_text: -1.8669 (-1.8437) Contrastive_loss: 0.22544 (0.22137) Loss: 0.22544 (0.22137)
191
+ 2025-04-27,23:30:29 | INFO | Train Epoch: 8 [917504/917504 (100%)] Data (t): 0.946 Batch (t): 8.925, 915.526/s, 457.763/s/gpu LR: 0.000001 Logit Scale: 100.000 Imm_image: 30.382 (30.377) Imm_text: 30.382 (30.377) Isd_image: -1.8523 (-1.8466) Isd_text: -1.8523 (-1.8466) Contrastive_loss: 0.20832 (0.21702) Loss: 0.20832 (0.21702)
192
+ 2025-04-27,23:30:30 | INFO | Start epoch 9
193
+ 2025-04-27,23:31:14 | INFO | Train Epoch: 9 [ 8192/917504 (1%)] Data (t): 36.011 Batch (t): 44.621, 183.592/s, 91.7960/s/gpu LR: 0.000001 Logit Scale: 100.000 Imm_image: 30.424 (30.424) Imm_text: 30.424 (30.424) Isd_image: -1.8386 (-1.8386) Isd_text: -1.8386 (-1.8386) Contrastive_loss: 0.20019 (0.20019) Loss: 0.20019 (0.20019)
194
+ 2025-04-27,23:46:16 | INFO | Train Epoch: 9 [827392/917504 (90%)] Data (t): 1.009 Batch (t): 9.018, 919.090/s, 459.545/s/gpu LR: 0.000000 Logit Scale: 100.000 Imm_image: 30.359 (30.392) Imm_text: 30.359 (30.392) Isd_image: -1.8356 (-1.8371) Isd_text: -1.8356 (-1.8371) Contrastive_loss: 0.22642 (0.21331) Loss: 0.22642 (0.21331)
195
+ 2025-04-27,23:47:54 | INFO | Train Epoch: 9 [917504/917504 (100%)] Data (t): 0.949 Batch (t): 8.931, 917.092/s, 458.546/s/gpu LR: 0.000000 Logit Scale: 100.000 Imm_image: 30.506 (30.430) Imm_text: 30.506 (30.430) Isd_image: -1.8817 (-1.8520) Isd_text: -1.8817 (-1.8520) Contrastive_loss: 0.18029 (0.20230) Loss: 0.18029 (0.20230)
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/ViT-B-16-datacomp_xl_s13b_b90k-010-4e5-e10-recaption-finetune-lock-text/low_inter_only/tensorboard/events.out.tfevents.1745780012.g12.2713681.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:265d4b9d7c4756414318fe99f7568a80b7462cd2872802cdd17f1c9d6f497c9a
3
+ size 19936
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/clusters/image_I_closest_0.1_SFR-Embedding-Code-2B_R_dinov2-large.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ca2641da262316bb2b895abae3224ab6cc166b82a2dd4f65b1fe1f67cb907350
3
+ size 228262016
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/clusters/image_farest.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b60d6b08a008e194de90c00df9913a0203d8898fd75763bf2d6a84840c64192e
3
+ size 124009441
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/clusters/text_farest.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:51013bcfe4a79c5ee87068f55c96d5a0cb0bfb0c1899f48a634c42240d5a2e2f
3
+ size 124084962
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/clusters/text_uniform.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7c39234fd0396c1f512ea29d8efbb4e717734421d7132a2dcf073af3dabb23ce
3
+ size 124061847
captions.tsv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:38b710ad65f51fd80e92d8ea2e1aabf1df625f9720d7626d9724e48d4d09b3d8
3
+ size 771831271