cornuHGF commited on
Commit
d5a502a
·
verified ·
1 Parent(s): 6673da4

Add files using upload-large-folder tool

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +4 -0
  2. SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/clusters/image_D_closest_0.1_SFR-Embedding-Code-2B_R_dinov2-large.npy +3 -0
  3. SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/clusters/image_I_closest_0.1_SFR-Embedding-Code-2B_R_dinov2-large.npy +3 -0
  4. SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/clusters/text_D_closest_0.1_SFR-Embedding-Code-2B_R_dinov2-large.npy +3 -0
  5. SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/clusters/text_D_labels_0.1_SFR-Embedding-Code-2B_R_dinov2-large.npy +3 -0
  6. SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-16-cc3m-laclip-mix-010-filled-map_text_closest_image_closest/benchmark_caltech101_epoch_40.pt_ViT-B-16_en_zeroshot_classification.json +1 -0
  7. SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-16-cc3m-laclip-mix-010-filled-map_text_closest_image_closest/benchmark_cars_epoch_40.pt_ViT-B-16_en_zeroshot_classification.json +1 -0
  8. SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-16-cc3m-laclip-mix-010-filled-map_text_closest_image_closest/benchmark_cifar10_epoch_40.pt_ViT-B-16_en_zeroshot_classification.json +1 -0
  9. SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-16-cc3m-laclip-mix-010-filled-map_text_closest_image_closest/benchmark_dtd_epoch_40.pt_ViT-B-16_en_zeroshot_classification.json +1 -0
  10. SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-16-cc3m-laclip-mix-010-filled-map_text_closest_image_closest/benchmark_eurosat_epoch_40.pt_ViT-B-16_en_zeroshot_classification.json +1 -0
  11. SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-16-cc3m-laclip-mix-010-filled-map_text_closest_image_closest/benchmark_fgvc_aircraft_epoch_40.pt_ViT-B-16_en_zeroshot_classification.json +1 -0
  12. SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-16-cc3m-laclip-mix-010-filled-map_text_closest_image_closest/benchmark_flickr30k_epoch_40.pt_ViT-B-16_en_zeroshot_retrieval.json +1 -0
  13. SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-16-cc3m-laclip-mix-010-filled-map_text_closest_image_closest/benchmark_flowers_epoch_40.pt_ViT-B-16_en_zeroshot_classification.json +1 -0
  14. SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-16-cc3m-laclip-mix-010-filled-map_text_closest_image_closest/benchmark_food101_epoch_40.pt_ViT-B-16_en_zeroshot_classification.json +1 -0
  15. SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-16-cc3m-laclip-mix-010-filled-map_text_closest_image_closest/benchmark_gtsrb_epoch_40.pt_ViT-B-16_en_zeroshot_classification.json +1 -0
  16. SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-16-cc3m-laclip-mix-010-filled-map_text_closest_image_closest/benchmark_imagenet1k_epoch_40.pt_ViT-B-16_en_zeroshot_classification.json +1 -0
  17. SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-16-cc3m-laclip-mix-010-filled-map_text_closest_image_closest/benchmark_mscoco_captions_epoch_40.pt_ViT-B-16_en_zeroshot_retrieval.json +1 -0
  18. SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-16-cc3m-laclip-mix-010-filled-map_text_closest_image_closest/benchmark_pets_epoch_40.pt_ViT-B-16_en_zeroshot_classification.json +1 -0
  19. SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-16-cc3m-laclip-mix-010-filled-map_text_closest_image_closest/benchmark_stl10_epoch_40.pt_ViT-B-16_en_zeroshot_classification.json +1 -0
  20. SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-16-cc3m-laclip-mix-010-filled-map_text_closest_image_closest/benchmark_sun397_epoch_40.pt_ViT-B-16_en_zeroshot_classification.json +1 -0
  21. SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-16-cc3m-laclip-mix-010-filled-map_text_closest_image_closest/params.txt +103 -0
  22. SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-16-cc3m-laclip-mix-010-filled-use_image_closest/benchmark_caltech101_epoch_40.pt_ViT-B-16_en_zeroshot_classification.json +1 -0
  23. SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-16-cc3m-laclip-mix-010-filled-use_image_closest/benchmark_cars_epoch_40.pt_ViT-B-16_en_zeroshot_classification.json +1 -0
  24. SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-16-cc3m-laclip-mix-010-filled-use_image_closest/benchmark_cifar100_epoch_40.pt_ViT-B-16_en_zeroshot_classification.json +1 -0
  25. SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-16-cc3m-laclip-mix-010-filled-use_image_closest/benchmark_cifar10_epoch_40.pt_ViT-B-16_en_zeroshot_classification.json +1 -0
  26. SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-16-cc3m-laclip-mix-010-filled-use_image_closest/benchmark_country211_epoch_40.pt_ViT-B-16_en_zeroshot_classification.json +1 -0
  27. SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-16-cc3m-laclip-mix-010-filled-use_image_closest/benchmark_dtd_epoch_40.pt_ViT-B-16_en_zeroshot_classification.json +1 -0
  28. SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-16-cc3m-laclip-mix-010-filled-use_image_closest/benchmark_eurosat_epoch_40.pt_ViT-B-16_en_zeroshot_classification.json +1 -0
  29. SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-16-cc3m-laclip-mix-010-filled-use_image_closest/benchmark_fgvc_aircraft_epoch_40.pt_ViT-B-16_en_zeroshot_classification.json +1 -0
  30. SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-16-cc3m-laclip-mix-010-filled-use_image_closest/benchmark_flickr30k_epoch_40.pt_ViT-B-16_en_zeroshot_retrieval.json +1 -0
  31. SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-16-cc3m-laclip-mix-010-filled-use_image_closest/benchmark_flowers_epoch_40.pt_ViT-B-16_en_zeroshot_classification.json +1 -0
  32. SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-16-cc3m-laclip-mix-010-filled-use_image_closest/benchmark_food101_epoch_40.pt_ViT-B-16_en_zeroshot_classification.json +1 -0
  33. SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-16-cc3m-laclip-mix-010-filled-use_image_closest/benchmark_gtsrb_epoch_40.pt_ViT-B-16_en_zeroshot_classification.json +1 -0
  34. SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-16-cc3m-laclip-mix-010-filled-use_image_closest/benchmark_imagenet1k_epoch_40.pt_ViT-B-16_en_zeroshot_classification.json +1 -0
  35. SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-16-cc3m-laclip-mix-010-filled-use_image_closest/benchmark_mscoco_captions_epoch_40.pt_ViT-B-16_en_zeroshot_retrieval.json +1 -0
  36. SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-16-cc3m-laclip-mix-010-filled-use_image_closest/benchmark_pets_epoch_40.pt_ViT-B-16_en_zeroshot_classification.json +1 -0
  37. SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-16-cc3m-laclip-mix-010-filled-use_image_closest/benchmark_stl10_epoch_40.pt_ViT-B-16_en_zeroshot_classification.json +1 -0
  38. SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-16-cc3m-laclip-mix-010-filled-use_image_closest/benchmark_sun397_epoch_40.pt_ViT-B-16_en_zeroshot_classification.json +1 -0
  39. SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-16-cc3m-laclip-mix-010-filled-use_image_closest/benchmark_vtab_resisc45_epoch_40.pt_ViT-B-16_en_zeroshot_classification.json +1 -0
  40. SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-16-cc3m-laclip-mix-010-filled-use_image_closest/out.log +394 -0
  41. SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-16-cc3m-laclip-mix-010-filled-use_image_closest/params.txt +103 -0
  42. SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-16-cc3m-laclip-mix-inter-010-filled-b2/benchmark_caltech101_epoch_40.pt_ViT-B-16_en_zeroshot_classification.json +1 -0
  43. SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-16-cc3m-laclip-mix-inter-010-filled-b2/benchmark_cars_epoch_40.pt_ViT-B-16_en_zeroshot_classification.json +1 -0
  44. SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-16-cc3m-laclip-mix-inter-010-filled-b2/benchmark_cifar100_epoch_40.pt_ViT-B-16_en_zeroshot_classification.json +1 -0
  45. SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-16-cc3m-laclip-mix-inter-010-filled-b2/benchmark_cifar10_epoch_40.pt_ViT-B-16_en_zeroshot_classification.json +1 -0
  46. SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-16-cc3m-laclip-mix-inter-010-filled-b2/benchmark_country211_epoch_40.pt_ViT-B-16_en_zeroshot_classification.json +1 -0
  47. SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-16-cc3m-laclip-mix-inter-010-filled-b2/benchmark_dtd_epoch_40.pt_ViT-B-16_en_zeroshot_classification.json +1 -0
  48. SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-16-cc3m-laclip-mix-inter-010-filled-b2/benchmark_eurosat_epoch_40.pt_ViT-B-16_en_zeroshot_classification.json +1 -0
  49. SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-16-cc3m-laclip-mix-inter-010-filled-b2/benchmark_fgvc_aircraft_epoch_40.pt_ViT-B-16_en_zeroshot_classification.json +1 -0
  50. SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-16-cc3m-laclip-mix-inter-010-filled-b2/benchmark_flickr30k_epoch_40.pt_ViT-B-16_en_zeroshot_retrieval.json +1 -0
.gitattributes CHANGED
@@ -33,3 +33,7 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ captions_added_laclip_coco.tsv filter=lfs diff=lfs merge=lfs -text
37
+ captions_added_laclip_human.tsv filter=lfs diff=lfs merge=lfs -text
38
+ captions_added_laclip_chatgpt.tsv filter=lfs diff=lfs merge=lfs -text
39
+ captions_added_laclip_bard.tsv filter=lfs diff=lfs merge=lfs -text
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/clusters/image_D_closest_0.1_SFR-Embedding-Code-2B_R_dinov2-large.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:96923e2a4aebbe0691bd022212a82b443e7190a6e9f158ec2f388401e0a7b17b
3
+ size 9166976
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/clusters/image_I_closest_0.1_SFR-Embedding-Code-2B_R_dinov2-large.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:435f3f19b8632ba480c98ead738976fb498acd064deb3150e52f6c22e0a79424
3
+ size 18333824
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/clusters/text_D_closest_0.1_SFR-Embedding-Code-2B_R_dinov2-large.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7c43c9d941038bea5b0cc26db284ed09e501bc108dcf1eadb8ee83354ae1046d
3
+ size 11698304
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/clusters/text_D_labels_0.1_SFR-Embedding-Code-2B_R_dinov2-large.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:33222ccc2596400c30153942ee7c44ba2849bba6f5d6a6ad7d3fbc79d469ca2d
3
+ size 12066688
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-16-cc3m-laclip-mix-010-filled-map_text_closest_image_closest/benchmark_caltech101_epoch_40.pt_ViT-B-16_en_zeroshot_classification.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"dataset": "caltech101", "model": "ViT-B-16", "pretrained": "/mnt/personal/zhudongy/cc3m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-16-cc3m-laclip-mix-010-filled-map_text_closest_image_closest//checkpoints/epoch_40.pt", "task": "zeroshot_classification", "metrics": {"acc1": 0.4434601924759405, "acc5": 0.7028652668416447, "mean_per_class_recall": 0.4082095902761506}, "language": "en"}
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-16-cc3m-laclip-mix-010-filled-map_text_closest_image_closest/benchmark_cars_epoch_40.pt_ViT-B-16_en_zeroshot_classification.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"dataset": "cars", "model": "ViT-B-16", "pretrained": "/mnt/personal/zhudongy/cc3m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-16-cc3m-laclip-mix-010-filled-map_text_closest_image_closest//checkpoints/epoch_40.pt", "task": "zeroshot_classification", "metrics": {"acc1": 0.008581022260912822, "acc5": 0.045268001492351696, "mean_per_class_recall": 0.008638296018293678}, "language": "en"}
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-16-cc3m-laclip-mix-010-filled-map_text_closest_image_closest/benchmark_cifar10_epoch_40.pt_ViT-B-16_en_zeroshot_classification.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"dataset": "cifar10", "model": "ViT-B-16", "pretrained": "/mnt/personal/zhudongy/cc3m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-16-cc3m-laclip-mix-010-filled-map_text_closest_image_closest//checkpoints/epoch_40.pt", "task": "zeroshot_classification", "metrics": {"acc1": 0.4414, "acc5": 0.9011, "mean_per_class_recall": 0.44189999999999996}, "language": "en"}
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-16-cc3m-laclip-mix-010-filled-map_text_closest_image_closest/benchmark_dtd_epoch_40.pt_ViT-B-16_en_zeroshot_classification.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"dataset": "dtd", "model": "ViT-B-16", "pretrained": "/mnt/personal/zhudongy/cc3m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-16-cc3m-laclip-mix-010-filled-map_text_closest_image_closest//checkpoints/epoch_40.pt", "task": "zeroshot_classification", "metrics": {"acc1": 0.08723404255319149, "acc5": 0.2601063829787234, "mean_per_class_recall": 0.08723404255319148}, "language": "en"}
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-16-cc3m-laclip-mix-010-filled-map_text_closest_image_closest/benchmark_eurosat_epoch_40.pt_ViT-B-16_en_zeroshot_classification.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"dataset": "eurosat", "model": "ViT-B-16", "pretrained": "/mnt/personal/zhudongy/cc3m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-16-cc3m-laclip-mix-010-filled-map_text_closest_image_closest//checkpoints/epoch_40.pt", "task": "zeroshot_classification", "metrics": {"acc1": 0.17703703703703705, "acc5": 0.627962962962963, "mean_per_class_recall": 0.17955333333333331}, "language": "en"}
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-16-cc3m-laclip-mix-010-filled-map_text_closest_image_closest/benchmark_fgvc_aircraft_epoch_40.pt_ViT-B-16_en_zeroshot_classification.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"dataset": "fgvc_aircraft", "model": "ViT-B-16", "pretrained": "/mnt/personal/zhudongy/cc3m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-16-cc3m-laclip-mix-010-filled-map_text_closest_image_closest//checkpoints/epoch_40.pt", "task": "zeroshot_classification", "metrics": {"acc1": 0.011701170117011701, "acc5": 0.0537053705370537, "mean_per_class_recall": 0.011951871657754012}, "language": "en"}
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-16-cc3m-laclip-mix-010-filled-map_text_closest_image_closest/benchmark_flickr30k_epoch_40.pt_ViT-B-16_en_zeroshot_retrieval.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"dataset": "flickr30k", "model": "ViT-B-16", "pretrained": "/mnt/personal/zhudongy/cc3m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-16-cc3m-laclip-mix-010-filled-map_text_closest_image_closest//checkpoints/epoch_40.pt", "task": "zeroshot_retrieval", "metrics": {"image_retrieval_recall@1": 0.13760000467300415, "text_retrieval_recall@1": 0.1860000044107437, "image_retrieval_recall@5": 0.3253999948501587, "text_retrieval_recall@5": 0.4300000071525574, "image_retrieval_recall@10": 0.42500001192092896, "text_retrieval_recall@10": 0.5440000295639038}, "language": "en"}
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-16-cc3m-laclip-mix-010-filled-map_text_closest_image_closest/benchmark_flowers_epoch_40.pt_ViT-B-16_en_zeroshot_classification.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"dataset": "flowers", "model": "ViT-B-16", "pretrained": "/mnt/personal/zhudongy/cc3m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-16-cc3m-laclip-mix-010-filled-map_text_closest_image_closest//checkpoints/epoch_40.pt", "task": "zeroshot_classification", "metrics": {"acc1": 0.10066677508537973, "acc5": 0.25418767279232396, "mean_per_class_recall": 0.10089990159175895}, "language": "en"}
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-16-cc3m-laclip-mix-010-filled-map_text_closest_image_closest/benchmark_food101_epoch_40.pt_ViT-B-16_en_zeroshot_classification.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"dataset": "food101", "model": "ViT-B-16", "pretrained": "/mnt/personal/zhudongy/cc3m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-16-cc3m-laclip-mix-010-filled-map_text_closest_image_closest//checkpoints/epoch_40.pt", "task": "zeroshot_classification", "metrics": {"acc1": 0.08261386138613862, "acc5": 0.21611881188118812, "mean_per_class_recall": 0.08257425742574255}, "language": "en"}
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-16-cc3m-laclip-mix-010-filled-map_text_closest_image_closest/benchmark_gtsrb_epoch_40.pt_ViT-B-16_en_zeroshot_classification.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"dataset": "gtsrb", "model": "ViT-B-16", "pretrained": "/mnt/personal/zhudongy/cc3m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-16-cc3m-laclip-mix-010-filled-map_text_closest_image_closest//checkpoints/epoch_40.pt", "task": "zeroshot_classification", "metrics": {"acc1": 0.0615993665874901, "acc5": 0.2391132224861441, "mean_per_class_recall": 0.06541932879875247}, "language": "en"}
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-16-cc3m-laclip-mix-010-filled-map_text_closest_image_closest/benchmark_imagenet1k_epoch_40.pt_ViT-B-16_en_zeroshot_classification.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"dataset": "imagenet1k", "model": "ViT-B-16", "pretrained": "/mnt/personal/zhudongy/cc3m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-16-cc3m-laclip-mix-010-filled-map_text_closest_image_closest//checkpoints/epoch_40.pt", "task": "zeroshot_classification", "metrics": {"acc1": 0.1472, "acc5": 0.30212, "mean_per_class_recall": 0.14724}, "language": "en"}
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-16-cc3m-laclip-mix-010-filled-map_text_closest_image_closest/benchmark_mscoco_captions_epoch_40.pt_ViT-B-16_en_zeroshot_retrieval.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"dataset": "mscoco_captions", "model": "ViT-B-16", "pretrained": "/mnt/personal/zhudongy/cc3m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-16-cc3m-laclip-mix-010-filled-map_text_closest_image_closest//checkpoints/epoch_40.pt", "task": "zeroshot_retrieval", "metrics": {"image_retrieval_recall@1": 0.07233107089996338, "text_retrieval_recall@1": 0.09759999811649323, "image_retrieval_recall@5": 0.19888044893741608, "text_retrieval_recall@5": 0.2635999917984009, "image_retrieval_recall@10": 0.28116753697395325, "text_retrieval_recall@10": 0.362199991941452}, "language": "en"}
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-16-cc3m-laclip-mix-010-filled-map_text_closest_image_closest/benchmark_pets_epoch_40.pt_ViT-B-16_en_zeroshot_classification.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"dataset": "pets", "model": "ViT-B-16", "pretrained": "/mnt/personal/zhudongy/cc3m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-16-cc3m-laclip-mix-010-filled-map_text_closest_image_closest//checkpoints/epoch_40.pt", "task": "zeroshot_classification", "metrics": {"acc1": 0.09266830198964296, "acc5": 0.27091850640501497, "mean_per_class_recall": 0.09214182612421928}, "language": "en"}
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-16-cc3m-laclip-mix-010-filled-map_text_closest_image_closest/benchmark_stl10_epoch_40.pt_ViT-B-16_en_zeroshot_classification.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"dataset": "stl10", "model": "ViT-B-16", "pretrained": "/mnt/personal/zhudongy/cc3m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-16-cc3m-laclip-mix-010-filled-map_text_closest_image_closest//checkpoints/epoch_40.pt", "task": "zeroshot_classification", "metrics": {"acc1": 0.735625, "acc5": 0.980125, "mean_per_class_recall": 0.735625}, "language": "en"}
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-16-cc3m-laclip-mix-010-filled-map_text_closest_image_closest/benchmark_sun397_epoch_40.pt_ViT-B-16_en_zeroshot_classification.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"dataset": "sun397", "model": "ViT-B-16", "pretrained": "/mnt/personal/zhudongy/cc3m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-16-cc3m-laclip-mix-010-filled-map_text_closest_image_closest//checkpoints/epoch_40.pt", "task": "zeroshot_classification", "metrics": {"acc1": 0.2661051547529286, "acc5": 0.5383618073817974, "mean_per_class_recall": 0.23191021381006108}, "language": "en"}
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-16-cc3m-laclip-mix-010-filled-map_text_closest_image_closest/params.txt ADDED
@@ -0,0 +1,103 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ accum_freq: 4
2
+ aug_cfg: {}
3
+ batch_size: 512
4
+ beta1: 0.9
5
+ beta2: 0.98
6
+ cache_dir: None
7
+ caption_ratio: 0.1
8
+ checkpoint_path: /mnt/personal/zhudongy/cc3m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-16-cc3m-laclip-mix-010-filled-map_text_closest_image_closest/checkpoints
9
+ coca_caption_loss_weight: 2.0
10
+ coca_contrastive_loss_weight: 1.0
11
+ copy_codebase: False
12
+ csv_caption_key: title
13
+ csv_img_key: filepath
14
+ csv_separator:
15
+ dataset_resampled: False
16
+ dataset_type: synthetic
17
+ ddp_static_graph: False
18
+ debug: False
19
+ delete_previous_checkpoint: False
20
+ device: cuda:0
21
+ dist_backend: None
22
+ dist_url: None
23
+ distill: False
24
+ distill_model: None
25
+ distill_pretrained: None
26
+ distributed: True
27
+ epochs: 40
28
+ epochs_cooldown: None
29
+ eps: 1e-08
30
+ force_custom_text: False
31
+ force_image_size: None
32
+ force_patch_dropout: None
33
+ force_quick_gelu: False
34
+ gather_with_grad: True
35
+ grad_checkpointing: True
36
+ grad_clip_norm: None
37
+ horovod: False
38
+ image_interpolation: None
39
+ image_mean: None
40
+ image_resize_mode: None
41
+ image_std: None
42
+ imagenet_v2: None
43
+ imagenet_val: None
44
+ keep_func_name:
45
+ local_loss: False
46
+ local_rank: 0
47
+ lock_image: False
48
+ lock_image_freeze_bn_stats: False
49
+ lock_image_unlocked_groups: 0
50
+ lock_text: False
51
+ lock_text_freeze_layer_norm: False
52
+ lock_text_unlocked_layers: 0
53
+ log_every_n_steps: 100
54
+ log_level: 20
55
+ log_local: False
56
+ log_path: /mnt/personal/zhudongy/cc3m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-16-cc3m-laclip-mix-010-filled-map_text_closest_image_closest/out.log
57
+ logs: /mnt/personal/zhudongy/cc3m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs
58
+ loss_dist_impl: None
59
+ lr: 0.001
60
+ lr_cooldown_end: 0.0
61
+ lr_cooldown_power: 1.0
62
+ lr_scheduler: cosine
63
+ map_func_name: map_text_closest_image_closest
64
+ model: ViT-B-16
65
+ momentum: None
66
+ name: ViT-B-16-cc3m-laclip-mix-010-filled-map_text_closest_image_closest
67
+ no_set_device_rank: False
68
+ opt: adamw
69
+ precision: amp
70
+ pretrained:
71
+ pretrained_image: False
72
+ rank: 0
73
+ remote_sync: None
74
+ remote_sync_frequency: 300
75
+ remote_sync_protocol: s3
76
+ report_to: tensorboard,wandb
77
+ resume: None
78
+ save_frequency: 1
79
+ save_most_recent: False
80
+ seed: 0
81
+ siglip: False
82
+ skip_scheduler: False
83
+ tensorboard: True
84
+ tensorboard_path: /mnt/personal/zhudongy/cc3m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-16-cc3m-laclip-mix-010-filled-map_text_closest_image_closest/tensorboard
85
+ torchcompile: False
86
+ torchscript: False
87
+ trace: False
88
+ train_data: /mnt/personal/zhudongy/cc3m-hgf-wds/{0000..0301}.tar
89
+ train_data_upsampling_factors: None
90
+ train_num_samples: 3016640
91
+ use_bn_sync: False
92
+ use_bnb_linear: None
93
+ val_data: None
94
+ val_frequency: 1
95
+ val_num_samples: None
96
+ wandb: True
97
+ wandb_notes:
98
+ wandb_project_name: open-clip
99
+ warmup: 368
100
+ wd: 0.5
101
+ workers: 16
102
+ world_size: 4
103
+ zeroshot_frequency: 2
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-16-cc3m-laclip-mix-010-filled-use_image_closest/benchmark_caltech101_epoch_40.pt_ViT-B-16_en_zeroshot_classification.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"dataset": "caltech101", "model": "ViT-B-16", "pretrained": "/mnt/personal/zhudongy/cc3m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-16-cc3m-laclip-mix-010-filled-use_image_closest/checkpoints/epoch_40.pt", "task": "zeroshot_classification", "metrics": {"acc1": 0.46981627296587924, "acc5": 0.723753280839895, "mean_per_class_recall": 0.4308645556478004}, "language": "en"}
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-16-cc3m-laclip-mix-010-filled-use_image_closest/benchmark_cars_epoch_40.pt_ViT-B-16_en_zeroshot_classification.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"dataset": "cars", "model": "ViT-B-16", "pretrained": "/mnt/personal/zhudongy/cc3m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-16-cc3m-laclip-mix-010-filled-use_image_closest/checkpoints/epoch_40.pt", "task": "zeroshot_classification", "metrics": {"acc1": 0.010446461882850392, "acc5": 0.05496828752642706, "mean_per_class_recall": 0.010470401779564877}, "language": "en"}
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-16-cc3m-laclip-mix-010-filled-use_image_closest/benchmark_cifar100_epoch_40.pt_ViT-B-16_en_zeroshot_classification.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"dataset": "cifar100", "model": "ViT-B-16", "pretrained": "/mnt/personal/zhudongy/cc3m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-16-cc3m-laclip-mix-010-filled-use_image_closest/checkpoints/epoch_40.pt", "task": "zeroshot_classification", "metrics": {"acc1": 0.2219, "acc5": 0.471, "mean_per_class_recall": 0.22199999999999995}, "language": "en"}
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-16-cc3m-laclip-mix-010-filled-use_image_closest/benchmark_cifar10_epoch_40.pt_ViT-B-16_en_zeroshot_classification.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"dataset": "cifar10", "model": "ViT-B-16", "pretrained": "/mnt/personal/zhudongy/cc3m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-16-cc3m-laclip-mix-010-filled-use_image_closest/checkpoints/epoch_40.pt", "task": "zeroshot_classification", "metrics": {"acc1": 0.458, "acc5": 0.9095, "mean_per_class_recall": 0.45780000000000004}, "language": "en"}
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-16-cc3m-laclip-mix-010-filled-use_image_closest/benchmark_country211_epoch_40.pt_ViT-B-16_en_zeroshot_classification.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"dataset": "country211", "model": "ViT-B-16", "pretrained": "/mnt/personal/zhudongy/cc3m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-16-cc3m-laclip-mix-010-filled-use_image_closest/checkpoints/epoch_40.pt", "task": "zeroshot_classification", "metrics": {"acc1": 0.006492890995260663, "acc5": 0.02976303317535545, "mean_per_class_recall": 0.006587677725118483}, "language": "en"}
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-16-cc3m-laclip-mix-010-filled-use_image_closest/benchmark_dtd_epoch_40.pt_ViT-B-16_en_zeroshot_classification.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"dataset": "dtd", "model": "ViT-B-16", "pretrained": "/mnt/personal/zhudongy/cc3m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-16-cc3m-laclip-mix-010-filled-use_image_closest/checkpoints/epoch_40.pt", "task": "zeroshot_classification", "metrics": {"acc1": 0.09148936170212765, "acc5": 0.2946808510638298, "mean_per_class_recall": 0.09148936170212765}, "language": "en"}
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-16-cc3m-laclip-mix-010-filled-use_image_closest/benchmark_eurosat_epoch_40.pt_ViT-B-16_en_zeroshot_classification.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"dataset": "eurosat", "model": "ViT-B-16", "pretrained": "/mnt/personal/zhudongy/cc3m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-16-cc3m-laclip-mix-010-filled-use_image_closest/checkpoints/epoch_40.pt", "task": "zeroshot_classification", "metrics": {"acc1": 0.164, "acc5": 0.7107777777777777, "mean_per_class_recall": 0.15283}, "language": "en"}
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-16-cc3m-laclip-mix-010-filled-use_image_closest/benchmark_fgvc_aircraft_epoch_40.pt_ViT-B-16_en_zeroshot_classification.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"dataset": "fgvc_aircraft", "model": "ViT-B-16", "pretrained": "/mnt/personal/zhudongy/cc3m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-16-cc3m-laclip-mix-010-filled-use_image_closest/checkpoints/epoch_40.pt", "task": "zeroshot_classification", "metrics": {"acc1": 0.012001200120012, "acc5": 0.04950495049504951, "mean_per_class_recall": 0.011434937611408201}, "language": "en"}
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-16-cc3m-laclip-mix-010-filled-use_image_closest/benchmark_flickr30k_epoch_40.pt_ViT-B-16_en_zeroshot_retrieval.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"dataset": "flickr30k", "model": "ViT-B-16", "pretrained": "/mnt/personal/zhudongy/cc3m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-16-cc3m-laclip-mix-010-filled-use_image_closest/checkpoints/epoch_40.pt", "task": "zeroshot_retrieval", "metrics": {"image_retrieval_recall@1": 0.1428000032901764, "text_retrieval_recall@1": 0.25, "image_retrieval_recall@5": 0.32199999690055847, "text_retrieval_recall@5": 0.5040000081062317, "image_retrieval_recall@10": 0.4228000044822693, "text_retrieval_recall@10": 0.621999979019165}, "language": "en"}
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-16-cc3m-laclip-mix-010-filled-use_image_closest/benchmark_flowers_epoch_40.pt_ViT-B-16_en_zeroshot_classification.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"dataset": "flowers", "model": "ViT-B-16", "pretrained": "/mnt/personal/zhudongy/cc3m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-16-cc3m-laclip-mix-010-filled-use_image_closest/checkpoints/epoch_40.pt", "task": "zeroshot_classification", "metrics": {"acc1": 0.11432753293218409, "acc5": 0.25402504472271914, "mean_per_class_recall": 0.09934472918573373}, "language": "en"}
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-16-cc3m-laclip-mix-010-filled-use_image_closest/benchmark_food101_epoch_40.pt_ViT-B-16_en_zeroshot_classification.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"dataset": "food101", "model": "ViT-B-16", "pretrained": "/mnt/personal/zhudongy/cc3m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-16-cc3m-laclip-mix-010-filled-use_image_closest/checkpoints/epoch_40.pt", "task": "zeroshot_classification", "metrics": {"acc1": 0.10229702970297029, "acc5": 0.25005940594059406, "mean_per_class_recall": 0.10237623762376237}, "language": "en"}
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-16-cc3m-laclip-mix-010-filled-use_image_closest/benchmark_gtsrb_epoch_40.pt_ViT-B-16_en_zeroshot_classification.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"dataset": "gtsrb", "model": "ViT-B-16", "pretrained": "/mnt/personal/zhudongy/cc3m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-16-cc3m-laclip-mix-010-filled-use_image_closest/checkpoints/epoch_40.pt", "task": "zeroshot_classification", "metrics": {"acc1": 0.048218527315914486, "acc5": 0.2452098178939034, "mean_per_class_recall": 0.07385537144763636}, "language": "en"}
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-16-cc3m-laclip-mix-010-filled-use_image_closest/benchmark_imagenet1k_epoch_40.pt_ViT-B-16_en_zeroshot_classification.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"dataset": "imagenet1k", "model": "ViT-B-16", "pretrained": "/mnt/personal/zhudongy/cc3m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-16-cc3m-laclip-mix-010-filled-use_image_closest/checkpoints/epoch_40.pt", "task": "zeroshot_classification", "metrics": {"acc1": 0.1611, "acc5": 0.31514, "mean_per_class_recall": 0.16116000000000003}, "language": "en"}
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-16-cc3m-laclip-mix-010-filled-use_image_closest/benchmark_mscoco_captions_epoch_40.pt_ViT-B-16_en_zeroshot_retrieval.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"dataset": "mscoco_captions", "model": "ViT-B-16", "pretrained": "/mnt/personal/zhudongy/cc3m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-16-cc3m-laclip-mix-010-filled-use_image_closest/checkpoints/epoch_40.pt", "task": "zeroshot_retrieval", "metrics": {"image_retrieval_recall@1": 0.06977209448814392, "text_retrieval_recall@1": 0.12060000002384186, "image_retrieval_recall@5": 0.18524590134620667, "text_retrieval_recall@5": 0.298799991607666, "image_retrieval_recall@10": 0.26469412446022034, "text_retrieval_recall@10": 0.40459999442100525}, "language": "en"}
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-16-cc3m-laclip-mix-010-filled-use_image_closest/benchmark_pets_epoch_40.pt_ViT-B-16_en_zeroshot_classification.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"dataset": "pets", "model": "ViT-B-16", "pretrained": "/mnt/personal/zhudongy/cc3m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-16-cc3m-laclip-mix-010-filled-use_image_closest/checkpoints/epoch_40.pt", "task": "zeroshot_classification", "metrics": {"acc1": 0.10820387026437721, "acc5": 0.3060779503952031, "mean_per_class_recall": 0.10816244853891095}, "language": "en"}
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-16-cc3m-laclip-mix-010-filled-use_image_closest/benchmark_stl10_epoch_40.pt_ViT-B-16_en_zeroshot_classification.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"dataset": "stl10", "model": "ViT-B-16", "pretrained": "/mnt/personal/zhudongy/cc3m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-16-cc3m-laclip-mix-010-filled-use_image_closest/checkpoints/epoch_40.pt", "task": "zeroshot_classification", "metrics": {"acc1": 0.73625, "acc5": 0.982, "mean_per_class_recall": 0.736375}, "language": "en"}
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-16-cc3m-laclip-mix-010-filled-use_image_closest/benchmark_sun397_epoch_40.pt_ViT-B-16_en_zeroshot_classification.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"dataset": "sun397", "model": "ViT-B-16", "pretrained": "/mnt/personal/zhudongy/cc3m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-16-cc3m-laclip-mix-010-filled-use_image_closest/checkpoints/epoch_40.pt", "task": "zeroshot_classification", "metrics": {"acc1": 0.2650293322544458, "acc5": 0.5314563142505103, "mean_per_class_recall": 0.22922416785400998}, "language": "en"}
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-16-cc3m-laclip-mix-010-filled-use_image_closest/benchmark_vtab_resisc45_epoch_40.pt_ViT-B-16_en_zeroshot_classification.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"dataset": "vtab/resisc45", "model": "ViT-B-16", "pretrained": "/mnt/personal/zhudongy/cc3m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-16-cc3m-laclip-mix-010-filled-use_image_closest/checkpoints/epoch_40.pt", "task": "zeroshot_classification", "metrics": {"acc1": 0.16936507936507936, "acc5": 0.4606349206349206, "mean_per_class_recall": 0.17198112466950818}, "language": "en"}
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-16-cc3m-laclip-mix-010-filled-use_image_closest/out.log ADDED
@@ -0,0 +1,394 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2025-04-23,07:32:10 | INFO | Running in distributed mode with multiple processes. Device: cuda:0.Process (global: 0, local 0), total 4.
2
+ 2025-04-23,07:32:10 | INFO | Loaded ViT-B-16 model config.
3
+ 2025-04-23,07:32:12 | INFO | Model:
4
+ 2025-04-23,07:32:12 | INFO | CLIP(
5
+ (visual): VisionTransformer(
6
+ (conv1): Conv2d(3, 768, kernel_size=(16, 16), stride=(16, 16), bias=False)
7
+ (patch_dropout): Identity()
8
+ (ln_pre): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
9
+ (transformer): Transformer(
10
+ (resblocks): ModuleList(
11
+ (0-11): 12 x ResidualAttentionBlock(
12
+ (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
13
+ (attn): MultiheadAttention(
14
+ (out_proj): NonDynamicallyQuantizableLinear(in_features=768, out_features=768, bias=True)
15
+ )
16
+ (ls_1): Identity()
17
+ (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
18
+ (mlp): Sequential(
19
+ (c_fc): Linear(in_features=768, out_features=3072, bias=True)
20
+ (gelu): GELU(approximate='none')
21
+ (c_proj): Linear(in_features=3072, out_features=768, bias=True)
22
+ )
23
+ (ls_2): Identity()
24
+ )
25
+ )
26
+ )
27
+ (ln_post): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
28
+ )
29
+ (transformer): Transformer(
30
+ (resblocks): ModuleList(
31
+ (0-11): 12 x ResidualAttentionBlock(
32
+ (ln_1): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
33
+ (attn): MultiheadAttention(
34
+ (out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)
35
+ )
36
+ (ls_1): Identity()
37
+ (ln_2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
38
+ (mlp): Sequential(
39
+ (c_fc): Linear(in_features=512, out_features=2048, bias=True)
40
+ (gelu): GELU(approximate='none')
41
+ (c_proj): Linear(in_features=2048, out_features=512, bias=True)
42
+ )
43
+ (ls_2): Identity()
44
+ )
45
+ )
46
+ )
47
+ (token_embedding): Embedding(49408, 512)
48
+ (ln_final): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
49
+ )
50
+ 2025-04-23,07:32:12 | INFO | Params:
51
+ 2025-04-23,07:32:12 | INFO | accum_freq: 4
52
+ 2025-04-23,07:32:12 | INFO | aug_cfg: {}
53
+ 2025-04-23,07:32:12 | INFO | batch_size: 512
54
+ 2025-04-23,07:32:12 | INFO | beta1: 0.9
55
+ 2025-04-23,07:32:12 | INFO | beta2: 0.98
56
+ 2025-04-23,07:32:12 | INFO | cache_dir: None
57
+ 2025-04-23,07:32:12 | INFO | caption_ratio: 0.1
58
+ 2025-04-23,07:32:12 | INFO | checkpoint_path: /mnt/personal/zhudongy/cc3m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-16-cc3m-laclip-mix-010-filled-use_image_closest/checkpoints
59
+ 2025-04-23,07:32:12 | INFO | coca_caption_loss_weight: 2.0
60
+ 2025-04-23,07:32:12 | INFO | coca_contrastive_loss_weight: 1.0
61
+ 2025-04-23,07:32:12 | INFO | copy_codebase: False
62
+ 2025-04-23,07:32:12 | INFO | csv_caption_key: title
63
+ 2025-04-23,07:32:12 | INFO | csv_img_key: filepath
64
+ 2025-04-23,07:32:12 | INFO | csv_separator:
65
+ 2025-04-23,07:32:12 | INFO | dataset_resampled: False
66
+ 2025-04-23,07:32:12 | INFO | dataset_type: synthetic
67
+ 2025-04-23,07:32:12 | INFO | ddp_static_graph: False
68
+ 2025-04-23,07:32:12 | INFO | debug: False
69
+ 2025-04-23,07:32:12 | INFO | delete_previous_checkpoint: False
70
+ 2025-04-23,07:32:12 | INFO | device: cuda:0
71
+ 2025-04-23,07:32:12 | INFO | dist_backend: None
72
+ 2025-04-23,07:32:12 | INFO | dist_url: None
73
+ 2025-04-23,07:32:12 | INFO | distill: False
74
+ 2025-04-23,07:32:12 | INFO | distill_model: None
75
+ 2025-04-23,07:32:12 | INFO | distill_pretrained: None
76
+ 2025-04-23,07:32:12 | INFO | distributed: True
77
+ 2025-04-23,07:32:12 | INFO | epochs: 40
78
+ 2025-04-23,07:32:12 | INFO | epochs_cooldown: None
79
+ 2025-04-23,07:32:12 | INFO | eps: 1e-08
80
+ 2025-04-23,07:32:12 | INFO | force_custom_text: False
81
+ 2025-04-23,07:32:12 | INFO | force_image_size: None
82
+ 2025-04-23,07:32:12 | INFO | force_patch_dropout: None
83
+ 2025-04-23,07:32:12 | INFO | force_quick_gelu: False
84
+ 2025-04-23,07:32:12 | INFO | gather_with_grad: True
85
+ 2025-04-23,07:32:12 | INFO | grad_checkpointing: True
86
+ 2025-04-23,07:32:12 | INFO | grad_clip_norm: None
87
+ 2025-04-23,07:32:12 | INFO | horovod: False
88
+ 2025-04-23,07:32:12 | INFO | image_interpolation: None
89
+ 2025-04-23,07:32:12 | INFO | image_mean: None
90
+ 2025-04-23,07:32:12 | INFO | image_resize_mode: None
91
+ 2025-04-23,07:32:12 | INFO | image_std: None
92
+ 2025-04-23,07:32:12 | INFO | imagenet_v2: None
93
+ 2025-04-23,07:32:12 | INFO | imagenet_val: None
94
+ 2025-04-23,07:32:12 | INFO | keep_func_name:
95
+ 2025-04-23,07:32:12 | INFO | local_loss: False
96
+ 2025-04-23,07:32:12 | INFO | local_rank: 0
97
+ 2025-04-23,07:32:12 | INFO | lock_image: False
98
+ 2025-04-23,07:32:12 | INFO | lock_image_freeze_bn_stats: False
99
+ 2025-04-23,07:32:12 | INFO | lock_image_unlocked_groups: 0
100
+ 2025-04-23,07:32:12 | INFO | lock_text: False
101
+ 2025-04-23,07:32:12 | INFO | lock_text_freeze_layer_norm: False
102
+ 2025-04-23,07:32:12 | INFO | lock_text_unlocked_layers: 0
103
+ 2025-04-23,07:32:12 | INFO | log_every_n_steps: 100
104
+ 2025-04-23,07:32:12 | INFO | log_level: 20
105
+ 2025-04-23,07:32:12 | INFO | log_local: False
106
+ 2025-04-23,07:32:12 | INFO | log_path: /mnt/personal/zhudongy/cc3m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-16-cc3m-laclip-mix-010-filled-use_image_closest/out.log
107
+ 2025-04-23,07:32:12 | INFO | logs: /mnt/personal/zhudongy/cc3m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs
108
+ 2025-04-23,07:32:12 | INFO | loss_dist_impl: None
109
+ 2025-04-23,07:32:12 | INFO | lr: 0.001
110
+ 2025-04-23,07:32:12 | INFO | lr_cooldown_end: 0.0
111
+ 2025-04-23,07:32:12 | INFO | lr_cooldown_power: 1.0
112
+ 2025-04-23,07:32:12 | INFO | lr_scheduler: cosine
113
+ 2025-04-23,07:32:12 | INFO | map_func_name: use_image_closest
114
+ 2025-04-23,07:32:12 | INFO | model: ViT-B-16
115
+ 2025-04-23,07:32:12 | INFO | momentum: None
116
+ 2025-04-23,07:32:12 | INFO | name: ViT-B-16-cc3m-laclip-mix-010-filled-use_image_closest
117
+ 2025-04-23,07:32:12 | INFO | no_set_device_rank: False
118
+ 2025-04-23,07:32:12 | INFO | opt: adamw
119
+ 2025-04-23,07:32:12 | INFO | precision: amp
120
+ 2025-04-23,07:32:12 | INFO | pretrained:
121
+ 2025-04-23,07:32:12 | INFO | pretrained_image: False
122
+ 2025-04-23,07:32:12 | INFO | rank: 0
123
+ 2025-04-23,07:32:12 | INFO | remote_sync: None
124
+ 2025-04-23,07:32:12 | INFO | remote_sync_frequency: 300
125
+ 2025-04-23,07:32:12 | INFO | remote_sync_protocol: s3
126
+ 2025-04-23,07:32:12 | INFO | report_to: wandb
127
+ 2025-04-23,07:32:12 | INFO | resume: None
128
+ 2025-04-23,07:32:12 | INFO | save_frequency: 1
129
+ 2025-04-23,07:32:12 | INFO | save_most_recent: False
130
+ 2025-04-23,07:32:12 | INFO | seed: 0
131
+ 2025-04-23,07:32:12 | INFO | siglip: False
132
+ 2025-04-23,07:32:12 | INFO | skip_scheduler: False
133
+ 2025-04-23,07:32:12 | INFO | tensorboard: False
134
+ 2025-04-23,07:32:12 | INFO | tensorboard_path:
135
+ 2025-04-23,07:32:12 | INFO | torchcompile: False
136
+ 2025-04-23,07:32:12 | INFO | torchscript: False
137
+ 2025-04-23,07:32:12 | INFO | trace: False
138
+ 2025-04-23,07:32:12 | INFO | train_data: /mnt/personal/zhudongy/cc3m-hgf-wds/{0000..0301}.tar
139
+ 2025-04-23,07:32:12 | INFO | train_data_upsampling_factors: None
140
+ 2025-04-23,07:32:12 | INFO | train_num_samples: 3016640
141
+ 2025-04-23,07:32:12 | INFO | use_bn_sync: False
142
+ 2025-04-23,07:32:12 | INFO | use_bnb_linear: None
143
+ 2025-04-23,07:32:12 | INFO | val_data: None
144
+ 2025-04-23,07:32:12 | INFO | val_frequency: 1
145
+ 2025-04-23,07:32:12 | INFO | val_num_samples: None
146
+ 2025-04-23,07:32:12 | INFO | wandb: True
147
+ 2025-04-23,07:32:12 | INFO | wandb_notes:
148
+ 2025-04-23,07:32:12 | INFO | wandb_project_name: open-clip
149
+ 2025-04-23,07:32:12 | INFO | warmup: 368
150
+ 2025-04-23,07:32:12 | INFO | wd: 0.5
151
+ 2025-04-23,07:32:12 | INFO | workers: 16
152
+ 2025-04-23,07:32:12 | INFO | world_size: 4
153
+ 2025-04-23,07:32:12 | INFO | zeroshot_frequency: 2
154
+ 2025-04-23,07:32:14 | INFO | Created AdamW (adamw) optimizer: lr: 0.001, betas: (0.9, 0.98), eps: 1e-08, weight_decay: 0.5, amsgrad: False, foreach: None, maximize: False, capturable: False, differentiable: False, fused: None
155
+ 2025-04-23,07:35:07 | INFO | Start epoch 0
156
+ 2025-04-23,07:35:55 | INFO | Train Epoch: 0 [ 8192/3047424 (0%)] Data (t): 11.688 Batch (t): 48.250, 169.782/s, 42.4456/s/gpu LR: 0.000003 Logit Scale: 14.286 Imm_image: -0.26842 (-0.26842) Imm_text: -0.26842 (-0.26842) Isd_image: -0.26748 (-0.26748) Isd_text: -0.26748 (-0.26748) Contrastive_loss: 9.1269 (9.1269) Loss: 9.1269 (9.1269)
157
+ 2025-04-23,07:43:56 | INFO | Train Epoch: 0 [ 827392/3047424 (27%)] Data (t): 0.599 Batch (t): 4.816, 1659.73/s, 414.933/s/gpu LR: 0.000274 Logit Scale: 14.250 Imm_image: 4.0661 (1.8988) Imm_text: 4.0661 (1.8988) Isd_image: 2.3502 (1.0414) Isd_text: 2.3502 (1.0414) Contrastive_loss: 8.2034 (8.6651) Loss: 8.2034 (8.6651)
158
+ 2025-04-23,07:52:04 | INFO | Train Epoch: 0 [1646592/3047424 (54%)] Data (t): 0.641 Batch (t): 4.876, 1690.64/s, 422.659/s/gpu LR: 0.000546 Logit Scale: 14.225 Imm_image: 5.6783 (3.1587) Imm_text: 5.6783 (3.1587) Isd_image: 2.0150 (1.3659) Isd_text: 2.0150 (1.3659) Contrastive_loss: 7.4679 (8.2660) Loss: 7.4679 (8.2660)
159
+ 2025-04-23,08:00:13 | INFO | Train Epoch: 0 [2465792/3047424 (81%)] Data (t): 0.663 Batch (t): 4.893, 1706.19/s, 426.548/s/gpu LR: 0.000818 Logit Scale: 14.218 Imm_image: 5.7583 (3.8086) Imm_text: 5.7583 (3.8086) Isd_image: 2.1064 (1.5510) Isd_text: 2.1064 (1.5510) Contrastive_loss: 7.3858 (8.0460) Loss: 7.3858 (8.0460)
160
+ 2025-04-23,08:05:55 | INFO | Train Epoch: 0 [3047424/3047424 (100%)] Data (t): 0.618 Batch (t): 4.817, 1740.31/s, 435.078/s/gpu LR: 0.001000 Logit Scale: 14.263 Imm_image: 6.7296 (4.3928) Imm_text: 6.7296 (4.3928) Isd_image: 1.9461 (1.6301) Isd_text: 1.9461 (1.6301) Contrastive_loss: 6.9389 (7.8246) Loss: 6.9389 (7.8246)
161
+ 2025-04-23,08:05:58 | INFO | Start epoch 1
162
+ 2025-04-23,08:06:12 | INFO | Train Epoch: 1 [ 8192/3047424 (0%)] Data (t): 9.735 Batch (t): 14.372, 569.978/s, 142.494/s/gpu LR: 0.001000 Logit Scale: 14.268 Imm_image: 6.7693 (6.7693) Imm_text: 6.7693 (6.7693) Isd_image: 1.9439 (1.9439) Isd_text: 1.9439 (1.9439) Contrastive_loss: 6.8991 (6.8991) Loss: 6.8991 (6.8991)
163
+ 2025-04-23,08:14:21 | INFO | Train Epoch: 1 [ 827392/3047424 (27%)] Data (t): 0.656 Batch (t): 4.892, 1686.51/s, 421.627/s/gpu LR: 0.001000 Logit Scale: 14.952 Imm_image: 7.1137 (6.9415) Imm_text: 7.1137 (6.9415) Isd_image: 1.6997 (1.8218) Isd_text: 1.6997 (1.8218) Contrastive_loss: 6.5837 (6.7414) Loss: 6.5837 (6.7414)
164
+ 2025-04-23,08:22:35 | INFO | Train Epoch: 1 [1646592/3047424 (54%)] Data (t): 0.691 Batch (t): 4.940, 1631.84/s, 407.961/s/gpu LR: 0.001000 Logit Scale: 16.022 Imm_image: 7.6521 (7.1784) Imm_text: 7.6521 (7.1784) Isd_image: 1.3301 (1.6579) Isd_text: 1.3301 (1.6579) Contrastive_loss: 6.1517 (6.5448) Loss: 6.1517 (6.5448)
165
+ 2025-04-23,08:30:49 | INFO | Train Epoch: 1 [2465792/3047424 (81%)] Data (t): 0.691 Batch (t): 4.938, 1662.45/s, 415.612/s/gpu LR: 0.000999 Logit Scale: 17.294 Imm_image: 8.3083 (7.4608) Imm_text: 8.3083 (7.4608) Isd_image: 0.98160 (1.4888) Isd_text: 0.98160 (1.4888) Contrastive_loss: 5.7666 (6.3503) Loss: 5.7666 (6.3503)
166
+ 2025-04-23,08:36:38 | INFO | Train Epoch: 1 [3047424/3047424 (100%)] Data (t): 0.677 Batch (t): 4.913, 1721.21/s, 430.302/s/gpu LR: 0.000998 Logit Scale: 18.227 Imm_image: 8.7115 (7.7110) Imm_text: 8.7115 (7.7110) Isd_image: 1.2487 (1.4408) Isd_text: 1.2487 (1.4408) Contrastive_loss: 5.6038 (6.2010) Loss: 5.6038 (6.2010)
167
+ 2025-04-23,08:36:40 | INFO | Start epoch 2
168
+ 2025-04-23,08:36:54 | INFO | Train Epoch: 2 [ 8192/3047424 (0%)] Data (t): 9.722 Batch (t): 14.293, 573.162/s, 143.291/s/gpu LR: 0.000998 Logit Scale: 18.244 Imm_image: 8.8415 (8.8415) Imm_text: 8.8415 (8.8415) Isd_image: 1.0674 (1.0674) Isd_text: 1.0674 (1.0674) Contrastive_loss: 5.4338 (5.4338) Loss: 5.4338 (5.4338)
169
+ 2025-04-23,08:45:02 | INFO | Train Epoch: 2 [ 827392/3047424 (27%)] Data (t): 0.637 Batch (t): 4.884, 1674.79/s, 418.697/s/gpu LR: 0.000997 Logit Scale: 19.588 Imm_image: 9.0481 (8.9448) Imm_text: 9.0481 (8.9448) Isd_image: 1.0545 (1.0609) Isd_text: 1.0545 (1.0609) Contrastive_loss: 5.4357 (5.4348) Loss: 5.4357 (5.4348)
170
+ 2025-04-23,08:53:16 | INFO | Train Epoch: 2 [1646592/3047424 (54%)] Data (t): 0.665 Batch (t): 4.933, 1672.13/s, 418.032/s/gpu LR: 0.000996 Logit Scale: 21.147 Imm_image: 9.8591 (9.2496) Imm_text: 9.8591 (9.2496) Isd_image: 0.90188 (1.0079) Isd_text: 0.90188 (1.0079) Contrastive_loss: 5.0564 (5.3087) Loss: 5.0564 (5.3087)
171
+ 2025-04-23,09:01:24 | INFO | Train Epoch: 2 [2465792/3047424 (81%)] Data (t): 0.644 Batch (t): 4.886, 1699.77/s, 424.942/s/gpu LR: 0.000995 Logit Scale: 22.610 Imm_image: 10.261 (9.5024) Imm_text: 10.261 (9.5024) Isd_image: 1.0353 (1.0148) Isd_text: 1.0353 (1.0148) Contrastive_loss: 4.9522 (5.2195) Loss: 4.9522 (5.2195)
172
+ 2025-04-23,09:07:11 | INFO | Train Epoch: 2 [3047424/3047424 (100%)] Data (t): 0.649 Batch (t): 4.883, 1723.83/s, 430.958/s/gpu LR: 0.000993 Logit Scale: 23.553 Imm_image: 10.702 (9.7424) Imm_text: 10.702 (9.7424) Isd_image: 1.1049 (1.0328) Isd_text: 1.1049 (1.0328) Contrastive_loss: 4.7509 (5.1258) Loss: 4.7509 (5.1258)
173
+ 2025-04-23,09:07:13 | INFO | Start epoch 3
174
+ 2025-04-23,09:07:29 | INFO | Train Epoch: 3 [ 8192/3047424 (0%)] Data (t): 11.699 Batch (t): 16.279, 503.218/s, 125.805/s/gpu LR: 0.000993 Logit Scale: 23.565 Imm_image: 10.659 (10.659) Imm_text: 10.659 (10.659) Isd_image: 1.1997 (1.1997) Isd_text: 1.1997 (1.1997) Contrastive_loss: 4.7960 (4.7960) Loss: 4.7960 (4.7960)
175
+ 2025-04-23,09:15:50 | INFO | Train Epoch: 3 [ 827392/3047424 (27%)] Data (t): 0.728 Batch (t): 5.008, 1625.96/s, 406.490/s/gpu LR: 0.000992 Logit Scale: 25.119 Imm_image: 11.391 (11.025) Imm_text: 11.391 (11.025) Isd_image: 1.0692 (1.1344) Isd_text: 1.0692 (1.1344) Contrastive_loss: 4.5134 (4.6547) Loss: 4.5134 (4.6547)
176
+ 2025-04-23,09:24:14 | INFO | Train Epoch: 3 [1646592/3047424 (54%)] Data (t): 0.749 Batch (t): 5.040, 1673.29/s, 418.324/s/gpu LR: 0.000990 Logit Scale: 26.389 Imm_image: 12.174 (11.408) Imm_text: 12.174 (11.408) Isd_image: 1.5036 (1.2575) Isd_text: 1.5036 (1.2575) Contrastive_loss: 4.3131 (4.5408) Loss: 4.3131 (4.5408)
177
+ 2025-04-23,09:32:30 | INFO | Train Epoch: 3 [2465792/3047424 (81%)] Data (t): 0.712 Batch (t): 4.965, 1658.39/s, 414.597/s/gpu LR: 0.000987 Logit Scale: 27.363 Imm_image: 12.462 (11.672) Imm_text: 12.462 (11.672) Isd_image: 1.7198 (1.3731) Isd_text: 1.7198 (1.3731) Contrastive_loss: 4.2643 (4.4717) Loss: 4.2643 (4.4717)
178
+ 2025-04-23,09:38:15 | INFO | Train Epoch: 3 [3047424/3047424 (100%)] Data (t): 0.645 Batch (t): 4.859, 1733.67/s, 433.417/s/gpu LR: 0.000985 Logit Scale: 28.056 Imm_image: 12.724 (11.882) Imm_text: 12.724 (11.882) Isd_image: 1.8183 (1.4621) Isd_text: 1.8183 (1.4621) Contrastive_loss: 4.0980 (4.3969) Loss: 4.0980 (4.3969)
179
+ 2025-04-23,09:38:17 | INFO | Start epoch 4
180
+ 2025-04-23,09:38:32 | INFO | Train Epoch: 4 [ 8192/3047424 (0%)] Data (t): 10.373 Batch (t): 14.807, 553.246/s, 138.312/s/gpu LR: 0.000985 Logit Scale: 28.068 Imm_image: 12.910 (12.910) Imm_text: 12.910 (12.910) Isd_image: 1.3867 (1.3867) Isd_text: 1.3867 (1.3867) Contrastive_loss: 3.9426 (3.9426) Loss: 3.9426 (3.9426)
181
+ 2025-04-23,09:46:43 | INFO | Train Epoch: 4 [ 827392/3047424 (27%)] Data (t): 0.664 Batch (t): 4.909, 1680.47/s, 420.118/s/gpu LR: 0.000983 Logit Scale: 29.420 Imm_image: 13.233 (13.071) Imm_text: 13.233 (13.071) Isd_image: 1.4567 (1.4217) Isd_text: 1.4567 (1.4217) Contrastive_loss: 3.9388 (3.9407) Loss: 3.9388 (3.9407)
182
+ 2025-04-23,09:54:54 | INFO | Train Epoch: 4 [1646592/3047424 (54%)] Data (t): 0.662 Batch (t): 4.912, 1622.51/s, 405.627/s/gpu LR: 0.000980 Logit Scale: 30.416 Imm_image: 13.668 (13.270) Imm_text: 13.668 (13.270) Isd_image: 1.7894 (1.5443) Isd_text: 1.7894 (1.5443) Contrastive_loss: 3.9101 (3.9305) Loss: 3.9101 (3.9305)
183
+ 2025-04-23,10:03:12 | INFO | Train Epoch: 4 [2465792/3047424 (81%)] Data (t): 0.723 Batch (t): 4.979, 1687.52/s, 421.880/s/gpu LR: 0.000977 Logit Scale: 31.147 Imm_image: 14.036 (13.462) Imm_text: 14.036 (13.462) Isd_image: 1.8956 (1.6321) Isd_text: 1.8956 (1.6321) Contrastive_loss: 3.8185 (3.9025) Loss: 3.8185 (3.9025)
184
+ 2025-04-23,10:08:55 | INFO | Train Epoch: 4 [3047424/3047424 (100%)] Data (t): 0.616 Batch (t): 4.826, 1743.04/s, 435.761/s/gpu LR: 0.000974 Logit Scale: 31.495 Imm_image: 14.268 (13.623) Imm_text: 14.268 (13.623) Isd_image: 2.2144 (1.7486) Isd_text: 2.2144 (1.7486) Contrastive_loss: 3.6571 (3.8534) Loss: 3.6571 (3.8534)
185
+ 2025-04-23,10:08:57 | INFO | Start epoch 5
186
+ 2025-04-23,10:09:11 | INFO | Train Epoch: 5 [ 8192/3047424 (0%)] Data (t): 9.987 Batch (t): 14.301, 572.808/s, 143.202/s/gpu LR: 0.000974 Logit Scale: 31.502 Imm_image: 14.458 (14.458) Imm_text: 14.458 (14.458) Isd_image: 1.9379 (1.9379) Isd_text: 1.9379 (1.9379) Contrastive_loss: 3.4843 (3.4843) Loss: 3.4843 (3.4843)
187
+ 2025-04-23,10:17:15 | INFO | Train Epoch: 5 [ 827392/3047424 (27%)] Data (t): 0.626 Batch (t): 4.836, 1677.21/s, 419.302/s/gpu LR: 0.000971 Logit Scale: 32.573 Imm_image: 14.586 (14.522) Imm_text: 14.586 (14.522) Isd_image: 1.7669 (1.8524) Isd_text: 1.7669 (1.8524) Contrastive_loss: 3.5427 (3.5135) Loss: 3.5427 (3.5135)
188
+ 2025-04-23,10:25:16 | INFO | Train Epoch: 5 [1646592/3047424 (54%)] Data (t): 0.604 Batch (t): 4.811, 1688.95/s, 422.238/s/gpu LR: 0.000967 Logit Scale: 33.265 Imm_image: 14.931 (14.659) Imm_text: 14.931 (14.659) Isd_image: 2.0275 (1.9107) Isd_text: 2.0275 (1.9107) Contrastive_loss: 3.5602 (3.5291) Loss: 3.5602 (3.5291)
189
+ 2025-04-23,10:33:21 | INFO | Train Epoch: 5 [2465792/3047424 (81%)] Data (t): 0.613 Batch (t): 4.855, 1685.49/s, 421.371/s/gpu LR: 0.000963 Logit Scale: 33.927 Imm_image: 15.212 (14.797) Imm_text: 15.212 (14.797) Isd_image: 2.2299 (1.9905) Isd_text: 2.2299 (1.9905) Contrastive_loss: 3.5100 (3.5243) Loss: 3.5100 (3.5243)
190
+ 2025-04-23,10:39:04 | INFO | Train Epoch: 5 [3047424/3047424 (100%)] Data (t): 0.601 Batch (t): 4.835, 1738.61/s, 434.651/s/gpu LR: 0.000960 Logit Scale: 34.240 Imm_image: 15.462 (14.930) Imm_text: 15.462 (14.930) Isd_image: 2.4886 (2.0902) Isd_text: 2.4886 (2.0902) Contrastive_loss: 3.3225 (3.4840) Loss: 3.3225 (3.4840)
191
+ 2025-04-23,10:39:06 | INFO | Start epoch 6
192
+ 2025-04-23,10:39:20 | INFO | Train Epoch: 6 [ 8192/3047424 (0%)] Data (t): 9.834 Batch (t): 14.186, 577.477/s, 144.369/s/gpu LR: 0.000960 Logit Scale: 34.247 Imm_image: 15.691 (15.691) Imm_text: 15.691 (15.691) Isd_image: 2.2189 (2.2189) Isd_text: 2.2189 (2.2189) Contrastive_loss: 3.1608 (3.1608) Loss: 3.1608 (3.1608)
193
+ 2025-04-23,10:47:27 | INFO | Train Epoch: 6 [ 827392/3047424 (27%)] Data (t): 0.625 Batch (t): 4.867, 1678.01/s, 419.503/s/gpu LR: 0.000955 Logit Scale: 35.296 Imm_image: 15.859 (15.775) Imm_text: 15.859 (15.775) Isd_image: 1.8915 (2.0552) Isd_text: 1.8915 (2.0552) Contrastive_loss: 3.2581 (3.2095) Loss: 3.2581 (3.2095)
194
+ 2025-04-23,10:55:31 | INFO | Train Epoch: 6 [1646592/3047424 (54%)] Data (t): 0.615 Batch (t): 4.839, 1681.25/s, 420.313/s/gpu LR: 0.000951 Logit Scale: 35.850 Imm_image: 16.085 (15.878) Imm_text: 16.085 (15.878) Isd_image: 2.3749 (2.1617) Isd_text: 2.3749 (2.1617) Contrastive_loss: 3.2280 (3.2156) Loss: 3.2280 (3.2156)
195
+ 2025-04-23,11:03:38 | INFO | Train Epoch: 6 [2465792/3047424 (81%)] Data (t): 0.642 Batch (t): 4.872, 1673.55/s, 418.388/s/gpu LR: 0.000946 Logit Scale: 36.338 Imm_image: 16.274 (15.977) Imm_text: 16.274 (15.977) Isd_image: 2.5160 (2.2503) Isd_text: 2.5160 (2.2503) Contrastive_loss: 3.2630 (3.2275) Loss: 3.2630 (3.2275)
196
+ 2025-04-23,11:09:22 | INFO | Train Epoch: 6 [3047424/3047424 (100%)] Data (t): 0.609 Batch (t): 4.847, 1729.32/s, 432.330/s/gpu LR: 0.000943 Logit Scale: 36.605 Imm_image: 16.624 (16.106) Imm_text: 16.624 (16.106) Isd_image: 2.5662 (2.3135) Isd_text: 2.5662 (2.3135) Contrastive_loss: 3.0005 (3.1821) Loss: 3.0005 (3.1821)
197
+ 2025-04-23,11:09:24 | INFO | Start epoch 7
198
+ 2025-04-23,11:09:38 | INFO | Train Epoch: 7 [ 8192/3047424 (0%)] Data (t): 9.907 Batch (t): 14.525, 564.006/s, 141.001/s/gpu LR: 0.000943 Logit Scale: 36.611 Imm_image: 16.816 (16.816) Imm_text: 16.816 (16.816) Isd_image: 2.2393 (2.2393) Isd_text: 2.2393 (2.2393) Contrastive_loss: 2.8283 (2.8283) Loss: 2.8283 (2.8283)
199
+ 2025-04-23,11:17:44 | INFO | Train Epoch: 7 [ 827392/3047424 (27%)] Data (t): 0.627 Batch (t): 4.860, 1708.38/s, 427.094/s/gpu LR: 0.000937 Logit Scale: 37.481 Imm_image: 16.763 (16.789) Imm_text: 16.763 (16.789) Isd_image: 2.2367 (2.2380) Isd_text: 2.2367 (2.2380) Contrastive_loss: 3.0166 (2.9225) Loss: 3.0166 (2.9225)
200
+ 2025-04-23,11:25:50 | INFO | Train Epoch: 7 [1646592/3047424 (54%)] Data (t): 0.605 Batch (t): 4.852, 1673.58/s, 418.396/s/gpu LR: 0.000932 Logit Scale: 38.031 Imm_image: 17.025 (16.868) Imm_text: 17.025 (16.868) Isd_image: 2.6848 (2.3869) Isd_text: 2.6848 (2.3869) Contrastive_loss: 3.0436 (2.9628) Loss: 3.0436 (2.9628)
201
+ 2025-04-23,11:33:53 | INFO | Train Epoch: 7 [2465792/3047424 (81%)] Data (t): 0.615 Batch (t): 4.829, 1704.64/s, 426.159/s/gpu LR: 0.000927 Logit Scale: 38.372 Imm_image: 17.098 (16.926) Imm_text: 17.098 (16.926) Isd_image: 2.8312 (2.4980) Isd_text: 2.8312 (2.4980) Contrastive_loss: 3.0427 (2.9828) Loss: 3.0427 (2.9828)
202
+ 2025-04-23,11:39:35 | INFO | Train Epoch: 7 [3047424/3047424 (100%)] Data (t): 0.603 Batch (t): 4.821, 1734.49/s, 433.621/s/gpu LR: 0.000922 Logit Scale: 38.554 Imm_image: 17.536 (17.048) Imm_text: 17.536 (17.048) Isd_image: 3.0713 (2.6127) Isd_text: 3.0713 (2.6127) Contrastive_loss: 2.7983 (2.9459) Loss: 2.7983 (2.9459)
203
+ 2025-04-23,11:39:37 | INFO | Start epoch 8
204
+ 2025-04-23,11:39:51 | INFO | Train Epoch: 8 [ 8192/3047424 (0%)] Data (t): 10.217 Batch (t): 14.565, 562.454/s, 140.613/s/gpu LR: 0.000922 Logit Scale: 38.558 Imm_image: 17.770 (17.770) Imm_text: 17.770 (17.770) Isd_image: 2.8486 (2.8486) Isd_text: 2.8486 (2.8486) Contrastive_loss: 2.6171 (2.6171) Loss: 2.6171 (2.6171)
205
+ 2025-04-23,11:47:56 | INFO | Train Epoch: 8 [ 827392/3047424 (27%)] Data (t): 0.623 Batch (t): 4.850, 1713.41/s, 428.352/s/gpu LR: 0.000917 Logit Scale: 39.439 Imm_image: 17.782 (17.776) Imm_text: 17.782 (17.776) Isd_image: 2.3049 (2.5768) Isd_text: 2.3049 (2.5768) Contrastive_loss: 2.7607 (2.6889) Loss: 2.7607 (2.6889)
206
+ 2025-04-23,11:56:01 | INFO | Train Epoch: 8 [1646592/3047424 (54%)] Data (t): 0.615 Batch (t): 4.849, 1679.70/s, 419.926/s/gpu LR: 0.000910 Logit Scale: 39.883 Imm_image: 17.796 (17.783) Imm_text: 17.796 (17.783) Isd_image: 2.6153 (2.5896) Isd_text: 2.6153 (2.5896) Contrastive_loss: 2.8561 (2.7447) Loss: 2.8561 (2.7447)
207
+ 2025-04-23,12:04:06 | INFO | Train Epoch: 8 [2465792/3047424 (81%)] Data (t): 0.593 Batch (t): 4.850, 1712.21/s, 428.052/s/gpu LR: 0.000904 Logit Scale: 40.068 Imm_image: 17.944 (17.823) Imm_text: 17.944 (17.823) Isd_image: 3.0779 (2.7117) Isd_text: 3.0779 (2.7117) Contrastive_loss: 2.8710 (2.7763) Loss: 2.8710 (2.7763)
208
+ 2025-04-23,12:09:48 | INFO | Train Epoch: 8 [3047424/3047424 (100%)] Data (t): 0.602 Batch (t): 4.819, 1740.62/s, 435.156/s/gpu LR: 0.000900 Logit Scale: 40.237 Imm_image: 18.255 (17.909) Imm_text: 18.255 (17.909) Isd_image: 3.1208 (2.7935) Isd_text: 3.1208 (2.7935) Contrastive_loss: 2.5980 (2.7406) Loss: 2.5980 (2.7406)
209
+ 2025-04-23,12:09:50 | INFO | Start epoch 9
210
+ 2025-04-23,12:10:05 | INFO | Train Epoch: 9 [ 8192/3047424 (0%)] Data (t): 10.464 Batch (t): 14.842, 551.942/s, 137.985/s/gpu LR: 0.000900 Logit Scale: 40.242 Imm_image: 18.493 (18.493) Imm_text: 18.493 (18.493) Isd_image: 2.9041 (2.9041) Isd_text: 2.9041 (2.9041) Contrastive_loss: 2.4125 (2.4125) Loss: 2.4125 (2.4125)
211
+ 2025-04-23,12:18:09 | INFO | Train Epoch: 9 [ 827392/3047424 (27%)] Data (t): 0.616 Batch (t): 4.846, 1678.31/s, 419.577/s/gpu LR: 0.000893 Logit Scale: 41.143 Imm_image: 18.438 (18.465) Imm_text: 18.438 (18.465) Isd_image: 2.6546 (2.7794) Isd_text: 2.6546 (2.7794) Contrastive_loss: 2.6158 (2.5141) Loss: 2.6158 (2.5141)
212
+ 2025-04-23,12:26:12 | INFO | Train Epoch: 9 [1646592/3047424 (54%)] Data (t): 0.604 Batch (t): 4.828, 1693.18/s, 423.296/s/gpu LR: 0.000886 Logit Scale: 41.501 Imm_image: 18.639 (18.523) Imm_text: 18.639 (18.523) Isd_image: 2.8961 (2.8183) Isd_text: 2.8961 (2.8183) Contrastive_loss: 2.6100 (2.5461) Loss: 2.6100 (2.5461)
213
+ 2025-04-23,12:34:16 | INFO | Train Epoch: 9 [2465792/3047424 (81%)] Data (t): 0.605 Batch (t): 4.835, 1707.59/s, 426.898/s/gpu LR: 0.000879 Logit Scale: 41.752 Imm_image: 18.623 (18.548) Imm_text: 18.623 (18.548) Isd_image: 3.3804 (2.9588) Isd_text: 3.3804 (2.9588) Contrastive_loss: 2.6625 (2.5752) Loss: 2.6625 (2.5752)
214
+ 2025-04-23,12:40:01 | INFO | Train Epoch: 9 [3047424/3047424 (100%)] Data (t): 0.627 Batch (t): 4.862, 1728.81/s, 432.201/s/gpu LR: 0.000874 Logit Scale: 41.885 Imm_image: 19.054 (18.649) Imm_text: 19.054 (18.649) Isd_image: 3.2765 (3.0224) Isd_text: 3.2765 (3.0224) Contrastive_loss: 2.4132 (2.5428) Loss: 2.4132 (2.5428)
215
+ 2025-04-23,12:40:02 | INFO | Start epoch 10
216
+ 2025-04-23,12:40:18 | INFO | Train Epoch: 10 [ 8192/3047424 (0%)] Data (t): 11.017 Batch (t): 15.689, 522.152/s, 130.538/s/gpu LR: 0.000874 Logit Scale: 41.889 Imm_image: 19.263 (19.263) Imm_text: 19.263 (19.263) Isd_image: 3.0188 (3.0188) Isd_text: 3.0188 (3.0188) Contrastive_loss: 2.2615 (2.2615) Loss: 2.2615 (2.2615)
217
+ 2025-04-23,12:48:24 | INFO | Train Epoch: 10 [ 827392/3047424 (27%)] Data (t): 0.620 Batch (t): 4.863, 1689.08/s, 422.270/s/gpu LR: 0.000867 Logit Scale: 42.804 Imm_image: 19.304 (19.283) Imm_text: 19.304 (19.283) Isd_image: 2.6035 (2.8112) Isd_text: 2.6035 (2.8112) Contrastive_loss: 2.3624 (2.3120) Loss: 2.3624 (2.3120)
218
+ 2025-04-23,12:56:29 | INFO | Train Epoch: 10 [1646592/3047424 (54%)] Data (t): 0.613 Batch (t): 4.848, 1703.17/s, 425.793/s/gpu LR: 0.000859 Logit Scale: 43.163 Imm_image: 19.382 (19.316) Imm_text: 19.382 (19.316) Isd_image: 3.1559 (2.9261) Isd_text: 3.1559 (2.9261) Contrastive_loss: 2.4773 (2.3671) Loss: 2.4773 (2.3671)
219
+ 2025-04-23,13:04:34 | INFO | Train Epoch: 10 [2465792/3047424 (81%)] Data (t): 0.614 Batch (t): 4.854, 1702.02/s, 425.504/s/gpu LR: 0.000852 Logit Scale: 43.312 Imm_image: 19.377 (19.332) Imm_text: 19.377 (19.332) Isd_image: 3.2949 (3.0183) Isd_text: 3.2949 (3.0183) Contrastive_loss: 2.5342 (2.4089) Loss: 2.5342 (2.4089)
220
+ 2025-04-23,13:10:21 | INFO | Train Epoch: 10 [3047424/3047424 (100%)] Data (t): 0.649 Batch (t): 4.886, 1720.83/s, 430.207/s/gpu LR: 0.000846 Logit Scale: 43.468 Imm_image: 19.789 (19.423) Imm_text: 19.789 (19.423) Isd_image: 3.4000 (3.0946) Isd_text: 3.4000 (3.0946) Contrastive_loss: 2.2814 (2.3834) Loss: 2.2814 (2.3834)
221
+ 2025-04-23,13:10:23 | INFO | Start epoch 11
222
+ 2025-04-23,13:10:37 | INFO | Train Epoch: 11 [ 8192/3047424 (0%)] Data (t): 9.457 Batch (t): 13.911, 588.898/s, 147.224/s/gpu LR: 0.000846 Logit Scale: 43.471 Imm_image: 20.063 (20.063) Imm_text: 20.063 (20.063) Isd_image: 3.2905 (3.2905) Isd_text: 3.2905 (3.2905) Contrastive_loss: 2.0882 (2.0882) Loss: 2.0882 (2.0882)
223
+ 2025-04-23,13:18:41 | INFO | Train Epoch: 11 [ 827392/3047424 (27%)] Data (t): 0.620 Batch (t): 4.841, 1679.53/s, 419.881/s/gpu LR: 0.000838 Logit Scale: 44.400 Imm_image: 19.951 (20.007) Imm_text: 19.951 (20.007) Isd_image: 2.8086 (3.0496) Isd_text: 2.8086 (3.0496) Contrastive_loss: 2.2564 (2.1723) Loss: 2.2564 (2.1723)
224
+ 2025-04-23,13:26:48 | INFO | Train Epoch: 11 [1646592/3047424 (54%)] Data (t): 0.636 Batch (t): 4.868, 1676.96/s, 419.240/s/gpu LR: 0.000830 Logit Scale: 44.711 Imm_image: 20.011 (20.008) Imm_text: 20.011 (20.008) Isd_image: 3.3728 (3.1573) Isd_text: 3.3728 (3.1573) Contrastive_loss: 2.3813 (2.2420) Loss: 2.3813 (2.2420)
225
+ 2025-04-23,13:34:56 | INFO | Train Epoch: 11 [2465792/3047424 (81%)] Data (t): 0.647 Batch (t): 4.886, 1715.65/s, 428.913/s/gpu LR: 0.000822 Logit Scale: 44.854 Imm_image: 20.006 (20.007) Imm_text: 20.006 (20.007) Isd_image: 3.3823 (3.2136) Isd_text: 3.3823 (3.2136) Contrastive_loss: 2.3944 (2.2801) Loss: 2.3944 (2.2801)
226
+ 2025-04-23,13:40:38 | INFO | Train Epoch: 11 [3047424/3047424 (100%)] Data (t): 0.583 Batch (t): 4.813, 1730.67/s, 432.668/s/gpu LR: 0.000816 Logit Scale: 44.973 Imm_image: 20.451 (20.096) Imm_text: 20.451 (20.096) Isd_image: 3.5766 (3.2862) Isd_text: 3.5766 (3.2862) Contrastive_loss: 2.1565 (2.2554) Loss: 2.1565 (2.2554)
227
+ 2025-04-23,13:40:40 | INFO | Start epoch 12
228
+ 2025-04-23,13:40:55 | INFO | Train Epoch: 12 [ 8192/3047424 (0%)] Data (t): 10.522 Batch (t): 14.937, 548.422/s, 137.105/s/gpu LR: 0.000816 Logit Scale: 44.978 Imm_image: 20.933 (20.933) Imm_text: 20.933 (20.933) Isd_image: 3.3848 (3.3848) Isd_text: 3.3848 (3.3848) Contrastive_loss: 1.8417 (1.8417) Loss: 1.8417 (1.8417)
229
+ 2025-04-23,13:49:00 | INFO | Train Epoch: 12 [ 827392/3047424 (27%)] Data (t): 0.633 Batch (t): 4.853, 1672.06/s, 418.016/s/gpu LR: 0.000808 Logit Scale: 46.039 Imm_image: 20.761 (20.847) Imm_text: 20.761 (20.847) Isd_image: 2.8626 (3.1237) Isd_text: 2.8626 (3.1237) Contrastive_loss: 2.0789 (1.9603) Loss: 2.0789 (1.9603)
230
+ 2025-04-23,13:57:08 | INFO | Train Epoch: 12 [1646592/3047424 (54%)] Data (t): 0.645 Batch (t): 4.881, 1690.97/s, 422.744/s/gpu LR: 0.000799 Logit Scale: 46.266 Imm_image: 20.758 (20.817) Imm_text: 20.758 (20.817) Isd_image: 3.5226 (3.2567) Isd_text: 3.5226 (3.2567) Contrastive_loss: 2.2293 (2.0500) Loss: 2.2293 (2.0500)
231
+ 2025-04-23,14:05:12 | INFO | Train Epoch: 12 [2465792/3047424 (81%)] Data (t): 0.610 Batch (t): 4.838, 1693.29/s, 423.323/s/gpu LR: 0.000790 Logit Scale: 46.292 Imm_image: 20.788 (20.810) Imm_text: 20.788 (20.810) Isd_image: 3.6327 (3.3507) Isd_text: 3.6327 (3.3507) Contrastive_loss: 2.2577 (2.1019) Loss: 2.2577 (2.1019)
232
+ 2025-04-23,14:10:54 | INFO | Train Epoch: 12 [3047424/3047424 (100%)] Data (t): 0.611 Batch (t): 4.824, 1730.44/s, 432.610/s/gpu LR: 0.000784 Logit Scale: 46.376 Imm_image: 21.088 (20.865) Imm_text: 21.088 (20.865) Isd_image: 3.6858 (3.4177) Isd_text: 3.6858 (3.4177) Contrastive_loss: 2.0089 (2.0833) Loss: 2.0089 (2.0833)
233
+ 2025-04-23,14:10:56 | INFO | Start epoch 13
234
+ 2025-04-23,14:11:11 | INFO | Train Epoch: 13 [ 8192/3047424 (0%)] Data (t): 10.311 Batch (t): 14.830, 552.388/s, 138.097/s/gpu LR: 0.000784 Logit Scale: 46.379 Imm_image: 21.527 (21.527) Imm_text: 21.527 (21.527) Isd_image: 3.5626 (3.5626) Isd_text: 3.5626 (3.5626) Contrastive_loss: 1.7129 (1.7129) Loss: 1.7129 (1.7129)
235
+ 2025-04-23,14:19:19 | INFO | Train Epoch: 13 [ 827392/3047424 (27%)] Data (t): 0.652 Batch (t): 4.881, 1688.75/s, 422.188/s/gpu LR: 0.000775 Logit Scale: 47.383 Imm_image: 21.364 (21.446) Imm_text: 21.364 (21.446) Isd_image: 2.9909 (3.2767) Isd_text: 2.9909 (3.2767) Contrastive_loss: 1.9588 (1.8358) Loss: 1.9588 (1.8358)
236
+ 2025-04-23,14:27:26 | INFO | Train Epoch: 13 [1646592/3047424 (54%)] Data (t): 0.632 Batch (t): 4.869, 1696.53/s, 424.132/s/gpu LR: 0.000766 Logit Scale: 47.663 Imm_image: 21.302 (21.398) Imm_text: 21.302 (21.398) Isd_image: 3.3733 (3.3089) Isd_text: 3.3733 (3.3089) Contrastive_loss: 2.0906 (1.9208) Loss: 2.0906 (1.9208)
237
+ 2025-04-23,14:35:29 | INFO | Train Epoch: 13 [2465792/3047424 (81%)] Data (t): 0.598 Batch (t): 4.834, 1692.36/s, 423.091/s/gpu LR: 0.000756 Logit Scale: 47.625 Imm_image: 21.274 (21.367) Imm_text: 21.274 (21.367) Isd_image: 3.6164 (3.3858) Isd_text: 3.6164 (3.3858) Contrastive_loss: 2.1597 (1.9805) Loss: 2.1597 (1.9805)
238
+ 2025-04-23,14:41:13 | INFO | Train Epoch: 13 [3047424/3047424 (100%)] Data (t): 0.617 Batch (t): 4.844, 1731.48/s, 432.871/s/gpu LR: 0.000750 Logit Scale: 47.775 Imm_image: 21.741 (21.442) Imm_text: 21.741 (21.442) Isd_image: 3.4914 (3.4069) Isd_text: 3.4914 (3.4069) Contrastive_loss: 1.8571 (1.9558) Loss: 1.8571 (1.9558)
239
+ 2025-04-23,14:41:15 | INFO | Start epoch 14
240
+ 2025-04-23,14:41:30 | INFO | Train Epoch: 14 [ 8192/3047424 (0%)] Data (t): 10.220 Batch (t): 14.840, 552.008/s, 138.002/s/gpu LR: 0.000750 Logit Scale: 47.778 Imm_image: 22.180 (22.180) Imm_text: 22.180 (22.180) Isd_image: 3.2993 (3.2993) Isd_text: 3.2993 (3.2993) Contrastive_loss: 1.5975 (1.5975) Loss: 1.5975 (1.5975)
241
+ 2025-04-23,14:49:34 | INFO | Train Epoch: 14 [ 827392/3047424 (27%)] Data (t): 0.618 Batch (t): 4.844, 1700.75/s, 425.187/s/gpu LR: 0.000740 Logit Scale: 48.821 Imm_image: 22.164 (22.172) Imm_text: 22.164 (22.172) Isd_image: 3.3421 (3.3207) Isd_text: 3.3421 (3.3207) Contrastive_loss: 1.8399 (1.7187) Loss: 1.8399 (1.7187)
242
+ 2025-04-23,14:57:40 | INFO | Train Epoch: 14 [1646592/3047424 (54%)] Data (t): 0.618 Batch (t): 4.862, 1680.01/s, 420.002/s/gpu LR: 0.000731 Logit Scale: 49.088 Imm_image: 22.068 (22.137) Imm_text: 22.068 (22.137) Isd_image: 3.3547 (3.3320) Isd_text: 3.3547 (3.3320) Contrastive_loss: 1.9015 (1.7797) Loss: 1.9015 (1.7797)
243
+ 2025-04-23,15:05:47 | INFO | Train Epoch: 14 [2465792/3047424 (81%)] Data (t): 0.630 Batch (t): 4.860, 1704.65/s, 426.163/s/gpu LR: 0.000721 Logit Scale: 49.117 Imm_image: 22.082 (22.123) Imm_text: 22.082 (22.123) Isd_image: 3.6039 (3.4000) Isd_text: 3.6039 (3.4000) Contrastive_loss: 1.9499 (1.8222) Loss: 1.9499 (1.8222)
244
+ 2025-04-23,15:11:28 | INFO | Train Epoch: 14 [3047424/3047424 (100%)] Data (t): 0.594 Batch (t): 4.804, 1733.41/s, 433.351/s/gpu LR: 0.000714 Logit Scale: 49.211 Imm_image: 22.455 (22.190) Imm_text: 22.455 (22.190) Isd_image: 3.6637 (3.4527) Isd_text: 3.6637 (3.4527) Contrastive_loss: 1.6566 (1.7891) Loss: 1.6566 (1.7891)
245
+ 2025-04-23,15:11:29 | INFO | Start epoch 15
246
+ 2025-04-23,15:11:44 | INFO | Train Epoch: 15 [ 8192/3047424 (0%)] Data (t): 10.570 Batch (t): 15.014, 545.642/s, 136.410/s/gpu LR: 0.000714 Logit Scale: 49.214 Imm_image: 22.662 (22.662) Imm_text: 22.662 (22.662) Isd_image: 3.5377 (3.5377) Isd_text: 3.5377 (3.5377) Contrastive_loss: 1.5699 (1.5699) Loss: 1.5699 (1.5699)
247
+ 2025-04-23,15:19:49 | INFO | Train Epoch: 15 [ 827392/3047424 (27%)] Data (t): 0.609 Batch (t): 4.848, 1705.78/s, 426.444/s/gpu LR: 0.000704 Logit Scale: 50.253 Imm_image: 22.761 (22.711) Imm_text: 22.761 (22.711) Isd_image: 3.1256 (3.3317) Isd_text: 3.1256 (3.3317) Contrastive_loss: 1.6918 (1.6309) Loss: 1.6918 (1.6309)
248
+ 2025-04-23,15:27:56 | INFO | Train Epoch: 15 [1646592/3047424 (54%)] Data (t): 0.633 Batch (t): 4.870, 1693.24/s, 423.310/s/gpu LR: 0.000694 Logit Scale: 50.515 Imm_image: 22.709 (22.711) Imm_text: 22.709 (22.711) Isd_image: 3.3204 (3.3279) Isd_text: 3.3204 (3.3279) Contrastive_loss: 1.7885 (1.6834) Loss: 1.7885 (1.6834)
249
+ 2025-04-23,15:35:59 | INFO | Train Epoch: 15 [2465792/3047424 (81%)] Data (t): 0.609 Batch (t): 4.831, 1695.23/s, 423.809/s/gpu LR: 0.000684 Logit Scale: 50.560 Imm_image: 22.749 (22.720) Imm_text: 22.749 (22.720) Isd_image: 3.7641 (3.4370) Isd_text: 3.7641 (3.4370) Contrastive_loss: 1.8261 (1.7191) Loss: 1.8261 (1.7191)
250
+ 2025-04-23,15:41:43 | INFO | Train Epoch: 15 [3047424/3047424 (100%)] Data (t): 0.612 Batch (t): 4.836, 1733.21/s, 433.303/s/gpu LR: 0.000677 Logit Scale: 50.685 Imm_image: 23.236 (22.823) Imm_text: 23.236 (22.823) Isd_image: 3.9319 (3.5359) Isd_text: 3.9319 (3.5359) Contrastive_loss: 1.5232 (1.6799) Loss: 1.5232 (1.6799)
251
+ 2025-04-23,15:41:44 | INFO | Start epoch 16
252
+ 2025-04-23,15:41:59 | INFO | Train Epoch: 16 [ 8192/3047424 (0%)] Data (t): 10.689 Batch (t): 15.084, 543.101/s, 135.775/s/gpu LR: 0.000677 Logit Scale: 50.691 Imm_image: 23.716 (23.716) Imm_text: 23.716 (23.716) Isd_image: 3.6803 (3.6803) Isd_text: 3.6803 (3.6803) Contrastive_loss: 1.3021 (1.3021) Loss: 1.3021 (1.3021)
253
+ 2025-04-23,15:50:05 | INFO | Train Epoch: 16 [ 827392/3047424 (27%)] Data (t): 0.628 Batch (t): 4.853, 1685.44/s, 421.361/s/gpu LR: 0.000667 Logit Scale: 51.787 Imm_image: 23.579 (23.648) Imm_text: 23.579 (23.648) Isd_image: 3.0287 (3.3545) Isd_text: 3.0287 (3.3545) Contrastive_loss: 1.5077 (1.4049) Loss: 1.5077 (1.4049)
254
+ 2025-04-23,15:58:11 | INFO | Train Epoch: 16 [1646592/3047424 (54%)] Data (t): 0.634 Batch (t): 4.866, 1706.02/s, 426.504/s/gpu LR: 0.000657 Logit Scale: 52.058 Imm_image: 23.242 (23.513) Imm_text: 23.242 (23.513) Isd_image: 3.6690 (3.4594) Isd_text: 3.6690 (3.4594) Contrastive_loss: 1.7250 (1.5116) Loss: 1.7250 (1.5116)
255
+ 2025-04-23,16:06:14 | INFO | Train Epoch: 16 [2465792/3047424 (81%)] Data (t): 0.603 Batch (t): 4.830, 1697.73/s, 424.433/s/gpu LR: 0.000646 Logit Scale: 52.027 Imm_image: 23.174 (23.428) Imm_text: 23.174 (23.428) Isd_image: 3.8825 (3.5651) Isd_text: 3.8825 (3.5651) Contrastive_loss: 1.7532 (1.5720) Loss: 1.7532 (1.5720)
256
+ 2025-04-23,16:11:55 | INFO | Train Epoch: 16 [3047424/3047424 (100%)] Data (t): 0.601 Batch (t): 4.799, 1729.62/s, 432.406/s/gpu LR: 0.000639 Logit Scale: 52.111 Imm_image: 23.909 (23.524) Imm_text: 23.909 (23.524) Isd_image: 3.9074 (3.6336) Isd_text: 3.9074 (3.6336) Contrastive_loss: 1.3954 (1.5367) Loss: 1.3954 (1.5367)
257
+ 2025-04-23,16:11:56 | INFO | Start epoch 17
258
+ 2025-04-23,16:12:11 | INFO | Train Epoch: 17 [ 8192/3047424 (0%)] Data (t): 10.260 Batch (t): 14.776, 554.405/s, 138.601/s/gpu LR: 0.000639 Logit Scale: 52.116 Imm_image: 24.251 (24.251) Imm_text: 24.251 (24.251) Isd_image: 3.6545 (3.6545) Isd_text: 3.6545 (3.6545) Contrastive_loss: 1.2508 (1.2508) Loss: 1.2508 (1.2508)
259
+ 2025-04-23,16:20:17 | INFO | Train Epoch: 17 [ 827392/3047424 (27%)] Data (t): 0.621 Batch (t): 4.857, 1686.63/s, 421.657/s/gpu LR: 0.000628 Logit Scale: 53.280 Imm_image: 24.295 (24.273) Imm_text: 24.295 (24.273) Isd_image: 2.8294 (3.2419) Isd_text: 2.8294 (3.2419) Contrastive_loss: 1.3761 (1.3135) Loss: 1.3761 (1.3135)
260
+ 2025-04-23,16:28:23 | INFO | Train Epoch: 17 [1646592/3047424 (54%)] Data (t): 0.624 Batch (t): 4.864, 1682.40/s, 420.599/s/gpu LR: 0.000618 Logit Scale: 53.522 Imm_image: 24.110 (24.219) Imm_text: 24.110 (24.219) Isd_image: 3.3716 (3.2852) Isd_text: 3.3716 (3.2852) Contrastive_loss: 1.5582 (1.3951) Loss: 1.5582 (1.3951)
261
+ 2025-04-23,16:36:31 | INFO | Train Epoch: 17 [2465792/3047424 (81%)] Data (t): 0.643 Batch (t): 4.873, 1686.72/s, 421.681/s/gpu LR: 0.000607 Logit Scale: 53.546 Imm_image: 24.039 (24.174) Imm_text: 24.039 (24.174) Isd_image: 3.7643 (3.4049) Isd_text: 3.7643 (3.4049) Contrastive_loss: 1.5883 (1.4434) Loss: 1.5883 (1.4434)
262
+ 2025-04-23,16:42:16 | INFO | Train Epoch: 17 [3047424/3047424 (100%)] Data (t): 0.636 Batch (t): 4.866, 1737.15/s, 434.287/s/gpu LR: 0.000600 Logit Scale: 53.623 Imm_image: 24.585 (24.256) Imm_text: 24.585 (24.256) Isd_image: 3.7876 (3.4815) Isd_text: 3.7876 (3.4815) Contrastive_loss: 1.2795 (1.4106) Loss: 1.2795 (1.4106)
263
+ 2025-04-23,16:42:18 | INFO | Start epoch 18
264
+ 2025-04-23,16:42:32 | INFO | Train Epoch: 18 [ 8192/3047424 (0%)] Data (t): 10.375 Batch (t): 14.678, 558.105/s, 139.526/s/gpu LR: 0.000600 Logit Scale: 53.624 Imm_image: 25.032 (25.032) Imm_text: 25.032 (25.032) Isd_image: 3.7644 (3.7644) Isd_text: 3.7644 (3.7644) Contrastive_loss: 1.0721 (1.0721) Loss: 1.0721 (1.0721)
265
+ 2025-04-23,16:50:37 | INFO | Train Epoch: 18 [ 827392/3047424 (27%)] Data (t): 0.622 Batch (t): 4.849, 1708.35/s, 427.086/s/gpu LR: 0.000589 Logit Scale: 54.748 Imm_image: 25.098 (25.065) Imm_text: 25.098 (25.065) Isd_image: 3.0186 (3.3915) Isd_text: 3.0186 (3.3915) Contrastive_loss: 1.2153 (1.1437) Loss: 1.2153 (1.1437)
266
+ 2025-04-23,16:58:42 | INFO | Train Epoch: 18 [1646592/3047424 (54%)] Data (t): 0.602 Batch (t): 4.846, 1667.51/s, 416.879/s/gpu LR: 0.000578 Logit Scale: 55.038 Imm_image: 24.953 (25.028) Imm_text: 24.953 (25.028) Isd_image: 3.4799 (3.4210) Isd_text: 3.4799 (3.4210) Contrastive_loss: 1.3360 (1.2078) Loss: 1.3360 (1.2078)
267
+ 2025-04-23,17:06:47 | INFO | Train Epoch: 18 [2465792/3047424 (81%)] Data (t): 0.603 Batch (t): 4.849, 1679.07/s, 419.768/s/gpu LR: 0.000568 Logit Scale: 54.947 Imm_image: 24.574 (24.914) Imm_text: 24.574 (24.914) Isd_image: 3.5862 (3.4623) Isd_text: 3.5862 (3.4623) Contrastive_loss: 1.5291 (1.2881) Loss: 1.5291 (1.2881)
268
+ 2025-04-23,17:12:29 | INFO | Train Epoch: 18 [3047424/3047424 (100%)] Data (t): 0.609 Batch (t): 4.818, 1730.15/s, 432.538/s/gpu LR: 0.000560 Logit Scale: 55.126 Imm_image: 25.432 (25.018) Imm_text: 25.432 (25.018) Isd_image: 3.6323 (3.4963) Isd_text: 3.6323 (3.4963) Contrastive_loss: 1.1055 (1.2516) Loss: 1.1055 (1.2516)
269
+ 2025-04-23,17:12:31 | INFO | Start epoch 19
270
+ 2025-04-23,17:12:46 | INFO | Train Epoch: 19 [ 8192/3047424 (0%)] Data (t): 10.950 Batch (t): 15.455, 530.065/s, 132.516/s/gpu LR: 0.000560 Logit Scale: 55.131 Imm_image: 25.820 (25.820) Imm_text: 25.820 (25.820) Isd_image: 3.4452 (3.4452) Isd_text: 3.4452 (3.4452) Contrastive_loss: 0.96650 (0.96650) Loss: 0.96650 (0.96650)
271
+ 2025-04-23,17:20:51 | INFO | Train Epoch: 19 [ 827392/3047424 (27%)] Data (t): 0.614 Batch (t): 4.846, 1690.44/s, 422.611/s/gpu LR: 0.000549 Logit Scale: 56.267 Imm_image: 25.798 (25.809) Imm_text: 25.798 (25.809) Isd_image: 3.0191 (3.2321) Isd_text: 3.0191 (3.2321) Contrastive_loss: 1.1348 (1.0507) Loss: 1.1348 (1.0507)
272
+ 2025-04-23,17:28:56 | INFO | Train Epoch: 19 [1646592/3047424 (54%)] Data (t): 0.619 Batch (t): 4.856, 1672.28/s, 418.070/s/gpu LR: 0.000538 Logit Scale: 56.631 Imm_image: 25.848 (25.822) Imm_text: 25.848 (25.822) Isd_image: 3.5283 (3.3309) Isd_text: 3.5283 (3.3309) Contrastive_loss: 1.1956 (1.0990) Loss: 1.1956 (1.0990)
273
+ 2025-04-23,17:37:00 | INFO | Train Epoch: 19 [2465792/3047424 (81%)] Data (t): 0.611 Batch (t): 4.841, 1706.32/s, 426.581/s/gpu LR: 0.000528 Logit Scale: 56.731 Imm_image: 25.654 (25.780) Imm_text: 25.654 (25.780) Isd_image: 3.7924 (3.4462) Isd_text: 3.7924 (3.4462) Contrastive_loss: 1.2927 (1.1474) Loss: 1.2927 (1.1474)
274
+ 2025-04-23,17:42:44 | INFO | Train Epoch: 19 [3047424/3047424 (100%)] Data (t): 0.607 Batch (t): 4.841, 1735.33/s, 433.832/s/gpu LR: 0.000520 Logit Scale: 56.899 Imm_image: 26.109 (25.846) Imm_text: 26.109 (25.846) Isd_image: 3.8702 (3.5310) Isd_text: 3.8702 (3.5310) Contrastive_loss: 1.0427 (1.1264) Loss: 1.0427 (1.1264)
275
+ 2025-04-23,17:42:46 | INFO | Start epoch 20
276
+ 2025-04-23,17:43:00 | INFO | Train Epoch: 20 [ 8192/3047424 (0%)] Data (t): 10.418 Batch (t): 14.842, 551.944/s, 137.986/s/gpu LR: 0.000520 Logit Scale: 56.900 Imm_image: 26.385 (26.385) Imm_text: 26.385 (26.385) Isd_image: 3.8449 (3.8449) Isd_text: 3.8449 (3.8449) Contrastive_loss: 0.93277 (0.93277) Loss: 0.93277 (0.93277)
277
+ 2025-04-23,17:51:10 | INFO | Train Epoch: 20 [ 827392/3047424 (27%)] Data (t): 0.609 Batch (t): 4.895, 1641.39/s, 410.347/s/gpu LR: 0.000509 Logit Scale: 58.133 Imm_image: 26.753 (26.569) Imm_text: 26.753 (26.569) Isd_image: 3.0961 (3.4705) Isd_text: 3.0961 (3.4705) Contrastive_loss: 1.0378 (0.98526) Loss: 1.0378 (0.98526)
278
+ 2025-04-23,17:59:18 | INFO | Train Epoch: 20 [1646592/3047424 (54%)] Data (t): 0.624 Batch (t): 4.876, 1678.68/s, 419.670/s/gpu LR: 0.000498 Logit Scale: 58.482 Imm_image: 26.528 (26.555) Imm_text: 26.528 (26.555) Isd_image: 3.6626 (3.5345) Isd_text: 3.6626 (3.5345) Contrastive_loss: 1.1503 (1.0403) Loss: 1.1503 (1.0403)
279
+ 2025-04-23,18:07:25 | INFO | Train Epoch: 20 [2465792/3047424 (81%)] Data (t): 0.642 Batch (t): 4.877, 1689.13/s, 422.283/s/gpu LR: 0.000487 Logit Scale: 58.459 Imm_image: 26.441 (26.527) Imm_text: 26.441 (26.527) Isd_image: 3.6803 (3.5710) Isd_text: 3.6803 (3.5710) Contrastive_loss: 1.1866 (1.0769) Loss: 1.1866 (1.0769)
280
+ 2025-04-23,18:13:08 | INFO | Train Epoch: 20 [3047424/3047424 (100%)] Data (t): 0.605 Batch (t): 4.823, 1732.30/s, 433.076/s/gpu LR: 0.000480 Logit Scale: 58.681 Imm_image: 27.156 (26.653) Imm_text: 27.156 (26.653) Isd_image: 3.8126 (3.6193) Isd_text: 3.8126 (3.6193) Contrastive_loss: 0.91291 (1.0441) Loss: 0.91291 (1.0441)
281
+ 2025-04-23,18:13:10 | INFO | Start epoch 21
282
+ 2025-04-23,18:13:25 | INFO | Train Epoch: 21 [ 8192/3047424 (0%)] Data (t): 10.679 Batch (t): 15.101, 542.474/s, 135.618/s/gpu LR: 0.000480 Logit Scale: 58.684 Imm_image: 27.611 (27.611) Imm_text: 27.611 (27.611) Isd_image: 3.6876 (3.6876) Isd_text: 3.6876 (3.6876) Contrastive_loss: 0.75266 (0.75266) Loss: 0.75266 (0.75266)
283
+ 2025-04-23,18:21:29 | INFO | Train Epoch: 21 [ 827392/3047424 (27%)] Data (t): 0.612 Batch (t): 4.844, 1710.66/s, 427.664/s/gpu LR: 0.000469 Logit Scale: 59.908 Imm_image: 27.728 (27.669) Imm_text: 27.728 (27.669) Isd_image: 3.2130 (3.4503) Isd_text: 3.2130 (3.4503) Contrastive_loss: 0.92249 (0.83757) Loss: 0.92249 (0.83757)
284
+ 2025-04-23,18:29:33 | INFO | Train Epoch: 21 [1646592/3047424 (54%)] Data (t): 0.625 Batch (t): 4.840, 1693.95/s, 423.488/s/gpu LR: 0.000458 Logit Scale: 60.198 Imm_image: 27.489 (27.609) Imm_text: 27.489 (27.609) Isd_image: 3.7729 (3.5578) Isd_text: 3.7729 (3.5578) Contrastive_loss: 1.0028 (0.89266) Loss: 1.0028 (0.89266)
285
+ 2025-04-23,18:37:37 | INFO | Train Epoch: 21 [2465792/3047424 (81%)] Data (t): 0.627 Batch (t): 4.843, 1720.98/s, 430.244/s/gpu LR: 0.000447 Logit Scale: 60.351 Imm_image: 27.429 (27.564) Imm_text: 27.429 (27.564) Isd_image: 3.8784 (3.6380) Isd_text: 3.8784 (3.6380) Contrastive_loss: 1.0920 (0.94250) Loss: 1.0920 (0.94250)
286
+ 2025-04-23,18:43:19 | INFO | Train Epoch: 21 [3047424/3047424 (100%)] Data (t): 0.615 Batch (t): 4.818, 1739.10/s, 434.775/s/gpu LR: 0.000440 Logit Scale: 60.566 Imm_image: 28.101 (27.672) Imm_text: 28.101 (27.672) Isd_image: 4.0612 (3.7226) Isd_text: 4.0612 (3.7226) Contrastive_loss: 0.83134 (0.92027) Loss: 0.83134 (0.92027)
287
+ 2025-04-23,18:43:21 | INFO | Start epoch 22
288
+ 2025-04-23,18:43:36 | INFO | Train Epoch: 22 [ 8192/3047424 (0%)] Data (t): 10.707 Batch (t): 15.039, 544.701/s, 136.175/s/gpu LR: 0.000440 Logit Scale: 60.568 Imm_image: 28.517 (28.517) Imm_text: 28.517 (28.517) Isd_image: 3.9587 (3.9587) Isd_text: 3.9587 (3.9587) Contrastive_loss: 0.69880 (0.69880) Loss: 0.69880 (0.69880)
289
+ 2025-04-23,18:51:41 | INFO | Train Epoch: 22 [ 827392/3047424 (27%)] Data (t): 0.602 Batch (t): 4.851, 1691.67/s, 422.918/s/gpu LR: 0.000429 Logit Scale: 61.810 Imm_image: 28.590 (28.554) Imm_text: 28.590 (28.554) Isd_image: 3.6190 (3.7888) Isd_text: 3.6190 (3.7888) Contrastive_loss: 0.80837 (0.75358) Loss: 0.80837 (0.75358)
290
+ 2025-04-23,18:59:45 | INFO | Train Epoch: 22 [1646592/3047424 (54%)] Data (t): 0.600 Batch (t): 4.832, 1691.81/s, 422.953/s/gpu LR: 0.000418 Logit Scale: 62.154 Imm_image: 28.428 (28.512) Imm_text: 28.428 (28.512) Isd_image: 3.8752 (3.8176) Isd_text: 3.8752 (3.8176) Contrastive_loss: 0.93646 (0.81454) Loss: 0.93646 (0.81454)
291
+ 2025-04-23,19:07:50 | INFO | Train Epoch: 22 [2465792/3047424 (81%)] Data (t): 0.623 Batch (t): 4.851, 1685.17/s, 421.292/s/gpu LR: 0.000407 Logit Scale: 62.274 Imm_image: 28.443 (28.494) Imm_text: 28.443 (28.494) Isd_image: 4.1035 (3.8891) Isd_text: 4.1035 (3.8891) Contrastive_loss: 0.94543 (0.84726) Loss: 0.94543 (0.84726)
292
+ 2025-04-23,19:13:34 | INFO | Train Epoch: 22 [3047424/3047424 (100%)] Data (t): 0.631 Batch (t): 4.858, 1734.42/s, 433.605/s/gpu LR: 0.000400 Logit Scale: 62.512 Imm_image: 29.100 (28.615) Imm_text: 29.100 (28.615) Isd_image: 3.9555 (3.9024) Isd_text: 3.9555 (3.9024) Contrastive_loss: 0.72221 (0.82225) Loss: 0.72221 (0.82225)
293
+ 2025-04-23,19:13:36 | INFO | Start epoch 23
294
+ 2025-04-23,19:13:52 | INFO | Train Epoch: 23 [ 8192/3047424 (0%)] Data (t): 10.827 Batch (t): 15.309, 535.120/s, 133.780/s/gpu LR: 0.000400 Logit Scale: 62.516 Imm_image: 29.372 (29.372) Imm_text: 29.372 (29.372) Isd_image: 3.6809 (3.6809) Isd_text: 3.6809 (3.6809) Contrastive_loss: 0.64292 (0.64292) Loss: 0.64292 (0.64292)
295
+ 2025-04-23,19:22:00 | INFO | Train Epoch: 23 [ 827392/3047424 (27%)] Data (t): 0.645 Batch (t): 4.881, 1677.17/s, 419.293/s/gpu LR: 0.000389 Logit Scale: 63.705 Imm_image: 29.630 (29.501) Imm_text: 29.630 (29.501) Isd_image: 3.6996 (3.6902) Isd_text: 3.6996 (3.6902) Contrastive_loss: 0.70577 (0.67435) Loss: 0.70577 (0.67435)
296
+ 2025-04-23,19:30:08 | INFO | Train Epoch: 23 [1646592/3047424 (54%)] Data (t): 0.637 Batch (t): 4.882, 1689.96/s, 422.491/s/gpu LR: 0.000379 Logit Scale: 64.272 Imm_image: 29.587 (29.529) Imm_text: 29.587 (29.529) Isd_image: 4.0355 (3.8053) Isd_text: 4.0355 (3.8053) Contrastive_loss: 0.79325 (0.71398) Loss: 0.79325 (0.71398)
297
+ 2025-04-23,19:38:16 | INFO | Train Epoch: 23 [2465792/3047424 (81%)] Data (t): 0.647 Batch (t): 4.877, 1684.37/s, 421.091/s/gpu LR: 0.000368 Logit Scale: 64.504 Imm_image: 29.541 (29.532) Imm_text: 29.541 (29.532) Isd_image: 4.2818 (3.9244) Isd_text: 4.2818 (3.9244) Contrastive_loss: 0.84745 (0.74735) Loss: 0.84745 (0.74735)
298
+ 2025-04-23,19:44:01 | INFO | Train Epoch: 23 [3047424/3047424 (100%)] Data (t): 0.638 Batch (t): 4.868, 1730.22/s, 432.556/s/gpu LR: 0.000361 Logit Scale: 64.732 Imm_image: 30.231 (29.672) Imm_text: 30.231 (29.672) Isd_image: 4.0531 (3.9502) Isd_text: 4.0531 (3.9502) Contrastive_loss: 0.64158 (0.72620) Loss: 0.64158 (0.72620)
299
+ 2025-04-23,19:44:04 | INFO | Start epoch 24
300
+ 2025-04-23,19:44:19 | INFO | Train Epoch: 24 [ 8192/3047424 (0%)] Data (t): 10.776 Batch (t): 15.187, 539.421/s, 134.855/s/gpu LR: 0.000361 Logit Scale: 64.734 Imm_image: 30.583 (30.583) Imm_text: 30.583 (30.583) Isd_image: 4.1807 (4.1807) Isd_text: 4.1807 (4.1807) Contrastive_loss: 0.57984 (0.57984) Loss: 0.57984 (0.57984)
301
+ 2025-04-23,19:52:29 | INFO | Train Epoch: 24 [ 827392/3047424 (27%)] Data (t): 0.646 Batch (t): 4.905, 1639.00/s, 409.749/s/gpu LR: 0.000350 Logit Scale: 65.852 Imm_image: 30.850 (30.716) Imm_text: 30.850 (30.716) Isd_image: 3.9888 (4.0848) Isd_text: 3.9888 (4.0848) Contrastive_loss: 0.64103 (0.61044) Loss: 0.64103 (0.61044)
302
+ 2025-04-23,20:00:39 | INFO | Train Epoch: 24 [1646592/3047424 (54%)] Data (t): 0.643 Batch (t): 4.902, 1689.80/s, 422.449/s/gpu LR: 0.000340 Logit Scale: 66.380 Imm_image: 30.689 (30.707) Imm_text: 30.689 (30.707) Isd_image: 4.3764 (4.1820) Isd_text: 4.3764 (4.1820) Contrastive_loss: 0.76139 (0.66075) Loss: 0.76139 (0.66075)
303
+ 2025-04-23,20:08:49 | INFO | Train Epoch: 24 [2465792/3047424 (81%)] Data (t): 0.652 Batch (t): 4.895, 1672.14/s, 418.036/s/gpu LR: 0.000330 Logit Scale: 66.626 Imm_image: 30.679 (30.700) Imm_text: 30.679 (30.700) Isd_image: 4.4415 (4.2469) Isd_text: 4.4415 (4.2469) Contrastive_loss: 0.77092 (0.68830) Loss: 0.77092 (0.68830)
304
+ 2025-04-23,20:14:34 | INFO | Train Epoch: 24 [3047424/3047424 (100%)] Data (t): 0.615 Batch (t): 4.856, 1722.68/s, 430.669/s/gpu LR: 0.000323 Logit Scale: 66.897 Imm_image: 31.622 (30.885) Imm_text: 31.622 (30.885) Isd_image: 4.9389 (4.3853) Isd_text: 4.9389 (4.3853) Contrastive_loss: 0.54939 (0.66051) Loss: 0.54939 (0.66051)
305
+ 2025-04-23,20:14:36 | INFO | Start epoch 25
306
+ 2025-04-23,20:14:51 | INFO | Train Epoch: 25 [ 8192/3047424 (0%)] Data (t): 10.362 Batch (t): 15.011, 545.731/s, 136.433/s/gpu LR: 0.000323 Logit Scale: 66.907 Imm_image: 31.847 (31.847) Imm_text: 31.847 (31.847) Isd_image: 4.6726 (4.6726) Isd_text: 4.6726 (4.6726) Contrastive_loss: 0.51553 (0.51553) Loss: 0.51553 (0.51553)
307
+ 2025-04-23,20:22:58 | INFO | Train Epoch: 25 [ 827392/3047424 (27%)] Data (t): 0.648 Batch (t): 4.874, 1666.25/s, 416.562/s/gpu LR: 0.000312 Logit Scale: 67.978 Imm_image: 31.913 (31.880) Imm_text: 31.913 (31.880) Isd_image: 4.3203 (4.4964) Isd_text: 4.3203 (4.4964) Contrastive_loss: 0.56383 (0.53968) Loss: 0.56383 (0.53968)
308
+ 2025-04-23,20:31:05 | INFO | Train Epoch: 25 [1646592/3047424 (54%)] Data (t): 0.645 Batch (t): 4.873, 1685.36/s, 421.341/s/gpu LR: 0.000302 Logit Scale: 68.548 Imm_image: 32.087 (31.949) Imm_text: 32.087 (31.949) Isd_image: 4.4616 (4.4848) Isd_text: 4.4616 (4.4848) Contrastive_loss: 0.64589 (0.57508) Loss: 0.64589 (0.57508)
309
+ 2025-04-23,20:39:12 | INFO | Train Epoch: 25 [2465792/3047424 (81%)] Data (t): 0.623 Batch (t): 4.866, 1695.24/s, 423.811/s/gpu LR: 0.000293 Logit Scale: 68.936 Imm_image: 31.949 (31.949) Imm_text: 31.949 (31.949) Isd_image: 4.5448 (4.4998) Isd_text: 4.5448 (4.4998) Contrastive_loss: 0.63289 (0.58953) Loss: 0.63289 (0.58953)
310
+ 2025-04-23,20:44:56 | INFO | Train Epoch: 25 [3047424/3047424 (100%)] Data (t): 0.629 Batch (t): 4.851, 1730.89/s, 432.723/s/gpu LR: 0.000286 Logit Scale: 69.145 Imm_image: 32.717 (32.103) Imm_text: 32.717 (32.103) Isd_image: 4.6373 (4.5273) Isd_text: 4.6373 (4.5273) Contrastive_loss: 0.46723 (0.56507) Loss: 0.46723 (0.56507)
311
+ 2025-04-23,20:44:58 | INFO | Start epoch 26
312
+ 2025-04-23,20:45:13 | INFO | Train Epoch: 26 [ 8192/3047424 (0%)] Data (t): 10.831 Batch (t): 15.352, 533.622/s, 133.405/s/gpu LR: 0.000286 Logit Scale: 69.152 Imm_image: 32.954 (32.954) Imm_text: 32.954 (32.954) Isd_image: 4.6923 (4.6923) Isd_text: 4.6923 (4.6923) Contrastive_loss: 0.45325 (0.45325) Loss: 0.45325 (0.45325)
313
+ 2025-04-23,20:53:20 | INFO | Train Epoch: 26 [ 827392/3047424 (27%)] Data (t): 0.632 Batch (t): 4.868, 1686.19/s, 421.549/s/gpu LR: 0.000276 Logit Scale: 70.166 Imm_image: 33.358 (33.156) Imm_text: 33.358 (33.156) Isd_image: 4.7473 (4.7198) Isd_text: 4.7473 (4.7198) Contrastive_loss: 0.50447 (0.47886) Loss: 0.50447 (0.47886)
314
+ 2025-04-23,21:01:26 | INFO | Train Epoch: 26 [1646592/3047424 (54%)] Data (t): 0.620 Batch (t): 4.854, 1680.57/s, 420.144/s/gpu LR: 0.000266 Logit Scale: 70.690 Imm_image: 33.145 (33.153) Imm_text: 33.145 (33.153) Isd_image: 4.8415 (4.7603) Isd_text: 4.8415 (4.7603) Contrastive_loss: 0.53640 (0.49804) Loss: 0.53640 (0.49804)
315
+ 2025-04-23,21:09:29 | INFO | Train Epoch: 26 [2465792/3047424 (81%)] Data (t): 0.627 Batch (t): 4.836, 1707.72/s, 426.930/s/gpu LR: 0.000257 Logit Scale: 71.184 Imm_image: 33.419 (33.219) Imm_text: 33.419 (33.219) Isd_image: 5.1295 (4.8526) Isd_text: 5.1295 (4.8526) Contrastive_loss: 0.54424 (0.50959) Loss: 0.54424 (0.50959)
316
+ 2025-04-23,21:15:13 | INFO | Train Epoch: 26 [3047424/3047424 (100%)] Data (t): 0.626 Batch (t): 4.838, 1732.41/s, 433.102/s/gpu LR: 0.000250 Logit Scale: 71.526 Imm_image: 33.961 (33.367) Imm_text: 33.961 (33.367) Isd_image: 4.8218 (4.8465) Isd_text: 4.8218 (4.8465) Contrastive_loss: 0.42251 (0.49217) Loss: 0.42251 (0.49217)
317
+ 2025-04-23,21:15:14 | INFO | Start epoch 27
318
+ 2025-04-23,21:15:29 | INFO | Train Epoch: 27 [ 8192/3047424 (0%)] Data (t): 10.489 Batch (t): 14.819, 552.800/s, 138.200/s/gpu LR: 0.000250 Logit Scale: 71.532 Imm_image: 34.227 (34.227) Imm_text: 34.227 (34.227) Isd_image: 4.8487 (4.8487) Isd_text: 4.8487 (4.8487) Contrastive_loss: 0.38837 (0.38837) Loss: 0.38837 (0.38837)
319
+ 2025-04-23,21:23:33 | INFO | Train Epoch: 27 [ 827392/3047424 (27%)] Data (t): 0.622 Batch (t): 4.838, 1701.33/s, 425.333/s/gpu LR: 0.000241 Logit Scale: 72.444 Imm_image: 34.472 (34.349) Imm_text: 34.472 (34.349) Isd_image: 4.9049 (4.8768) Isd_text: 4.9049 (4.8768) Contrastive_loss: 0.48689 (0.43763) Loss: 0.48689 (0.43763)
320
+ 2025-04-23,21:31:37 | INFO | Train Epoch: 27 [1646592/3047424 (54%)] Data (t): 0.625 Batch (t): 4.845, 1704.89/s, 426.221/s/gpu LR: 0.000231 Logit Scale: 72.872 Imm_image: 34.401 (34.367) Imm_text: 34.401 (34.367) Isd_image: 5.2252 (4.9929) Isd_text: 5.2252 (4.9929) Contrastive_loss: 0.45936 (0.44487) Loss: 0.45936 (0.44487)
321
+ 2025-04-23,21:39:44 | INFO | Train Epoch: 27 [2465792/3047424 (81%)] Data (t): 0.614 Batch (t): 4.867, 1700.92/s, 425.231/s/gpu LR: 0.000222 Logit Scale: 73.370 Imm_image: 34.448 (34.387) Imm_text: 34.448 (34.387) Isd_image: 5.4297 (5.1021) Isd_text: 5.4297 (5.1021) Contrastive_loss: 0.50985 (0.46112) Loss: 0.50985 (0.46112)
322
+ 2025-04-23,21:45:27 | INFO | Train Epoch: 27 [3047424/3047424 (100%)] Data (t): 0.617 Batch (t): 4.832, 1738.27/s, 434.567/s/gpu LR: 0.000216 Logit Scale: 73.676 Imm_image: 35.307 (34.571) Imm_text: 35.307 (34.571) Isd_image: 5.6027 (5.2022) Isd_text: 5.6027 (5.2022) Contrastive_loss: 0.39095 (0.44708) Loss: 0.39095 (0.44708)
323
+ 2025-04-23,21:45:29 | INFO | Start epoch 28
324
+ 2025-04-23,21:45:44 | INFO | Train Epoch: 28 [ 8192/3047424 (0%)] Data (t): 10.732 Batch (t): 15.261, 536.789/s, 134.197/s/gpu LR: 0.000216 Logit Scale: 73.682 Imm_image: 35.668 (35.668) Imm_text: 35.668 (35.668) Isd_image: 5.4253 (5.4253) Isd_text: 5.4253 (5.4253) Contrastive_loss: 0.33911 (0.33911) Loss: 0.33911 (0.33911)
325
+ 2025-04-23,21:53:50 | INFO | Train Epoch: 28 [ 827392/3047424 (27%)] Data (t): 0.622 Batch (t): 4.859, 1706.47/s, 426.617/s/gpu LR: 0.000207 Logit Scale: 74.543 Imm_image: 35.743 (35.706) Imm_text: 35.743 (35.706) Isd_image: 5.5021 (5.4637) Isd_text: 5.5021 (5.4637) Contrastive_loss: 0.37970 (0.35940) Loss: 0.37970 (0.35940)
326
+ 2025-04-23,22:01:51 | INFO | Train Epoch: 28 [1646592/3047424 (54%)] Data (t): 0.611 Batch (t): 4.804, 1717.61/s, 429.403/s/gpu LR: 0.000198 Logit Scale: 75.029 Imm_image: 35.903 (35.772) Imm_text: 35.903 (35.772) Isd_image: 5.6645 (5.5306) Isd_text: 5.6645 (5.5306) Contrastive_loss: 0.38996 (0.36959) Loss: 0.38996 (0.36959)
327
+ 2025-04-23,22:09:55 | INFO | Train Epoch: 28 [2465792/3047424 (81%)] Data (t): 0.620 Batch (t): 4.846, 1699.30/s, 424.826/s/gpu LR: 0.000190 Logit Scale: 75.449 Imm_image: 35.936 (35.813) Imm_text: 35.936 (35.813) Isd_image: 5.7750 (5.5917) Isd_text: 5.7750 (5.5917) Contrastive_loss: 0.42394 (0.38318) Loss: 0.42394 (0.38318)
328
+ 2025-04-23,22:15:38 | INFO | Train Epoch: 28 [3047424/3047424 (100%)] Data (t): 0.598 Batch (t): 4.822, 1742.78/s, 435.696/s/gpu LR: 0.000184 Logit Scale: 75.758 Imm_image: 36.583 (35.967) Imm_text: 36.583 (35.967) Isd_image: 5.9204 (5.6575) Isd_text: 5.9204 (5.6575) Contrastive_loss: 0.33385 (0.37331) Loss: 0.33385 (0.37331)
329
+ 2025-04-23,22:15:39 | INFO | Start epoch 29
330
+ 2025-04-23,22:15:54 | INFO | Train Epoch: 29 [ 8192/3047424 (0%)] Data (t): 10.854 Batch (t): 15.154, 540.588/s, 135.147/s/gpu LR: 0.000184 Logit Scale: 75.758 Imm_image: 36.710 (36.710) Imm_text: 36.710 (36.710) Isd_image: 5.9628 (5.9628) Isd_text: 5.9628 (5.9628) Contrastive_loss: 0.31925 (0.31925) Loss: 0.31925 (0.31925)
331
+ 2025-04-23,22:23:55 | INFO | Train Epoch: 29 [ 827392/3047424 (27%)] Data (t): 0.609 Batch (t): 4.812, 1691.83/s, 422.958/s/gpu LR: 0.000175 Logit Scale: 76.379 Imm_image: 36.841 (36.776) Imm_text: 36.841 (36.776) Isd_image: 5.9781 (5.9704) Isd_text: 5.9781 (5.9704) Contrastive_loss: 0.32796 (0.32360) Loss: 0.32796 (0.32360)
332
+ 2025-04-23,22:32:00 | INFO | Train Epoch: 29 [1646592/3047424 (54%)] Data (t): 0.636 Batch (t): 4.847, 1680.25/s, 420.061/s/gpu LR: 0.000167 Logit Scale: 76.868 Imm_image: 36.946 (36.832) Imm_text: 36.946 (36.832) Isd_image: 6.1671 (6.0360) Isd_text: 6.1671 (6.0360) Contrastive_loss: 0.33985 (0.32902) Loss: 0.33985 (0.32902)
333
+ 2025-04-23,22:40:04 | INFO | Train Epoch: 29 [2465792/3047424 (81%)] Data (t): 0.609 Batch (t): 4.835, 1701.62/s, 425.404/s/gpu LR: 0.000159 Logit Scale: 77.359 Imm_image: 37.053 (36.888) Imm_text: 37.053 (36.888) Isd_image: 6.1993 (6.0768) Isd_text: 6.1993 (6.0768) Contrastive_loss: 0.35796 (0.33625) Loss: 0.35796 (0.33625)
334
+ 2025-04-23,22:45:47 | INFO | Train Epoch: 29 [3047424/3047424 (100%)] Data (t): 0.626 Batch (t): 4.838, 1744.87/s, 436.217/s/gpu LR: 0.000154 Logit Scale: 77.715 Imm_image: 37.734 (37.057) Imm_text: 37.734 (37.057) Isd_image: 6.2521 (6.1118) Isd_text: 6.2521 (6.1118) Contrastive_loss: 0.28575 (0.32615) Loss: 0.28575 (0.32615)
335
+ 2025-04-23,22:45:49 | INFO | Start epoch 30
336
+ 2025-04-23,22:46:04 | INFO | Train Epoch: 30 [ 8192/3047424 (0%)] Data (t): 10.994 Batch (t): 15.511, 528.152/s, 132.038/s/gpu LR: 0.000154 Logit Scale: 77.718 Imm_image: 37.758 (37.758) Imm_text: 37.758 (37.758) Isd_image: 6.2920 (6.2920) Isd_text: 6.2920 (6.2920) Contrastive_loss: 0.28561 (0.28561) Loss: 0.28561 (0.28561)
337
+ 2025-04-23,22:54:06 | INFO | Train Epoch: 30 [ 827392/3047424 (27%)] Data (t): 0.604 Batch (t): 4.817, 1707.32/s, 426.829/s/gpu LR: 0.000146 Logit Scale: 78.299 Imm_image: 38.018 (37.888) Imm_text: 38.018 (37.888) Isd_image: 6.3025 (6.2973) Isd_text: 6.3025 (6.2973) Contrastive_loss: 0.30561 (0.29561) Loss: 0.30561 (0.29561)
338
+ 2025-04-23,23:02:09 | INFO | Train Epoch: 30 [1646592/3047424 (54%)] Data (t): 0.619 Batch (t): 4.836, 1684.34/s, 421.085/s/gpu LR: 0.000138 Logit Scale: 78.825 Imm_image: 38.225 (38.000) Imm_text: 38.225 (38.000) Isd_image: 6.4180 (6.3375) Isd_text: 6.4180 (6.3375) Contrastive_loss: 0.31615 (0.30245) Loss: 0.31615 (0.30245)
339
+ 2025-04-23,23:10:17 | INFO | Train Epoch: 30 [2465792/3047424 (81%)] Data (t): 0.651 Batch (t): 4.872, 1676.41/s, 419.101/s/gpu LR: 0.000131 Logit Scale: 79.181 Imm_image: 38.426 (38.107) Imm_text: 38.426 (38.107) Isd_image: 6.7417 (6.4386) Isd_text: 6.7417 (6.4386) Contrastive_loss: 0.30190 (0.30232) Loss: 0.30190 (0.30232)
340
+ 2025-04-23,23:16:00 | INFO | Train Epoch: 30 [3047424/3047424 (100%)] Data (t): 0.617 Batch (t): 4.837, 1735.69/s, 433.921/s/gpu LR: 0.000126 Logit Scale: 79.497 Imm_image: 38.978 (38.281) Imm_text: 38.978 (38.281) Isd_image: 6.3805 (6.4270) Isd_text: 6.3805 (6.4270) Contrastive_loss: 0.27506 (0.29687) Loss: 0.27506 (0.29687)
341
+ 2025-04-23,23:16:02 | INFO | Start epoch 31
342
+ 2025-04-23,23:16:17 | INFO | Train Epoch: 31 [ 8192/3047424 (0%)] Data (t): 10.933 Batch (t): 15.418, 531.333/s, 132.833/s/gpu LR: 0.000126 Logit Scale: 79.499 Imm_image: 39.085 (39.085) Imm_text: 39.085 (39.085) Isd_image: 6.3963 (6.3963) Isd_text: 6.3963 (6.3963) Contrastive_loss: 0.25275 (0.25275) Loss: 0.25275 (0.25275)
343
+ 2025-04-23,23:24:22 | INFO | Train Epoch: 31 [ 827392/3047424 (27%)] Data (t): 0.619 Batch (t): 4.851, 1677.16/s, 419.290/s/gpu LR: 0.000119 Logit Scale: 79.919 Imm_image: 39.191 (39.138) Imm_text: 39.191 (39.138) Isd_image: 6.7139 (6.5551) Isd_text: 6.7139 (6.5551) Contrastive_loss: 0.26039 (0.25657) Loss: 0.26039 (0.25657)
344
+ 2025-04-23,23:32:26 | INFO | Train Epoch: 31 [1646592/3047424 (54%)] Data (t): 0.628 Batch (t): 4.835, 1678.66/s, 419.664/s/gpu LR: 0.000112 Logit Scale: 80.304 Imm_image: 39.231 (39.169) Imm_text: 39.231 (39.169) Isd_image: 6.6722 (6.5941) Isd_text: 6.6722 (6.5941) Contrastive_loss: 0.24036 (0.25117) Loss: 0.24036 (0.25117)
345
+ 2025-04-23,23:40:31 | INFO | Train Epoch: 31 [2465792/3047424 (81%)] Data (t): 0.615 Batch (t): 4.850, 1706.87/s, 426.718/s/gpu LR: 0.000105 Logit Scale: 80.682 Imm_image: 39.467 (39.244) Imm_text: 39.467 (39.244) Isd_image: 6.6653 (6.6119) Isd_text: 6.6653 (6.6119) Contrastive_loss: 0.26978 (0.25582) Loss: 0.26978 (0.25582)
346
+ 2025-04-23,23:46:15 | INFO | Train Epoch: 31 [3047424/3047424 (100%)] Data (t): 0.625 Batch (t): 4.844, 1739.73/s, 434.931/s/gpu LR: 0.000100 Logit Scale: 80.912 Imm_image: 39.916 (39.378) Imm_text: 39.916 (39.378) Isd_image: 6.8971 (6.6690) Isd_text: 6.8971 (6.6690) Contrastive_loss: 0.22395 (0.24945) Loss: 0.22395 (0.24945)
347
+ 2025-04-23,23:46:16 | INFO | Start epoch 32
348
+ 2025-04-23,23:46:31 | INFO | Train Epoch: 32 [ 8192/3047424 (0%)] Data (t): 10.853 Batch (t): 15.135, 541.264/s, 135.316/s/gpu LR: 0.000100 Logit Scale: 80.916 Imm_image: 39.979 (39.979) Imm_text: 39.979 (39.979) Isd_image: 6.8648 (6.8648) Isd_text: 6.8648 (6.8648) Contrastive_loss: 0.22156 (0.22156) Loss: 0.22156 (0.22156)
349
+ 2025-04-23,23:54:33 | INFO | Train Epoch: 32 [ 827392/3047424 (27%)] Data (t): 0.608 Batch (t): 4.820, 1690.52/s, 422.630/s/gpu LR: 0.000094 Logit Scale: 81.300 Imm_image: 40.148 (40.064) Imm_text: 40.148 (40.064) Isd_image: 6.9480 (6.9064) Isd_text: 6.9480 (6.9064) Contrastive_loss: 0.23833 (0.22995) Loss: 0.23833 (0.22995)
350
+ 2025-04-24,00:02:39 | INFO | Train Epoch: 32 [1646592/3047424 (54%)] Data (t): 0.623 Batch (t): 4.857, 1674.59/s, 418.647/s/gpu LR: 0.000088 Logit Scale: 81.589 Imm_image: 40.110 (40.079) Imm_text: 40.110 (40.079) Isd_image: 6.7485 (6.8538) Isd_text: 6.7485 (6.8538) Contrastive_loss: 0.22921 (0.22970) Loss: 0.22921 (0.22970)
351
+ 2025-04-24,00:10:46 | INFO | Train Epoch: 32 [2465792/3047424 (81%)] Data (t): 0.646 Batch (t): 4.869, 1677.44/s, 419.360/s/gpu LR: 0.000082 Logit Scale: 81.913 Imm_image: 40.281 (40.129) Imm_text: 40.281 (40.129) Isd_image: 6.9970 (6.8896) Isd_text: 6.9970 (6.8896) Contrastive_loss: 0.23596 (0.23127) Loss: 0.23596 (0.23127)
352
+ 2025-04-24,00:16:29 | INFO | Train Epoch: 32 [3047424/3047424 (100%)] Data (t): 0.603 Batch (t): 4.830, 1741.51/s, 435.378/s/gpu LR: 0.000077 Logit Scale: 82.130 Imm_image: 40.886 (40.281) Imm_text: 40.886 (40.281) Isd_image: 7.0887 (6.9294) Isd_text: 7.0887 (6.9294) Contrastive_loss: 0.18105 (0.22122) Loss: 0.18105 (0.22122)
353
+ 2025-04-24,00:16:31 | INFO | Start epoch 33
354
+ 2025-04-24,00:16:45 | INFO | Train Epoch: 33 [ 8192/3047424 (0%)] Data (t): 9.755 Batch (t): 14.332, 571.581/s, 142.895/s/gpu LR: 0.000077 Logit Scale: 82.133 Imm_image: 40.881 (40.881) Imm_text: 40.881 (40.881) Isd_image: 7.1827 (7.1827) Isd_text: 7.1827 (7.1827) Contrastive_loss: 0.20359 (0.20359) Loss: 0.20359 (0.20359)
355
+ 2025-04-24,00:24:48 | INFO | Train Epoch: 33 [ 827392/3047424 (27%)] Data (t): 0.604 Batch (t): 4.827, 1707.05/s, 426.762/s/gpu LR: 0.000072 Logit Scale: 82.457 Imm_image: 40.937 (40.909) Imm_text: 40.937 (40.909) Isd_image: 7.3372 (7.2600) Isd_text: 7.3372 (7.2600) Contrastive_loss: 0.20799 (0.20579) Loss: 0.20799 (0.20579)
356
+ 2025-04-24,00:32:51 | INFO | Train Epoch: 33 [1646592/3047424 (54%)] Data (t): 0.626 Batch (t): 4.830, 1687.79/s, 421.947/s/gpu LR: 0.000066 Logit Scale: 82.770 Imm_image: 41.158 (40.992) Imm_text: 41.158 (40.992) Isd_image: 7.1048 (7.2083) Isd_text: 7.1048 (7.2083) Contrastive_loss: 0.19614 (0.20257) Loss: 0.19614 (0.20257)
357
+ 2025-04-24,00:40:56 | INFO | Train Epoch: 33 [2465792/3047424 (81%)] Data (t): 0.633 Batch (t): 4.850, 1686.56/s, 421.639/s/gpu LR: 0.000061 Logit Scale: 83.038 Imm_image: 41.397 (41.093) Imm_text: 41.397 (41.093) Isd_image: 6.9361 (7.1402) Isd_text: 6.9361 (7.1402) Contrastive_loss: 0.17492 (0.19566) Loss: 0.17492 (0.19566)
358
+ 2025-04-24,00:46:39 | INFO | Train Epoch: 33 [3047424/3047424 (100%)] Data (t): 0.615 Batch (t): 4.842, 1739.28/s, 434.819/s/gpu LR: 0.000057 Logit Scale: 83.174 Imm_image: 41.700 (41.215) Imm_text: 41.700 (41.215) Isd_image: 7.2266 (7.1575) Isd_text: 7.2266 (7.1575) Contrastive_loss: 0.17821 (0.19217) Loss: 0.17821 (0.19217)
359
+ 2025-04-24,00:46:41 | INFO | Start epoch 34
360
+ 2025-04-24,00:46:55 | INFO | Train Epoch: 34 [ 8192/3047424 (0%)] Data (t): 9.791 Batch (t): 14.220, 576.082/s, 144.021/s/gpu LR: 0.000057 Logit Scale: 83.176 Imm_image: 41.756 (41.756) Imm_text: 41.756 (41.756) Isd_image: 7.2188 (7.2188) Isd_text: 7.2188 (7.2188) Contrastive_loss: 0.16633 (0.16633) Loss: 0.16633 (0.16633)
361
+ 2025-04-24,00:54:57 | INFO | Train Epoch: 34 [ 827392/3047424 (27%)] Data (t): 0.618 Batch (t): 4.818, 1705.35/s, 426.339/s/gpu LR: 0.000052 Logit Scale: 83.391 Imm_image: 41.775 (41.765) Imm_text: 41.775 (41.765) Isd_image: 7.5486 (7.3837) Isd_text: 7.5486 (7.3837) Contrastive_loss: 0.17036 (0.16835) Loss: 0.17036 (0.16835)
362
+ 2025-04-24,01:03:03 | INFO | Train Epoch: 34 [1646592/3047424 (54%)] Data (t): 0.633 Batch (t): 4.859, 1698.30/s, 424.574/s/gpu LR: 0.000048 Logit Scale: 83.604 Imm_image: 41.909 (41.813) Imm_text: 41.909 (41.813) Isd_image: 7.3406 (7.3693) Isd_text: 7.3406 (7.3693) Contrastive_loss: 0.16177 (0.16616) Loss: 0.16177 (0.16616)
363
+ 2025-04-24,01:11:04 | INFO | Train Epoch: 34 [2465792/3047424 (81%)] Data (t): 0.603 Batch (t): 4.805, 1710.80/s, 427.699/s/gpu LR: 0.000043 Logit Scale: 83.755 Imm_image: 42.086 (41.881) Imm_text: 42.086 (41.881) Isd_image: 7.4017 (7.3774) Isd_text: 7.4017 (7.3774) Contrastive_loss: 0.16945 (0.16698) Loss: 0.16945 (0.16698)
364
+ 2025-04-24,01:16:43 | INFO | Train Epoch: 34 [3047424/3047424 (100%)] Data (t): 0.588 Batch (t): 4.781, 1738.17/s, 434.544/s/gpu LR: 0.000040 Logit Scale: 83.882 Imm_image: 42.226 (41.950) Imm_text: 42.226 (41.950) Isd_image: 7.5515 (7.4123) Isd_text: 7.5515 (7.4123) Contrastive_loss: 0.16252 (0.16609) Loss: 0.16252 (0.16609)
365
+ 2025-04-24,01:16:45 | INFO | Start epoch 35
366
+ 2025-04-24,01:17:00 | INFO | Train Epoch: 35 [ 8192/3047424 (0%)] Data (t): 10.746 Batch (t): 15.270, 536.474/s, 134.118/s/gpu LR: 0.000040 Logit Scale: 83.884 Imm_image: 42.317 (42.317) Imm_text: 42.317 (42.317) Isd_image: 7.5036 (7.5036) Isd_text: 7.5036 (7.5036) Contrastive_loss: 0.14960 (0.14960) Loss: 0.14960 (0.14960)
367
+ 2025-04-24,01:25:05 | INFO | Train Epoch: 35 [ 827392/3047424 (27%)] Data (t): 0.622 Batch (t): 4.854, 1694.80/s, 423.699/s/gpu LR: 0.000036 Logit Scale: 84.054 Imm_image: 42.453 (42.385) Imm_text: 42.453 (42.385) Isd_image: 7.5786 (7.5411) Isd_text: 7.5786 (7.5411) Contrastive_loss: 0.15064 (0.15012) Loss: 0.15064 (0.15012)
368
+ 2025-04-24,01:33:10 | INFO | Train Epoch: 35 [1646592/3047424 (54%)] Data (t): 0.629 Batch (t): 4.845, 1702.22/s, 425.554/s/gpu LR: 0.000032 Logit Scale: 84.198 Imm_image: 42.533 (42.434) Imm_text: 42.533 (42.434) Isd_image: 7.6160 (7.5661) Isd_text: 7.6160 (7.5661) Contrastive_loss: 0.15397 (0.15141) Loss: 0.15397 (0.15141)
369
+ 2025-04-24,01:41:12 | INFO | Train Epoch: 35 [2465792/3047424 (81%)] Data (t): 0.614 Batch (t): 4.819, 1716.09/s, 429.023/s/gpu LR: 0.000028 Logit Scale: 84.321 Imm_image: 42.643 (42.487) Imm_text: 42.643 (42.487) Isd_image: 7.9206 (7.6547) Isd_text: 7.9206 (7.6547) Contrastive_loss: 0.15888 (0.15327) Loss: 0.15888 (0.15327)
370
+ 2025-04-24,01:46:52 | INFO | Train Epoch: 35 [3047424/3047424 (100%)] Data (t): 0.598 Batch (t): 4.789, 1736.03/s, 434.007/s/gpu LR: 0.000026 Logit Scale: 84.413 Imm_image: 42.859 (42.561) Imm_text: 42.859 (42.561) Isd_image: 7.6935 (7.6625) Isd_text: 7.6935 (7.6625) Contrastive_loss: 0.14394 (0.15141) Loss: 0.14394 (0.15141)
371
+ 2025-04-24,01:46:53 | INFO | Start epoch 36
372
+ 2025-04-24,01:47:10 | INFO | Train Epoch: 36 [ 8192/3047424 (0%)] Data (t): 11.802 Batch (t): 16.195, 505.848/s, 126.462/s/gpu LR: 0.000026 Logit Scale: 84.414 Imm_image: 42.816 (42.816) Imm_text: 42.816 (42.816) Isd_image: 7.5974 (7.5974) Isd_text: 7.5974 (7.5974) Contrastive_loss: 0.15469 (0.15469) Loss: 0.15469 (0.15469)
373
+ 2025-04-24,01:55:13 | INFO | Train Epoch: 36 [ 827392/3047424 (27%)] Data (t): 0.633 Batch (t): 4.835, 1697.51/s, 424.377/s/gpu LR: 0.000022 Logit Scale: 84.541 Imm_image: 43.006 (42.911) Imm_text: 43.006 (42.911) Isd_image: 7.5277 (7.5625) Isd_text: 7.5277 (7.5625) Contrastive_loss: 0.14782 (0.15126) Loss: 0.14782 (0.15126)
374
+ 2025-04-24,02:03:15 | INFO | Train Epoch: 36 [1646592/3047424 (54%)] Data (t): 0.607 Batch (t): 4.822, 1681.88/s, 420.471/s/gpu LR: 0.000019 Logit Scale: 84.636 Imm_image: 43.040 (42.954) Imm_text: 43.040 (42.954) Isd_image: 7.7300 (7.6183) Isd_text: 7.7300 (7.6183) Contrastive_loss: 0.13444 (0.14565) Loss: 0.13444 (0.14565)
375
+ 2025-04-24,02:11:20 | INFO | Train Epoch: 36 [2465792/3047424 (81%)] Data (t): 0.617 Batch (t): 4.845, 1721.17/s, 430.292/s/gpu LR: 0.000016 Logit Scale: 84.719 Imm_image: 43.205 (43.017) Imm_text: 43.205 (43.017) Isd_image: 7.5075 (7.5906) Isd_text: 7.5075 (7.5906) Contrastive_loss: 0.12711 (0.14102) Loss: 0.12711 (0.14102)
376
+ 2025-04-24,02:17:02 | INFO | Train Epoch: 36 [3047424/3047424 (100%)] Data (t): 0.615 Batch (t): 4.823, 1742.21/s, 435.552/s/gpu LR: 0.000015 Logit Scale: 84.768 Imm_image: 43.221 (43.057) Imm_text: 43.221 (43.057) Isd_image: 7.6946 (7.6114) Isd_text: 7.6946 (7.6114) Contrastive_loss: 0.12679 (0.13817) Loss: 0.12679 (0.13817)
377
+ 2025-04-24,02:17:04 | INFO | Start epoch 37
378
+ 2025-04-24,02:17:19 | INFO | Train Epoch: 37 [ 8192/3047424 (0%)] Data (t): 10.841 Batch (t): 15.255, 537.020/s, 134.255/s/gpu LR: 0.000015 Logit Scale: 84.769 Imm_image: 43.317 (43.317) Imm_text: 43.317 (43.317) Isd_image: 7.6731 (7.6731) Isd_text: 7.6731 (7.6731) Contrastive_loss: 0.11858 (0.11858) Loss: 0.11858 (0.11858)
379
+ 2025-04-24,02:25:22 | INFO | Train Epoch: 37 [ 827392/3047424 (27%)] Data (t): 0.628 Batch (t): 4.832, 1682.42/s, 420.606/s/gpu LR: 0.000012 Logit Scale: 84.834 Imm_image: 43.323 (43.320) Imm_text: 43.323 (43.320) Isd_image: 7.8653 (7.7692) Isd_text: 7.8653 (7.7692) Contrastive_loss: 0.13055 (0.12456) Loss: 0.13055 (0.12456)
380
+ 2025-04-24,02:33:27 | INFO | Train Epoch: 37 [1646592/3047424 (54%)] Data (t): 0.638 Batch (t): 4.843, 1676.23/s, 419.057/s/gpu LR: 0.000010 Logit Scale: 84.886 Imm_image: 43.389 (43.343) Imm_text: 43.389 (43.343) Isd_image: 7.5365 (7.6916) Isd_text: 7.5365 (7.6916) Contrastive_loss: 0.12937 (0.12617) Loss: 0.12937 (0.12617)
381
+ 2025-04-24,02:41:31 | INFO | Train Epoch: 37 [2465792/3047424 (81%)] Data (t): 0.625 Batch (t): 4.837, 1684.62/s, 421.155/s/gpu LR: 0.000008 Logit Scale: 84.930 Imm_image: 43.478 (43.377) Imm_text: 43.478 (43.377) Isd_image: 7.5790 (7.6635) Isd_text: 7.5790 (7.6635) Contrastive_loss: 0.13098 (0.12737) Loss: 0.13098 (0.12737)
382
+ 2025-04-24,02:47:14 | INFO | Train Epoch: 37 [3047424/3047424 (100%)] Data (t): 0.623 Batch (t): 4.840, 1738.20/s, 434.550/s/gpu LR: 0.000006 Logit Scale: 84.957 Imm_image: 43.504 (43.402) Imm_text: 43.504 (43.402) Isd_image: 7.6652 (7.6638) Isd_text: 7.6652 (7.6638) Contrastive_loss: 0.13845 (0.12959) Loss: 0.13845 (0.12959)
383
+ 2025-04-24,02:47:16 | INFO | Start epoch 38
384
+ 2025-04-24,02:47:31 | INFO | Train Epoch: 38 [ 8192/3047424 (0%)] Data (t): 10.805 Batch (t): 15.235, 537.720/s, 134.430/s/gpu LR: 0.000006 Logit Scale: 84.957 Imm_image: 43.581 (43.581) Imm_text: 43.581 (43.581) Isd_image: 7.5766 (7.5766) Isd_text: 7.5766 (7.5766) Contrastive_loss: 0.12755 (0.12755) Loss: 0.12755 (0.12755)
385
+ 2025-04-24,02:55:35 | INFO | Train Epoch: 38 [ 827392/3047424 (27%)] Data (t): 0.623 Batch (t): 4.841, 1693.64/s, 423.411/s/gpu LR: 0.000005 Logit Scale: 84.987 Imm_image: 43.540 (43.560) Imm_text: 43.540 (43.560) Isd_image: 7.6102 (7.5934) Isd_text: 7.6102 (7.5934) Contrastive_loss: 0.12511 (0.12633) Loss: 0.12511 (0.12633)
386
+ 2025-04-24,03:03:41 | INFO | Train Epoch: 38 [1646592/3047424 (54%)] Data (t): 0.633 Batch (t): 4.857, 1686.90/s, 421.724/s/gpu LR: 0.000003 Logit Scale: 85.006 Imm_image: 43.600 (43.574) Imm_text: 43.600 (43.574) Isd_image: 7.5882 (7.5916) Isd_text: 7.5882 (7.5916) Contrastive_loss: 0.12856 (0.12707) Loss: 0.12856 (0.12707)
387
+ 2025-04-24,03:11:48 | INFO | Train Epoch: 38 [2465792/3047424 (81%)] Data (t): 0.642 Batch (t): 4.871, 1699.97/s, 424.993/s/gpu LR: 0.000002 Logit Scale: 85.020 Imm_image: 43.680 (43.600) Imm_text: 43.680 (43.600) Isd_image: 7.5445 (7.5798) Isd_text: 7.5445 (7.5798) Contrastive_loss: 0.12728 (0.12712) Loss: 0.12728 (0.12712)
388
+ 2025-04-24,03:17:29 | INFO | Train Epoch: 38 [3047424/3047424 (100%)] Data (t): 0.589 Batch (t): 4.800, 1743.20/s, 435.801/s/gpu LR: 0.000002 Logit Scale: 85.027 Imm_image: 43.585 (43.597) Imm_text: 43.585 (43.597) Isd_image: 7.6257 (7.5890) Isd_text: 7.6257 (7.5890) Contrastive_loss: 0.13614 (0.12893) Loss: 0.13614 (0.12893)
389
+ 2025-04-24,03:17:31 | INFO | Start epoch 39
390
+ 2025-04-24,03:17:46 | INFO | Train Epoch: 39 [ 8192/3047424 (0%)] Data (t): 10.516 Batch (t): 15.015, 545.592/s, 136.398/s/gpu LR: 0.000002 Logit Scale: 85.027 Imm_image: 43.651 (43.651) Imm_text: 43.651 (43.651) Isd_image: 7.5024 (7.5024) Isd_text: 7.5024 (7.5024) Contrastive_loss: 0.11379 (0.11379) Loss: 0.11379 (0.11379)
391
+ 2025-04-24,03:25:51 | INFO | Train Epoch: 39 [ 827392/3047424 (27%)] Data (t): 0.626 Batch (t): 4.851, 1672.80/s, 418.200/s/gpu LR: 0.000001 Logit Scale: 85.035 Imm_image: 43.657 (43.654) Imm_text: 43.657 (43.654) Isd_image: 7.4716 (7.4870) Isd_text: 7.4716 (7.4870) Contrastive_loss: 0.13023 (0.12201) Loss: 0.13023 (0.12201)
392
+ 2025-04-24,03:33:55 | INFO | Train Epoch: 39 [1646592/3047424 (54%)] Data (t): 0.603 Batch (t): 4.844, 1673.77/s, 418.441/s/gpu LR: 0.000000 Logit Scale: 85.038 Imm_image: 43.652 (43.653) Imm_text: 43.652 (43.653) Isd_image: 7.6102 (7.5281) Isd_text: 7.6102 (7.5281) Contrastive_loss: 0.11705 (0.12036) Loss: 0.11705 (0.12036)
393
+ 2025-04-24,03:42:01 | INFO | Train Epoch: 39 [2465792/3047424 (81%)] Data (t): 0.619 Batch (t): 4.853, 1684.10/s, 421.025/s/gpu LR: 0.000000 Logit Scale: 85.038 Imm_image: 43.623 (43.646) Imm_text: 43.623 (43.646) Isd_image: 7.5146 (7.5247) Isd_text: 7.5146 (7.5247) Contrastive_loss: 0.12364 (0.12118) Loss: 0.12364 (0.12118)
394
+ 2025-04-24,03:47:44 | INFO | Train Epoch: 39 [3047424/3047424 (100%)] Data (t): 0.621 Batch (t): 4.832, 1741.52/s, 435.380/s/gpu LR: 0.000000 Logit Scale: 85.038 Imm_image: 43.656 (43.648) Imm_text: 43.656 (43.648) Isd_image: 7.6047 (7.5407) Isd_text: 7.6047 (7.5407) Contrastive_loss: 0.13403 (0.12375) Loss: 0.13403 (0.12375)
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-16-cc3m-laclip-mix-010-filled-use_image_closest/params.txt ADDED
@@ -0,0 +1,103 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ accum_freq: 4
2
+ aug_cfg: {}
3
+ batch_size: 512
4
+ beta1: 0.9
5
+ beta2: 0.98
6
+ cache_dir: None
7
+ caption_ratio: 0.1
8
+ checkpoint_path: /mnt/personal/zhudongy/cc3m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-16-cc3m-laclip-mix-010-filled-use_image_closest/checkpoints
9
+ coca_caption_loss_weight: 2.0
10
+ coca_contrastive_loss_weight: 1.0
11
+ copy_codebase: False
12
+ csv_caption_key: title
13
+ csv_img_key: filepath
14
+ csv_separator:
15
+ dataset_resampled: False
16
+ dataset_type: synthetic
17
+ ddp_static_graph: False
18
+ debug: False
19
+ delete_previous_checkpoint: False
20
+ device: cuda:0
21
+ dist_backend: None
22
+ dist_url: None
23
+ distill: False
24
+ distill_model: None
25
+ distill_pretrained: None
26
+ distributed: True
27
+ epochs: 40
28
+ epochs_cooldown: None
29
+ eps: 1e-08
30
+ force_custom_text: False
31
+ force_image_size: None
32
+ force_patch_dropout: None
33
+ force_quick_gelu: False
34
+ gather_with_grad: True
35
+ grad_checkpointing: True
36
+ grad_clip_norm: None
37
+ horovod: False
38
+ image_interpolation: None
39
+ image_mean: None
40
+ image_resize_mode: None
41
+ image_std: None
42
+ imagenet_v2: None
43
+ imagenet_val: None
44
+ keep_func_name:
45
+ local_loss: False
46
+ local_rank: 0
47
+ lock_image: False
48
+ lock_image_freeze_bn_stats: False
49
+ lock_image_unlocked_groups: 0
50
+ lock_text: False
51
+ lock_text_freeze_layer_norm: False
52
+ lock_text_unlocked_layers: 0
53
+ log_every_n_steps: 100
54
+ log_level: 20
55
+ log_local: False
56
+ log_path: /mnt/personal/zhudongy/cc3m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-16-cc3m-laclip-mix-010-filled-use_image_closest/out.log
57
+ logs: /mnt/personal/zhudongy/cc3m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs
58
+ loss_dist_impl: None
59
+ lr: 0.001
60
+ lr_cooldown_end: 0.0
61
+ lr_cooldown_power: 1.0
62
+ lr_scheduler: cosine
63
+ map_func_name: use_image_closest
64
+ model: ViT-B-16
65
+ momentum: None
66
+ name: ViT-B-16-cc3m-laclip-mix-010-filled-use_image_closest
67
+ no_set_device_rank: False
68
+ opt: adamw
69
+ precision: amp
70
+ pretrained:
71
+ pretrained_image: False
72
+ rank: 0
73
+ remote_sync: None
74
+ remote_sync_frequency: 300
75
+ remote_sync_protocol: s3
76
+ report_to: wandb
77
+ resume: None
78
+ save_frequency: 1
79
+ save_most_recent: False
80
+ seed: 0
81
+ siglip: False
82
+ skip_scheduler: False
83
+ tensorboard: False
84
+ tensorboard_path:
85
+ torchcompile: False
86
+ torchscript: False
87
+ trace: False
88
+ train_data: /mnt/personal/zhudongy/cc3m-hgf-wds/{0000..0301}.tar
89
+ train_data_upsampling_factors: None
90
+ train_num_samples: 3016640
91
+ use_bn_sync: False
92
+ use_bnb_linear: None
93
+ val_data: None
94
+ val_frequency: 1
95
+ val_num_samples: None
96
+ wandb: True
97
+ wandb_notes:
98
+ wandb_project_name: open-clip
99
+ warmup: 368
100
+ wd: 0.5
101
+ workers: 16
102
+ world_size: 4
103
+ zeroshot_frequency: 2
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-16-cc3m-laclip-mix-inter-010-filled-b2/benchmark_caltech101_epoch_40.pt_ViT-B-16_en_zeroshot_classification.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"dataset": "caltech101", "model": "ViT-B-16", "pretrained": "/mnt/personal/zhudongy/cc3m_results/SFR-Embedding-Code-2B_R_0.95_1.0_dinov2-large_0.0_0.05_rouge_0.7_top_4/logs/ViT-B-16-cc3m-laclip-mix-inter-010-filled-b2/checkpoints/epoch_40.pt", "task": "zeroshot_classification", "metrics": {"acc1": 0.44214785651793526, "acc5": 0.6919291338582677, "mean_per_class_recall": 0.4057238323222893}, "language": "en"}
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-16-cc3m-laclip-mix-inter-010-filled-b2/benchmark_cars_epoch_40.pt_ViT-B-16_en_zeroshot_classification.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"dataset": "cars", "model": "ViT-B-16", "pretrained": "/mnt/personal/zhudongy/cc3m_results/SFR-Embedding-Code-2B_R_0.95_1.0_dinov2-large_0.0_0.05_rouge_0.7_top_4/logs/ViT-B-16-cc3m-laclip-mix-inter-010-filled-b2/checkpoints/epoch_40.pt", "task": "zeroshot_classification", "metrics": {"acc1": 0.0106951871657754, "acc5": 0.0453923641338142, "mean_per_class_recall": 0.010690858820413754}, "language": "en"}
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-16-cc3m-laclip-mix-inter-010-filled-b2/benchmark_cifar100_epoch_40.pt_ViT-B-16_en_zeroshot_classification.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"dataset": "cifar100", "model": "ViT-B-16", "pretrained": "/mnt/personal/zhudongy/cc3m_results/SFR-Embedding-Code-2B_R_0.95_1.0_dinov2-large_0.0_0.05_rouge_0.7_top_4/logs/ViT-B-16-cc3m-laclip-mix-inter-010-filled-b2/checkpoints/epoch_40.pt", "task": "zeroshot_classification", "metrics": {"acc1": 0.1891, "acc5": 0.4211, "mean_per_class_recall": 0.1891}, "language": "en"}
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-16-cc3m-laclip-mix-inter-010-filled-b2/benchmark_cifar10_epoch_40.pt_ViT-B-16_en_zeroshot_classification.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"dataset": "cifar10", "model": "ViT-B-16", "pretrained": "/mnt/personal/zhudongy/cc3m_results/SFR-Embedding-Code-2B_R_0.95_1.0_dinov2-large_0.0_0.05_rouge_0.7_top_4/logs/ViT-B-16-cc3m-laclip-mix-inter-010-filled-b2/checkpoints/epoch_40.pt", "task": "zeroshot_classification", "metrics": {"acc1": 0.3988, "acc5": 0.8657, "mean_per_class_recall": 0.3987}, "language": "en"}
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-16-cc3m-laclip-mix-inter-010-filled-b2/benchmark_country211_epoch_40.pt_ViT-B-16_en_zeroshot_classification.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"dataset": "country211", "model": "ViT-B-16", "pretrained": "/mnt/personal/zhudongy/cc3m_results/SFR-Embedding-Code-2B_R_0.95_1.0_dinov2-large_0.0_0.05_rouge_0.7_top_4/logs/ViT-B-16-cc3m-laclip-mix-inter-010-filled-b2/checkpoints/epoch_40.pt", "task": "zeroshot_classification", "metrics": {"acc1": 0.005971563981042654, "acc5": 0.02829383886255924, "mean_per_class_recall": 0.006018957345971564}, "language": "en"}
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-16-cc3m-laclip-mix-inter-010-filled-b2/benchmark_dtd_epoch_40.pt_ViT-B-16_en_zeroshot_classification.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"dataset": "dtd", "model": "ViT-B-16", "pretrained": "/mnt/personal/zhudongy/cc3m_results/SFR-Embedding-Code-2B_R_0.95_1.0_dinov2-large_0.0_0.05_rouge_0.7_top_4/logs/ViT-B-16-cc3m-laclip-mix-inter-010-filled-b2/checkpoints/epoch_40.pt", "task": "zeroshot_classification", "metrics": {"acc1": 0.10372340425531915, "acc5": 0.2904255319148936, "mean_per_class_recall": 0.10372340425531913}, "language": "en"}
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-16-cc3m-laclip-mix-inter-010-filled-b2/benchmark_eurosat_epoch_40.pt_ViT-B-16_en_zeroshot_classification.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"dataset": "eurosat", "model": "ViT-B-16", "pretrained": "/mnt/personal/zhudongy/cc3m_results/SFR-Embedding-Code-2B_R_0.95_1.0_dinov2-large_0.0_0.05_rouge_0.7_top_4/logs/ViT-B-16-cc3m-laclip-mix-inter-010-filled-b2/checkpoints/epoch_40.pt", "task": "zeroshot_classification", "metrics": {"acc1": 0.1424074074074074, "acc5": 0.5987407407407408, "mean_per_class_recall": 0.13944666666666666}, "language": "en"}
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-16-cc3m-laclip-mix-inter-010-filled-b2/benchmark_fgvc_aircraft_epoch_40.pt_ViT-B-16_en_zeroshot_classification.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"dataset": "fgvc_aircraft", "model": "ViT-B-16", "pretrained": "/mnt/personal/zhudongy/cc3m_results/SFR-Embedding-Code-2B_R_0.95_1.0_dinov2-large_0.0_0.05_rouge_0.7_top_4/logs/ViT-B-16-cc3m-laclip-mix-inter-010-filled-b2/checkpoints/epoch_40.pt", "task": "zeroshot_classification", "metrics": {"acc1": 0.011401140114011402, "acc5": 0.0465046504650465, "mean_per_class_recall": 0.011639928698752229}, "language": "en"}
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-16-cc3m-laclip-mix-inter-010-filled-b2/benchmark_flickr30k_epoch_40.pt_ViT-B-16_en_zeroshot_retrieval.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"dataset": "flickr30k", "model": "ViT-B-16", "pretrained": "/mnt/personal/zhudongy/cc3m_results/SFR-Embedding-Code-2B_R_0.95_1.0_dinov2-large_0.0_0.05_rouge_0.7_top_4/logs/ViT-B-16-cc3m-laclip-mix-inter-010-filled-b2/checkpoints/epoch_40.pt", "task": "zeroshot_retrieval", "metrics": {"image_retrieval_recall@1": 0.131400004029274, "text_retrieval_recall@1": 0.21699999272823334, "image_retrieval_recall@5": 0.3073999881744385, "text_retrieval_recall@5": 0.4519999921321869, "image_retrieval_recall@10": 0.40119999647140503, "text_retrieval_recall@10": 0.5809999704360962}, "language": "en"}