Add files using upload-large-folder tool
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/config.yaml +49 -0
- SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/filter_ambiguous/filtered_pairs.jsonl +0 -0
- SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-16-subset-dc10m-010-1e3-original-keep_low_intra_only/benchmark_cars_epoch_40.pt_ViT-B-16_en_zeroshot_classification.json +1 -0
- SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-16-subset-dc10m-010-1e3-original-keep_low_intra_only/benchmark_cifar100_epoch_40.pt_ViT-B-16_en_zeroshot_classification.json +1 -0
- SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-16-subset-dc10m-010-1e3-original-keep_low_intra_only/benchmark_cifar10_epoch_40.pt_ViT-B-16_en_zeroshot_classification.json +1 -0
- SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-16-subset-dc10m-010-1e3-original-keep_low_intra_only/benchmark_country211_epoch_40.pt_ViT-B-16_en_zeroshot_classification.json +1 -0
- SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-16-subset-dc10m-010-1e3-original-keep_low_intra_only/benchmark_dtd_epoch_40.pt_ViT-B-16_en_zeroshot_classification.json +1 -0
- SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-16-subset-dc10m-010-1e3-original-keep_low_intra_only/benchmark_eurosat_epoch_40.pt_ViT-B-16_en_zeroshot_classification.json +1 -0
- SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-16-subset-dc10m-010-1e3-original-keep_low_intra_only/benchmark_flickr30k_epoch_40.pt_ViT-B-16_en_zeroshot_retrieval.json +1 -0
- SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-16-subset-dc10m-010-1e3-original-keep_low_intra_only/benchmark_flowers_epoch_40.pt_ViT-B-16_en_zeroshot_classification.json +1 -0
- SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-16-subset-dc10m-010-1e3-original-keep_low_intra_only/benchmark_gtsrb_epoch_40.pt_ViT-B-16_en_zeroshot_classification.json +1 -0
- SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-16-subset-dc10m-010-1e3-original-keep_low_intra_only/benchmark_imagenet1k_epoch_40.pt_ViT-B-16_en_zeroshot_classification.json +1 -0
- SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-16-subset-dc10m-010-1e3-original-keep_low_intra_only/benchmark_mscoco_captions_epoch_40.pt_ViT-B-16_en_zeroshot_retrieval.json +1 -0
- SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-16-subset-dc10m-010-1e3-original-keep_low_intra_only/benchmark_pets_epoch_40.pt_ViT-B-16_en_zeroshot_classification.json +1 -0
- SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-16-subset-dc10m-010-1e3-original-keep_low_intra_only/benchmark_stl10_epoch_40.pt_ViT-B-16_en_zeroshot_classification.json +1 -0
- SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-16-subset-dc10m-010-1e3-original-keep_low_intra_only/benchmark_sun397_epoch_40.pt_ViT-B-16_en_zeroshot_classification.json +1 -0
- SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-16-subset-dc10m-010-1e3-original-keep_low_intra_only/benchmark_vtab_resisc45_epoch_40.pt_ViT-B-16_en_zeroshot_classification.json +1 -0
- SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-16-subset-dc10m-010-1e3-original-keep_low_intra_only/out.log +314 -0
- SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-16-subset-dc10m-010-1e3-original-keep_low_intra_only/params.txt +103 -0
- SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/preprocess_embedding/run.sh +10 -0
- ViT-B-32-laion2b_e16/benchmark_caltech101_laion2b_e16_ViT-B-32_en_zeroshot_classification.json +1 -0
- ViT-B-32-laion2b_e16/benchmark_cars_laion2b_e16_ViT-B-32_en_zeroshot_classification.json +1 -0
- ViT-B-32-laion2b_e16/benchmark_cifar100_laion2b_e16_ViT-B-32_en_zeroshot_classification.json +1 -0
- ViT-B-32-laion2b_e16/benchmark_cifar10_laion2b_e16_ViT-B-32_en_zeroshot_classification.json +1 -0
- ViT-B-32-laion2b_e16/benchmark_country211_laion2b_e16_ViT-B-32_en_zeroshot_classification.json +1 -0
- ViT-B-32-laion2b_e16/benchmark_dtd_laion2b_e16_ViT-B-32_en_zeroshot_classification.json +1 -0
- ViT-B-32-laion2b_e16/benchmark_eurosat_laion2b_e16_ViT-B-32_en_zeroshot_classification.json +1 -0
- ViT-B-32-laion2b_e16/benchmark_fgvc_aircraft_laion2b_e16_ViT-B-32_en_zeroshot_classification.json +1 -0
- ViT-B-32-laion2b_e16/benchmark_flickr30k_laion2b_e16_ViT-B-32_en_zeroshot_retrieval.json +1 -0
- ViT-B-32-laion2b_e16/benchmark_flowers_laion2b_e16_ViT-B-32_en_zeroshot_classification.json +1 -0
- ViT-B-32-laion2b_e16/benchmark_food101_laion2b_e16_ViT-B-32_en_zeroshot_classification.json +1 -0
- ViT-B-32-laion2b_e16/benchmark_gtsrb_laion2b_e16_ViT-B-32_en_zeroshot_classification.json +1 -0
- ViT-B-32-laion2b_e16/benchmark_imagenet1k_laion2b_e16_ViT-B-32_en_zeroshot_classification.json +1 -0
- ViT-B-32-laion2b_e16/benchmark_mscoco_captions_laion2b_e16_ViT-B-32_en_zeroshot_retrieval.json +1 -0
- ViT-B-32-laion2b_e16/benchmark_pets_laion2b_e16_ViT-B-32_en_zeroshot_classification.json +1 -0
- ViT-B-32-laion2b_e16/benchmark_stl10_laion2b_e16_ViT-B-32_en_zeroshot_classification.json +1 -0
- ViT-B-32-laion2b_e16/benchmark_vtab_resisc45_laion2b_e16_ViT-B-32_en_zeroshot_classification.json +1 -0
- ViT-B-32-laion2b_s34b_b79k/benchmark_caltech101_laion2b_s34b_b79k_ViT-B-32_en_zeroshot_classification.json +1 -0
- ViT-B-32-laion2b_s34b_b79k/benchmark_cars_laion2b_s34b_b79k_ViT-B-32_en_zeroshot_classification.json +1 -0
- ViT-B-32-laion2b_s34b_b79k/benchmark_cifar100_laion2b_s34b_b79k_ViT-B-32_en_zeroshot_classification.json +1 -0
- ViT-B-32-laion2b_s34b_b79k/benchmark_cifar10_laion2b_s34b_b79k_ViT-B-32_en_zeroshot_classification.json +1 -0
- ViT-B-32-laion2b_s34b_b79k/benchmark_country211_laion2b_s34b_b79k_ViT-B-32_en_zeroshot_classification.json +1 -0
- ViT-B-32-laion2b_s34b_b79k/benchmark_dtd_laion2b_s34b_b79k_ViT-B-32_en_zeroshot_classification.json +1 -0
- ViT-B-32-laion2b_s34b_b79k/benchmark_eurosat_laion2b_s34b_b79k_ViT-B-32_en_zeroshot_classification.json +1 -0
- ViT-B-32-laion2b_s34b_b79k/benchmark_fgvc_aircraft_laion2b_s34b_b79k_ViT-B-32_en_zeroshot_classification.json +1 -0
- ViT-B-32-laion2b_s34b_b79k/benchmark_flickr30k_laion2b_s34b_b79k_ViT-B-32_en_zeroshot_retrieval.json +1 -0
- ViT-B-32-laion2b_s34b_b79k/benchmark_flowers_laion2b_s34b_b79k_ViT-B-32_en_zeroshot_classification.json +1 -0
- ViT-B-32-laion2b_s34b_b79k/benchmark_food101_laion2b_s34b_b79k_ViT-B-32_en_zeroshot_classification.json +1 -0
- ViT-B-32-laion2b_s34b_b79k/benchmark_gtsrb_laion2b_s34b_b79k_ViT-B-32_en_zeroshot_classification.json +1 -0
- ViT-B-32-laion2b_s34b_b79k/benchmark_imagenet1k_laion2b_s34b_b79k_ViT-B-32_en_zeroshot_classification.json +1 -0
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/config.yaml
ADDED
|
@@ -0,0 +1,49 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
dataset_type: webdataset
|
| 2 |
+
dataset_path: /mnt/personal/zhudongy/CapsFusion120M-wds/{00000..01426}.tar
|
| 3 |
+
embedding_path: /mnt/personal/zhudongy/capsfusion10m_emb
|
| 4 |
+
results_path: /mnt/personal/zhudongy/capsfusion10m_results
|
| 5 |
+
intra_text_model: SFR-Embedding-Code-2B_R
|
| 6 |
+
intra_text_ckpt: Salesforce/SFR-Embedding-Code-2B_R
|
| 7 |
+
intra_image_model: dinov2-large
|
| 8 |
+
intra_image_ckpt: facebook/dinov2-large
|
| 9 |
+
inter_model_name: siglip-so400m-patch14-384
|
| 10 |
+
inter_model_ckpt: google/siglip-so400m-patch14-384
|
| 11 |
+
preprocess:
|
| 12 |
+
device: cuda:0
|
| 13 |
+
chunk_size: 512
|
| 14 |
+
start: 0
|
| 15 |
+
end: 3000
|
| 16 |
+
process_text: false
|
| 17 |
+
process_image: true
|
| 18 |
+
filter:
|
| 19 |
+
device: cuda:0
|
| 20 |
+
text_threshold_min: 0.8
|
| 21 |
+
text_threshold_max: 0.6
|
| 22 |
+
image_threshold_min: 0.0
|
| 23 |
+
image_threshold_max: 0.2
|
| 24 |
+
rouge_threshold: 0.2
|
| 25 |
+
top_k: 8
|
| 26 |
+
inter_threshold_max: 0.4
|
| 27 |
+
recaption:
|
| 28 |
+
device: cuda:0
|
| 29 |
+
recaption_model: llava-1.5-7b-hf
|
| 30 |
+
recaption_ckpt: llava-hf/llava-1.5-7b-hf
|
| 31 |
+
start: 0
|
| 32 |
+
end: 18750
|
| 33 |
+
evaluate:
|
| 34 |
+
device: cuda:0
|
| 35 |
+
models:
|
| 36 |
+
- clip-vit-large-patch14
|
| 37 |
+
- clip-vit-base-patch16
|
| 38 |
+
- siglip-so400m-patch14-384
|
| 39 |
+
model_ckpts:
|
| 40 |
+
- openai/clip-vit-large-patch14
|
| 41 |
+
- openai/clip-vit-base-patch16
|
| 42 |
+
- google/siglip-so400m-patch14-384
|
| 43 |
+
random: false
|
| 44 |
+
icl: false
|
| 45 |
+
chunk_size: 1024
|
| 46 |
+
start: 0
|
| 47 |
+
end: 2000
|
| 48 |
+
train:
|
| 49 |
+
batch_size: 8192
|
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/filter_ambiguous/filtered_pairs.jsonl
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-16-subset-dc10m-010-1e3-original-keep_low_intra_only/benchmark_cars_epoch_40.pt_ViT-B-16_en_zeroshot_classification.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"dataset": "cars", "model": "ViT-B-16", "pretrained": "/mnt/personal/zhudongy/capsfusion10m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-16-subset-dc10m-010-1e3-original-keep_low_intra_only/checkpoints/epoch_40.pt", "task": "zeroshot_classification", "metrics": {"acc1": 0.01119263773162542, "acc5": 0.04215893545578908, "mean_per_class_recall": 0.010578547007508576}, "language": "en"}
|
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-16-subset-dc10m-010-1e3-original-keep_low_intra_only/benchmark_cifar100_epoch_40.pt_ViT-B-16_en_zeroshot_classification.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"dataset": "cifar100", "model": "ViT-B-16", "pretrained": "/mnt/personal/zhudongy/capsfusion10m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-16-subset-dc10m-010-1e3-original-keep_low_intra_only/checkpoints/epoch_40.pt", "task": "zeroshot_classification", "metrics": {"acc1": 0.0373, "acc5": 0.122, "mean_per_class_recall": 0.0373}, "language": "en"}
|
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-16-subset-dc10m-010-1e3-original-keep_low_intra_only/benchmark_cifar10_epoch_40.pt_ViT-B-16_en_zeroshot_classification.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"dataset": "cifar10", "model": "ViT-B-16", "pretrained": "/mnt/personal/zhudongy/capsfusion10m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-16-subset-dc10m-010-1e3-original-keep_low_intra_only/checkpoints/epoch_40.pt", "task": "zeroshot_classification", "metrics": {"acc1": 0.1197, "acc5": 0.5745, "mean_per_class_recall": 0.11979999999999999}, "language": "en"}
|
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-16-subset-dc10m-010-1e3-original-keep_low_intra_only/benchmark_country211_epoch_40.pt_ViT-B-16_en_zeroshot_classification.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"dataset": "country211", "model": "ViT-B-16", "pretrained": "/mnt/personal/zhudongy/capsfusion10m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-16-subset-dc10m-010-1e3-original-keep_low_intra_only/checkpoints/epoch_40.pt", "task": "zeroshot_classification", "metrics": {"acc1": 0.0061137440758293835, "acc5": 0.02824644549763033, "mean_per_class_recall": 0.006113744075829384}, "language": "en"}
|
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-16-subset-dc10m-010-1e3-original-keep_low_intra_only/benchmark_dtd_epoch_40.pt_ViT-B-16_en_zeroshot_classification.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"dataset": "dtd", "model": "ViT-B-16", "pretrained": "/mnt/personal/zhudongy/capsfusion10m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-16-subset-dc10m-010-1e3-original-keep_low_intra_only/checkpoints/epoch_40.pt", "task": "zeroshot_classification", "metrics": {"acc1": 0.026063829787234042, "acc5": 0.14042553191489363, "mean_per_class_recall": 0.026595744680851068}, "language": "en"}
|
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-16-subset-dc10m-010-1e3-original-keep_low_intra_only/benchmark_eurosat_epoch_40.pt_ViT-B-16_en_zeroshot_classification.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"dataset": "eurosat", "model": "ViT-B-16", "pretrained": "/mnt/personal/zhudongy/capsfusion10m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-16-subset-dc10m-010-1e3-original-keep_low_intra_only/checkpoints/epoch_40.pt", "task": "zeroshot_classification", "metrics": {"acc1": 0.1205925925925926, "acc5": 0.49503703703703705, "mean_per_class_recall": 0.11524000000000001}, "language": "en"}
|
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-16-subset-dc10m-010-1e3-original-keep_low_intra_only/benchmark_flickr30k_epoch_40.pt_ViT-B-16_en_zeroshot_retrieval.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"dataset": "flickr30k", "model": "ViT-B-16", "pretrained": "/mnt/personal/zhudongy/capsfusion10m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-16-subset-dc10m-010-1e3-original-keep_low_intra_only/checkpoints/epoch_40.pt", "task": "zeroshot_retrieval", "metrics": {"image_retrieval_recall@1": 0.006000000052154064, "text_retrieval_recall@1": 0.003000000026077032, "image_retrieval_recall@5": 0.023800000548362732, "text_retrieval_recall@5": 0.023000000044703484, "image_retrieval_recall@10": 0.04179999977350235, "text_retrieval_recall@10": 0.04100000113248825}, "language": "en"}
|
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-16-subset-dc10m-010-1e3-original-keep_low_intra_only/benchmark_flowers_epoch_40.pt_ViT-B-16_en_zeroshot_classification.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"dataset": "flowers", "model": "ViT-B-16", "pretrained": "/mnt/personal/zhudongy/capsfusion10m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-16-subset-dc10m-010-1e3-original-keep_low_intra_only/checkpoints/epoch_40.pt", "task": "zeroshot_classification", "metrics": {"acc1": 0.020816392909416167, "acc5": 0.07464628394860953, "mean_per_class_recall": 0.02650641874765217}, "language": "en"}
|
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-16-subset-dc10m-010-1e3-original-keep_low_intra_only/benchmark_gtsrb_epoch_40.pt_ViT-B-16_en_zeroshot_classification.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"dataset": "gtsrb", "model": "ViT-B-16", "pretrained": "/mnt/personal/zhudongy/capsfusion10m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-16-subset-dc10m-010-1e3-original-keep_low_intra_only/checkpoints/epoch_40.pt", "task": "zeroshot_classification", "metrics": {"acc1": 0.023357086302454474, "acc5": 0.152652414885194, "mean_per_class_recall": 0.03859308405895868}, "language": "en"}
|
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-16-subset-dc10m-010-1e3-original-keep_low_intra_only/benchmark_imagenet1k_epoch_40.pt_ViT-B-16_en_zeroshot_classification.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"dataset": "imagenet1k", "model": "ViT-B-16", "pretrained": "/mnt/personal/zhudongy/capsfusion10m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-16-subset-dc10m-010-1e3-original-keep_low_intra_only/checkpoints/epoch_40.pt", "task": "zeroshot_classification", "metrics": {"acc1": 0.01392, "acc5": 0.0459, "mean_per_class_recall": 0.013879999999999998}, "language": "en"}
|
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-16-subset-dc10m-010-1e3-original-keep_low_intra_only/benchmark_mscoco_captions_epoch_40.pt_ViT-B-16_en_zeroshot_retrieval.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"dataset": "mscoco_captions", "model": "ViT-B-16", "pretrained": "/mnt/personal/zhudongy/capsfusion10m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-16-subset-dc10m-010-1e3-original-keep_low_intra_only/checkpoints/epoch_40.pt", "task": "zeroshot_retrieval", "metrics": {"image_retrieval_recall@1": 0.002359056379646063, "text_retrieval_recall@1": 0.003800000064074993, "image_retrieval_recall@5": 0.010355857200920582, "text_retrieval_recall@5": 0.013399999588727951, "image_retrieval_recall@10": 0.019512195140123367, "text_retrieval_recall@10": 0.023399999365210533}, "language": "en"}
|
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-16-subset-dc10m-010-1e3-original-keep_low_intra_only/benchmark_pets_epoch_40.pt_ViT-B-16_en_zeroshot_classification.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"dataset": "pets", "model": "ViT-B-16", "pretrained": "/mnt/personal/zhudongy/capsfusion10m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-16-subset-dc10m-010-1e3-original-keep_low_intra_only/checkpoints/epoch_40.pt", "task": "zeroshot_classification", "metrics": {"acc1": 0.038975197601526304, "acc5": 0.17715998909784683, "mean_per_class_recall": 0.03872339632694418}, "language": "en"}
|
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-16-subset-dc10m-010-1e3-original-keep_low_intra_only/benchmark_stl10_epoch_40.pt_ViT-B-16_en_zeroshot_classification.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"dataset": "stl10", "model": "ViT-B-16", "pretrained": "/mnt/personal/zhudongy/capsfusion10m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-16-subset-dc10m-010-1e3-original-keep_low_intra_only/checkpoints/epoch_40.pt", "task": "zeroshot_classification", "metrics": {"acc1": 0.233375, "acc5": 0.722625, "mean_per_class_recall": 0.23337500000000003}, "language": "en"}
|
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-16-subset-dc10m-010-1e3-original-keep_low_intra_only/benchmark_sun397_epoch_40.pt_ViT-B-16_en_zeroshot_classification.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"dataset": "sun397", "model": "ViT-B-16", "pretrained": "/mnt/personal/zhudongy/capsfusion10m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-16-subset-dc10m-010-1e3-original-keep_low_intra_only/checkpoints/epoch_40.pt", "task": "zeroshot_classification", "metrics": {"acc1": 0.021976203174136125, "acc5": 0.0837118634716884, "mean_per_class_recall": 0.015189344069330762}, "language": "en"}
|
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-16-subset-dc10m-010-1e3-original-keep_low_intra_only/benchmark_vtab_resisc45_epoch_40.pt_ViT-B-16_en_zeroshot_classification.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"dataset": "vtab/resisc45", "model": "ViT-B-16", "pretrained": "/mnt/personal/zhudongy/capsfusion10m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-16-subset-dc10m-010-1e3-original-keep_low_intra_only/checkpoints/epoch_40.pt", "task": "zeroshot_classification", "metrics": {"acc1": 0.04698412698412698, "acc5": 0.1680952380952381, "mean_per_class_recall": 0.05040465927648766}, "language": "en"}
|
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-16-subset-dc10m-010-1e3-original-keep_low_intra_only/out.log
ADDED
|
@@ -0,0 +1,314 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
2025-04-21,19:53:18 | INFO | Running in distributed mode with multiple processes. Device: cuda:0.Process (global: 0, local 0), total 2.
|
| 2 |
+
2025-04-21,19:53:18 | INFO | Loaded ViT-B-16 model config.
|
| 3 |
+
2025-04-21,19:53:20 | INFO | Model:
|
| 4 |
+
2025-04-21,19:53:20 | INFO | CLIP(
|
| 5 |
+
(visual): VisionTransformer(
|
| 6 |
+
(conv1): Conv2d(3, 768, kernel_size=(16, 16), stride=(16, 16), bias=False)
|
| 7 |
+
(patch_dropout): Identity()
|
| 8 |
+
(ln_pre): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 9 |
+
(transformer): Transformer(
|
| 10 |
+
(resblocks): ModuleList(
|
| 11 |
+
(0-11): 12 x ResidualAttentionBlock(
|
| 12 |
+
(ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 13 |
+
(attn): MultiheadAttention(
|
| 14 |
+
(out_proj): NonDynamicallyQuantizableLinear(in_features=768, out_features=768, bias=True)
|
| 15 |
+
)
|
| 16 |
+
(ls_1): Identity()
|
| 17 |
+
(ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 18 |
+
(mlp): Sequential(
|
| 19 |
+
(c_fc): Linear(in_features=768, out_features=3072, bias=True)
|
| 20 |
+
(gelu): GELU(approximate='none')
|
| 21 |
+
(c_proj): Linear(in_features=3072, out_features=768, bias=True)
|
| 22 |
+
)
|
| 23 |
+
(ls_2): Identity()
|
| 24 |
+
)
|
| 25 |
+
)
|
| 26 |
+
)
|
| 27 |
+
(ln_post): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 28 |
+
)
|
| 29 |
+
(transformer): Transformer(
|
| 30 |
+
(resblocks): ModuleList(
|
| 31 |
+
(0-11): 12 x ResidualAttentionBlock(
|
| 32 |
+
(ln_1): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
| 33 |
+
(attn): MultiheadAttention(
|
| 34 |
+
(out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)
|
| 35 |
+
)
|
| 36 |
+
(ls_1): Identity()
|
| 37 |
+
(ln_2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
| 38 |
+
(mlp): Sequential(
|
| 39 |
+
(c_fc): Linear(in_features=512, out_features=2048, bias=True)
|
| 40 |
+
(gelu): GELU(approximate='none')
|
| 41 |
+
(c_proj): Linear(in_features=2048, out_features=512, bias=True)
|
| 42 |
+
)
|
| 43 |
+
(ls_2): Identity()
|
| 44 |
+
)
|
| 45 |
+
)
|
| 46 |
+
)
|
| 47 |
+
(token_embedding): Embedding(49408, 512)
|
| 48 |
+
(ln_final): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
| 49 |
+
)
|
| 50 |
+
2025-04-21,19:53:20 | INFO | Params:
|
| 51 |
+
2025-04-21,19:53:20 | INFO | accum_freq: 2
|
| 52 |
+
2025-04-21,19:53:20 | INFO | aug_cfg: {}
|
| 53 |
+
2025-04-21,19:53:20 | INFO | batch_size: 2048
|
| 54 |
+
2025-04-21,19:53:20 | INFO | beta1: 0.9
|
| 55 |
+
2025-04-21,19:53:20 | INFO | beta2: 0.98
|
| 56 |
+
2025-04-21,19:53:20 | INFO | cache_dir: None
|
| 57 |
+
2025-04-21,19:53:20 | INFO | caption_ratio: 0.1
|
| 58 |
+
2025-04-21,19:53:20 | INFO | checkpoint_path: /mnt/personal/zhudongy/capsfusion10m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-16-subset-dc10m-010-1e3-original-keep_low_intra_only/checkpoints
|
| 59 |
+
2025-04-21,19:53:20 | INFO | coca_caption_loss_weight: 2.0
|
| 60 |
+
2025-04-21,19:53:20 | INFO | coca_contrastive_loss_weight: 1.0
|
| 61 |
+
2025-04-21,19:53:20 | INFO | copy_codebase: False
|
| 62 |
+
2025-04-21,19:53:20 | INFO | csv_caption_key: title
|
| 63 |
+
2025-04-21,19:53:20 | INFO | csv_img_key: filepath
|
| 64 |
+
2025-04-21,19:53:20 | INFO | csv_separator:
|
| 65 |
+
2025-04-21,19:53:20 | INFO | dataset_resampled: False
|
| 66 |
+
2025-04-21,19:53:20 | INFO | dataset_type: synthetic
|
| 67 |
+
2025-04-21,19:53:20 | INFO | ddp_static_graph: False
|
| 68 |
+
2025-04-21,19:53:20 | INFO | debug: False
|
| 69 |
+
2025-04-21,19:53:20 | INFO | delete_previous_checkpoint: False
|
| 70 |
+
2025-04-21,19:53:20 | INFO | device: cuda:0
|
| 71 |
+
2025-04-21,19:53:20 | INFO | dist_backend: None
|
| 72 |
+
2025-04-21,19:53:20 | INFO | dist_url: None
|
| 73 |
+
2025-04-21,19:53:20 | INFO | distill: False
|
| 74 |
+
2025-04-21,19:53:20 | INFO | distill_model: None
|
| 75 |
+
2025-04-21,19:53:20 | INFO | distill_pretrained: None
|
| 76 |
+
2025-04-21,19:53:20 | INFO | distributed: True
|
| 77 |
+
2025-04-21,19:53:20 | INFO | epochs: 40
|
| 78 |
+
2025-04-21,19:53:20 | INFO | epochs_cooldown: None
|
| 79 |
+
2025-04-21,19:53:20 | INFO | eps: 1e-08
|
| 80 |
+
2025-04-21,19:53:20 | INFO | force_custom_text: False
|
| 81 |
+
2025-04-21,19:53:20 | INFO | force_image_size: None
|
| 82 |
+
2025-04-21,19:53:20 | INFO | force_patch_dropout: None
|
| 83 |
+
2025-04-21,19:53:20 | INFO | force_quick_gelu: False
|
| 84 |
+
2025-04-21,19:53:20 | INFO | gather_with_grad: True
|
| 85 |
+
2025-04-21,19:53:20 | INFO | grad_checkpointing: True
|
| 86 |
+
2025-04-21,19:53:20 | INFO | grad_clip_norm: None
|
| 87 |
+
2025-04-21,19:53:20 | INFO | horovod: False
|
| 88 |
+
2025-04-21,19:53:20 | INFO | image_interpolation: None
|
| 89 |
+
2025-04-21,19:53:20 | INFO | image_mean: None
|
| 90 |
+
2025-04-21,19:53:20 | INFO | image_resize_mode: None
|
| 91 |
+
2025-04-21,19:53:20 | INFO | image_std: None
|
| 92 |
+
2025-04-21,19:53:20 | INFO | imagenet_v2: None
|
| 93 |
+
2025-04-21,19:53:20 | INFO | imagenet_val: None
|
| 94 |
+
2025-04-21,19:53:20 | INFO | keep_func_name: keep_low_intra_only
|
| 95 |
+
2025-04-21,19:53:20 | INFO | local_loss: False
|
| 96 |
+
2025-04-21,19:53:20 | INFO | local_rank: 0
|
| 97 |
+
2025-04-21,19:53:20 | INFO | lock_image: False
|
| 98 |
+
2025-04-21,19:53:20 | INFO | lock_image_freeze_bn_stats: False
|
| 99 |
+
2025-04-21,19:53:20 | INFO | lock_image_unlocked_groups: 0
|
| 100 |
+
2025-04-21,19:53:20 | INFO | lock_text: False
|
| 101 |
+
2025-04-21,19:53:20 | INFO | lock_text_freeze_layer_norm: False
|
| 102 |
+
2025-04-21,19:53:20 | INFO | lock_text_unlocked_layers: 0
|
| 103 |
+
2025-04-21,19:53:20 | INFO | log_every_n_steps: 100
|
| 104 |
+
2025-04-21,19:53:20 | INFO | log_level: 20
|
| 105 |
+
2025-04-21,19:53:20 | INFO | log_local: False
|
| 106 |
+
2025-04-21,19:53:20 | INFO | log_path: /mnt/personal/zhudongy/capsfusion10m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-16-subset-dc10m-010-1e3-original-keep_low_intra_only/out.log
|
| 107 |
+
2025-04-21,19:53:20 | INFO | logs: /mnt/personal/zhudongy/capsfusion10m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs
|
| 108 |
+
2025-04-21,19:53:20 | INFO | loss_dist_impl: None
|
| 109 |
+
2025-04-21,19:53:20 | INFO | lr: 0.001
|
| 110 |
+
2025-04-21,19:53:20 | INFO | lr_cooldown_end: 0.0
|
| 111 |
+
2025-04-21,19:53:20 | INFO | lr_cooldown_power: 1.0
|
| 112 |
+
2025-04-21,19:53:20 | INFO | lr_scheduler: cosine
|
| 113 |
+
2025-04-21,19:53:20 | INFO | map_func_name: use_none
|
| 114 |
+
2025-04-21,19:53:20 | INFO | model: ViT-B-16
|
| 115 |
+
2025-04-21,19:53:20 | INFO | momentum: None
|
| 116 |
+
2025-04-21,19:53:20 | INFO | name: ViT-B-16-subset-dc10m-010-1e3-original-keep_low_intra_only
|
| 117 |
+
2025-04-21,19:53:20 | INFO | no_set_device_rank: False
|
| 118 |
+
2025-04-21,19:53:20 | INFO | opt: adamw
|
| 119 |
+
2025-04-21,19:53:20 | INFO | precision: amp
|
| 120 |
+
2025-04-21,19:53:20 | INFO | pretrained:
|
| 121 |
+
2025-04-21,19:53:20 | INFO | pretrained_image: False
|
| 122 |
+
2025-04-21,19:53:20 | INFO | rank: 0
|
| 123 |
+
2025-04-21,19:53:20 | INFO | remote_sync: None
|
| 124 |
+
2025-04-21,19:53:20 | INFO | remote_sync_frequency: 300
|
| 125 |
+
2025-04-21,19:53:20 | INFO | remote_sync_protocol: s3
|
| 126 |
+
2025-04-21,19:53:20 | INFO | report_to: tensorboard,wandb
|
| 127 |
+
2025-04-21,19:53:20 | INFO | resume: None
|
| 128 |
+
2025-04-21,19:53:20 | INFO | save_frequency: 1
|
| 129 |
+
2025-04-21,19:53:20 | INFO | save_most_recent: False
|
| 130 |
+
2025-04-21,19:53:20 | INFO | seed: 0
|
| 131 |
+
2025-04-21,19:53:20 | INFO | siglip: False
|
| 132 |
+
2025-04-21,19:53:20 | INFO | skip_scheduler: False
|
| 133 |
+
2025-04-21,19:53:20 | INFO | tensorboard: True
|
| 134 |
+
2025-04-21,19:53:20 | INFO | tensorboard_path: /mnt/personal/zhudongy/capsfusion10m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-16-subset-dc10m-010-1e3-original-keep_low_intra_only/tensorboard
|
| 135 |
+
2025-04-21,19:53:20 | INFO | torchcompile: False
|
| 136 |
+
2025-04-21,19:53:20 | INFO | torchscript: False
|
| 137 |
+
2025-04-21,19:53:20 | INFO | trace: False
|
| 138 |
+
2025-04-21,19:53:20 | INFO | train_data: /mnt/personal/zhudongy/CapsFusion120M-wds/{00000..01426}.tar
|
| 139 |
+
2025-04-21,19:53:20 | INFO | train_data_upsampling_factors: None
|
| 140 |
+
2025-04-21,19:53:20 | INFO | train_num_samples: 10006295
|
| 141 |
+
2025-04-21,19:53:20 | INFO | use_bn_sync: False
|
| 142 |
+
2025-04-21,19:53:20 | INFO | use_bnb_linear: None
|
| 143 |
+
2025-04-21,19:53:20 | INFO | val_data: None
|
| 144 |
+
2025-04-21,19:53:20 | INFO | val_frequency: 1
|
| 145 |
+
2025-04-21,19:53:20 | INFO | val_num_samples: None
|
| 146 |
+
2025-04-21,19:53:20 | INFO | wandb: True
|
| 147 |
+
2025-04-21,19:53:20 | INFO | wandb_notes:
|
| 148 |
+
2025-04-21,19:53:20 | INFO | wandb_project_name: open-clip
|
| 149 |
+
2025-04-21,19:53:20 | INFO | warmup: 122
|
| 150 |
+
2025-04-21,19:53:20 | INFO | wd: 0.5
|
| 151 |
+
2025-04-21,19:53:20 | INFO | workers: 16
|
| 152 |
+
2025-04-21,19:53:20 | INFO | world_size: 2
|
| 153 |
+
2025-04-21,19:53:20 | INFO | zeroshot_frequency: 2
|
| 154 |
+
2025-04-21,19:53:22 | INFO | Created AdamW (adamw) optimizer: lr: 0.001, betas: (0.9, 0.98), eps: 1e-08, weight_decay: 0.5, amsgrad: False, foreach: None, maximize: False, capturable: False, differentiable: False, fused: None
|
| 155 |
+
2025-04-21,20:00:03 | INFO | Start epoch 0
|
| 156 |
+
2025-04-21,20:00:53 | INFO | Train Epoch: 0 [ 8192/1048576 (1%)] Data (t): 39.418 Batch (t): 50.277, 162.938/s, 81.4692/s/gpu LR: 0.000008 Logit Scale: 14.286 Imm_image: -0.36837 (-0.36837) Imm_text: -0.36837 (-0.36837) Isd_image: -0.36385 (-0.36385) Isd_text: -0.36385 (-0.36385) Contrastive_loss: 9.1196 (9.1196) Loss: 9.1196 (9.1196)
|
| 157 |
+
2025-04-21,20:16:58 | INFO | Train Epoch: 0 [ 827392/1048576 (79%)] Data (t): 1.308 Batch (t): 9.650, 863.991/s, 431.995/s/gpu LR: 0.000828 Logit Scale: 14.203 Imm_image: 2.4476 (1.0396) Imm_text: 2.4476 (1.0396) Isd_image: 2.0643 (0.85022) Isd_text: 2.0643 (0.85022) Contrastive_loss: 8.8541 (8.9869) Loss: 8.8541 (8.9869)
|
| 158 |
+
2025-04-21,20:21:16 | INFO | Train Epoch: 0 [1048576/1048576 (100%)] Data (t): 1.278 Batch (t): 9.554, 873.456/s, 436.728/s/gpu LR: 0.001000 Logit Scale: 14.136 Imm_image: 4.4223 (2.1672) Imm_text: 4.4223 (2.1672) Isd_image: 3.9521 (1.8842) Isd_text: 3.9521 (1.8842) Contrastive_loss: 8.7824 (8.9187) Loss: 8.7824 (8.9187)
|
| 159 |
+
2025-04-21,20:21:18 | INFO | Start epoch 1
|
| 160 |
+
2025-04-21,20:22:06 | INFO | Train Epoch: 1 [ 8192/1048576 (1%)] Data (t): 39.685 Batch (t): 48.070, 170.418/s, 85.2092/s/gpu LR: 0.001000 Logit Scale: 14.133 Imm_image: 4.4031 (4.4031) Imm_text: 4.4031 (4.4031) Isd_image: 3.9121 (3.9121) Isd_text: 3.9121 (3.9121) Contrastive_loss: 8.7408 (8.7408) Loss: 8.7408 (8.7408)
|
| 161 |
+
2025-04-21,20:38:13 | INFO | Train Epoch: 1 [ 827392/1048576 (79%)] Data (t): 1.361 Batch (t): 9.669, 850.538/s, 425.269/s/gpu LR: 0.000999 Logit Scale: 13.947 Imm_image: 5.3181 (4.8606) Imm_text: 5.3181 (4.8606) Isd_image: 4.6104 (4.2612) Isd_text: 4.6104 (4.2612) Contrastive_loss: 8.6541 (8.6974) Loss: 8.6541 (8.6974)
|
| 162 |
+
2025-04-21,20:42:33 | INFO | Train Epoch: 1 [1048576/1048576 (100%)] Data (t): 1.317 Batch (t): 9.627, 853.081/s, 426.541/s/gpu LR: 0.000998 Logit Scale: 13.910 Imm_image: 5.4338 (5.0517) Imm_text: 5.4338 (5.0517) Isd_image: 4.4064 (4.3096) Isd_text: 4.4064 (4.3096) Contrastive_loss: 8.6223 (8.6724) Loss: 8.6223 (8.6724)
|
| 163 |
+
2025-04-21,20:42:35 | INFO | Start epoch 2
|
| 164 |
+
2025-04-21,20:43:24 | INFO | Train Epoch: 2 [ 8192/1048576 (1%)] Data (t): 38.638 Batch (t): 48.739, 168.080/s, 84.0399/s/gpu LR: 0.000998 Logit Scale: 13.906 Imm_image: 5.5045 (5.5045) Imm_text: 5.5045 (5.5045) Isd_image: 4.5926 (4.5926) Isd_text: 4.5926 (4.5926) Contrastive_loss: 8.5579 (8.5579) Loss: 8.5579 (8.5579)
|
| 165 |
+
2025-04-21,20:59:25 | INFO | Train Epoch: 2 [ 827392/1048576 (79%)] Data (t): 1.247 Batch (t): 9.608, 870.841/s, 435.420/s/gpu LR: 0.000995 Logit Scale: 13.778 Imm_image: 6.8661 (6.1853) Imm_text: 6.8661 (6.1853) Isd_image: 5.3794 (4.9860) Isd_text: 5.3794 (4.9860) Contrastive_loss: 8.3533 (8.4556) Loss: 8.3533 (8.4556)
|
| 166 |
+
2025-04-21,21:03:42 | INFO | Train Epoch: 2 [1048576/1048576 (100%)] Data (t): 1.243 Batch (t): 9.516, 896.557/s, 448.278/s/gpu LR: 0.000993 Logit Scale: 13.748 Imm_image: 7.1870 (6.5192) Imm_text: 7.1870 (6.5192) Isd_image: 5.9245 (5.2988) Isd_text: 5.9245 (5.2988) Contrastive_loss: 8.3921 (8.4344) Loss: 8.3921 (8.4344)
|
| 167 |
+
2025-04-21,21:03:44 | INFO | Start epoch 3
|
| 168 |
+
2025-04-21,21:04:32 | INFO | Train Epoch: 3 [ 8192/1048576 (1%)] Data (t): 40.068 Batch (t): 48.745, 168.059/s, 84.0294/s/gpu LR: 0.000993 Logit Scale: 13.744 Imm_image: 6.9304 (6.9304) Imm_text: 6.9304 (6.9304) Isd_image: 5.7381 (5.7381) Isd_text: 5.7381 (5.7381) Contrastive_loss: 8.4777 (8.4777) Loss: 8.4777 (8.4777)
|
| 169 |
+
2025-04-21,21:20:10 | INFO | Train Epoch: 3 [ 827392/1048576 (79%)] Data (t): 1.093 Batch (t): 9.372, 875.864/s, 437.932/s/gpu LR: 0.000987 Logit Scale: 13.698 Imm_image: 7.5421 (7.2362) Imm_text: 7.5421 (7.2362) Isd_image: 5.5027 (5.6204) Isd_text: 5.5027 (5.6204) Contrastive_loss: 8.1817 (8.3297) Loss: 8.1817 (8.3297)
|
| 170 |
+
2025-04-21,21:24:19 | INFO | Train Epoch: 3 [1048576/1048576 (100%)] Data (t): 1.011 Batch (t): 9.229, 904.258/s, 452.129/s/gpu LR: 0.000985 Logit Scale: 13.698 Imm_image: 7.9477 (7.4734) Imm_text: 7.9477 (7.4734) Isd_image: 5.1939 (5.4782) Isd_text: 5.1939 (5.4782) Contrastive_loss: 7.9592 (8.2062) Loss: 7.9592 (8.2062)
|
| 171 |
+
2025-04-21,21:24:21 | INFO | Start epoch 4
|
| 172 |
+
2025-04-21,21:25:07 | INFO | Train Epoch: 4 [ 8192/1048576 (1%)] Data (t): 35.564 Batch (t): 46.414, 176.500/s, 88.2499/s/gpu LR: 0.000985 Logit Scale: 13.700 Imm_image: 7.9912 (7.9912) Imm_text: 7.9912 (7.9912) Isd_image: 5.0700 (5.0700) Isd_text: 5.0700 (5.0700) Contrastive_loss: 7.9317 (7.9317) Loss: 7.9317 (7.9317)
|
| 173 |
+
2025-04-21,21:40:47 | INFO | Train Epoch: 4 [ 827392/1048576 (79%)] Data (t): 1.092 Batch (t): 9.398, 851.586/s, 425.793/s/gpu LR: 0.000976 Logit Scale: 13.705 Imm_image: 7.7751 (7.8832) Imm_text: 7.7751 (7.8832) Isd_image: 4.7157 (4.8929) Isd_text: 4.7157 (4.8929) Contrastive_loss: 7.8275 (7.8796) Loss: 7.8275 (7.8796)
|
| 174 |
+
2025-04-21,21:44:56 | INFO | Train Epoch: 4 [1048576/1048576 (100%)] Data (t): 0.997 Batch (t): 9.239, 894.402/s, 447.201/s/gpu LR: 0.000974 Logit Scale: 13.703 Imm_image: 7.8277 (7.8647) Imm_text: 7.8277 (7.8647) Isd_image: 4.6871 (4.8243) Isd_text: 4.6871 (4.8243) Contrastive_loss: 7.7731 (7.8441) Loss: 7.7731 (7.8441)
|
| 175 |
+
2025-04-21,21:44:58 | INFO | Start epoch 5
|
| 176 |
+
2025-04-21,21:45:48 | INFO | Train Epoch: 5 [ 8192/1048576 (1%)] Data (t): 34.666 Batch (t): 50.188, 163.226/s, 81.6128/s/gpu LR: 0.000974 Logit Scale: 13.702 Imm_image: 7.8433 (7.8433) Imm_text: 7.8433 (7.8433) Isd_image: 4.4302 (4.4302) Isd_text: 4.4302 (4.4302) Contrastive_loss: 7.6758 (7.6758) Loss: 7.6758 (7.6758)
|
| 177 |
+
2025-04-21,22:01:21 | INFO | Train Epoch: 5 [ 827392/1048576 (79%)] Data (t): 1.037 Batch (t): 9.331, 874.490/s, 437.245/s/gpu LR: 0.000963 Logit Scale: 13.877 Imm_image: 8.3464 (8.0949) Imm_text: 8.3464 (8.0949) Isd_image: 4.2675 (4.3488) Isd_text: 4.2675 (4.3488) Contrastive_loss: 7.4460 (7.5609) Loss: 7.4460 (7.5609)
|
| 178 |
+
2025-04-21,22:05:30 | INFO | Train Epoch: 5 [1048576/1048576 (100%)] Data (t): 0.995 Batch (t): 9.220, 908.852/s, 454.426/s/gpu LR: 0.000959 Logit Scale: 13.968 Imm_image: 8.5083 (8.2327) Imm_text: 8.5083 (8.2327) Isd_image: 3.8744 (4.1907) Isd_text: 3.8744 (4.1907) Contrastive_loss: 7.3839 (7.5019) Loss: 7.3839 (7.5019)
|
| 179 |
+
2025-04-21,22:05:32 | INFO | Start epoch 6
|
| 180 |
+
2025-04-21,22:06:23 | INFO | Train Epoch: 6 [ 8192/1048576 (1%)] Data (t): 36.694 Batch (t): 50.504, 162.206/s, 81.1030/s/gpu LR: 0.000959 Logit Scale: 13.970 Imm_image: 8.8557 (8.8557) Imm_text: 8.8557 (8.8557) Isd_image: 4.0350 (4.0350) Isd_text: 4.0350 (4.0350) Contrastive_loss: 7.0985 (7.0985) Loss: 7.0985 (7.0985)
|
| 181 |
+
2025-04-21,22:21:56 | INFO | Train Epoch: 6 [ 827392/1048576 (79%)] Data (t): 1.065 Batch (t): 9.334, 857.862/s, 428.931/s/gpu LR: 0.000946 Logit Scale: 14.402 Imm_image: 8.3337 (8.5947) Imm_text: 8.3337 (8.5947) Isd_image: 4.4245 (4.2298) Isd_text: 4.4245 (4.2298) Contrastive_loss: 7.4111 (7.2548) Loss: 7.4111 (7.2548)
|
| 182 |
+
2025-04-21,22:26:06 | INFO | Train Epoch: 6 [1048576/1048576 (100%)] Data (t): 0.999 Batch (t): 9.257, 903.408/s, 451.704/s/gpu LR: 0.000942 Logit Scale: 14.471 Imm_image: 9.4197 (8.8697) Imm_text: 9.4197 (8.8697) Isd_image: 3.5616 (4.0070) Isd_text: 3.5616 (4.0070) Contrastive_loss: 6.8972 (7.1356) Loss: 6.8972 (7.1356)
|
| 183 |
+
2025-04-21,22:26:08 | INFO | Start epoch 7
|
| 184 |
+
2025-04-21,22:26:54 | INFO | Train Epoch: 7 [ 8192/1048576 (1%)] Data (t): 35.823 Batch (t): 46.484, 176.234/s, 88.1169/s/gpu LR: 0.000942 Logit Scale: 14.476 Imm_image: 9.6036 (9.6036) Imm_text: 9.6036 (9.6036) Isd_image: 3.1636 (3.1636) Isd_text: 3.1636 (3.1636) Contrastive_loss: 6.7002 (6.7002) Loss: 6.7002 (6.7002)
|
| 185 |
+
2025-04-21,22:42:28 | INFO | Train Epoch: 7 [ 827392/1048576 (79%)] Data (t): 1.064 Batch (t): 9.333, 873.486/s, 436.743/s/gpu LR: 0.000926 Logit Scale: 15.082 Imm_image: 9.7026 (9.6531) Imm_text: 9.7026 (9.6531) Isd_image: 3.6157 (3.3896) Isd_text: 3.6157 (3.3896) Contrastive_loss: 6.8166 (6.7584) Loss: 6.8166 (6.7584)
|
| 186 |
+
2025-04-21,22:46:37 | INFO | Train Epoch: 7 [1048576/1048576 (100%)] Data (t): 0.966 Batch (t): 9.222, 903.705/s, 451.852/s/gpu LR: 0.000922 Logit Scale: 15.199 Imm_image: 10.124 (9.8102) Imm_text: 10.124 (9.8102) Isd_image: 2.7879 (3.1890) Isd_text: 2.7879 (3.1890) Contrastive_loss: 6.4664 (6.6611) Loss: 6.4664 (6.6611)
|
| 187 |
+
2025-04-21,22:46:38 | INFO | Start epoch 8
|
| 188 |
+
2025-04-21,22:47:23 | INFO | Train Epoch: 8 [ 8192/1048576 (1%)] Data (t): 35.256 Batch (t): 44.316, 184.856/s, 92.4278/s/gpu LR: 0.000922 Logit Scale: 15.208 Imm_image: 10.210 (10.210) Imm_text: 10.210 (10.210) Isd_image: 2.8009 (2.8009) Isd_text: 2.8009 (2.8009) Contrastive_loss: 6.4050 (6.4050) Loss: 6.4050 (6.4050)
|
| 189 |
+
2025-04-21,23:03:03 | INFO | Train Epoch: 8 [ 827392/1048576 (79%)] Data (t): 1.088 Batch (t): 9.399, 862.332/s, 431.166/s/gpu LR: 0.000904 Logit Scale: 16.048 Imm_image: 10.222 (10.216) Imm_text: 10.222 (10.216) Isd_image: 2.7883 (2.7946) Isd_text: 2.7883 (2.7946) Contrastive_loss: 6.5765 (6.4907) Loss: 6.5765 (6.4907)
|
| 190 |
+
2025-04-21,23:07:12 | INFO | Train Epoch: 8 [1048576/1048576 (100%)] Data (t): 1.033 Batch (t): 9.255, 904.224/s, 452.112/s/gpu LR: 0.000899 Logit Scale: 16.222 Imm_image: 10.466 (10.299) Imm_text: 10.466 (10.299) Isd_image: 2.3033 (2.6308) Isd_text: 2.3033 (2.6308) Contrastive_loss: 6.2841 (6.4219) Loss: 6.2841 (6.4219)
|
| 191 |
+
2025-04-21,23:07:14 | INFO | Start epoch 9
|
| 192 |
+
2025-04-21,23:08:03 | INFO | Train Epoch: 9 [ 8192/1048576 (1%)] Data (t): 35.934 Batch (t): 49.090, 166.878/s, 83.4391/s/gpu LR: 0.000899 Logit Scale: 16.229 Imm_image: 10.976 (10.976) Imm_text: 10.976 (10.976) Isd_image: 2.3454 (2.3454) Isd_text: 2.3454 (2.3454) Contrastive_loss: 5.8266 (5.8266) Loss: 5.8266 (5.8266)
|
| 193 |
+
2025-04-21,23:23:40 | INFO | Train Epoch: 9 [ 827392/1048576 (79%)] Data (t): 1.061 Batch (t): 9.365, 885.093/s, 442.547/s/gpu LR: 0.000879 Logit Scale: 17.123 Imm_image: 10.671 (10.823) Imm_text: 10.671 (10.823) Isd_image: 2.8036 (2.5745) Isd_text: 2.8036 (2.5745) Contrastive_loss: 6.3293 (6.0779) Loss: 6.3293 (6.0779)
|
| 194 |
+
2025-04-21,23:27:49 | INFO | Train Epoch: 9 [1048576/1048576 (100%)] Data (t): 0.977 Batch (t): 9.224, 902.818/s, 451.409/s/gpu LR: 0.000874 Logit Scale: 17.323 Imm_image: 11.439 (11.029) Imm_text: 11.439 (11.029) Isd_image: 1.7596 (2.3029) Isd_text: 1.7596 (2.3029) Contrastive_loss: 5.9129 (6.0229) Loss: 5.9129 (6.0229)
|
| 195 |
+
2025-04-21,23:27:51 | INFO | Start epoch 10
|
| 196 |
+
2025-04-21,23:28:36 | INFO | Train Epoch: 10 [ 8192/1048576 (1%)] Data (t): 33.646 Batch (t): 45.216, 181.175/s, 90.5877/s/gpu LR: 0.000873 Logit Scale: 17.334 Imm_image: 11.559 (11.559) Imm_text: 11.559 (11.559) Isd_image: 1.7511 (1.7511) Isd_text: 1.7511 (1.7511) Contrastive_loss: 5.7620 (5.7620) Loss: 5.7620 (5.7620)
|
| 197 |
+
2025-04-21,23:44:11 | INFO | Train Epoch: 10 [ 827392/1048576 (79%)] Data (t): 1.081 Batch (t): 9.352, 873.437/s, 436.718/s/gpu LR: 0.000852 Logit Scale: 18.490 Imm_image: 11.840 (11.699) Imm_text: 11.840 (11.699) Isd_image: 2.1592 (1.9552) Isd_text: 2.1592 (1.9552) Contrastive_loss: 5.8099 (5.7860) Loss: 5.8099 (5.7860)
|
| 198 |
+
2025-04-21,23:48:21 | INFO | Train Epoch: 10 [1048576/1048576 (100%)] Data (t): 1.032 Batch (t): 9.257, 895.135/s, 447.568/s/gpu LR: 0.000846 Logit Scale: 18.775 Imm_image: 11.999 (11.799) Imm_text: 11.999 (11.799) Isd_image: 1.2359 (1.7154) Isd_text: 1.2359 (1.7154) Contrastive_loss: 5.5812 (5.7177) Loss: 5.5812 (5.7177)
|
| 199 |
+
2025-04-21,23:48:23 | INFO | Start epoch 11
|
| 200 |
+
2025-04-21,23:49:06 | INFO | Train Epoch: 11 [ 8192/1048576 (1%)] Data (t): 34.692 Batch (t): 43.125, 189.958/s, 94.9792/s/gpu LR: 0.000845 Logit Scale: 18.788 Imm_image: 11.909 (11.909) Imm_text: 11.909 (11.909) Isd_image: 1.3138 (1.3138) Isd_text: 1.3138 (1.3138) Contrastive_loss: 5.6080 (5.6080) Loss: 5.6080 (5.6080)
|
| 201 |
+
2025-04-22,00:04:57 | INFO | Train Epoch: 11 [ 827392/1048576 (79%)] Data (t): 1.177 Batch (t): 9.506, 864.155/s, 432.077/s/gpu LR: 0.000822 Logit Scale: 20.192 Imm_image: 12.917 (12.413) Imm_text: 12.917 (12.413) Isd_image: 2.1194 (1.7166) Isd_text: 2.1194 (1.7166) Contrastive_loss: 5.3919 (5.5000) Loss: 5.3919 (5.5000)
|
| 202 |
+
2025-04-22,00:09:10 | INFO | Train Epoch: 11 [1048576/1048576 (100%)] Data (t): 1.135 Batch (t): 9.383, 891.267/s, 445.634/s/gpu LR: 0.000815 Logit Scale: 20.460 Imm_image: 13.386 (12.737) Imm_text: 13.386 (12.737) Isd_image: 1.6267 (1.6867) Isd_text: 1.6267 (1.6867) Contrastive_loss: 5.0151 (5.3384) Loss: 5.0151 (5.3384)
|
| 203 |
+
2025-04-22,00:09:12 | INFO | Start epoch 12
|
| 204 |
+
2025-04-22,00:10:00 | INFO | Train Epoch: 12 [ 8192/1048576 (1%)] Data (t): 34.178 Batch (t): 48.069, 170.422/s, 85.2111/s/gpu LR: 0.000815 Logit Scale: 20.474 Imm_image: 13.472 (13.472) Imm_text: 13.472 (13.472) Isd_image: 1.4004 (1.4004) Isd_text: 1.4004 (1.4004) Contrastive_loss: 4.8872 (4.8872) Loss: 4.8872 (4.8872)
|
| 205 |
+
2025-04-22,00:25:58 | INFO | Train Epoch: 12 [ 827392/1048576 (79%)] Data (t): 1.197 Batch (t): 9.584, 863.276/s, 431.638/s/gpu LR: 0.000790 Logit Scale: 21.890 Imm_image: 13.971 (13.722) Imm_text: 13.971 (13.722) Isd_image: 2.0668 (1.7336) Isd_text: 2.0668 (1.7336) Contrastive_loss: 5.0719 (4.9795) Loss: 5.0719 (4.9795)
|
| 206 |
+
2025-04-22,00:30:14 | INFO | Train Epoch: 12 [1048576/1048576 (100%)] Data (t): 1.156 Batch (t): 9.460, 887.736/s, 443.868/s/gpu LR: 0.000783 Logit Scale: 22.221 Imm_image: 13.898 (13.781) Imm_text: 13.898 (13.781) Isd_image: 1.3987 (1.6220) Isd_text: 1.3987 (1.6220) Contrastive_loss: 5.0063 (4.9885) Loss: 5.0063 (4.9885)
|
| 207 |
+
2025-04-22,00:30:15 | INFO | Start epoch 13
|
| 208 |
+
2025-04-22,00:31:06 | INFO | Train Epoch: 13 [ 8192/1048576 (1%)] Data (t): 42.005 Batch (t): 50.413, 162.496/s, 81.2482/s/gpu LR: 0.000783 Logit Scale: 22.231 Imm_image: 14.035 (14.035) Imm_text: 14.035 (14.035) Isd_image: 1.5638 (1.5638) Isd_text: 1.5638 (1.5638) Contrastive_loss: 5.0072 (5.0072) Loss: 5.0072 (5.0072)
|
| 209 |
+
2025-04-22,00:46:51 | INFO | Train Epoch: 13 [ 827392/1048576 (79%)] Data (t): 1.110 Batch (t): 9.451, 866.920/s, 433.460/s/gpu LR: 0.000757 Logit Scale: 23.788 Imm_image: 14.846 (14.441) Imm_text: 14.846 (14.441) Isd_image: 2.0734 (1.8186) Isd_text: 2.0734 (1.8186) Contrastive_loss: 4.9115 (4.9593) Loss: 4.9115 (4.9593)
|
| 210 |
+
2025-04-22,00:51:01 | INFO | Train Epoch: 13 [1048576/1048576 (100%)] Data (t): 1.012 Batch (t): 9.268, 895.815/s, 447.907/s/gpu LR: 0.000749 Logit Scale: 24.118 Imm_image: 15.875 (14.919) Imm_text: 15.875 (14.919) Isd_image: 1.4286 (1.6886) Isd_text: 1.4286 (1.6886) Contrastive_loss: 4.2791 (4.7326) Loss: 4.2791 (4.7326)
|
| 211 |
+
2025-04-22,00:51:03 | INFO | Start epoch 14
|
| 212 |
+
2025-04-22,00:51:56 | INFO | Train Epoch: 14 [ 8192/1048576 (1%)] Data (t): 42.324 Batch (t): 52.895, 154.873/s, 77.4363/s/gpu LR: 0.000749 Logit Scale: 24.135 Imm_image: 16.335 (16.335) Imm_text: 16.335 (16.335) Isd_image: 1.4689 (1.4689) Isd_text: 1.4689 (1.4689) Contrastive_loss: 3.9028 (3.9028) Loss: 3.9028 (3.9028)
|
| 213 |
+
2025-04-22,01:07:42 | INFO | Train Epoch: 14 [ 827392/1048576 (79%)] Data (t): 1.136 Batch (t): 9.464, 879.087/s, 439.543/s/gpu LR: 0.000721 Logit Scale: 25.785 Imm_image: 16.453 (16.394) Imm_text: 16.453 (16.394) Isd_image: 2.0381 (1.7535) Isd_text: 2.0381 (1.7535) Contrastive_loss: 4.3617 (4.1322) Loss: 4.3617 (4.1322)
|
| 214 |
+
2025-04-22,01:11:57 | INFO | Train Epoch: 14 [1048576/1048576 (100%)] Data (t): 1.103 Batch (t): 9.414, 899.541/s, 449.770/s/gpu LR: 0.000714 Logit Scale: 26.135 Imm_image: 17.231 (16.673) Imm_text: 17.231 (16.673) Isd_image: 1.9119 (1.8063) Isd_text: 1.9119 (1.8063) Contrastive_loss: 3.8846 (4.0497) Loss: 3.8846 (4.0497)
|
| 215 |
+
2025-04-22,01:11:58 | INFO | Start epoch 15
|
| 216 |
+
2025-04-22,01:12:44 | INFO | Train Epoch: 15 [ 8192/1048576 (1%)] Data (t): 35.472 Batch (t): 45.167, 181.372/s, 90.6860/s/gpu LR: 0.000713 Logit Scale: 26.152 Imm_image: 17.521 (17.521) Imm_text: 17.521 (17.521) Isd_image: 1.4554 (1.4554) Isd_text: 1.4554 (1.4554) Contrastive_loss: 3.5747 (3.5747) Loss: 3.5747 (3.5747)
|
| 217 |
+
2025-04-22,01:28:35 | INFO | Train Epoch: 15 [ 827392/1048576 (79%)] Data (t): 1.105 Batch (t): 9.516, 852.010/s, 426.005/s/gpu LR: 0.000684 Logit Scale: 27.827 Imm_image: 17.885 (17.703) Imm_text: 17.885 (17.703) Isd_image: 1.8281 (1.6417) Isd_text: 1.8281 (1.6417) Contrastive_loss: 3.9385 (3.7566) Loss: 3.9385 (3.7566)
|
| 218 |
+
2025-04-22,01:32:52 | INFO | Train Epoch: 15 [1048576/1048576 (100%)] Data (t): 1.123 Batch (t): 9.498, 872.191/s, 436.096/s/gpu LR: 0.000677 Logit Scale: 28.180 Imm_image: 17.907 (17.771) Imm_text: 17.907 (17.771) Isd_image: 1.5712 (1.6182) Isd_text: 1.5712 (1.6182) Contrastive_loss: 3.9188 (3.8106) Loss: 3.9188 (3.8106)
|
| 219 |
+
2025-04-22,01:32:53 | INFO | Start epoch 16
|
| 220 |
+
2025-04-22,01:33:41 | INFO | Train Epoch: 16 [ 8192/1048576 (1%)] Data (t): 35.920 Batch (t): 47.413, 172.780/s, 86.3902/s/gpu LR: 0.000676 Logit Scale: 28.191 Imm_image: 18.311 (18.311) Imm_text: 18.311 (18.311) Isd_image: 1.3776 (1.3776) Isd_text: 1.3776 (1.3776) Contrastive_loss: 3.5832 (3.5832) Loss: 3.5832 (3.5832)
|
| 221 |
+
2025-04-22,01:49:26 | INFO | Train Epoch: 16 [ 827392/1048576 (79%)] Data (t): 1.131 Batch (t): 9.450, 871.132/s, 435.566/s/gpu LR: 0.000646 Logit Scale: 29.910 Imm_image: 19.193 (18.752) Imm_text: 19.193 (18.752) Isd_image: 2.2303 (1.8040) Isd_text: 2.2303 (1.8040) Contrastive_loss: 3.6092 (3.5962) Loss: 3.6092 (3.5962)
|
| 222 |
+
2025-04-22,01:53:42 | INFO | Train Epoch: 16 [1048576/1048576 (100%)] Data (t): 1.169 Batch (t): 9.505, 885.105/s, 442.552/s/gpu LR: 0.000638 Logit Scale: 30.282 Imm_image: 19.592 (19.032) Imm_text: 19.592 (19.032) Isd_image: 1.8122 (1.8067) Isd_text: 1.8122 (1.8067) Contrastive_loss: 3.3608 (3.5177) Loss: 3.3608 (3.5177)
|
| 223 |
+
2025-04-22,01:53:44 | INFO | Start epoch 17
|
| 224 |
+
2025-04-22,01:54:29 | INFO | Train Epoch: 17 [ 8192/1048576 (1%)] Data (t): 33.752 Batch (t): 44.222, 185.247/s, 92.6237/s/gpu LR: 0.000638 Logit Scale: 30.297 Imm_image: 19.621 (19.621) Imm_text: 19.621 (19.621) Isd_image: 1.6044 (1.6044) Isd_text: 1.6044 (1.6044) Contrastive_loss: 3.3473 (3.3473) Loss: 3.3473 (3.3473)
|
| 225 |
+
2025-04-22,02:10:08 | INFO | Train Epoch: 17 [ 827392/1048576 (79%)] Data (t): 1.040 Batch (t): 9.395, 868.012/s, 434.006/s/gpu LR: 0.000608 Logit Scale: 32.031 Imm_image: 20.725 (20.173) Imm_text: 20.725 (20.173) Isd_image: 1.8428 (1.7236) Isd_text: 1.8428 (1.7236) Contrastive_loss: 3.2180 (3.2826) Loss: 3.2180 (3.2826)
|
| 226 |
+
2025-04-22,02:14:18 | INFO | Train Epoch: 17 [1048576/1048576 (100%)] Data (t): 1.006 Batch (t): 9.253, 898.909/s, 449.454/s/gpu LR: 0.000599 Logit Scale: 32.428 Imm_image: 21.295 (20.547) Imm_text: 21.295 (20.547) Isd_image: 1.2718 (1.5730) Isd_text: 1.2718 (1.5730) Contrastive_loss: 2.8722 (3.1458) Loss: 2.8722 (3.1458)
|
| 227 |
+
2025-04-22,02:14:20 | INFO | Start epoch 18
|
| 228 |
+
2025-04-22,02:15:12 | INFO | Train Epoch: 18 [ 8192/1048576 (1%)] Data (t): 43.744 Batch (t): 51.973, 157.622/s, 78.8109/s/gpu LR: 0.000599 Logit Scale: 32.447 Imm_image: 21.615 (21.615) Imm_text: 21.615 (21.615) Isd_image: 1.0146 (1.0146) Isd_text: 1.0146 (1.0146) Contrastive_loss: 2.5753 (2.5753) Loss: 2.5753 (2.5753)
|
| 229 |
+
2025-04-22,02:30:45 | INFO | Train Epoch: 18 [ 827392/1048576 (79%)] Data (t): 1.032 Batch (t): 9.337, 885.166/s, 442.583/s/gpu LR: 0.000568 Logit Scale: 34.185 Imm_image: 22.119 (21.867) Imm_text: 22.119 (21.867) Isd_image: 2.0683 (1.5415) Isd_text: 2.0683 (1.5415) Contrastive_loss: 2.9206 (2.7479) Loss: 2.9206 (2.7479)
|
| 230 |
+
2025-04-22,02:34:55 | INFO | Train Epoch: 18 [1048576/1048576 (100%)] Data (t): 0.955 Batch (t): 9.231, 900.095/s, 450.047/s/gpu LR: 0.000560 Logit Scale: 34.575 Imm_image: 22.600 (22.111) Imm_text: 22.600 (22.111) Isd_image: 1.1938 (1.4256) Isd_text: 1.1938 (1.4256) Contrastive_loss: 2.6357 (2.7105) Loss: 2.6357 (2.7105)
|
| 231 |
+
2025-04-22,02:34:56 | INFO | Start epoch 19
|
| 232 |
+
2025-04-22,02:35:47 | INFO | Train Epoch: 19 [ 8192/1048576 (1%)] Data (t): 40.769 Batch (t): 50.767, 161.364/s, 80.6818/s/gpu LR: 0.000559 Logit Scale: 34.593 Imm_image: 23.352 (23.352) Imm_text: 23.352 (23.352) Isd_image: 1.1015 (1.1015) Isd_text: 1.1015 (1.1015) Contrastive_loss: 2.1415 (2.1415) Loss: 2.1415 (2.1415)
|
| 233 |
+
2025-04-22,02:51:25 | INFO | Train Epoch: 19 [ 827392/1048576 (79%)] Data (t): 1.054 Batch (t): 9.374, 833.574/s, 416.787/s/gpu LR: 0.000528 Logit Scale: 36.285 Imm_image: 23.422 (23.387) Imm_text: 23.422 (23.387) Isd_image: 1.9669 (1.5342) Isd_text: 1.9669 (1.5342) Contrastive_loss: 2.7385 (2.4400) Loss: 2.7385 (2.4400)
|
| 234 |
+
2025-04-22,02:55:36 | INFO | Train Epoch: 19 [1048576/1048576 (100%)] Data (t): 1.036 Batch (t): 9.315, 891.505/s, 445.752/s/gpu LR: 0.000519 Logit Scale: 36.709 Imm_image: 24.361 (23.712) Imm_text: 24.361 (23.712) Isd_image: 1.5146 (1.5277) Isd_text: 1.5146 (1.5277) Contrastive_loss: 2.2260 (2.3687) Loss: 2.2260 (2.3687)
|
| 235 |
+
2025-04-22,02:55:38 | INFO | Start epoch 20
|
| 236 |
+
2025-04-22,02:56:23 | INFO | Train Epoch: 20 [ 8192/1048576 (1%)] Data (t): 35.789 Batch (t): 45.300, 180.841/s, 90.4203/s/gpu LR: 0.000519 Logit Scale: 36.729 Imm_image: 24.625 (24.625) Imm_text: 24.625 (24.625) Isd_image: 1.3410 (1.3410) Isd_text: 1.3410 (1.3410) Contrastive_loss: 2.0375 (2.0375) Loss: 2.0375 (2.0375)
|
| 237 |
+
2025-04-22,03:12:09 | INFO | Train Epoch: 20 [ 827392/1048576 (79%)] Data (t): 1.107 Batch (t): 9.458, 840.499/s, 420.250/s/gpu LR: 0.000488 Logit Scale: 38.383 Imm_image: 25.134 (24.880) Imm_text: 25.134 (24.880) Isd_image: 1.9705 (1.6558) Isd_text: 1.9705 (1.6558) Contrastive_loss: 2.3057 (2.1716) Loss: 2.3057 (2.1716)
|
| 238 |
+
2025-04-22,03:16:18 | INFO | Train Epoch: 20 [1048576/1048576 (100%)] Data (t): 0.989 Batch (t): 9.210, 902.628/s, 451.314/s/gpu LR: 0.000479 Logit Scale: 38.788 Imm_image: 25.877 (25.212) Imm_text: 25.877 (25.212) Isd_image: 1.2644 (1.5253) Isd_text: 1.2644 (1.5253) Contrastive_loss: 1.9045 (2.0826) Loss: 1.9045 (2.0826)
|
| 239 |
+
2025-04-22,03:16:20 | INFO | Start epoch 21
|
| 240 |
+
2025-04-22,03:17:10 | INFO | Train Epoch: 21 [ 8192/1048576 (1%)] Data (t): 41.021 Batch (t): 50.450, 162.377/s, 81.1886/s/gpu LR: 0.000479 Logit Scale: 38.807 Imm_image: 26.046 (26.046) Imm_text: 26.046 (26.046) Isd_image: 1.3478 (1.3478) Isd_text: 1.3478 (1.3478) Contrastive_loss: 1.8161 (1.8161) Loss: 1.8161 (1.8161)
|
| 241 |
+
2025-04-22,03:32:49 | INFO | Train Epoch: 21 [ 827392/1048576 (79%)] Data (t): 1.055 Batch (t): 9.389, 896.785/s, 448.392/s/gpu LR: 0.000448 Logit Scale: 40.456 Imm_image: 26.354 (26.200) Imm_text: 26.354 (26.200) Isd_image: 1.8287 (1.5882) Isd_text: 1.8287 (1.5882) Contrastive_loss: 2.1377 (1.9769) Loss: 2.1377 (1.9769)
|
| 242 |
+
2025-04-22,03:36:58 | INFO | Train Epoch: 21 [1048576/1048576 (100%)] Data (t): 0.964 Batch (t): 9.226, 902.176/s, 451.088/s/gpu LR: 0.000439 Logit Scale: 40.856 Imm_image: 26.912 (26.437) Imm_text: 26.912 (26.437) Isd_image: 1.3326 (1.5030) Isd_text: 1.3326 (1.5030) Contrastive_loss: 1.8677 (1.9405) Loss: 1.8677 (1.9405)
|
| 243 |
+
2025-04-22,03:37:00 | INFO | Start epoch 22
|
| 244 |
+
2025-04-22,03:37:47 | INFO | Train Epoch: 22 [ 8192/1048576 (1%)] Data (t): 36.345 Batch (t): 46.809, 175.008/s, 87.5038/s/gpu LR: 0.000439 Logit Scale: 40.874 Imm_image: 27.119 (27.119) Imm_text: 27.119 (27.119) Isd_image: 1.3031 (1.3031) Isd_text: 1.3031 (1.3031) Contrastive_loss: 1.7366 (1.7366) Loss: 1.7366 (1.7366)
|
| 245 |
+
2025-04-22,03:53:24 | INFO | Train Epoch: 22 [ 827392/1048576 (79%)] Data (t): 1.034 Batch (t): 9.372, 883.608/s, 441.804/s/gpu LR: 0.000408 Logit Scale: 42.489 Imm_image: 27.757 (27.438) Imm_text: 27.757 (27.438) Isd_image: 1.6067 (1.4549) Isd_text: 1.6067 (1.4549) Contrastive_loss: 1.8366 (1.7866) Loss: 1.8366 (1.7866)
|
| 246 |
+
2025-04-22,03:57:34 | INFO | Train Epoch: 22 [1048576/1048576 (100%)] Data (t): 0.966 Batch (t): 9.257, 902.936/s, 451.468/s/gpu LR: 0.000399 Logit Scale: 42.897 Imm_image: 28.356 (27.744) Imm_text: 28.356 (27.744) Isd_image: 0.87214 (1.2607) Isd_text: 0.87214 (1.2607) Contrastive_loss: 1.5978 (1.7237) Loss: 1.5978 (1.7237)
|
| 247 |
+
2025-04-22,03:57:36 | INFO | Start epoch 23
|
| 248 |
+
2025-04-22,03:58:20 | INFO | Train Epoch: 23 [ 8192/1048576 (1%)] Data (t): 34.019 Batch (t): 44.023, 186.084/s, 93.0421/s/gpu LR: 0.000399 Logit Scale: 42.915 Imm_image: 28.897 (28.897) Imm_text: 28.897 (28.897) Isd_image: 0.73614 (0.73614) Isd_text: 0.73614 (0.73614) Contrastive_loss: 1.3154 (1.3154) Loss: 1.3154 (1.3154)
|
| 249 |
+
2025-04-22,04:14:11 | INFO | Train Epoch: 23 [ 827392/1048576 (79%)] Data (t): 1.146 Batch (t): 9.511, 866.075/s, 433.038/s/gpu LR: 0.000369 Logit Scale: 44.457 Imm_image: 28.920 (28.909) Imm_text: 28.920 (28.909) Isd_image: 1.5970 (1.1666) Isd_text: 1.5970 (1.1666) Contrastive_loss: 1.7620 (1.5387) Loss: 1.7620 (1.5387)
|
| 250 |
+
2025-04-22,04:18:19 | INFO | Train Epoch: 23 [1048576/1048576 (100%)] Data (t): 0.962 Batch (t): 9.183, 903.125/s, 451.563/s/gpu LR: 0.000360 Logit Scale: 44.853 Imm_image: 29.846 (29.221) Imm_text: 29.846 (29.221) Isd_image: 0.83969 (1.0576) Isd_text: 0.83969 (1.0576) Contrastive_loss: 1.3563 (1.4779) Loss: 1.3563 (1.4779)
|
| 251 |
+
2025-04-22,04:18:21 | INFO | Start epoch 24
|
| 252 |
+
2025-04-22,04:19:04 | INFO | Train Epoch: 24 [ 8192/1048576 (1%)] Data (t): 34.829 Batch (t): 43.179, 189.723/s, 94.8614/s/gpu LR: 0.000360 Logit Scale: 44.871 Imm_image: 30.301 (30.301) Imm_text: 30.301 (30.301) Isd_image: 0.95309 (0.95309) Isd_text: 0.95309 (0.95309) Contrastive_loss: 1.1238 (1.1238) Loss: 1.1238 (1.1238)
|
| 253 |
+
2025-04-22,04:34:51 | INFO | Train Epoch: 24 [ 827392/1048576 (79%)] Data (t): 1.125 Batch (t): 9.472, 865.344/s, 432.672/s/gpu LR: 0.000330 Logit Scale: 46.338 Imm_image: 30.204 (30.252) Imm_text: 30.204 (30.252) Isd_image: 1.4327 (1.1929) Isd_text: 1.4327 (1.1929) Contrastive_loss: 1.5971 (1.3604) Loss: 1.5971 (1.3604)
|
| 254 |
+
2025-04-22,04:39:05 | INFO | Train Epoch: 24 [1048576/1048576 (100%)] Data (t): 1.135 Batch (t): 9.396, 886.100/s, 443.050/s/gpu LR: 0.000322 Logit Scale: 46.698 Imm_image: 31.079 (30.528) Imm_text: 31.079 (30.528) Isd_image: 0.53120 (0.97234) Isd_text: 0.53120 (0.97234) Contrastive_loss: 1.1581 (1.2930) Loss: 1.1581 (1.2930)
|
| 255 |
+
2025-04-22,04:39:07 | INFO | Start epoch 25
|
| 256 |
+
2025-04-22,04:40:01 | INFO | Train Epoch: 25 [ 8192/1048576 (1%)] Data (t): 39.915 Batch (t): 54.751, 149.622/s, 74.8110/s/gpu LR: 0.000322 Logit Scale: 46.714 Imm_image: 31.450 (31.450) Imm_text: 31.450 (31.450) Isd_image: 0.47179 (0.47179) Isd_text: 0.47179 (0.47179) Contrastive_loss: 1.0223 (1.0223) Loss: 1.0223 (1.0223)
|
| 257 |
+
2025-04-22,04:55:52 | INFO | Train Epoch: 25 [ 827392/1048576 (79%)] Data (t): 1.114 Batch (t): 9.511, 888.626/s, 444.313/s/gpu LR: 0.000293 Logit Scale: 48.076 Imm_image: 31.740 (31.595) Imm_text: 31.740 (31.595) Isd_image: 1.0804 (0.77607) Isd_text: 1.0804 (0.77607) Contrastive_loss: 1.2178 (1.1200) Loss: 1.2178 (1.1200)
|
| 258 |
+
2025-04-22,05:00:01 | INFO | Train Epoch: 25 [1048576/1048576 (100%)] Data (t): 1.000 Batch (t): 9.200, 901.391/s, 450.695/s/gpu LR: 0.000285 Logit Scale: 48.430 Imm_image: 32.418 (31.869) Imm_text: 32.418 (31.869) Isd_image: 0.52787 (0.69334) Isd_text: 0.52787 (0.69334) Contrastive_loss: 0.99956 (1.0799) Loss: 0.99956 (1.0799)
|
| 259 |
+
2025-04-22,05:00:03 | INFO | Start epoch 26
|
| 260 |
+
2025-04-22,05:01:02 | INFO | Train Epoch: 26 [ 8192/1048576 (1%)] Data (t): 42.442 Batch (t): 59.316, 138.107/s, 69.0535/s/gpu LR: 0.000285 Logit Scale: 48.446 Imm_image: 32.915 (32.915) Imm_text: 32.915 (32.915) Isd_image: 0.38222 (0.38222) Isd_text: 0.38222 (0.38222) Contrastive_loss: 0.80590 (0.80590) Loss: 0.80590 (0.80590)
|
| 261 |
+
2025-04-22,05:16:46 | INFO | Train Epoch: 26 [ 827392/1048576 (79%)] Data (t): 1.062 Batch (t): 9.442, 896.922/s, 448.461/s/gpu LR: 0.000257 Logit Scale: 49.716 Imm_image: 32.827 (32.871) Imm_text: 32.827 (32.871) Isd_image: 1.0301 (0.70614) Isd_text: 1.0301 (0.70614) Contrastive_loss: 1.1351 (0.97050) Loss: 1.1351 (0.97050)
|
| 262 |
+
2025-04-22,05:20:55 | INFO | Train Epoch: 26 [1048576/1048576 (100%)] Data (t): 0.972 Batch (t): 9.223, 904.186/s, 452.093/s/gpu LR: 0.000250 Logit Scale: 50.036 Imm_image: 33.545 (33.096) Imm_text: 33.545 (33.096) Isd_image: 0.0029951 (0.47176) Isd_text: 0.0029951 (0.47176) Contrastive_loss: 0.82556 (0.92219) Loss: 0.82556 (0.92219)
|
| 263 |
+
2025-04-22,05:20:57 | INFO | Start epoch 27
|
| 264 |
+
2025-04-22,05:21:39 | INFO | Train Epoch: 27 [ 8192/1048576 (1%)] Data (t): 33.620 Batch (t): 41.870, 195.652/s, 97.8259/s/gpu LR: 0.000249 Logit Scale: 50.050 Imm_image: 33.780 (33.780) Imm_text: 33.780 (33.780) Isd_image: -0.084400 (-0.084400) Isd_text: -0.084400 (-0.084400) Contrastive_loss: 0.77689 (0.77689) Loss: 0.77689 (0.77689)
|
| 265 |
+
2025-04-22,05:37:21 | INFO | Train Epoch: 27 [ 827392/1048576 (79%)] Data (t): 1.082 Batch (t): 9.419, 867.499/s, 433.749/s/gpu LR: 0.000223 Logit Scale: 51.207 Imm_image: 33.797 (33.788) Imm_text: 33.797 (33.788) Isd_image: 0.79326 (0.35443) Isd_text: 0.79326 (0.35443) Contrastive_loss: 1.0277 (0.90230) Loss: 1.0277 (0.90230)
|
| 266 |
+
2025-04-22,05:41:34 | INFO | Train Epoch: 27 [1048576/1048576 (100%)] Data (t): 1.080 Batch (t): 9.384, 890.844/s, 445.422/s/gpu LR: 0.000216 Logit Scale: 51.496 Imm_image: 34.634 (34.070) Imm_text: 34.634 (34.070) Isd_image: -0.090092 (0.20626) Isd_text: -0.090092 (0.20626) Contrastive_loss: 0.70038 (0.83499) Loss: 0.70038 (0.83499)
|
| 267 |
+
2025-04-22,05:41:36 | INFO | Start epoch 28
|
| 268 |
+
2025-04-22,05:42:27 | INFO | Train Epoch: 28 [ 8192/1048576 (1%)] Data (t): 42.972 Batch (t): 51.399, 159.382/s, 79.6908/s/gpu LR: 0.000215 Logit Scale: 51.508 Imm_image: 34.713 (34.713) Imm_text: 34.713 (34.713) Isd_image: -0.10062 (-0.10062) Isd_text: -0.10062 (-0.10062) Contrastive_loss: 0.71356 (0.71356) Loss: 0.71356 (0.71356)
|
| 269 |
+
2025-04-22,05:58:22 | INFO | Train Epoch: 28 [ 827392/1048576 (79%)] Data (t): 1.199 Batch (t): 9.547, 852.667/s, 426.334/s/gpu LR: 0.000190 Logit Scale: 52.543 Imm_image: 34.988 (34.851) Imm_text: 34.988 (34.851) Isd_image: -0.21139 (-0.15601) Isd_text: -0.21139 (-0.15601) Contrastive_loss: 0.78777 (0.75067) Loss: 0.78777 (0.75067)
|
| 270 |
+
2025-04-22,06:02:38 | INFO | Train Epoch: 28 [1048576/1048576 (100%)] Data (t): 1.205 Batch (t): 9.491, 878.021/s, 439.010/s/gpu LR: 0.000184 Logit Scale: 52.801 Imm_image: 35.536 (35.079) Imm_text: 35.536 (35.079) Isd_image: -0.37345 (-0.22849) Isd_text: -0.37345 (-0.22849) Contrastive_loss: 0.59136 (0.69756) Loss: 0.59136 (0.69756)
|
| 271 |
+
2025-04-22,06:02:40 | INFO | Start epoch 29
|
| 272 |
+
2025-04-22,06:03:25 | INFO | Train Epoch: 29 [ 8192/1048576 (1%)] Data (t): 33.324 Batch (t): 44.962, 182.199/s, 91.0997/s/gpu LR: 0.000183 Logit Scale: 52.812 Imm_image: 35.633 (35.633) Imm_text: 35.633 (35.633) Isd_image: -0.49615 (-0.49615) Isd_text: -0.49615 (-0.49615) Contrastive_loss: 0.58952 (0.58952) Loss: 0.58952 (0.58952)
|
| 273 |
+
2025-04-22,06:19:30 | INFO | Train Epoch: 29 [ 827392/1048576 (79%)] Data (t): 1.234 Batch (t): 9.649, 824.732/s, 412.366/s/gpu LR: 0.000160 Logit Scale: 53.700 Imm_image: 35.724 (35.679) Imm_text: 35.724 (35.679) Isd_image: -0.27579 (-0.38597) Isd_text: -0.27579 (-0.38597) Contrastive_loss: 0.72920 (0.65936) Loss: 0.72920 (0.65936)
|
| 274 |
+
2025-04-22,06:23:43 | INFO | Train Epoch: 29 [1048576/1048576 (100%)] Data (t): 1.098 Batch (t): 9.360, 874.955/s, 437.477/s/gpu LR: 0.000154 Logit Scale: 53.923 Imm_image: 36.292 (35.883) Imm_text: 36.292 (35.883) Isd_image: -0.95716 (-0.57637) Isd_text: -0.95716 (-0.57637) Contrastive_loss: 0.51364 (0.61079) Loss: 0.51364 (0.61079)
|
| 275 |
+
2025-04-22,06:23:45 | INFO | Start epoch 30
|
| 276 |
+
2025-04-22,06:24:28 | INFO | Train Epoch: 30 [ 8192/1048576 (1%)] Data (t): 34.553 Batch (t): 43.249, 189.414/s, 94.7069/s/gpu LR: 0.000153 Logit Scale: 53.932 Imm_image: 36.648 (36.648) Imm_text: 36.648 (36.648) Isd_image: -1.0102 (-1.0102) Isd_text: -1.0102 (-1.0102) Contrastive_loss: 0.45911 (0.45911) Loss: 0.45911 (0.45911)
|
| 277 |
+
2025-04-22,06:40:18 | INFO | Train Epoch: 30 [ 827392/1048576 (79%)] Data (t): 1.153 Batch (t): 9.497, 873.271/s, 436.636/s/gpu LR: 0.000131 Logit Scale: 54.690 Imm_image: 36.511 (36.579) Imm_text: 36.511 (36.579) Isd_image: -0.60729 (-0.80876) Isd_text: -0.60729 (-0.80876) Contrastive_loss: 0.57983 (0.51947) Loss: 0.57983 (0.51947)
|
| 278 |
+
2025-04-22,06:44:28 | INFO | Train Epoch: 30 [1048576/1048576 (100%)] Data (t): 0.969 Batch (t): 9.285, 893.312/s, 446.656/s/gpu LR: 0.000126 Logit Scale: 54.878 Imm_image: 37.161 (36.773) Imm_text: 37.161 (36.773) Isd_image: -1.1047 (-0.90741) Isd_text: -1.1047 (-0.90741) Contrastive_loss: 0.39493 (0.47796) Loss: 0.39493 (0.47796)
|
| 279 |
+
2025-04-22,06:44:30 | INFO | Start epoch 31
|
| 280 |
+
2025-04-22,06:45:24 | INFO | Train Epoch: 31 [ 8192/1048576 (1%)] Data (t): 43.192 Batch (t): 53.415, 153.364/s, 76.6819/s/gpu LR: 0.000125 Logit Scale: 54.885 Imm_image: 37.284 (37.284) Imm_text: 37.284 (37.284) Isd_image: -1.1921 (-1.1921) Isd_text: -1.1921 (-1.1921) Contrastive_loss: 0.38576 (0.38576) Loss: 0.38576 (0.38576)
|
| 281 |
+
2025-04-22,07:01:10 | INFO | Train Epoch: 31 [ 827392/1048576 (79%)] Data (t): 1.133 Batch (t): 9.464, 851.374/s, 425.687/s/gpu LR: 0.000105 Logit Scale: 55.511 Imm_image: 37.413 (37.348) Imm_text: 37.413 (37.348) Isd_image: -1.1304 (-1.1613) Isd_text: -1.1304 (-1.1613) Contrastive_loss: 0.47141 (0.42858) Loss: 0.47141 (0.42858)
|
| 282 |
+
2025-04-22,07:05:22 | INFO | Train Epoch: 31 [1048576/1048576 (100%)] Data (t): 1.080 Batch (t): 9.345, 891.322/s, 445.661/s/gpu LR: 0.000100 Logit Scale: 55.664 Imm_image: 37.822 (37.506) Imm_text: 37.822 (37.506) Isd_image: -1.4288 (-1.2504) Isd_text: -1.4288 (-1.2504) Contrastive_loss: 0.34556 (0.40091) Loss: 0.34556 (0.40091)
|
| 283 |
+
2025-04-22,07:05:24 | INFO | Start epoch 32
|
| 284 |
+
2025-04-22,07:06:20 | INFO | Train Epoch: 32 [ 8192/1048576 (1%)] Data (t): 40.274 Batch (t): 56.306, 145.492/s, 72.7458/s/gpu LR: 0.000100 Logit Scale: 55.670 Imm_image: 37.952 (37.952) Imm_text: 37.952 (37.952) Isd_image: -1.3906 (-1.3906) Isd_text: -1.3906 (-1.3906) Contrastive_loss: 0.34634 (0.34634) Loss: 0.34634 (0.34634)
|
| 285 |
+
2025-04-22,07:22:13 | INFO | Train Epoch: 32 [ 827392/1048576 (79%)] Data (t): 1.175 Batch (t): 9.522, 829.835/s, 414.917/s/gpu LR: 0.000082 Logit Scale: 56.169 Imm_image: 37.956 (37.954) Imm_text: 37.956 (37.954) Isd_image: -1.3998 (-1.3952) Isd_text: -1.3998 (-1.3952) Contrastive_loss: 0.39241 (0.36938) Loss: 0.39241 (0.36938)
|
| 286 |
+
2025-04-22,07:26:29 | INFO | Train Epoch: 32 [1048576/1048576 (100%)] Data (t): 1.182 Batch (t): 9.487, 842.373/s, 421.186/s/gpu LR: 0.000077 Logit Scale: 56.290 Imm_image: 38.525 (38.144) Imm_text: 38.525 (38.144) Isd_image: -1.7152 (-1.5019) Isd_text: -1.7152 (-1.5019) Contrastive_loss: 0.28321 (0.34065) Loss: 0.28321 (0.34065)
|
| 287 |
+
2025-04-22,07:26:31 | INFO | Start epoch 33
|
| 288 |
+
2025-04-22,07:27:19 | INFO | Train Epoch: 33 [ 8192/1048576 (1%)] Data (t): 39.269 Batch (t): 47.937, 170.892/s, 85.4459/s/gpu LR: 0.000077 Logit Scale: 56.294 Imm_image: 38.475 (38.475) Imm_text: 38.475 (38.475) Isd_image: -1.5498 (-1.5498) Isd_text: -1.5498 (-1.5498) Contrastive_loss: 0.31425 (0.31425) Loss: 0.31425 (0.31425)
|
| 289 |
+
2025-04-22,07:43:14 | INFO | Train Epoch: 33 [ 827392/1048576 (79%)] Data (t): 1.148 Batch (t): 9.556, 848.134/s, 424.067/s/gpu LR: 0.000061 Logit Scale: 56.676 Imm_image: 38.401 (38.438) Imm_text: 38.401 (38.438) Isd_image: -1.5458 (-1.5478) Isd_text: -1.5458 (-1.5478) Contrastive_loss: 0.37095 (0.34260) Loss: 0.37095 (0.34260)
|
| 290 |
+
2025-04-22,07:47:29 | INFO | Train Epoch: 33 [1048576/1048576 (100%)] Data (t): 1.119 Batch (t): 9.443, 869.966/s, 434.983/s/gpu LR: 0.000057 Logit Scale: 56.766 Imm_image: 39.074 (38.650) Imm_text: 39.074 (38.650) Isd_image: -1.8886 (-1.6614) Isd_text: -1.8886 (-1.6614) Contrastive_loss: 0.23711 (0.30744) Loss: 0.23711 (0.30744)
|
| 291 |
+
2025-04-22,07:47:31 | INFO | Start epoch 34
|
| 292 |
+
2025-04-22,07:48:18 | INFO | Train Epoch: 34 [ 8192/1048576 (1%)] Data (t): 33.337 Batch (t): 47.485, 172.518/s, 86.2590/s/gpu LR: 0.000057 Logit Scale: 56.769 Imm_image: 39.018 (39.018) Imm_text: 39.018 (39.018) Isd_image: -1.9460 (-1.9460) Isd_text: -1.9460 (-1.9460) Contrastive_loss: 0.29310 (0.29310) Loss: 0.29310 (0.29310)
|
| 293 |
+
2025-04-22,08:04:12 | INFO | Train Epoch: 34 [ 827392/1048576 (79%)] Data (t): 1.149 Batch (t): 9.534, 874.157/s, 437.078/s/gpu LR: 0.000043 Logit Scale: 57.048 Imm_image: 38.960 (38.989) Imm_text: 38.960 (38.989) Isd_image: -1.8573 (-1.9016) Isd_text: -1.8573 (-1.9016) Contrastive_loss: 0.29834 (0.29572) Loss: 0.29834 (0.29572)
|
| 294 |
+
2025-04-22,08:08:26 | INFO | Train Epoch: 34 [1048576/1048576 (100%)] Data (t): 1.143 Batch (t): 9.415, 880.004/s, 440.002/s/gpu LR: 0.000040 Logit Scale: 57.112 Imm_image: 39.338 (39.106) Imm_text: 39.338 (39.106) Isd_image: -2.1484 (-1.9839) Isd_text: -2.1484 (-1.9839) Contrastive_loss: 0.22347 (0.27164) Loss: 0.22347 (0.27164)
|
| 295 |
+
2025-04-22,08:08:28 | INFO | Start epoch 35
|
| 296 |
+
2025-04-22,08:09:18 | INFO | Train Epoch: 35 [ 8192/1048576 (1%)] Data (t): 36.238 Batch (t): 50.321, 162.795/s, 81.3976/s/gpu LR: 0.000040 Logit Scale: 57.114 Imm_image: 39.416 (39.416) Imm_text: 39.416 (39.416) Isd_image: -2.0942 (-2.0942) Isd_text: -2.0942 (-2.0942) Contrastive_loss: 0.23404 (0.23404) Loss: 0.23404 (0.23404)
|
| 297 |
+
2025-04-22,08:25:19 | INFO | Train Epoch: 35 [ 827392/1048576 (79%)] Data (t): 1.217 Batch (t): 9.604, 843.616/s, 421.808/s/gpu LR: 0.000029 Logit Scale: 57.305 Imm_image: 39.181 (39.298) Imm_text: 39.181 (39.298) Isd_image: -2.0786 (-2.0864) Isd_text: -2.0786 (-2.0864) Contrastive_loss: 0.26991 (0.25198) Loss: 0.26991 (0.25198)
|
| 298 |
+
2025-04-22,08:29:37 | INFO | Train Epoch: 35 [1048576/1048576 (100%)] Data (t): 1.238 Batch (t): 9.582, 872.740/s, 436.370/s/gpu LR: 0.000026 Logit Scale: 57.347 Imm_image: 39.686 (39.428) Imm_text: 39.686 (39.428) Isd_image: -2.2156 (-2.1294) Isd_text: -2.2156 (-2.1294) Contrastive_loss: 0.20413 (0.23603) Loss: 0.20413 (0.23603)
|
| 299 |
+
2025-04-22,08:29:39 | INFO | Start epoch 36
|
| 300 |
+
2025-04-22,08:30:28 | INFO | Train Epoch: 36 [ 8192/1048576 (1%)] Data (t): 39.929 Batch (t): 48.493, 168.933/s, 84.4666/s/gpu LR: 0.000026 Logit Scale: 57.348 Imm_image: 39.599 (39.599) Imm_text: 39.599 (39.599) Isd_image: -2.2166 (-2.2166) Isd_text: -2.2166 (-2.2166) Contrastive_loss: 0.23179 (0.23179) Loss: 0.23179 (0.23179)
|
| 301 |
+
2025-04-22,08:46:19 | INFO | Train Epoch: 36 [ 827392/1048576 (79%)] Data (t): 1.150 Batch (t): 9.512, 895.849/s, 447.924/s/gpu LR: 0.000017 Logit Scale: 57.466 Imm_image: 39.479 (39.539) Imm_text: 39.479 (39.539) Isd_image: -2.2350 (-2.2258) Isd_text: -2.2350 (-2.2258) Contrastive_loss: 0.24949 (0.24064) Loss: 0.24949 (0.24064)
|
| 302 |
+
2025-04-22,08:50:27 | INFO | Train Epoch: 36 [1048576/1048576 (100%)] Data (t): 0.962 Batch (t): 9.178, 898.762/s, 449.381/s/gpu LR: 0.000015 Logit Scale: 57.491 Imm_image: 39.887 (39.655) Imm_text: 39.887 (39.655) Isd_image: -2.4298 (-2.2938) Isd_text: -2.4298 (-2.2938) Contrastive_loss: 0.17799 (0.21976) Loss: 0.17799 (0.21976)
|
| 303 |
+
2025-04-22,08:50:29 | INFO | Start epoch 37
|
| 304 |
+
2025-04-22,08:51:26 | INFO | Train Epoch: 37 [ 8192/1048576 (1%)] Data (t): 44.902 Batch (t): 57.513, 142.437/s, 71.2187/s/gpu LR: 0.000014 Logit Scale: 57.491 Imm_image: 39.857 (39.857) Imm_text: 39.857 (39.857) Isd_image: -2.4139 (-2.4139) Isd_text: -2.4139 (-2.4139) Contrastive_loss: 0.20261 (0.20261) Loss: 0.20261 (0.20261)
|
| 305 |
+
2025-04-22,09:07:19 | INFO | Train Epoch: 37 [ 827392/1048576 (79%)] Data (t): 1.126 Batch (t): 9.526, 859.903/s, 429.951/s/gpu LR: 0.000008 Logit Scale: 57.554 Imm_image: 39.672 (39.765) Imm_text: 39.672 (39.765) Isd_image: -2.2185 (-2.3162) Isd_text: -2.2185 (-2.3162) Contrastive_loss: 0.23173 (0.21717) Loss: 0.23173 (0.21717)
|
| 306 |
+
2025-04-22,09:11:32 | INFO | Train Epoch: 37 [1048576/1048576 (100%)] Data (t): 1.060 Batch (t): 9.383, 878.634/s, 439.317/s/gpu LR: 0.000007 Logit Scale: 57.565 Imm_image: 39.936 (39.822) Imm_text: 39.936 (39.822) Isd_image: -2.2953 (-2.3092) Isd_text: -2.2953 (-2.3092) Contrastive_loss: 0.20207 (0.21214) Loss: 0.20207 (0.21214)
|
| 307 |
+
2025-04-22,09:11:34 | INFO | Start epoch 38
|
| 308 |
+
2025-04-22,09:12:21 | INFO | Train Epoch: 38 [ 8192/1048576 (1%)] Data (t): 35.983 Batch (t): 46.670, 175.529/s, 87.7643/s/gpu LR: 0.000006 Logit Scale: 57.566 Imm_image: 39.943 (39.943) Imm_text: 39.943 (39.943) Isd_image: -2.3362 (-2.3362) Isd_text: -2.3362 (-2.3362) Contrastive_loss: 0.20550 (0.20550) Loss: 0.20550 (0.20550)
|
| 309 |
+
2025-04-22,09:28:18 | INFO | Train Epoch: 38 [ 827392/1048576 (79%)] Data (t): 1.204 Batch (t): 9.567, 806.037/s, 403.019/s/gpu LR: 0.000002 Logit Scale: 57.590 Imm_image: 39.865 (39.904) Imm_text: 39.865 (39.904) Isd_image: -2.2305 (-2.2834) Isd_text: -2.2305 (-2.2834) Contrastive_loss: 0.24403 (0.22476) Loss: 0.24403 (0.22476)
|
| 310 |
+
2025-04-22,09:32:32 | INFO | Train Epoch: 38 [1048576/1048576 (100%)] Data (t): 1.107 Batch (t): 9.432, 898.963/s, 449.482/s/gpu LR: 0.000002 Logit Scale: 57.593 Imm_image: 39.966 (39.925) Imm_text: 39.966 (39.925) Isd_image: -2.2969 (-2.2879) Isd_text: -2.2969 (-2.2879) Contrastive_loss: 0.21412 (0.22122) Loss: 0.21412 (0.22122)
|
| 311 |
+
2025-04-22,09:32:34 | INFO | Start epoch 39
|
| 312 |
+
2025-04-22,09:33:22 | INFO | Train Epoch: 39 [ 8192/1048576 (1%)] Data (t): 36.741 Batch (t): 47.637, 171.966/s, 85.9831/s/gpu LR: 0.000002 Logit Scale: 57.593 Imm_image: 40.038 (40.038) Imm_text: 40.038 (40.038) Isd_image: -2.3262 (-2.3262) Isd_text: -2.3262 (-2.3262) Contrastive_loss: 0.20500 (0.20500) Loss: 0.20500 (0.20500)
|
| 313 |
+
2025-04-22,09:49:17 | INFO | Train Epoch: 39 [ 827392/1048576 (79%)] Data (t): 1.147 Batch (t): 9.552, 847.527/s, 423.763/s/gpu LR: 0.000000 Logit Scale: 57.597 Imm_image: 39.868 (39.953) Imm_text: 39.868 (39.953) Isd_image: -2.2393 (-2.2827) Isd_text: -2.2393 (-2.2827) Contrastive_loss: 0.21664 (0.21082) Loss: 0.21664 (0.21082)
|
| 314 |
+
2025-04-22,09:53:30 | INFO | Train Epoch: 39 [1048576/1048576 (100%)] Data (t): 1.080 Batch (t): 9.381, 886.997/s, 443.498/s/gpu LR: 0.000000 Logit Scale: 57.597 Imm_image: 39.905 (39.937) Imm_text: 39.905 (39.937) Isd_image: -2.2689 (-2.2781) Isd_text: -2.2689 (-2.2781) Contrastive_loss: 0.21910 (0.21358) Loss: 0.21910 (0.21358)
|
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-16-subset-dc10m-010-1e3-original-keep_low_intra_only/params.txt
ADDED
|
@@ -0,0 +1,103 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
accum_freq: 2
|
| 2 |
+
aug_cfg: {}
|
| 3 |
+
batch_size: 2048
|
| 4 |
+
beta1: 0.9
|
| 5 |
+
beta2: 0.98
|
| 6 |
+
cache_dir: None
|
| 7 |
+
caption_ratio: 0.1
|
| 8 |
+
checkpoint_path: /mnt/personal/zhudongy/capsfusion10m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-16-subset-dc10m-010-1e3-original-keep_low_intra_only/checkpoints
|
| 9 |
+
coca_caption_loss_weight: 2.0
|
| 10 |
+
coca_contrastive_loss_weight: 1.0
|
| 11 |
+
copy_codebase: False
|
| 12 |
+
csv_caption_key: title
|
| 13 |
+
csv_img_key: filepath
|
| 14 |
+
csv_separator:
|
| 15 |
+
dataset_resampled: False
|
| 16 |
+
dataset_type: synthetic
|
| 17 |
+
ddp_static_graph: False
|
| 18 |
+
debug: False
|
| 19 |
+
delete_previous_checkpoint: False
|
| 20 |
+
device: cuda:0
|
| 21 |
+
dist_backend: None
|
| 22 |
+
dist_url: None
|
| 23 |
+
distill: False
|
| 24 |
+
distill_model: None
|
| 25 |
+
distill_pretrained: None
|
| 26 |
+
distributed: True
|
| 27 |
+
epochs: 40
|
| 28 |
+
epochs_cooldown: None
|
| 29 |
+
eps: 1e-08
|
| 30 |
+
force_custom_text: False
|
| 31 |
+
force_image_size: None
|
| 32 |
+
force_patch_dropout: None
|
| 33 |
+
force_quick_gelu: False
|
| 34 |
+
gather_with_grad: True
|
| 35 |
+
grad_checkpointing: True
|
| 36 |
+
grad_clip_norm: None
|
| 37 |
+
horovod: False
|
| 38 |
+
image_interpolation: None
|
| 39 |
+
image_mean: None
|
| 40 |
+
image_resize_mode: None
|
| 41 |
+
image_std: None
|
| 42 |
+
imagenet_v2: None
|
| 43 |
+
imagenet_val: None
|
| 44 |
+
keep_func_name: keep_low_intra_only
|
| 45 |
+
local_loss: False
|
| 46 |
+
local_rank: 0
|
| 47 |
+
lock_image: False
|
| 48 |
+
lock_image_freeze_bn_stats: False
|
| 49 |
+
lock_image_unlocked_groups: 0
|
| 50 |
+
lock_text: False
|
| 51 |
+
lock_text_freeze_layer_norm: False
|
| 52 |
+
lock_text_unlocked_layers: 0
|
| 53 |
+
log_every_n_steps: 100
|
| 54 |
+
log_level: 20
|
| 55 |
+
log_local: False
|
| 56 |
+
log_path: /mnt/personal/zhudongy/capsfusion10m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-16-subset-dc10m-010-1e3-original-keep_low_intra_only/out.log
|
| 57 |
+
logs: /mnt/personal/zhudongy/capsfusion10m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs
|
| 58 |
+
loss_dist_impl: None
|
| 59 |
+
lr: 0.001
|
| 60 |
+
lr_cooldown_end: 0.0
|
| 61 |
+
lr_cooldown_power: 1.0
|
| 62 |
+
lr_scheduler: cosine
|
| 63 |
+
map_func_name: use_none
|
| 64 |
+
model: ViT-B-16
|
| 65 |
+
momentum: None
|
| 66 |
+
name: ViT-B-16-subset-dc10m-010-1e3-original-keep_low_intra_only
|
| 67 |
+
no_set_device_rank: False
|
| 68 |
+
opt: adamw
|
| 69 |
+
precision: amp
|
| 70 |
+
pretrained:
|
| 71 |
+
pretrained_image: False
|
| 72 |
+
rank: 0
|
| 73 |
+
remote_sync: None
|
| 74 |
+
remote_sync_frequency: 300
|
| 75 |
+
remote_sync_protocol: s3
|
| 76 |
+
report_to: tensorboard,wandb
|
| 77 |
+
resume: None
|
| 78 |
+
save_frequency: 1
|
| 79 |
+
save_most_recent: False
|
| 80 |
+
seed: 0
|
| 81 |
+
siglip: False
|
| 82 |
+
skip_scheduler: False
|
| 83 |
+
tensorboard: True
|
| 84 |
+
tensorboard_path: /mnt/personal/zhudongy/capsfusion10m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/logs/ViT-B-16-subset-dc10m-010-1e3-original-keep_low_intra_only/tensorboard
|
| 85 |
+
torchcompile: False
|
| 86 |
+
torchscript: False
|
| 87 |
+
trace: False
|
| 88 |
+
train_data: /mnt/personal/zhudongy/CapsFusion120M-wds/{00000..01426}.tar
|
| 89 |
+
train_data_upsampling_factors: None
|
| 90 |
+
train_num_samples: 10006295
|
| 91 |
+
use_bn_sync: False
|
| 92 |
+
use_bnb_linear: None
|
| 93 |
+
val_data: None
|
| 94 |
+
val_frequency: 1
|
| 95 |
+
val_num_samples: None
|
| 96 |
+
wandb: True
|
| 97 |
+
wandb_notes:
|
| 98 |
+
wandb_project_name: open-clip
|
| 99 |
+
warmup: 122
|
| 100 |
+
wd: 0.5
|
| 101 |
+
workers: 16
|
| 102 |
+
world_size: 2
|
| 103 |
+
zeroshot_frequency: 2
|
SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4/preprocess_embedding/run.sh
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/bin/bash
|
| 2 |
+
|
| 3 |
+
export EXP="/mnt/personal/zhudongy/capsfusion10m_results/SFR-Embedding-Code-2B_R#0.8#0.6#dinov2-large#0.0#0.2#rouge_0.2#top_8#inter_0.4"
|
| 4 |
+
export SCRIPT="preprocess_embedding"
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
sbatch -o "$EXP/$SCRIPT/logs/text.log" --job-name text "$EXP/$SCRIPT/jobs/text.sh"
|
| 8 |
+
# sbatch -o "$EXP/$SCRIPT/logs/image.log" --job-name image "$EXP/$SCRIPT/jobs/image.sh"
|
| 9 |
+
sbatch -o "$EXP/$SCRIPT/logs/text_inter.log" --job-name text_inter "$EXP/$SCRIPT/jobs/text_inter.sh"
|
| 10 |
+
# sbatch -o "$EXP/$SCRIPT/logs/image_inter.log" --job-name image_inter "$EXP/$SCRIPT/jobs/image_inter.sh"
|
ViT-B-32-laion2b_e16/benchmark_caltech101_laion2b_e16_ViT-B-32_en_zeroshot_classification.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"dataset": "caltech101", "model": "ViT-B-32", "pretrained": "laion2b_e16", "task": "zeroshot_classification", "metrics": {"acc1": 0.8591426071741033, "acc5": 0.9651137357830272, "mean_per_class_recall": 0.9118039279639667}, "language": "en"}
|
ViT-B-32-laion2b_e16/benchmark_cars_laion2b_e16_ViT-B-32_en_zeroshot_classification.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"dataset": "cars", "model": "ViT-B-32", "pretrained": "laion2b_e16", "task": "zeroshot_classification", "metrics": {"acc1": 0.8435517970401691, "acc5": 0.9901753513244621, "mean_per_class_recall": 0.8447215074857712}, "language": "en"}
|
ViT-B-32-laion2b_e16/benchmark_cifar100_laion2b_e16_ViT-B-32_en_zeroshot_classification.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"dataset": "cifar100", "model": "ViT-B-32", "pretrained": "laion2b_e16", "task": "zeroshot_classification", "metrics": {"acc1": 0.7544, "acc5": 0.9418, "mean_per_class_recall": 0.7544}, "language": "en"}
|
ViT-B-32-laion2b_e16/benchmark_cifar10_laion2b_e16_ViT-B-32_en_zeroshot_classification.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"dataset": "cifar10", "model": "ViT-B-32", "pretrained": "laion2b_e16", "task": "zeroshot_classification", "metrics": {"acc1": 0.9403, "acc5": 0.9991, "mean_per_class_recall": 0.9404999999999999}, "language": "en"}
|
ViT-B-32-laion2b_e16/benchmark_country211_laion2b_e16_ViT-B-32_en_zeroshot_classification.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"dataset": "country211", "model": "ViT-B-32", "pretrained": "laion2b_e16", "task": "zeroshot_classification", "metrics": {"acc1": 0.16530805687203792, "acc5": 0.3793838862559242, "mean_per_class_recall": 0.16516587677725122}, "language": "en"}
|
ViT-B-32-laion2b_e16/benchmark_dtd_laion2b_e16_ViT-B-32_en_zeroshot_classification.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"dataset": "dtd", "model": "ViT-B-32", "pretrained": "laion2b_e16", "task": "zeroshot_classification", "metrics": {"acc1": 0.5372340425531915, "acc5": 0.8393617021276596, "mean_per_class_recall": 0.5377659574468084}, "language": "en"}
|
ViT-B-32-laion2b_e16/benchmark_eurosat_laion2b_e16_ViT-B-32_en_zeroshot_classification.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"dataset": "eurosat", "model": "ViT-B-32", "pretrained": "laion2b_e16", "task": "zeroshot_classification", "metrics": {"acc1": 0.49333333333333335, "acc5": 0.9537407407407408, "mean_per_class_recall": 0.5056766666666667}, "language": "en"}
|
ViT-B-32-laion2b_e16/benchmark_fgvc_aircraft_laion2b_e16_ViT-B-32_en_zeroshot_classification.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"dataset": "fgvc_aircraft", "model": "ViT-B-32", "pretrained": "laion2b_e16", "task": "zeroshot_classification", "metrics": {"acc1": 0.2295229522952295, "acc5": 0.531953195319532, "mean_per_class_recall": 0.2291800356506239}, "language": "en"}
|
ViT-B-32-laion2b_e16/benchmark_flickr30k_laion2b_e16_ViT-B-32_en_zeroshot_retrieval.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"dataset": "flickr30k", "model": "ViT-B-32", "pretrained": "laion2b_e16", "task": "zeroshot_retrieval", "metrics": {"image_retrieval_recall@1": 0.6638000011444092, "text_retrieval_recall@1": 0.843999981880188, "image_retrieval_recall@5": 0.8817999958992004, "text_retrieval_recall@5": 0.9629999995231628, "image_retrieval_recall@10": 0.9319999814033508, "text_retrieval_recall@10": 0.984000027179718}, "language": "en"}
|
ViT-B-32-laion2b_e16/benchmark_flowers_laion2b_e16_ViT-B-32_en_zeroshot_classification.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"dataset": "flowers", "model": "ViT-B-32", "pretrained": "laion2b_e16", "task": "zeroshot_classification", "metrics": {"acc1": 0.6913319238900634, "acc5": 0.8482680110587087, "mean_per_class_recall": 0.6738670274697526}, "language": "en"}
|
ViT-B-32-laion2b_e16/benchmark_food101_laion2b_e16_ViT-B-32_en_zeroshot_classification.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"dataset": "food101", "model": "ViT-B-32", "pretrained": "laion2b_e16", "task": "zeroshot_classification", "metrics": {"acc1": 0.8164356435643564, "acc5": 0.9655049504950495, "mean_per_class_recall": 0.8162772277227723}, "language": "en"}
|
ViT-B-32-laion2b_e16/benchmark_gtsrb_laion2b_e16_ViT-B-32_en_zeroshot_classification.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"dataset": "gtsrb", "model": "ViT-B-32", "pretrained": "laion2b_e16", "task": "zeroshot_classification", "metrics": {"acc1": 0.3658749010292953, "acc5": 0.7003958828186857, "mean_per_class_recall": 0.344966812762566}, "language": "en"}
|
ViT-B-32-laion2b_e16/benchmark_imagenet1k_laion2b_e16_ViT-B-32_en_zeroshot_classification.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"dataset": "imagenet1k", "model": "ViT-B-32", "pretrained": "laion2b_e16", "task": "zeroshot_classification", "metrics": {"acc1": 0.65638, "acc5": 0.89392, "mean_per_class_recall": 0.6565799999999999}, "language": "en"}
|
ViT-B-32-laion2b_e16/benchmark_mscoco_captions_laion2b_e16_ViT-B-32_en_zeroshot_retrieval.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"dataset": "mscoco_captions", "model": "ViT-B-32", "pretrained": "laion2b_e16", "task": "zeroshot_retrieval", "metrics": {"image_retrieval_recall@1": 0.39124351739883423, "text_retrieval_recall@1": 0.5626000165939331, "image_retrieval_recall@5": 0.6470611691474915, "text_retrieval_recall@5": 0.7947999835014343, "image_retrieval_recall@10": 0.748100757598877, "text_retrieval_recall@10": 0.8708000183105469}, "language": "en"}
|
ViT-B-32-laion2b_e16/benchmark_pets_laion2b_e16_ViT-B-32_en_zeroshot_classification.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"dataset": "pets", "model": "ViT-B-32", "pretrained": "laion2b_e16", "task": "zeroshot_classification", "metrics": {"acc1": 0.8915235759062414, "acc5": 0.9959116925592805, "mean_per_class_recall": 0.8903837501708626}, "language": "en"}
|
ViT-B-32-laion2b_e16/benchmark_stl10_laion2b_e16_ViT-B-32_en_zeroshot_classification.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"dataset": "stl10", "model": "ViT-B-32", "pretrained": "laion2b_e16", "task": "zeroshot_classification", "metrics": {"acc1": 0.96525, "acc5": 0.9995, "mean_per_class_recall": 0.9653749999999999}, "language": "en"}
|
ViT-B-32-laion2b_e16/benchmark_vtab_resisc45_laion2b_e16_ViT-B-32_en_zeroshot_classification.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"dataset": "vtab/resisc45", "model": "ViT-B-32", "pretrained": "laion2b_e16", "task": "zeroshot_classification", "metrics": {"acc1": 0.6187301587301587, "acc5": 0.9203174603174603, "mean_per_class_recall": 0.6257013647375445}, "language": "en"}
|
ViT-B-32-laion2b_s34b_b79k/benchmark_caltech101_laion2b_s34b_b79k_ViT-B-32_en_zeroshot_classification.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"dataset": "caltech101", "model": "ViT-B-32", "pretrained": "laion2b_s34b_b79k", "task": "zeroshot_classification", "metrics": {"acc1": 0.8650481189851269, "acc5": 0.9631452318460193, "mean_per_class_recall": 0.9155581104415481}, "language": "en"}
|
ViT-B-32-laion2b_s34b_b79k/benchmark_cars_laion2b_s34b_b79k_ViT-B-32_en_zeroshot_classification.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"dataset": "cars", "model": "ViT-B-32", "pretrained": "laion2b_s34b_b79k", "task": "zeroshot_classification", "metrics": {"acc1": 0.8605894789205323, "acc5": 0.9909215271732371, "mean_per_class_recall": 0.8616566229490181}, "language": "en"}
|
ViT-B-32-laion2b_s34b_b79k/benchmark_cifar100_laion2b_s34b_b79k_ViT-B-32_en_zeroshot_classification.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"dataset": "cifar100", "model": "ViT-B-32", "pretrained": "laion2b_s34b_b79k", "task": "zeroshot_classification", "metrics": {"acc1": 0.7552, "acc5": 0.9385, "mean_per_class_recall": 0.7553}, "language": "en"}
|
ViT-B-32-laion2b_s34b_b79k/benchmark_cifar10_laion2b_s34b_b79k_ViT-B-32_en_zeroshot_classification.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"dataset": "cifar10", "model": "ViT-B-32", "pretrained": "laion2b_s34b_b79k", "task": "zeroshot_classification", "metrics": {"acc1": 0.9355, "acc5": 0.9981, "mean_per_class_recall": 0.9356}, "language": "en"}
|
ViT-B-32-laion2b_s34b_b79k/benchmark_country211_laion2b_s34b_b79k_ViT-B-32_en_zeroshot_classification.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"dataset": "country211", "model": "ViT-B-32", "pretrained": "laion2b_s34b_b79k", "task": "zeroshot_classification", "metrics": {"acc1": 0.16691943127962086, "acc5": 0.3837440758293839, "mean_per_class_recall": 0.16682464454976303}, "language": "en"}
|
ViT-B-32-laion2b_s34b_b79k/benchmark_dtd_laion2b_s34b_b79k_ViT-B-32_en_zeroshot_classification.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"dataset": "dtd", "model": "ViT-B-32", "pretrained": "laion2b_s34b_b79k", "task": "zeroshot_classification", "metrics": {"acc1": 0.5579787234042554, "acc5": 0.8627659574468085, "mean_per_class_recall": 0.5569148936170213}, "language": "en"}
|
ViT-B-32-laion2b_s34b_b79k/benchmark_eurosat_laion2b_s34b_b79k_ViT-B-32_en_zeroshot_classification.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"dataset": "eurosat", "model": "ViT-B-32", "pretrained": "laion2b_s34b_b79k", "task": "zeroshot_classification", "metrics": {"acc1": 0.4715185185185185, "acc5": 0.9415925925925926, "mean_per_class_recall": 0.48756666666666665}, "language": "en"}
|
ViT-B-32-laion2b_s34b_b79k/benchmark_fgvc_aircraft_laion2b_s34b_b79k_ViT-B-32_en_zeroshot_classification.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"dataset": "fgvc_aircraft", "model": "ViT-B-32", "pretrained": "laion2b_s34b_b79k", "task": "zeroshot_classification", "metrics": {"acc1": 0.24542454245424541, "acc5": 0.5724572457245725, "mean_per_class_recall": 0.2429679144385027}, "language": "en"}
|
ViT-B-32-laion2b_s34b_b79k/benchmark_flickr30k_laion2b_s34b_b79k_ViT-B-32_en_zeroshot_retrieval.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"dataset": "flickr30k", "model": "ViT-B-32", "pretrained": "laion2b_s34b_b79k", "task": "zeroshot_retrieval", "metrics": {"image_retrieval_recall@1": 0.6675999760627747, "text_retrieval_recall@1": 0.843999981880188, "image_retrieval_recall@5": 0.883400022983551, "text_retrieval_recall@5": 0.9629999995231628, "image_retrieval_recall@10": 0.9309999942779541, "text_retrieval_recall@10": 0.9829999804496765}, "language": "en"}
|
ViT-B-32-laion2b_s34b_b79k/benchmark_flowers_laion2b_s34b_b79k_ViT-B-32_en_zeroshot_classification.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"dataset": "flowers", "model": "ViT-B-32", "pretrained": "laion2b_s34b_b79k", "task": "zeroshot_classification", "metrics": {"acc1": 0.7168645308180192, "acc5": 0.8759147828915271, "mean_per_class_recall": 0.701267455254337}, "language": "en"}
|
ViT-B-32-laion2b_s34b_b79k/benchmark_food101_laion2b_s34b_b79k_ViT-B-32_en_zeroshot_classification.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"dataset": "food101", "model": "ViT-B-32", "pretrained": "laion2b_s34b_b79k", "task": "zeroshot_classification", "metrics": {"acc1": 0.8267326732673267, "acc5": 0.9683168316831683, "mean_per_class_recall": 0.8268118811881187}, "language": "en"}
|
ViT-B-32-laion2b_s34b_b79k/benchmark_gtsrb_laion2b_s34b_b79k_ViT-B-32_en_zeroshot_classification.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"dataset": "gtsrb", "model": "ViT-B-32", "pretrained": "laion2b_s34b_b79k", "task": "zeroshot_classification", "metrics": {"acc1": 0.49287410926365793, "acc5": 0.7422011084718924, "mean_per_class_recall": 0.4322952901029241}, "language": "en"}
|
ViT-B-32-laion2b_s34b_b79k/benchmark_imagenet1k_laion2b_s34b_b79k_ViT-B-32_en_zeroshot_classification.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"dataset": "imagenet1k", "model": "ViT-B-32", "pretrained": "laion2b_s34b_b79k", "task": "zeroshot_classification", "metrics": {"acc1": 0.66532, "acc5": 0.89896, "mean_per_class_recall": 0.66546}, "language": "en"}
|