Upload folder using huggingface_hub
Browse files- logs-lr1e-3-datacomp-rebuttal/clipcls_vit_b16_s512m_bs16k_wo_idf/checkpoints/epoch_1.pt +3 -0
- logs-lr1e-3-datacomp-rebuttal/clipcls_vit_b16_s512m_bs16k_wo_idf/checkpoints/epoch_2.pt +3 -0
- logs-lr1e-3-datacomp-rebuttal/clipcls_vit_b16_s512m_bs16k_wo_idf/checkpoints/results.jsonl +3 -0
- logs-lr1e-3-datacomp-rebuttal/clipcls_vit_b16_s512m_bs16k_wo_idf/out.log +421 -0
- logs-lr1e-3-datacomp-rebuttal/clipcls_vit_b16_s512m_bs16k_wo_idf/params.txt +113 -0
logs-lr1e-3-datacomp-rebuttal/clipcls_vit_b16_s512m_bs16k_wo_idf/checkpoints/epoch_1.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c3bfb1277d3f98158d5fd7e6447ce60b0e3a44cbd538f7224996f9d844a0ed73
|
| 3 |
+
size 2252180672
|
logs-lr1e-3-datacomp-rebuttal/clipcls_vit_b16_s512m_bs16k_wo_idf/checkpoints/epoch_2.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:17f6c840dcf05de811825d1600bcae7d8782475fc249c3af333352b7db6dc026
|
| 3 |
+
size 2252180672
|
logs-lr1e-3-datacomp-rebuttal/clipcls_vit_b16_s512m_bs16k_wo_idf/checkpoints/results.jsonl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"imagenet-zeroshot-val-top1": 0.27182, "imagenet-zeroshot-val-top5": 0.51772}
|
| 2 |
+
{"imagenet-zeroshot-val-top1": 0.42258, "imagenet-zeroshot-val-top5": 0.70722}
|
| 3 |
+
{"imagenet-zeroshot-val-top1": 0.44746, "imagenet-zeroshot-val-top5": 0.73384}
|
logs-lr1e-3-datacomp-rebuttal/clipcls_vit_b16_s512m_bs16k_wo_idf/out.log
ADDED
|
@@ -0,0 +1,421 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
2025-07-29,16:12:13 | INFO | No latest resume checkpoint found in ./logs-lr1e-3-datacomp-rebuttal/clipcls_vit_b16_s512m_bs16k_wo_idf/checkpoints.
|
| 2 |
+
2025-07-29,16:12:23 | INFO | Running in distributed mode with multiple processes. Device: cuda:0.Process (global: 0, local 0), total 8.
|
| 3 |
+
2025-07-29,16:12:23 | INFO | Loaded CLIPCLS-ViT-B-16 model config.
|
| 4 |
+
2025-07-29,16:12:25 | INFO | Model:
|
| 5 |
+
2025-07-29,16:12:25 | INFO | CLIPCLS(
|
| 6 |
+
(visual): VisionTransformer(
|
| 7 |
+
(conv1): Conv2d(3, 768, kernel_size=(16, 16), stride=(16, 16), bias=False)
|
| 8 |
+
(patch_dropout): Identity()
|
| 9 |
+
(ln_pre): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 10 |
+
(transformer): Transformer(
|
| 11 |
+
(resblocks): ModuleList(
|
| 12 |
+
(0-11): 12 x ResidualAttentionBlock(
|
| 13 |
+
(ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 14 |
+
(attn): MultiheadAttention(
|
| 15 |
+
(out_proj): NonDynamicallyQuantizableLinear(in_features=768, out_features=768, bias=True)
|
| 16 |
+
)
|
| 17 |
+
(ls_1): Identity()
|
| 18 |
+
(ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 19 |
+
(mlp): Sequential(
|
| 20 |
+
(c_fc): Linear(in_features=768, out_features=3072, bias=True)
|
| 21 |
+
(gelu): GELU(approximate='none')
|
| 22 |
+
(c_proj): Linear(in_features=3072, out_features=768, bias=True)
|
| 23 |
+
)
|
| 24 |
+
(ls_2): Identity()
|
| 25 |
+
)
|
| 26 |
+
)
|
| 27 |
+
)
|
| 28 |
+
(ln_post): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 29 |
+
)
|
| 30 |
+
(text): TextTransformer(
|
| 31 |
+
(token_embedding): Embedding(49408, 512)
|
| 32 |
+
(transformer): Transformer(
|
| 33 |
+
(resblocks): ModuleList(
|
| 34 |
+
(0-11): 12 x ResidualAttentionBlock(
|
| 35 |
+
(ln_1): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
| 36 |
+
(attn): MultiheadAttention(
|
| 37 |
+
(out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)
|
| 38 |
+
)
|
| 39 |
+
(ls_1): Identity()
|
| 40 |
+
(ln_2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
| 41 |
+
(mlp): Sequential(
|
| 42 |
+
(c_fc): Linear(in_features=512, out_features=2048, bias=True)
|
| 43 |
+
(gelu): GELU(approximate='none')
|
| 44 |
+
(c_proj): Linear(in_features=2048, out_features=512, bias=True)
|
| 45 |
+
)
|
| 46 |
+
(ls_2): Identity()
|
| 47 |
+
)
|
| 48 |
+
)
|
| 49 |
+
)
|
| 50 |
+
(ln_final): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
| 51 |
+
)
|
| 52 |
+
(text_decoder): MixClsHead(
|
| 53 |
+
(mlps): ModuleList()
|
| 54 |
+
(ln_mlp): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 55 |
+
(text_projection): Linear(in_features=768, out_features=49408, bias=True)
|
| 56 |
+
)
|
| 57 |
+
)
|
| 58 |
+
2025-07-29,16:12:25 | INFO | Params:
|
| 59 |
+
2025-07-29,16:12:25 | INFO | NDR_patch_size: 16
|
| 60 |
+
2025-07-29,16:12:25 | INFO | accum_freq: 1
|
| 61 |
+
2025-07-29,16:12:25 | INFO | aug_cfg: {}
|
| 62 |
+
2025-07-29,16:12:25 | INFO | batch_size: 2048
|
| 63 |
+
2025-07-29,16:12:25 | INFO | beta1: 0.9
|
| 64 |
+
2025-07-29,16:12:25 | INFO | beta2: 0.98
|
| 65 |
+
2025-07-29,16:12:25 | INFO | checkpoint_path: ./logs-lr1e-3-datacomp-rebuttal/clipcls_vit_b16_s512m_bs16k_wo_idf/checkpoints
|
| 66 |
+
2025-07-29,16:12:25 | INFO | class_loss_weight: 1.0
|
| 67 |
+
2025-07-29,16:12:25 | INFO | coca_caption_loss_weight: 2.0
|
| 68 |
+
2025-07-29,16:12:25 | INFO | coca_contrastive_loss_weight: 1.0
|
| 69 |
+
2025-07-29,16:12:25 | INFO | copy_codebase: False
|
| 70 |
+
2025-07-29,16:12:25 | INFO | csv_caption_key: title
|
| 71 |
+
2025-07-29,16:12:25 | INFO | csv_img_key: filepath
|
| 72 |
+
2025-07-29,16:12:25 | INFO | csv_separator:
|
| 73 |
+
2025-07-29,16:12:25 | INFO | dataset_resampled: False
|
| 74 |
+
2025-07-29,16:12:25 | INFO | dataset_type: webdataset
|
| 75 |
+
2025-07-29,16:12:25 | INFO | ddp_static_graph: True
|
| 76 |
+
2025-07-29,16:12:25 | INFO | debug: False
|
| 77 |
+
2025-07-29,16:12:25 | INFO | delete_prev_step_ckpt: True
|
| 78 |
+
2025-07-29,16:12:25 | INFO | delete_previous_checkpoint: False
|
| 79 |
+
2025-07-29,16:12:25 | INFO | device: cuda:0
|
| 80 |
+
2025-07-29,16:12:25 | INFO | dist_backend: nccl
|
| 81 |
+
2025-07-29,16:12:25 | INFO | dist_url: env://
|
| 82 |
+
2025-07-29,16:12:25 | INFO | distill: False
|
| 83 |
+
2025-07-29,16:12:25 | INFO | distill_model: None
|
| 84 |
+
2025-07-29,16:12:25 | INFO | distill_pretrained: None
|
| 85 |
+
2025-07-29,16:12:25 | INFO | distributed: True
|
| 86 |
+
2025-07-29,16:12:25 | INFO | epochs: 2
|
| 87 |
+
2025-07-29,16:12:25 | INFO | epochs_cooldown: None
|
| 88 |
+
2025-07-29,16:12:25 | INFO | eps: 1e-06
|
| 89 |
+
2025-07-29,16:12:25 | INFO | force_custom_text: False
|
| 90 |
+
2025-07-29,16:12:25 | INFO | force_image_size: 224
|
| 91 |
+
2025-07-29,16:12:25 | INFO | force_patch_dropout: None
|
| 92 |
+
2025-07-29,16:12:25 | INFO | force_quick_gelu: False
|
| 93 |
+
2025-07-29,16:12:25 | INFO | gather_with_grad: True
|
| 94 |
+
2025-07-29,16:12:25 | INFO | global_batch_size: 16384
|
| 95 |
+
2025-07-29,16:12:25 | INFO | grad_checkpointing: True
|
| 96 |
+
2025-07-29,16:12:25 | INFO | grad_clip_norm: None
|
| 97 |
+
2025-07-29,16:12:25 | INFO | horovod: False
|
| 98 |
+
2025-07-29,16:12:25 | INFO | image_interpolation: None
|
| 99 |
+
2025-07-29,16:12:25 | INFO | image_mean: None
|
| 100 |
+
2025-07-29,16:12:25 | INFO | image_resize_mode: None
|
| 101 |
+
2025-07-29,16:12:25 | INFO | image_std: None
|
| 102 |
+
2025-07-29,16:12:25 | INFO | imagenet_v2: None
|
| 103 |
+
2025-07-29,16:12:25 | INFO | imagenet_val: /mnt/bn/zilongdata-us/dataset/ILSVRC/Data/CLS-LOC/val
|
| 104 |
+
2025-07-29,16:12:25 | INFO | is_cls_token: True
|
| 105 |
+
2025-07-29,16:12:25 | INFO | local_loss: True
|
| 106 |
+
2025-07-29,16:12:25 | INFO | local_rank: 0
|
| 107 |
+
2025-07-29,16:12:25 | INFO | lock_image: False
|
| 108 |
+
2025-07-29,16:12:25 | INFO | lock_image_freeze_bn_stats: False
|
| 109 |
+
2025-07-29,16:12:25 | INFO | lock_image_unlocked_groups: 0
|
| 110 |
+
2025-07-29,16:12:25 | INFO | lock_text: False
|
| 111 |
+
2025-07-29,16:12:25 | INFO | lock_text_freeze_layer_norm: False
|
| 112 |
+
2025-07-29,16:12:25 | INFO | lock_text_unlocked_layers: 0
|
| 113 |
+
2025-07-29,16:12:25 | INFO | log_every_n_steps: 128
|
| 114 |
+
2025-07-29,16:12:25 | INFO | log_level: 20
|
| 115 |
+
2025-07-29,16:12:25 | INFO | log_local: False
|
| 116 |
+
2025-07-29,16:12:25 | INFO | log_path: ./logs-lr1e-3-datacomp-rebuttal/clipcls_vit_b16_s512m_bs16k_wo_idf/out.log
|
| 117 |
+
2025-07-29,16:12:25 | INFO | logs: ./logs-lr1e-3-datacomp-rebuttal
|
| 118 |
+
2025-07-29,16:12:25 | INFO | lr: 0.001
|
| 119 |
+
2025-07-29,16:12:25 | INFO | lr_cooldown_end: 0.0
|
| 120 |
+
2025-07-29,16:12:25 | INFO | lr_cooldown_power: 1.0
|
| 121 |
+
2025-07-29,16:12:25 | INFO | lr_scheduler: cosine
|
| 122 |
+
2025-07-29,16:12:25 | INFO | max_seq_len: 15000
|
| 123 |
+
2025-07-29,16:12:25 | INFO | model: CLIPCLS-ViT-B-16
|
| 124 |
+
2025-07-29,16:12:25 | INFO | name: clipcls_vit_b16_s512m_bs16k_wo_idf
|
| 125 |
+
2025-07-29,16:12:25 | INFO | native_dynamic_resolution: False
|
| 126 |
+
2025-07-29,16:12:25 | INFO | no_set_device_rank: False
|
| 127 |
+
2025-07-29,16:12:25 | INFO | only_class_loss: False
|
| 128 |
+
2025-07-29,16:12:25 | INFO | only_packing: False
|
| 129 |
+
2025-07-29,16:12:25 | INFO | post_train: False
|
| 130 |
+
2025-07-29,16:12:25 | INFO | precision: amp_bfloat16
|
| 131 |
+
2025-07-29,16:12:25 | INFO | pretrained:
|
| 132 |
+
2025-07-29,16:12:25 | INFO | pretrained_image:
|
| 133 |
+
2025-07-29,16:12:25 | INFO | pretrained_text:
|
| 134 |
+
2025-07-29,16:12:25 | INFO | rank: 0
|
| 135 |
+
2025-07-29,16:12:25 | INFO | remote_sync: None
|
| 136 |
+
2025-07-29,16:12:25 | INFO | remote_sync_frequency: 300
|
| 137 |
+
2025-07-29,16:12:25 | INFO | remote_sync_protocol: s3
|
| 138 |
+
2025-07-29,16:12:25 | INFO | report_to: wandb
|
| 139 |
+
2025-07-29,16:12:25 | INFO | resume: None
|
| 140 |
+
2025-07-29,16:12:25 | INFO | rope_attn_num_heads: 12
|
| 141 |
+
2025-07-29,16:12:25 | INFO | rope_model_width: 768
|
| 142 |
+
2025-07-29,16:12:25 | INFO | save_every_n_steps: 6104
|
| 143 |
+
2025-07-29,16:12:25 | INFO | save_frequency: 1
|
| 144 |
+
2025-07-29,16:12:25 | INFO | save_most_recent: False
|
| 145 |
+
2025-07-29,16:12:25 | INFO | seed: 0
|
| 146 |
+
2025-07-29,16:12:25 | INFO | siglip: False
|
| 147 |
+
2025-07-29,16:12:25 | INFO | skip_scheduler: False
|
| 148 |
+
2025-07-29,16:12:25 | INFO | tensorboard: False
|
| 149 |
+
2025-07-29,16:12:25 | INFO | tensorboard_path:
|
| 150 |
+
2025-07-29,16:12:25 | INFO | torchcompile: False
|
| 151 |
+
2025-07-29,16:12:25 | INFO | torchscript: False
|
| 152 |
+
2025-07-29,16:12:25 | INFO | trace: False
|
| 153 |
+
2025-07-29,16:12:25 | INFO | train_data: /mnt/bn/zilongdata-us/dataset/recap-datacomp-1b-webdataset/{000000..140146}.tar
|
| 154 |
+
2025-07-29,16:12:25 | INFO | train_data_upsampling_factors: None
|
| 155 |
+
2025-07-29,16:12:25 | INFO | train_num_samples: 128000000
|
| 156 |
+
2025-07-29,16:12:25 | INFO | use_bn_sync: False
|
| 157 |
+
2025-07-29,16:12:25 | INFO | use_bnb_linear: None
|
| 158 |
+
2025-07-29,16:12:25 | INFO | use_idf: False
|
| 159 |
+
2025-07-29,16:12:25 | INFO | val_data: None
|
| 160 |
+
2025-07-29,16:12:25 | INFO | val_frequency: 1
|
| 161 |
+
2025-07-29,16:12:25 | INFO | val_num_samples: None
|
| 162 |
+
2025-07-29,16:12:25 | INFO | val_steps: 6104
|
| 163 |
+
2025-07-29,16:12:25 | INFO | wandb: True
|
| 164 |
+
2025-07-29,16:12:25 | INFO | wandb_notes:
|
| 165 |
+
2025-07-29,16:12:25 | INFO | wandb_project_name: cls-clip-batch-size
|
| 166 |
+
2025-07-29,16:12:25 | INFO | warmup: 500
|
| 167 |
+
2025-07-29,16:12:25 | INFO | wd: 0.2
|
| 168 |
+
2025-07-29,16:12:25 | INFO | workers: 1
|
| 169 |
+
2025-07-29,16:12:25 | INFO | world_size: 8
|
| 170 |
+
2025-07-29,16:12:25 | INFO | zeroshot_frequency: 2
|
| 171 |
+
2025-07-29,16:12:25 | INFO | zeroshot_steps: 6104
|
| 172 |
+
2025-07-29,16:12:43 | INFO | Start epoch 0
|
| 173 |
+
2025-07-29,16:12:58 | INFO | Train Epoch: 0 [ 16384/128008192 (0%)] Data (t): 11.308 Batch (t): 14.854, 1102.97/s, 137.872/s/gpu LR: 0.000002 Logit Scale: 14.286 Class_loss: 11.351 (11.351) Contrastive_loss: 9.7788 (9.7788) Loss: 21.130 (21.130)
|
| 174 |
+
2025-07-29,16:28:51 | WARNING | Handling webdataset error (OSError('image file is truncated (6 bytes not processed)')). Ignoring.
|
| 175 |
+
2025-07-29,16:30:10 | WARNING | Handling webdataset error (OSError('image file is truncated (17 bytes not processed)')). Ignoring.
|
| 176 |
+
2025-07-29,16:31:55 | INFO | Train Epoch: 0 [ 2113536/128008192 (2%)] Data (t): 3.178 Batch (t): 8.886, 1811.48/s, 226.435/s/gpu LR: 0.000258 Logit Scale: 14.329 Class_loss: 7.3654 (9.3582) Contrastive_loss: 8.9339 (9.3564) Loss: 16.299 (18.715)
|
| 177 |
+
2025-07-29,16:51:35 | INFO | Train Epoch: 0 [ 4210688/128008192 (3%)] Data (t): 0.755 Batch (t): 9.216, 1791.32/s, 223.915/s/gpu LR: 0.000514 Logit Scale: 14.648 Class_loss: 7.1376 (8.6180) Contrastive_loss: 8.4486 (9.0538) Loss: 15.586 (17.672)
|
| 178 |
+
2025-07-29,16:53:57 | WARNING | Handling webdataset error (OSError('image file is truncated (112 bytes not processed)')). Ignoring.
|
| 179 |
+
2025-07-29,17:10:57 | INFO | Train Epoch: 0 [ 6307840/128008192 (5%)] Data (t): 0.738 Batch (t): 9.082, 1735.28/s, 216.909/s/gpu LR: 0.000770 Logit Scale: 15.289 Class_loss: 6.3549 (8.0523) Contrastive_loss: 6.7676 (8.4822) Loss: 13.123 (16.534)
|
| 180 |
+
2025-07-29,17:25:20 | WARNING | Handling webdataset error (OSError('image file is truncated (6 bytes not processed)')). Ignoring.
|
| 181 |
+
2025-07-29,17:30:33 | INFO | Train Epoch: 0 [ 8404992/128008192 (7%)] Data (t): 0.781 Batch (t): 9.186, 1756.55/s, 219.568/s/gpu LR: 0.001000 Logit Scale: 16.225 Class_loss: 6.0803 (7.6579) Contrastive_loss: 6.3967 (8.0651) Loss: 12.477 (15.723)
|
| 182 |
+
2025-07-29,17:49:47 | INFO | Train Epoch: 0 [ 10502144/128008192 (8%)] Data (t): 6.351 Batch (t): 9.017, 1830.42/s, 228.802/s/gpu LR: 0.001000 Logit Scale: 17.603 Class_loss: 6.3114 (7.4335) Contrastive_loss: 6.0878 (7.7356) Loss: 12.399 (15.169)
|
| 183 |
+
2025-07-29,17:54:51 | WARNING | Handling webdataset error (OSError('image file is truncated (5 bytes not processed)')). Ignoring.
|
| 184 |
+
2025-07-29,17:54:55 | WARNING | Handling webdataset error (OSError('image file is truncated (7 bytes not processed)')). Ignoring.
|
| 185 |
+
2025-07-29,18:07:53 | WARNING | Handling webdataset error (OSError('image file is truncated (26 bytes not processed)')). Ignoring.
|
| 186 |
+
2025-07-29,18:08:56 | INFO | Train Epoch: 0 [ 12599296/128008192 (10%)] Data (t): 3.382 Batch (t): 8.973, 1808.61/s, 226.076/s/gpu LR: 0.000999 Logit Scale: 19.927 Class_loss: 5.9806 (7.2259) Contrastive_loss: 5.4818 (7.4136) Loss: 11.462 (14.640)
|
| 187 |
+
2025-07-29,18:28:19 | INFO | Train Epoch: 0 [ 14696448/128008192 (11%)] Data (t): 0.740 Batch (t): 9.086, 1777.93/s, 222.242/s/gpu LR: 0.000998 Logit Scale: 22.309 Class_loss: 5.8713 (7.0566) Contrastive_loss: 4.0691 (6.9955) Loss: 9.9404 (14.052)
|
| 188 |
+
2025-07-29,18:36:42 | WARNING | Handling webdataset error (OSError('image file is truncated (17 bytes not processed)')). Ignoring.
|
| 189 |
+
2025-07-29,18:47:40 | INFO | Train Epoch: 0 [ 16793600/128008192 (13%)] Data (t): 2.401 Batch (t): 9.074, 1751.07/s, 218.884/s/gpu LR: 0.000997 Logit Scale: 25.128 Class_loss: 5.8093 (6.9180) Contrastive_loss: 3.7880 (6.6391) Loss: 9.5972 (13.557)
|
| 190 |
+
2025-07-29,19:06:58 | INFO | Train Epoch: 0 [ 18890752/128008192 (15%)] Data (t): 1.641 Batch (t): 9.041, 1779.01/s, 222.376/s/gpu LR: 0.000995 Logit Scale: 28.174 Class_loss: 5.6855 (6.7947) Contrastive_loss: 2.9498 (6.2702) Loss: 8.6353 (13.065)
|
| 191 |
+
2025-07-29,19:26:18 | INFO | Train Epoch: 0 [ 20987904/128008192 (16%)] Data (t): 2.288 Batch (t): 9.064, 1985.92/s, 248.240/s/gpu LR: 0.000993 Logit Scale: 31.356 Class_loss: 6.8410 (6.7989) Contrastive_loss: 5.2833 (6.1805) Loss: 12.124 (12.979)
|
| 192 |
+
2025-07-29,19:45:37 | INFO | Train Epoch: 0 [ 23085056/128008192 (18%)] Data (t): 2.872 Batch (t): 9.060, 1812.62/s, 226.577/s/gpu LR: 0.000991 Logit Scale: 34.156 Class_loss: 7.0337 (6.8185) Contrastive_loss: 5.6039 (6.1324) Loss: 12.638 (12.951)
|
| 193 |
+
2025-07-29,20:02:07 | WARNING | Handling webdataset error (OSError('image file is truncated (21 bytes not processed)')). Ignoring.
|
| 194 |
+
2025-07-29,20:05:04 | INFO | Train Epoch: 0 [ 25182208/128008192 (20%)] Data (t): 3.122 Batch (t): 9.114, 1860.03/s, 232.503/s/gpu LR: 0.000988 Logit Scale: 37.231 Class_loss: 6.3937 (6.7858) Contrastive_loss: 3.9542 (5.9649) Loss: 10.348 (12.751)
|
| 195 |
+
2025-07-29,20:22:21 | WARNING | Handling webdataset error (OSError('image file is truncated (21 bytes not processed)')). Ignoring.
|
| 196 |
+
2025-07-29,20:24:12 | INFO | Train Epoch: 0 [ 27279360/128008192 (21%)] Data (t): 2.771 Batch (t): 8.965, 1852.01/s, 231.501/s/gpu LR: 0.000985 Logit Scale: 40.757 Class_loss: 5.6519 (6.7048) Contrastive_loss: 1.8800 (5.6731) Loss: 7.5319 (12.378)
|
| 197 |
+
2025-07-29,20:35:25 | WARNING | Handling webdataset error (OSError('image file is truncated (27 bytes not processed)')). Ignoring.
|
| 198 |
+
2025-07-29,20:43:24 | INFO | Train Epoch: 0 [ 29376512/128008192 (23%)] Data (t): 0.719 Batch (t): 9.005, 1877.41/s, 234.677/s/gpu LR: 0.000982 Logit Scale: 43.456 Class_loss: 5.9644 (6.6555) Contrastive_loss: 2.6517 (5.4717) Loss: 8.6161 (12.127)
|
| 199 |
+
2025-07-29,20:53:02 | WARNING | Handling webdataset error (OSError('image file is truncated (77 bytes not processed)')). Ignoring.
|
| 200 |
+
2025-07-29,21:02:36 | INFO | Train Epoch: 0 [ 31473664/128008192 (25%)] Data (t): 0.945 Batch (t): 8.996, 1844.23/s, 230.529/s/gpu LR: 0.000978 Logit Scale: 46.081 Class_loss: 5.6259 (6.5911) Contrastive_loss: 1.4529 (5.2205) Loss: 7.0788 (11.812)
|
| 201 |
+
2025-07-29,21:07:07 | WARNING | Handling webdataset error (OSError('image file is truncated (48 bytes not processed)')). Ignoring.
|
| 202 |
+
2025-07-29,21:07:38 | WARNING | Handling webdataset error (OSError('image file is truncated (11 bytes not processed)')). Ignoring.
|
| 203 |
+
2025-07-29,21:18:07 | WARNING | Handling webdataset error (OSError('image file is truncated (9 bytes not processed)')). Ignoring.
|
| 204 |
+
2025-07-29,21:21:59 | INFO | Train Epoch: 0 [ 33570816/128008192 (26%)] Data (t): 3.007 Batch (t): 9.084, 1769.34/s, 221.168/s/gpu LR: 0.000974 Logit Scale: 48.247 Class_loss: 5.7278 (6.5403) Contrastive_loss: 1.8399 (5.0216) Loss: 7.5677 (11.562)
|
| 205 |
+
2025-07-29,21:33:53 | WARNING | Handling webdataset error (OSError('image file is truncated (8 bytes not processed)')). Ignoring.
|
| 206 |
+
2025-07-29,21:41:18 | INFO | Train Epoch: 0 [ 35667968/128008192 (28%)] Data (t): 1.990 Batch (t): 9.058, 1822.58/s, 227.822/s/gpu LR: 0.000970 Logit Scale: 47.891 Class_loss: 6.9700 (6.5642) Contrastive_loss: 4.7778 (5.0081) Loss: 11.748 (11.572)
|
| 207 |
+
2025-07-29,22:00:47 | INFO | Train Epoch: 0 [ 37765120/128008192 (30%)] Data (t): 1.201 Batch (t): 9.136, 1789.21/s, 223.652/s/gpu LR: 0.000965 Logit Scale: 50.081 Class_loss: 5.4208 (6.5040) Contrastive_loss: 1.1498 (4.8050) Loss: 6.5706 (11.309)
|
| 208 |
+
2025-07-29,22:20:17 | INFO | Train Epoch: 0 [ 39862272/128008192 (31%)] Data (t): 6.269 Batch (t): 9.135, 1773.82/s, 221.727/s/gpu LR: 0.000960 Logit Scale: 51.902 Class_loss: 5.4565 (6.4517) Contrastive_loss: 1.2286 (4.6262) Loss: 6.6851 (11.078)
|
| 209 |
+
2025-07-29,22:31:27 | WARNING | Handling webdataset error (OSError('image file is truncated (6 bytes not processed)')). Ignoring.
|
| 210 |
+
2025-07-29,22:39:25 | WARNING | Handling webdataset error (OSError('image file is truncated (8 bytes not processed)')). Ignoring.
|
| 211 |
+
2025-07-29,22:39:43 | INFO | Train Epoch: 0 [ 41959424/128008192 (33%)] Data (t): 6.945 Batch (t): 9.110, 1743.27/s, 217.909/s/gpu LR: 0.000955 Logit Scale: 52.044 Class_loss: 5.3777 (6.4005) Contrastive_loss: 0.95217 (4.4512) Loss: 6.3298 (10.852)
|
| 212 |
+
2025-07-29,22:59:27 | INFO | Train Epoch: 0 [ 44056576/128008192 (34%)] Data (t): 0.752 Batch (t): 9.250, 1716.84/s, 214.605/s/gpu LR: 0.000949 Logit Scale: 54.521 Class_loss: 5.4181 (6.3559) Contrastive_loss: 0.90484 (4.2900) Loss: 6.3229 (10.646)
|
| 213 |
+
2025-07-29,23:11:41 | WARNING | Handling webdataset error (OSError('image file is truncated (10 bytes not processed)')). Ignoring.
|
| 214 |
+
2025-07-29,23:19:18 | INFO | Train Epoch: 0 [ 46153728/128008192 (36%)] Data (t): 0.807 Batch (t): 9.309, 1761.43/s, 220.179/s/gpu LR: 0.000943 Logit Scale: 56.107 Class_loss: 5.3664 (6.3128) Contrastive_loss: 0.88496 (4.1420) Loss: 6.2513 (10.455)
|
| 215 |
+
2025-07-29,23:34:42 | WARNING | Handling webdataset error (OSError('image file is truncated (9 bytes not processed)')). Ignoring.
|
| 216 |
+
2025-07-29,23:37:28 | WARNING | Handling webdataset error (OSError('image file is truncated (1 bytes not processed)')). Ignoring.
|
| 217 |
+
2025-07-29,23:38:03 | WARNING | Handling webdataset error (OSError('image file is truncated (7 bytes not processed)')). Ignoring.
|
| 218 |
+
2025-07-29,23:38:44 | INFO | Train Epoch: 0 [ 48250880/128008192 (38%)] Data (t): 0.823 Batch (t): 9.107, 1902.44/s, 237.805/s/gpu LR: 0.000937 Logit Scale: 57.173 Class_loss: 5.4043 (6.2750) Contrastive_loss: 0.91440 (4.0075) Loss: 6.3187 (10.283)
|
| 219 |
+
2025-07-29,23:58:14 | INFO | Train Epoch: 0 [ 50348032/128008192 (39%)] Data (t): 4.571 Batch (t): 9.140, 1775.21/s, 221.901/s/gpu LR: 0.000930 Logit Scale: 58.191 Class_loss: 5.3573 (6.2383) Contrastive_loss: 0.80949 (3.8796) Loss: 6.1668 (10.118)
|
| 220 |
+
2025-07-30,00:01:54 | WARNING | Handling webdataset error (UnidentifiedImageError('cannot identify image file <_io.BytesIO object at 0x7f4038904040>')). Ignoring.
|
| 221 |
+
2025-07-30,00:07:46 | WARNING | Handling webdataset error (OSError('image file is truncated (5 bytes not processed)')). Ignoring.
|
| 222 |
+
2025-07-30,00:17:37 | INFO | Train Epoch: 0 [ 52445184/128008192 (41%)] Data (t): 7.725 Batch (t): 9.083, 1897.26/s, 237.158/s/gpu LR: 0.000923 Logit Scale: 59.362 Class_loss: 5.3462 (6.2040) Contrastive_loss: 0.82562 (3.7621) Loss: 6.1718 (9.9661)
|
| 223 |
+
2025-07-30,00:37:09 | INFO | Train Epoch: 0 [ 54542336/128008192 (43%)] Data (t): 1.257 Batch (t): 9.162, 1768.68/s, 221.085/s/gpu LR: 0.000916 Logit Scale: 59.976 Class_loss: 6.6615 (6.2209) Contrastive_loss: 3.3363 (3.7464) Loss: 9.9978 (9.9673)
|
| 224 |
+
2025-07-30,00:56:39 | INFO | Train Epoch: 0 [ 56639488/128008192 (44%)] Data (t): 5.393 Batch (t): 9.137, 1801.61/s, 225.202/s/gpu LR: 0.000909 Logit Scale: 59.634 Class_loss: 5.3685 (6.1905) Contrastive_loss: 0.65861 (3.6361) Loss: 6.0271 (9.8266)
|
| 225 |
+
2025-07-30,01:16:00 | INFO | Train Epoch: 0 [ 58736640/128008192 (46%)] Data (t): 4.898 Batch (t): 9.075, 1784.52/s, 223.065/s/gpu LR: 0.000901 Logit Scale: 61.085 Class_loss: 5.5077 (6.1669) Contrastive_loss: 0.95897 (3.5438) Loss: 6.4667 (9.7107)
|
| 226 |
+
2025-07-30,01:32:37 | WARNING | Handling webdataset error (OSError('image file is truncated (104 bytes not processed)')). Ignoring.
|
| 227 |
+
2025-07-30,01:35:46 | INFO | Train Epoch: 0 [ 60833792/128008192 (48%)] Data (t): 0.795 Batch (t): 9.264, 1791.69/s, 223.961/s/gpu LR: 0.000893 Logit Scale: 62.003 Class_loss: 5.3814 (6.1407) Contrastive_loss: 0.77710 (3.4515) Loss: 6.1585 (9.5923)
|
| 228 |
+
2025-07-30,01:55:23 | INFO | Train Epoch: 0 [ 62930944/128008192 (49%)] Data (t): 0.797 Batch (t): 9.195, 1772.41/s, 221.551/s/gpu LR: 0.000884 Logit Scale: 63.077 Class_loss: 5.3720 (6.1159) Contrastive_loss: 0.80885 (3.3663) Loss: 6.1808 (9.4822)
|
| 229 |
+
2025-07-30,02:04:00 | WARNING | Handling webdataset error (OSError('image file is truncated (21 bytes not processed)')). Ignoring.
|
| 230 |
+
2025-07-30,02:14:47 | INFO | Train Epoch: 0 [ 65028096/128008192 (51%)] Data (t): 0.926 Batch (t): 9.093, 1765.13/s, 220.642/s/gpu LR: 0.000876 Logit Scale: 63.814 Class_loss: 5.3215 (6.0911) Contrastive_loss: 0.73305 (3.2840) Loss: 6.0545 (9.3751)
|
| 231 |
+
2025-07-30,02:22:41 | WARNING | Handling webdataset error (OSError('image file is truncated (47 bytes not processed)')). Ignoring.
|
| 232 |
+
2025-07-30,02:24:41 | WARNING | Handling webdataset error (OSError('image file is truncated (25 bytes not processed)')). Ignoring.
|
| 233 |
+
2025-07-30,02:25:59 | WARNING | Handling webdataset error (OSError('image file is truncated (4 bytes not processed)')). Ignoring.
|
| 234 |
+
2025-07-30,02:34:31 | INFO | Train Epoch: 0 [ 67125248/128008192 (52%)] Data (t): 0.863 Batch (t): 9.253, 1776.55/s, 222.068/s/gpu LR: 0.000867 Logit Scale: 64.432 Class_loss: 5.2893 (6.0668) Contrastive_loss: 0.65427 (3.2043) Loss: 5.9435 (9.2711)
|
| 235 |
+
2025-07-30,02:54:10 | INFO | Train Epoch: 0 [ 69222400/128008192 (54%)] Data (t): 1.602 Batch (t): 9.205, 1808.19/s, 226.024/s/gpu LR: 0.000858 Logit Scale: 65.087 Class_loss: 5.2605 (6.0431) Contrastive_loss: 0.57145 (3.1269) Loss: 5.8320 (9.1700)
|
| 236 |
+
2025-07-30,03:05:41 | WARNING | Handling webdataset error (OSError('image file is truncated (14 bytes not processed)')). Ignoring.
|
| 237 |
+
2025-07-30,03:13:50 | INFO | Train Epoch: 0 [ 71319552/128008192 (56%)] Data (t): 0.805 Batch (t): 9.223, 1787.40/s, 223.424/s/gpu LR: 0.000848 Logit Scale: 65.638 Class_loss: 5.2960 (6.0218) Contrastive_loss: 0.65354 (3.0562) Loss: 5.9496 (9.0780)
|
| 238 |
+
2025-07-30,03:33:29 | INFO | Train Epoch: 0 [ 73416704/128008192 (57%)] Data (t): 0.743 Batch (t): 9.209, 1837.13/s, 229.642/s/gpu LR: 0.000839 Logit Scale: 66.119 Class_loss: 5.2663 (6.0008) Contrastive_loss: 0.66591 (2.9898) Loss: 5.9323 (8.9906)
|
| 239 |
+
2025-07-30,03:53:22 | INFO | Train Epoch: 0 [ 75513856/128008192 (59%)] Data (t): 3.145 Batch (t): 9.323, 1936.74/s, 242.093/s/gpu LR: 0.000829 Logit Scale: 66.939 Class_loss: 5.1954 (5.9790) Contrastive_loss: 0.58237 (2.9248) Loss: 5.7778 (8.9038)
|
| 240 |
+
2025-07-30,04:13:06 | INFO | Train Epoch: 0 [ 77611008/128008192 (61%)] Data (t): 4.665 Batch (t): 9.250, 1766.17/s, 220.771/s/gpu LR: 0.000819 Logit Scale: 67.184 Class_loss: 5.2383 (5.9595) Contrastive_loss: 0.49615 (2.8608) Loss: 5.7344 (8.8204)
|
| 241 |
+
2025-07-30,04:24:16 | WARNING | Handling webdataset error (OSError('image file is truncated (96 bytes not processed)')). Ignoring.
|
| 242 |
+
2025-07-30,04:32:26 | INFO | Train Epoch: 0 [ 79708160/128008192 (62%)] Data (t): 2.696 Batch (t): 9.059, 1814.76/s, 226.845/s/gpu LR: 0.000808 Logit Scale: 67.773 Class_loss: 5.2398 (5.9411) Contrastive_loss: 0.52828 (2.8010) Loss: 5.7681 (8.7421)
|
| 243 |
+
2025-07-30,04:51:35 | INFO | Train Epoch: 0 [ 81805312/128008192 (64%)] Data (t): 4.292 Batch (t): 8.982, 1779.69/s, 222.461/s/gpu LR: 0.000798 Logit Scale: 68.562 Class_loss: 5.4065 (5.9277) Contrastive_loss: 0.81963 (2.7515) Loss: 6.2261 (8.6792)
|
| 244 |
+
2025-07-30,04:55:34 | WARNING | Handling webdataset error (OSError('image file is truncated (16 bytes not processed)')). Ignoring.
|
| 245 |
+
2025-07-30,04:59:50 | WARNING | Handling webdataset error (OSError('image file is truncated (25 bytes not processed)')). Ignoring.
|
| 246 |
+
2025-07-30,05:11:05 | INFO | Train Epoch: 0 [ 83902464/128008192 (66%)] Data (t): 0.794 Batch (t): 9.135, 1790.11/s, 223.764/s/gpu LR: 0.000787 Logit Scale: 68.914 Class_loss: 5.3487 (5.9136) Contrastive_loss: 0.51328 (2.6969) Loss: 5.8620 (8.6105)
|
| 247 |
+
2025-07-30,05:29:22 | WARNING | Handling webdataset error (OSError('image file is truncated (42 bytes not processed)')). Ignoring.
|
| 248 |
+
2025-07-30,05:30:37 | INFO | Train Epoch: 0 [ 85999616/128008192 (67%)] Data (t): 5.969 Batch (t): 9.158, 1780.17/s, 222.521/s/gpu LR: 0.000776 Logit Scale: 69.271 Class_loss: 5.2209 (5.8971) Contrastive_loss: 0.46011 (2.6437) Loss: 5.6810 (8.5407)
|
| 249 |
+
2025-07-30,05:48:32 | WARNING | Handling webdataset error (OSError('image file is truncated (6 bytes not processed)')). Ignoring.
|
| 250 |
+
2025-07-30,05:49:06 | WARNING | Handling webdataset error (OSError('image file is truncated (26 bytes not processed)')). Ignoring.
|
| 251 |
+
2025-07-30,05:50:17 | INFO | Train Epoch: 0 [ 88096768/128008192 (69%)] Data (t): 7.811 Batch (t): 9.217, 1768.88/s, 221.110/s/gpu LR: 0.000765 Logit Scale: 69.456 Class_loss: 5.2282 (5.8815) Contrastive_loss: 0.47160 (2.5931) Loss: 5.6998 (8.4747)
|
| 252 |
+
2025-07-30,05:52:06 | WARNING | Handling webdataset error (OSError('image file is truncated (23 bytes not processed)')). Ignoring.
|
| 253 |
+
2025-07-30,06:10:04 | INFO | Train Epoch: 0 [ 90193920/128008192 (70%)] Data (t): 7.871 Batch (t): 9.273, 1840.49/s, 230.061/s/gpu LR: 0.000753 Logit Scale: 70.328 Class_loss: 5.1981 (5.8660) Contrastive_loss: 0.42403 (2.5438) Loss: 5.6222 (8.4098)
|
| 254 |
+
2025-07-30,06:10:42 | WARNING | Handling webdataset error (OSError('image file is truncated (33 bytes not processed)')). Ignoring.
|
| 255 |
+
2025-07-30,06:20:21 | WARNING | Handling webdataset error (OSError('image file is truncated (76 bytes not processed)')). Ignoring.
|
| 256 |
+
2025-07-30,06:29:45 | INFO | Train Epoch: 0 [ 92291072/128008192 (72%)] Data (t): 5.488 Batch (t): 9.228, 1818.41/s, 227.301/s/gpu LR: 0.000742 Logit Scale: 70.678 Class_loss: 5.2045 (5.8513) Contrastive_loss: 0.48502 (2.4981) Loss: 5.6895 (8.3494)
|
| 257 |
+
2025-07-30,06:49:15 | INFO | Train Epoch: 0 [ 94388224/128008192 (74%)] Data (t): 7.773 Batch (t): 9.140, 1866.41/s, 233.302/s/gpu LR: 0.000730 Logit Scale: 71.148 Class_loss: 5.1713 (5.8365) Contrastive_loss: 0.50473 (2.4548) Loss: 5.6760 (8.2913)
|
| 258 |
+
2025-07-30,07:08:35 | INFO | Train Epoch: 0 [ 96485376/128008192 (75%)] Data (t): 5.167 Batch (t): 9.062, 1753.52/s, 219.190/s/gpu LR: 0.000718 Logit Scale: 71.641 Class_loss: 5.2101 (5.8232) Contrastive_loss: 0.43647 (2.4118) Loss: 5.6466 (8.2350)
|
| 259 |
+
2025-07-30,07:14:33 | WARNING | Handling webdataset error (OSError('image file is truncated (52 bytes not processed)')). Ignoring.
|
| 260 |
+
2025-07-30,07:20:08 | WARNING | Handling webdataset error (OSError('image file is truncated (83 bytes not processed)')). Ignoring.
|
| 261 |
+
2025-07-30,07:28:09 | INFO | Train Epoch: 0 [ 98582528/128008192 (77%)] Data (t): 0.767 Batch (t): 9.173, 1758.69/s, 219.836/s/gpu LR: 0.000706 Logit Scale: 71.938 Class_loss: 5.1758 (5.8097) Contrastive_loss: 0.46399 (2.3712) Loss: 5.6398 (8.1809)
|
| 262 |
+
2025-07-30,07:41:30 | INFO | Starting zero-shot imagenet.
|
| 263 |
+
2025-07-30,07:41:30 | INFO | Building zero-shot classifier
|
| 264 |
+
2025-07-30,07:41:37 | INFO | Using classifier
|
| 265 |
+
2025-07-30,07:55:30 | INFO | Finished zero-shot imagenet.
|
| 266 |
+
2025-07-30,07:55:30 | INFO | Eval Epoch: 0.7811340074235249 imagenet-zeroshot-val-top1: 0.2718 imagenet-zeroshot-val-top5: 0.5177
|
| 267 |
+
2025-07-30,08:01:32 | INFO | Train Epoch: 0 [100679680/128008192 (79%)] Data (t): 8.482 Batch (t): 15.647, 1802.60/s, 225.325/s/gpu LR: 0.000694 Logit Scale: 72.583 Class_loss: 5.1857 (5.7970) Contrastive_loss: 0.46153 (2.3323) Loss: 5.6473 (8.1292)
|
| 268 |
+
2025-07-30,08:21:05 | INFO | Train Epoch: 0 [102776832/128008192 (80%)] Data (t): 0.752 Batch (t): 9.169, 1814.77/s, 226.846/s/gpu LR: 0.000682 Logit Scale: 72.919 Class_loss: 5.1671 (5.7844) Contrastive_loss: 0.36456 (2.2929) Loss: 5.5316 (8.0773)
|
| 269 |
+
2025-07-30,08:25:01 | WARNING | Handling webdataset error (OSError('image file is truncated (19 bytes not processed)')). Ignoring.
|
| 270 |
+
2025-07-30,08:40:41 | INFO | Train Epoch: 0 [104873984/128008192 (82%)] Data (t): 0.779 Batch (t): 9.184, 1828.98/s, 228.622/s/gpu LR: 0.000669 Logit Scale: 73.100 Class_loss: 6.7165 (5.8026) Contrastive_loss: 2.5068 (2.2971) Loss: 9.2233 (8.0997)
|
| 271 |
+
2025-07-30,09:00:22 | INFO | Train Epoch: 0 [106971136/128008192 (84%)] Data (t): 0.763 Batch (t): 9.231, 1760.63/s, 220.079/s/gpu LR: 0.000657 Logit Scale: 73.346 Class_loss: 5.1977 (5.7910) Contrastive_loss: 0.45249 (2.2616) Loss: 5.6502 (8.0526)
|
| 272 |
+
2025-07-30,09:19:48 | INFO | Train Epoch: 0 [109068288/128008192 (85%)] Data (t): 0.752 Batch (t): 9.106, 1720.61/s, 215.077/s/gpu LR: 0.000644 Logit Scale: 73.830 Class_loss: 5.1689 (5.7793) Contrastive_loss: 0.45150 (2.2275) Loss: 5.6204 (8.0067)
|
| 273 |
+
2025-07-30,09:22:17 | WARNING | Handling webdataset error (OSError('image file is truncated (37 bytes not processed)')). Ignoring.
|
| 274 |
+
2025-07-30,09:39:14 | INFO | Train Epoch: 0 [111165440/128008192 (87%)] Data (t): 1.468 Batch (t): 9.112, 1786.96/s, 223.369/s/gpu LR: 0.000631 Logit Scale: 74.325 Class_loss: 5.2192 (5.7689) Contrastive_loss: 0.44261 (2.1944) Loss: 5.6619 (7.9633)
|
| 275 |
+
2025-07-30,09:58:51 | INFO | Train Epoch: 0 [113262592/128008192 (88%)] Data (t): 0.780 Batch (t): 9.195, 1893.25/s, 236.656/s/gpu LR: 0.000618 Logit Scale: 74.507 Class_loss: 5.2485 (5.7594) Contrastive_loss: 0.36742 (2.1612) Loss: 5.6159 (7.9206)
|
| 276 |
+
2025-07-30,10:13:01 | WARNING | Handling webdataset error (OSError('image file is truncated (20 bytes not processed)')). Ignoring.
|
| 277 |
+
2025-07-30,10:18:33 | INFO | Train Epoch: 0 [115359744/128008192 (90%)] Data (t): 0.731 Batch (t): 9.235, 1747.46/s, 218.432/s/gpu LR: 0.000605 Logit Scale: 74.770 Class_loss: 5.2419 (5.7502) Contrastive_loss: 0.53396 (2.1321) Loss: 5.7759 (7.8823)
|
| 278 |
+
2025-07-30,10:33:26 | WARNING | Handling webdataset error (OSError('image file is truncated (21 bytes not processed)')). Ignoring.
|
| 279 |
+
2025-07-30,10:35:35 | WARNING | Handling webdataset error (OSError('image file is truncated (6 bytes not processed)')). Ignoring.
|
| 280 |
+
2025-07-30,10:38:12 | INFO | Train Epoch: 0 [117456896/128008192 (92%)] Data (t): 1.920 Batch (t): 9.211, 1749.86/s, 218.733/s/gpu LR: 0.000592 Logit Scale: 75.276 Class_loss: 5.1498 (5.7397) Contrastive_loss: 0.35652 (2.1010) Loss: 5.5063 (7.8407)
|
| 281 |
+
2025-07-30,10:42:21 | WARNING | Handling webdataset error (OSError('image file is truncated (18 bytes not processed)')). Ignoring.
|
| 282 |
+
2025-07-30,10:58:00 | INFO | Train Epoch: 0 [119554048/128008192 (93%)] Data (t): 7.872 Batch (t): 9.278, 1735.09/s, 216.886/s/gpu LR: 0.000579 Logit Scale: 75.568 Class_loss: 5.2115 (5.7306) Contrastive_loss: 0.41280 (2.0719) Loss: 5.6243 (7.8024)
|
| 283 |
+
2025-07-30,11:05:43 | WARNING | Handling webdataset error (OSError('image file is truncated (28 bytes not processed)')). Ignoring.
|
| 284 |
+
2025-07-30,11:11:41 | WARNING | Handling webdataset error (OSError('image file is truncated (1 bytes not processed)')). Ignoring.
|
| 285 |
+
2025-07-30,11:17:35 | INFO | Train Epoch: 0 [121651200/128008192 (95%)] Data (t): 7.796 Batch (t): 9.179, 1810.93/s, 226.366/s/gpu LR: 0.000566 Logit Scale: 75.750 Class_loss: 6.4692 (5.7431) Contrastive_loss: 2.3668 (2.0769) Loss: 8.8360 (7.8200)
|
| 286 |
+
2025-07-30,11:36:50 | INFO | Train Epoch: 0 [123748352/128008192 (97%)] Data (t): 7.668 Batch (t): 9.024, 1797.51/s, 224.689/s/gpu LR: 0.000553 Logit Scale: 76.126 Class_loss: 5.1989 (5.7340) Contrastive_loss: 0.35736 (2.0482) Loss: 5.5563 (7.7822)
|
| 287 |
+
2025-07-30,11:44:05 | WARNING | Handling webdataset error (OSError('image file is truncated (181 bytes not processed)')). Ignoring.
|
| 288 |
+
2025-07-30,11:56:16 | INFO | Train Epoch: 0 [125845504/128008192 (98%)] Data (t): 7.726 Batch (t): 9.106, 1799.58/s, 224.947/s/gpu LR: 0.000540 Logit Scale: 76.304 Class_loss: 5.8169 (5.7354) Contrastive_loss: 1.2559 (2.0352) Loss: 7.0728 (7.7706)
|
| 289 |
+
2025-07-30,12:10:46 | WARNING | Handling webdataset error (OSError('image file is truncated (31 bytes not processed)')). Ignoring.
|
| 290 |
+
2025-07-30,12:15:37 | INFO | Train Epoch: 0 [127942656/128008192 (100%)] Data (t): 4.062 Batch (t): 9.077, 1904.57/s, 238.071/s/gpu LR: 0.000526 Logit Scale: 76.697 Class_loss: 5.1482 (5.7259) Contrastive_loss: 0.40732 (2.0090) Loss: 5.5555 (7.7349)
|
| 291 |
+
2025-07-30,12:16:11 | INFO | Train Epoch: 0 [128008192/128008192 (100%)] Data (t): 0.810 Batch (t): 8.332, 2012.44/s, 251.555/s/gpu LR: 0.000526 Logit Scale: 76.688 Class_loss: 5.1629 (5.7170) Contrastive_loss: 0.35835 (1.9828) Loss: 5.5213 (7.6997)
|
| 292 |
+
2025-07-30,12:16:18 | INFO | Start epoch 1
|
| 293 |
+
2025-07-30,12:16:30 | INFO | Train Epoch: 1 [ 16384/128008192 (0%)] Data (t): 10.178 Batch (t): 11.514, 1422.95/s, 177.868/s/gpu LR: 0.000526 Logit Scale: 76.693 Class_loss: 5.1417 (5.1417) Contrastive_loss: 0.30745 (0.30745) Loss: 5.4491 (5.4491)
|
| 294 |
+
2025-07-30,12:23:19 | WARNING | Handling webdataset error (OSError('image file is truncated (63 bytes not processed)')). Ignoring.
|
| 295 |
+
2025-07-30,12:29:03 | WARNING | Handling webdataset error (OSError('image file is truncated (208 bytes not processed)')). Ignoring.
|
| 296 |
+
2025-07-30,12:35:48 | INFO | Train Epoch: 1 [ 2113536/128008192 (2%)] Data (t): 2.200 Batch (t): 9.053, 1810.52/s, 226.315/s/gpu LR: 0.000513 Logit Scale: 76.879 Class_loss: 6.4648 (5.8032) Contrastive_loss: 2.4812 (1.3943) Loss: 8.9460 (7.1976)
|
| 297 |
+
2025-07-30,12:36:18 | WARNING | Handling webdataset error (OSError('image file is truncated (77 bytes not processed)')). Ignoring.
|
| 298 |
+
2025-07-30,12:55:29 | INFO | Train Epoch: 1 [ 4210688/128008192 (3%)] Data (t): 0.771 Batch (t): 9.222, 1804.20/s, 225.525/s/gpu LR: 0.000499 Logit Scale: 76.998 Class_loss: 5.4737 (5.6934) Contrastive_loss: 0.69538 (1.1613) Loss: 6.1691 (6.8547)
|
| 299 |
+
2025-07-30,13:15:02 | INFO | Train Epoch: 1 [ 6307840/128008192 (5%)] Data (t): 0.798 Batch (t): 9.162, 1799.18/s, 224.897/s/gpu LR: 0.000486 Logit Scale: 77.526 Class_loss: 6.5512 (5.9078) Contrastive_loss: 2.7640 (1.5620) Loss: 9.3152 (7.4699)
|
| 300 |
+
2025-07-30,13:27:53 | WARNING | Handling webdataset error (OSError('image file is truncated (64 bytes not processed)')). Ignoring.
|
| 301 |
+
2025-07-30,13:34:43 | INFO | Train Epoch: 1 [ 8404992/128008192 (7%)] Data (t): 1.190 Batch (t): 9.229, 1730.89/s, 216.361/s/gpu LR: 0.000473 Logit Scale: 77.787 Class_loss: 5.3806 (5.8024) Contrastive_loss: 0.58724 (1.3671) Loss: 5.9678 (7.1695)
|
| 302 |
+
2025-07-30,13:46:38 | WARNING | Handling webdataset error (OSError('image file is truncated (17 bytes not processed)')). Ignoring.
|
| 303 |
+
2025-07-30,13:54:22 | INFO | Train Epoch: 1 [ 10502144/128008192 (8%)] Data (t): 0.740 Batch (t): 9.210, 1752.84/s, 219.106/s/gpu LR: 0.000460 Logit Scale: 78.214 Class_loss: 5.1424 (5.6924) Contrastive_loss: 0.32455 (1.1933) Loss: 5.4670 (6.8857)
|
| 304 |
+
2025-07-30,14:14:06 | INFO | Train Epoch: 1 [ 12599296/128008192 (10%)] Data (t): 0.745 Batch (t): 9.249, 1854.69/s, 231.837/s/gpu LR: 0.000446 Logit Scale: 78.576 Class_loss: 5.8606 (5.7164) Contrastive_loss: 1.3381 (1.2140) Loss: 7.1987 (6.9304)
|
| 305 |
+
2025-07-30,14:14:54 | WARNING | Handling webdataset error (OSError('image file is truncated (1 bytes not processed)')). Ignoring.
|
| 306 |
+
2025-07-30,14:33:31 | INFO | Train Epoch: 1 [ 14696448/128008192 (11%)] Data (t): 0.754 Batch (t): 9.107, 1804.85/s, 225.606/s/gpu LR: 0.000433 Logit Scale: 78.993 Class_loss: 5.9094 (5.7406) Contrastive_loss: 1.5511 (1.2561) Loss: 7.4605 (6.9967)
|
| 307 |
+
2025-07-30,14:53:17 | INFO | Train Epoch: 1 [ 16793600/128008192 (13%)] Data (t): 0.883 Batch (t): 9.261, 1754.21/s, 219.276/s/gpu LR: 0.000420 Logit Scale: 79.199 Class_loss: 5.1589 (5.6759) Contrastive_loss: 0.32825 (1.1530) Loss: 5.4872 (6.8290)
|
| 308 |
+
2025-07-30,15:12:16 | WARNING | Handling webdataset error (OSError('image file is truncated (95 bytes not processed)')). Ignoring.
|
| 309 |
+
2025-07-30,15:13:05 | INFO | Train Epoch: 1 [ 18890752/128008192 (15%)] Data (t): 0.751 Batch (t): 9.287, 1788.85/s, 223.607/s/gpu LR: 0.000407 Logit Scale: 79.671 Class_loss: 5.1863 (5.6270) Contrastive_loss: 0.40150 (1.0779) Loss: 5.5878 (6.7048)
|
| 310 |
+
2025-07-30,15:21:48 | WARNING | Handling webdataset error (OSError('image file is truncated (25 bytes not processed)')). Ignoring.
|
| 311 |
+
2025-07-30,15:32:43 | INFO | Train Epoch: 1 [ 20987904/128008192 (16%)] Data (t): 0.783 Batch (t): 9.200, 1783.05/s, 222.881/s/gpu LR: 0.000394 Logit Scale: 79.999 Class_loss: 5.1497 (5.5836) Contrastive_loss: 0.26721 (1.0042) Loss: 5.4169 (6.5878)
|
| 312 |
+
2025-07-30,15:37:02 | WARNING | Handling webdataset error (OSError('image file is truncated (16 bytes not processed)')). Ignoring.
|
| 313 |
+
2025-07-30,15:52:10 | INFO | Train Epoch: 1 [ 23085056/128008192 (18%)] Data (t): 3.830 Batch (t): 9.113, 1872.06/s, 234.007/s/gpu LR: 0.000381 Logit Scale: 80.358 Class_loss: 5.1666 (5.5488) Contrastive_loss: 0.31780 (0.94699) Loss: 5.4844 (6.4958)
|
| 314 |
+
2025-07-30,16:11:34 | INFO | Train Epoch: 1 [ 25182208/128008192 (20%)] Data (t): 1.875 Batch (t): 9.093, 1916.13/s, 239.516/s/gpu LR: 0.000368 Logit Scale: 80.675 Class_loss: 5.4657 (5.5424) Contrastive_loss: 0.71988 (0.92952) Loss: 6.1856 (6.4719)
|
| 315 |
+
2025-07-30,16:31:04 | INFO | Train Epoch: 1 [ 27279360/128008192 (21%)] Data (t): 0.817 Batch (t): 9.147, 1768.28/s, 221.035/s/gpu LR: 0.000355 Logit Scale: 80.840 Class_loss: 5.2800 (5.5237) Contrastive_loss: 0.28957 (0.88381) Loss: 5.5696 (6.4075)
|
| 316 |
+
2025-07-30,16:38:14 | WARNING | Handling webdataset error (OSError('image file is truncated (18 bytes not processed)')). Ignoring.
|
| 317 |
+
2025-07-30,16:46:17 | WARNING | Handling webdataset error (OSError('image file is truncated (21 bytes not processed)')). Ignoring.
|
| 318 |
+
2025-07-30,16:50:44 | INFO | Train Epoch: 1 [ 29376512/128008192 (23%)] Data (t): 1.281 Batch (t): 9.213, 1779.58/s, 222.448/s/gpu LR: 0.000343 Logit Scale: 81.311 Class_loss: 5.1802 (5.5008) Contrastive_loss: 0.32539 (0.84658) Loss: 5.5056 (6.3474)
|
| 319 |
+
2025-07-30,16:52:42 | WARNING | Handling webdataset error (OSError('image file is truncated (34 bytes not processed)')). Ignoring.
|
| 320 |
+
2025-07-30,16:54:01 | WARNING | Handling webdataset error (OSError('image file is truncated (7 bytes not processed)')). Ignoring.
|
| 321 |
+
2025-07-30,16:54:06 | WARNING | Handling webdataset error (OSError('image file is truncated (1 bytes not processed)')). Ignoring.
|
| 322 |
+
2025-07-30,17:10:53 | INFO | Train Epoch: 1 [ 31473664/128008192 (25%)] Data (t): 1.766 Batch (t): 9.451, 1729.53/s, 216.192/s/gpu LR: 0.000330 Logit Scale: 81.607 Class_loss: 6.9039 (5.5885) Contrastive_loss: 2.4584 (0.94732) Loss: 9.3623 (6.5358)
|
| 323 |
+
2025-07-30,17:20:47 | WARNING | Handling webdataset error (OSError('image file is truncated (19 bytes not processed)')). Ignoring.
|
| 324 |
+
2025-07-30,17:29:11 | WARNING | Handling webdataset error (OSError('image file is truncated (26 bytes not processed)')). Ignoring.
|
| 325 |
+
2025-07-30,17:30:23 | INFO | Train Epoch: 1 [ 33570816/128008192 (26%)] Data (t): 2.426 Batch (t): 9.138, 1766.45/s, 220.807/s/gpu LR: 0.000318 Logit Scale: 81.950 Class_loss: 5.0891 (5.5591) Contrastive_loss: 0.23409 (0.90536) Loss: 5.3232 (6.4645)
|
| 326 |
+
2025-07-30,17:43:43 | WARNING | Handling webdataset error (OSError('image file is truncated (20 bytes not processed)')). Ignoring.
|
| 327 |
+
2025-07-30,17:50:10 | INFO | Train Epoch: 1 [ 35667968/128008192 (28%)] Data (t): 1.015 Batch (t): 9.276, 1801.47/s, 225.184/s/gpu LR: 0.000305 Logit Scale: 82.347 Class_loss: 5.0617 (5.5315) Contrastive_loss: 0.27727 (0.87047) Loss: 5.3389 (6.4019)
|
| 328 |
+
2025-07-30,17:55:36 | WARNING | Handling webdataset error (OSError('image file is truncated (120 bytes not processed)')). Ignoring.
|
| 329 |
+
2025-07-30,18:09:39 | INFO | Train Epoch: 1 [ 37765120/128008192 (30%)] Data (t): 0.867 Batch (t): 9.131, 1797.14/s, 224.643/s/gpu LR: 0.000293 Logit Scale: 82.472 Class_loss: 5.0880 (5.5081) Contrastive_loss: 0.28829 (0.83983) Loss: 5.3763 (6.3480)
|
| 330 |
+
2025-07-30,18:29:15 | INFO | Train Epoch: 1 [ 39862272/128008192 (31%)] Data (t): 0.750 Batch (t): 9.187, 1804.15/s, 225.518/s/gpu LR: 0.000281 Logit Scale: 82.738 Class_loss: 5.0583 (5.4856) Contrastive_loss: 0.27749 (0.81171) Loss: 5.3358 (6.2973)
|
| 331 |
+
2025-07-30,18:42:14 | WARNING | Handling webdataset error (OSError('image file is truncated (92 bytes not processed)')). Ignoring.
|
| 332 |
+
2025-07-30,18:44:44 | WARNING | Handling webdataset error (OSError('image file is truncated (40 bytes not processed)')). Ignoring.
|
| 333 |
+
2025-07-30,18:48:47 | INFO | Train Epoch: 1 [ 41959424/128008192 (33%)] Data (t): 1.080 Batch (t): 9.158, 1793.61/s, 224.201/s/gpu LR: 0.000269 Logit Scale: 83.087 Class_loss: 5.3104 (5.4773) Contrastive_loss: 0.46519 (0.79521) Loss: 5.7756 (6.2725)
|
| 334 |
+
2025-07-30,19:08:21 | INFO | Train Epoch: 1 [ 44056576/128008192 (34%)] Data (t): 0.748 Batch (t): 9.170, 1821.18/s, 227.648/s/gpu LR: 0.000258 Logit Scale: 83.449 Class_loss: 5.1138 (5.4608) Contrastive_loss: 0.26201 (0.77097) Loss: 5.3758 (6.2317)
|
| 335 |
+
2025-07-30,19:16:13 | WARNING | Handling webdataset error (OSError('image file is truncated (3 bytes not processed)')). Ignoring.
|
| 336 |
+
2025-07-30,19:27:59 | INFO | Train Epoch: 1 [ 46153728/128008192 (36%)] Data (t): 0.747 Batch (t): 9.200, 1789.51/s, 223.688/s/gpu LR: 0.000246 Logit Scale: 83.762 Class_loss: 5.4930 (5.4622) Contrastive_loss: 0.71842 (0.76869) Loss: 6.2114 (6.2309)
|
| 337 |
+
2025-07-30,19:47:43 | INFO | Train Epoch: 1 [ 48250880/128008192 (38%)] Data (t): 0.769 Batch (t): 9.249, 1791.98/s, 223.997/s/gpu LR: 0.000235 Logit Scale: 83.984 Class_loss: 5.1879 (5.4507) Contrastive_loss: 0.31677 (0.74986) Loss: 5.5047 (6.2006)
|
| 338 |
+
2025-07-30,19:52:00 | WARNING | Handling webdataset error (OSError('image file is truncated (48 bytes not processed)')). Ignoring.
|
| 339 |
+
2025-07-30,20:07:44 | INFO | Train Epoch: 1 [ 50348032/128008192 (39%)] Data (t): 1.379 Batch (t): 9.386, 1588.87/s, 198.608/s/gpu LR: 0.000223 Logit Scale: 84.190 Class_loss: 5.0571 (5.4350) Contrastive_loss: 0.26794 (0.73058) Loss: 5.3251 (6.1656)
|
| 340 |
+
2025-07-30,20:17:05 | WARNING | Handling webdataset error (OSError('image file is truncated (7 bytes not processed)')). Ignoring.
|
| 341 |
+
2025-07-30,20:25:44 | WARNING | Handling webdataset error (OSError('image file is truncated (48 bytes not processed)')). Ignoring.
|
| 342 |
+
2025-07-30,20:25:56 | WARNING | Handling webdataset error (OSError('image file is truncated (11 bytes not processed)')). Ignoring.
|
| 343 |
+
2025-07-30,20:27:24 | INFO | Train Epoch: 1 [ 52445184/128008192 (41%)] Data (t): 2.591 Batch (t): 9.217, 1778.38/s, 222.297/s/gpu LR: 0.000212 Logit Scale: 84.447 Class_loss: 5.0863 (5.4216) Contrastive_loss: 0.25978 (0.71247) Loss: 5.3461 (6.1341)
|
| 344 |
+
2025-07-30,20:32:52 | WARNING | Handling webdataset error (OSError('image file is truncated (6 bytes not processed)')). Ignoring.
|
| 345 |
+
2025-07-30,20:39:18 | WARNING | Handling webdataset error (OSError('image file is truncated (104 bytes not processed)')). Ignoring.
|
| 346 |
+
2025-07-30,20:47:09 | INFO | Train Epoch: 1 [ 54542336/128008192 (43%)] Data (t): 3.031 Batch (t): 9.256, 1753.91/s, 219.239/s/gpu LR: 0.000202 Logit Scale: 84.658 Class_loss: 5.0597 (5.4082) Contrastive_loss: 0.26202 (0.69579) Loss: 5.3217 (6.1040)
|
| 347 |
+
2025-07-30,20:54:57 | WARNING | Handling webdataset error (OSError('image file is truncated (10 bytes not processed)')). Ignoring.
|
| 348 |
+
2025-07-30,21:06:34 | INFO | Train Epoch: 1 [ 56639488/128008192 (44%)] Data (t): 3.500 Batch (t): 9.106, 1779.76/s, 222.470/s/gpu LR: 0.000191 Logit Scale: 84.879 Class_loss: 5.0408 (5.3951) Contrastive_loss: 0.26262 (0.68032) Loss: 5.3034 (6.0754)
|
| 349 |
+
2025-07-30,21:26:07 | INFO | Train Epoch: 1 [ 58736640/128008192 (46%)] Data (t): 0.891 Batch (t): 9.161, 1875.86/s, 234.483/s/gpu LR: 0.000181 Logit Scale: 85.106 Class_loss: 6.1738 (5.4219) Contrastive_loss: 1.5362 (0.70983) Loss: 7.7101 (6.1317)
|
| 350 |
+
2025-07-30,21:45:21 | WARNING | Handling webdataset error (OSError('image file is truncated (13 bytes not processed)')). Ignoring.
|
| 351 |
+
2025-07-30,21:45:36 | INFO | Train Epoch: 1 [ 60833792/128008192 (48%)] Data (t): 1.272 Batch (t): 9.138, 1639.20/s, 204.900/s/gpu LR: 0.000171 Logit Scale: 85.394 Class_loss: 5.1426 (5.4126) Contrastive_loss: 0.27430 (0.69532) Loss: 5.4169 (6.1079)
|
| 352 |
+
2025-07-30,21:49:35 | WARNING | Handling webdataset error (OSError('image file is truncated (50 bytes not processed)')). Ignoring.
|
| 353 |
+
2025-07-30,22:05:06 | INFO | Train Epoch: 1 [ 62930944/128008192 (49%)] Data (t): 0.823 Batch (t): 9.140, 1770.98/s, 221.373/s/gpu LR: 0.000161 Logit Scale: 85.651 Class_loss: 6.5394 (5.4490) Contrastive_loss: 1.7864 (0.73051) Loss: 8.3258 (6.1795)
|
| 354 |
+
2025-07-30,22:12:26 | WARNING | Handling webdataset error (OSError('image file is truncated (47 bytes not processed)')). Ignoring.
|
| 355 |
+
2025-07-30,22:24:54 | INFO | Train Epoch: 1 [ 65028096/128008192 (51%)] Data (t): 0.880 Batch (t): 9.282, 1757.56/s, 219.695/s/gpu LR: 0.000151 Logit Scale: 85.873 Class_loss: 6.3545 (5.4772) Contrastive_loss: 1.2459 (0.74662) Loss: 7.6003 (6.2239)
|
| 356 |
+
2025-07-30,22:32:26 | WARNING | Handling webdataset error (OSError('image file is truncated (21 bytes not processed)')). Ignoring.
|
| 357 |
+
2025-07-30,22:44:25 | INFO | Train Epoch: 1 [ 67125248/128008192 (52%)] Data (t): 1.790 Batch (t): 9.147, 1865.47/s, 233.184/s/gpu LR: 0.000142 Logit Scale: 86.125 Class_loss: 5.0354 (5.4639) Contrastive_loss: 0.21756 (0.73058) Loss: 5.2530 (6.1944)
|
| 358 |
+
2025-07-30,22:45:33 | WARNING | Handling webdataset error (OSError('image file is truncated (47 bytes not processed)')). Ignoring.
|
| 359 |
+
2025-07-30,22:47:35 | WARNING | Handling webdataset error (OSError('image file is truncated (6 bytes not processed)')). Ignoring.
|
| 360 |
+
2025-07-30,23:04:08 | INFO | Train Epoch: 1 [ 69222400/128008192 (54%)] Data (t): 7.315 Batch (t): 9.241, 1765.01/s, 220.626/s/gpu LR: 0.000133 Logit Scale: 86.440 Class_loss: 5.5263 (5.4657) Contrastive_loss: 0.67920 (0.72907) Loss: 6.2055 (6.1948)
|
| 361 |
+
2025-07-30,23:05:20 | WARNING | Handling webdataset error (OSError('image file is truncated (7 bytes not processed)')). Ignoring.
|
| 362 |
+
2025-07-30,23:06:56 | WARNING | Handling webdataset error (OSError('image file is truncated (0 bytes not processed)')). Ignoring.
|
| 363 |
+
2025-07-30,23:23:30 | INFO | Train Epoch: 1 [ 71319552/128008192 (56%)] Data (t): 5.688 Batch (t): 9.078, 1786.08/s, 223.260/s/gpu LR: 0.000124 Logit Scale: 86.508 Class_loss: 5.0361 (5.4534) Contrastive_loss: 0.23379 (0.71492) Loss: 5.2699 (6.1683)
|
| 364 |
+
2025-07-30,23:30:00 | INFO | Starting zero-shot imagenet.
|
| 365 |
+
2025-07-30,23:30:00 | INFO | Building zero-shot classifier
|
| 366 |
+
2025-07-30,23:30:07 | INFO | Using classifier
|
| 367 |
+
2025-07-30,23:48:20 | INFO | Finished zero-shot imagenet.
|
| 368 |
+
2025-07-30,23:48:20 | INFO | Eval Epoch: 1.562396006655574 imagenet-zeroshot-val-top1: 0.4226 imagenet-zeroshot-val-top5: 0.7072
|
| 369 |
+
2025-07-31,00:01:23 | INFO | Train Epoch: 1 [ 73416704/128008192 (57%)] Data (t): 10.042 Batch (t): 17.755, 1791.23/s, 223.904/s/gpu LR: 0.000115 Logit Scale: 86.764 Class_loss: 5.0707 (5.4428) Contrastive_loss: 0.21527 (0.70104) Loss: 5.2860 (6.1438)
|
| 370 |
+
2025-07-31,00:17:05 | WARNING | Handling webdataset error (OSError('image file is truncated (26 bytes not processed)')). Ignoring.
|
| 371 |
+
2025-07-31,00:21:01 | INFO | Train Epoch: 1 [ 75513856/128008192 (59%)] Data (t): 0.859 Batch (t): 9.205, 1790.27/s, 223.784/s/gpu LR: 0.000107 Logit Scale: 87.030 Class_loss: 5.0820 (5.4330) Contrastive_loss: 0.32980 (0.69101) Loss: 5.4118 (6.1241)
|
| 372 |
+
2025-07-31,00:39:47 | WARNING | Handling webdataset error (OSError('image file is truncated (16 bytes not processed)')). Ignoring.
|
| 373 |
+
2025-07-31,00:40:52 | INFO | Train Epoch: 1 [ 77611008/128008192 (61%)] Data (t): 0.900 Batch (t): 9.303, 1781.66/s, 222.708/s/gpu LR: 0.000099 Logit Scale: 87.299 Class_loss: 6.3186 (5.4563) Contrastive_loss: 1.4991 (0.71227) Loss: 7.8177 (6.1686)
|
| 374 |
+
2025-07-31,01:00:35 | INFO | Train Epoch: 1 [ 79708160/128008192 (62%)] Data (t): 0.931 Batch (t): 9.242, 1752.22/s, 219.027/s/gpu LR: 0.000091 Logit Scale: 87.447 Class_loss: 5.0496 (5.4459) Contrastive_loss: 0.24129 (0.70020) Loss: 5.2909 (6.1461)
|
| 375 |
+
2025-07-31,01:13:12 | WARNING | Handling webdataset error (OSError('image file is truncated (23 bytes not processed)')). Ignoring.
|
| 376 |
+
2025-07-31,01:14:33 | WARNING | Handling webdataset error (OSError('image file is truncated (26 bytes not processed)')). Ignoring.
|
| 377 |
+
2025-07-31,01:20:27 | INFO | Train Epoch: 1 [ 81805312/128008192 (64%)] Data (t): 1.007 Batch (t): 9.312, 1768.53/s, 221.066/s/gpu LR: 0.000083 Logit Scale: 87.651 Class_loss: 5.0618 (5.4363) Contrastive_loss: 0.23027 (0.68845) Loss: 5.2920 (6.1248)
|
| 378 |
+
2025-07-31,01:25:23 | WARNING | Handling webdataset error (OSError('image file is truncated (32 bytes not processed)')). Ignoring.
|
| 379 |
+
2025-07-31,01:25:36 | WARNING | Handling webdataset error (OSError('image file is truncated (14 bytes not processed)')). Ignoring.
|
| 380 |
+
2025-07-31,01:28:41 | WARNING | Handling webdataset error (OSError('image file is truncated (342 bytes not processed)')). Ignoring.
|
| 381 |
+
2025-07-31,01:39:50 | INFO | Train Epoch: 1 [ 83902464/128008192 (66%)] Data (t): 5.904 Batch (t): 9.088, 1730.78/s, 216.347/s/gpu LR: 0.000076 Logit Scale: 87.796 Class_loss: 5.0705 (5.4274) Contrastive_loss: 0.18519 (0.67618) Loss: 5.2557 (6.1036)
|
| 382 |
+
2025-07-31,01:59:22 | INFO | Train Epoch: 1 [ 85999616/128008192 (67%)] Data (t): 5.892 Batch (t): 9.159, 1784.61/s, 223.076/s/gpu LR: 0.000069 Logit Scale: 87.954 Class_loss: 5.6108 (5.4318) Contrastive_loss: 0.71363 (0.67707) Loss: 6.3244 (6.1088)
|
| 383 |
+
2025-07-31,02:06:09 | WARNING | Handling webdataset error (OSError('image file is truncated (57 bytes not processed)')). Ignoring.
|
| 384 |
+
2025-07-31,02:19:21 | INFO | Train Epoch: 1 [ 88096768/128008192 (69%)] Data (t): 1.014 Batch (t): 9.361, 1761.50/s, 220.188/s/gpu LR: 0.000063 Logit Scale: 88.085 Class_loss: 5.3163 (5.4291) Contrastive_loss: 0.38891 (0.67037) Loss: 5.7052 (6.0994)
|
| 385 |
+
2025-07-31,02:24:52 | WARNING | Handling webdataset error (OSError('image file is truncated (19 bytes not processed)')). Ignoring.
|
| 386 |
+
2025-07-31,02:38:55 | INFO | Train Epoch: 1 [ 90193920/128008192 (70%)] Data (t): 1.153 Batch (t): 9.178, 1799.41/s, 224.927/s/gpu LR: 0.000056 Logit Scale: 88.198 Class_loss: 5.0448 (5.4203) Contrastive_loss: 0.19049 (0.65946) Loss: 5.2353 (6.0798)
|
| 387 |
+
2025-07-31,02:58:15 | INFO | Train Epoch: 1 [ 92291072/128008192 (72%)] Data (t): 2.787 Batch (t): 9.058, 1783.12/s, 222.890/s/gpu LR: 0.000050 Logit Scale: 88.366 Class_loss: 5.0728 (5.4126) Contrastive_loss: 0.19622 (0.64917) Loss: 5.2690 (6.0618)
|
| 388 |
+
2025-07-31,03:17:57 | INFO | Train Epoch: 1 [ 94388224/128008192 (74%)] Data (t): 1.682 Batch (t): 9.238, 1733.19/s, 216.649/s/gpu LR: 0.000045 Logit Scale: 88.482 Class_loss: 5.0785 (5.4054) Contrastive_loss: 0.22424 (0.63993) Loss: 5.3027 (6.0453)
|
| 389 |
+
2025-07-31,03:37:38 | INFO | Train Epoch: 1 [ 96485376/128008192 (75%)] Data (t): 0.987 Batch (t): 9.220, 1764.73/s, 220.591/s/gpu LR: 0.000039 Logit Scale: 88.626 Class_loss: 5.0387 (5.3976) Contrastive_loss: 0.24263 (0.63147) Loss: 5.2814 (6.0290)
|
| 390 |
+
2025-07-31,03:46:02 | WARNING | Handling webdataset error (OSError('image file is truncated (42 bytes not processed)')). Ignoring.
|
| 391 |
+
2025-07-31,03:57:00 | INFO | Train Epoch: 1 [ 98582528/128008192 (77%)] Data (t): 6.965 Batch (t): 9.082, 1778.54/s, 222.318/s/gpu LR: 0.000034 Logit Scale: 88.754 Class_loss: 5.0011 (5.3893) Contrastive_loss: 0.20148 (0.62252) Loss: 5.2025 (6.0118)
|
| 392 |
+
2025-07-31,04:17:07 | INFO | Train Epoch: 1 [100679680/128008192 (79%)] Data (t): 1.366 Batch (t): 9.427, 1945.84/s, 243.230/s/gpu LR: 0.000030 Logit Scale: 88.839 Class_loss: 5.0522 (5.3824) Contrastive_loss: 0.22186 (0.61434) Loss: 5.2741 (5.9968)
|
| 393 |
+
2025-07-31,04:32:15 | WARNING | Handling webdataset error (OSError('image file is truncated (10 bytes not processed)')). Ignoring.
|
| 394 |
+
2025-07-31,04:36:19 | INFO | Train Epoch: 1 [102776832/128008192 (80%)] Data (t): 6.325 Batch (t): 9.004, 1827.33/s, 228.417/s/gpu LR: 0.000025 Logit Scale: 88.935 Class_loss: 5.0604 (5.3760) Contrastive_loss: 0.20145 (0.60608) Loss: 5.2619 (5.9821)
|
| 395 |
+
2025-07-31,04:56:08 | INFO | Train Epoch: 1 [104873984/128008192 (82%)] Data (t): 0.948 Batch (t): 9.285, 1734.58/s, 216.822/s/gpu LR: 0.000021 Logit Scale: 89.013 Class_loss: 6.3416 (5.3949) Contrastive_loss: 1.4977 (0.62357) Loss: 7.8394 (6.0185)
|
| 396 |
+
2025-07-31,05:05:46 | WARNING | Handling webdataset error (OSError('image file is truncated (44 bytes not processed)')). Ignoring.
|
| 397 |
+
2025-07-31,05:15:41 | INFO | Train Epoch: 1 [106971136/128008192 (84%)] Data (t): 6.095 Batch (t): 9.163, 1809.78/s, 226.223/s/gpu LR: 0.000018 Logit Scale: 89.073 Class_loss: 5.0591 (5.3884) Contrastive_loss: 0.21908 (0.61579) Loss: 5.2781 (6.0042)
|
| 398 |
+
2025-07-31,05:35:10 | INFO | Train Epoch: 1 [109068288/128008192 (85%)] Data (t): 3.708 Batch (t): 9.137, 1772.02/s, 221.503/s/gpu LR: 0.000014 Logit Scale: 89.131 Class_loss: 5.0414 (5.3819) Contrastive_loss: 0.17481 (0.60747) Loss: 5.2162 (5.9894)
|
| 399 |
+
2025-07-31,05:52:14 | WARNING | Handling webdataset error (UnidentifiedImageError('cannot identify image file <_io.BytesIO object at 0x7f3f173c7b50>')). Ignoring.
|
| 400 |
+
2025-07-31,05:54:42 | INFO | Train Epoch: 1 [111165440/128008192 (87%)] Data (t): 1.713 Batch (t): 9.155, 1786.35/s, 223.294/s/gpu LR: 0.000011 Logit Scale: 89.176 Class_loss: 5.0203 (5.3752) Contrastive_loss: 0.17922 (0.59954) Loss: 5.1995 (5.9747)
|
| 401 |
+
2025-07-31,06:14:26 | INFO | Train Epoch: 1 [113262592/128008192 (88%)] Data (t): 5.083 Batch (t): 9.250, 1800.00/s, 224.999/s/gpu LR: 0.000009 Logit Scale: 89.219 Class_loss: 4.9954 (5.3683) Contrastive_loss: 0.20155 (0.59230) Loss: 5.1970 (5.9606)
|
| 402 |
+
2025-07-31,06:34:00 | INFO | Train Epoch: 1 [115359744/128008192 (90%)] Data (t): 2.914 Batch (t): 9.170, 1759.94/s, 219.993/s/gpu LR: 0.000006 Logit Scale: 89.235 Class_loss: 4.9915 (5.3616) Contrastive_loss: 0.19816 (0.58526) Loss: 5.1897 (5.9468)
|
| 403 |
+
2025-07-31,06:50:13 | WARNING | Handling webdataset error (OSError('image file is truncated (2 bytes not processed)')). Ignoring.
|
| 404 |
+
2025-07-31,06:53:33 | INFO | Train Epoch: 1 [117456896/128008192 (92%)] Data (t): 0.930 Batch (t): 9.168, 1862.12/s, 232.765/s/gpu LR: 0.000004 Logit Scale: 89.251 Class_loss: 6.1217 (5.3749) Contrastive_loss: 1.0861 (0.59405) Loss: 7.2078 (5.9690)
|
| 405 |
+
2025-07-31,07:00:47 | WARNING | Handling webdataset error (OSError('image file is truncated (18 bytes not processed)')). Ignoring.
|
| 406 |
+
2025-07-31,07:13:07 | INFO | Train Epoch: 1 [119554048/128008192 (93%)] Data (t): 0.793 Batch (t): 9.174, 1787.03/s, 223.378/s/gpu LR: 0.000003 Logit Scale: 89.264 Class_loss: 5.1341 (5.3708) Contrastive_loss: 0.27781 (0.58860) Loss: 5.4119 (5.9593)
|
| 407 |
+
2025-07-31,07:15:54 | WARNING | Handling webdataset error (OSError('image file is truncated (50 bytes not processed)')). Ignoring.
|
| 408 |
+
2025-07-31,07:22:51 | WARNING | Handling webdataset error (OSError('image file is truncated (13 bytes not processed)')). Ignoring.
|
| 409 |
+
2025-07-31,07:32:31 | INFO | Train Epoch: 1 [121651200/128008192 (95%)] Data (t): 6.510 Batch (t): 9.088, 1741.62/s, 217.703/s/gpu LR: 0.000002 Logit Scale: 89.270 Class_loss: 5.0912 (5.3660) Contrastive_loss: 0.21347 (0.58224) Loss: 5.3047 (5.9483)
|
| 410 |
+
2025-07-31,07:52:17 | INFO | Train Epoch: 1 [123748352/128008192 (97%)] Data (t): 2.778 Batch (t): 9.269, 1792.11/s, 224.013/s/gpu LR: 0.000001 Logit Scale: 89.273 Class_loss: 4.9902 (5.3598) Contrastive_loss: 0.20227 (0.57591) Loss: 5.1925 (5.9357)
|
| 411 |
+
2025-07-31,08:10:10 | WARNING | Handling webdataset error (OSError('image file is truncated (8 bytes not processed)')). Ignoring.
|
| 412 |
+
2025-07-31,08:11:52 | INFO | Train Epoch: 1 [125845504/128008192 (98%)] Data (t): 0.817 Batch (t): 9.175, 1768.06/s, 221.007/s/gpu LR: 0.000000 Logit Scale: 89.273 Class_loss: 5.0171 (5.3541) Contrastive_loss: 0.18437 (0.56949) Loss: 5.2015 (5.9236)
|
| 413 |
+
2025-07-31,08:12:15 | WARNING | Handling webdataset error (OSError('broken data stream when reading image file')). Ignoring.
|
| 414 |
+
2025-07-31,08:25:42 | WARNING | Handling webdataset error (OSError('image file is truncated (83 bytes not processed)')). Ignoring.
|
| 415 |
+
2025-07-31,08:31:37 | INFO | Train Epoch: 1 [127942656/128008192 (100%)] Data (t): 3.405 Batch (t): 9.260, 1754.02/s, 219.252/s/gpu LR: 0.000000 Logit Scale: 89.273 Class_loss: 6.1824 (5.3675) Contrastive_loss: 1.1450 (0.57877) Loss: 7.3273 (5.9463)
|
| 416 |
+
2025-07-31,08:32:14 | INFO | Train Epoch: 1 [128008192/128008192 (100%)] Data (t): 7.932 Batch (t): 9.302, 1732.22/s, 216.527/s/gpu LR: 0.000000 Logit Scale: 89.273 Class_loss: 5.2268 (5.3653) Contrastive_loss: 0.31429 (0.57457) Loss: 5.5411 (5.9398)
|
| 417 |
+
2025-07-31,08:32:22 | INFO | Starting zero-shot imagenet.
|
| 418 |
+
2025-07-31,08:32:22 | INFO | Building zero-shot classifier
|
| 419 |
+
2025-07-31,08:32:29 | INFO | Using classifier
|
| 420 |
+
2025-07-31,08:50:10 | INFO | Finished zero-shot imagenet.
|
| 421 |
+
2025-07-31,08:50:10 | INFO | Eval Epoch: 2 imagenet-zeroshot-val-top1: 0.4475 imagenet-zeroshot-val-top5: 0.7338
|
logs-lr1e-3-datacomp-rebuttal/clipcls_vit_b16_s512m_bs16k_wo_idf/params.txt
ADDED
|
@@ -0,0 +1,113 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
NDR_patch_size: 16
|
| 2 |
+
accum_freq: 1
|
| 3 |
+
aug_cfg: {}
|
| 4 |
+
batch_size: 2048
|
| 5 |
+
beta1: 0.9
|
| 6 |
+
beta2: 0.98
|
| 7 |
+
checkpoint_path: ./logs-lr1e-3-datacomp-rebuttal/clipcls_vit_b16_s512m_bs16k_wo_idf/checkpoints
|
| 8 |
+
class_loss_weight: 1.0
|
| 9 |
+
coca_caption_loss_weight: 2.0
|
| 10 |
+
coca_contrastive_loss_weight: 1.0
|
| 11 |
+
copy_codebase: False
|
| 12 |
+
csv_caption_key: title
|
| 13 |
+
csv_img_key: filepath
|
| 14 |
+
csv_separator:
|
| 15 |
+
dataset_resampled: False
|
| 16 |
+
dataset_type: webdataset
|
| 17 |
+
ddp_static_graph: True
|
| 18 |
+
debug: False
|
| 19 |
+
delete_prev_step_ckpt: True
|
| 20 |
+
delete_previous_checkpoint: False
|
| 21 |
+
device: cuda:0
|
| 22 |
+
dist_backend: nccl
|
| 23 |
+
dist_url: env://
|
| 24 |
+
distill: False
|
| 25 |
+
distill_model: None
|
| 26 |
+
distill_pretrained: None
|
| 27 |
+
distributed: True
|
| 28 |
+
epochs: 2
|
| 29 |
+
epochs_cooldown: None
|
| 30 |
+
eps: 1e-06
|
| 31 |
+
force_custom_text: False
|
| 32 |
+
force_image_size: 224
|
| 33 |
+
force_patch_dropout: None
|
| 34 |
+
force_quick_gelu: False
|
| 35 |
+
gather_with_grad: True
|
| 36 |
+
global_batch_size: 16384
|
| 37 |
+
grad_checkpointing: True
|
| 38 |
+
grad_clip_norm: None
|
| 39 |
+
horovod: False
|
| 40 |
+
image_interpolation: None
|
| 41 |
+
image_mean: None
|
| 42 |
+
image_resize_mode: None
|
| 43 |
+
image_std: None
|
| 44 |
+
imagenet_v2: None
|
| 45 |
+
imagenet_val: /mnt/bn/zilongdata-us/dataset/ILSVRC/Data/CLS-LOC/val
|
| 46 |
+
is_cls_token: True
|
| 47 |
+
local_loss: True
|
| 48 |
+
local_rank: 0
|
| 49 |
+
lock_image: False
|
| 50 |
+
lock_image_freeze_bn_stats: False
|
| 51 |
+
lock_image_unlocked_groups: 0
|
| 52 |
+
lock_text: False
|
| 53 |
+
lock_text_freeze_layer_norm: False
|
| 54 |
+
lock_text_unlocked_layers: 0
|
| 55 |
+
log_every_n_steps: 128
|
| 56 |
+
log_level: 20
|
| 57 |
+
log_local: False
|
| 58 |
+
log_path: ./logs-lr1e-3-datacomp-rebuttal/clipcls_vit_b16_s512m_bs16k_wo_idf/out.log
|
| 59 |
+
logs: ./logs-lr1e-3-datacomp-rebuttal
|
| 60 |
+
lr: 0.001
|
| 61 |
+
lr_cooldown_end: 0.0
|
| 62 |
+
lr_cooldown_power: 1.0
|
| 63 |
+
lr_scheduler: cosine
|
| 64 |
+
max_seq_len: 15000
|
| 65 |
+
model: CLIPCLS-ViT-B-16
|
| 66 |
+
name: clipcls_vit_b16_s512m_bs16k_wo_idf
|
| 67 |
+
native_dynamic_resolution: False
|
| 68 |
+
no_set_device_rank: False
|
| 69 |
+
only_class_loss: False
|
| 70 |
+
only_packing: False
|
| 71 |
+
post_train: False
|
| 72 |
+
precision: amp_bfloat16
|
| 73 |
+
pretrained:
|
| 74 |
+
pretrained_image:
|
| 75 |
+
pretrained_text:
|
| 76 |
+
rank: 0
|
| 77 |
+
remote_sync: None
|
| 78 |
+
remote_sync_frequency: 300
|
| 79 |
+
remote_sync_protocol: s3
|
| 80 |
+
report_to: wandb
|
| 81 |
+
resume: None
|
| 82 |
+
rope_attn_num_heads: 12
|
| 83 |
+
rope_model_width: 768
|
| 84 |
+
save_every_n_steps: 6104
|
| 85 |
+
save_frequency: 1
|
| 86 |
+
save_most_recent: False
|
| 87 |
+
seed: 0
|
| 88 |
+
siglip: False
|
| 89 |
+
skip_scheduler: False
|
| 90 |
+
tensorboard: False
|
| 91 |
+
tensorboard_path:
|
| 92 |
+
torchcompile: False
|
| 93 |
+
torchscript: False
|
| 94 |
+
trace: False
|
| 95 |
+
train_data: /mnt/bn/zilongdata-us/dataset/recap-datacomp-1b-webdataset/{000000..140146}.tar
|
| 96 |
+
train_data_upsampling_factors: None
|
| 97 |
+
train_num_samples: 128000000
|
| 98 |
+
use_bn_sync: False
|
| 99 |
+
use_bnb_linear: None
|
| 100 |
+
use_idf: False
|
| 101 |
+
val_data: None
|
| 102 |
+
val_frequency: 1
|
| 103 |
+
val_num_samples: None
|
| 104 |
+
val_steps: 6104
|
| 105 |
+
wandb: True
|
| 106 |
+
wandb_notes:
|
| 107 |
+
wandb_project_name: cls-clip-batch-size
|
| 108 |
+
warmup: 500
|
| 109 |
+
wd: 0.2
|
| 110 |
+
workers: 1
|
| 111 |
+
world_size: 8
|
| 112 |
+
zeroshot_frequency: 2
|
| 113 |
+
zeroshot_steps: 6104
|