Upload folder using huggingface_hub
Browse files- logs-lr1e-3-datacomp-rebuttal/clipcls_vit_b16_s512m_bs16k_weighted_14/checkpoints/epoch_1.pt +3 -0
- logs-lr1e-3-datacomp-rebuttal/clipcls_vit_b16_s512m_bs16k_weighted_14/checkpoints/epoch_2.pt +3 -0
- logs-lr1e-3-datacomp-rebuttal/clipcls_vit_b16_s512m_bs16k_weighted_14/checkpoints/results.jsonl +3 -0
- logs-lr1e-3-datacomp-rebuttal/clipcls_vit_b16_s512m_bs16k_weighted_14/out.log +421 -0
- logs-lr1e-3-datacomp-rebuttal/clipcls_vit_b16_s512m_bs16k_weighted_14/params.txt +113 -0
logs-lr1e-3-datacomp-rebuttal/clipcls_vit_b16_s512m_bs16k_weighted_14/checkpoints/epoch_1.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:af80734973d0d15c9e2d15c90b7484f4731fec62aea3aee7972de9a8f2bf7642
|
| 3 |
+
size 2252180672
|
logs-lr1e-3-datacomp-rebuttal/clipcls_vit_b16_s512m_bs16k_weighted_14/checkpoints/epoch_2.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1b90c6207f4826c875981d25bc44bc6e4f92db9c58c4a34f3190263d6969397f
|
| 3 |
+
size 2252180672
|
logs-lr1e-3-datacomp-rebuttal/clipcls_vit_b16_s512m_bs16k_weighted_14/checkpoints/results.jsonl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"imagenet-zeroshot-val-top1": 0.29674, "imagenet-zeroshot-val-top5": 0.5533}
|
| 2 |
+
{"imagenet-zeroshot-val-top1": 0.44098, "imagenet-zeroshot-val-top5": 0.72774}
|
| 3 |
+
{"imagenet-zeroshot-val-top1": 0.46938, "imagenet-zeroshot-val-top5": 0.75446}
|
logs-lr1e-3-datacomp-rebuttal/clipcls_vit_b16_s512m_bs16k_weighted_14/out.log
ADDED
|
@@ -0,0 +1,421 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
2025-07-29,17:00:37 | INFO | No latest resume checkpoint found in ./logs-lr1e-3-datacomp-rebuttal/clipcls_vit_b16_s512m_bs16k_weighted_14/checkpoints.
|
| 2 |
+
2025-07-29,17:00:48 | INFO | Running in distributed mode with multiple processes. Device: cuda:0.Process (global: 0, local 0), total 8.
|
| 3 |
+
2025-07-29,17:00:48 | INFO | Loaded CLIPCLS-ViT-B-16 model config.
|
| 4 |
+
2025-07-29,17:00:50 | INFO | Model:
|
| 5 |
+
2025-07-29,17:00:50 | INFO | CLIPCLS(
|
| 6 |
+
(visual): VisionTransformer(
|
| 7 |
+
(conv1): Conv2d(3, 768, kernel_size=(16, 16), stride=(16, 16), bias=False)
|
| 8 |
+
(patch_dropout): Identity()
|
| 9 |
+
(ln_pre): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 10 |
+
(transformer): Transformer(
|
| 11 |
+
(resblocks): ModuleList(
|
| 12 |
+
(0-11): 12 x ResidualAttentionBlock(
|
| 13 |
+
(ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 14 |
+
(attn): MultiheadAttention(
|
| 15 |
+
(out_proj): NonDynamicallyQuantizableLinear(in_features=768, out_features=768, bias=True)
|
| 16 |
+
)
|
| 17 |
+
(ls_1): Identity()
|
| 18 |
+
(ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 19 |
+
(mlp): Sequential(
|
| 20 |
+
(c_fc): Linear(in_features=768, out_features=3072, bias=True)
|
| 21 |
+
(gelu): GELU(approximate='none')
|
| 22 |
+
(c_proj): Linear(in_features=3072, out_features=768, bias=True)
|
| 23 |
+
)
|
| 24 |
+
(ls_2): Identity()
|
| 25 |
+
)
|
| 26 |
+
)
|
| 27 |
+
)
|
| 28 |
+
(ln_post): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 29 |
+
)
|
| 30 |
+
(text): TextTransformer(
|
| 31 |
+
(token_embedding): Embedding(49408, 512)
|
| 32 |
+
(transformer): Transformer(
|
| 33 |
+
(resblocks): ModuleList(
|
| 34 |
+
(0-11): 12 x ResidualAttentionBlock(
|
| 35 |
+
(ln_1): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
| 36 |
+
(attn): MultiheadAttention(
|
| 37 |
+
(out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)
|
| 38 |
+
)
|
| 39 |
+
(ls_1): Identity()
|
| 40 |
+
(ln_2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
| 41 |
+
(mlp): Sequential(
|
| 42 |
+
(c_fc): Linear(in_features=512, out_features=2048, bias=True)
|
| 43 |
+
(gelu): GELU(approximate='none')
|
| 44 |
+
(c_proj): Linear(in_features=2048, out_features=512, bias=True)
|
| 45 |
+
)
|
| 46 |
+
(ls_2): Identity()
|
| 47 |
+
)
|
| 48 |
+
)
|
| 49 |
+
)
|
| 50 |
+
(ln_final): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
| 51 |
+
)
|
| 52 |
+
(text_decoder): MixClsHead(
|
| 53 |
+
(mlps): ModuleList()
|
| 54 |
+
(ln_mlp): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 55 |
+
(text_projection): Linear(in_features=768, out_features=49408, bias=True)
|
| 56 |
+
)
|
| 57 |
+
)
|
| 58 |
+
2025-07-29,17:00:50 | INFO | Params:
|
| 59 |
+
2025-07-29,17:00:50 | INFO | NDR_patch_size: 16
|
| 60 |
+
2025-07-29,17:00:50 | INFO | accum_freq: 1
|
| 61 |
+
2025-07-29,17:00:50 | INFO | aug_cfg: {}
|
| 62 |
+
2025-07-29,17:00:50 | INFO | batch_size: 2048
|
| 63 |
+
2025-07-29,17:00:50 | INFO | beta1: 0.9
|
| 64 |
+
2025-07-29,17:00:50 | INFO | beta2: 0.98
|
| 65 |
+
2025-07-29,17:00:50 | INFO | checkpoint_path: ./logs-lr1e-3-datacomp-rebuttal/clipcls_vit_b16_s512m_bs16k_weighted_14/checkpoints
|
| 66 |
+
2025-07-29,17:00:50 | INFO | class_loss_weight: 1.4
|
| 67 |
+
2025-07-29,17:00:50 | INFO | coca_caption_loss_weight: 2.0
|
| 68 |
+
2025-07-29,17:00:50 | INFO | coca_contrastive_loss_weight: 1.0
|
| 69 |
+
2025-07-29,17:00:50 | INFO | copy_codebase: False
|
| 70 |
+
2025-07-29,17:00:50 | INFO | csv_caption_key: title
|
| 71 |
+
2025-07-29,17:00:50 | INFO | csv_img_key: filepath
|
| 72 |
+
2025-07-29,17:00:50 | INFO | csv_separator:
|
| 73 |
+
2025-07-29,17:00:50 | INFO | dataset_resampled: False
|
| 74 |
+
2025-07-29,17:00:50 | INFO | dataset_type: webdataset
|
| 75 |
+
2025-07-29,17:00:50 | INFO | ddp_static_graph: True
|
| 76 |
+
2025-07-29,17:00:50 | INFO | debug: False
|
| 77 |
+
2025-07-29,17:00:50 | INFO | delete_prev_step_ckpt: True
|
| 78 |
+
2025-07-29,17:00:50 | INFO | delete_previous_checkpoint: False
|
| 79 |
+
2025-07-29,17:00:50 | INFO | device: cuda:0
|
| 80 |
+
2025-07-29,17:00:50 | INFO | dist_backend: nccl
|
| 81 |
+
2025-07-29,17:00:50 | INFO | dist_url: env://
|
| 82 |
+
2025-07-29,17:00:50 | INFO | distill: False
|
| 83 |
+
2025-07-29,17:00:50 | INFO | distill_model: None
|
| 84 |
+
2025-07-29,17:00:50 | INFO | distill_pretrained: None
|
| 85 |
+
2025-07-29,17:00:50 | INFO | distributed: True
|
| 86 |
+
2025-07-29,17:00:50 | INFO | epochs: 2
|
| 87 |
+
2025-07-29,17:00:50 | INFO | epochs_cooldown: None
|
| 88 |
+
2025-07-29,17:00:50 | INFO | eps: 1e-06
|
| 89 |
+
2025-07-29,17:00:50 | INFO | force_custom_text: False
|
| 90 |
+
2025-07-29,17:00:50 | INFO | force_image_size: 224
|
| 91 |
+
2025-07-29,17:00:50 | INFO | force_patch_dropout: None
|
| 92 |
+
2025-07-29,17:00:50 | INFO | force_quick_gelu: False
|
| 93 |
+
2025-07-29,17:00:50 | INFO | gather_with_grad: True
|
| 94 |
+
2025-07-29,17:00:50 | INFO | global_batch_size: 16384
|
| 95 |
+
2025-07-29,17:00:50 | INFO | grad_checkpointing: True
|
| 96 |
+
2025-07-29,17:00:50 | INFO | grad_clip_norm: None
|
| 97 |
+
2025-07-29,17:00:50 | INFO | horovod: False
|
| 98 |
+
2025-07-29,17:00:50 | INFO | image_interpolation: None
|
| 99 |
+
2025-07-29,17:00:50 | INFO | image_mean: None
|
| 100 |
+
2025-07-29,17:00:50 | INFO | image_resize_mode: None
|
| 101 |
+
2025-07-29,17:00:50 | INFO | image_std: None
|
| 102 |
+
2025-07-29,17:00:50 | INFO | imagenet_v2: None
|
| 103 |
+
2025-07-29,17:00:50 | INFO | imagenet_val: /mnt/bn/zilongdata-us/dataset/ILSVRC/Data/CLS-LOC/val
|
| 104 |
+
2025-07-29,17:00:50 | INFO | is_cls_token: True
|
| 105 |
+
2025-07-29,17:00:50 | INFO | local_loss: True
|
| 106 |
+
2025-07-29,17:00:50 | INFO | local_rank: 0
|
| 107 |
+
2025-07-29,17:00:50 | INFO | lock_image: False
|
| 108 |
+
2025-07-29,17:00:50 | INFO | lock_image_freeze_bn_stats: False
|
| 109 |
+
2025-07-29,17:00:50 | INFO | lock_image_unlocked_groups: 0
|
| 110 |
+
2025-07-29,17:00:50 | INFO | lock_text: False
|
| 111 |
+
2025-07-29,17:00:50 | INFO | lock_text_freeze_layer_norm: False
|
| 112 |
+
2025-07-29,17:00:50 | INFO | lock_text_unlocked_layers: 0
|
| 113 |
+
2025-07-29,17:00:50 | INFO | log_every_n_steps: 128
|
| 114 |
+
2025-07-29,17:00:50 | INFO | log_level: 20
|
| 115 |
+
2025-07-29,17:00:50 | INFO | log_local: False
|
| 116 |
+
2025-07-29,17:00:50 | INFO | log_path: ./logs-lr1e-3-datacomp-rebuttal/clipcls_vit_b16_s512m_bs16k_weighted_14/out.log
|
| 117 |
+
2025-07-29,17:00:50 | INFO | logs: ./logs-lr1e-3-datacomp-rebuttal
|
| 118 |
+
2025-07-29,17:00:50 | INFO | lr: 0.001
|
| 119 |
+
2025-07-29,17:00:50 | INFO | lr_cooldown_end: 0.0
|
| 120 |
+
2025-07-29,17:00:50 | INFO | lr_cooldown_power: 1.0
|
| 121 |
+
2025-07-29,17:00:50 | INFO | lr_scheduler: cosine
|
| 122 |
+
2025-07-29,17:00:50 | INFO | max_seq_len: 15000
|
| 123 |
+
2025-07-29,17:00:50 | INFO | model: CLIPCLS-ViT-B-16
|
| 124 |
+
2025-07-29,17:00:50 | INFO | name: clipcls_vit_b16_s512m_bs16k_weighted_14
|
| 125 |
+
2025-07-29,17:00:50 | INFO | native_dynamic_resolution: False
|
| 126 |
+
2025-07-29,17:00:50 | INFO | no_set_device_rank: False
|
| 127 |
+
2025-07-29,17:00:50 | INFO | only_class_loss: False
|
| 128 |
+
2025-07-29,17:00:50 | INFO | only_packing: False
|
| 129 |
+
2025-07-29,17:00:50 | INFO | post_train: False
|
| 130 |
+
2025-07-29,17:00:50 | INFO | precision: amp_bfloat16
|
| 131 |
+
2025-07-29,17:00:50 | INFO | pretrained:
|
| 132 |
+
2025-07-29,17:00:50 | INFO | pretrained_image:
|
| 133 |
+
2025-07-29,17:00:50 | INFO | pretrained_text:
|
| 134 |
+
2025-07-29,17:00:50 | INFO | rank: 0
|
| 135 |
+
2025-07-29,17:00:50 | INFO | remote_sync: None
|
| 136 |
+
2025-07-29,17:00:50 | INFO | remote_sync_frequency: 300
|
| 137 |
+
2025-07-29,17:00:50 | INFO | remote_sync_protocol: s3
|
| 138 |
+
2025-07-29,17:00:50 | INFO | report_to: wandb
|
| 139 |
+
2025-07-29,17:00:50 | INFO | resume: None
|
| 140 |
+
2025-07-29,17:00:50 | INFO | rope_attn_num_heads: 12
|
| 141 |
+
2025-07-29,17:00:50 | INFO | rope_model_width: 768
|
| 142 |
+
2025-07-29,17:00:50 | INFO | save_every_n_steps: 6104
|
| 143 |
+
2025-07-29,17:00:50 | INFO | save_frequency: 1
|
| 144 |
+
2025-07-29,17:00:50 | INFO | save_most_recent: False
|
| 145 |
+
2025-07-29,17:00:50 | INFO | seed: 0
|
| 146 |
+
2025-07-29,17:00:50 | INFO | siglip: False
|
| 147 |
+
2025-07-29,17:00:50 | INFO | skip_scheduler: False
|
| 148 |
+
2025-07-29,17:00:50 | INFO | tensorboard: False
|
| 149 |
+
2025-07-29,17:00:50 | INFO | tensorboard_path:
|
| 150 |
+
2025-07-29,17:00:50 | INFO | torchcompile: False
|
| 151 |
+
2025-07-29,17:00:50 | INFO | torchscript: False
|
| 152 |
+
2025-07-29,17:00:50 | INFO | trace: False
|
| 153 |
+
2025-07-29,17:00:50 | INFO | train_data: /mnt/bn/zilongdata-us/dataset/recap-datacomp-1b-webdataset/{000000..140146}.tar
|
| 154 |
+
2025-07-29,17:00:50 | INFO | train_data_upsampling_factors: None
|
| 155 |
+
2025-07-29,17:00:50 | INFO | train_num_samples: 128000000
|
| 156 |
+
2025-07-29,17:00:50 | INFO | use_bn_sync: False
|
| 157 |
+
2025-07-29,17:00:50 | INFO | use_bnb_linear: None
|
| 158 |
+
2025-07-29,17:00:50 | INFO | use_idf: True
|
| 159 |
+
2025-07-29,17:00:50 | INFO | val_data: None
|
| 160 |
+
2025-07-29,17:00:50 | INFO | val_frequency: 1
|
| 161 |
+
2025-07-29,17:00:50 | INFO | val_num_samples: None
|
| 162 |
+
2025-07-29,17:00:50 | INFO | val_steps: 6104
|
| 163 |
+
2025-07-29,17:00:50 | INFO | wandb: True
|
| 164 |
+
2025-07-29,17:00:50 | INFO | wandb_notes:
|
| 165 |
+
2025-07-29,17:00:50 | INFO | wandb_project_name: cls-clip-batch-size
|
| 166 |
+
2025-07-29,17:00:50 | INFO | warmup: 500
|
| 167 |
+
2025-07-29,17:00:50 | INFO | wd: 0.2
|
| 168 |
+
2025-07-29,17:00:50 | INFO | workers: 1
|
| 169 |
+
2025-07-29,17:00:50 | INFO | world_size: 8
|
| 170 |
+
2025-07-29,17:00:50 | INFO | zeroshot_frequency: 2
|
| 171 |
+
2025-07-29,17:00:50 | INFO | zeroshot_steps: 6104
|
| 172 |
+
2025-07-29,17:01:07 | INFO | Start epoch 0
|
| 173 |
+
2025-07-29,17:01:21 | INFO | Train Epoch: 0 [ 16384/128008192 (0%)] Data (t): 11.039 Batch (t): 14.689, 1115.39/s, 139.424/s/gpu LR: 0.000002 Logit Scale: 14.286 Class_loss: 15.851 (15.851) Contrastive_loss: 9.7885 (9.7885) Loss: 25.639 (25.639)
|
| 174 |
+
2025-07-29,17:18:24 | WARNING | Handling webdataset error (OSError('image file is truncated (6 bytes not processed)')). Ignoring.
|
| 175 |
+
2025-07-29,17:19:03 | WARNING | Handling webdataset error (OSError('image file is truncated (17 bytes not processed)')). Ignoring.
|
| 176 |
+
2025-07-29,17:20:43 | INFO | Train Epoch: 0 [ 2113536/128008192 (2%)] Data (t): 1.737 Batch (t): 9.068, 1764.91/s, 220.613/s/gpu LR: 0.000258 Logit Scale: 14.325 Class_loss: 13.156 (14.504) Contrastive_loss: 8.9206 (9.3545) Loss: 22.077 (23.858)
|
| 177 |
+
2025-07-29,17:40:19 | INFO | Train Epoch: 0 [ 4210688/128008192 (3%)] Data (t): 0.843 Batch (t): 9.195, 1668.04/s, 208.505/s/gpu LR: 0.000514 Logit Scale: 14.611 Class_loss: 12.688 (13.898) Contrastive_loss: 8.3350 (9.0147) Loss: 21.023 (22.913)
|
| 178 |
+
2025-07-29,17:42:53 | WARNING | Handling webdataset error (OSError('image file is truncated (112 bytes not processed)')). Ignoring.
|
| 179 |
+
2025-07-29,17:59:57 | INFO | Train Epoch: 0 [ 6307840/128008192 (5%)] Data (t): 0.955 Batch (t): 9.196, 1772.59/s, 221.573/s/gpu LR: 0.000770 Logit Scale: 14.728 Class_loss: 11.044 (13.185) Contrastive_loss: 7.2186 (8.5657) Loss: 18.263 (21.750)
|
| 180 |
+
2025-07-29,18:14:47 | WARNING | Handling webdataset error (OSError('image file is truncated (6 bytes not processed)')). Ignoring.
|
| 181 |
+
2025-07-29,18:19:24 | INFO | Train Epoch: 0 [ 8404992/128008192 (7%)] Data (t): 2.771 Batch (t): 9.118, 1799.17/s, 224.896/s/gpu LR: 0.001000 Logit Scale: 15.328 Class_loss: 10.657 (12.679) Contrastive_loss: 7.4432 (8.3412) Loss: 18.101 (21.021)
|
| 182 |
+
2025-07-29,18:38:55 | INFO | Train Epoch: 0 [ 10502144/128008192 (8%)] Data (t): 5.529 Batch (t): 9.149, 1818.55/s, 227.319/s/gpu LR: 0.001000 Logit Scale: 16.498 Class_loss: 10.976 (12.395) Contrastive_loss: 6.9117 (8.1029) Loss: 17.888 (20.498)
|
| 183 |
+
2025-07-29,18:43:15 | WARNING | Handling webdataset error (OSError('image file is truncated (5 bytes not processed)')). Ignoring.
|
| 184 |
+
2025-07-29,18:43:26 | WARNING | Handling webdataset error (OSError('image file is truncated (7 bytes not processed)')). Ignoring.
|
| 185 |
+
2025-07-29,18:57:38 | WARNING | Handling webdataset error (OSError('image file is truncated (26 bytes not processed)')). Ignoring.
|
| 186 |
+
2025-07-29,18:58:30 | INFO | Train Epoch: 0 [ 12599296/128008192 (10%)] Data (t): 3.640 Batch (t): 9.179, 1758.50/s, 219.812/s/gpu LR: 0.000999 Logit Scale: 18.593 Class_loss: 10.086 (12.065) Contrastive_loss: 5.4069 (7.7178) Loss: 15.493 (19.783)
|
| 187 |
+
2025-07-29,19:18:04 | INFO | Train Epoch: 0 [ 14696448/128008192 (11%)] Data (t): 0.896 Batch (t): 9.178, 1766.47/s, 220.808/s/gpu LR: 0.000998 Logit Scale: 21.037 Class_loss: 9.8728 (11.791) Contrastive_loss: 4.4942 (7.3148) Loss: 14.367 (19.106)
|
| 188 |
+
2025-07-29,19:26:29 | WARNING | Handling webdataset error (OSError('image file is truncated (17 bytes not processed)')). Ignoring.
|
| 189 |
+
2025-07-29,19:37:30 | INFO | Train Epoch: 0 [ 16793600/128008192 (13%)] Data (t): 3.511 Batch (t): 9.102, 1775.49/s, 221.936/s/gpu LR: 0.000997 Logit Scale: 23.268 Class_loss: 9.7820 (11.568) Contrastive_loss: 3.9476 (6.9407) Loss: 13.730 (18.509)
|
| 190 |
+
2025-07-29,19:56:59 | INFO | Train Epoch: 0 [ 18890752/128008192 (15%)] Data (t): 4.259 Batch (t): 9.136, 1791.51/s, 223.939/s/gpu LR: 0.000995 Logit Scale: 26.185 Class_loss: 9.5743 (11.369) Contrastive_loss: 3.6653 (6.6132) Loss: 13.240 (17.982)
|
| 191 |
+
2025-07-29,20:16:35 | INFO | Train Epoch: 0 [ 20987904/128008192 (16%)] Data (t): 6.670 Batch (t): 9.184, 1803.34/s, 225.417/s/gpu LR: 0.000993 Logit Scale: 29.092 Class_loss: 11.944 (11.421) Contrastive_loss: 5.9218 (6.5503) Loss: 17.865 (17.971)
|
| 192 |
+
2025-07-29,20:36:00 | INFO | Train Epoch: 0 [ 23085056/128008192 (18%)] Data (t): 7.780 Batch (t): 9.108, 1814.45/s, 226.806/s/gpu LR: 0.000991 Logit Scale: 32.320 Class_loss: 12.146 (11.481) Contrastive_loss: 5.8004 (6.4878) Loss: 17.946 (17.969)
|
| 193 |
+
2025-07-29,20:52:33 | WARNING | Handling webdataset error (OSError('image file is truncated (21 bytes not processed)')). Ignoring.
|
| 194 |
+
2025-07-29,20:55:34 | INFO | Train Epoch: 0 [ 25182208/128008192 (20%)] Data (t): 4.047 Batch (t): 9.169, 1805.38/s, 225.673/s/gpu LR: 0.000988 Logit Scale: 34.916 Class_loss: 10.929 (11.439) Contrastive_loss: 4.2715 (6.3173) Loss: 15.200 (17.756)
|
| 195 |
+
2025-07-29,21:13:14 | WARNING | Handling webdataset error (OSError('image file is truncated (21 bytes not processed)')). Ignoring.
|
| 196 |
+
2025-07-29,21:15:04 | INFO | Train Epoch: 0 [ 27279360/128008192 (21%)] Data (t): 2.051 Batch (t): 9.138, 1765.66/s, 220.707/s/gpu LR: 0.000985 Logit Scale: 38.213 Class_loss: 9.2428 (11.282) Contrastive_loss: 2.0307 (6.0111) Loss: 11.273 (17.293)
|
| 197 |
+
2025-07-29,21:26:27 | WARNING | Handling webdataset error (OSError('image file is truncated (27 bytes not processed)')). Ignoring.
|
| 198 |
+
2025-07-29,21:34:38 | INFO | Train Epoch: 0 [ 29376512/128008192 (23%)] Data (t): 0.775 Batch (t): 9.174, 1794.35/s, 224.293/s/gpu LR: 0.000982 Logit Scale: 41.354 Class_loss: 9.9668 (11.194) Contrastive_loss: 2.7373 (5.7929) Loss: 12.704 (16.987)
|
| 199 |
+
2025-07-29,21:44:17 | WARNING | Handling webdataset error (OSError('image file is truncated (77 bytes not processed)')). Ignoring.
|
| 200 |
+
2025-07-29,21:54:14 | INFO | Train Epoch: 0 [ 31473664/128008192 (25%)] Data (t): 0.785 Batch (t): 9.186, 1812.63/s, 226.579/s/gpu LR: 0.000978 Logit Scale: 43.025 Class_loss: 9.2206 (11.071) Contrastive_loss: 1.6132 (5.5317) Loss: 10.834 (16.603)
|
| 201 |
+
2025-07-29,21:58:21 | WARNING | Handling webdataset error (OSError('image file is truncated (48 bytes not processed)')). Ignoring.
|
| 202 |
+
2025-07-29,21:58:30 | WARNING | Handling webdataset error (OSError('image file is truncated (11 bytes not processed)')). Ignoring.
|
| 203 |
+
2025-07-29,22:10:05 | WARNING | Handling webdataset error (OSError('image file is truncated (9 bytes not processed)')). Ignoring.
|
| 204 |
+
2025-07-29,22:13:49 | INFO | Train Epoch: 0 [ 33570816/128008192 (26%)] Data (t): 1.649 Batch (t): 9.180, 1817.38/s, 227.172/s/gpu LR: 0.000974 Logit Scale: 45.788 Class_loss: 9.4205 (10.974) Contrastive_loss: 1.8772 (5.3167) Loss: 11.298 (16.291)
|
| 205 |
+
2025-07-29,22:26:16 | WARNING | Handling webdataset error (OSError('image file is truncated (8 bytes not processed)')). Ignoring.
|
| 206 |
+
2025-07-29,22:33:17 | INFO | Train Epoch: 0 [ 35667968/128008192 (28%)] Data (t): 7.098 Batch (t): 9.125, 1770.80/s, 221.350/s/gpu LR: 0.000970 Logit Scale: 47.992 Class_loss: 11.873 (11.024) Contrastive_loss: 4.7026 (5.2826) Loss: 16.576 (16.306)
|
| 207 |
+
2025-07-29,22:52:49 | INFO | Train Epoch: 0 [ 37765120/128008192 (30%)] Data (t): 7.805 Batch (t): 9.160, 1781.51/s, 222.688/s/gpu LR: 0.000965 Logit Scale: 50.606 Class_loss: 8.7356 (10.903) Contrastive_loss: 1.1694 (5.0661) Loss: 9.9050 (15.970)
|
| 208 |
+
2025-07-29,23:12:30 | INFO | Train Epoch: 0 [ 39862272/128008192 (31%)] Data (t): 3.544 Batch (t): 9.226, 1753.11/s, 219.138/s/gpu LR: 0.000960 Logit Scale: 52.097 Class_loss: 8.7817 (10.797) Contrastive_loss: 1.1987 (4.8727) Loss: 9.9803 (15.670)
|
| 209 |
+
2025-07-29,23:24:07 | WARNING | Handling webdataset error (OSError('image file is truncated (6 bytes not processed)')). Ignoring.
|
| 210 |
+
2025-07-29,23:32:18 | INFO | Train Epoch: 0 [ 41959424/128008192 (33%)] Data (t): 0.817 Batch (t): 9.278, 1760.38/s, 220.047/s/gpu LR: 0.000955 Logit Scale: 53.712 Class_loss: 8.5982 (10.693) Contrastive_loss: 0.86308 (4.6818) Loss: 9.4613 (15.374)
|
| 211 |
+
2025-07-29,23:32:21 | WARNING | Handling webdataset error (OSError('image file is truncated (8 bytes not processed)')). Ignoring.
|
| 212 |
+
2025-07-29,23:51:58 | INFO | Train Epoch: 0 [ 44056576/128008192 (34%)] Data (t): 0.764 Batch (t): 9.223, 1790.16/s, 223.770/s/gpu LR: 0.000949 Logit Scale: 53.553 Class_loss: 8.6292 (10.599) Contrastive_loss: 0.85107 (4.5077) Loss: 9.4803 (15.106)
|
| 213 |
+
2025-07-30,00:04:02 | WARNING | Handling webdataset error (OSError('image file is truncated (10 bytes not processed)')). Ignoring.
|
| 214 |
+
2025-07-30,00:11:33 | INFO | Train Epoch: 0 [ 46153728/128008192 (36%)] Data (t): 0.738 Batch (t): 9.179, 1768.99/s, 221.124/s/gpu LR: 0.000943 Logit Scale: 55.256 Class_loss: 8.7162 (10.517) Contrastive_loss: 1.0291 (4.3564) Loss: 9.7453 (14.873)
|
| 215 |
+
2025-07-30,00:27:21 | WARNING | Handling webdataset error (OSError('image file is truncated (9 bytes not processed)')). Ignoring.
|
| 216 |
+
2025-07-30,00:29:54 | WARNING | Handling webdataset error (OSError('image file is truncated (1 bytes not processed)')). Ignoring.
|
| 217 |
+
2025-07-30,00:30:22 | WARNING | Handling webdataset error (OSError('image file is truncated (7 bytes not processed)')). Ignoring.
|
| 218 |
+
2025-07-30,00:31:03 | INFO | Train Epoch: 0 [ 48250880/128008192 (38%)] Data (t): 2.063 Batch (t): 9.140, 1810.82/s, 226.353/s/gpu LR: 0.000937 Logit Scale: 56.641 Class_loss: 8.6658 (10.440) Contrastive_loss: 0.91926 (4.2132) Loss: 9.5851 (14.653)
|
| 219 |
+
2025-07-30,00:50:37 | INFO | Train Epoch: 0 [ 50348032/128008192 (39%)] Data (t): 3.347 Batch (t): 9.169, 1949.67/s, 243.709/s/gpu LR: 0.000930 Logit Scale: 57.655 Class_loss: 8.6182 (10.367) Contrastive_loss: 0.81084 (4.0771) Loss: 9.4291 (14.444)
|
| 220 |
+
2025-07-30,00:53:30 | WARNING | Handling webdataset error (UnidentifiedImageError('cannot identify image file <_io.BytesIO object at 0x7fcc9fadc400>')). Ignoring.
|
| 221 |
+
2025-07-30,00:59:50 | WARNING | Handling webdataset error (OSError('image file is truncated (5 bytes not processed)')). Ignoring.
|
| 222 |
+
2025-07-30,01:10:42 | INFO | Train Epoch: 0 [ 52445184/128008192 (41%)] Data (t): 1.995 Batch (t): 9.417, 1818.13/s, 227.267/s/gpu LR: 0.000923 Logit Scale: 58.932 Class_loss: 8.6057 (10.299) Contrastive_loss: 0.85393 (3.9531) Loss: 9.4596 (14.252)
|
| 223 |
+
2025-07-30,01:30:12 | INFO | Train Epoch: 0 [ 54542336/128008192 (43%)] Data (t): 0.739 Batch (t): 9.139, 1772.50/s, 221.563/s/gpu LR: 0.000916 Logit Scale: 59.576 Class_loss: 11.334 (10.338) Contrastive_loss: 3.4290 (3.9337) Loss: 14.763 (14.271)
|
| 224 |
+
2025-07-30,01:49:55 | INFO | Train Epoch: 0 [ 56639488/128008192 (44%)] Data (t): 0.764 Batch (t): 9.242, 1755.16/s, 219.395/s/gpu LR: 0.000909 Logit Scale: 61.145 Class_loss: 8.5206 (10.273) Contrastive_loss: 0.70241 (3.8183) Loss: 9.2230 (14.091)
|
| 225 |
+
2025-07-30,02:09:27 | INFO | Train Epoch: 0 [ 58736640/128008192 (46%)] Data (t): 1.273 Batch (t): 9.161, 1807.64/s, 225.954/s/gpu LR: 0.000901 Logit Scale: 61.786 Class_loss: 8.7313 (10.220) Contrastive_loss: 0.91194 (3.7181) Loss: 9.6432 (13.938)
|
| 226 |
+
2025-07-30,02:25:34 | WARNING | Handling webdataset error (OSError('image file is truncated (104 bytes not processed)')). Ignoring.
|
| 227 |
+
2025-07-30,02:28:52 | INFO | Train Epoch: 0 [ 60833792/128008192 (48%)] Data (t): 4.552 Batch (t): 9.097, 1811.59/s, 226.449/s/gpu LR: 0.000893 Logit Scale: 61.993 Class_loss: 8.5576 (10.164) Contrastive_loss: 0.74831 (3.6191) Loss: 9.3059 (13.783)
|
| 228 |
+
2025-07-30,02:48:10 | INFO | Train Epoch: 0 [ 62930944/128008192 (49%)] Data (t): 0.781 Batch (t): 9.049, 1784.27/s, 223.034/s/gpu LR: 0.000884 Logit Scale: 63.581 Class_loss: 8.4496 (10.109) Contrastive_loss: 0.71927 (3.5256) Loss: 9.1689 (13.634)
|
| 229 |
+
2025-07-30,02:56:58 | WARNING | Handling webdataset error (OSError('image file is truncated (21 bytes not processed)')). Ignoring.
|
| 230 |
+
2025-07-30,03:07:56 | INFO | Train Epoch: 0 [ 65028096/128008192 (51%)] Data (t): 0.789 Batch (t): 9.268, 1651.50/s, 206.437/s/gpu LR: 0.000876 Logit Scale: 64.336 Class_loss: 8.4044 (10.056) Contrastive_loss: 0.62240 (3.4348) Loss: 9.0268 (13.490)
|
| 231 |
+
2025-07-30,03:15:31 | WARNING | Handling webdataset error (OSError('image file is truncated (47 bytes not processed)')). Ignoring.
|
| 232 |
+
2025-07-30,03:17:50 | WARNING | Handling webdataset error (OSError('image file is truncated (25 bytes not processed)')). Ignoring.
|
| 233 |
+
2025-07-30,03:19:33 | WARNING | Handling webdataset error (OSError('image file is truncated (4 bytes not processed)')). Ignoring.
|
| 234 |
+
2025-07-30,03:27:31 | INFO | Train Epoch: 0 [ 67125248/128008192 (52%)] Data (t): 0.787 Batch (t): 9.176, 1792.07/s, 224.008/s/gpu LR: 0.000867 Logit Scale: 64.793 Class_loss: 8.3298 (10.003) Contrastive_loss: 0.56794 (3.3480) Loss: 8.8978 (13.351)
|
| 235 |
+
2025-07-30,03:46:59 | INFO | Train Epoch: 0 [ 69222400/128008192 (54%)] Data (t): 0.742 Batch (t): 9.128, 1941.53/s, 242.691/s/gpu LR: 0.000858 Logit Scale: 65.478 Class_loss: 8.2713 (9.9523) Contrastive_loss: 0.57090 (3.2663) Loss: 8.8422 (13.219)
|
| 236 |
+
2025-07-30,03:57:56 | WARNING | Handling webdataset error (OSError('image file is truncated (14 bytes not processed)')). Ignoring.
|
| 237 |
+
2025-07-30,04:06:25 | INFO | Train Epoch: 0 [ 71319552/128008192 (56%)] Data (t): 0.737 Batch (t): 9.108, 1795.16/s, 224.396/s/gpu LR: 0.000848 Logit Scale: 66.012 Class_loss: 8.3185 (9.9056) Contrastive_loss: 0.61454 (3.1905) Loss: 8.9331 (13.096)
|
| 238 |
+
2025-07-30,04:25:51 | INFO | Train Epoch: 0 [ 73416704/128008192 (57%)] Data (t): 0.755 Batch (t): 9.112, 1782.36/s, 222.795/s/gpu LR: 0.000839 Logit Scale: 66.503 Class_loss: 8.2944 (9.8609) Contrastive_loss: 0.65749 (3.1202) Loss: 8.9519 (12.981)
|
| 239 |
+
2025-07-30,04:45:20 | INFO | Train Epoch: 0 [ 75513856/128008192 (59%)] Data (t): 5.821 Batch (t): 9.130, 1776.03/s, 222.004/s/gpu LR: 0.000829 Logit Scale: 67.463 Class_loss: 8.1562 (9.8148) Contrastive_loss: 0.48260 (3.0489) Loss: 8.6388 (12.864)
|
| 240 |
+
2025-07-30,05:04:53 | INFO | Train Epoch: 0 [ 77611008/128008192 (61%)] Data (t): 7.338 Batch (t): 9.164, 1767.89/s, 220.986/s/gpu LR: 0.000819 Logit Scale: 67.661 Class_loss: 8.2134 (9.7727) Contrastive_loss: 0.54324 (2.9829) Loss: 8.7566 (12.756)
|
| 241 |
+
2025-07-30,05:16:27 | WARNING | Handling webdataset error (OSError('image file is truncated (96 bytes not processed)')). Ignoring.
|
| 242 |
+
2025-07-30,05:24:28 | INFO | Train Epoch: 0 [ 79708160/128008192 (62%)] Data (t): 7.460 Batch (t): 9.179, 1785.21/s, 223.151/s/gpu LR: 0.000808 Logit Scale: 68.172 Class_loss: 8.1687 (9.7315) Contrastive_loss: 0.45055 (2.9180) Loss: 8.6193 (12.650)
|
| 243 |
+
2025-07-30,05:44:03 | INFO | Train Epoch: 0 [ 81805312/128008192 (64%)] Data (t): 4.416 Batch (t): 9.178, 1780.66/s, 222.582/s/gpu LR: 0.000798 Logit Scale: 69.006 Class_loss: 8.4876 (9.7004) Contrastive_loss: 0.68444 (2.8622) Loss: 9.1720 (12.563)
|
| 244 |
+
2025-07-30,05:47:54 | WARNING | Handling webdataset error (OSError('image file is truncated (16 bytes not processed)')). Ignoring.
|
| 245 |
+
2025-07-30,05:51:49 | WARNING | Handling webdataset error (OSError('image file is truncated (25 bytes not processed)')). Ignoring.
|
| 246 |
+
2025-07-30,06:03:36 | INFO | Train Epoch: 0 [ 83902464/128008192 (66%)] Data (t): 5.901 Batch (t): 9.165, 1790.99/s, 223.873/s/gpu LR: 0.000787 Logit Scale: 69.384 Class_loss: 8.3879 (9.6684) Contrastive_loss: 0.50795 (2.8047) Loss: 8.8958 (12.473)
|
| 247 |
+
2025-07-30,06:21:37 | WARNING | Handling webdataset error (OSError('image file is truncated (42 bytes not processed)')). Ignoring.
|
| 248 |
+
2025-07-30,06:23:02 | INFO | Train Epoch: 0 [ 85999616/128008192 (67%)] Data (t): 7.242 Batch (t): 9.113, 1813.36/s, 226.670/s/gpu LR: 0.000776 Logit Scale: 69.678 Class_loss: 8.1836 (9.6331) Contrastive_loss: 0.42489 (2.7481) Loss: 8.6085 (12.381)
|
| 249 |
+
2025-07-30,06:40:33 | WARNING | Handling webdataset error (OSError('image file is truncated (6 bytes not processed)')). Ignoring.
|
| 250 |
+
2025-07-30,06:42:40 | INFO | Train Epoch: 0 [ 88096768/128008192 (69%)] Data (t): 7.854 Batch (t): 9.200, 1803.87/s, 225.484/s/gpu LR: 0.000765 Logit Scale: 69.857 Class_loss: 8.1142 (9.5977) Contrastive_loss: 0.43987 (2.6944) Loss: 8.5541 (12.292)
|
| 251 |
+
2025-07-30,06:43:39 | WARNING | Handling webdataset error (OSError('image file is truncated (26 bytes not processed)')). Ignoring.
|
| 252 |
+
2025-07-30,06:44:04 | WARNING | Handling webdataset error (OSError('image file is truncated (23 bytes not processed)')). Ignoring.
|
| 253 |
+
2025-07-30,07:02:21 | INFO | Train Epoch: 0 [ 90193920/128008192 (70%)] Data (t): 4.145 Batch (t): 9.229, 1622.53/s, 202.816/s/gpu LR: 0.000753 Logit Scale: 70.728 Class_loss: 8.0963 (9.5636) Contrastive_loss: 0.36750 (2.6415) Loss: 8.4638 (12.205)
|
| 254 |
+
2025-07-30,07:03:08 | WARNING | Handling webdataset error (OSError('image file is truncated (33 bytes not processed)')). Ignoring.
|
| 255 |
+
2025-07-30,07:12:28 | WARNING | Handling webdataset error (OSError('image file is truncated (76 bytes not processed)')). Ignoring.
|
| 256 |
+
2025-07-30,07:23:18 | INFO | Train Epoch: 0 [ 92291072/128008192 (72%)] Data (t): 0.765 Batch (t): 9.821, 1609.60/s, 201.200/s/gpu LR: 0.000742 Logit Scale: 71.291 Class_loss: 8.1408 (9.5320) Contrastive_loss: 0.43890 (2.5926) Loss: 8.5797 (12.125)
|
| 257 |
+
2025-07-30,07:42:48 | INFO | Train Epoch: 0 [ 94388224/128008192 (74%)] Data (t): 0.765 Batch (t): 9.134, 1762.62/s, 220.327/s/gpu LR: 0.000730 Logit Scale: 71.625 Class_loss: 8.0630 (9.5001) Contrastive_loss: 0.39154 (2.5447) Loss: 8.4545 (12.045)
|
| 258 |
+
2025-07-30,08:02:20 | INFO | Train Epoch: 0 [ 96485376/128008192 (75%)] Data (t): 0.741 Batch (t): 9.160, 1838.21/s, 229.776/s/gpu LR: 0.000718 Logit Scale: 71.887 Class_loss: 8.1108 (9.4705) Contrastive_loss: 0.42522 (2.4996) Loss: 8.5360 (11.970)
|
| 259 |
+
2025-07-30,08:08:06 | WARNING | Handling webdataset error (OSError('image file is truncated (52 bytes not processed)')). Ignoring.
|
| 260 |
+
2025-07-30,08:14:16 | WARNING | Handling webdataset error (OSError('image file is truncated (83 bytes not processed)')). Ignoring.
|
| 261 |
+
2025-07-30,08:21:57 | INFO | Train Epoch: 0 [ 98582528/128008192 (77%)] Data (t): 0.774 Batch (t): 9.192, 1783.39/s, 222.923/s/gpu LR: 0.000706 Logit Scale: 72.297 Class_loss: 8.0766 (9.4415) Contrastive_loss: 0.48460 (2.4576) Loss: 8.5612 (11.899)
|
| 262 |
+
2025-07-30,08:35:14 | INFO | Starting zero-shot imagenet.
|
| 263 |
+
2025-07-30,08:35:14 | INFO | Building zero-shot classifier
|
| 264 |
+
2025-07-30,08:35:21 | INFO | Using classifier
|
| 265 |
+
2025-07-30,08:53:29 | INFO | Finished zero-shot imagenet.
|
| 266 |
+
2025-07-30,08:53:29 | INFO | Eval Epoch: 0.7811340074235249 imagenet-zeroshot-val-top1: 0.2967 imagenet-zeroshot-val-top5: 0.5533
|
| 267 |
+
2025-07-30,08:59:29 | INFO | Train Epoch: 0 [100679680/128008192 (79%)] Data (t): 9.845 Batch (t): 17.597, 1808.43/s, 226.053/s/gpu LR: 0.000694 Logit Scale: 73.120 Class_loss: 8.1378 (9.4149) Contrastive_loss: 0.39129 (2.4155) Loss: 8.5291 (11.830)
|
| 268 |
+
2025-07-30,09:19:07 | INFO | Train Epoch: 0 [102776832/128008192 (80%)] Data (t): 0.747 Batch (t): 9.201, 1777.53/s, 222.191/s/gpu LR: 0.000682 Logit Scale: 73.508 Class_loss: 8.0635 (9.3878) Contrastive_loss: 0.35391 (2.3742) Loss: 8.4174 (11.762)
|
| 269 |
+
2025-07-30,09:23:00 | WARNING | Handling webdataset error (OSError('image file is truncated (19 bytes not processed)')). Ignoring.
|
| 270 |
+
2025-07-30,09:38:44 | INFO | Train Epoch: 0 [104873984/128008192 (82%)] Data (t): 0.748 Batch (t): 9.198, 1745.29/s, 218.162/s/gpu LR: 0.000669 Logit Scale: 73.682 Class_loss: 11.186 (9.4231) Contrastive_loss: 2.4820 (2.3764) Loss: 13.668 (11.799)
|
| 271 |
+
2025-07-30,09:58:33 | INFO | Train Epoch: 0 [106971136/128008192 (84%)] Data (t): 0.765 Batch (t): 9.290, 1845.94/s, 230.742/s/gpu LR: 0.000657 Logit Scale: 73.911 Class_loss: 8.1439 (9.3985) Contrastive_loss: 0.38915 (2.3381) Loss: 8.5330 (11.737)
|
| 272 |
+
2025-07-30,10:18:20 | INFO | Train Epoch: 0 [109068288/128008192 (85%)] Data (t): 0.755 Batch (t): 9.269, 1786.99/s, 223.373/s/gpu LR: 0.000644 Logit Scale: 74.387 Class_loss: 8.0491 (9.3730) Contrastive_loss: 0.37982 (2.3012) Loss: 8.4289 (11.674)
|
| 273 |
+
2025-07-30,10:20:44 | WARNING | Handling webdataset error (OSError('image file is truncated (37 bytes not processed)')). Ignoring.
|
| 274 |
+
2025-07-30,10:38:09 | INFO | Train Epoch: 0 [111165440/128008192 (87%)] Data (t): 0.764 Batch (t): 9.292, 1809.98/s, 226.247/s/gpu LR: 0.000631 Logit Scale: 75.035 Class_loss: 8.1683 (9.3507) Contrastive_loss: 0.38102 (2.2656) Loss: 8.5493 (11.616)
|
| 275 |
+
2025-07-30,10:57:57 | INFO | Train Epoch: 0 [113262592/128008192 (88%)] Data (t): 0.769 Batch (t): 9.280, 1769.88/s, 221.235/s/gpu LR: 0.000618 Logit Scale: 75.042 Class_loss: 8.1599 (9.3291) Contrastive_loss: 0.31206 (2.2301) Loss: 8.4720 (11.559)
|
| 276 |
+
2025-07-30,11:12:20 | WARNING | Handling webdataset error (OSError('image file is truncated (20 bytes not processed)')). Ignoring.
|
| 277 |
+
2025-07-30,11:17:37 | INFO | Train Epoch: 0 [115359744/128008192 (90%)] Data (t): 2.495 Batch (t): 9.216, 1754.13/s, 219.267/s/gpu LR: 0.000605 Logit Scale: 75.468 Class_loss: 8.2209 (9.3093) Contrastive_loss: 0.54715 (2.2001) Loss: 8.7681 (11.509)
|
| 278 |
+
2025-07-30,11:32:19 | WARNING | Handling webdataset error (OSError('image file is truncated (21 bytes not processed)')). Ignoring.
|
| 279 |
+
2025-07-30,11:34:59 | WARNING | Handling webdataset error (OSError('image file is truncated (6 bytes not processed)')). Ignoring.
|
| 280 |
+
2025-07-30,11:37:28 | INFO | Train Epoch: 0 [117456896/128008192 (92%)] Data (t): 0.831 Batch (t): 9.307, 1768.09/s, 221.011/s/gpu LR: 0.000592 Logit Scale: 75.997 Class_loss: 8.0167 (9.2866) Contrastive_loss: 0.34439 (2.1675) Loss: 8.3611 (11.454)
|
| 281 |
+
2025-07-30,11:41:23 | WARNING | Handling webdataset error (OSError('image file is truncated (18 bytes not processed)')). Ignoring.
|
| 282 |
+
2025-07-30,11:57:14 | INFO | Train Epoch: 0 [119554048/128008192 (93%)] Data (t): 0.763 Batch (t): 9.265, 1791.40/s, 223.924/s/gpu LR: 0.000579 Logit Scale: 76.075 Class_loss: 8.0491 (9.2653) Contrastive_loss: 0.33998 (2.1360) Loss: 8.3891 (11.401)
|
| 283 |
+
2025-07-30,12:04:34 | WARNING | Handling webdataset error (OSError('image file is truncated (28 bytes not processed)')). Ignoring.
|
| 284 |
+
2025-07-30,12:11:34 | WARNING | Handling webdataset error (OSError('image file is truncated (1 bytes not processed)')). Ignoring.
|
| 285 |
+
2025-07-30,12:16:49 | INFO | Train Epoch: 0 [121651200/128008192 (95%)] Data (t): 0.751 Batch (t): 9.183, 1793.45/s, 224.181/s/gpu LR: 0.000566 Logit Scale: 76.293 Class_loss: 10.801 (9.2913) Contrastive_loss: 2.2291 (2.1376) Loss: 13.030 (11.429)
|
| 286 |
+
2025-07-30,12:36:28 | INFO | Train Epoch: 0 [123748352/128008192 (97%)] Data (t): 0.751 Batch (t): 9.207, 1764.53/s, 220.566/s/gpu LR: 0.000553 Logit Scale: 76.672 Class_loss: 8.1440 (9.2722) Contrastive_loss: 0.35772 (2.1079) Loss: 8.5017 (11.380)
|
| 287 |
+
2025-07-30,12:44:20 | WARNING | Handling webdataset error (OSError('image file is truncated (181 bytes not processed)')). Ignoring.
|
| 288 |
+
2025-07-30,12:56:13 | INFO | Train Epoch: 0 [125845504/128008192 (98%)] Data (t): 1.484 Batch (t): 9.262, 1771.19/s, 221.399/s/gpu LR: 0.000540 Logit Scale: 76.884 Class_loss: 9.4153 (9.2745) Contrastive_loss: 1.2171 (2.0933) Loss: 10.632 (11.368)
|
| 289 |
+
2025-07-30,13:11:05 | WARNING | Handling webdataset error (OSError('image file is truncated (31 bytes not processed)')). Ignoring.
|
| 290 |
+
2025-07-30,13:16:10 | INFO | Train Epoch: 0 [127942656/128008192 (100%)] Data (t): 1.538 Batch (t): 9.350, 1760.64/s, 220.080/s/gpu LR: 0.000526 Logit Scale: 77.294 Class_loss: 8.0100 (9.2541) Contrastive_loss: 0.31392 (2.0646) Loss: 8.3239 (11.319)
|
| 291 |
+
2025-07-30,13:16:46 | INFO | Train Epoch: 0 [128008192/128008192 (100%)] Data (t): 0.799 Batch (t): 9.024, 1907.79/s, 238.473/s/gpu LR: 0.000526 Logit Scale: 77.302 Class_loss: 8.0001 (9.2342) Contrastive_loss: 0.37823 (2.0378) Loss: 8.3784 (11.272)
|
| 292 |
+
2025-07-30,13:16:53 | INFO | Start epoch 1
|
| 293 |
+
2025-07-30,13:17:05 | INFO | Train Epoch: 1 [ 16384/128008192 (0%)] Data (t): 10.922 Batch (t): 12.254, 1337.02/s, 167.127/s/gpu LR: 0.000526 Logit Scale: 77.302 Class_loss: 7.9434 (7.9434) Contrastive_loss: 0.30420 (0.30420) Loss: 8.2476 (8.2476)
|
| 294 |
+
2025-07-30,13:24:55 | WARNING | Handling webdataset error (OSError('image file is truncated (63 bytes not processed)')). Ignoring.
|
| 295 |
+
2025-07-30,13:30:08 | WARNING | Handling webdataset error (OSError('image file is truncated (208 bytes not processed)')). Ignoring.
|
| 296 |
+
2025-07-30,13:36:40 | INFO | Train Epoch: 1 [ 2113536/128008192 (2%)] Data (t): 7.358 Batch (t): 9.175, 1739.40/s, 217.425/s/gpu LR: 0.000513 Logit Scale: 77.422 Class_loss: 10.572 (9.2577) Contrastive_loss: 2.3017 (1.3029) Loss: 12.874 (10.561)
|
| 297 |
+
2025-07-30,13:38:00 | WARNING | Handling webdataset error (OSError('image file is truncated (77 bytes not processed)')). Ignoring.
|
| 298 |
+
2025-07-30,13:56:18 | INFO | Train Epoch: 1 [ 4210688/128008192 (3%)] Data (t): 5.649 Batch (t): 9.203, 1793.69/s, 224.211/s/gpu LR: 0.000499 Logit Scale: 77.561 Class_loss: 8.5743 (9.0299) Contrastive_loss: 0.69278 (1.0996) Loss: 9.2671 (10.129)
|
| 299 |
+
2025-07-30,14:15:59 | INFO | Train Epoch: 1 [ 6307840/128008192 (5%)] Data (t): 5.201 Batch (t): 9.225, 1763.34/s, 220.417/s/gpu LR: 0.000486 Logit Scale: 77.992 Class_loss: 11.181 (9.5676) Contrastive_loss: 2.6838 (1.4956) Loss: 13.864 (11.063)
|
| 300 |
+
2025-07-30,14:29:14 | WARNING | Handling webdataset error (OSError('image file is truncated (64 bytes not processed)')). Ignoring.
|
| 301 |
+
2025-07-30,14:35:34 | INFO | Train Epoch: 1 [ 8404992/128008192 (7%)] Data (t): 4.396 Batch (t): 9.183, 1799.16/s, 224.896/s/gpu LR: 0.000473 Logit Scale: 78.228 Class_loss: 8.4794 (9.3500) Contrastive_loss: 0.62115 (1.3207) Loss: 9.1006 (10.671)
|
| 302 |
+
2025-07-30,14:48:10 | WARNING | Handling webdataset error (OSError('image file is truncated (17 bytes not processed)')). Ignoring.
|
| 303 |
+
2025-07-30,14:55:15 | INFO | Train Epoch: 1 [ 10502144/128008192 (8%)] Data (t): 7.503 Batch (t): 9.230, 1755.25/s, 219.406/s/gpu LR: 0.000460 Logit Scale: 78.844 Class_loss: 7.9506 (9.1167) Contrastive_loss: 0.31517 (1.1531) Loss: 8.2658 (10.270)
|
| 304 |
+
2025-07-30,15:14:54 | INFO | Train Epoch: 1 [ 12599296/128008192 (10%)] Data (t): 5.899 Batch (t): 9.210, 1779.98/s, 222.497/s/gpu LR: 0.000446 Logit Scale: 79.120 Class_loss: 9.4869 (9.1696) Contrastive_loss: 1.2070 (1.1608) Loss: 10.694 (10.330)
|
| 305 |
+
2025-07-30,15:15:40 | WARNING | Handling webdataset error (OSError('image file is truncated (1 bytes not processed)')). Ignoring.
|
| 306 |
+
2025-07-30,15:34:38 | INFO | Train Epoch: 1 [ 14696448/128008192 (11%)] Data (t): 0.784 Batch (t): 9.251, 1747.88/s, 218.486/s/gpu LR: 0.000433 Logit Scale: 79.566 Class_loss: 9.6662 (9.2317) Contrastive_loss: 1.4667 (1.1991) Loss: 11.133 (10.431)
|
| 307 |
+
2025-07-30,15:54:23 | INFO | Train Epoch: 1 [ 16793600/128008192 (13%)] Data (t): 0.773 Batch (t): 9.251, 1776.18/s, 222.022/s/gpu LR: 0.000420 Logit Scale: 79.827 Class_loss: 7.9958 (9.0944) Contrastive_loss: 0.30020 (1.0992) Loss: 8.2960 (10.194)
|
| 308 |
+
2025-07-30,16:12:41 | WARNING | Handling webdataset error (OSError('image file is truncated (95 bytes not processed)')). Ignoring.
|
| 309 |
+
2025-07-30,16:14:08 | INFO | Train Epoch: 1 [ 18890752/128008192 (15%)] Data (t): 0.803 Batch (t): 9.259, 1755.70/s, 219.463/s/gpu LR: 0.000407 Logit Scale: 80.350 Class_loss: 8.1396 (8.9989) Contrastive_loss: 0.44083 (1.0333) Loss: 8.5804 (10.032)
|
| 310 |
+
2025-07-30,16:23:03 | WARNING | Handling webdataset error (OSError('image file is truncated (25 bytes not processed)')). Ignoring.
|
| 311 |
+
2025-07-30,16:33:57 | INFO | Train Epoch: 1 [ 20987904/128008192 (16%)] Data (t): 3.088 Batch (t): 9.287, 1807.62/s, 225.953/s/gpu LR: 0.000394 Logit Scale: 80.652 Class_loss: 7.9133 (8.9002) Contrastive_loss: 0.28692 (0.96549) Loss: 8.2002 (9.8657)
|
| 312 |
+
2025-07-30,16:38:11 | WARNING | Handling webdataset error (OSError('image file is truncated (16 bytes not processed)')). Ignoring.
|
| 313 |
+
2025-07-30,16:54:04 | INFO | Train Epoch: 1 [ 23085056/128008192 (18%)] Data (t): 3.248 Batch (t): 9.435, 1601.94/s, 200.242/s/gpu LR: 0.000381 Logit Scale: 80.952 Class_loss: 8.0092 (8.8259) Contrastive_loss: 0.31683 (0.91143) Loss: 8.3260 (9.7374)
|
| 314 |
+
2025-07-30,17:14:06 | INFO | Train Epoch: 1 [ 25182208/128008192 (20%)] Data (t): 0.754 Batch (t): 9.386, 1794.56/s, 224.320/s/gpu LR: 0.000368 Logit Scale: 81.245 Class_loss: 8.5667 (8.8060) Contrastive_loss: 0.70917 (0.89587) Loss: 9.2758 (9.7019)
|
| 315 |
+
2025-07-30,17:33:43 | INFO | Train Epoch: 1 [ 27279360/128008192 (21%)] Data (t): 0.751 Batch (t): 9.197, 1761.57/s, 220.197/s/gpu LR: 0.000355 Logit Scale: 81.376 Class_loss: 8.1818 (8.7614) Contrastive_loss: 0.28891 (0.85252) Loss: 8.4707 (9.6139)
|
| 316 |
+
2025-07-30,17:41:19 | WARNING | Handling webdataset error (OSError('image file is truncated (18 bytes not processed)')). Ignoring.
|
| 317 |
+
2025-07-30,17:48:42 | WARNING | Handling webdataset error (OSError('image file is truncated (21 bytes not processed)')). Ignoring.
|
| 318 |
+
2025-07-30,17:53:26 | INFO | Train Epoch: 1 [ 29376512/128008192 (23%)] Data (t): 0.747 Batch (t): 9.239, 1734.64/s, 216.830/s/gpu LR: 0.000343 Logit Scale: 81.824 Class_loss: 7.9930 (8.7102) Contrastive_loss: 0.29237 (0.81518) Loss: 8.2854 (9.5254)
|
| 319 |
+
2025-07-30,17:55:36 | WARNING | Handling webdataset error (OSError('image file is truncated (34 bytes not processed)')). Ignoring.
|
| 320 |
+
2025-07-30,17:55:54 | WARNING | Handling webdataset error (OSError('image file is truncated (1 bytes not processed)')). Ignoring.
|
| 321 |
+
2025-07-30,17:56:23 | WARNING | Handling webdataset error (OSError('image file is truncated (7 bytes not processed)')). Ignoring.
|
| 322 |
+
2025-07-30,18:13:27 | INFO | Train Epoch: 1 [ 31473664/128008192 (25%)] Data (t): 0.762 Batch (t): 9.386, 1787.82/s, 223.478/s/gpu LR: 0.000330 Logit Scale: 82.210 Class_loss: 11.493 (8.8841) Contrastive_loss: 2.2371 (0.90405) Loss: 13.730 (9.7882)
|
| 323 |
+
2025-07-30,18:23:26 | WARNING | Handling webdataset error (OSError('image file is truncated (19 bytes not processed)')). Ignoring.
|
| 324 |
+
2025-07-30,18:31:38 | WARNING | Handling webdataset error (OSError('image file is truncated (26 bytes not processed)')). Ignoring.
|
| 325 |
+
2025-07-30,18:33:06 | INFO | Train Epoch: 1 [ 33570816/128008192 (26%)] Data (t): 3.152 Batch (t): 9.211, 1762.97/s, 220.372/s/gpu LR: 0.000318 Logit Scale: 82.497 Class_loss: 7.8826 (8.8252) Contrastive_loss: 0.24791 (0.86545) Loss: 8.1305 (9.6907)
|
| 326 |
+
2025-07-30,18:46:21 | WARNING | Handling webdataset error (OSError('image file is truncated (20 bytes not processed)')). Ignoring.
|
| 327 |
+
2025-07-30,18:52:50 | INFO | Train Epoch: 1 [ 35667968/128008192 (28%)] Data (t): 0.998 Batch (t): 9.253, 1796.47/s, 224.559/s/gpu LR: 0.000305 Logit Scale: 82.829 Class_loss: 7.8166 (8.7692) Contrastive_loss: 0.28916 (0.83343) Loss: 8.1057 (9.6026)
|
| 328 |
+
2025-07-30,18:59:00 | WARNING | Handling webdataset error (OSError('image file is truncated (120 bytes not processed)')). Ignoring.
|
| 329 |
+
2025-07-30,19:12:35 | INFO | Train Epoch: 1 [ 37765120/128008192 (30%)] Data (t): 2.013 Batch (t): 9.251, 1755.50/s, 219.438/s/gpu LR: 0.000293 Logit Scale: 82.945 Class_loss: 7.8099 (8.7187) Contrastive_loss: 0.25976 (0.80324) Loss: 8.0697 (9.5219)
|
| 330 |
+
2025-07-30,19:32:05 | INFO | Train Epoch: 1 [ 39862272/128008192 (31%)] Data (t): 6.138 Batch (t): 9.145, 1813.25/s, 226.657/s/gpu LR: 0.000281 Logit Scale: 83.204 Class_loss: 7.7841 (8.6720) Contrastive_loss: 0.25112 (0.77564) Loss: 8.0352 (9.4476)
|
| 331 |
+
2025-07-30,19:45:50 | WARNING | Handling webdataset error (OSError('image file is truncated (92 bytes not processed)')). Ignoring.
|
| 332 |
+
2025-07-30,19:48:08 | WARNING | Handling webdataset error (OSError('image file is truncated (40 bytes not processed)')). Ignoring.
|
| 333 |
+
2025-07-30,19:51:33 | INFO | Train Epoch: 1 [ 41959424/128008192 (33%)] Data (t): 6.595 Batch (t): 9.127, 1796.28/s, 224.535/s/gpu LR: 0.000269 Logit Scale: 83.535 Class_loss: 8.3132 (8.6549) Contrastive_loss: 0.40863 (0.75816) Loss: 8.7219 (9.4130)
|
| 334 |
+
2025-07-30,20:10:57 | INFO | Train Epoch: 1 [ 44056576/128008192 (34%)] Data (t): 0.755 Batch (t): 9.095, 1915.57/s, 239.446/s/gpu LR: 0.000258 Logit Scale: 83.916 Class_loss: 7.8539 (8.6185) Contrastive_loss: 0.21403 (0.73343) Loss: 8.0679 (9.3519)
|
| 335 |
+
2025-07-30,20:19:25 | WARNING | Handling webdataset error (OSError('image file is truncated (3 bytes not processed)')). Ignoring.
|
| 336 |
+
2025-07-30,20:30:28 | INFO | Train Epoch: 1 [ 46153728/128008192 (36%)] Data (t): 0.730 Batch (t): 9.144, 1791.28/s, 223.909/s/gpu LR: 0.000246 Logit Scale: 84.169 Class_loss: 8.7380 (8.6237) Contrastive_loss: 0.72368 (0.73300) Loss: 9.4616 (9.3567)
|
| 337 |
+
2025-07-30,20:49:53 | INFO | Train Epoch: 1 [ 48250880/128008192 (38%)] Data (t): 1.738 Batch (t): 9.105, 1763.10/s, 220.387/s/gpu LR: 0.000235 Logit Scale: 84.461 Class_loss: 7.9284 (8.5947) Contrastive_loss: 0.28450 (0.71431) Loss: 8.2129 (9.3090)
|
| 338 |
+
2025-07-30,20:54:30 | WARNING | Handling webdataset error (OSError('image file is truncated (48 bytes not processed)')). Ignoring.
|
| 339 |
+
2025-07-30,21:09:29 | INFO | Train Epoch: 1 [ 50348032/128008192 (39%)] Data (t): 7.476 Batch (t): 9.183, 1752.46/s, 219.058/s/gpu LR: 0.000223 Logit Scale: 84.627 Class_loss: 7.7498 (8.5609) Contrastive_loss: 0.18900 (0.69330) Loss: 7.9388 (9.2542)
|
| 340 |
+
2025-07-30,21:18:13 | WARNING | Handling webdataset error (OSError('image file is truncated (7 bytes not processed)')). Ignoring.
|
| 341 |
+
2025-07-30,21:27:10 | WARNING | Handling webdataset error (OSError('image file is truncated (48 bytes not processed)')). Ignoring.
|
| 342 |
+
2025-07-30,21:27:14 | WARNING | Handling webdataset error (OSError('image file is truncated (11 bytes not processed)')). Ignoring.
|
| 343 |
+
2025-07-30,21:29:08 | INFO | Train Epoch: 1 [ 52445184/128008192 (41%)] Data (t): 7.406 Batch (t): 9.212, 1727.92/s, 215.990/s/gpu LR: 0.000212 Logit Scale: 84.903 Class_loss: 7.8211 (8.5324) Contrastive_loss: 0.23402 (0.67564) Loss: 8.0551 (9.2081)
|
| 344 |
+
2025-07-30,21:34:57 | WARNING | Handling webdataset error (OSError('image file is truncated (6 bytes not processed)')). Ignoring.
|
| 345 |
+
2025-07-30,21:41:14 | WARNING | Handling webdataset error (OSError('image file is truncated (104 bytes not processed)')). Ignoring.
|
| 346 |
+
2025-07-30,21:48:40 | INFO | Train Epoch: 1 [ 54542336/128008192 (43%)] Data (t): 5.671 Batch (t): 9.156, 1797.49/s, 224.687/s/gpu LR: 0.000202 Logit Scale: 85.214 Class_loss: 7.7603 (8.5038) Contrastive_loss: 0.25688 (0.66013) Loss: 8.0172 (9.1640)
|
| 347 |
+
2025-07-30,21:55:46 | WARNING | Handling webdataset error (OSError('image file is truncated (10 bytes not processed)')). Ignoring.
|
| 348 |
+
2025-07-30,22:08:22 | INFO | Train Epoch: 1 [ 56639488/128008192 (44%)] Data (t): 0.765 Batch (t): 9.237, 1790.75/s, 223.844/s/gpu LR: 0.000191 Logit Scale: 85.521 Class_loss: 7.7180 (8.4758) Contrastive_loss: 0.23544 (0.64496) Loss: 7.9534 (9.1207)
|
| 349 |
+
2025-07-30,22:28:02 | INFO | Train Epoch: 1 [ 58736640/128008192 (46%)] Data (t): 0.782 Batch (t): 9.217, 1811.98/s, 226.498/s/gpu LR: 0.000181 Logit Scale: 85.724 Class_loss: 10.146 (8.5334) Contrastive_loss: 1.5604 (0.67653) Loss: 11.706 (9.2099)
|
| 350 |
+
2025-07-30,22:47:28 | WARNING | Handling webdataset error (OSError('image file is truncated (13 bytes not processed)')). Ignoring.
|
| 351 |
+
2025-07-30,22:47:46 | INFO | Train Epoch: 1 [ 60833792/128008192 (48%)] Data (t): 0.756 Batch (t): 9.248, 1752.06/s, 219.008/s/gpu LR: 0.000171 Logit Scale: 86.014 Class_loss: 7.8938 (8.5120) Contrastive_loss: 0.26374 (0.66277) Loss: 8.1575 (9.1748)
|
| 352 |
+
2025-07-30,22:51:56 | WARNING | Handling webdataset error (OSError('image file is truncated (50 bytes not processed)')). Ignoring.
|
| 353 |
+
2025-07-30,23:07:26 | INFO | Train Epoch: 1 [ 62930944/128008192 (49%)] Data (t): 0.762 Batch (t): 9.218, 1807.54/s, 225.943/s/gpu LR: 0.000161 Logit Scale: 86.330 Class_loss: 10.826 (8.5867) Contrastive_loss: 1.6153 (0.69350) Loss: 12.441 (9.2802)
|
| 354 |
+
2025-07-30,23:14:51 | WARNING | Handling webdataset error (OSError('image file is truncated (47 bytes not processed)')). Ignoring.
|
| 355 |
+
2025-07-30,23:27:05 | INFO | Train Epoch: 1 [ 65028096/128008192 (51%)] Data (t): 0.748 Batch (t): 9.212, 1765.20/s, 220.650/s/gpu LR: 0.000151 Logit Scale: 86.547 Class_loss: 10.324 (8.6410) Contrastive_loss: 1.1692 (0.70836) Loss: 11.493 (9.3493)
|
| 356 |
+
2025-07-30,23:35:18 | WARNING | Handling webdataset error (OSError('image file is truncated (21 bytes not processed)')). Ignoring.
|
| 357 |
+
2025-07-30,23:47:13 | WARNING | Handling webdataset error (OSError('image file is truncated (47 bytes not processed)')). Ignoring.
|
| 358 |
+
2025-07-30,23:47:15 | INFO | Train Epoch: 1 [ 67125248/128008192 (52%)] Data (t): 0.811 Batch (t): 9.452, 1783.80/s, 222.975/s/gpu LR: 0.000142 Logit Scale: 86.793 Class_loss: 7.7074 (8.6127) Contrastive_loss: 0.23296 (0.69396) Loss: 7.9403 (9.3066)
|
| 359 |
+
2025-07-30,23:48:55 | WARNING | Handling webdataset error (OSError('image file is truncated (6 bytes not processed)')). Ignoring.
|
| 360 |
+
2025-07-31,00:06:59 | INFO | Train Epoch: 1 [ 69222400/128008192 (54%)] Data (t): 0.773 Batch (t): 9.250, 1770.11/s, 221.264/s/gpu LR: 0.000133 Logit Scale: 87.089 Class_loss: 8.7963 (8.6181) Contrastive_loss: 0.67817 (0.69349) Loss: 9.4745 (9.3116)
|
| 361 |
+
2025-07-31,00:09:06 | WARNING | Handling webdataset error (OSError('image file is truncated (0 bytes not processed)')). Ignoring.
|
| 362 |
+
2025-07-31,00:09:30 | WARNING | Handling webdataset error (OSError('image file is truncated (7 bytes not processed)')). Ignoring.
|
| 363 |
+
2025-07-31,00:26:50 | INFO | Train Epoch: 1 [ 71319552/128008192 (56%)] Data (t): 0.751 Batch (t): 9.308, 1766.64/s, 220.829/s/gpu LR: 0.000124 Logit Scale: 87.133 Class_loss: 7.7362 (8.5929) Contrastive_loss: 0.22986 (0.68025) Loss: 7.9661 (9.2731)
|
| 364 |
+
2025-07-31,00:33:16 | INFO | Starting zero-shot imagenet.
|
| 365 |
+
2025-07-31,00:33:16 | INFO | Building zero-shot classifier
|
| 366 |
+
2025-07-31,00:33:23 | INFO | Using classifier
|
| 367 |
+
2025-07-31,00:50:23 | INFO | Finished zero-shot imagenet.
|
| 368 |
+
2025-07-31,00:50:23 | INFO | Eval Epoch: 1.562396006655574 imagenet-zeroshot-val-top1: 0.4410 imagenet-zeroshot-val-top5: 0.7277
|
| 369 |
+
2025-07-31,01:03:18 | INFO | Train Epoch: 1 [ 73416704/128008192 (57%)] Data (t): 12.605 Batch (t): 17.092, 1788.04/s, 223.505/s/gpu LR: 0.000115 Logit Scale: 87.415 Class_loss: 7.8199 (8.5714) Contrastive_loss: 0.22462 (0.66759) Loss: 8.0445 (9.2390)
|
| 370 |
+
2025-07-31,01:20:02 | WARNING | Handling webdataset error (OSError('image file is truncated (26 bytes not processed)')). Ignoring.
|
| 371 |
+
2025-07-31,01:24:21 | INFO | Train Epoch: 1 [ 75513856/128008192 (59%)] Data (t): 0.756 Batch (t): 9.870, 1721.99/s, 215.248/s/gpu LR: 0.000107 Logit Scale: 87.677 Class_loss: 7.7390 (8.5489) Contrastive_loss: 0.22680 (0.65568) Loss: 7.9658 (9.2046)
|
| 372 |
+
2025-07-31,01:43:55 | INFO | Train Epoch: 1 [ 77611008/128008192 (61%)] Data (t): 0.787 Batch (t): 9.167, 1829.61/s, 228.701/s/gpu LR: 0.000099 Logit Scale: 87.956 Class_loss: 10.388 (8.5973) Contrastive_loss: 1.4153 (0.67567) Loss: 11.803 (9.2730)
|
| 373 |
+
2025-07-31,01:44:00 | WARNING | Handling webdataset error (OSError('image file is truncated (16 bytes not processed)')). Ignoring.
|
| 374 |
+
2025-07-31,02:03:31 | INFO | Train Epoch: 1 [ 79708160/128008192 (62%)] Data (t): 0.771 Batch (t): 9.191, 1898.14/s, 237.267/s/gpu LR: 0.000091 Logit Scale: 88.117 Class_loss: 7.7236 (8.5749) Contrastive_loss: 0.24402 (0.66460) Loss: 7.9676 (9.2395)
|
| 375 |
+
2025-07-31,02:15:29 | WARNING | Handling webdataset error (OSError('image file is truncated (26 bytes not processed)')). Ignoring.
|
| 376 |
+
2025-07-31,02:16:21 | WARNING | Handling webdataset error (OSError('image file is truncated (23 bytes not processed)')). Ignoring.
|
| 377 |
+
2025-07-31,02:22:56 | INFO | Train Epoch: 1 [ 81805312/128008192 (64%)] Data (t): 2.154 Batch (t): 9.103, 1789.83/s, 223.729/s/gpu LR: 0.000083 Logit Scale: 88.311 Class_loss: 7.7629 (8.5546) Contrastive_loss: 0.19614 (0.65289) Loss: 7.9590 (9.2075)
|
| 378 |
+
2025-07-31,02:27:43 | WARNING | Handling webdataset error (OSError('image file is truncated (32 bytes not processed)')). Ignoring.
|
| 379 |
+
2025-07-31,02:28:08 | WARNING | Handling webdataset error (OSError('image file is truncated (14 bytes not processed)')). Ignoring.
|
| 380 |
+
2025-07-31,02:30:53 | WARNING | Handling webdataset error (OSError('image file is truncated (342 bytes not processed)')). Ignoring.
|
| 381 |
+
2025-07-31,02:42:39 | INFO | Train Epoch: 1 [ 83902464/128008192 (66%)] Data (t): 0.737 Batch (t): 9.238, 1788.18/s, 223.522/s/gpu LR: 0.000076 Logit Scale: 88.473 Class_loss: 7.7026 (8.5338) Contrastive_loss: 0.17944 (0.64134) Loss: 7.8820 (9.1752)
|
| 382 |
+
2025-07-31,03:02:24 | INFO | Train Epoch: 1 [ 85999616/128008192 (67%)] Data (t): 0.746 Batch (t): 9.258, 1805.43/s, 225.679/s/gpu LR: 0.000069 Logit Scale: 88.680 Class_loss: 8.8635 (8.5417) Contrastive_loss: 0.67392 (0.64212) Loss: 9.5374 (9.1838)
|
| 383 |
+
2025-07-31,03:08:34 | WARNING | Handling webdataset error (OSError('image file is truncated (57 bytes not processed)')). Ignoring.
|
| 384 |
+
2025-07-31,03:22:05 | INFO | Train Epoch: 1 [ 88096768/128008192 (69%)] Data (t): 0.775 Batch (t): 9.226, 1769.61/s, 221.202/s/gpu LR: 0.000063 Logit Scale: 88.824 Class_loss: 8.1668 (8.5329) Contrastive_loss: 0.34928 (0.63531) Loss: 8.5161 (9.1683)
|
| 385 |
+
2025-07-31,03:27:33 | WARNING | Handling webdataset error (OSError('image file is truncated (19 bytes not processed)')). Ignoring.
|
| 386 |
+
2025-07-31,03:41:48 | INFO | Train Epoch: 1 [ 90193920/128008192 (70%)] Data (t): 0.735 Batch (t): 9.246, 1754.40/s, 219.300/s/gpu LR: 0.000056 Logit Scale: 88.962 Class_loss: 7.7006 (8.5140) Contrastive_loss: 0.17010 (0.62473) Loss: 7.8707 (9.1388)
|
| 387 |
+
2025-07-31,04:01:27 | INFO | Train Epoch: 1 [ 92291072/128008192 (72%)] Data (t): 0.760 Batch (t): 9.212, 1765.99/s, 220.749/s/gpu LR: 0.000050 Logit Scale: 89.141 Class_loss: 7.7760 (8.4976) Contrastive_loss: 0.19024 (0.61508) Loss: 7.9662 (9.1127)
|
| 388 |
+
2025-07-31,04:21:12 | INFO | Train Epoch: 1 [ 94388224/128008192 (74%)] Data (t): 0.756 Batch (t): 9.257, 1785.36/s, 223.170/s/gpu LR: 0.000045 Logit Scale: 89.218 Class_loss: 7.6869 (8.4800) Contrastive_loss: 0.18313 (0.60569) Loss: 7.8700 (9.0857)
|
| 389 |
+
2025-07-31,04:40:58 | INFO | Train Epoch: 1 [ 96485376/128008192 (75%)] Data (t): 0.765 Batch (t): 9.268, 1798.39/s, 224.799/s/gpu LR: 0.000039 Logit Scale: 89.362 Class_loss: 7.6515 (8.4624) Contrastive_loss: 0.22648 (0.59762) Loss: 7.8780 (9.0600)
|
| 390 |
+
2025-07-31,04:48:49 | WARNING | Handling webdataset error (OSError('image file is truncated (42 bytes not processed)')). Ignoring.
|
| 391 |
+
2025-07-31,05:00:31 | INFO | Train Epoch: 1 [ 98582528/128008192 (77%)] Data (t): 0.778 Batch (t): 9.161, 1965.26/s, 245.657/s/gpu LR: 0.000034 Logit Scale: 89.484 Class_loss: 7.6371 (8.4452) Contrastive_loss: 0.15613 (0.58842) Loss: 7.7932 (9.0336)
|
| 392 |
+
2025-07-31,05:20:10 | INFO | Train Epoch: 1 [100679680/128008192 (79%)] Data (t): 4.811 Batch (t): 9.210, 1800.98/s, 225.122/s/gpu LR: 0.000030 Logit Scale: 89.615 Class_loss: 7.6551 (8.4291) Contrastive_loss: 0.20721 (0.58064) Loss: 7.8623 (9.0097)
|
| 393 |
+
2025-07-31,05:36:02 | WARNING | Handling webdataset error (OSError('image file is truncated (10 bytes not processed)')). Ignoring.
|
| 394 |
+
2025-07-31,05:39:44 | INFO | Train Epoch: 1 [102776832/128008192 (80%)] Data (t): 7.793 Batch (t): 9.173, 1787.78/s, 223.472/s/gpu LR: 0.000025 Logit Scale: 89.711 Class_loss: 7.7312 (8.4151) Contrastive_loss: 0.18457 (0.57272) Loss: 7.9158 (8.9878)
|
| 395 |
+
2025-07-31,05:59:13 | INFO | Train Epoch: 1 [104873984/128008192 (82%)] Data (t): 7.774 Batch (t): 9.129, 1793.45/s, 224.181/s/gpu LR: 0.000021 Logit Scale: 89.793 Class_loss: 10.410 (8.4542) Contrastive_loss: 1.3471 (0.58791) Loss: 11.757 (9.0421)
|
| 396 |
+
2025-07-31,06:08:34 | WARNING | Handling webdataset error (OSError('image file is truncated (44 bytes not processed)')). Ignoring.
|
| 397 |
+
2025-07-31,06:18:49 | INFO | Train Epoch: 1 [106971136/128008192 (84%)] Data (t): 7.815 Batch (t): 9.190, 1747.81/s, 218.476/s/gpu LR: 0.000018 Logit Scale: 89.861 Class_loss: 7.8082 (8.4418) Contrastive_loss: 0.22640 (0.58095) Loss: 8.0346 (9.0227)
|
| 398 |
+
2025-07-31,06:38:29 | INFO | Train Epoch: 1 [109068288/128008192 (85%)] Data (t): 7.862 Batch (t): 9.221, 1809.37/s, 226.171/s/gpu LR: 0.000014 Logit Scale: 89.913 Class_loss: 7.6852 (8.4275) Contrastive_loss: 0.19133 (0.57360) Loss: 7.8765 (9.0011)
|
| 399 |
+
2025-07-31,06:55:52 | WARNING | Handling webdataset error (UnidentifiedImageError('cannot identify image file <_io.BytesIO object at 0x7fcb7e0e9e90>')). Ignoring.
|
| 400 |
+
2025-07-31,06:58:06 | INFO | Train Epoch: 1 [111165440/128008192 (87%)] Data (t): 7.856 Batch (t): 9.197, 1764.08/s, 220.509/s/gpu LR: 0.000011 Logit Scale: 89.967 Class_loss: 7.6915 (8.4139) Contrastive_loss: 0.19737 (0.56664) Loss: 7.8889 (8.9805)
|
| 401 |
+
2025-07-31,07:17:24 | INFO | Train Epoch: 1 [113262592/128008192 (88%)] Data (t): 2.753 Batch (t): 9.044, 1815.05/s, 226.882/s/gpu LR: 0.000009 Logit Scale: 90.002 Class_loss: 7.6850 (8.4006) Contrastive_loss: 0.16164 (0.55927) Loss: 7.8466 (8.9599)
|
| 402 |
+
2025-07-31,07:36:54 | INFO | Train Epoch: 1 [115359744/128008192 (90%)] Data (t): 3.403 Batch (t): 9.141, 1784.81/s, 223.101/s/gpu LR: 0.000006 Logit Scale: 90.020 Class_loss: 7.6075 (8.3865) Contrastive_loss: 0.19356 (0.55274) Loss: 7.8011 (8.9392)
|
| 403 |
+
2025-07-31,07:52:36 | WARNING | Handling webdataset error (OSError('image file is truncated (2 bytes not processed)')). Ignoring.
|
| 404 |
+
2025-07-31,07:56:29 | INFO | Train Epoch: 1 [117456896/128008192 (92%)] Data (t): 2.851 Batch (t): 9.176, 1782.17/s, 222.771/s/gpu LR: 0.000004 Logit Scale: 90.036 Class_loss: 9.8365 (8.4119) Contrastive_loss: 0.91282 (0.55906) Loss: 10.749 (8.9710)
|
| 405 |
+
2025-07-31,08:03:36 | WARNING | Handling webdataset error (OSError('image file is truncated (18 bytes not processed)')). Ignoring.
|
| 406 |
+
2025-07-31,08:16:06 | INFO | Train Epoch: 1 [119554048/128008192 (93%)] Data (t): 2.176 Batch (t): 9.201, 1772.36/s, 221.545/s/gpu LR: 0.000003 Logit Scale: 90.053 Class_loss: 7.8051 (8.4014) Contrastive_loss: 0.21499 (0.55313) Loss: 8.0201 (8.9546)
|
| 407 |
+
2025-07-31,08:19:42 | WARNING | Handling webdataset error (OSError('image file is truncated (50 bytes not processed)')). Ignoring.
|
| 408 |
+
2025-07-31,08:25:59 | WARNING | Handling webdataset error (OSError('image file is truncated (13 bytes not processed)')). Ignoring.
|
| 409 |
+
2025-07-31,08:35:45 | INFO | Train Epoch: 1 [121651200/128008192 (95%)] Data (t): 2.955 Batch (t): 9.212, 1764.31/s, 220.539/s/gpu LR: 0.000002 Logit Scale: 90.061 Class_loss: 7.6910 (8.3894) Contrastive_loss: 0.19827 (0.54711) Loss: 7.8893 (8.9365)
|
| 410 |
+
2025-07-31,08:55:19 | INFO | Train Epoch: 1 [123748352/128008192 (97%)] Data (t): 2.826 Batch (t): 9.168, 1789.20/s, 223.650/s/gpu LR: 0.000001 Logit Scale: 90.063 Class_loss: 7.6113 (8.3764) Contrastive_loss: 0.16671 (0.54077) Loss: 7.7780 (8.9172)
|
| 411 |
+
2025-07-31,09:13:03 | WARNING | Handling webdataset error (OSError('image file is truncated (8 bytes not processed)')). Ignoring.
|
| 412 |
+
2025-07-31,09:15:03 | INFO | Train Epoch: 1 [125845504/128008192 (98%)] Data (t): 2.839 Batch (t): 9.251, 1766.20/s, 220.775/s/gpu LR: 0.000000 Logit Scale: 90.063 Class_loss: 7.5902 (8.3636) Contrastive_loss: 0.19531 (0.53511) Loss: 7.7855 (8.8987)
|
| 413 |
+
2025-07-31,09:15:33 | WARNING | Handling webdataset error (OSError('broken data stream when reading image file')). Ignoring.
|
| 414 |
+
2025-07-31,09:28:42 | WARNING | Handling webdataset error (OSError('image file is truncated (83 bytes not processed)')). Ignoring.
|
| 415 |
+
2025-07-31,09:34:40 | INFO | Train Epoch: 1 [127942656/128008192 (100%)] Data (t): 0.758 Batch (t): 9.198, 1795.75/s, 224.469/s/gpu LR: 0.000000 Logit Scale: 90.063 Class_loss: 10.061 (8.3909) Contrastive_loss: 1.0481 (0.54338) Loss: 11.109 (8.9343)
|
| 416 |
+
2025-07-31,09:35:17 | INFO | Train Epoch: 1 [128008192/128008192 (100%)] Data (t): 0.831 Batch (t): 9.067, 1830.16/s, 228.770/s/gpu LR: 0.000000 Logit Scale: 90.063 Class_loss: 8.1817 (8.3876) Contrastive_loss: 0.32792 (0.53996) Loss: 8.5097 (8.9276)
|
| 417 |
+
2025-07-31,09:35:23 | INFO | Starting zero-shot imagenet.
|
| 418 |
+
2025-07-31,09:35:23 | INFO | Building zero-shot classifier
|
| 419 |
+
2025-07-31,09:35:30 | INFO | Using classifier
|
| 420 |
+
2025-07-31,09:52:53 | INFO | Finished zero-shot imagenet.
|
| 421 |
+
2025-07-31,09:52:53 | INFO | Eval Epoch: 2 imagenet-zeroshot-val-top1: 0.4694 imagenet-zeroshot-val-top5: 0.7545
|
logs-lr1e-3-datacomp-rebuttal/clipcls_vit_b16_s512m_bs16k_weighted_14/params.txt
ADDED
|
@@ -0,0 +1,113 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
NDR_patch_size: 16
|
| 2 |
+
accum_freq: 1
|
| 3 |
+
aug_cfg: {}
|
| 4 |
+
batch_size: 2048
|
| 5 |
+
beta1: 0.9
|
| 6 |
+
beta2: 0.98
|
| 7 |
+
checkpoint_path: ./logs-lr1e-3-datacomp-rebuttal/clipcls_vit_b16_s512m_bs16k_weighted_14/checkpoints
|
| 8 |
+
class_loss_weight: 1.4
|
| 9 |
+
coca_caption_loss_weight: 2.0
|
| 10 |
+
coca_contrastive_loss_weight: 1.0
|
| 11 |
+
copy_codebase: False
|
| 12 |
+
csv_caption_key: title
|
| 13 |
+
csv_img_key: filepath
|
| 14 |
+
csv_separator:
|
| 15 |
+
dataset_resampled: False
|
| 16 |
+
dataset_type: webdataset
|
| 17 |
+
ddp_static_graph: True
|
| 18 |
+
debug: False
|
| 19 |
+
delete_prev_step_ckpt: True
|
| 20 |
+
delete_previous_checkpoint: False
|
| 21 |
+
device: cuda:0
|
| 22 |
+
dist_backend: nccl
|
| 23 |
+
dist_url: env://
|
| 24 |
+
distill: False
|
| 25 |
+
distill_model: None
|
| 26 |
+
distill_pretrained: None
|
| 27 |
+
distributed: True
|
| 28 |
+
epochs: 2
|
| 29 |
+
epochs_cooldown: None
|
| 30 |
+
eps: 1e-06
|
| 31 |
+
force_custom_text: False
|
| 32 |
+
force_image_size: 224
|
| 33 |
+
force_patch_dropout: None
|
| 34 |
+
force_quick_gelu: False
|
| 35 |
+
gather_with_grad: True
|
| 36 |
+
global_batch_size: 16384
|
| 37 |
+
grad_checkpointing: True
|
| 38 |
+
grad_clip_norm: None
|
| 39 |
+
horovod: False
|
| 40 |
+
image_interpolation: None
|
| 41 |
+
image_mean: None
|
| 42 |
+
image_resize_mode: None
|
| 43 |
+
image_std: None
|
| 44 |
+
imagenet_v2: None
|
| 45 |
+
imagenet_val: /mnt/bn/zilongdata-us/dataset/ILSVRC/Data/CLS-LOC/val
|
| 46 |
+
is_cls_token: True
|
| 47 |
+
local_loss: True
|
| 48 |
+
local_rank: 0
|
| 49 |
+
lock_image: False
|
| 50 |
+
lock_image_freeze_bn_stats: False
|
| 51 |
+
lock_image_unlocked_groups: 0
|
| 52 |
+
lock_text: False
|
| 53 |
+
lock_text_freeze_layer_norm: False
|
| 54 |
+
lock_text_unlocked_layers: 0
|
| 55 |
+
log_every_n_steps: 128
|
| 56 |
+
log_level: 20
|
| 57 |
+
log_local: False
|
| 58 |
+
log_path: ./logs-lr1e-3-datacomp-rebuttal/clipcls_vit_b16_s512m_bs16k_weighted_14/out.log
|
| 59 |
+
logs: ./logs-lr1e-3-datacomp-rebuttal
|
| 60 |
+
lr: 0.001
|
| 61 |
+
lr_cooldown_end: 0.0
|
| 62 |
+
lr_cooldown_power: 1.0
|
| 63 |
+
lr_scheduler: cosine
|
| 64 |
+
max_seq_len: 15000
|
| 65 |
+
model: CLIPCLS-ViT-B-16
|
| 66 |
+
name: clipcls_vit_b16_s512m_bs16k_weighted_14
|
| 67 |
+
native_dynamic_resolution: False
|
| 68 |
+
no_set_device_rank: False
|
| 69 |
+
only_class_loss: False
|
| 70 |
+
only_packing: False
|
| 71 |
+
post_train: False
|
| 72 |
+
precision: amp_bfloat16
|
| 73 |
+
pretrained:
|
| 74 |
+
pretrained_image:
|
| 75 |
+
pretrained_text:
|
| 76 |
+
rank: 0
|
| 77 |
+
remote_sync: None
|
| 78 |
+
remote_sync_frequency: 300
|
| 79 |
+
remote_sync_protocol: s3
|
| 80 |
+
report_to: wandb
|
| 81 |
+
resume: None
|
| 82 |
+
rope_attn_num_heads: 12
|
| 83 |
+
rope_model_width: 768
|
| 84 |
+
save_every_n_steps: 6104
|
| 85 |
+
save_frequency: 1
|
| 86 |
+
save_most_recent: False
|
| 87 |
+
seed: 0
|
| 88 |
+
siglip: False
|
| 89 |
+
skip_scheduler: False
|
| 90 |
+
tensorboard: False
|
| 91 |
+
tensorboard_path:
|
| 92 |
+
torchcompile: False
|
| 93 |
+
torchscript: False
|
| 94 |
+
trace: False
|
| 95 |
+
train_data: /mnt/bn/zilongdata-us/dataset/recap-datacomp-1b-webdataset/{000000..140146}.tar
|
| 96 |
+
train_data_upsampling_factors: None
|
| 97 |
+
train_num_samples: 128000000
|
| 98 |
+
use_bn_sync: False
|
| 99 |
+
use_bnb_linear: None
|
| 100 |
+
use_idf: True
|
| 101 |
+
val_data: None
|
| 102 |
+
val_frequency: 1
|
| 103 |
+
val_num_samples: None
|
| 104 |
+
val_steps: 6104
|
| 105 |
+
wandb: True
|
| 106 |
+
wandb_notes:
|
| 107 |
+
wandb_project_name: cls-clip-batch-size
|
| 108 |
+
warmup: 500
|
| 109 |
+
wd: 0.2
|
| 110 |
+
workers: 1
|
| 111 |
+
world_size: 8
|
| 112 |
+
zeroshot_frequency: 2
|
| 113 |
+
zeroshot_steps: 6104
|