Upload folder using huggingface_hub
Browse files- logs-lr1e-3-datacomp-rebuttal/clipcls_vit_b16_s512m_bs16k_weighted_06/checkpoints/epoch_1.pt +3 -0
- logs-lr1e-3-datacomp-rebuttal/clipcls_vit_b16_s512m_bs16k_weighted_06/checkpoints/epoch_2.pt +3 -0
- logs-lr1e-3-datacomp-rebuttal/clipcls_vit_b16_s512m_bs16k_weighted_06/checkpoints/results.jsonl +3 -0
- logs-lr1e-3-datacomp-rebuttal/clipcls_vit_b16_s512m_bs16k_weighted_06/out.log +421 -0
- logs-lr1e-3-datacomp-rebuttal/clipcls_vit_b16_s512m_bs16k_weighted_06/params.txt +113 -0
logs-lr1e-3-datacomp-rebuttal/clipcls_vit_b16_s512m_bs16k_weighted_06/checkpoints/epoch_1.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:34dc1b306c7a7c950cd8fce66775cab8b7f89f64077a2fdab116796db1d187f7
|
| 3 |
+
size 2252180672
|
logs-lr1e-3-datacomp-rebuttal/clipcls_vit_b16_s512m_bs16k_weighted_06/checkpoints/epoch_2.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:23ad4809ac06c91eab69d613bd21b0a6a4949dcb9d85f907762ad5ae131e2229
|
| 3 |
+
size 2252180672
|
logs-lr1e-3-datacomp-rebuttal/clipcls_vit_b16_s512m_bs16k_weighted_06/checkpoints/results.jsonl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"imagenet-zeroshot-val-top1": 0.28146, "imagenet-zeroshot-val-top5": 0.53678}
|
| 2 |
+
{"imagenet-zeroshot-val-top1": 0.42686, "imagenet-zeroshot-val-top5": 0.70042}
|
| 3 |
+
{"imagenet-zeroshot-val-top1": 0.44994, "imagenet-zeroshot-val-top5": 0.72922}
|
logs-lr1e-3-datacomp-rebuttal/clipcls_vit_b16_s512m_bs16k_weighted_06/out.log
ADDED
|
@@ -0,0 +1,421 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
2025-07-29,19:23:00 | INFO | No latest resume checkpoint found in ./logs-lr1e-3-datacomp-rebuttal/clipcls_vit_b16_s512m_bs16k_weighted_06/checkpoints.
|
| 2 |
+
2025-07-29,19:23:10 | INFO | Running in distributed mode with multiple processes. Device: cuda:0.Process (global: 0, local 0), total 8.
|
| 3 |
+
2025-07-29,19:23:10 | INFO | Loaded CLIPCLS-ViT-B-16 model config.
|
| 4 |
+
2025-07-29,19:23:12 | INFO | Model:
|
| 5 |
+
2025-07-29,19:23:12 | INFO | CLIPCLS(
|
| 6 |
+
(visual): VisionTransformer(
|
| 7 |
+
(conv1): Conv2d(3, 768, kernel_size=(16, 16), stride=(16, 16), bias=False)
|
| 8 |
+
(patch_dropout): Identity()
|
| 9 |
+
(ln_pre): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 10 |
+
(transformer): Transformer(
|
| 11 |
+
(resblocks): ModuleList(
|
| 12 |
+
(0-11): 12 x ResidualAttentionBlock(
|
| 13 |
+
(ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 14 |
+
(attn): MultiheadAttention(
|
| 15 |
+
(out_proj): NonDynamicallyQuantizableLinear(in_features=768, out_features=768, bias=True)
|
| 16 |
+
)
|
| 17 |
+
(ls_1): Identity()
|
| 18 |
+
(ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 19 |
+
(mlp): Sequential(
|
| 20 |
+
(c_fc): Linear(in_features=768, out_features=3072, bias=True)
|
| 21 |
+
(gelu): GELU(approximate='none')
|
| 22 |
+
(c_proj): Linear(in_features=3072, out_features=768, bias=True)
|
| 23 |
+
)
|
| 24 |
+
(ls_2): Identity()
|
| 25 |
+
)
|
| 26 |
+
)
|
| 27 |
+
)
|
| 28 |
+
(ln_post): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 29 |
+
)
|
| 30 |
+
(text): TextTransformer(
|
| 31 |
+
(token_embedding): Embedding(49408, 512)
|
| 32 |
+
(transformer): Transformer(
|
| 33 |
+
(resblocks): ModuleList(
|
| 34 |
+
(0-11): 12 x ResidualAttentionBlock(
|
| 35 |
+
(ln_1): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
| 36 |
+
(attn): MultiheadAttention(
|
| 37 |
+
(out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)
|
| 38 |
+
)
|
| 39 |
+
(ls_1): Identity()
|
| 40 |
+
(ln_2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
| 41 |
+
(mlp): Sequential(
|
| 42 |
+
(c_fc): Linear(in_features=512, out_features=2048, bias=True)
|
| 43 |
+
(gelu): GELU(approximate='none')
|
| 44 |
+
(c_proj): Linear(in_features=2048, out_features=512, bias=True)
|
| 45 |
+
)
|
| 46 |
+
(ls_2): Identity()
|
| 47 |
+
)
|
| 48 |
+
)
|
| 49 |
+
)
|
| 50 |
+
(ln_final): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
| 51 |
+
)
|
| 52 |
+
(text_decoder): MixClsHead(
|
| 53 |
+
(mlps): ModuleList()
|
| 54 |
+
(ln_mlp): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 55 |
+
(text_projection): Linear(in_features=768, out_features=49408, bias=True)
|
| 56 |
+
)
|
| 57 |
+
)
|
| 58 |
+
2025-07-29,19:23:12 | INFO | Params:
|
| 59 |
+
2025-07-29,19:23:12 | INFO | NDR_patch_size: 16
|
| 60 |
+
2025-07-29,19:23:12 | INFO | accum_freq: 1
|
| 61 |
+
2025-07-29,19:23:12 | INFO | aug_cfg: {}
|
| 62 |
+
2025-07-29,19:23:12 | INFO | batch_size: 2048
|
| 63 |
+
2025-07-29,19:23:12 | INFO | beta1: 0.9
|
| 64 |
+
2025-07-29,19:23:12 | INFO | beta2: 0.98
|
| 65 |
+
2025-07-29,19:23:12 | INFO | checkpoint_path: ./logs-lr1e-3-datacomp-rebuttal/clipcls_vit_b16_s512m_bs16k_weighted_06/checkpoints
|
| 66 |
+
2025-07-29,19:23:12 | INFO | class_loss_weight: 0.6
|
| 67 |
+
2025-07-29,19:23:12 | INFO | coca_caption_loss_weight: 2.0
|
| 68 |
+
2025-07-29,19:23:12 | INFO | coca_contrastive_loss_weight: 1.0
|
| 69 |
+
2025-07-29,19:23:12 | INFO | copy_codebase: False
|
| 70 |
+
2025-07-29,19:23:12 | INFO | csv_caption_key: title
|
| 71 |
+
2025-07-29,19:23:12 | INFO | csv_img_key: filepath
|
| 72 |
+
2025-07-29,19:23:12 | INFO | csv_separator:
|
| 73 |
+
2025-07-29,19:23:12 | INFO | dataset_resampled: False
|
| 74 |
+
2025-07-29,19:23:12 | INFO | dataset_type: webdataset
|
| 75 |
+
2025-07-29,19:23:12 | INFO | ddp_static_graph: True
|
| 76 |
+
2025-07-29,19:23:12 | INFO | debug: False
|
| 77 |
+
2025-07-29,19:23:12 | INFO | delete_prev_step_ckpt: True
|
| 78 |
+
2025-07-29,19:23:12 | INFO | delete_previous_checkpoint: False
|
| 79 |
+
2025-07-29,19:23:12 | INFO | device: cuda:0
|
| 80 |
+
2025-07-29,19:23:12 | INFO | dist_backend: nccl
|
| 81 |
+
2025-07-29,19:23:12 | INFO | dist_url: env://
|
| 82 |
+
2025-07-29,19:23:12 | INFO | distill: False
|
| 83 |
+
2025-07-29,19:23:12 | INFO | distill_model: None
|
| 84 |
+
2025-07-29,19:23:12 | INFO | distill_pretrained: None
|
| 85 |
+
2025-07-29,19:23:12 | INFO | distributed: True
|
| 86 |
+
2025-07-29,19:23:12 | INFO | epochs: 2
|
| 87 |
+
2025-07-29,19:23:12 | INFO | epochs_cooldown: None
|
| 88 |
+
2025-07-29,19:23:12 | INFO | eps: 1e-06
|
| 89 |
+
2025-07-29,19:23:12 | INFO | force_custom_text: False
|
| 90 |
+
2025-07-29,19:23:12 | INFO | force_image_size: 224
|
| 91 |
+
2025-07-29,19:23:12 | INFO | force_patch_dropout: None
|
| 92 |
+
2025-07-29,19:23:12 | INFO | force_quick_gelu: False
|
| 93 |
+
2025-07-29,19:23:12 | INFO | gather_with_grad: True
|
| 94 |
+
2025-07-29,19:23:12 | INFO | global_batch_size: 16384
|
| 95 |
+
2025-07-29,19:23:12 | INFO | grad_checkpointing: True
|
| 96 |
+
2025-07-29,19:23:12 | INFO | grad_clip_norm: None
|
| 97 |
+
2025-07-29,19:23:12 | INFO | horovod: False
|
| 98 |
+
2025-07-29,19:23:12 | INFO | image_interpolation: None
|
| 99 |
+
2025-07-29,19:23:12 | INFO | image_mean: None
|
| 100 |
+
2025-07-29,19:23:12 | INFO | image_resize_mode: None
|
| 101 |
+
2025-07-29,19:23:12 | INFO | image_std: None
|
| 102 |
+
2025-07-29,19:23:12 | INFO | imagenet_v2: None
|
| 103 |
+
2025-07-29,19:23:12 | INFO | imagenet_val: /mnt/bn/zilongdata-us/dataset/ILSVRC/Data/CLS-LOC/val
|
| 104 |
+
2025-07-29,19:23:12 | INFO | is_cls_token: True
|
| 105 |
+
2025-07-29,19:23:12 | INFO | local_loss: True
|
| 106 |
+
2025-07-29,19:23:12 | INFO | local_rank: 0
|
| 107 |
+
2025-07-29,19:23:12 | INFO | lock_image: False
|
| 108 |
+
2025-07-29,19:23:12 | INFO | lock_image_freeze_bn_stats: False
|
| 109 |
+
2025-07-29,19:23:12 | INFO | lock_image_unlocked_groups: 0
|
| 110 |
+
2025-07-29,19:23:12 | INFO | lock_text: False
|
| 111 |
+
2025-07-29,19:23:12 | INFO | lock_text_freeze_layer_norm: False
|
| 112 |
+
2025-07-29,19:23:12 | INFO | lock_text_unlocked_layers: 0
|
| 113 |
+
2025-07-29,19:23:12 | INFO | log_every_n_steps: 128
|
| 114 |
+
2025-07-29,19:23:12 | INFO | log_level: 20
|
| 115 |
+
2025-07-29,19:23:12 | INFO | log_local: False
|
| 116 |
+
2025-07-29,19:23:12 | INFO | log_path: ./logs-lr1e-3-datacomp-rebuttal/clipcls_vit_b16_s512m_bs16k_weighted_06/out.log
|
| 117 |
+
2025-07-29,19:23:12 | INFO | logs: ./logs-lr1e-3-datacomp-rebuttal
|
| 118 |
+
2025-07-29,19:23:12 | INFO | lr: 0.001
|
| 119 |
+
2025-07-29,19:23:12 | INFO | lr_cooldown_end: 0.0
|
| 120 |
+
2025-07-29,19:23:12 | INFO | lr_cooldown_power: 1.0
|
| 121 |
+
2025-07-29,19:23:12 | INFO | lr_scheduler: cosine
|
| 122 |
+
2025-07-29,19:23:12 | INFO | max_seq_len: 15000
|
| 123 |
+
2025-07-29,19:23:12 | INFO | model: CLIPCLS-ViT-B-16
|
| 124 |
+
2025-07-29,19:23:12 | INFO | name: clipcls_vit_b16_s512m_bs16k_weighted_06
|
| 125 |
+
2025-07-29,19:23:12 | INFO | native_dynamic_resolution: False
|
| 126 |
+
2025-07-29,19:23:12 | INFO | no_set_device_rank: False
|
| 127 |
+
2025-07-29,19:23:12 | INFO | only_class_loss: False
|
| 128 |
+
2025-07-29,19:23:12 | INFO | only_packing: False
|
| 129 |
+
2025-07-29,19:23:12 | INFO | post_train: False
|
| 130 |
+
2025-07-29,19:23:12 | INFO | precision: amp_bfloat16
|
| 131 |
+
2025-07-29,19:23:12 | INFO | pretrained:
|
| 132 |
+
2025-07-29,19:23:12 | INFO | pretrained_image:
|
| 133 |
+
2025-07-29,19:23:12 | INFO | pretrained_text:
|
| 134 |
+
2025-07-29,19:23:12 | INFO | rank: 0
|
| 135 |
+
2025-07-29,19:23:12 | INFO | remote_sync: None
|
| 136 |
+
2025-07-29,19:23:12 | INFO | remote_sync_frequency: 300
|
| 137 |
+
2025-07-29,19:23:12 | INFO | remote_sync_protocol: s3
|
| 138 |
+
2025-07-29,19:23:12 | INFO | report_to: wandb
|
| 139 |
+
2025-07-29,19:23:12 | INFO | resume: None
|
| 140 |
+
2025-07-29,19:23:12 | INFO | rope_attn_num_heads: 12
|
| 141 |
+
2025-07-29,19:23:12 | INFO | rope_model_width: 768
|
| 142 |
+
2025-07-29,19:23:12 | INFO | save_every_n_steps: 6104
|
| 143 |
+
2025-07-29,19:23:12 | INFO | save_frequency: 1
|
| 144 |
+
2025-07-29,19:23:12 | INFO | save_most_recent: False
|
| 145 |
+
2025-07-29,19:23:12 | INFO | seed: 0
|
| 146 |
+
2025-07-29,19:23:12 | INFO | siglip: False
|
| 147 |
+
2025-07-29,19:23:12 | INFO | skip_scheduler: False
|
| 148 |
+
2025-07-29,19:23:12 | INFO | tensorboard: False
|
| 149 |
+
2025-07-29,19:23:12 | INFO | tensorboard_path:
|
| 150 |
+
2025-07-29,19:23:12 | INFO | torchcompile: False
|
| 151 |
+
2025-07-29,19:23:12 | INFO | torchscript: False
|
| 152 |
+
2025-07-29,19:23:12 | INFO | trace: False
|
| 153 |
+
2025-07-29,19:23:12 | INFO | train_data: /mnt/bn/zilongdata-us/dataset/recap-datacomp-1b-webdataset/{000000..140146}.tar
|
| 154 |
+
2025-07-29,19:23:12 | INFO | train_data_upsampling_factors: None
|
| 155 |
+
2025-07-29,19:23:12 | INFO | train_num_samples: 128000000
|
| 156 |
+
2025-07-29,19:23:12 | INFO | use_bn_sync: False
|
| 157 |
+
2025-07-29,19:23:12 | INFO | use_bnb_linear: None
|
| 158 |
+
2025-07-29,19:23:12 | INFO | use_idf: True
|
| 159 |
+
2025-07-29,19:23:12 | INFO | val_data: None
|
| 160 |
+
2025-07-29,19:23:12 | INFO | val_frequency: 1
|
| 161 |
+
2025-07-29,19:23:12 | INFO | val_num_samples: None
|
| 162 |
+
2025-07-29,19:23:12 | INFO | val_steps: 6104
|
| 163 |
+
2025-07-29,19:23:12 | INFO | wandb: True
|
| 164 |
+
2025-07-29,19:23:12 | INFO | wandb_notes:
|
| 165 |
+
2025-07-29,19:23:12 | INFO | wandb_project_name: cls-clip-batch-size
|
| 166 |
+
2025-07-29,19:23:12 | INFO | warmup: 500
|
| 167 |
+
2025-07-29,19:23:12 | INFO | wd: 0.2
|
| 168 |
+
2025-07-29,19:23:12 | INFO | workers: 1
|
| 169 |
+
2025-07-29,19:23:12 | INFO | world_size: 8
|
| 170 |
+
2025-07-29,19:23:12 | INFO | zeroshot_frequency: 2
|
| 171 |
+
2025-07-29,19:23:12 | INFO | zeroshot_steps: 6104
|
| 172 |
+
2025-07-29,19:23:29 | INFO | Start epoch 0
|
| 173 |
+
2025-07-29,19:23:44 | INFO | Train Epoch: 0 [ 16384/128008192 (0%)] Data (t): 11.569 Batch (t): 15.150, 1081.43/s, 135.179/s/gpu LR: 0.000002 Logit Scale: 14.286 Class_loss: 6.7956 (6.7956) Contrastive_loss: 9.7838 (9.7838) Loss: 16.579 (16.579)
|
| 174 |
+
2025-07-29,19:39:58 | WARNING | Handling webdataset error (OSError('image file is truncated (6 bytes not processed)')). Ignoring.
|
| 175 |
+
2025-07-29,19:40:50 | WARNING | Handling webdataset error (OSError('image file is truncated (17 bytes not processed)')). Ignoring.
|
| 176 |
+
2025-07-29,19:42:32 | INFO | Train Epoch: 0 [ 2113536/128008192 (2%)] Data (t): 7.482 Batch (t): 8.804, 1825.39/s, 228.174/s/gpu LR: 0.000258 Logit Scale: 14.315 Class_loss: 5.6362 (6.2159) Contrastive_loss: 8.9219 (9.3528) Loss: 14.558 (15.569)
|
| 177 |
+
2025-07-29,20:01:59 | INFO | Train Epoch: 0 [ 4210688/128008192 (3%)] Data (t): 2.152 Batch (t): 9.117, 1805.28/s, 225.660/s/gpu LR: 0.000514 Logit Scale: 14.681 Class_loss: 5.4078 (5.9466) Contrastive_loss: 8.2559 (8.9872) Loss: 13.664 (14.934)
|
| 178 |
+
2025-07-29,20:04:06 | WARNING | Handling webdataset error (OSError('image file is truncated (112 bytes not processed)')). Ignoring.
|
| 179 |
+
2025-07-29,20:21:41 | INFO | Train Epoch: 0 [ 6307840/128008192 (5%)] Data (t): 0.729 Batch (t): 9.238, 1792.11/s, 224.014/s/gpu LR: 0.000770 Logit Scale: 15.505 Class_loss: 4.7037 (5.6358) Contrastive_loss: 6.6411 (8.4007) Loss: 11.345 (14.037)
|
| 180 |
+
2025-07-29,20:36:26 | WARNING | Handling webdataset error (OSError('image file is truncated (6 bytes not processed)')). Ignoring.
|
| 181 |
+
2025-07-29,20:41:22 | INFO | Train Epoch: 0 [ 8404992/128008192 (7%)] Data (t): 0.719 Batch (t): 9.222, 1775.22/s, 221.903/s/gpu LR: 0.001000 Logit Scale: 16.403 Class_loss: 4.4351 (5.3957) Contrastive_loss: 6.0108 (7.9227) Loss: 10.446 (13.318)
|
| 182 |
+
2025-07-29,21:00:57 | INFO | Train Epoch: 0 [ 10502144/128008192 (8%)] Data (t): 0.737 Batch (t): 9.178, 1767.81/s, 220.976/s/gpu LR: 0.001000 Logit Scale: 18.112 Class_loss: 4.5937 (5.2620) Contrastive_loss: 5.8091 (7.5704) Loss: 10.403 (12.832)
|
| 183 |
+
2025-07-29,21:05:04 | WARNING | Handling webdataset error (OSError('image file is truncated (5 bytes not processed)')). Ignoring.
|
| 184 |
+
2025-07-29,21:05:53 | WARNING | Handling webdataset error (OSError('image file is truncated (7 bytes not processed)')). Ignoring.
|
| 185 |
+
2025-07-29,21:19:56 | WARNING | Handling webdataset error (OSError('image file is truncated (26 bytes not processed)')). Ignoring.
|
| 186 |
+
2025-07-29,21:20:30 | INFO | Train Epoch: 0 [ 12599296/128008192 (10%)] Data (t): 0.724 Batch (t): 9.168, 1809.33/s, 226.166/s/gpu LR: 0.000999 Logit Scale: 20.231 Class_loss: 4.3037 (5.1251) Contrastive_loss: 5.2320 (7.2364) Loss: 9.5358 (12.361)
|
| 187 |
+
2025-07-29,21:40:11 | INFO | Train Epoch: 0 [ 14696448/128008192 (11%)] Data (t): 0.748 Batch (t): 9.226, 1793.82/s, 224.227/s/gpu LR: 0.000998 Logit Scale: 22.748 Class_loss: 4.2242 (5.0125) Contrastive_loss: 4.0198 (6.8343) Loss: 8.2440 (11.847)
|
| 188 |
+
2025-07-29,21:48:36 | WARNING | Handling webdataset error (OSError('image file is truncated (17 bytes not processed)')). Ignoring.
|
| 189 |
+
2025-07-29,21:59:48 | INFO | Train Epoch: 0 [ 16793600/128008192 (13%)] Data (t): 0.734 Batch (t): 9.195, 1695.68/s, 211.960/s/gpu LR: 0.000997 Logit Scale: 25.638 Class_loss: 4.1082 (4.9120) Contrastive_loss: 3.2284 (6.4336) Loss: 7.3366 (11.346)
|
| 190 |
+
2025-07-29,22:19:28 | INFO | Train Epoch: 0 [ 18890752/128008192 (15%)] Data (t): 0.748 Batch (t): 9.221, 1803.44/s, 225.430/s/gpu LR: 0.000995 Logit Scale: 28.741 Class_loss: 4.0654 (4.8274) Contrastive_loss: 2.8602 (6.0763) Loss: 6.9256 (10.904)
|
| 191 |
+
2025-07-29,22:39:02 | INFO | Train Epoch: 0 [ 20987904/128008192 (16%)] Data (t): 0.756 Batch (t): 9.174, 1779.38/s, 222.422/s/gpu LR: 0.000993 Logit Scale: 31.531 Class_loss: 5.1218 (4.8541) Contrastive_loss: 5.4341 (6.0179) Loss: 10.556 (10.872)
|
| 192 |
+
2025-07-29,22:58:36 | INFO | Train Epoch: 0 [ 23085056/128008192 (18%)] Data (t): 0.742 Batch (t): 9.169, 1814.83/s, 226.854/s/gpu LR: 0.000991 Logit Scale: 33.709 Class_loss: 5.2174 (4.8844) Contrastive_loss: 5.5962 (5.9828) Loss: 10.814 (10.867)
|
| 193 |
+
2025-07-29,23:14:41 | WARNING | Handling webdataset error (OSError('image file is truncated (21 bytes not processed)')). Ignoring.
|
| 194 |
+
2025-07-29,23:18:20 | INFO | Train Epoch: 0 [ 25182208/128008192 (20%)] Data (t): 0.736 Batch (t): 9.248, 1777.13/s, 222.141/s/gpu LR: 0.000988 Logit Scale: 37.217 Class_loss: 4.6938 (4.8698) Contrastive_loss: 4.0684 (5.8355) Loss: 8.7622 (10.705)
|
| 195 |
+
2025-07-29,23:35:54 | WARNING | Handling webdataset error (OSError('image file is truncated (21 bytes not processed)')). Ignoring.
|
| 196 |
+
2025-07-29,23:37:50 | INFO | Train Epoch: 0 [ 27279360/128008192 (21%)] Data (t): 0.744 Batch (t): 9.143, 1770.24/s, 221.280/s/gpu LR: 0.000985 Logit Scale: 39.945 Class_loss: 4.0207 (4.8091) Contrastive_loss: 2.0149 (5.5626) Loss: 6.0355 (10.372)
|
| 197 |
+
2025-07-29,23:49:05 | WARNING | Handling webdataset error (OSError('image file is truncated (27 bytes not processed)')). Ignoring.
|
| 198 |
+
2025-07-29,23:57:29 | INFO | Train Epoch: 0 [ 29376512/128008192 (23%)] Data (t): 0.723 Batch (t): 9.213, 1736.50/s, 217.063/s/gpu LR: 0.000982 Logit Scale: 43.124 Class_loss: 4.2841 (4.7741) Contrastive_loss: 2.6132 (5.3660) Loss: 6.8973 (10.140)
|
| 199 |
+
2025-07-30,00:07:33 | WARNING | Handling webdataset error (OSError('image file is truncated (77 bytes not processed)')). Ignoring.
|
| 200 |
+
2025-07-30,00:17:10 | INFO | Train Epoch: 0 [ 31473664/128008192 (25%)] Data (t): 0.731 Batch (t): 9.227, 1799.16/s, 224.895/s/gpu LR: 0.000978 Logit Scale: 45.345 Class_loss: 3.9387 (4.7219) Contrastive_loss: 1.4119 (5.1189) Loss: 5.3506 (9.8408)
|
| 201 |
+
2025-07-30,00:21:51 | WARNING | Handling webdataset error (OSError('image file is truncated (48 bytes not processed)')). Ignoring.
|
| 202 |
+
2025-07-30,00:21:55 | WARNING | Handling webdataset error (OSError('image file is truncated (11 bytes not processed)')). Ignoring.
|
| 203 |
+
2025-07-30,00:32:56 | WARNING | Handling webdataset error (OSError('image file is truncated (9 bytes not processed)')). Ignoring.
|
| 204 |
+
2025-07-30,00:36:52 | INFO | Train Epoch: 0 [ 33570816/128008192 (26%)] Data (t): 0.715 Batch (t): 9.228, 1751.42/s, 218.927/s/gpu LR: 0.000974 Logit Scale: 48.180 Class_loss: 4.0496 (4.6823) Contrastive_loss: 1.7874 (4.9229) Loss: 5.8370 (9.6052)
|
| 205 |
+
2025-07-30,00:48:51 | WARNING | Handling webdataset error (OSError('image file is truncated (8 bytes not processed)')). Ignoring.
|
| 206 |
+
2025-07-30,00:56:27 | INFO | Train Epoch: 0 [ 35667968/128008192 (28%)] Data (t): 0.734 Batch (t): 9.180, 1788.30/s, 223.537/s/gpu LR: 0.000970 Logit Scale: 49.811 Class_loss: 5.1402 (4.7078) Contrastive_loss: 4.5042 (4.8996) Loss: 9.6444 (9.6074)
|
| 207 |
+
2025-07-30,01:16:05 | INFO | Train Epoch: 0 [ 37765120/128008192 (30%)] Data (t): 0.756 Batch (t): 9.204, 1779.41/s, 222.426/s/gpu LR: 0.000965 Logit Scale: 52.017 Class_loss: 3.7666 (4.6582) Contrastive_loss: 1.0733 (4.6982) Loss: 4.8399 (9.3565)
|
| 208 |
+
2025-07-30,01:35:46 | INFO | Train Epoch: 0 [ 39862272/128008192 (31%)] Data (t): 0.763 Batch (t): 9.232, 1727.05/s, 215.881/s/gpu LR: 0.000960 Logit Scale: 53.049 Class_loss: 3.8236 (4.6165) Contrastive_loss: 1.1397 (4.5203) Loss: 4.9633 (9.1368)
|
| 209 |
+
2025-07-30,01:46:53 | WARNING | Handling webdataset error (OSError('image file is truncated (6 bytes not processed)')). Ignoring.
|
| 210 |
+
2025-07-30,01:54:51 | WARNING | Handling webdataset error (OSError('image file is truncated (8 bytes not processed)')). Ignoring.
|
| 211 |
+
2025-07-30,01:55:12 | INFO | Train Epoch: 0 [ 41959424/128008192 (33%)] Data (t): 0.751 Batch (t): 9.109, 1803.62/s, 225.452/s/gpu LR: 0.000955 Logit Scale: 54.613 Class_loss: 3.7204 (4.5738) Contrastive_loss: 0.93632 (4.3497) Loss: 4.6568 (8.9235)
|
| 212 |
+
2025-07-30,02:14:34 | INFO | Train Epoch: 0 [ 44056576/128008192 (34%)] Data (t): 0.746 Batch (t): 9.077, 1774.47/s, 221.809/s/gpu LR: 0.000949 Logit Scale: 56.245 Class_loss: 3.7210 (4.5351) Contrastive_loss: 0.80661 (4.1886) Loss: 4.5276 (8.7237)
|
| 213 |
+
2025-07-30,02:26:47 | WARNING | Handling webdataset error (OSError('image file is truncated (10 bytes not processed)')). Ignoring.
|
| 214 |
+
2025-07-30,02:34:06 | INFO | Train Epoch: 0 [ 46153728/128008192 (36%)] Data (t): 0.732 Batch (t): 9.158, 1771.72/s, 221.465/s/gpu LR: 0.000943 Logit Scale: 57.817 Class_loss: 3.7235 (4.4998) Contrastive_loss: 0.86360 (4.0440) Loss: 4.5871 (8.5438)
|
| 215 |
+
2025-07-30,02:49:41 | WARNING | Handling webdataset error (OSError('image file is truncated (9 bytes not processed)')). Ignoring.
|
| 216 |
+
2025-07-30,02:52:15 | WARNING | Handling webdataset error (OSError('image file is truncated (1 bytes not processed)')). Ignoring.
|
| 217 |
+
2025-07-30,02:52:47 | WARNING | Handling webdataset error (OSError('image file is truncated (7 bytes not processed)')). Ignoring.
|
| 218 |
+
2025-07-30,02:53:51 | INFO | Train Epoch: 0 [ 48250880/128008192 (38%)] Data (t): 0.749 Batch (t): 9.251, 1755.65/s, 219.457/s/gpu LR: 0.000937 Logit Scale: 58.571 Class_loss: 3.7367 (4.4680) Contrastive_loss: 0.85967 (3.9114) Loss: 4.5964 (8.3794)
|
| 219 |
+
2025-07-30,03:13:10 | INFO | Train Epoch: 0 [ 50348032/128008192 (39%)] Data (t): 0.740 Batch (t): 9.055, 1803.61/s, 225.452/s/gpu LR: 0.000930 Logit Scale: 59.363 Class_loss: 3.6768 (4.4364) Contrastive_loss: 0.71026 (3.7833) Loss: 4.3871 (8.2197)
|
| 220 |
+
2025-07-30,03:15:47 | WARNING | Handling webdataset error (UnidentifiedImageError('cannot identify image file <_io.BytesIO object at 0x7fbf52d391c0>')). Ignoring.
|
| 221 |
+
2025-07-30,03:22:41 | WARNING | Handling webdataset error (OSError('image file is truncated (5 bytes not processed)')). Ignoring.
|
| 222 |
+
2025-07-30,03:32:41 | INFO | Train Epoch: 0 [ 52445184/128008192 (41%)] Data (t): 0.752 Batch (t): 9.154, 1787.12/s, 223.389/s/gpu LR: 0.000923 Logit Scale: 60.141 Class_loss: 3.7280 (4.4091) Contrastive_loss: 0.85087 (3.6705) Loss: 4.5789 (8.0796)
|
| 223 |
+
2025-07-30,03:52:22 | INFO | Train Epoch: 0 [ 54542336/128008192 (43%)] Data (t): 0.735 Batch (t): 9.221, 1807.89/s, 225.987/s/gpu LR: 0.000916 Logit Scale: 61.149 Class_loss: 4.9056 (4.4275) Contrastive_loss: 3.3866 (3.6600) Loss: 8.2923 (8.0875)
|
| 224 |
+
2025-07-30,04:11:53 | INFO | Train Epoch: 0 [ 56639488/128008192 (44%)] Data (t): 0.724 Batch (t): 9.154, 1783.97/s, 222.996/s/gpu LR: 0.000909 Logit Scale: 62.473 Class_loss: 3.6941 (4.4013) Contrastive_loss: 0.66101 (3.5529) Loss: 4.3551 (7.9542)
|
| 225 |
+
2025-07-30,04:31:33 | INFO | Train Epoch: 0 [ 58736640/128008192 (46%)] Data (t): 0.727 Batch (t): 9.216, 1795.80/s, 224.475/s/gpu LR: 0.000901 Logit Scale: 62.905 Class_loss: 3.7863 (4.3801) Contrastive_loss: 0.87308 (3.4605) Loss: 4.6594 (7.8406)
|
| 226 |
+
2025-07-30,04:46:39 | WARNING | Handling webdataset error (OSError('image file is truncated (104 bytes not processed)')). Ignoring.
|
| 227 |
+
2025-07-30,04:51:07 | INFO | Train Epoch: 0 [ 60833792/128008192 (48%)] Data (t): 0.723 Batch (t): 9.172, 1792.59/s, 224.074/s/gpu LR: 0.000893 Logit Scale: 63.918 Class_loss: 3.6940 (4.3572) Contrastive_loss: 0.68068 (3.3678) Loss: 4.3747 (7.7251)
|
| 228 |
+
2025-07-30,05:10:51 | INFO | Train Epoch: 0 [ 62930944/128008192 (49%)] Data (t): 0.713 Batch (t): 9.251, 1785.17/s, 223.146/s/gpu LR: 0.000884 Logit Scale: 64.760 Class_loss: 3.6773 (4.3353) Contrastive_loss: 0.71053 (3.2821) Loss: 4.3878 (7.6174)
|
| 229 |
+
2025-07-30,05:19:24 | WARNING | Handling webdataset error (OSError('image file is truncated (21 bytes not processed)')). Ignoring.
|
| 230 |
+
2025-07-30,05:30:33 | INFO | Train Epoch: 0 [ 65028096/128008192 (51%)] Data (t): 0.738 Batch (t): 9.233, 1707.08/s, 213.385/s/gpu LR: 0.000876 Logit Scale: 65.258 Class_loss: 3.6362 (4.3134) Contrastive_loss: 0.62734 (3.1992) Loss: 4.2636 (7.5126)
|
| 231 |
+
2025-07-30,05:37:58 | WARNING | Handling webdataset error (OSError('image file is truncated (47 bytes not processed)')). Ignoring.
|
| 232 |
+
2025-07-30,05:40:15 | WARNING | Handling webdataset error (OSError('image file is truncated (25 bytes not processed)')). Ignoring.
|
| 233 |
+
2025-07-30,05:41:32 | WARNING | Handling webdataset error (OSError('image file is truncated (4 bytes not processed)')). Ignoring.
|
| 234 |
+
2025-07-30,05:50:18 | INFO | Train Epoch: 0 [ 67125248/128008192 (52%)] Data (t): 0.758 Batch (t): 9.261, 1739.52/s, 217.440/s/gpu LR: 0.000867 Logit Scale: 65.724 Class_loss: 3.6366 (4.2929) Contrastive_loss: 0.60559 (3.1206) Loss: 4.2422 (7.4135)
|
| 235 |
+
2025-07-30,06:09:56 | INFO | Train Epoch: 0 [ 69222400/128008192 (54%)] Data (t): 0.742 Batch (t): 9.198, 1793.02/s, 224.127/s/gpu LR: 0.000858 Logit Scale: 65.926 Class_loss: 3.5759 (4.2719) Contrastive_loss: 0.57761 (3.0458) Loss: 4.1535 (7.3176)
|
| 236 |
+
2025-07-30,06:21:17 | WARNING | Handling webdataset error (OSError('image file is truncated (14 bytes not processed)')). Ignoring.
|
| 237 |
+
2025-07-30,06:29:42 | INFO | Train Epoch: 0 [ 71319552/128008192 (56%)] Data (t): 0.733 Batch (t): 9.267, 1772.71/s, 221.588/s/gpu LR: 0.000848 Logit Scale: 66.672 Class_loss: 3.6120 (4.2530) Contrastive_loss: 0.60972 (2.9762) Loss: 4.2217 (7.2292)
|
| 238 |
+
2025-07-30,06:49:04 | INFO | Train Epoch: 0 [ 73416704/128008192 (57%)] Data (t): 0.727 Batch (t): 9.080, 1800.82/s, 225.102/s/gpu LR: 0.000839 Logit Scale: 67.139 Class_loss: 3.5774 (4.2342) Contrastive_loss: 0.51911 (2.9079) Loss: 4.0965 (7.1421)
|
| 239 |
+
2025-07-30,07:07:51 | INFO | Train Epoch: 0 [ 75513856/128008192 (59%)] Data (t): 3.407 Batch (t): 8.808, 1851.23/s, 231.404/s/gpu LR: 0.000829 Logit Scale: 68.066 Class_loss: 3.5526 (4.2158) Contrastive_loss: 0.54399 (2.8440) Loss: 4.0966 (7.0598)
|
| 240 |
+
2025-07-30,07:27:19 | INFO | Train Epoch: 0 [ 77611008/128008192 (61%)] Data (t): 1.145 Batch (t): 9.118, 1765.73/s, 220.716/s/gpu LR: 0.000819 Logit Scale: 68.326 Class_loss: 3.5716 (4.1989) Contrastive_loss: 0.49946 (2.7823) Loss: 4.0711 (6.9812)
|
| 241 |
+
2025-07-30,07:40:27 | WARNING | Handling webdataset error (OSError('image file is truncated (96 bytes not processed)')). Ignoring.
|
| 242 |
+
2025-07-30,07:47:04 | INFO | Train Epoch: 0 [ 79708160/128008192 (62%)] Data (t): 0.758 Batch (t): 9.261, 1779.38/s, 222.422/s/gpu LR: 0.000808 Logit Scale: 68.783 Class_loss: 3.5659 (4.1826) Contrastive_loss: 0.49526 (2.7237) Loss: 4.0612 (6.9063)
|
| 243 |
+
2025-07-30,08:06:45 | INFO | Train Epoch: 0 [ 81805312/128008192 (64%)] Data (t): 0.751 Batch (t): 9.227, 1775.96/s, 221.995/s/gpu LR: 0.000798 Logit Scale: 69.530 Class_loss: 3.7097 (4.1708) Contrastive_loss: 0.77931 (2.6751) Loss: 4.4890 (6.8459)
|
| 244 |
+
2025-07-30,08:11:00 | WARNING | Handling webdataset error (OSError('image file is truncated (16 bytes not processed)')). Ignoring.
|
| 245 |
+
2025-07-30,08:14:17 | WARNING | Handling webdataset error (OSError('image file is truncated (25 bytes not processed)')). Ignoring.
|
| 246 |
+
2025-07-30,08:26:22 | INFO | Train Epoch: 0 [ 83902464/128008192 (66%)] Data (t): 0.726 Batch (t): 9.194, 1831.29/s, 228.911/s/gpu LR: 0.000787 Logit Scale: 69.854 Class_loss: 3.6296 (4.1576) Contrastive_loss: 0.52065 (2.6225) Loss: 4.1503 (6.7801)
|
| 247 |
+
2025-07-30,08:44:34 | WARNING | Handling webdataset error (OSError('image file is truncated (42 bytes not processed)')). Ignoring.
|
| 248 |
+
2025-07-30,08:45:58 | INFO | Train Epoch: 0 [ 85999616/128008192 (67%)] Data (t): 0.755 Batch (t): 9.192, 1769.81/s, 221.227/s/gpu LR: 0.000776 Logit Scale: 69.985 Class_loss: 3.5544 (4.1432) Contrastive_loss: 0.44538 (2.5707) Loss: 3.9998 (6.7139)
|
| 249 |
+
2025-07-30,09:03:57 | WARNING | Handling webdataset error (OSError('image file is truncated (6 bytes not processed)')). Ignoring.
|
| 250 |
+
2025-07-30,09:04:11 | WARNING | Handling webdataset error (OSError('image file is truncated (26 bytes not processed)')). Ignoring.
|
| 251 |
+
2025-07-30,09:05:37 | INFO | Train Epoch: 0 [ 88096768/128008192 (69%)] Data (t): 0.787 Batch (t): 9.207, 1804.82/s, 225.602/s/gpu LR: 0.000765 Logit Scale: 70.208 Class_loss: 3.5454 (4.1293) Contrastive_loss: 0.44047 (2.5212) Loss: 3.9859 (6.6505)
|
| 252 |
+
2025-07-30,09:06:44 | WARNING | Handling webdataset error (OSError('image file is truncated (23 bytes not processed)')). Ignoring.
|
| 253 |
+
2025-07-30,09:25:16 | INFO | Train Epoch: 0 [ 90193920/128008192 (70%)] Data (t): 0.748 Batch (t): 9.215, 1789.58/s, 223.698/s/gpu LR: 0.000753 Logit Scale: 71.028 Class_loss: 3.5364 (4.1159) Contrastive_loss: 0.41842 (2.4734) Loss: 3.9549 (6.5892)
|
| 254 |
+
2025-07-30,09:25:37 | WARNING | Handling webdataset error (OSError('image file is truncated (33 bytes not processed)')). Ignoring.
|
| 255 |
+
2025-07-30,09:35:02 | WARNING | Handling webdataset error (OSError('image file is truncated (76 bytes not processed)')). Ignoring.
|
| 256 |
+
2025-07-30,09:44:59 | INFO | Train Epoch: 0 [ 92291072/128008192 (72%)] Data (t): 0.717 Batch (t): 9.236, 1748.48/s, 218.560/s/gpu LR: 0.000742 Logit Scale: 71.450 Class_loss: 3.5454 (4.1032) Contrastive_loss: 0.45596 (2.4285) Loss: 4.0013 (6.5317)
|
| 257 |
+
2025-07-30,10:04:40 | INFO | Train Epoch: 0 [ 94388224/128008192 (74%)] Data (t): 0.745 Batch (t): 9.233, 1763.71/s, 220.463/s/gpu LR: 0.000730 Logit Scale: 71.782 Class_loss: 3.4864 (4.0898) Contrastive_loss: 0.46183 (2.3858) Loss: 3.9482 (6.4756)
|
| 258 |
+
2025-07-30,10:24:23 | INFO | Train Epoch: 0 [ 96485376/128008192 (75%)] Data (t): 0.749 Batch (t): 9.242, 1791.66/s, 223.957/s/gpu LR: 0.000718 Logit Scale: 72.215 Class_loss: 3.5467 (4.0782) Contrastive_loss: 0.45505 (2.3447) Loss: 4.0017 (6.4229)
|
| 259 |
+
2025-07-30,10:29:58 | WARNING | Handling webdataset error (OSError('image file is truncated (52 bytes not processed)')). Ignoring.
|
| 260 |
+
2025-07-30,10:36:06 | WARNING | Handling webdataset error (OSError('image file is truncated (83 bytes not processed)')). Ignoring.
|
| 261 |
+
2025-07-30,10:44:05 | INFO | Train Epoch: 0 [ 98582528/128008192 (77%)] Data (t): 0.749 Batch (t): 9.230, 1782.37/s, 222.797/s/gpu LR: 0.000706 Logit Scale: 72.587 Class_loss: 3.5448 (4.0671) Contrastive_loss: 0.42509 (2.3047) Loss: 3.9699 (6.3718)
|
| 262 |
+
2025-07-30,10:57:25 | INFO | Starting zero-shot imagenet.
|
| 263 |
+
2025-07-30,10:57:25 | INFO | Building zero-shot classifier
|
| 264 |
+
2025-07-30,10:57:32 | INFO | Using classifier
|
| 265 |
+
2025-07-30,11:11:28 | INFO | Finished zero-shot imagenet.
|
| 266 |
+
2025-07-30,11:11:28 | INFO | Eval Epoch: 0.7811340074235249 imagenet-zeroshot-val-top1: 0.2815 imagenet-zeroshot-val-top5: 0.5368
|
| 267 |
+
2025-07-30,11:17:30 | INFO | Train Epoch: 0 [100679680/128008192 (79%)] Data (t): 8.639 Batch (t): 15.664, 1791.87/s, 223.984/s/gpu LR: 0.000694 Logit Scale: 73.208 Class_loss: 3.5102 (4.0557) Contrastive_loss: 0.47296 (2.2673) Loss: 3.9832 (6.3231)
|
| 268 |
+
2025-07-30,11:37:07 | INFO | Train Epoch: 0 [102776832/128008192 (80%)] Data (t): 0.787 Batch (t): 9.197, 1760.24/s, 220.030/s/gpu LR: 0.000682 Logit Scale: 73.511 Class_loss: 3.4791 (4.0442) Contrastive_loss: 0.39952 (2.2300) Loss: 3.8786 (6.2742)
|
| 269 |
+
2025-07-30,11:41:41 | WARNING | Handling webdataset error (OSError('image file is truncated (19 bytes not processed)')). Ignoring.
|
| 270 |
+
2025-07-30,11:56:59 | INFO | Train Epoch: 0 [104873984/128008192 (82%)] Data (t): 0.779 Batch (t): 9.314, 1813.67/s, 226.709/s/gpu LR: 0.000669 Logit Scale: 73.715 Class_loss: 4.8687 (4.0604) Contrastive_loss: 2.5217 (2.2357) Loss: 7.3904 (6.2961)
|
| 271 |
+
2025-07-30,12:16:36 | INFO | Train Epoch: 0 [106971136/128008192 (84%)] Data (t): 0.767 Batch (t): 9.197, 1791.17/s, 223.896/s/gpu LR: 0.000657 Logit Scale: 73.888 Class_loss: 3.5293 (4.0502) Contrastive_loss: 0.43229 (2.2010) Loss: 3.9616 (6.2512)
|
| 272 |
+
2025-07-30,12:36:19 | INFO | Train Epoch: 0 [109068288/128008192 (85%)] Data (t): 0.754 Batch (t): 9.237, 1733.28/s, 216.660/s/gpu LR: 0.000644 Logit Scale: 74.366 Class_loss: 3.4878 (4.0396) Contrastive_loss: 0.39346 (2.1669) Loss: 3.8812 (6.2065)
|
| 273 |
+
2025-07-30,12:38:44 | WARNING | Handling webdataset error (OSError('image file is truncated (37 bytes not processed)')). Ignoring.
|
| 274 |
+
2025-07-30,12:55:57 | INFO | Train Epoch: 0 [111165440/128008192 (87%)] Data (t): 0.733 Batch (t): 9.202, 1782.22/s, 222.778/s/gpu LR: 0.000631 Logit Scale: 75.062 Class_loss: 3.5281 (4.0301) Contrastive_loss: 0.41414 (2.1344) Loss: 3.9422 (6.1645)
|
| 275 |
+
2025-07-30,13:15:35 | INFO | Train Epoch: 0 [113262592/128008192 (88%)] Data (t): 0.740 Batch (t): 9.206, 1776.03/s, 222.004/s/gpu LR: 0.000618 Logit Scale: 75.023 Class_loss: 3.5459 (4.0213) Contrastive_loss: 0.41503 (2.1032) Loss: 3.9609 (6.1245)
|
| 276 |
+
2025-07-30,13:30:00 | WARNING | Handling webdataset error (OSError('image file is truncated (20 bytes not processed)')). Ignoring.
|
| 277 |
+
2025-07-30,13:35:17 | INFO | Train Epoch: 0 [115359744/128008192 (90%)] Data (t): 0.743 Batch (t): 9.236, 1776.21/s, 222.026/s/gpu LR: 0.000605 Logit Scale: 75.332 Class_loss: 3.5717 (4.0132) Contrastive_loss: 0.48316 (2.0743) Loss: 4.0549 (6.0875)
|
| 278 |
+
2025-07-30,13:49:58 | WARNING | Handling webdataset error (OSError('image file is truncated (21 bytes not processed)')). Ignoring.
|
| 279 |
+
2025-07-30,13:52:15 | WARNING | Handling webdataset error (OSError('image file is truncated (6 bytes not processed)')). Ignoring.
|
| 280 |
+
2025-07-30,13:54:57 | INFO | Train Epoch: 0 [117456896/128008192 (92%)] Data (t): 0.760 Batch (t): 9.215, 1765.76/s, 220.720/s/gpu LR: 0.000592 Logit Scale: 75.789 Class_loss: 3.4649 (4.0036) Contrastive_loss: 0.34954 (2.0440) Loss: 3.8145 (6.0476)
|
| 281 |
+
2025-07-30,13:58:44 | WARNING | Handling webdataset error (OSError('image file is truncated (18 bytes not processed)')). Ignoring.
|
| 282 |
+
2025-07-30,14:14:18 | INFO | Train Epoch: 0 [119554048/128008192 (93%)] Data (t): 0.775 Batch (t): 9.074, 1788.08/s, 223.511/s/gpu LR: 0.000579 Logit Scale: 75.923 Class_loss: 3.5218 (3.9953) Contrastive_loss: 0.39673 (2.0156) Loss: 3.9186 (6.0109)
|
| 283 |
+
2025-07-30,14:21:32 | WARNING | Handling webdataset error (OSError('image file is truncated (28 bytes not processed)')). Ignoring.
|
| 284 |
+
2025-07-30,14:27:48 | WARNING | Handling webdataset error (OSError('image file is truncated (1 bytes not processed)')). Ignoring.
|
| 285 |
+
2025-07-30,14:33:40 | INFO | Train Epoch: 0 [121651200/128008192 (95%)] Data (t): 0.791 Batch (t): 9.078, 1914.05/s, 239.256/s/gpu LR: 0.000566 Logit Scale: 76.037 Class_loss: 4.6785 (4.0069) Contrastive_loss: 2.3099 (2.0206) Loss: 6.9884 (6.0275)
|
| 286 |
+
2025-07-30,14:52:50 | INFO | Train Epoch: 0 [123748352/128008192 (97%)] Data (t): 1.947 Batch (t): 8.985, 1773.47/s, 221.683/s/gpu LR: 0.000553 Logit Scale: 76.467 Class_loss: 3.5208 (3.9988) Contrastive_loss: 0.33875 (1.9926) Loss: 3.8596 (5.9914)
|
| 287 |
+
2025-07-30,14:59:56 | WARNING | Handling webdataset error (OSError('image file is truncated (181 bytes not processed)')). Ignoring.
|
| 288 |
+
2025-07-30,15:12:31 | INFO | Train Epoch: 0 [125845504/128008192 (98%)] Data (t): 0.770 Batch (t): 9.224, 1777.57/s, 222.196/s/gpu LR: 0.000540 Logit Scale: 76.801 Class_loss: 4.0405 (3.9995) Contrastive_loss: 1.1751 (1.9791) Loss: 5.2156 (5.9786)
|
| 289 |
+
2025-07-30,15:26:55 | WARNING | Handling webdataset error (OSError('image file is truncated (31 bytes not processed)')). Ignoring.
|
| 290 |
+
2025-07-30,15:32:11 | INFO | Train Epoch: 0 [127942656/128008192 (100%)] Data (t): 0.734 Batch (t): 9.218, 1779.16/s, 222.395/s/gpu LR: 0.000526 Logit Scale: 77.136 Class_loss: 3.4958 (3.9914) Contrastive_loss: 0.41776 (1.9540) Loss: 3.9135 (5.9453)
|
| 291 |
+
2025-07-30,15:32:48 | INFO | Train Epoch: 0 [128008192/128008192 (100%)] Data (t): 0.758 Batch (t): 9.227, 1785.00/s, 223.125/s/gpu LR: 0.000526 Logit Scale: 77.148 Class_loss: 3.4938 (3.9835) Contrastive_loss: 0.36614 (1.9288) Loss: 3.8599 (5.9122)
|
| 292 |
+
2025-07-30,15:32:56 | INFO | Start epoch 1
|
| 293 |
+
2025-07-30,15:33:09 | INFO | Train Epoch: 1 [ 16384/128008192 (0%)] Data (t): 10.681 Batch (t): 12.001, 1365.25/s, 170.657/s/gpu LR: 0.000526 Logit Scale: 77.151 Class_loss: 3.4629 (3.4629) Contrastive_loss: 0.31562 (0.31562) Loss: 3.7785 (3.7785)
|
| 294 |
+
2025-07-30,15:40:35 | WARNING | Handling webdataset error (OSError('image file is truncated (63 bytes not processed)')). Ignoring.
|
| 295 |
+
2025-07-30,15:45:37 | WARNING | Handling webdataset error (OSError('image file is truncated (208 bytes not processed)')). Ignoring.
|
| 296 |
+
2025-07-30,15:52:15 | INFO | Train Epoch: 1 [ 2113536/128008192 (2%)] Data (t): 4.364 Batch (t): 8.959, 1793.93/s, 224.241/s/gpu LR: 0.000513 Logit Scale: 77.380 Class_loss: 4.6091 (4.0360) Contrastive_loss: 2.3564 (1.3360) Loss: 6.9654 (5.3720)
|
| 297 |
+
2025-07-30,15:53:15 | WARNING | Handling webdataset error (OSError('image file is truncated (77 bytes not processed)')). Ignoring.
|
| 298 |
+
2025-07-30,16:11:52 | INFO | Train Epoch: 1 [ 4210688/128008192 (3%)] Data (t): 0.749 Batch (t): 9.190, 1827.47/s, 228.433/s/gpu LR: 0.000499 Logit Scale: 77.433 Class_loss: 3.7566 (3.9428) Contrastive_loss: 0.64012 (1.1040) Loss: 4.3967 (5.0469)
|
| 299 |
+
2025-07-30,16:31:22 | INFO | Train Epoch: 1 [ 6307840/128008192 (5%)] Data (t): 0.755 Batch (t): 9.142, 1820.47/s, 227.558/s/gpu LR: 0.000486 Logit Scale: 77.986 Class_loss: 4.8510 (4.1699) Contrastive_loss: 2.8157 (1.5320) Loss: 7.6668 (5.7019)
|
| 300 |
+
2025-07-30,16:43:45 | WARNING | Handling webdataset error (OSError('image file is truncated (64 bytes not processed)')). Ignoring.
|
| 301 |
+
2025-07-30,16:50:56 | INFO | Train Epoch: 1 [ 8404992/128008192 (7%)] Data (t): 0.746 Batch (t): 9.175, 1763.08/s, 220.385/s/gpu LR: 0.000473 Logit Scale: 78.172 Class_loss: 3.6427 (4.0645) Contrastive_loss: 0.61428 (1.3484) Loss: 4.2570 (5.4129)
|
| 302 |
+
2025-07-30,17:02:42 | WARNING | Handling webdataset error (OSError('image file is truncated (17 bytes not processed)')). Ignoring.
|
| 303 |
+
2025-07-30,17:10:31 | INFO | Train Epoch: 1 [ 10502144/128008192 (8%)] Data (t): 0.736 Batch (t): 9.177, 1771.83/s, 221.479/s/gpu LR: 0.000460 Logit Scale: 78.675 Class_loss: 3.4458 (3.9613) Contrastive_loss: 0.25540 (1.1663) Loss: 3.7012 (5.1276)
|
| 304 |
+
2025-07-30,17:30:06 | INFO | Train Epoch: 1 [ 12599296/128008192 (10%)] Data (t): 0.746 Batch (t): 9.183, 1866.94/s, 233.368/s/gpu LR: 0.000446 Logit Scale: 78.988 Class_loss: 4.1195 (3.9839) Contrastive_loss: 1.2643 (1.1803) Loss: 5.3838 (5.1642)
|
| 305 |
+
2025-07-30,17:30:42 | WARNING | Handling webdataset error (OSError('image file is truncated (1 bytes not processed)')). Ignoring.
|
| 306 |
+
2025-07-30,17:49:14 | INFO | Train Epoch: 1 [ 14696448/128008192 (11%)] Data (t): 3.249 Batch (t): 8.969, 1813.64/s, 226.705/s/gpu LR: 0.000433 Logit Scale: 79.418 Class_loss: 4.2375 (4.0156) Contrastive_loss: 1.6311 (1.2366) Loss: 5.8686 (5.2522)
|
| 307 |
+
2025-07-30,18:08:52 | INFO | Train Epoch: 1 [ 16793600/128008192 (13%)] Data (t): 0.729 Batch (t): 9.204, 1770.85/s, 221.356/s/gpu LR: 0.000420 Logit Scale: 79.742 Class_loss: 3.4508 (3.9529) Contrastive_loss: 0.29928 (1.1325) Loss: 3.7501 (5.0853)
|
| 308 |
+
2025-07-30,18:26:47 | WARNING | Handling webdataset error (OSError('image file is truncated (95 bytes not processed)')). Ignoring.
|
| 309 |
+
2025-07-30,18:28:34 | INFO | Train Epoch: 1 [ 18890752/128008192 (15%)] Data (t): 0.759 Batch (t): 9.232, 1676.41/s, 209.552/s/gpu LR: 0.000407 Logit Scale: 80.072 Class_loss: 3.5148 (3.9091) Contrastive_loss: 0.47356 (1.0666) Loss: 3.9884 (4.9756)
|
| 310 |
+
2025-07-30,18:37:10 | WARNING | Handling webdataset error (OSError('image file is truncated (25 bytes not processed)')). Ignoring.
|
| 311 |
+
2025-07-30,18:48:16 | INFO | Train Epoch: 1 [ 20987904/128008192 (16%)] Data (t): 0.728 Batch (t): 9.236, 1759.88/s, 219.985/s/gpu LR: 0.000394 Logit Scale: 80.417 Class_loss: 3.4618 (3.8684) Contrastive_loss: 0.30847 (0.99766) Loss: 3.7702 (4.8661)
|
| 312 |
+
2025-07-30,18:52:09 | WARNING | Handling webdataset error (OSError('image file is truncated (16 bytes not processed)')). Ignoring.
|
| 313 |
+
2025-07-30,19:07:53 | INFO | Train Epoch: 1 [ 23085056/128008192 (18%)] Data (t): 0.763 Batch (t): 9.195, 1781.16/s, 222.645/s/gpu LR: 0.000381 Logit Scale: 80.849 Class_loss: 3.4504 (3.8336) Contrastive_loss: 0.31382 (0.94067) Loss: 3.7642 (4.7742)
|
| 314 |
+
2025-07-30,19:27:34 | INFO | Train Epoch: 1 [ 25182208/128008192 (20%)] Data (t): 0.728 Batch (t): 9.221, 1722.15/s, 215.269/s/gpu LR: 0.000368 Logit Scale: 81.163 Class_loss: 3.7429 (3.8266) Contrastive_loss: 0.75887 (0.92669) Loss: 4.5018 (4.7533)
|
| 315 |
+
2025-07-30,19:47:17 | INFO | Train Epoch: 1 [ 27279360/128008192 (21%)] Data (t): 0.755 Batch (t): 9.248, 1796.69/s, 224.586/s/gpu LR: 0.000355 Logit Scale: 81.289 Class_loss: 3.5523 (3.8070) Contrastive_loss: 0.28661 (0.88097) Loss: 3.8389 (4.6880)
|
| 316 |
+
2025-07-30,19:54:30 | WARNING | Handling webdataset error (OSError('image file is truncated (18 bytes not processed)')). Ignoring.
|
| 317 |
+
2025-07-30,20:02:01 | WARNING | Handling webdataset error (OSError('image file is truncated (21 bytes not processed)')). Ignoring.
|
| 318 |
+
2025-07-30,20:06:59 | INFO | Train Epoch: 1 [ 29376512/128008192 (23%)] Data (t): 0.735 Batch (t): 9.228, 1821.06/s, 227.632/s/gpu LR: 0.000343 Logit Scale: 81.808 Class_loss: 3.4905 (3.7859) Contrastive_loss: 0.27662 (0.84068) Loss: 3.7671 (4.6266)
|
| 319 |
+
2025-07-30,20:09:14 | WARNING | Handling webdataset error (OSError('image file is truncated (34 bytes not processed)')). Ignoring.
|
| 320 |
+
2025-07-30,20:09:22 | WARNING | Handling webdataset error (OSError('image file is truncated (1 bytes not processed)')). Ignoring.
|
| 321 |
+
2025-07-30,20:09:57 | WARNING | Handling webdataset error (OSError('image file is truncated (7 bytes not processed)')). Ignoring.
|
| 322 |
+
2025-07-30,20:26:58 | INFO | Train Epoch: 1 [ 31473664/128008192 (25%)] Data (t): 0.740 Batch (t): 9.373, 1689.22/s, 211.153/s/gpu LR: 0.000330 Logit Scale: 82.195 Class_loss: 4.9986 (3.8617) Contrastive_loss: 2.3405 (0.93441) Loss: 7.3391 (4.7961)
|
| 323 |
+
2025-07-30,20:37:32 | WARNING | Handling webdataset error (OSError('image file is truncated (19 bytes not processed)')). Ignoring.
|
| 324 |
+
2025-07-30,20:46:07 | INFO | Train Epoch: 1 [ 33570816/128008192 (26%)] Data (t): 0.735 Batch (t): 8.975, 1787.94/s, 223.493/s/gpu LR: 0.000318 Logit Scale: 82.409 Class_loss: 3.4391 (3.8368) Contrastive_loss: 0.31074 (0.89773) Loss: 3.7499 (4.7346)
|
| 325 |
+
2025-07-30,20:46:15 | WARNING | Handling webdataset error (OSError('image file is truncated (26 bytes not processed)')). Ignoring.
|
| 326 |
+
2025-07-30,20:59:22 | WARNING | Handling webdataset error (OSError('image file is truncated (20 bytes not processed)')). Ignoring.
|
| 327 |
+
2025-07-30,21:05:48 | INFO | Train Epoch: 1 [ 35667968/128008192 (28%)] Data (t): 0.743 Batch (t): 9.229, 1763.33/s, 220.416/s/gpu LR: 0.000305 Logit Scale: 82.803 Class_loss: 3.3818 (3.8116) Contrastive_loss: 0.30612 (0.86486) Loss: 3.6879 (4.6764)
|
| 328 |
+
2025-07-30,21:11:29 | WARNING | Handling webdataset error (OSError('image file is truncated (120 bytes not processed)')). Ignoring.
|
| 329 |
+
2025-07-30,21:25:28 | INFO | Train Epoch: 1 [ 37765120/128008192 (30%)] Data (t): 0.744 Batch (t): 9.211, 1737.65/s, 217.206/s/gpu LR: 0.000293 Logit Scale: 82.947 Class_loss: 3.4058 (3.7902) Contrastive_loss: 0.28821 (0.83451) Loss: 3.6940 (4.6247)
|
| 330 |
+
2025-07-30,21:45:10 | INFO | Train Epoch: 1 [ 39862272/128008192 (31%)] Data (t): 0.745 Batch (t): 9.237, 1770.34/s, 221.292/s/gpu LR: 0.000281 Logit Scale: 83.188 Class_loss: 3.3850 (3.7699) Contrastive_loss: 0.25018 (0.80529) Loss: 3.6352 (4.5752)
|
| 331 |
+
2025-07-30,21:57:52 | WARNING | Handling webdataset error (OSError('image file is truncated (92 bytes not processed)')). Ignoring.
|
| 332 |
+
2025-07-30,22:00:55 | WARNING | Handling webdataset error (OSError('image file is truncated (40 bytes not processed)')). Ignoring.
|
| 333 |
+
2025-07-30,22:04:55 | INFO | Train Epoch: 1 [ 41959424/128008192 (33%)] Data (t): 0.788 Batch (t): 9.257, 1751.52/s, 218.940/s/gpu LR: 0.000269 Logit Scale: 83.484 Class_loss: 3.5844 (3.7611) Contrastive_loss: 0.43844 (0.78782) Loss: 4.0228 (4.5489)
|
| 334 |
+
2025-07-30,22:25:21 | INFO | Train Epoch: 1 [ 44056576/128008192 (34%)] Data (t): 0.775 Batch (t): 9.583, 1725.76/s, 215.720/s/gpu LR: 0.000258 Logit Scale: 83.880 Class_loss: 3.3869 (3.7441) Contrastive_loss: 0.25307 (0.76352) Loss: 3.6400 (4.5076)
|
| 335 |
+
2025-07-30,22:33:40 | WARNING | Handling webdataset error (OSError('image file is truncated (3 bytes not processed)')). Ignoring.
|
| 336 |
+
2025-07-30,22:45:07 | INFO | Train Epoch: 1 [ 46153728/128008192 (36%)] Data (t): 0.754 Batch (t): 9.261, 1788.75/s, 223.594/s/gpu LR: 0.000246 Logit Scale: 84.200 Class_loss: 3.7309 (3.7435) Contrastive_loss: 0.74562 (0.76274) Loss: 4.4765 (4.5063)
|
| 337 |
+
2025-07-30,23:04:35 | INFO | Train Epoch: 1 [ 48250880/128008192 (38%)] Data (t): 0.742 Batch (t): 9.125, 1726.47/s, 215.809/s/gpu LR: 0.000235 Logit Scale: 84.547 Class_loss: 3.4560 (3.7315) Contrastive_loss: 0.29708 (0.74334) Loss: 3.7531 (4.4749)
|
| 338 |
+
2025-07-30,23:09:06 | WARNING | Handling webdataset error (OSError('image file is truncated (48 bytes not processed)')). Ignoring.
|
| 339 |
+
2025-07-30,23:24:18 | INFO | Train Epoch: 1 [ 50348032/128008192 (39%)] Data (t): 0.753 Batch (t): 9.240, 1730.35/s, 216.293/s/gpu LR: 0.000223 Logit Scale: 84.722 Class_loss: 3.3809 (3.7175) Contrastive_loss: 0.22506 (0.72261) Loss: 3.6059 (4.4401)
|
| 340 |
+
2025-07-30,23:33:06 | WARNING | Handling webdataset error (OSError('image file is truncated (7 bytes not processed)')). Ignoring.
|
| 341 |
+
2025-07-30,23:42:40 | WARNING | Handling webdataset error (OSError('image file is truncated (11 bytes not processed)')). Ignoring.
|
| 342 |
+
2025-07-30,23:42:41 | WARNING | Handling webdataset error (OSError('image file is truncated (48 bytes not processed)')). Ignoring.
|
| 343 |
+
2025-07-30,23:45:03 | INFO | Train Epoch: 1 [ 52445184/128008192 (41%)] Data (t): 0.741 Batch (t): 9.727, 1596.67/s, 199.584/s/gpu LR: 0.000212 Logit Scale: 85.089 Class_loss: 3.3939 (3.7051) Contrastive_loss: 0.22503 (0.70347) Loss: 3.6189 (4.4085)
|
| 344 |
+
2025-07-30,23:50:53 | WARNING | Handling webdataset error (OSError('image file is truncated (6 bytes not processed)')). Ignoring.
|
| 345 |
+
2025-07-30,23:57:05 | WARNING | Handling webdataset error (OSError('image file is truncated (104 bytes not processed)')). Ignoring.
|
| 346 |
+
2025-07-31,00:05:17 | INFO | Train Epoch: 1 [ 54542336/128008192 (43%)] Data (t): 0.833 Batch (t): 9.491, 1757.87/s, 219.734/s/gpu LR: 0.000202 Logit Scale: 85.337 Class_loss: 3.3616 (3.6924) Contrastive_loss: 0.29668 (0.68840) Loss: 3.6583 (4.3808)
|
| 347 |
+
2025-07-31,00:12:20 | WARNING | Handling webdataset error (OSError('image file is truncated (10 bytes not processed)')). Ignoring.
|
| 348 |
+
2025-07-31,00:25:08 | INFO | Train Epoch: 1 [ 56639488/128008192 (44%)] Data (t): 0.831 Batch (t): 9.298, 1734.25/s, 216.781/s/gpu LR: 0.000191 Logit Scale: 85.544 Class_loss: 3.3590 (3.6804) Contrastive_loss: 0.25255 (0.67284) Loss: 3.6116 (4.3533)
|
| 349 |
+
2025-07-31,00:44:33 | INFO | Train Epoch: 1 [ 58736640/128008192 (46%)] Data (t): 0.786 Batch (t): 9.108, 1915.64/s, 239.455/s/gpu LR: 0.000181 Logit Scale: 85.825 Class_loss: 4.4258 (3.7061) Contrastive_loss: 1.5974 (0.70472) Loss: 6.0232 (4.4109)
|
| 350 |
+
2025-07-31,01:03:15 | WARNING | Handling webdataset error (OSError('image file is truncated (13 bytes not processed)')). Ignoring.
|
| 351 |
+
2025-07-31,01:04:02 | INFO | Train Epoch: 1 [ 60833792/128008192 (48%)] Data (t): 2.125 Batch (t): 9.132, 1815.44/s, 226.930/s/gpu LR: 0.000171 Logit Scale: 86.081 Class_loss: 3.4501 (3.6976) Contrastive_loss: 0.25333 (0.68967) Loss: 3.7034 (4.3873)
|
| 352 |
+
2025-07-31,01:08:17 | WARNING | Handling webdataset error (OSError('image file is truncated (50 bytes not processed)')). Ignoring.
|
| 353 |
+
2025-07-31,01:23:39 | INFO | Train Epoch: 1 [ 62930944/128008192 (49%)] Data (t): 0.747 Batch (t): 9.189, 1808.55/s, 226.069/s/gpu LR: 0.000161 Logit Scale: 86.387 Class_loss: 4.7166 (3.7305) Contrastive_loss: 1.8045 (0.72563) Loss: 6.5212 (4.4561)
|
| 354 |
+
2025-07-31,01:30:43 | WARNING | Handling webdataset error (OSError('image file is truncated (47 bytes not processed)')). Ignoring.
|
| 355 |
+
2025-07-31,01:43:16 | INFO | Train Epoch: 1 [ 65028096/128008192 (51%)] Data (t): 0.747 Batch (t): 9.197, 1777.49/s, 222.186/s/gpu LR: 0.000151 Logit Scale: 86.594 Class_loss: 4.4705 (3.7536) Contrastive_loss: 1.2483 (0.74197) Loss: 5.7187 (4.4956)
|
| 356 |
+
2025-07-31,01:50:43 | WARNING | Handling webdataset error (OSError('image file is truncated (21 bytes not processed)')). Ignoring.
|
| 357 |
+
2025-07-31,02:02:46 | WARNING | Handling webdataset error (OSError('image file is truncated (47 bytes not processed)')). Ignoring.
|
| 358 |
+
2025-07-31,02:02:57 | INFO | Train Epoch: 1 [ 67125248/128008192 (52%)] Data (t): 0.766 Batch (t): 9.231, 1777.20/s, 222.150/s/gpu LR: 0.000142 Logit Scale: 86.829 Class_loss: 3.3601 (3.7417) Contrastive_loss: 0.24733 (0.72698) Loss: 3.6075 (4.4687)
|
| 359 |
+
2025-07-31,02:04:47 | WARNING | Handling webdataset error (OSError('image file is truncated (6 bytes not processed)')). Ignoring.
|
| 360 |
+
2025-07-31,02:22:34 | INFO | Train Epoch: 1 [ 69222400/128008192 (54%)] Data (t): 0.745 Batch (t): 9.192, 1797.24/s, 224.655/s/gpu LR: 0.000133 Logit Scale: 87.138 Class_loss: 3.7568 (3.7421) Contrastive_loss: 0.60555 (0.72341) Loss: 4.3623 (4.4655)
|
| 361 |
+
2025-07-31,02:23:23 | WARNING | Handling webdataset error (OSError('image file is truncated (7 bytes not processed)')). Ignoring.
|
| 362 |
+
2025-07-31,02:24:39 | WARNING | Handling webdataset error (OSError('image file is truncated (0 bytes not processed)')). Ignoring.
|
| 363 |
+
2025-07-31,02:42:16 | INFO | Train Epoch: 1 [ 71319552/128008192 (56%)] Data (t): 0.756 Batch (t): 9.239, 1939.80/s, 242.475/s/gpu LR: 0.000124 Logit Scale: 87.225 Class_loss: 3.3733 (3.7316) Contrastive_loss: 0.24964 (0.70987) Loss: 3.6229 (4.4415)
|
| 364 |
+
2025-07-31,02:48:35 | INFO | Starting zero-shot imagenet.
|
| 365 |
+
2025-07-31,02:48:35 | INFO | Building zero-shot classifier
|
| 366 |
+
2025-07-31,02:48:42 | INFO | Using classifier
|
| 367 |
+
2025-07-31,03:06:21 | INFO | Finished zero-shot imagenet.
|
| 368 |
+
2025-07-31,03:06:21 | INFO | Eval Epoch: 1.562396006655574 imagenet-zeroshot-val-top1: 0.4269 imagenet-zeroshot-val-top5: 0.7004
|
| 369 |
+
2025-07-31,03:19:19 | INFO | Train Epoch: 1 [ 73416704/128008192 (57%)] Data (t): 10.503 Batch (t): 17.364, 1768.15/s, 221.019/s/gpu LR: 0.000115 Logit Scale: 87.454 Class_loss: 3.3861 (3.7220) Contrastive_loss: 0.24873 (0.69706) Loss: 3.6349 (4.4191)
|
| 370 |
+
2025-07-31,03:35:08 | WARNING | Handling webdataset error (OSError('image file is truncated (26 bytes not processed)')). Ignoring.
|
| 371 |
+
2025-07-31,03:39:05 | INFO | Train Epoch: 1 [ 75513856/128008192 (59%)] Data (t): 0.766 Batch (t): 9.262, 1795.80/s, 224.475/s/gpu LR: 0.000107 Logit Scale: 87.711 Class_loss: 3.3758 (3.7126) Contrastive_loss: 0.26996 (0.68552) Loss: 3.6458 (4.3982)
|
| 372 |
+
2025-07-31,03:57:39 | WARNING | Handling webdataset error (OSError('image file is truncated (16 bytes not processed)')). Ignoring.
|
| 373 |
+
2025-07-31,03:58:48 | INFO | Train Epoch: 1 [ 77611008/128008192 (61%)] Data (t): 0.779 Batch (t): 9.241, 1802.70/s, 225.338/s/gpu LR: 0.000099 Logit Scale: 87.962 Class_loss: 4.4981 (3.7333) Contrastive_loss: 1.3915 (0.70410) Loss: 5.8895 (4.4374)
|
| 374 |
+
2025-07-31,04:18:34 | INFO | Train Epoch: 1 [ 79708160/128008192 (62%)] Data (t): 0.783 Batch (t): 9.267, 1733.17/s, 216.646/s/gpu LR: 0.000091 Logit Scale: 88.161 Class_loss: 3.3602 (3.7237) Contrastive_loss: 0.23954 (0.69218) Loss: 3.5997 (4.4159)
|
| 375 |
+
2025-07-31,04:30:56 | WARNING | Handling webdataset error (OSError('image file is truncated (26 bytes not processed)')). Ignoring.
|
| 376 |
+
2025-07-31,04:31:16 | WARNING | Handling webdataset error (OSError('image file is truncated (23 bytes not processed)')). Ignoring.
|
| 377 |
+
2025-07-31,04:38:19 | INFO | Train Epoch: 1 [ 81805312/128008192 (64%)] Data (t): 0.767 Batch (t): 9.261, 1741.71/s, 217.714/s/gpu LR: 0.000083 Logit Scale: 88.322 Class_loss: 3.3586 (3.7146) Contrastive_loss: 0.18951 (0.67962) Loss: 3.5481 (4.3942)
|
| 378 |
+
2025-07-31,04:43:12 | WARNING | Handling webdataset error (OSError('image file is truncated (32 bytes not processed)')). Ignoring.
|
| 379 |
+
2025-07-31,04:44:38 | WARNING | Handling webdataset error (OSError('image file is truncated (14 bytes not processed)')). Ignoring.
|
| 380 |
+
2025-07-31,04:45:47 | WARNING | Handling webdataset error (OSError('image file is truncated (342 bytes not processed)')). Ignoring.
|
| 381 |
+
2025-07-31,04:58:00 | INFO | Train Epoch: 1 [ 83902464/128008192 (66%)] Data (t): 0.963 Batch (t): 9.223, 1766.23/s, 220.779/s/gpu LR: 0.000076 Logit Scale: 88.482 Class_loss: 3.3731 (3.7063) Contrastive_loss: 0.21403 (0.66826) Loss: 3.5871 (4.3745)
|
| 382 |
+
2025-07-31,05:17:40 | INFO | Train Epoch: 1 [ 85999616/128008192 (67%)] Data (t): 1.502 Batch (t): 9.225, 1804.42/s, 225.553/s/gpu LR: 0.000069 Logit Scale: 88.688 Class_loss: 3.8895 (3.7106) Contrastive_loss: 0.69159 (0.66882) Loss: 4.5811 (4.3795)
|
| 383 |
+
2025-07-31,05:23:42 | WARNING | Handling webdataset error (OSError('image file is truncated (57 bytes not processed)')). Ignoring.
|
| 384 |
+
2025-07-31,05:37:30 | INFO | Train Epoch: 1 [ 88096768/128008192 (69%)] Data (t): 0.797 Batch (t): 9.290, 1798.11/s, 224.764/s/gpu LR: 0.000063 Logit Scale: 88.850 Class_loss: 3.5969 (3.7080) Contrastive_loss: 0.41193 (0.66284) Loss: 4.0088 (4.3708)
|
| 385 |
+
2025-07-31,05:44:00 | WARNING | Handling webdataset error (OSError('image file is truncated (19 bytes not processed)')). Ignoring.
|
| 386 |
+
2025-07-31,05:57:09 | INFO | Train Epoch: 1 [ 90193920/128008192 (70%)] Data (t): 2.560 Batch (t): 9.216, 1721.18/s, 215.148/s/gpu LR: 0.000056 Logit Scale: 88.966 Class_loss: 3.3806 (3.7006) Contrastive_loss: 0.20028 (0.65233) Loss: 3.5809 (4.3529)
|
| 387 |
+
2025-07-31,06:16:49 | INFO | Train Epoch: 1 [ 92291072/128008192 (72%)] Data (t): 0.765 Batch (t): 9.215, 1803.84/s, 225.479/s/gpu LR: 0.000050 Logit Scale: 89.138 Class_loss: 3.3946 (3.6938) Contrastive_loss: 0.21694 (0.64265) Loss: 3.6115 (4.3364)
|
| 388 |
+
2025-07-31,06:36:30 | INFO | Train Epoch: 1 [ 94388224/128008192 (74%)] Data (t): 1.646 Batch (t): 9.233, 1788.58/s, 223.573/s/gpu LR: 0.000045 Logit Scale: 89.251 Class_loss: 3.3575 (3.6864) Contrastive_loss: 0.19011 (0.63282) Loss: 3.5476 (4.3193)
|
| 389 |
+
2025-07-31,06:56:16 | INFO | Train Epoch: 1 [ 96485376/128008192 (75%)] Data (t): 0.811 Batch (t): 9.260, 1752.91/s, 219.114/s/gpu LR: 0.000039 Logit Scale: 89.365 Class_loss: 3.3670 (3.6797) Contrastive_loss: 0.20048 (0.62362) Loss: 3.5675 (4.3033)
|
| 390 |
+
2025-07-31,07:04:47 | WARNING | Handling webdataset error (OSError('image file is truncated (42 bytes not processed)')). Ignoring.
|
| 391 |
+
2025-07-31,07:15:52 | INFO | Train Epoch: 1 [ 98582528/128008192 (77%)] Data (t): 0.754 Batch (t): 9.187, 1791.99/s, 223.998/s/gpu LR: 0.000034 Logit Scale: 89.481 Class_loss: 3.3392 (3.6726) Contrastive_loss: 0.17073 (0.61418) Loss: 3.5099 (4.2867)
|
| 392 |
+
2025-07-31,07:35:39 | INFO | Train Epoch: 1 [100679680/128008192 (79%)] Data (t): 0.791 Batch (t): 9.277, 1639.43/s, 204.929/s/gpu LR: 0.000030 Logit Scale: 89.594 Class_loss: 3.3452 (3.6659) Contrastive_loss: 0.22769 (0.60629) Loss: 3.5729 (4.2722)
|
| 393 |
+
2025-07-31,07:50:15 | WARNING | Handling webdataset error (OSError('image file is truncated (10 bytes not processed)')). Ignoring.
|
| 394 |
+
2025-07-31,07:55:08 | INFO | Train Epoch: 1 [102776832/128008192 (80%)] Data (t): 0.810 Batch (t): 9.132, 1797.61/s, 224.701/s/gpu LR: 0.000025 Logit Scale: 89.708 Class_loss: 3.3689 (3.6599) Contrastive_loss: 0.20482 (0.59827) Loss: 3.5737 (4.2582)
|
| 395 |
+
2025-07-31,08:14:42 | INFO | Train Epoch: 1 [104873984/128008192 (82%)] Data (t): 0.834 Batch (t): 9.172, 1742.75/s, 217.844/s/gpu LR: 0.000021 Logit Scale: 89.791 Class_loss: 4.5259 (3.6769) Contrastive_loss: 1.3733 (0.61346) Loss: 5.8992 (4.2904)
|
| 396 |
+
2025-07-31,08:23:59 | WARNING | Handling webdataset error (OSError('image file is truncated (44 bytes not processed)')). Ignoring.
|
| 397 |
+
2025-07-31,08:34:25 | INFO | Train Epoch: 1 [106971136/128008192 (84%)] Data (t): 0.784 Batch (t): 9.238, 1826.86/s, 228.357/s/gpu LR: 0.000018 Logit Scale: 89.854 Class_loss: 3.4041 (3.6717) Contrastive_loss: 0.23987 (0.60628) Loss: 3.6440 (4.2779)
|
| 398 |
+
2025-07-31,08:54:08 | INFO | Train Epoch: 1 [109068288/128008192 (85%)] Data (t): 0.759 Batch (t): 9.249, 1788.27/s, 223.534/s/gpu LR: 0.000014 Logit Scale: 89.917 Class_loss: 3.3667 (3.6659) Contrastive_loss: 0.21180 (0.59883) Loss: 3.5785 (4.2648)
|
| 399 |
+
2025-07-31,09:11:12 | WARNING | Handling webdataset error (UnidentifiedImageError('cannot identify image file <_io.BytesIO object at 0x7fbe2d554f90>')). Ignoring.
|
| 400 |
+
2025-07-31,09:13:53 | INFO | Train Epoch: 1 [111165440/128008192 (87%)] Data (t): 0.751 Batch (t): 9.251, 1779.84/s, 222.480/s/gpu LR: 0.000011 Logit Scale: 89.978 Class_loss: 3.3277 (3.6597) Contrastive_loss: 0.17008 (0.59089) Loss: 3.4978 (4.2506)
|
| 401 |
+
2025-07-31,09:33:38 | INFO | Train Epoch: 1 [113262592/128008192 (88%)] Data (t): 0.757 Batch (t): 9.264, 1732.17/s, 216.522/s/gpu LR: 0.000009 Logit Scale: 90.023 Class_loss: 3.3369 (3.6538) Contrastive_loss: 0.16708 (0.58319) Loss: 3.5040 (4.2370)
|
| 402 |
+
2025-07-31,09:53:27 | INFO | Train Epoch: 1 [115359744/128008192 (90%)] Data (t): 0.761 Batch (t): 9.284, 1784.19/s, 223.024/s/gpu LR: 0.000006 Logit Scale: 90.035 Class_loss: 3.3433 (3.6482) Contrastive_loss: 0.18011 (0.57599) Loss: 3.5235 (4.2242)
|
| 403 |
+
2025-07-31,10:09:18 | WARNING | Handling webdataset error (OSError('image file is truncated (2 bytes not processed)')). Ignoring.
|
| 404 |
+
2025-07-31,10:13:14 | INFO | Train Epoch: 1 [117456896/128008192 (92%)] Data (t): 0.754 Batch (t): 9.276, 1788.93/s, 223.617/s/gpu LR: 0.000004 Logit Scale: 90.048 Class_loss: 4.3060 (3.6598) Contrastive_loss: 0.99327 (0.58331) Loss: 5.2992 (4.2431)
|
| 405 |
+
2025-07-31,10:20:11 | WARNING | Handling webdataset error (OSError('image file is truncated (18 bytes not processed)')). Ignoring.
|
| 406 |
+
2025-07-31,10:33:05 | INFO | Train Epoch: 1 [119554048/128008192 (93%)] Data (t): 0.764 Batch (t): 9.306, 1761.71/s, 220.214/s/gpu LR: 0.000003 Logit Scale: 90.064 Class_loss: 3.3912 (3.6552) Contrastive_loss: 0.28220 (0.57812) Loss: 3.6734 (4.2333)
|
| 407 |
+
2025-07-31,10:36:25 | WARNING | Handling webdataset error (OSError('image file is truncated (50 bytes not processed)')). Ignoring.
|
| 408 |
+
2025-07-31,10:44:04 | WARNING | Handling webdataset error (OSError('image file is truncated (13 bytes not processed)')). Ignoring.
|
| 409 |
+
2025-07-31,10:52:36 | INFO | Train Epoch: 1 [121651200/128008192 (95%)] Data (t): 0.818 Batch (t): 9.149, 1681.89/s, 210.236/s/gpu LR: 0.000002 Logit Scale: 90.072 Class_loss: 3.3454 (3.6499) Contrastive_loss: 0.17750 (0.57133) Loss: 3.5229 (4.2212)
|
| 410 |
+
2025-07-31,11:12:19 | INFO | Train Epoch: 1 [123748352/128008192 (97%)] Data (t): 0.862 Batch (t): 9.238, 1793.11/s, 224.138/s/gpu LR: 0.000001 Logit Scale: 90.075 Class_loss: 3.3403 (3.6447) Contrastive_loss: 0.17811 (0.56478) Loss: 3.5184 (4.2095)
|
| 411 |
+
2025-07-31,11:30:28 | WARNING | Handling webdataset error (OSError('image file is truncated (8 bytes not processed)')). Ignoring.
|
| 412 |
+
2025-07-31,11:32:06 | INFO | Train Epoch: 1 [125845504/128008192 (98%)] Data (t): 0.852 Batch (t): 9.272, 1814.81/s, 226.852/s/gpu LR: 0.000000 Logit Scale: 90.075 Class_loss: 3.3162 (3.6394) Contrastive_loss: 0.22033 (0.55913) Loss: 3.5366 (4.1985)
|
| 413 |
+
2025-07-31,11:32:43 | WARNING | Handling webdataset error (OSError('broken data stream when reading image file')). Ignoring.
|
| 414 |
+
2025-07-31,11:46:13 | WARNING | Handling webdataset error (OSError('image file is truncated (83 bytes not processed)')). Ignoring.
|
| 415 |
+
2025-07-31,11:52:05 | INFO | Train Epoch: 1 [127942656/128008192 (100%)] Data (t): 0.802 Batch (t): 9.366, 1753.85/s, 219.231/s/gpu LR: 0.000000 Logit Scale: 90.075 Class_loss: 4.3783 (3.6513) Contrastive_loss: 1.1426 (0.56854) Loss: 5.5210 (4.2198)
|
| 416 |
+
2025-07-31,11:52:42 | INFO | Train Epoch: 1 [128008192/128008192 (100%)] Data (t): 0.762 Batch (t): 9.304, 1803.62/s, 225.452/s/gpu LR: 0.000000 Logit Scale: 90.075 Class_loss: 3.5421 (3.6495) Contrastive_loss: 0.33196 (0.56479) Loss: 3.8741 (4.2143)
|
| 417 |
+
2025-07-31,11:52:49 | INFO | Starting zero-shot imagenet.
|
| 418 |
+
2025-07-31,11:52:49 | INFO | Building zero-shot classifier
|
| 419 |
+
2025-07-31,11:52:56 | INFO | Using classifier
|
| 420 |
+
2025-07-31,12:10:30 | INFO | Finished zero-shot imagenet.
|
| 421 |
+
2025-07-31,12:10:30 | INFO | Eval Epoch: 2 imagenet-zeroshot-val-top1: 0.4499 imagenet-zeroshot-val-top5: 0.7292
|
logs-lr1e-3-datacomp-rebuttal/clipcls_vit_b16_s512m_bs16k_weighted_06/params.txt
ADDED
|
@@ -0,0 +1,113 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
NDR_patch_size: 16
|
| 2 |
+
accum_freq: 1
|
| 3 |
+
aug_cfg: {}
|
| 4 |
+
batch_size: 2048
|
| 5 |
+
beta1: 0.9
|
| 6 |
+
beta2: 0.98
|
| 7 |
+
checkpoint_path: ./logs-lr1e-3-datacomp-rebuttal/clipcls_vit_b16_s512m_bs16k_weighted_06/checkpoints
|
| 8 |
+
class_loss_weight: 0.6
|
| 9 |
+
coca_caption_loss_weight: 2.0
|
| 10 |
+
coca_contrastive_loss_weight: 1.0
|
| 11 |
+
copy_codebase: False
|
| 12 |
+
csv_caption_key: title
|
| 13 |
+
csv_img_key: filepath
|
| 14 |
+
csv_separator:
|
| 15 |
+
dataset_resampled: False
|
| 16 |
+
dataset_type: webdataset
|
| 17 |
+
ddp_static_graph: True
|
| 18 |
+
debug: False
|
| 19 |
+
delete_prev_step_ckpt: True
|
| 20 |
+
delete_previous_checkpoint: False
|
| 21 |
+
device: cuda:0
|
| 22 |
+
dist_backend: nccl
|
| 23 |
+
dist_url: env://
|
| 24 |
+
distill: False
|
| 25 |
+
distill_model: None
|
| 26 |
+
distill_pretrained: None
|
| 27 |
+
distributed: True
|
| 28 |
+
epochs: 2
|
| 29 |
+
epochs_cooldown: None
|
| 30 |
+
eps: 1e-06
|
| 31 |
+
force_custom_text: False
|
| 32 |
+
force_image_size: 224
|
| 33 |
+
force_patch_dropout: None
|
| 34 |
+
force_quick_gelu: False
|
| 35 |
+
gather_with_grad: True
|
| 36 |
+
global_batch_size: 16384
|
| 37 |
+
grad_checkpointing: True
|
| 38 |
+
grad_clip_norm: None
|
| 39 |
+
horovod: False
|
| 40 |
+
image_interpolation: None
|
| 41 |
+
image_mean: None
|
| 42 |
+
image_resize_mode: None
|
| 43 |
+
image_std: None
|
| 44 |
+
imagenet_v2: None
|
| 45 |
+
imagenet_val: /mnt/bn/zilongdata-us/dataset/ILSVRC/Data/CLS-LOC/val
|
| 46 |
+
is_cls_token: True
|
| 47 |
+
local_loss: True
|
| 48 |
+
local_rank: 0
|
| 49 |
+
lock_image: False
|
| 50 |
+
lock_image_freeze_bn_stats: False
|
| 51 |
+
lock_image_unlocked_groups: 0
|
| 52 |
+
lock_text: False
|
| 53 |
+
lock_text_freeze_layer_norm: False
|
| 54 |
+
lock_text_unlocked_layers: 0
|
| 55 |
+
log_every_n_steps: 128
|
| 56 |
+
log_level: 20
|
| 57 |
+
log_local: False
|
| 58 |
+
log_path: ./logs-lr1e-3-datacomp-rebuttal/clipcls_vit_b16_s512m_bs16k_weighted_06/out.log
|
| 59 |
+
logs: ./logs-lr1e-3-datacomp-rebuttal
|
| 60 |
+
lr: 0.001
|
| 61 |
+
lr_cooldown_end: 0.0
|
| 62 |
+
lr_cooldown_power: 1.0
|
| 63 |
+
lr_scheduler: cosine
|
| 64 |
+
max_seq_len: 15000
|
| 65 |
+
model: CLIPCLS-ViT-B-16
|
| 66 |
+
name: clipcls_vit_b16_s512m_bs16k_weighted_06
|
| 67 |
+
native_dynamic_resolution: False
|
| 68 |
+
no_set_device_rank: False
|
| 69 |
+
only_class_loss: False
|
| 70 |
+
only_packing: False
|
| 71 |
+
post_train: False
|
| 72 |
+
precision: amp_bfloat16
|
| 73 |
+
pretrained:
|
| 74 |
+
pretrained_image:
|
| 75 |
+
pretrained_text:
|
| 76 |
+
rank: 0
|
| 77 |
+
remote_sync: None
|
| 78 |
+
remote_sync_frequency: 300
|
| 79 |
+
remote_sync_protocol: s3
|
| 80 |
+
report_to: wandb
|
| 81 |
+
resume: None
|
| 82 |
+
rope_attn_num_heads: 12
|
| 83 |
+
rope_model_width: 768
|
| 84 |
+
save_every_n_steps: 6104
|
| 85 |
+
save_frequency: 1
|
| 86 |
+
save_most_recent: False
|
| 87 |
+
seed: 0
|
| 88 |
+
siglip: False
|
| 89 |
+
skip_scheduler: False
|
| 90 |
+
tensorboard: False
|
| 91 |
+
tensorboard_path:
|
| 92 |
+
torchcompile: False
|
| 93 |
+
torchscript: False
|
| 94 |
+
trace: False
|
| 95 |
+
train_data: /mnt/bn/zilongdata-us/dataset/recap-datacomp-1b-webdataset/{000000..140146}.tar
|
| 96 |
+
train_data_upsampling_factors: None
|
| 97 |
+
train_num_samples: 128000000
|
| 98 |
+
use_bn_sync: False
|
| 99 |
+
use_bnb_linear: None
|
| 100 |
+
use_idf: True
|
| 101 |
+
val_data: None
|
| 102 |
+
val_frequency: 1
|
| 103 |
+
val_num_samples: None
|
| 104 |
+
val_steps: 6104
|
| 105 |
+
wandb: True
|
| 106 |
+
wandb_notes:
|
| 107 |
+
wandb_project_name: cls-clip-batch-size
|
| 108 |
+
warmup: 500
|
| 109 |
+
wd: 0.2
|
| 110 |
+
workers: 1
|
| 111 |
+
world_size: 8
|
| 112 |
+
zeroshot_frequency: 2
|
| 113 |
+
zeroshot_steps: 6104
|