Upload folder using huggingface_hub
Browse files- logs-lr1e-3-datacomp-rebuttal/clipcls_vit_b16_s512m_bs16k_weighted_16/checkpoints/epoch_1.pt +3 -0
- logs-lr1e-3-datacomp-rebuttal/clipcls_vit_b16_s512m_bs16k_weighted_16/checkpoints/epoch_2.pt +3 -0
- logs-lr1e-3-datacomp-rebuttal/clipcls_vit_b16_s512m_bs16k_weighted_16/checkpoints/results.jsonl +3 -0
- logs-lr1e-3-datacomp-rebuttal/clipcls_vit_b16_s512m_bs16k_weighted_16/checkpoints/step_12208.pt +3 -0
- logs-lr1e-3-datacomp-rebuttal/clipcls_vit_b16_s512m_bs16k_weighted_16/out.log +421 -0
- logs-lr1e-3-datacomp-rebuttal/clipcls_vit_b16_s512m_bs16k_weighted_16/params.txt +113 -0
logs-lr1e-3-datacomp-rebuttal/clipcls_vit_b16_s512m_bs16k_weighted_16/checkpoints/epoch_1.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e34cc10d36d5183a28a6db96e0d91137115462ad608f20a86faa3a972cf4b612
|
| 3 |
+
size 2252180672
|
logs-lr1e-3-datacomp-rebuttal/clipcls_vit_b16_s512m_bs16k_weighted_16/checkpoints/epoch_2.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2d3fbb5917c12bc6e699d2dae20504eded377a74cb34644f6bffed726967634b
|
| 3 |
+
size 2252180672
|
logs-lr1e-3-datacomp-rebuttal/clipcls_vit_b16_s512m_bs16k_weighted_16/checkpoints/results.jsonl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"imagenet-zeroshot-val-top1": 0.3069, "imagenet-zeroshot-val-top5": 0.56678}
|
| 2 |
+
{"imagenet-zeroshot-val-top1": 0.44924, "imagenet-zeroshot-val-top5": 0.73136}
|
| 3 |
+
{"imagenet-zeroshot-val-top1": 0.4721, "imagenet-zeroshot-val-top5": 0.75588}
|
logs-lr1e-3-datacomp-rebuttal/clipcls_vit_b16_s512m_bs16k_weighted_16/checkpoints/step_12208.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1479234c5847f106f051972b4de1e16a7aaf378ac06078874eb79681060593ae
|
| 3 |
+
size 2252183914
|
logs-lr1e-3-datacomp-rebuttal/clipcls_vit_b16_s512m_bs16k_weighted_16/out.log
ADDED
|
@@ -0,0 +1,421 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
2025-07-29,20:12:28 | INFO | No latest resume checkpoint found in ./logs-lr1e-3-datacomp-rebuttal/clipcls_vit_b16_s512m_bs16k_weighted_16/checkpoints.
|
| 2 |
+
2025-07-29,20:12:40 | INFO | Running in distributed mode with multiple processes. Device: cuda:0.Process (global: 0, local 0), total 8.
|
| 3 |
+
2025-07-29,20:12:40 | INFO | Loaded CLIPCLS-ViT-B-16 model config.
|
| 4 |
+
2025-07-29,20:12:42 | INFO | Model:
|
| 5 |
+
2025-07-29,20:12:42 | INFO | CLIPCLS(
|
| 6 |
+
(visual): VisionTransformer(
|
| 7 |
+
(conv1): Conv2d(3, 768, kernel_size=(16, 16), stride=(16, 16), bias=False)
|
| 8 |
+
(patch_dropout): Identity()
|
| 9 |
+
(ln_pre): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 10 |
+
(transformer): Transformer(
|
| 11 |
+
(resblocks): ModuleList(
|
| 12 |
+
(0-11): 12 x ResidualAttentionBlock(
|
| 13 |
+
(ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 14 |
+
(attn): MultiheadAttention(
|
| 15 |
+
(out_proj): NonDynamicallyQuantizableLinear(in_features=768, out_features=768, bias=True)
|
| 16 |
+
)
|
| 17 |
+
(ls_1): Identity()
|
| 18 |
+
(ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 19 |
+
(mlp): Sequential(
|
| 20 |
+
(c_fc): Linear(in_features=768, out_features=3072, bias=True)
|
| 21 |
+
(gelu): GELU(approximate='none')
|
| 22 |
+
(c_proj): Linear(in_features=3072, out_features=768, bias=True)
|
| 23 |
+
)
|
| 24 |
+
(ls_2): Identity()
|
| 25 |
+
)
|
| 26 |
+
)
|
| 27 |
+
)
|
| 28 |
+
(ln_post): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 29 |
+
)
|
| 30 |
+
(text): TextTransformer(
|
| 31 |
+
(token_embedding): Embedding(49408, 512)
|
| 32 |
+
(transformer): Transformer(
|
| 33 |
+
(resblocks): ModuleList(
|
| 34 |
+
(0-11): 12 x ResidualAttentionBlock(
|
| 35 |
+
(ln_1): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
| 36 |
+
(attn): MultiheadAttention(
|
| 37 |
+
(out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)
|
| 38 |
+
)
|
| 39 |
+
(ls_1): Identity()
|
| 40 |
+
(ln_2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
| 41 |
+
(mlp): Sequential(
|
| 42 |
+
(c_fc): Linear(in_features=512, out_features=2048, bias=True)
|
| 43 |
+
(gelu): GELU(approximate='none')
|
| 44 |
+
(c_proj): Linear(in_features=2048, out_features=512, bias=True)
|
| 45 |
+
)
|
| 46 |
+
(ls_2): Identity()
|
| 47 |
+
)
|
| 48 |
+
)
|
| 49 |
+
)
|
| 50 |
+
(ln_final): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
| 51 |
+
)
|
| 52 |
+
(text_decoder): MixClsHead(
|
| 53 |
+
(mlps): ModuleList()
|
| 54 |
+
(ln_mlp): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 55 |
+
(text_projection): Linear(in_features=768, out_features=49408, bias=True)
|
| 56 |
+
)
|
| 57 |
+
)
|
| 58 |
+
2025-07-29,20:12:42 | INFO | Params:
|
| 59 |
+
2025-07-29,20:12:42 | INFO | NDR_patch_size: 16
|
| 60 |
+
2025-07-29,20:12:42 | INFO | accum_freq: 1
|
| 61 |
+
2025-07-29,20:12:42 | INFO | aug_cfg: {}
|
| 62 |
+
2025-07-29,20:12:42 | INFO | batch_size: 2048
|
| 63 |
+
2025-07-29,20:12:42 | INFO | beta1: 0.9
|
| 64 |
+
2025-07-29,20:12:42 | INFO | beta2: 0.98
|
| 65 |
+
2025-07-29,20:12:42 | INFO | checkpoint_path: ./logs-lr1e-3-datacomp-rebuttal/clipcls_vit_b16_s512m_bs16k_weighted_16/checkpoints
|
| 66 |
+
2025-07-29,20:12:42 | INFO | class_loss_weight: 1.6
|
| 67 |
+
2025-07-29,20:12:42 | INFO | coca_caption_loss_weight: 2.0
|
| 68 |
+
2025-07-29,20:12:42 | INFO | coca_contrastive_loss_weight: 1.0
|
| 69 |
+
2025-07-29,20:12:42 | INFO | copy_codebase: False
|
| 70 |
+
2025-07-29,20:12:42 | INFO | csv_caption_key: title
|
| 71 |
+
2025-07-29,20:12:42 | INFO | csv_img_key: filepath
|
| 72 |
+
2025-07-29,20:12:42 | INFO | csv_separator:
|
| 73 |
+
2025-07-29,20:12:42 | INFO | dataset_resampled: False
|
| 74 |
+
2025-07-29,20:12:42 | INFO | dataset_type: webdataset
|
| 75 |
+
2025-07-29,20:12:42 | INFO | ddp_static_graph: True
|
| 76 |
+
2025-07-29,20:12:42 | INFO | debug: False
|
| 77 |
+
2025-07-29,20:12:42 | INFO | delete_prev_step_ckpt: True
|
| 78 |
+
2025-07-29,20:12:42 | INFO | delete_previous_checkpoint: False
|
| 79 |
+
2025-07-29,20:12:42 | INFO | device: cuda:0
|
| 80 |
+
2025-07-29,20:12:42 | INFO | dist_backend: nccl
|
| 81 |
+
2025-07-29,20:12:42 | INFO | dist_url: env://
|
| 82 |
+
2025-07-29,20:12:42 | INFO | distill: False
|
| 83 |
+
2025-07-29,20:12:42 | INFO | distill_model: None
|
| 84 |
+
2025-07-29,20:12:42 | INFO | distill_pretrained: None
|
| 85 |
+
2025-07-29,20:12:42 | INFO | distributed: True
|
| 86 |
+
2025-07-29,20:12:42 | INFO | epochs: 2
|
| 87 |
+
2025-07-29,20:12:42 | INFO | epochs_cooldown: None
|
| 88 |
+
2025-07-29,20:12:42 | INFO | eps: 1e-06
|
| 89 |
+
2025-07-29,20:12:42 | INFO | force_custom_text: False
|
| 90 |
+
2025-07-29,20:12:42 | INFO | force_image_size: 224
|
| 91 |
+
2025-07-29,20:12:42 | INFO | force_patch_dropout: None
|
| 92 |
+
2025-07-29,20:12:42 | INFO | force_quick_gelu: False
|
| 93 |
+
2025-07-29,20:12:42 | INFO | gather_with_grad: True
|
| 94 |
+
2025-07-29,20:12:42 | INFO | global_batch_size: 16384
|
| 95 |
+
2025-07-29,20:12:42 | INFO | grad_checkpointing: True
|
| 96 |
+
2025-07-29,20:12:42 | INFO | grad_clip_norm: None
|
| 97 |
+
2025-07-29,20:12:42 | INFO | horovod: False
|
| 98 |
+
2025-07-29,20:12:42 | INFO | image_interpolation: None
|
| 99 |
+
2025-07-29,20:12:42 | INFO | image_mean: None
|
| 100 |
+
2025-07-29,20:12:42 | INFO | image_resize_mode: None
|
| 101 |
+
2025-07-29,20:12:42 | INFO | image_std: None
|
| 102 |
+
2025-07-29,20:12:42 | INFO | imagenet_v2: None
|
| 103 |
+
2025-07-29,20:12:42 | INFO | imagenet_val: /mnt/bn/zilongdata-us/dataset/ILSVRC/Data/CLS-LOC/val
|
| 104 |
+
2025-07-29,20:12:42 | INFO | is_cls_token: True
|
| 105 |
+
2025-07-29,20:12:42 | INFO | local_loss: True
|
| 106 |
+
2025-07-29,20:12:42 | INFO | local_rank: 0
|
| 107 |
+
2025-07-29,20:12:42 | INFO | lock_image: False
|
| 108 |
+
2025-07-29,20:12:42 | INFO | lock_image_freeze_bn_stats: False
|
| 109 |
+
2025-07-29,20:12:42 | INFO | lock_image_unlocked_groups: 0
|
| 110 |
+
2025-07-29,20:12:42 | INFO | lock_text: False
|
| 111 |
+
2025-07-29,20:12:42 | INFO | lock_text_freeze_layer_norm: False
|
| 112 |
+
2025-07-29,20:12:42 | INFO | lock_text_unlocked_layers: 0
|
| 113 |
+
2025-07-29,20:12:42 | INFO | log_every_n_steps: 128
|
| 114 |
+
2025-07-29,20:12:42 | INFO | log_level: 20
|
| 115 |
+
2025-07-29,20:12:42 | INFO | log_local: False
|
| 116 |
+
2025-07-29,20:12:42 | INFO | log_path: ./logs-lr1e-3-datacomp-rebuttal/clipcls_vit_b16_s512m_bs16k_weighted_16/out.log
|
| 117 |
+
2025-07-29,20:12:42 | INFO | logs: ./logs-lr1e-3-datacomp-rebuttal
|
| 118 |
+
2025-07-29,20:12:42 | INFO | lr: 0.001
|
| 119 |
+
2025-07-29,20:12:42 | INFO | lr_cooldown_end: 0.0
|
| 120 |
+
2025-07-29,20:12:42 | INFO | lr_cooldown_power: 1.0
|
| 121 |
+
2025-07-29,20:12:42 | INFO | lr_scheduler: cosine
|
| 122 |
+
2025-07-29,20:12:42 | INFO | max_seq_len: 15000
|
| 123 |
+
2025-07-29,20:12:42 | INFO | model: CLIPCLS-ViT-B-16
|
| 124 |
+
2025-07-29,20:12:42 | INFO | name: clipcls_vit_b16_s512m_bs16k_weighted_16
|
| 125 |
+
2025-07-29,20:12:42 | INFO | native_dynamic_resolution: False
|
| 126 |
+
2025-07-29,20:12:42 | INFO | no_set_device_rank: False
|
| 127 |
+
2025-07-29,20:12:42 | INFO | only_class_loss: False
|
| 128 |
+
2025-07-29,20:12:42 | INFO | only_packing: False
|
| 129 |
+
2025-07-29,20:12:42 | INFO | post_train: False
|
| 130 |
+
2025-07-29,20:12:42 | INFO | precision: amp_bfloat16
|
| 131 |
+
2025-07-29,20:12:42 | INFO | pretrained:
|
| 132 |
+
2025-07-29,20:12:42 | INFO | pretrained_image:
|
| 133 |
+
2025-07-29,20:12:42 | INFO | pretrained_text:
|
| 134 |
+
2025-07-29,20:12:42 | INFO | rank: 0
|
| 135 |
+
2025-07-29,20:12:42 | INFO | remote_sync: None
|
| 136 |
+
2025-07-29,20:12:42 | INFO | remote_sync_frequency: 300
|
| 137 |
+
2025-07-29,20:12:42 | INFO | remote_sync_protocol: s3
|
| 138 |
+
2025-07-29,20:12:42 | INFO | report_to: wandb
|
| 139 |
+
2025-07-29,20:12:42 | INFO | resume: None
|
| 140 |
+
2025-07-29,20:12:42 | INFO | rope_attn_num_heads: 12
|
| 141 |
+
2025-07-29,20:12:42 | INFO | rope_model_width: 768
|
| 142 |
+
2025-07-29,20:12:42 | INFO | save_every_n_steps: 6104
|
| 143 |
+
2025-07-29,20:12:42 | INFO | save_frequency: 1
|
| 144 |
+
2025-07-29,20:12:42 | INFO | save_most_recent: False
|
| 145 |
+
2025-07-29,20:12:42 | INFO | seed: 0
|
| 146 |
+
2025-07-29,20:12:42 | INFO | siglip: False
|
| 147 |
+
2025-07-29,20:12:42 | INFO | skip_scheduler: False
|
| 148 |
+
2025-07-29,20:12:42 | INFO | tensorboard: False
|
| 149 |
+
2025-07-29,20:12:42 | INFO | tensorboard_path:
|
| 150 |
+
2025-07-29,20:12:42 | INFO | torchcompile: False
|
| 151 |
+
2025-07-29,20:12:42 | INFO | torchscript: False
|
| 152 |
+
2025-07-29,20:12:42 | INFO | trace: False
|
| 153 |
+
2025-07-29,20:12:42 | INFO | train_data: /mnt/bn/zilongdata-us/dataset/recap-datacomp-1b-webdataset/{000000..140146}.tar
|
| 154 |
+
2025-07-29,20:12:42 | INFO | train_data_upsampling_factors: None
|
| 155 |
+
2025-07-29,20:12:42 | INFO | train_num_samples: 128000000
|
| 156 |
+
2025-07-29,20:12:42 | INFO | use_bn_sync: False
|
| 157 |
+
2025-07-29,20:12:42 | INFO | use_bnb_linear: None
|
| 158 |
+
2025-07-29,20:12:42 | INFO | use_idf: True
|
| 159 |
+
2025-07-29,20:12:42 | INFO | val_data: None
|
| 160 |
+
2025-07-29,20:12:42 | INFO | val_frequency: 1
|
| 161 |
+
2025-07-29,20:12:42 | INFO | val_num_samples: None
|
| 162 |
+
2025-07-29,20:12:42 | INFO | val_steps: 6104
|
| 163 |
+
2025-07-29,20:12:42 | INFO | wandb: True
|
| 164 |
+
2025-07-29,20:12:42 | INFO | wandb_notes:
|
| 165 |
+
2025-07-29,20:12:42 | INFO | wandb_project_name: cls-clip-batch-size
|
| 166 |
+
2025-07-29,20:12:42 | INFO | warmup: 500
|
| 167 |
+
2025-07-29,20:12:42 | INFO | wd: 0.2
|
| 168 |
+
2025-07-29,20:12:42 | INFO | workers: 1
|
| 169 |
+
2025-07-29,20:12:42 | INFO | world_size: 8
|
| 170 |
+
2025-07-29,20:12:42 | INFO | zeroshot_frequency: 2
|
| 171 |
+
2025-07-29,20:12:42 | INFO | zeroshot_steps: 6104
|
| 172 |
+
2025-07-29,20:12:58 | INFO | Start epoch 0
|
| 173 |
+
2025-07-29,20:13:14 | INFO | Train Epoch: 0 [ 16384/128008192 (0%)] Data (t): 11.559 Batch (t): 15.249, 1074.41/s, 134.302/s/gpu LR: 0.000002 Logit Scale: 14.286 Class_loss: 18.121 (18.121) Contrastive_loss: 9.7910 (9.7910) Loss: 27.912 (27.912)
|
| 174 |
+
2025-07-29,20:29:45 | WARNING | Handling webdataset error (OSError('image file is truncated (6 bytes not processed)')). Ignoring.
|
| 175 |
+
2025-07-29,20:30:36 | WARNING | Handling webdataset error (OSError('image file is truncated (17 bytes not processed)')). Ignoring.
|
| 176 |
+
2025-07-29,20:32:42 | INFO | Train Epoch: 0 [ 2113536/128008192 (2%)] Data (t): 2.722 Batch (t): 9.127, 1790.52/s, 223.815/s/gpu LR: 0.000258 Logit Scale: 14.324 Class_loss: 14.983 (16.552) Contrastive_loss: 8.9321 (9.3615) Loss: 23.915 (25.913)
|
| 177 |
+
2025-07-29,20:52:11 | INFO | Train Epoch: 0 [ 4210688/128008192 (3%)] Data (t): 0.731 Batch (t): 9.132, 1812.14/s, 226.517/s/gpu LR: 0.000514 Logit Scale: 14.616 Class_loss: 14.498 (15.867) Contrastive_loss: 8.5037 (9.0756) Loss: 23.002 (24.943)
|
| 178 |
+
2025-07-29,20:54:51 | WARNING | Handling webdataset error (OSError('image file is truncated (112 bytes not processed)')). Ignoring.
|
| 179 |
+
2025-07-29,21:11:46 | INFO | Train Epoch: 0 [ 6307840/128008192 (5%)] Data (t): 0.721 Batch (t): 9.176, 1772.80/s, 221.600/s/gpu LR: 0.000770 Logit Scale: 15.159 Class_loss: 12.570 (15.043) Contrastive_loss: 7.1806 (8.6018) Loss: 19.751 (23.645)
|
| 180 |
+
2025-07-29,21:26:55 | WARNING | Handling webdataset error (OSError('image file is truncated (6 bytes not processed)')). Ignoring.
|
| 181 |
+
2025-07-29,21:31:24 | INFO | Train Epoch: 0 [ 8404992/128008192 (7%)] Data (t): 0.728 Batch (t): 9.203, 1772.05/s, 221.507/s/gpu LR: 0.001000 Logit Scale: 16.161 Class_loss: 11.749 (14.384) Contrastive_loss: 5.9312 (8.0677) Loss: 17.680 (22.452)
|
| 182 |
+
2025-07-29,21:50:52 | INFO | Train Epoch: 0 [ 10502144/128008192 (8%)] Data (t): 0.708 Batch (t): 9.129, 1806.15/s, 225.769/s/gpu LR: 0.001000 Logit Scale: 17.434 Class_loss: 12.344 (14.044) Contrastive_loss: 6.0598 (7.7331) Loss: 18.404 (21.777)
|
| 183 |
+
2025-07-29,21:55:23 | WARNING | Handling webdataset error (OSError('image file is truncated (7 bytes not processed)')). Ignoring.
|
| 184 |
+
2025-07-29,21:55:39 | WARNING | Handling webdataset error (OSError('image file is truncated (5 bytes not processed)')). Ignoring.
|
| 185 |
+
2025-07-29,22:09:20 | WARNING | Handling webdataset error (OSError('image file is truncated (26 bytes not processed)')). Ignoring.
|
| 186 |
+
2025-07-29,22:10:25 | INFO | Train Epoch: 0 [ 12599296/128008192 (10%)] Data (t): 0.708 Batch (t): 9.158, 1767.86/s, 220.982/s/gpu LR: 0.000999 Logit Scale: 19.799 Class_loss: 11.363 (13.661) Contrastive_loss: 4.9394 (7.3340) Loss: 16.302 (20.995)
|
| 187 |
+
2025-07-29,22:29:56 | INFO | Train Epoch: 0 [ 14696448/128008192 (11%)] Data (t): 0.734 Batch (t): 9.153, 1784.88/s, 223.111/s/gpu LR: 0.000998 Logit Scale: 22.406 Class_loss: 11.177 (13.350) Contrastive_loss: 4.1039 (6.9302) Loss: 15.281 (20.281)
|
| 188 |
+
2025-07-29,22:38:59 | WARNING | Handling webdataset error (OSError('image file is truncated (17 bytes not processed)')). Ignoring.
|
| 189 |
+
2025-07-29,22:49:34 | INFO | Train Epoch: 0 [ 16793600/128008192 (13%)] Data (t): 0.717 Batch (t): 9.199, 1788.53/s, 223.566/s/gpu LR: 0.000997 Logit Scale: 25.197 Class_loss: 10.953 (13.084) Contrastive_loss: 3.3171 (6.5288) Loss: 14.270 (19.613)
|
| 190 |
+
2025-07-29,23:09:10 | INFO | Train Epoch: 0 [ 18890752/128008192 (15%)] Data (t): 0.736 Batch (t): 9.189, 1769.04/s, 221.130/s/gpu LR: 0.000995 Logit Scale: 28.269 Class_loss: 10.756 (12.851) Contrastive_loss: 2.9783 (6.1737) Loss: 13.734 (19.025)
|
| 191 |
+
2025-07-29,23:28:37 | INFO | Train Epoch: 0 [ 20987904/128008192 (16%)] Data (t): 0.736 Batch (t): 9.119, 1823.81/s, 227.976/s/gpu LR: 0.000993 Logit Scale: 31.441 Class_loss: 13.473 (12.908) Contrastive_loss: 5.4274 (6.1059) Loss: 18.901 (19.014)
|
| 192 |
+
2025-07-29,23:48:08 | INFO | Train Epoch: 0 [ 23085056/128008192 (18%)] Data (t): 0.732 Batch (t): 9.150, 1846.68/s, 230.835/s/gpu LR: 0.000991 Logit Scale: 34.650 Class_loss: 13.673 (12.972) Contrastive_loss: 5.3455 (6.0425) Loss: 19.018 (19.014)
|
| 193 |
+
2025-07-30,00:04:02 | WARNING | Handling webdataset error (OSError('image file is truncated (21 bytes not processed)')). Ignoring.
|
| 194 |
+
2025-07-30,00:07:48 | INFO | Train Epoch: 0 [ 25182208/128008192 (20%)] Data (t): 0.735 Batch (t): 9.214, 1792.74/s, 224.093/s/gpu LR: 0.000988 Logit Scale: 37.860 Class_loss: 12.369 (12.925) Contrastive_loss: 3.9902 (5.8846) Loss: 16.359 (18.810)
|
| 195 |
+
2025-07-30,00:26:06 | WARNING | Handling webdataset error (OSError('image file is truncated (21 bytes not processed)')). Ignoring.
|
| 196 |
+
2025-07-30,00:27:24 | INFO | Train Epoch: 0 [ 27279360/128008192 (21%)] Data (t): 0.724 Batch (t): 9.191, 1787.78/s, 223.473/s/gpu LR: 0.000985 Logit Scale: 40.692 Class_loss: 11.288 (12.808) Contrastive_loss: 3.8987 (5.7428) Loss: 15.187 (18.551)
|
| 197 |
+
2025-07-30,00:38:48 | WARNING | Handling webdataset error (OSError('image file is truncated (27 bytes not processed)')). Ignoring.
|
| 198 |
+
2025-07-30,00:46:59 | INFO | Train Epoch: 0 [ 29376512/128008192 (23%)] Data (t): 0.714 Batch (t): 9.180, 1783.03/s, 222.879/s/gpu LR: 0.000982 Logit Scale: 41.743 Class_loss: 11.218 (12.702) Contrastive_loss: 2.5880 (5.5325) Loss: 13.806 (18.235)
|
| 199 |
+
2025-07-30,00:56:34 | WARNING | Handling webdataset error (OSError('image file is truncated (77 bytes not processed)')). Ignoring.
|
| 200 |
+
2025-07-30,01:06:35 | INFO | Train Epoch: 0 [ 31473664/128008192 (25%)] Data (t): 0.705 Batch (t): 9.189, 1802.59/s, 225.323/s/gpu LR: 0.000978 Logit Scale: 44.662 Class_loss: 10.433 (12.560) Contrastive_loss: 1.5988 (5.2866) Loss: 12.032 (17.847)
|
| 201 |
+
2025-07-30,01:10:28 | WARNING | Handling webdataset error (OSError('image file is truncated (11 bytes not processed)')). Ignoring.
|
| 202 |
+
2025-07-30,01:10:58 | WARNING | Handling webdataset error (OSError('image file is truncated (48 bytes not processed)')). Ignoring.
|
| 203 |
+
2025-07-30,01:22:14 | WARNING | Handling webdataset error (OSError('image file is truncated (9 bytes not processed)')). Ignoring.
|
| 204 |
+
2025-07-30,01:26:09 | INFO | Train Epoch: 0 [ 33570816/128008192 (26%)] Data (t): 0.735 Batch (t): 9.167, 1777.46/s, 222.182/s/gpu LR: 0.000974 Logit Scale: 47.232 Class_loss: 10.659 (12.449) Contrastive_loss: 1.8564 (5.0848) Loss: 12.515 (17.533)
|
| 205 |
+
2025-07-30,01:38:34 | WARNING | Handling webdataset error (OSError('image file is truncated (8 bytes not processed)')). Ignoring.
|
| 206 |
+
2025-07-30,01:46:07 | INFO | Train Epoch: 0 [ 35667968/128008192 (28%)] Data (t): 0.755 Batch (t): 9.363, 1767.51/s, 220.939/s/gpu LR: 0.000970 Logit Scale: 48.988 Class_loss: 13.597 (12.512) Contrastive_loss: 4.5208 (5.0535) Loss: 18.118 (17.566)
|
| 207 |
+
2025-07-30,02:05:29 | INFO | Train Epoch: 0 [ 37765120/128008192 (30%)] Data (t): 0.748 Batch (t): 9.081, 1800.07/s, 225.009/s/gpu LR: 0.000965 Logit Scale: 50.764 Class_loss: 9.9570 (12.378) Contrastive_loss: 1.0231 (4.8414) Loss: 10.980 (17.219)
|
| 208 |
+
2025-07-30,02:25:02 | INFO | Train Epoch: 0 [ 39862272/128008192 (31%)] Data (t): 0.726 Batch (t): 9.158, 1797.87/s, 224.733/s/gpu LR: 0.000960 Logit Scale: 52.848 Class_loss: 9.9626 (12.257) Contrastive_loss: 1.0672 (4.6527) Loss: 11.030 (16.910)
|
| 209 |
+
2025-07-30,02:36:25 | WARNING | Handling webdataset error (OSError('image file is truncated (6 bytes not processed)')). Ignoring.
|
| 210 |
+
2025-07-30,02:44:42 | INFO | Train Epoch: 0 [ 41959424/128008192 (33%)] Data (t): 0.734 Batch (t): 9.225, 1766.89/s, 220.861/s/gpu LR: 0.000955 Logit Scale: 54.302 Class_loss: 9.8331 (12.142) Contrastive_loss: 0.92507 (4.4752) Loss: 10.758 (16.617)
|
| 211 |
+
2025-07-30,02:44:46 | WARNING | Handling webdataset error (OSError('image file is truncated (8 bytes not processed)')). Ignoring.
|
| 212 |
+
2025-07-30,03:04:09 | INFO | Train Epoch: 0 [ 44056576/128008192 (34%)] Data (t): 0.758 Batch (t): 9.117, 1750.17/s, 218.771/s/gpu LR: 0.000949 Logit Scale: 56.010 Class_loss: 9.7864 (12.035) Contrastive_loss: 0.77109 (4.3068) Loss: 10.557 (16.341)
|
| 213 |
+
2025-07-30,03:16:29 | WARNING | Handling webdataset error (OSError('image file is truncated (10 bytes not processed)')). Ignoring.
|
| 214 |
+
2025-07-30,03:23:44 | INFO | Train Epoch: 0 [ 46153728/128008192 (36%)] Data (t): 0.790 Batch (t): 9.173, 1833.91/s, 229.239/s/gpu LR: 0.000943 Logit Scale: 57.323 Class_loss: 9.7256 (11.934) Contrastive_loss: 0.86024 (4.1569) Loss: 10.586 (16.091)
|
| 215 |
+
2025-07-30,03:39:03 | WARNING | Handling webdataset error (OSError('image file is truncated (9 bytes not processed)')). Ignoring.
|
| 216 |
+
2025-07-30,03:42:19 | WARNING | Handling webdataset error (OSError('image file is truncated (1 bytes not processed)')). Ignoring.
|
| 217 |
+
2025-07-30,03:42:42 | WARNING | Handling webdataset error (OSError('image file is truncated (7 bytes not processed)')). Ignoring.
|
| 218 |
+
2025-07-30,03:43:15 | INFO | Train Epoch: 0 [ 48250880/128008192 (38%)] Data (t): 0.798 Batch (t): 9.154, 1753.08/s, 219.135/s/gpu LR: 0.000937 Logit Scale: 58.252 Class_loss: 9.8697 (11.848) Contrastive_loss: 0.84280 (4.0189) Loss: 10.712 (15.867)
|
| 219 |
+
2025-07-30,04:02:44 | INFO | Train Epoch: 0 [ 50348032/128008192 (39%)] Data (t): 0.782 Batch (t): 9.132, 1802.87/s, 225.358/s/gpu LR: 0.000930 Logit Scale: 59.232 Class_loss: 9.7098 (11.763) Contrastive_loss: 0.71749 (3.8868) Loss: 10.427 (15.650)
|
| 220 |
+
2025-07-30,04:05:23 | WARNING | Handling webdataset error (UnidentifiedImageError('cannot identify image file <_io.BytesIO object at 0x7f765e970f90>')). Ignoring.
|
| 221 |
+
2025-07-30,04:12:13 | WARNING | Handling webdataset error (OSError('image file is truncated (5 bytes not processed)')). Ignoring.
|
| 222 |
+
2025-07-30,04:22:06 | INFO | Train Epoch: 0 [ 52445184/128008192 (41%)] Data (t): 0.790 Batch (t): 9.076, 1810.80/s, 226.350/s/gpu LR: 0.000923 Logit Scale: 60.337 Class_loss: 9.7365 (11.685) Contrastive_loss: 0.81508 (3.7687) Loss: 10.552 (15.453)
|
| 223 |
+
2025-07-30,04:41:35 | INFO | Train Epoch: 0 [ 54542336/128008192 (43%)] Data (t): 0.774 Batch (t): 9.130, 1819.32/s, 227.414/s/gpu LR: 0.000916 Logit Scale: 60.118 Class_loss: 12.856 (11.728) Contrastive_loss: 3.3288 (3.7524) Loss: 16.184 (15.481)
|
| 224 |
+
2025-07-30,05:01:08 | INFO | Train Epoch: 0 [ 56639488/128008192 (44%)] Data (t): 0.749 Batch (t): 9.163, 1821.77/s, 227.722/s/gpu LR: 0.000909 Logit Scale: 62.083 Class_loss: 9.6428 (11.654) Contrastive_loss: 0.63685 (3.6411) Loss: 10.280 (15.295)
|
| 225 |
+
2025-07-30,05:20:39 | INFO | Train Epoch: 0 [ 58736640/128008192 (46%)] Data (t): 0.727 Batch (t): 9.154, 1794.60/s, 224.325/s/gpu LR: 0.000901 Logit Scale: 62.652 Class_loss: 10.007 (11.597) Contrastive_loss: 0.85020 (3.5449) Loss: 10.857 (15.142)
|
| 226 |
+
2025-07-30,05:36:05 | WARNING | Handling webdataset error (OSError('image file is truncated (104 bytes not processed)')). Ignoring.
|
| 227 |
+
2025-07-30,05:40:12 | INFO | Train Epoch: 0 [ 60833792/128008192 (48%)] Data (t): 0.734 Batch (t): 9.166, 1805.50/s, 225.688/s/gpu LR: 0.000893 Logit Scale: 63.582 Class_loss: 9.7488 (11.535) Contrastive_loss: 0.69501 (3.4499) Loss: 10.444 (14.985)
|
| 228 |
+
2025-07-30,05:59:49 | INFO | Train Epoch: 0 [ 62930944/128008192 (49%)] Data (t): 0.745 Batch (t): 9.195, 1790.17/s, 223.771/s/gpu LR: 0.000884 Logit Scale: 64.499 Class_loss: 9.5882 (11.472) Contrastive_loss: 0.68170 (3.3606) Loss: 10.270 (14.833)
|
| 229 |
+
2025-07-30,06:08:15 | WARNING | Handling webdataset error (OSError('image file is truncated (21 bytes not processed)')). Ignoring.
|
| 230 |
+
2025-07-30,06:19:19 | INFO | Train Epoch: 0 [ 65028096/128008192 (51%)] Data (t): 0.757 Batch (t): 9.137, 1792.30/s, 224.037/s/gpu LR: 0.000876 Logit Scale: 65.347 Class_loss: 9.5222 (11.412) Contrastive_loss: 0.58598 (3.2739) Loss: 10.108 (14.685)
|
| 231 |
+
2025-07-30,06:26:46 | WARNING | Handling webdataset error (OSError('image file is truncated (47 bytes not processed)')). Ignoring.
|
| 232 |
+
2025-07-30,06:29:16 | WARNING | Handling webdataset error (OSError('image file is truncated (25 bytes not processed)')). Ignoring.
|
| 233 |
+
2025-07-30,06:30:11 | WARNING | Handling webdataset error (OSError('image file is truncated (4 bytes not processed)')). Ignoring.
|
| 234 |
+
2025-07-30,06:38:46 | INFO | Train Epoch: 0 [ 67125248/128008192 (52%)] Data (t): 0.736 Batch (t): 9.119, 1760.29/s, 220.036/s/gpu LR: 0.000867 Logit Scale: 65.577 Class_loss: 9.4711 (11.353) Contrastive_loss: 0.46846 (3.1888) Loss: 9.9396 (14.542)
|
| 235 |
+
2025-07-30,06:58:23 | INFO | Train Epoch: 0 [ 69222400/128008192 (54%)] Data (t): 0.740 Batch (t): 9.196, 1785.64/s, 223.205/s/gpu LR: 0.000858 Logit Scale: 66.271 Class_loss: 9.3842 (11.295) Contrastive_loss: 0.50823 (3.1100) Loss: 9.8924 (14.405)
|
| 236 |
+
2025-07-30,07:09:35 | WARNING | Handling webdataset error (OSError('image file is truncated (14 bytes not processed)')). Ignoring.
|
| 237 |
+
2025-07-30,07:17:55 | INFO | Train Epoch: 0 [ 71319552/128008192 (56%)] Data (t): 0.731 Batch (t): 9.153, 1785.88/s, 223.235/s/gpu LR: 0.000848 Logit Scale: 66.744 Class_loss: 9.5251 (11.244) Contrastive_loss: 0.62279 (3.0389) Loss: 10.148 (14.283)
|
| 238 |
+
2025-07-30,07:37:36 | INFO | Train Epoch: 0 [ 73416704/128008192 (57%)] Data (t): 0.757 Batch (t): 9.223, 1676.26/s, 209.532/s/gpu LR: 0.000839 Logit Scale: 67.092 Class_loss: 9.4135 (11.193) Contrastive_loss: 0.49880 (2.9684) Loss: 9.9123 (14.162)
|
| 239 |
+
2025-07-30,07:57:14 | INFO | Train Epoch: 0 [ 75513856/128008192 (59%)] Data (t): 0.746 Batch (t): 9.205, 1762.99/s, 220.374/s/gpu LR: 0.000829 Logit Scale: 68.076 Class_loss: 9.3267 (11.143) Contrastive_loss: 0.45500 (2.9005) Loss: 9.7817 (14.043)
|
| 240 |
+
2025-07-30,08:16:50 | INFO | Train Epoch: 0 [ 77611008/128008192 (61%)] Data (t): 0.757 Batch (t): 9.192, 1747.50/s, 218.437/s/gpu LR: 0.000819 Logit Scale: 68.174 Class_loss: 9.4707 (11.099) Contrastive_loss: 0.51055 (2.8376) Loss: 9.9812 (13.937)
|
| 241 |
+
2025-07-30,08:28:53 | WARNING | Handling webdataset error (OSError('image file is truncated (96 bytes not processed)')). Ignoring.
|
| 242 |
+
2025-07-30,08:36:32 | INFO | Train Epoch: 0 [ 79708160/128008192 (62%)] Data (t): 0.758 Batch (t): 9.234, 1795.66/s, 224.457/s/gpu LR: 0.000808 Logit Scale: 68.697 Class_loss: 9.3740 (11.055) Contrastive_loss: 0.47074 (2.7769) Loss: 9.8447 (13.832)
|
| 243 |
+
2025-07-30,08:56:03 | INFO | Train Epoch: 0 [ 81805312/128008192 (64%)] Data (t): 0.747 Batch (t): 9.145, 1795.04/s, 224.380/s/gpu LR: 0.000798 Logit Scale: 69.652 Class_loss: 9.7523 (11.022) Contrastive_loss: 0.72003 (2.7255) Loss: 10.472 (13.748)
|
| 244 |
+
2025-07-30,08:59:43 | WARNING | Handling webdataset error (OSError('image file is truncated (16 bytes not processed)')). Ignoring.
|
| 245 |
+
2025-07-30,09:03:39 | WARNING | Handling webdataset error (OSError('image file is truncated (25 bytes not processed)')). Ignoring.
|
| 246 |
+
2025-07-30,09:15:34 | INFO | Train Epoch: 0 [ 83902464/128008192 (66%)] Data (t): 0.728 Batch (t): 9.150, 1811.77/s, 226.471/s/gpu LR: 0.000787 Logit Scale: 69.930 Class_loss: 9.6003 (10.987) Contrastive_loss: 0.45126 (2.6700) Loss: 10.052 (13.657)
|
| 247 |
+
2025-07-30,09:33:44 | WARNING | Handling webdataset error (OSError('image file is truncated (42 bytes not processed)')). Ignoring.
|
| 248 |
+
2025-07-30,09:35:05 | INFO | Train Epoch: 0 [ 85999616/128008192 (67%)] Data (t): 0.749 Batch (t): 9.144, 1775.34/s, 221.917/s/gpu LR: 0.000776 Logit Scale: 70.154 Class_loss: 9.3019 (10.947) Contrastive_loss: 0.44178 (2.6169) Loss: 9.7436 (13.564)
|
| 249 |
+
2025-07-30,09:52:21 | WARNING | Handling webdataset error (OSError('image file is truncated (6 bytes not processed)')). Ignoring.
|
| 250 |
+
2025-07-30,09:53:28 | WARNING | Handling webdataset error (OSError('image file is truncated (26 bytes not processed)')). Ignoring.
|
| 251 |
+
2025-07-30,09:54:42 | INFO | Train Epoch: 0 [ 88096768/128008192 (69%)] Data (t): 0.760 Batch (t): 9.200, 1807.05/s, 225.882/s/gpu LR: 0.000765 Logit Scale: 70.342 Class_loss: 9.3207 (10.910) Contrastive_loss: 0.47061 (2.5670) Loss: 9.7913 (13.477)
|
| 252 |
+
2025-07-30,09:56:07 | WARNING | Handling webdataset error (OSError('image file is truncated (23 bytes not processed)')). Ignoring.
|
| 253 |
+
2025-07-30,10:14:24 | INFO | Train Epoch: 0 [ 90193920/128008192 (70%)] Data (t): 0.756 Batch (t): 9.237, 1795.51/s, 224.439/s/gpu LR: 0.000753 Logit Scale: 71.189 Class_loss: 9.1725 (10.870) Contrastive_loss: 0.37856 (2.5173) Loss: 9.5511 (13.387)
|
| 254 |
+
2025-07-30,10:14:50 | WARNING | Handling webdataset error (OSError('image file is truncated (33 bytes not processed)')). Ignoring.
|
| 255 |
+
2025-07-30,10:24:44 | WARNING | Handling webdataset error (OSError('image file is truncated (76 bytes not processed)')). Ignoring.
|
| 256 |
+
2025-07-30,10:34:04 | INFO | Train Epoch: 0 [ 92291072/128008192 (72%)] Data (t): 0.758 Batch (t): 9.212, 1779.95/s, 222.493/s/gpu LR: 0.000742 Logit Scale: 71.616 Class_loss: 9.3216 (10.836) Contrastive_loss: 0.45066 (2.4714) Loss: 9.7723 (13.307)
|
| 257 |
+
2025-07-30,10:53:40 | INFO | Train Epoch: 0 [ 94388224/128008192 (74%)] Data (t): 0.776 Batch (t): 9.193, 1791.80/s, 223.975/s/gpu LR: 0.000730 Logit Scale: 72.057 Class_loss: 9.1183 (10.798) Contrastive_loss: 0.43173 (2.4270) Loss: 9.5500 (13.225)
|
| 258 |
+
2025-07-30,11:13:18 | INFO | Train Epoch: 0 [ 96485376/128008192 (75%)] Data (t): 0.762 Batch (t): 9.201, 1774.27/s, 221.784/s/gpu LR: 0.000718 Logit Scale: 72.365 Class_loss: 9.3176 (10.767) Contrastive_loss: 0.41687 (2.3842) Loss: 9.7345 (13.151)
|
| 259 |
+
2025-07-30,11:19:14 | WARNING | Handling webdataset error (OSError('image file is truncated (52 bytes not processed)')). Ignoring.
|
| 260 |
+
2025-07-30,11:25:59 | WARNING | Handling webdataset error (OSError('image file is truncated (83 bytes not processed)')). Ignoring.
|
| 261 |
+
2025-07-30,11:33:05 | INFO | Train Epoch: 0 [ 98582528/128008192 (77%)] Data (t): 0.769 Batch (t): 9.271, 1740.72/s, 217.590/s/gpu LR: 0.000706 Logit Scale: 72.834 Class_loss: 9.1830 (10.734) Contrastive_loss: 0.45686 (2.3441) Loss: 9.6398 (13.078)
|
| 262 |
+
2025-07-30,11:46:24 | INFO | Starting zero-shot imagenet.
|
| 263 |
+
2025-07-30,11:46:25 | INFO | Building zero-shot classifier
|
| 264 |
+
2025-07-30,11:46:31 | INFO | Using classifier
|
| 265 |
+
2025-07-30,12:00:22 | INFO | Finished zero-shot imagenet.
|
| 266 |
+
2025-07-30,12:00:22 | INFO | Eval Epoch: 0.7811340074235249 imagenet-zeroshot-val-top1: 0.3069 imagenet-zeroshot-val-top5: 0.5668
|
| 267 |
+
2025-07-30,12:06:22 | INFO | Train Epoch: 0 [100679680/128008192 (79%)] Data (t): 8.833 Batch (t): 15.603, 1771.93/s, 221.491/s/gpu LR: 0.000694 Logit Scale: 73.524 Class_loss: 9.2516 (10.704) Contrastive_loss: 0.36296 (2.3037) Loss: 9.6146 (13.007)
|
| 268 |
+
2025-07-30,12:26:02 | INFO | Train Epoch: 0 [102776832/128008192 (80%)] Data (t): 0.805 Batch (t): 9.218, 1812.88/s, 226.610/s/gpu LR: 0.000682 Logit Scale: 73.781 Class_loss: 9.1678 (10.673) Contrastive_loss: 0.32284 (2.2640) Loss: 9.4906 (12.937)
|
| 269 |
+
2025-07-30,12:29:50 | WARNING | Handling webdataset error (OSError('image file is truncated (19 bytes not processed)')). Ignoring.
|
| 270 |
+
2025-07-30,12:45:35 | INFO | Train Epoch: 0 [104873984/128008192 (82%)] Data (t): 0.758 Batch (t): 9.163, 1888.80/s, 236.099/s/gpu LR: 0.000669 Logit Scale: 73.941 Class_loss: 12.820 (10.715) Contrastive_loss: 2.4096 (2.2669) Loss: 15.230 (12.982)
|
| 271 |
+
2025-07-30,13:04:51 | INFO | Train Epoch: 0 [106971136/128008192 (84%)] Data (t): 0.761 Batch (t): 9.037, 1814.05/s, 226.756/s/gpu LR: 0.000657 Logit Scale: 74.071 Class_loss: 9.2587 (10.687) Contrastive_loss: 0.44537 (2.2319) Loss: 9.7041 (12.919)
|
| 272 |
+
2025-07-30,13:24:33 | INFO | Train Epoch: 0 [109068288/128008192 (85%)] Data (t): 0.746 Batch (t): 9.232, 1792.93/s, 224.116/s/gpu LR: 0.000644 Logit Scale: 74.569 Class_loss: 9.0816 (10.657) Contrastive_loss: 0.33773 (2.1961) Loss: 9.4193 (12.853)
|
| 273 |
+
2025-07-30,13:26:53 | WARNING | Handling webdataset error (OSError('image file is truncated (37 bytes not processed)')). Ignoring.
|
| 274 |
+
2025-07-30,13:44:13 | INFO | Train Epoch: 0 [111165440/128008192 (87%)] Data (t): 0.754 Batch (t): 9.216, 1791.67/s, 223.959/s/gpu LR: 0.000631 Logit Scale: 75.280 Class_loss: 9.3441 (10.632) Contrastive_loss: 0.39064 (2.1627) Loss: 9.7347 (12.795)
|
| 275 |
+
2025-07-30,14:03:48 | INFO | Train Epoch: 0 [113262592/128008192 (88%)] Data (t): 0.758 Batch (t): 9.185, 1776.11/s, 222.013/s/gpu LR: 0.000618 Logit Scale: 75.354 Class_loss: 9.2768 (10.608) Contrastive_loss: 0.36064 (2.1299) Loss: 9.6374 (12.738)
|
| 276 |
+
2025-07-30,14:18:20 | WARNING | Handling webdataset error (OSError('image file is truncated (20 bytes not processed)')). Ignoring.
|
| 277 |
+
2025-07-30,14:23:27 | INFO | Train Epoch: 0 [115359744/128008192 (90%)] Data (t): 0.751 Batch (t): 9.212, 1745.72/s, 218.215/s/gpu LR: 0.000605 Logit Scale: 75.500 Class_loss: 9.3415 (10.585) Contrastive_loss: 0.45785 (2.1001) Loss: 9.7993 (12.685)
|
| 278 |
+
2025-07-30,14:38:27 | WARNING | Handling webdataset error (OSError('image file is truncated (21 bytes not processed)')). Ignoring.
|
| 279 |
+
2025-07-30,14:40:17 | WARNING | Handling webdataset error (OSError('image file is truncated (6 bytes not processed)')). Ignoring.
|
| 280 |
+
2025-07-30,14:43:07 | INFO | Train Epoch: 0 [117456896/128008192 (92%)] Data (t): 0.785 Batch (t): 9.215, 1763.01/s, 220.376/s/gpu LR: 0.000592 Logit Scale: 76.126 Class_loss: 9.0644 (10.558) Contrastive_loss: 0.32667 (2.0690) Loss: 9.3910 (12.627)
|
| 281 |
+
2025-07-30,14:46:45 | WARNING | Handling webdataset error (OSError('image file is truncated (18 bytes not processed)')). Ignoring.
|
| 282 |
+
2025-07-30,15:02:42 | INFO | Train Epoch: 0 [119554048/128008192 (93%)] Data (t): 0.787 Batch (t): 9.180, 1925.53/s, 240.691/s/gpu LR: 0.000579 Logit Scale: 76.375 Class_loss: 9.2859 (10.536) Contrastive_loss: 0.38454 (2.0399) Loss: 9.6704 (12.576)
|
| 283 |
+
2025-07-30,15:09:54 | WARNING | Handling webdataset error (OSError('image file is truncated (28 bytes not processed)')). Ignoring.
|
| 284 |
+
2025-07-30,15:16:02 | WARNING | Handling webdataset error (OSError('image file is truncated (1 bytes not processed)')). Ignoring.
|
| 285 |
+
2025-07-30,15:22:12 | INFO | Train Epoch: 0 [121651200/128008192 (95%)] Data (t): 0.749 Batch (t): 9.142, 1799.16/s, 224.895/s/gpu LR: 0.000566 Logit Scale: 76.444 Class_loss: 12.219 (10.565) Contrastive_loss: 2.1146 (2.0412) Loss: 14.334 (12.606)
|
| 286 |
+
2025-07-30,15:41:42 | INFO | Train Epoch: 0 [123748352/128008192 (97%)] Data (t): 0.781 Batch (t): 9.142, 1805.68/s, 225.710/s/gpu LR: 0.000553 Logit Scale: 76.662 Class_loss: 9.1951 (10.542) Contrastive_loss: 0.34069 (2.0128) Loss: 9.5358 (12.555)
|
| 287 |
+
2025-07-30,15:48:49 | WARNING | Handling webdataset error (OSError('image file is truncated (181 bytes not processed)')). Ignoring.
|
| 288 |
+
2025-07-30,16:01:25 | INFO | Train Epoch: 0 [125845504/128008192 (98%)] Data (t): 0.746 Batch (t): 9.242, 1767.55/s, 220.944/s/gpu LR: 0.000540 Logit Scale: 76.868 Class_loss: 10.823 (10.547) Contrastive_loss: 1.2627 (2.0005) Loss: 12.086 (12.547)
|
| 289 |
+
2025-07-30,16:15:40 | WARNING | Handling webdataset error (OSError('image file is truncated (31 bytes not processed)')). Ignoring.
|
| 290 |
+
2025-07-30,16:20:58 | INFO | Train Epoch: 0 [127942656/128008192 (100%)] Data (t): 0.762 Batch (t): 9.159, 1794.51/s, 224.314/s/gpu LR: 0.000526 Logit Scale: 77.251 Class_loss: 9.1553 (10.524) Contrastive_loss: 0.36314 (1.9741) Loss: 9.5184 (12.498)
|
| 291 |
+
2025-07-30,16:21:34 | INFO | Train Epoch: 0 [128008192/128008192 (100%)] Data (t): 0.790 Batch (t): 9.069, 1860.13/s, 232.516/s/gpu LR: 0.000526 Logit Scale: 77.258 Class_loss: 9.1275 (10.502) Contrastive_loss: 0.35391 (1.9484) Loss: 9.4814 (12.451)
|
| 292 |
+
2025-07-30,16:21:41 | INFO | Start epoch 1
|
| 293 |
+
2025-07-30,16:21:54 | INFO | Train Epoch: 1 [ 16384/128008192 (0%)] Data (t): 10.757 Batch (t): 12.072, 1357.22/s, 169.653/s/gpu LR: 0.000526 Logit Scale: 77.262 Class_loss: 9.1015 (9.1015) Contrastive_loss: 0.29511 (0.29511) Loss: 9.3966 (9.3966)
|
| 294 |
+
2025-07-30,16:28:47 | WARNING | Handling webdataset error (OSError('image file is truncated (63 bytes not processed)')). Ignoring.
|
| 295 |
+
2025-07-30,16:34:23 | WARNING | Handling webdataset error (OSError('image file is truncated (208 bytes not processed)')). Ignoring.
|
| 296 |
+
2025-07-30,16:41:00 | INFO | Train Epoch: 1 [ 2113536/128008192 (2%)] Data (t): 2.562 Batch (t): 8.954, 1886.93/s, 235.867/s/gpu LR: 0.000513 Logit Scale: 77.341 Class_loss: 12.154 (10.628) Contrastive_loss: 2.2441 (1.2696) Loss: 14.398 (11.897)
|
| 297 |
+
2025-07-30,16:41:47 | WARNING | Handling webdataset error (OSError('image file is truncated (77 bytes not processed)')). Ignoring.
|
| 298 |
+
2025-07-30,17:00:25 | INFO | Train Epoch: 1 [ 4210688/128008192 (3%)] Data (t): 0.762 Batch (t): 9.100, 1782.79/s, 222.849/s/gpu LR: 0.000499 Logit Scale: 77.359 Class_loss: 9.8717 (10.376) Contrastive_loss: 0.64527 (1.0615) Loss: 10.517 (11.437)
|
| 299 |
+
2025-07-30,17:19:54 | INFO | Train Epoch: 1 [ 6307840/128008192 (5%)] Data (t): 0.757 Batch (t): 9.135, 1812.54/s, 226.567/s/gpu LR: 0.000486 Logit Scale: 77.939 Class_loss: 12.583 (10.927) Contrastive_loss: 2.5473 (1.4330) Loss: 15.130 (12.360)
|
| 300 |
+
2025-07-30,17:32:45 | WARNING | Handling webdataset error (OSError('image file is truncated (64 bytes not processed)')). Ignoring.
|
| 301 |
+
2025-07-30,17:39:19 | INFO | Train Epoch: 1 [ 8404992/128008192 (7%)] Data (t): 0.770 Batch (t): 9.098, 1800.73/s, 225.091/s/gpu LR: 0.000473 Logit Scale: 78.237 Class_loss: 9.5647 (10.655) Contrastive_loss: 0.58121 (1.2626) Loss: 10.146 (11.917)
|
| 302 |
+
2025-07-30,17:53:17 | WARNING | Handling webdataset error (OSError('image file is truncated (17 bytes not processed)')). Ignoring.
|
| 303 |
+
2025-07-30,17:58:54 | INFO | Train Epoch: 1 [ 10502144/128008192 (8%)] Data (t): 0.748 Batch (t): 9.180, 1771.00/s, 221.375/s/gpu LR: 0.000460 Logit Scale: 78.730 Class_loss: 9.0504 (10.387) Contrastive_loss: 0.28590 (1.0998) Loss: 9.3363 (11.487)
|
| 304 |
+
2025-07-30,18:18:25 | INFO | Train Epoch: 1 [ 12599296/128008192 (10%)] Data (t): 0.753 Batch (t): 9.154, 1775.28/s, 221.910/s/gpu LR: 0.000446 Logit Scale: 79.052 Class_loss: 10.813 (10.448) Contrastive_loss: 1.2020 (1.1144) Loss: 12.015 (11.563)
|
| 305 |
+
2025-07-30,18:20:01 | WARNING | Handling webdataset error (OSError('image file is truncated (1 bytes not processed)')). Ignoring.
|
| 306 |
+
2025-07-30,18:38:01 | INFO | Train Epoch: 1 [ 14696448/128008192 (11%)] Data (t): 0.748 Batch (t): 9.187, 1800.29/s, 225.037/s/gpu LR: 0.000433 Logit Scale: 79.514 Class_loss: 10.896 (10.504) Contrastive_loss: 1.3889 (1.1487) Loss: 12.285 (11.653)
|
| 307 |
+
2025-07-30,18:57:38 | INFO | Train Epoch: 1 [ 16793600/128008192 (13%)] Data (t): 0.745 Batch (t): 9.191, 1761.55/s, 220.194/s/gpu LR: 0.000420 Logit Scale: 79.850 Class_loss: 9.1151 (10.350) Contrastive_loss: 0.27508 (1.0517) Loss: 9.3901 (11.402)
|
| 308 |
+
2025-07-30,19:15:54 | WARNING | Handling webdataset error (OSError('image file is truncated (95 bytes not processed)')). Ignoring.
|
| 309 |
+
2025-07-30,19:17:19 | INFO | Train Epoch: 1 [ 18890752/128008192 (15%)] Data (t): 0.763 Batch (t): 9.231, 1785.98/s, 223.248/s/gpu LR: 0.000407 Logit Scale: 80.315 Class_loss: 9.2347 (10.238) Contrastive_loss: 0.40487 (0.98698) Loss: 9.6395 (11.225)
|
| 310 |
+
2025-07-30,19:25:58 | WARNING | Handling webdataset error (OSError('image file is truncated (25 bytes not processed)')). Ignoring.
|
| 311 |
+
2025-07-30,19:36:57 | INFO | Train Epoch: 1 [ 20987904/128008192 (16%)] Data (t): 0.734 Batch (t): 9.196, 1783.48/s, 222.935/s/gpu LR: 0.000394 Logit Scale: 80.559 Class_loss: 9.0163 (10.127) Contrastive_loss: 0.25004 (0.91999) Loss: 9.2663 (11.047)
|
| 312 |
+
2025-07-30,19:40:55 | WARNING | Handling webdataset error (OSError('image file is truncated (16 bytes not processed)')). Ignoring.
|
| 313 |
+
2025-07-30,19:56:43 | INFO | Train Epoch: 1 [ 23085056/128008192 (18%)] Data (t): 0.742 Batch (t): 9.270, 1775.41/s, 221.926/s/gpu LR: 0.000381 Logit Scale: 80.964 Class_loss: 9.1728 (10.048) Contrastive_loss: 0.26007 (0.86500) Loss: 9.4328 (10.913)
|
| 314 |
+
2025-07-30,20:16:16 | INFO | Train Epoch: 1 [ 25182208/128008192 (20%)] Data (t): 0.765 Batch (t): 9.161, 1783.01/s, 222.877/s/gpu LR: 0.000368 Logit Scale: 81.275 Class_loss: 9.8607 (10.033) Contrastive_loss: 0.72902 (0.85454) Loss: 10.590 (10.888)
|
| 315 |
+
2025-07-30,20:35:54 | INFO | Train Epoch: 1 [ 27279360/128008192 (21%)] Data (t): 0.737 Batch (t): 9.204, 1812.03/s, 226.503/s/gpu LR: 0.000355 Logit Scale: 81.467 Class_loss: 9.2992 (9.9809) Contrastive_loss: 0.28500 (0.81385) Loss: 9.5842 (10.795)
|
| 316 |
+
2025-07-30,20:42:54 | WARNING | Handling webdataset error (OSError('image file is truncated (18 bytes not processed)')). Ignoring.
|
| 317 |
+
2025-07-30,20:51:28 | WARNING | Handling webdataset error (OSError('image file is truncated (21 bytes not processed)')). Ignoring.
|
| 318 |
+
2025-07-30,20:55:32 | INFO | Train Epoch: 1 [ 29376512/128008192 (23%)] Data (t): 0.745 Batch (t): 9.208, 1792.38/s, 224.047/s/gpu LR: 0.000343 Logit Scale: 81.805 Class_loss: 9.1518 (9.9256) Contrastive_loss: 0.27915 (0.77821) Loss: 9.4310 (10.704)
|
| 319 |
+
2025-07-30,20:57:33 | WARNING | Handling webdataset error (OSError('image file is truncated (34 bytes not processed)')). Ignoring.
|
| 320 |
+
2025-07-30,20:57:59 | WARNING | Handling webdataset error (OSError('image file is truncated (1 bytes not processed)')). Ignoring.
|
| 321 |
+
2025-07-30,20:58:48 | WARNING | Handling webdataset error (OSError('image file is truncated (7 bytes not processed)')). Ignoring.
|
| 322 |
+
2025-07-30,21:15:00 | INFO | Train Epoch: 1 [ 31473664/128008192 (25%)] Data (t): 0.761 Batch (t): 9.118, 1805.31/s, 225.664/s/gpu LR: 0.000330 Logit Scale: 82.138 Class_loss: 13.264 (10.134) Contrastive_loss: 2.2272 (0.86877) Loss: 15.491 (11.003)
|
| 323 |
+
2025-07-30,21:25:36 | WARNING | Handling webdataset error (OSError('image file is truncated (19 bytes not processed)')). Ignoring.
|
| 324 |
+
2025-07-30,21:33:23 | WARNING | Handling webdataset error (OSError('image file is truncated (26 bytes not processed)')). Ignoring.
|
| 325 |
+
2025-07-30,21:34:45 | INFO | Train Epoch: 1 [ 33570816/128008192 (26%)] Data (t): 0.774 Batch (t): 9.262, 1792.49/s, 224.061/s/gpu LR: 0.000318 Logit Scale: 82.472 Class_loss: 8.9420 (10.064) Contrastive_loss: 0.23351 (0.83140) Loss: 9.1755 (10.896)
|
| 326 |
+
2025-07-30,21:47:42 | WARNING | Handling webdataset error (OSError('image file is truncated (20 bytes not processed)')). Ignoring.
|
| 327 |
+
2025-07-30,21:54:14 | INFO | Train Epoch: 1 [ 35667968/128008192 (28%)] Data (t): 0.780 Batch (t): 9.134, 1803.92/s, 225.491/s/gpu LR: 0.000305 Logit Scale: 82.747 Class_loss: 8.8905 (9.9989) Contrastive_loss: 0.24989 (0.79909) Loss: 9.1404 (10.798)
|
| 328 |
+
2025-07-30,22:00:02 | WARNING | Handling webdataset error (OSError('image file is truncated (120 bytes not processed)')). Ignoring.
|
| 329 |
+
2025-07-30,22:13:49 | INFO | Train Epoch: 1 [ 37765120/128008192 (30%)] Data (t): 0.771 Batch (t): 9.173, 1715.66/s, 214.458/s/gpu LR: 0.000293 Logit Scale: 82.831 Class_loss: 8.8911 (9.9406) Contrastive_loss: 0.19955 (0.76754) Loss: 9.0906 (10.708)
|
| 330 |
+
2025-07-30,22:33:23 | INFO | Train Epoch: 1 [ 39862272/128008192 (31%)] Data (t): 0.744 Batch (t): 9.178, 1765.50/s, 220.687/s/gpu LR: 0.000281 Logit Scale: 83.126 Class_loss: 8.8754 (9.8873) Contrastive_loss: 0.24303 (0.74131) Loss: 9.1184 (10.629)
|
| 331 |
+
2025-07-30,22:46:03 | WARNING | Handling webdataset error (OSError('image file is truncated (92 bytes not processed)')). Ignoring.
|
| 332 |
+
2025-07-30,22:50:37 | WARNING | Handling webdataset error (OSError('image file is truncated (40 bytes not processed)')). Ignoring.
|
| 333 |
+
2025-07-30,22:52:59 | INFO | Train Epoch: 1 [ 41959424/128008192 (33%)] Data (t): 0.757 Batch (t): 9.188, 1808.03/s, 226.003/s/gpu LR: 0.000269 Logit Scale: 83.447 Class_loss: 9.3524 (9.8619) Contrastive_loss: 0.43249 (0.72661) Loss: 9.7849 (10.588)
|
| 334 |
+
2025-07-30,23:12:46 | INFO | Train Epoch: 1 [ 44056576/128008192 (34%)] Data (t): 0.752 Batch (t): 9.268, 1812.97/s, 226.622/s/gpu LR: 0.000258 Logit Scale: 83.818 Class_loss: 8.8987 (9.8181) Contrastive_loss: 0.26385 (0.70557) Loss: 9.1625 (10.524)
|
| 335 |
+
2025-07-30,23:20:43 | WARNING | Handling webdataset error (OSError('image file is truncated (3 bytes not processed)')). Ignoring.
|
| 336 |
+
2025-07-30,23:32:41 | INFO | Train Epoch: 1 [ 46153728/128008192 (36%)] Data (t): 0.753 Batch (t): 9.339, 1790.31/s, 223.789/s/gpu LR: 0.000246 Logit Scale: 84.061 Class_loss: 9.8117 (9.8178) Contrastive_loss: 0.66091 (0.70363) Loss: 10.473 (10.521)
|
| 337 |
+
2025-07-30,23:52:18 | INFO | Train Epoch: 1 [ 48250880/128008192 (38%)] Data (t): 0.749 Batch (t): 9.197, 1765.30/s, 220.662/s/gpu LR: 0.000235 Logit Scale: 84.383 Class_loss: 9.0275 (9.7849) Contrastive_loss: 0.27529 (0.68578) Loss: 9.3028 (10.471)
|
| 338 |
+
2025-07-30,23:56:42 | WARNING | Handling webdataset error (OSError('image file is truncated (48 bytes not processed)')). Ignoring.
|
| 339 |
+
2025-07-31,00:12:03 | INFO | Train Epoch: 1 [ 50348032/128008192 (39%)] Data (t): 0.763 Batch (t): 9.256, 1636.25/s, 204.532/s/gpu LR: 0.000223 Logit Scale: 84.589 Class_loss: 8.8603 (9.7479) Contrastive_loss: 0.19898 (0.66631) Loss: 9.0593 (10.414)
|
| 340 |
+
2025-07-31,00:20:24 | WARNING | Handling webdataset error (OSError('image file is truncated (7 bytes not processed)')). Ignoring.
|
| 341 |
+
2025-07-31,00:30:05 | WARNING | Handling webdataset error (OSError('image file is truncated (48 bytes not processed)')). Ignoring.
|
| 342 |
+
2025-07-31,00:30:08 | WARNING | Handling webdataset error (OSError('image file is truncated (11 bytes not processed)')). Ignoring.
|
| 343 |
+
2025-07-31,00:31:52 | INFO | Train Epoch: 1 [ 52445184/128008192 (41%)] Data (t): 0.792 Batch (t): 9.292, 1788.43/s, 223.554/s/gpu LR: 0.000212 Logit Scale: 84.881 Class_loss: 8.9538 (9.7174) Contrastive_loss: 0.23723 (0.64981) Loss: 9.1910 (10.367)
|
| 344 |
+
2025-07-31,00:37:17 | WARNING | Handling webdataset error (OSError('image file is truncated (6 bytes not processed)')). Ignoring.
|
| 345 |
+
2025-07-31,00:43:18 | WARNING | Handling webdataset error (OSError('image file is truncated (104 bytes not processed)')). Ignoring.
|
| 346 |
+
2025-07-31,00:51:35 | INFO | Train Epoch: 1 [ 54542336/128008192 (43%)] Data (t): 0.751 Batch (t): 9.242, 1808.69/s, 226.086/s/gpu LR: 0.000202 Logit Scale: 85.190 Class_loss: 8.7944 (9.6832) Contrastive_loss: 0.23447 (0.63443) Loss: 9.0288 (10.318)
|
| 347 |
+
2025-07-31,00:58:24 | WARNING | Handling webdataset error (OSError('image file is truncated (10 bytes not processed)')). Ignoring.
|
| 348 |
+
2025-07-31,01:11:08 | INFO | Train Epoch: 1 [ 56639488/128008192 (44%)] Data (t): 0.773 Batch (t): 9.163, 1770.81/s, 221.352/s/gpu LR: 0.000191 Logit Scale: 85.462 Class_loss: 8.8578 (9.6537) Contrastive_loss: 0.21433 (0.61942) Loss: 9.0721 (10.273)
|
| 349 |
+
2025-07-31,01:30:41 | INFO | Train Epoch: 1 [ 58736640/128008192 (46%)] Data (t): 0.744 Batch (t): 9.160, 1669.85/s, 208.731/s/gpu LR: 0.000181 Logit Scale: 85.645 Class_loss: 11.450 (9.7156) Contrastive_loss: 1.4692 (0.64872) Loss: 12.919 (10.364)
|
| 350 |
+
2025-07-31,01:49:03 | WARNING | Handling webdataset error (OSError('image file is truncated (13 bytes not processed)')). Ignoring.
|
| 351 |
+
2025-07-31,01:50:05 | INFO | Train Epoch: 1 [ 60833792/128008192 (48%)] Data (t): 0.774 Batch (t): 9.092, 1792.88/s, 224.110/s/gpu LR: 0.000171 Logit Scale: 85.898 Class_loss: 9.0260 (9.6927) Contrastive_loss: 0.25101 (0.63547) Loss: 9.2770 (10.328)
|
| 352 |
+
2025-07-31,01:54:27 | WARNING | Handling webdataset error (OSError('image file is truncated (50 bytes not processed)')). Ignoring.
|
| 353 |
+
2025-07-31,02:09:35 | INFO | Train Epoch: 1 [ 62930944/128008192 (49%)] Data (t): 0.771 Batch (t): 9.141, 1779.92/s, 222.490/s/gpu LR: 0.000161 Logit Scale: 86.151 Class_loss: 12.232 (9.7746) Contrastive_loss: 1.5456 (0.66483) Loss: 13.778 (10.439)
|
| 354 |
+
2025-07-31,02:16:39 | WARNING | Handling webdataset error (OSError('image file is truncated (47 bytes not processed)')). Ignoring.
|
| 355 |
+
2025-07-31,02:29:10 | INFO | Train Epoch: 1 [ 65028096/128008192 (51%)] Data (t): 0.743 Batch (t): 9.185, 1793.12/s, 224.140/s/gpu LR: 0.000151 Logit Scale: 86.390 Class_loss: 11.809 (9.8382) Contrastive_loss: 1.1132 (0.67884) Loss: 12.923 (10.517)
|
| 356 |
+
2025-07-31,02:36:32 | WARNING | Handling webdataset error (OSError('image file is truncated (21 bytes not processed)')). Ignoring.
|
| 357 |
+
2025-07-31,02:48:26 | WARNING | Handling webdataset error (OSError('image file is truncated (47 bytes not processed)')). Ignoring.
|
| 358 |
+
2025-07-31,02:48:47 | INFO | Train Epoch: 1 [ 67125248/128008192 (52%)] Data (t): 0.769 Batch (t): 9.196, 1805.90/s, 225.737/s/gpu LR: 0.000142 Logit Scale: 86.636 Class_loss: 8.7405 (9.8049) Contrastive_loss: 0.19108 (0.66406) Loss: 8.9315 (10.469)
|
| 359 |
+
2025-07-31,02:51:21 | WARNING | Handling webdataset error (OSError('image file is truncated (6 bytes not processed)')). Ignoring.
|
| 360 |
+
2025-07-31,03:08:23 | INFO | Train Epoch: 1 [ 69222400/128008192 (54%)] Data (t): 0.747 Batch (t): 9.184, 1763.29/s, 220.411/s/gpu LR: 0.000133 Logit Scale: 86.919 Class_loss: 9.9504 (9.8092) Contrastive_loss: 0.60896 (0.66243) Loss: 10.559 (10.472)
|
| 361 |
+
2025-07-31,03:09:01 | WARNING | Handling webdataset error (OSError('image file is truncated (7 bytes not processed)')). Ignoring.
|
| 362 |
+
2025-07-31,03:10:21 | WARNING | Handling webdataset error (OSError('image file is truncated (0 bytes not processed)')). Ignoring.
|
| 363 |
+
2025-07-31,03:28:02 | INFO | Train Epoch: 1 [ 71319552/128008192 (56%)] Data (t): 0.741 Batch (t): 9.213, 1778.48/s, 222.310/s/gpu LR: 0.000124 Logit Scale: 87.004 Class_loss: 8.8207 (9.7809) Contrastive_loss: 0.22845 (0.65004) Loss: 9.0492 (10.431)
|
| 364 |
+
2025-07-31,03:34:30 | INFO | Starting zero-shot imagenet.
|
| 365 |
+
2025-07-31,03:34:30 | INFO | Building zero-shot classifier
|
| 366 |
+
2025-07-31,03:34:37 | INFO | Using classifier
|
| 367 |
+
2025-07-31,03:52:32 | INFO | Finished zero-shot imagenet.
|
| 368 |
+
2025-07-31,03:52:32 | INFO | Eval Epoch: 1.562396006655574 imagenet-zeroshot-val-top1: 0.4492 imagenet-zeroshot-val-top5: 0.7314
|
| 369 |
+
2025-07-31,04:05:25 | INFO | Train Epoch: 1 [ 73416704/128008192 (57%)] Data (t): 10.825 Batch (t): 17.518, 1735.63/s, 216.954/s/gpu LR: 0.000115 Logit Scale: 87.250 Class_loss: 8.8186 (9.7542) Contrastive_loss: 0.17150 (0.63674) Loss: 8.9901 (10.391)
|
| 370 |
+
2025-07-31,04:21:00 | WARNING | Handling webdataset error (OSError('image file is truncated (26 bytes not processed)')). Ignoring.
|
| 371 |
+
2025-07-31,04:24:59 | INFO | Train Epoch: 1 [ 75513856/128008192 (59%)] Data (t): 0.741 Batch (t): 9.176, 1763.70/s, 220.462/s/gpu LR: 0.000107 Logit Scale: 87.486 Class_loss: 8.8364 (9.7294) Contrastive_loss: 0.25770 (0.62650) Loss: 9.0941 (10.356)
|
| 372 |
+
2025-07-31,04:43:36 | WARNING | Handling webdataset error (OSError('image file is truncated (16 bytes not processed)')). Ignoring.
|
| 373 |
+
2025-07-31,04:44:36 | INFO | Train Epoch: 1 [ 77611008/128008192 (61%)] Data (t): 0.731 Batch (t): 9.192, 1838.30/s, 229.788/s/gpu LR: 0.000099 Logit Scale: 87.764 Class_loss: 11.855 (9.7853) Contrastive_loss: 1.3196 (0.64474) Loss: 13.174 (10.430)
|
| 374 |
+
2025-07-31,05:04:48 | INFO | Train Epoch: 1 [ 79708160/128008192 (62%)] Data (t): 0.734 Batch (t): 9.467, 1790.55/s, 223.818/s/gpu LR: 0.000091 Logit Scale: 87.984 Class_loss: 8.7835 (9.7596) Contrastive_loss: 0.20409 (0.63344) Loss: 8.9876 (10.393)
|
| 375 |
+
2025-07-31,05:17:04 | WARNING | Handling webdataset error (OSError('image file is truncated (26 bytes not processed)')). Ignoring.
|
| 376 |
+
2025-07-31,05:17:10 | WARNING | Handling webdataset error (OSError('image file is truncated (23 bytes not processed)')). Ignoring.
|
| 377 |
+
2025-07-31,05:24:25 | INFO | Train Epoch: 1 [ 81805312/128008192 (64%)] Data (t): 0.781 Batch (t): 9.196, 1752.73/s, 219.091/s/gpu LR: 0.000083 Logit Scale: 88.171 Class_loss: 8.7992 (9.7356) Contrastive_loss: 0.23473 (0.62347) Loss: 9.0339 (10.359)
|
| 378 |
+
2025-07-31,05:29:17 | WARNING | Handling webdataset error (OSError('image file is truncated (32 bytes not processed)')). Ignoring.
|
| 379 |
+
2025-07-31,05:29:41 | WARNING | Handling webdataset error (OSError('image file is truncated (14 bytes not processed)')). Ignoring.
|
| 380 |
+
2025-07-31,05:31:52 | WARNING | Handling webdataset error (OSError('image file is truncated (342 bytes not processed)')). Ignoring.
|
| 381 |
+
2025-07-31,05:44:32 | INFO | Train Epoch: 1 [ 83902464/128008192 (66%)] Data (t): 0.737 Batch (t): 9.430, 1789.40/s, 223.675/s/gpu LR: 0.000076 Logit Scale: 88.379 Class_loss: 8.7843 (9.7124) Contrastive_loss: 0.18199 (0.61270) Loss: 8.9663 (10.325)
|
| 382 |
+
2025-07-31,06:05:05 | INFO | Train Epoch: 1 [ 85999616/128008192 (67%)] Data (t): 0.732 Batch (t): 9.632, 1824.52/s, 228.065/s/gpu LR: 0.000069 Logit Scale: 88.572 Class_loss: 10.085 (9.7213) Contrastive_loss: 0.60921 (0.61262) Loss: 10.694 (10.334)
|
| 383 |
+
2025-07-31,06:11:10 | WARNING | Handling webdataset error (OSError('image file is truncated (57 bytes not processed)')). Ignoring.
|
| 384 |
+
2025-07-31,06:25:05 | INFO | Train Epoch: 1 [ 88096768/128008192 (69%)] Data (t): 0.760 Batch (t): 9.379, 1627.94/s, 203.492/s/gpu LR: 0.000063 Logit Scale: 88.771 Class_loss: 9.2635 (9.7107) Contrastive_loss: 0.31146 (0.60562) Loss: 9.5750 (10.316)
|
| 385 |
+
2025-07-31,06:32:06 | WARNING | Handling webdataset error (OSError('image file is truncated (19 bytes not processed)')). Ignoring.
|
| 386 |
+
2025-07-31,06:45:21 | INFO | Train Epoch: 1 [ 90193920/128008192 (70%)] Data (t): 0.730 Batch (t): 9.498, 1780.54/s, 222.567/s/gpu LR: 0.000056 Logit Scale: 88.881 Class_loss: 8.7345 (9.6885) Contrastive_loss: 0.16558 (0.59562) Loss: 8.9001 (10.284)
|
| 387 |
+
2025-07-31,07:04:56 | INFO | Train Epoch: 1 [ 92291072/128008192 (72%)] Data (t): 0.742 Batch (t): 9.185, 1774.18/s, 221.772/s/gpu LR: 0.000050 Logit Scale: 89.049 Class_loss: 8.8745 (9.6704) Contrastive_loss: 0.18575 (0.58651) Loss: 9.0603 (10.257)
|
| 388 |
+
2025-07-31,07:24:30 | INFO | Train Epoch: 1 [ 94388224/128008192 (74%)] Data (t): 0.756 Batch (t): 9.165, 1779.92/s, 222.490/s/gpu LR: 0.000045 Logit Scale: 89.164 Class_loss: 8.7540 (9.6505) Contrastive_loss: 0.17609 (0.57759) Loss: 8.9300 (10.228)
|
| 389 |
+
2025-07-31,07:44:09 | INFO | Train Epoch: 1 [ 96485376/128008192 (75%)] Data (t): 0.761 Batch (t): 9.215, 1777.14/s, 222.142/s/gpu LR: 0.000039 Logit Scale: 89.286 Class_loss: 8.7299 (9.6309) Contrastive_loss: 0.16767 (0.56886) Loss: 8.8975 (10.200)
|
| 390 |
+
2025-07-31,07:52:19 | WARNING | Handling webdataset error (OSError('image file is truncated (42 bytes not processed)')). Ignoring.
|
| 391 |
+
2025-07-31,08:03:45 | INFO | Train Epoch: 1 [ 98582528/128008192 (77%)] Data (t): 2.143 Batch (t): 9.184, 1806.87/s, 225.859/s/gpu LR: 0.000034 Logit Scale: 89.402 Class_loss: 8.6427 (9.6103) Contrastive_loss: 0.18810 (0.56093) Loss: 8.8308 (10.171)
|
| 392 |
+
2025-07-31,08:23:38 | INFO | Train Epoch: 1 [100679680/128008192 (79%)] Data (t): 1.235 Batch (t): 9.326, 1773.87/s, 221.734/s/gpu LR: 0.000030 Logit Scale: 89.518 Class_loss: 8.7535 (9.5928) Contrastive_loss: 0.18551 (0.55327) Loss: 8.9390 (10.146)
|
| 393 |
+
2025-07-31,08:38:39 | WARNING | Handling webdataset error (OSError('image file is truncated (10 bytes not processed)')). Ignoring.
|
| 394 |
+
2025-07-31,08:43:20 | INFO | Train Epoch: 1 [102776832/128008192 (80%)] Data (t): 0.751 Batch (t): 9.232, 1683.76/s, 210.470/s/gpu LR: 0.000025 Logit Scale: 89.630 Class_loss: 8.7740 (9.5764) Contrastive_loss: 0.16475 (0.54550) Loss: 8.9387 (10.122)
|
| 395 |
+
2025-07-31,09:03:03 | INFO | Train Epoch: 1 [104873984/128008192 (82%)] Data (t): 0.750 Batch (t): 9.238, 1768.45/s, 221.057/s/gpu LR: 0.000021 Logit Scale: 89.695 Class_loss: 11.882 (9.6216) Contrastive_loss: 1.2860 (0.56002) Loss: 13.168 (10.182)
|
| 396 |
+
2025-07-31,09:13:17 | WARNING | Handling webdataset error (OSError('image file is truncated (44 bytes not processed)')). Ignoring.
|
| 397 |
+
2025-07-31,09:22:59 | INFO | Train Epoch: 1 [106971136/128008192 (84%)] Data (t): 0.739 Batch (t): 9.346, 1768.93/s, 221.116/s/gpu LR: 0.000018 Logit Scale: 89.765 Class_loss: 8.9158 (9.6081) Contrastive_loss: 0.23428 (0.55376) Loss: 9.1501 (10.162)
|
| 398 |
+
2025-07-31,09:42:35 | INFO | Train Epoch: 1 [109068288/128008192 (85%)] Data (t): 0.753 Batch (t): 9.190, 1770.19/s, 221.274/s/gpu LR: 0.000014 Logit Scale: 89.830 Class_loss: 8.7377 (9.5916) Contrastive_loss: 0.19147 (0.54692) Loss: 8.9291 (10.139)
|
| 399 |
+
2025-07-31,09:59:44 | WARNING | Handling webdataset error (UnidentifiedImageError('cannot identify image file <_io.BytesIO object at 0x7f753f15fb50>')). Ignoring.
|
| 400 |
+
2025-07-31,10:02:06 | INFO | Train Epoch: 1 [111165440/128008192 (87%)] Data (t): 0.756 Batch (t): 9.152, 1788.60/s, 223.575/s/gpu LR: 0.000011 Logit Scale: 89.879 Class_loss: 8.7841 (9.5767) Contrastive_loss: 0.15139 (0.53959) Loss: 8.9355 (10.116)
|
| 401 |
+
2025-07-31,10:21:36 | INFO | Train Epoch: 1 [113262592/128008192 (88%)] Data (t): 0.768 Batch (t): 9.139, 1784.68/s, 223.085/s/gpu LR: 0.000009 Logit Scale: 89.915 Class_loss: 8.6784 (9.5604) Contrastive_loss: 0.18204 (0.53309) Loss: 8.8604 (10.093)
|
| 402 |
+
2025-07-31,10:41:15 | INFO | Train Epoch: 1 [115359744/128008192 (90%)] Data (t): 0.763 Batch (t): 9.210, 1752.09/s, 219.011/s/gpu LR: 0.000006 Logit Scale: 89.933 Class_loss: 8.7268 (9.5455) Contrastive_loss: 0.16440 (0.52651) Loss: 8.8912 (10.072)
|
| 403 |
+
2025-07-31,10:58:29 | WARNING | Handling webdataset error (OSError('image file is truncated (2 bytes not processed)')). Ignoring.
|
| 404 |
+
2025-07-31,11:00:59 | INFO | Train Epoch: 1 [117456896/128008192 (92%)] Data (t): 1.078 Batch (t): 9.250, 1778.25/s, 222.282/s/gpu LR: 0.000004 Logit Scale: 89.951 Class_loss: 11.328 (9.5767) Contrastive_loss: 0.93596 (0.53369) Loss: 12.264 (10.110)
|
| 405 |
+
2025-07-31,11:08:10 | WARNING | Handling webdataset error (OSError('image file is truncated (18 bytes not processed)')). Ignoring.
|
| 406 |
+
2025-07-31,11:20:34 | INFO | Train Epoch: 1 [119554048/128008192 (93%)] Data (t): 0.739 Batch (t): 9.176, 1812.25/s, 226.531/s/gpu LR: 0.000003 Logit Scale: 89.967 Class_loss: 8.9804 (9.5665) Contrastive_loss: 0.27453 (0.52923) Loss: 9.2549 (10.096)
|
| 407 |
+
2025-07-31,11:23:06 | WARNING | Handling webdataset error (OSError('image file is truncated (50 bytes not processed)')). Ignoring.
|
| 408 |
+
2025-07-31,11:30:33 | WARNING | Handling webdataset error (OSError('image file is truncated (13 bytes not processed)')). Ignoring.
|
| 409 |
+
2025-07-31,11:40:14 | INFO | Train Epoch: 1 [121651200/128008192 (95%)] Data (t): 0.734 Batch (t): 9.222, 1756.25/s, 219.532/s/gpu LR: 0.000002 Logit Scale: 89.973 Class_loss: 8.7347 (9.5524) Contrastive_loss: 0.18011 (0.52331) Loss: 8.9149 (10.076)
|
| 410 |
+
2025-07-31,11:59:49 | INFO | Train Epoch: 1 [123748352/128008192 (97%)] Data (t): 0.753 Batch (t): 9.179, 1787.26/s, 223.408/s/gpu LR: 0.000001 Logit Scale: 89.975 Class_loss: 8.6596 (9.5375) Contrastive_loss: 0.20050 (0.51793) Loss: 8.8601 (10.055)
|
| 411 |
+
2025-07-31,12:17:19 | WARNING | Handling webdataset error (OSError('image file is truncated (8 bytes not processed)')). Ignoring.
|
| 412 |
+
2025-07-31,12:19:27 | INFO | Train Epoch: 1 [125845504/128008192 (98%)] Data (t): 0.751 Batch (t): 9.204, 1765.86/s, 220.732/s/gpu LR: 0.000000 Logit Scale: 89.975 Class_loss: 8.7483 (9.5245) Contrastive_loss: 0.13924 (0.51172) Loss: 8.8876 (10.036)
|
| 413 |
+
2025-07-31,12:20:29 | WARNING | Handling webdataset error (OSError('broken data stream when reading image file')). Ignoring.
|
| 414 |
+
2025-07-31,12:32:47 | WARNING | Handling webdataset error (OSError('image file is truncated (83 bytes not processed)')). Ignoring.
|
| 415 |
+
2025-07-31,12:39:00 | INFO | Train Epoch: 1 [127942656/128008192 (100%)] Data (t): 0.745 Batch (t): 9.164, 1789.20/s, 223.650/s/gpu LR: 0.000000 Logit Scale: 89.975 Class_loss: 11.376 (9.5544) Contrastive_loss: 1.0139 (0.51982) Loss: 12.390 (10.074)
|
| 416 |
+
2025-07-31,12:39:37 | INFO | Train Epoch: 1 [128008192/128008192 (100%)] Data (t): 0.770 Batch (t): 9.147, 1806.67/s, 225.834/s/gpu LR: 0.000000 Logit Scale: 89.975 Class_loss: 9.1244 (9.5476) Contrastive_loss: 0.26505 (0.51578) Loss: 9.3895 (10.063)
|
| 417 |
+
2025-07-31,12:39:43 | INFO | Starting zero-shot imagenet.
|
| 418 |
+
2025-07-31,12:39:43 | INFO | Building zero-shot classifier
|
| 419 |
+
2025-07-31,12:39:50 | INFO | Using classifier
|
| 420 |
+
2025-07-31,12:57:18 | INFO | Finished zero-shot imagenet.
|
| 421 |
+
2025-07-31,12:57:18 | INFO | Eval Epoch: 2 imagenet-zeroshot-val-top1: 0.4721 imagenet-zeroshot-val-top5: 0.7559
|
logs-lr1e-3-datacomp-rebuttal/clipcls_vit_b16_s512m_bs16k_weighted_16/params.txt
ADDED
|
@@ -0,0 +1,113 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
NDR_patch_size: 16
|
| 2 |
+
accum_freq: 1
|
| 3 |
+
aug_cfg: {}
|
| 4 |
+
batch_size: 2048
|
| 5 |
+
beta1: 0.9
|
| 6 |
+
beta2: 0.98
|
| 7 |
+
checkpoint_path: ./logs-lr1e-3-datacomp-rebuttal/clipcls_vit_b16_s512m_bs16k_weighted_16/checkpoints
|
| 8 |
+
class_loss_weight: 1.6
|
| 9 |
+
coca_caption_loss_weight: 2.0
|
| 10 |
+
coca_contrastive_loss_weight: 1.0
|
| 11 |
+
copy_codebase: False
|
| 12 |
+
csv_caption_key: title
|
| 13 |
+
csv_img_key: filepath
|
| 14 |
+
csv_separator:
|
| 15 |
+
dataset_resampled: False
|
| 16 |
+
dataset_type: webdataset
|
| 17 |
+
ddp_static_graph: True
|
| 18 |
+
debug: False
|
| 19 |
+
delete_prev_step_ckpt: True
|
| 20 |
+
delete_previous_checkpoint: False
|
| 21 |
+
device: cuda:0
|
| 22 |
+
dist_backend: nccl
|
| 23 |
+
dist_url: env://
|
| 24 |
+
distill: False
|
| 25 |
+
distill_model: None
|
| 26 |
+
distill_pretrained: None
|
| 27 |
+
distributed: True
|
| 28 |
+
epochs: 2
|
| 29 |
+
epochs_cooldown: None
|
| 30 |
+
eps: 1e-06
|
| 31 |
+
force_custom_text: False
|
| 32 |
+
force_image_size: 224
|
| 33 |
+
force_patch_dropout: None
|
| 34 |
+
force_quick_gelu: False
|
| 35 |
+
gather_with_grad: True
|
| 36 |
+
global_batch_size: 16384
|
| 37 |
+
grad_checkpointing: True
|
| 38 |
+
grad_clip_norm: None
|
| 39 |
+
horovod: False
|
| 40 |
+
image_interpolation: None
|
| 41 |
+
image_mean: None
|
| 42 |
+
image_resize_mode: None
|
| 43 |
+
image_std: None
|
| 44 |
+
imagenet_v2: None
|
| 45 |
+
imagenet_val: /mnt/bn/zilongdata-us/dataset/ILSVRC/Data/CLS-LOC/val
|
| 46 |
+
is_cls_token: True
|
| 47 |
+
local_loss: True
|
| 48 |
+
local_rank: 0
|
| 49 |
+
lock_image: False
|
| 50 |
+
lock_image_freeze_bn_stats: False
|
| 51 |
+
lock_image_unlocked_groups: 0
|
| 52 |
+
lock_text: False
|
| 53 |
+
lock_text_freeze_layer_norm: False
|
| 54 |
+
lock_text_unlocked_layers: 0
|
| 55 |
+
log_every_n_steps: 128
|
| 56 |
+
log_level: 20
|
| 57 |
+
log_local: False
|
| 58 |
+
log_path: ./logs-lr1e-3-datacomp-rebuttal/clipcls_vit_b16_s512m_bs16k_weighted_16/out.log
|
| 59 |
+
logs: ./logs-lr1e-3-datacomp-rebuttal
|
| 60 |
+
lr: 0.001
|
| 61 |
+
lr_cooldown_end: 0.0
|
| 62 |
+
lr_cooldown_power: 1.0
|
| 63 |
+
lr_scheduler: cosine
|
| 64 |
+
max_seq_len: 15000
|
| 65 |
+
model: CLIPCLS-ViT-B-16
|
| 66 |
+
name: clipcls_vit_b16_s512m_bs16k_weighted_16
|
| 67 |
+
native_dynamic_resolution: False
|
| 68 |
+
no_set_device_rank: False
|
| 69 |
+
only_class_loss: False
|
| 70 |
+
only_packing: False
|
| 71 |
+
post_train: False
|
| 72 |
+
precision: amp_bfloat16
|
| 73 |
+
pretrained:
|
| 74 |
+
pretrained_image:
|
| 75 |
+
pretrained_text:
|
| 76 |
+
rank: 0
|
| 77 |
+
remote_sync: None
|
| 78 |
+
remote_sync_frequency: 300
|
| 79 |
+
remote_sync_protocol: s3
|
| 80 |
+
report_to: wandb
|
| 81 |
+
resume: None
|
| 82 |
+
rope_attn_num_heads: 12
|
| 83 |
+
rope_model_width: 768
|
| 84 |
+
save_every_n_steps: 6104
|
| 85 |
+
save_frequency: 1
|
| 86 |
+
save_most_recent: False
|
| 87 |
+
seed: 0
|
| 88 |
+
siglip: False
|
| 89 |
+
skip_scheduler: False
|
| 90 |
+
tensorboard: False
|
| 91 |
+
tensorboard_path:
|
| 92 |
+
torchcompile: False
|
| 93 |
+
torchscript: False
|
| 94 |
+
trace: False
|
| 95 |
+
train_data: /mnt/bn/zilongdata-us/dataset/recap-datacomp-1b-webdataset/{000000..140146}.tar
|
| 96 |
+
train_data_upsampling_factors: None
|
| 97 |
+
train_num_samples: 128000000
|
| 98 |
+
use_bn_sync: False
|
| 99 |
+
use_bnb_linear: None
|
| 100 |
+
use_idf: True
|
| 101 |
+
val_data: None
|
| 102 |
+
val_frequency: 1
|
| 103 |
+
val_num_samples: None
|
| 104 |
+
val_steps: 6104
|
| 105 |
+
wandb: True
|
| 106 |
+
wandb_notes:
|
| 107 |
+
wandb_project_name: cls-clip-batch-size
|
| 108 |
+
warmup: 500
|
| 109 |
+
wd: 0.2
|
| 110 |
+
workers: 1
|
| 111 |
+
world_size: 8
|
| 112 |
+
zeroshot_frequency: 2
|
| 113 |
+
zeroshot_steps: 6104
|