Upload folder using huggingface_hub
Browse files
exp_rope_clipcls_vit_b16_s512m_bs8k/checkpoints/results.jsonl
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"imagenet-zeroshot-val-top1": 0.0001136734693877551, "imagenet-zeroshot-val-top5": 0.00038602040816326533}
|
exp_rope_clipcls_vit_b16_s512m_bs8k/out.log
ADDED
|
@@ -0,0 +1,854 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
2025-02-18,21:49:15 | INFO | No latest resume checkpoint found in ./logs-lr1e-3-datacomp/exp_rope_clipcls_vit_b16_s512m_bs8k/checkpoints.
|
| 2 |
+
2025-02-18,21:49:20 | INFO | Running in distributed mode with multiple processes. Device: cuda:0.Process (global: 0, local 0), total 8.
|
| 3 |
+
2025-02-18,21:49:20 | INFO | Loaded CLIPCLS-ViT-B-16-NDR model config.
|
| 4 |
+
2025-02-18,21:49:22 | INFO | Model:
|
| 5 |
+
2025-02-18,21:49:22 | INFO | CLIPCLS(
|
| 6 |
+
(visual): NDRVisionTransformer(
|
| 7 |
+
(conv1): Conv2d(3, 768, kernel_size=(16, 16), stride=(16, 16), bias=False)
|
| 8 |
+
(projection_embd): Linear(in_features=768, out_features=768, bias=True)
|
| 9 |
+
(patch_dropout): Identity()
|
| 10 |
+
(ln_pre): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 11 |
+
(transformer): RoPETransformer(
|
| 12 |
+
(resblocks): ModuleList(
|
| 13 |
+
(0-11): 12 x CustomResidualRoPEAttentionBlock(
|
| 14 |
+
(ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 15 |
+
(attn): RoPEAttention(
|
| 16 |
+
(attn_drop): Dropout(p=0.0, inplace=False)
|
| 17 |
+
(out_proj): Linear(in_features=768, out_features=768, bias=True)
|
| 18 |
+
(out_drop): Dropout(p=0.0, inplace=False)
|
| 19 |
+
)
|
| 20 |
+
(ln_attn): Identity()
|
| 21 |
+
(ls_1): Identity()
|
| 22 |
+
(ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 23 |
+
(mlp): Sequential(
|
| 24 |
+
(c_fc): Linear(in_features=768, out_features=3072, bias=True)
|
| 25 |
+
(gelu): GELU(approximate='none')
|
| 26 |
+
(ln): Identity()
|
| 27 |
+
(c_proj): Linear(in_features=3072, out_features=768, bias=True)
|
| 28 |
+
)
|
| 29 |
+
(ls_2): Identity()
|
| 30 |
+
)
|
| 31 |
+
)
|
| 32 |
+
)
|
| 33 |
+
(ln_post): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 34 |
+
)
|
| 35 |
+
(text): TextTransformer(
|
| 36 |
+
(token_embedding): Embedding(49408, 512)
|
| 37 |
+
(transformer): Transformer(
|
| 38 |
+
(resblocks): ModuleList(
|
| 39 |
+
(0-11): 12 x ResidualAttentionBlock(
|
| 40 |
+
(ln_1): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
| 41 |
+
(attn): MultiheadAttention(
|
| 42 |
+
(out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)
|
| 43 |
+
)
|
| 44 |
+
(ls_1): Identity()
|
| 45 |
+
(ln_2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
| 46 |
+
(mlp): Sequential(
|
| 47 |
+
(c_fc): Linear(in_features=512, out_features=2048, bias=True)
|
| 48 |
+
(gelu): GELU(approximate='none')
|
| 49 |
+
(c_proj): Linear(in_features=2048, out_features=512, bias=True)
|
| 50 |
+
)
|
| 51 |
+
(ls_2): Identity()
|
| 52 |
+
)
|
| 53 |
+
)
|
| 54 |
+
)
|
| 55 |
+
(ln_final): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
| 56 |
+
)
|
| 57 |
+
(text_decoder): MixClsHead(
|
| 58 |
+
(mlps): ModuleList()
|
| 59 |
+
(ln_mlp): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 60 |
+
(text_projection): Linear(in_features=768, out_features=49408, bias=True)
|
| 61 |
+
)
|
| 62 |
+
)
|
| 63 |
+
2025-02-18,21:49:22 | INFO | Params:
|
| 64 |
+
2025-02-18,21:49:22 | INFO | NDR_patch_size: 16
|
| 65 |
+
2025-02-18,21:49:22 | INFO | accum_freq: 1
|
| 66 |
+
2025-02-18,21:49:22 | INFO | aug_cfg: {}
|
| 67 |
+
2025-02-18,21:49:22 | INFO | batch_size: 1024
|
| 68 |
+
2025-02-18,21:49:22 | INFO | beta1: 0.9
|
| 69 |
+
2025-02-18,21:49:22 | INFO | beta2: 0.98
|
| 70 |
+
2025-02-18,21:49:22 | INFO | checkpoint_path: ./logs-lr1e-3-datacomp/exp_rope_clipcls_vit_b16_s512m_bs8k/checkpoints
|
| 71 |
+
2025-02-18,21:49:22 | INFO | coca_caption_loss_weight: 2.0
|
| 72 |
+
2025-02-18,21:49:22 | INFO | coca_contrastive_loss_weight: 1.0
|
| 73 |
+
2025-02-18,21:49:22 | INFO | copy_codebase: False
|
| 74 |
+
2025-02-18,21:49:22 | INFO | csv_caption_key: title
|
| 75 |
+
2025-02-18,21:49:22 | INFO | csv_img_key: filepath
|
| 76 |
+
2025-02-18,21:49:22 | INFO | csv_separator:
|
| 77 |
+
2025-02-18,21:49:22 | INFO | dataset_resampled: False
|
| 78 |
+
2025-02-18,21:49:22 | INFO | dataset_type: webdataset
|
| 79 |
+
2025-02-18,21:49:22 | INFO | ddp_static_graph: True
|
| 80 |
+
2025-02-18,21:49:22 | INFO | debug: False
|
| 81 |
+
2025-02-18,21:49:22 | INFO | delete_prev_step_ckpt: True
|
| 82 |
+
2025-02-18,21:49:22 | INFO | delete_previous_checkpoint: False
|
| 83 |
+
2025-02-18,21:49:22 | INFO | device: cuda:0
|
| 84 |
+
2025-02-18,21:49:22 | INFO | dist_backend: nccl
|
| 85 |
+
2025-02-18,21:49:22 | INFO | dist_url: env://
|
| 86 |
+
2025-02-18,21:49:22 | INFO | distill: False
|
| 87 |
+
2025-02-18,21:49:22 | INFO | distill_model: None
|
| 88 |
+
2025-02-18,21:49:22 | INFO | distill_pretrained: None
|
| 89 |
+
2025-02-18,21:49:22 | INFO | distributed: True
|
| 90 |
+
2025-02-18,21:49:22 | INFO | epochs: 4
|
| 91 |
+
2025-02-18,21:49:22 | INFO | epochs_cooldown: None
|
| 92 |
+
2025-02-18,21:49:22 | INFO | eps: 1e-06
|
| 93 |
+
2025-02-18,21:49:22 | INFO | force_custom_text: False
|
| 94 |
+
2025-02-18,21:49:22 | INFO | force_image_size: 224
|
| 95 |
+
2025-02-18,21:49:22 | INFO | force_patch_dropout: None
|
| 96 |
+
2025-02-18,21:49:22 | INFO | force_quick_gelu: False
|
| 97 |
+
2025-02-18,21:49:22 | INFO | gather_with_grad: True
|
| 98 |
+
2025-02-18,21:49:22 | INFO | global_batch_size: 8192
|
| 99 |
+
2025-02-18,21:49:22 | INFO | grad_checkpointing: True
|
| 100 |
+
2025-02-18,21:49:22 | INFO | grad_clip_norm: None
|
| 101 |
+
2025-02-18,21:49:22 | INFO | horovod: False
|
| 102 |
+
2025-02-18,21:49:22 | INFO | image_interpolation: None
|
| 103 |
+
2025-02-18,21:49:22 | INFO | image_mean: None
|
| 104 |
+
2025-02-18,21:49:22 | INFO | image_resize_mode: None
|
| 105 |
+
2025-02-18,21:49:22 | INFO | image_std: None
|
| 106 |
+
2025-02-18,21:49:22 | INFO | imagenet_v2: None
|
| 107 |
+
2025-02-18,21:49:22 | INFO | imagenet_val: /mnt/bn/zilongdata-hl/dataset/imagenet/val
|
| 108 |
+
2025-02-18,21:49:22 | INFO | local_loss: True
|
| 109 |
+
2025-02-18,21:49:22 | INFO | local_rank: 0
|
| 110 |
+
2025-02-18,21:49:22 | INFO | lock_image: False
|
| 111 |
+
2025-02-18,21:49:22 | INFO | lock_image_freeze_bn_stats: False
|
| 112 |
+
2025-02-18,21:49:22 | INFO | lock_image_unlocked_groups: 0
|
| 113 |
+
2025-02-18,21:49:22 | INFO | lock_text: False
|
| 114 |
+
2025-02-18,21:49:22 | INFO | lock_text_freeze_layer_norm: False
|
| 115 |
+
2025-02-18,21:49:22 | INFO | lock_text_unlocked_layers: 0
|
| 116 |
+
2025-02-18,21:49:22 | INFO | log_every_n_steps: 128
|
| 117 |
+
2025-02-18,21:49:22 | INFO | log_level: 20
|
| 118 |
+
2025-02-18,21:49:22 | INFO | log_local: False
|
| 119 |
+
2025-02-18,21:49:22 | INFO | log_path: ./logs-lr1e-3-datacomp/exp_rope_clipcls_vit_b16_s512m_bs8k/out.log
|
| 120 |
+
2025-02-18,21:49:22 | INFO | logs: ./logs-lr1e-3-datacomp
|
| 121 |
+
2025-02-18,21:49:22 | INFO | lr: 0.001
|
| 122 |
+
2025-02-18,21:49:22 | INFO | lr_cooldown_end: 0.0
|
| 123 |
+
2025-02-18,21:49:22 | INFO | lr_cooldown_power: 1.0
|
| 124 |
+
2025-02-18,21:49:22 | INFO | lr_scheduler: cosine
|
| 125 |
+
2025-02-18,21:49:22 | INFO | max_seq_len: 100000000000000
|
| 126 |
+
2025-02-18,21:49:22 | INFO | model: CLIPCLS-ViT-B-16-NDR
|
| 127 |
+
2025-02-18,21:49:22 | INFO | name: exp_rope_clipcls_vit_b16_s512m_bs8k
|
| 128 |
+
2025-02-18,21:49:22 | INFO | native_dynamic_resolution: True
|
| 129 |
+
2025-02-18,21:49:22 | INFO | no_set_device_rank: False
|
| 130 |
+
2025-02-18,21:49:22 | INFO | only_packing: True
|
| 131 |
+
2025-02-18,21:49:22 | INFO | precision: amp_bfloat16
|
| 132 |
+
2025-02-18,21:49:22 | INFO | pretrained:
|
| 133 |
+
2025-02-18,21:49:22 | INFO | pretrained_image:
|
| 134 |
+
2025-02-18,21:49:22 | INFO | pretrained_text:
|
| 135 |
+
2025-02-18,21:49:22 | INFO | rank: 0
|
| 136 |
+
2025-02-18,21:49:22 | INFO | remote_sync: None
|
| 137 |
+
2025-02-18,21:49:22 | INFO | remote_sync_frequency: 300
|
| 138 |
+
2025-02-18,21:49:22 | INFO | remote_sync_protocol: s3
|
| 139 |
+
2025-02-18,21:49:22 | INFO | report_to: wandb
|
| 140 |
+
2025-02-18,21:49:22 | INFO | resume: None
|
| 141 |
+
2025-02-18,21:49:22 | INFO | rope_attn_num_heads: 12
|
| 142 |
+
2025-02-18,21:49:22 | INFO | rope_model_width: 768
|
| 143 |
+
2025-02-18,21:49:22 | INFO | save_every_n_steps: 6104
|
| 144 |
+
2025-02-18,21:49:22 | INFO | save_frequency: 1
|
| 145 |
+
2025-02-18,21:49:22 | INFO | save_most_recent: False
|
| 146 |
+
2025-02-18,21:49:22 | INFO | seed: 0
|
| 147 |
+
2025-02-18,21:49:22 | INFO | siglip: False
|
| 148 |
+
2025-02-18,21:49:22 | INFO | skip_scheduler: False
|
| 149 |
+
2025-02-18,21:49:22 | INFO | tensorboard: False
|
| 150 |
+
2025-02-18,21:49:22 | INFO | tensorboard_path:
|
| 151 |
+
2025-02-18,21:49:22 | INFO | torchcompile: False
|
| 152 |
+
2025-02-18,21:49:22 | INFO | torchscript: False
|
| 153 |
+
2025-02-18,21:49:22 | INFO | trace: False
|
| 154 |
+
2025-02-18,21:49:22 | INFO | train_data: /mnt/bn/bytenas-weixian/data/Recap-DataComp-1B-Dataset/{000000..140146}.tar
|
| 155 |
+
2025-02-18,21:49:22 | INFO | train_data_upsampling_factors: None
|
| 156 |
+
2025-02-18,21:49:22 | INFO | train_num_samples: 128000000
|
| 157 |
+
2025-02-18,21:49:22 | INFO | use_bn_sync: False
|
| 158 |
+
2025-02-18,21:49:22 | INFO | use_bnb_linear: None
|
| 159 |
+
2025-02-18,21:49:22 | INFO | val_data: None
|
| 160 |
+
2025-02-18,21:49:22 | INFO | val_frequency: 1
|
| 161 |
+
2025-02-18,21:49:22 | INFO | val_num_samples: None
|
| 162 |
+
2025-02-18,21:49:22 | INFO | val_steps: 6104
|
| 163 |
+
2025-02-18,21:49:22 | INFO | wandb: True
|
| 164 |
+
2025-02-18,21:49:22 | INFO | wandb_notes:
|
| 165 |
+
2025-02-18,21:49:22 | INFO | wandb_project_name: cls-clip-NDR
|
| 166 |
+
2025-02-18,21:49:22 | INFO | warmup: 500
|
| 167 |
+
2025-02-18,21:49:22 | INFO | wd: 0.2
|
| 168 |
+
2025-02-18,21:49:22 | INFO | workers: 1
|
| 169 |
+
2025-02-18,21:49:22 | INFO | world_size: 8
|
| 170 |
+
2025-02-18,21:49:22 | INFO | zeroshot_frequency: 2
|
| 171 |
+
2025-02-18,21:49:22 | INFO | zeroshot_steps: 6104
|
| 172 |
+
2025-02-18,21:49:37 | INFO | Start epoch 0
|
| 173 |
+
2025-02-18,22:27:17 | INFO | No latest resume checkpoint found in ./logs-lr1e-3-datacomp/exp_rope_clipcls_vit_b16_s512m_bs8k/checkpoints.
|
| 174 |
+
2025-02-18,22:27:22 | INFO | Running in distributed mode with multiple processes. Device: cuda:0.Process (global: 0, local 0), total 8.
|
| 175 |
+
2025-02-18,22:27:22 | INFO | Loaded CLIPCLS-ViT-B-16-NDR model config.
|
| 176 |
+
2025-02-18,22:27:24 | INFO | Model:
|
| 177 |
+
2025-02-18,22:27:24 | INFO | CLIPCLS(
|
| 178 |
+
(visual): NDRVisionTransformer(
|
| 179 |
+
(conv1): Conv2d(3, 768, kernel_size=(16, 16), stride=(16, 16), bias=False)
|
| 180 |
+
(projection_embd): Linear(in_features=768, out_features=768, bias=True)
|
| 181 |
+
(patch_dropout): Identity()
|
| 182 |
+
(ln_pre): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 183 |
+
(transformer): RoPETransformer(
|
| 184 |
+
(resblocks): ModuleList(
|
| 185 |
+
(0-11): 12 x CustomResidualRoPEAttentionBlock(
|
| 186 |
+
(ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 187 |
+
(attn): RoPEAttention(
|
| 188 |
+
(attn_drop): Dropout(p=0.0, inplace=False)
|
| 189 |
+
(out_proj): Linear(in_features=768, out_features=768, bias=True)
|
| 190 |
+
(out_drop): Dropout(p=0.0, inplace=False)
|
| 191 |
+
)
|
| 192 |
+
(ln_attn): Identity()
|
| 193 |
+
(ls_1): Identity()
|
| 194 |
+
(ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 195 |
+
(mlp): Sequential(
|
| 196 |
+
(c_fc): Linear(in_features=768, out_features=3072, bias=True)
|
| 197 |
+
(gelu): GELU(approximate='none')
|
| 198 |
+
(ln): Identity()
|
| 199 |
+
(c_proj): Linear(in_features=3072, out_features=768, bias=True)
|
| 200 |
+
)
|
| 201 |
+
(ls_2): Identity()
|
| 202 |
+
)
|
| 203 |
+
)
|
| 204 |
+
)
|
| 205 |
+
(ln_post): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 206 |
+
)
|
| 207 |
+
(text): TextTransformer(
|
| 208 |
+
(token_embedding): Embedding(49408, 512)
|
| 209 |
+
(transformer): Transformer(
|
| 210 |
+
(resblocks): ModuleList(
|
| 211 |
+
(0-11): 12 x ResidualAttentionBlock(
|
| 212 |
+
(ln_1): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
| 213 |
+
(attn): MultiheadAttention(
|
| 214 |
+
(out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)
|
| 215 |
+
)
|
| 216 |
+
(ls_1): Identity()
|
| 217 |
+
(ln_2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
| 218 |
+
(mlp): Sequential(
|
| 219 |
+
(c_fc): Linear(in_features=512, out_features=2048, bias=True)
|
| 220 |
+
(gelu): GELU(approximate='none')
|
| 221 |
+
(c_proj): Linear(in_features=2048, out_features=512, bias=True)
|
| 222 |
+
)
|
| 223 |
+
(ls_2): Identity()
|
| 224 |
+
)
|
| 225 |
+
)
|
| 226 |
+
)
|
| 227 |
+
(ln_final): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
| 228 |
+
)
|
| 229 |
+
(text_decoder): MixClsHead(
|
| 230 |
+
(mlps): ModuleList()
|
| 231 |
+
(ln_mlp): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 232 |
+
(text_projection): Linear(in_features=768, out_features=49408, bias=True)
|
| 233 |
+
)
|
| 234 |
+
)
|
| 235 |
+
2025-02-18,22:27:24 | INFO | Params:
|
| 236 |
+
2025-02-18,22:27:24 | INFO | NDR_patch_size: 16
|
| 237 |
+
2025-02-18,22:27:24 | INFO | accum_freq: 1
|
| 238 |
+
2025-02-18,22:27:24 | INFO | aug_cfg: {}
|
| 239 |
+
2025-02-18,22:27:24 | INFO | batch_size: 1024
|
| 240 |
+
2025-02-18,22:27:24 | INFO | beta1: 0.9
|
| 241 |
+
2025-02-18,22:27:24 | INFO | beta2: 0.98
|
| 242 |
+
2025-02-18,22:27:24 | INFO | checkpoint_path: ./logs-lr1e-3-datacomp/exp_rope_clipcls_vit_b16_s512m_bs8k/checkpoints
|
| 243 |
+
2025-02-18,22:27:24 | INFO | coca_caption_loss_weight: 2.0
|
| 244 |
+
2025-02-18,22:27:24 | INFO | coca_contrastive_loss_weight: 1.0
|
| 245 |
+
2025-02-18,22:27:24 | INFO | copy_codebase: False
|
| 246 |
+
2025-02-18,22:27:24 | INFO | csv_caption_key: title
|
| 247 |
+
2025-02-18,22:27:24 | INFO | csv_img_key: filepath
|
| 248 |
+
2025-02-18,22:27:24 | INFO | csv_separator:
|
| 249 |
+
2025-02-18,22:27:24 | INFO | dataset_resampled: False
|
| 250 |
+
2025-02-18,22:27:24 | INFO | dataset_type: webdataset
|
| 251 |
+
2025-02-18,22:27:24 | INFO | ddp_static_graph: True
|
| 252 |
+
2025-02-18,22:27:24 | INFO | debug: False
|
| 253 |
+
2025-02-18,22:27:24 | INFO | delete_prev_step_ckpt: True
|
| 254 |
+
2025-02-18,22:27:24 | INFO | delete_previous_checkpoint: False
|
| 255 |
+
2025-02-18,22:27:24 | INFO | device: cuda:0
|
| 256 |
+
2025-02-18,22:27:24 | INFO | dist_backend: nccl
|
| 257 |
+
2025-02-18,22:27:24 | INFO | dist_url: env://
|
| 258 |
+
2025-02-18,22:27:24 | INFO | distill: False
|
| 259 |
+
2025-02-18,22:27:24 | INFO | distill_model: None
|
| 260 |
+
2025-02-18,22:27:24 | INFO | distill_pretrained: None
|
| 261 |
+
2025-02-18,22:27:24 | INFO | distributed: True
|
| 262 |
+
2025-02-18,22:27:24 | INFO | epochs: 4
|
| 263 |
+
2025-02-18,22:27:24 | INFO | epochs_cooldown: None
|
| 264 |
+
2025-02-18,22:27:24 | INFO | eps: 1e-06
|
| 265 |
+
2025-02-18,22:27:24 | INFO | force_custom_text: False
|
| 266 |
+
2025-02-18,22:27:24 | INFO | force_image_size: 224
|
| 267 |
+
2025-02-18,22:27:24 | INFO | force_patch_dropout: None
|
| 268 |
+
2025-02-18,22:27:24 | INFO | force_quick_gelu: False
|
| 269 |
+
2025-02-18,22:27:24 | INFO | gather_with_grad: True
|
| 270 |
+
2025-02-18,22:27:24 | INFO | global_batch_size: 8192
|
| 271 |
+
2025-02-18,22:27:24 | INFO | grad_checkpointing: True
|
| 272 |
+
2025-02-18,22:27:24 | INFO | grad_clip_norm: None
|
| 273 |
+
2025-02-18,22:27:24 | INFO | horovod: False
|
| 274 |
+
2025-02-18,22:27:24 | INFO | image_interpolation: None
|
| 275 |
+
2025-02-18,22:27:24 | INFO | image_mean: None
|
| 276 |
+
2025-02-18,22:27:24 | INFO | image_resize_mode: None
|
| 277 |
+
2025-02-18,22:27:24 | INFO | image_std: None
|
| 278 |
+
2025-02-18,22:27:24 | INFO | imagenet_v2: None
|
| 279 |
+
2025-02-18,22:27:24 | INFO | imagenet_val: /mnt/bn/zilongdata-hl/dataset/imagenet/val
|
| 280 |
+
2025-02-18,22:27:24 | INFO | local_loss: True
|
| 281 |
+
2025-02-18,22:27:24 | INFO | local_rank: 0
|
| 282 |
+
2025-02-18,22:27:24 | INFO | lock_image: False
|
| 283 |
+
2025-02-18,22:27:24 | INFO | lock_image_freeze_bn_stats: False
|
| 284 |
+
2025-02-18,22:27:24 | INFO | lock_image_unlocked_groups: 0
|
| 285 |
+
2025-02-18,22:27:24 | INFO | lock_text: False
|
| 286 |
+
2025-02-18,22:27:24 | INFO | lock_text_freeze_layer_norm: False
|
| 287 |
+
2025-02-18,22:27:24 | INFO | lock_text_unlocked_layers: 0
|
| 288 |
+
2025-02-18,22:27:24 | INFO | log_every_n_steps: 128
|
| 289 |
+
2025-02-18,22:27:24 | INFO | log_level: 20
|
| 290 |
+
2025-02-18,22:27:24 | INFO | log_local: False
|
| 291 |
+
2025-02-18,22:27:24 | INFO | log_path: ./logs-lr1e-3-datacomp/exp_rope_clipcls_vit_b16_s512m_bs8k/out.log
|
| 292 |
+
2025-02-18,22:27:24 | INFO | logs: ./logs-lr1e-3-datacomp
|
| 293 |
+
2025-02-18,22:27:24 | INFO | lr: 0.001
|
| 294 |
+
2025-02-18,22:27:24 | INFO | lr_cooldown_end: 0.0
|
| 295 |
+
2025-02-18,22:27:24 | INFO | lr_cooldown_power: 1.0
|
| 296 |
+
2025-02-18,22:27:24 | INFO | lr_scheduler: cosine
|
| 297 |
+
2025-02-18,22:27:24 | INFO | max_seq_len: 100000000000000
|
| 298 |
+
2025-02-18,22:27:24 | INFO | model: CLIPCLS-ViT-B-16-NDR
|
| 299 |
+
2025-02-18,22:27:24 | INFO | name: exp_rope_clipcls_vit_b16_s512m_bs8k
|
| 300 |
+
2025-02-18,22:27:24 | INFO | native_dynamic_resolution: True
|
| 301 |
+
2025-02-18,22:27:24 | INFO | no_set_device_rank: False
|
| 302 |
+
2025-02-18,22:27:24 | INFO | only_packing: True
|
| 303 |
+
2025-02-18,22:27:24 | INFO | precision: amp_bfloat16
|
| 304 |
+
2025-02-18,22:27:24 | INFO | pretrained:
|
| 305 |
+
2025-02-18,22:27:24 | INFO | pretrained_image:
|
| 306 |
+
2025-02-18,22:27:24 | INFO | pretrained_text:
|
| 307 |
+
2025-02-18,22:27:24 | INFO | rank: 0
|
| 308 |
+
2025-02-18,22:27:24 | INFO | remote_sync: None
|
| 309 |
+
2025-02-18,22:27:24 | INFO | remote_sync_frequency: 300
|
| 310 |
+
2025-02-18,22:27:24 | INFO | remote_sync_protocol: s3
|
| 311 |
+
2025-02-18,22:27:24 | INFO | report_to: wandb
|
| 312 |
+
2025-02-18,22:27:24 | INFO | resume: None
|
| 313 |
+
2025-02-18,22:27:24 | INFO | rope_attn_num_heads: 12
|
| 314 |
+
2025-02-18,22:27:24 | INFO | rope_model_width: 768
|
| 315 |
+
2025-02-18,22:27:24 | INFO | save_every_n_steps: 6104
|
| 316 |
+
2025-02-18,22:27:24 | INFO | save_frequency: 1
|
| 317 |
+
2025-02-18,22:27:24 | INFO | save_most_recent: False
|
| 318 |
+
2025-02-18,22:27:24 | INFO | seed: 0
|
| 319 |
+
2025-02-18,22:27:24 | INFO | siglip: False
|
| 320 |
+
2025-02-18,22:27:24 | INFO | skip_scheduler: False
|
| 321 |
+
2025-02-18,22:27:24 | INFO | tensorboard: False
|
| 322 |
+
2025-02-18,22:27:24 | INFO | tensorboard_path:
|
| 323 |
+
2025-02-18,22:27:24 | INFO | torchcompile: False
|
| 324 |
+
2025-02-18,22:27:24 | INFO | torchscript: False
|
| 325 |
+
2025-02-18,22:27:24 | INFO | trace: False
|
| 326 |
+
2025-02-18,22:27:24 | INFO | train_data: /mnt/bn/bytenas-weixian/data/Recap-DataComp-1B-Dataset/{000000..140146}.tar
|
| 327 |
+
2025-02-18,22:27:24 | INFO | train_data_upsampling_factors: None
|
| 328 |
+
2025-02-18,22:27:24 | INFO | train_num_samples: 128000000
|
| 329 |
+
2025-02-18,22:27:24 | INFO | use_bn_sync: False
|
| 330 |
+
2025-02-18,22:27:24 | INFO | use_bnb_linear: None
|
| 331 |
+
2025-02-18,22:27:24 | INFO | val_data: None
|
| 332 |
+
2025-02-18,22:27:24 | INFO | val_frequency: 1
|
| 333 |
+
2025-02-18,22:27:24 | INFO | val_num_samples: None
|
| 334 |
+
2025-02-18,22:27:24 | INFO | val_steps: 6104
|
| 335 |
+
2025-02-18,22:27:24 | INFO | wandb: True
|
| 336 |
+
2025-02-18,22:27:24 | INFO | wandb_notes:
|
| 337 |
+
2025-02-18,22:27:24 | INFO | wandb_project_name: cls-clip-NDR
|
| 338 |
+
2025-02-18,22:27:24 | INFO | warmup: 500
|
| 339 |
+
2025-02-18,22:27:24 | INFO | wd: 0.2
|
| 340 |
+
2025-02-18,22:27:24 | INFO | workers: 1
|
| 341 |
+
2025-02-18,22:27:24 | INFO | world_size: 8
|
| 342 |
+
2025-02-18,22:27:24 | INFO | zeroshot_frequency: 2
|
| 343 |
+
2025-02-18,22:27:24 | INFO | zeroshot_steps: 6104
|
| 344 |
+
2025-02-18,22:27:39 | INFO | Start epoch 0
|
| 345 |
+
2025-02-18,22:27:52 | INFO | Train Epoch: 0 [ 8192/128000000 (0%)] Data (t): 8.211 Batch (t): 13.321, 614.970/s, 76.8713/s/gpu LR: 0.000002 Logit Scale: 14.286 Class_loss: 11.332 (11.332) Contrastive_loss: 9.1303 (9.1303) Loss: 20.462 (20.462)
|
| 346 |
+
2025-02-18,22:31:10 | WARNING | Handling webdataset error (OSError('image file is truncated (44 bytes not processed)')). Ignoring.
|
| 347 |
+
2025-02-18,22:38:09 | WARNING | Handling webdataset error (OSError('image file is truncated (47 bytes not processed)')). Ignoring.
|
| 348 |
+
2025-02-18,22:38:28 | INFO | Train Epoch: 0 [ 1056768/128000000 (1%)] Data (t): 1.448 Batch (t): 4.961, 1710.76/s, 213.845/s/gpu LR: 0.000258 Logit Scale: 14.281 Class_loss: 9.0799 (10.206) Contrastive_loss: 8.5317 (8.8310) Loss: 17.612 (19.037)
|
| 349 |
+
2025-02-18,22:49:10 | INFO | Train Epoch: 0 [ 2105344/128000000 (2%)] Data (t): 0.498 Batch (t): 5.016, 1737.53/s, 217.191/s/gpu LR: 0.000514 Logit Scale: 14.218 Class_loss: 8.9842 (9.7987) Contrastive_loss: 8.3782 (8.6801) Loss: 17.362 (18.479)
|
| 350 |
+
2025-02-18,22:58:05 | WARNING | Handling webdataset error (OSError('image file is truncated (68 bytes not processed)')). Ignoring.
|
| 351 |
+
2025-02-18,22:59:47 | INFO | Train Epoch: 0 [ 3153920/128000000 (2%)] Data (t): 0.493 Batch (t): 4.981, 1644.52/s, 205.565/s/gpu LR: 0.000770 Logit Scale: 14.176 Class_loss: 8.9318 (9.5820) Contrastive_loss: 8.2162 (8.5641) Loss: 17.148 (18.146)
|
| 352 |
+
2025-02-18,23:10:26 | INFO | Train Epoch: 0 [ 4202496/128000000 (3%)] Data (t): 0.500 Batch (t): 4.993, 1682.98/s, 210.373/s/gpu LR: 0.001000 Logit Scale: 14.169 Class_loss: 8.9133 (9.4483) Contrastive_loss: 8.0103 (8.4533) Loss: 16.924 (17.902)
|
| 353 |
+
2025-02-18,23:15:29 | WARNING | Handling webdataset error (OSError('image file is truncated (25 bytes not processed)')). Ignoring.
|
| 354 |
+
2025-02-18,23:21:03 | INFO | Train Epoch: 0 [ 5251072/128000000 (4%)] Data (t): 0.490 Batch (t): 4.977, 1697.53/s, 212.191/s/gpu LR: 0.001000 Logit Scale: 14.355 Class_loss: 8.8144 (9.3426) Contrastive_loss: 7.6759 (8.3238) Loss: 16.490 (17.666)
|
| 355 |
+
2025-02-18,23:25:24 | WARNING | Handling webdataset error (OSError('image file is truncated (35 bytes not processed)')). Ignoring.
|
| 356 |
+
2025-02-18,23:31:34 | INFO | Train Epoch: 0 [ 6299648/128000000 (5%)] Data (t): 0.490 Batch (t): 4.931, 1673.50/s, 209.187/s/gpu LR: 0.001000 Logit Scale: 14.754 Class_loss: 8.7440 (9.2571) Contrastive_loss: 7.4504 (8.1990) Loss: 16.194 (17.456)
|
| 357 |
+
2025-02-18,23:42:07 | INFO | Train Epoch: 0 [ 7348224/128000000 (6%)] Data (t): 0.497 Batch (t): 4.941, 1700.40/s, 212.551/s/gpu LR: 0.001000 Logit Scale: 15.404 Class_loss: 8.7063 (9.1882) Contrastive_loss: 7.2018 (8.0744) Loss: 15.908 (17.263)
|
| 358 |
+
2025-02-18,23:52:39 | INFO | Train Epoch: 0 [ 8396800/128000000 (7%)] Data (t): 0.493 Batch (t): 4.934, 1599.86/s, 199.983/s/gpu LR: 0.001000 Logit Scale: 16.300 Class_loss: 8.7078 (9.1349) Contrastive_loss: 7.0773 (7.9636) Loss: 15.785 (17.098)
|
| 359 |
+
2025-02-19,00:03:15 | INFO | Train Epoch: 0 [ 9445376/128000000 (7%)] Data (t): 1.257 Batch (t): 4.974, 1613.74/s, 201.718/s/gpu LR: 0.001000 Logit Scale: 17.299 Class_loss: 8.6476 (9.0861) Contrastive_loss: 6.9470 (7.8619) Loss: 15.595 (16.948)
|
| 360 |
+
2025-02-19,00:13:42 | INFO | Train Epoch: 0 [ 10493952/128000000 (8%)] Data (t): 0.466 Batch (t): 4.897, 1852.39/s, 231.548/s/gpu LR: 0.001000 Logit Scale: 18.280 Class_loss: 8.6103 (9.0429) Contrastive_loss: 6.7120 (7.7574) Loss: 15.322 (16.800)
|
| 361 |
+
2025-02-19,00:19:08 | WARNING | Handling webdataset error (OSError('image file is truncated (104 bytes not processed)')). Ignoring.
|
| 362 |
+
2025-02-19,00:20:34 | WARNING | Handling webdataset error (OSError('image file is truncated (21 bytes not processed)')). Ignoring.
|
| 363 |
+
2025-02-19,00:24:12 | INFO | Train Epoch: 0 [ 11542528/128000000 (9%)] Data (t): 0.480 Batch (t): 4.919, 1671.42/s, 208.928/s/gpu LR: 0.000999 Logit Scale: 18.969 Class_loss: 8.6012 (9.0061) Contrastive_loss: 6.8561 (7.6823) Loss: 15.457 (16.688)
|
| 364 |
+
2025-02-19,00:34:47 | INFO | Train Epoch: 0 [ 12591104/128000000 (10%)] Data (t): 0.483 Batch (t): 4.966, 1573.27/s, 196.658/s/gpu LR: 0.000999 Logit Scale: 19.978 Class_loss: 8.5730 (8.9728) Contrastive_loss: 6.7427 (7.6100) Loss: 15.316 (16.583)
|
| 365 |
+
2025-02-19,00:35:04 | WARNING | Handling webdataset error (OSError('image file is truncated (68 bytes not processed)')). Ignoring.
|
| 366 |
+
2025-02-19,00:44:17 | WARNING | Handling webdataset error (OSError('image file is truncated (28 bytes not processed)')). Ignoring.
|
| 367 |
+
2025-02-19,00:45:25 | INFO | Train Epoch: 0 [ 13639680/128000000 (11%)] Data (t): 0.491 Batch (t): 4.985, 1665.19/s, 208.149/s/gpu LR: 0.000999 Logit Scale: 20.849 Class_loss: 8.6300 (8.9483) Contrastive_loss: 6.6130 (7.5388) Loss: 15.243 (16.487)
|
| 368 |
+
2025-02-19,00:55:59 | INFO | Train Epoch: 0 [ 14688256/128000000 (11%)] Data (t): 0.493 Batch (t): 4.952, 1646.66/s, 205.833/s/gpu LR: 0.000999 Logit Scale: 21.675 Class_loss: 8.4805 (8.9171) Contrastive_loss: 6.3305 (7.4582) Loss: 14.811 (16.375)
|
| 369 |
+
2025-02-19,01:00:37 | WARNING | Handling webdataset error (OSError('image file is truncated (5 bytes not processed)')). Ignoring.
|
| 370 |
+
2025-02-19,01:03:11 | WARNING | Handling webdataset error (OSError('image file is truncated (32 bytes not processed)')). Ignoring.
|
| 371 |
+
2025-02-19,01:06:40 | INFO | Train Epoch: 0 [ 15736832/128000000 (12%)] Data (t): 0.511 Batch (t): 5.008, 1549.28/s, 193.660/s/gpu LR: 0.000999 Logit Scale: 22.294 Class_loss: 8.5770 (8.8958) Contrastive_loss: 6.5645 (7.4024) Loss: 15.142 (16.298)
|
| 372 |
+
2025-02-19,01:15:22 | WARNING | Handling webdataset error (OSError('image file is truncated (23 bytes not processed)')). Ignoring.
|
| 373 |
+
2025-02-19,01:17:17 | INFO | Train Epoch: 0 [ 16785408/128000000 (13%)] Data (t): 0.505 Batch (t): 4.971, 1574.20/s, 196.775/s/gpu LR: 0.000998 Logit Scale: 22.863 Class_loss: 8.5862 (8.8776) Contrastive_loss: 6.6149 (7.3560) Loss: 15.201 (16.234)
|
| 374 |
+
2025-02-19,01:27:49 | INFO | Train Epoch: 0 [ 17833984/128000000 (14%)] Data (t): 0.484 Batch (t): 4.942, 1646.81/s, 205.852/s/gpu LR: 0.000998 Logit Scale: 23.389 Class_loss: 8.5213 (8.8578) Contrastive_loss: 6.4658 (7.3066) Loss: 14.987 (16.164)
|
| 375 |
+
2025-02-19,01:38:25 | INFO | Train Epoch: 0 [ 18882560/128000000 (15%)] Data (t): 0.484 Batch (t): 4.971, 1672.64/s, 209.080/s/gpu LR: 0.000998 Logit Scale: 24.044 Class_loss: 8.5339 (8.8408) Contrastive_loss: 6.2940 (7.2533) Loss: 14.828 (16.094)
|
| 376 |
+
2025-02-19,01:49:08 | INFO | Train Epoch: 0 [ 19931136/128000000 (16%)] Data (t): 0.496 Batch (t): 5.025, 1633.70/s, 204.213/s/gpu LR: 0.000998 Logit Scale: 24.474 Class_loss: 8.5713 (8.8273) Contrastive_loss: 6.4502 (7.2131) Loss: 15.021 (16.040)
|
| 377 |
+
2025-02-19,01:59:41 | INFO | Train Epoch: 0 [ 20979712/128000000 (16%)] Data (t): 0.494 Batch (t): 4.943, 1630.90/s, 203.863/s/gpu LR: 0.000997 Logit Scale: 24.856 Class_loss: 8.5903 (8.8160) Contrastive_loss: 6.5037 (7.1794) Loss: 15.094 (15.995)
|
| 378 |
+
2025-02-19,02:10:16 | INFO | Train Epoch: 0 [ 22028288/128000000 (17%)] Data (t): 0.485 Batch (t): 4.960, 1702.35/s, 212.794/s/gpu LR: 0.000997 Logit Scale: 25.257 Class_loss: 8.5090 (8.8021) Contrastive_loss: 6.5430 (7.1504) Loss: 15.052 (15.952)
|
| 379 |
+
2025-02-19,02:20:51 | INFO | Train Epoch: 0 [ 23076864/128000000 (18%)] Data (t): 0.486 Batch (t): 4.960, 1659.41/s, 207.427/s/gpu LR: 0.000997 Logit Scale: 25.485 Class_loss: 8.5440 (8.7908) Contrastive_loss: 6.4479 (7.1199) Loss: 14.992 (15.911)
|
| 380 |
+
2025-02-19,02:25:28 | WARNING | Handling webdataset error (OSError('image file is truncated (114 bytes not processed)')). Ignoring.
|
| 381 |
+
2025-02-19,02:31:25 | INFO | Train Epoch: 0 [ 24125440/128000000 (19%)] Data (t): 0.916 Batch (t): 4.951, 1642.16/s, 205.270/s/gpu LR: 0.000996 Logit Scale: 25.798 Class_loss: 8.6041 (8.7831) Contrastive_loss: 6.4717 (7.0929) Loss: 15.076 (15.876)
|
| 382 |
+
2025-02-19,02:37:37 | WARNING | Handling webdataset error (OSError('image file is truncated (57 bytes not processed)')). Ignoring.
|
| 383 |
+
2025-02-19,02:42:01 | INFO | Train Epoch: 0 [ 25174016/128000000 (20%)] Data (t): 1.321 Batch (t): 4.968, 1728.61/s, 216.076/s/gpu LR: 0.000996 Logit Scale: 25.977 Class_loss: 8.7193 (8.7805) Contrastive_loss: 7.1729 (7.0961) Loss: 15.892 (15.877)
|
| 384 |
+
2025-02-19,02:51:06 | WARNING | Handling webdataset error (OSError('image file is truncated (75 bytes not processed)')). Ignoring.
|
| 385 |
+
2025-02-19,02:52:30 | INFO | Train Epoch: 0 [ 26222592/128000000 (20%)] Data (t): 0.487 Batch (t): 4.916, 1607.15/s, 200.893/s/gpu LR: 0.000995 Logit Scale: 26.240 Class_loss: 8.5990 (8.7735) Contrastive_loss: 6.6981 (7.0808) Loss: 15.297 (15.854)
|
| 386 |
+
2025-02-19,02:57:23 | WARNING | Handling webdataset error (OSError('image file is truncated (0 bytes not processed)')). Ignoring.
|
| 387 |
+
2025-02-19,03:03:01 | INFO | Train Epoch: 0 [ 27271168/128000000 (21%)] Data (t): 1.049 Batch (t): 4.935, 1699.78/s, 212.473/s/gpu LR: 0.000995 Logit Scale: 26.634 Class_loss: 8.5412 (8.7649) Contrastive_loss: 6.5002 (7.0593) Loss: 15.041 (15.824)
|
| 388 |
+
2025-02-19,03:07:43 | WARNING | Handling webdataset error (OSError('image file is truncated (59 bytes not processed)')). Ignoring.
|
| 389 |
+
2025-02-19,03:13:37 | INFO | Train Epoch: 0 [ 28319744/128000000 (22%)] Data (t): 1.544 Batch (t): 4.969, 1656.25/s, 207.031/s/gpu LR: 0.000994 Logit Scale: 27.127 Class_loss: 8.6210 (8.7598) Contrastive_loss: 6.4448 (7.0373) Loss: 15.066 (15.797)
|
| 390 |
+
2025-02-19,03:24:06 | INFO | Train Epoch: 0 [ 29368320/128000000 (23%)] Data (t): 1.353 Batch (t): 4.909, 1660.71/s, 207.589/s/gpu LR: 0.000994 Logit Scale: 27.395 Class_loss: 8.5946 (8.7541) Contrastive_loss: 6.6205 (7.0230) Loss: 15.215 (15.777)
|
| 391 |
+
2025-02-19,03:30:30 | WARNING | Handling webdataset error (OSError('image file is truncated (1 bytes not processed)')). Ignoring.
|
| 392 |
+
2025-02-19,03:34:33 | INFO | Train Epoch: 0 [ 30416896/128000000 (24%)] Data (t): 0.673 Batch (t): 4.902, 1690.04/s, 211.256/s/gpu LR: 0.000993 Logit Scale: 27.535 Class_loss: 8.5433 (8.7471) Contrastive_loss: 6.5007 (7.0055) Loss: 15.044 (15.753)
|
| 393 |
+
2025-02-19,03:45:07 | INFO | Train Epoch: 0 [ 31465472/128000000 (25%)] Data (t): 0.557 Batch (t): 4.954, 1647.53/s, 205.941/s/gpu LR: 0.000993 Logit Scale: 27.862 Class_loss: 8.5895 (8.7420) Contrastive_loss: 6.4604 (6.9880) Loss: 15.050 (15.730)
|
| 394 |
+
2025-02-19,03:55:41 | INFO | Train Epoch: 0 [ 32514048/128000000 (25%)] Data (t): 1.361 Batch (t): 4.951, 1643.81/s, 205.476/s/gpu LR: 0.000992 Logit Scale: 28.190 Class_loss: 8.6014 (8.7376) Contrastive_loss: 6.5170 (6.9732) Loss: 15.118 (15.711)
|
| 395 |
+
2025-02-19,04:06:13 | INFO | Train Epoch: 0 [ 33562624/128000000 (26%)] Data (t): 1.013 Batch (t): 4.936, 1681.33/s, 210.166/s/gpu LR: 0.000992 Logit Scale: 28.594 Class_loss: 8.5567 (8.7321) Contrastive_loss: 6.4205 (6.9565) Loss: 14.977 (15.689)
|
| 396 |
+
2025-02-19,04:15:58 | WARNING | Handling webdataset error (OSError('image file is truncated (0 bytes not processed)')). Ignoring.
|
| 397 |
+
2025-02-19,04:16:46 | INFO | Train Epoch: 0 [ 34611200/128000000 (27%)] Data (t): 1.355 Batch (t): 4.945, 1656.56/s, 207.070/s/gpu LR: 0.000991 Logit Scale: 29.041 Class_loss: 8.5748 (8.7275) Contrastive_loss: 6.4534 (6.9417) Loss: 15.028 (15.669)
|
| 398 |
+
2025-02-19,04:27:12 | INFO | Train Epoch: 0 [ 35659776/128000000 (28%)] Data (t): 1.040 Batch (t): 4.894, 1791.95/s, 223.993/s/gpu LR: 0.000991 Logit Scale: 29.346 Class_loss: 8.6309 (8.7247) Contrastive_loss: 6.5668 (6.9310) Loss: 15.198 (15.656)
|
| 399 |
+
2025-02-19,04:37:43 | INFO | Train Epoch: 0 [ 36708352/128000000 (29%)] Data (t): 0.480 Batch (t): 4.928, 1726.57/s, 215.821/s/gpu LR: 0.000990 Logit Scale: 29.693 Class_loss: 8.5747 (8.7205) Contrastive_loss: 6.3854 (6.9158) Loss: 14.960 (15.636)
|
| 400 |
+
2025-02-19,04:48:18 | INFO | Train Epoch: 0 [ 37756928/128000000 (29%)] Data (t): 0.478 Batch (t): 4.962, 1546.40/s, 193.300/s/gpu LR: 0.000989 Logit Scale: 29.898 Class_loss: 8.5614 (8.7162) Contrastive_loss: 6.4141 (6.9023) Loss: 14.975 (15.619)
|
| 401 |
+
2025-02-19,04:49:59 | WARNING | Handling webdataset error (OSError('image file is truncated (18 bytes not processed)')). Ignoring.
|
| 402 |
+
2025-02-19,04:51:12 | WARNING | Handling webdataset error (OSError('image file is truncated (59 bytes not processed)')). Ignoring.
|
| 403 |
+
2025-02-19,04:58:53 | INFO | Train Epoch: 0 [ 38805504/128000000 (30%)] Data (t): 0.490 Batch (t): 4.957, 1655.67/s, 206.958/s/gpu LR: 0.000989 Logit Scale: 30.182 Class_loss: 8.5812 (8.7127) Contrastive_loss: 6.3257 (6.8871) Loss: 14.907 (15.600)
|
| 404 |
+
2025-02-19,05:09:25 | INFO | Train Epoch: 0 [ 39854080/128000000 (31%)] Data (t): 0.498 Batch (t): 4.942, 1677.92/s, 209.740/s/gpu LR: 0.000988 Logit Scale: 30.647 Class_loss: 8.5371 (8.7082) Contrastive_loss: 6.2606 (6.8710) Loss: 14.798 (15.579)
|
| 405 |
+
2025-02-19,05:09:47 | WARNING | Handling webdataset error (OSError('image file is truncated (84 bytes not processed)')). Ignoring.
|
| 406 |
+
2025-02-19,05:19:59 | INFO | Train Epoch: 0 [ 40902656/128000000 (32%)] Data (t): 0.496 Batch (t): 4.950, 1724.45/s, 215.557/s/gpu LR: 0.000987 Logit Scale: 30.845 Class_loss: 8.6178 (8.7059) Contrastive_loss: 6.5260 (6.8624) Loss: 15.144 (15.568)
|
| 407 |
+
2025-02-19,05:30:31 | INFO | Train Epoch: 0 [ 41951232/128000000 (33%)] Data (t): 0.742 Batch (t): 4.937, 1609.72/s, 201.215/s/gpu LR: 0.000986 Logit Scale: 31.070 Class_loss: 8.5946 (8.7032) Contrastive_loss: 6.3455 (6.8498) Loss: 14.940 (15.553)
|
| 408 |
+
2025-02-19,05:41:04 | INFO | Train Epoch: 0 [ 42999808/128000000 (34%)] Data (t): 1.393 Batch (t): 4.943, 1741.95/s, 217.744/s/gpu LR: 0.000986 Logit Scale: 31.166 Class_loss: 8.5977 (8.7007) Contrastive_loss: 6.5869 (6.8435) Loss: 15.185 (15.544)
|
| 409 |
+
2025-02-19,05:42:30 | WARNING | Handling webdataset error (OSError('image file is truncated (18 bytes not processed)')). Ignoring.
|
| 410 |
+
2025-02-19,05:51:33 | INFO | Train Epoch: 0 [ 44048384/128000000 (34%)] Data (t): 0.656 Batch (t): 4.920, 1751.76/s, 218.970/s/gpu LR: 0.000985 Logit Scale: 31.222 Class_loss: 8.6009 (8.6984) Contrastive_loss: 6.4423 (6.8342) Loss: 15.043 (15.533)
|
| 411 |
+
2025-02-19,05:58:15 | WARNING | Handling webdataset error (OSError('image file is truncated (49 bytes not processed)')). Ignoring.
|
| 412 |
+
2025-02-19,06:02:11 | INFO | Train Epoch: 0 [ 45096960/128000000 (35%)] Data (t): 1.539 Batch (t): 4.982, 1662.70/s, 207.837/s/gpu LR: 0.000984 Logit Scale: 31.315 Class_loss: 8.6310 (8.6969) Contrastive_loss: 6.6083 (6.8291) Loss: 15.239 (15.526)
|
| 413 |
+
2025-02-19,06:08:17 | WARNING | Handling webdataset error (OSError('image file is truncated (5 bytes not processed)')). Ignoring.
|
| 414 |
+
2025-02-19,06:12:40 | INFO | Train Epoch: 0 [ 46145536/128000000 (36%)] Data (t): 1.486 Batch (t): 4.911, 1714.24/s, 214.280/s/gpu LR: 0.000983 Logit Scale: 31.623 Class_loss: 8.5829 (8.6943) Contrastive_loss: 6.2714 (6.8167) Loss: 14.854 (15.511)
|
| 415 |
+
2025-02-19,06:23:13 | INFO | Train Epoch: 0 [ 47194112/128000000 (37%)] Data (t): 0.746 Batch (t): 4.950, 1678.94/s, 209.868/s/gpu LR: 0.000982 Logit Scale: 32.004 Class_loss: 8.5654 (8.6915) Contrastive_loss: 6.4099 (6.8078) Loss: 14.975 (15.499)
|
| 416 |
+
2025-02-19,06:32:49 | WARNING | Handling webdataset error (OSError('image file is truncated (86 bytes not processed)')). Ignoring.
|
| 417 |
+
2025-02-19,06:34:01 | INFO | Train Epoch: 0 [ 48242688/128000000 (38%)] Data (t): 1.593 Batch (t): 5.059, 1715.50/s, 214.437/s/gpu LR: 0.000981 Logit Scale: 32.295 Class_loss: 8.6135 (8.6899) Contrastive_loss: 6.3449 (6.7980) Loss: 14.958 (15.488)
|
| 418 |
+
2025-02-19,06:44:45 | INFO | Train Epoch: 0 [ 49291264/128000000 (39%)] Data (t): 1.587 Batch (t): 5.031, 1553.26/s, 194.157/s/gpu LR: 0.000981 Logit Scale: 32.571 Class_loss: 8.6077 (8.6881) Contrastive_loss: 6.3209 (6.7880) Loss: 14.929 (15.476)
|
| 419 |
+
2025-02-19,06:52:07 | INFO | Starting zero-shot imagenet.
|
| 420 |
+
2025-02-19,06:52:07 | INFO | Building zero-shot classifier
|
| 421 |
+
2025-02-19,06:52:17 | INFO | Using classifier
|
| 422 |
+
2025-02-19,12:05:22 | INFO | No latest resume checkpoint found in ./logs-lr1e-3-datacomp/exp_rope_clipcls_vit_b16_s512m_bs8k/checkpoints.
|
| 423 |
+
2025-02-19,12:05:26 | INFO | Running in distributed mode with multiple processes. Device: cuda:0.Process (global: 0, local 0), total 8.
|
| 424 |
+
2025-02-19,12:05:27 | INFO | Loaded CLIPCLS-ViT-B-16-NDR model config.
|
| 425 |
+
2025-02-19,12:05:28 | INFO | Model:
|
| 426 |
+
2025-02-19,12:05:28 | INFO | CLIPCLS(
|
| 427 |
+
(visual): NDRVisionTransformer(
|
| 428 |
+
(conv1): Conv2d(3, 768, kernel_size=(16, 16), stride=(16, 16), bias=False)
|
| 429 |
+
(projection_embd): Linear(in_features=768, out_features=768, bias=True)
|
| 430 |
+
(patch_dropout): Identity()
|
| 431 |
+
(ln_pre): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 432 |
+
(transformer): RoPETransformer(
|
| 433 |
+
(resblocks): ModuleList(
|
| 434 |
+
(0-11): 12 x CustomResidualRoPEAttentionBlock(
|
| 435 |
+
(ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 436 |
+
(attn): RoPEAttention(
|
| 437 |
+
(attn_drop): Dropout(p=0.0, inplace=False)
|
| 438 |
+
(out_proj): Linear(in_features=768, out_features=768, bias=True)
|
| 439 |
+
(out_drop): Dropout(p=0.0, inplace=False)
|
| 440 |
+
)
|
| 441 |
+
(ln_attn): Identity()
|
| 442 |
+
(ls_1): Identity()
|
| 443 |
+
(ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 444 |
+
(mlp): Sequential(
|
| 445 |
+
(c_fc): Linear(in_features=768, out_features=3072, bias=True)
|
| 446 |
+
(gelu): GELU(approximate='none')
|
| 447 |
+
(ln): Identity()
|
| 448 |
+
(c_proj): Linear(in_features=3072, out_features=768, bias=True)
|
| 449 |
+
)
|
| 450 |
+
(ls_2): Identity()
|
| 451 |
+
)
|
| 452 |
+
)
|
| 453 |
+
)
|
| 454 |
+
(ln_post): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 455 |
+
)
|
| 456 |
+
(text): TextTransformer(
|
| 457 |
+
(token_embedding): Embedding(49408, 512)
|
| 458 |
+
(transformer): Transformer(
|
| 459 |
+
(resblocks): ModuleList(
|
| 460 |
+
(0-11): 12 x ResidualAttentionBlock(
|
| 461 |
+
(ln_1): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
| 462 |
+
(attn): MultiheadAttention(
|
| 463 |
+
(out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)
|
| 464 |
+
)
|
| 465 |
+
(ls_1): Identity()
|
| 466 |
+
(ln_2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
| 467 |
+
(mlp): Sequential(
|
| 468 |
+
(c_fc): Linear(in_features=512, out_features=2048, bias=True)
|
| 469 |
+
(gelu): GELU(approximate='none')
|
| 470 |
+
(c_proj): Linear(in_features=2048, out_features=512, bias=True)
|
| 471 |
+
)
|
| 472 |
+
(ls_2): Identity()
|
| 473 |
+
)
|
| 474 |
+
)
|
| 475 |
+
)
|
| 476 |
+
(ln_final): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
| 477 |
+
)
|
| 478 |
+
(text_decoder): MixClsHead(
|
| 479 |
+
(mlps): ModuleList()
|
| 480 |
+
(ln_mlp): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 481 |
+
(text_projection): Linear(in_features=768, out_features=49408, bias=True)
|
| 482 |
+
)
|
| 483 |
+
)
|
| 484 |
+
2025-02-19,12:05:28 | INFO | Params:
|
| 485 |
+
2025-02-19,12:05:28 | INFO | NDR_patch_size: 16
|
| 486 |
+
2025-02-19,12:05:28 | INFO | accum_freq: 1
|
| 487 |
+
2025-02-19,12:05:28 | INFO | aug_cfg: {}
|
| 488 |
+
2025-02-19,12:05:28 | INFO | batch_size: 1024
|
| 489 |
+
2025-02-19,12:05:28 | INFO | beta1: 0.9
|
| 490 |
+
2025-02-19,12:05:28 | INFO | beta2: 0.98
|
| 491 |
+
2025-02-19,12:05:28 | INFO | checkpoint_path: ./logs-lr1e-3-datacomp/exp_rope_clipcls_vit_b16_s512m_bs8k/checkpoints
|
| 492 |
+
2025-02-19,12:05:28 | INFO | coca_caption_loss_weight: 2.0
|
| 493 |
+
2025-02-19,12:05:28 | INFO | coca_contrastive_loss_weight: 1.0
|
| 494 |
+
2025-02-19,12:05:28 | INFO | copy_codebase: False
|
| 495 |
+
2025-02-19,12:05:28 | INFO | csv_caption_key: title
|
| 496 |
+
2025-02-19,12:05:28 | INFO | csv_img_key: filepath
|
| 497 |
+
2025-02-19,12:05:28 | INFO | csv_separator:
|
| 498 |
+
2025-02-19,12:05:28 | INFO | dataset_resampled: False
|
| 499 |
+
2025-02-19,12:05:28 | INFO | dataset_type: webdataset
|
| 500 |
+
2025-02-19,12:05:28 | INFO | ddp_static_graph: True
|
| 501 |
+
2025-02-19,12:05:28 | INFO | debug: False
|
| 502 |
+
2025-02-19,12:05:28 | INFO | delete_prev_step_ckpt: True
|
| 503 |
+
2025-02-19,12:05:28 | INFO | delete_previous_checkpoint: False
|
| 504 |
+
2025-02-19,12:05:28 | INFO | device: cuda:0
|
| 505 |
+
2025-02-19,12:05:28 | INFO | dist_backend: nccl
|
| 506 |
+
2025-02-19,12:05:28 | INFO | dist_url: env://
|
| 507 |
+
2025-02-19,12:05:28 | INFO | distill: False
|
| 508 |
+
2025-02-19,12:05:28 | INFO | distill_model: None
|
| 509 |
+
2025-02-19,12:05:28 | INFO | distill_pretrained: None
|
| 510 |
+
2025-02-19,12:05:28 | INFO | distributed: True
|
| 511 |
+
2025-02-19,12:05:28 | INFO | epochs: 4
|
| 512 |
+
2025-02-19,12:05:28 | INFO | epochs_cooldown: None
|
| 513 |
+
2025-02-19,12:05:28 | INFO | eps: 1e-06
|
| 514 |
+
2025-02-19,12:05:28 | INFO | force_custom_text: False
|
| 515 |
+
2025-02-19,12:05:28 | INFO | force_image_size: 224
|
| 516 |
+
2025-02-19,12:05:28 | INFO | force_patch_dropout: None
|
| 517 |
+
2025-02-19,12:05:29 | INFO | force_quick_gelu: False
|
| 518 |
+
2025-02-19,12:05:29 | INFO | gather_with_grad: True
|
| 519 |
+
2025-02-19,12:05:29 | INFO | global_batch_size: 8192
|
| 520 |
+
2025-02-19,12:05:29 | INFO | grad_checkpointing: True
|
| 521 |
+
2025-02-19,12:05:29 | INFO | grad_clip_norm: None
|
| 522 |
+
2025-02-19,12:05:29 | INFO | horovod: False
|
| 523 |
+
2025-02-19,12:05:29 | INFO | image_interpolation: None
|
| 524 |
+
2025-02-19,12:05:29 | INFO | image_mean: None
|
| 525 |
+
2025-02-19,12:05:29 | INFO | image_resize_mode: None
|
| 526 |
+
2025-02-19,12:05:29 | INFO | image_std: None
|
| 527 |
+
2025-02-19,12:05:29 | INFO | imagenet_v2: None
|
| 528 |
+
2025-02-19,12:05:29 | INFO | imagenet_val: /mnt/bn/zilongdata-hl/dataset/imagenet/val
|
| 529 |
+
2025-02-19,12:05:29 | INFO | local_loss: True
|
| 530 |
+
2025-02-19,12:05:29 | INFO | local_rank: 0
|
| 531 |
+
2025-02-19,12:05:29 | INFO | lock_image: False
|
| 532 |
+
2025-02-19,12:05:29 | INFO | lock_image_freeze_bn_stats: False
|
| 533 |
+
2025-02-19,12:05:29 | INFO | lock_image_unlocked_groups: 0
|
| 534 |
+
2025-02-19,12:05:29 | INFO | lock_text: False
|
| 535 |
+
2025-02-19,12:05:29 | INFO | lock_text_freeze_layer_norm: False
|
| 536 |
+
2025-02-19,12:05:29 | INFO | lock_text_unlocked_layers: 0
|
| 537 |
+
2025-02-19,12:05:29 | INFO | log_every_n_steps: 128
|
| 538 |
+
2025-02-19,12:05:29 | INFO | log_level: 20
|
| 539 |
+
2025-02-19,12:05:29 | INFO | log_local: False
|
| 540 |
+
2025-02-19,12:05:29 | INFO | log_path: ./logs-lr1e-3-datacomp/exp_rope_clipcls_vit_b16_s512m_bs8k/out.log
|
| 541 |
+
2025-02-19,12:05:29 | INFO | logs: ./logs-lr1e-3-datacomp
|
| 542 |
+
2025-02-19,12:05:29 | INFO | lr: 0.001
|
| 543 |
+
2025-02-19,12:05:29 | INFO | lr_cooldown_end: 0.0
|
| 544 |
+
2025-02-19,12:05:29 | INFO | lr_cooldown_power: 1.0
|
| 545 |
+
2025-02-19,12:05:29 | INFO | lr_scheduler: cosine
|
| 546 |
+
2025-02-19,12:05:29 | INFO | max_seq_len: 100000000000000
|
| 547 |
+
2025-02-19,12:05:29 | INFO | model: CLIPCLS-ViT-B-16-NDR
|
| 548 |
+
2025-02-19,12:05:29 | INFO | name: exp_rope_clipcls_vit_b16_s512m_bs8k
|
| 549 |
+
2025-02-19,12:05:29 | INFO | native_dynamic_resolution: True
|
| 550 |
+
2025-02-19,12:05:29 | INFO | no_set_device_rank: False
|
| 551 |
+
2025-02-19,12:05:29 | INFO | only_packing: True
|
| 552 |
+
2025-02-19,12:05:29 | INFO | precision: amp_bfloat16
|
| 553 |
+
2025-02-19,12:05:29 | INFO | pretrained:
|
| 554 |
+
2025-02-19,12:05:29 | INFO | pretrained_image:
|
| 555 |
+
2025-02-19,12:05:29 | INFO | pretrained_text:
|
| 556 |
+
2025-02-19,12:05:29 | INFO | rank: 0
|
| 557 |
+
2025-02-19,12:05:29 | INFO | remote_sync: None
|
| 558 |
+
2025-02-19,12:05:29 | INFO | remote_sync_frequency: 300
|
| 559 |
+
2025-02-19,12:05:29 | INFO | remote_sync_protocol: s3
|
| 560 |
+
2025-02-19,12:05:29 | INFO | report_to: wandb
|
| 561 |
+
2025-02-19,12:05:29 | INFO | resume: None
|
| 562 |
+
2025-02-19,12:05:29 | INFO | rope_attn_num_heads: 12
|
| 563 |
+
2025-02-19,12:05:29 | INFO | rope_model_width: 768
|
| 564 |
+
2025-02-19,12:05:29 | INFO | save_every_n_steps: 6104
|
| 565 |
+
2025-02-19,12:05:29 | INFO | save_frequency: 1
|
| 566 |
+
2025-02-19,12:05:29 | INFO | save_most_recent: False
|
| 567 |
+
2025-02-19,12:05:29 | INFO | seed: 0
|
| 568 |
+
2025-02-19,12:05:29 | INFO | siglip: False
|
| 569 |
+
2025-02-19,12:05:29 | INFO | skip_scheduler: False
|
| 570 |
+
2025-02-19,12:05:29 | INFO | tensorboard: False
|
| 571 |
+
2025-02-19,12:05:29 | INFO | tensorboard_path:
|
| 572 |
+
2025-02-19,12:05:29 | INFO | torchcompile: False
|
| 573 |
+
2025-02-19,12:05:29 | INFO | torchscript: False
|
| 574 |
+
2025-02-19,12:05:29 | INFO | trace: False
|
| 575 |
+
2025-02-19,12:05:29 | INFO | train_data: /mnt/bn/bytenas-weixian/data/Recap-DataComp-1B-Dataset/{000000..140146}.tar
|
| 576 |
+
2025-02-19,12:05:29 | INFO | train_data_upsampling_factors: None
|
| 577 |
+
2025-02-19,12:05:29 | INFO | train_num_samples: 128000000
|
| 578 |
+
2025-02-19,12:05:29 | INFO | use_bn_sync: False
|
| 579 |
+
2025-02-19,12:05:29 | INFO | use_bnb_linear: None
|
| 580 |
+
2025-02-19,12:05:29 | INFO | val_data: None
|
| 581 |
+
2025-02-19,12:05:29 | INFO | val_frequency: 1
|
| 582 |
+
2025-02-19,12:05:29 | INFO | val_num_samples: None
|
| 583 |
+
2025-02-19,12:05:29 | INFO | val_steps: 6104
|
| 584 |
+
2025-02-19,12:05:29 | INFO | wandb: True
|
| 585 |
+
2025-02-19,12:05:29 | INFO | wandb_notes:
|
| 586 |
+
2025-02-19,12:05:29 | INFO | wandb_project_name: cls-clip-NDR
|
| 587 |
+
2025-02-19,12:05:29 | INFO | warmup: 500
|
| 588 |
+
2025-02-19,12:05:29 | INFO | wd: 0.2
|
| 589 |
+
2025-02-19,12:05:29 | INFO | workers: 1
|
| 590 |
+
2025-02-19,12:05:29 | INFO | world_size: 8
|
| 591 |
+
2025-02-19,12:05:29 | INFO | zeroshot_frequency: 2
|
| 592 |
+
2025-02-19,12:05:29 | INFO | zeroshot_steps: 6104
|
| 593 |
+
2025-02-19,12:05:45 | INFO | Start epoch 0
|
| 594 |
+
2025-02-19,12:05:58 | INFO | Train Epoch: 0 [ 8192/128000000 (0%)] Data (t): 7.563 Batch (t): 12.760, 641.995/s, 80.2494/s/gpu LR: 0.000002 Logit Scale: 14.286 Class_loss: 11.323 (11.323) Contrastive_loss: 9.1263 (9.1263) Loss: 20.449 (20.449)
|
| 595 |
+
2025-02-19,12:09:00 | WARNING | Handling webdataset error (OSError('image file is truncated (44 bytes not processed)')). Ignoring.
|
| 596 |
+
2025-02-19,12:16:37 | INFO | Train Epoch: 0 [ 1056768/128000000 (1%)] Data (t): 1.530 Batch (t): 4.993, 1766.00/s, 220.750/s/gpu LR: 0.000258 Logit Scale: 14.283 Class_loss: 9.1091 (10.216) Contrastive_loss: 8.5592 (8.8427) Loss: 17.668 (19.059)
|
| 597 |
+
2025-02-19,12:17:05 | WARNING | Handling webdataset error (OSError('image file is truncated (47 bytes not processed)')). Ignoring.
|
| 598 |
+
2025-02-19,12:27:20 | INFO | Train Epoch: 0 [ 2105344/128000000 (2%)] Data (t): 1.563 Batch (t): 5.024, 1746.20/s, 218.275/s/gpu LR: 0.000514 Logit Scale: 14.237 Class_loss: 8.9689 (9.8003) Contrastive_loss: 8.3292 (8.6715) Loss: 17.298 (18.472)
|
| 599 |
+
2025-02-19,12:36:42 | WARNING | Handling webdataset error (OSError('image file is truncated (68 bytes not processed)')). Ignoring.
|
| 600 |
+
2025-02-19,12:38:00 | INFO | Train Epoch: 0 [ 3153920/128000000 (2%)] Data (t): 0.900 Batch (t): 4.999, 1755.67/s, 219.459/s/gpu LR: 0.000770 Logit Scale: 14.214 Class_loss: 8.9421 (9.5857) Contrastive_loss: 8.1398 (8.5386) Loss: 17.082 (18.124)
|
| 601 |
+
2025-02-19,12:48:40 | INFO | Train Epoch: 0 [ 4202496/128000000 (3%)] Data (t): 0.594 Batch (t): 5.001, 1614.41/s, 201.801/s/gpu LR: 0.001000 Logit Scale: 14.196 Class_loss: 8.8778 (9.4442) Contrastive_loss: 7.8747 (8.4058) Loss: 16.753 (17.850)
|
| 602 |
+
2025-02-19,12:53:51 | WARNING | Handling webdataset error (OSError('image file is truncated (25 bytes not processed)')). Ignoring.
|
| 603 |
+
2025-02-19,12:59:12 | INFO | Train Epoch: 0 [ 5251072/128000000 (4%)] Data (t): 0.602 Batch (t): 4.938, 1690.05/s, 211.257/s/gpu LR: 0.001000 Logit Scale: 14.396 Class_loss: 8.8194 (9.3400) Contrastive_loss: 7.6024 (8.2719) Loss: 16.422 (17.612)
|
| 604 |
+
2025-02-19,13:05:13 | INFO | No latest resume checkpoint found in ./logs-lr1e-3-datacomp/exp_rope_clipcls_vit_b16_s512m_bs8k/checkpoints.
|
| 605 |
+
2025-02-19,13:05:18 | INFO | Running in distributed mode with multiple processes. Device: cuda:0.Process (global: 0, local 0), total 8.
|
| 606 |
+
2025-02-19,13:05:18 | INFO | Loaded CLIPCLS-ViT-B-16-NDR model config.
|
| 607 |
+
2025-02-19,13:05:20 | INFO | Model:
|
| 608 |
+
2025-02-19,13:05:20 | INFO | CLIPCLS(
|
| 609 |
+
(visual): NDRVisionTransformer(
|
| 610 |
+
(conv1): Conv2d(3, 768, kernel_size=(16, 16), stride=(16, 16), bias=False)
|
| 611 |
+
(projection_embd): Linear(in_features=768, out_features=768, bias=True)
|
| 612 |
+
(patch_dropout): Identity()
|
| 613 |
+
(ln_pre): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 614 |
+
(transformer): RoPETransformer(
|
| 615 |
+
(resblocks): ModuleList(
|
| 616 |
+
(0-11): 12 x CustomResidualRoPEAttentionBlock(
|
| 617 |
+
(ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 618 |
+
(attn): RoPEAttention(
|
| 619 |
+
(attn_drop): Dropout(p=0.0, inplace=False)
|
| 620 |
+
(out_proj): Linear(in_features=768, out_features=768, bias=True)
|
| 621 |
+
(out_drop): Dropout(p=0.0, inplace=False)
|
| 622 |
+
)
|
| 623 |
+
(ln_attn): Identity()
|
| 624 |
+
(ls_1): Identity()
|
| 625 |
+
(ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 626 |
+
(mlp): Sequential(
|
| 627 |
+
(c_fc): Linear(in_features=768, out_features=3072, bias=True)
|
| 628 |
+
(gelu): GELU(approximate='none')
|
| 629 |
+
(ln): Identity()
|
| 630 |
+
(c_proj): Linear(in_features=3072, out_features=768, bias=True)
|
| 631 |
+
)
|
| 632 |
+
(ls_2): Identity()
|
| 633 |
+
)
|
| 634 |
+
)
|
| 635 |
+
)
|
| 636 |
+
(ln_post): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 637 |
+
)
|
| 638 |
+
(text): TextTransformer(
|
| 639 |
+
(token_embedding): Embedding(49408, 512)
|
| 640 |
+
(transformer): Transformer(
|
| 641 |
+
(resblocks): ModuleList(
|
| 642 |
+
(0-11): 12 x ResidualAttentionBlock(
|
| 643 |
+
(ln_1): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
| 644 |
+
(attn): MultiheadAttention(
|
| 645 |
+
(out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)
|
| 646 |
+
)
|
| 647 |
+
(ls_1): Identity()
|
| 648 |
+
(ln_2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
| 649 |
+
(mlp): Sequential(
|
| 650 |
+
(c_fc): Linear(in_features=512, out_features=2048, bias=True)
|
| 651 |
+
(gelu): GELU(approximate='none')
|
| 652 |
+
(c_proj): Linear(in_features=2048, out_features=512, bias=True)
|
| 653 |
+
)
|
| 654 |
+
(ls_2): Identity()
|
| 655 |
+
)
|
| 656 |
+
)
|
| 657 |
+
)
|
| 658 |
+
(ln_final): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
| 659 |
+
)
|
| 660 |
+
(text_decoder): MixClsHead(
|
| 661 |
+
(mlps): ModuleList()
|
| 662 |
+
(ln_mlp): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 663 |
+
(text_projection): Linear(in_features=768, out_features=49408, bias=True)
|
| 664 |
+
)
|
| 665 |
+
)
|
| 666 |
+
2025-02-19,13:05:20 | INFO | Params:
|
| 667 |
+
2025-02-19,13:05:20 | INFO | NDR_patch_size: 16
|
| 668 |
+
2025-02-19,13:05:20 | INFO | accum_freq: 1
|
| 669 |
+
2025-02-19,13:05:20 | INFO | aug_cfg: {}
|
| 670 |
+
2025-02-19,13:05:20 | INFO | batch_size: 1024
|
| 671 |
+
2025-02-19,13:05:20 | INFO | beta1: 0.9
|
| 672 |
+
2025-02-19,13:05:20 | INFO | beta2: 0.98
|
| 673 |
+
2025-02-19,13:05:20 | INFO | checkpoint_path: ./logs-lr1e-3-datacomp/exp_rope_clipcls_vit_b16_s512m_bs8k/checkpoints
|
| 674 |
+
2025-02-19,13:05:20 | INFO | coca_caption_loss_weight: 2.0
|
| 675 |
+
2025-02-19,13:05:20 | INFO | coca_contrastive_loss_weight: 1.0
|
| 676 |
+
2025-02-19,13:05:20 | INFO | copy_codebase: False
|
| 677 |
+
2025-02-19,13:05:20 | INFO | csv_caption_key: title
|
| 678 |
+
2025-02-19,13:05:20 | INFO | csv_img_key: filepath
|
| 679 |
+
2025-02-19,13:05:20 | INFO | csv_separator:
|
| 680 |
+
2025-02-19,13:05:20 | INFO | dataset_resampled: False
|
| 681 |
+
2025-02-19,13:05:20 | INFO | dataset_type: webdataset
|
| 682 |
+
2025-02-19,13:05:20 | INFO | ddp_static_graph: True
|
| 683 |
+
2025-02-19,13:05:20 | INFO | debug: False
|
| 684 |
+
2025-02-19,13:05:20 | INFO | delete_prev_step_ckpt: True
|
| 685 |
+
2025-02-19,13:05:20 | INFO | delete_previous_checkpoint: False
|
| 686 |
+
2025-02-19,13:05:20 | INFO | device: cuda:0
|
| 687 |
+
2025-02-19,13:05:20 | INFO | dist_backend: nccl
|
| 688 |
+
2025-02-19,13:05:20 | INFO | dist_url: env://
|
| 689 |
+
2025-02-19,13:05:20 | INFO | distill: False
|
| 690 |
+
2025-02-19,13:05:20 | INFO | distill_model: None
|
| 691 |
+
2025-02-19,13:05:20 | INFO | distill_pretrained: None
|
| 692 |
+
2025-02-19,13:05:20 | INFO | distributed: True
|
| 693 |
+
2025-02-19,13:05:20 | INFO | epochs: 4
|
| 694 |
+
2025-02-19,13:05:20 | INFO | epochs_cooldown: None
|
| 695 |
+
2025-02-19,13:05:20 | INFO | eps: 1e-06
|
| 696 |
+
2025-02-19,13:05:20 | INFO | force_custom_text: False
|
| 697 |
+
2025-02-19,13:05:20 | INFO | force_image_size: 224
|
| 698 |
+
2025-02-19,13:05:20 | INFO | force_patch_dropout: None
|
| 699 |
+
2025-02-19,13:05:20 | INFO | force_quick_gelu: False
|
| 700 |
+
2025-02-19,13:05:20 | INFO | gather_with_grad: True
|
| 701 |
+
2025-02-19,13:05:20 | INFO | global_batch_size: 8192
|
| 702 |
+
2025-02-19,13:05:20 | INFO | grad_checkpointing: True
|
| 703 |
+
2025-02-19,13:05:20 | INFO | grad_clip_norm: None
|
| 704 |
+
2025-02-19,13:05:20 | INFO | horovod: False
|
| 705 |
+
2025-02-19,13:05:20 | INFO | image_interpolation: None
|
| 706 |
+
2025-02-19,13:05:20 | INFO | image_mean: None
|
| 707 |
+
2025-02-19,13:05:20 | INFO | image_resize_mode: None
|
| 708 |
+
2025-02-19,13:05:20 | INFO | image_std: None
|
| 709 |
+
2025-02-19,13:05:20 | INFO | imagenet_v2: None
|
| 710 |
+
2025-02-19,13:05:20 | INFO | imagenet_val: /mnt/bn/zilongdata-hl/dataset/imagenet/val
|
| 711 |
+
2025-02-19,13:05:20 | INFO | local_loss: True
|
| 712 |
+
2025-02-19,13:05:20 | INFO | local_rank: 0
|
| 713 |
+
2025-02-19,13:05:20 | INFO | lock_image: False
|
| 714 |
+
2025-02-19,13:05:20 | INFO | lock_image_freeze_bn_stats: False
|
| 715 |
+
2025-02-19,13:05:20 | INFO | lock_image_unlocked_groups: 0
|
| 716 |
+
2025-02-19,13:05:20 | INFO | lock_text: False
|
| 717 |
+
2025-02-19,13:05:20 | INFO | lock_text_freeze_layer_norm: False
|
| 718 |
+
2025-02-19,13:05:20 | INFO | lock_text_unlocked_layers: 0
|
| 719 |
+
2025-02-19,13:05:20 | INFO | log_every_n_steps: 128
|
| 720 |
+
2025-02-19,13:05:20 | INFO | log_level: 20
|
| 721 |
+
2025-02-19,13:05:20 | INFO | log_local: False
|
| 722 |
+
2025-02-19,13:05:20 | INFO | log_path: ./logs-lr1e-3-datacomp/exp_rope_clipcls_vit_b16_s512m_bs8k/out.log
|
| 723 |
+
2025-02-19,13:05:20 | INFO | logs: ./logs-lr1e-3-datacomp
|
| 724 |
+
2025-02-19,13:05:20 | INFO | lr: 0.001
|
| 725 |
+
2025-02-19,13:05:20 | INFO | lr_cooldown_end: 0.0
|
| 726 |
+
2025-02-19,13:05:20 | INFO | lr_cooldown_power: 1.0
|
| 727 |
+
2025-02-19,13:05:20 | INFO | lr_scheduler: cosine
|
| 728 |
+
2025-02-19,13:05:20 | INFO | max_seq_len: 100000000000000
|
| 729 |
+
2025-02-19,13:05:20 | INFO | model: CLIPCLS-ViT-B-16-NDR
|
| 730 |
+
2025-02-19,13:05:20 | INFO | name: exp_rope_clipcls_vit_b16_s512m_bs8k
|
| 731 |
+
2025-02-19,13:05:20 | INFO | native_dynamic_resolution: True
|
| 732 |
+
2025-02-19,13:05:20 | INFO | no_set_device_rank: False
|
| 733 |
+
2025-02-19,13:05:20 | INFO | only_packing: True
|
| 734 |
+
2025-02-19,13:05:20 | INFO | precision: amp_bfloat16
|
| 735 |
+
2025-02-19,13:05:20 | INFO | pretrained:
|
| 736 |
+
2025-02-19,13:05:20 | INFO | pretrained_image:
|
| 737 |
+
2025-02-19,13:05:20 | INFO | pretrained_text:
|
| 738 |
+
2025-02-19,13:05:20 | INFO | rank: 0
|
| 739 |
+
2025-02-19,13:05:20 | INFO | remote_sync: None
|
| 740 |
+
2025-02-19,13:05:20 | INFO | remote_sync_frequency: 300
|
| 741 |
+
2025-02-19,13:05:20 | INFO | remote_sync_protocol: s3
|
| 742 |
+
2025-02-19,13:05:20 | INFO | report_to: wandb
|
| 743 |
+
2025-02-19,13:05:20 | INFO | resume: None
|
| 744 |
+
2025-02-19,13:05:20 | INFO | rope_attn_num_heads: 12
|
| 745 |
+
2025-02-19,13:05:20 | INFO | rope_model_width: 768
|
| 746 |
+
2025-02-19,13:05:20 | INFO | save_every_n_steps: 6104
|
| 747 |
+
2025-02-19,13:05:20 | INFO | save_frequency: 1
|
| 748 |
+
2025-02-19,13:05:20 | INFO | save_most_recent: False
|
| 749 |
+
2025-02-19,13:05:20 | INFO | seed: 0
|
| 750 |
+
2025-02-19,13:05:20 | INFO | siglip: False
|
| 751 |
+
2025-02-19,13:05:20 | INFO | skip_scheduler: False
|
| 752 |
+
2025-02-19,13:05:20 | INFO | tensorboard: False
|
| 753 |
+
2025-02-19,13:05:20 | INFO | tensorboard_path:
|
| 754 |
+
2025-02-19,13:05:20 | INFO | torchcompile: False
|
| 755 |
+
2025-02-19,13:05:20 | INFO | torchscript: False
|
| 756 |
+
2025-02-19,13:05:20 | INFO | trace: False
|
| 757 |
+
2025-02-19,13:05:20 | INFO | train_data: /mnt/bn/bytenas-weixian/data/Recap-DataComp-1B-Dataset/{000000..140146}.tar
|
| 758 |
+
2025-02-19,13:05:20 | INFO | train_data_upsampling_factors: None
|
| 759 |
+
2025-02-19,13:05:20 | INFO | train_num_samples: 128000000
|
| 760 |
+
2025-02-19,13:05:20 | INFO | use_bn_sync: False
|
| 761 |
+
2025-02-19,13:05:20 | INFO | use_bnb_linear: None
|
| 762 |
+
2025-02-19,13:05:20 | INFO | val_data: None
|
| 763 |
+
2025-02-19,13:05:20 | INFO | val_frequency: 1
|
| 764 |
+
2025-02-19,13:05:20 | INFO | val_num_samples: None
|
| 765 |
+
2025-02-19,13:05:20 | INFO | val_steps: 6104
|
| 766 |
+
2025-02-19,13:05:20 | INFO | wandb: True
|
| 767 |
+
2025-02-19,13:05:20 | INFO | wandb_notes:
|
| 768 |
+
2025-02-19,13:05:20 | INFO | wandb_project_name: cls-clip-NDR
|
| 769 |
+
2025-02-19,13:05:20 | INFO | warmup: 500
|
| 770 |
+
2025-02-19,13:05:20 | INFO | wd: 0.2
|
| 771 |
+
2025-02-19,13:05:20 | INFO | workers: 6
|
| 772 |
+
2025-02-19,13:05:20 | INFO | world_size: 8
|
| 773 |
+
2025-02-19,13:05:20 | INFO | zeroshot_frequency: 2
|
| 774 |
+
2025-02-19,13:05:20 | INFO | zeroshot_steps: 6104
|
| 775 |
+
2025-02-19,13:05:35 | INFO | Start epoch 0
|
| 776 |
+
2025-02-19,13:05:51 | INFO | Train Epoch: 0 [ 8192/128040960 (0%)] Data (t): 10.133 Batch (t): 15.889, 515.567/s, 64.4459/s/gpu LR: 0.000002 Logit Scale: 14.286 Class_loss: 11.345 (11.345) Contrastive_loss: 9.1457 (9.1457) Loss: 20.491 (20.491)
|
| 777 |
+
2025-02-19,13:06:40 | WARNING | Handling webdataset error (OSError('image file is truncated (44 bytes not processed)')). Ignoring.
|
| 778 |
+
2025-02-19,13:15:11 | WARNING | Handling webdataset error (OSError('image file is truncated (47 bytes not processed)')). Ignoring.
|
| 779 |
+
2025-02-19,13:15:21 | INFO | Train Epoch: 0 [ 1056768/128040960 (1%)] Data (t): 0.563 Batch (t): 4.452, 1948.42/s, 243.553/s/gpu LR: 0.000258 Logit Scale: 14.284 Class_loss: 9.1077 (10.226) Contrastive_loss: 8.6769 (8.9113) Loss: 17.785 (19.138)
|
| 780 |
+
2025-02-19,13:24:52 | INFO | Train Epoch: 0 [ 2105344/128040960 (2%)] Data (t): 0.545 Batch (t): 4.461, 2104.00/s, 263.000/s/gpu LR: 0.000514 Logit Scale: 14.242 Class_loss: 8.9850 (9.8126) Contrastive_loss: 8.3043 (8.7090) Loss: 17.289 (18.522)
|
| 781 |
+
2025-02-19,13:34:22 | INFO | Train Epoch: 0 [ 3153920/128040960 (2%)] Data (t): 0.554 Batch (t): 4.453, 1689.95/s, 211.243/s/gpu LR: 0.000770 Logit Scale: 14.214 Class_loss: 8.9541 (9.5980) Contrastive_loss: 8.0705 (8.5494) Loss: 17.025 (18.147)
|
| 782 |
+
2025-02-19,13:35:15 | WARNING | Handling webdataset error (OSError('image file is truncated (68 bytes not processed)')). Ignoring.
|
| 783 |
+
2025-02-19,13:43:54 | INFO | Train Epoch: 0 [ 4202496/128040960 (3%)] Data (t): 0.561 Batch (t): 4.469, 1886.29/s, 235.786/s/gpu LR: 0.001000 Logit Scale: 14.216 Class_loss: 8.8615 (9.4507) Contrastive_loss: 7.8130 (8.4021) Loss: 16.675 (17.853)
|
| 784 |
+
2025-02-19,13:48:38 | WARNING | Handling webdataset error (OSError('image file is truncated (25 bytes not processed)')). Ignoring.
|
| 785 |
+
2025-02-19,13:53:26 | INFO | Train Epoch: 0 [ 5251072/128040960 (4%)] Data (t): 0.560 Batch (t): 4.463, 1972.18/s, 246.523/s/gpu LR: 0.001000 Logit Scale: 14.389 Class_loss: 8.8393 (9.3488) Contrastive_loss: 7.6803 (8.2818) Loss: 16.520 (17.631)
|
| 786 |
+
2025-02-19,13:56:35 | WARNING | Handling webdataset error (OSError('image file is truncated (35 bytes not processed)')). Ignoring.
|
| 787 |
+
2025-02-19,14:02:57 | INFO | Train Epoch: 0 [ 6299648/128040960 (5%)] Data (t): 0.550 Batch (t): 4.464, 1777.37/s, 222.172/s/gpu LR: 0.001000 Logit Scale: 14.879 Class_loss: 8.7577 (9.2644) Contrastive_loss: 7.3735 (8.1521) Loss: 16.131 (17.416)
|
| 788 |
+
2025-02-19,14:12:33 | INFO | Train Epoch: 0 [ 7348224/128040960 (6%)] Data (t): 0.564 Batch (t): 4.498, 1942.67/s, 242.834/s/gpu LR: 0.001000 Logit Scale: 15.541 Class_loss: 8.7739 (9.2031) Contrastive_loss: 7.4424 (8.0634) Loss: 16.216 (17.266)
|
| 789 |
+
2025-02-19,14:22:05 | INFO | Train Epoch: 0 [ 8396800/128040960 (7%)] Data (t): 0.571 Batch (t): 4.473, 1944.53/s, 243.066/s/gpu LR: 0.001000 Logit Scale: 16.469 Class_loss: 8.7110 (9.1484) Contrastive_loss: 7.0960 (7.9559) Loss: 15.807 (17.104)
|
| 790 |
+
2025-02-19,14:31:38 | INFO | Train Epoch: 0 [ 9445376/128040960 (7%)] Data (t): 0.563 Batch (t): 4.472, 1748.42/s, 218.552/s/gpu LR: 0.001000 Logit Scale: 17.457 Class_loss: 8.5955 (9.0931) Contrastive_loss: 6.8748 (7.8478) Loss: 15.470 (16.941)
|
| 791 |
+
2025-02-19,14:40:26 | WARNING | Handling webdataset error (OSError('image file is truncated (104 bytes not processed)')). Ignoring.
|
| 792 |
+
2025-02-19,14:41:10 | INFO | Train Epoch: 0 [ 10493952/128040960 (8%)] Data (t): 0.558 Batch (t): 4.469, 1963.86/s, 245.483/s/gpu LR: 0.001000 Logit Scale: 18.481 Class_loss: 8.5946 (9.0478) Contrastive_loss: 6.8009 (7.7526) Loss: 15.396 (16.800)
|
| 793 |
+
2025-02-19,14:45:09 | WARNING | Handling webdataset error (OSError('image file is truncated (21 bytes not processed)')). Ignoring.
|
| 794 |
+
2025-02-19,14:50:40 | INFO | Train Epoch: 0 [ 11542528/128040960 (9%)] Data (t): 0.574 Batch (t): 4.459, 2100.78/s, 262.597/s/gpu LR: 0.000999 Logit Scale: 19.417 Class_loss: 8.5640 (9.0075) Contrastive_loss: 6.6877 (7.6639) Loss: 15.252 (16.671)
|
| 795 |
+
2025-02-19,14:59:04 | WARNING | Handling webdataset error (OSError('image file is truncated (68 bytes not processed)')). Ignoring.
|
| 796 |
+
2025-02-19,15:00:10 | INFO | Train Epoch: 0 [ 12591104/128040960 (10%)] Data (t): 0.571 Batch (t): 4.450, 1743.91/s, 217.989/s/gpu LR: 0.000999 Logit Scale: 20.350 Class_loss: 8.5830 (8.9748) Contrastive_loss: 6.6580 (7.5865) Loss: 15.241 (16.561)
|
| 797 |
+
2025-02-19,15:09:40 | INFO | Train Epoch: 0 [ 13639680/128040960 (11%)] Data (t): 0.559 Batch (t): 4.456, 1966.06/s, 245.757/s/gpu LR: 0.000999 Logit Scale: 21.192 Class_loss: 8.5088 (8.9415) Contrastive_loss: 6.5581 (7.5130) Loss: 15.067 (16.455)
|
| 798 |
+
2025-02-19,15:12:15 | WARNING | Handling webdataset error (OSError('image file is truncated (28 bytes not processed)')). Ignoring.
|
| 799 |
+
2025-02-19,15:19:08 | WARNING | Handling webdataset error (OSError('image file is truncated (32 bytes not processed)')). Ignoring.
|
| 800 |
+
2025-02-19,15:19:10 | INFO | Train Epoch: 0 [ 14688256/128040960 (11%)] Data (t): 0.566 Batch (t): 4.453, 1954.79/s, 244.348/s/gpu LR: 0.000999 Logit Scale: 21.899 Class_loss: 8.5548 (8.9157) Contrastive_loss: 6.4382 (7.4414) Loss: 14.993 (16.357)
|
| 801 |
+
2025-02-19,15:19:39 | WARNING | Handling webdataset error (OSError('image file is truncated (5 bytes not processed)')). Ignoring.
|
| 802 |
+
2025-02-19,15:28:39 | INFO | Train Epoch: 0 [ 15736832/128040960 (12%)] Data (t): 0.565 Batch (t): 4.443, 1826.76/s, 228.345/s/gpu LR: 0.000999 Logit Scale: 22.392 Class_loss: 8.5375 (8.8921) Contrastive_loss: 6.6134 (7.3896) Loss: 15.151 (16.282)
|
| 803 |
+
2025-02-19,15:31:54 | WARNING | Handling webdataset error (OSError('image file is truncated (23 bytes not processed)')). Ignoring.
|
| 804 |
+
2025-02-19,15:38:10 | INFO | Train Epoch: 0 [ 16785408/128040960 (13%)] Data (t): 0.569 Batch (t): 4.461, 1664.71/s, 208.089/s/gpu LR: 0.000998 Logit Scale: 22.853 Class_loss: 8.4941 (8.8687) Contrastive_loss: 6.5778 (7.3419) Loss: 15.072 (16.211)
|
| 805 |
+
2025-02-19,15:47:40 | INFO | Train Epoch: 0 [ 17833984/128040960 (14%)] Data (t): 0.567 Batch (t): 4.450, 2096.33/s, 262.041/s/gpu LR: 0.000998 Logit Scale: 23.429 Class_loss: 8.5666 (8.8519) Contrastive_loss: 6.4976 (7.2950) Loss: 15.064 (16.147)
|
| 806 |
+
2025-02-19,15:57:09 | INFO | Train Epoch: 0 [ 18882560/128040960 (15%)] Data (t): 0.573 Batch (t): 4.445, 1937.46/s, 242.183/s/gpu LR: 0.000998 Logit Scale: 24.001 Class_loss: 8.6211 (8.8398) Contrastive_loss: 6.5858 (7.2576) Loss: 15.207 (16.097)
|
| 807 |
+
2025-02-19,16:06:39 | INFO | Train Epoch: 0 [ 19931136/128040960 (16%)] Data (t): 0.557 Batch (t): 4.460, 1787.86/s, 223.483/s/gpu LR: 0.000998 Logit Scale: 24.533 Class_loss: 8.5764 (8.8266) Contrastive_loss: 6.6014 (7.2248) Loss: 15.178 (16.051)
|
| 808 |
+
2025-02-19,16:16:08 | INFO | Train Epoch: 0 [ 20979712/128040960 (16%)] Data (t): 0.555 Batch (t): 4.439, 1978.62/s, 247.327/s/gpu LR: 0.000997 Logit Scale: 25.075 Class_loss: 8.5267 (8.8123) Contrastive_loss: 6.3055 (7.1811) Loss: 14.832 (15.993)
|
| 809 |
+
2025-02-19,16:25:38 | INFO | Train Epoch: 0 [ 22028288/128040960 (17%)] Data (t): 0.572 Batch (t): 4.459, 1888.03/s, 236.004/s/gpu LR: 0.000997 Logit Scale: 25.526 Class_loss: 8.5753 (8.8015) Contrastive_loss: 6.5262 (7.1513) Loss: 15.101 (15.953)
|
| 810 |
+
2025-02-19,16:35:06 | INFO | Train Epoch: 0 [ 23076864/128040960 (18%)] Data (t): 0.571 Batch (t): 4.434, 1799.42/s, 224.928/s/gpu LR: 0.000997 Logit Scale: 25.952 Class_loss: 8.5775 (8.7918) Contrastive_loss: 6.5202 (7.1238) Loss: 15.098 (15.916)
|
| 811 |
+
2025-02-19,16:41:13 | WARNING | Handling webdataset error (OSError('image file is truncated (114 bytes not processed)')). Ignoring.
|
| 812 |
+
2025-02-19,16:44:34 | INFO | Train Epoch: 0 [ 24125440/128040960 (19%)] Data (t): 0.554 Batch (t): 4.436, 1961.03/s, 245.129/s/gpu LR: 0.000996 Logit Scale: 26.313 Class_loss: 8.5193 (8.7804) Contrastive_loss: 6.4393 (7.0953) Loss: 14.959 (15.876)
|
| 813 |
+
2025-02-19,16:52:29 | WARNING | Handling webdataset error (OSError('image file is truncated (57 bytes not processed)')). Ignoring.
|
| 814 |
+
2025-02-19,16:54:01 | INFO | Train Epoch: 0 [ 25174016/128040960 (20%)] Data (t): 0.556 Batch (t): 4.428, 1866.46/s, 233.307/s/gpu LR: 0.000996 Logit Scale: 26.777 Class_loss: 8.5398 (8.7708) Contrastive_loss: 6.4941 (7.0713) Loss: 15.034 (15.842)
|
| 815 |
+
2025-02-19,17:03:23 | INFO | Train Epoch: 0 [ 26222592/128040960 (20%)] Data (t): 0.543 Batch (t): 4.393, 1817.50/s, 227.187/s/gpu LR: 0.000995 Logit Scale: 27.129 Class_loss: 8.5815 (8.7635) Contrastive_loss: 6.5006 (7.0493) Loss: 15.082 (15.813)
|
| 816 |
+
2025-02-19,17:04:44 | WARNING | Handling webdataset error (OSError('image file is truncated (75 bytes not processed)')). Ignoring.
|
| 817 |
+
2025-02-19,17:07:16 | WARNING | Handling webdataset error (OSError('image file is truncated (0 bytes not processed)')). Ignoring.
|
| 818 |
+
2025-02-19,17:12:46 | INFO | Train Epoch: 0 [ 27271168/128040960 (21%)] Data (t): 0.554 Batch (t): 4.400, 1981.18/s, 247.647/s/gpu LR: 0.000995 Logit Scale: 27.474 Class_loss: 8.5443 (8.7554) Contrastive_loss: 6.3068 (7.0218) Loss: 14.851 (15.777)
|
| 819 |
+
2025-02-19,17:16:36 | WARNING | Handling webdataset error (OSError('image file is truncated (59 bytes not processed)')). Ignoring.
|
| 820 |
+
2025-02-19,17:22:15 | INFO | Train Epoch: 0 [ 28319744/128040960 (22%)] Data (t): 0.595 Batch (t): 4.443, 1857.18/s, 232.147/s/gpu LR: 0.000994 Logit Scale: 27.816 Class_loss: 8.5371 (8.7476) Contrastive_loss: 6.3310 (6.9972) Loss: 14.868 (15.745)
|
| 821 |
+
2025-02-19,17:31:46 | INFO | Train Epoch: 0 [ 29368320/128040960 (23%)] Data (t): 0.612 Batch (t): 4.461, 1883.20/s, 235.400/s/gpu LR: 0.000994 Logit Scale: 28.138 Class_loss: 8.6386 (8.7439) Contrastive_loss: 6.6651 (6.9857) Loss: 15.304 (15.730)
|
| 822 |
+
2025-02-19,17:37:01 | WARNING | Handling webdataset error (OSError('image file is truncated (1 bytes not processed)')). Ignoring.
|
| 823 |
+
2025-02-19,17:41:10 | INFO | Train Epoch: 0 [ 30416896/128040960 (24%)] Data (t): 0.566 Batch (t): 4.405, 2113.44/s, 264.180/s/gpu LR: 0.000993 Logit Scale: 28.479 Class_loss: 8.5183 (8.7363) Contrastive_loss: 6.2043 (6.9597) Loss: 14.723 (15.696)
|
| 824 |
+
2025-02-19,17:50:31 | INFO | Train Epoch: 0 [ 31465472/128040960 (25%)] Data (t): 0.560 Batch (t): 4.388, 1980.46/s, 247.557/s/gpu LR: 0.000993 Logit Scale: 28.767 Class_loss: 8.5412 (8.7301) Contrastive_loss: 6.1385 (6.9332) Loss: 14.680 (15.663)
|
| 825 |
+
2025-02-19,17:59:57 | INFO | Train Epoch: 0 [ 32514048/128040960 (25%)] Data (t): 0.576 Batch (t): 4.421, 1778.98/s, 222.373/s/gpu LR: 0.000992 Logit Scale: 29.225 Class_loss: 8.5244 (8.7236) Contrastive_loss: 6.2928 (6.9132) Loss: 14.817 (15.637)
|
| 826 |
+
2025-02-19,18:09:20 | INFO | Train Epoch: 0 [ 33562624/128040960 (26%)] Data (t): 0.570 Batch (t): 4.395, 2010.13/s, 251.266/s/gpu LR: 0.000992 Logit Scale: 29.516 Class_loss: 8.5979 (8.7198) Contrastive_loss: 6.4969 (6.9005) Loss: 15.095 (15.620)
|
| 827 |
+
2025-02-19,18:10:36 | WARNING | Handling webdataset error (OSError('image file is truncated (0 bytes not processed)')). Ignoring.
|
| 828 |
+
2025-02-19,18:18:42 | INFO | Train Epoch: 0 [ 34611200/128040960 (27%)] Data (t): 0.573 Batch (t): 4.395, 1723.65/s, 215.456/s/gpu LR: 0.000991 Logit Scale: 29.842 Class_loss: 8.5726 (8.7155) Contrastive_loss: 6.2865 (6.8825) Loss: 14.859 (15.598)
|
| 829 |
+
2025-02-19,18:28:03 | INFO | Train Epoch: 0 [ 35659776/128040960 (28%)] Data (t): 0.565 Batch (t): 4.379, 1771.34/s, 221.418/s/gpu LR: 0.000991 Logit Scale: 29.875 Class_loss: 8.6969 (8.7150) Contrastive_loss: 6.8899 (6.8827) Loss: 15.587 (15.598)
|
| 830 |
+
2025-02-19,18:37:25 | INFO | Train Epoch: 0 [ 36708352/128040960 (29%)] Data (t): 0.566 Batch (t): 4.389, 1942.40/s, 242.800/s/gpu LR: 0.000990 Logit Scale: 30.141 Class_loss: 8.6001 (8.7118) Contrastive_loss: 6.5299 (6.8729) Loss: 15.130 (15.585)
|
| 831 |
+
2025-02-19,18:37:48 | WARNING | Handling webdataset error (OSError('image file is truncated (18 bytes not processed)')). Ignoring.
|
| 832 |
+
2025-02-19,18:46:43 | INFO | Train Epoch: 0 [ 37756928/128040960 (29%)] Data (t): 0.572 Batch (t): 4.359, 1966.47/s, 245.808/s/gpu LR: 0.000989 Logit Scale: 30.344 Class_loss: 8.5232 (8.7067) Contrastive_loss: 6.3698 (6.8593) Loss: 14.893 (15.566)
|
| 833 |
+
2025-02-19,18:49:17 | WARNING | Handling webdataset error (OSError('image file is truncated (59 bytes not processed)')). Ignoring.
|
| 834 |
+
2025-02-19,18:56:01 | INFO | Train Epoch: 0 [ 38805504/128040960 (30%)] Data (t): 0.573 Batch (t): 4.358, 1861.34/s, 232.668/s/gpu LR: 0.000989 Logit Scale: 30.342 Class_loss: 8.6590 (8.7054) Contrastive_loss: 6.8510 (6.8591) Loss: 15.510 (15.564)
|
| 835 |
+
2025-02-19,19:05:19 | INFO | Train Epoch: 0 [ 39854080/128040960 (31%)] Data (t): 0.561 Batch (t): 4.364, 2074.33/s, 259.291/s/gpu LR: 0.000988 Logit Scale: 30.527 Class_loss: 8.7386 (8.7063) Contrastive_loss: 6.9183 (6.8606) Loss: 15.657 (15.567)
|
| 836 |
+
2025-02-19,19:09:01 | WARNING | Handling webdataset error (OSError('image file is truncated (84 bytes not processed)')). Ignoring.
|
| 837 |
+
2025-02-19,19:14:36 | INFO | Train Epoch: 0 [ 40902656/128040960 (32%)] Data (t): 0.557 Batch (t): 4.350, 1938.78/s, 242.347/s/gpu LR: 0.000987 Logit Scale: 30.693 Class_loss: 8.6893 (8.7058) Contrastive_loss: 6.9139 (6.8619) Loss: 15.603 (15.568)
|
| 838 |
+
2025-02-19,19:23:55 | INFO | Train Epoch: 0 [ 41951232/128040960 (33%)] Data (t): 0.583 Batch (t): 4.369, 1796.22/s, 224.527/s/gpu LR: 0.000986 Logit Scale: 30.823 Class_loss: 8.6125 (8.7036) Contrastive_loss: 6.6863 (6.8576) Loss: 15.299 (15.561)
|
| 839 |
+
2025-02-19,19:29:22 | WARNING | Handling webdataset error (OSError('image file is truncated (18 bytes not processed)')). Ignoring.
|
| 840 |
+
2025-02-19,19:33:16 | INFO | Train Epoch: 0 [ 42999808/128040960 (34%)] Data (t): 0.628 Batch (t): 4.384, 1942.68/s, 242.835/s/gpu LR: 0.000986 Logit Scale: 31.040 Class_loss: 8.6737 (8.7029) Contrastive_loss: 6.6027 (6.8516) Loss: 15.276 (15.554)
|
| 841 |
+
2025-02-19,19:42:37 | INFO | Train Epoch: 0 [ 44048384/128040960 (34%)] Data (t): 0.598 Batch (t): 4.381, 1870.95/s, 233.869/s/gpu LR: 0.000985 Logit Scale: 31.324 Class_loss: 8.6157 (8.7008) Contrastive_loss: 6.6863 (6.8477) Loss: 15.302 (15.549)
|
| 842 |
+
2025-02-19,19:50:01 | WARNING | Handling webdataset error (OSError('image file is truncated (49 bytes not processed)')). Ignoring.
|
| 843 |
+
2025-02-19,19:51:53 | INFO | Train Epoch: 0 [ 45096960/128040960 (35%)] Data (t): 0.560 Batch (t): 4.346, 1685.03/s, 210.628/s/gpu LR: 0.000984 Logit Scale: 31.575 Class_loss: 8.6482 (8.6996) Contrastive_loss: 6.5760 (6.8416) Loss: 15.224 (15.541)
|
| 844 |
+
2025-02-19,20:00:44 | WARNING | Handling webdataset error (OSError('image file is truncated (5 bytes not processed)')). Ignoring.
|
| 845 |
+
2025-02-19,20:01:08 | INFO | Train Epoch: 0 [ 46145536/128040960 (36%)] Data (t): 0.577 Batch (t): 4.330, 1977.80/s, 247.224/s/gpu LR: 0.000983 Logit Scale: 31.606 Class_loss: 8.6865 (8.6993) Contrastive_loss: 6.7368 (6.8392) Loss: 15.423 (15.539)
|
| 846 |
+
2025-02-19,20:10:24 | INFO | Train Epoch: 0 [ 47194112/128040960 (37%)] Data (t): 0.573 Batch (t): 4.348, 1957.80/s, 244.725/s/gpu LR: 0.000982 Logit Scale: 32.080 Class_loss: 8.6316 (8.6979) Contrastive_loss: 6.5471 (6.8329) Loss: 15.179 (15.531)
|
| 847 |
+
2025-02-19,20:16:50 | WARNING | Handling webdataset error (OSError('image file is truncated (86 bytes not processed)')). Ignoring.
|
| 848 |
+
2025-02-19,20:19:43 | INFO | Train Epoch: 0 [ 48242688/128040960 (38%)] Data (t): 0.574 Batch (t): 4.367, 1928.76/s, 241.095/s/gpu LR: 0.000981 Logit Scale: 31.915 Class_loss: 8.6851 (8.6976) Contrastive_loss: 6.7133 (6.8303) Loss: 15.398 (15.528)
|
| 849 |
+
2025-02-19,20:29:02 | INFO | Train Epoch: 0 [ 49291264/128040960 (38%)] Data (t): 0.579 Batch (t): 4.363, 2102.72/s, 262.839/s/gpu LR: 0.000981 Logit Scale: 32.106 Class_loss: 8.6748 (8.6971) Contrastive_loss: 6.8274 (6.8303) Loss: 15.502 (15.527)
|
| 850 |
+
2025-02-19,20:35:19 | INFO | Starting zero-shot imagenet.
|
| 851 |
+
2025-02-19,20:35:19 | INFO | Building zero-shot classifier
|
| 852 |
+
2025-02-19,20:35:29 | INFO | Using classifier
|
| 853 |
+
2025-02-19,20:40:28 | INFO | Finished zero-shot imagenet.
|
| 854 |
+
2025-02-19,20:40:28 | INFO | Eval Epoch: 0.39046705054382597 imagenet-zeroshot-val-top1: 0.0001 imagenet-zeroshot-val-top5: 0.0004
|
exp_rope_clipcls_vit_b16_s512m_bs8k/params.txt
ADDED
|
@@ -0,0 +1,108 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
NDR_patch_size: 16
|
| 2 |
+
accum_freq: 1
|
| 3 |
+
aug_cfg: {}
|
| 4 |
+
batch_size: 1024
|
| 5 |
+
beta1: 0.9
|
| 6 |
+
beta2: 0.98
|
| 7 |
+
checkpoint_path: ./logs-lr1e-3-datacomp/exp_rope_clipcls_vit_b16_s512m_bs8k/checkpoints
|
| 8 |
+
coca_caption_loss_weight: 2.0
|
| 9 |
+
coca_contrastive_loss_weight: 1.0
|
| 10 |
+
copy_codebase: False
|
| 11 |
+
csv_caption_key: title
|
| 12 |
+
csv_img_key: filepath
|
| 13 |
+
csv_separator:
|
| 14 |
+
dataset_resampled: False
|
| 15 |
+
dataset_type: webdataset
|
| 16 |
+
ddp_static_graph: True
|
| 17 |
+
debug: False
|
| 18 |
+
delete_prev_step_ckpt: True
|
| 19 |
+
delete_previous_checkpoint: False
|
| 20 |
+
device: cuda:0
|
| 21 |
+
dist_backend: nccl
|
| 22 |
+
dist_url: env://
|
| 23 |
+
distill: False
|
| 24 |
+
distill_model: None
|
| 25 |
+
distill_pretrained: None
|
| 26 |
+
distributed: True
|
| 27 |
+
epochs: 4
|
| 28 |
+
epochs_cooldown: None
|
| 29 |
+
eps: 1e-06
|
| 30 |
+
force_custom_text: False
|
| 31 |
+
force_image_size: 224
|
| 32 |
+
force_patch_dropout: None
|
| 33 |
+
force_quick_gelu: False
|
| 34 |
+
gather_with_grad: True
|
| 35 |
+
global_batch_size: 8192
|
| 36 |
+
grad_checkpointing: True
|
| 37 |
+
grad_clip_norm: None
|
| 38 |
+
horovod: False
|
| 39 |
+
image_interpolation: None
|
| 40 |
+
image_mean: None
|
| 41 |
+
image_resize_mode: None
|
| 42 |
+
image_std: None
|
| 43 |
+
imagenet_v2: None
|
| 44 |
+
imagenet_val: /mnt/bn/zilongdata-hl/dataset/imagenet/val
|
| 45 |
+
local_loss: True
|
| 46 |
+
local_rank: 0
|
| 47 |
+
lock_image: False
|
| 48 |
+
lock_image_freeze_bn_stats: False
|
| 49 |
+
lock_image_unlocked_groups: 0
|
| 50 |
+
lock_text: False
|
| 51 |
+
lock_text_freeze_layer_norm: False
|
| 52 |
+
lock_text_unlocked_layers: 0
|
| 53 |
+
log_every_n_steps: 128
|
| 54 |
+
log_level: 20
|
| 55 |
+
log_local: False
|
| 56 |
+
log_path: ./logs-lr1e-3-datacomp/exp_rope_clipcls_vit_b16_s512m_bs8k/out.log
|
| 57 |
+
logs: ./logs-lr1e-3-datacomp
|
| 58 |
+
lr: 0.001
|
| 59 |
+
lr_cooldown_end: 0.0
|
| 60 |
+
lr_cooldown_power: 1.0
|
| 61 |
+
lr_scheduler: cosine
|
| 62 |
+
max_seq_len: 100000000000000
|
| 63 |
+
model: CLIPCLS-ViT-B-16-NDR
|
| 64 |
+
name: exp_rope_clipcls_vit_b16_s512m_bs8k
|
| 65 |
+
native_dynamic_resolution: True
|
| 66 |
+
no_set_device_rank: False
|
| 67 |
+
only_packing: True
|
| 68 |
+
precision: amp_bfloat16
|
| 69 |
+
pretrained:
|
| 70 |
+
pretrained_image:
|
| 71 |
+
pretrained_text:
|
| 72 |
+
rank: 0
|
| 73 |
+
remote_sync: None
|
| 74 |
+
remote_sync_frequency: 300
|
| 75 |
+
remote_sync_protocol: s3
|
| 76 |
+
report_to: wandb
|
| 77 |
+
resume: None
|
| 78 |
+
rope_attn_num_heads: 12
|
| 79 |
+
rope_model_width: 768
|
| 80 |
+
save_every_n_steps: 6104
|
| 81 |
+
save_frequency: 1
|
| 82 |
+
save_most_recent: False
|
| 83 |
+
seed: 0
|
| 84 |
+
siglip: False
|
| 85 |
+
skip_scheduler: False
|
| 86 |
+
tensorboard: False
|
| 87 |
+
tensorboard_path:
|
| 88 |
+
torchcompile: False
|
| 89 |
+
torchscript: False
|
| 90 |
+
trace: False
|
| 91 |
+
train_data: /mnt/bn/bytenas-weixian/data/Recap-DataComp-1B-Dataset/{000000..140146}.tar
|
| 92 |
+
train_data_upsampling_factors: None
|
| 93 |
+
train_num_samples: 128000000
|
| 94 |
+
use_bn_sync: False
|
| 95 |
+
use_bnb_linear: None
|
| 96 |
+
val_data: None
|
| 97 |
+
val_frequency: 1
|
| 98 |
+
val_num_samples: None
|
| 99 |
+
val_steps: 6104
|
| 100 |
+
wandb: True
|
| 101 |
+
wandb_notes:
|
| 102 |
+
wandb_project_name: cls-clip-NDR
|
| 103 |
+
warmup: 500
|
| 104 |
+
wd: 0.2
|
| 105 |
+
workers: 6
|
| 106 |
+
world_size: 8
|
| 107 |
+
zeroshot_frequency: 2
|
| 108 |
+
zeroshot_steps: 6104
|