Upload folder using huggingface_hub
Browse files
exp_rope_clipcls_vit_b16_s512m_bs8k/checkpoints/results.jsonl
CHANGED
|
@@ -1 +1 @@
|
|
| 1 |
-
{"imagenet-zeroshot-val-top1": 0.
|
|
|
|
| 1 |
+
{"imagenet-zeroshot-val-top1": 0.0015595918367346938, "imagenet-zeroshot-val-top5": 0.0029268367346938777}
|
exp_rope_clipcls_vit_b16_s512m_bs8k/checkpoints/step_6104.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f13bf614390459a56343c6b146506d6fecba8906c00ac2227d8cd9eba65720bf
|
| 3 |
+
size 2337853685
|
exp_rope_clipcls_vit_b16_s512m_bs8k/out.log
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
-
2025-02-
|
| 2 |
-
2025-02-
|
| 3 |
-
2025-02-
|
| 4 |
-
2025-02-
|
| 5 |
-
2025-02-
|
| 6 |
(visual): NDRVisionTransformer(
|
| 7 |
(conv1): Conv2d(3, 768, kernel_size=(16, 16), stride=(16, 16), bias=False)
|
| 8 |
(projection_embd): Linear(in_features=768, out_features=768, bias=True)
|
|
@@ -16,6 +16,9 @@
|
|
| 16 |
(attn_drop): Dropout(p=0.0, inplace=False)
|
| 17 |
(out_proj): Linear(in_features=768, out_features=768, bias=True)
|
| 18 |
(out_drop): Dropout(p=0.0, inplace=False)
|
|
|
|
|
|
|
|
|
|
| 19 |
)
|
| 20 |
(ln_attn): Identity()
|
| 21 |
(ls_1): Identity()
|
|
@@ -60,795 +63,223 @@
|
|
| 60 |
(text_projection): Linear(in_features=768, out_features=49408, bias=True)
|
| 61 |
)
|
| 62 |
)
|
| 63 |
-
2025-02-
|
| 64 |
-
2025-02-
|
| 65 |
-
2025-02-
|
| 66 |
-
2025-02-
|
| 67 |
-
2025-02-
|
| 68 |
-
2025-02-
|
| 69 |
-
2025-02-
|
| 70 |
-
2025-02-
|
| 71 |
-
2025-02-
|
| 72 |
-
2025-02-
|
| 73 |
-
2025-02-
|
| 74 |
-
2025-02-
|
| 75 |
-
2025-02-
|
| 76 |
-
2025-02-
|
| 77 |
-
2025-02-
|
| 78 |
-
2025-02-
|
| 79 |
-
2025-02-
|
| 80 |
-
2025-02-
|
| 81 |
-
2025-02-
|
| 82 |
-
2025-02-
|
| 83 |
-
2025-02-
|
| 84 |
-
2025-02-
|
| 85 |
-
2025-02-
|
| 86 |
-
2025-02-
|
| 87 |
-
2025-02-
|
| 88 |
-
2025-02-
|
| 89 |
-
2025-02-
|
| 90 |
-
2025-02-
|
| 91 |
-
2025-02-
|
| 92 |
-
2025-02-
|
| 93 |
-
2025-02-
|
| 94 |
-
2025-02-
|
| 95 |
-
2025-02-
|
| 96 |
-
2025-02-
|
| 97 |
-
2025-02-
|
| 98 |
-
2025-02-
|
| 99 |
-
2025-02-
|
| 100 |
-
2025-02-
|
| 101 |
-
2025-02-
|
| 102 |
-
2025-02-
|
| 103 |
-
2025-02-
|
| 104 |
-
2025-02-
|
| 105 |
-
2025-02-
|
| 106 |
-
2025-02-
|
| 107 |
-
2025-02-
|
| 108 |
-
2025-02-
|
| 109 |
-
2025-02-
|
| 110 |
-
2025-02-
|
| 111 |
-
2025-02-
|
| 112 |
-
2025-02-
|
| 113 |
-
2025-02-
|
| 114 |
-
2025-02-
|
| 115 |
-
2025-02-
|
| 116 |
-
2025-02-
|
| 117 |
-
2025-02-
|
| 118 |
-
2025-02-
|
| 119 |
-
2025-02-
|
| 120 |
-
2025-02-
|
| 121 |
-
2025-02-
|
| 122 |
-
2025-02-
|
| 123 |
-
2025-02-
|
| 124 |
-
2025-02-
|
| 125 |
-
2025-02-
|
| 126 |
-
2025-02-
|
| 127 |
-
2025-02-
|
| 128 |
-
2025-02-
|
| 129 |
-
2025-02-
|
| 130 |
-
2025-02-
|
| 131 |
-
2025-02-
|
| 132 |
-
2025-02-
|
| 133 |
-
2025-02-
|
| 134 |
-
2025-02-
|
| 135 |
-
2025-02-
|
| 136 |
-
2025-02-
|
| 137 |
-
2025-02-
|
| 138 |
-
2025-02-
|
| 139 |
-
2025-02-
|
| 140 |
-
2025-02-
|
| 141 |
-
2025-02-
|
| 142 |
-
2025-02-
|
| 143 |
-
2025-02-
|
| 144 |
-
2025-02-
|
| 145 |
-
2025-02-
|
| 146 |
-
2025-02-
|
| 147 |
-
2025-02-
|
| 148 |
-
2025-02-
|
| 149 |
-
2025-02-
|
| 150 |
-
2025-02-
|
| 151 |
-
2025-02-
|
| 152 |
-
2025-02-
|
| 153 |
-
2025-02-
|
| 154 |
-
2025-02-
|
| 155 |
-
2025-02-
|
| 156 |
-
2025-02-
|
| 157 |
-
2025-02-
|
| 158 |
-
2025-02-
|
| 159 |
-
2025-02-
|
| 160 |
-
2025-02-
|
| 161 |
-
2025-02-
|
| 162 |
-
2025-02-
|
| 163 |
-
2025-02-
|
| 164 |
-
2025-02-
|
| 165 |
-
2025-02-
|
| 166 |
-
2025-02-
|
| 167 |
-
2025-02-
|
| 168 |
-
2025-02-
|
| 169 |
-
2025-02-
|
| 170 |
-
2025-02-
|
| 171 |
-
2025-02-
|
| 172 |
-
2025-02-
|
| 173 |
-
2025-02-
|
| 174 |
-
2025-02-
|
| 175 |
-
2025-02-
|
| 176 |
-
2025-02-
|
| 177 |
-
2025-02-
|
| 178 |
-
|
| 179 |
-
|
| 180 |
-
|
| 181 |
-
|
| 182 |
-
|
| 183 |
-
|
| 184 |
-
|
| 185 |
-
|
| 186 |
-
|
| 187 |
-
|
| 188 |
-
|
| 189 |
-
|
| 190 |
-
|
| 191 |
-
|
| 192 |
-
|
| 193 |
-
|
| 194 |
-
|
| 195 |
-
|
| 196 |
-
|
| 197 |
-
|
| 198 |
-
|
| 199 |
-
|
| 200 |
-
|
| 201 |
-
|
| 202 |
-
|
| 203 |
-
|
| 204 |
-
|
| 205 |
-
|
| 206 |
-
|
| 207 |
-
|
| 208 |
-
|
| 209 |
-
|
| 210 |
-
|
| 211 |
-
|
| 212 |
-
|
| 213 |
-
|
| 214 |
-
|
| 215 |
-
|
| 216 |
-
|
| 217 |
-
|
| 218 |
-
|
| 219 |
-
|
| 220 |
-
|
| 221 |
-
|
| 222 |
-
|
| 223 |
-
|
| 224 |
-
|
| 225 |
-
|
| 226 |
-
|
| 227 |
-
|
| 228 |
-
|
| 229 |
-
|
| 230 |
-
|
| 231 |
-
|
| 232 |
-
|
| 233 |
-
|
| 234 |
-
)
|
| 235 |
-
2025-02-18
|
| 236 |
-
2025-02-18
|
| 237 |
-
2025-02-18
|
| 238 |
-
2025-02-
|
| 239 |
-
2025-02-
|
| 240 |
-
2025-02-
|
| 241 |
-
2025-02-
|
| 242 |
-
2025-02-
|
| 243 |
-
2025-02-
|
| 244 |
-
2025-02-
|
| 245 |
-
2025-02-
|
| 246 |
-
2025-02-
|
| 247 |
-
2025-02-
|
| 248 |
-
2025-02-
|
| 249 |
-
2025-02-
|
| 250 |
-
2025-02-
|
| 251 |
-
2025-02-
|
| 252 |
-
2025-02-
|
| 253 |
-
2025-02-
|
| 254 |
-
2025-02-
|
| 255 |
-
2025-02-
|
| 256 |
-
2025-02-
|
| 257 |
-
2025-02-
|
| 258 |
-
2025-02-
|
| 259 |
-
2025-02-
|
| 260 |
-
2025-02-
|
| 261 |
-
2025-02-
|
| 262 |
-
2025-02-
|
| 263 |
-
2025-02-
|
| 264 |
-
2025-02-
|
| 265 |
-
2025-02-
|
| 266 |
-
2025-02-
|
| 267 |
-
2025-02-
|
| 268 |
-
2025-02-
|
| 269 |
-
2025-02-
|
| 270 |
-
2025-02-
|
| 271 |
-
2025-02-
|
| 272 |
-
2025-02-
|
| 273 |
-
2025-02-
|
| 274 |
-
2025-02-
|
| 275 |
-
2025-02-
|
| 276 |
-
2025-02-
|
| 277 |
-
2025-02-
|
| 278 |
-
2025-02-
|
| 279 |
-
2025-02-
|
| 280 |
-
2025-02-
|
| 281 |
-
2025-02-
|
| 282 |
-
2025-02-
|
| 283 |
-
2025-02-18,22:27:24 | INFO | lock_image_freeze_bn_stats: False
|
| 284 |
-
2025-02-18,22:27:24 | INFO | lock_image_unlocked_groups: 0
|
| 285 |
-
2025-02-18,22:27:24 | INFO | lock_text: False
|
| 286 |
-
2025-02-18,22:27:24 | INFO | lock_text_freeze_layer_norm: False
|
| 287 |
-
2025-02-18,22:27:24 | INFO | lock_text_unlocked_layers: 0
|
| 288 |
-
2025-02-18,22:27:24 | INFO | log_every_n_steps: 128
|
| 289 |
-
2025-02-18,22:27:24 | INFO | log_level: 20
|
| 290 |
-
2025-02-18,22:27:24 | INFO | log_local: False
|
| 291 |
-
2025-02-18,22:27:24 | INFO | log_path: ./logs-lr1e-3-datacomp/exp_rope_clipcls_vit_b16_s512m_bs8k/out.log
|
| 292 |
-
2025-02-18,22:27:24 | INFO | logs: ./logs-lr1e-3-datacomp
|
| 293 |
-
2025-02-18,22:27:24 | INFO | lr: 0.001
|
| 294 |
-
2025-02-18,22:27:24 | INFO | lr_cooldown_end: 0.0
|
| 295 |
-
2025-02-18,22:27:24 | INFO | lr_cooldown_power: 1.0
|
| 296 |
-
2025-02-18,22:27:24 | INFO | lr_scheduler: cosine
|
| 297 |
-
2025-02-18,22:27:24 | INFO | max_seq_len: 100000000000000
|
| 298 |
-
2025-02-18,22:27:24 | INFO | model: CLIPCLS-ViT-B-16-NDR
|
| 299 |
-
2025-02-18,22:27:24 | INFO | name: exp_rope_clipcls_vit_b16_s512m_bs8k
|
| 300 |
-
2025-02-18,22:27:24 | INFO | native_dynamic_resolution: True
|
| 301 |
-
2025-02-18,22:27:24 | INFO | no_set_device_rank: False
|
| 302 |
-
2025-02-18,22:27:24 | INFO | only_packing: True
|
| 303 |
-
2025-02-18,22:27:24 | INFO | precision: amp_bfloat16
|
| 304 |
-
2025-02-18,22:27:24 | INFO | pretrained:
|
| 305 |
-
2025-02-18,22:27:24 | INFO | pretrained_image:
|
| 306 |
-
2025-02-18,22:27:24 | INFO | pretrained_text:
|
| 307 |
-
2025-02-18,22:27:24 | INFO | rank: 0
|
| 308 |
-
2025-02-18,22:27:24 | INFO | remote_sync: None
|
| 309 |
-
2025-02-18,22:27:24 | INFO | remote_sync_frequency: 300
|
| 310 |
-
2025-02-18,22:27:24 | INFO | remote_sync_protocol: s3
|
| 311 |
-
2025-02-18,22:27:24 | INFO | report_to: wandb
|
| 312 |
-
2025-02-18,22:27:24 | INFO | resume: None
|
| 313 |
-
2025-02-18,22:27:24 | INFO | rope_attn_num_heads: 12
|
| 314 |
-
2025-02-18,22:27:24 | INFO | rope_model_width: 768
|
| 315 |
-
2025-02-18,22:27:24 | INFO | save_every_n_steps: 6104
|
| 316 |
-
2025-02-18,22:27:24 | INFO | save_frequency: 1
|
| 317 |
-
2025-02-18,22:27:24 | INFO | save_most_recent: False
|
| 318 |
-
2025-02-18,22:27:24 | INFO | seed: 0
|
| 319 |
-
2025-02-18,22:27:24 | INFO | siglip: False
|
| 320 |
-
2025-02-18,22:27:24 | INFO | skip_scheduler: False
|
| 321 |
-
2025-02-18,22:27:24 | INFO | tensorboard: False
|
| 322 |
-
2025-02-18,22:27:24 | INFO | tensorboard_path:
|
| 323 |
-
2025-02-18,22:27:24 | INFO | torchcompile: False
|
| 324 |
-
2025-02-18,22:27:24 | INFO | torchscript: False
|
| 325 |
-
2025-02-18,22:27:24 | INFO | trace: False
|
| 326 |
-
2025-02-18,22:27:24 | INFO | train_data: /mnt/bn/bytenas-weixian/data/Recap-DataComp-1B-Dataset/{000000..140146}.tar
|
| 327 |
-
2025-02-18,22:27:24 | INFO | train_data_upsampling_factors: None
|
| 328 |
-
2025-02-18,22:27:24 | INFO | train_num_samples: 128000000
|
| 329 |
-
2025-02-18,22:27:24 | INFO | use_bn_sync: False
|
| 330 |
-
2025-02-18,22:27:24 | INFO | use_bnb_linear: None
|
| 331 |
-
2025-02-18,22:27:24 | INFO | val_data: None
|
| 332 |
-
2025-02-18,22:27:24 | INFO | val_frequency: 1
|
| 333 |
-
2025-02-18,22:27:24 | INFO | val_num_samples: None
|
| 334 |
-
2025-02-18,22:27:24 | INFO | val_steps: 6104
|
| 335 |
-
2025-02-18,22:27:24 | INFO | wandb: True
|
| 336 |
-
2025-02-18,22:27:24 | INFO | wandb_notes:
|
| 337 |
-
2025-02-18,22:27:24 | INFO | wandb_project_name: cls-clip-NDR
|
| 338 |
-
2025-02-18,22:27:24 | INFO | warmup: 500
|
| 339 |
-
2025-02-18,22:27:24 | INFO | wd: 0.2
|
| 340 |
-
2025-02-18,22:27:24 | INFO | workers: 1
|
| 341 |
-
2025-02-18,22:27:24 | INFO | world_size: 8
|
| 342 |
-
2025-02-18,22:27:24 | INFO | zeroshot_frequency: 2
|
| 343 |
-
2025-02-18,22:27:24 | INFO | zeroshot_steps: 6104
|
| 344 |
-
2025-02-18,22:27:39 | INFO | Start epoch 0
|
| 345 |
-
2025-02-18,22:27:52 | INFO | Train Epoch: 0 [ 8192/128000000 (0%)] Data (t): 8.211 Batch (t): 13.321, 614.970/s, 76.8713/s/gpu LR: 0.000002 Logit Scale: 14.286 Class_loss: 11.332 (11.332) Contrastive_loss: 9.1303 (9.1303) Loss: 20.462 (20.462)
|
| 346 |
-
2025-02-18,22:31:10 | WARNING | Handling webdataset error (OSError('image file is truncated (44 bytes not processed)')). Ignoring.
|
| 347 |
-
2025-02-18,22:38:09 | WARNING | Handling webdataset error (OSError('image file is truncated (47 bytes not processed)')). Ignoring.
|
| 348 |
-
2025-02-18,22:38:28 | INFO | Train Epoch: 0 [ 1056768/128000000 (1%)] Data (t): 1.448 Batch (t): 4.961, 1710.76/s, 213.845/s/gpu LR: 0.000258 Logit Scale: 14.281 Class_loss: 9.0799 (10.206) Contrastive_loss: 8.5317 (8.8310) Loss: 17.612 (19.037)
|
| 349 |
-
2025-02-18,22:49:10 | INFO | Train Epoch: 0 [ 2105344/128000000 (2%)] Data (t): 0.498 Batch (t): 5.016, 1737.53/s, 217.191/s/gpu LR: 0.000514 Logit Scale: 14.218 Class_loss: 8.9842 (9.7987) Contrastive_loss: 8.3782 (8.6801) Loss: 17.362 (18.479)
|
| 350 |
-
2025-02-18,22:58:05 | WARNING | Handling webdataset error (OSError('image file is truncated (68 bytes not processed)')). Ignoring.
|
| 351 |
-
2025-02-18,22:59:47 | INFO | Train Epoch: 0 [ 3153920/128000000 (2%)] Data (t): 0.493 Batch (t): 4.981, 1644.52/s, 205.565/s/gpu LR: 0.000770 Logit Scale: 14.176 Class_loss: 8.9318 (9.5820) Contrastive_loss: 8.2162 (8.5641) Loss: 17.148 (18.146)
|
| 352 |
-
2025-02-18,23:10:26 | INFO | Train Epoch: 0 [ 4202496/128000000 (3%)] Data (t): 0.500 Batch (t): 4.993, 1682.98/s, 210.373/s/gpu LR: 0.001000 Logit Scale: 14.169 Class_loss: 8.9133 (9.4483) Contrastive_loss: 8.0103 (8.4533) Loss: 16.924 (17.902)
|
| 353 |
-
2025-02-18,23:15:29 | WARNING | Handling webdataset error (OSError('image file is truncated (25 bytes not processed)')). Ignoring.
|
| 354 |
-
2025-02-18,23:21:03 | INFO | Train Epoch: 0 [ 5251072/128000000 (4%)] Data (t): 0.490 Batch (t): 4.977, 1697.53/s, 212.191/s/gpu LR: 0.001000 Logit Scale: 14.355 Class_loss: 8.8144 (9.3426) Contrastive_loss: 7.6759 (8.3238) Loss: 16.490 (17.666)
|
| 355 |
-
2025-02-18,23:25:24 | WARNING | Handling webdataset error (OSError('image file is truncated (35 bytes not processed)')). Ignoring.
|
| 356 |
-
2025-02-18,23:31:34 | INFO | Train Epoch: 0 [ 6299648/128000000 (5%)] Data (t): 0.490 Batch (t): 4.931, 1673.50/s, 209.187/s/gpu LR: 0.001000 Logit Scale: 14.754 Class_loss: 8.7440 (9.2571) Contrastive_loss: 7.4504 (8.1990) Loss: 16.194 (17.456)
|
| 357 |
-
2025-02-18,23:42:07 | INFO | Train Epoch: 0 [ 7348224/128000000 (6%)] Data (t): 0.497 Batch (t): 4.941, 1700.40/s, 212.551/s/gpu LR: 0.001000 Logit Scale: 15.404 Class_loss: 8.7063 (9.1882) Contrastive_loss: 7.2018 (8.0744) Loss: 15.908 (17.263)
|
| 358 |
-
2025-02-18,23:52:39 | INFO | Train Epoch: 0 [ 8396800/128000000 (7%)] Data (t): 0.493 Batch (t): 4.934, 1599.86/s, 199.983/s/gpu LR: 0.001000 Logit Scale: 16.300 Class_loss: 8.7078 (9.1349) Contrastive_loss: 7.0773 (7.9636) Loss: 15.785 (17.098)
|
| 359 |
-
2025-02-19,00:03:15 | INFO | Train Epoch: 0 [ 9445376/128000000 (7%)] Data (t): 1.257 Batch (t): 4.974, 1613.74/s, 201.718/s/gpu LR: 0.001000 Logit Scale: 17.299 Class_loss: 8.6476 (9.0861) Contrastive_loss: 6.9470 (7.8619) Loss: 15.595 (16.948)
|
| 360 |
-
2025-02-19,00:13:42 | INFO | Train Epoch: 0 [ 10493952/128000000 (8%)] Data (t): 0.466 Batch (t): 4.897, 1852.39/s, 231.548/s/gpu LR: 0.001000 Logit Scale: 18.280 Class_loss: 8.6103 (9.0429) Contrastive_loss: 6.7120 (7.7574) Loss: 15.322 (16.800)
|
| 361 |
-
2025-02-19,00:19:08 | WARNING | Handling webdataset error (OSError('image file is truncated (104 bytes not processed)')). Ignoring.
|
| 362 |
-
2025-02-19,00:20:34 | WARNING | Handling webdataset error (OSError('image file is truncated (21 bytes not processed)')). Ignoring.
|
| 363 |
-
2025-02-19,00:24:12 | INFO | Train Epoch: 0 [ 11542528/128000000 (9%)] Data (t): 0.480 Batch (t): 4.919, 1671.42/s, 208.928/s/gpu LR: 0.000999 Logit Scale: 18.969 Class_loss: 8.6012 (9.0061) Contrastive_loss: 6.8561 (7.6823) Loss: 15.457 (16.688)
|
| 364 |
-
2025-02-19,00:34:47 | INFO | Train Epoch: 0 [ 12591104/128000000 (10%)] Data (t): 0.483 Batch (t): 4.966, 1573.27/s, 196.658/s/gpu LR: 0.000999 Logit Scale: 19.978 Class_loss: 8.5730 (8.9728) Contrastive_loss: 6.7427 (7.6100) Loss: 15.316 (16.583)
|
| 365 |
-
2025-02-19,00:35:04 | WARNING | Handling webdataset error (OSError('image file is truncated (68 bytes not processed)')). Ignoring.
|
| 366 |
-
2025-02-19,00:44:17 | WARNING | Handling webdataset error (OSError('image file is truncated (28 bytes not processed)')). Ignoring.
|
| 367 |
-
2025-02-19,00:45:25 | INFO | Train Epoch: 0 [ 13639680/128000000 (11%)] Data (t): 0.491 Batch (t): 4.985, 1665.19/s, 208.149/s/gpu LR: 0.000999 Logit Scale: 20.849 Class_loss: 8.6300 (8.9483) Contrastive_loss: 6.6130 (7.5388) Loss: 15.243 (16.487)
|
| 368 |
-
2025-02-19,00:55:59 | INFO | Train Epoch: 0 [ 14688256/128000000 (11%)] Data (t): 0.493 Batch (t): 4.952, 1646.66/s, 205.833/s/gpu LR: 0.000999 Logit Scale: 21.675 Class_loss: 8.4805 (8.9171) Contrastive_loss: 6.3305 (7.4582) Loss: 14.811 (16.375)
|
| 369 |
-
2025-02-19,01:00:37 | WARNING | Handling webdataset error (OSError('image file is truncated (5 bytes not processed)')). Ignoring.
|
| 370 |
-
2025-02-19,01:03:11 | WARNING | Handling webdataset error (OSError('image file is truncated (32 bytes not processed)')). Ignoring.
|
| 371 |
-
2025-02-19,01:06:40 | INFO | Train Epoch: 0 [ 15736832/128000000 (12%)] Data (t): 0.511 Batch (t): 5.008, 1549.28/s, 193.660/s/gpu LR: 0.000999 Logit Scale: 22.294 Class_loss: 8.5770 (8.8958) Contrastive_loss: 6.5645 (7.4024) Loss: 15.142 (16.298)
|
| 372 |
-
2025-02-19,01:15:22 | WARNING | Handling webdataset error (OSError('image file is truncated (23 bytes not processed)')). Ignoring.
|
| 373 |
-
2025-02-19,01:17:17 | INFO | Train Epoch: 0 [ 16785408/128000000 (13%)] Data (t): 0.505 Batch (t): 4.971, 1574.20/s, 196.775/s/gpu LR: 0.000998 Logit Scale: 22.863 Class_loss: 8.5862 (8.8776) Contrastive_loss: 6.6149 (7.3560) Loss: 15.201 (16.234)
|
| 374 |
-
2025-02-19,01:27:49 | INFO | Train Epoch: 0 [ 17833984/128000000 (14%)] Data (t): 0.484 Batch (t): 4.942, 1646.81/s, 205.852/s/gpu LR: 0.000998 Logit Scale: 23.389 Class_loss: 8.5213 (8.8578) Contrastive_loss: 6.4658 (7.3066) Loss: 14.987 (16.164)
|
| 375 |
-
2025-02-19,01:38:25 | INFO | Train Epoch: 0 [ 18882560/128000000 (15%)] Data (t): 0.484 Batch (t): 4.971, 1672.64/s, 209.080/s/gpu LR: 0.000998 Logit Scale: 24.044 Class_loss: 8.5339 (8.8408) Contrastive_loss: 6.2940 (7.2533) Loss: 14.828 (16.094)
|
| 376 |
-
2025-02-19,01:49:08 | INFO | Train Epoch: 0 [ 19931136/128000000 (16%)] Data (t): 0.496 Batch (t): 5.025, 1633.70/s, 204.213/s/gpu LR: 0.000998 Logit Scale: 24.474 Class_loss: 8.5713 (8.8273) Contrastive_loss: 6.4502 (7.2131) Loss: 15.021 (16.040)
|
| 377 |
-
2025-02-19,01:59:41 | INFO | Train Epoch: 0 [ 20979712/128000000 (16%)] Data (t): 0.494 Batch (t): 4.943, 1630.90/s, 203.863/s/gpu LR: 0.000997 Logit Scale: 24.856 Class_loss: 8.5903 (8.8160) Contrastive_loss: 6.5037 (7.1794) Loss: 15.094 (15.995)
|
| 378 |
-
2025-02-19,02:10:16 | INFO | Train Epoch: 0 [ 22028288/128000000 (17%)] Data (t): 0.485 Batch (t): 4.960, 1702.35/s, 212.794/s/gpu LR: 0.000997 Logit Scale: 25.257 Class_loss: 8.5090 (8.8021) Contrastive_loss: 6.5430 (7.1504) Loss: 15.052 (15.952)
|
| 379 |
-
2025-02-19,02:20:51 | INFO | Train Epoch: 0 [ 23076864/128000000 (18%)] Data (t): 0.486 Batch (t): 4.960, 1659.41/s, 207.427/s/gpu LR: 0.000997 Logit Scale: 25.485 Class_loss: 8.5440 (8.7908) Contrastive_loss: 6.4479 (7.1199) Loss: 14.992 (15.911)
|
| 380 |
-
2025-02-19,02:25:28 | WARNING | Handling webdataset error (OSError('image file is truncated (114 bytes not processed)')). Ignoring.
|
| 381 |
-
2025-02-19,02:31:25 | INFO | Train Epoch: 0 [ 24125440/128000000 (19%)] Data (t): 0.916 Batch (t): 4.951, 1642.16/s, 205.270/s/gpu LR: 0.000996 Logit Scale: 25.798 Class_loss: 8.6041 (8.7831) Contrastive_loss: 6.4717 (7.0929) Loss: 15.076 (15.876)
|
| 382 |
-
2025-02-19,02:37:37 | WARNING | Handling webdataset error (OSError('image file is truncated (57 bytes not processed)')). Ignoring.
|
| 383 |
-
2025-02-19,02:42:01 | INFO | Train Epoch: 0 [ 25174016/128000000 (20%)] Data (t): 1.321 Batch (t): 4.968, 1728.61/s, 216.076/s/gpu LR: 0.000996 Logit Scale: 25.977 Class_loss: 8.7193 (8.7805) Contrastive_loss: 7.1729 (7.0961) Loss: 15.892 (15.877)
|
| 384 |
-
2025-02-19,02:51:06 | WARNING | Handling webdataset error (OSError('image file is truncated (75 bytes not processed)')). Ignoring.
|
| 385 |
-
2025-02-19,02:52:30 | INFO | Train Epoch: 0 [ 26222592/128000000 (20%)] Data (t): 0.487 Batch (t): 4.916, 1607.15/s, 200.893/s/gpu LR: 0.000995 Logit Scale: 26.240 Class_loss: 8.5990 (8.7735) Contrastive_loss: 6.6981 (7.0808) Loss: 15.297 (15.854)
|
| 386 |
-
2025-02-19,02:57:23 | WARNING | Handling webdataset error (OSError('image file is truncated (0 bytes not processed)')). Ignoring.
|
| 387 |
-
2025-02-19,03:03:01 | INFO | Train Epoch: 0 [ 27271168/128000000 (21%)] Data (t): 1.049 Batch (t): 4.935, 1699.78/s, 212.473/s/gpu LR: 0.000995 Logit Scale: 26.634 Class_loss: 8.5412 (8.7649) Contrastive_loss: 6.5002 (7.0593) Loss: 15.041 (15.824)
|
| 388 |
-
2025-02-19,03:07:43 | WARNING | Handling webdataset error (OSError('image file is truncated (59 bytes not processed)')). Ignoring.
|
| 389 |
-
2025-02-19,03:13:37 | INFO | Train Epoch: 0 [ 28319744/128000000 (22%)] Data (t): 1.544 Batch (t): 4.969, 1656.25/s, 207.031/s/gpu LR: 0.000994 Logit Scale: 27.127 Class_loss: 8.6210 (8.7598) Contrastive_loss: 6.4448 (7.0373) Loss: 15.066 (15.797)
|
| 390 |
-
2025-02-19,03:24:06 | INFO | Train Epoch: 0 [ 29368320/128000000 (23%)] Data (t): 1.353 Batch (t): 4.909, 1660.71/s, 207.589/s/gpu LR: 0.000994 Logit Scale: 27.395 Class_loss: 8.5946 (8.7541) Contrastive_loss: 6.6205 (7.0230) Loss: 15.215 (15.777)
|
| 391 |
-
2025-02-19,03:30:30 | WARNING | Handling webdataset error (OSError('image file is truncated (1 bytes not processed)')). Ignoring.
|
| 392 |
-
2025-02-19,03:34:33 | INFO | Train Epoch: 0 [ 30416896/128000000 (24%)] Data (t): 0.673 Batch (t): 4.902, 1690.04/s, 211.256/s/gpu LR: 0.000993 Logit Scale: 27.535 Class_loss: 8.5433 (8.7471) Contrastive_loss: 6.5007 (7.0055) Loss: 15.044 (15.753)
|
| 393 |
-
2025-02-19,03:45:07 | INFO | Train Epoch: 0 [ 31465472/128000000 (25%)] Data (t): 0.557 Batch (t): 4.954, 1647.53/s, 205.941/s/gpu LR: 0.000993 Logit Scale: 27.862 Class_loss: 8.5895 (8.7420) Contrastive_loss: 6.4604 (6.9880) Loss: 15.050 (15.730)
|
| 394 |
-
2025-02-19,03:55:41 | INFO | Train Epoch: 0 [ 32514048/128000000 (25%)] Data (t): 1.361 Batch (t): 4.951, 1643.81/s, 205.476/s/gpu LR: 0.000992 Logit Scale: 28.190 Class_loss: 8.6014 (8.7376) Contrastive_loss: 6.5170 (6.9732) Loss: 15.118 (15.711)
|
| 395 |
-
2025-02-19,04:06:13 | INFO | Train Epoch: 0 [ 33562624/128000000 (26%)] Data (t): 1.013 Batch (t): 4.936, 1681.33/s, 210.166/s/gpu LR: 0.000992 Logit Scale: 28.594 Class_loss: 8.5567 (8.7321) Contrastive_loss: 6.4205 (6.9565) Loss: 14.977 (15.689)
|
| 396 |
-
2025-02-19,04:15:58 | WARNING | Handling webdataset error (OSError('image file is truncated (0 bytes not processed)')). Ignoring.
|
| 397 |
-
2025-02-19,04:16:46 | INFO | Train Epoch: 0 [ 34611200/128000000 (27%)] Data (t): 1.355 Batch (t): 4.945, 1656.56/s, 207.070/s/gpu LR: 0.000991 Logit Scale: 29.041 Class_loss: 8.5748 (8.7275) Contrastive_loss: 6.4534 (6.9417) Loss: 15.028 (15.669)
|
| 398 |
-
2025-02-19,04:27:12 | INFO | Train Epoch: 0 [ 35659776/128000000 (28%)] Data (t): 1.040 Batch (t): 4.894, 1791.95/s, 223.993/s/gpu LR: 0.000991 Logit Scale: 29.346 Class_loss: 8.6309 (8.7247) Contrastive_loss: 6.5668 (6.9310) Loss: 15.198 (15.656)
|
| 399 |
-
2025-02-19,04:37:43 | INFO | Train Epoch: 0 [ 36708352/128000000 (29%)] Data (t): 0.480 Batch (t): 4.928, 1726.57/s, 215.821/s/gpu LR: 0.000990 Logit Scale: 29.693 Class_loss: 8.5747 (8.7205) Contrastive_loss: 6.3854 (6.9158) Loss: 14.960 (15.636)
|
| 400 |
-
2025-02-19,04:48:18 | INFO | Train Epoch: 0 [ 37756928/128000000 (29%)] Data (t): 0.478 Batch (t): 4.962, 1546.40/s, 193.300/s/gpu LR: 0.000989 Logit Scale: 29.898 Class_loss: 8.5614 (8.7162) Contrastive_loss: 6.4141 (6.9023) Loss: 14.975 (15.619)
|
| 401 |
-
2025-02-19,04:49:59 | WARNING | Handling webdataset error (OSError('image file is truncated (18 bytes not processed)')). Ignoring.
|
| 402 |
-
2025-02-19,04:51:12 | WARNING | Handling webdataset error (OSError('image file is truncated (59 bytes not processed)')). Ignoring.
|
| 403 |
-
2025-02-19,04:58:53 | INFO | Train Epoch: 0 [ 38805504/128000000 (30%)] Data (t): 0.490 Batch (t): 4.957, 1655.67/s, 206.958/s/gpu LR: 0.000989 Logit Scale: 30.182 Class_loss: 8.5812 (8.7127) Contrastive_loss: 6.3257 (6.8871) Loss: 14.907 (15.600)
|
| 404 |
-
2025-02-19,05:09:25 | INFO | Train Epoch: 0 [ 39854080/128000000 (31%)] Data (t): 0.498 Batch (t): 4.942, 1677.92/s, 209.740/s/gpu LR: 0.000988 Logit Scale: 30.647 Class_loss: 8.5371 (8.7082) Contrastive_loss: 6.2606 (6.8710) Loss: 14.798 (15.579)
|
| 405 |
-
2025-02-19,05:09:47 | WARNING | Handling webdataset error (OSError('image file is truncated (84 bytes not processed)')). Ignoring.
|
| 406 |
-
2025-02-19,05:19:59 | INFO | Train Epoch: 0 [ 40902656/128000000 (32%)] Data (t): 0.496 Batch (t): 4.950, 1724.45/s, 215.557/s/gpu LR: 0.000987 Logit Scale: 30.845 Class_loss: 8.6178 (8.7059) Contrastive_loss: 6.5260 (6.8624) Loss: 15.144 (15.568)
|
| 407 |
-
2025-02-19,05:30:31 | INFO | Train Epoch: 0 [ 41951232/128000000 (33%)] Data (t): 0.742 Batch (t): 4.937, 1609.72/s, 201.215/s/gpu LR: 0.000986 Logit Scale: 31.070 Class_loss: 8.5946 (8.7032) Contrastive_loss: 6.3455 (6.8498) Loss: 14.940 (15.553)
|
| 408 |
-
2025-02-19,05:41:04 | INFO | Train Epoch: 0 [ 42999808/128000000 (34%)] Data (t): 1.393 Batch (t): 4.943, 1741.95/s, 217.744/s/gpu LR: 0.000986 Logit Scale: 31.166 Class_loss: 8.5977 (8.7007) Contrastive_loss: 6.5869 (6.8435) Loss: 15.185 (15.544)
|
| 409 |
-
2025-02-19,05:42:30 | WARNING | Handling webdataset error (OSError('image file is truncated (18 bytes not processed)')). Ignoring.
|
| 410 |
-
2025-02-19,05:51:33 | INFO | Train Epoch: 0 [ 44048384/128000000 (34%)] Data (t): 0.656 Batch (t): 4.920, 1751.76/s, 218.970/s/gpu LR: 0.000985 Logit Scale: 31.222 Class_loss: 8.6009 (8.6984) Contrastive_loss: 6.4423 (6.8342) Loss: 15.043 (15.533)
|
| 411 |
-
2025-02-19,05:58:15 | WARNING | Handling webdataset error (OSError('image file is truncated (49 bytes not processed)')). Ignoring.
|
| 412 |
-
2025-02-19,06:02:11 | INFO | Train Epoch: 0 [ 45096960/128000000 (35%)] Data (t): 1.539 Batch (t): 4.982, 1662.70/s, 207.837/s/gpu LR: 0.000984 Logit Scale: 31.315 Class_loss: 8.6310 (8.6969) Contrastive_loss: 6.6083 (6.8291) Loss: 15.239 (15.526)
|
| 413 |
-
2025-02-19,06:08:17 | WARNING | Handling webdataset error (OSError('image file is truncated (5 bytes not processed)')). Ignoring.
|
| 414 |
-
2025-02-19,06:12:40 | INFO | Train Epoch: 0 [ 46145536/128000000 (36%)] Data (t): 1.486 Batch (t): 4.911, 1714.24/s, 214.280/s/gpu LR: 0.000983 Logit Scale: 31.623 Class_loss: 8.5829 (8.6943) Contrastive_loss: 6.2714 (6.8167) Loss: 14.854 (15.511)
|
| 415 |
-
2025-02-19,06:23:13 | INFO | Train Epoch: 0 [ 47194112/128000000 (37%)] Data (t): 0.746 Batch (t): 4.950, 1678.94/s, 209.868/s/gpu LR: 0.000982 Logit Scale: 32.004 Class_loss: 8.5654 (8.6915) Contrastive_loss: 6.4099 (6.8078) Loss: 14.975 (15.499)
|
| 416 |
-
2025-02-19,06:32:49 | WARNING | Handling webdataset error (OSError('image file is truncated (86 bytes not processed)')). Ignoring.
|
| 417 |
-
2025-02-19,06:34:01 | INFO | Train Epoch: 0 [ 48242688/128000000 (38%)] Data (t): 1.593 Batch (t): 5.059, 1715.50/s, 214.437/s/gpu LR: 0.000981 Logit Scale: 32.295 Class_loss: 8.6135 (8.6899) Contrastive_loss: 6.3449 (6.7980) Loss: 14.958 (15.488)
|
| 418 |
-
2025-02-19,06:44:45 | INFO | Train Epoch: 0 [ 49291264/128000000 (39%)] Data (t): 1.587 Batch (t): 5.031, 1553.26/s, 194.157/s/gpu LR: 0.000981 Logit Scale: 32.571 Class_loss: 8.6077 (8.6881) Contrastive_loss: 6.3209 (6.7880) Loss: 14.929 (15.476)
|
| 419 |
-
2025-02-19,06:52:07 | INFO | Starting zero-shot imagenet.
|
| 420 |
-
2025-02-19,06:52:07 | INFO | Building zero-shot classifier
|
| 421 |
-
2025-02-19,06:52:17 | INFO | Using classifier
|
| 422 |
-
2025-02-19,12:05:22 | INFO | No latest resume checkpoint found in ./logs-lr1e-3-datacomp/exp_rope_clipcls_vit_b16_s512m_bs8k/checkpoints.
|
| 423 |
-
2025-02-19,12:05:26 | INFO | Running in distributed mode with multiple processes. Device: cuda:0.Process (global: 0, local 0), total 8.
|
| 424 |
-
2025-02-19,12:05:27 | INFO | Loaded CLIPCLS-ViT-B-16-NDR model config.
|
| 425 |
-
2025-02-19,12:05:28 | INFO | Model:
|
| 426 |
-
2025-02-19,12:05:28 | INFO | CLIPCLS(
|
| 427 |
-
(visual): NDRVisionTransformer(
|
| 428 |
-
(conv1): Conv2d(3, 768, kernel_size=(16, 16), stride=(16, 16), bias=False)
|
| 429 |
-
(projection_embd): Linear(in_features=768, out_features=768, bias=True)
|
| 430 |
-
(patch_dropout): Identity()
|
| 431 |
-
(ln_pre): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 432 |
-
(transformer): RoPETransformer(
|
| 433 |
-
(resblocks): ModuleList(
|
| 434 |
-
(0-11): 12 x CustomResidualRoPEAttentionBlock(
|
| 435 |
-
(ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 436 |
-
(attn): RoPEAttention(
|
| 437 |
-
(attn_drop): Dropout(p=0.0, inplace=False)
|
| 438 |
-
(out_proj): Linear(in_features=768, out_features=768, bias=True)
|
| 439 |
-
(out_drop): Dropout(p=0.0, inplace=False)
|
| 440 |
-
)
|
| 441 |
-
(ln_attn): Identity()
|
| 442 |
-
(ls_1): Identity()
|
| 443 |
-
(ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 444 |
-
(mlp): Sequential(
|
| 445 |
-
(c_fc): Linear(in_features=768, out_features=3072, bias=True)
|
| 446 |
-
(gelu): GELU(approximate='none')
|
| 447 |
-
(ln): Identity()
|
| 448 |
-
(c_proj): Linear(in_features=3072, out_features=768, bias=True)
|
| 449 |
-
)
|
| 450 |
-
(ls_2): Identity()
|
| 451 |
-
)
|
| 452 |
-
)
|
| 453 |
-
)
|
| 454 |
-
(ln_post): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 455 |
-
)
|
| 456 |
-
(text): TextTransformer(
|
| 457 |
-
(token_embedding): Embedding(49408, 512)
|
| 458 |
-
(transformer): Transformer(
|
| 459 |
-
(resblocks): ModuleList(
|
| 460 |
-
(0-11): 12 x ResidualAttentionBlock(
|
| 461 |
-
(ln_1): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
| 462 |
-
(attn): MultiheadAttention(
|
| 463 |
-
(out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)
|
| 464 |
-
)
|
| 465 |
-
(ls_1): Identity()
|
| 466 |
-
(ln_2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
| 467 |
-
(mlp): Sequential(
|
| 468 |
-
(c_fc): Linear(in_features=512, out_features=2048, bias=True)
|
| 469 |
-
(gelu): GELU(approximate='none')
|
| 470 |
-
(c_proj): Linear(in_features=2048, out_features=512, bias=True)
|
| 471 |
-
)
|
| 472 |
-
(ls_2): Identity()
|
| 473 |
-
)
|
| 474 |
-
)
|
| 475 |
-
)
|
| 476 |
-
(ln_final): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
| 477 |
-
)
|
| 478 |
-
(text_decoder): MixClsHead(
|
| 479 |
-
(mlps): ModuleList()
|
| 480 |
-
(ln_mlp): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 481 |
-
(text_projection): Linear(in_features=768, out_features=49408, bias=True)
|
| 482 |
-
)
|
| 483 |
-
)
|
| 484 |
-
2025-02-19,12:05:28 | INFO | Params:
|
| 485 |
-
2025-02-19,12:05:28 | INFO | NDR_patch_size: 16
|
| 486 |
-
2025-02-19,12:05:28 | INFO | accum_freq: 1
|
| 487 |
-
2025-02-19,12:05:28 | INFO | aug_cfg: {}
|
| 488 |
-
2025-02-19,12:05:28 | INFO | batch_size: 1024
|
| 489 |
-
2025-02-19,12:05:28 | INFO | beta1: 0.9
|
| 490 |
-
2025-02-19,12:05:28 | INFO | beta2: 0.98
|
| 491 |
-
2025-02-19,12:05:28 | INFO | checkpoint_path: ./logs-lr1e-3-datacomp/exp_rope_clipcls_vit_b16_s512m_bs8k/checkpoints
|
| 492 |
-
2025-02-19,12:05:28 | INFO | coca_caption_loss_weight: 2.0
|
| 493 |
-
2025-02-19,12:05:28 | INFO | coca_contrastive_loss_weight: 1.0
|
| 494 |
-
2025-02-19,12:05:28 | INFO | copy_codebase: False
|
| 495 |
-
2025-02-19,12:05:28 | INFO | csv_caption_key: title
|
| 496 |
-
2025-02-19,12:05:28 | INFO | csv_img_key: filepath
|
| 497 |
-
2025-02-19,12:05:28 | INFO | csv_separator:
|
| 498 |
-
2025-02-19,12:05:28 | INFO | dataset_resampled: False
|
| 499 |
-
2025-02-19,12:05:28 | INFO | dataset_type: webdataset
|
| 500 |
-
2025-02-19,12:05:28 | INFO | ddp_static_graph: True
|
| 501 |
-
2025-02-19,12:05:28 | INFO | debug: False
|
| 502 |
-
2025-02-19,12:05:28 | INFO | delete_prev_step_ckpt: True
|
| 503 |
-
2025-02-19,12:05:28 | INFO | delete_previous_checkpoint: False
|
| 504 |
-
2025-02-19,12:05:28 | INFO | device: cuda:0
|
| 505 |
-
2025-02-19,12:05:28 | INFO | dist_backend: nccl
|
| 506 |
-
2025-02-19,12:05:28 | INFO | dist_url: env://
|
| 507 |
-
2025-02-19,12:05:28 | INFO | distill: False
|
| 508 |
-
2025-02-19,12:05:28 | INFO | distill_model: None
|
| 509 |
-
2025-02-19,12:05:28 | INFO | distill_pretrained: None
|
| 510 |
-
2025-02-19,12:05:28 | INFO | distributed: True
|
| 511 |
-
2025-02-19,12:05:28 | INFO | epochs: 4
|
| 512 |
-
2025-02-19,12:05:28 | INFO | epochs_cooldown: None
|
| 513 |
-
2025-02-19,12:05:28 | INFO | eps: 1e-06
|
| 514 |
-
2025-02-19,12:05:28 | INFO | force_custom_text: False
|
| 515 |
-
2025-02-19,12:05:28 | INFO | force_image_size: 224
|
| 516 |
-
2025-02-19,12:05:28 | INFO | force_patch_dropout: None
|
| 517 |
-
2025-02-19,12:05:29 | INFO | force_quick_gelu: False
|
| 518 |
-
2025-02-19,12:05:29 | INFO | gather_with_grad: True
|
| 519 |
-
2025-02-19,12:05:29 | INFO | global_batch_size: 8192
|
| 520 |
-
2025-02-19,12:05:29 | INFO | grad_checkpointing: True
|
| 521 |
-
2025-02-19,12:05:29 | INFO | grad_clip_norm: None
|
| 522 |
-
2025-02-19,12:05:29 | INFO | horovod: False
|
| 523 |
-
2025-02-19,12:05:29 | INFO | image_interpolation: None
|
| 524 |
-
2025-02-19,12:05:29 | INFO | image_mean: None
|
| 525 |
-
2025-02-19,12:05:29 | INFO | image_resize_mode: None
|
| 526 |
-
2025-02-19,12:05:29 | INFO | image_std: None
|
| 527 |
-
2025-02-19,12:05:29 | INFO | imagenet_v2: None
|
| 528 |
-
2025-02-19,12:05:29 | INFO | imagenet_val: /mnt/bn/zilongdata-hl/dataset/imagenet/val
|
| 529 |
-
2025-02-19,12:05:29 | INFO | local_loss: True
|
| 530 |
-
2025-02-19,12:05:29 | INFO | local_rank: 0
|
| 531 |
-
2025-02-19,12:05:29 | INFO | lock_image: False
|
| 532 |
-
2025-02-19,12:05:29 | INFO | lock_image_freeze_bn_stats: False
|
| 533 |
-
2025-02-19,12:05:29 | INFO | lock_image_unlocked_groups: 0
|
| 534 |
-
2025-02-19,12:05:29 | INFO | lock_text: False
|
| 535 |
-
2025-02-19,12:05:29 | INFO | lock_text_freeze_layer_norm: False
|
| 536 |
-
2025-02-19,12:05:29 | INFO | lock_text_unlocked_layers: 0
|
| 537 |
-
2025-02-19,12:05:29 | INFO | log_every_n_steps: 128
|
| 538 |
-
2025-02-19,12:05:29 | INFO | log_level: 20
|
| 539 |
-
2025-02-19,12:05:29 | INFO | log_local: False
|
| 540 |
-
2025-02-19,12:05:29 | INFO | log_path: ./logs-lr1e-3-datacomp/exp_rope_clipcls_vit_b16_s512m_bs8k/out.log
|
| 541 |
-
2025-02-19,12:05:29 | INFO | logs: ./logs-lr1e-3-datacomp
|
| 542 |
-
2025-02-19,12:05:29 | INFO | lr: 0.001
|
| 543 |
-
2025-02-19,12:05:29 | INFO | lr_cooldown_end: 0.0
|
| 544 |
-
2025-02-19,12:05:29 | INFO | lr_cooldown_power: 1.0
|
| 545 |
-
2025-02-19,12:05:29 | INFO | lr_scheduler: cosine
|
| 546 |
-
2025-02-19,12:05:29 | INFO | max_seq_len: 100000000000000
|
| 547 |
-
2025-02-19,12:05:29 | INFO | model: CLIPCLS-ViT-B-16-NDR
|
| 548 |
-
2025-02-19,12:05:29 | INFO | name: exp_rope_clipcls_vit_b16_s512m_bs8k
|
| 549 |
-
2025-02-19,12:05:29 | INFO | native_dynamic_resolution: True
|
| 550 |
-
2025-02-19,12:05:29 | INFO | no_set_device_rank: False
|
| 551 |
-
2025-02-19,12:05:29 | INFO | only_packing: True
|
| 552 |
-
2025-02-19,12:05:29 | INFO | precision: amp_bfloat16
|
| 553 |
-
2025-02-19,12:05:29 | INFO | pretrained:
|
| 554 |
-
2025-02-19,12:05:29 | INFO | pretrained_image:
|
| 555 |
-
2025-02-19,12:05:29 | INFO | pretrained_text:
|
| 556 |
-
2025-02-19,12:05:29 | INFO | rank: 0
|
| 557 |
-
2025-02-19,12:05:29 | INFO | remote_sync: None
|
| 558 |
-
2025-02-19,12:05:29 | INFO | remote_sync_frequency: 300
|
| 559 |
-
2025-02-19,12:05:29 | INFO | remote_sync_protocol: s3
|
| 560 |
-
2025-02-19,12:05:29 | INFO | report_to: wandb
|
| 561 |
-
2025-02-19,12:05:29 | INFO | resume: None
|
| 562 |
-
2025-02-19,12:05:29 | INFO | rope_attn_num_heads: 12
|
| 563 |
-
2025-02-19,12:05:29 | INFO | rope_model_width: 768
|
| 564 |
-
2025-02-19,12:05:29 | INFO | save_every_n_steps: 6104
|
| 565 |
-
2025-02-19,12:05:29 | INFO | save_frequency: 1
|
| 566 |
-
2025-02-19,12:05:29 | INFO | save_most_recent: False
|
| 567 |
-
2025-02-19,12:05:29 | INFO | seed: 0
|
| 568 |
-
2025-02-19,12:05:29 | INFO | siglip: False
|
| 569 |
-
2025-02-19,12:05:29 | INFO | skip_scheduler: False
|
| 570 |
-
2025-02-19,12:05:29 | INFO | tensorboard: False
|
| 571 |
-
2025-02-19,12:05:29 | INFO | tensorboard_path:
|
| 572 |
-
2025-02-19,12:05:29 | INFO | torchcompile: False
|
| 573 |
-
2025-02-19,12:05:29 | INFO | torchscript: False
|
| 574 |
-
2025-02-19,12:05:29 | INFO | trace: False
|
| 575 |
-
2025-02-19,12:05:29 | INFO | train_data: /mnt/bn/bytenas-weixian/data/Recap-DataComp-1B-Dataset/{000000..140146}.tar
|
| 576 |
-
2025-02-19,12:05:29 | INFO | train_data_upsampling_factors: None
|
| 577 |
-
2025-02-19,12:05:29 | INFO | train_num_samples: 128000000
|
| 578 |
-
2025-02-19,12:05:29 | INFO | use_bn_sync: False
|
| 579 |
-
2025-02-19,12:05:29 | INFO | use_bnb_linear: None
|
| 580 |
-
2025-02-19,12:05:29 | INFO | val_data: None
|
| 581 |
-
2025-02-19,12:05:29 | INFO | val_frequency: 1
|
| 582 |
-
2025-02-19,12:05:29 | INFO | val_num_samples: None
|
| 583 |
-
2025-02-19,12:05:29 | INFO | val_steps: 6104
|
| 584 |
-
2025-02-19,12:05:29 | INFO | wandb: True
|
| 585 |
-
2025-02-19,12:05:29 | INFO | wandb_notes:
|
| 586 |
-
2025-02-19,12:05:29 | INFO | wandb_project_name: cls-clip-NDR
|
| 587 |
-
2025-02-19,12:05:29 | INFO | warmup: 500
|
| 588 |
-
2025-02-19,12:05:29 | INFO | wd: 0.2
|
| 589 |
-
2025-02-19,12:05:29 | INFO | workers: 1
|
| 590 |
-
2025-02-19,12:05:29 | INFO | world_size: 8
|
| 591 |
-
2025-02-19,12:05:29 | INFO | zeroshot_frequency: 2
|
| 592 |
-
2025-02-19,12:05:29 | INFO | zeroshot_steps: 6104
|
| 593 |
-
2025-02-19,12:05:45 | INFO | Start epoch 0
|
| 594 |
-
2025-02-19,12:05:58 | INFO | Train Epoch: 0 [ 8192/128000000 (0%)] Data (t): 7.563 Batch (t): 12.760, 641.995/s, 80.2494/s/gpu LR: 0.000002 Logit Scale: 14.286 Class_loss: 11.323 (11.323) Contrastive_loss: 9.1263 (9.1263) Loss: 20.449 (20.449)
|
| 595 |
-
2025-02-19,12:09:00 | WARNING | Handling webdataset error (OSError('image file is truncated (44 bytes not processed)')). Ignoring.
|
| 596 |
-
2025-02-19,12:16:37 | INFO | Train Epoch: 0 [ 1056768/128000000 (1%)] Data (t): 1.530 Batch (t): 4.993, 1766.00/s, 220.750/s/gpu LR: 0.000258 Logit Scale: 14.283 Class_loss: 9.1091 (10.216) Contrastive_loss: 8.5592 (8.8427) Loss: 17.668 (19.059)
|
| 597 |
-
2025-02-19,12:17:05 | WARNING | Handling webdataset error (OSError('image file is truncated (47 bytes not processed)')). Ignoring.
|
| 598 |
-
2025-02-19,12:27:20 | INFO | Train Epoch: 0 [ 2105344/128000000 (2%)] Data (t): 1.563 Batch (t): 5.024, 1746.20/s, 218.275/s/gpu LR: 0.000514 Logit Scale: 14.237 Class_loss: 8.9689 (9.8003) Contrastive_loss: 8.3292 (8.6715) Loss: 17.298 (18.472)
|
| 599 |
-
2025-02-19,12:36:42 | WARNING | Handling webdataset error (OSError('image file is truncated (68 bytes not processed)')). Ignoring.
|
| 600 |
-
2025-02-19,12:38:00 | INFO | Train Epoch: 0 [ 3153920/128000000 (2%)] Data (t): 0.900 Batch (t): 4.999, 1755.67/s, 219.459/s/gpu LR: 0.000770 Logit Scale: 14.214 Class_loss: 8.9421 (9.5857) Contrastive_loss: 8.1398 (8.5386) Loss: 17.082 (18.124)
|
| 601 |
-
2025-02-19,12:48:40 | INFO | Train Epoch: 0 [ 4202496/128000000 (3%)] Data (t): 0.594 Batch (t): 5.001, 1614.41/s, 201.801/s/gpu LR: 0.001000 Logit Scale: 14.196 Class_loss: 8.8778 (9.4442) Contrastive_loss: 7.8747 (8.4058) Loss: 16.753 (17.850)
|
| 602 |
-
2025-02-19,12:53:51 | WARNING | Handling webdataset error (OSError('image file is truncated (25 bytes not processed)')). Ignoring.
|
| 603 |
-
2025-02-19,12:59:12 | INFO | Train Epoch: 0 [ 5251072/128000000 (4%)] Data (t): 0.602 Batch (t): 4.938, 1690.05/s, 211.257/s/gpu LR: 0.001000 Logit Scale: 14.396 Class_loss: 8.8194 (9.3400) Contrastive_loss: 7.6024 (8.2719) Loss: 16.422 (17.612)
|
| 604 |
-
2025-02-19,13:05:13 | INFO | No latest resume checkpoint found in ./logs-lr1e-3-datacomp/exp_rope_clipcls_vit_b16_s512m_bs8k/checkpoints.
|
| 605 |
-
2025-02-19,13:05:18 | INFO | Running in distributed mode with multiple processes. Device: cuda:0.Process (global: 0, local 0), total 8.
|
| 606 |
-
2025-02-19,13:05:18 | INFO | Loaded CLIPCLS-ViT-B-16-NDR model config.
|
| 607 |
-
2025-02-19,13:05:20 | INFO | Model:
|
| 608 |
-
2025-02-19,13:05:20 | INFO | CLIPCLS(
|
| 609 |
-
(visual): NDRVisionTransformer(
|
| 610 |
-
(conv1): Conv2d(3, 768, kernel_size=(16, 16), stride=(16, 16), bias=False)
|
| 611 |
-
(projection_embd): Linear(in_features=768, out_features=768, bias=True)
|
| 612 |
-
(patch_dropout): Identity()
|
| 613 |
-
(ln_pre): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 614 |
-
(transformer): RoPETransformer(
|
| 615 |
-
(resblocks): ModuleList(
|
| 616 |
-
(0-11): 12 x CustomResidualRoPEAttentionBlock(
|
| 617 |
-
(ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 618 |
-
(attn): RoPEAttention(
|
| 619 |
-
(attn_drop): Dropout(p=0.0, inplace=False)
|
| 620 |
-
(out_proj): Linear(in_features=768, out_features=768, bias=True)
|
| 621 |
-
(out_drop): Dropout(p=0.0, inplace=False)
|
| 622 |
-
)
|
| 623 |
-
(ln_attn): Identity()
|
| 624 |
-
(ls_1): Identity()
|
| 625 |
-
(ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 626 |
-
(mlp): Sequential(
|
| 627 |
-
(c_fc): Linear(in_features=768, out_features=3072, bias=True)
|
| 628 |
-
(gelu): GELU(approximate='none')
|
| 629 |
-
(ln): Identity()
|
| 630 |
-
(c_proj): Linear(in_features=3072, out_features=768, bias=True)
|
| 631 |
-
)
|
| 632 |
-
(ls_2): Identity()
|
| 633 |
-
)
|
| 634 |
-
)
|
| 635 |
-
)
|
| 636 |
-
(ln_post): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 637 |
-
)
|
| 638 |
-
(text): TextTransformer(
|
| 639 |
-
(token_embedding): Embedding(49408, 512)
|
| 640 |
-
(transformer): Transformer(
|
| 641 |
-
(resblocks): ModuleList(
|
| 642 |
-
(0-11): 12 x ResidualAttentionBlock(
|
| 643 |
-
(ln_1): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
| 644 |
-
(attn): MultiheadAttention(
|
| 645 |
-
(out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)
|
| 646 |
-
)
|
| 647 |
-
(ls_1): Identity()
|
| 648 |
-
(ln_2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
| 649 |
-
(mlp): Sequential(
|
| 650 |
-
(c_fc): Linear(in_features=512, out_features=2048, bias=True)
|
| 651 |
-
(gelu): GELU(approximate='none')
|
| 652 |
-
(c_proj): Linear(in_features=2048, out_features=512, bias=True)
|
| 653 |
-
)
|
| 654 |
-
(ls_2): Identity()
|
| 655 |
-
)
|
| 656 |
-
)
|
| 657 |
-
)
|
| 658 |
-
(ln_final): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
| 659 |
-
)
|
| 660 |
-
(text_decoder): MixClsHead(
|
| 661 |
-
(mlps): ModuleList()
|
| 662 |
-
(ln_mlp): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 663 |
-
(text_projection): Linear(in_features=768, out_features=49408, bias=True)
|
| 664 |
-
)
|
| 665 |
-
)
|
| 666 |
-
2025-02-19,13:05:20 | INFO | Params:
|
| 667 |
-
2025-02-19,13:05:20 | INFO | NDR_patch_size: 16
|
| 668 |
-
2025-02-19,13:05:20 | INFO | accum_freq: 1
|
| 669 |
-
2025-02-19,13:05:20 | INFO | aug_cfg: {}
|
| 670 |
-
2025-02-19,13:05:20 | INFO | batch_size: 1024
|
| 671 |
-
2025-02-19,13:05:20 | INFO | beta1: 0.9
|
| 672 |
-
2025-02-19,13:05:20 | INFO | beta2: 0.98
|
| 673 |
-
2025-02-19,13:05:20 | INFO | checkpoint_path: ./logs-lr1e-3-datacomp/exp_rope_clipcls_vit_b16_s512m_bs8k/checkpoints
|
| 674 |
-
2025-02-19,13:05:20 | INFO | coca_caption_loss_weight: 2.0
|
| 675 |
-
2025-02-19,13:05:20 | INFO | coca_contrastive_loss_weight: 1.0
|
| 676 |
-
2025-02-19,13:05:20 | INFO | copy_codebase: False
|
| 677 |
-
2025-02-19,13:05:20 | INFO | csv_caption_key: title
|
| 678 |
-
2025-02-19,13:05:20 | INFO | csv_img_key: filepath
|
| 679 |
-
2025-02-19,13:05:20 | INFO | csv_separator:
|
| 680 |
-
2025-02-19,13:05:20 | INFO | dataset_resampled: False
|
| 681 |
-
2025-02-19,13:05:20 | INFO | dataset_type: webdataset
|
| 682 |
-
2025-02-19,13:05:20 | INFO | ddp_static_graph: True
|
| 683 |
-
2025-02-19,13:05:20 | INFO | debug: False
|
| 684 |
-
2025-02-19,13:05:20 | INFO | delete_prev_step_ckpt: True
|
| 685 |
-
2025-02-19,13:05:20 | INFO | delete_previous_checkpoint: False
|
| 686 |
-
2025-02-19,13:05:20 | INFO | device: cuda:0
|
| 687 |
-
2025-02-19,13:05:20 | INFO | dist_backend: nccl
|
| 688 |
-
2025-02-19,13:05:20 | INFO | dist_url: env://
|
| 689 |
-
2025-02-19,13:05:20 | INFO | distill: False
|
| 690 |
-
2025-02-19,13:05:20 | INFO | distill_model: None
|
| 691 |
-
2025-02-19,13:05:20 | INFO | distill_pretrained: None
|
| 692 |
-
2025-02-19,13:05:20 | INFO | distributed: True
|
| 693 |
-
2025-02-19,13:05:20 | INFO | epochs: 4
|
| 694 |
-
2025-02-19,13:05:20 | INFO | epochs_cooldown: None
|
| 695 |
-
2025-02-19,13:05:20 | INFO | eps: 1e-06
|
| 696 |
-
2025-02-19,13:05:20 | INFO | force_custom_text: False
|
| 697 |
-
2025-02-19,13:05:20 | INFO | force_image_size: 224
|
| 698 |
-
2025-02-19,13:05:20 | INFO | force_patch_dropout: None
|
| 699 |
-
2025-02-19,13:05:20 | INFO | force_quick_gelu: False
|
| 700 |
-
2025-02-19,13:05:20 | INFO | gather_with_grad: True
|
| 701 |
-
2025-02-19,13:05:20 | INFO | global_batch_size: 8192
|
| 702 |
-
2025-02-19,13:05:20 | INFO | grad_checkpointing: True
|
| 703 |
-
2025-02-19,13:05:20 | INFO | grad_clip_norm: None
|
| 704 |
-
2025-02-19,13:05:20 | INFO | horovod: False
|
| 705 |
-
2025-02-19,13:05:20 | INFO | image_interpolation: None
|
| 706 |
-
2025-02-19,13:05:20 | INFO | image_mean: None
|
| 707 |
-
2025-02-19,13:05:20 | INFO | image_resize_mode: None
|
| 708 |
-
2025-02-19,13:05:20 | INFO | image_std: None
|
| 709 |
-
2025-02-19,13:05:20 | INFO | imagenet_v2: None
|
| 710 |
-
2025-02-19,13:05:20 | INFO | imagenet_val: /mnt/bn/zilongdata-hl/dataset/imagenet/val
|
| 711 |
-
2025-02-19,13:05:20 | INFO | local_loss: True
|
| 712 |
-
2025-02-19,13:05:20 | INFO | local_rank: 0
|
| 713 |
-
2025-02-19,13:05:20 | INFO | lock_image: False
|
| 714 |
-
2025-02-19,13:05:20 | INFO | lock_image_freeze_bn_stats: False
|
| 715 |
-
2025-02-19,13:05:20 | INFO | lock_image_unlocked_groups: 0
|
| 716 |
-
2025-02-19,13:05:20 | INFO | lock_text: False
|
| 717 |
-
2025-02-19,13:05:20 | INFO | lock_text_freeze_layer_norm: False
|
| 718 |
-
2025-02-19,13:05:20 | INFO | lock_text_unlocked_layers: 0
|
| 719 |
-
2025-02-19,13:05:20 | INFO | log_every_n_steps: 128
|
| 720 |
-
2025-02-19,13:05:20 | INFO | log_level: 20
|
| 721 |
-
2025-02-19,13:05:20 | INFO | log_local: False
|
| 722 |
-
2025-02-19,13:05:20 | INFO | log_path: ./logs-lr1e-3-datacomp/exp_rope_clipcls_vit_b16_s512m_bs8k/out.log
|
| 723 |
-
2025-02-19,13:05:20 | INFO | logs: ./logs-lr1e-3-datacomp
|
| 724 |
-
2025-02-19,13:05:20 | INFO | lr: 0.001
|
| 725 |
-
2025-02-19,13:05:20 | INFO | lr_cooldown_end: 0.0
|
| 726 |
-
2025-02-19,13:05:20 | INFO | lr_cooldown_power: 1.0
|
| 727 |
-
2025-02-19,13:05:20 | INFO | lr_scheduler: cosine
|
| 728 |
-
2025-02-19,13:05:20 | INFO | max_seq_len: 100000000000000
|
| 729 |
-
2025-02-19,13:05:20 | INFO | model: CLIPCLS-ViT-B-16-NDR
|
| 730 |
-
2025-02-19,13:05:20 | INFO | name: exp_rope_clipcls_vit_b16_s512m_bs8k
|
| 731 |
-
2025-02-19,13:05:20 | INFO | native_dynamic_resolution: True
|
| 732 |
-
2025-02-19,13:05:20 | INFO | no_set_device_rank: False
|
| 733 |
-
2025-02-19,13:05:20 | INFO | only_packing: True
|
| 734 |
-
2025-02-19,13:05:20 | INFO | precision: amp_bfloat16
|
| 735 |
-
2025-02-19,13:05:20 | INFO | pretrained:
|
| 736 |
-
2025-02-19,13:05:20 | INFO | pretrained_image:
|
| 737 |
-
2025-02-19,13:05:20 | INFO | pretrained_text:
|
| 738 |
-
2025-02-19,13:05:20 | INFO | rank: 0
|
| 739 |
-
2025-02-19,13:05:20 | INFO | remote_sync: None
|
| 740 |
-
2025-02-19,13:05:20 | INFO | remote_sync_frequency: 300
|
| 741 |
-
2025-02-19,13:05:20 | INFO | remote_sync_protocol: s3
|
| 742 |
-
2025-02-19,13:05:20 | INFO | report_to: wandb
|
| 743 |
-
2025-02-19,13:05:20 | INFO | resume: None
|
| 744 |
-
2025-02-19,13:05:20 | INFO | rope_attn_num_heads: 12
|
| 745 |
-
2025-02-19,13:05:20 | INFO | rope_model_width: 768
|
| 746 |
-
2025-02-19,13:05:20 | INFO | save_every_n_steps: 6104
|
| 747 |
-
2025-02-19,13:05:20 | INFO | save_frequency: 1
|
| 748 |
-
2025-02-19,13:05:20 | INFO | save_most_recent: False
|
| 749 |
-
2025-02-19,13:05:20 | INFO | seed: 0
|
| 750 |
-
2025-02-19,13:05:20 | INFO | siglip: False
|
| 751 |
-
2025-02-19,13:05:20 | INFO | skip_scheduler: False
|
| 752 |
-
2025-02-19,13:05:20 | INFO | tensorboard: False
|
| 753 |
-
2025-02-19,13:05:20 | INFO | tensorboard_path:
|
| 754 |
-
2025-02-19,13:05:20 | INFO | torchcompile: False
|
| 755 |
-
2025-02-19,13:05:20 | INFO | torchscript: False
|
| 756 |
-
2025-02-19,13:05:20 | INFO | trace: False
|
| 757 |
-
2025-02-19,13:05:20 | INFO | train_data: /mnt/bn/bytenas-weixian/data/Recap-DataComp-1B-Dataset/{000000..140146}.tar
|
| 758 |
-
2025-02-19,13:05:20 | INFO | train_data_upsampling_factors: None
|
| 759 |
-
2025-02-19,13:05:20 | INFO | train_num_samples: 128000000
|
| 760 |
-
2025-02-19,13:05:20 | INFO | use_bn_sync: False
|
| 761 |
-
2025-02-19,13:05:20 | INFO | use_bnb_linear: None
|
| 762 |
-
2025-02-19,13:05:20 | INFO | val_data: None
|
| 763 |
-
2025-02-19,13:05:20 | INFO | val_frequency: 1
|
| 764 |
-
2025-02-19,13:05:20 | INFO | val_num_samples: None
|
| 765 |
-
2025-02-19,13:05:20 | INFO | val_steps: 6104
|
| 766 |
-
2025-02-19,13:05:20 | INFO | wandb: True
|
| 767 |
-
2025-02-19,13:05:20 | INFO | wandb_notes:
|
| 768 |
-
2025-02-19,13:05:20 | INFO | wandb_project_name: cls-clip-NDR
|
| 769 |
-
2025-02-19,13:05:20 | INFO | warmup: 500
|
| 770 |
-
2025-02-19,13:05:20 | INFO | wd: 0.2
|
| 771 |
-
2025-02-19,13:05:20 | INFO | workers: 6
|
| 772 |
-
2025-02-19,13:05:20 | INFO | world_size: 8
|
| 773 |
-
2025-02-19,13:05:20 | INFO | zeroshot_frequency: 2
|
| 774 |
-
2025-02-19,13:05:20 | INFO | zeroshot_steps: 6104
|
| 775 |
-
2025-02-19,13:05:35 | INFO | Start epoch 0
|
| 776 |
-
2025-02-19,13:05:51 | INFO | Train Epoch: 0 [ 8192/128040960 (0%)] Data (t): 10.133 Batch (t): 15.889, 515.567/s, 64.4459/s/gpu LR: 0.000002 Logit Scale: 14.286 Class_loss: 11.345 (11.345) Contrastive_loss: 9.1457 (9.1457) Loss: 20.491 (20.491)
|
| 777 |
-
2025-02-19,13:06:40 | WARNING | Handling webdataset error (OSError('image file is truncated (44 bytes not processed)')). Ignoring.
|
| 778 |
-
2025-02-19,13:15:11 | WARNING | Handling webdataset error (OSError('image file is truncated (47 bytes not processed)')). Ignoring.
|
| 779 |
-
2025-02-19,13:15:21 | INFO | Train Epoch: 0 [ 1056768/128040960 (1%)] Data (t): 0.563 Batch (t): 4.452, 1948.42/s, 243.553/s/gpu LR: 0.000258 Logit Scale: 14.284 Class_loss: 9.1077 (10.226) Contrastive_loss: 8.6769 (8.9113) Loss: 17.785 (19.138)
|
| 780 |
-
2025-02-19,13:24:52 | INFO | Train Epoch: 0 [ 2105344/128040960 (2%)] Data (t): 0.545 Batch (t): 4.461, 2104.00/s, 263.000/s/gpu LR: 0.000514 Logit Scale: 14.242 Class_loss: 8.9850 (9.8126) Contrastive_loss: 8.3043 (8.7090) Loss: 17.289 (18.522)
|
| 781 |
-
2025-02-19,13:34:22 | INFO | Train Epoch: 0 [ 3153920/128040960 (2%)] Data (t): 0.554 Batch (t): 4.453, 1689.95/s, 211.243/s/gpu LR: 0.000770 Logit Scale: 14.214 Class_loss: 8.9541 (9.5980) Contrastive_loss: 8.0705 (8.5494) Loss: 17.025 (18.147)
|
| 782 |
-
2025-02-19,13:35:15 | WARNING | Handling webdataset error (OSError('image file is truncated (68 bytes not processed)')). Ignoring.
|
| 783 |
-
2025-02-19,13:43:54 | INFO | Train Epoch: 0 [ 4202496/128040960 (3%)] Data (t): 0.561 Batch (t): 4.469, 1886.29/s, 235.786/s/gpu LR: 0.001000 Logit Scale: 14.216 Class_loss: 8.8615 (9.4507) Contrastive_loss: 7.8130 (8.4021) Loss: 16.675 (17.853)
|
| 784 |
-
2025-02-19,13:48:38 | WARNING | Handling webdataset error (OSError('image file is truncated (25 bytes not processed)')). Ignoring.
|
| 785 |
-
2025-02-19,13:53:26 | INFO | Train Epoch: 0 [ 5251072/128040960 (4%)] Data (t): 0.560 Batch (t): 4.463, 1972.18/s, 246.523/s/gpu LR: 0.001000 Logit Scale: 14.389 Class_loss: 8.8393 (9.3488) Contrastive_loss: 7.6803 (8.2818) Loss: 16.520 (17.631)
|
| 786 |
-
2025-02-19,13:56:35 | WARNING | Handling webdataset error (OSError('image file is truncated (35 bytes not processed)')). Ignoring.
|
| 787 |
-
2025-02-19,14:02:57 | INFO | Train Epoch: 0 [ 6299648/128040960 (5%)] Data (t): 0.550 Batch (t): 4.464, 1777.37/s, 222.172/s/gpu LR: 0.001000 Logit Scale: 14.879 Class_loss: 8.7577 (9.2644) Contrastive_loss: 7.3735 (8.1521) Loss: 16.131 (17.416)
|
| 788 |
-
2025-02-19,14:12:33 | INFO | Train Epoch: 0 [ 7348224/128040960 (6%)] Data (t): 0.564 Batch (t): 4.498, 1942.67/s, 242.834/s/gpu LR: 0.001000 Logit Scale: 15.541 Class_loss: 8.7739 (9.2031) Contrastive_loss: 7.4424 (8.0634) Loss: 16.216 (17.266)
|
| 789 |
-
2025-02-19,14:22:05 | INFO | Train Epoch: 0 [ 8396800/128040960 (7%)] Data (t): 0.571 Batch (t): 4.473, 1944.53/s, 243.066/s/gpu LR: 0.001000 Logit Scale: 16.469 Class_loss: 8.7110 (9.1484) Contrastive_loss: 7.0960 (7.9559) Loss: 15.807 (17.104)
|
| 790 |
-
2025-02-19,14:31:38 | INFO | Train Epoch: 0 [ 9445376/128040960 (7%)] Data (t): 0.563 Batch (t): 4.472, 1748.42/s, 218.552/s/gpu LR: 0.001000 Logit Scale: 17.457 Class_loss: 8.5955 (9.0931) Contrastive_loss: 6.8748 (7.8478) Loss: 15.470 (16.941)
|
| 791 |
-
2025-02-19,14:40:26 | WARNING | Handling webdataset error (OSError('image file is truncated (104 bytes not processed)')). Ignoring.
|
| 792 |
-
2025-02-19,14:41:10 | INFO | Train Epoch: 0 [ 10493952/128040960 (8%)] Data (t): 0.558 Batch (t): 4.469, 1963.86/s, 245.483/s/gpu LR: 0.001000 Logit Scale: 18.481 Class_loss: 8.5946 (9.0478) Contrastive_loss: 6.8009 (7.7526) Loss: 15.396 (16.800)
|
| 793 |
-
2025-02-19,14:45:09 | WARNING | Handling webdataset error (OSError('image file is truncated (21 bytes not processed)')). Ignoring.
|
| 794 |
-
2025-02-19,14:50:40 | INFO | Train Epoch: 0 [ 11542528/128040960 (9%)] Data (t): 0.574 Batch (t): 4.459, 2100.78/s, 262.597/s/gpu LR: 0.000999 Logit Scale: 19.417 Class_loss: 8.5640 (9.0075) Contrastive_loss: 6.6877 (7.6639) Loss: 15.252 (16.671)
|
| 795 |
-
2025-02-19,14:59:04 | WARNING | Handling webdataset error (OSError('image file is truncated (68 bytes not processed)')). Ignoring.
|
| 796 |
-
2025-02-19,15:00:10 | INFO | Train Epoch: 0 [ 12591104/128040960 (10%)] Data (t): 0.571 Batch (t): 4.450, 1743.91/s, 217.989/s/gpu LR: 0.000999 Logit Scale: 20.350 Class_loss: 8.5830 (8.9748) Contrastive_loss: 6.6580 (7.5865) Loss: 15.241 (16.561)
|
| 797 |
-
2025-02-19,15:09:40 | INFO | Train Epoch: 0 [ 13639680/128040960 (11%)] Data (t): 0.559 Batch (t): 4.456, 1966.06/s, 245.757/s/gpu LR: 0.000999 Logit Scale: 21.192 Class_loss: 8.5088 (8.9415) Contrastive_loss: 6.5581 (7.5130) Loss: 15.067 (16.455)
|
| 798 |
-
2025-02-19,15:12:15 | WARNING | Handling webdataset error (OSError('image file is truncated (28 bytes not processed)')). Ignoring.
|
| 799 |
-
2025-02-19,15:19:08 | WARNING | Handling webdataset error (OSError('image file is truncated (32 bytes not processed)')). Ignoring.
|
| 800 |
-
2025-02-19,15:19:10 | INFO | Train Epoch: 0 [ 14688256/128040960 (11%)] Data (t): 0.566 Batch (t): 4.453, 1954.79/s, 244.348/s/gpu LR: 0.000999 Logit Scale: 21.899 Class_loss: 8.5548 (8.9157) Contrastive_loss: 6.4382 (7.4414) Loss: 14.993 (16.357)
|
| 801 |
-
2025-02-19,15:19:39 | WARNING | Handling webdataset error (OSError('image file is truncated (5 bytes not processed)')). Ignoring.
|
| 802 |
-
2025-02-19,15:28:39 | INFO | Train Epoch: 0 [ 15736832/128040960 (12%)] Data (t): 0.565 Batch (t): 4.443, 1826.76/s, 228.345/s/gpu LR: 0.000999 Logit Scale: 22.392 Class_loss: 8.5375 (8.8921) Contrastive_loss: 6.6134 (7.3896) Loss: 15.151 (16.282)
|
| 803 |
-
2025-02-19,15:31:54 | WARNING | Handling webdataset error (OSError('image file is truncated (23 bytes not processed)')). Ignoring.
|
| 804 |
-
2025-02-19,15:38:10 | INFO | Train Epoch: 0 [ 16785408/128040960 (13%)] Data (t): 0.569 Batch (t): 4.461, 1664.71/s, 208.089/s/gpu LR: 0.000998 Logit Scale: 22.853 Class_loss: 8.4941 (8.8687) Contrastive_loss: 6.5778 (7.3419) Loss: 15.072 (16.211)
|
| 805 |
-
2025-02-19,15:47:40 | INFO | Train Epoch: 0 [ 17833984/128040960 (14%)] Data (t): 0.567 Batch (t): 4.450, 2096.33/s, 262.041/s/gpu LR: 0.000998 Logit Scale: 23.429 Class_loss: 8.5666 (8.8519) Contrastive_loss: 6.4976 (7.2950) Loss: 15.064 (16.147)
|
| 806 |
-
2025-02-19,15:57:09 | INFO | Train Epoch: 0 [ 18882560/128040960 (15%)] Data (t): 0.573 Batch (t): 4.445, 1937.46/s, 242.183/s/gpu LR: 0.000998 Logit Scale: 24.001 Class_loss: 8.6211 (8.8398) Contrastive_loss: 6.5858 (7.2576) Loss: 15.207 (16.097)
|
| 807 |
-
2025-02-19,16:06:39 | INFO | Train Epoch: 0 [ 19931136/128040960 (16%)] Data (t): 0.557 Batch (t): 4.460, 1787.86/s, 223.483/s/gpu LR: 0.000998 Logit Scale: 24.533 Class_loss: 8.5764 (8.8266) Contrastive_loss: 6.6014 (7.2248) Loss: 15.178 (16.051)
|
| 808 |
-
2025-02-19,16:16:08 | INFO | Train Epoch: 0 [ 20979712/128040960 (16%)] Data (t): 0.555 Batch (t): 4.439, 1978.62/s, 247.327/s/gpu LR: 0.000997 Logit Scale: 25.075 Class_loss: 8.5267 (8.8123) Contrastive_loss: 6.3055 (7.1811) Loss: 14.832 (15.993)
|
| 809 |
-
2025-02-19,16:25:38 | INFO | Train Epoch: 0 [ 22028288/128040960 (17%)] Data (t): 0.572 Batch (t): 4.459, 1888.03/s, 236.004/s/gpu LR: 0.000997 Logit Scale: 25.526 Class_loss: 8.5753 (8.8015) Contrastive_loss: 6.5262 (7.1513) Loss: 15.101 (15.953)
|
| 810 |
-
2025-02-19,16:35:06 | INFO | Train Epoch: 0 [ 23076864/128040960 (18%)] Data (t): 0.571 Batch (t): 4.434, 1799.42/s, 224.928/s/gpu LR: 0.000997 Logit Scale: 25.952 Class_loss: 8.5775 (8.7918) Contrastive_loss: 6.5202 (7.1238) Loss: 15.098 (15.916)
|
| 811 |
-
2025-02-19,16:41:13 | WARNING | Handling webdataset error (OSError('image file is truncated (114 bytes not processed)')). Ignoring.
|
| 812 |
-
2025-02-19,16:44:34 | INFO | Train Epoch: 0 [ 24125440/128040960 (19%)] Data (t): 0.554 Batch (t): 4.436, 1961.03/s, 245.129/s/gpu LR: 0.000996 Logit Scale: 26.313 Class_loss: 8.5193 (8.7804) Contrastive_loss: 6.4393 (7.0953) Loss: 14.959 (15.876)
|
| 813 |
-
2025-02-19,16:52:29 | WARNING | Handling webdataset error (OSError('image file is truncated (57 bytes not processed)')). Ignoring.
|
| 814 |
-
2025-02-19,16:54:01 | INFO | Train Epoch: 0 [ 25174016/128040960 (20%)] Data (t): 0.556 Batch (t): 4.428, 1866.46/s, 233.307/s/gpu LR: 0.000996 Logit Scale: 26.777 Class_loss: 8.5398 (8.7708) Contrastive_loss: 6.4941 (7.0713) Loss: 15.034 (15.842)
|
| 815 |
-
2025-02-19,17:03:23 | INFO | Train Epoch: 0 [ 26222592/128040960 (20%)] Data (t): 0.543 Batch (t): 4.393, 1817.50/s, 227.187/s/gpu LR: 0.000995 Logit Scale: 27.129 Class_loss: 8.5815 (8.7635) Contrastive_loss: 6.5006 (7.0493) Loss: 15.082 (15.813)
|
| 816 |
-
2025-02-19,17:04:44 | WARNING | Handling webdataset error (OSError('image file is truncated (75 bytes not processed)')). Ignoring.
|
| 817 |
-
2025-02-19,17:07:16 | WARNING | Handling webdataset error (OSError('image file is truncated (0 bytes not processed)')). Ignoring.
|
| 818 |
-
2025-02-19,17:12:46 | INFO | Train Epoch: 0 [ 27271168/128040960 (21%)] Data (t): 0.554 Batch (t): 4.400, 1981.18/s, 247.647/s/gpu LR: 0.000995 Logit Scale: 27.474 Class_loss: 8.5443 (8.7554) Contrastive_loss: 6.3068 (7.0218) Loss: 14.851 (15.777)
|
| 819 |
-
2025-02-19,17:16:36 | WARNING | Handling webdataset error (OSError('image file is truncated (59 bytes not processed)')). Ignoring.
|
| 820 |
-
2025-02-19,17:22:15 | INFO | Train Epoch: 0 [ 28319744/128040960 (22%)] Data (t): 0.595 Batch (t): 4.443, 1857.18/s, 232.147/s/gpu LR: 0.000994 Logit Scale: 27.816 Class_loss: 8.5371 (8.7476) Contrastive_loss: 6.3310 (6.9972) Loss: 14.868 (15.745)
|
| 821 |
-
2025-02-19,17:31:46 | INFO | Train Epoch: 0 [ 29368320/128040960 (23%)] Data (t): 0.612 Batch (t): 4.461, 1883.20/s, 235.400/s/gpu LR: 0.000994 Logit Scale: 28.138 Class_loss: 8.6386 (8.7439) Contrastive_loss: 6.6651 (6.9857) Loss: 15.304 (15.730)
|
| 822 |
-
2025-02-19,17:37:01 | WARNING | Handling webdataset error (OSError('image file is truncated (1 bytes not processed)')). Ignoring.
|
| 823 |
-
2025-02-19,17:41:10 | INFO | Train Epoch: 0 [ 30416896/128040960 (24%)] Data (t): 0.566 Batch (t): 4.405, 2113.44/s, 264.180/s/gpu LR: 0.000993 Logit Scale: 28.479 Class_loss: 8.5183 (8.7363) Contrastive_loss: 6.2043 (6.9597) Loss: 14.723 (15.696)
|
| 824 |
-
2025-02-19,17:50:31 | INFO | Train Epoch: 0 [ 31465472/128040960 (25%)] Data (t): 0.560 Batch (t): 4.388, 1980.46/s, 247.557/s/gpu LR: 0.000993 Logit Scale: 28.767 Class_loss: 8.5412 (8.7301) Contrastive_loss: 6.1385 (6.9332) Loss: 14.680 (15.663)
|
| 825 |
-
2025-02-19,17:59:57 | INFO | Train Epoch: 0 [ 32514048/128040960 (25%)] Data (t): 0.576 Batch (t): 4.421, 1778.98/s, 222.373/s/gpu LR: 0.000992 Logit Scale: 29.225 Class_loss: 8.5244 (8.7236) Contrastive_loss: 6.2928 (6.9132) Loss: 14.817 (15.637)
|
| 826 |
-
2025-02-19,18:09:20 | INFO | Train Epoch: 0 [ 33562624/128040960 (26%)] Data (t): 0.570 Batch (t): 4.395, 2010.13/s, 251.266/s/gpu LR: 0.000992 Logit Scale: 29.516 Class_loss: 8.5979 (8.7198) Contrastive_loss: 6.4969 (6.9005) Loss: 15.095 (15.620)
|
| 827 |
-
2025-02-19,18:10:36 | WARNING | Handling webdataset error (OSError('image file is truncated (0 bytes not processed)')). Ignoring.
|
| 828 |
-
2025-02-19,18:18:42 | INFO | Train Epoch: 0 [ 34611200/128040960 (27%)] Data (t): 0.573 Batch (t): 4.395, 1723.65/s, 215.456/s/gpu LR: 0.000991 Logit Scale: 29.842 Class_loss: 8.5726 (8.7155) Contrastive_loss: 6.2865 (6.8825) Loss: 14.859 (15.598)
|
| 829 |
-
2025-02-19,18:28:03 | INFO | Train Epoch: 0 [ 35659776/128040960 (28%)] Data (t): 0.565 Batch (t): 4.379, 1771.34/s, 221.418/s/gpu LR: 0.000991 Logit Scale: 29.875 Class_loss: 8.6969 (8.7150) Contrastive_loss: 6.8899 (6.8827) Loss: 15.587 (15.598)
|
| 830 |
-
2025-02-19,18:37:25 | INFO | Train Epoch: 0 [ 36708352/128040960 (29%)] Data (t): 0.566 Batch (t): 4.389, 1942.40/s, 242.800/s/gpu LR: 0.000990 Logit Scale: 30.141 Class_loss: 8.6001 (8.7118) Contrastive_loss: 6.5299 (6.8729) Loss: 15.130 (15.585)
|
| 831 |
-
2025-02-19,18:37:48 | WARNING | Handling webdataset error (OSError('image file is truncated (18 bytes not processed)')). Ignoring.
|
| 832 |
-
2025-02-19,18:46:43 | INFO | Train Epoch: 0 [ 37756928/128040960 (29%)] Data (t): 0.572 Batch (t): 4.359, 1966.47/s, 245.808/s/gpu LR: 0.000989 Logit Scale: 30.344 Class_loss: 8.5232 (8.7067) Contrastive_loss: 6.3698 (6.8593) Loss: 14.893 (15.566)
|
| 833 |
-
2025-02-19,18:49:17 | WARNING | Handling webdataset error (OSError('image file is truncated (59 bytes not processed)')). Ignoring.
|
| 834 |
-
2025-02-19,18:56:01 | INFO | Train Epoch: 0 [ 38805504/128040960 (30%)] Data (t): 0.573 Batch (t): 4.358, 1861.34/s, 232.668/s/gpu LR: 0.000989 Logit Scale: 30.342 Class_loss: 8.6590 (8.7054) Contrastive_loss: 6.8510 (6.8591) Loss: 15.510 (15.564)
|
| 835 |
-
2025-02-19,19:05:19 | INFO | Train Epoch: 0 [ 39854080/128040960 (31%)] Data (t): 0.561 Batch (t): 4.364, 2074.33/s, 259.291/s/gpu LR: 0.000988 Logit Scale: 30.527 Class_loss: 8.7386 (8.7063) Contrastive_loss: 6.9183 (6.8606) Loss: 15.657 (15.567)
|
| 836 |
-
2025-02-19,19:09:01 | WARNING | Handling webdataset error (OSError('image file is truncated (84 bytes not processed)')). Ignoring.
|
| 837 |
-
2025-02-19,19:14:36 | INFO | Train Epoch: 0 [ 40902656/128040960 (32%)] Data (t): 0.557 Batch (t): 4.350, 1938.78/s, 242.347/s/gpu LR: 0.000987 Logit Scale: 30.693 Class_loss: 8.6893 (8.7058) Contrastive_loss: 6.9139 (6.8619) Loss: 15.603 (15.568)
|
| 838 |
-
2025-02-19,19:23:55 | INFO | Train Epoch: 0 [ 41951232/128040960 (33%)] Data (t): 0.583 Batch (t): 4.369, 1796.22/s, 224.527/s/gpu LR: 0.000986 Logit Scale: 30.823 Class_loss: 8.6125 (8.7036) Contrastive_loss: 6.6863 (6.8576) Loss: 15.299 (15.561)
|
| 839 |
-
2025-02-19,19:29:22 | WARNING | Handling webdataset error (OSError('image file is truncated (18 bytes not processed)')). Ignoring.
|
| 840 |
-
2025-02-19,19:33:16 | INFO | Train Epoch: 0 [ 42999808/128040960 (34%)] Data (t): 0.628 Batch (t): 4.384, 1942.68/s, 242.835/s/gpu LR: 0.000986 Logit Scale: 31.040 Class_loss: 8.6737 (8.7029) Contrastive_loss: 6.6027 (6.8516) Loss: 15.276 (15.554)
|
| 841 |
-
2025-02-19,19:42:37 | INFO | Train Epoch: 0 [ 44048384/128040960 (34%)] Data (t): 0.598 Batch (t): 4.381, 1870.95/s, 233.869/s/gpu LR: 0.000985 Logit Scale: 31.324 Class_loss: 8.6157 (8.7008) Contrastive_loss: 6.6863 (6.8477) Loss: 15.302 (15.549)
|
| 842 |
-
2025-02-19,19:50:01 | WARNING | Handling webdataset error (OSError('image file is truncated (49 bytes not processed)')). Ignoring.
|
| 843 |
-
2025-02-19,19:51:53 | INFO | Train Epoch: 0 [ 45096960/128040960 (35%)] Data (t): 0.560 Batch (t): 4.346, 1685.03/s, 210.628/s/gpu LR: 0.000984 Logit Scale: 31.575 Class_loss: 8.6482 (8.6996) Contrastive_loss: 6.5760 (6.8416) Loss: 15.224 (15.541)
|
| 844 |
-
2025-02-19,20:00:44 | WARNING | Handling webdataset error (OSError('image file is truncated (5 bytes not processed)')). Ignoring.
|
| 845 |
-
2025-02-19,20:01:08 | INFO | Train Epoch: 0 [ 46145536/128040960 (36%)] Data (t): 0.577 Batch (t): 4.330, 1977.80/s, 247.224/s/gpu LR: 0.000983 Logit Scale: 31.606 Class_loss: 8.6865 (8.6993) Contrastive_loss: 6.7368 (6.8392) Loss: 15.423 (15.539)
|
| 846 |
-
2025-02-19,20:10:24 | INFO | Train Epoch: 0 [ 47194112/128040960 (37%)] Data (t): 0.573 Batch (t): 4.348, 1957.80/s, 244.725/s/gpu LR: 0.000982 Logit Scale: 32.080 Class_loss: 8.6316 (8.6979) Contrastive_loss: 6.5471 (6.8329) Loss: 15.179 (15.531)
|
| 847 |
-
2025-02-19,20:16:50 | WARNING | Handling webdataset error (OSError('image file is truncated (86 bytes not processed)')). Ignoring.
|
| 848 |
-
2025-02-19,20:19:43 | INFO | Train Epoch: 0 [ 48242688/128040960 (38%)] Data (t): 0.574 Batch (t): 4.367, 1928.76/s, 241.095/s/gpu LR: 0.000981 Logit Scale: 31.915 Class_loss: 8.6851 (8.6976) Contrastive_loss: 6.7133 (6.8303) Loss: 15.398 (15.528)
|
| 849 |
-
2025-02-19,20:29:02 | INFO | Train Epoch: 0 [ 49291264/128040960 (38%)] Data (t): 0.579 Batch (t): 4.363, 2102.72/s, 262.839/s/gpu LR: 0.000981 Logit Scale: 32.106 Class_loss: 8.6748 (8.6971) Contrastive_loss: 6.8274 (6.8303) Loss: 15.502 (15.527)
|
| 850 |
-
2025-02-19,20:35:19 | INFO | Starting zero-shot imagenet.
|
| 851 |
-
2025-02-19,20:35:19 | INFO | Building zero-shot classifier
|
| 852 |
-
2025-02-19,20:35:29 | INFO | Using classifier
|
| 853 |
-
2025-02-19,20:40:28 | INFO | Finished zero-shot imagenet.
|
| 854 |
-
2025-02-19,20:40:28 | INFO | Eval Epoch: 0.39046705054382597 imagenet-zeroshot-val-top1: 0.0001 imagenet-zeroshot-val-top5: 0.0004
|
|
|
|
| 1 |
+
2025-02-23,13:12:02 | INFO | No latest resume checkpoint found in ./logs-lr1e-3-datacomp/exp_rope_clipcls_vit_b16_s512m_bs8k/checkpoints.
|
| 2 |
+
2025-02-23,13:12:07 | INFO | Running in distributed mode with multiple processes. Device: cuda:0.Process (global: 0, local 0), total 8.
|
| 3 |
+
2025-02-23,13:12:07 | INFO | Loaded CLIPCLS-ViT-B-16-NDR model config.
|
| 4 |
+
2025-02-23,13:12:09 | INFO | Model:
|
| 5 |
+
2025-02-23,13:12:09 | INFO | CLIPCLS(
|
| 6 |
(visual): NDRVisionTransformer(
|
| 7 |
(conv1): Conv2d(3, 768, kernel_size=(16, 16), stride=(16, 16), bias=False)
|
| 8 |
(projection_embd): Linear(in_features=768, out_features=768, bias=True)
|
|
|
|
| 16 |
(attn_drop): Dropout(p=0.0, inplace=False)
|
| 17 |
(out_proj): Linear(in_features=768, out_features=768, bias=True)
|
| 18 |
(out_drop): Dropout(p=0.0, inplace=False)
|
| 19 |
+
(k_proj): Linear(in_features=768, out_features=768, bias=True)
|
| 20 |
+
(v_proj): Linear(in_features=768, out_features=768, bias=True)
|
| 21 |
+
(q_proj): Linear(in_features=768, out_features=768, bias=True)
|
| 22 |
)
|
| 23 |
(ln_attn): Identity()
|
| 24 |
(ls_1): Identity()
|
|
|
|
| 63 |
(text_projection): Linear(in_features=768, out_features=49408, bias=True)
|
| 64 |
)
|
| 65 |
)
|
| 66 |
+
2025-02-23,13:12:09 | INFO | Params:
|
| 67 |
+
2025-02-23,13:12:09 | INFO | NDR_patch_size: 16
|
| 68 |
+
2025-02-23,13:12:09 | INFO | accum_freq: 1
|
| 69 |
+
2025-02-23,13:12:09 | INFO | aug_cfg: {}
|
| 70 |
+
2025-02-23,13:12:09 | INFO | batch_size: 1024
|
| 71 |
+
2025-02-23,13:12:09 | INFO | beta1: 0.9
|
| 72 |
+
2025-02-23,13:12:09 | INFO | beta2: 0.98
|
| 73 |
+
2025-02-23,13:12:09 | INFO | checkpoint_path: ./logs-lr1e-3-datacomp/exp_rope_clipcls_vit_b16_s512m_bs8k/checkpoints
|
| 74 |
+
2025-02-23,13:12:09 | INFO | coca_caption_loss_weight: 2.0
|
| 75 |
+
2025-02-23,13:12:09 | INFO | coca_contrastive_loss_weight: 1.0
|
| 76 |
+
2025-02-23,13:12:09 | INFO | copy_codebase: False
|
| 77 |
+
2025-02-23,13:12:09 | INFO | csv_caption_key: title
|
| 78 |
+
2025-02-23,13:12:09 | INFO | csv_img_key: filepath
|
| 79 |
+
2025-02-23,13:12:09 | INFO | csv_separator:
|
| 80 |
+
2025-02-23,13:12:09 | INFO | dataset_resampled: False
|
| 81 |
+
2025-02-23,13:12:09 | INFO | dataset_type: webdataset
|
| 82 |
+
2025-02-23,13:12:09 | INFO | ddp_static_graph: True
|
| 83 |
+
2025-02-23,13:12:09 | INFO | debug: False
|
| 84 |
+
2025-02-23,13:12:09 | INFO | delete_prev_step_ckpt: True
|
| 85 |
+
2025-02-23,13:12:09 | INFO | delete_previous_checkpoint: False
|
| 86 |
+
2025-02-23,13:12:09 | INFO | device: cuda:0
|
| 87 |
+
2025-02-23,13:12:09 | INFO | dist_backend: nccl
|
| 88 |
+
2025-02-23,13:12:09 | INFO | dist_url: env://
|
| 89 |
+
2025-02-23,13:12:09 | INFO | distill: False
|
| 90 |
+
2025-02-23,13:12:09 | INFO | distill_model: None
|
| 91 |
+
2025-02-23,13:12:09 | INFO | distill_pretrained: None
|
| 92 |
+
2025-02-23,13:12:09 | INFO | distributed: True
|
| 93 |
+
2025-02-23,13:12:09 | INFO | epochs: 4
|
| 94 |
+
2025-02-23,13:12:09 | INFO | epochs_cooldown: None
|
| 95 |
+
2025-02-23,13:12:09 | INFO | eps: 1e-06
|
| 96 |
+
2025-02-23,13:12:09 | INFO | force_custom_text: False
|
| 97 |
+
2025-02-23,13:12:09 | INFO | force_image_size: 224
|
| 98 |
+
2025-02-23,13:12:09 | INFO | force_patch_dropout: None
|
| 99 |
+
2025-02-23,13:12:09 | INFO | force_quick_gelu: False
|
| 100 |
+
2025-02-23,13:12:09 | INFO | gather_with_grad: True
|
| 101 |
+
2025-02-23,13:12:09 | INFO | global_batch_size: 8192
|
| 102 |
+
2025-02-23,13:12:09 | INFO | grad_checkpointing: True
|
| 103 |
+
2025-02-23,13:12:09 | INFO | grad_clip_norm: None
|
| 104 |
+
2025-02-23,13:12:09 | INFO | horovod: False
|
| 105 |
+
2025-02-23,13:12:09 | INFO | image_interpolation: None
|
| 106 |
+
2025-02-23,13:12:09 | INFO | image_mean: None
|
| 107 |
+
2025-02-23,13:12:09 | INFO | image_resize_mode: None
|
| 108 |
+
2025-02-23,13:12:09 | INFO | image_std: None
|
| 109 |
+
2025-02-23,13:12:09 | INFO | imagenet_v2: None
|
| 110 |
+
2025-02-23,13:12:09 | INFO | imagenet_val: /mnt/bn/zilongdata-hl/dataset/imagenet/val
|
| 111 |
+
2025-02-23,13:12:09 | INFO | local_loss: True
|
| 112 |
+
2025-02-23,13:12:09 | INFO | local_rank: 0
|
| 113 |
+
2025-02-23,13:12:09 | INFO | lock_image: False
|
| 114 |
+
2025-02-23,13:12:09 | INFO | lock_image_freeze_bn_stats: False
|
| 115 |
+
2025-02-23,13:12:09 | INFO | lock_image_unlocked_groups: 0
|
| 116 |
+
2025-02-23,13:12:09 | INFO | lock_text: False
|
| 117 |
+
2025-02-23,13:12:09 | INFO | lock_text_freeze_layer_norm: False
|
| 118 |
+
2025-02-23,13:12:09 | INFO | lock_text_unlocked_layers: 0
|
| 119 |
+
2025-02-23,13:12:09 | INFO | log_every_n_steps: 128
|
| 120 |
+
2025-02-23,13:12:09 | INFO | log_level: 20
|
| 121 |
+
2025-02-23,13:12:09 | INFO | log_local: False
|
| 122 |
+
2025-02-23,13:12:09 | INFO | log_path: ./logs-lr1e-3-datacomp/exp_rope_clipcls_vit_b16_s512m_bs8k/out.log
|
| 123 |
+
2025-02-23,13:12:09 | INFO | logs: ./logs-lr1e-3-datacomp
|
| 124 |
+
2025-02-23,13:12:09 | INFO | lr: 0.001
|
| 125 |
+
2025-02-23,13:12:09 | INFO | lr_cooldown_end: 0.0
|
| 126 |
+
2025-02-23,13:12:09 | INFO | lr_cooldown_power: 1.0
|
| 127 |
+
2025-02-23,13:12:09 | INFO | lr_scheduler: cosine
|
| 128 |
+
2025-02-23,13:12:09 | INFO | max_seq_len: 100000000000000
|
| 129 |
+
2025-02-23,13:12:09 | INFO | model: CLIPCLS-ViT-B-16-NDR
|
| 130 |
+
2025-02-23,13:12:09 | INFO | name: exp_rope_clipcls_vit_b16_s512m_bs8k
|
| 131 |
+
2025-02-23,13:12:09 | INFO | native_dynamic_resolution: True
|
| 132 |
+
2025-02-23,13:12:09 | INFO | no_set_device_rank: False
|
| 133 |
+
2025-02-23,13:12:09 | INFO | only_packing: True
|
| 134 |
+
2025-02-23,13:12:09 | INFO | precision: amp_bfloat16
|
| 135 |
+
2025-02-23,13:12:09 | INFO | pretrained:
|
| 136 |
+
2025-02-23,13:12:09 | INFO | pretrained_image:
|
| 137 |
+
2025-02-23,13:12:09 | INFO | pretrained_text:
|
| 138 |
+
2025-02-23,13:12:09 | INFO | rank: 0
|
| 139 |
+
2025-02-23,13:12:09 | INFO | remote_sync: None
|
| 140 |
+
2025-02-23,13:12:09 | INFO | remote_sync_frequency: 300
|
| 141 |
+
2025-02-23,13:12:09 | INFO | remote_sync_protocol: s3
|
| 142 |
+
2025-02-23,13:12:09 | INFO | report_to: wandb
|
| 143 |
+
2025-02-23,13:12:09 | INFO | resume: None
|
| 144 |
+
2025-02-23,13:12:09 | INFO | rope_attn_num_heads: 12
|
| 145 |
+
2025-02-23,13:12:09 | INFO | rope_model_width: 768
|
| 146 |
+
2025-02-23,13:12:09 | INFO | save_every_n_steps: 6104
|
| 147 |
+
2025-02-23,13:12:09 | INFO | save_frequency: 1
|
| 148 |
+
2025-02-23,13:12:09 | INFO | save_most_recent: False
|
| 149 |
+
2025-02-23,13:12:09 | INFO | seed: 0
|
| 150 |
+
2025-02-23,13:12:09 | INFO | siglip: False
|
| 151 |
+
2025-02-23,13:12:09 | INFO | skip_scheduler: False
|
| 152 |
+
2025-02-23,13:12:09 | INFO | tensorboard: False
|
| 153 |
+
2025-02-23,13:12:09 | INFO | tensorboard_path:
|
| 154 |
+
2025-02-23,13:12:09 | INFO | torchcompile: False
|
| 155 |
+
2025-02-23,13:12:09 | INFO | torchscript: False
|
| 156 |
+
2025-02-23,13:12:09 | INFO | trace: False
|
| 157 |
+
2025-02-23,13:12:09 | INFO | train_data: /mnt/bn/bytenas-weixian/data/Recap-DataComp-1B-Dataset/{000000..140146}.tar
|
| 158 |
+
2025-02-23,13:12:09 | INFO | train_data_upsampling_factors: None
|
| 159 |
+
2025-02-23,13:12:09 | INFO | train_num_samples: 128000000
|
| 160 |
+
2025-02-23,13:12:09 | INFO | use_bn_sync: False
|
| 161 |
+
2025-02-23,13:12:09 | INFO | use_bnb_linear: None
|
| 162 |
+
2025-02-23,13:12:09 | INFO | val_data: None
|
| 163 |
+
2025-02-23,13:12:09 | INFO | val_frequency: 1
|
| 164 |
+
2025-02-23,13:12:09 | INFO | val_num_samples: None
|
| 165 |
+
2025-02-23,13:12:09 | INFO | val_steps: 6104
|
| 166 |
+
2025-02-23,13:12:09 | INFO | wandb: True
|
| 167 |
+
2025-02-23,13:12:09 | INFO | wandb_notes:
|
| 168 |
+
2025-02-23,13:12:09 | INFO | wandb_project_name: cls-clip-NDR
|
| 169 |
+
2025-02-23,13:12:09 | INFO | warmup: 500
|
| 170 |
+
2025-02-23,13:12:09 | INFO | wd: 0.2
|
| 171 |
+
2025-02-23,13:12:09 | INFO | workers: 6
|
| 172 |
+
2025-02-23,13:12:09 | INFO | world_size: 8
|
| 173 |
+
2025-02-23,13:12:09 | INFO | zeroshot_frequency: 2
|
| 174 |
+
2025-02-23,13:12:09 | INFO | zeroshot_steps: 6104
|
| 175 |
+
2025-02-23,13:12:28 | INFO | Start epoch 0
|
| 176 |
+
2025-02-23,13:12:43 | INFO | Train Epoch: 0 [ 8192/128040960 (0%)] Data (t): 10.707 Batch (t): 14.944, 548.167/s, 68.5209/s/gpu LR: 0.000002 Logit Scale: 14.286 Class_loss: 11.311 (11.311) Contrastive_loss: 9.0709 (9.0709) Loss: 20.382 (20.382)
|
| 177 |
+
2025-02-23,13:13:52 | WARNING | Handling webdataset error (OSError('image file is truncated (44 bytes not processed)')). Ignoring.
|
| 178 |
+
2025-02-23,13:21:00 | INFO | Train Epoch: 0 [ 1056768/128040960 (1%)] Data (t): 0.599 Batch (t): 3.881, 2101.42/s, 262.678/s/gpu LR: 0.000258 Logit Scale: 14.255 Class_loss: 9.1021 (10.206) Contrastive_loss: 8.4561 (8.7635) Loss: 17.558 (18.970)
|
| 179 |
+
2025-02-23,13:22:48 | WARNING | Handling webdataset error (OSError('image file is truncated (47 bytes not processed)')). Ignoring.
|
| 180 |
+
2025-02-23,13:29:19 | INFO | Train Epoch: 0 [ 2105344/128040960 (2%)] Data (t): 0.582 Batch (t): 3.898, 2358.26/s, 294.783/s/gpu LR: 0.000514 Logit Scale: 14.250 Class_loss: 8.9261 (9.7797) Contrastive_loss: 7.8397 (8.4555) Loss: 16.766 (18.235)
|
| 181 |
+
2025-02-23,13:36:04 | WARNING | Handling webdataset error (OSError('image file is truncated (68 bytes not processed)')). Ignoring.
|
| 182 |
+
2025-02-23,13:37:39 | INFO | Train Epoch: 0 [ 3153920/128040960 (2%)] Data (t): 0.600 Batch (t): 3.903, 1496.77/s, 187.096/s/gpu LR: 0.000770 Logit Scale: 14.333 Class_loss: 8.7444 (9.5209) Contrastive_loss: 7.3942 (8.1902) Loss: 16.139 (17.711)
|
| 183 |
+
2025-02-23,13:45:57 | INFO | Train Epoch: 0 [ 4202496/128040960 (3%)] Data (t): 0.592 Batch (t): 3.890, 1803.19/s, 225.399/s/gpu LR: 0.001000 Logit Scale: 14.793 Class_loss: 8.6215 (9.3410) Contrastive_loss: 7.0694 (7.9660) Loss: 15.691 (17.307)
|
| 184 |
+
2025-02-23,13:52:46 | WARNING | Handling webdataset error (OSError('image file is truncated (25 bytes not processed)')). Ignoring.
|
| 185 |
+
2025-02-23,13:54:13 | INFO | Train Epoch: 0 [ 5251072/128040960 (4%)] Data (t): 0.568 Batch (t): 3.877, 2204.53/s, 275.566/s/gpu LR: 0.001000 Logit Scale: 16.099 Class_loss: 8.5467 (9.2086) Contrastive_loss: 6.5581 (7.7314) Loss: 15.105 (16.940)
|
| 186 |
+
2025-02-23,13:56:10 | WARNING | Handling webdataset error (OSError('image file is truncated (35 bytes not processed)')). Ignoring.
|
| 187 |
+
2025-02-23,14:02:30 | INFO | Train Epoch: 0 [ 6299648/128040960 (5%)] Data (t): 0.576 Batch (t): 3.884, 1710.11/s, 213.764/s/gpu LR: 0.001000 Logit Scale: 17.683 Class_loss: 8.4530 (9.1007) Contrastive_loss: 6.1219 (7.5015) Loss: 14.575 (16.602)
|
| 188 |
+
2025-02-23,14:10:51 | INFO | Train Epoch: 0 [ 7348224/128040960 (6%)] Data (t): 0.582 Batch (t): 3.914, 1958.33/s, 244.791/s/gpu LR: 0.001000 Logit Scale: 19.436 Class_loss: 8.3378 (9.0053) Contrastive_loss: 5.7965 (7.2883) Loss: 14.134 (16.294)
|
| 189 |
+
2025-02-23,14:19:04 | INFO | Train Epoch: 0 [ 8396800/128040960 (7%)] Data (t): 0.575 Batch (t): 3.856, 2435.36/s, 304.420/s/gpu LR: 0.001000 Logit Scale: 21.366 Class_loss: 8.2516 (8.9216) Contrastive_loss: 5.5538 (7.0956) Loss: 13.805 (16.017)
|
| 190 |
+
2025-02-23,14:27:24 | INFO | Train Epoch: 0 [ 9445376/128040960 (7%)] Data (t): 0.583 Batch (t): 3.903, 1773.51/s, 221.688/s/gpu LR: 0.001000 Logit Scale: 23.168 Class_loss: 8.0835 (8.8378) Contrastive_loss: 5.0439 (6.8904) Loss: 13.127 (15.728)
|
| 191 |
+
2025-02-23,14:35:43 | INFO | Train Epoch: 0 [ 10493952/128040960 (8%)] Data (t): 0.591 Batch (t): 3.901, 1913.58/s, 239.197/s/gpu LR: 0.001000 Logit Scale: 24.937 Class_loss: 8.0279 (8.7641) Contrastive_loss: 5.0101 (6.7195) Loss: 13.038 (15.484)
|
| 192 |
+
2025-02-23,14:35:50 | WARNING | Handling webdataset error (OSError('image file is truncated (104 bytes not processed)')). Ignoring.
|
| 193 |
+
2025-02-23,14:41:31 | WARNING | Handling webdataset error (OSError('image file is truncated (21 bytes not processed)')). Ignoring.
|
| 194 |
+
2025-02-23,14:43:59 | INFO | Train Epoch: 0 [ 11542528/128040960 (9%)] Data (t): 0.588 Batch (t): 3.874, 2421.35/s, 302.668/s/gpu LR: 0.000999 Logit Scale: 26.585 Class_loss: 8.0193 (8.7021) Contrastive_loss: 4.7237 (6.5532) Loss: 12.743 (15.255)
|
| 195 |
+
2025-02-23,14:52:16 | INFO | Train Epoch: 0 [ 12591104/128040960 (10%)] Data (t): 0.582 Batch (t): 3.881, 1943.61/s, 242.951/s/gpu LR: 0.000999 Logit Scale: 27.926 Class_loss: 7.9741 (8.6461) Contrastive_loss: 4.6122 (6.4039) Loss: 12.586 (15.050)
|
| 196 |
+
2025-02-23,14:52:28 | WARNING | Handling webdataset error (OSError('image file is truncated (68 bytes not processed)')). Ignoring.
|
| 197 |
+
2025-02-23,15:00:37 | INFO | Train Epoch: 0 [ 13639680/128040960 (11%)] Data (t): 0.589 Batch (t): 3.912, 2007.46/s, 250.933/s/gpu LR: 0.000999 Logit Scale: 29.127 Class_loss: 7.8066 (8.5861) Contrastive_loss: 4.3675 (6.2584) Loss: 12.174 (14.845)
|
| 198 |
+
2025-02-23,15:02:03 | WARNING | Handling webdataset error (OSError('image file is truncated (28 bytes not processed)')). Ignoring.
|
| 199 |
+
2025-02-23,15:08:55 | WARNING | Handling webdataset error (OSError('image file is truncated (32 bytes not processed)')). Ignoring.
|
| 200 |
+
2025-02-23,15:08:56 | INFO | Train Epoch: 0 [ 14688256/128040960 (11%)] Data (t): 0.589 Batch (t): 3.902, 2144.54/s, 268.067/s/gpu LR: 0.000999 Logit Scale: 30.481 Class_loss: 7.8160 (8.5348) Contrastive_loss: 4.2199 (6.1225) Loss: 12.036 (14.657)
|
| 201 |
+
2025-02-23,15:10:29 | WARNING | Handling webdataset error (OSError('image file is truncated (5 bytes not processed)')). Ignoring.
|
| 202 |
+
2025-02-23,15:17:15 | INFO | Train Epoch: 0 [ 15736832/128040960 (12%)] Data (t): 0.574 Batch (t): 3.895, 1953.49/s, 244.186/s/gpu LR: 0.000999 Logit Scale: 31.580 Class_loss: 7.8392 (8.4913) Contrastive_loss: 4.1857 (6.0015) Loss: 12.025 (14.493)
|
| 203 |
+
2025-02-23,15:22:02 | WARNING | Handling webdataset error (OSError('image file is truncated (23 bytes not processed)')). Ignoring.
|
| 204 |
+
2025-02-23,15:25:34 | INFO | Train Epoch: 0 [ 16785408/128040960 (13%)] Data (t): 0.590 Batch (t): 3.898, 1977.38/s, 247.172/s/gpu LR: 0.000998 Logit Scale: 32.576 Class_loss: 7.7953 (8.4503) Contrastive_loss: 4.0662 (5.8876) Loss: 11.862 (14.338)
|
| 205 |
+
2025-02-23,15:33:56 | INFO | Train Epoch: 0 [ 17833984/128040960 (14%)] Data (t): 0.582 Batch (t): 3.922, 2343.18/s, 292.898/s/gpu LR: 0.000998 Logit Scale: 33.473 Class_loss: 7.7390 (8.4108) Contrastive_loss: 3.8979 (5.7771) Loss: 11.637 (14.188)
|
| 206 |
+
2025-02-23,15:42:13 | INFO | Train Epoch: 0 [ 18882560/128040960 (15%)] Data (t): 0.586 Batch (t): 3.886, 1697.62/s, 212.202/s/gpu LR: 0.000998 Logit Scale: 34.328 Class_loss: 7.6724 (8.3720) Contrastive_loss: 3.7513 (5.6705) Loss: 11.424 (14.042)
|
| 207 |
+
2025-02-23,15:50:35 | INFO | Train Epoch: 0 [ 19931136/128040960 (16%)] Data (t): 0.591 Batch (t): 3.918, 2126.56/s, 265.819/s/gpu LR: 0.000998 Logit Scale: 35.155 Class_loss: 7.7281 (8.3398) Contrastive_loss: 3.7844 (5.5762) Loss: 11.512 (13.916)
|
| 208 |
+
2025-02-23,15:58:54 | INFO | Train Epoch: 0 [ 20979712/128040960 (16%)] Data (t): 0.580 Batch (t): 3.901, 2292.57/s, 286.571/s/gpu LR: 0.000997 Logit Scale: 35.931 Class_loss: 7.6948 (8.3091) Contrastive_loss: 3.6492 (5.4844) Loss: 11.344 (13.793)
|
| 209 |
+
2025-02-23,16:07:10 | INFO | Train Epoch: 0 [ 22028288/128040960 (17%)] Data (t): 0.568 Batch (t): 3.878, 1756.62/s, 219.578/s/gpu LR: 0.000997 Logit Scale: 36.574 Class_loss: 7.6475 (8.2790) Contrastive_loss: 3.5964 (5.3986) Loss: 11.244 (13.678)
|
| 210 |
+
2025-02-23,16:15:29 | INFO | Train Epoch: 0 [ 23076864/128040960 (18%)] Data (t): 0.576 Batch (t): 3.899, 2101.20/s, 262.650/s/gpu LR: 0.000997 Logit Scale: 37.341 Class_loss: 7.5790 (8.2485) Contrastive_loss: 3.4505 (5.3139) Loss: 11.029 (13.562)
|
| 211 |
+
2025-02-23,16:19:19 | WARNING | Handling webdataset error (OSError('image file is truncated (114 bytes not processed)')). Ignoring.
|
| 212 |
+
2025-02-23,16:23:47 | INFO | Train Epoch: 0 [ 24125440/128040960 (19%)] Data (t): 0.575 Batch (t): 3.891, 2361.97/s, 295.247/s/gpu LR: 0.000996 Logit Scale: 38.044 Class_loss: 7.5879 (8.2210) Contrastive_loss: 3.3941 (5.2339) Loss: 10.982 (13.455)
|
| 213 |
+
2025-02-23,16:27:57 | WARNING | Handling webdataset error (OSError('image file is truncated (57 bytes not processed)')). Ignoring.
|
| 214 |
+
2025-02-23,16:32:05 | INFO | Train Epoch: 0 [ 25174016/128040960 (20%)] Data (t): 0.567 Batch (t): 3.887, 1834.13/s, 229.267/s/gpu LR: 0.000996 Logit Scale: 38.669 Class_loss: 7.6978 (8.2001) Contrastive_loss: 3.4617 (5.1630) Loss: 11.159 (13.363)
|
| 215 |
+
2025-02-23,16:40:23 | INFO | Train Epoch: 0 [ 26222592/128040960 (20%)] Data (t): 0.582 Batch (t): 3.893, 1834.67/s, 229.334/s/gpu LR: 0.000995 Logit Scale: 39.157 Class_loss: 7.5242 (8.1741) Contrastive_loss: 3.1117 (5.0841) Loss: 10.636 (13.258)
|
| 216 |
+
2025-02-23,16:41:36 | WARNING | Handling webdataset error (OSError('image file is truncated (75 bytes not processed)')). Ignoring.
|
| 217 |
+
2025-02-23,16:44:59 | WARNING | Handling webdataset error (OSError('image file is truncated (0 bytes not processed)')). Ignoring.
|
| 218 |
+
2025-02-23,16:48:40 | INFO | Train Epoch: 0 [ 27271168/128040960 (21%)] Data (t): 0.570 Batch (t): 3.876, 2393.05/s, 299.131/s/gpu LR: 0.000995 Logit Scale: 39.766 Class_loss: 7.6023 (8.1529) Contrastive_loss: 3.2599 (5.0165) Loss: 10.862 (13.169)
|
| 219 |
+
2025-02-23,16:51:14 | WARNING | Handling webdataset error (OSError('image file is truncated (59 bytes not processed)')). Ignoring.
|
| 220 |
+
2025-02-23,16:56:55 | INFO | Train Epoch: 0 [ 28319744/128040960 (22%)] Data (t): 0.578 Batch (t): 3.873, 1814.12/s, 226.765/s/gpu LR: 0.000994 Logit Scale: 40.230 Class_loss: 7.5114 (8.1300) Contrastive_loss: 3.2006 (4.9517) Loss: 10.712 (13.082)
|
| 221 |
+
2025-02-23,17:05:13 | INFO | Train Epoch: 0 [ 29368320/128040960 (23%)] Data (t): 0.575 Batch (t): 3.891, 2127.31/s, 265.913/s/gpu LR: 0.000994 Logit Scale: 40.706 Class_loss: 7.4508 (8.1066) Contrastive_loss: 3.1043 (4.8880) Loss: 10.555 (12.995)
|
| 222 |
+
2025-02-23,17:08:16 | WARNING | Handling webdataset error (OSError('image file is truncated (1 bytes not processed)')). Ignoring.
|
| 223 |
+
2025-02-23,17:13:30 | INFO | Train Epoch: 0 [ 30416896/128040960 (24%)] Data (t): 0.560 Batch (t): 3.879, 2415.30/s, 301.912/s/gpu LR: 0.000993 Logit Scale: 41.238 Class_loss: 7.5722 (8.0888) Contrastive_loss: 3.0906 (4.8281) Loss: 10.663 (12.917)
|
| 224 |
+
2025-02-23,17:21:47 | INFO | Train Epoch: 0 [ 31465472/128040960 (25%)] Data (t): 0.567 Batch (t): 3.884, 1807.93/s, 225.991/s/gpu LR: 0.000993 Logit Scale: 41.587 Class_loss: 7.5039 (8.0699) Contrastive_loss: 3.1094 (4.7726) Loss: 10.613 (12.843)
|
| 225 |
+
2025-02-23,17:30:08 | INFO | Train Epoch: 0 [ 32514048/128040960 (25%)] Data (t): 0.564 Batch (t): 3.911, 1776.85/s, 222.106/s/gpu LR: 0.000992 Logit Scale: 41.969 Class_loss: 7.4753 (8.0513) Contrastive_loss: 2.9970 (4.7171) Loss: 10.472 (12.768)
|
| 226 |
+
2025-02-23,17:37:36 | WARNING | Handling webdataset error (OSError('image file is truncated (0 bytes not processed)')). Ignoring.
|
| 227 |
+
2025-02-23,17:38:26 | INFO | Train Epoch: 0 [ 33562624/128040960 (26%)] Data (t): 0.579 Batch (t): 3.890, 2202.74/s, 275.343/s/gpu LR: 0.000992 Logit Scale: 42.419 Class_loss: 7.5543 (8.0363) Contrastive_loss: 3.1043 (4.6683) Loss: 10.659 (12.705)
|
| 228 |
+
2025-02-23,17:46:40 | INFO | Train Epoch: 0 [ 34611200/128040960 (27%)] Data (t): 0.585 Batch (t): 3.865, 1889.41/s, 236.176/s/gpu LR: 0.000991 Logit Scale: 42.823 Class_loss: 7.3684 (8.0166) Contrastive_loss: 2.8546 (4.6149) Loss: 10.223 (12.632)
|
| 229 |
+
2025-02-23,17:54:57 | INFO | Train Epoch: 0 [ 35659776/128040960 (28%)] Data (t): 0.583 Batch (t): 3.882, 2059.71/s, 257.464/s/gpu LR: 0.000991 Logit Scale: 43.244 Class_loss: 7.4483 (8.0004) Contrastive_loss: 2.8973 (4.5659) Loss: 10.346 (12.566)
|
| 230 |
+
2025-02-23,18:03:13 | INFO | Train Epoch: 0 [ 36708352/128040960 (29%)] Data (t): 0.566 Batch (t): 3.876, 2395.99/s, 299.499/s/gpu LR: 0.000990 Logit Scale: 43.583 Class_loss: 7.4030 (7.9838) Contrastive_loss: 2.7918 (4.5166) Loss: 10.195 (12.500)
|
| 231 |
+
2025-02-23,18:11:34 | INFO | Train Epoch: 0 [ 37756928/128040960 (29%)] Data (t): 0.566 Batch (t): 3.908, 1831.96/s, 228.995/s/gpu LR: 0.000989 Logit Scale: 43.975 Class_loss: 7.4112 (7.9683) Contrastive_loss: 2.9169 (4.4733) Loss: 10.328 (12.442)
|
| 232 |
+
2025-02-23,18:13:06 | WARNING | Handling webdataset error (OSError('image file is truncated (59 bytes not processed)')). Ignoring.
|
| 233 |
+
2025-02-23,18:13:21 | WARNING | Handling webdataset error (OSError('image file is truncated (18 bytes not processed)')). Ignoring.
|
| 234 |
+
2025-02-23,18:19:54 | INFO | Train Epoch: 0 [ 38805504/128040960 (30%)] Data (t): 0.573 Batch (t): 3.913, 2062.09/s, 257.762/s/gpu LR: 0.000989 Logit Scale: 44.289 Class_loss: 7.5102 (7.9563) Contrastive_loss: 2.9506 (4.4333) Loss: 10.461 (12.390)
|
| 235 |
+
2025-02-23,18:26:34 | WARNING | Handling webdataset error (OSError('image file is truncated (84 bytes not processed)')). Ignoring.
|
| 236 |
+
2025-02-23,18:28:15 | INFO | Train Epoch: 0 [ 39854080/128040960 (31%)] Data (t): 0.556 Batch (t): 3.908, 2400.92/s, 300.115/s/gpu LR: 0.000988 Logit Scale: 44.617 Class_loss: 7.4665 (7.9437) Contrastive_loss: 2.8113 (4.3917) Loss: 10.278 (12.335)
|
| 237 |
+
2025-02-23,18:36:30 | INFO | Train Epoch: 0 [ 40902656/128040960 (32%)] Data (t): 0.560 Batch (t): 3.869, 1901.30/s, 237.663/s/gpu LR: 0.000987 Logit Scale: 44.933 Class_loss: 7.3862 (7.9298) Contrastive_loss: 2.7733 (4.3512) Loss: 10.159 (12.281)
|
| 238 |
+
2025-02-23,18:44:44 | INFO | Train Epoch: 0 [ 41951232/128040960 (33%)] Data (t): 0.577 Batch (t): 3.861, 2104.11/s, 263.013/s/gpu LR: 0.000986 Logit Scale: 45.279 Class_loss: 7.3391 (7.9154) Contrastive_loss: 2.6347 (4.3094) Loss: 9.9738 (12.225)
|
| 239 |
+
2025-02-23,18:49:29 | WARNING | Handling webdataset error (OSError('image file is truncated (18 bytes not processed)')). Ignoring.
|
| 240 |
+
2025-02-23,18:52:55 | INFO | Train Epoch: 0 [ 42999808/128040960 (34%)] Data (t): 0.566 Batch (t): 3.838, 2376.47/s, 297.059/s/gpu LR: 0.000986 Logit Scale: 45.562 Class_loss: 7.3534 (7.9020) Contrastive_loss: 2.7322 (4.2718) Loss: 10.086 (12.174)
|
| 241 |
+
2025-02-23,19:01:04 | INFO | Train Epoch: 0 [ 44048384/128040960 (34%)] Data (t): 0.571 Batch (t): 3.815, 2192.53/s, 274.067/s/gpu LR: 0.000985 Logit Scale: 45.840 Class_loss: 7.3400 (7.8889) Contrastive_loss: 2.6036 (4.2330) Loss: 9.9436 (12.122)
|
| 242 |
+
2025-02-23,19:08:38 | WARNING | Handling webdataset error (OSError('image file is truncated (49 bytes not processed)')). Ignoring.
|
| 243 |
+
2025-02-23,19:09:11 | INFO | Train Epoch: 0 [ 45096960/128040960 (35%)] Data (t): 0.574 Batch (t): 3.807, 2004.62/s, 250.577/s/gpu LR: 0.000984 Logit Scale: 46.126 Class_loss: 7.3479 (7.8766) Contrastive_loss: 2.5682 (4.1952) Loss: 9.9162 (12.072)
|
| 244 |
+
2025-02-23,19:11:36 | WARNING | Handling webdataset error (OSError('image file is truncated (5 bytes not processed)')). Ignoring.
|
| 245 |
+
2025-02-23,19:17:19 | INFO | Train Epoch: 0 [ 46145536/128040960 (36%)] Data (t): 0.578 Batch (t): 3.814, 2168.97/s, 271.121/s/gpu LR: 0.000983 Logit Scale: 46.387 Class_loss: 7.4054 (7.8661) Contrastive_loss: 2.7297 (4.1626) Loss: 10.135 (12.029)
|
| 246 |
+
2025-02-23,19:25:26 | INFO | Train Epoch: 0 [ 47194112/128040960 (37%)] Data (t): 0.587 Batch (t): 3.807, 2087.89/s, 260.986/s/gpu LR: 0.000982 Logit Scale: 46.735 Class_loss: 7.3726 (7.8554) Contrastive_loss: 2.6101 (4.1289) Loss: 9.9827 (11.984)
|
| 247 |
+
2025-02-23,19:29:10 | WARNING | Handling webdataset error (OSError('image file is truncated (86 bytes not processed)')). Ignoring.
|
| 248 |
+
2025-02-23,19:33:32 | INFO | Train Epoch: 0 [ 48242688/128040960 (38%)] Data (t): 0.579 Batch (t): 3.793, 2090.68/s, 261.334/s/gpu LR: 0.000981 Logit Scale: 47.001 Class_loss: 7.3526 (7.8447) Contrastive_loss: 2.5917 (4.0962) Loss: 9.9443 (11.941)
|
| 249 |
+
2025-02-23,19:41:36 | INFO | Train Epoch: 0 [ 49291264/128040960 (38%)] Data (t): 0.579 Batch (t): 3.785, 2044.74/s, 255.593/s/gpu LR: 0.000981 Logit Scale: 47.336 Class_loss: 7.2820 (7.8330) Contrastive_loss: 2.5000 (4.0629) Loss: 9.7821 (11.896)
|
| 250 |
+
2025-02-23,19:47:10 | INFO | Starting zero-shot imagenet.
|
| 251 |
+
2025-02-23,19:47:10 | INFO | Building zero-shot classifier
|
| 252 |
+
2025-02-23,19:47:20 | INFO | Using classifier
|
| 253 |
+
2025-02-23,19:52:05 | INFO | Finished zero-shot imagenet.
|
| 254 |
+
2025-02-23,19:52:05 | INFO | Eval Epoch: 0.39046705054382597 imagenet-zeroshot-val-top1: 0.0016 imagenet-zeroshot-val-top5: 0.0029
|
| 255 |
+
2025-02-23,19:54:49 | INFO | Train Epoch: 0 [ 50339840/128040960 (39%)] Data (t): 2.951 Batch (t): 6.193, 1968.39/s, 246.049/s/gpu LR: 0.000980 Logit Scale: 47.651 Class_loss: 7.1624 (7.8193) Contrastive_loss: 2.3409 (4.0278) Loss: 9.5033 (11.847)
|
| 256 |
+
2025-02-23,19:55:24 | WARNING | Handling webdataset error (OSError('image file is truncated (3 bytes not processed)')). Ignoring.
|
| 257 |
+
2025-02-23,20:00:35 | WARNING | Handling webdataset error (OSError('image file is truncated (4 bytes not processed)')). Ignoring.
|
| 258 |
+
2025-02-23,20:02:59 | INFO | Train Epoch: 0 [ 51388416/128040960 (40%)] Data (t): 0.599 Batch (t): 3.827, 2098.02/s, 262.252/s/gpu LR: 0.000979 Logit Scale: 48.004 Class_loss: 7.2576 (7.8081) Contrastive_loss: 2.3933 (3.9951) Loss: 9.6509 (11.803)
|
| 259 |
+
2025-02-23,20:11:08 | INFO | Train Epoch: 0 [ 52436992/128040960 (41%)] Data (t): 0.586 Batch (t): 3.821, 2202.76/s, 275.345/s/gpu LR: 0.000978 Logit Scale: 48.332 Class_loss: 7.2700 (7.7975) Contrastive_loss: 2.4359 (3.9645) Loss: 9.7059 (11.762)
|
| 260 |
+
2025-02-23,20:19:15 | INFO | Train Epoch: 0 [ 53485568/128040960 (42%)] Data (t): 0.594 Batch (t): 3.807, 2094.91/s, 261.863/s/gpu LR: 0.000977 Logit Scale: 48.544 Class_loss: 7.3811 (7.7895) Contrastive_loss: 2.4358 (3.9351) Loss: 9.8170 (11.725)
|
| 261 |
+
2025-02-23,20:27:27 | INFO | Train Epoch: 0 [ 54534144/128040960 (43%)] Data (t): 0.588 Batch (t): 3.840, 1794.31/s, 224.289/s/gpu LR: 0.000976 Logit Scale: 48.794 Class_loss: 7.2531 (7.7794) Contrastive_loss: 2.4125 (3.9064) Loss: 9.6657 (11.686)
|
| 262 |
+
2025-02-23,20:35:37 | INFO | Train Epoch: 0 [ 55582720/128040960 (43%)] Data (t): 0.579 Batch (t): 3.831, 2343.31/s, 292.914/s/gpu LR: 0.000975 Logit Scale: 49.100 Class_loss: 7.2532 (7.7696) Contrastive_loss: 2.2154 (3.8751) Loss: 9.4686 (11.645)
|
| 263 |
+
2025-02-23,20:43:47 | INFO | Train Epoch: 0 [ 56631296/128040960 (44%)] Data (t): 0.573 Batch (t): 3.827, 2122.87/s, 265.358/s/gpu LR: 0.000974 Logit Scale: 49.410 Class_loss: 7.2306 (7.7598) Contrastive_loss: 2.3136 (3.8467) Loss: 9.5442 (11.607)
|
| 264 |
+
2025-02-23,20:52:00 | INFO | Train Epoch: 0 [ 57679872/128040960 (45%)] Data (t): 0.604 Batch (t): 3.846, 2096.23/s, 262.029/s/gpu LR: 0.000973 Logit Scale: 49.716 Class_loss: 7.1305 (7.7486) Contrastive_loss: 2.0969 (3.8154) Loss: 9.2274 (11.564)
|
| 265 |
+
2025-02-23,20:56:40 | WARNING | Handling webdataset error (OSError('image file is truncated (82 bytes not processed)')). Ignoring.
|
| 266 |
+
2025-02-23,21:00:08 | INFO | Train Epoch: 0 [ 58728448/128040960 (46%)] Data (t): 0.602 Batch (t): 3.819, 2354.92/s, 294.366/s/gpu LR: 0.000972 Logit Scale: 49.892 Class_loss: 7.2595 (7.7400) Contrastive_loss: 2.2807 (3.7885) Loss: 9.5402 (11.529)
|
| 267 |
+
2025-02-23,21:01:10 | WARNING | Handling webdataset error (OSError('image file is truncated (107 bytes not processed)')). Ignoring.
|
| 268 |
+
2025-02-23,21:07:06 | WARNING | Handling webdataset error (OSError('image file is truncated (73 bytes not processed)')). Ignoring.
|
| 269 |
+
2025-02-23,21:08:15 | INFO | Train Epoch: 0 [ 59777024/128040960 (47%)] Data (t): 0.581 Batch (t): 3.804, 2195.81/s, 274.476/s/gpu LR: 0.000971 Logit Scale: 50.232 Class_loss: 7.1226 (7.7294) Contrastive_loss: 2.0808 (3.7591) Loss: 9.2033 (11.488)
|
| 270 |
+
2025-02-23,21:09:18 | WARNING | Handling webdataset error (OSError('image file is truncated (70 bytes not processed)')). Ignoring.
|
| 271 |
+
2025-02-23,21:16:22 | INFO | Train Epoch: 0 [ 60825600/128040960 (48%)] Data (t): 0.561 Batch (t): 3.800, 1967.50/s, 245.937/s/gpu LR: 0.000970 Logit Scale: 50.393 Class_loss: 7.1660 (7.7198) Contrastive_loss: 2.1565 (3.7319) Loss: 9.3225 (11.452)
|
| 272 |
+
2025-02-23,21:23:36 | WARNING | Handling webdataset error (OSError('image file is truncated (31 bytes not processed)')). Ignoring.
|
| 273 |
+
2025-02-23,21:24:28 | INFO | Train Epoch: 0 [ 61874176/128040960 (48%)] Data (t): 0.557 Batch (t): 3.797, 2434.11/s, 304.264/s/gpu LR: 0.000968 Logit Scale: 50.774 Class_loss: 7.1578 (7.7105) Contrastive_loss: 2.1526 (3.7056) Loss: 9.3104 (11.416)
|
| 274 |
+
2025-02-23,21:31:49 | WARNING | Handling webdataset error (OSError('image file is truncated (76 bytes not processed)')). Ignoring.
|
| 275 |
+
2025-02-23,21:32:32 | INFO | Train Epoch: 0 [ 62922752/128040960 (49%)] Data (t): 0.564 Batch (t): 3.787, 2174.85/s, 271.856/s/gpu LR: 0.000967 Logit Scale: 50.970 Class_loss: 7.1338 (7.7010) Contrastive_loss: 2.0349 (3.6782) Loss: 9.1686 (11.379)
|
| 276 |
+
2025-02-23,21:37:26 | WARNING | Handling webdataset error (OSError('image file is truncated (2 bytes not processed)')). Ignoring.
|
| 277 |
+
2025-02-23,21:40:36 | INFO | Train Epoch: 0 [ 63971328/128040960 (50%)] Data (t): 0.582 Batch (t): 3.776, 2095.28/s, 261.910/s/gpu LR: 0.000966 Logit Scale: 51.282 Class_loss: 7.1992 (7.6929) Contrastive_loss: 2.1613 (3.6537) Loss: 9.3605 (11.347)
|
| 278 |
+
2025-02-23,21:48:40 | INFO | Train Epoch: 0 [ 65019904/128040960 (51%)] Data (t): 0.579 Batch (t): 3.779, 2174.94/s, 271.867/s/gpu LR: 0.000965 Logit Scale: 51.664 Class_loss: 7.2132 (7.6853) Contrastive_loss: 2.1094 (3.6292) Loss: 9.3227 (11.315)
|
| 279 |
+
2025-02-23,21:52:58 | WARNING | Handling webdataset error (OSError('image file is truncated (54 bytes not processed)')). Ignoring.
|
| 280 |
+
2025-02-23,21:56:47 | INFO | Train Epoch: 0 [ 66068480/128040960 (52%)] Data (t): 0.583 Batch (t): 3.810, 2149.65/s, 268.707/s/gpu LR: 0.000964 Logit Scale: 51.833 Class_loss: 7.1846 (7.6775) Contrastive_loss: 2.1898 (3.6067) Loss: 9.3745 (11.284)
|
| 281 |
+
2025-02-23,21:57:37 | WARNING | Handling webdataset error (OSError('image file is truncated (52 bytes not processed)')). Ignoring.
|
| 282 |
+
2025-02-23,22:01:01 | WARNING | Handling webdataset error (OSError('image file is truncated (31 bytes not processed)')). Ignoring.
|
| 283 |
+
2025-02-23,22:04:53 | INFO | Train Epoch: 0 [ 67117056/128040960 (52%)] Data (t): 0.578 Batch (t): 3.792, 2070.69/s, 258.836/s/gpu LR: 0.000963 Logit Scale: 52.079 Class_loss: 7.0895 (7.6684) Contrastive_loss: 1.9922 (3.5819) Loss: 9.0816 (11.250)
|
| 284 |
+
2025-02-23,22:11:46 | WARNING | Handling webdataset error (OSError('image file is truncated (46 bytes not processed)')). Ignoring.
|
| 285 |
+
2025-02-23,22:12:57 | INFO | Train Epoch: 0 [ 68165632/128040960 (53%)] Data (t): 0.570 Batch (t): 3.787, 2210.06/s, 276.257/s/gpu LR: 0.000961 Logit Scale: 52.366 Class_loss: 7.1580 (7.6607) Contrastive_loss: 2.1444 (3.5601) Loss: 9.3024 (11.221)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|