jflotz commited on
Commit
0cdc658
·
1 Parent(s): d8c3456

Training in progress, step 580000

Browse files
config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "/scratch/project/dd-23-53/experiments/pixel-base-bigrams/checkpoint-260000",
3
  "architectures": [
4
  "PIXELForPreTraining"
5
  ],
 
1
  {
2
+ "_name_or_path": "/scratch/project/dd-23-53/experiments/pixel-base-bigrams/checkpoint-570000",
3
  "architectures": [
4
  "PIXELForPreTraining"
5
  ],
last-checkpoint/config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "/scratch/project/dd-23-53/experiments/pixel-base-bigrams/checkpoint-260000",
3
  "architectures": [
4
  "PIXELForPreTraining"
5
  ],
 
1
  {
2
+ "_name_or_path": "/scratch/project/dd-23-53/experiments/pixel-base-bigrams/checkpoint-570000",
3
  "architectures": [
4
  "PIXELForPreTraining"
5
  ],
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3bb93720ada86a6004ba26eb4fabd56849226e35d2f46baf4052697153c666bb
3
  size 893439185
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:111490892a3e244f3ca9b9b0f1e04552db811c5c11d50e5dae81456c571735a9
3
  size 893439185
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e478b13a76d10a83f73453a6a99a172c3f9841bd66c63610def2c769bf0b203a
3
  size 449471589
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b5c26410b4d902c6d077c2953f9eace1e47991bc5dcd2f62ceb38a14b3e3ee84
3
  size 449471589
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d389965857734bb2eeaa6cafe0b8f8e001a5d59ae127d36647124f18dacab70e
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2c101353913b86ff575c9018e352c46e19aa450e36d2ee34d697b2d5ed877d1d
3
  size 14503
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d389965857734bb2eeaa6cafe0b8f8e001a5d59ae127d36647124f18dacab70e
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2c101353913b86ff575c9018e352c46e19aa450e36d2ee34d697b2d5ed877d1d
3
  size 14503
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d389965857734bb2eeaa6cafe0b8f8e001a5d59ae127d36647124f18dacab70e
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2c101353913b86ff575c9018e352c46e19aa450e36d2ee34d697b2d5ed877d1d
3
  size 14503
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d389965857734bb2eeaa6cafe0b8f8e001a5d59ae127d36647124f18dacab70e
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2c101353913b86ff575c9018e352c46e19aa450e36d2ee34d697b2d5ed877d1d
3
  size 14503
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d389965857734bb2eeaa6cafe0b8f8e001a5d59ae127d36647124f18dacab70e
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2c101353913b86ff575c9018e352c46e19aa450e36d2ee34d697b2d5ed877d1d
3
  size 14503
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d389965857734bb2eeaa6cafe0b8f8e001a5d59ae127d36647124f18dacab70e
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2c101353913b86ff575c9018e352c46e19aa450e36d2ee34d697b2d5ed877d1d
3
  size 14503
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d389965857734bb2eeaa6cafe0b8f8e001a5d59ae127d36647124f18dacab70e
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2c101353913b86ff575c9018e352c46e19aa450e36d2ee34d697b2d5ed877d1d
3
  size 14503
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d389965857734bb2eeaa6cafe0b8f8e001a5d59ae127d36647124f18dacab70e
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2c101353913b86ff575c9018e352c46e19aa450e36d2ee34d697b2d5ed877d1d
3
  size 14503
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e62e41706c3cdebd0963ceae6fb24ae079cf26e6452a67e31e4c02f3a80456e6
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fe2528712b13528834db613d1453dd7fb04ea3b7a1940fa3521b7d0be4a96c0a
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 6.357137280706646,
5
- "global_step": 570000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -11406,11 +11406,211 @@
11406
  "eval_samples_per_second": 887.536,
11407
  "eval_steps_per_second": 13.91,
11408
  "step": 570000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11409
  }
11410
  ],
11411
  "max_steps": 1000000,
11412
  "num_train_epochs": 12,
11413
- "total_flos": 3.9956820200446935e+22,
11414
  "trial_name": null,
11415
  "trial_params": null
11416
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 6.111528724222923,
5
+ "global_step": 580000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
11406
  "eval_samples_per_second": 887.536,
11407
  "eval_steps_per_second": 13.91,
11408
  "step": 570000
11409
+ },
11410
+ {
11411
+ "epoch": 6.01,
11412
+ "learning_rate": 6.950709521760712e-05,
11413
+ "loss": 0.2048,
11414
+ "step": 570500
11415
+ },
11416
+ {
11417
+ "epoch": 6.01,
11418
+ "learning_rate": 6.939267446224418e-05,
11419
+ "loss": 0.2046,
11420
+ "step": 571000
11421
+ },
11422
+ {
11423
+ "epoch": 6.01,
11424
+ "eval_loss": 0.19425606727600098,
11425
+ "eval_runtime": 2.5589,
11426
+ "eval_samples_per_second": 897.645,
11427
+ "eval_steps_per_second": 14.068,
11428
+ "step": 571000
11429
+ },
11430
+ {
11431
+ "epoch": 6.02,
11432
+ "learning_rate": 6.927828270690422e-05,
11433
+ "loss": 0.2047,
11434
+ "step": 571500
11435
+ },
11436
+ {
11437
+ "epoch": 6.02,
11438
+ "learning_rate": 6.91639202643299e-05,
11439
+ "loss": 0.2042,
11440
+ "step": 572000
11441
+ },
11442
+ {
11443
+ "epoch": 6.02,
11444
+ "eval_loss": 0.19538278877735138,
11445
+ "eval_runtime": 2.5656,
11446
+ "eval_samples_per_second": 895.308,
11447
+ "eval_steps_per_second": 14.032,
11448
+ "step": 572000
11449
+ },
11450
+ {
11451
+ "epoch": 6.03,
11452
+ "learning_rate": 6.904958744718383e-05,
11453
+ "loss": 0.2046,
11454
+ "step": 572500
11455
+ },
11456
+ {
11457
+ "epoch": 6.03,
11458
+ "learning_rate": 6.893528456804756e-05,
11459
+ "loss": 0.2042,
11460
+ "step": 573000
11461
+ },
11462
+ {
11463
+ "epoch": 6.03,
11464
+ "eval_loss": 0.19409753382205963,
11465
+ "eval_runtime": 2.5486,
11466
+ "eval_samples_per_second": 901.27,
11467
+ "eval_steps_per_second": 14.125,
11468
+ "step": 573000
11469
+ },
11470
+ {
11471
+ "epoch": 6.04,
11472
+ "learning_rate": 6.882101193942075e-05,
11473
+ "loss": 0.2042,
11474
+ "step": 573500
11475
+ },
11476
+ {
11477
+ "epoch": 6.04,
11478
+ "learning_rate": 6.870676987372044e-05,
11479
+ "loss": 0.2041,
11480
+ "step": 574000
11481
+ },
11482
+ {
11483
+ "epoch": 6.04,
11484
+ "eval_loss": 0.1939525604248047,
11485
+ "eval_runtime": 2.5437,
11486
+ "eval_samples_per_second": 903.03,
11487
+ "eval_steps_per_second": 14.153,
11488
+ "step": 574000
11489
+ },
11490
+ {
11491
+ "epoch": 6.05,
11492
+ "learning_rate": 6.859255868328003e-05,
11493
+ "loss": 0.2039,
11494
+ "step": 574500
11495
+ },
11496
+ {
11497
+ "epoch": 6.06,
11498
+ "learning_rate": 6.847837868034861e-05,
11499
+ "loss": 0.2042,
11500
+ "step": 575000
11501
+ },
11502
+ {
11503
+ "epoch": 6.06,
11504
+ "eval_loss": 0.1951504349708557,
11505
+ "eval_runtime": 2.5798,
11506
+ "eval_samples_per_second": 890.377,
11507
+ "eval_steps_per_second": 13.955,
11508
+ "step": 575000
11509
+ },
11510
+ {
11511
+ "epoch": 6.06,
11512
+ "learning_rate": 6.836423017708996e-05,
11513
+ "loss": 0.2038,
11514
+ "step": 575500
11515
+ },
11516
+ {
11517
+ "epoch": 6.07,
11518
+ "learning_rate": 6.825011348558167e-05,
11519
+ "loss": 0.204,
11520
+ "step": 576000
11521
+ },
11522
+ {
11523
+ "epoch": 6.07,
11524
+ "eval_loss": 0.19505272805690765,
11525
+ "eval_runtime": 2.5475,
11526
+ "eval_samples_per_second": 901.67,
11527
+ "eval_steps_per_second": 14.132,
11528
+ "step": 576000
11529
+ },
11530
+ {
11531
+ "epoch": 6.07,
11532
+ "learning_rate": 6.813602891781443e-05,
11533
+ "loss": 0.2039,
11534
+ "step": 576500
11535
+ },
11536
+ {
11537
+ "epoch": 6.08,
11538
+ "learning_rate": 6.802197678569109e-05,
11539
+ "loss": 0.2038,
11540
+ "step": 577000
11541
+ },
11542
+ {
11543
+ "epoch": 6.08,
11544
+ "eval_loss": 0.19440634548664093,
11545
+ "eval_runtime": 2.537,
11546
+ "eval_samples_per_second": 905.395,
11547
+ "eval_steps_per_second": 14.19,
11548
+ "step": 577000
11549
+ },
11550
+ {
11551
+ "epoch": 6.08,
11552
+ "learning_rate": 6.790795740102589e-05,
11553
+ "loss": 0.2038,
11554
+ "step": 577500
11555
+ },
11556
+ {
11557
+ "epoch": 6.09,
11558
+ "learning_rate": 6.779397107554339e-05,
11559
+ "loss": 0.2038,
11560
+ "step": 578000
11561
+ },
11562
+ {
11563
+ "epoch": 6.09,
11564
+ "eval_loss": 0.19268804788589478,
11565
+ "eval_runtime": 2.5143,
11566
+ "eval_samples_per_second": 913.559,
11567
+ "eval_steps_per_second": 14.318,
11568
+ "step": 578000
11569
+ },
11570
+ {
11571
+ "epoch": 6.09,
11572
+ "learning_rate": 6.768001812087789e-05,
11573
+ "loss": 0.2038,
11574
+ "step": 578500
11575
+ },
11576
+ {
11577
+ "epoch": 6.1,
11578
+ "learning_rate": 6.756609884857239e-05,
11579
+ "loss": 0.2037,
11580
+ "step": 579000
11581
+ },
11582
+ {
11583
+ "epoch": 6.1,
11584
+ "eval_loss": 0.19323283433914185,
11585
+ "eval_runtime": 2.5075,
11586
+ "eval_samples_per_second": 916.04,
11587
+ "eval_steps_per_second": 14.357,
11588
+ "step": 579000
11589
+ },
11590
+ {
11591
+ "epoch": 6.11,
11592
+ "learning_rate": 6.745221357007786e-05,
11593
+ "loss": 0.2037,
11594
+ "step": 579500
11595
+ },
11596
+ {
11597
+ "epoch": 6.11,
11598
+ "learning_rate": 6.733836259675233e-05,
11599
+ "loss": 0.2036,
11600
+ "step": 580000
11601
+ },
11602
+ {
11603
+ "epoch": 6.11,
11604
+ "eval_loss": 0.19253070652484894,
11605
+ "eval_runtime": 2.4507,
11606
+ "eval_samples_per_second": 937.277,
11607
+ "eval_steps_per_second": 14.69,
11608
+ "step": 580000
11609
  }
11610
  ],
11611
  "max_steps": 1000000,
11612
  "num_train_epochs": 12,
11613
+ "total_flos": 4.065782281075041e+22,
11614
  "trial_name": null,
11615
  "trial_params": null
11616
  }
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:17ba5c281d8b06e9a9525a16977a4c6939809adffe354977bdd6e1078fe1499c
3
  size 3311
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4a6ae323073c615770ba7c18bfa53b7b7169d86c9cda5252cdcc294ff053a147
3
  size 3311
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e478b13a76d10a83f73453a6a99a172c3f9841bd66c63610def2c769bf0b203a
3
  size 449471589
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b5c26410b4d902c6d077c2953f9eace1e47991bc5dcd2f62ceb38a14b3e3ee84
3
  size 449471589
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:17ba5c281d8b06e9a9525a16977a4c6939809adffe354977bdd6e1078fe1499c
3
  size 3311
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4a6ae323073c615770ba7c18bfa53b7b7169d86c9cda5252cdcc294ff053a147
3
  size 3311