jflotz committed on May 9, 2023

Commit

0cdc658

1 Parent(s): d8c3456

Training in progress, step 580000

Browse files

Files changed (17) hide show

config.json +1 -1
last-checkpoint/config.json +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/pytorch_model.bin +1 -1
last-checkpoint/rng_state_0.pth +1 -1
last-checkpoint/rng_state_1.pth +1 -1
last-checkpoint/rng_state_2.pth +1 -1
last-checkpoint/rng_state_3.pth +1 -1
last-checkpoint/rng_state_4.pth +1 -1
last-checkpoint/rng_state_5.pth +1 -1
last-checkpoint/rng_state_6.pth +1 -1
last-checkpoint/rng_state_7.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +203 -3
last-checkpoint/training_args.bin +1 -1
pytorch_model.bin +1 -1
training_args.bin +1 -1

config.json CHANGED Viewed

@@ -1,5 +1,5 @@
 {
-  "_name_or_path": "/scratch/project/dd-23-53/experiments/pixel-base-bigrams/checkpoint-260000",
   "architectures": [
     "PIXELForPreTraining"
   ],

 {
+  "_name_or_path": "/scratch/project/dd-23-53/experiments/pixel-base-bigrams/checkpoint-570000",
   "architectures": [
     "PIXELForPreTraining"
   ],

last-checkpoint/config.json CHANGED Viewed

@@ -1,5 +1,5 @@
 {
-  "_name_or_path": "/scratch/project/dd-23-53/experiments/pixel-base-bigrams/checkpoint-260000",
   "architectures": [
     "PIXELForPreTraining"
   ],

 {
+  "_name_or_path": "/scratch/project/dd-23-53/experiments/pixel-base-bigrams/checkpoint-570000",
   "architectures": [
     "PIXELForPreTraining"
   ],

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3bb93720ada86a6004ba26eb4fabd56849226e35d2f46baf4052697153c666bb
 size 893439185

 version https://git-lfs.github.com/spec/v1
+oid sha256:111490892a3e244f3ca9b9b0f1e04552db811c5c11d50e5dae81456c571735a9
 size 893439185

last-checkpoint/pytorch_model.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e478b13a76d10a83f73453a6a99a172c3f9841bd66c63610def2c769bf0b203a
 size 449471589

 version https://git-lfs.github.com/spec/v1
+oid sha256:b5c26410b4d902c6d077c2953f9eace1e47991bc5dcd2f62ceb38a14b3e3ee84
 size 449471589

last-checkpoint/rng_state_0.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d389965857734bb2eeaa6cafe0b8f8e001a5d59ae127d36647124f18dacab70e
 size 14503

 version https://git-lfs.github.com/spec/v1
+oid sha256:2c101353913b86ff575c9018e352c46e19aa450e36d2ee34d697b2d5ed877d1d
 size 14503

last-checkpoint/rng_state_1.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d389965857734bb2eeaa6cafe0b8f8e001a5d59ae127d36647124f18dacab70e
 size 14503

 version https://git-lfs.github.com/spec/v1
+oid sha256:2c101353913b86ff575c9018e352c46e19aa450e36d2ee34d697b2d5ed877d1d
 size 14503

last-checkpoint/rng_state_2.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d389965857734bb2eeaa6cafe0b8f8e001a5d59ae127d36647124f18dacab70e
 size 14503

 version https://git-lfs.github.com/spec/v1
+oid sha256:2c101353913b86ff575c9018e352c46e19aa450e36d2ee34d697b2d5ed877d1d
 size 14503

last-checkpoint/rng_state_3.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d389965857734bb2eeaa6cafe0b8f8e001a5d59ae127d36647124f18dacab70e
 size 14503

 version https://git-lfs.github.com/spec/v1
+oid sha256:2c101353913b86ff575c9018e352c46e19aa450e36d2ee34d697b2d5ed877d1d
 size 14503

last-checkpoint/rng_state_4.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d389965857734bb2eeaa6cafe0b8f8e001a5d59ae127d36647124f18dacab70e
 size 14503

 version https://git-lfs.github.com/spec/v1
+oid sha256:2c101353913b86ff575c9018e352c46e19aa450e36d2ee34d697b2d5ed877d1d
 size 14503

last-checkpoint/rng_state_5.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d389965857734bb2eeaa6cafe0b8f8e001a5d59ae127d36647124f18dacab70e
 size 14503

 version https://git-lfs.github.com/spec/v1
+oid sha256:2c101353913b86ff575c9018e352c46e19aa450e36d2ee34d697b2d5ed877d1d
 size 14503

last-checkpoint/rng_state_6.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d389965857734bb2eeaa6cafe0b8f8e001a5d59ae127d36647124f18dacab70e
 size 14503

 version https://git-lfs.github.com/spec/v1
+oid sha256:2c101353913b86ff575c9018e352c46e19aa450e36d2ee34d697b2d5ed877d1d
 size 14503

last-checkpoint/rng_state_7.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d389965857734bb2eeaa6cafe0b8f8e001a5d59ae127d36647124f18dacab70e
 size 14503

 version https://git-lfs.github.com/spec/v1
+oid sha256:2c101353913b86ff575c9018e352c46e19aa450e36d2ee34d697b2d5ed877d1d
 size 14503

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e62e41706c3cdebd0963ceae6fb24ae079cf26e6452a67e31e4c02f3a80456e6
 size 623

 version https://git-lfs.github.com/spec/v1
+oid sha256:fe2528712b13528834db613d1453dd7fb04ea3b7a1940fa3521b7d0be4a96c0a
 size 623

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,8 +1,8 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 6.357137280706646,
-  "global_step": 570000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -11406,11 +11406,211 @@
       "eval_samples_per_second": 887.536,
       "eval_steps_per_second": 13.91,
       "step": 570000
     }
   ],
   "max_steps": 1000000,
   "num_train_epochs": 12,
-  "total_flos": 3.9956820200446935e+22,
   "trial_name": null,
   "trial_params": null
 }

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 6.111528724222923,
+  "global_step": 580000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 887.536,
       "eval_steps_per_second": 13.91,
       "step": 570000
+    },
+    {
+      "epoch": 6.01,
+      "learning_rate": 6.950709521760712e-05,
+      "loss": 0.2048,
+      "step": 570500
+    },
+    {
+      "epoch": 6.01,
+      "learning_rate": 6.939267446224418e-05,
+      "loss": 0.2046,
+      "step": 571000
+    },
+    {
+      "epoch": 6.01,
+      "eval_loss": 0.19425606727600098,
+      "eval_runtime": 2.5589,
+      "eval_samples_per_second": 897.645,
+      "eval_steps_per_second": 14.068,
+      "step": 571000
+    },
+    {
+      "epoch": 6.02,
+      "learning_rate": 6.927828270690422e-05,
+      "loss": 0.2047,
+      "step": 571500
+    },
+    {
+      "epoch": 6.02,
+      "learning_rate": 6.91639202643299e-05,
+      "loss": 0.2042,
+      "step": 572000
+    },
+    {
+      "epoch": 6.02,
+      "eval_loss": 0.19538278877735138,
+      "eval_runtime": 2.5656,
+      "eval_samples_per_second": 895.308,
+      "eval_steps_per_second": 14.032,
+      "step": 572000
+    },
+    {
+      "epoch": 6.03,
+      "learning_rate": 6.904958744718383e-05,
+      "loss": 0.2046,
+      "step": 572500
+    },
+    {
+      "epoch": 6.03,
+      "learning_rate": 6.893528456804756e-05,
+      "loss": 0.2042,
+      "step": 573000
+    },
+    {
+      "epoch": 6.03,
+      "eval_loss": 0.19409753382205963,
+      "eval_runtime": 2.5486,
+      "eval_samples_per_second": 901.27,
+      "eval_steps_per_second": 14.125,
+      "step": 573000
+    },
+    {
+      "epoch": 6.04,
+      "learning_rate": 6.882101193942075e-05,
+      "loss": 0.2042,
+      "step": 573500
+    },
+    {
+      "epoch": 6.04,
+      "learning_rate": 6.870676987372044e-05,
+      "loss": 0.2041,
+      "step": 574000
+    },
+    {
+      "epoch": 6.04,
+      "eval_loss": 0.1939525604248047,
+      "eval_runtime": 2.5437,
+      "eval_samples_per_second": 903.03,
+      "eval_steps_per_second": 14.153,
+      "step": 574000
+    },
+    {
+      "epoch": 6.05,
+      "learning_rate": 6.859255868328003e-05,
+      "loss": 0.2039,
+      "step": 574500
+    },
+    {
+      "epoch": 6.06,
+      "learning_rate": 6.847837868034861e-05,
+      "loss": 0.2042,
+      "step": 575000
+    },
+    {
+      "epoch": 6.06,
+      "eval_loss": 0.1951504349708557,
+      "eval_runtime": 2.5798,
+      "eval_samples_per_second": 890.377,
+      "eval_steps_per_second": 13.955,
+      "step": 575000
+    },
+    {
+      "epoch": 6.06,
+      "learning_rate": 6.836423017708996e-05,
+      "loss": 0.2038,
+      "step": 575500
+    },
+    {
+      "epoch": 6.07,
+      "learning_rate": 6.825011348558167e-05,
+      "loss": 0.204,
+      "step": 576000
+    },
+    {
+      "epoch": 6.07,
+      "eval_loss": 0.19505272805690765,
+      "eval_runtime": 2.5475,
+      "eval_samples_per_second": 901.67,
+      "eval_steps_per_second": 14.132,
+      "step": 576000
+    },
+    {
+      "epoch": 6.07,
+      "learning_rate": 6.813602891781443e-05,
+      "loss": 0.2039,
+      "step": 576500
+    },
+    {
+      "epoch": 6.08,
+      "learning_rate": 6.802197678569109e-05,
+      "loss": 0.2038,
+      "step": 577000
+    },
+    {
+      "epoch": 6.08,
+      "eval_loss": 0.19440634548664093,
+      "eval_runtime": 2.537,
+      "eval_samples_per_second": 905.395,
+      "eval_steps_per_second": 14.19,
+      "step": 577000
+    },
+    {
+      "epoch": 6.08,
+      "learning_rate": 6.790795740102589e-05,
+      "loss": 0.2038,
+      "step": 577500
+    },
+    {
+      "epoch": 6.09,
+      "learning_rate": 6.779397107554339e-05,
+      "loss": 0.2038,
+      "step": 578000
+    },
+    {
+      "epoch": 6.09,
+      "eval_loss": 0.19268804788589478,
+      "eval_runtime": 2.5143,
+      "eval_samples_per_second": 913.559,
+      "eval_steps_per_second": 14.318,
+      "step": 578000
+    },
+    {
+      "epoch": 6.09,
+      "learning_rate": 6.768001812087789e-05,
+      "loss": 0.2038,
+      "step": 578500
+    },
+    {
+      "epoch": 6.1,
+      "learning_rate": 6.756609884857239e-05,
+      "loss": 0.2037,
+      "step": 579000
+    },
+    {
+      "epoch": 6.1,
+      "eval_loss": 0.19323283433914185,
+      "eval_runtime": 2.5075,
+      "eval_samples_per_second": 916.04,
+      "eval_steps_per_second": 14.357,
+      "step": 579000
+    },
+    {
+      "epoch": 6.11,
+      "learning_rate": 6.745221357007786e-05,
+      "loss": 0.2037,
+      "step": 579500
+    },
+    {
+      "epoch": 6.11,
+      "learning_rate": 6.733836259675233e-05,
+      "loss": 0.2036,
+      "step": 580000
+    },
+    {
+      "epoch": 6.11,
+      "eval_loss": 0.19253070652484894,
+      "eval_runtime": 2.4507,
+      "eval_samples_per_second": 937.277,
+      "eval_steps_per_second": 14.69,
+      "step": 580000
     }
   ],
   "max_steps": 1000000,
   "num_train_epochs": 12,
+  "total_flos": 4.065782281075041e+22,
   "trial_name": null,
   "trial_params": null
 }

last-checkpoint/training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:17ba5c281d8b06e9a9525a16977a4c6939809adffe354977bdd6e1078fe1499c
 size 3311

 version https://git-lfs.github.com/spec/v1
+oid sha256:4a6ae323073c615770ba7c18bfa53b7b7169d86c9cda5252cdcc294ff053a147
 size 3311

pytorch_model.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e478b13a76d10a83f73453a6a99a172c3f9841bd66c63610def2c769bf0b203a
 size 449471589

 version https://git-lfs.github.com/spec/v1
+oid sha256:b5c26410b4d902c6d077c2953f9eace1e47991bc5dcd2f62ceb38a14b3e3ee84
 size 449471589

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:17ba5c281d8b06e9a9525a16977a4c6939809adffe354977bdd6e1078fe1499c
 size 3311

 version https://git-lfs.github.com/spec/v1
+oid sha256:4a6ae323073c615770ba7c18bfa53b7b7169d86c9cda5252cdcc294ff053a147
 size 3311