Upload checkpoint-500/trainer_state.json with huggingface_hub

Browse files

Files changed (1) hide show

checkpoint-500/trainer_state.json +383 -0

checkpoint-500/trainer_state.json ADDED Viewed

	@@ -0,0 +1,383 @@

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 2.0,
+  "eval_steps": 500,
+  "global_step": 500,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.04,
+      "grad_norm": 2.3459789752960205,
+      "learning_rate": 2e-05,
+      "loss": 2.4508,
+      "step": 10
+    },
+    {
+      "epoch": 0.08,
+      "grad_norm": 0.7824317216873169,
+      "learning_rate": 4e-05,
+      "loss": 2.0751,
+      "step": 20
+    },
+    {
+      "epoch": 0.12,
+      "grad_norm": 0.754115641117096,
+      "learning_rate": 4.9986331433523156e-05,
+      "loss": 1.663,
+      "step": 30
+    },
+    {
+      "epoch": 0.16,
+      "grad_norm": 0.7826311588287354,
+      "learning_rate": 4.9877072563625285e-05,
+      "loss": 1.2938,
+      "step": 40
+    },
+    {
+      "epoch": 0.2,
+      "grad_norm": 0.9058009386062622,
+      "learning_rate": 4.965903258506806e-05,
+      "loss": 0.9719,
+      "step": 50
+    },
+    {
+      "epoch": 0.24,
+      "grad_norm": 0.8621117472648621,
+      "learning_rate": 4.933316493120015e-05,
+      "loss": 0.7141,
+      "step": 60
+    },
+    {
+      "epoch": 0.28,
+      "grad_norm": 0.8602449297904968,
+      "learning_rate": 4.8900894538358944e-05,
+      "loss": 0.5491,
+      "step": 70
+    },
+    {
+      "epoch": 0.32,
+      "grad_norm": 0.8764705657958984,
+      "learning_rate": 4.8364111614986527e-05,
+      "loss": 0.4541,
+      "step": 80
+    },
+    {
+      "epoch": 0.36,
+      "grad_norm": 0.8664467334747314,
+      "learning_rate": 4.7725163376229064e-05,
+      "loss": 0.3817,
+      "step": 90
+    },
+    {
+      "epoch": 0.4,
+      "grad_norm": 0.752210795879364,
+      "learning_rate": 4.698684378016222e-05,
+      "loss": 0.3162,
+      "step": 100
+    },
+    {
+      "epoch": 0.44,
+      "grad_norm": 0.9347196817398071,
+      "learning_rate": 4.6152381310523387e-05,
+      "loss": 0.2979,
+      "step": 110
+    },
+    {
+      "epoch": 0.48,
+      "grad_norm": 0.661382794380188,
+      "learning_rate": 4.522542485937369e-05,
+      "loss": 0.2545,
+      "step": 120
+    },
+    {
+      "epoch": 0.52,
+      "grad_norm": 0.7543488144874573,
+      "learning_rate": 4.421002777142148e-05,
+      "loss": 0.2436,
+      "step": 130
+    },
+    {
+      "epoch": 0.56,
+      "grad_norm": 0.6346200108528137,
+      "learning_rate": 4.311063011977723e-05,
+      "loss": 0.2363,
+      "step": 140
+    },
+    {
+      "epoch": 0.6,
+      "grad_norm": 0.7474273443222046,
+      "learning_rate": 4.193203929064353e-05,
+      "loss": 0.2278,
+      "step": 150
+    },
+    {
+      "epoch": 0.64,
+      "grad_norm": 0.7145452499389648,
+      "learning_rate": 4.067940896183843e-05,
+      "loss": 0.2233,
+      "step": 160
+    },
+    {
+      "epoch": 0.68,
+      "grad_norm": 0.6111130714416504,
+      "learning_rate": 3.935821656707359e-05,
+      "loss": 0.222,
+      "step": 170
+    },
+    {
+      "epoch": 0.72,
+      "grad_norm": 0.588006317615509,
+      "learning_rate": 3.797423934453038e-05,
+      "loss": 0.2136,
+      "step": 180
+    },
+    {
+      "epoch": 0.76,
+      "grad_norm": 0.5854628682136536,
+      "learning_rate": 3.65335290744672e-05,
+      "loss": 0.2162,
+      "step": 190
+    },
+    {
+      "epoch": 0.8,
+      "grad_norm": 0.500828206539154,
+      "learning_rate": 3.504238561632424e-05,
+      "loss": 0.2114,
+      "step": 200
+    },
+    {
+      "epoch": 0.84,
+      "grad_norm": 0.4557226896286011,
+      "learning_rate": 3.350732936104108e-05,
+      "loss": 0.2229,
+      "step": 210
+    },
+    {
+      "epoch": 0.88,
+      "grad_norm": 0.5085278153419495,
+      "learning_rate": 3.1935072719046115e-05,
+      "loss": 0.212,
+      "step": 220
+    },
+    {
+      "epoch": 0.92,
+      "grad_norm": 0.47359195351600647,
+      "learning_rate": 3.0332490768593675e-05,
+      "loss": 0.2056,
+      "step": 230
+    },
+    {
+      "epoch": 0.96,
+      "grad_norm": 0.5809552073478699,
+      "learning_rate": 2.870659119279605e-05,
+      "loss": 0.2132,
+      "step": 240
+    },
+    {
+      "epoch": 1.0,
+      "grad_norm": 0.5612187385559082,
+      "learning_rate": 2.7064483636808313e-05,
+      "loss": 0.2087,
+      "step": 250
+    },
+    {
+      "epoch": 1.04,
+      "grad_norm": 0.49165409803390503,
+      "learning_rate": 2.5413348619158967e-05,
+      "loss": 0.2064,
+      "step": 260
+    },
+    {
+      "epoch": 1.08,
+      "grad_norm": 0.5103522539138794,
+      "learning_rate": 2.3760406133169443e-05,
+      "loss": 0.2061,
+      "step": 270
+    },
+    {
+      "epoch": 1.12,
+      "grad_norm": 0.48928719758987427,
+      "learning_rate": 2.2112884075760347e-05,
+      "loss": 0.2041,
+      "step": 280
+    },
+    {
+      "epoch": 1.16,
+      "grad_norm": 0.5604917407035828,
+      "learning_rate": 2.047798664169726e-05,
+      "loss": 0.2021,
+      "step": 290
+    },
+    {
+      "epoch": 1.2,
+      "grad_norm": 0.5653706789016724,
+      "learning_rate": 1.8862862821480025e-05,
+      "loss": 0.2009,
+      "step": 300
+    },
+    {
+      "epoch": 1.24,
+      "grad_norm": 0.5317566394805908,
+      "learning_rate": 1.7274575140626318e-05,
+      "loss": 0.2053,
+      "step": 310
+    },
+    {
+      "epoch": 1.28,
+      "grad_norm": 0.5173370838165283,
+      "learning_rate": 1.5720068777044476e-05,
+      "loss": 0.2046,
+      "step": 320
+    },
+    {
+      "epoch": 1.32,
+      "grad_norm": 0.5695991516113281,
+      "learning_rate": 1.4206141191537682e-05,
+      "loss": 0.2043,
+      "step": 330
+    },
+    {
+      "epoch": 1.3599999999999999,
+      "grad_norm": 0.5045269131660461,
+      "learning_rate": 1.2739412404237306e-05,
+      "loss": 0.2026,
+      "step": 340
+    },
+    {
+      "epoch": 1.4,
+      "grad_norm": 0.5585351586341858,
+      "learning_rate": 1.1326296046939333e-05,
+      "loss": 0.2006,
+      "step": 350
+    },
+    {
+      "epoch": 1.44,
+      "grad_norm": 0.5607123374938965,
+      "learning_rate": 9.972971317924374e-06,
+      "loss": 0.2004,
+      "step": 360
+    },
+    {
+      "epoch": 1.48,
+      "grad_norm": 0.5031062960624695,
+      "learning_rate": 8.685355961895784e-06,
+      "loss": 0.2007,
+      "step": 370
+    },
+    {
+      "epoch": 1.52,
+      "grad_norm": 0.579496443271637,
+      "learning_rate": 7.469080393187786e-06,
+      "loss": 0.2003,
+      "step": 380
+    },
+    {
+      "epoch": 1.56,
+      "grad_norm": 0.5717282891273499,
+      "learning_rate": 6.329463075396161e-06,
+      "loss": 0.2018,
+      "step": 390
+    },
+    {
+      "epoch": 1.6,
+      "grad_norm": 0.5317989587783813,
+      "learning_rate": 5.271487265090163e-06,
+      "loss": 0.2002,
+      "step": 400
+    },
+    {
+      "epoch": 1.6400000000000001,
+      "grad_norm": 0.5046743750572205,
+      "learning_rate": 4.299779221299499e-06,
+      "loss": 0.1981,
+      "step": 410
+    },
+    {
+      "epoch": 1.6800000000000002,
+      "grad_norm": 0.5230479836463928,
+      "learning_rate": 3.418587976060653e-06,
+      "loss": 0.2,
+      "step": 420
+    },
+    {
+      "epoch": 1.72,
+      "grad_norm": 0.5649706721305847,
+      "learning_rate": 2.6317667544809134e-06,
+      "loss": 0.2014,
+      "step": 430
+    },
+    {
+      "epoch": 1.76,
+      "grad_norm": 0.6512070894241333,
+      "learning_rate": 1.9427561255653816e-06,
+      "loss": 0.199,
+      "step": 440
+    },
+    {
+      "epoch": 1.8,
+      "grad_norm": 0.48896294832229614,
+      "learning_rate": 1.3545689574841342e-06,
+      "loss": 0.2012,
+      "step": 450
+    },
+    {
+      "epoch": 1.8399999999999999,
+      "grad_norm": 0.559664785861969,
+      "learning_rate": 8.697772430662859e-07,
+      "loss": 0.2005,
+      "step": 460
+    },
+    {
+      "epoch": 1.88,
+      "grad_norm": 0.529403030872345,
+      "learning_rate": 4.905008531297661e-07,
+      "loss": 0.1994,
+      "step": 470
+    },
+    {
+      "epoch": 1.92,
+      "grad_norm": 0.6188033223152161,
+      "learning_rate": 2.1839826682562015e-07,
+      "loss": 0.1945,
+      "step": 480
+    },
+    {
+      "epoch": 1.96,
+      "grad_norm": 0.5380053520202637,
+      "learning_rate": 5.4659319530636633e-08,
+      "loss": 0.1973,
+      "step": 490
+    },
+    {
+      "epoch": 2.0,
+      "grad_norm": 0.5263402462005615,
+      "learning_rate": 0.0,
+      "loss": 0.1982,
+      "step": 500
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 500,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 2,
+  "save_steps": 0,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 1.505320079425536e+16,
+  "train_batch_size": 1,
+  "trial_name": null,
+  "trial_params": null
+}