Upload 8 files

Browse files

Files changed (5) hide show

optimizer.pt +3 -0
rng_state.pth +3 -0
scheduler.pt +3 -0
trainer_state.json +465 -0
training_args.bin +3 -0

optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3fc06cbe2a43b0a0c035ed87bf071a180679b4135f5645b2d266c2599065ae1c
+size 33662074

rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:77d2e2048751f7b82b7d03a4d56de219162dbe5a0a285f2a2e022884f503a580
+size 14244

scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:6c0ba6dee32a29c1b38007c255586352f19a3c1a7bfb24452b6b1f3673e8e047
+size 1064

trainer_state.json ADDED Viewed

	@@ -0,0 +1,465 @@

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 1.9955555555555555,
+  "eval_steps": 100,
+  "global_step": 562,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.035555555555555556,
+      "grad_norm": 11.767955780029297,
+      "learning_rate": 0.00019679715302491104,
+      "loss": 33.6554,
+      "step": 10
+    },
+    {
+      "epoch": 0.07111111111111111,
+      "grad_norm": 9.438668251037598,
+      "learning_rate": 0.0001932384341637011,
+      "loss": 33.8787,
+      "step": 20
+    },
+    {
+      "epoch": 0.10666666666666667,
+      "grad_norm": 10.174617767333984,
+      "learning_rate": 0.00018967971530249112,
+      "loss": 33.8919,
+      "step": 30
+    },
+    {
+      "epoch": 0.14222222222222222,
+      "grad_norm": 8.84274673461914,
+      "learning_rate": 0.00018612099644128114,
+      "loss": 33.7011,
+      "step": 40
+    },
+    {
+      "epoch": 0.17777777777777778,
+      "grad_norm": 10.169342041015625,
+      "learning_rate": 0.0001825622775800712,
+      "loss": 33.6306,
+      "step": 50
+    },
+    {
+      "epoch": 0.21333333333333335,
+      "grad_norm": 9.339362144470215,
+      "learning_rate": 0.0001790035587188612,
+      "loss": 33.5378,
+      "step": 60
+    },
+    {
+      "epoch": 0.24888888888888888,
+      "grad_norm": 10.399051666259766,
+      "learning_rate": 0.00017544483985765125,
+      "loss": 33.1223,
+      "step": 70
+    },
+    {
+      "epoch": 0.28444444444444444,
+      "grad_norm": 8.772202491760254,
+      "learning_rate": 0.00017188612099644127,
+      "loss": 34.3864,
+      "step": 80
+    },
+    {
+      "epoch": 0.32,
+      "grad_norm": 9.338233947753906,
+      "learning_rate": 0.00016832740213523133,
+      "loss": 33.2955,
+      "step": 90
+    },
+    {
+      "epoch": 0.35555555555555557,
+      "grad_norm": 9.439739227294922,
+      "learning_rate": 0.00016476868327402135,
+      "loss": 33.229,
+      "step": 100
+    },
+    {
+      "epoch": 0.35555555555555557,
+      "eval_loss": 2.133469820022583,
+      "eval_runtime": 296.1668,
+      "eval_samples_per_second": 3.376,
+      "eval_steps_per_second": 0.422,
+      "step": 100
+    },
+    {
+      "epoch": 0.39111111111111113,
+      "grad_norm": 9.046673774719238,
+      "learning_rate": 0.0001612099644128114,
+      "loss": 33.3667,
+      "step": 110
+    },
+    {
+      "epoch": 0.4266666666666667,
+      "grad_norm": 8.99227237701416,
+      "learning_rate": 0.00015765124555160143,
+      "loss": 32.6701,
+      "step": 120
+    },
+    {
+      "epoch": 0.4622222222222222,
+      "grad_norm": 7.6904144287109375,
+      "learning_rate": 0.00015409252669039148,
+      "loss": 33.2927,
+      "step": 130
+    },
+    {
+      "epoch": 0.49777777777777776,
+      "grad_norm": 8.012206077575684,
+      "learning_rate": 0.00015053380782918148,
+      "loss": 33.2934,
+      "step": 140
+    },
+    {
+      "epoch": 0.5333333333333333,
+      "grad_norm": 10.931622505187988,
+      "learning_rate": 0.00014697508896797153,
+      "loss": 33.3676,
+      "step": 150
+    },
+    {
+      "epoch": 0.5688888888888889,
+      "grad_norm": 7.606035232543945,
+      "learning_rate": 0.00014341637010676156,
+      "loss": 34.1758,
+      "step": 160
+    },
+    {
+      "epoch": 0.6044444444444445,
+      "grad_norm": 9.531214714050293,
+      "learning_rate": 0.0001398576512455516,
+      "loss": 33.0847,
+      "step": 170
+    },
+    {
+      "epoch": 0.64,
+      "grad_norm": 8.761300086975098,
+      "learning_rate": 0.00013629893238434164,
+      "loss": 33.5206,
+      "step": 180
+    },
+    {
+      "epoch": 0.6755555555555556,
+      "grad_norm": 9.155729293823242,
+      "learning_rate": 0.0001327402135231317,
+      "loss": 33.2403,
+      "step": 190
+    },
+    {
+      "epoch": 0.7111111111111111,
+      "grad_norm": 9.354476928710938,
+      "learning_rate": 0.00012918149466192172,
+      "loss": 33.5548,
+      "step": 200
+    },
+    {
+      "epoch": 0.7111111111111111,
+      "eval_loss": 2.126850128173828,
+      "eval_runtime": 296.1679,
+      "eval_samples_per_second": 3.376,
+      "eval_steps_per_second": 0.422,
+      "step": 200
+    },
+    {
+      "epoch": 0.7466666666666667,
+      "grad_norm": 8.922224998474121,
+      "learning_rate": 0.00012562277580071177,
+      "loss": 33.279,
+      "step": 210
+    },
+    {
+      "epoch": 0.7822222222222223,
+      "grad_norm": 9.973633766174316,
+      "learning_rate": 0.00012206405693950178,
+      "loss": 33.5481,
+      "step": 220
+    },
+    {
+      "epoch": 0.8177777777777778,
+      "grad_norm": 8.771803855895996,
+      "learning_rate": 0.00011850533807829183,
+      "loss": 33.1058,
+      "step": 230
+    },
+    {
+      "epoch": 0.8533333333333334,
+      "grad_norm": 10.16543960571289,
+      "learning_rate": 0.00011494661921708185,
+      "loss": 33.3706,
+      "step": 240
+    },
+    {
+      "epoch": 0.8888888888888888,
+      "grad_norm": 9.286821365356445,
+      "learning_rate": 0.0001113879003558719,
+      "loss": 33.3456,
+      "step": 250
+    },
+    {
+      "epoch": 0.9244444444444444,
+      "grad_norm": 9.520956039428711,
+      "learning_rate": 0.00010782918149466192,
+      "loss": 33.5781,
+      "step": 260
+    },
+    {
+      "epoch": 0.96,
+      "grad_norm": 10.376456260681152,
+      "learning_rate": 0.00010427046263345198,
+      "loss": 32.9687,
+      "step": 270
+    },
+    {
+      "epoch": 0.9955555555555555,
+      "grad_norm": 8.36178207397461,
+      "learning_rate": 0.00010071174377224199,
+      "loss": 33.7239,
+      "step": 280
+    },
+    {
+      "epoch": 1.0284444444444445,
+      "grad_norm": 10.113052368164062,
+      "learning_rate": 9.715302491103203e-05,
+      "loss": 29.9997,
+      "step": 290
+    },
+    {
+      "epoch": 1.064,
+      "grad_norm": 11.123631477355957,
+      "learning_rate": 9.359430604982207e-05,
+      "loss": 32.5004,
+      "step": 300
+    },
+    {
+      "epoch": 1.064,
+      "eval_loss": 2.122236490249634,
+      "eval_runtime": 296.127,
+      "eval_samples_per_second": 3.377,
+      "eval_steps_per_second": 0.422,
+      "step": 300
+    },
+    {
+      "epoch": 1.0995555555555556,
+      "grad_norm": 9.897551536560059,
+      "learning_rate": 9.00355871886121e-05,
+      "loss": 32.5046,
+      "step": 310
+    },
+    {
+      "epoch": 1.1351111111111112,
+      "grad_norm": 9.53073501586914,
+      "learning_rate": 8.647686832740213e-05,
+      "loss": 32.2727,
+      "step": 320
+    },
+    {
+      "epoch": 1.1706666666666667,
+      "grad_norm": 10.394311904907227,
+      "learning_rate": 8.291814946619217e-05,
+      "loss": 32.688,
+      "step": 330
+    },
+    {
+      "epoch": 1.2062222222222223,
+      "grad_norm": 9.498970031738281,
+      "learning_rate": 7.935943060498221e-05,
+      "loss": 33.6316,
+      "step": 340
+    },
+    {
+      "epoch": 1.2417777777777779,
+      "grad_norm": 10.150975227355957,
+      "learning_rate": 7.580071174377225e-05,
+      "loss": 33.0713,
+      "step": 350
+    },
+    {
+      "epoch": 1.2773333333333334,
+      "grad_norm": 9.899177551269531,
+      "learning_rate": 7.224199288256229e-05,
+      "loss": 32.4769,
+      "step": 360
+    },
+    {
+      "epoch": 1.3128888888888888,
+      "grad_norm": 9.39831829071045,
+      "learning_rate": 6.868327402135231e-05,
+      "loss": 32.2654,
+      "step": 370
+    },
+    {
+      "epoch": 1.3484444444444446,
+      "grad_norm": 10.761151313781738,
+      "learning_rate": 6.512455516014235e-05,
+      "loss": 32.491,
+      "step": 380
+    },
+    {
+      "epoch": 1.384,
+      "grad_norm": 9.932414054870605,
+      "learning_rate": 6.156583629893239e-05,
+      "loss": 33.5308,
+      "step": 390
+    },
+    {
+      "epoch": 1.4195555555555557,
+      "grad_norm": 11.054327011108398,
+      "learning_rate": 5.8007117437722425e-05,
+      "loss": 31.7061,
+      "step": 400
+    },
+    {
+      "epoch": 1.4195555555555557,
+      "eval_loss": 2.120673418045044,
+      "eval_runtime": 296.1092,
+      "eval_samples_per_second": 3.377,
+      "eval_steps_per_second": 0.422,
+      "step": 400
+    },
+    {
+      "epoch": 1.455111111111111,
+      "grad_norm": 10.89476203918457,
+      "learning_rate": 5.4448398576512464e-05,
+      "loss": 32.485,
+      "step": 410
+    },
+    {
+      "epoch": 1.4906666666666666,
+      "grad_norm": 9.823376655578613,
+      "learning_rate": 5.0889679715302496e-05,
+      "loss": 32.9951,
+      "step": 420
+    },
+    {
+      "epoch": 1.5262222222222221,
+      "grad_norm": 11.316079139709473,
+      "learning_rate": 4.733096085409253e-05,
+      "loss": 32.3443,
+      "step": 430
+    },
+    {
+      "epoch": 1.561777777777778,
+      "grad_norm": 11.608524322509766,
+      "learning_rate": 4.377224199288256e-05,
+      "loss": 32.2948,
+      "step": 440
+    },
+    {
+      "epoch": 1.5973333333333333,
+      "grad_norm": 11.020298957824707,
+      "learning_rate": 4.02135231316726e-05,
+      "loss": 32.6702,
+      "step": 450
+    },
+    {
+      "epoch": 1.6328888888888888,
+      "grad_norm": 9.804555892944336,
+      "learning_rate": 3.665480427046263e-05,
+      "loss": 31.6452,
+      "step": 460
+    },
+    {
+      "epoch": 1.6684444444444444,
+      "grad_norm": 11.037073135375977,
+      "learning_rate": 3.309608540925267e-05,
+      "loss": 32.479,
+      "step": 470
+    },
+    {
+      "epoch": 1.704,
+      "grad_norm": 9.837021827697754,
+      "learning_rate": 2.9537366548042704e-05,
+      "loss": 32.72,
+      "step": 480
+    },
+    {
+      "epoch": 1.7395555555555555,
+      "grad_norm": 11.720721244812012,
+      "learning_rate": 2.597864768683274e-05,
+      "loss": 32.6789,
+      "step": 490
+    },
+    {
+      "epoch": 1.775111111111111,
+      "grad_norm": 11.738125801086426,
+      "learning_rate": 2.2419928825622775e-05,
+      "loss": 33.3128,
+      "step": 500
+    },
+    {
+      "epoch": 1.775111111111111,
+      "eval_loss": 2.11881947517395,
+      "eval_runtime": 296.1216,
+      "eval_samples_per_second": 3.377,
+      "eval_steps_per_second": 0.422,
+      "step": 500
+    },
+    {
+      "epoch": 1.8106666666666666,
+      "grad_norm": 11.249613761901855,
+      "learning_rate": 1.8861209964412814e-05,
+      "loss": 31.9298,
+      "step": 510
+    },
+    {
+      "epoch": 1.8462222222222222,
+      "grad_norm": 11.530637741088867,
+      "learning_rate": 1.530249110320285e-05,
+      "loss": 31.8878,
+      "step": 520
+    },
+    {
+      "epoch": 1.8817777777777778,
+      "grad_norm": 11.147592544555664,
+      "learning_rate": 1.1743772241992882e-05,
+      "loss": 32.6852,
+      "step": 530
+    },
+    {
+      "epoch": 1.9173333333333333,
+      "grad_norm": 9.81916332244873,
+      "learning_rate": 8.185053380782918e-06,
+      "loss": 32.1578,
+      "step": 540
+    },
+    {
+      "epoch": 1.952888888888889,
+      "grad_norm": 10.557317733764648,
+      "learning_rate": 4.626334519572954e-06,
+      "loss": 32.2151,
+      "step": 550
+    },
+    {
+      "epoch": 1.9884444444444445,
+      "grad_norm": 10.493524551391602,
+      "learning_rate": 1.0676156583629894e-06,
+      "loss": 31.9549,
+      "step": 560
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 562,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 2,
+  "save_steps": 100,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 5.93400073703424e+16,
+  "train_batch_size": 2,
+  "trial_name": null,
+  "trial_params": null
+}

training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7f8fc3b0faf567e506d36321edce6eabba9e66e2254f1e24c9840de83a3bb7cb
+size 5304