Training in progress, step 10000, checkpoint
last-checkpoint/model.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:0e74cac81df1d9f55b850794a03cd64fce4492c0c0da5d81e9909dae9911f943
 size 1520630616
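The pointer above tracks the step-10000 model weights (about 1.5 GB). As a minimal sketch, assuming the actual file has been fetched locally (for example with git lfs pull or huggingface_hub), the tensors can be inspected with the safetensors package:

from safetensors.torch import load_file  # pip install safetensors torch

# Load the checkpoint weights into an ordinary state dict (CPU tensors by default).
state_dict = load_file("last-checkpoint/model.safetensors")

# Quick sanity check: tensor count and total parameter count.
n_params = sum(t.numel() for t in state_dict.values())
print(f"{len(state_dict)} tensors, {n_params:,} parameters")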
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:0bf3be67603d9aa1f5d666b6a508c045b0cbd46af1138c22216863f18d284cfb
 size 3041448587
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:592f06f7337b836b66cd80a06e6dc9e25ae533b97c6347eb9344f6ecddefa9aa
 size 14645
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:a081bc5da5ed0dc09d1d00741d1fe6bdeae12f8d58e5b4d44a7d78e0ad120f04
 size 1465
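Each of the files above is stored through Git LFS, so the diffs only touch the three-line pointer files (spec version, sha256 object id, byte size); this commit swaps in new object ids for the step-10000 checkpoint while the file sizes stay the same. A minimal verification sketch, assuming the real files have been pulled locally under last-checkpoint/ (the expected hashes and sizes below are taken from the pointers in this commit):

import hashlib
import os

def sha256_of(path: str, chunk_size: int = 1 << 20) -> str:
    """Stream a file through SHA-256 so large checkpoints need not fit in memory."""
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            h.update(chunk)
    return h.hexdigest()

# Expected (oid, size) pairs from the LFS pointer files in this commit.
expected = {
    "last-checkpoint/model.safetensors": (
        "0e74cac81df1d9f55b850794a03cd64fce4492c0c0da5d81e9909dae9911f943", 1520630616),
    "last-checkpoint/optimizer.pt": (
        "0bf3be67603d9aa1f5d666b6a508c045b0cbd46af1138c22216863f18d284cfb", 3041448587),
    "last-checkpoint/rng_state.pth": (
        "592f06f7337b836b66cd80a06e6dc9e25ae533b97c6347eb9344f6ecddefa9aa", 14645),
    "last-checkpoint/scheduler.pt": (
        "a081bc5da5ed0dc09d1d00741d1fe6bdeae12f8d58e5b4d44a7d78e0ad120f04", 1465),
}

for path, (oid, size) in expected.items():
    ok = os.path.getsize(path) == size and sha256_of(path) == oid
    print(f"{path}: {'OK' if ok else 'MISMATCH'}")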
last-checkpoint/trainer_state.json
CHANGED
@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 2.
+  "epoch": 2.0340327868852457,
   "eval_steps": 500,
-  "global_step":
+  "global_step": 10000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1128,6 +1128,286 @@
       "learning_rate": 0.00029708649617388356,
       "loss": 2.0629,
       "step": 8000
+    },
+    {
+      "epoch": 2.01272131147541,
+      "grad_norm": 0.515625,
+      "learning_rate": 0.0002970344005912617,
+      "loss": 2.0588,
+      "step": 8050
+    },
+    {
+      "epoch": 2.0132677595628414,
+      "grad_norm": 0.4765625,
+      "learning_rate": 0.000296981848022331,
+      "loss": 2.0373,
+      "step": 8100
+    },
+    {
+      "epoch": 2.0138142076502734,
+      "grad_norm": 0.44921875,
+      "learning_rate": 0.000296928838630426,
+      "loss": 2.0348,
+      "step": 8150
+    },
+    {
+      "epoch": 2.014360655737705,
+      "grad_norm": 0.490234375,
+      "learning_rate": 0.0002968753725803013,
+      "loss": 2.0784,
+      "step": 8200
+    },
+    {
+      "epoch": 2.0149071038251365,
+      "grad_norm": 0.5390625,
+      "learning_rate": 0.0002968214500381304,
+      "loss": 2.0531,
+      "step": 8250
+    },
+    {
+      "epoch": 2.0154535519125685,
+      "grad_norm": 0.546875,
+      "learning_rate": 0.000296767071171506,
+      "loss": 2.0482,
+      "step": 8300
+    },
+    {
+      "epoch": 2.016,
+      "grad_norm": 0.484375,
+      "learning_rate": 0.00029671223614943874,
+      "loss": 2.0193,
+      "step": 8350
+    },
+    {
+      "epoch": 2.0165464480874316,
+      "grad_norm": 0.4765625,
+      "learning_rate": 0.0002966569451423572,
+      "loss": 2.007,
+      "step": 8400
+    },
+    {
+      "epoch": 2.0170928961748635,
+      "grad_norm": 0.48828125,
+      "learning_rate": 0.000296601198322107,
+      "loss": 2.0325,
+      "step": 8450
+    },
+    {
+      "epoch": 2.017639344262295,
+      "grad_norm": 0.4921875,
+      "learning_rate": 0.0002965449958619508,
+      "loss": 2.0173,
+      "step": 8500
+    },
+    {
+      "epoch": 2.0181857923497266,
+      "grad_norm": 0.51953125,
+      "learning_rate": 0.0002964883379365668,
+      "loss": 1.9927,
+      "step": 8550
+    },
+    {
+      "epoch": 2.0187322404371586,
+      "grad_norm": 0.546875,
+      "learning_rate": 0.00029643122472204934,
+      "loss": 2.0149,
+      "step": 8600
+    },
+    {
+      "epoch": 2.01927868852459,
+      "grad_norm": 0.49609375,
+      "learning_rate": 0.00029637365639590763,
+      "loss": 2.0077,
+      "step": 8650
+    },
+    {
+      "epoch": 2.0198251366120217,
+      "grad_norm": 0.5390625,
+      "learning_rate": 0.00029631563313706525,
+      "loss": 1.9926,
+      "step": 8700
+    },
+    {
+      "epoch": 2.0203715846994537,
+      "grad_norm": 0.494140625,
+      "learning_rate": 0.0002962571551258599,
+      "loss": 2.0248,
+      "step": 8750
+    },
+    {
+      "epoch": 2.020918032786885,
+      "grad_norm": 0.51953125,
+      "learning_rate": 0.00029619822254404256,
+      "loss": 1.998,
+      "step": 8800
+    },
+    {
+      "epoch": 2.0214644808743167,
+      "grad_norm": 0.478515625,
+      "learning_rate": 0.00029613883557477706,
+      "loss": 1.9957,
+      "step": 8850
+    },
+    {
+      "epoch": 2.0220109289617487,
+      "grad_norm": 0.49609375,
+      "learning_rate": 0.00029607899440263946,
+      "loss": 2.0132,
+      "step": 8900
+    },
+    {
+      "epoch": 2.0225573770491803,
+      "grad_norm": 0.5390625,
+      "learning_rate": 0.00029601869921361756,
+      "loss": 2.0038,
+      "step": 8950
+    },
+    {
+      "epoch": 2.0231038251366122,
+      "grad_norm": 0.458984375,
+      "learning_rate": 0.00029595795019511005,
+      "loss": 1.9447,
+      "step": 9000
+    },
+    {
+      "epoch": 2.023650273224044,
+      "grad_norm": 0.5078125,
+      "learning_rate": 0.00029589674753592647,
+      "loss": 1.9806,
+      "step": 9050
+    },
+    {
+      "epoch": 2.0241967213114753,
+      "grad_norm": 0.5546875,
+      "learning_rate": 0.000295835091426286,
+      "loss": 1.9738,
+      "step": 9100
+    },
+    {
+      "epoch": 2.0247431693989073,
+      "grad_norm": 0.5078125,
+      "learning_rate": 0.00029577298205781726,
+      "loss": 1.9568,
+      "step": 9150
+    },
+    {
+      "epoch": 2.025289617486339,
+      "grad_norm": 0.5078125,
+      "learning_rate": 0.00029571041962355755,
+      "loss": 1.9778,
+      "step": 9200
+    },
+    {
+      "epoch": 2.0258360655737704,
+      "grad_norm": 0.5078125,
+      "learning_rate": 0.0002956474043179525,
+      "loss": 2.0212,
+      "step": 9250
+    },
+    {
+      "epoch": 2.0263825136612024,
+      "grad_norm": 0.51953125,
+      "learning_rate": 0.0002955839363368549,
+      "loss": 1.9981,
+      "step": 9300
+    },
+    {
+      "epoch": 2.026928961748634,
+      "grad_norm": 0.5,
+      "learning_rate": 0.00029552001587752495,
+      "loss": 1.9763,
+      "step": 9350
+    },
+    {
+      "epoch": 2.0274754098360654,
+      "grad_norm": 0.515625,
+      "learning_rate": 0.0002954556431386288,
+      "loss": 1.9687,
+      "step": 9400
+    },
+    {
+      "epoch": 2.0280218579234974,
+      "grad_norm": 0.5390625,
+      "learning_rate": 0.00029539081832023837,
+      "loss": 1.9391,
+      "step": 9450
+    },
+    {
+      "epoch": 2.028568306010929,
+      "grad_norm": 0.56640625,
+      "learning_rate": 0.0002953255416238308,
+      "loss": 1.9614,
+      "step": 9500
+    },
+    {
+      "epoch": 2.0291147540983605,
+      "grad_norm": 0.490234375,
+      "learning_rate": 0.0002952598132522874,
+      "loss": 1.9405,
+      "step": 9550
+    },
+    {
+      "epoch": 2.0296612021857925,
+      "grad_norm": 0.546875,
+      "learning_rate": 0.00029519363340989367,
+      "loss": 1.9653,
+      "step": 9600
+    },
+    {
+      "epoch": 2.030207650273224,
+      "grad_norm": 0.50390625,
+      "learning_rate": 0.0002951270023023379,
+      "loss": 1.9704,
+      "step": 9650
+    },
+    {
+      "epoch": 2.0307540983606556,
+      "grad_norm": 0.53515625,
+      "learning_rate": 0.00029505992013671126,
+      "loss": 1.9592,
+      "step": 9700
+    },
+    {
+      "epoch": 2.0313005464480876,
+      "grad_norm": 0.474609375,
+      "learning_rate": 0.0002949923871215065,
+      "loss": 1.9679,
+      "step": 9750
+    },
+    {
+      "epoch": 2.031846994535519,
+      "grad_norm": 0.5546875,
+      "learning_rate": 0.000294924403466618,
+      "loss": 1.9398,
+      "step": 9800
+    },
+    {
+      "epoch": 2.0323934426229506,
+      "grad_norm": 0.5625,
+      "learning_rate": 0.00029485596938334037,
+      "loss": 1.9469,
+      "step": 9850
+    },
+    {
+      "epoch": 2.0329398907103826,
+      "grad_norm": 0.51953125,
+      "learning_rate": 0.00029478708508436834,
+      "loss": 1.9742,
+      "step": 9900
+    },
+    {
+      "epoch": 2.033486338797814,
+      "grad_norm": 0.482421875,
+      "learning_rate": 0.000294717750783796,
+      "loss": 1.9619,
+      "step": 9950
+    },
+    {
+      "epoch": 2.0340327868852457,
+      "grad_norm": 0.6015625,
+      "learning_rate": 0.0002946479666971158,
+      "loss": 1.8817,
+      "step": 10000
     }
   ],
   "logging_steps": 50,
@@ -1147,7 +1427,7 @@
       "attributes": {}
     }
   },
-  "total_flos":
+  "total_flos": 5.347720296331739e+18,
   "train_batch_size": 16,
   "trial_name": null,
   "trial_params": null
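The trainer_state.json update extends log_history with one entry every 50 steps from 8050 through 10000 (training loss easing from roughly 2.06 to 1.88) and advances epoch, global_step, and total_flos to match the new checkpoint. A small sketch, assuming the checkpoint directory is available locally, of reading the state back to inspect the recent loss trend:

import json

with open("last-checkpoint/trainer_state.json") as f:
    state = json.load(f)

print("global_step:", state["global_step"])  # 10000 after this commit
print("epoch:", state["epoch"])              # ~2.034

# Each log_history entry recorded here carries epoch, grad_norm, learning_rate, loss, and step.
recent = [e for e in state["log_history"] if e.get("step", 0) >= 9800 and "loss" in e]
for e in recent:
    print(f"step {e['step']:>5}  loss {e['loss']:.4f}  lr {e['learning_rate']:.3e}")

Training is normally resumed from such a directory by passing it to the Hugging Face Trainer, e.g. trainer.train(resume_from_checkpoint="last-checkpoint"); the optimizer.pt, scheduler.pt, and rng_state.pth files above exist so that the optimizer, learning-rate schedule, and random-number state can pick up where step 10000 left off.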