Training in progress, step 1200, checkpoint

Browse files

Files changed (6) hide show

last-checkpoint/model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scaler.pt +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +213 -3

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ea44be5f29e63d43296d9d83bd74000d9eec25472608a721883a3def330d0d51
 size 3237818848

 version https://git-lfs.github.com/spec/v1
+oid sha256:5f18f2b21d1eb9893ef7d432745ca210cc86cd300d6d237450504c29478453fb
 size 3237818848

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:fcfc52b46b2bcbd19bdeae44612f8466c1fd2dddd02666025d9a6d924a564419
 size 2062251569

 version https://git-lfs.github.com/spec/v1
+oid sha256:ffd49387501c08473c006cb3983fe8e3572862f34ccc79a00ee2957719d3508e
 size 2062251569

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:60c8632974dc900245d4dfbbcf87a13b532e38345500a34dea8a1b480b697112
 size 14645

 version https://git-lfs.github.com/spec/v1
+oid sha256:6a602fcddae5166b23f64a1263af24cb60ac56e25cf7aa91c125f6b46213120d
 size 14645

last-checkpoint/scaler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:504b7bc543b9e5f039f6559d07b099507a66c15c86836ff5981e4eee51792c02
 size 1383

 version https://git-lfs.github.com/spec/v1
+oid sha256:1b04ef7af3a89dd0eb8778c7ed7d28aeab310d9f53593d47cc2bdc9458a253ac
 size 1383

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a838d3ba3633bb04603e3afbc02ea3103b4064d4c633a0639c7ced656d5b0c92
 size 1465

 version https://git-lfs.github.com/spec/v1
+oid sha256:370edc1f7812cd81a8eae6fcade42c3407f4dcaf97659f9602f84f2549a0a41c
 size 1465

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.4,
   "eval_steps": 300,
-  "global_step": 900,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -650,6 +650,216 @@
       "learning_rate": 0.00013498887007137918,
       "loss": 1.0813,
       "step": 900
     }
   ],
   "logging_steps": 10,
@@ -669,7 +879,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 2.4551296598016e+19,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.5333333333333333,
   "eval_steps": 300,
+  "global_step": 1200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 0.00013498887007137918,
       "loss": 1.0813,
       "step": 900
+    },
+    {
+      "epoch": 0.40444444444444444,
+      "grad_norm": 7.365835189819336,
+      "learning_rate": 0.0001336512708132819,
+      "loss": 1.0193,
+      "step": 910
+    },
+    {
+      "epoch": 0.4088888888888889,
+      "grad_norm": 11.938828468322754,
+      "learning_rate": 0.00013230684686628744,
+      "loss": 1.1339,
+      "step": 920
+    },
+    {
+      "epoch": 0.41333333333333333,
+      "grad_norm": 12.521608352661133,
+      "learning_rate": 0.00013095587088800902,
+      "loss": 1.0743,
+      "step": 930
+    },
+    {
+      "epoch": 0.4177777777777778,
+      "grad_norm": 9.13135051727295,
+      "learning_rate": 0.00012959861686485304,
+      "loss": 1.0734,
+      "step": 940
+    },
+    {
+      "epoch": 0.4222222222222222,
+      "grad_norm": 16.696514129638672,
+      "learning_rate": 0.0001282353600564527,
+      "loss": 1.1145,
+      "step": 950
+    },
+    {
+      "epoch": 0.4266666666666667,
+      "grad_norm": 12.382914543151855,
+      "learning_rate": 0.00012686637693984384,
+      "loss": 0.9964,
+      "step": 960
+    },
+    {
+      "epoch": 0.4311111111111111,
+      "grad_norm": 10.711663246154785,
+      "learning_rate": 0.00012549194515339344,
+      "loss": 1.0572,
+      "step": 970
+    },
+    {
+      "epoch": 0.43555555555555553,
+      "grad_norm": 13.973264694213867,
+      "learning_rate": 0.00012411234344049293,
+      "loss": 1.0616,
+      "step": 980
+    },
+    {
+      "epoch": 0.44,
+      "grad_norm": 10.161416053771973,
+      "learning_rate": 0.0001227278515930273,
+      "loss": 1.0561,
+      "step": 990
+    },
+    {
+      "epoch": 0.4444444444444444,
+      "grad_norm": 27.74120330810547,
+      "learning_rate": 0.00012133875039463148,
+      "loss": 1.1011,
+      "step": 1000
+    },
+    {
+      "epoch": 0.4488888888888889,
+      "grad_norm": 9.277678489685059,
+      "learning_rate": 0.00011994532156374574,
+      "loss": 1.0957,
+      "step": 1010
+    },
+    {
+      "epoch": 0.4533333333333333,
+      "grad_norm": 9.599855422973633,
+      "learning_rate": 0.00011854784769648137,
+      "loss": 1.0394,
+      "step": 1020
+    },
+    {
+      "epoch": 0.4577777777777778,
+      "grad_norm": 9.483017921447754,
+      "learning_rate": 0.00011714661220930833,
+      "loss": 0.9773,
+      "step": 1030
+    },
+    {
+      "epoch": 0.4622222222222222,
+      "grad_norm": 11.467011451721191,
+      "learning_rate": 0.00011574189928157689,
+      "loss": 1.0346,
+      "step": 1040
+    },
+    {
+      "epoch": 0.4666666666666667,
+      "grad_norm": 8.953259468078613,
+      "learning_rate": 0.00011433399379788387,
+      "loss": 1.0622,
+      "step": 1050
+    },
+    {
+      "epoch": 0.4711111111111111,
+      "grad_norm": 6.459799289703369,
+      "learning_rate": 0.00011292318129029665,
+      "loss": 0.9814,
+      "step": 1060
+    },
+    {
+      "epoch": 0.47555555555555556,
+      "grad_norm": 8.728630065917969,
+      "learning_rate": 0.00011150974788044521,
+      "loss": 1.0526,
+      "step": 1070
+    },
+    {
+      "epoch": 0.48,
+      "grad_norm": 7.729814529418945,
+      "learning_rate": 0.00011009398022149495,
+      "loss": 0.9997,
+      "step": 1080
+    },
+    {
+      "epoch": 0.48444444444444446,
+      "grad_norm": 9.42880916595459,
+      "learning_rate": 0.00010867616544001164,
+      "loss": 0.9999,
+      "step": 1090
+    },
+    {
+      "epoch": 0.4888888888888889,
+      "grad_norm": 8.457280158996582,
+      "learning_rate": 0.00010725659107773045,
+      "loss": 1.0464,
+      "step": 1100
+    },
+    {
+      "epoch": 0.49333333333333335,
+      "grad_norm": 8.715860366821289,
+      "learning_rate": 0.00010583554503324044,
+      "loss": 1.0088,
+      "step": 1110
+    },
+    {
+      "epoch": 0.49777777777777776,
+      "grad_norm": 6.529873847961426,
+      "learning_rate": 0.00010441331550359712,
+      "loss": 1.0749,
+      "step": 1120
+    },
+    {
+      "epoch": 0.5022222222222222,
+      "grad_norm": 7.668039798736572,
+      "learning_rate": 0.0001029901909258742,
+      "loss": 1.026,
+      "step": 1130
+    },
+    {
+      "epoch": 0.5066666666666667,
+      "grad_norm": 8.81876277923584,
+      "learning_rate": 0.00010156645991866677,
+      "loss": 1.0293,
+      "step": 1140
+    },
+    {
+      "epoch": 0.5111111111111111,
+      "grad_norm": 8.55112075805664,
+      "learning_rate": 0.00010014241122355762,
+      "loss": 1.0282,
+      "step": 1150
+    },
+    {
+      "epoch": 0.5155555555555555,
+      "grad_norm": 9.484146118164062,
+      "learning_rate": 9.871833364655865e-05,
+      "loss": 0.9964,
+      "step": 1160
+    },
+    {
+      "epoch": 0.52,
+      "grad_norm": 10.939757347106934,
+      "learning_rate": 9.729451599953917e-05,
+      "loss": 1.0519,
+      "step": 1170
+    },
+    {
+      "epoch": 0.5244444444444445,
+      "grad_norm": 9.229081153869629,
+      "learning_rate": 9.587124704165302e-05,
+      "loss": 1.0511,
+      "step": 1180
+    },
+    {
+      "epoch": 0.5288888888888889,
+      "grad_norm": 12.294286727905273,
+      "learning_rate": 9.44488154207766e-05,
+      "loss": 1.0302,
+      "step": 1190
+    },
+    {
+      "epoch": 0.5333333333333333,
+      "grad_norm": 11.527563095092773,
+      "learning_rate": 9.302750961496888e-05,
+      "loss": 1.0333,
+      "step": 1200
     }
   ],
   "logging_steps": 10,
       "attributes": {}
     }
   },
+  "total_flos": 3.2735062130688e+19,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null