Training in progress, step 2700, checkpoint
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:ec39b6059f46dd5c028a2b3a8df89e54652f47a7b2d1b473858cb9613ea2bf35
 size 3237829088
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:38c900497bf3dbda1d6e7c1b32cf2a719ed87675df67744d76a636bb793c4be6
 size 2062272049
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:f936c4340b1a5e33087b6159d8f0cde321033f9a21edc5ffdda56dd518d57d1d
 size 14645
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:29254c2526b30c1f020401ec71783f99885e5c23773b0ea29681c66ec8089ebb
 size 1383
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:cd91190946d7dc5a14f47d6b938cddd6477162a42282961cbb0f0f14b153eef3
 size 1465
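Each binary file above is stored as a Git LFS pointer: the repository tracks only a `version` / `oid sha256` / `size` triplet, and each diff in this commit swaps in the new blob's hash while the payload itself lives in LFS storage. A minimal Python sketch for checking a downloaded blob against the pointer values recorded here; the `verify_lfs_pointer` helper is illustrative, not part of this repo:

```python
import hashlib
import os

def verify_lfs_pointer(blob_path: str, expected_oid: str, expected_size: int) -> bool:
    """Compare a downloaded file against the oid/size from its LFS pointer."""
    if os.path.getsize(blob_path) != expected_size:
        return False
    digest = hashlib.sha256()
    with open(blob_path, "rb") as f:
        # Stream in 1 MiB chunks; the model shard here is ~3.2 GB.
        for chunk in iter(lambda: f.read(1 << 20), b""):
            digest.update(chunk)
    return digest.hexdigest() == expected_oid

# Values taken from the model.safetensors pointer in this commit:
print(verify_lfs_pointer(
    "last-checkpoint/model.safetensors",
    "ec39b6059f46dd5c028a2b3a8df89e54652f47a7b2d1b473858cb9613ea2bf35",
    3237829088,
))
```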
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.
+  "epoch": 0.7748600947051227,
   "eval_steps": 300,
-  "global_step":
+  "global_step": 2700,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1688,6 +1688,216 @@
       "learning_rate": 4.60977866504668e-05,
       "loss": 0.7796,
       "step": 2400
+    },
+    {
+      "epoch": 0.6916343808293873,
+      "grad_norm": 5.168239593505859,
+      "learning_rate": 4.5325446759871316e-05,
+      "loss": 0.7764,
+      "step": 2410
+    },
+    {
+      "epoch": 0.6945042330319988,
+      "grad_norm": 3.202075958251953,
+      "learning_rate": 4.455773117142965e-05,
+      "loss": 0.7483,
+      "step": 2420
+    },
+    {
+      "epoch": 0.6973740852346104,
+      "grad_norm": 4.126010417938232,
+      "learning_rate": 4.379470481752139e-05,
+      "loss": 0.7702,
+      "step": 2430
+    },
+    {
+      "epoch": 0.700243937437222,
+      "grad_norm": 5.2914509773254395,
+      "learning_rate": 4.303643223391698e-05,
+      "loss": 0.7663,
+      "step": 2440
+    },
+    {
+      "epoch": 0.7031137896398335,
+      "grad_norm": 5.010975360870361,
+      "learning_rate": 4.2282977554319034e-05,
+      "loss": 0.7911,
+      "step": 2450
+    },
+    {
+      "epoch": 0.7059836418424451,
+      "grad_norm": 3.504735231399536,
+      "learning_rate": 4.153440450493823e-05,
+      "loss": 0.7452,
+      "step": 2460
+    },
+    {
+      "epoch": 0.7088534940450567,
+      "grad_norm": 5.5859880447387695,
+      "learning_rate": 4.0790776399103294e-05,
+      "loss": 0.758,
+      "step": 2470
+    },
+    {
+      "epoch": 0.7117233462476682,
+      "grad_norm": 6.027501583099365,
+      "learning_rate": 4.0052156131906214e-05,
+      "loss": 0.7945,
+      "step": 2480
+    },
+    {
+      "epoch": 0.7145931984502798,
+      "grad_norm": 5.546058654785156,
+      "learning_rate": 3.93186061748824e-05,
+      "loss": 0.7676,
+      "step": 2490
+    },
+    {
+      "epoch": 0.7174630506528914,
+      "grad_norm": 4.879994869232178,
+      "learning_rate": 3.859018857072719e-05,
+      "loss": 0.7926,
+      "step": 2500
+    },
+    {
+      "epoch": 0.7203329028555029,
+      "grad_norm": 4.717655181884766,
+      "learning_rate": 3.786696492804812e-05,
+      "loss": 0.7451,
+      "step": 2510
+    },
+    {
+      "epoch": 0.7232027550581145,
+      "grad_norm": 6.432432174682617,
+      "learning_rate": 3.714899641615438e-05,
+      "loss": 0.7938,
+      "step": 2520
+    },
+    {
+      "epoch": 0.7260726072607261,
+      "grad_norm": 5.008986473083496,
+      "learning_rate": 3.6436343759882926e-05,
+      "loss": 0.765,
+      "step": 2530
+    },
+    {
+      "epoch": 0.7289424594633377,
+      "grad_norm": 7.00074577331543,
+      "learning_rate": 3.5729067234462785e-05,
+      "loss": 0.7794,
+      "step": 2540
+    },
+    {
+      "epoch": 0.7318123116659492,
+      "grad_norm": 6.525863170623779,
+      "learning_rate": 3.5027226660416736e-05,
+      "loss": 0.7979,
+      "step": 2550
+    },
+    {
+      "epoch": 0.7346821638685608,
+      "grad_norm": 5.4863786697387695,
+      "learning_rate": 3.433088139850193e-05,
+      "loss": 0.7625,
+      "step": 2560
+    },
+    {
+      "epoch": 0.7375520160711724,
+      "grad_norm": 3.975086212158203,
+      "learning_rate": 3.364009034468926e-05,
+      "loss": 0.7471,
+      "step": 2570
+    },
+    {
+      "epoch": 0.7404218682737839,
+      "grad_norm": 3.787874460220337,
+      "learning_rate": 3.2954911925181876e-05,
+      "loss": 0.7662,
+      "step": 2580
+    },
+    {
+      "epoch": 0.7432917204763955,
+      "grad_norm": 4.633001804351807,
+      "learning_rate": 3.2275404091473795e-05,
+      "loss": 0.774,
+      "step": 2590
+    },
+    {
+      "epoch": 0.7461615726790071,
+      "grad_norm": 4.832580089569092,
+      "learning_rate": 3.1601624315448166e-05,
+      "loss": 0.7749,
+      "step": 2600
+    },
+    {
+      "epoch": 0.7490314248816186,
+      "grad_norm": 4.763906955718994,
+      "learning_rate": 3.0933629584516665e-05,
+      "loss": 0.7438,
+      "step": 2610
+    },
+    {
+      "epoch": 0.7519012770842302,
+      "grad_norm": 4.065663814544678,
+      "learning_rate": 3.027147639679928e-05,
+      "loss": 0.7546,
+      "step": 2620
+    },
+    {
+      "epoch": 0.7547711292868418,
+      "grad_norm": 4.496669769287109,
+      "learning_rate": 2.961522075634604e-05,
+      "loss": 0.7878,
+      "step": 2630
+    },
+    {
+      "epoch": 0.7576409814894532,
+      "grad_norm": 3.8822827339172363,
+      "learning_rate": 2.896491816840008e-05,
+      "loss": 0.7884,
+      "step": 2640
+    },
+    {
+      "epoch": 0.7605108336920648,
+      "grad_norm": 4.25615119934082,
+      "learning_rate": 2.8320623634703147e-05,
+      "loss": 0.7418,
+      "step": 2650
+    },
+    {
+      "epoch": 0.7633806858946764,
+      "grad_norm": 4.472879886627197,
+      "learning_rate": 2.76823916488436e-05,
+      "loss": 0.7944,
+      "step": 2660
+    },
+    {
+      "epoch": 0.7662505380972879,
+      "grad_norm": 6.644125938415527,
+      "learning_rate": 2.705027619164754e-05,
+      "loss": 0.7525,
+      "step": 2670
+    },
+    {
+      "epoch": 0.7691203902998995,
+      "grad_norm": 3.8960325717926025,
+      "learning_rate": 2.6424330726612946e-05,
+      "loss": 0.748,
+      "step": 2680
+    },
+    {
+      "epoch": 0.7719902425025111,
+      "grad_norm": 3.907740354537964,
+      "learning_rate": 2.5804608195388057e-05,
+      "loss": 0.7686,
+      "step": 2690
+    },
+    {
+      "epoch": 0.7748600947051227,
+      "grad_norm": 4.432440757751465,
+      "learning_rate": 2.5191161013293396e-05,
+      "loss": 0.7671,
+      "step": 2700
     }
   ],
   "logging_steps": 10,
@@ -1707,7 +1917,7 @@
       "attributes": {}
     }
   },
-  "total_flos":
+  "total_flos": 1.10480834691072e+20,
   "train_batch_size": 6,
   "trial_name": null,
   "trial_params": null
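For reference, `trainer_state.json` is plain JSON, so the resume metadata updated in this commit can be inspected directly; a minimal sketch, assuming the checkpoint has been downloaded to `last-checkpoint/`:

```python
import json

# Inspect the counters and log history this commit updated.
with open("last-checkpoint/trainer_state.json") as f:
    state = json.load(f)

print(state["global_step"])       # 2700
print(state["epoch"])             # 0.7748600947051227
print(state["log_history"][-1])   # the step-2700 entry added in this commit
```

To actually continue the run, the usual `transformers` pattern is `trainer.train(resume_from_checkpoint="last-checkpoint")`, with the `Trainer` constructed the same way as in the original training script; the `optimizer.pt`, `scheduler.pt`, `scaler.pt`, and `rng_state.pth` files in this commit are what that call restores.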