Training in progress, step 200, checkpoint

Files changed (5) hide show

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:507d55726514d36d57a7fdead1e533238594fb1d956a903a49ce02157b571e17
 size 2611614300

 version https://git-lfs.github.com/spec/v1
+oid sha256:30b0f309b7f4a021aeae9b8c023638b91640c0494b78df53f9799eafce8591ec
 size 2611614300

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2fb309794c77ba81c5d999193c64bdbf97c4b11eecd10e3754dddae31d948844
 size 5213028466

 version https://git-lfs.github.com/spec/v1
+oid sha256:7cbefb9ea304174eb6b10d342f4670b2fe3cd40d03615dc555c42ec57dc27c0e
 size 5213028466

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:874b9aec013ad321d4edc1c021ca42f8014c7ff34d53714ff8059015e8ee9794
 size 14942

 version https://git-lfs.github.com/spec/v1
+oid sha256:6b5a50d8043ac6976ce002b35434e69f45b0b7b1b32881ce5602e437b14e194b
 size 14942

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c39af0f6970331c0f430d145f5514421d9baa7e90f9cf02971fb62606bf1ff3d
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:eecc763d4f0407011d54bc501a4da5c4c1dfc18e161c6f252fcc58e764d0886a
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.024843756065370134,
   "eval_steps": 500,
-  "global_step": 100,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -14,6 +14,13 @@
       "learning_rate": 0.0009975155279503105,
       "loss": 1.0811305421386547e+17,
       "step": 100
     }
   ],
   "logging_steps": 100,
@@ -33,7 +40,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1.9448797327261696e+16,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.04968751213074027,
   "eval_steps": 500,
+  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 0.0009975155279503105,
       "loss": 1.0811305421386547e+17,
       "step": 100
+    },
+    {
+      "epoch": 0.04968751213074027,
+      "grad_norm": 0.9572473764419556,
+      "learning_rate": 0.000995031055900621,
+      "loss": 1.7117682180722524e+16,
+      "step": 200
     }
   ],
   "logging_steps": 100,
       "attributes": {}
     }
   },
+  "total_flos": 3.889759465452339e+16,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null