End of training

Files changed (3) hide show

all_results.json CHANGED Viewed

@@ -1,8 +1,8 @@
 {
-    "epoch": 1.0,
-    "train_loss": 6.0430145263671875,
-    "train_runtime": 20.6459,
-    "train_samples": 100,
-    "train_samples_per_second": 4.844,
-    "train_steps_per_second": 0.048
 }

 {
+    "epoch": 0.99,
+    "train_loss": 4.877151981476815,
+    "train_runtime": 551.4767,
+    "train_samples": 4000,
+    "train_samples_per_second": 7.253,
+    "train_steps_per_second": 0.056
 }

train_results.json CHANGED Viewed

@@ -1,8 +1,8 @@
 {
-    "epoch": 1.0,
-    "train_loss": 6.0430145263671875,
-    "train_runtime": 20.6459,
-    "train_samples": 100,
-    "train_samples_per_second": 4.844,
-    "train_steps_per_second": 0.048
 }

 {
+    "epoch": 0.99,
+    "train_loss": 4.877151981476815,
+    "train_runtime": 551.4767,
+    "train_samples": 4000,
+    "train_samples_per_second": 7.253,
+    "train_steps_per_second": 0.056
 }

trainer_state.json CHANGED Viewed

@@ -1,28 +1,46 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 1.0,
   "eval_steps": 500,
-  "global_step": 1,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
   "log_history": [
     {
-      "epoch": 1.0,
-      "step": 1,
-      "total_flos": 19597256755200.0,
-      "train_loss": 6.0430145263671875,
-      "train_runtime": 20.6459,
-      "train_samples_per_second": 4.844,
-      "train_steps_per_second": 0.048
     }
   ],
   "logging_steps": 10,
-  "max_steps": 1,
   "num_train_epochs": 1,
   "save_steps": 200.0,
-  "total_flos": 19597256755200.0,
   "trial_name": null,
   "trial_params": null
 }

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.992,
   "eval_steps": 500,
+  "global_step": 31,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
   "log_history": [
     {
+      "epoch": 0.32,
+      "learning_rate": 2.3333333333333336e-05,
+      "loss": 5.7397,
+      "step": 10
+    },
+    {
+      "epoch": 0.64,
+      "learning_rate": 1.2222222222222222e-05,
+      "loss": 4.7137,
+      "step": 20
+    },
+    {
+      "epoch": 0.96,
+      "learning_rate": 1.111111111111111e-06,
+      "loss": 4.2477,
+      "step": 30
+    },
+    {
+      "epoch": 0.99,
+      "step": 31,
+      "total_flos": 777619148046336.0,
+      "train_loss": 4.877151981476815,
+      "train_runtime": 551.4767,
+      "train_samples_per_second": 7.253,
+      "train_steps_per_second": 0.056
     }
   ],
   "logging_steps": 10,
+  "max_steps": 31,
   "num_train_epochs": 1,
   "save_steps": 200.0,
+  "total_flos": 777619148046336.0,
   "trial_name": null,
   "trial_params": null
 }