Training in progress, step 4400, checkpoint

Browse files

Files changed (6) hide show

last-checkpoint/model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +30 -6
last-checkpoint/training_args.bin +1 -1

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:609fc3aec7d4ea09743f402bcbdc9312381fb07cfdea7f760dbb9aa66c9922ae
 size 990185320

 version https://git-lfs.github.com/spec/v1
+oid sha256:2e52c190ed78a0ebb86d913c3edb39c59d1fb56fb02a685e065429d572224717
 size 990185320

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4f7fbb8e0818078c8e05f154119f20b96ef2520c1006cc15bca9aee9ce6c37a6
 size 1980545291

 version https://git-lfs.github.com/spec/v1
+oid sha256:713ec180255281424e9221e7ce8537b4f82b9eec0533c8b1de86f4fe29e8f69b
 size 1980545291

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a6db4e22e6e4d4c2dc1940f88f326ffecd6b0a23e25b1f1f067d9f0becb200e8
 size 14645

 version https://git-lfs.github.com/spec/v1
+oid sha256:85d47bc78c02f641fd9d0148cd322117520fa2159b2a37680fd3d6bfd1e4d8f0
 size 14645

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:41bbb83c038b51603ecc1a132a2a64f6ce3985f0afe06c759ec4c137f1f6ea37
 size 1465

 version https://git-lfs.github.com/spec/v1
+oid sha256:9ac71e29fa8fcf946ca23ffb18fdc365d779c9119156f3d1b53233021eb38387
 size 1465

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 3.0,
   "eval_steps": 400,
-  "global_step": 4209,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -402,12 +402,36 @@
       "learning_rate": 1.1879306248515088e-07,
       "loss": 0.6361,
       "step": 4200
     }
   ],
   "logging_steps": 100,
-  "max_steps": 4209,
   "num_input_tokens_seen": 0,
-  "num_train_epochs": 3,
   "save_steps": 400,
   "stateful_callbacks": {
     "TrainerControl": {
@@ -416,12 +440,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": true
       },
       "attributes": {}
     }
   },
-  "total_flos": 1.152343386095616e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 3.1361368496079827,
   "eval_steps": 400,
+  "global_step": 4400,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 1.1879306248515088e-07,
       "loss": 0.6361,
       "step": 4200
+    },
+    {
+      "epoch": 3.0648610121168924,
+      "grad_norm": 4.8953704833984375,
+      "learning_rate": 1.9358517462580187e-05,
+      "loss": 0.6203,
+      "step": 4300
+    },
+    {
+      "epoch": 3.1361368496079827,
+      "grad_norm": 4.9622979164123535,
+      "learning_rate": 1.864575908766928e-05,
+      "loss": 0.6883,
+      "step": 4400
+    },
+    {
+      "epoch": 3.1361368496079827,
+      "eval_bleu": 54.4615551796715,
+      "eval_chrf": 76.75255355595692,
+      "eval_loss": 1.152636170387268,
+      "eval_runtime": 45.8574,
+      "eval_samples_per_second": 9.246,
+      "eval_steps_per_second": 1.156,
+      "step": 4400
     }
   ],
   "logging_steps": 100,
+  "max_steps": 7015,
   "num_input_tokens_seen": 0,
+  "num_train_epochs": 5,
   "save_steps": 400,
   "stateful_callbacks": {
     "TrainerControl": {
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": false
       },
       "attributes": {}
     }
   },
+  "total_flos": 1.2046541613170688e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

last-checkpoint/training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a88ea9e1b21a65cd09311fdaf930fc0a1e92d081971bef8af8521a261729151c
 size 5905

 version https://git-lfs.github.com/spec/v1
+oid sha256:4a97dc88cb36ad68d168c1fe8fdf25a699fd4c00ef193018770a2a9f07a5f869
 size 5905