Training in progress, step 500, checkpoint

Files changed (7) hide show

checkpoint-500/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a82666204de8b89bd0036f6260389ddc7c2fbf6bc8307febc536ce90920f9ad1
 size 435544704

 version https://git-lfs.github.com/spec/v1
+oid sha256:a0e79df87d9c759bd2761e451a5e321f108e1ad64bdcc914571daecc9f5d2dbe
 size 435544704

checkpoint-500/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:110f3051b3640d1a1fc6615fd702246f8d8b3fdc45b595ac33e7a223d9bd664b
 size 871183627

 version https://git-lfs.github.com/spec/v1
+oid sha256:975de4f1994dc71281a49407729fdb3f499bcdd19ac0baf85690d59a68bebdea
 size 871183627

checkpoint-500/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:00b092abfcd4cf2a442863110ef72a1ea6d44da9e7f7b987c0619b0f03d4c428
 size 14709

 version https://git-lfs.github.com/spec/v1
+oid sha256:420235bf06177d9f84f39f30fbfcee8722feaeba193f6efc76bcd8a2574da100
 size 14709

checkpoint-500/scaler.pt ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:f77569c2e850b04af982cc8c1389f1430851448915c593b69e5da36ce05b71d7
+size 1383

checkpoint-500/tokenizer.json CHANGED Viewed

The diff for this file is too large to render. See raw diff

checkpoint-500/trainer_state.json CHANGED Viewed

@@ -1,8 +1,8 @@
 {
-  "best_global_step": 491,
-  "best_metric": 4.097745895385742,
   "best_model_checkpoint": null,
-  "epoch": 1.0183299389002036,
   "eval_steps": 500,
   "global_step": 500,
   "is_hyper_param_search": false,
@@ -10,16 +10,15 @@
   "is_world_process_zero": true,
   "log_history": [
     {
-      "epoch": 1.0,
-      "eval_loss": 4.097745895385742,
-      "eval_runtime": 66.4624,
-      "eval_samples_per_second": 504.782,
-      "eval_steps_per_second": 1.986,
-      "step": 491
     }
   ],
-  "logging_steps": 4000,
-  "max_steps": 2455,
   "num_input_tokens_seen": 0,
   "num_train_epochs": 5,
   "save_steps": 500,
@@ -35,7 +34,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 8358601457664000.0,
   "train_batch_size": 256,
   "trial_name": null,
   "trial_params": null

 {
+  "best_global_step": null,
+  "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.998003992015968,
   "eval_steps": 500,
   "global_step": 500,
   "is_hyper_param_search": false,
   "is_world_process_zero": true,
   "log_history": [
     {
+      "epoch": 0.001996007984031936,
+      "grad_norm": 30.08072853088379,
+      "learning_rate": 0.0,
+      "loss": 10.3965,
+      "step": 1
     }
   ],
+  "logging_steps": 500,
+  "max_steps": 2505,
   "num_input_tokens_seen": 0,
   "num_train_epochs": 5,
   "save_steps": 500,
       "attributes": {}
     }
   },
+  "total_flos": 8361345024000000.0,
   "train_batch_size": 256,
   "trial_name": null,
   "trial_params": null

checkpoint-500/training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9a57dba9b9c8c3226400ebcad0a2060a021a094dd027a7ac1acf9378f5c6cc27
 size 5969

 version https://git-lfs.github.com/spec/v1
+oid sha256:cb7a8afa9c6745812d26be318b1c7d9348962a9c4c0d9cbb9442934f28d74eb6
 size 5969