Training in progress, step 2610, checkpoint

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d8e7cadebbae1d54eaf66ccc7b9f5c9ff4dd4bf44b4d8a1f8b8a0084844d5797
 size 70430032

 version https://git-lfs.github.com/spec/v1
+oid sha256:8687c9499c8dfa4261b0eebcd4d4d6977344d6025ebb881e9d485193f3090c9b
 size 70430032

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ad5f8e1609877fa0eb8aa18abf22e51f9d92e64d503e8b3d45179c00ded279ac
 size 141058579

 version https://git-lfs.github.com/spec/v1
+oid sha256:e0bcf2fa7bc23621db9dbb2c18b0bd7c532f2860d632202ce5a20bd39fe79c89
 size 141058579

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5d5e082edfc1c825c97152696481c3dec5a3c519176259229a60653e2b9ec26f
 size 14645

 version https://git-lfs.github.com/spec/v1
+oid sha256:f5907384589dae04315beaebce35821e4a256f922bf500c935f9571f4eba436e
 size 14645

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4376ea4c2891899450d26351d7f334694fcd1845e06b10d3542406ce1aa6a830
 size 1465

 version https://git-lfs.github.com/spec/v1
+oid sha256:be40234ff8e6330cc9e39f66e1c311009b73515e48fbe9cb20c1bea05dae9f87
 size 1465

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.5546666666666666,
   "eval_steps": 500,
-  "global_step": 2600,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -2618,6 +2618,16 @@
       "mean_token_accuracy": 0.7560776218771934,
       "num_tokens": 12082379.0,
       "step": 2600
     }
   ],
   "logging_steps": 10,
@@ -2637,7 +2647,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 5.727758984615424e+16,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.5568,
   "eval_steps": 500,
+  "global_step": 2610,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "mean_token_accuracy": 0.7560776218771934,
       "num_tokens": 12082379.0,
       "step": 2600
+    },
+    {
+      "entropy": 0.904555281996727,
+      "epoch": 0.5568,
+      "grad_norm": 0.2646128535270691,
+      "learning_rate": 8.486244594122297e-05,
+      "loss": 1.0139558792114258,
+      "mean_token_accuracy": 0.7750694006681442,
+      "num_tokens": 12125503.0,
+      "step": 2610
     }
   ],
   "logging_steps": 10,
       "attributes": {}
     }
   },
+  "total_flos": 5.746905600795955e+16,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null