Training in progress, step 1800, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/scaler.pt +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +73 -3

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3919cab12afb691f8e2bde9aed0bdad3628d6f1a5ecae97beb9b67f52859024e
 size 3826461296

 version https://git-lfs.github.com/spec/v1
+oid sha256:229e19659100db88dd521b24c7f3783cb59725c077f5c58e0b1e8cbed6566cad
 size 3826461296

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:65cec01b065a22732babc2be6945a5935ab48f5f41fd2fba8b539e6256b0dfa7
 size 2479955235

 version https://git-lfs.github.com/spec/v1
+oid sha256:086d4ae403b4653b546f4d669e2f8c98a6c3bc786f7ff28201dea70b2067e4f2
 size 2479955235

last-checkpoint/scaler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:da57c8097b451ef1168f1b0191d0689aff1a3bd0997413b1e9eeee0934b0b53c
 size 1383

 version https://git-lfs.github.com/spec/v1
+oid sha256:1547aae10ac7691e1716f567b08e3b4d274fa923879a48af8c2bb55c815a28a2
 size 1383

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3d5ccf396d48a7891c1332094feb71b5d5d1edce123ef8038fc290770c5e3a02
 size 1465

 version https://git-lfs.github.com/spec/v1
+oid sha256:cdd5d251a495085a19339ae2e6833dedf33f9b2050a0e70b16dd4cd5da2b7a12
 size 1465

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.68,
   "eval_steps": 500,
-  "global_step": 1700,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1198,6 +1198,76 @@
       "learning_rate": 1.6157556270096464e-05,
       "loss": 0.4807,
       "step": 1700
     }
   ],
   "logging_steps": 10,
@@ -1217,7 +1287,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 3.0652319992449024e+16,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.72,
   "eval_steps": 500,
+  "global_step": 1800,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 1.6157556270096464e-05,
       "loss": 0.4807,
       "step": 1700
+    },
+    {
+      "epoch": 0.684,
+      "grad_norm": 9.49954891204834,
+      "learning_rate": 1.5956591639871383e-05,
+      "loss": 0.3647,
+      "step": 1710
+    },
+    {
+      "epoch": 0.688,
+      "grad_norm": 14.690208435058594,
+      "learning_rate": 1.5755627009646305e-05,
+      "loss": 0.3715,
+      "step": 1720
+    },
+    {
+      "epoch": 0.692,
+      "grad_norm": 12.074922561645508,
+      "learning_rate": 1.5554662379421224e-05,
+      "loss": 0.491,
+      "step": 1730
+    },
+    {
+      "epoch": 0.696,
+      "grad_norm": 13.278485298156738,
+      "learning_rate": 1.5353697749196143e-05,
+      "loss": 0.4185,
+      "step": 1740
+    },
+    {
+      "epoch": 0.7,
+      "grad_norm": 12.987263679504395,
+      "learning_rate": 1.5152733118971063e-05,
+      "loss": 0.5613,
+      "step": 1750
+    },
+    {
+      "epoch": 0.704,
+      "grad_norm": 6.863049030303955,
+      "learning_rate": 1.4951768488745982e-05,
+      "loss": 0.3245,
+      "step": 1760
+    },
+    {
+      "epoch": 0.708,
+      "grad_norm": 11.087668418884277,
+      "learning_rate": 1.47508038585209e-05,
+      "loss": 0.4174,
+      "step": 1770
+    },
+    {
+      "epoch": 0.712,
+      "grad_norm": 5.16309118270874,
+      "learning_rate": 1.4549839228295819e-05,
+      "loss": 0.3233,
+      "step": 1780
+    },
+    {
+      "epoch": 0.716,
+      "grad_norm": 12.031776428222656,
+      "learning_rate": 1.4348874598070741e-05,
+      "loss": 0.3574,
+      "step": 1790
+    },
+    {
+      "epoch": 0.72,
+      "grad_norm": 13.569413185119629,
+      "learning_rate": 1.414790996784566e-05,
+      "loss": 0.5619,
+      "step": 1800
     }
   ],
   "logging_steps": 10,
       "attributes": {}
     }
   },
+  "total_flos": 3.2448823590445056e+16,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null