Training in progress, step 70, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +81 -3

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:bd1a841fe06df2853fbd416cdc8d079cc531d947e13cc486f09f2bf14ee91d69
 size 407179392

 version https://git-lfs.github.com/spec/v1
+oid sha256:0fc911797dbb48113bb099b3777dbf3951f49575b52e0434f7aca5f9c5aad6f6
 size 407179392

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:22480292f7d6813921a3064dc9a64c13b8e3536ecbe8984008c1fe7db58637a9
 size 109342998

 version https://git-lfs.github.com/spec/v1
+oid sha256:02fe19c69de97dca9a1c4e13242451ee6dcf7736625098ac454b279b844dbbfa
 size 109342998

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c242a91be9cfcabf4b35bd27e771f2777f257c530adf560f58a0f34ab68510e0
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:293ce427b38d75786040ab38f0cbba49856bd9344d124c96eba3b830a2206f44
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b04614c3fedfb0bab0db621905a752b151866b1995a65909c46af0471d274d85
 size 2080

 version https://git-lfs.github.com/spec/v1
+oid sha256:4ce28fc8e95726572260a53b608e14738b0137be2acc0fb947d81ee76553c52a
 size 2080

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.00014562117137670256,
   "eval_steps": 10,
-  "global_step": 60,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -483,6 +483,84 @@
       "eval_samples_per_second": 0.448,
       "eval_steps_per_second": 0.448,
       "step": 60
     }
   ],
   "logging_steps": 1,
@@ -502,7 +580,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1310165764669440.0,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.000169891366606153,
   "eval_steps": 10,
+  "global_step": 70,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 0.448,
       "eval_steps_per_second": 0.448,
       "step": 60
+    },
+    {
+      "epoch": 0.0001480481908996476,
+      "grad_norm": 2.424983024597168,
+      "learning_rate": 0.00019999999494757503,
+      "loss": 1.631,
+      "step": 61
+    },
+    {
+      "epoch": 0.00015047521042259264,
+      "grad_norm": 6.923261642456055,
+      "learning_rate": 0.00019999999494757503,
+      "loss": 3.3649,
+      "step": 62
+    },
+    {
+      "epoch": 0.0001529022299455377,
+      "grad_norm": 3.311091184616089,
+      "learning_rate": 0.00019999999494757503,
+      "loss": 2.1499,
+      "step": 63
+    },
+    {
+      "epoch": 0.0001553292494684827,
+      "grad_norm": 4.510260581970215,
+      "learning_rate": 0.00019999999494757503,
+      "loss": 1.5414,
+      "step": 64
+    },
+    {
+      "epoch": 0.00015775626899142776,
+      "grad_norm": 4.838099956512451,
+      "learning_rate": 0.00019999999494757503,
+      "loss": 2.9635,
+      "step": 65
+    },
+    {
+      "epoch": 0.00016018328851437281,
+      "grad_norm": 3.5294570922851562,
+      "learning_rate": 0.00019999999494757503,
+      "loss": 1.4734,
+      "step": 66
+    },
+    {
+      "epoch": 0.00016261030803731787,
+      "grad_norm": 3.9977903366088867,
+      "learning_rate": 0.00019999999494757503,
+      "loss": 1.6908,
+      "step": 67
+    },
+    {
+      "epoch": 0.0001650373275602629,
+      "grad_norm": 4.988344192504883,
+      "learning_rate": 0.00019999999494757503,
+      "loss": 0.6794,
+      "step": 68
+    },
+    {
+      "epoch": 0.00016746434708320794,
+      "grad_norm": 1.6817117929458618,
+      "learning_rate": 0.00019999999494757503,
+      "loss": 1.4359,
+      "step": 69
+    },
+    {
+      "epoch": 0.000169891366606153,
+      "grad_norm": 5.336698532104492,
+      "learning_rate": 0.00019999999494757503,
+      "loss": 3.0466,
+      "step": 70
+    },
+    {
+      "epoch": 0.000169891366606153,
+      "eval_loss": 2.2040059566497803,
+      "eval_runtime": 47.1606,
+      "eval_samples_per_second": 0.445,
+      "eval_steps_per_second": 0.445,
+      "step": 70
     }
   ],
   "logging_steps": 1,
       "attributes": {}
     }
   },
+  "total_flos": 1528526725447680.0,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null