Training in progress, epoch 0, checkpoint

Browse files

Files changed (8) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state_0.pth +1 -1
last-checkpoint/rng_state_1.pth +1 -1
last-checkpoint/rng_state_2.pth +1 -1
last-checkpoint/rng_state_3.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +160 -4

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8f7a8d58b4c09ade6f079e7b6a46e65c4b32950956c4f0dfb9b6939df8a17f87
 size 69760376

 version https://git-lfs.github.com/spec/v1
+oid sha256:0acbc6280d59d7c546d4019439dd15028d08b91d45d71b01e76d925d5b9aa047
 size 69760376

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5f03e4a75d05a3a69122dc2cde414860903f0552bb86a96ea0c3ca9b096fa4f0
 size 35674187

 version https://git-lfs.github.com/spec/v1
+oid sha256:d9288679039f283abf554e10a50a709b5399da64be515ef088c22abd9c4696b5
 size 35674187

last-checkpoint/rng_state_0.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:128263f9d56df8dfbebdf37b11d1574802ff808151ff8b1eaf516edf15d33663
 size 15429

 version https://git-lfs.github.com/spec/v1
+oid sha256:613c246f7daefe145538132c58a45b1f05c81b12e71ac5e830125551ea6dba3f
 size 15429

last-checkpoint/rng_state_1.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:af755915271b882008c5dc7bb597fd721d504e6913687828b26b35921fe7eb3c
 size 15429

 version https://git-lfs.github.com/spec/v1
+oid sha256:9291f46bb0ea12256885a523949d99cb2662c99c6053a28fd3413aa853f7487e
 size 15429

last-checkpoint/rng_state_2.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1bace330f16736829343cc88653634357667c6ace471d122f10b8c2f916085bd
 size 15429

 version https://git-lfs.github.com/spec/v1
+oid sha256:cf1ff5a38e6a8205b2cb133f2fe8a22c4a8fff158447854d82be8ecb3a5173b4
 size 15429

last-checkpoint/rng_state_3.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6d28958f4c0698d32eee35941b6e510c911f3e053aeeb69a91c16b23f28de1d2
 size 15429

 version https://git-lfs.github.com/spec/v1
+oid sha256:c7fbccc240607f30e73da17be501b2e69611822ad5cbb5bf68c3113e547a764a
 size 15429

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7cffc4dbac33d7f056f73816d3b2c381c4dec0851d36a6b2b5d0ea3711b36c12
 size 1465

 version https://git-lfs.github.com/spec/v1
+oid sha256:5544d99b47a50f9a0dfa3c93e360c08a1e2651081f44245ce601a2c2cc1689e5
 size 1465

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,10 +1,10 @@
 {
   "best_global_step": null,
-  "best_metric": 1.5769575834274292,
   "best_model_checkpoint": null,
-  "epoch": 0.29896430223867315,
   "eval_steps": 50,
-  "global_step": 4200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -6560,6 +6560,162 @@
       "eval_samples_per_second": 507.408,
       "eval_steps_per_second": 15.857,
       "step": 4200
     }
   ],
   "logging_steps": 5,
@@ -6588,7 +6744,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 2.6306887513695846e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
   "best_global_step": null,
+  "best_metric": 1.5748103857040405,
   "best_model_checkpoint": null,
+  "epoch": 0.30608249991102254,
   "eval_steps": 50,
+  "global_step": 4300,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 507.408,
       "eval_steps_per_second": 15.857,
       "step": 4200
+    },
+    {
+      "epoch": 0.29932021212229065,
+      "grad_norm": 1.0814719200134277,
+      "learning_rate": 0.00019736991588454418,
+      "loss": 1.58,
+      "step": 4205
+    },
+    {
+      "epoch": 0.2996761220059081,
+      "grad_norm": 1.125158429145813,
+      "learning_rate": 0.00019736353172038974,
+      "loss": 1.5156,
+      "step": 4210
+    },
+    {
+      "epoch": 0.3000320318895256,
+      "grad_norm": 1.249143123626709,
+      "learning_rate": 0.00019735713992079262,
+      "loss": 1.5065,
+      "step": 4215
+    },
+    {
+      "epoch": 0.30038794177314304,
+      "grad_norm": 1.0526578426361084,
+      "learning_rate": 0.00019735074048625413,
+      "loss": 1.4694,
+      "step": 4220
+    },
+    {
+      "epoch": 0.3007438516567605,
+      "grad_norm": 0.9182873368263245,
+      "learning_rate": 0.0001973443334172761,
+      "loss": 1.4801,
+      "step": 4225
+    },
+    {
+      "epoch": 0.301099761540378,
+      "grad_norm": 0.9803130626678467,
+      "learning_rate": 0.000197337918714361,
+      "loss": 1.5348,
+      "step": 4230
+    },
+    {
+      "epoch": 0.30145567142399543,
+      "grad_norm": 1.5117913484573364,
+      "learning_rate": 0.00019733149637801188,
+      "loss": 1.6197,
+      "step": 4235
+    },
+    {
+      "epoch": 0.30181158130761293,
+      "grad_norm": 0.9475013017654419,
+      "learning_rate": 0.00019732506640873237,
+      "loss": 1.5342,
+      "step": 4240
+    },
+    {
+      "epoch": 0.3021674911912304,
+      "grad_norm": 0.7990288734436035,
+      "learning_rate": 0.00019731862880702675,
+      "loss": 1.4877,
+      "step": 4245
+    },
+    {
+      "epoch": 0.3025234010748479,
+      "grad_norm": 0.9656383395195007,
+      "learning_rate": 0.00019731218357339983,
+      "loss": 1.5794,
+      "step": 4250
+    },
+    {
+      "epoch": 0.3025234010748479,
+      "eval_loss": 1.5782647132873535,
+      "eval_runtime": 197.0151,
+      "eval_samples_per_second": 507.068,
+      "eval_steps_per_second": 15.847,
+      "step": 4250
+    },
+    {
+      "epoch": 0.3028793109584653,
+      "grad_norm": 0.9215989708900452,
+      "learning_rate": 0.00019730573070835713,
+      "loss": 1.5015,
+      "step": 4255
+    },
+    {
+      "epoch": 0.30323522084208276,
+      "grad_norm": 0.9019184708595276,
+      "learning_rate": 0.00019729927021240463,
+      "loss": 1.4834,
+      "step": 4260
+    },
+    {
+      "epoch": 0.30359113072570026,
+      "grad_norm": 1.0000808238983154,
+      "learning_rate": 0.00019729280208604898,
+      "loss": 1.6177,
+      "step": 4265
+    },
+    {
+      "epoch": 0.3039470406093177,
+      "grad_norm": 1.110743522644043,
+      "learning_rate": 0.00019728632632979746,
+      "loss": 1.5632,
+      "step": 4270
+    },
+    {
+      "epoch": 0.3043029504929352,
+      "grad_norm": 1.039726734161377,
+      "learning_rate": 0.00019727984294415788,
+      "loss": 1.4988,
+      "step": 4275
+    },
+    {
+      "epoch": 0.30465886037655265,
+      "grad_norm": 1.0508739948272705,
+      "learning_rate": 0.0001972733519296387,
+      "loss": 1.6282,
+      "step": 4280
+    },
+    {
+      "epoch": 0.3050147702601701,
+      "grad_norm": 0.9834769368171692,
+      "learning_rate": 0.000197266853286749,
+      "loss": 1.5576,
+      "step": 4285
+    },
+    {
+      "epoch": 0.3053706801437876,
+      "grad_norm": 1.0661753416061401,
+      "learning_rate": 0.0001972603470159983,
+      "loss": 1.5619,
+      "step": 4290
+    },
+    {
+      "epoch": 0.30572659002740504,
+      "grad_norm": 0.9293569326400757,
+      "learning_rate": 0.00019725383311789693,
+      "loss": 1.5036,
+      "step": 4295
+    },
+    {
+      "epoch": 0.30608249991102254,
+      "grad_norm": 1.0038737058639526,
+      "learning_rate": 0.0001972473115929557,
+      "loss": 1.5417,
+      "step": 4300
+    },
+    {
+      "epoch": 0.30608249991102254,
+      "eval_loss": 1.5748103857040405,
+      "eval_runtime": 197.5572,
+      "eval_samples_per_second": 505.676,
+      "eval_steps_per_second": 15.803,
+      "step": 4300
     }
   ],
   "logging_steps": 5,
       "attributes": {}
     }
   },
+  "total_flos": 2.6945799231792742e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null