Training in progress, step 11200, checkpoint

Files changed (7) hide show

last-checkpoint/README.md CHANGED Viewed

@@ -1245,6 +1245,10 @@ You can finetune this model on your own dataset.
 | 0.1926 | 10900 | 0.4082        |
 | 0.1935 | 10950 | 0.4056        |
 | 0.1944 | 11000 | 0.4435        |
 ### Framework Versions

 | 0.1926 | 10900 | 0.4082        |
 | 0.1935 | 10950 | 0.4056        |
 | 0.1944 | 11000 | 0.4435        |
+| 0.1953 | 11050 | 0.4272        |
+| 0.1961 | 11100 | 0.4334        |
+| 0.1970 | 11150 | 0.4479        |
+| 0.1979 | 11200 | 0.3545        |
 ### Framework Versions

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:68fa837444bb1d1983506586971bf36bcb05644b535c7a58278e0f70de2e98b7
 size 90864192

 version https://git-lfs.github.com/spec/v1
+oid sha256:6e44300f1a7eda978b714254f7642e5fa8e46c4465149d151fb36c7a22d09887
 size 90864192

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:22e30cc5ca132903fd2a4ed91cad71091631f1cc2f4eba3197af287eeafbaf42
 size 180609210

 version https://git-lfs.github.com/spec/v1
+oid sha256:283034d540cc735bf4188ad3d17ee98d86aa7e8c24e656d2a4ff17d9e61dfb8e
 size 180609210

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e22fe178097ffc288163bb2b208108bb4bcac4332048a6b0583e8f00c3efbf3d
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:f802fc7d6b08b3448bdbc5653ab5c831b50c834ee5d8105b5d34c4dfeaebf61e
 size 14244

last-checkpoint/scaler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:43a0047c1f4849b7b6cea19c700df8596a7bc71c9ef39f6ce4cc6960374828c2
 size 988

 version https://git-lfs.github.com/spec/v1
+oid sha256:16f0df52c717c2e3e1dfb7c95b1b78d2bff8956a86b6980e2bde6913f11a1016
 size 988

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:cd72024597b2c3bf8343bfe8c1006caf70bcb94b9f5552f878b8f2c8272ff940
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:5330400d6885c6b9af0b1821a1e49b220dac708c65a515f28b9197806d03f3ed
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.19437719778763407,
   "eval_steps": 500,
-  "global_step": 11000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1548,6 +1548,34 @@
       "learning_rate": 4.475957668217785e-05,
       "loss": 0.4435,
       "step": 11000
     }
   ],
   "logging_steps": 50,

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.19791132865650016,
   "eval_steps": 500,
+  "global_step": 11200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 4.475957668217785e-05,
       "loss": 0.4435,
       "step": 11000
+    },
+    {
+      "epoch": 0.1952607305048506,
+      "grad_norm": 2.1512463092803955,
+      "learning_rate": 4.4710490663839315e-05,
+      "loss": 0.4272,
+      "step": 11050
+    },
+    {
+      "epoch": 0.1961442632220671,
+      "grad_norm": 1.5526123046875,
+      "learning_rate": 4.466140464550077e-05,
+      "loss": 0.4334,
+      "step": 11100
+    },
+    {
+      "epoch": 0.19702779593928363,
+      "grad_norm": 1.4258567094802856,
+      "learning_rate": 4.4612318627162245e-05,
+      "loss": 0.4479,
+      "step": 11150
+    },
+    {
+      "epoch": 0.19791132865650016,
+      "grad_norm": 3.2408463954925537,
+      "learning_rate": 4.456323260882371e-05,
+      "loss": 0.3545,
+      "step": 11200
     }
   ],
   "logging_steps": 50,