Wilsonwin committed on
Commit
7184234
·
verified ·
1 Parent(s): 522809a

Training in progress, step 6000, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e8ae4d439763bede675a7bb8407ca626ba1a1ca1d28d508145ff27990bcdfd60
3
  size 328277848
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f198d05f5a6f7322d5950baad97f98d6f59bcdb9ed02f220583ce5fd10a379c7
3
  size 328277848
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8df4f8c8c0f93c7a4647906cc1e5f85c72386b1b581eb687df3d305abbdc44a7
3
  size 318646859
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:302e7a816c65dc7ea036853d2e134881bf37e4d7e3ce31f671702ad86c5f1616
3
  size 318646859
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8af3cc3f1560f815527e73bcdf0bbfb03998a87b5067ff9928ca94f46e638231
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ef678004bfc53268aeb4845a442c0327144244832e571a2be41a7160145765eb
3
  size 14645
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bda4b56b57284b5d776cea834f86539fa062d5e046885e07dcb7516921ccd6ee
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5732bb4fae95fda377427872ad7c4fed0c45a84922701b3143ffa39cf761f9db
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.9292110153742186,
6
  "eval_steps": 500,
7
- "global_step": 5500,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -3953,6 +3953,364 @@
3953
  "eval_samples_per_second": 276.03,
3954
  "eval_steps_per_second": 5.797,
3955
  "step": 5500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3956
  }
3957
  ],
3958
  "logging_steps": 10,
@@ -3972,7 +4330,7 @@
3972
  "attributes": {}
3973
  }
3974
  },
3975
- "total_flos": 1.83951251472384e+17,
3976
  "train_batch_size": 48,
3977
  "trial_name": null,
3978
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 1.0136847440446022,
6
  "eval_steps": 500,
7
+ "global_step": 6000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
3953
  "eval_samples_per_second": 276.03,
3954
  "eval_steps_per_second": 5.797,
3955
  "step": 5500
3956
+ },
3957
+ {
3958
+ "epoch": 0.9309004899476263,
3959
+ "grad_norm": 0.4981231689453125,
3960
+ "learning_rate": 0.00021527977734609537,
3961
+ "loss": 4.546344757080078,
3962
+ "step": 5510
3963
+ },
3964
+ {
3965
+ "epoch": 0.9325899645210339,
3966
+ "grad_norm": 0.5102471709251404,
3967
+ "learning_rate": 0.00021484818619522722,
3968
+ "loss": 4.555732727050781,
3969
+ "step": 5520
3970
+ },
3971
+ {
3972
+ "epoch": 0.9342794390944417,
3973
+ "grad_norm": 0.4952332675457001,
3974
+ "learning_rate": 0.00021441593376712224,
3975
+ "loss": 4.551390838623047,
3976
+ "step": 5530
3977
+ },
3978
+ {
3979
+ "epoch": 0.9359689136678493,
3980
+ "grad_norm": 0.48818397521972656,
3981
+ "learning_rate": 0.0002139830244695935,
3982
+ "loss": 4.579892349243164,
3983
+ "step": 5540
3984
+ },
3985
+ {
3986
+ "epoch": 0.937658388241257,
3987
+ "grad_norm": 0.5026493072509766,
3988
+ "learning_rate": 0.00021354946271715265,
3989
+ "loss": 4.550901794433594,
3990
+ "step": 5550
3991
+ },
3992
+ {
3993
+ "epoch": 0.9393478628146646,
3994
+ "grad_norm": 0.5072263479232788,
3995
+ "learning_rate": 0.00021311525293096444,
3996
+ "loss": 4.540952682495117,
3997
+ "step": 5560
3998
+ },
3999
+ {
4000
+ "epoch": 0.9410373373880723,
4001
+ "grad_norm": 0.506399929523468,
4002
+ "learning_rate": 0.00021268039953880184,
4003
+ "loss": 4.528360748291016,
4004
+ "step": 5570
4005
+ },
4006
+ {
4007
+ "epoch": 0.94272681196148,
4008
+ "grad_norm": 0.4890686571598053,
4009
+ "learning_rate": 0.00021224490697500088,
4010
+ "loss": 4.535118484497071,
4011
+ "step": 5580
4012
+ },
4013
+ {
4014
+ "epoch": 0.9444162865348876,
4015
+ "grad_norm": 0.53489750623703,
4016
+ "learning_rate": 0.00021180877968041552,
4017
+ "loss": 4.554486846923828,
4018
+ "step": 5590
4019
+ },
4020
+ {
4021
+ "epoch": 0.9461057611082954,
4022
+ "grad_norm": 0.5218686461448669,
4023
+ "learning_rate": 0.00021137202210237213,
4024
+ "loss": 4.540655136108398,
4025
+ "step": 5600
4026
+ },
4027
+ {
4028
+ "epoch": 0.947795235681703,
4029
+ "grad_norm": 0.5503886342048645,
4030
+ "learning_rate": 0.0002109346386946243,
4031
+ "loss": 4.5588642120361325,
4032
+ "step": 5610
4033
+ },
4034
+ {
4035
+ "epoch": 0.9494847102551106,
4036
+ "grad_norm": 0.4940740168094635,
4037
+ "learning_rate": 0.00021049663391730752,
4038
+ "loss": 4.542883682250976,
4039
+ "step": 5620
4040
+ },
4041
+ {
4042
+ "epoch": 0.9511741848285183,
4043
+ "grad_norm": 0.4873588979244232,
4044
+ "learning_rate": 0.00021005801223689344,
4045
+ "loss": 4.569081115722656,
4046
+ "step": 5630
4047
+ },
4048
+ {
4049
+ "epoch": 0.952863659401926,
4050
+ "grad_norm": 0.4529159367084503,
4051
+ "learning_rate": 0.00020961877812614458,
4052
+ "loss": 4.569264984130859,
4053
+ "step": 5640
4054
+ },
4055
+ {
4056
+ "epoch": 0.9545531339753337,
4057
+ "grad_norm": 0.5239872932434082,
4058
+ "learning_rate": 0.00020917893606406843,
4059
+ "loss": 4.523569869995117,
4060
+ "step": 5650
4061
+ },
4062
+ {
4063
+ "epoch": 0.9562426085487413,
4064
+ "grad_norm": 0.5355167388916016,
4065
+ "learning_rate": 0.0002087384905358722,
4066
+ "loss": 4.528088760375977,
4067
+ "step": 5660
4068
+ },
4069
+ {
4070
+ "epoch": 0.9579320831221491,
4071
+ "grad_norm": 0.5981546640396118,
4072
+ "learning_rate": 0.00020829744603291663,
4073
+ "loss": 4.515169525146485,
4074
+ "step": 5670
4075
+ },
4076
+ {
4077
+ "epoch": 0.9596215576955567,
4078
+ "grad_norm": 0.5237213969230652,
4079
+ "learning_rate": 0.00020785580705267047,
4080
+ "loss": 4.559556198120117,
4081
+ "step": 5680
4082
+ },
4083
+ {
4084
+ "epoch": 0.9613110322689643,
4085
+ "grad_norm": 0.5060997009277344,
4086
+ "learning_rate": 0.00020741357809866447,
4087
+ "loss": 4.5545307159423825,
4088
+ "step": 5690
4089
+ },
4090
+ {
4091
+ "epoch": 0.963000506842372,
4092
+ "grad_norm": 0.4923208951950073,
4093
+ "learning_rate": 0.0002069707636804457,
4094
+ "loss": 4.550180053710937,
4095
+ "step": 5700
4096
+ },
4097
+ {
4098
+ "epoch": 0.9646899814157797,
4099
+ "grad_norm": 0.5420214533805847,
4100
+ "learning_rate": 0.0002065273683135312,
4101
+ "loss": 4.5501148223876955,
4102
+ "step": 5710
4103
+ },
4104
+ {
4105
+ "epoch": 0.9663794559891874,
4106
+ "grad_norm": 0.48059365153312683,
4107
+ "learning_rate": 0.00020608339651936224,
4108
+ "loss": 4.532232284545898,
4109
+ "step": 5720
4110
+ },
4111
+ {
4112
+ "epoch": 0.968068930562595,
4113
+ "grad_norm": 0.49933409690856934,
4114
+ "learning_rate": 0.00020563885282525802,
4115
+ "loss": 4.532613372802734,
4116
+ "step": 5730
4117
+ },
4118
+ {
4119
+ "epoch": 0.9697584051360028,
4120
+ "grad_norm": 0.5204219222068787,
4121
+ "learning_rate": 0.00020519374176436968,
4122
+ "loss": 4.545319366455078,
4123
+ "step": 5740
4124
+ },
4125
+ {
4126
+ "epoch": 0.9714478797094104,
4127
+ "grad_norm": 0.49551549553871155,
4128
+ "learning_rate": 0.00020474806787563392,
4129
+ "loss": 4.532552337646484,
4130
+ "step": 5750
4131
+ },
4132
+ {
4133
+ "epoch": 0.973137354282818,
4134
+ "grad_norm": 0.512352705001831,
4135
+ "learning_rate": 0.0002043018357037267,
4136
+ "loss": 4.541680908203125,
4137
+ "step": 5760
4138
+ },
4139
+ {
4140
+ "epoch": 0.9748268288562257,
4141
+ "grad_norm": 0.49258846044540405,
4142
+ "learning_rate": 0.00020385504979901712,
4143
+ "loss": 4.545413970947266,
4144
+ "step": 5770
4145
+ },
4146
+ {
4147
+ "epoch": 0.9765163034296334,
4148
+ "grad_norm": 0.5037888288497925,
4149
+ "learning_rate": 0.00020340771471752078,
4150
+ "loss": 4.531426239013672,
4151
+ "step": 5780
4152
+ },
4153
+ {
4154
+ "epoch": 0.9782057780030411,
4155
+ "grad_norm": 0.526168167591095,
4156
+ "learning_rate": 0.0002029598350208534,
4157
+ "loss": 4.524025344848633,
4158
+ "step": 5790
4159
+ },
4160
+ {
4161
+ "epoch": 0.9798952525764487,
4162
+ "grad_norm": 0.5037376880645752,
4163
+ "learning_rate": 0.00020251141527618434,
4164
+ "loss": 4.531801223754883,
4165
+ "step": 5800
4166
+ },
4167
+ {
4168
+ "epoch": 0.9815847271498563,
4169
+ "grad_norm": 0.49936115741729736,
4170
+ "learning_rate": 0.00020206246005618998,
4171
+ "loss": 4.517900848388672,
4172
+ "step": 5810
4173
+ },
4174
+ {
4175
+ "epoch": 0.9832742017232641,
4176
+ "grad_norm": 0.5162473917007446,
4177
+ "learning_rate": 0.00020161297393900713,
4178
+ "loss": 4.51179313659668,
4179
+ "step": 5820
4180
+ },
4181
+ {
4182
+ "epoch": 0.9849636762966717,
4183
+ "grad_norm": 0.5165606141090393,
4184
+ "learning_rate": 0.00020116296150818623,
4185
+ "loss": 4.53326187133789,
4186
+ "step": 5830
4187
+ },
4188
+ {
4189
+ "epoch": 0.9866531508700794,
4190
+ "grad_norm": 0.5134915113449097,
4191
+ "learning_rate": 0.0002007124273526449,
4192
+ "loss": 4.505707168579102,
4193
+ "step": 5840
4194
+ },
4195
+ {
4196
+ "epoch": 0.988342625443487,
4197
+ "grad_norm": 0.4991665184497833,
4198
+ "learning_rate": 0.00020026137606662077,
4199
+ "loss": 4.525319671630859,
4200
+ "step": 5850
4201
+ },
4202
+ {
4203
+ "epoch": 0.9900321000168948,
4204
+ "grad_norm": 0.5060558915138245,
4205
+ "learning_rate": 0.0001998098122496249,
4206
+ "loss": 4.531586456298828,
4207
+ "step": 5860
4208
+ },
4209
+ {
4210
+ "epoch": 0.9917215745903024,
4211
+ "grad_norm": 0.5269056558609009,
4212
+ "learning_rate": 0.00019935774050639472,
4213
+ "loss": 4.517117691040039,
4214
+ "step": 5870
4215
+ },
4216
+ {
4217
+ "epoch": 0.99341104916371,
4218
+ "grad_norm": 0.5109555125236511,
4219
+ "learning_rate": 0.0001989051654468473,
4220
+ "loss": 4.501250076293945,
4221
+ "step": 5880
4222
+ },
4223
+ {
4224
+ "epoch": 0.9951005237371178,
4225
+ "grad_norm": 0.5808560848236084,
4226
+ "learning_rate": 0.00019845209168603195,
4227
+ "loss": 4.523174285888672,
4228
+ "step": 5890
4229
+ },
4230
+ {
4231
+ "epoch": 0.9967899983105254,
4232
+ "grad_norm": 0.509011447429657,
4233
+ "learning_rate": 0.00019799852384408355,
4234
+ "loss": 4.526637268066406,
4235
+ "step": 5900
4236
+ },
4237
+ {
4238
+ "epoch": 0.9984794728839331,
4239
+ "grad_norm": 0.48725831508636475,
4240
+ "learning_rate": 0.00019754446654617527,
4241
+ "loss": 4.508483123779297,
4242
+ "step": 5910
4243
+ },
4244
+ {
4245
+ "epoch": 1.0001689474573408,
4246
+ "grad_norm": 0.5509161353111267,
4247
+ "learning_rate": 0.00019708992442247136,
4248
+ "loss": 4.524269866943359,
4249
+ "step": 5920
4250
+ },
4251
+ {
4252
+ "epoch": 1.0018584220307485,
4253
+ "grad_norm": 0.5454822182655334,
4254
+ "learning_rate": 0.0001966349021080801,
4255
+ "loss": 4.459218597412109,
4256
+ "step": 5930
4257
+ },
4258
+ {
4259
+ "epoch": 1.003547896604156,
4260
+ "grad_norm": 0.5172731280326843,
4261
+ "learning_rate": 0.0001961794042430062,
4262
+ "loss": 4.506275939941406,
4263
+ "step": 5940
4264
+ },
4265
+ {
4266
+ "epoch": 1.0052373711775637,
4267
+ "grad_norm": 0.4621833562850952,
4268
+ "learning_rate": 0.000195723435472104,
4269
+ "loss": 4.4765571594238285,
4270
+ "step": 5950
4271
+ },
4272
+ {
4273
+ "epoch": 1.0069268457509715,
4274
+ "grad_norm": 0.4911273717880249,
4275
+ "learning_rate": 0.00019526700044502956,
4276
+ "loss": 4.482321929931641,
4277
+ "step": 5960
4278
+ },
4279
+ {
4280
+ "epoch": 1.0086163203243792,
4281
+ "grad_norm": 0.4957529306411743,
4282
+ "learning_rate": 0.0001948101038161937,
4283
+ "loss": 4.473802947998047,
4284
+ "step": 5970
4285
+ },
4286
+ {
4287
+ "epoch": 1.0103057948977867,
4288
+ "grad_norm": 0.4635160267353058,
4289
+ "learning_rate": 0.0001943527502447141,
4290
+ "loss": 4.482971572875977,
4291
+ "step": 5980
4292
+ },
4293
+ {
4294
+ "epoch": 1.0119952694711944,
4295
+ "grad_norm": 0.4733022451400757,
4296
+ "learning_rate": 0.00019389494439436836,
4297
+ "loss": 4.454212188720703,
4298
+ "step": 5990
4299
+ },
4300
+ {
4301
+ "epoch": 1.0136847440446022,
4302
+ "grad_norm": 0.49631762504577637,
4303
+ "learning_rate": 0.0001934366909335458,
4304
+ "loss": 4.491296005249024,
4305
+ "step": 6000
4306
+ },
4307
+ {
4308
+ "epoch": 1.0136847440446022,
4309
+ "eval_loss": 4.470248699188232,
4310
+ "eval_runtime": 3.6231,
4311
+ "eval_samples_per_second": 276.003,
4312
+ "eval_steps_per_second": 5.796,
4313
+ "step": 6000
4314
  }
4315
  ],
4316
  "logging_steps": 10,
 
4330
  "attributes": {}
4331
  }
4332
  },
4333
+ "total_flos": 2.0067200216019763e+17,
4334
  "train_batch_size": 48,
4335
  "trial_name": null,
4336
  "trial_params": null