Upload folder using huggingface_hub

Browse files

Files changed (6) hide show

model.safetensors +1 -1
optimizer.pt +1 -1
rng_state.pth +1 -1
scaler.pt +1 -1
scheduler.pt +1 -1
trainer_state.json +183 -43

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:adf1daa567e842d4a49a2a309c28335adcbc556ed387bd933791c5748955879c
 size 105084648

 version https://git-lfs.github.com/spec/v1
+oid sha256:c2038b7d186dcf5681c4d83de9768682356029a22590cad51687295c3994b772
 size 105084648

optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:036dfbad9db93056477a452361b658411f43d32c9474a36e48ff0e618e0db69a
 size 210233675

 version https://git-lfs.github.com/spec/v1
+oid sha256:593fb2c6e4858080c601758c9635d147ce263203bca363c3dfc105c7a402733c
 size 210233675

rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:84be4ea5bd79d9a9d3711f20f0c9582c128f347dd9ed89e8c000bb14920e6f49
 size 14645

 version https://git-lfs.github.com/spec/v1
+oid sha256:878ed4344f40441f51be4ac14226427439b6dc5bb61077e19d16d91c52b3678a
 size 14645

scaler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f4aa03f6e0cd07cf67ce1fbe3101d545f5771ef9148b9debf02b11cf6948da5c
 size 1383

 version https://git-lfs.github.com/spec/v1
+oid sha256:c0033c7745b46bdca3ecab5787678834ca68f7f7e1288869dceeb38812abc253
 size 1383

scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0062eed7ddae26275a466fd49b27ea8db659c723946cdc1a97b53a778e040e27
 size 1465

 version https://git-lfs.github.com/spec/v1
+oid sha256:caad6e0f583a07a0c98d029dca8047c09991d00e348a78110b876e78a7d60992
 size 1465

trainer_state.json CHANGED Viewed

@@ -2,152 +2,292 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 1.7229642395519171,
   "eval_steps": 500,
-  "global_step": 2000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
   "log_history": [
     {
       "epoch": 0.08616975441619991,
-      "grad_norm": 1.3410744667053223,
       "learning_rate": 1.98e-05,
-      "loss": 5.56121826171875,
       "step": 100
     },
     {
       "epoch": 0.17233950883239982,
-      "grad_norm": 0.6071470379829407,
       "learning_rate": 3.979999999999999e-05,
-      "loss": 3.912900390625,
       "step": 200
     },
     {
       "epoch": 0.25850926324859974,
-      "grad_norm": 1.0832934379577637,
       "learning_rate": 5.98e-05,
-      "loss": 3.399825134277344,
       "step": 300
     },
     {
       "epoch": 0.34467901766479964,
-      "grad_norm": 0.8494643568992615,
       "learning_rate": 7.98e-05,
-      "loss": 3.0080584716796874,
       "step": 400
     },
     {
       "epoch": 0.4308487720809996,
-      "grad_norm": 0.9254368543624878,
       "learning_rate": 9.979999999999999e-05,
-      "loss": 2.8078402709960937,
       "step": 500
     },
     {
       "epoch": 0.5170185264971995,
-      "grad_norm": 2.038386821746826,
       "learning_rate": 0.00011979999999999998,
-      "loss": 2.6626937866210936,
       "step": 600
     },
     {
       "epoch": 0.6031882809133994,
-      "grad_norm": 2.6908981800079346,
       "learning_rate": 0.00013979999999999998,
-      "loss": 2.5472840881347656,
       "step": 700
     },
     {
       "epoch": 0.6893580353295993,
-      "grad_norm": 1.0315098762512207,
       "learning_rate": 0.00015979999999999998,
-      "loss": 2.4246885681152346,
       "step": 800
     },
     {
       "epoch": 0.7755277897457993,
-      "grad_norm": 0.698249101638794,
       "learning_rate": 0.0001798,
-      "loss": 2.268210601806641,
       "step": 900
     },
     {
       "epoch": 0.8616975441619992,
-      "grad_norm": 0.7050304412841797,
       "learning_rate": 0.0001998,
-      "loss": 2.119554290771484,
       "step": 1000
     },
     {
       "epoch": 0.9478672985781991,
-      "grad_norm": 0.6321768760681152,
       "learning_rate": 0.00021979999999999998,
-      "loss": 2.0509904479980468,
       "step": 1100
     },
     {
       "epoch": 1.033606204222318,
-      "grad_norm": 0.5764491558074951,
       "learning_rate": 0.00023979999999999997,
-      "loss": 1.9791275024414063,
       "step": 1200
     },
     {
       "epoch": 1.1197759586385179,
-      "grad_norm": 0.5116275548934937,
       "learning_rate": 0.00025979999999999997,
-      "loss": 1.925416259765625,
       "step": 1300
     },
     {
       "epoch": 1.2059457130547178,
-      "grad_norm": 0.5461997389793396,
       "learning_rate": 0.00027979999999999997,
-      "loss": 1.8484840393066406,
       "step": 1400
     },
     {
       "epoch": 1.2921154674709177,
-      "grad_norm": 0.4215202331542969,
       "learning_rate": 0.00029979999999999997,
-      "loss": 1.8083718872070313,
       "step": 1500
     },
     {
       "epoch": 1.3782852218871176,
-      "grad_norm": 0.4892556369304657,
       "learning_rate": 0.000299991068233357,
-      "loss": 1.762831573486328,
       "step": 1600
     },
     {
       "epoch": 1.4644549763033177,
-      "grad_norm": 0.4539775252342224,
       "learning_rate": 0.0002999639122316208,
-      "loss": 1.7123506164550781,
       "step": 1700
     },
     {
       "epoch": 1.5506247307195173,
-      "grad_norm": 0.4713730216026306,
       "learning_rate": 0.0002999185343831476,
-      "loss": 1.6709410095214843,
       "step": 1800
     },
     {
       "epoch": 1.6367944851357175,
-      "grad_norm": 0.46391186118125916,
       "learning_rate": 0.0002998549402017187,
-      "loss": 1.6304544067382813,
       "step": 1900
     },
     {
       "epoch": 1.7229642395519171,
-      "grad_norm": 0.3968624770641327,
       "learning_rate": 0.0002997731374145493,
-      "loss": 1.5913111877441406,
       "step": 2000
     }
   ],
   "logging_steps": 100,
@@ -167,7 +307,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 9914156127879168.0,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 3.4454976303317535,
   "eval_steps": 500,
+  "global_step": 4000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
   "log_history": [
     {
       "epoch": 0.08616975441619991,
+      "grad_norm": 1.8284112215042114,
       "learning_rate": 1.98e-05,
+      "loss": 5.544659423828125,
       "step": 100
     },
     {
       "epoch": 0.17233950883239982,
+      "grad_norm": 0.5319015383720398,
       "learning_rate": 3.979999999999999e-05,
+      "loss": 3.90488037109375,
       "step": 200
     },
     {
       "epoch": 0.25850926324859974,
+      "grad_norm": 1.3977950811386108,
       "learning_rate": 5.98e-05,
+      "loss": 3.39756103515625,
       "step": 300
     },
     {
       "epoch": 0.34467901766479964,
+      "grad_norm": 1.9291573762893677,
       "learning_rate": 7.98e-05,
+      "loss": 3.019991149902344,
       "step": 400
     },
     {
       "epoch": 0.4308487720809996,
+      "grad_norm": 1.4095340967178345,
       "learning_rate": 9.979999999999999e-05,
+      "loss": 2.815445861816406,
       "step": 500
     },
     {
       "epoch": 0.5170185264971995,
+      "grad_norm": 3.2716641426086426,
       "learning_rate": 0.00011979999999999998,
+      "loss": 2.6590045166015623,
       "step": 600
     },
     {
       "epoch": 0.6031882809133994,
+      "grad_norm": 1.3838716745376587,
       "learning_rate": 0.00013979999999999998,
+      "loss": 2.543310089111328,
       "step": 700
     },
     {
       "epoch": 0.6893580353295993,
+      "grad_norm": 1.069161057472229,
       "learning_rate": 0.00015979999999999998,
+      "loss": 2.396273651123047,
       "step": 800
     },
     {
       "epoch": 0.7755277897457993,
+      "grad_norm": 0.8585665822029114,
       "learning_rate": 0.0001798,
+      "loss": 2.242165985107422,
       "step": 900
     },
     {
       "epoch": 0.8616975441619992,
+      "grad_norm": 0.7467069625854492,
       "learning_rate": 0.0001998,
+      "loss": 2.1027012634277344,
       "step": 1000
     },
     {
       "epoch": 0.9478672985781991,
+      "grad_norm": 0.5805935859680176,
       "learning_rate": 0.00021979999999999998,
+      "loss": 2.037454376220703,
       "step": 1100
     },
     {
       "epoch": 1.033606204222318,
+      "grad_norm": 0.5948718786239624,
       "learning_rate": 0.00023979999999999997,
+      "loss": 1.9681085205078126,
       "step": 1200
     },
     {
       "epoch": 1.1197759586385179,
+      "grad_norm": 0.5413378477096558,
       "learning_rate": 0.00025979999999999997,
+      "loss": 1.9135774230957032,
       "step": 1300
     },
     {
       "epoch": 1.2059457130547178,
+      "grad_norm": 0.5196030139923096,
       "learning_rate": 0.00027979999999999997,
+      "loss": 1.8392716979980468,
       "step": 1400
     },
     {
       "epoch": 1.2921154674709177,
+      "grad_norm": 0.49619364738464355,
       "learning_rate": 0.00029979999999999997,
+      "loss": 1.8049734497070313,
       "step": 1500
     },
     {
       "epoch": 1.3782852218871176,
+      "grad_norm": 0.44414839148521423,
       "learning_rate": 0.000299991068233357,
+      "loss": 1.7638165283203124,
       "step": 1600
     },
     {
       "epoch": 1.4644549763033177,
+      "grad_norm": 0.46444711089134216,
       "learning_rate": 0.0002999639122316208,
+      "loss": 1.7137832641601562,
       "step": 1700
     },
     {
       "epoch": 1.5506247307195173,
+      "grad_norm": 0.5176238417625427,
       "learning_rate": 0.0002999185343831476,
+      "loss": 1.675589599609375,
       "step": 1800
     },
     {
       "epoch": 1.6367944851357175,
+      "grad_norm": 0.4177858829498291,
       "learning_rate": 0.0002998549402017187,
+      "loss": 1.6349491882324219,
       "step": 1900
     },
     {
       "epoch": 1.7229642395519171,
+      "grad_norm": 0.42198434472084045,
       "learning_rate": 0.0002997731374145493,
+      "loss": 1.596505126953125,
       "step": 2000
+    },
+    {
+      "epoch": 1.8091339939681172,
+      "grad_norm": 0.4523915946483612,
+      "learning_rate": 0.0002996731359613498,
+      "loss": 1.5908058166503907,
+      "step": 2100
+    },
+    {
+      "epoch": 1.8953037483843171,
+      "grad_norm": 0.3901713788509369,
+      "learning_rate": 0.0002995549479931178,
+      "loss": 1.5610142517089844,
+      "step": 2200
+    },
+    {
+      "epoch": 1.981473502800517,
+      "grad_norm": 0.41816478967666626,
+      "learning_rate": 0.00029941858787066206,
+      "loss": 1.5319706726074218,
+      "step": 2300
+    },
+    {
+      "epoch": 2.067212408444636,
+      "grad_norm": 0.3872755765914917,
+      "learning_rate": 0.00029926407216285706,
+      "loss": 1.5055549621582032,
+      "step": 2400
+    },
+    {
+      "epoch": 2.1533821628608356,
+      "grad_norm": 0.4193103611469269,
+      "learning_rate": 0.0002990914196446301,
+      "loss": 1.4792218017578125,
+      "step": 2500
+    },
+    {
+      "epoch": 2.2395519172770357,
+      "grad_norm": 0.4024358093738556,
+      "learning_rate": 0.00029890065129467986,
+      "loss": 1.4786280822753906,
+      "step": 2600
+    },
+    {
+      "epoch": 2.325721671693236,
+      "grad_norm": 0.37588468194007874,
+      "learning_rate": 0.0002986917902929273,
+      "loss": 1.4545697021484374,
+      "step": 2700
+    },
+    {
+      "epoch": 2.4118914261094355,
+      "grad_norm": 0.39736974239349365,
+      "learning_rate": 0.0002984648620176991,
+      "loss": 1.4498170471191407,
+      "step": 2800
+    },
+    {
+      "epoch": 2.4980611805256356,
+      "grad_norm": 0.42380592226982117,
+      "learning_rate": 0.00029821989404264424,
+      "loss": 1.4262150573730468,
+      "step": 2900
+    },
+    {
+      "epoch": 2.5842309349418353,
+      "grad_norm": 0.411803662776947,
+      "learning_rate": 0.00029795691613338307,
+      "loss": 1.417086181640625,
+      "step": 3000
+    },
+    {
+      "epoch": 2.6704006893580354,
+      "grad_norm": 0.3662901818752289,
+      "learning_rate": 0.000297675960243891,
+      "loss": 1.3942941284179688,
+      "step": 3100
+    },
+    {
+      "epoch": 2.756570443774235,
+      "grad_norm": 0.3642771244049072,
+      "learning_rate": 0.00029737706051261557,
+      "loss": 1.38471923828125,
+      "step": 3200
+    },
+    {
+      "epoch": 2.842740198190435,
+      "grad_norm": 0.4138600826263428,
+      "learning_rate": 0.00029706025325832857,
+      "loss": 1.3765927124023438,
+      "step": 3300
+    },
+    {
+      "epoch": 2.9289099526066353,
+      "grad_norm": 0.3687536418437958,
+      "learning_rate": 0.0002967255769757127,
+      "loss": 1.3617820739746094,
+      "step": 3400
+    },
+    {
+      "epoch": 3.014648858250754,
+      "grad_norm": 0.3252148926258087,
+      "learning_rate": 0.0002963730723306845,
+      "loss": 1.3490205383300782,
+      "step": 3500
+    },
+    {
+      "epoch": 3.100818612666954,
+      "grad_norm": 0.3874260187149048,
+      "learning_rate": 0.0002960027821554529,
+      "loss": 1.3380169677734375,
+      "step": 3600
+    },
+    {
+      "epoch": 3.1869883670831536,
+      "grad_norm": 0.37778887152671814,
+      "learning_rate": 0.00029561475144331467,
+      "loss": 1.3190237426757812,
+      "step": 3700
+    },
+    {
+      "epoch": 3.2731581214993537,
+      "grad_norm": 0.37266016006469727,
+      "learning_rate": 0.00029520902734318766,
+      "loss": 1.313209991455078,
+      "step": 3800
+    },
+    {
+      "epoch": 3.359327875915554,
+      "grad_norm": 0.3792646527290344,
+      "learning_rate": 0.00029478565915388153,
+      "loss": 1.3055996704101562,
+      "step": 3900
+    },
+    {
+      "epoch": 3.4454976303317535,
+      "grad_norm": 0.3583495318889618,
+      "learning_rate": 0.00029434469831810764,
+      "loss": 1.301021728515625,
+      "step": 4000
     }
   ],
   "logging_steps": 100,
       "attributes": {}
     }
   },
+  "total_flos": 1.9825523114901504e+16,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null