Training in progress, step 150, checkpoint

Browse files

Files changed (6) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +2 -2
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +56 -621
last-checkpoint/training_args.bin +1 -1

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5101d174985af56a5ca8f66d4497a19fc1df92b1752fd206a9acc99b0f5016c5
 size 2101902096

 version https://git-lfs.github.com/spec/v1
+oid sha256:2314f125d34ade15e993416f255766d0491d5e00c74946a6a9a7639c77297350
 size 2101902096

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:987d9f0f8403b666ca4e1990d4dc7891c8110a1c42e7d6aac6980dcbb286de39
 size 4071291450

 version https://git-lfs.github.com/spec/v1
+oid sha256:dd958853885bc4ee006f0df661f3c058ae435618526f06ab5d4874778a025e20
 size 4071291450

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:38fc59e25c61798df539d64abf9802bee7a84d13adb6854b0d457cfa6ed5ff0c
-size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:5bd235718e3623137e071bdd8f7e045fcf547d46c7047e3798cd61cc469fbcec
+size 14308

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b00ea15ea31930415de4593eb2a65802185be2077128d834e34240164dbbce43
 size 1256

 version https://git-lfs.github.com/spec/v1
+oid sha256:19c9bb74c7ccc0ce8938928022bb60a0f70baa72e09d1fb547a24fe2599bec65
 size 1256

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,690 +1,125 @@
 {
-  "best_metric": 0.5061885118484497,
-  "best_model_checkpoint": "./output/checkpoint-900",
-  "epoch": 0.07205187735169322,
   "eval_steps": 150,
-  "global_step": 900,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
   "log_history": [
     {
-      "epoch": 0.0008005764150188136,
-      "grad_norm": 14.606176376342773,
       "learning_rate": 8.000000000000001e-06,
-      "loss": 1.1075,
       "step": 10
     },
     {
-      "epoch": 0.0016011528300376272,
-      "grad_norm": 11.753077507019043,
       "learning_rate": 1.6000000000000003e-05,
-      "loss": 0.8446,
       "step": 20
     },
     {
-      "epoch": 0.0024017292450564404,
-      "grad_norm": 9.326775550842285,
       "learning_rate": 2.4e-05,
-      "loss": 0.6425,
       "step": 30
     },
     {
-      "epoch": 0.0032023056600752543,
-      "grad_norm": 12.25445556640625,
       "learning_rate": 3.2000000000000005e-05,
-      "loss": 0.6062,
       "step": 40
     },
     {
-      "epoch": 0.004002882075094067,
-      "grad_norm": 8.398188591003418,
       "learning_rate": 4e-05,
-      "loss": 0.5653,
       "step": 50
     },
     {
-      "epoch": 0.004803458490112881,
-      "grad_norm": 10.263713836669922,
       "learning_rate": 4.8e-05,
-      "loss": 0.5658,
       "step": 60
     },
     {
-      "epoch": 0.005604034905131695,
-      "grad_norm": 9.32615852355957,
       "learning_rate": 5.6e-05,
-      "loss": 0.5652,
       "step": 70
     },
     {
-      "epoch": 0.006404611320150509,
-      "grad_norm": 8.419366836547852,
       "learning_rate": 6.400000000000001e-05,
-      "loss": 0.5538,
       "step": 80
     },
     {
-      "epoch": 0.007205187735169322,
-      "grad_norm": 9.608399391174316,
       "learning_rate": 7.2e-05,
-      "loss": 0.5357,
       "step": 90
     },
     {
-      "epoch": 0.008005764150188135,
-      "grad_norm": 10.52286434173584,
       "learning_rate": 8e-05,
-      "loss": 0.5475,
       "step": 100
     },
     {
-      "epoch": 0.008806340565206948,
-      "grad_norm": 8.929399490356445,
       "learning_rate": 7.999917787833465e-05,
-      "loss": 0.554,
       "step": 110
     },
     {
-      "epoch": 0.009606916980225762,
-      "grad_norm": 13.271443367004395,
       "learning_rate": 7.999671154713278e-05,
-      "loss": 0.5339,
       "step": 120
     },
     {
-      "epoch": 0.010407493395244577,
-      "grad_norm": 9.399694442749023,
       "learning_rate": 7.99926011077756e-05,
-      "loss": 0.5338,
       "step": 130
     },
     {
-      "epoch": 0.01120806981026339,
-      "grad_norm": 8.28138542175293,
       "learning_rate": 7.99868467292272e-05,
-      "loss": 0.4835,
       "step": 140
     },
     {
-      "epoch": 0.012008646225282204,
-      "grad_norm": 10.049982070922852,
       "learning_rate": 7.997944864802752e-05,
-      "loss": 0.5392,
       "step": 150
     },
     {
-      "epoch": 0.012008646225282204,
-      "eval_loss": 0.5402679443359375,
-      "eval_runtime": 48.4496,
-      "eval_samples_per_second": 10.32,
-      "eval_steps_per_second": 10.32,
       "step": 150
-    },
-    {
-      "epoch": 0.012809222640301017,
-      "grad_norm": 13.39472484588623,
-      "learning_rate": 7.997040716828271e-05,
-      "loss": 0.6016,
-      "step": 160
-    },
-    {
-      "epoch": 0.01360979905531983,
-      "grad_norm": 9.441704750061035,
-      "learning_rate": 7.995972266165259e-05,
-      "loss": 0.5175,
-      "step": 170
-    },
-    {
-      "epoch": 0.014410375470338644,
-      "grad_norm": 12.495186805725098,
-      "learning_rate": 7.994739556733538e-05,
-      "loss": 0.5198,
-      "step": 180
-    },
-    {
-      "epoch": 0.015210951885357458,
-      "grad_norm": 9.358332633972168,
-      "learning_rate": 7.993342639204965e-05,
-      "loss": 0.5557,
-      "step": 190
-    },
-    {
-      "epoch": 0.01601152830037627,
-      "grad_norm": 10.003859519958496,
-      "learning_rate": 7.991781571001347e-05,
-      "loss": 0.5767,
-      "step": 200
-    },
-    {
-      "epoch": 0.016812104715395085,
-      "grad_norm": 8.369383811950684,
-      "learning_rate": 7.990056416292084e-05,
-      "loss": 0.5524,
-      "step": 210
-    },
-    {
-      "epoch": 0.017612681130413897,
-      "grad_norm": 9.327197074890137,
-      "learning_rate": 7.988167245991528e-05,
-      "loss": 0.5463,
-      "step": 220
-    },
-    {
-      "epoch": 0.01841325754543271,
-      "grad_norm": 8.661181449890137,
-      "learning_rate": 7.986114137756074e-05,
-      "loss": 0.577,
-      "step": 230
-    },
-    {
-      "epoch": 0.019213833960451523,
-      "grad_norm": 8.12822151184082,
-      "learning_rate": 7.983897175980957e-05,
-      "loss": 0.5381,
-      "step": 240
-    },
-    {
-      "epoch": 0.02001441037547034,
-      "grad_norm": 8.482646942138672,
-      "learning_rate": 7.981516451796794e-05,
-      "loss": 0.4825,
-      "step": 250
-    },
-    {
-      "epoch": 0.020814986790489154,
-      "grad_norm": 6.139826774597168,
-      "learning_rate": 7.97897206306583e-05,
-      "loss": 0.5136,
-      "step": 260
-    },
-    {
-      "epoch": 0.021615563205507966,
-      "grad_norm": 8.262868881225586,
-      "learning_rate": 7.976264114377922e-05,
-      "loss": 0.5535,
-      "step": 270
-    },
-    {
-      "epoch": 0.02241613962052678,
-      "grad_norm": 8.162726402282715,
-      "learning_rate": 7.973392717046233e-05,
-      "loss": 0.4943,
-      "step": 280
-    },
-    {
-      "epoch": 0.023216716035545593,
-      "grad_norm": 7.8664870262146,
-      "learning_rate": 7.97035798910266e-05,
-      "loss": 0.5262,
-      "step": 290
-    },
-    {
-      "epoch": 0.024017292450564408,
-      "grad_norm": 8.503366470336914,
-      "learning_rate": 7.967160055292984e-05,
-      "loss": 0.5778,
-      "step": 300
-    },
-    {
-      "epoch": 0.024017292450564408,
-      "eval_loss": 0.5316260457038879,
-      "eval_runtime": 51.9268,
-      "eval_samples_per_second": 9.629,
-      "eval_steps_per_second": 9.629,
-      "step": 300
-    },
-    {
-      "epoch": 0.02481786886558322,
-      "grad_norm": 7.278728485107422,
-      "learning_rate": 7.96379904707174e-05,
-      "loss": 0.4912,
-      "step": 310
-    },
-    {
-      "epoch": 0.025618445280602035,
-      "grad_norm": 9.4771089553833,
-      "learning_rate": 7.960275102596809e-05,
-      "loss": 0.5316,
-      "step": 320
-    },
-    {
-      "epoch": 0.026419021695620847,
-      "grad_norm": 7.614287376403809,
-      "learning_rate": 7.956588366723745e-05,
-      "loss": 0.5514,
-      "step": 330
-    },
-    {
-      "epoch": 0.02721959811063966,
-      "grad_norm": 7.349905014038086,
-      "learning_rate": 7.952738990999824e-05,
-      "loss": 0.5241,
-      "step": 340
-    },
-    {
-      "epoch": 0.028020174525658473,
-      "grad_norm": 8.33869743347168,
-      "learning_rate": 7.948727133657802e-05,
-      "loss": 0.5551,
-      "step": 350
-    },
-    {
-      "epoch": 0.02882075094067729,
-      "grad_norm": 8.975132942199707,
-      "learning_rate": 7.94455295960942e-05,
-      "loss": 0.5094,
-      "step": 360
-    },
-    {
-      "epoch": 0.0296213273556961,
-      "grad_norm": 7.986541748046875,
-      "learning_rate": 7.940216640438628e-05,
-      "loss": 0.5196,
-      "step": 370
-    },
-    {
-      "epoch": 0.030421903770714916,
-      "grad_norm": 6.738289833068848,
-      "learning_rate": 7.93571835439452e-05,
-      "loss": 0.5176,
-      "step": 380
-    },
-    {
-      "epoch": 0.031222480185733727,
-      "grad_norm": 8.22880744934082,
-      "learning_rate": 7.931058286384016e-05,
-      "loss": 0.5096,
-      "step": 390
-    },
-    {
-      "epoch": 0.03202305660075254,
-      "grad_norm": 7.914773941040039,
-      "learning_rate": 7.926236627964262e-05,
-      "loss": 0.578,
-      "step": 400
-    },
-    {
-      "epoch": 0.032823633015771354,
-      "grad_norm": 11.044404029846191,
-      "learning_rate": 7.92125357733475e-05,
-      "loss": 0.5168,
-      "step": 410
-    },
-    {
-      "epoch": 0.03362420943079017,
-      "grad_norm": 9.762344360351562,
-      "learning_rate": 7.916109339329173e-05,
-      "loss": 0.5214,
-      "step": 420
-    },
-    {
-      "epoch": 0.034424785845808985,
-      "grad_norm": 8.412607192993164,
-      "learning_rate": 7.910804125407007e-05,
-      "loss": 0.5388,
-      "step": 430
-    },
-    {
-      "epoch": 0.03522536226082779,
-      "grad_norm": 6.477634429931641,
-      "learning_rate": 7.905338153644818e-05,
-      "loss": 0.4985,
-      "step": 440
-    },
-    {
-      "epoch": 0.03602593867584661,
-      "grad_norm": 7.517724990844727,
-      "learning_rate": 7.899711648727294e-05,
-      "loss": 0.5023,
-      "step": 450
-    },
-    {
-      "epoch": 0.03602593867584661,
-      "eval_loss": 0.5241909027099609,
-      "eval_runtime": 48.1214,
-      "eval_samples_per_second": 10.39,
-      "eval_steps_per_second": 10.39,
-      "step": 450
-    },
-    {
-      "epoch": 0.03682651509086542,
-      "grad_norm": 8.395613670349121,
-      "learning_rate": 7.89392484193802e-05,
-      "loss": 0.5248,
-      "step": 460
-    },
-    {
-      "epoch": 0.03762709150588424,
-      "grad_norm": 7.838438510894775,
-      "learning_rate": 7.887977971149952e-05,
-      "loss": 0.5335,
-      "step": 470
-    },
-    {
-      "epoch": 0.03842766792090305,
-      "grad_norm": 9.784844398498535,
-      "learning_rate": 7.881871280815659e-05,
-      "loss": 0.5283,
-      "step": 480
-    },
-    {
-      "epoch": 0.03922824433592186,
-      "grad_norm": 6.710247039794922,
-      "learning_rate": 7.875605021957262e-05,
-      "loss": 0.5024,
-      "step": 490
-    },
-    {
-      "epoch": 0.04002882075094068,
-      "grad_norm": 6.177467346191406,
-      "learning_rate": 7.869179452156118e-05,
-      "loss": 0.5169,
-      "step": 500
-    },
-    {
-      "epoch": 0.04082939716595949,
-      "grad_norm": 9.919758796691895,
-      "learning_rate": 7.862594835542236e-05,
-      "loss": 0.4898,
-      "step": 510
-    },
-    {
-      "epoch": 0.04162997358097831,
-      "grad_norm": 6.045280456542969,
-      "learning_rate": 7.855851442783414e-05,
-      "loss": 0.5014,
-      "step": 520
-    },
-    {
-      "epoch": 0.042430549995997116,
-      "grad_norm": 10.190174102783203,
-      "learning_rate": 7.848949551074116e-05,
-      "loss": 0.5353,
-      "step": 530
-    },
-    {
-      "epoch": 0.04323112641101593,
-      "grad_norm": 7.281028747558594,
-      "learning_rate": 7.841889444124078e-05,
-      "loss": 0.5321,
-      "step": 540
-    },
-    {
-      "epoch": 0.044031702826034747,
-      "grad_norm": 9.220998764038086,
-      "learning_rate": 7.834671412146643e-05,
-      "loss": 0.5427,
-      "step": 550
-    },
-    {
-      "epoch": 0.04483227924105356,
-      "grad_norm": 9.340047836303711,
-      "learning_rate": 7.827295751846836e-05,
-      "loss": 0.5152,
-      "step": 560
-    },
-    {
-      "epoch": 0.04563285565607237,
-      "grad_norm": 9.684113502502441,
-      "learning_rate": 7.819762766409162e-05,
-      "loss": 0.5007,
-      "step": 570
-    },
-    {
-      "epoch": 0.046433432071091185,
-      "grad_norm": 8.104248046875,
-      "learning_rate": 7.81207276548515e-05,
-      "loss": 0.5161,
-      "step": 580
-    },
-    {
-      "epoch": 0.04723400848611,
-      "grad_norm": 8.265843391418457,
-      "learning_rate": 7.804226065180615e-05,
-      "loss": 0.4761,
-      "step": 590
-    },
-    {
-      "epoch": 0.048034584901128816,
-      "grad_norm": 7.592025279998779,
-      "learning_rate": 7.796222988042676e-05,
-      "loss": 0.5054,
-      "step": 600
-    },
-    {
-      "epoch": 0.048034584901128816,
-      "eval_loss": 0.5160062909126282,
-      "eval_runtime": 53.8725,
-      "eval_samples_per_second": 9.281,
-      "eval_steps_per_second": 9.281,
-      "step": 600
-    },
-    {
-      "epoch": 0.048835161316147624,
-      "grad_norm": 5.990994453430176,
-      "learning_rate": 7.788063863046486e-05,
-      "loss": 0.5454,
-      "step": 610
-    },
-    {
-      "epoch": 0.04963573773116644,
-      "grad_norm": 10.633271217346191,
-      "learning_rate": 7.779749025581717e-05,
-      "loss": 0.5441,
-      "step": 620
-    },
-    {
-      "epoch": 0.050436314146185254,
-      "grad_norm": 7.758908271789551,
-      "learning_rate": 7.771278817438773e-05,
-      "loss": 0.4952,
-      "step": 630
-    },
-    {
-      "epoch": 0.05123689056120407,
-      "grad_norm": 6.998986721038818,
-      "learning_rate": 7.762653586794731e-05,
-      "loss": 0.5136,
-      "step": 640
-    },
-    {
-      "epoch": 0.05203746697622288,
-      "grad_norm": 8.175464630126953,
-      "learning_rate": 7.753873688199042e-05,
-      "loss": 0.5448,
-      "step": 650
-    },
-    {
-      "epoch": 0.05283804339124169,
-      "grad_norm": 8.736907005310059,
-      "learning_rate": 7.74493948255895e-05,
-      "loss": 0.5084,
-      "step": 660
-    },
-    {
-      "epoch": 0.05363861980626051,
-      "grad_norm": 7.661477088928223,
-      "learning_rate": 7.735851337124654e-05,
-      "loss": 0.5042,
-      "step": 670
-    },
-    {
-      "epoch": 0.05443919622127932,
-      "grad_norm": 6.164790630340576,
-      "learning_rate": 7.726609625474218e-05,
-      "loss": 0.502,
-      "step": 680
-    },
-    {
-      "epoch": 0.05523977263629813,
-      "grad_norm": 8.20608901977539,
-      "learning_rate": 7.717214727498209e-05,
-      "loss": 0.5068,
-      "step": 690
-    },
-    {
-      "epoch": 0.05604034905131695,
-      "grad_norm": 7.037665843963623,
-      "learning_rate": 7.707667029384088e-05,
-      "loss": 0.5195,
-      "step": 700
-    },
-    {
-      "epoch": 0.05684092546633576,
-      "grad_norm": 6.408506393432617,
-      "learning_rate": 7.697966923600327e-05,
-      "loss": 0.58,
-      "step": 710
-    },
-    {
-      "epoch": 0.05764150188135458,
-      "grad_norm": 7.004055023193359,
-      "learning_rate": 7.688114808880283e-05,
-      "loss": 0.5103,
-      "step": 720
-    },
-    {
-      "epoch": 0.058442078296373386,
-      "grad_norm": 8.525979995727539,
-      "learning_rate": 7.678111090205804e-05,
-      "loss": 0.5181,
-      "step": 730
-    },
-    {
-      "epoch": 0.0592426547113922,
-      "grad_norm": 9.37247371673584,
-      "learning_rate": 7.667956178790582e-05,
-      "loss": 0.4865,
-      "step": 740
-    },
-    {
-      "epoch": 0.060043231126411016,
-      "grad_norm": 9.634757995605469,
-      "learning_rate": 7.65765049206325e-05,
-      "loss": 0.4985,
-      "step": 750
-    },
-    {
-      "epoch": 0.060043231126411016,
-      "eval_loss": 0.5135068893432617,
-      "eval_runtime": 48.1272,
-      "eval_samples_per_second": 10.389,
-      "eval_steps_per_second": 10.389,
-      "step": 750
-    },
-    {
-      "epoch": 0.06084380754142983,
-      "grad_norm": 8.048176765441895,
-      "learning_rate": 7.647194453650228e-05,
-      "loss": 0.5367,
-      "step": 760
-    },
-    {
-      "epoch": 0.061644383956448646,
-      "grad_norm": 8.86915111541748,
-      "learning_rate": 7.6365884933583e-05,
-      "loss": 0.5433,
-      "step": 770
-    },
-    {
-      "epoch": 0.062444960371467455,
-      "grad_norm": 8.023998260498047,
-      "learning_rate": 7.625833047156953e-05,
-      "loss": 0.5164,
-      "step": 780
-    },
-    {
-      "epoch": 0.06324553678648627,
-      "grad_norm": 7.905406475067139,
-      "learning_rate": 7.614928557160454e-05,
-      "loss": 0.5121,
-      "step": 790
-    },
-    {
-      "epoch": 0.06404611320150508,
-      "grad_norm": 11.182598114013672,
-      "learning_rate": 7.603875471609677e-05,
-      "loss": 0.5484,
-      "step": 800
-    },
-    {
-      "epoch": 0.0648466896165239,
-      "grad_norm": 9.298077583312988,
-      "learning_rate": 7.592674244853676e-05,
-      "loss": 0.5282,
-      "step": 810
-    },
-    {
-      "epoch": 0.06564726603154271,
-      "grad_norm": 6.5173773765563965,
-      "learning_rate": 7.581325337331013e-05,
-      "loss": 0.5331,
-      "step": 820
-    },
-    {
-      "epoch": 0.06644784244656153,
-      "grad_norm": 6.437970161437988,
-      "learning_rate": 7.569829215550825e-05,
-      "loss": 0.487,
-      "step": 830
-    },
-    {
-      "epoch": 0.06724841886158034,
-      "grad_norm": 7.935225486755371,
-      "learning_rate": 7.558186352073648e-05,
-      "loss": 0.5303,
-      "step": 840
-    },
-    {
-      "epoch": 0.06804899527659915,
-      "grad_norm": 7.8669633865356445,
-      "learning_rate": 7.546397225492001e-05,
-      "loss": 0.5428,
-      "step": 850
-    },
-    {
-      "epoch": 0.06884957169161797,
-      "grad_norm": 7.498812675476074,
-      "learning_rate": 7.534462320410702e-05,
-      "loss": 0.4996,
-      "step": 860
-    },
-    {
-      "epoch": 0.06965014810663678,
-      "grad_norm": 8.47286605834961,
-      "learning_rate": 7.522382127426952e-05,
-      "loss": 0.4899,
-      "step": 870
-    },
-    {
-      "epoch": 0.07045072452165559,
-      "grad_norm": 8.583860397338867,
-      "learning_rate": 7.510157143110172e-05,
-      "loss": 0.5122,
-      "step": 880
-    },
-    {
-      "epoch": 0.07125130093667441,
-      "grad_norm": 8.282376289367676,
-      "learning_rate": 7.497787869981583e-05,
-      "loss": 0.4667,
-      "step": 890
-    },
-    {
-      "epoch": 0.07205187735169322,
-      "grad_norm": 6.540297508239746,
-      "learning_rate": 7.485274816493558e-05,
-      "loss": 0.5061,
-      "step": 900
-    },
-    {
-      "epoch": 0.07205187735169322,
-      "eval_loss": 0.5061885118484497,
-      "eval_runtime": 52.6411,
-      "eval_samples_per_second": 9.498,
-      "eval_steps_per_second": 9.498,
-      "step": 900
     }
   ],
   "logging_steps": 10,
@@ -704,8 +139,8 @@
       "attributes": {}
     }
   },
-  "total_flos": 9.25220521365504e+16,
-  "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null
 }

 {
+  "best_metric": 0.5219287276268005,
+  "best_model_checkpoint": "./output/checkpoint-150",
+  "epoch": 0.006004563468235859,
   "eval_steps": 150,
+  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
   "log_history": [
     {
+      "epoch": 0.00040030423121572396,
+      "grad_norm": 19.109567642211914,
       "learning_rate": 8.000000000000001e-06,
+      "loss": 0.4779,
       "step": 10
     },
     {
+      "epoch": 0.0008006084624314479,
+      "grad_norm": 10.358642578125,
       "learning_rate": 1.6000000000000003e-05,
+      "loss": 0.5845,
       "step": 20
     },
     {
+      "epoch": 0.0012009126936471718,
+      "grad_norm": 11.2018404006958,
       "learning_rate": 2.4e-05,
+      "loss": 0.5179,
       "step": 30
     },
     {
+      "epoch": 0.0016012169248628958,
+      "grad_norm": 15.831893920898438,
       "learning_rate": 3.2000000000000005e-05,
+      "loss": 0.4929,
       "step": 40
     },
     {
+      "epoch": 0.0020015211560786197,
+      "grad_norm": 13.353575706481934,
       "learning_rate": 4e-05,
+      "loss": 0.44,
       "step": 50
     },
     {
+      "epoch": 0.0024018253872943435,
+      "grad_norm": 16.60424041748047,
       "learning_rate": 4.8e-05,
+      "loss": 0.4523,
       "step": 60
     },
     {
+      "epoch": 0.002802129618510068,
+      "grad_norm": 6.861016273498535,
       "learning_rate": 5.6e-05,
+      "loss": 0.5108,
       "step": 70
     },
     {
+      "epoch": 0.0032024338497257917,
+      "grad_norm": 7.793421268463135,
       "learning_rate": 6.400000000000001e-05,
+      "loss": 0.4571,
       "step": 80
     },
     {
+      "epoch": 0.0036027380809415155,
+      "grad_norm": 13.897448539733887,
       "learning_rate": 7.2e-05,
+      "loss": 0.5071,
       "step": 90
     },
     {
+      "epoch": 0.004003042312157239,
+      "grad_norm": 12.516758918762207,
       "learning_rate": 8e-05,
+      "loss": 0.5128,
       "step": 100
     },
     {
+      "epoch": 0.004403346543372964,
+      "grad_norm": 13.91842269897461,
       "learning_rate": 7.999917787833465e-05,
+      "loss": 0.4579,
       "step": 110
     },
     {
+      "epoch": 0.004803650774588687,
+      "grad_norm": 13.659226417541504,
       "learning_rate": 7.999671154713278e-05,
+      "loss": 0.5425,
       "step": 120
     },
     {
+      "epoch": 0.005203955005804411,
+      "grad_norm": 6.929769039154053,
       "learning_rate": 7.99926011077756e-05,
+      "loss": 0.5318,
       "step": 130
     },
     {
+      "epoch": 0.005604259237020136,
+      "grad_norm": 14.516855239868164,
       "learning_rate": 7.99868467292272e-05,
+      "loss": 0.6059,
       "step": 140
     },
     {
+      "epoch": 0.006004563468235859,
+      "grad_norm": 11.343602180480957,
       "learning_rate": 7.997944864802752e-05,
+      "loss": 0.5262,
       "step": 150
     },
     {
+      "epoch": 0.006004563468235859,
+      "eval_loss": 0.5219287276268005,
+      "eval_runtime": 52.4269,
+      "eval_samples_per_second": 9.537,
+      "eval_steps_per_second": 9.537,
       "step": 150
     }
   ],
   "logging_steps": 10,
       "attributes": {}
     }
   },
+  "total_flos": 6573404135940096.0,
+  "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null
 }

last-checkpoint/training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e774682193ad432810f3383530fec82e083d08f8f27130e1364d9e773b4cbea5
 size 5496

 version https://git-lfs.github.com/spec/v1
+oid sha256:0220a6fb174e4d99167eee5832c42de5c71879ba1f6fffd1d08d2f2f173cd453
 size 5496