Training in progress, step 200, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +712 -4

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:881444cf70f70d0ccdcde7f927db7dc1702a38e39028bae1149df9a6322310ea
 size 859942080

 version https://git-lfs.github.com/spec/v1
+oid sha256:70b40b4ea852faa0dabd88f7ddbc8095331dcaf982cf3b34cc7272211b022508
 size 859942080

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:37ce568517e845e12b882a12daa1dcaa6c37ead61f87765fbfff89ea1cf27426
 size 90187222

 version https://git-lfs.github.com/spec/v1
+oid sha256:6973271476cd76cc32089427e31b042ab3cf370ff220d960bc069ac0e8b7e1d7
 size 90187222

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0249daad6bba3532cdfe955baeb2365ed9c0ac740b6b1be283905494df7b79fa
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:092d05c850cba14a4e1067d9540d36872c05fc71b3eeadb4562ba802384222c9
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:fec1f8f2c1e492d7ced7a566946ce0a12ce91b87a9087e2da8c9d69f72bf0622
 size 2080

 version https://git-lfs.github.com/spec/v1
+oid sha256:e75c704c71c99deef040fe407c6f53cc8d33f4439273d19c1681b1ebdfb69672
 size 2080

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": 1.802311658859253,
   "best_model_checkpoint": "miner_id_24/checkpoint-100",
-  "epoch": 0.0007281677115873328,
   "eval_steps": 100,
-  "global_step": 100,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -723,6 +723,714 @@
       "eval_samples_per_second": 5.695,
       "eval_steps_per_second": 1.898,
       "step": 100
     }
   ],
   "logging_steps": 1,
@@ -737,7 +1445,7 @@
         "early_stopping_threshold": 0.0
       },
       "attributes": {
-        "early_stopping_patience_counter": 0
       }
     },
     "TrainerControl": {
@@ -751,7 +1459,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 6630505080422400.0,
   "train_batch_size": 3,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": 1.802311658859253,
   "best_model_checkpoint": "miner_id_24/checkpoint-100",
+  "epoch": 0.0014563354231746657,
   "eval_steps": 100,
+  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 5.695,
       "eval_steps_per_second": 1.898,
       "step": 100
+    },
+    {
+      "epoch": 0.0007354493887032061,
+      "grad_norm": 30.542020797729492,
+      "learning_rate": 0.0001999999803956598,
+      "loss": 2.7235,
+      "step": 101
+    },
+    {
+      "epoch": 0.0007427310658190794,
+      "grad_norm": 12.602702140808105,
+      "learning_rate": 0.0001999999803956598,
+      "loss": 1.4984,
+      "step": 102
+    },
+    {
+      "epoch": 0.0007500127429349528,
+      "grad_norm": 13.053272247314453,
+      "learning_rate": 0.0001999999803956598,
+      "loss": 2.4308,
+      "step": 103
+    },
+    {
+      "epoch": 0.0007572944200508261,
+      "grad_norm": 11.506697654724121,
+      "learning_rate": 0.0001999999803956598,
+      "loss": 2.5812,
+      "step": 104
+    },
+    {
+      "epoch": 0.0007645760971666995,
+      "grad_norm": 10.1392822265625,
+      "learning_rate": 0.0001999999803956598,
+      "loss": 1.6152,
+      "step": 105
+    },
+    {
+      "epoch": 0.0007718577742825728,
+      "grad_norm": 14.92361068725586,
+      "learning_rate": 0.0001999999803956598,
+      "loss": 2.2026,
+      "step": 106
+    },
+    {
+      "epoch": 0.0007791394513984461,
+      "grad_norm": 8.74295425415039,
+      "learning_rate": 0.0001999999803956598,
+      "loss": 1.7257,
+      "step": 107
+    },
+    {
+      "epoch": 0.0007864211285143195,
+      "grad_norm": 9.7953519821167,
+      "learning_rate": 0.0001999999803956598,
+      "loss": 1.9238,
+      "step": 108
+    },
+    {
+      "epoch": 0.0007937028056301927,
+      "grad_norm": 11.444181442260742,
+      "learning_rate": 0.00019999996584374458,
+      "loss": 2.2776,
+      "step": 109
+    },
+    {
+      "epoch": 0.000800984482746066,
+      "grad_norm": 12.844846725463867,
+      "learning_rate": 0.00019999996584374458,
+      "loss": 1.9025,
+      "step": 110
+    },
+    {
+      "epoch": 0.0008082661598619394,
+      "grad_norm": 7.37370491027832,
+      "learning_rate": 0.00019999996584374458,
+      "loss": 1.0972,
+      "step": 111
+    },
+    {
+      "epoch": 0.0008155478369778127,
+      "grad_norm": 6.512350559234619,
+      "learning_rate": 0.00019999996584374458,
+      "loss": 1.4349,
+      "step": 112
+    },
+    {
+      "epoch": 0.0008228295140936861,
+      "grad_norm": 14.362659454345703,
+      "learning_rate": 0.00019999996584374458,
+      "loss": 2.0781,
+      "step": 113
+    },
+    {
+      "epoch": 0.0008301111912095594,
+      "grad_norm": 10.504934310913086,
+      "learning_rate": 0.00019999996584374458,
+      "loss": 1.8158,
+      "step": 114
+    },
+    {
+      "epoch": 0.0008373928683254327,
+      "grad_norm": 9.051297187805176,
+      "learning_rate": 0.00019999996584374458,
+      "loss": 1.393,
+      "step": 115
+    },
+    {
+      "epoch": 0.0008446745454413061,
+      "grad_norm": 8.922006607055664,
+      "learning_rate": 0.00019999996584374458,
+      "loss": 1.5453,
+      "step": 116
+    },
+    {
+      "epoch": 0.0008519562225571794,
+      "grad_norm": 10.427721977233887,
+      "learning_rate": 0.00019999996584374458,
+      "loss": 1.8282,
+      "step": 117
+    },
+    {
+      "epoch": 0.0008592378996730527,
+      "grad_norm": 7.2870917320251465,
+      "learning_rate": 0.00019999996584374458,
+      "loss": 1.0126,
+      "step": 118
+    },
+    {
+      "epoch": 0.000866519576788926,
+      "grad_norm": 11.430087089538574,
+      "learning_rate": 0.00019999996584374458,
+      "loss": 1.6173,
+      "step": 119
+    },
+    {
+      "epoch": 0.0008738012539047993,
+      "grad_norm": 8.79759407043457,
+      "learning_rate": 0.00019999996584374458,
+      "loss": 1.2973,
+      "step": 120
+    },
+    {
+      "epoch": 0.0008810829310206727,
+      "grad_norm": 27.648391723632812,
+      "learning_rate": 0.00019999996584374458,
+      "loss": 1.5452,
+      "step": 121
+    },
+    {
+      "epoch": 0.000888364608136546,
+      "grad_norm": 6.860217094421387,
+      "learning_rate": 0.00019999996584374458,
+      "loss": 1.1589,
+      "step": 122
+    },
+    {
+      "epoch": 0.0008956462852524193,
+      "grad_norm": 9.904220581054688,
+      "learning_rate": 0.00019999996584374458,
+      "loss": 1.7574,
+      "step": 123
+    },
+    {
+      "epoch": 0.0009029279623682927,
+      "grad_norm": 14.817357063293457,
+      "learning_rate": 0.00019999996584374458,
+      "loss": 2.2985,
+      "step": 124
+    },
+    {
+      "epoch": 0.000910209639484166,
+      "grad_norm": 9.02469253540039,
+      "learning_rate": 0.00019999996584374458,
+      "loss": 2.0967,
+      "step": 125
+    },
+    {
+      "epoch": 0.0009174913166000394,
+      "grad_norm": 9.622922897338867,
+      "learning_rate": 0.00019999996584374458,
+      "loss": 2.4315,
+      "step": 126
+    },
+    {
+      "epoch": 0.0009247729937159126,
+      "grad_norm": 7.905317306518555,
+      "learning_rate": 0.00019999996584374458,
+      "loss": 1.3262,
+      "step": 127
+    },
+    {
+      "epoch": 0.0009320546708317859,
+      "grad_norm": 8.914352416992188,
+      "learning_rate": 0.00019999996584374458,
+      "loss": 1.7299,
+      "step": 128
+    },
+    {
+      "epoch": 0.0009393363479476593,
+      "grad_norm": 9.581490516662598,
+      "learning_rate": 0.00019999996584374458,
+      "loss": 1.8589,
+      "step": 129
+    },
+    {
+      "epoch": 0.0009466180250635326,
+      "grad_norm": 6.935079097747803,
+      "learning_rate": 0.00019999996584374458,
+      "loss": 1.1753,
+      "step": 130
+    },
+    {
+      "epoch": 0.000953899702179406,
+      "grad_norm": 6.804558753967285,
+      "learning_rate": 0.00019999996584374458,
+      "loss": 1.62,
+      "step": 131
+    },
+    {
+      "epoch": 0.0009611813792952793,
+      "grad_norm": 11.983720779418945,
+      "learning_rate": 0.00019999996584374458,
+      "loss": 1.5915,
+      "step": 132
+    },
+    {
+      "epoch": 0.0009684630564111526,
+      "grad_norm": 25.60173988342285,
+      "learning_rate": 0.00019999996584374458,
+      "loss": 2.0053,
+      "step": 133
+    },
+    {
+      "epoch": 0.000975744733527026,
+      "grad_norm": 8.45429515838623,
+      "learning_rate": 0.00019999996584374458,
+      "loss": 2.0073,
+      "step": 134
+    },
+    {
+      "epoch": 0.0009830264106428994,
+      "grad_norm": 6.321603775024414,
+      "learning_rate": 0.00019999996584374458,
+      "loss": 0.9744,
+      "step": 135
+    },
+    {
+      "epoch": 0.0009903080877587725,
+      "grad_norm": 9.055106163024902,
+      "learning_rate": 0.00019999996584374458,
+      "loss": 1.6416,
+      "step": 136
+    },
+    {
+      "epoch": 0.000997589764874646,
+      "grad_norm": 8.92955207824707,
+      "learning_rate": 0.00019999996584374458,
+      "loss": 2.0005,
+      "step": 137
+    },
+    {
+      "epoch": 0.0010048714419905193,
+      "grad_norm": 8.891891479492188,
+      "learning_rate": 0.00019999996584374458,
+      "loss": 1.6211,
+      "step": 138
+    },
+    {
+      "epoch": 0.0010121531191063925,
+      "grad_norm": 13.83609676361084,
+      "learning_rate": 0.00019999996584374458,
+      "loss": 1.6726,
+      "step": 139
+    },
+    {
+      "epoch": 0.001019434796222266,
+      "grad_norm": 16.78993034362793,
+      "learning_rate": 0.00019999996584374458,
+      "loss": 1.86,
+      "step": 140
+    },
+    {
+      "epoch": 0.0010267164733381393,
+      "grad_norm": 10.312461853027344,
+      "learning_rate": 0.00019999996584374458,
+      "loss": 1.4386,
+      "step": 141
+    },
+    {
+      "epoch": 0.0010339981504540125,
+      "grad_norm": 8.104599952697754,
+      "learning_rate": 0.00019999996584374458,
+      "loss": 2.0064,
+      "step": 142
+    },
+    {
+      "epoch": 0.0010412798275698859,
+      "grad_norm": 9.345172882080078,
+      "learning_rate": 0.00019999996584374458,
+      "loss": 1.9574,
+      "step": 143
+    },
+    {
+      "epoch": 0.0010485615046857593,
+      "grad_norm": 8.887224197387695,
+      "learning_rate": 0.00019999996584374458,
+      "loss": 1.4813,
+      "step": 144
+    },
+    {
+      "epoch": 0.0010558431818016324,
+      "grad_norm": 10.085160255432129,
+      "learning_rate": 0.00019999996584374458,
+      "loss": 1.4148,
+      "step": 145
+    },
+    {
+      "epoch": 0.0010631248589175058,
+      "grad_norm": 15.231815338134766,
+      "learning_rate": 0.00019999996584374458,
+      "loss": 2.1894,
+      "step": 146
+    },
+    {
+      "epoch": 0.0010704065360333792,
+      "grad_norm": 9.214299201965332,
+      "learning_rate": 0.00019999996584374458,
+      "loss": 1.2747,
+      "step": 147
+    },
+    {
+      "epoch": 0.0010776882131492526,
+      "grad_norm": 9.717480659484863,
+      "learning_rate": 0.00019999996584374458,
+      "loss": 2.3933,
+      "step": 148
+    },
+    {
+      "epoch": 0.0010849698902651258,
+      "grad_norm": 12.608345031738281,
+      "learning_rate": 0.00019999996584374458,
+      "loss": 2.2024,
+      "step": 149
+    },
+    {
+      "epoch": 0.0010922515673809992,
+      "grad_norm": 11.714061737060547,
+      "learning_rate": 0.00019999995129182935,
+      "loss": 2.1489,
+      "step": 150
+    },
+    {
+      "epoch": 0.0010995332444968726,
+      "grad_norm": 6.733028411865234,
+      "learning_rate": 0.00019999995129182935,
+      "loss": 0.9061,
+      "step": 151
+    },
+    {
+      "epoch": 0.0011068149216127458,
+      "grad_norm": 9.173693656921387,
+      "learning_rate": 0.00019999995129182935,
+      "loss": 1.2983,
+      "step": 152
+    },
+    {
+      "epoch": 0.0011140965987286192,
+      "grad_norm": 10.551212310791016,
+      "learning_rate": 0.00019999995129182935,
+      "loss": 2.1741,
+      "step": 153
+    },
+    {
+      "epoch": 0.0011213782758444926,
+      "grad_norm": 9.643767356872559,
+      "learning_rate": 0.00019999995129182935,
+      "loss": 1.8785,
+      "step": 154
+    },
+    {
+      "epoch": 0.0011286599529603657,
+      "grad_norm": 8.361176490783691,
+      "learning_rate": 0.00019999995129182935,
+      "loss": 1.7363,
+      "step": 155
+    },
+    {
+      "epoch": 0.0011359416300762391,
+      "grad_norm": 7.118173122406006,
+      "learning_rate": 0.00019999995129182935,
+      "loss": 1.9851,
+      "step": 156
+    },
+    {
+      "epoch": 0.0011432233071921125,
+      "grad_norm": 11.809035301208496,
+      "learning_rate": 0.00019999995129182935,
+      "loss": 2.0862,
+      "step": 157
+    },
+    {
+      "epoch": 0.001150504984307986,
+      "grad_norm": 8.845592498779297,
+      "learning_rate": 0.00019999995129182935,
+      "loss": 1.2204,
+      "step": 158
+    },
+    {
+      "epoch": 0.001157786661423859,
+      "grad_norm": 8.715829849243164,
+      "learning_rate": 0.00019999995129182935,
+      "loss": 1.9076,
+      "step": 159
+    },
+    {
+      "epoch": 0.0011650683385397325,
+      "grad_norm": 8.711019515991211,
+      "learning_rate": 0.00019999995129182935,
+      "loss": 1.4475,
+      "step": 160
+    },
+    {
+      "epoch": 0.0011723500156556059,
+      "grad_norm": 9.015527725219727,
+      "learning_rate": 0.00019999995129182935,
+      "loss": 1.9736,
+      "step": 161
+    },
+    {
+      "epoch": 0.001179631692771479,
+      "grad_norm": 13.795394897460938,
+      "learning_rate": 0.00019999995129182935,
+      "loss": 2.1726,
+      "step": 162
+    },
+    {
+      "epoch": 0.0011869133698873525,
+      "grad_norm": 8.399027824401855,
+      "learning_rate": 0.00019999995129182935,
+      "loss": 1.5726,
+      "step": 163
+    },
+    {
+      "epoch": 0.0011941950470032259,
+      "grad_norm": 9.823261260986328,
+      "learning_rate": 0.00019999995129182935,
+      "loss": 1.7958,
+      "step": 164
+    },
+    {
+      "epoch": 0.001201476724119099,
+      "grad_norm": 7.4197516441345215,
+      "learning_rate": 0.00019999995129182935,
+      "loss": 1.1734,
+      "step": 165
+    },
+    {
+      "epoch": 0.0012087584012349724,
+      "grad_norm": 9.796939849853516,
+      "learning_rate": 0.00019999993673991412,
+      "loss": 1.5177,
+      "step": 166
+    },
+    {
+      "epoch": 0.0012160400783508458,
+      "grad_norm": 10.06812858581543,
+      "learning_rate": 0.00019999993673991412,
+      "loss": 2.1493,
+      "step": 167
+    },
+    {
+      "epoch": 0.001223321755466719,
+      "grad_norm": 12.96749210357666,
+      "learning_rate": 0.00019999993673991412,
+      "loss": 2.4498,
+      "step": 168
+    },
+    {
+      "epoch": 0.0012306034325825924,
+      "grad_norm": 10.271540641784668,
+      "learning_rate": 0.00019999993673991412,
+      "loss": 2.1217,
+      "step": 169
+    },
+    {
+      "epoch": 0.0012378851096984658,
+      "grad_norm": 10.417543411254883,
+      "learning_rate": 0.00019999993673991412,
+      "loss": 1.5065,
+      "step": 170
+    },
+    {
+      "epoch": 0.0012451667868143392,
+      "grad_norm": 14.934460639953613,
+      "learning_rate": 0.00019999993673991412,
+      "loss": 1.8384,
+      "step": 171
+    },
+    {
+      "epoch": 0.0012524484639302124,
+      "grad_norm": 29.06182098388672,
+      "learning_rate": 0.00019999993673991412,
+      "loss": 2.2379,
+      "step": 172
+    },
+    {
+      "epoch": 0.0012597301410460858,
+      "grad_norm": 179.1177215576172,
+      "learning_rate": 0.00019999993673991412,
+      "loss": 2.4171,
+      "step": 173
+    },
+    {
+      "epoch": 0.0012670118181619592,
+      "grad_norm": 17.668655395507812,
+      "learning_rate": 0.00019999993673991412,
+      "loss": 2.3388,
+      "step": 174
+    },
+    {
+      "epoch": 0.0012742934952778323,
+      "grad_norm": 8.100573539733887,
+      "learning_rate": 0.00019999993673991412,
+      "loss": 1.3806,
+      "step": 175
+    },
+    {
+      "epoch": 0.0012815751723937057,
+      "grad_norm": 9.43472671508789,
+      "learning_rate": 0.00019999993673991412,
+      "loss": 1.2791,
+      "step": 176
+    },
+    {
+      "epoch": 0.0012888568495095791,
+      "grad_norm": 11.577341079711914,
+      "learning_rate": 0.00019999993673991412,
+      "loss": 1.7206,
+      "step": 177
+    },
+    {
+      "epoch": 0.0012961385266254523,
+      "grad_norm": 12.485160827636719,
+      "learning_rate": 0.00019999993673991412,
+      "loss": 2.4225,
+      "step": 178
+    },
+    {
+      "epoch": 0.0013034202037413257,
+      "grad_norm": 11.283897399902344,
+      "learning_rate": 0.00019999993673991412,
+      "loss": 1.535,
+      "step": 179
+    },
+    {
+      "epoch": 0.001310701880857199,
+      "grad_norm": 9.801483154296875,
+      "learning_rate": 0.00019999993673991412,
+      "loss": 2.1996,
+      "step": 180
+    },
+    {
+      "epoch": 0.0013179835579730723,
+      "grad_norm": 11.930302619934082,
+      "learning_rate": 0.0001999999221879989,
+      "loss": 1.4919,
+      "step": 181
+    },
+    {
+      "epoch": 0.0013252652350889457,
+      "grad_norm": 10.425878524780273,
+      "learning_rate": 0.0001999999221879989,
+      "loss": 1.7397,
+      "step": 182
+    },
+    {
+      "epoch": 0.001332546912204819,
+      "grad_norm": 9.171979904174805,
+      "learning_rate": 0.0001999999221879989,
+      "loss": 1.8727,
+      "step": 183
+    },
+    {
+      "epoch": 0.0013398285893206924,
+      "grad_norm": 10.64809799194336,
+      "learning_rate": 0.0001999999221879989,
+      "loss": 1.742,
+      "step": 184
+    },
+    {
+      "epoch": 0.0013471102664365656,
+      "grad_norm": 13.533452033996582,
+      "learning_rate": 0.0001999999221879989,
+      "loss": 1.7024,
+      "step": 185
+    },
+    {
+      "epoch": 0.001354391943552439,
+      "grad_norm": 9.46273422241211,
+      "learning_rate": 0.0001999999221879989,
+      "loss": 1.8248,
+      "step": 186
+    },
+    {
+      "epoch": 0.0013616736206683124,
+      "grad_norm": 20.131072998046875,
+      "learning_rate": 0.0001999999221879989,
+      "loss": 1.7554,
+      "step": 187
+    },
+    {
+      "epoch": 0.0013689552977841856,
+      "grad_norm": 6.414751052856445,
+      "learning_rate": 0.0001999999221879989,
+      "loss": 0.8933,
+      "step": 188
+    },
+    {
+      "epoch": 0.001376236974900059,
+      "grad_norm": 7.662179946899414,
+      "learning_rate": 0.0001999999221879989,
+      "loss": 1.4111,
+      "step": 189
+    },
+    {
+      "epoch": 0.0013835186520159324,
+      "grad_norm": 10.391175270080566,
+      "learning_rate": 0.0001999999221879989,
+      "loss": 2.0454,
+      "step": 190
+    },
+    {
+      "epoch": 0.0013908003291318056,
+      "grad_norm": 9.538223266601562,
+      "learning_rate": 0.0001999999221879989,
+      "loss": 2.2686,
+      "step": 191
+    },
+    {
+      "epoch": 0.001398082006247679,
+      "grad_norm": 13.454922676086426,
+      "learning_rate": 0.0001999999221879989,
+      "loss": 2.0085,
+      "step": 192
+    },
+    {
+      "epoch": 0.0014053636833635523,
+      "grad_norm": 8.68910026550293,
+      "learning_rate": 0.0001999999221879989,
+      "loss": 1.2429,
+      "step": 193
+    },
+    {
+      "epoch": 0.0014126453604794255,
+      "grad_norm": 11.174205780029297,
+      "learning_rate": 0.00019999990763608366,
+      "loss": 1.5733,
+      "step": 194
+    },
+    {
+      "epoch": 0.001419927037595299,
+      "grad_norm": 6.9463791847229,
+      "learning_rate": 0.00019999990763608366,
+      "loss": 1.1096,
+      "step": 195
+    },
+    {
+      "epoch": 0.0014272087147111723,
+      "grad_norm": 10.863914489746094,
+      "learning_rate": 0.00019999990763608366,
+      "loss": 1.374,
+      "step": 196
+    },
+    {
+      "epoch": 0.0014344903918270457,
+      "grad_norm": 7.314742565155029,
+      "learning_rate": 0.00019999990763608366,
+      "loss": 1.8196,
+      "step": 197
+    },
+    {
+      "epoch": 0.0014417720689429189,
+      "grad_norm": 13.76647663116455,
+      "learning_rate": 0.00019999990763608366,
+      "loss": 1.8654,
+      "step": 198
+    },
+    {
+      "epoch": 0.0014490537460587923,
+      "grad_norm": 18.60045051574707,
+      "learning_rate": 0.00019999990763608366,
+      "loss": 1.8438,
+      "step": 199
+    },
+    {
+      "epoch": 0.0014563354231746657,
+      "grad_norm": 6.76107931137085,
+      "learning_rate": 0.00019999990763608366,
+      "loss": 1.1957,
+      "step": 200
+    },
+    {
+      "epoch": 0.0014563354231746657,
+      "eval_loss": 1.8587133884429932,
+      "eval_runtime": 36.3018,
+      "eval_samples_per_second": 5.702,
+      "eval_steps_per_second": 1.901,
+      "step": 200
     }
   ],
   "logging_steps": 1,
         "early_stopping_threshold": 0.0
       },
       "attributes": {
+        "early_stopping_patience_counter": 1
       }
     },
     "TrainerControl": {
       "attributes": {}
     }
   },
+  "total_flos": 1.32610101608448e+16,
   "train_batch_size": 3,
   "trial_name": null,
   "trial_params": null