PIRSData / checkpoint-320 /trainer_state.json
andrk9's picture
Upload folder using huggingface_hub
190eb1c verified
{
"best_metric": 0.9927281737327576,
"best_model_checkpoint": "/scratch/kwamea/llama-output/checkpoint-290",
"epoch": 42.666666666666664,
"eval_steps": 5,
"global_step": 320,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.67,
"grad_norm": 0.243179589509964,
"learning_rate": 9.857142857142858e-05,
"loss": 1.9956,
"step": 5
},
{
"epoch": 0.67,
"eval_loss": 1.9701930284500122,
"eval_runtime": 17.115,
"eval_samples_per_second": 0.409,
"eval_steps_per_second": 0.058,
"step": 5
},
{
"epoch": 1.33,
"grad_norm": 0.34590908885002136,
"learning_rate": 9.714285714285715e-05,
"loss": 1.9758,
"step": 10
},
{
"epoch": 1.33,
"eval_loss": 1.8941271305084229,
"eval_runtime": 17.0912,
"eval_samples_per_second": 0.41,
"eval_steps_per_second": 0.059,
"step": 10
},
{
"epoch": 2.0,
"grad_norm": 0.31595832109451294,
"learning_rate": 9.571428571428573e-05,
"loss": 1.849,
"step": 15
},
{
"epoch": 2.0,
"eval_loss": 1.8046789169311523,
"eval_runtime": 17.098,
"eval_samples_per_second": 0.409,
"eval_steps_per_second": 0.058,
"step": 15
},
{
"epoch": 2.67,
"grad_norm": 0.3428090512752533,
"learning_rate": 9.428571428571429e-05,
"loss": 1.789,
"step": 20
},
{
"epoch": 2.67,
"eval_loss": 1.7658358812332153,
"eval_runtime": 17.0734,
"eval_samples_per_second": 0.41,
"eval_steps_per_second": 0.059,
"step": 20
},
{
"epoch": 3.33,
"grad_norm": 0.3102028965950012,
"learning_rate": 9.285714285714286e-05,
"loss": 1.7789,
"step": 25
},
{
"epoch": 3.33,
"eval_loss": 1.7225048542022705,
"eval_runtime": 17.0972,
"eval_samples_per_second": 0.409,
"eval_steps_per_second": 0.058,
"step": 25
},
{
"epoch": 4.0,
"grad_norm": 0.38602885603904724,
"learning_rate": 9.142857142857143e-05,
"loss": 1.7003,
"step": 30
},
{
"epoch": 4.0,
"eval_loss": 1.6749440431594849,
"eval_runtime": 17.1034,
"eval_samples_per_second": 0.409,
"eval_steps_per_second": 0.058,
"step": 30
},
{
"epoch": 4.67,
"grad_norm": 0.37120407819747925,
"learning_rate": 9e-05,
"loss": 1.6424,
"step": 35
},
{
"epoch": 4.67,
"eval_loss": 1.6231099367141724,
"eval_runtime": 17.1067,
"eval_samples_per_second": 0.409,
"eval_steps_per_second": 0.058,
"step": 35
},
{
"epoch": 5.33,
"grad_norm": 0.4633428454399109,
"learning_rate": 8.857142857142857e-05,
"loss": 1.6023,
"step": 40
},
{
"epoch": 5.33,
"eval_loss": 1.5727053880691528,
"eval_runtime": 17.1002,
"eval_samples_per_second": 0.409,
"eval_steps_per_second": 0.058,
"step": 40
},
{
"epoch": 6.0,
"grad_norm": 0.5034663081169128,
"learning_rate": 8.714285714285715e-05,
"loss": 1.5322,
"step": 45
},
{
"epoch": 6.0,
"eval_loss": 1.5312587022781372,
"eval_runtime": 17.1159,
"eval_samples_per_second": 0.409,
"eval_steps_per_second": 0.058,
"step": 45
},
{
"epoch": 6.67,
"grad_norm": 0.5549929141998291,
"learning_rate": 8.571428571428571e-05,
"loss": 1.4788,
"step": 50
},
{
"epoch": 6.67,
"eval_loss": 1.492464303970337,
"eval_runtime": 17.0823,
"eval_samples_per_second": 0.41,
"eval_steps_per_second": 0.059,
"step": 50
},
{
"epoch": 7.33,
"grad_norm": 0.49194690585136414,
"learning_rate": 8.428571428571429e-05,
"loss": 1.4632,
"step": 55
},
{
"epoch": 7.33,
"eval_loss": 1.4622489213943481,
"eval_runtime": 17.1022,
"eval_samples_per_second": 0.409,
"eval_steps_per_second": 0.058,
"step": 55
},
{
"epoch": 8.0,
"grad_norm": 0.5866131782531738,
"learning_rate": 8.285714285714287e-05,
"loss": 1.3951,
"step": 60
},
{
"epoch": 8.0,
"eval_loss": 1.435951828956604,
"eval_runtime": 17.1087,
"eval_samples_per_second": 0.409,
"eval_steps_per_second": 0.058,
"step": 60
},
{
"epoch": 8.67,
"grad_norm": 0.6252542734146118,
"learning_rate": 8.142857142857143e-05,
"loss": 1.3796,
"step": 65
},
{
"epoch": 8.67,
"eval_loss": 1.413227915763855,
"eval_runtime": 17.0914,
"eval_samples_per_second": 0.41,
"eval_steps_per_second": 0.059,
"step": 65
},
{
"epoch": 9.33,
"grad_norm": 0.6751863360404968,
"learning_rate": 8e-05,
"loss": 1.3257,
"step": 70
},
{
"epoch": 9.33,
"eval_loss": 1.395649790763855,
"eval_runtime": 17.0885,
"eval_samples_per_second": 0.41,
"eval_steps_per_second": 0.059,
"step": 70
},
{
"epoch": 10.0,
"grad_norm": 0.8878222703933716,
"learning_rate": 7.857142857142858e-05,
"loss": 1.2795,
"step": 75
},
{
"epoch": 10.0,
"eval_loss": 1.3699487447738647,
"eval_runtime": 17.1031,
"eval_samples_per_second": 0.409,
"eval_steps_per_second": 0.058,
"step": 75
},
{
"epoch": 10.67,
"grad_norm": 0.8470121026039124,
"learning_rate": 7.714285714285715e-05,
"loss": 1.2449,
"step": 80
},
{
"epoch": 10.67,
"eval_loss": 1.347831130027771,
"eval_runtime": 17.0985,
"eval_samples_per_second": 0.409,
"eval_steps_per_second": 0.058,
"step": 80
},
{
"epoch": 11.33,
"grad_norm": 1.0655425786972046,
"learning_rate": 7.571428571428571e-05,
"loss": 1.1983,
"step": 85
},
{
"epoch": 11.33,
"eval_loss": 1.3311971426010132,
"eval_runtime": 17.0784,
"eval_samples_per_second": 0.41,
"eval_steps_per_second": 0.059,
"step": 85
},
{
"epoch": 12.0,
"grad_norm": 1.2651888132095337,
"learning_rate": 7.428571428571429e-05,
"loss": 1.1467,
"step": 90
},
{
"epoch": 12.0,
"eval_loss": 1.3095277547836304,
"eval_runtime": 17.0903,
"eval_samples_per_second": 0.41,
"eval_steps_per_second": 0.059,
"step": 90
},
{
"epoch": 12.67,
"grad_norm": 1.248926043510437,
"learning_rate": 7.285714285714286e-05,
"loss": 1.0922,
"step": 95
},
{
"epoch": 12.67,
"eval_loss": 1.2942878007888794,
"eval_runtime": 17.0947,
"eval_samples_per_second": 0.409,
"eval_steps_per_second": 0.058,
"step": 95
},
{
"epoch": 13.33,
"grad_norm": 1.896952509880066,
"learning_rate": 7.142857142857143e-05,
"loss": 1.0403,
"step": 100
},
{
"epoch": 13.33,
"eval_loss": 1.2803159952163696,
"eval_runtime": 17.0819,
"eval_samples_per_second": 0.41,
"eval_steps_per_second": 0.059,
"step": 100
},
{
"epoch": 14.0,
"grad_norm": 1.862244725227356,
"learning_rate": 7e-05,
"loss": 1.0049,
"step": 105
},
{
"epoch": 14.0,
"eval_loss": 1.2643567323684692,
"eval_runtime": 17.0849,
"eval_samples_per_second": 0.41,
"eval_steps_per_second": 0.059,
"step": 105
},
{
"epoch": 14.67,
"grad_norm": 1.7487821578979492,
"learning_rate": 6.857142857142858e-05,
"loss": 0.9262,
"step": 110
},
{
"epoch": 14.67,
"eval_loss": 1.2471646070480347,
"eval_runtime": 17.1278,
"eval_samples_per_second": 0.409,
"eval_steps_per_second": 0.058,
"step": 110
},
{
"epoch": 15.33,
"grad_norm": 1.838605284690857,
"learning_rate": 6.714285714285714e-05,
"loss": 0.8965,
"step": 115
},
{
"epoch": 15.33,
"eval_loss": 1.2377034425735474,
"eval_runtime": 17.0731,
"eval_samples_per_second": 0.41,
"eval_steps_per_second": 0.059,
"step": 115
},
{
"epoch": 16.0,
"grad_norm": 3.117398977279663,
"learning_rate": 6.571428571428571e-05,
"loss": 0.8581,
"step": 120
},
{
"epoch": 16.0,
"eval_loss": 1.2083133459091187,
"eval_runtime": 17.1304,
"eval_samples_per_second": 0.409,
"eval_steps_per_second": 0.058,
"step": 120
},
{
"epoch": 16.67,
"grad_norm": 2.5655250549316406,
"learning_rate": 6.428571428571429e-05,
"loss": 0.7929,
"step": 125
},
{
"epoch": 16.67,
"eval_loss": 1.1945828199386597,
"eval_runtime": 17.104,
"eval_samples_per_second": 0.409,
"eval_steps_per_second": 0.058,
"step": 125
},
{
"epoch": 17.33,
"grad_norm": 2.168546199798584,
"learning_rate": 6.285714285714286e-05,
"loss": 0.7543,
"step": 130
},
{
"epoch": 17.33,
"eval_loss": 1.1876276731491089,
"eval_runtime": 17.1046,
"eval_samples_per_second": 0.409,
"eval_steps_per_second": 0.058,
"step": 130
},
{
"epoch": 18.0,
"grad_norm": 2.5984208583831787,
"learning_rate": 6.142857142857143e-05,
"loss": 0.716,
"step": 135
},
{
"epoch": 18.0,
"eval_loss": 1.1714750528335571,
"eval_runtime": 17.0807,
"eval_samples_per_second": 0.41,
"eval_steps_per_second": 0.059,
"step": 135
},
{
"epoch": 18.67,
"grad_norm": 3.479024887084961,
"learning_rate": 6e-05,
"loss": 0.6681,
"step": 140
},
{
"epoch": 18.67,
"eval_loss": 1.169895052909851,
"eval_runtime": 17.0681,
"eval_samples_per_second": 0.41,
"eval_steps_per_second": 0.059,
"step": 140
},
{
"epoch": 19.33,
"grad_norm": 2.563386917114258,
"learning_rate": 5.8571428571428575e-05,
"loss": 0.6306,
"step": 145
},
{
"epoch": 19.33,
"eval_loss": 1.1741083860397339,
"eval_runtime": 17.0568,
"eval_samples_per_second": 0.41,
"eval_steps_per_second": 0.059,
"step": 145
},
{
"epoch": 20.0,
"grad_norm": 2.96592116355896,
"learning_rate": 5.714285714285714e-05,
"loss": 0.6183,
"step": 150
},
{
"epoch": 20.0,
"eval_loss": 1.1455965042114258,
"eval_runtime": 17.073,
"eval_samples_per_second": 0.41,
"eval_steps_per_second": 0.059,
"step": 150
},
{
"epoch": 20.67,
"grad_norm": 2.6751275062561035,
"learning_rate": 5.571428571428572e-05,
"loss": 0.5464,
"step": 155
},
{
"epoch": 20.67,
"eval_loss": 1.131102204322815,
"eval_runtime": 17.0578,
"eval_samples_per_second": 0.41,
"eval_steps_per_second": 0.059,
"step": 155
},
{
"epoch": 21.33,
"grad_norm": 2.3700051307678223,
"learning_rate": 5.428571428571428e-05,
"loss": 0.551,
"step": 160
},
{
"epoch": 21.33,
"eval_loss": 1.127384066581726,
"eval_runtime": 17.0546,
"eval_samples_per_second": 0.41,
"eval_steps_per_second": 0.059,
"step": 160
},
{
"epoch": 22.0,
"grad_norm": 3.3827567100524902,
"learning_rate": 5.285714285714286e-05,
"loss": 0.5179,
"step": 165
},
{
"epoch": 22.0,
"eval_loss": 1.111584186553955,
"eval_runtime": 17.0812,
"eval_samples_per_second": 0.41,
"eval_steps_per_second": 0.059,
"step": 165
},
{
"epoch": 22.67,
"grad_norm": 3.55790114402771,
"learning_rate": 5.142857142857143e-05,
"loss": 0.4831,
"step": 170
},
{
"epoch": 22.67,
"eval_loss": 1.0948525667190552,
"eval_runtime": 17.0547,
"eval_samples_per_second": 0.41,
"eval_steps_per_second": 0.059,
"step": 170
},
{
"epoch": 23.33,
"grad_norm": 3.0782699584960938,
"learning_rate": 5e-05,
"loss": 0.4587,
"step": 175
},
{
"epoch": 23.33,
"eval_loss": 1.0906586647033691,
"eval_runtime": 17.0666,
"eval_samples_per_second": 0.41,
"eval_steps_per_second": 0.059,
"step": 175
},
{
"epoch": 24.0,
"grad_norm": 3.3993167877197266,
"learning_rate": 4.8571428571428576e-05,
"loss": 0.4203,
"step": 180
},
{
"epoch": 24.0,
"eval_loss": 1.0688152313232422,
"eval_runtime": 17.0721,
"eval_samples_per_second": 0.41,
"eval_steps_per_second": 0.059,
"step": 180
},
{
"epoch": 24.67,
"grad_norm": 3.319303035736084,
"learning_rate": 4.714285714285714e-05,
"loss": 0.3975,
"step": 185
},
{
"epoch": 24.67,
"eval_loss": 1.0746583938598633,
"eval_runtime": 17.0709,
"eval_samples_per_second": 0.41,
"eval_steps_per_second": 0.059,
"step": 185
},
{
"epoch": 25.33,
"grad_norm": 2.4532127380371094,
"learning_rate": 4.5714285714285716e-05,
"loss": 0.3832,
"step": 190
},
{
"epoch": 25.33,
"eval_loss": 1.0772522687911987,
"eval_runtime": 17.0619,
"eval_samples_per_second": 0.41,
"eval_steps_per_second": 0.059,
"step": 190
},
{
"epoch": 26.0,
"grad_norm": 3.956822156906128,
"learning_rate": 4.428571428571428e-05,
"loss": 0.3725,
"step": 195
},
{
"epoch": 26.0,
"eval_loss": 1.0638784170150757,
"eval_runtime": 17.0807,
"eval_samples_per_second": 0.41,
"eval_steps_per_second": 0.059,
"step": 195
},
{
"epoch": 26.67,
"grad_norm": 2.76033353805542,
"learning_rate": 4.2857142857142856e-05,
"loss": 0.3473,
"step": 200
},
{
"epoch": 26.67,
"eval_loss": 1.04669988155365,
"eval_runtime": 17.0774,
"eval_samples_per_second": 0.41,
"eval_steps_per_second": 0.059,
"step": 200
},
{
"epoch": 27.33,
"grad_norm": 3.8683507442474365,
"learning_rate": 4.1428571428571437e-05,
"loss": 0.3243,
"step": 205
},
{
"epoch": 27.33,
"eval_loss": 1.0470303297042847,
"eval_runtime": 17.0718,
"eval_samples_per_second": 0.41,
"eval_steps_per_second": 0.059,
"step": 205
},
{
"epoch": 28.0,
"grad_norm": 4.535538196563721,
"learning_rate": 4e-05,
"loss": 0.3202,
"step": 210
},
{
"epoch": 28.0,
"eval_loss": 1.025539517402649,
"eval_runtime": 17.0604,
"eval_samples_per_second": 0.41,
"eval_steps_per_second": 0.059,
"step": 210
},
{
"epoch": 28.67,
"grad_norm": 2.6224355697631836,
"learning_rate": 3.857142857142858e-05,
"loss": 0.2958,
"step": 215
},
{
"epoch": 28.67,
"eval_loss": 1.0192126035690308,
"eval_runtime": 17.0657,
"eval_samples_per_second": 0.41,
"eval_steps_per_second": 0.059,
"step": 215
},
{
"epoch": 29.33,
"grad_norm": 2.5870041847229004,
"learning_rate": 3.7142857142857143e-05,
"loss": 0.2783,
"step": 220
},
{
"epoch": 29.33,
"eval_loss": 1.0211580991744995,
"eval_runtime": 17.0857,
"eval_samples_per_second": 0.41,
"eval_steps_per_second": 0.059,
"step": 220
},
{
"epoch": 30.0,
"grad_norm": 3.4565751552581787,
"learning_rate": 3.571428571428572e-05,
"loss": 0.2773,
"step": 225
},
{
"epoch": 30.0,
"eval_loss": 1.006419062614441,
"eval_runtime": 17.0807,
"eval_samples_per_second": 0.41,
"eval_steps_per_second": 0.059,
"step": 225
},
{
"epoch": 30.67,
"grad_norm": 2.4756500720977783,
"learning_rate": 3.428571428571429e-05,
"loss": 0.2482,
"step": 230
},
{
"epoch": 30.67,
"eval_loss": 1.0081219673156738,
"eval_runtime": 17.0576,
"eval_samples_per_second": 0.41,
"eval_steps_per_second": 0.059,
"step": 230
},
{
"epoch": 31.33,
"grad_norm": 2.38002610206604,
"learning_rate": 3.285714285714286e-05,
"loss": 0.2464,
"step": 235
},
{
"epoch": 31.33,
"eval_loss": 1.0151804685592651,
"eval_runtime": 17.0587,
"eval_samples_per_second": 0.41,
"eval_steps_per_second": 0.059,
"step": 235
},
{
"epoch": 32.0,
"grad_norm": 3.7081105709075928,
"learning_rate": 3.142857142857143e-05,
"loss": 0.2442,
"step": 240
},
{
"epoch": 32.0,
"eval_loss": 1.0032445192337036,
"eval_runtime": 17.1613,
"eval_samples_per_second": 0.408,
"eval_steps_per_second": 0.058,
"step": 240
},
{
"epoch": 32.67,
"grad_norm": 2.55924391746521,
"learning_rate": 3e-05,
"loss": 0.2193,
"step": 245
},
{
"epoch": 32.67,
"eval_loss": 0.9989615082740784,
"eval_runtime": 17.0447,
"eval_samples_per_second": 0.411,
"eval_steps_per_second": 0.059,
"step": 245
},
{
"epoch": 33.33,
"grad_norm": 1.9451407194137573,
"learning_rate": 2.857142857142857e-05,
"loss": 0.2101,
"step": 250
},
{
"epoch": 33.33,
"eval_loss": 1.0029457807540894,
"eval_runtime": 17.0816,
"eval_samples_per_second": 0.41,
"eval_steps_per_second": 0.059,
"step": 250
},
{
"epoch": 34.0,
"grad_norm": 2.713731527328491,
"learning_rate": 2.714285714285714e-05,
"loss": 0.2194,
"step": 255
},
{
"epoch": 34.0,
"eval_loss": 0.9959421753883362,
"eval_runtime": 17.0747,
"eval_samples_per_second": 0.41,
"eval_steps_per_second": 0.059,
"step": 255
},
{
"epoch": 34.67,
"grad_norm": 2.1633846759796143,
"learning_rate": 2.5714285714285714e-05,
"loss": 0.1958,
"step": 260
},
{
"epoch": 34.67,
"eval_loss": 0.9989770650863647,
"eval_runtime": 17.0821,
"eval_samples_per_second": 0.41,
"eval_steps_per_second": 0.059,
"step": 260
},
{
"epoch": 35.33,
"grad_norm": 3.9233529567718506,
"learning_rate": 2.4285714285714288e-05,
"loss": 0.1831,
"step": 265
},
{
"epoch": 35.33,
"eval_loss": 1.0072578191757202,
"eval_runtime": 17.0564,
"eval_samples_per_second": 0.41,
"eval_steps_per_second": 0.059,
"step": 265
},
{
"epoch": 36.0,
"grad_norm": 2.4143056869506836,
"learning_rate": 2.2857142857142858e-05,
"loss": 0.1753,
"step": 270
},
{
"epoch": 36.0,
"eval_loss": 0.9938892722129822,
"eval_runtime": 17.0668,
"eval_samples_per_second": 0.41,
"eval_steps_per_second": 0.059,
"step": 270
},
{
"epoch": 36.67,
"grad_norm": 2.706679582595825,
"learning_rate": 2.1428571428571428e-05,
"loss": 0.1698,
"step": 275
},
{
"epoch": 36.67,
"eval_loss": 0.9969200491905212,
"eval_runtime": 17.0643,
"eval_samples_per_second": 0.41,
"eval_steps_per_second": 0.059,
"step": 275
},
{
"epoch": 37.33,
"grad_norm": 1.872753620147705,
"learning_rate": 2e-05,
"loss": 0.16,
"step": 280
},
{
"epoch": 37.33,
"eval_loss": 0.9940390586853027,
"eval_runtime": 17.0728,
"eval_samples_per_second": 0.41,
"eval_steps_per_second": 0.059,
"step": 280
},
{
"epoch": 38.0,
"grad_norm": 2.7510581016540527,
"learning_rate": 1.8571428571428572e-05,
"loss": 0.1614,
"step": 285
},
{
"epoch": 38.0,
"eval_loss": 1.0066231489181519,
"eval_runtime": 17.072,
"eval_samples_per_second": 0.41,
"eval_steps_per_second": 0.059,
"step": 285
},
{
"epoch": 38.67,
"grad_norm": 1.8461092710494995,
"learning_rate": 1.7142857142857145e-05,
"loss": 0.1506,
"step": 290
},
{
"epoch": 38.67,
"eval_loss": 0.9927281737327576,
"eval_runtime": 17.0481,
"eval_samples_per_second": 0.411,
"eval_steps_per_second": 0.059,
"step": 290
},
{
"epoch": 39.33,
"grad_norm": 1.8425017595291138,
"learning_rate": 1.5714285714285715e-05,
"loss": 0.1419,
"step": 295
},
{
"epoch": 39.33,
"eval_loss": 1.0133570432662964,
"eval_runtime": 17.0642,
"eval_samples_per_second": 0.41,
"eval_steps_per_second": 0.059,
"step": 295
},
{
"epoch": 40.0,
"grad_norm": 2.0457987785339355,
"learning_rate": 1.4285714285714285e-05,
"loss": 0.1459,
"step": 300
},
{
"epoch": 40.0,
"eval_loss": 1.0127934217453003,
"eval_runtime": 17.0581,
"eval_samples_per_second": 0.41,
"eval_steps_per_second": 0.059,
"step": 300
},
{
"epoch": 40.67,
"grad_norm": 1.5630775690078735,
"learning_rate": 1.2857142857142857e-05,
"loss": 0.1225,
"step": 305
},
{
"epoch": 40.67,
"eval_loss": 1.0092624425888062,
"eval_runtime": 17.0483,
"eval_samples_per_second": 0.411,
"eval_steps_per_second": 0.059,
"step": 305
},
{
"epoch": 41.33,
"grad_norm": 1.37598717212677,
"learning_rate": 1.1428571428571429e-05,
"loss": 0.146,
"step": 310
},
{
"epoch": 41.33,
"eval_loss": 1.0083317756652832,
"eval_runtime": 17.0804,
"eval_samples_per_second": 0.41,
"eval_steps_per_second": 0.059,
"step": 310
},
{
"epoch": 42.0,
"grad_norm": 1.8867217302322388,
"learning_rate": 1e-05,
"loss": 0.13,
"step": 315
},
{
"epoch": 42.0,
"eval_loss": 1.0165104866027832,
"eval_runtime": 17.0621,
"eval_samples_per_second": 0.41,
"eval_steps_per_second": 0.059,
"step": 315
},
{
"epoch": 42.67,
"grad_norm": 1.4643555879592896,
"learning_rate": 8.571428571428573e-06,
"loss": 0.131,
"step": 320
},
{
"epoch": 42.67,
"eval_loss": 1.0264887809753418,
"eval_runtime": 17.0554,
"eval_samples_per_second": 0.41,
"eval_steps_per_second": 0.059,
"step": 320
}
],
"logging_steps": 5,
"max_steps": 350,
"num_input_tokens_seen": 0,
"num_train_epochs": 50,
"save_steps": 10,
"total_flos": 2.1266150580320993e+18,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}