xlsr-wav2vec2-2e-4-augmented / trainer_state.json
soba1911's picture
Upload 8 files
b6b4340 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.19080659150043366,
"eval_steps": 10,
"global_step": 550,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.003469210754553339,
"grad_norm": 31.211658477783203,
"learning_rate": 0.00019636363636363636,
"loss": 0.6808,
"step": 10
},
{
"epoch": 0.003469210754553339,
"eval_accuracy": 0.5609713792800903,
"eval_loss": 0.6768143773078918,
"eval_runtime": 673.3248,
"eval_samples_per_second": 8.562,
"eval_steps_per_second": 2.142,
"step": 10
},
{
"epoch": 0.006938421509106678,
"grad_norm": 2.6544265747070312,
"learning_rate": 0.00019272727272727274,
"loss": 0.6238,
"step": 20
},
{
"epoch": 0.006938421509106678,
"eval_accuracy": 0.6926279067993164,
"eval_loss": 0.6432190537452698,
"eval_runtime": 674.7679,
"eval_samples_per_second": 8.544,
"eval_steps_per_second": 2.137,
"step": 20
},
{
"epoch": 0.010407632263660017,
"grad_norm": 28.516557693481445,
"learning_rate": 0.0001890909090909091,
"loss": 0.6604,
"step": 30
},
{
"epoch": 0.010407632263660017,
"eval_accuracy": 0.7094535827636719,
"eval_loss": 0.5888805985450745,
"eval_runtime": 675.7811,
"eval_samples_per_second": 8.531,
"eval_steps_per_second": 2.134,
"step": 30
},
{
"epoch": 0.013876843018213356,
"grad_norm": 21.07656478881836,
"learning_rate": 0.00018545454545454545,
"loss": 0.6838,
"step": 40
},
{
"epoch": 0.013876843018213356,
"eval_accuracy": 0.7339115142822266,
"eval_loss": 0.5620893239974976,
"eval_runtime": 673.4164,
"eval_samples_per_second": 8.561,
"eval_steps_per_second": 2.141,
"step": 40
},
{
"epoch": 0.017346053772766695,
"grad_norm": 5.250032901763916,
"learning_rate": 0.00018181818181818183,
"loss": 0.5994,
"step": 50
},
{
"epoch": 0.017346053772766695,
"eval_accuracy": 0.6922810077667236,
"eval_loss": 0.6344946026802063,
"eval_runtime": 680.1572,
"eval_samples_per_second": 8.476,
"eval_steps_per_second": 2.12,
"step": 50
},
{
"epoch": 0.020815264527320035,
"grad_norm": 45.266639709472656,
"learning_rate": 0.0001781818181818182,
"loss": 0.6421,
"step": 60
},
{
"epoch": 0.020815264527320035,
"eval_accuracy": 0.7212489247322083,
"eval_loss": 0.6085450649261475,
"eval_runtime": 679.9324,
"eval_samples_per_second": 8.479,
"eval_steps_per_second": 2.121,
"step": 60
},
{
"epoch": 0.024284475281873375,
"grad_norm": 18.615516662597656,
"learning_rate": 0.00017454545454545454,
"loss": 0.549,
"step": 70
},
{
"epoch": 0.024284475281873375,
"eval_accuracy": 0.7897658348083496,
"eval_loss": 0.4875045418739319,
"eval_runtime": 685.8032,
"eval_samples_per_second": 8.406,
"eval_steps_per_second": 2.103,
"step": 70
},
{
"epoch": 0.027753686036426712,
"grad_norm": 8.090106010437012,
"learning_rate": 0.0001709090909090909,
"loss": 0.5293,
"step": 80
},
{
"epoch": 0.027753686036426712,
"eval_accuracy": 0.8057242035865784,
"eval_loss": 0.4444285035133362,
"eval_runtime": 679.7237,
"eval_samples_per_second": 8.481,
"eval_steps_per_second": 2.121,
"step": 80
},
{
"epoch": 0.031222896790980052,
"grad_norm": 12.921670913696289,
"learning_rate": 0.00016727272727272728,
"loss": 0.6783,
"step": 90
},
{
"epoch": 0.031222896790980052,
"eval_accuracy": 0.49470946192741394,
"eval_loss": 0.8580413460731506,
"eval_runtime": 681.5405,
"eval_samples_per_second": 8.459,
"eval_steps_per_second": 2.116,
"step": 90
},
{
"epoch": 0.03469210754553339,
"grad_norm": 3.922827959060669,
"learning_rate": 0.00016363636363636366,
"loss": 0.7702,
"step": 100
},
{
"epoch": 0.03469210754553339,
"eval_accuracy": 0.5052905678749084,
"eval_loss": 0.6616266369819641,
"eval_runtime": 688.564,
"eval_samples_per_second": 8.372,
"eval_steps_per_second": 2.094,
"step": 100
},
{
"epoch": 0.03816131830008673,
"grad_norm": 2.154066801071167,
"learning_rate": 0.00016,
"loss": 0.7213,
"step": 110
},
{
"epoch": 0.03816131830008673,
"eval_accuracy": 0.6563746929168701,
"eval_loss": 0.6373852491378784,
"eval_runtime": 684.8723,
"eval_samples_per_second": 8.418,
"eval_steps_per_second": 2.106,
"step": 110
},
{
"epoch": 0.04163052905464007,
"grad_norm": 1.061826467514038,
"learning_rate": 0.00015636363636363637,
"loss": 0.6699,
"step": 120
},
{
"epoch": 0.04163052905464007,
"eval_accuracy": 0.6971378922462463,
"eval_loss": 0.6286903619766235,
"eval_runtime": 681.0538,
"eval_samples_per_second": 8.465,
"eval_steps_per_second": 2.117,
"step": 120
},
{
"epoch": 0.045099739809193407,
"grad_norm": 10.424234390258789,
"learning_rate": 0.00015272727272727275,
"loss": 0.5399,
"step": 130
},
{
"epoch": 0.045099739809193407,
"eval_accuracy": 0.8036426901817322,
"eval_loss": 0.4253140985965729,
"eval_runtime": 690.0787,
"eval_samples_per_second": 8.354,
"eval_steps_per_second": 2.09,
"step": 130
},
{
"epoch": 0.04856895056374675,
"grad_norm": 2.499521017074585,
"learning_rate": 0.0001490909090909091,
"loss": 0.5553,
"step": 140
},
{
"epoch": 0.04856895056374675,
"eval_accuracy": 0.7542064189910889,
"eval_loss": 0.44444340467453003,
"eval_runtime": 685.7103,
"eval_samples_per_second": 8.407,
"eval_steps_per_second": 2.103,
"step": 140
},
{
"epoch": 0.05203816131830009,
"grad_norm": 14.523048400878906,
"learning_rate": 0.00014545454545454546,
"loss": 0.6979,
"step": 150
},
{
"epoch": 0.05203816131830009,
"eval_accuracy": 0.7439722418785095,
"eval_loss": 0.5539606213569641,
"eval_runtime": 691.0552,
"eval_samples_per_second": 8.342,
"eval_steps_per_second": 2.087,
"step": 150
},
{
"epoch": 0.055507372072853424,
"grad_norm": 12.651830673217773,
"learning_rate": 0.00014181818181818184,
"loss": 0.4658,
"step": 160
},
{
"epoch": 0.055507372072853424,
"eval_accuracy": 0.8102341890335083,
"eval_loss": 0.41497698426246643,
"eval_runtime": 683.4853,
"eval_samples_per_second": 8.435,
"eval_steps_per_second": 2.11,
"step": 160
},
{
"epoch": 0.05897658282740677,
"grad_norm": 1.7211685180664062,
"learning_rate": 0.0001381818181818182,
"loss": 0.3741,
"step": 170
},
{
"epoch": 0.05897658282740677,
"eval_accuracy": 0.8556808233261108,
"eval_loss": 0.36343446373939514,
"eval_runtime": 684.3542,
"eval_samples_per_second": 8.424,
"eval_steps_per_second": 2.107,
"step": 170
},
{
"epoch": 0.062445793581960105,
"grad_norm": 0.5337616801261902,
"learning_rate": 0.00013454545454545455,
"loss": 0.2849,
"step": 180
},
{
"epoch": 0.062445793581960105,
"eval_accuracy": 0.877883791923523,
"eval_loss": 0.2661490738391876,
"eval_runtime": 687.6402,
"eval_samples_per_second": 8.384,
"eval_steps_per_second": 2.097,
"step": 180
},
{
"epoch": 0.06591500433651344,
"grad_norm": 240.42689514160156,
"learning_rate": 0.00013090909090909093,
"loss": 0.3898,
"step": 190
},
{
"epoch": 0.06591500433651344,
"eval_accuracy": 0.9306157827377319,
"eval_loss": 0.19569338858127594,
"eval_runtime": 686.2526,
"eval_samples_per_second": 8.401,
"eval_steps_per_second": 2.101,
"step": 190
},
{
"epoch": 0.06938421509106678,
"grad_norm": 7.064624786376953,
"learning_rate": 0.00012727272727272728,
"loss": 0.3303,
"step": 200
},
{
"epoch": 0.06938421509106678,
"eval_accuracy": 0.9614917635917664,
"eval_loss": 0.13166119158267975,
"eval_runtime": 691.1196,
"eval_samples_per_second": 8.342,
"eval_steps_per_second": 2.086,
"step": 200
},
{
"epoch": 0.07285342584562012,
"grad_norm": 9.704263687133789,
"learning_rate": 0.00012363636363636364,
"loss": 0.1237,
"step": 210
},
{
"epoch": 0.07285342584562012,
"eval_accuracy": 0.9715524911880493,
"eval_loss": 0.0882689580321312,
"eval_runtime": 689.479,
"eval_samples_per_second": 8.361,
"eval_steps_per_second": 2.091,
"step": 210
},
{
"epoch": 0.07632263660017347,
"grad_norm": 1.1022824048995972,
"learning_rate": 0.00012,
"loss": 0.4275,
"step": 220
},
{
"epoch": 0.07632263660017347,
"eval_accuracy": 0.7349522709846497,
"eval_loss": 0.7713552117347717,
"eval_runtime": 684.6118,
"eval_samples_per_second": 8.421,
"eval_steps_per_second": 2.106,
"step": 220
},
{
"epoch": 0.0797918473547268,
"grad_norm": 3.139575481414795,
"learning_rate": 0.00011636363636363636,
"loss": 0.3942,
"step": 230
},
{
"epoch": 0.0797918473547268,
"eval_accuracy": 0.9280138611793518,
"eval_loss": 0.19574595987796783,
"eval_runtime": 690.4355,
"eval_samples_per_second": 8.35,
"eval_steps_per_second": 2.089,
"step": 230
},
{
"epoch": 0.08326105810928014,
"grad_norm": 1.0898246765136719,
"learning_rate": 0.00011272727272727272,
"loss": 0.4053,
"step": 240
},
{
"epoch": 0.08326105810928014,
"eval_accuracy": 0.9436253309249878,
"eval_loss": 0.1560826450586319,
"eval_runtime": 683.6485,
"eval_samples_per_second": 8.433,
"eval_steps_per_second": 2.109,
"step": 240
},
{
"epoch": 0.08673026886383348,
"grad_norm": 6.503047943115234,
"learning_rate": 0.00010909090909090909,
"loss": 0.2822,
"step": 250
},
{
"epoch": 0.08673026886383348,
"eval_accuracy": 0.9448395371437073,
"eval_loss": 0.15691135823726654,
"eval_runtime": 686.5667,
"eval_samples_per_second": 8.397,
"eval_steps_per_second": 2.1,
"step": 250
},
{
"epoch": 0.09019947961838681,
"grad_norm": 0.2402292639017105,
"learning_rate": 0.00010545454545454545,
"loss": 0.112,
"step": 260
},
{
"epoch": 0.09019947961838681,
"eval_accuracy": 0.9762359261512756,
"eval_loss": 0.10326409339904785,
"eval_runtime": 691.0661,
"eval_samples_per_second": 8.342,
"eval_steps_per_second": 2.087,
"step": 260
},
{
"epoch": 0.09366869037294015,
"grad_norm": 72.20112609863281,
"learning_rate": 0.00010181818181818181,
"loss": 0.1256,
"step": 270
},
{
"epoch": 0.09366869037294015,
"eval_accuracy": 0.9831743240356445,
"eval_loss": 0.07919599115848541,
"eval_runtime": 689.3527,
"eval_samples_per_second": 8.363,
"eval_steps_per_second": 2.092,
"step": 270
},
{
"epoch": 0.0971379011274935,
"grad_norm": 4.924927711486816,
"learning_rate": 9.818181818181818e-05,
"loss": 0.2008,
"step": 280
},
{
"epoch": 0.0971379011274935,
"eval_accuracy": 0.9845620393753052,
"eval_loss": 0.05440772697329521,
"eval_runtime": 694.2453,
"eval_samples_per_second": 8.304,
"eval_steps_per_second": 2.077,
"step": 280
},
{
"epoch": 0.10060711188204684,
"grad_norm": 0.23780618607997894,
"learning_rate": 9.454545454545455e-05,
"loss": 0.096,
"step": 290
},
{
"epoch": 0.10060711188204684,
"eval_accuracy": 0.984908938407898,
"eval_loss": 0.05138614773750305,
"eval_runtime": 684.3622,
"eval_samples_per_second": 8.424,
"eval_steps_per_second": 2.107,
"step": 290
},
{
"epoch": 0.10407632263660017,
"grad_norm": 0.08319728076457977,
"learning_rate": 9.090909090909092e-05,
"loss": 0.0246,
"step": 300
},
{
"epoch": 0.10407632263660017,
"eval_accuracy": 0.9760624170303345,
"eval_loss": 0.1299581676721573,
"eval_runtime": 685.1071,
"eval_samples_per_second": 8.415,
"eval_steps_per_second": 2.105,
"step": 300
},
{
"epoch": 0.10754553339115351,
"grad_norm": 0.021385882049798965,
"learning_rate": 8.727272727272727e-05,
"loss": 0.121,
"step": 310
},
{
"epoch": 0.10754553339115351,
"eval_accuracy": 0.9847354888916016,
"eval_loss": 0.06069515272974968,
"eval_runtime": 691.1118,
"eval_samples_per_second": 8.342,
"eval_steps_per_second": 2.086,
"step": 310
},
{
"epoch": 0.11101474414570685,
"grad_norm": 0.015313231386244297,
"learning_rate": 8.363636363636364e-05,
"loss": 0.1667,
"step": 320
},
{
"epoch": 0.11101474414570685,
"eval_accuracy": 0.9810928106307983,
"eval_loss": 0.10495974123477936,
"eval_runtime": 687.1574,
"eval_samples_per_second": 8.39,
"eval_steps_per_second": 2.099,
"step": 320
},
{
"epoch": 0.11448395490026018,
"grad_norm": 0.036152616143226624,
"learning_rate": 8e-05,
"loss": 0.2541,
"step": 330
},
{
"epoch": 0.11448395490026018,
"eval_accuracy": 0.9830008745193481,
"eval_loss": 0.09200656414031982,
"eval_runtime": 684.8484,
"eval_samples_per_second": 8.418,
"eval_steps_per_second": 2.106,
"step": 330
},
{
"epoch": 0.11795316565481354,
"grad_norm": 0.0553424209356308,
"learning_rate": 7.636363636363637e-05,
"loss": 0.0719,
"step": 340
},
{
"epoch": 0.11795316565481354,
"eval_accuracy": 0.9921942949295044,
"eval_loss": 0.03378007933497429,
"eval_runtime": 689.3401,
"eval_samples_per_second": 8.363,
"eval_steps_per_second": 2.092,
"step": 340
},
{
"epoch": 0.12142237640936687,
"grad_norm": 0.08452742546796799,
"learning_rate": 7.272727272727273e-05,
"loss": 0.1828,
"step": 350
},
{
"epoch": 0.12142237640936687,
"eval_accuracy": 0.9882046580314636,
"eval_loss": 0.043387919664382935,
"eval_runtime": 683.8045,
"eval_samples_per_second": 8.431,
"eval_steps_per_second": 2.109,
"step": 350
},
{
"epoch": 0.12489158716392021,
"grad_norm": 0.02259010076522827,
"learning_rate": 6.90909090909091e-05,
"loss": 0.0743,
"step": 360
},
{
"epoch": 0.12489158716392021,
"eval_accuracy": 0.9939289093017578,
"eval_loss": 0.025819096714258194,
"eval_runtime": 686.7645,
"eval_samples_per_second": 8.394,
"eval_steps_per_second": 2.1,
"step": 360
},
{
"epoch": 0.12836079791847355,
"grad_norm": 0.03690154105424881,
"learning_rate": 6.545454545454546e-05,
"loss": 0.0055,
"step": 370
},
{
"epoch": 0.12836079791847355,
"eval_accuracy": 0.9946227073669434,
"eval_loss": 0.026782656088471413,
"eval_runtime": 685.8688,
"eval_samples_per_second": 8.405,
"eval_steps_per_second": 2.102,
"step": 370
},
{
"epoch": 0.13183000867302688,
"grad_norm": 0.4121657609939575,
"learning_rate": 6.181818181818182e-05,
"loss": 0.1603,
"step": 380
},
{
"epoch": 0.13183000867302688,
"eval_accuracy": 0.9916738867759705,
"eval_loss": 0.0345265194773674,
"eval_runtime": 683.3279,
"eval_samples_per_second": 8.437,
"eval_steps_per_second": 2.11,
"step": 380
},
{
"epoch": 0.13529921942758022,
"grad_norm": 0.11380640417337418,
"learning_rate": 5.818181818181818e-05,
"loss": 0.1733,
"step": 390
},
{
"epoch": 0.13529921942758022,
"eval_accuracy": 0.9868170022964478,
"eval_loss": 0.0511435903608799,
"eval_runtime": 689.7162,
"eval_samples_per_second": 8.359,
"eval_steps_per_second": 2.091,
"step": 390
},
{
"epoch": 0.13876843018213356,
"grad_norm": 0.6289365887641907,
"learning_rate": 5.4545454545454546e-05,
"loss": 0.2929,
"step": 400
},
{
"epoch": 0.13876843018213356,
"eval_accuracy": 0.9939289093017578,
"eval_loss": 0.033962786197662354,
"eval_runtime": 686.7558,
"eval_samples_per_second": 8.395,
"eval_steps_per_second": 2.1,
"step": 400
},
{
"epoch": 0.1422376409366869,
"grad_norm": 0.1024908572435379,
"learning_rate": 5.090909090909091e-05,
"loss": 0.0709,
"step": 410
},
{
"epoch": 0.1422376409366869,
"eval_accuracy": 0.9871639013290405,
"eval_loss": 0.08034045249223709,
"eval_runtime": 693.8335,
"eval_samples_per_second": 8.309,
"eval_steps_per_second": 2.078,
"step": 410
},
{
"epoch": 0.14570685169124023,
"grad_norm": 0.05307464674115181,
"learning_rate": 4.7272727272727275e-05,
"loss": 0.0871,
"step": 420
},
{
"epoch": 0.14570685169124023,
"eval_accuracy": 0.9918473362922668,
"eval_loss": 0.04723769426345825,
"eval_runtime": 695.8072,
"eval_samples_per_second": 8.285,
"eval_steps_per_second": 2.072,
"step": 420
},
{
"epoch": 0.1491760624457936,
"grad_norm": 0.051310401409864426,
"learning_rate": 4.3636363636363636e-05,
"loss": 0.0112,
"step": 430
},
{
"epoch": 0.1491760624457936,
"eval_accuracy": 0.9928880929946899,
"eval_loss": 0.02936358004808426,
"eval_runtime": 687.912,
"eval_samples_per_second": 8.38,
"eval_steps_per_second": 2.096,
"step": 430
},
{
"epoch": 0.15264527320034693,
"grad_norm": 0.13214071094989777,
"learning_rate": 4e-05,
"loss": 0.0048,
"step": 440
},
{
"epoch": 0.15264527320034693,
"eval_accuracy": 0.9921942949295044,
"eval_loss": 0.03213657811284065,
"eval_runtime": 687.4824,
"eval_samples_per_second": 8.386,
"eval_steps_per_second": 2.098,
"step": 440
},
{
"epoch": 0.15611448395490027,
"grad_norm": 0.04072779417037964,
"learning_rate": 3.6363636363636364e-05,
"loss": 0.1144,
"step": 450
},
{
"epoch": 0.15611448395490027,
"eval_accuracy": 0.9935819506645203,
"eval_loss": 0.031503621488809586,
"eval_runtime": 687.6764,
"eval_samples_per_second": 8.383,
"eval_steps_per_second": 2.097,
"step": 450
},
{
"epoch": 0.1595836947094536,
"grad_norm": 0.043403998017311096,
"learning_rate": 3.272727272727273e-05,
"loss": 0.1474,
"step": 460
},
{
"epoch": 0.1595836947094536,
"eval_accuracy": 0.9935819506645203,
"eval_loss": 0.031036239117383957,
"eval_runtime": 686.2379,
"eval_samples_per_second": 8.401,
"eval_steps_per_second": 2.101,
"step": 460
},
{
"epoch": 0.16305290546400694,
"grad_norm": 1.3192791938781738,
"learning_rate": 2.909090909090909e-05,
"loss": 0.1232,
"step": 470
},
{
"epoch": 0.16305290546400694,
"eval_accuracy": 0.9939289093017578,
"eval_loss": 0.028218073770403862,
"eval_runtime": 688.1062,
"eval_samples_per_second": 8.378,
"eval_steps_per_second": 2.096,
"step": 470
},
{
"epoch": 0.16652211621856028,
"grad_norm": 2.083038330078125,
"learning_rate": 2.5454545454545454e-05,
"loss": 0.0174,
"step": 480
},
{
"epoch": 0.16652211621856028,
"eval_accuracy": 0.9937554001808167,
"eval_loss": 0.025646191090345383,
"eval_runtime": 683.4068,
"eval_samples_per_second": 8.436,
"eval_steps_per_second": 2.11,
"step": 480
},
{
"epoch": 0.16999132697311362,
"grad_norm": 0.05482853576540947,
"learning_rate": 2.1818181818181818e-05,
"loss": 0.038,
"step": 490
},
{
"epoch": 0.16999132697311362,
"eval_accuracy": 0.9939289093017578,
"eval_loss": 0.028373345732688904,
"eval_runtime": 691.4177,
"eval_samples_per_second": 8.338,
"eval_steps_per_second": 2.086,
"step": 490
},
{
"epoch": 0.17346053772766695,
"grad_norm": 0.018828364089131355,
"learning_rate": 1.8181818181818182e-05,
"loss": 0.0172,
"step": 500
},
{
"epoch": 0.17346053772766695,
"eval_accuracy": 0.9939289093017578,
"eval_loss": 0.031070245429873466,
"eval_runtime": 690.1128,
"eval_samples_per_second": 8.354,
"eval_steps_per_second": 2.09,
"step": 500
},
{
"epoch": 0.1769297484822203,
"grad_norm": 0.012764820829033852,
"learning_rate": 1.4545454545454545e-05,
"loss": 0.0046,
"step": 510
},
{
"epoch": 0.1769297484822203,
"eval_accuracy": 0.9937554001808167,
"eval_loss": 0.03232884034514427,
"eval_runtime": 683.4107,
"eval_samples_per_second": 8.436,
"eval_steps_per_second": 2.11,
"step": 510
},
{
"epoch": 0.18039895923677363,
"grad_norm": 0.02437267266213894,
"learning_rate": 1.0909090909090909e-05,
"loss": 0.0023,
"step": 520
},
{
"epoch": 0.18039895923677363,
"eval_accuracy": 0.9930616021156311,
"eval_loss": 0.0331372506916523,
"eval_runtime": 687.1986,
"eval_samples_per_second": 8.389,
"eval_steps_per_second": 2.098,
"step": 520
},
{
"epoch": 0.18386816999132696,
"grad_norm": 0.023814663290977478,
"learning_rate": 7.272727272727272e-06,
"loss": 0.0024,
"step": 530
},
{
"epoch": 0.18386816999132696,
"eval_accuracy": 0.9928880929946899,
"eval_loss": 0.033249419182538986,
"eval_runtime": 687.1764,
"eval_samples_per_second": 8.389,
"eval_steps_per_second": 2.098,
"step": 530
},
{
"epoch": 0.1873373807458803,
"grad_norm": 0.037307947874069214,
"learning_rate": 3.636363636363636e-06,
"loss": 0.1262,
"step": 540
},
{
"epoch": 0.1873373807458803,
"eval_accuracy": 0.9939289093017578,
"eval_loss": 0.033335860818624496,
"eval_runtime": 685.229,
"eval_samples_per_second": 8.413,
"eval_steps_per_second": 2.104,
"step": 540
},
{
"epoch": 0.19080659150043366,
"grad_norm": 0.11771216243505478,
"learning_rate": 0.0,
"loss": 0.003,
"step": 550
},
{
"epoch": 0.19080659150043366,
"eval_accuracy": 0.9937554001808167,
"eval_loss": 0.03361057490110397,
"eval_runtime": 685.0012,
"eval_samples_per_second": 8.416,
"eval_steps_per_second": 2.105,
"step": 550
}
],
"logging_steps": 10,
"max_steps": 550,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 10,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 8.214558191809199e+17,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}