xlsr-wav2vec2-4e-4-augmented / trainer_state.json
soba1911's picture
Upload 8 files
010296c verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.19080659150043366,
"eval_steps": 10,
"global_step": 550,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.003469210754553339,
"grad_norm": 21.349376678466797,
"learning_rate": 0.00039272727272727273,
"loss": 0.7499,
"step": 10
},
{
"epoch": 0.003469210754553339,
"eval_accuracy": 0.5052905678749084,
"eval_loss": 0.8751901388168335,
"eval_runtime": 684.1934,
"eval_samples_per_second": 8.426,
"eval_steps_per_second": 2.108,
"step": 10
},
{
"epoch": 0.006938421509106678,
"grad_norm": 8.41540241241455,
"learning_rate": 0.0003854545454545455,
"loss": 0.8516,
"step": 20
},
{
"epoch": 0.006938421509106678,
"eval_accuracy": 0.49470946192741394,
"eval_loss": 1.0566141605377197,
"eval_runtime": 674.6878,
"eval_samples_per_second": 8.545,
"eval_steps_per_second": 2.137,
"step": 20
},
{
"epoch": 0.010407632263660017,
"grad_norm": 1.6247762441635132,
"learning_rate": 0.0003781818181818182,
"loss": 0.758,
"step": 30
},
{
"epoch": 0.010407632263660017,
"eval_accuracy": 0.5052905678749084,
"eval_loss": 0.6859118938446045,
"eval_runtime": 671.8473,
"eval_samples_per_second": 8.581,
"eval_steps_per_second": 2.146,
"step": 30
},
{
"epoch": 0.013876843018213356,
"grad_norm": 1.565369725227356,
"learning_rate": 0.0003709090909090909,
"loss": 0.7641,
"step": 40
},
{
"epoch": 0.013876843018213356,
"eval_accuracy": 0.49470946192741394,
"eval_loss": 0.7000069618225098,
"eval_runtime": 669.6953,
"eval_samples_per_second": 8.608,
"eval_steps_per_second": 2.153,
"step": 40
},
{
"epoch": 0.017346053772766695,
"grad_norm": 0.3613499104976654,
"learning_rate": 0.00036363636363636367,
"loss": 0.7147,
"step": 50
},
{
"epoch": 0.017346053772766695,
"eval_accuracy": 0.5052905678749084,
"eval_loss": 0.6953065991401672,
"eval_runtime": 671.8252,
"eval_samples_per_second": 8.581,
"eval_steps_per_second": 2.146,
"step": 50
},
{
"epoch": 0.020815264527320035,
"grad_norm": 0.978003203868866,
"learning_rate": 0.0003563636363636364,
"loss": 0.7172,
"step": 60
},
{
"epoch": 0.020815264527320035,
"eval_accuracy": 0.5052905678749084,
"eval_loss": 0.6954382658004761,
"eval_runtime": 667.8042,
"eval_samples_per_second": 8.633,
"eval_steps_per_second": 2.159,
"step": 60
},
{
"epoch": 0.024284475281873375,
"grad_norm": 1.575156807899475,
"learning_rate": 0.0003490909090909091,
"loss": 0.6646,
"step": 70
},
{
"epoch": 0.024284475281873375,
"eval_accuracy": 0.6794449090957642,
"eval_loss": 0.6547678112983704,
"eval_runtime": 671.8486,
"eval_samples_per_second": 8.581,
"eval_steps_per_second": 2.146,
"step": 70
},
{
"epoch": 0.027753686036426712,
"grad_norm": 1.7995883226394653,
"learning_rate": 0.0003418181818181818,
"loss": 0.6741,
"step": 80
},
{
"epoch": 0.027753686036426712,
"eval_accuracy": 0.49470946192741394,
"eval_loss": 0.7157939076423645,
"eval_runtime": 669.7623,
"eval_samples_per_second": 8.608,
"eval_steps_per_second": 2.153,
"step": 80
},
{
"epoch": 0.031222896790980052,
"grad_norm": 1.0423110723495483,
"learning_rate": 0.00033454545454545456,
"loss": 0.6865,
"step": 90
},
{
"epoch": 0.031222896790980052,
"eval_accuracy": 0.49470946192741394,
"eval_loss": 0.7069694399833679,
"eval_runtime": 671.5859,
"eval_samples_per_second": 8.584,
"eval_steps_per_second": 2.147,
"step": 90
},
{
"epoch": 0.03469210754553339,
"grad_norm": 0.3134685158729553,
"learning_rate": 0.0003272727272727273,
"loss": 0.6556,
"step": 100
},
{
"epoch": 0.03469210754553339,
"eval_accuracy": 0.5052905678749084,
"eval_loss": 0.663440465927124,
"eval_runtime": 674.0916,
"eval_samples_per_second": 8.552,
"eval_steps_per_second": 2.139,
"step": 100
},
{
"epoch": 0.03816131830008673,
"grad_norm": 0.2366938591003418,
"learning_rate": 0.00032,
"loss": 0.6998,
"step": 110
},
{
"epoch": 0.03816131830008673,
"eval_accuracy": 0.4983521103858948,
"eval_loss": 0.7156180739402771,
"eval_runtime": 669.1584,
"eval_samples_per_second": 8.615,
"eval_steps_per_second": 2.155,
"step": 110
},
{
"epoch": 0.04163052905464007,
"grad_norm": 0.8924188017845154,
"learning_rate": 0.00031272727272727273,
"loss": 0.673,
"step": 120
},
{
"epoch": 0.04163052905464007,
"eval_accuracy": 0.7351257801055908,
"eval_loss": 0.5488757491111755,
"eval_runtime": 674.8075,
"eval_samples_per_second": 8.543,
"eval_steps_per_second": 2.137,
"step": 120
},
{
"epoch": 0.045099739809193407,
"grad_norm": 1.685569405555725,
"learning_rate": 0.0003054545454545455,
"loss": 0.6757,
"step": 130
},
{
"epoch": 0.045099739809193407,
"eval_accuracy": 0.6183868050575256,
"eval_loss": 0.6621639132499695,
"eval_runtime": 676.2118,
"eval_samples_per_second": 8.525,
"eval_steps_per_second": 2.132,
"step": 130
},
{
"epoch": 0.04856895056374675,
"grad_norm": 0.9425109028816223,
"learning_rate": 0.0002981818181818182,
"loss": 0.7129,
"step": 140
},
{
"epoch": 0.04856895056374675,
"eval_accuracy": 0.7564613819122314,
"eval_loss": 0.5819193720817566,
"eval_runtime": 671.3349,
"eval_samples_per_second": 8.587,
"eval_steps_per_second": 2.148,
"step": 140
},
{
"epoch": 0.05203816131830009,
"grad_norm": 1.6216133832931519,
"learning_rate": 0.0002909090909090909,
"loss": 0.5978,
"step": 150
},
{
"epoch": 0.05203816131830009,
"eval_accuracy": 0.5904596447944641,
"eval_loss": 0.6498456597328186,
"eval_runtime": 674.7362,
"eval_samples_per_second": 8.544,
"eval_steps_per_second": 2.137,
"step": 150
},
{
"epoch": 0.055507372072853424,
"grad_norm": 0.8777738213539124,
"learning_rate": 0.0002836363636363637,
"loss": 0.6108,
"step": 160
},
{
"epoch": 0.055507372072853424,
"eval_accuracy": 0.6152645349502563,
"eval_loss": 0.6110721230506897,
"eval_runtime": 672.1883,
"eval_samples_per_second": 8.576,
"eval_steps_per_second": 2.145,
"step": 160
},
{
"epoch": 0.05897658282740677,
"grad_norm": 0.8297567367553711,
"learning_rate": 0.0002763636363636364,
"loss": 0.4967,
"step": 170
},
{
"epoch": 0.05897658282740677,
"eval_accuracy": 0.7484822273254395,
"eval_loss": 0.5142812728881836,
"eval_runtime": 669.9409,
"eval_samples_per_second": 8.605,
"eval_steps_per_second": 2.152,
"step": 170
},
{
"epoch": 0.062445793581960105,
"grad_norm": 0.6402560472488403,
"learning_rate": 0.0002690909090909091,
"loss": 0.3308,
"step": 180
},
{
"epoch": 0.062445793581960105,
"eval_accuracy": 0.7941023707389832,
"eval_loss": 0.5033007264137268,
"eval_runtime": 674.7774,
"eval_samples_per_second": 8.544,
"eval_steps_per_second": 2.137,
"step": 180
},
{
"epoch": 0.06591500433651344,
"grad_norm": 3.098665475845337,
"learning_rate": 0.00026181818181818185,
"loss": 0.5915,
"step": 190
},
{
"epoch": 0.06591500433651344,
"eval_accuracy": 0.7613183259963989,
"eval_loss": 0.5313282012939453,
"eval_runtime": 674.9752,
"eval_samples_per_second": 8.541,
"eval_steps_per_second": 2.136,
"step": 190
},
{
"epoch": 0.06938421509106678,
"grad_norm": 2.2409965991973877,
"learning_rate": 0.00025454545454545456,
"loss": 0.502,
"step": 200
},
{
"epoch": 0.06938421509106678,
"eval_accuracy": 0.8124891519546509,
"eval_loss": 0.42527222633361816,
"eval_runtime": 675.4293,
"eval_samples_per_second": 8.535,
"eval_steps_per_second": 2.135,
"step": 200
},
{
"epoch": 0.07285342584562012,
"grad_norm": 0.5789617896080017,
"learning_rate": 0.00024727272727272727,
"loss": 0.5021,
"step": 210
},
{
"epoch": 0.07285342584562012,
"eval_accuracy": 0.8084995746612549,
"eval_loss": 0.4414755403995514,
"eval_runtime": 675.3173,
"eval_samples_per_second": 8.537,
"eval_steps_per_second": 2.135,
"step": 210
},
{
"epoch": 0.07632263660017347,
"grad_norm": 0.26364243030548096,
"learning_rate": 0.00024,
"loss": 0.4129,
"step": 220
},
{
"epoch": 0.07632263660017347,
"eval_accuracy": 0.7996530532836914,
"eval_loss": 0.4404090344905853,
"eval_runtime": 669.8525,
"eval_samples_per_second": 8.606,
"eval_steps_per_second": 2.153,
"step": 220
},
{
"epoch": 0.0797918473547268,
"grad_norm": 1.726539134979248,
"learning_rate": 0.00023272727272727271,
"loss": 0.4352,
"step": 230
},
{
"epoch": 0.0797918473547268,
"eval_accuracy": 0.7849089503288269,
"eval_loss": 0.49116840958595276,
"eval_runtime": 669.7677,
"eval_samples_per_second": 8.607,
"eval_steps_per_second": 2.153,
"step": 230
},
{
"epoch": 0.08326105810928014,
"grad_norm": 0.7366547584533691,
"learning_rate": 0.00022545454545454545,
"loss": 0.6708,
"step": 240
},
{
"epoch": 0.08326105810928014,
"eval_accuracy": 0.8705984354019165,
"eval_loss": 0.5467382073402405,
"eval_runtime": 667.648,
"eval_samples_per_second": 8.635,
"eval_steps_per_second": 2.16,
"step": 240
},
{
"epoch": 0.08673026886383348,
"grad_norm": 4.697054862976074,
"learning_rate": 0.00021818181818181818,
"loss": 0.6336,
"step": 250
},
{
"epoch": 0.08673026886383348,
"eval_accuracy": 0.8832610845565796,
"eval_loss": 0.3458012044429779,
"eval_runtime": 666.8811,
"eval_samples_per_second": 8.645,
"eval_steps_per_second": 2.162,
"step": 250
},
{
"epoch": 0.09019947961838681,
"grad_norm": 0.3141838014125824,
"learning_rate": 0.0002109090909090909,
"loss": 0.4794,
"step": 260
},
{
"epoch": 0.09019947961838681,
"eval_accuracy": 0.8700780868530273,
"eval_loss": 0.327676922082901,
"eval_runtime": 671.2259,
"eval_samples_per_second": 8.589,
"eval_steps_per_second": 2.148,
"step": 260
},
{
"epoch": 0.09366869037294015,
"grad_norm": 4.525466442108154,
"learning_rate": 0.00020363636363636363,
"loss": 0.4008,
"step": 270
},
{
"epoch": 0.09366869037294015,
"eval_accuracy": 0.8185603022575378,
"eval_loss": 0.5368410348892212,
"eval_runtime": 668.9566,
"eval_samples_per_second": 8.618,
"eval_steps_per_second": 2.156,
"step": 270
},
{
"epoch": 0.0971379011274935,
"grad_norm": 3.764827251434326,
"learning_rate": 0.00019636363636363636,
"loss": 0.3972,
"step": 280
},
{
"epoch": 0.0971379011274935,
"eval_accuracy": 0.9283608198165894,
"eval_loss": 0.20311547815799713,
"eval_runtime": 673.3421,
"eval_samples_per_second": 8.562,
"eval_steps_per_second": 2.142,
"step": 280
},
{
"epoch": 0.10060711188204684,
"grad_norm": 0.8228343725204468,
"learning_rate": 0.0001890909090909091,
"loss": 0.2846,
"step": 290
},
{
"epoch": 0.10060711188204684,
"eval_accuracy": 0.960797905921936,
"eval_loss": 0.13612762093544006,
"eval_runtime": 673.4488,
"eval_samples_per_second": 8.56,
"eval_steps_per_second": 2.141,
"step": 290
},
{
"epoch": 0.10407632263660017,
"grad_norm": 2.2025303840637207,
"learning_rate": 0.00018181818181818183,
"loss": 0.2268,
"step": 300
},
{
"epoch": 0.10407632263660017,
"eval_accuracy": 0.957328736782074,
"eval_loss": 0.12012926489114761,
"eval_runtime": 673.2169,
"eval_samples_per_second": 8.563,
"eval_steps_per_second": 2.142,
"step": 300
},
{
"epoch": 0.10754553339115351,
"grad_norm": 6.165393829345703,
"learning_rate": 0.00017454545454545454,
"loss": 0.283,
"step": 310
},
{
"epoch": 0.10754553339115351,
"eval_accuracy": 0.8777103424072266,
"eval_loss": 0.3603801131248474,
"eval_runtime": 677.3459,
"eval_samples_per_second": 8.511,
"eval_steps_per_second": 2.129,
"step": 310
},
{
"epoch": 0.11101474414570685,
"grad_norm": 1.3905977010726929,
"learning_rate": 0.00016727272727272728,
"loss": 0.5713,
"step": 320
},
{
"epoch": 0.11101474414570685,
"eval_accuracy": 0.9710320830345154,
"eval_loss": 0.1216258704662323,
"eval_runtime": 674.6755,
"eval_samples_per_second": 8.545,
"eval_steps_per_second": 2.137,
"step": 320
},
{
"epoch": 0.11448395490026018,
"grad_norm": 0.8541626930236816,
"learning_rate": 0.00016,
"loss": 0.1798,
"step": 330
},
{
"epoch": 0.11448395490026018,
"eval_accuracy": 0.96131831407547,
"eval_loss": 0.179531529545784,
"eval_runtime": 675.7124,
"eval_samples_per_second": 8.532,
"eval_steps_per_second": 2.134,
"step": 330
},
{
"epoch": 0.11795316565481354,
"grad_norm": 0.44364920258522034,
"learning_rate": 0.00015272727272727275,
"loss": 0.1716,
"step": 340
},
{
"epoch": 0.11795316565481354,
"eval_accuracy": 0.9774501323699951,
"eval_loss": 0.10663458704948425,
"eval_runtime": 678.1501,
"eval_samples_per_second": 8.501,
"eval_steps_per_second": 2.126,
"step": 340
},
{
"epoch": 0.12142237640936687,
"grad_norm": 0.32369139790534973,
"learning_rate": 0.00014545454545454546,
"loss": 0.1507,
"step": 350
},
{
"epoch": 0.12142237640936687,
"eval_accuracy": 0.9649609923362732,
"eval_loss": 0.1186453253030777,
"eval_runtime": 679.7025,
"eval_samples_per_second": 8.482,
"eval_steps_per_second": 2.122,
"step": 350
},
{
"epoch": 0.12489158716392021,
"grad_norm": 0.11126814782619476,
"learning_rate": 0.0001381818181818182,
"loss": 0.1163,
"step": 360
},
{
"epoch": 0.12489158716392021,
"eval_accuracy": 0.9701647758483887,
"eval_loss": 0.0995645821094513,
"eval_runtime": 680.1145,
"eval_samples_per_second": 8.477,
"eval_steps_per_second": 2.12,
"step": 360
},
{
"epoch": 0.12836079791847355,
"grad_norm": 0.15057525038719177,
"learning_rate": 0.00013090909090909093,
"loss": 0.0297,
"step": 370
},
{
"epoch": 0.12836079791847355,
"eval_accuracy": 0.9784908890724182,
"eval_loss": 0.07483232766389847,
"eval_runtime": 676.5662,
"eval_samples_per_second": 8.521,
"eval_steps_per_second": 2.131,
"step": 370
},
{
"epoch": 0.13183000867302688,
"grad_norm": 0.17313919961452484,
"learning_rate": 0.00012363636363636364,
"loss": 0.0742,
"step": 380
},
{
"epoch": 0.13183000867302688,
"eval_accuracy": 0.9791847467422485,
"eval_loss": 0.08153310418128967,
"eval_runtime": 678.0332,
"eval_samples_per_second": 8.503,
"eval_steps_per_second": 2.127,
"step": 380
},
{
"epoch": 0.13529921942758022,
"grad_norm": 0.024125000461935997,
"learning_rate": 0.00011636363636363636,
"loss": 0.0745,
"step": 390
},
{
"epoch": 0.13529921942758022,
"eval_accuracy": 0.9774501323699951,
"eval_loss": 0.10447587072849274,
"eval_runtime": 682.8191,
"eval_samples_per_second": 8.443,
"eval_steps_per_second": 2.112,
"step": 390
},
{
"epoch": 0.13876843018213356,
"grad_norm": 0.07723889499902725,
"learning_rate": 0.00010909090909090909,
"loss": 0.2459,
"step": 400
},
{
"epoch": 0.13876843018213356,
"eval_accuracy": 0.9791847467422485,
"eval_loss": 0.09286625683307648,
"eval_runtime": 680.3478,
"eval_samples_per_second": 8.474,
"eval_steps_per_second": 2.12,
"step": 400
},
{
"epoch": 0.1422376409366869,
"grad_norm": 0.28492438793182373,
"learning_rate": 0.00010181818181818181,
"loss": 0.0129,
"step": 410
},
{
"epoch": 0.1422376409366869,
"eval_accuracy": 0.9798786044120789,
"eval_loss": 0.08954403549432755,
"eval_runtime": 675.1468,
"eval_samples_per_second": 8.539,
"eval_steps_per_second": 2.136,
"step": 410
},
{
"epoch": 0.14570685169124023,
"grad_norm": 0.06717664748430252,
"learning_rate": 9.454545454545455e-05,
"loss": 0.027,
"step": 420
},
{
"epoch": 0.14570685169124023,
"eval_accuracy": 0.9805724024772644,
"eval_loss": 0.06631265580654144,
"eval_runtime": 677.1592,
"eval_samples_per_second": 8.514,
"eval_steps_per_second": 2.129,
"step": 420
},
{
"epoch": 0.1491760624457936,
"grad_norm": 0.09406604617834091,
"learning_rate": 8.727272727272727e-05,
"loss": 0.0684,
"step": 430
},
{
"epoch": 0.1491760624457936,
"eval_accuracy": 0.9798786044120789,
"eval_loss": 0.06788154691457748,
"eval_runtime": 678.5247,
"eval_samples_per_second": 8.496,
"eval_steps_per_second": 2.125,
"step": 430
},
{
"epoch": 0.15264527320034693,
"grad_norm": 0.06969747692346573,
"learning_rate": 8e-05,
"loss": 0.3329,
"step": 440
},
{
"epoch": 0.15264527320034693,
"eval_accuracy": 0.9781439900398254,
"eval_loss": 0.06968674063682556,
"eval_runtime": 684.4687,
"eval_samples_per_second": 8.423,
"eval_steps_per_second": 2.107,
"step": 440
},
{
"epoch": 0.15611448395490027,
"grad_norm": 0.11187425255775452,
"learning_rate": 7.272727272727273e-05,
"loss": 0.2407,
"step": 450
},
{
"epoch": 0.15611448395490027,
"eval_accuracy": 0.9810928106307983,
"eval_loss": 0.06031050533056259,
"eval_runtime": 681.7986,
"eval_samples_per_second": 8.456,
"eval_steps_per_second": 2.115,
"step": 450
},
{
"epoch": 0.1595836947094536,
"grad_norm": 0.10134902596473694,
"learning_rate": 6.545454545454546e-05,
"loss": 0.131,
"step": 460
},
{
"epoch": 0.1595836947094536,
"eval_accuracy": 0.9805724024772644,
"eval_loss": 0.08596213907003403,
"eval_runtime": 680.6363,
"eval_samples_per_second": 8.47,
"eval_steps_per_second": 2.119,
"step": 460
},
{
"epoch": 0.16305290546400694,
"grad_norm": 3.860629081726074,
"learning_rate": 5.818181818181818e-05,
"loss": 0.2148,
"step": 470
},
{
"epoch": 0.16305290546400694,
"eval_accuracy": 0.9774501323699951,
"eval_loss": 0.11235988885164261,
"eval_runtime": 686.097,
"eval_samples_per_second": 8.403,
"eval_steps_per_second": 2.102,
"step": 470
},
{
"epoch": 0.16652211621856028,
"grad_norm": 2.909680128097534,
"learning_rate": 5.090909090909091e-05,
"loss": 0.1593,
"step": 480
},
{
"epoch": 0.16652211621856028,
"eval_accuracy": 0.985949695110321,
"eval_loss": 0.06438818573951721,
"eval_runtime": 680.9029,
"eval_samples_per_second": 8.467,
"eval_steps_per_second": 2.118,
"step": 480
},
{
"epoch": 0.16999132697311362,
"grad_norm": 0.10226955264806747,
"learning_rate": 4.3636363636363636e-05,
"loss": 0.0383,
"step": 490
},
{
"epoch": 0.16999132697311362,
"eval_accuracy": 0.9862965941429138,
"eval_loss": 0.050194237381219864,
"eval_runtime": 688.0192,
"eval_samples_per_second": 8.379,
"eval_steps_per_second": 2.096,
"step": 490
},
{
"epoch": 0.17346053772766695,
"grad_norm": 0.13847249746322632,
"learning_rate": 3.6363636363636364e-05,
"loss": 0.0461,
"step": 500
},
{
"epoch": 0.17346053772766695,
"eval_accuracy": 0.9845620393753052,
"eval_loss": 0.04632818326354027,
"eval_runtime": 686.0902,
"eval_samples_per_second": 8.403,
"eval_steps_per_second": 2.102,
"step": 500
},
{
"epoch": 0.1769297484822203,
"grad_norm": 0.061877623200416565,
"learning_rate": 2.909090909090909e-05,
"loss": 0.0163,
"step": 510
},
{
"epoch": 0.1769297484822203,
"eval_accuracy": 0.9830008745193481,
"eval_loss": 0.05002644658088684,
"eval_runtime": 683.5794,
"eval_samples_per_second": 8.434,
"eval_steps_per_second": 2.109,
"step": 510
},
{
"epoch": 0.18039895923677363,
"grad_norm": 0.07484019547700882,
"learning_rate": 2.1818181818181818e-05,
"loss": 0.0373,
"step": 520
},
{
"epoch": 0.18039895923677363,
"eval_accuracy": 0.9835212230682373,
"eval_loss": 0.04930433630943298,
"eval_runtime": 685.8885,
"eval_samples_per_second": 8.405,
"eval_steps_per_second": 2.102,
"step": 520
},
{
"epoch": 0.18386816999132696,
"grad_norm": 0.07109741866588593,
"learning_rate": 1.4545454545454545e-05,
"loss": 0.0101,
"step": 530
},
{
"epoch": 0.18386816999132696,
"eval_accuracy": 0.9845620393753052,
"eval_loss": 0.04636286944150925,
"eval_runtime": 684.6998,
"eval_samples_per_second": 8.42,
"eval_steps_per_second": 2.106,
"step": 530
},
{
"epoch": 0.1873373807458803,
"grad_norm": 3.584275484085083,
"learning_rate": 7.272727272727272e-06,
"loss": 0.1887,
"step": 540
},
{
"epoch": 0.1873373807458803,
"eval_accuracy": 0.986470103263855,
"eval_loss": 0.04420817643404007,
"eval_runtime": 684.4017,
"eval_samples_per_second": 8.423,
"eval_steps_per_second": 2.107,
"step": 540
},
{
"epoch": 0.19080659150043366,
"grad_norm": 0.20406727492809296,
"learning_rate": 0.0,
"loss": 0.0363,
"step": 550
},
{
"epoch": 0.19080659150043366,
"eval_accuracy": 0.9876843094825745,
"eval_loss": 0.043817631900310516,
"eval_runtime": 685.8982,
"eval_samples_per_second": 8.405,
"eval_steps_per_second": 2.102,
"step": 550
}
],
"logging_steps": 10,
"max_steps": 550,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 10,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 8.214558191809199e+17,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}