{
  "best_metric": 1.0,
  "best_model_checkpoint": "wav2vec2-base-lang-id/checkpoint-94",
  "epoch": 10.0,
  "eval_steps": 500,
  "global_step": 940,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.10638297872340426,
      "grad_norm": 3.8006672859191895,
      "learning_rate": 3.1914893617021275e-05,
      "loss": 4.4798,
      "step": 10
    },
    {
      "epoch": 0.2127659574468085,
      "grad_norm": 6.771675109863281,
      "learning_rate": 6.382978723404255e-05,
      "loss": 3.4754,
      "step": 20
    },
    {
      "epoch": 0.3191489361702128,
      "grad_norm": 8.21432113647461,
      "learning_rate": 9.574468085106382e-05,
      "loss": 2.4467,
      "step": 30
    },
    {
      "epoch": 0.425531914893617,
      "grad_norm": 7.680928707122803,
      "learning_rate": 0.0001276595744680851,
      "loss": 1.4232,
      "step": 40
    },
    {
      "epoch": 0.5319148936170213,
      "grad_norm": 2.195053815841675,
      "learning_rate": 0.00015957446808510637,
      "loss": 0.4151,
      "step": 50
    },
    {
      "epoch": 0.6382978723404256,
      "grad_norm": 0.13116297125816345,
      "learning_rate": 0.00019148936170212765,
      "loss": 0.0361,
      "step": 60
    },
    {
      "epoch": 0.7446808510638298,
      "grad_norm": 0.014657862484455109,
      "learning_rate": 0.0002234042553191489,
      "loss": 0.0017,
      "step": 70
    },
    {
      "epoch": 0.851063829787234,
      "grad_norm": 0.004712587222456932,
      "learning_rate": 0.0002553191489361702,
      "loss": 0.0003,
      "step": 80
    },
    {
      "epoch": 0.9574468085106383,
      "grad_norm": 0.0027682166546583176,
      "learning_rate": 0.0002872340425531915,
      "loss": 0.0001,
      "step": 90
    },
    {
      "epoch": 1.0,
      "eval_accuracy": 1.0,
      "eval_loss": 5.837103526573628e-05,
      "eval_runtime": 11.542,
      "eval_samples_per_second": 34.829,
      "eval_steps_per_second": 34.829,
      "step": 94
    },
    {
      "epoch": 1.0638297872340425,
      "grad_norm": 0.0021660495549440384,
      "learning_rate": 0.00029787234042553186,
      "loss": 0.0001,
      "step": 100
    },
    {
      "epoch": 1.1702127659574468,
      "grad_norm": 0.001946191769093275,
      "learning_rate": 0.00029432624113475173,
      "loss": 0.0001,
      "step": 110
    },
    {
      "epoch": 1.2765957446808511,
      "grad_norm": 0.001837807591073215,
      "learning_rate": 0.0002907801418439716,
      "loss": 0.0001,
      "step": 120
    },
    {
      "epoch": 1.3829787234042552,
      "grad_norm": 0.0017718354938551784,
      "learning_rate": 0.0002872340425531915,
      "loss": 0.0001,
      "step": 130
    },
    {
      "epoch": 1.4893617021276595,
      "grad_norm": 0.0017246523639187217,
      "learning_rate": 0.00028368794326241134,
      "loss": 0.0001,
      "step": 140
    },
    {
      "epoch": 1.5957446808510638,
      "grad_norm": 0.0016802914906293154,
      "learning_rate": 0.00028014184397163116,
      "loss": 0.0001,
      "step": 150
    },
    {
      "epoch": 1.702127659574468,
      "grad_norm": 0.0016365655465051532,
      "learning_rate": 0.00027659574468085103,
      "loss": 0.0001,
      "step": 160
    },
    {
      "epoch": 1.8085106382978724,
      "grad_norm": 0.001596157904714346,
      "learning_rate": 0.0002730496453900709,
      "loss": 0.0001,
      "step": 170
    },
    {
      "epoch": 1.9148936170212765,
      "grad_norm": 0.0015629915287718177,
      "learning_rate": 0.00026950354609929077,
      "loss": 0.0001,
      "step": 180
    },
    {
      "epoch": 2.0,
      "eval_accuracy": 1.0,
      "eval_loss": 3.635883331298828e-05,
      "eval_runtime": 10.0577,
      "eval_samples_per_second": 39.969,
      "eval_steps_per_second": 39.969,
      "step": 188
    },
    {
      "epoch": 2.021276595744681,
      "grad_norm": 0.001525467843748629,
      "learning_rate": 0.0002659574468085106,
      "loss": 0.0001,
      "step": 190
    },
    {
      "epoch": 2.127659574468085,
      "grad_norm": 0.0014883955009281635,
      "learning_rate": 0.00026241134751773046,
      "loss": 0.0001,
      "step": 200
    },
    {
      "epoch": 2.2340425531914896,
      "grad_norm": 0.0014570483472198248,
      "learning_rate": 0.00025886524822695033,
      "loss": 0.0,
      "step": 210
    },
    {
      "epoch": 2.3404255319148937,
      "grad_norm": 0.0014193649403750896,
      "learning_rate": 0.0002553191489361702,
      "loss": 0.0,
      "step": 220
    },
    {
      "epoch": 2.4468085106382977,
      "grad_norm": 0.0013909874251112342,
      "learning_rate": 0.00025177304964539007,
      "loss": 0.0,
      "step": 230
    },
    {
      "epoch": 2.5531914893617023,
      "grad_norm": 0.0013578328071162105,
      "learning_rate": 0.0002482269503546099,
      "loss": 0.0,
      "step": 240
    },
    {
      "epoch": 2.6595744680851063,
      "grad_norm": 0.0013242242857813835,
      "learning_rate": 0.00024468085106382976,
      "loss": 0.0,
      "step": 250
    },
    {
      "epoch": 2.7659574468085104,
      "grad_norm": 0.0013041673228144646,
      "learning_rate": 0.00024113475177304963,
      "loss": 0.0,
      "step": 260
    },
    {
      "epoch": 2.872340425531915,
      "grad_norm": 0.0012726597487926483,
      "learning_rate": 0.0002375886524822695,
      "loss": 0.0,
      "step": 270
    },
    {
      "epoch": 2.978723404255319,
      "grad_norm": 0.0012477930868044496,
      "learning_rate": 0.00023404255319148934,
      "loss": 0.0,
      "step": 280
    },
    {
      "epoch": 3.0,
      "eval_accuracy": 1.0,
      "eval_loss": 2.9017082852078602e-05,
      "eval_runtime": 9.9982,
      "eval_samples_per_second": 40.207,
      "eval_steps_per_second": 40.207,
      "step": 282
    },
    {
      "epoch": 3.0851063829787235,
      "grad_norm": 0.0012191747082397342,
      "learning_rate": 0.0002304964539007092,
      "loss": 0.0,
      "step": 290
    },
    {
      "epoch": 3.1914893617021276,
      "grad_norm": 0.0011935862712562084,
      "learning_rate": 0.00022695035460992905,
      "loss": 0.0,
      "step": 300
    },
    {
      "epoch": 3.297872340425532,
      "grad_norm": 0.0011742267524823546,
      "learning_rate": 0.0002234042553191489,
      "loss": 0.0,
      "step": 310
    },
    {
      "epoch": 3.404255319148936,
      "grad_norm": 0.0011481853434816003,
      "learning_rate": 0.00021985815602836877,
      "loss": 0.0,
      "step": 320
    },
    {
      "epoch": 3.5106382978723403,
      "grad_norm": 0.0011253234697505832,
      "learning_rate": 0.00021631205673758864,
      "loss": 0.0,
      "step": 330
    },
    {
      "epoch": 3.617021276595745,
      "grad_norm": 0.0011064092395827174,
      "learning_rate": 0.0002127659574468085,
      "loss": 0.0,
      "step": 340
    },
    {
      "epoch": 3.723404255319149,
      "grad_norm": 0.0010885618394240737,
      "learning_rate": 0.00020921985815602835,
      "loss": 0.0,
      "step": 350
    },
    {
      "epoch": 3.829787234042553,
      "grad_norm": 0.001064595184288919,
      "learning_rate": 0.0002056737588652482,
      "loss": 0.0,
      "step": 360
    },
    {
      "epoch": 3.9361702127659575,
      "grad_norm": 0.0010447927052155137,
      "learning_rate": 0.00020212765957446807,
      "loss": 0.0,
      "step": 370
    },
    {
      "epoch": 4.0,
      "eval_accuracy": 1.0,
      "eval_loss": 2.372264862060547e-05,
      "eval_runtime": 9.9011,
      "eval_samples_per_second": 40.602,
      "eval_steps_per_second": 40.602,
      "step": 376
    },
    {
      "epoch": 4.042553191489362,
      "grad_norm": 0.001030342886224389,
      "learning_rate": 0.0001985815602836879,
      "loss": 0.0,
      "step": 380
    },
    {
      "epoch": 4.148936170212766,
      "grad_norm": 0.0010120035149157047,
      "learning_rate": 0.0001950354609929078,
      "loss": 0.0,
      "step": 390
    },
    {
      "epoch": 4.25531914893617,
      "grad_norm": 0.0009937717113643885,
      "learning_rate": 0.00019148936170212765,
      "loss": 0.0,
      "step": 400
    },
    {
      "epoch": 4.361702127659575,
      "grad_norm": 0.0009758667438291013,
      "learning_rate": 0.0001879432624113475,
      "loss": 0.0,
      "step": 410
    },
    {
      "epoch": 4.468085106382979,
      "grad_norm": 0.0009558630990795791,
      "learning_rate": 0.00018439716312056736,
      "loss": 0.0,
      "step": 420
    },
    {
      "epoch": 4.574468085106383,
      "grad_norm": 0.0009456143015995622,
      "learning_rate": 0.0001808510638297872,
      "loss": 0.0,
      "step": 430
    },
    {
      "epoch": 4.680851063829787,
      "grad_norm": 0.0009262987296096981,
      "learning_rate": 0.00017730496453900708,
      "loss": 0.0,
      "step": 440
    },
    {
      "epoch": 4.787234042553192,
      "grad_norm": 0.0009140170877799392,
      "learning_rate": 0.00017375886524822692,
      "loss": 0.0,
      "step": 450
    },
    {
      "epoch": 4.8936170212765955,
      "grad_norm": 0.000904095999430865,
      "learning_rate": 0.00017021276595744682,
      "loss": 0.0,
      "step": 460
    },
    {
      "epoch": 5.0,
      "grad_norm": 0.0008834420586936176,
      "learning_rate": 0.00016666666666666666,
      "loss": 0.0,
      "step": 470
    },
    {
      "epoch": 5.0,
      "eval_accuracy": 1.0,
      "eval_loss": 2.014636993408203e-05,
      "eval_runtime": 9.9558,
      "eval_samples_per_second": 40.379,
      "eval_steps_per_second": 40.379,
      "step": 470
    },
    {
      "epoch": 5.1063829787234045,
      "grad_norm": 0.0008752320427447557,
      "learning_rate": 0.0001631205673758865,
      "loss": 0.0,
      "step": 480
    },
    {
      "epoch": 5.212765957446808,
      "grad_norm": 0.0008663799380883574,
      "learning_rate": 0.00015957446808510637,
      "loss": 0.0,
      "step": 490
    },
    {
      "epoch": 5.319148936170213,
      "grad_norm": 0.0008535313536413014,
      "learning_rate": 0.00015602836879432622,
      "loss": 0.0,
      "step": 500
    },
    {
      "epoch": 5.425531914893617,
      "grad_norm": 0.0008452454931102693,
      "learning_rate": 0.00015248226950354606,
      "loss": 0.0,
      "step": 510
    },
    {
      "epoch": 5.531914893617021,
      "grad_norm": 0.0008268958772532642,
      "learning_rate": 0.00014893617021276593,
      "loss": 0.0,
      "step": 520
    },
    {
      "epoch": 5.638297872340425,
      "grad_norm": 0.0008181555895134807,
      "learning_rate": 0.0001453900709219858,
      "loss": 0.0,
      "step": 530
    },
    {
      "epoch": 5.74468085106383,
      "grad_norm": 0.0008063354762271047,
      "learning_rate": 0.00014184397163120567,
      "loss": 0.0,
      "step": 540
    },
    {
      "epoch": 5.851063829787234,
      "grad_norm": 0.0007958101341500878,
      "learning_rate": 0.00013829787234042552,
      "loss": 0.0,
      "step": 550
    },
    {
      "epoch": 5.957446808510638,
      "grad_norm": 0.0007865344523452222,
      "learning_rate": 0.00013475177304964539,
      "loss": 0.0,
      "step": 560
    },
    {
      "epoch": 6.0,
      "eval_accuracy": 1.0,
      "eval_loss": 1.7642974853515625e-05,
      "eval_runtime": 9.9723,
      "eval_samples_per_second": 40.312,
      "eval_steps_per_second": 40.312,
      "step": 564
    },
    {
      "epoch": 6.0638297872340425,
      "grad_norm": 0.0007770307711325586,
      "learning_rate": 0.00013120567375886523,
      "loss": 0.0,
      "step": 570
    },
    {
      "epoch": 6.170212765957447,
      "grad_norm": 0.0007697618566453457,
      "learning_rate": 0.0001276595744680851,
      "loss": 0.0,
      "step": 580
    },
    {
      "epoch": 6.276595744680851,
      "grad_norm": 0.0007645227597095072,
      "learning_rate": 0.00012411347517730494,
      "loss": 0.0,
      "step": 590
    },
    {
      "epoch": 6.382978723404255,
      "grad_norm": 0.0007574139162898064,
      "learning_rate": 0.00012056737588652481,
      "loss": 0.0,
      "step": 600
    },
    {
      "epoch": 6.48936170212766,
      "grad_norm": 0.000741077761631459,
      "learning_rate": 0.00011702127659574467,
      "loss": 0.0,
      "step": 610
    },
    {
      "epoch": 6.595744680851064,
      "grad_norm": 0.000734071247279644,
      "learning_rate": 0.00011347517730496453,
      "loss": 0.0,
      "step": 620
    },
    {
      "epoch": 6.702127659574468,
      "grad_norm": 0.000728779355995357,
      "learning_rate": 0.00010992907801418438,
      "loss": 0.0,
      "step": 630
    },
    {
      "epoch": 6.808510638297872,
      "grad_norm": 0.0007212815107777715,
      "learning_rate": 0.00010638297872340425,
      "loss": 0.0,
      "step": 640
    },
    {
      "epoch": 6.914893617021277,
      "grad_norm": 0.0007169453892856836,
      "learning_rate": 0.0001028368794326241,
      "loss": 0.0,
      "step": 650
    },
    {
      "epoch": 7.0,
      "eval_accuracy": 1.0,
      "eval_loss": 1.596455513208639e-05,
      "eval_runtime": 9.9126,
      "eval_samples_per_second": 40.554,
      "eval_steps_per_second": 40.554,
      "step": 658
    },
    {
      "epoch": 7.0212765957446805,
      "grad_norm": 0.000711097614839673,
      "learning_rate": 9.929078014184395e-05,
      "loss": 0.0,
      "step": 660
    },
    {
      "epoch": 7.127659574468085,
      "grad_norm": 0.0007094301981851459,
      "learning_rate": 9.574468085106382e-05,
      "loss": 0.0,
      "step": 670
    },
    {
      "epoch": 7.23404255319149,
      "grad_norm": 0.0006968958768993616,
      "learning_rate": 9.219858156028368e-05,
      "loss": 0.0,
      "step": 680
    },
    {
      "epoch": 7.340425531914893,
      "grad_norm": 0.0006909930380061269,
      "learning_rate": 8.865248226950354e-05,
      "loss": 0.0,
      "step": 690
    },
    {
      "epoch": 7.446808510638298,
      "grad_norm": 0.0006865290924906731,
      "learning_rate": 8.510638297872341e-05,
      "loss": 0.0,
      "step": 700
    },
    {
      "epoch": 7.553191489361702,
      "grad_norm": 0.0006844609742984176,
      "learning_rate": 8.156028368794325e-05,
      "loss": 0.0,
      "step": 710
    },
    {
      "epoch": 7.659574468085106,
      "grad_norm": 0.0006792008061893284,
      "learning_rate": 7.801418439716311e-05,
      "loss": 0.0,
      "step": 720
    },
    {
      "epoch": 7.76595744680851,
      "grad_norm": 0.0006731408648192883,
      "learning_rate": 7.446808510638297e-05,
      "loss": 0.0,
      "step": 730
    },
    {
      "epoch": 7.872340425531915,
      "grad_norm": 0.0006701324600726366,
      "learning_rate": 7.092198581560284e-05,
      "loss": 0.0,
      "step": 740
    },
    {
      "epoch": 7.9787234042553195,
      "grad_norm": 0.0006633326993323863,
      "learning_rate": 6.737588652482269e-05,
      "loss": 0.0,
      "step": 750
    },
    {
      "epoch": 8.0,
      "eval_accuracy": 1.0,
      "eval_loss": 1.4901161193847656e-05,
      "eval_runtime": 9.9494,
      "eval_samples_per_second": 40.405,
      "eval_steps_per_second": 40.405,
      "step": 752
    },
    {
      "epoch": 8.085106382978724,
      "grad_norm": 0.0006632324075326324,
      "learning_rate": 6.382978723404255e-05,
      "loss": 0.0,
      "step": 760
    },
    {
      "epoch": 8.191489361702128,
      "grad_norm": 0.000655403477139771,
      "learning_rate": 6.028368794326241e-05,
      "loss": 0.0,
      "step": 770
    },
    {
      "epoch": 8.297872340425531,
      "grad_norm": 0.0006541645270772278,
      "learning_rate": 5.6737588652482264e-05,
      "loss": 0.0,
      "step": 780
    },
    {
      "epoch": 8.404255319148936,
      "grad_norm": 0.0006483749020844698,
      "learning_rate": 5.319148936170213e-05,
      "loss": 0.0,
      "step": 790
    },
    {
      "epoch": 8.51063829787234,
      "grad_norm": 0.0006471078377217054,
      "learning_rate": 4.964539007092198e-05,
      "loss": 0.0,
      "step": 800
    },
    {
      "epoch": 8.617021276595745,
      "grad_norm": 0.0006466888007707894,
      "learning_rate": 4.609929078014184e-05,
      "loss": 0.0,
      "step": 810
    },
    {
      "epoch": 8.72340425531915,
      "grad_norm": 0.0006407785695046186,
      "learning_rate": 4.2553191489361704e-05,
      "loss": 0.0,
      "step": 820
    },
    {
      "epoch": 8.829787234042554,
      "grad_norm": 0.0006384547450579703,
      "learning_rate": 3.9007092198581555e-05,
      "loss": 0.0,
      "step": 830
    },
    {
      "epoch": 8.936170212765958,
      "grad_norm": 0.0006379844271577895,
      "learning_rate": 3.546099290780142e-05,
      "loss": 0.0,
      "step": 840
    },
    {
      "epoch": 9.0,
      "eval_accuracy": 1.0,
      "eval_loss": 1.4424324035644531e-05,
      "eval_runtime": 9.9315,
      "eval_samples_per_second": 40.477,
      "eval_steps_per_second": 40.477,
      "step": 846
    },
    {
      "epoch": 9.042553191489361,
      "grad_norm": 0.0006334384088404477,
      "learning_rate": 3.1914893617021275e-05,
      "loss": 0.0,
      "step": 850
    },
    {
      "epoch": 9.148936170212766,
      "grad_norm": 0.0006309397285804152,
      "learning_rate": 2.8368794326241132e-05,
      "loss": 0.0,
      "step": 860
    },
    {
      "epoch": 9.25531914893617,
      "grad_norm": 0.0006310406024567783,
      "learning_rate": 2.482269503546099e-05,
      "loss": 0.0,
      "step": 870
    },
    {
      "epoch": 9.361702127659575,
      "grad_norm": 0.0006308447918854654,
      "learning_rate": 2.1276595744680852e-05,
      "loss": 0.0,
      "step": 880
    },
    {
      "epoch": 9.46808510638298,
      "grad_norm": 0.0006286040297709405,
      "learning_rate": 1.773049645390071e-05,
      "loss": 0.0,
      "step": 890
    },
    {
      "epoch": 9.574468085106384,
      "grad_norm": 0.0006258686189539731,
      "learning_rate": 1.4184397163120566e-05,
      "loss": 0.0,
      "step": 900
    },
    {
      "epoch": 9.680851063829786,
      "grad_norm": 0.0006251951563172042,
      "learning_rate": 1.0638297872340426e-05,
      "loss": 0.0,
      "step": 910
    },
    {
      "epoch": 9.787234042553191,
      "grad_norm": 0.0006240535294637084,
      "learning_rate": 7.092198581560283e-06,
      "loss": 0.0,
      "step": 920
    },
    {
      "epoch": 9.893617021276595,
      "grad_norm": 0.0006240674993023276,
      "learning_rate": 3.5460992907801415e-06,
      "loss": 0.0,
      "step": 930
    },
    {
      "epoch": 10.0,
      "grad_norm": 0.0006238200003281236,
      "learning_rate": 0.0,
      "loss": 0.0,
      "step": 940
    },
    {
      "epoch": 10.0,
      "eval_accuracy": 1.0,
      "eval_loss": 1.4065806681173854e-05,
      "eval_runtime": 9.9903,
      "eval_samples_per_second": 40.239,
      "eval_steps_per_second": 40.239,
      "step": 940
    },
    {
      "epoch": 10.0,
      "step": 940,
      "total_flos": 4.315241031363276e+18,
      "train_loss": 0.13065080859353445,
      "train_runtime": 1086.1024,
      "train_samples_per_second": 27.677,
      "train_steps_per_second": 0.865
    }
  ],
  "logging_steps": 10,
  "max_steps": 940,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 10,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 4.315241031363276e+18,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}