| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 49.44692737430167, | |
| "eval_steps": 100, | |
| "global_step": 4450, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 1.111731843575419, | |
| "grad_norm": 0.48533013463020325, | |
| "learning_rate": 0.00028199999999999997, | |
| "loss": 11.3881, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 1.111731843575419, | |
| "eval_cer": 0.9926923478442427, | |
| "eval_loss": 3.8517303466796875, | |
| "eval_runtime": 12.8394, | |
| "eval_samples_per_second": 22.197, | |
| "eval_steps_per_second": 2.804, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 2.223463687150838, | |
| "grad_norm": 2.8934638500213623, | |
| "learning_rate": 0.0002935172413793103, | |
| "loss": 3.9077, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 2.223463687150838, | |
| "eval_cer": 0.9924835577826495, | |
| "eval_loss": 3.6587204933166504, | |
| "eval_runtime": 12.8147, | |
| "eval_samples_per_second": 22.24, | |
| "eval_steps_per_second": 2.809, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 3.335195530726257, | |
| "grad_norm": 2.12126088142395, | |
| "learning_rate": 0.0002866206896551724, | |
| "loss": 3.8977, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 3.335195530726257, | |
| "eval_cer": 0.9926923478442427, | |
| "eval_loss": 3.657623291015625, | |
| "eval_runtime": 12.8572, | |
| "eval_samples_per_second": 22.167, | |
| "eval_steps_per_second": 2.8, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 4.446927374301676, | |
| "grad_norm": 1.5865833759307861, | |
| "learning_rate": 0.00027972413793103445, | |
| "loss": 3.8192, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 4.446927374301676, | |
| "eval_cer": 0.9929011379058357, | |
| "eval_loss": 3.663019895553589, | |
| "eval_runtime": 12.8178, | |
| "eval_samples_per_second": 22.235, | |
| "eval_steps_per_second": 2.809, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 5.558659217877095, | |
| "grad_norm": 0.37739208340644836, | |
| "learning_rate": 0.00027282758620689656, | |
| "loss": 3.7035, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 5.558659217877095, | |
| "eval_cer": 0.9789122037791002, | |
| "eval_loss": 3.678802967071533, | |
| "eval_runtime": 12.8176, | |
| "eval_samples_per_second": 22.235, | |
| "eval_steps_per_second": 2.809, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 6.670391061452514, | |
| "grad_norm": 0.6414264440536499, | |
| "learning_rate": 0.00026593103448275857, | |
| "loss": 3.6207, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 6.670391061452514, | |
| "eval_cer": 0.9543793715419147, | |
| "eval_loss": 3.6347501277923584, | |
| "eval_runtime": 12.7764, | |
| "eval_samples_per_second": 22.307, | |
| "eval_steps_per_second": 2.818, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 7.782122905027933, | |
| "grad_norm": 0.7569805979728699, | |
| "learning_rate": 0.0002590344827586207, | |
| "loss": 3.5701, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 7.782122905027933, | |
| "eval_cer": 0.9704562062845808, | |
| "eval_loss": 3.594348669052124, | |
| "eval_runtime": 12.8842, | |
| "eval_samples_per_second": 22.12, | |
| "eval_steps_per_second": 2.794, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 8.893854748603353, | |
| "grad_norm": 0.617978572845459, | |
| "learning_rate": 0.00025213793103448273, | |
| "loss": 3.5264, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 8.893854748603353, | |
| "eval_cer": 0.9584507777429795, | |
| "eval_loss": 3.557063579559326, | |
| "eval_runtime": 12.7852, | |
| "eval_samples_per_second": 22.291, | |
| "eval_steps_per_second": 2.816, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "grad_norm": 1.1942850351333618, | |
| "learning_rate": 0.00024524137931034484, | |
| "loss": 3.3878, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "eval_cer": 0.961895813759265, | |
| "eval_loss": 3.5423524379730225, | |
| "eval_runtime": 12.8758, | |
| "eval_samples_per_second": 22.135, | |
| "eval_steps_per_second": 2.796, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 11.111731843575418, | |
| "grad_norm": 0.859997570514679, | |
| "learning_rate": 0.00023834482758620687, | |
| "loss": 3.2684, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 11.111731843575418, | |
| "eval_cer": 0.9471761144169537, | |
| "eval_loss": 3.390798568725586, | |
| "eval_runtime": 12.7862, | |
| "eval_samples_per_second": 22.29, | |
| "eval_steps_per_second": 2.816, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 12.223463687150838, | |
| "grad_norm": 2.701083183288574, | |
| "learning_rate": 0.00023144827586206896, | |
| "loss": 3.0432, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 12.223463687150838, | |
| "eval_cer": 0.8914291679716045, | |
| "eval_loss": 3.5293474197387695, | |
| "eval_runtime": 12.8801, | |
| "eval_samples_per_second": 22.127, | |
| "eval_steps_per_second": 2.795, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 13.335195530726256, | |
| "grad_norm": 1.2649680376052856, | |
| "learning_rate": 0.00022455172413793101, | |
| "loss": 2.7166, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 13.335195530726256, | |
| "eval_cer": 0.8706545568430942, | |
| "eval_loss": 2.8964927196502686, | |
| "eval_runtime": 12.8464, | |
| "eval_samples_per_second": 22.185, | |
| "eval_steps_per_second": 2.802, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 14.446927374301676, | |
| "grad_norm": 2.458191156387329, | |
| "learning_rate": 0.0002176551724137931, | |
| "loss": 2.372, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 14.446927374301676, | |
| "eval_cer": 0.8353690364338657, | |
| "eval_loss": 2.9027018547058105, | |
| "eval_runtime": 12.8524, | |
| "eval_samples_per_second": 22.175, | |
| "eval_steps_per_second": 2.801, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 15.558659217877095, | |
| "grad_norm": 2.145862102508545, | |
| "learning_rate": 0.00021075862068965515, | |
| "loss": 2.1382, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 15.558659217877095, | |
| "eval_cer": 0.8355778264954589, | |
| "eval_loss": 2.767353057861328, | |
| "eval_runtime": 12.8847, | |
| "eval_samples_per_second": 22.119, | |
| "eval_steps_per_second": 2.794, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 16.670391061452513, | |
| "grad_norm": 2.6955654621124268, | |
| "learning_rate": 0.0002038620689655172, | |
| "loss": 1.9442, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 16.670391061452513, | |
| "eval_cer": 0.8006054911786199, | |
| "eval_loss": 2.9376468658447266, | |
| "eval_runtime": 12.8501, | |
| "eval_samples_per_second": 22.179, | |
| "eval_steps_per_second": 2.802, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 17.782122905027933, | |
| "grad_norm": 1.8743767738342285, | |
| "learning_rate": 0.0001969655172413793, | |
| "loss": 1.7881, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 17.782122905027933, | |
| "eval_cer": 0.722831193235202, | |
| "eval_loss": 2.7831432819366455, | |
| "eval_runtime": 12.7796, | |
| "eval_samples_per_second": 22.301, | |
| "eval_steps_per_second": 2.817, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 18.893854748603353, | |
| "grad_norm": 2.472550392150879, | |
| "learning_rate": 0.00019006896551724138, | |
| "loss": 1.6766, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 18.893854748603353, | |
| "eval_cer": 0.7167762814490031, | |
| "eval_loss": 2.8390402793884277, | |
| "eval_runtime": 12.8876, | |
| "eval_samples_per_second": 22.114, | |
| "eval_steps_per_second": 2.793, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "grad_norm": 8.466638565063477, | |
| "learning_rate": 0.00018317241379310343, | |
| "loss": 1.5416, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "eval_cer": 0.730452030483349, | |
| "eval_loss": 3.0157971382141113, | |
| "eval_runtime": 12.7638, | |
| "eval_samples_per_second": 22.329, | |
| "eval_steps_per_second": 2.82, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 21.11173184357542, | |
| "grad_norm": 2.14225172996521, | |
| "learning_rate": 0.0001762758620689655, | |
| "loss": 1.4251, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 21.11173184357542, | |
| "eval_cer": 0.6958972752896962, | |
| "eval_loss": 2.983879327774048, | |
| "eval_runtime": 12.9131, | |
| "eval_samples_per_second": 22.071, | |
| "eval_steps_per_second": 2.788, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 22.223463687150836, | |
| "grad_norm": 4.268678188323975, | |
| "learning_rate": 0.00016937931034482757, | |
| "loss": 1.32, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 22.223463687150836, | |
| "eval_cer": 0.7089466541392629, | |
| "eval_loss": 2.9795498847961426, | |
| "eval_runtime": 13.1212, | |
| "eval_samples_per_second": 21.721, | |
| "eval_steps_per_second": 2.744, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 23.335195530726256, | |
| "grad_norm": 2.3187201023101807, | |
| "learning_rate": 0.00016248275862068966, | |
| "loss": 1.2355, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 23.335195530726256, | |
| "eval_cer": 0.7216828478964401, | |
| "eval_loss": 3.145298957824707, | |
| "eval_runtime": 12.8542, | |
| "eval_samples_per_second": 22.172, | |
| "eval_steps_per_second": 2.801, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 24.446927374301676, | |
| "grad_norm": 4.309772968292236, | |
| "learning_rate": 0.00015558620689655171, | |
| "loss": 1.1704, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 24.446927374301676, | |
| "eval_cer": 0.7027873473222674, | |
| "eval_loss": 3.060805320739746, | |
| "eval_runtime": 12.8156, | |
| "eval_samples_per_second": 22.238, | |
| "eval_steps_per_second": 2.809, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 25.558659217877096, | |
| "grad_norm": 2.0230820178985596, | |
| "learning_rate": 0.00014868965517241377, | |
| "loss": 1.0789, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 25.558659217877096, | |
| "eval_cer": 0.7133312454327174, | |
| "eval_loss": 3.070970058441162, | |
| "eval_runtime": 12.8729, | |
| "eval_samples_per_second": 22.139, | |
| "eval_steps_per_second": 2.797, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 26.670391061452513, | |
| "grad_norm": 3.8290631771087646, | |
| "learning_rate": 0.00014179310344827585, | |
| "loss": 1.0109, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 26.670391061452513, | |
| "eval_cer": 0.7095730243240421, | |
| "eval_loss": 3.1124370098114014, | |
| "eval_runtime": 12.8909, | |
| "eval_samples_per_second": 22.109, | |
| "eval_steps_per_second": 2.793, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 27.782122905027933, | |
| "grad_norm": 2.3735196590423584, | |
| "learning_rate": 0.0001348965517241379, | |
| "loss": 0.9362, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 27.782122905027933, | |
| "eval_cer": 0.7221004280196263, | |
| "eval_loss": 3.4384732246398926, | |
| "eval_runtime": 12.8512, | |
| "eval_samples_per_second": 22.177, | |
| "eval_steps_per_second": 2.801, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 28.893854748603353, | |
| "grad_norm": 2.767505645751953, | |
| "learning_rate": 0.000128, | |
| "loss": 0.8671, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 28.893854748603353, | |
| "eval_cer": 0.7152103559870551, | |
| "eval_loss": 3.3170225620269775, | |
| "eval_runtime": 12.9113, | |
| "eval_samples_per_second": 22.074, | |
| "eval_steps_per_second": 2.788, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 30.0, | |
| "grad_norm": 7.469735622406006, | |
| "learning_rate": 0.00012110344827586206, | |
| "loss": 0.831, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 30.0, | |
| "eval_cer": 0.7012214218603194, | |
| "eval_loss": 3.2871029376983643, | |
| "eval_runtime": 12.7981, | |
| "eval_samples_per_second": 22.269, | |
| "eval_steps_per_second": 2.813, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 31.11173184357542, | |
| "grad_norm": 3.1189279556274414, | |
| "learning_rate": 0.00011420689655172412, | |
| "loss": 0.7467, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 31.11173184357542, | |
| "eval_cer": 0.7047708529074016, | |
| "eval_loss": 3.5182864665985107, | |
| "eval_runtime": 12.9125, | |
| "eval_samples_per_second": 22.072, | |
| "eval_steps_per_second": 2.788, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 32.22346368715084, | |
| "grad_norm": 14.725250244140625, | |
| "learning_rate": 0.0001073103448275862, | |
| "loss": 0.7128, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 32.22346368715084, | |
| "eval_cer": 0.7088422591084664, | |
| "eval_loss": 3.3886497020721436, | |
| "eval_runtime": 12.9242, | |
| "eval_samples_per_second": 22.052, | |
| "eval_steps_per_second": 2.785, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 33.33519553072626, | |
| "grad_norm": 2.3201019763946533, | |
| "learning_rate": 0.00010041379310344826, | |
| "loss": 0.6815, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 33.33519553072626, | |
| "eval_cer": 0.7230399832967951, | |
| "eval_loss": 3.731771230697632, | |
| "eval_runtime": 12.9081, | |
| "eval_samples_per_second": 22.079, | |
| "eval_steps_per_second": 2.789, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 34.44692737430167, | |
| "grad_norm": 4.227590084075928, | |
| "learning_rate": 9.351724137931033e-05, | |
| "loss": 0.6452, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 34.44692737430167, | |
| "eval_cer": 0.732017955945297, | |
| "eval_loss": 3.7759644985198975, | |
| "eval_runtime": 12.9044, | |
| "eval_samples_per_second": 22.085, | |
| "eval_steps_per_second": 2.79, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 35.55865921787709, | |
| "grad_norm": 1.658971905708313, | |
| "learning_rate": 8.66206896551724e-05, | |
| "loss": 0.6052, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 35.55865921787709, | |
| "eval_cer": 0.7280509447750287, | |
| "eval_loss": 3.8133554458618164, | |
| "eval_runtime": 12.9295, | |
| "eval_samples_per_second": 22.043, | |
| "eval_steps_per_second": 2.784, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 36.67039106145251, | |
| "grad_norm": 3.6614513397216797, | |
| "learning_rate": 7.972413793103447e-05, | |
| "loss": 0.5779, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 36.67039106145251, | |
| "eval_cer": 0.6808643908549953, | |
| "eval_loss": 3.7408699989318848, | |
| "eval_runtime": 13.0708, | |
| "eval_samples_per_second": 21.804, | |
| "eval_steps_per_second": 2.754, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 37.78212290502793, | |
| "grad_norm": 1.5677216053009033, | |
| "learning_rate": 7.282758620689654e-05, | |
| "loss": 0.5343, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 37.78212290502793, | |
| "eval_cer": 0.6906775237498695, | |
| "eval_loss": 3.825223922729492, | |
| "eval_runtime": 12.9331, | |
| "eval_samples_per_second": 22.036, | |
| "eval_steps_per_second": 2.784, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 38.89385474860335, | |
| "grad_norm": 2.8445885181427, | |
| "learning_rate": 6.593103448275861e-05, | |
| "loss": 0.5056, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 38.89385474860335, | |
| "eval_cer": 0.6964192504436789, | |
| "eval_loss": 3.9913222789764404, | |
| "eval_runtime": 12.9414, | |
| "eval_samples_per_second": 22.022, | |
| "eval_steps_per_second": 2.782, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 40.0, | |
| "grad_norm": 12.087656021118164, | |
| "learning_rate": 5.903448275862069e-05, | |
| "loss": 0.5017, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 40.0, | |
| "eval_cer": 0.6999686814907611, | |
| "eval_loss": 4.1249589920043945, | |
| "eval_runtime": 12.9858, | |
| "eval_samples_per_second": 21.947, | |
| "eval_steps_per_second": 2.772, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 41.11173184357542, | |
| "grad_norm": 1.838942527770996, | |
| "learning_rate": 5.213793103448276e-05, | |
| "loss": 0.4592, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 41.11173184357542, | |
| "eval_cer": 0.7221004280196263, | |
| "eval_loss": 4.20962381362915, | |
| "eval_runtime": 12.8242, | |
| "eval_samples_per_second": 22.224, | |
| "eval_steps_per_second": 2.807, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 42.22346368715084, | |
| "grad_norm": 7.289366245269775, | |
| "learning_rate": 4.524137931034483e-05, | |
| "loss": 0.4478, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 42.22346368715084, | |
| "eval_cer": 0.7031005324146571, | |
| "eval_loss": 4.215978622436523, | |
| "eval_runtime": 12.8764, | |
| "eval_samples_per_second": 22.134, | |
| "eval_steps_per_second": 2.796, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 43.33519553072626, | |
| "grad_norm": 1.9154551029205322, | |
| "learning_rate": 3.834482758620689e-05, | |
| "loss": 0.4172, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 43.33519553072626, | |
| "eval_cer": 0.7085290740160768, | |
| "eval_loss": 4.245218276977539, | |
| "eval_runtime": 12.9248, | |
| "eval_samples_per_second": 22.051, | |
| "eval_steps_per_second": 2.785, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 44.44692737430167, | |
| "grad_norm": 6.6539154052734375, | |
| "learning_rate": 3.151724137931034e-05, | |
| "loss": 0.4018, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 44.44692737430167, | |
| "eval_cer": 0.7155235410794446, | |
| "eval_loss": 4.3074631690979, | |
| "eval_runtime": 12.9104, | |
| "eval_samples_per_second": 22.075, | |
| "eval_steps_per_second": 2.788, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 45.55865921787709, | |
| "grad_norm": 1.8227887153625488, | |
| "learning_rate": 2.4620689655172412e-05, | |
| "loss": 0.3854, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 45.55865921787709, | |
| "eval_cer": 0.7094686292932456, | |
| "eval_loss": 4.359448432922363, | |
| "eval_runtime": 12.9111, | |
| "eval_samples_per_second": 22.074, | |
| "eval_steps_per_second": 2.788, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 46.67039106145251, | |
| "grad_norm": 3.2107598781585693, | |
| "learning_rate": 1.7724137931034482e-05, | |
| "loss": 0.3771, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 46.67039106145251, | |
| "eval_cer": 0.7133312454327174, | |
| "eval_loss": 4.366276741027832, | |
| "eval_runtime": 12.9048, | |
| "eval_samples_per_second": 22.085, | |
| "eval_steps_per_second": 2.79, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 47.78212290502793, | |
| "grad_norm": 1.2981507778167725, | |
| "learning_rate": 1.082758620689655e-05, | |
| "loss": 0.3553, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 47.78212290502793, | |
| "eval_cer": 0.7135400354943104, | |
| "eval_loss": 4.419826030731201, | |
| "eval_runtime": 12.9283, | |
| "eval_samples_per_second": 22.045, | |
| "eval_steps_per_second": 2.785, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 48.89385474860335, | |
| "grad_norm": 2.526685953140259, | |
| "learning_rate": 3.93103448275862e-06, | |
| "loss": 0.3565, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 48.89385474860335, | |
| "eval_cer": 0.7108257646936006, | |
| "eval_loss": 4.4135847091674805, | |
| "eval_runtime": 12.8772, | |
| "eval_samples_per_second": 22.132, | |
| "eval_steps_per_second": 2.796, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 49.44692737430167, | |
| "step": 4450, | |
| "total_flos": 9.669635712078565e+18, | |
| "train_loss": 1.799734133602528, | |
| "train_runtime": 7166.9702, | |
| "train_samples_per_second": 19.897, | |
| "train_steps_per_second": 0.621 | |
| }, | |
| { | |
| "epoch": 49.44692737430167, | |
| "eval_cer": 0.7118697150015659, | |
| "eval_loss": 4.406894207000732, | |
| "eval_runtime": 12.8787, | |
| "eval_samples_per_second": 22.13, | |
| "eval_steps_per_second": 2.795, | |
| "step": 4450 | |
| } | |
| ], | |
| "logging_steps": 100, | |
| "max_steps": 4450, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 50, | |
| "save_steps": 100, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 9.669635712078565e+18, | |
| "train_batch_size": 16, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |