{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 50.0,
  "eval_steps": 500,
  "global_step": 367750,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 1.0,
      "grad_norm": 0.11480995267629623,
      "learning_rate": 9.800000000000001e-06,
      "loss": 0.2501,
      "step": 7355
    },
    {
      "epoch": 1.0,
      "eval_loss": 0.42542216181755066,
      "eval_runtime": 128.5308,
      "eval_samples_per_second": 457.774,
      "eval_steps_per_second": 7.158,
      "step": 7355
    },
    {
      "epoch": 2.0,
      "grad_norm": 0.158855602145195,
      "learning_rate": 9.600000000000001e-06,
      "loss": 0.2332,
      "step": 14710
    },
    {
      "epoch": 2.0,
      "eval_loss": 0.41126397252082825,
      "eval_runtime": 128.8707,
      "eval_samples_per_second": 456.566,
      "eval_steps_per_second": 7.139,
      "step": 14710
    },
    {
      "epoch": 3.0,
      "grad_norm": 0.20708617568016052,
      "learning_rate": 9.4e-06,
      "loss": 0.2295,
      "step": 22065
    },
    {
      "epoch": 3.0,
      "eval_loss": 0.40615084767341614,
      "eval_runtime": 128.7304,
      "eval_samples_per_second": 457.064,
      "eval_steps_per_second": 7.147,
      "step": 22065
    },
    {
      "epoch": 4.0,
      "grad_norm": 0.2054029405117035,
      "learning_rate": 9.200000000000002e-06,
      "loss": 0.2273,
      "step": 29420
    },
    {
      "epoch": 4.0,
      "eval_loss": 0.40087950229644775,
      "eval_runtime": 128.7273,
      "eval_samples_per_second": 457.075,
      "eval_steps_per_second": 7.147,
      "step": 29420
    },
    {
      "epoch": 5.0,
      "grad_norm": 0.19840490818023682,
      "learning_rate": 9e-06,
      "loss": 0.2256,
      "step": 36775
    },
    {
      "epoch": 5.0,
      "eval_loss": 0.3977925777435303,
      "eval_runtime": 128.707,
      "eval_samples_per_second": 457.147,
      "eval_steps_per_second": 7.148,
      "step": 36775
    },
    {
      "epoch": 6.0,
      "grad_norm": 0.25789105892181396,
      "learning_rate": 8.8e-06,
      "loss": 0.2243,
      "step": 44130
    },
    {
      "epoch": 6.0,
      "eval_loss": 0.3958837389945984,
      "eval_runtime": 128.6907,
      "eval_samples_per_second": 457.205,
      "eval_steps_per_second": 7.149,
      "step": 44130
    },
    {
      "epoch": 7.0,
      "grad_norm": 0.21235878765583038,
      "learning_rate": 8.6e-06,
      "loss": 0.2231,
      "step": 51485
    },
    {
      "epoch": 7.0,
      "eval_loss": 0.39352869987487793,
      "eval_runtime": 128.701,
      "eval_samples_per_second": 457.168,
      "eval_steps_per_second": 7.148,
      "step": 51485
    },
    {
      "epoch": 8.0,
      "grad_norm": 0.1889820694923401,
      "learning_rate": 8.400000000000001e-06,
      "loss": 0.2221,
      "step": 58840
    },
    {
      "epoch": 8.0,
      "eval_loss": 0.3912597596645355,
      "eval_runtime": 128.7122,
      "eval_samples_per_second": 457.128,
      "eval_steps_per_second": 7.148,
      "step": 58840
    },
    {
      "epoch": 9.0,
      "grad_norm": 0.22390136122703552,
      "learning_rate": 8.2e-06,
      "loss": 0.2212,
      "step": 66195
    },
    {
      "epoch": 9.0,
      "eval_loss": 0.39093491435050964,
      "eval_runtime": 128.7173,
      "eval_samples_per_second": 457.11,
      "eval_steps_per_second": 7.147,
      "step": 66195
    },
    {
      "epoch": 10.0,
      "grad_norm": 0.1813807338476181,
      "learning_rate": 8.000000000000001e-06,
      "loss": 0.2205,
      "step": 73550
    },
    {
      "epoch": 10.0,
      "eval_loss": 0.389521986246109,
      "eval_runtime": 128.684,
      "eval_samples_per_second": 457.229,
      "eval_steps_per_second": 7.149,
      "step": 73550
    },
    {
      "epoch": 11.0,
      "grad_norm": 0.17810355126857758,
      "learning_rate": 7.800000000000002e-06,
      "loss": 0.2197,
      "step": 80905
    },
    {
      "epoch": 11.0,
      "eval_loss": 0.3886621296405792,
      "eval_runtime": 128.7099,
      "eval_samples_per_second": 457.137,
      "eval_steps_per_second": 7.148,
      "step": 80905
    },
    {
      "epoch": 12.0,
      "grad_norm": 0.24489013850688934,
      "learning_rate": 7.600000000000001e-06,
      "loss": 0.219,
      "step": 88260
    },
    {
      "epoch": 12.0,
      "eval_loss": 0.3879886269569397,
      "eval_runtime": 128.7058,
      "eval_samples_per_second": 457.151,
      "eval_steps_per_second": 7.148,
      "step": 88260
    },
    {
      "epoch": 13.0,
      "grad_norm": 0.1965673714876175,
      "learning_rate": 7.4e-06,
      "loss": 0.2184,
      "step": 95615
    },
    {
      "epoch": 13.0,
      "eval_loss": 0.38754525780677795,
      "eval_runtime": 128.7201,
      "eval_samples_per_second": 457.1,
      "eval_steps_per_second": 7.147,
      "step": 95615
    },
    {
      "epoch": 14.0,
      "grad_norm": 0.22494736313819885,
      "learning_rate": 7.2000000000000005e-06,
      "loss": 0.2178,
      "step": 102970
    },
    {
      "epoch": 14.0,
      "eval_loss": 0.3873791992664337,
      "eval_runtime": 128.7144,
      "eval_samples_per_second": 457.121,
      "eval_steps_per_second": 7.148,
      "step": 102970
    },
    {
      "epoch": 15.0,
      "grad_norm": 0.32273635268211365,
      "learning_rate": 7e-06,
      "loss": 0.2172,
      "step": 110325
    },
    {
      "epoch": 15.0,
      "eval_loss": 0.38621675968170166,
      "eval_runtime": 128.7027,
      "eval_samples_per_second": 457.162,
      "eval_steps_per_second": 7.148,
      "step": 110325
    },
    {
      "epoch": 16.0,
      "grad_norm": 0.17209158837795258,
      "learning_rate": 6.800000000000001e-06,
      "loss": 0.2167,
      "step": 117680
    },
    {
      "epoch": 16.0,
      "eval_loss": 0.3857288658618927,
      "eval_runtime": 128.6791,
      "eval_samples_per_second": 457.246,
      "eval_steps_per_second": 7.15,
      "step": 117680
    },
    {
      "epoch": 17.0,
      "grad_norm": 0.27914878726005554,
      "learning_rate": 6.600000000000001e-06,
      "loss": 0.2162,
      "step": 125035
    },
    {
      "epoch": 17.0,
      "eval_loss": 0.3846561014652252,
      "eval_runtime": 128.6869,
      "eval_samples_per_second": 457.218,
      "eval_steps_per_second": 7.149,
      "step": 125035
    },
    {
      "epoch": 18.0,
      "grad_norm": 0.23364859819412231,
      "learning_rate": 6.4000000000000006e-06,
      "loss": 0.2157,
      "step": 132390
    },
    {
      "epoch": 18.0,
      "eval_loss": 0.3847697675228119,
      "eval_runtime": 128.7148,
      "eval_samples_per_second": 457.119,
      "eval_steps_per_second": 7.148,
      "step": 132390
    },
    {
      "epoch": 19.0,
      "grad_norm": 0.172671377658844,
      "learning_rate": 6.200000000000001e-06,
      "loss": 0.2152,
      "step": 139745
    },
    {
      "epoch": 19.0,
      "eval_loss": 0.38386115431785583,
      "eval_runtime": 128.6991,
      "eval_samples_per_second": 457.175,
      "eval_steps_per_second": 7.148,
      "step": 139745
    },
    {
      "epoch": 20.0,
      "grad_norm": 0.19780349731445312,
      "learning_rate": 6e-06,
      "loss": 0.2148,
      "step": 147100
    },
    {
      "epoch": 20.0,
      "eval_loss": 0.3836727738380432,
      "eval_runtime": 128.7294,
      "eval_samples_per_second": 457.067,
      "eval_steps_per_second": 7.147,
      "step": 147100
    },
    {
      "epoch": 21.0,
      "grad_norm": 0.26560327410697937,
      "learning_rate": 5.8e-06,
      "loss": 0.2144,
      "step": 154455
    },
    {
      "epoch": 21.0,
      "eval_loss": 0.3844703435897827,
      "eval_runtime": 128.7099,
      "eval_samples_per_second": 457.137,
      "eval_steps_per_second": 7.148,
      "step": 154455
    },
    {
      "epoch": 22.0,
      "grad_norm": 0.22332455217838287,
      "learning_rate": 5.600000000000001e-06,
      "loss": 0.2139,
      "step": 161810
    },
    {
      "epoch": 22.0,
      "eval_loss": 0.3834006190299988,
      "eval_runtime": 128.7123,
      "eval_samples_per_second": 457.128,
      "eval_steps_per_second": 7.148,
      "step": 161810
    },
    {
      "epoch": 23.0,
      "grad_norm": 0.2586681842803955,
      "learning_rate": 5.400000000000001e-06,
      "loss": 0.2136,
      "step": 169165
    },
    {
      "epoch": 23.0,
      "eval_loss": 0.38348647952079773,
      "eval_runtime": 128.6691,
      "eval_samples_per_second": 457.281,
      "eval_steps_per_second": 7.15,
      "step": 169165
    },
    {
      "epoch": 24.0,
      "grad_norm": 0.2845219075679779,
      "learning_rate": 5.2e-06,
      "loss": 0.2132,
      "step": 176520
    },
    {
      "epoch": 24.0,
      "eval_loss": 0.3828953504562378,
      "eval_runtime": 128.7332,
      "eval_samples_per_second": 457.054,
      "eval_steps_per_second": 7.147,
      "step": 176520
    },
    {
      "epoch": 25.0,
      "grad_norm": 0.27165067195892334,
      "learning_rate": 5e-06,
      "loss": 0.2128,
      "step": 183875
    },
    {
      "epoch": 25.0,
      "eval_loss": 0.38219162821769714,
      "eval_runtime": 128.7152,
      "eval_samples_per_second": 457.118,
      "eval_steps_per_second": 7.148,
      "step": 183875
    },
    {
      "epoch": 26.0,
      "grad_norm": 0.23254956305027008,
      "learning_rate": 4.800000000000001e-06,
      "loss": 0.2125,
      "step": 191230
    },
    {
      "epoch": 26.0,
      "eval_loss": 0.38233497738838196,
      "eval_runtime": 128.8883,
      "eval_samples_per_second": 456.504,
      "eval_steps_per_second": 7.138,
      "step": 191230
    },
    {
      "epoch": 27.0,
      "grad_norm": 0.2750227749347687,
      "learning_rate": 4.600000000000001e-06,
      "loss": 0.2122,
      "step": 198585
    },
    {
      "epoch": 27.0,
      "eval_loss": 0.3827952444553375,
      "eval_runtime": 128.7247,
      "eval_samples_per_second": 457.084,
      "eval_steps_per_second": 7.147,
      "step": 198585
    },
    {
      "epoch": 28.0,
      "grad_norm": 0.3043362498283386,
      "learning_rate": 4.4e-06,
      "loss": 0.2118,
      "step": 205940
    },
    {
      "epoch": 28.0,
      "eval_loss": 0.38314878940582275,
      "eval_runtime": 128.7576,
      "eval_samples_per_second": 456.967,
      "eval_steps_per_second": 7.145,
      "step": 205940
    },
    {
      "epoch": 29.0,
      "grad_norm": 0.22233448922634125,
      "learning_rate": 4.2000000000000004e-06,
      "loss": 0.2115,
      "step": 213295
    },
    {
      "epoch": 29.0,
      "eval_loss": 0.3818701505661011,
      "eval_runtime": 128.736,
      "eval_samples_per_second": 457.044,
      "eval_steps_per_second": 7.146,
      "step": 213295
    },
    {
      "epoch": 30.0,
      "grad_norm": 0.26145127415657043,
      "learning_rate": 4.000000000000001e-06,
      "loss": 0.2112,
      "step": 220650
    },
    {
      "epoch": 30.0,
      "eval_loss": 0.38293564319610596,
      "eval_runtime": 128.9344,
      "eval_samples_per_second": 456.341,
      "eval_steps_per_second": 7.135,
      "step": 220650
    },
    {
      "epoch": 31.0,
      "grad_norm": 0.2705918252468109,
      "learning_rate": 3.8000000000000005e-06,
      "loss": 0.211,
      "step": 228005
    },
    {
      "epoch": 31.0,
      "eval_loss": 0.3823812007904053,
      "eval_runtime": 128.8218,
      "eval_samples_per_second": 456.739,
      "eval_steps_per_second": 7.142,
      "step": 228005
    },
    {
      "epoch": 32.0,
      "grad_norm": 0.2663235366344452,
      "learning_rate": 3.6000000000000003e-06,
      "loss": 0.2107,
      "step": 235360
    },
    {
      "epoch": 32.0,
      "eval_loss": 0.382473886013031,
      "eval_runtime": 128.7309,
      "eval_samples_per_second": 457.062,
      "eval_steps_per_second": 7.147,
      "step": 235360
    },
    {
      "epoch": 33.0,
      "grad_norm": 0.23493929207324982,
      "learning_rate": 3.4000000000000005e-06,
      "loss": 0.2104,
      "step": 242715
    },
    {
      "epoch": 33.0,
      "eval_loss": 0.3834179639816284,
      "eval_runtime": 128.7424,
      "eval_samples_per_second": 457.021,
      "eval_steps_per_second": 7.146,
      "step": 242715
    },
    {
      "epoch": 34.0,
      "grad_norm": 0.2235766053199768,
      "learning_rate": 3.2000000000000003e-06,
      "loss": 0.2102,
      "step": 250070
    },
    {
      "epoch": 34.0,
      "eval_loss": 0.3825724124908447,
      "eval_runtime": 128.7687,
      "eval_samples_per_second": 456.928,
      "eval_steps_per_second": 7.145,
      "step": 250070
    },
    {
      "epoch": 35.0,
      "grad_norm": 0.2881753742694855,
      "learning_rate": 3e-06,
      "loss": 0.2099,
      "step": 257425
    },
    {
      "epoch": 35.0,
      "eval_loss": 0.3824039697647095,
      "eval_runtime": 133.1919,
      "eval_samples_per_second": 441.754,
      "eval_steps_per_second": 6.907,
      "step": 257425
    },
    {
      "epoch": 36.0,
      "grad_norm": 0.35670992732048035,
      "learning_rate": 2.8000000000000003e-06,
      "loss": 0.2097,
      "step": 264780
    },
    {
      "epoch": 36.0,
      "eval_loss": 0.38277488946914673,
      "eval_runtime": 128.7343,
      "eval_samples_per_second": 457.05,
      "eval_steps_per_second": 7.147,
      "step": 264780
    },
    {
      "epoch": 37.0,
      "grad_norm": 0.29673638939857483,
      "learning_rate": 2.6e-06,
      "loss": 0.2095,
      "step": 272135
    },
    {
      "epoch": 37.0,
      "eval_loss": 0.38287386298179626,
      "eval_runtime": 129.4264,
      "eval_samples_per_second": 454.606,
      "eval_steps_per_second": 7.108,
      "step": 272135
    },
    {
      "epoch": 38.0,
      "grad_norm": 0.25621339678764343,
      "learning_rate": 2.4000000000000003e-06,
      "loss": 0.2093,
      "step": 279490
    },
    {
      "epoch": 38.0,
      "eval_loss": 0.3827780485153198,
      "eval_runtime": 129.087,
      "eval_samples_per_second": 455.801,
      "eval_steps_per_second": 7.127,
      "step": 279490
    },
    {
      "epoch": 39.0,
      "grad_norm": 0.31819215416908264,
      "learning_rate": 2.2e-06,
      "loss": 0.2091,
      "step": 286845
    },
    {
      "epoch": 39.0,
      "eval_loss": 0.3822120726108551,
      "eval_runtime": 128.8612,
      "eval_samples_per_second": 456.6,
      "eval_steps_per_second": 7.139,
      "step": 286845
    },
    {
      "epoch": 40.0,
      "grad_norm": 0.2761085033416748,
      "learning_rate": 2.0000000000000003e-06,
      "loss": 0.2089,
      "step": 294200
    },
    {
      "epoch": 40.0,
      "eval_loss": 0.3824302554130554,
      "eval_runtime": 129.0551,
      "eval_samples_per_second": 455.914,
      "eval_steps_per_second": 7.129,
      "step": 294200
    },
    {
      "epoch": 41.0,
      "grad_norm": 0.27816739678382874,
      "learning_rate": 1.8000000000000001e-06,
      "loss": 0.2088,
      "step": 301555
    },
    {
      "epoch": 41.0,
      "eval_loss": 0.38338372111320496,
      "eval_runtime": 129.0432,
      "eval_samples_per_second": 455.956,
      "eval_steps_per_second": 7.129,
      "step": 301555
    },
    {
      "epoch": 42.0,
      "grad_norm": 0.3370245695114136,
      "learning_rate": 1.6000000000000001e-06,
      "loss": 0.2086,
      "step": 308910
    },
    {
      "epoch": 42.0,
      "eval_loss": 0.3826825022697449,
      "eval_runtime": 129.0854,
      "eval_samples_per_second": 455.807,
      "eval_steps_per_second": 7.127,
      "step": 308910
    },
    {
      "epoch": 43.0,
      "grad_norm": 0.23392541706562042,
      "learning_rate": 1.4000000000000001e-06,
      "loss": 0.2085,
      "step": 316265
    },
    {
      "epoch": 43.0,
      "eval_loss": 0.382882684469223,
      "eval_runtime": 128.8341,
      "eval_samples_per_second": 456.696,
      "eval_steps_per_second": 7.141,
      "step": 316265
    },
    {
      "epoch": 44.0,
      "grad_norm": 0.2567419409751892,
      "learning_rate": 1.2000000000000002e-06,
      "loss": 0.2083,
      "step": 323620
    },
    {
      "epoch": 44.0,
      "eval_loss": 0.3828926682472229,
      "eval_runtime": 129.1671,
      "eval_samples_per_second": 455.518,
      "eval_steps_per_second": 7.123,
      "step": 323620
    },
    {
      "epoch": 45.0,
      "grad_norm": 0.22591634094715118,
      "learning_rate": 1.0000000000000002e-06,
      "loss": 0.2082,
      "step": 330975
    },
    {
      "epoch": 45.0,
      "eval_loss": 0.3830114006996155,
      "eval_runtime": 128.7432,
      "eval_samples_per_second": 457.018,
      "eval_steps_per_second": 7.146,
      "step": 330975
    },
    {
      "epoch": 46.0,
      "grad_norm": 0.310523122549057,
      "learning_rate": 8.000000000000001e-07,
      "loss": 0.2081,
      "step": 338330
    },
    {
      "epoch": 46.0,
      "eval_loss": 0.38255205750465393,
      "eval_runtime": 129.0345,
      "eval_samples_per_second": 455.987,
      "eval_steps_per_second": 7.13,
      "step": 338330
    },
    {
      "epoch": 47.0,
      "grad_norm": 0.278604120016098,
      "learning_rate": 6.000000000000001e-07,
      "loss": 0.208,
      "step": 345685
    },
    {
      "epoch": 47.0,
      "eval_loss": 0.3827236294746399,
      "eval_runtime": 129.0063,
      "eval_samples_per_second": 456.086,
      "eval_steps_per_second": 7.131,
      "step": 345685
    },
    {
      "epoch": 48.0,
      "grad_norm": 0.2605680227279663,
      "learning_rate": 4.0000000000000003e-07,
      "loss": 0.2079,
      "step": 353040
    },
    {
      "epoch": 48.0,
      "eval_loss": 0.38287004828453064,
      "eval_runtime": 128.8174,
      "eval_samples_per_second": 456.755,
      "eval_steps_per_second": 7.142,
      "step": 353040
    },
    {
      "epoch": 49.0,
      "grad_norm": 0.3245304822921753,
      "learning_rate": 2.0000000000000002e-07,
      "loss": 0.2078,
      "step": 360395
    },
    {
      "epoch": 49.0,
      "eval_loss": 0.38298800587654114,
      "eval_runtime": 128.8343,
      "eval_samples_per_second": 456.695,
      "eval_steps_per_second": 7.141,
      "step": 360395
    },
    {
      "epoch": 50.0,
      "grad_norm": 0.3787703812122345,
      "learning_rate": 0.0,
      "loss": 0.2078,
      "step": 367750
    },
    {
      "epoch": 50.0,
      "eval_loss": 0.3828030824661255,
      "eval_runtime": 128.8773,
      "eval_samples_per_second": 456.543,
      "eval_steps_per_second": 7.139,
      "step": 367750
    }
  ],
  "logging_steps": 500,
  "max_steps": 367750,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 50,
  "save_steps": 500,
  "total_flos": 2.9088945658368e+18,
  "train_batch_size": 64,
  "trial_name": null,
  "trial_params": null
}