| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.9932104752667313, |
| "eval_steps": 1024, |
| "global_step": 21504, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.011823934229365849, |
| "grad_norm": 0.5449081659317017, |
| "learning_rate": 0.000498046875, |
| "loss": 9.256452560424805, |
| "step": 256 |
| }, |
| { |
| "epoch": 0.023647868458731697, |
| "grad_norm": 0.5607944130897522, |
| "learning_rate": 0.000998046875, |
| "loss": 5.257084369659424, |
| "step": 512 |
| }, |
| { |
| "epoch": 0.03547180268809755, |
| "grad_norm": 0.15684783458709717, |
| "learning_rate": 0.000999640996023194, |
| "loss": 1.0911091566085815, |
| "step": 768 |
| }, |
| { |
| "epoch": 0.047295736917463395, |
| "grad_norm": 0.0985269844532013, |
| "learning_rate": 0.0009985588674043958, |
| "loss": 0.41666868329048157, |
| "step": 1024 |
| }, |
| { |
| "epoch": 0.047295736917463395, |
| "eval_bleu": 0.27265830639135136, |
| "eval_ce_loss": 0.3059718095480579, |
| "eval_loss": 0.3059718095480579, |
| "step": 1024 |
| }, |
| { |
| "epoch": 0.047295736917463395, |
| "eval_bleu": 0.27265830639135136, |
| "eval_ce_loss": 0.3059718095480579, |
| "eval_loss": 0.3059718095480579, |
| "eval_runtime": 113.3557, |
| "eval_samples_per_second": 246.948, |
| "eval_steps_per_second": 3.864, |
| "step": 1024 |
| }, |
| { |
| "epoch": 0.05911967114682925, |
| "grad_norm": 0.0791890025138855, |
| "learning_rate": 0.0009967551747861387, |
| "loss": 0.24503706395626068, |
| "step": 1280 |
| }, |
| { |
| "epoch": 0.0709436053761951, |
| "grad_norm": 0.07241348922252655, |
| "learning_rate": 0.000994232528651847, |
| "loss": 0.16681283712387085, |
| "step": 1536 |
| }, |
| { |
| "epoch": 0.08276753960556095, |
| "grad_norm": 0.06362218409776688, |
| "learning_rate": 0.0009909945800260092, |
| "loss": 0.12249665707349777, |
| "step": 1792 |
| }, |
| { |
| "epoch": 0.09459147383492679, |
| "grad_norm": 0.06142408400774002, |
| "learning_rate": 0.0009870460151900522, |
| "loss": 0.09425020217895508, |
| "step": 2048 |
| }, |
| { |
| "epoch": 0.09459147383492679, |
| "eval_bleu": 0.2645654621946291, |
| "eval_ce_loss": 0.0849783036211446, |
| "eval_loss": 0.0849783036211446, |
| "step": 2048 |
| }, |
| { |
| "epoch": 0.09459147383492679, |
| "eval_bleu": 0.2645654621946291, |
| "eval_ce_loss": 0.0849783036211446, |
| "eval_loss": 0.0849783036211446, |
| "eval_runtime": 107.8993, |
| "eval_samples_per_second": 259.436, |
| "eval_steps_per_second": 4.059, |
| "step": 2048 |
| }, |
| { |
| "epoch": 0.10641540806429264, |
| "grad_norm": 0.058095596730709076, |
| "learning_rate": 0.0009823925488998885, |
| "loss": 0.07650011032819748, |
| "step": 2304 |
| }, |
| { |
| "epoch": 0.1182393422936585, |
| "grad_norm": 0.05368533730506897, |
| "learning_rate": 0.0009770409161149525, |
| "loss": 0.06166598200798035, |
| "step": 2560 |
| }, |
| { |
| "epoch": 0.13006327652302435, |
| "grad_norm": 0.04492728039622307, |
| "learning_rate": 0.0009709988622506973, |
| "loss": 0.051953624933958054, |
| "step": 2816 |
| }, |
| { |
| "epoch": 0.1418872107523902, |
| "grad_norm": 0.038033194839954376, |
| "learning_rate": 0.000964275131968659, |
| "loss": 0.0445532500743866, |
| "step": 3072 |
| }, |
| { |
| "epoch": 0.1418872107523902, |
| "eval_bleu": 0.26919279555175596, |
| "eval_ce_loss": 0.04093987658858027, |
| "eval_loss": 0.04093987658858027, |
| "step": 3072 |
| }, |
| { |
| "epoch": 0.1418872107523902, |
| "eval_bleu": 0.26919279555175596, |
| "eval_ce_loss": 0.04093987658858027, |
| "eval_loss": 0.04093987658858027, |
| "eval_runtime": 110.2876, |
| "eval_samples_per_second": 253.818, |
| "eval_steps_per_second": 3.971, |
| "step": 3072 |
| }, |
| { |
| "epoch": 0.15371114498175603, |
| "grad_norm": 0.036269549280405045, |
| "learning_rate": 0.0009568794565203123, |
| "loss": 0.03803830221295357, |
| "step": 3328 |
| }, |
| { |
| "epoch": 0.1655350792111219, |
| "grad_norm": 0.03661005198955536, |
| "learning_rate": 0.0009488225396630347, |
| "loss": 0.03269872069358826, |
| "step": 3584 |
| }, |
| { |
| "epoch": 0.17735901344048774, |
| "grad_norm": 0.03384782001376152, |
| "learning_rate": 0.0009401160421685646, |
| "loss": 0.02887391857802868, |
| "step": 3840 |
| }, |
| { |
| "epoch": 0.18918294766985358, |
| "grad_norm": 0.03782425448298454, |
| "learning_rate": 0.0009307725649463714, |
| "loss": 0.025456363335251808, |
| "step": 4096 |
| }, |
| { |
| "epoch": 0.18918294766985358, |
| "eval_bleu": 0.26767754742363126, |
| "eval_ce_loss": 0.023863823824991647, |
| "eval_loss": 0.023863823824991647, |
| "step": 4096 |
| }, |
| { |
| "epoch": 0.18918294766985358, |
| "eval_bleu": 0.26767754742363126, |
| "eval_ce_loss": 0.023863823824991647, |
| "eval_loss": 0.023863823824991647, |
| "eval_runtime": 110.9469, |
| "eval_samples_per_second": 252.31, |
| "eval_steps_per_second": 3.948, |
| "step": 4096 |
| }, |
| { |
| "epoch": 0.20100688189921945, |
| "grad_norm": 0.043788664042949677, |
| "learning_rate": 0.0009208056308063659, |
| "loss": 0.022065965458750725, |
| "step": 4352 |
| }, |
| { |
| "epoch": 0.2128308161285853, |
| "grad_norm": 0.04368242621421814, |
| "learning_rate": 0.0009102296648873445, |
| "loss": 0.020814381539821625, |
| "step": 4608 |
| }, |
| { |
| "epoch": 0.22465475035795113, |
| "grad_norm": 0.024551063776016235, |
| "learning_rate": 0.0008990599737794927, |
| "loss": 0.01824565976858139, |
| "step": 4864 |
| }, |
| { |
| "epoch": 0.236478684587317, |
| "grad_norm": 0.02257447876036167, |
| "learning_rate": 0.0008873127233711644, |
| "loss": 0.016040779650211334, |
| "step": 5120 |
| }, |
| { |
| "epoch": 0.236478684587317, |
| "eval_bleu": 0.27054794345363903, |
| "eval_ce_loss": 0.01571538393092278, |
| "eval_loss": 0.01571538393092278, |
| "step": 5120 |
| }, |
| { |
| "epoch": 0.236478684587317, |
| "eval_bleu": 0.27054794345363903, |
| "eval_ce_loss": 0.01571538393092278, |
| "eval_loss": 0.01571538393092278, |
| "eval_runtime": 111.5952, |
| "eval_samples_per_second": 250.844, |
| "eval_steps_per_second": 3.925, |
| "step": 5120 |
| }, |
| { |
| "epoch": 0.24830261881668284, |
| "grad_norm": 0.021084846928715706, |
| "learning_rate": 0.0008750049154520011, |
| "loss": 0.014501616358757019, |
| "step": 5376 |
| }, |
| { |
| "epoch": 0.2601265530460487, |
| "grad_norm": 0.025962376967072487, |
| "learning_rate": 0.0008621543631062487, |
| "loss": 0.013666299171745777, |
| "step": 5632 |
| }, |
| { |
| "epoch": 0.27195048727541454, |
| "grad_norm": 0.058145921677351, |
| "learning_rate": 0.0008487796649318904, |
| "loss": 0.012593724764883518, |
| "step": 5888 |
| }, |
| { |
| "epoch": 0.2837744215047804, |
| "grad_norm": 0.02390468120574951, |
| "learning_rate": 0.0008349001781229053, |
| "loss": 0.011387365870177746, |
| "step": 6144 |
| }, |
| { |
| "epoch": 0.2837744215047804, |
| "eval_bleu": 0.2698466629682675, |
| "eval_ce_loss": 0.010894727121324164, |
| "eval_loss": 0.010894727121324164, |
| "step": 6144 |
| }, |
| { |
| "epoch": 0.2837744215047804, |
| "eval_bleu": 0.2698466629682675, |
| "eval_ce_loss": 0.010894727121324164, |
| "eval_loss": 0.010894727121324164, |
| "eval_runtime": 111.9404, |
| "eval_samples_per_second": 250.071, |
| "eval_steps_per_second": 3.913, |
| "step": 6144 |
| }, |
| { |
| "epoch": 0.2955983557341462, |
| "grad_norm": 0.01789081282913685, |
| "learning_rate": 0.0008205359904536107, |
| "loss": 0.010051091201603413, |
| "step": 6400 |
| }, |
| { |
| "epoch": 0.30742228996351206, |
| "grad_norm": 0.024055052548646927, |
| "learning_rate": 0.0008057078912056363, |
| "loss": 0.009441766887903214, |
| "step": 6656 |
| }, |
| { |
| "epoch": 0.3192462241928779, |
| "grad_norm": 0.025855517014861107, |
| "learning_rate": 0.0007904373410796086, |
| "loss": 0.009011849761009216, |
| "step": 6912 |
| }, |
| { |
| "epoch": 0.3310701584222438, |
| "grad_norm": 0.02206815779209137, |
| "learning_rate": 0.0007747464411350876, |
| "loss": 0.007886786945164204, |
| "step": 7168 |
| }, |
| { |
| "epoch": 0.3310701584222438, |
| "eval_bleu": 0.26735583207895347, |
| "eval_ce_loss": 0.007830630816449092, |
| "eval_loss": 0.007830630816449092, |
| "step": 7168 |
| }, |
| { |
| "epoch": 0.3310701584222438, |
| "eval_bleu": 0.26735583207895347, |
| "eval_ce_loss": 0.007830630816449092, |
| "eval_loss": 0.007830630816449092, |
| "eval_runtime": 115.3929, |
| "eval_samples_per_second": 242.589, |
| "eval_steps_per_second": 3.796, |
| "step": 7168 |
| }, |
| { |
| "epoch": 0.34289409265160964, |
| "grad_norm": 0.020352263003587723, |
| "learning_rate": 0.000758657900803716, |
| "loss": 0.007837384939193726, |
| "step": 7424 |
| }, |
| { |
| "epoch": 0.3547180268809755, |
| "grad_norm": 0.028763771057128906, |
| "learning_rate": 0.000742195005021869, |
| "loss": 0.007105502299964428, |
| "step": 7680 |
| }, |
| { |
| "epoch": 0.3665419611103413, |
| "grad_norm": 0.011777768842875957, |
| "learning_rate": 0.0007253815805303786, |
| "loss": 0.00655593303963542, |
| "step": 7936 |
| }, |
| { |
| "epoch": 0.37836589533970716, |
| "grad_norm": 0.014801163226366043, |
| "learning_rate": 0.0007082419613901028, |
| "loss": 0.006127453874796629, |
| "step": 8192 |
| }, |
| { |
| "epoch": 0.37836589533970716, |
| "eval_bleu": 0.26779506983265766, |
| "eval_ce_loss": 0.006108720087580229, |
| "eval_loss": 0.006108720087580229, |
| "step": 8192 |
| }, |
| { |
| "epoch": 0.37836589533970716, |
| "eval_bleu": 0.26779506983265766, |
| "eval_ce_loss": 0.006108720087580229, |
| "eval_loss": 0.006108720087580229, |
| "eval_runtime": 111.3262, |
| "eval_samples_per_second": 251.45, |
| "eval_steps_per_second": 3.934, |
| "step": 8192 |
| }, |
| { |
| "epoch": 0.390189829569073, |
| "grad_norm": 0.012766832485795021, |
| "learning_rate": 0.0006908009537632514, |
| "loss": 0.005777300801128149, |
| "step": 8448 |
| }, |
| { |
| "epoch": 0.4020137637984389, |
| "grad_norm": 0.010622252710163593, |
| "learning_rate": 0.0006730838000114403, |
| "loss": 0.005370937753468752, |
| "step": 8704 |
| }, |
| { |
| "epoch": 0.41383769802780473, |
| "grad_norm": 0.018417516723275185, |
| "learning_rate": 0.0006551161421624341, |
| "loss": 0.004708444699645042, |
| "step": 8960 |
| }, |
| { |
| "epoch": 0.4256616322571706, |
| "grad_norm": 0.01107096392661333, |
| "learning_rate": 0.0006369239847984517, |
| "loss": 0.004844233393669128, |
| "step": 9216 |
| }, |
| { |
| "epoch": 0.4256616322571706, |
| "eval_bleu": 0.27117611081612686, |
| "eval_ce_loss": 0.004634970387913814, |
| "eval_loss": 0.004634970387913814, |
| "step": 9216 |
| }, |
| { |
| "epoch": 0.4256616322571706, |
| "eval_bleu": 0.27117611081612686, |
| "eval_ce_loss": 0.004634970387913814, |
| "eval_loss": 0.004634970387913814, |
| "eval_runtime": 110.365, |
| "eval_samples_per_second": 253.64, |
| "eval_steps_per_second": 3.969, |
| "step": 9216 |
| }, |
| { |
| "epoch": 0.4374855664865364, |
| "grad_norm": 0.016654323786497116, |
| "learning_rate": 0.0006185336574197479, |
| "loss": 0.004370348993688822, |
| "step": 9472 |
| }, |
| { |
| "epoch": 0.44930950071590225, |
| "grad_norm": 0.009538416750729084, |
| "learning_rate": 0.0005999717763379407, |
| "loss": 0.004241208545863628, |
| "step": 9728 |
| }, |
| { |
| "epoch": 0.4611334349452681, |
| "grad_norm": 0.019346073269844055, |
| "learning_rate": 0.0005812652061542363, |
| "loss": 0.004045420326292515, |
| "step": 9984 |
| }, |
| { |
| "epoch": 0.472957369174634, |
| "grad_norm": 0.009237069636583328, |
| "learning_rate": 0.0005624410208783071, |
| "loss": 0.0038289830554276705, |
| "step": 10240 |
| }, |
| { |
| "epoch": 0.472957369174634, |
| "eval_bleu": 0.2709327535478657, |
| "eval_ce_loss": 0.0036890122933966028, |
| "eval_loss": 0.0036890122933966028, |
| "step": 10240 |
| }, |
| { |
| "epoch": 0.472957369174634, |
| "eval_bleu": 0.2709327535478657, |
| "eval_ce_loss": 0.0036890122933966028, |
| "eval_loss": 0.0036890122933966028, |
| "eval_runtime": 109.8588, |
| "eval_samples_per_second": 254.809, |
| "eval_steps_per_second": 3.987, |
| "step": 10240 |
| }, |
| { |
| "epoch": 0.48478130340399983, |
| "grad_norm": 0.018487900495529175, |
| "learning_rate": 0.0005435264647440881, |
| "loss": 0.0035509562585502863, |
| "step": 10496 |
| }, |
| { |
| "epoch": 0.49660523763336567, |
| "grad_norm": 0.007689731661230326, |
| "learning_rate": 0.000524548912779213, |
| "loss": 0.003250380977988243, |
| "step": 10752 |
| }, |
| { |
| "epoch": 0.5084291718627315, |
| "grad_norm": 0.013474254868924618, |
| "learning_rate": 0.0005055358311851499, |
| "loss": 0.003267573891207576, |
| "step": 11008 |
| }, |
| { |
| "epoch": 0.5202531060920974, |
| "grad_norm": 0.0173040684312582, |
| "learning_rate": 0.0004865147375853812, |
| "loss": 0.0030927686020731926, |
| "step": 11264 |
| }, |
| { |
| "epoch": 0.5202531060920974, |
| "eval_bleu": 0.27271556678043923, |
| "eval_ce_loss": 0.0031219599181127023, |
| "eval_loss": 0.0031219599181127023, |
| "step": 11264 |
| }, |
| { |
| "epoch": 0.5202531060920974, |
| "eval_bleu": 0.27271556678043923, |
| "eval_ce_loss": 0.0031219599181127023, |
| "eval_loss": 0.0031219599181127023, |
| "eval_runtime": 109.1112, |
| "eval_samples_per_second": 256.555, |
| "eval_steps_per_second": 4.014, |
| "step": 11264 |
| }, |
| { |
| "epoch": 0.5320770403214632, |
| "grad_norm": 0.01329875085502863, |
| "learning_rate": 0.0004675131611991607, |
| "loss": 0.002897108905017376, |
| "step": 11520 |
| }, |
| { |
| "epoch": 0.5439009745508291, |
| "grad_norm": 0.01354902796447277, |
| "learning_rate": 0.0004485586029984899, |
| "loss": 0.003055332228541374, |
| "step": 11776 |
| }, |
| { |
| "epoch": 0.5557249087801949, |
| "grad_norm": 0.008980591781437397, |
| "learning_rate": 0.00042967849590597266, |
| "loss": 0.0028114793822169304, |
| "step": 12032 |
| }, |
| { |
| "epoch": 0.5675488430095608, |
| "grad_norm": 0.006398347206413746, |
| "learning_rate": 0.0004109001650911621, |
| "loss": 0.002583935856819153, |
| "step": 12288 |
| }, |
| { |
| "epoch": 0.5675488430095608, |
| "eval_bleu": 0.2711715005619301, |
| "eval_ce_loss": 0.002555297637415718, |
| "eval_loss": 0.002555297637415718, |
| "step": 12288 |
| }, |
| { |
| "epoch": 0.5675488430095608, |
| "eval_bleu": 0.2711715005619301, |
| "eval_ce_loss": 0.002555297637415718, |
| "eval_loss": 0.002555297637415718, |
| "eval_runtime": 110.1449, |
| "eval_samples_per_second": 254.147, |
| "eval_steps_per_second": 3.977, |
| "step": 12288 |
| }, |
| { |
| "epoch": 0.5793727772389267, |
| "grad_norm": 0.006243300624191761, |
| "learning_rate": 0.0003922507884228551, |
| "loss": 0.002527546603232622, |
| "step": 12544 |
| }, |
| { |
| "epoch": 0.5911967114682924, |
| "grad_norm": 0.013985877856612206, |
| "learning_rate": 0.00037375735713457723, |
| "loss": 0.002268948359414935, |
| "step": 12800 |
| }, |
| { |
| "epoch": 0.6030206456976583, |
| "grad_norm": 0.007294178940355778, |
| "learning_rate": 0.00035544663676018276, |
| "loss": 0.0022150948643684387, |
| "step": 13056 |
| }, |
| { |
| "epoch": 0.6148445799270241, |
| "grad_norm": 0.00554023077711463, |
| "learning_rate": 0.00033734512839611255, |
| "loss": 0.0021498501300811768, |
| "step": 13312 |
| }, |
| { |
| "epoch": 0.6148445799270241, |
| "eval_bleu": 0.2683290253865085, |
| "eval_ce_loss": 0.0022093608622775693, |
| "eval_loss": 0.0022093608622775693, |
| "step": 13312 |
| }, |
| { |
| "epoch": 0.6148445799270241, |
| "eval_bleu": 0.2683290253865085, |
| "eval_ce_loss": 0.0022093608622775693, |
| "eval_loss": 0.0022093608622775693, |
| "eval_runtime": 110.7199, |
| "eval_samples_per_second": 252.827, |
| "eval_steps_per_second": 3.956, |
| "step": 13312 |
| }, |
| { |
| "epoch": 0.62666851415639, |
| "grad_norm": 0.013757260516285896, |
| "learning_rate": 0.0003194790303463687, |
| "loss": 0.0020817620679736137, |
| "step": 13568 |
| }, |
| { |
| "epoch": 0.6384924483857558, |
| "grad_norm": 0.019224034622311592, |
| "learning_rate": 0.00030187420020572406, |
| "loss": 0.002039685845375061, |
| "step": 13824 |
| }, |
| { |
| "epoch": 0.6503163826151217, |
| "grad_norm": 0.01222301833331585, |
| "learning_rate": 0.00028455611743603626, |
| "loss": 0.0020018373616039753, |
| "step": 14080 |
| }, |
| { |
| "epoch": 0.6621403168444876, |
| "grad_norm": 0.021646995097398758, |
| "learning_rate": 0.0002675498464898373, |
| "loss": 0.00218460732139647, |
| "step": 14336 |
| }, |
| { |
| "epoch": 0.6621403168444876, |
| "eval_bleu": 0.26793733719293295, |
| "eval_ce_loss": 0.0019528514221664114, |
| "eval_loss": 0.0019528514221664114, |
| "step": 14336 |
| }, |
| { |
| "epoch": 0.6621403168444876, |
| "eval_bleu": 0.26793733719293295, |
| "eval_ce_loss": 0.0019528514221664114, |
| "eval_loss": 0.0019528514221664114, |
| "eval_runtime": 110.9086, |
| "eval_samples_per_second": 252.397, |
| "eval_steps_per_second": 3.949, |
| "step": 14336 |
| }, |
| { |
| "epoch": 0.6739642510738534, |
| "grad_norm": 0.006785638630390167, |
| "learning_rate": 0.0002508800005345623, |
| "loss": 0.001926972414366901, |
| "step": 14592 |
| }, |
| { |
| "epoch": 0.6857881853032193, |
| "grad_norm": 0.008890391327440739, |
| "learning_rate": 0.00023457070582992562, |
| "loss": 0.001943480921909213, |
| "step": 14848 |
| }, |
| { |
| "epoch": 0.6976121195325851, |
| "grad_norm": 0.006759077310562134, |
| "learning_rate": 0.00021864556680999692, |
| "loss": 0.00178119249176234, |
| "step": 15104 |
| }, |
| { |
| "epoch": 0.709436053761951, |
| "grad_norm": 0.006032236386090517, |
| "learning_rate": 0.0002031276319205152, |
| "loss": 0.0017275057034566998, |
| "step": 15360 |
| }, |
| { |
| "epoch": 0.709436053761951, |
| "eval_bleu": 0.27057655967695254, |
| "eval_ce_loss": 0.0017077088214219951, |
| "eval_loss": 0.0017077088214219951, |
| "step": 15360 |
| }, |
| { |
| "epoch": 0.709436053761951, |
| "eval_bleu": 0.27057655967695254, |
| "eval_ce_loss": 0.0017077088214219951, |
| "eval_loss": 0.0017077088214219951, |
| "eval_runtime": 111.5469, |
| "eval_samples_per_second": 250.953, |
| "eval_steps_per_second": 3.927, |
| "step": 15360 |
| }, |
| { |
| "epoch": 0.7212599879913169, |
| "grad_norm": 0.008526836521923542, |
| "learning_rate": 0.00018803936026088542, |
| "loss": 0.001562677789479494, |
| "step": 15616 |
| }, |
| { |
| "epoch": 0.7330839222206826, |
| "grad_norm": 0.007385567296296358, |
| "learning_rate": 0.00017340258907913464, |
| "loss": 0.0016144757391884923, |
| "step": 15872 |
| }, |
| { |
| "epoch": 0.7449078564500485, |
| "grad_norm": 0.0050244200974702835, |
| "learning_rate": 0.0001592385021668743, |
| "loss": 0.0016120458021759987, |
| "step": 16128 |
| }, |
| { |
| "epoch": 0.7567317906794143, |
| "grad_norm": 0.005648311693221331, |
| "learning_rate": 0.0001455675992000087, |
| "loss": 0.001692429999820888, |
| "step": 16384 |
| }, |
| { |
| "epoch": 0.7567317906794143, |
| "eval_bleu": 0.2685717071524248, |
| "eval_ce_loss": 0.0015959215974420593, |
| "eval_loss": 0.0015959215974420593, |
| "step": 16384 |
| }, |
| { |
| "epoch": 0.7567317906794143, |
| "eval_bleu": 0.2685717071524248, |
| "eval_ce_loss": 0.0015959215974420593, |
| "eval_loss": 0.0015959215974420593, |
| "eval_runtime": 109.7001, |
| "eval_samples_per_second": 255.177, |
| "eval_steps_per_second": 3.993, |
| "step": 16384 |
| }, |
| { |
| "epoch": 0.7685557249087802, |
| "grad_norm": 0.00418821582570672, |
| "learning_rate": 0.000132409666069565, |
| "loss": 0.001514198025688529, |
| "step": 16640 |
| }, |
| { |
| "epoch": 0.780379659138146, |
| "grad_norm": 0.008380233310163021, |
| "learning_rate": 0.0001197837462455823, |
| "loss": 0.0014031081227585673, |
| "step": 16896 |
| }, |
| { |
| "epoch": 0.7922035933675119, |
| "grad_norm": 0.008456946350634098, |
| "learning_rate": 0.00010770811321550749, |
| "loss": 0.0014507079031318426, |
| "step": 17152 |
| }, |
| { |
| "epoch": 0.8040275275968778, |
| "grad_norm": 0.01046363078057766, |
| "learning_rate": 9.620024403698591e-05, |
| "loss": 0.0016408010851591825, |
| "step": 17408 |
| }, |
| { |
| "epoch": 0.8040275275968778, |
| "eval_bleu": 0.2652117482429029, |
| "eval_ce_loss": 0.0014940057051086057, |
| "eval_loss": 0.0014940057051086057, |
| "step": 17408 |
| }, |
| { |
| "epoch": 0.8040275275968778, |
| "eval_bleu": 0.2652117482429029, |
| "eval_ce_loss": 0.0014940057051086057, |
| "eval_loss": 0.0014940057051086057, |
| "eval_runtime": 109.1029, |
| "eval_samples_per_second": 256.574, |
| "eval_steps_per_second": 4.015, |
| "step": 17408 |
| }, |
| { |
| "epoch": 0.8158514618262436, |
| "grad_norm": 0.003936219960451126, |
| "learning_rate": 8.527679404332429e-05, |
| "loss": 0.001487646484747529, |
| "step": 17664 |
| }, |
| { |
| "epoch": 0.8276753960556095, |
| "grad_norm": 0.030263634398579597, |
| "learning_rate": 7.495357273823544e-05, |
| "loss": 0.0014763937797397375, |
| "step": 17920 |
| }, |
| { |
| "epoch": 0.8394993302849753, |
| "grad_norm": 0.021031692624092102, |
| "learning_rate": 6.524552091475183e-05, |
| "loss": 0.001435705809853971, |
| "step": 18176 |
| }, |
| { |
| "epoch": 0.8513232645143411, |
| "grad_norm": 0.010445632040500641, |
| "learning_rate": 5.6166689031422024e-05, |
| "loss": 0.001405209768563509, |
| "step": 18432 |
| }, |
| { |
| "epoch": 0.8513232645143411, |
| "eval_bleu": 0.2666050549594915, |
| "eval_ce_loss": 0.0014094488985837038, |
| "eval_loss": 0.0014094488985837038, |
| "step": 18432 |
| }, |
| { |
| "epoch": 0.8513232645143411, |
| "eval_bleu": 0.2666050549594915, |
| "eval_ce_loss": 0.0014094488985837038, |
| "eval_loss": 0.0014094488985837038, |
| "eval_runtime": 110.9263, |
| "eval_samples_per_second": 252.357, |
| "eval_steps_per_second": 3.949, |
| "step": 18432 |
| }, |
| { |
| "epoch": 0.8631471987437069, |
| "grad_norm": 0.0071349553763866425, |
| "learning_rate": 4.773021687709067e-05, |
| "loss": 0.0014939571265131235, |
| "step": 18688 |
| }, |
| { |
| "epoch": 0.8749711329730728, |
| "grad_norm": 0.0075807152315974236, |
| "learning_rate": 3.994831455368719e-05, |
| "loss": 0.0016243808204308152, |
| "step": 18944 |
| }, |
| { |
| "epoch": 0.8867950672024387, |
| "grad_norm": 0.003486819099634886, |
| "learning_rate": 3.283224480455282e-05, |
| "loss": 0.0014689115341752768, |
| "step": 19200 |
| }, |
| { |
| "epoch": 0.8986190014318045, |
| "grad_norm": 0.004220427479594946, |
| "learning_rate": 2.639230671387627e-05, |
| "loss": 0.0012729011941701174, |
| "step": 19456 |
| }, |
| { |
| "epoch": 0.8986190014318045, |
| "eval_bleu": 0.2695832110812109, |
| "eval_ce_loss": 0.0013992262525565297, |
| "eval_loss": 0.0013992262525565297, |
| "step": 19456 |
| }, |
| { |
| "epoch": 0.8986190014318045, |
| "eval_bleu": 0.2695832110812109, |
| "eval_ce_loss": 0.0013992262525565297, |
| "eval_loss": 0.0013992262525565297, |
| "eval_runtime": 110.239, |
| "eval_samples_per_second": 253.93, |
| "eval_steps_per_second": 3.973, |
| "step": 19456 |
| }, |
| { |
| "epoch": 0.9104429356611704, |
| "grad_norm": 0.013082730583846569, |
| "learning_rate": 2.063782080083576e-05, |
| "loss": 0.0013599519152194262, |
| "step": 19712 |
| }, |
| { |
| "epoch": 0.9222668698905362, |
| "grad_norm": 0.00748586468398571, |
| "learning_rate": 1.557711553001523e-05, |
| "loss": 0.0015039942227303982, |
| "step": 19968 |
| }, |
| { |
| "epoch": 0.9340908041199021, |
| "grad_norm": 0.003521893871948123, |
| "learning_rate": 1.1217515257622269e-05, |
| "loss": 0.0014425483532249928, |
| "step": 20224 |
| }, |
| { |
| "epoch": 0.945914738349268, |
| "grad_norm": 0.008369974792003632, |
| "learning_rate": 7.565329630950746e-06, |
| "loss": 0.0014663139590993524, |
| "step": 20480 |
| }, |
| { |
| "epoch": 0.945914738349268, |
| "eval_bleu": 0.2669610603654314, |
| "eval_ce_loss": 0.0013518765415306859, |
| "eval_loss": 0.0013518765415306859, |
| "step": 20480 |
| }, |
| { |
| "epoch": 0.945914738349268, |
| "eval_bleu": 0.2669610603654314, |
| "eval_ce_loss": 0.0013518765415306859, |
| "eval_loss": 0.0013518765415306859, |
| "eval_runtime": 112.2477, |
| "eval_samples_per_second": 249.386, |
| "eval_steps_per_second": 3.902, |
| "step": 20480 |
| }, |
| { |
| "epoch": 0.9577386725786338, |
| "grad_norm": 0.008028030395507812, |
| "learning_rate": 4.62584445643166e-06, |
| "loss": 0.0015790105098858476, |
| "step": 20736 |
| }, |
| { |
| "epoch": 0.9695626068079997, |
| "grad_norm": 0.005876564886420965, |
| "learning_rate": 2.40331404948807e-06, |
| "loss": 0.001455229939892888, |
| "step": 20992 |
| }, |
| { |
| "epoch": 0.9813865410373654, |
| "grad_norm": 0.008718357421457767, |
| "learning_rate": 9.009550772663965e-07, |
| "loss": 0.0013052865397185087, |
| "step": 21248 |
| }, |
| { |
| "epoch": 0.9932104752667313, |
| "grad_norm": 0.0048894439823925495, |
| "learning_rate": 1.2094190315575791e-07, |
| "loss": 0.0014210316585376859, |
| "step": 21504 |
| }, |
| { |
| "epoch": 0.9932104752667313, |
| "eval_bleu": 0.2713391630531368, |
| "eval_ce_loss": 0.001370158953104858, |
| "eval_loss": 0.001370158953104858, |
| "step": 21504 |
| }, |
| { |
| "epoch": 0.9932104752667313, |
| "eval_bleu": 0.2713391630531368, |
| "eval_ce_loss": 0.001370158953104858, |
| "eval_loss": 0.001370158953104858, |
| "eval_runtime": 108.048, |
| "eval_samples_per_second": 259.079, |
| "eval_steps_per_second": 4.054, |
| "step": 21504 |
| } |
| ], |
| "logging_steps": 256, |
| "max_steps": 21651, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 1024, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 64, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|