{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 0.6148445799270241, "eval_steps": 1024, "global_step": 13312, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.011823934229365849, "grad_norm": 1.9140732288360596, "learning_rate": 0.000498046875, "loss": 6.9310712814331055, "step": 256 }, { "epoch": 0.023647868458731697, "grad_norm": 0.966057538986206, "learning_rate": 0.000998046875, "loss": 1.017655372619629, "step": 512 }, { "epoch": 0.03547180268809755, "grad_norm": 0.6036155223846436, "learning_rate": 0.000999640996023194, "loss": 0.5299859046936035, "step": 768 }, { "epoch": 0.047295736917463395, "grad_norm": 0.6692569851875305, "learning_rate": 0.0009985588674043958, "loss": 0.43251049518585205, "step": 1024 }, { "epoch": 0.047295736917463395, "eval_bleu": 0.6267380536600159, "eval_ce_loss": 1.563563620935292, "eval_cos_loss": 0.11390362637432198, "eval_loss": 0.40128198402113024, "eval_mse_loss": 0.2176408769061032, "eval_rec_loss": 0.015894381031734215, "flow/cos_sim": 0.8860963983078526, "flow/improvement_ratio": 0.9765386980146034, "flow/mag_ratio_mean": 0.8838755380889597, "flow/mag_ratio_std": 0.1646749794006892, "step": 1024 }, { "epoch": 0.047295736917463395, "eval_bleu": 0.6267380536600159, "eval_ce_loss": 1.563563620935292, "eval_cos_loss": 0.11390362637432198, "eval_loss": 0.40128198402113024, "eval_mse_loss": 0.2176408769061032, "eval_rec_loss": 0.015894381031734215, "eval_runtime": 145.0392, "eval_samples_per_second": 193.003, "eval_steps_per_second": 3.02, "flow/cos_sim": 0.8860963983078526, "flow/improvement_ratio": 0.9765386980146034, "flow/mag_ratio_mean": 0.8838755380889597, "flow/mag_ratio_std": 0.1646749794006892, "step": 1024 }, { "epoch": 0.05911967114682925, "grad_norm": 0.46750983595848083, "learning_rate": 0.0009967551747861387, "loss": 0.3926387429237366, "step": 1280 }, { "epoch": 0.0709436053761951, "grad_norm": 0.5673767328262329, "learning_rate": 0.000994232528651847, "loss": 0.3741423189640045, "step": 1536 }, { "epoch": 0.08276753960556095, "grad_norm": 0.5806387066841125, "learning_rate": 0.0009909945800260092, "loss": 0.35307449102401733, "step": 1792 }, { "epoch": 0.09459147383492679, "grad_norm": 0.6838217973709106, "learning_rate": 0.0009870460151900522, "loss": 0.3399304151535034, "step": 2048 }, { "epoch": 0.09459147383492679, "eval_bleu": 0.6478349738418474, "eval_ce_loss": 1.3434572242982856, "eval_cos_loss": 0.09738318450306649, "eval_loss": 0.3373560005943525, "eval_mse_loss": 0.18800275102597938, "eval_rec_loss": 0.005269206022433341, "flow/cos_sim": 0.9026168323542973, "flow/improvement_ratio": 0.9768092384621433, "flow/mag_ratio_mean": 0.9078535956062682, "flow/mag_ratio_std": 0.12002966954419601, "step": 2048 }, { "epoch": 0.09459147383492679, "eval_bleu": 0.6478349738418474, "eval_ce_loss": 1.3434572242982856, "eval_cos_loss": 0.09738318450306649, "eval_loss": 0.3373560005943525, "eval_mse_loss": 0.18800275102597938, "eval_rec_loss": 0.005269206022433341, "eval_runtime": 143.5996, "eval_samples_per_second": 194.938, "eval_steps_per_second": 3.05, "flow/cos_sim": 0.9026168323542973, "flow/improvement_ratio": 0.9768092384621433, "flow/mag_ratio_mean": 0.9078535956062682, "flow/mag_ratio_std": 0.12002966954419601, "step": 2048 }, { "epoch": 0.10641540806429264, "grad_norm": 0.6462650299072266, "learning_rate": 0.0009823925488998885, "loss": 0.3380688428878784, "step": 2304 }, { "epoch": 0.1182393422936585, "grad_norm": 0.6545562744140625, "learning_rate": 0.0009770409161149525, "loss": 0.3214532732963562, "step": 2560 }, { "epoch": 0.13006327652302435, "grad_norm": 0.6667947173118591, "learning_rate": 0.0009709988622506973, "loss": 0.31182003021240234, "step": 2816 }, { "epoch": 0.1418872107523902, "grad_norm": 0.600439727306366, "learning_rate": 0.000964275131968659, "loss": 0.3082171380519867, "step": 3072 }, { "epoch": 0.1418872107523902, "eval_bleu": 0.6794402302458825, "eval_ce_loss": 1.18399681810919, "eval_cos_loss": 0.08983433964453875, "eval_loss": 0.3044066195414491, "eval_mse_loss": 0.17457320809908652, "eval_rec_loss": 0.0024502936500277726, "flow/cos_sim": 0.9101656942062726, "flow/improvement_ratio": 0.9734727941691603, "flow/mag_ratio_mean": 0.9100713347463303, "flow/mag_ratio_std": 0.09228039255629392, "step": 3072 }, { "epoch": 0.1418872107523902, "eval_bleu": 0.6794402302458825, "eval_ce_loss": 1.18399681810919, "eval_cos_loss": 0.08983433964453875, "eval_loss": 0.3044066195414491, "eval_mse_loss": 0.17457320809908652, "eval_rec_loss": 0.0024502936500277726, "eval_runtime": 142.616, "eval_samples_per_second": 196.282, "eval_steps_per_second": 3.071, "flow/cos_sim": 0.9101656942062726, "flow/improvement_ratio": 0.9734727941691603, "flow/mag_ratio_mean": 0.9100713347463303, "flow/mag_ratio_std": 0.09228039255629392, "step": 3072 }, { "epoch": 0.15371114498175603, "grad_norm": 0.5188813209533691, "learning_rate": 0.0009568794565203123, "loss": 0.3006798326969147, "step": 3328 }, { "epoch": 0.1655350792111219, "grad_norm": 0.3702296316623688, "learning_rate": 0.0009488225396630347, "loss": 0.29647037386894226, "step": 3584 }, { "epoch": 0.17735901344048774, "grad_norm": 0.49310481548309326, "learning_rate": 0.0009401160421685646, "loss": 0.3130263388156891, "step": 3840 }, { "epoch": 0.18918294766985358, "grad_norm": 0.5644649267196655, "learning_rate": 0.0009307725649463714, "loss": 0.2926834523677826, "step": 4096 }, { "epoch": 0.18918294766985358, "eval_bleu": 0.6839715035857402, "eval_ce_loss": 1.1360810732460458, "eval_cos_loss": 0.08503438343536364, "eval_loss": 0.2889118402788084, "eval_mse_loss": 0.16517186124030858, "eval_rec_loss": 0.0016284317239192974, "flow/cos_sim": 0.9149656362457362, "flow/improvement_ratio": 0.9750959768415042, "flow/mag_ratio_mean": 0.9128544130553938, "flow/mag_ratio_std": 0.09354445787325297, "step": 4096 }, { "epoch": 0.18918294766985358, "eval_bleu": 0.6839715035857402, "eval_ce_loss": 1.1360810732460458, "eval_cos_loss": 0.08503438343536364, "eval_loss": 0.2889118402788084, "eval_mse_loss": 0.16517186124030858, "eval_rec_loss": 0.0016284317239192974, "eval_runtime": 143.3522, "eval_samples_per_second": 195.274, "eval_steps_per_second": 3.055, "flow/cos_sim": 0.9149656362457362, "flow/improvement_ratio": 0.9750959768415042, "flow/mag_ratio_mean": 0.9128544130553938, "flow/mag_ratio_std": 0.09354445787325297, "step": 4096 }, { "epoch": 0.20100688189921945, "grad_norm": 0.3648587763309479, "learning_rate": 0.0009208056308063659, "loss": 0.28915518522262573, "step": 4352 }, { "epoch": 0.2128308161285853, "grad_norm": 0.5004331469535828, "learning_rate": 0.0009102296648873445, "loss": 0.28424444794654846, "step": 4608 }, { "epoch": 0.22465475035795113, "grad_norm": 0.27222171425819397, "learning_rate": 0.0008990599737794927, "loss": 0.2838270366191864, "step": 4864 }, { "epoch": 0.236478684587317, "grad_norm": 0.39166298508644104, "learning_rate": 0.0008873127233711644, "loss": 0.2802566885948181, "step": 5120 }, { "epoch": 0.236478684587317, "eval_bleu": 0.6966429437264656, "eval_ce_loss": 1.0860335096784923, "eval_cos_loss": 0.08069509963534738, "eval_loss": 0.27587767567944854, "eval_mse_loss": 0.15802378180228413, "eval_rec_loss": 0.0011810324893420196, "flow/cos_sim": 0.9193049165756191, "flow/improvement_ratio": 0.9735453040360316, "flow/mag_ratio_mean": 0.9165048223652251, "flow/mag_ratio_std": 0.08227625193252955, "step": 5120 }, { "epoch": 0.236478684587317, "eval_bleu": 0.6966429437264656, "eval_ce_loss": 1.0860335096784923, "eval_cos_loss": 0.08069509963534738, "eval_loss": 0.27587767567944854, "eval_mse_loss": 0.15802378180228413, "eval_rec_loss": 0.0011810324893420196, "eval_runtime": 144.8851, "eval_samples_per_second": 193.208, "eval_steps_per_second": 3.023, "flow/cos_sim": 0.9193049165756191, "flow/improvement_ratio": 0.9735453040360316, "flow/mag_ratio_mean": 0.9165048223652251, "flow/mag_ratio_std": 0.08227625193252955, "step": 5120 }, { "epoch": 0.24830261881668284, "grad_norm": 0.4552461802959442, "learning_rate": 0.0008750049154520011, "loss": 0.27747780084609985, "step": 5376 }, { "epoch": 0.2601265530460487, "grad_norm": 0.3430848717689514, "learning_rate": 0.0008621543631062487, "loss": 0.2891384959220886, "step": 5632 }, { "epoch": 0.27195048727541454, "grad_norm": 0.4337022304534912, "learning_rate": 0.0008487796649318904, "loss": 0.274080365896225, "step": 5888 }, { "epoch": 0.2837744215047804, "grad_norm": 0.40342026948928833, "learning_rate": 0.0008349001781229053, "loss": 0.2737179100513458, "step": 6144 }, { "epoch": 0.2837744215047804, "eval_bleu": 0.7005510372485733, "eval_ce_loss": 1.067978764981984, "eval_cos_loss": 0.0791517910994079, "eval_loss": 0.2710464134744313, "eval_mse_loss": 0.15530247285485813, "eval_rec_loss": 0.00103088276002016, "flow/cos_sim": 0.9208482295682986, "flow/improvement_ratio": 0.9731569932476026, "flow/mag_ratio_mean": 0.9284612283314744, "flow/mag_ratio_std": 0.07813496222574962, "step": 6144 }, { "epoch": 0.2837744215047804, "eval_bleu": 0.7005510372485733, "eval_ce_loss": 1.067978764981984, "eval_cos_loss": 0.0791517910994079, "eval_loss": 0.2710464134744313, "eval_mse_loss": 0.15530247285485813, "eval_rec_loss": 0.00103088276002016, "eval_runtime": 142.9519, "eval_samples_per_second": 195.821, "eval_steps_per_second": 3.064, "flow/cos_sim": 0.9208482295682986, "flow/improvement_ratio": 0.9731569932476026, "flow/mag_ratio_mean": 0.9284612283314744, "flow/mag_ratio_std": 0.07813496222574962, "step": 6144 }, { "epoch": 0.2955983557341462, "grad_norm": 0.36669906973838806, "learning_rate": 0.0008205359904536107, "loss": 0.2686825394630432, "step": 6400 }, { "epoch": 0.30742228996351206, "grad_norm": 0.2844366729259491, "learning_rate": 0.0008057078912056363, "loss": 0.26841098070144653, "step": 6656 }, { "epoch": 0.3192462241928779, "grad_norm": 0.341339647769928, "learning_rate": 0.0007904373410796086, "loss": 0.2661970555782318, "step": 6912 }, { "epoch": 0.3310701584222438, "grad_norm": 0.35041218996047974, "learning_rate": 0.0007747464411350876, "loss": 0.26556745171546936, "step": 7168 }, { "epoch": 0.3310701584222438, "eval_bleu": 0.710653856755971, "eval_ce_loss": 1.0037036097213015, "eval_cos_loss": 0.0774884378733156, "eval_loss": 0.2617515635817018, "eval_mse_loss": 0.15282062886784611, "eval_rec_loss": 0.0008117282769037305, "flow/cos_sim": 0.9225115811443765, "flow/improvement_ratio": 0.9752856880834658, "flow/mag_ratio_mean": 0.9207794453999768, "flow/mag_ratio_std": 0.08132727078447059, "step": 7168 }, { "epoch": 0.3310701584222438, "eval_bleu": 0.710653856755971, "eval_ce_loss": 1.0037036097213015, "eval_cos_loss": 0.0774884378733156, "eval_loss": 0.2617515635817018, "eval_mse_loss": 0.15282062886784611, "eval_rec_loss": 0.0008117282769037305, "eval_runtime": 142.6116, "eval_samples_per_second": 196.288, "eval_steps_per_second": 3.071, "flow/cos_sim": 0.9225115811443765, "flow/improvement_ratio": 0.9752856880834658, "flow/mag_ratio_mean": 0.9207794453999768, "flow/mag_ratio_std": 0.08132727078447059, "step": 7168 }, { "epoch": 0.34289409265160964, "grad_norm": 0.328535258769989, "learning_rate": 0.000758657900803716, "loss": 0.27869582176208496, "step": 7424 }, { "epoch": 0.3547180268809755, "grad_norm": 0.27767524123191833, "learning_rate": 0.000742195005021869, "loss": 0.26837044954299927, "step": 7680 }, { "epoch": 0.3665419611103413, "grad_norm": 0.40343427658081055, "learning_rate": 0.0007253815805303786, "loss": 0.26287224888801575, "step": 7936 }, { "epoch": 0.37836589533970716, "grad_norm": 0.3494727611541748, "learning_rate": 0.0007082419613901028, "loss": 0.2608710527420044, "step": 8192 }, { "epoch": 0.37836589533970716, "eval_bleu": 0.7258561394806508, "eval_ce_loss": 0.954990051584701, "eval_cos_loss": 0.07798272708099183, "eval_loss": 0.2584831827239359, "eval_mse_loss": 0.15443320324023566, "eval_rec_loss": 0.0007526992644982298, "flow/cos_sim": 0.9220172950666244, "flow/improvement_ratio": 0.9747015749482804, "flow/mag_ratio_mean": 0.9233354187175019, "flow/mag_ratio_std": 0.07788482179108276, "step": 8192 }, { "epoch": 0.37836589533970716, "eval_bleu": 0.7258561394806508, "eval_ce_loss": 0.954990051584701, "eval_cos_loss": 0.07798272708099183, "eval_loss": 0.2584831827239359, "eval_mse_loss": 0.15443320324023566, "eval_rec_loss": 0.0007526992644982298, "eval_runtime": 144.1923, "eval_samples_per_second": 194.137, "eval_steps_per_second": 3.038, "flow/cos_sim": 0.9220172950666244, "flow/improvement_ratio": 0.9747015749482804, "flow/mag_ratio_mean": 0.9233354187175019, "flow/mag_ratio_std": 0.07788482179108276, "step": 8192 }, { "epoch": 0.390189829569073, "grad_norm": 0.4279082417488098, "learning_rate": 0.0006908009537632514, "loss": 0.2599097788333893, "step": 8448 }, { "epoch": 0.4020137637984389, "grad_norm": 0.43606042861938477, "learning_rate": 0.0006730838000114403, "loss": 0.26111704111099243, "step": 8704 }, { "epoch": 0.41383769802780473, "grad_norm": 0.21560809016227722, "learning_rate": 0.0006551161421624341, "loss": 0.268413245677948, "step": 8960 }, { "epoch": 0.4256616322571706, "grad_norm": 0.5756855607032776, "learning_rate": 0.0006369239847984517, "loss": 0.2562841475009918, "step": 9216 }, { "epoch": 0.4256616322571706, "eval_bleu": 0.7095258647762687, "eval_ce_loss": 1.0158189947474492, "eval_cos_loss": 0.0742807937743457, "eval_loss": 0.256784121705789, "eval_mse_loss": 0.14713338680871546, "eval_rec_loss": 0.0006407552403591884, "flow/cos_sim": 0.9257192251072627, "flow/improvement_ratio": 0.9743275895510635, "flow/mag_ratio_mean": 0.9202466026016566, "flow/mag_ratio_std": 0.0764387545588354, "step": 9216 }, { "epoch": 0.4256616322571706, "eval_bleu": 0.7095258647762687, "eval_ce_loss": 1.0158189947474492, "eval_cos_loss": 0.0742807937743457, "eval_loss": 0.256784121705789, "eval_mse_loss": 0.14713338680871546, "eval_rec_loss": 0.0006407552403591884, "eval_runtime": 145.076, "eval_samples_per_second": 192.954, "eval_steps_per_second": 3.019, "flow/cos_sim": 0.9257192251072627, "flow/improvement_ratio": 0.9743275895510635, "flow/mag_ratio_mean": 0.9202466026016566, "flow/mag_ratio_std": 0.0764387545588354, "step": 9216 }, { "epoch": 0.4374855664865364, "grad_norm": 0.376579225063324, "learning_rate": 0.0006185336574197479, "loss": 0.25477761030197144, "step": 9472 }, { "epoch": 0.44930950071590225, "grad_norm": 0.4893060624599457, "learning_rate": 0.0005999717763379407, "loss": 0.2554967701435089, "step": 9728 }, { "epoch": 0.4611334349452681, "grad_norm": 0.5277368426322937, "learning_rate": 0.0005812652061542363, "loss": 0.2537921667098999, "step": 9984 }, { "epoch": 0.472957369174634, "grad_norm": 0.3493780791759491, "learning_rate": 0.0005624410208783071, "loss": 0.2518165707588196, "step": 10240 }, { "epoch": 0.472957369174634, "eval_bleu": 0.7175600139987302, "eval_ce_loss": 0.9684542848231041, "eval_cos_loss": 0.07330035061902923, "eval_loss": 0.25065779049782994, "eval_mse_loss": 0.1458930650292194, "eval_rec_loss": 0.000589260474122746, "flow/cos_sim": 0.9266996725236989, "flow/improvement_ratio": 0.9765301047394809, "flow/mag_ratio_mean": 0.9194769399351181, "flow/mag_ratio_std": 0.07280441308946914, "step": 10240 }, { "epoch": 0.472957369174634, "eval_bleu": 0.7175600139987302, "eval_ce_loss": 0.9684542848231041, "eval_cos_loss": 0.07330035061902923, "eval_loss": 0.25065779049782994, "eval_mse_loss": 0.1458930650292194, "eval_rec_loss": 0.000589260474122746, "eval_runtime": 144.907, "eval_samples_per_second": 193.179, "eval_steps_per_second": 3.023, "flow/cos_sim": 0.9266996725236989, "flow/improvement_ratio": 0.9765301047394809, "flow/mag_ratio_mean": 0.9194769399351181, "flow/mag_ratio_std": 0.07280441308946914, "step": 10240 }, { "epoch": 0.48478130340399983, "grad_norm": 0.5341387391090393, "learning_rate": 0.0005435264647440881, "loss": 0.2537766695022583, "step": 10496 }, { "epoch": 0.49660523763336567, "grad_norm": 0.37015679478645325, "learning_rate": 0.000524548912779213, "loss": 0.2499093860387802, "step": 10752 }, { "epoch": 0.5084291718627315, "grad_norm": 0.6591759920120239, "learning_rate": 0.0005055358311851499, "loss": 0.25186970829963684, "step": 11008 }, { "epoch": 0.5202531060920974, "grad_norm": 0.31277748942375183, "learning_rate": 0.0004865147375853812, "loss": 0.25062766671180725, "step": 11264 }, { "epoch": 0.5202531060920974, "eval_bleu": 0.7260803484693739, "eval_ce_loss": 0.9400546782092961, "eval_cos_loss": 0.07318596699228298, "eval_loss": 0.2483225218169221, "eval_mse_loss": 0.14649179712210073, "eval_rec_loss": 0.0005066586955975142, "flow/cos_sim": 0.9268140568308634, "flow/improvement_ratio": 0.9762515691045213, "flow/mag_ratio_mean": 0.9157658136326429, "flow/mag_ratio_std": 0.07639177793373256, "step": 11264 }, { "epoch": 0.5202531060920974, "eval_bleu": 0.7260803484693739, "eval_ce_loss": 0.9400546782092961, "eval_cos_loss": 0.07318596699228298, "eval_loss": 0.2483225218169221, "eval_mse_loss": 0.14649179712210073, "eval_rec_loss": 0.0005066586955975142, "eval_runtime": 143.4087, "eval_samples_per_second": 195.197, "eval_steps_per_second": 3.054, "flow/cos_sim": 0.9268140568308634, "flow/improvement_ratio": 0.9762515691045213, "flow/mag_ratio_mean": 0.9157658136326429, "flow/mag_ratio_std": 0.07639177793373256, "step": 11264 }, { "epoch": 0.5320770403214632, "grad_norm": 0.7305059432983398, "learning_rate": 0.0004675131611991607, "loss": 0.2499980330467224, "step": 11520 }, { "epoch": 0.5439009745508291, "grad_norm": 0.2771352231502533, "learning_rate": 0.0004485586029984899, "loss": 0.24800576269626617, "step": 11776 }, { "epoch": 0.5557249087801949, "grad_norm": 0.2686781883239746, "learning_rate": 0.00042967849590597266, "loss": 0.24677802622318268, "step": 12032 }, { "epoch": 0.5675488430095608, "grad_norm": 0.37717506289482117, "learning_rate": 0.0004109001650911621, "loss": 0.24666211009025574, "step": 12288 }, { "epoch": 0.5675488430095608, "eval_bleu": 0.724567852062454, "eval_ce_loss": 0.9509265820185343, "eval_cos_loss": 0.07338333341663014, "eval_loss": 0.24947158049911125, "eval_mse_loss": 0.1465663725518745, "eval_rec_loss": 0.00047421499268288965, "flow/cos_sim": 0.9266166863920482, "flow/improvement_ratio": 0.9748387396607769, "flow/mag_ratio_mean": 0.9328112143631939, "flow/mag_ratio_std": 0.07196474395113993, "step": 12288 }, { "epoch": 0.5675488430095608, "eval_bleu": 0.724567852062454, "eval_ce_loss": 0.9509265820185343, "eval_cos_loss": 0.07338333341663014, "eval_loss": 0.24947158049911125, "eval_mse_loss": 0.1465663725518745, "eval_rec_loss": 0.00047421499268288965, "eval_runtime": 144.4458, "eval_samples_per_second": 193.796, "eval_steps_per_second": 3.032, "flow/cos_sim": 0.9266166863920482, "flow/improvement_ratio": 0.9748387396607769, "flow/mag_ratio_mean": 0.9328112143631939, "flow/mag_ratio_std": 0.07196474395113993, "step": 12288 }, { "epoch": 0.5793727772389267, "grad_norm": 0.7573108673095703, "learning_rate": 0.0003922507884228551, "loss": 0.24700023233890533, "step": 12544 }, { "epoch": 0.5911967114682924, "grad_norm": 0.2122378647327423, "learning_rate": 0.00037375735713457723, "loss": 0.2433316856622696, "step": 12800 }, { "epoch": 0.6030206456976583, "grad_norm": 0.22209292650222778, "learning_rate": 0.00035544663676018276, "loss": 0.246763676404953, "step": 13056 }, { "epoch": 0.6148445799270241, "grad_norm": 0.309989333152771, "learning_rate": 0.00033734512839611255, "loss": 0.24262374639511108, "step": 13312 }, { "epoch": 0.6148445799270241, "eval_bleu": 0.7300156788258321, "eval_ce_loss": 0.933146977737614, "eval_cos_loss": 0.07192521648626077, "eval_loss": 0.24540512995224564, "eval_mse_loss": 0.14446957482526837, "eval_rec_loss": 0.00042833433703537476, "flow/cos_sim": 0.9280747922315989, "flow/improvement_ratio": 0.9743193072271129, "flow/mag_ratio_mean": 0.9250430587219866, "flow/mag_ratio_std": 0.07064938510355612, "step": 13312 }, { "epoch": 0.6148445799270241, "eval_bleu": 0.7300156788258321, "eval_ce_loss": 0.933146977737614, "eval_cos_loss": 0.07192521648626077, "eval_loss": 0.24540512995224564, "eval_mse_loss": 0.14446957482526837, "eval_rec_loss": 0.00042833433703537476, "eval_runtime": 144.2347, "eval_samples_per_second": 194.079, "eval_steps_per_second": 3.037, "flow/cos_sim": 0.9280747922315989, "flow/improvement_ratio": 0.9743193072271129, "flow/mag_ratio_mean": 0.9250430587219866, "flow/mag_ratio_std": 0.07064938510355612, "step": 13312 } ], "logging_steps": 256, "max_steps": 21651, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 1024, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 64, "trial_name": null, "trial_params": null }