{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 0.5675488430095608, "eval_steps": 1024, "global_step": 12288, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.011823934229365849, "grad_norm": 0.8926580548286438, "learning_rate": 0.000498046875, "loss": 7.760124206542969, "step": 256 }, { "epoch": 0.023647868458731697, "grad_norm": 1.5264979600906372, "learning_rate": 0.000998046875, "loss": 1.7502448558807373, "step": 512 }, { "epoch": 0.03547180268809755, "grad_norm": 1.034440517425537, "learning_rate": 0.000999640996023194, "loss": 1.0943012237548828, "step": 768 }, { "epoch": 0.047295736917463395, "grad_norm": 1.2952566146850586, "learning_rate": 0.0009985588674043958, "loss": 0.9299185276031494, "step": 1024 }, { "epoch": 0.047295736917463395, "eval_bleu": 0.8108942681968176, "eval_cos_loss": 0.18627598218313635, "eval_dec_loss": 0.22058332814371476, "eval_loss": 0.8581492928065122, "eval_mse2_loss": 0.0729683047781388, "eval_mse_loss": 0.5191411869302732, "eval_rec_loss": 0.024248257404848185, "eval_var_loss": 0.0025806165721318493, "flow/cos_sim": 0.8137240362221791, "flow/improvement_ratio": 0.953395878479361, "flow/mag_ratio_mean": 0.8106630417309939, "flow/mag_ratio_std": 0.14785969192714996, "step": 1024 }, { "epoch": 0.047295736917463395, "eval_bleu": 0.8108942681968176, "eval_cos_loss": 0.18627598218313635, "eval_dec_loss": 0.22058332814371476, "eval_loss": 0.8581492928065122, "eval_mse2_loss": 0.0729683047781388, "eval_mse_loss": 0.5191411869302732, "eval_rec_loss": 0.024248257404848185, "eval_runtime": 153.8108, "eval_samples_per_second": 181.996, "eval_steps_per_second": 2.848, "eval_var_loss": 0.0025806165721318493, "flow/cos_sim": 0.8137240362221791, "flow/improvement_ratio": 0.953395878479361, "flow/mag_ratio_mean": 0.8106630417309939, "flow/mag_ratio_std": 0.14785969192714996, "step": 1024 }, { "epoch": 0.05911967114682925, "grad_norm": 1.6030019521713257, "learning_rate": 0.0009967551747861387, "loss": 0.8445956110954285, "step": 1280 }, { "epoch": 0.0709436053761951, "grad_norm": 1.6468690633773804, "learning_rate": 0.000994232528651847, "loss": 0.7937943339347839, "step": 1536 }, { "epoch": 0.08276753960556095, "grad_norm": 1.9545633792877197, "learning_rate": 0.0009909945800260092, "loss": 0.7469877600669861, "step": 1792 }, { "epoch": 0.09459147383492679, "grad_norm": 1.2975454330444336, "learning_rate": 0.0009870460151900522, "loss": 0.7277378439903259, "step": 2048 }, { "epoch": 0.09459147383492679, "eval_bleu": 0.8332413753983823, "eval_cos_loss": 0.12301718736212003, "eval_dec_loss": 0.18318539169791354, "eval_loss": 0.6979760440517234, "eval_mse2_loss": 0.056748984509134945, "eval_mse_loss": 0.43819310152095203, "eval_rec_loss": 0.006146695995894708, "eval_var_loss": 0.0014001506648651542, "flow/cos_sim": 0.8769828294782334, "flow/improvement_ratio": 0.9491692741984101, "flow/mag_ratio_mean": 0.8741380685268472, "flow/mag_ratio_std": 0.1099956316456675, "step": 2048 }, { "epoch": 0.09459147383492679, "eval_bleu": 0.8332413753983823, "eval_cos_loss": 0.12301718736212003, "eval_dec_loss": 0.18318539169791354, "eval_loss": 0.6979760440517234, "eval_mse2_loss": 0.056748984509134945, "eval_mse_loss": 0.43819310152095203, "eval_rec_loss": 0.006146695995894708, "eval_runtime": 147.7361, "eval_samples_per_second": 189.48, "eval_steps_per_second": 2.965, "eval_var_loss": 0.0014001506648651542, "flow/cos_sim": 0.8769828294782334, "flow/improvement_ratio": 0.9491692741984101, "flow/mag_ratio_mean": 0.8741380685268472, "flow/mag_ratio_std": 0.1099956316456675, "step": 2048 }, { "epoch": 0.10641540806429264, "grad_norm": 1.0802249908447266, "learning_rate": 0.0009823925488998885, "loss": 0.7090811133384705, "step": 2304 }, { "epoch": 0.1182393422936585, "grad_norm": 1.3514765501022339, "learning_rate": 0.0009770409161149525, "loss": 0.6976003646850586, "step": 2560 }, { "epoch": 0.13006327652302435, "grad_norm": 1.3863739967346191, "learning_rate": 0.0009709988622506973, "loss": 0.6783488392829895, "step": 2816 }, { "epoch": 0.1418872107523902, "grad_norm": 1.2354881763458252, "learning_rate": 0.000964275131968659, "loss": 0.6734734773635864, "step": 3072 }, { "epoch": 0.1418872107523902, "eval_bleu": 0.8490467382834135, "eval_cos_loss": 0.09506861453867393, "eval_dec_loss": 0.16530320855903707, "eval_loss": 0.6531072377342068, "eval_mse2_loss": 0.052244682587921344, "eval_mse_loss": 0.4211963676426509, "eval_rec_loss": 0.003093622522146896, "eval_var_loss": 0.0017624946489726027, "flow/cos_sim": 0.9049313971985421, "flow/improvement_ratio": 0.950364352905587, "flow/mag_ratio_mean": 0.8911371738671168, "flow/mag_ratio_std": 0.10241742183764775, "step": 3072 }, { "epoch": 0.1418872107523902, "eval_bleu": 0.8490467382834135, "eval_cos_loss": 0.09506861453867393, "eval_dec_loss": 0.16530320855903707, "eval_loss": 0.6531072377342068, "eval_mse2_loss": 0.052244682587921344, "eval_mse_loss": 0.4211963676426509, "eval_rec_loss": 0.003093622522146896, "eval_runtime": 144.2539, "eval_samples_per_second": 194.054, "eval_steps_per_second": 3.036, "eval_var_loss": 0.0017624946489726027, "flow/cos_sim": 0.9049313971985421, "flow/improvement_ratio": 0.950364352905587, "flow/mag_ratio_mean": 0.8911371738671168, "flow/mag_ratio_std": 0.10241742183764775, "step": 3072 }, { "epoch": 0.15371114498175603, "grad_norm": 1.2442104816436768, "learning_rate": 0.0009568794565203123, "loss": 0.6588751673698425, "step": 3328 }, { "epoch": 0.1655350792111219, "grad_norm": 1.032291054725647, "learning_rate": 0.0009488225396630347, "loss": 0.647803008556366, "step": 3584 }, { "epoch": 0.17735901344048774, "grad_norm": 1.2258528470993042, "learning_rate": 0.0009401160421685646, "loss": 0.6433600783348083, "step": 3840 }, { "epoch": 0.18918294766985358, "grad_norm": 1.1139024496078491, "learning_rate": 0.0009307725649463714, "loss": 0.6353314518928528, "step": 4096 }, { "epoch": 0.18918294766985358, "eval_bleu": 0.8504080018321996, "eval_cos_loss": 0.07373750197901029, "eval_dec_loss": 0.15640341749066086, "eval_loss": 0.6174904828610486, "eval_mse2_loss": 0.04752455838024616, "eval_mse_loss": 0.4026154523979039, "eval_rec_loss": 0.002115985062733488, "eval_var_loss": 0.0014573188677226027, "flow/cos_sim": 0.9262625174979641, "flow/improvement_ratio": 0.9461281280539352, "flow/mag_ratio_mean": 0.9282138170716969, "flow/mag_ratio_std": 0.0990274522844787, "step": 4096 }, { "epoch": 0.18918294766985358, "eval_bleu": 0.8504080018321996, "eval_cos_loss": 0.07373750197901029, "eval_dec_loss": 0.15640341749066086, "eval_loss": 0.6174904828610486, "eval_mse2_loss": 0.04752455838024616, "eval_mse_loss": 0.4026154523979039, "eval_rec_loss": 0.002115985062733488, "eval_runtime": 144.4539, "eval_samples_per_second": 193.785, "eval_steps_per_second": 3.032, "eval_var_loss": 0.0014573188677226027, "flow/cos_sim": 0.9262625174979641, "flow/improvement_ratio": 0.9461281280539352, "flow/mag_ratio_mean": 0.9282138170716969, "flow/mag_ratio_std": 0.0990274522844787, "step": 4096 }, { "epoch": 0.20100688189921945, "grad_norm": 1.0958069562911987, "learning_rate": 0.0009208056308063659, "loss": 0.6263965368270874, "step": 4352 }, { "epoch": 0.2128308161285853, "grad_norm": 1.2000305652618408, "learning_rate": 0.0009102296648873445, "loss": 0.618091344833374, "step": 4608 }, { "epoch": 0.22465475035795113, "grad_norm": 0.8440291285514832, "learning_rate": 0.0008990599737794927, "loss": 0.6174848079681396, "step": 4864 }, { "epoch": 0.236478684587317, "grad_norm": 0.9181346893310547, "learning_rate": 0.0008873127233711644, "loss": 0.606370747089386, "step": 5120 }, { "epoch": 0.236478684587317, "eval_bleu": 0.854693698336936, "eval_cos_loss": 0.058853609810613064, "eval_dec_loss": 0.15538246511188272, "eval_loss": 0.5900675073334071, "eval_mse2_loss": 0.04474926833488625, "eval_mse_loss": 0.3815204039828418, "eval_rec_loss": 0.0014702541673647402, "eval_var_loss": 0.0010597542540667808, "flow/cos_sim": 0.9411464126687071, "flow/improvement_ratio": 0.9471585357298046, "flow/mag_ratio_mean": 0.9485053326169105, "flow/mag_ratio_std": 0.0952613887347315, "step": 5120 }, { "epoch": 0.236478684587317, "eval_bleu": 0.854693698336936, "eval_cos_loss": 0.058853609810613064, "eval_dec_loss": 0.15538246511188272, "eval_loss": 0.5900675073334071, "eval_mse2_loss": 0.04474926833488625, "eval_mse_loss": 0.3815204039828418, "eval_rec_loss": 0.0014702541673647402, "eval_runtime": 144.8825, "eval_samples_per_second": 193.212, "eval_steps_per_second": 3.023, "eval_var_loss": 0.0010597542540667808, "flow/cos_sim": 0.9411464126687071, "flow/improvement_ratio": 0.9471585357298046, "flow/mag_ratio_mean": 0.9485053326169105, "flow/mag_ratio_std": 0.0952613887347315, "step": 5120 }, { "epoch": 0.24830261881668284, "grad_norm": 1.0105012655258179, "learning_rate": 0.0008750049154520011, "loss": 0.5982246398925781, "step": 5376 }, { "epoch": 0.2601265530460487, "grad_norm": 1.7041122913360596, "learning_rate": 0.0008621543631062487, "loss": 0.6011320948600769, "step": 5632 }, { "epoch": 0.27195048727541454, "grad_norm": 1.2530128955841064, "learning_rate": 0.0008487796649318904, "loss": 0.5908714532852173, "step": 5888 }, { "epoch": 0.2837744215047804, "grad_norm": 0.8214261531829834, "learning_rate": 0.0008349001781229053, "loss": 0.5860975980758667, "step": 6144 }, { "epoch": 0.2837744215047804, "eval_bleu": 0.8588448853802904, "eval_cos_loss": 0.050389231161371756, "eval_dec_loss": 0.14640251302147564, "eval_loss": 0.5701193273339642, "eval_mse2_loss": 0.04257897145569869, "eval_mse_loss": 0.3735292319837771, "eval_rec_loss": 0.0011887007649628058, "eval_var_loss": 0.001380990084992152, "flow/cos_sim": 0.9496107875756478, "flow/improvement_ratio": 0.9471838724667623, "flow/mag_ratio_mean": 0.9603897756365336, "flow/mag_ratio_std": 0.09184158112020253, "step": 6144 }, { "epoch": 0.2837744215047804, "eval_bleu": 0.8588448853802904, "eval_cos_loss": 0.050389231161371756, "eval_dec_loss": 0.14640251302147564, "eval_loss": 0.5701193273339642, "eval_mse2_loss": 0.04257897145569869, "eval_mse_loss": 0.3735292319837771, "eval_rec_loss": 0.0011887007649628058, "eval_runtime": 145.6643, "eval_samples_per_second": 192.175, "eval_steps_per_second": 3.007, "eval_var_loss": 0.001380990084992152, "flow/cos_sim": 0.9496107875756478, "flow/improvement_ratio": 0.9471838724667623, "flow/mag_ratio_mean": 0.9603897756365336, "flow/mag_ratio_std": 0.09184158112020253, "step": 6144 }, { "epoch": 0.2955983557341462, "grad_norm": 0.9015347361564636, "learning_rate": 0.0008205359904536107, "loss": 0.5818743705749512, "step": 6400 }, { "epoch": 0.30742228996351206, "grad_norm": 0.8061195611953735, "learning_rate": 0.0008057078912056363, "loss": 0.5712096691131592, "step": 6656 }, { "epoch": 0.3192462241928779, "grad_norm": 0.9496876001358032, "learning_rate": 0.0007904373410796086, "loss": 0.5827493667602539, "step": 6912 }, { "epoch": 0.3310701584222438, "grad_norm": 0.9318349957466125, "learning_rate": 0.0007747464411350876, "loss": 0.5713083744049072, "step": 7168 }, { "epoch": 0.3310701584222438, "eval_bleu": 0.8655904613504272, "eval_cos_loss": 0.04345733123793177, "eval_dec_loss": 0.1383160431550382, "eval_loss": 0.560146918988119, "eval_mse2_loss": 0.04110090016093973, "eval_mse_loss": 0.37529142364247203, "eval_rec_loss": 0.0010027640903040077, "eval_var_loss": 9.005472540311074e-05, "flow/cos_sim": 0.956542692227995, "flow/improvement_ratio": 0.9472023688222719, "flow/mag_ratio_mean": 0.9649291216782784, "flow/mag_ratio_std": 0.08745992025488043, "step": 7168 }, { "epoch": 0.3310701584222438, "eval_bleu": 0.8655904613504272, "eval_cos_loss": 0.04345733123793177, "eval_dec_loss": 0.1383160431550382, "eval_loss": 0.560146918988119, "eval_mse2_loss": 0.04110090016093973, "eval_mse_loss": 0.37529142364247203, "eval_rec_loss": 0.0010027640903040077, "eval_runtime": 147.3557, "eval_samples_per_second": 189.969, "eval_steps_per_second": 2.972, "eval_var_loss": 9.005472540311074e-05, "flow/cos_sim": 0.956542692227995, "flow/improvement_ratio": 0.9472023688222719, "flow/mag_ratio_mean": 0.9649291216782784, "flow/mag_ratio_std": 0.08745992025488043, "step": 7168 }, { "epoch": 0.34289409265160964, "grad_norm": 1.0284937620162964, "learning_rate": 0.000758657900803716, "loss": 0.5740544199943542, "step": 7424 }, { "epoch": 0.3547180268809755, "grad_norm": 0.7206848859786987, "learning_rate": 0.000742195005021869, "loss": 0.5760706067085266, "step": 7680 }, { "epoch": 0.3665419611103413, "grad_norm": 1.221917748451233, "learning_rate": 0.0007253815805303786, "loss": 0.566294252872467, "step": 7936 }, { "epoch": 0.37836589533970716, "grad_norm": 0.8819605708122253, "learning_rate": 0.0007082419613901028, "loss": 0.5628067851066589, "step": 8192 }, { "epoch": 0.37836589533970716, "eval_bleu": 0.871946148436791, "eval_cos_loss": 0.03890436059331785, "eval_dec_loss": 0.1332924633104031, "eval_loss": 0.5495835452183196, "eval_mse2_loss": 0.040371610187674496, "eval_mse_loss": 0.3710057751773155, "eval_rec_loss": 0.0008419993847885518, "eval_var_loss": 0.0001812590855986016, "flow/cos_sim": 0.9610956558898159, "flow/improvement_ratio": 0.9478674083267717, "flow/mag_ratio_mean": 0.9691984739205609, "flow/mag_ratio_std": 0.08465842693431737, "step": 8192 }, { "epoch": 0.37836589533970716, "eval_bleu": 0.871946148436791, "eval_cos_loss": 0.03890436059331785, "eval_dec_loss": 0.1332924633104031, "eval_loss": 0.5495835452183196, "eval_mse2_loss": 0.040371610187674496, "eval_mse_loss": 0.3710057751773155, "eval_rec_loss": 0.0008419993847885518, "eval_runtime": 146.7514, "eval_samples_per_second": 190.751, "eval_steps_per_second": 2.985, "eval_var_loss": 0.0001812590855986016, "flow/cos_sim": 0.9610956558898159, "flow/improvement_ratio": 0.9478674083267717, "flow/mag_ratio_mean": 0.9691984739205609, "flow/mag_ratio_std": 0.08465842693431737, "step": 8192 }, { "epoch": 0.390189829569073, "grad_norm": 1.2066078186035156, "learning_rate": 0.0006908009537632514, "loss": 0.5644704699516296, "step": 8448 }, { "epoch": 0.4020137637984389, "grad_norm": 1.2743791341781616, "learning_rate": 0.0006730838000114403, "loss": 0.5624759197235107, "step": 8704 }, { "epoch": 0.41383769802780473, "grad_norm": 0.6424040198326111, "learning_rate": 0.0006551161421624341, "loss": 0.5654159188270569, "step": 8960 }, { "epoch": 0.4256616322571706, "grad_norm": 1.0390995740890503, "learning_rate": 0.0006369239847984517, "loss": 0.5563592910766602, "step": 9216 }, { "epoch": 0.4256616322571706, "eval_bleu": 0.8641208937990813, "eval_cos_loss": 0.034672807719235275, "eval_dec_loss": 0.14222022919962396, "eval_loss": 0.5460346293503835, "eval_mse2_loss": 0.03956130868223704, "eval_mse_loss": 0.3584019422667212, "eval_rec_loss": 0.0007127871282784625, "eval_var_loss": 0.0016710812642694063, "flow/cos_sim": 0.965327212663546, "flow/improvement_ratio": 0.946786184561307, "flow/mag_ratio_mean": 0.9734560983366074, "flow/mag_ratio_std": 0.08033725547872178, "step": 9216 }, { "epoch": 0.4256616322571706, "eval_bleu": 0.8641208937990813, "eval_cos_loss": 0.034672807719235275, "eval_dec_loss": 0.14222022919962396, "eval_loss": 0.5460346293503835, "eval_mse2_loss": 0.03956130868223704, "eval_mse_loss": 0.3584019422667212, "eval_rec_loss": 0.0007127871282784625, "eval_runtime": 146.177, "eval_samples_per_second": 191.501, "eval_steps_per_second": 2.996, "eval_var_loss": 0.0016710812642694063, "flow/cos_sim": 0.965327212663546, "flow/improvement_ratio": 0.946786184561307, "flow/mag_ratio_mean": 0.9734560983366074, "flow/mag_ratio_std": 0.08033725547872178, "step": 9216 }, { "epoch": 0.4374855664865364, "grad_norm": 1.0673410892486572, "learning_rate": 0.0006185336574197479, "loss": 0.55131596326828, "step": 9472 }, { "epoch": 0.44930950071590225, "grad_norm": 1.1385674476623535, "learning_rate": 0.0005999717763379407, "loss": 0.5542811155319214, "step": 9728 }, { "epoch": 0.4611334349452681, "grad_norm": 1.3084577322006226, "learning_rate": 0.0005812652061542363, "loss": 0.5522482395172119, "step": 9984 }, { "epoch": 0.472957369174634, "grad_norm": 0.9078991413116455, "learning_rate": 0.0005624410208783071, "loss": 0.5514112114906311, "step": 10240 }, { "epoch": 0.472957369174634, "eval_bleu": 0.8714027910618674, "eval_cos_loss": 0.031159216023505278, "eval_dec_loss": 0.13065314148744084, "eval_loss": 0.5320979358126584, "eval_mse2_loss": 0.038692950696331455, "eval_mse_loss": 0.3575415439121255, "eval_rec_loss": 0.0006259792361515585, "eval_var_loss": 0.001468397166630993, "flow/cos_sim": 0.968840807404148, "flow/improvement_ratio": 0.9490241132098246, "flow/mag_ratio_mean": 0.9646270977307673, "flow/mag_ratio_std": 0.07568225521407171, "step": 10240 }, { "epoch": 0.472957369174634, "eval_bleu": 0.8714027910618674, "eval_cos_loss": 0.031159216023505278, "eval_dec_loss": 0.13065314148744084, "eval_loss": 0.5320979358126584, "eval_mse2_loss": 0.038692950696331455, "eval_mse_loss": 0.3575415439121255, "eval_rec_loss": 0.0006259792361515585, "eval_runtime": 147.3748, "eval_samples_per_second": 189.944, "eval_steps_per_second": 2.972, "eval_var_loss": 0.001468397166630993, "flow/cos_sim": 0.968840807404148, "flow/improvement_ratio": 0.9490241132098246, "flow/mag_ratio_mean": 0.9646270977307673, "flow/mag_ratio_std": 0.07568225521407171, "step": 10240 }, { "epoch": 0.48478130340399983, "grad_norm": 1.3058608770370483, "learning_rate": 0.0005435264647440881, "loss": 0.547296941280365, "step": 10496 }, { "epoch": 0.49660523763336567, "grad_norm": 1.0200841426849365, "learning_rate": 0.000524548912779213, "loss": 0.544040322303772, "step": 10752 }, { "epoch": 0.5084291718627315, "grad_norm": 1.1076935529708862, "learning_rate": 0.0005055358311851499, "loss": 0.5454155206680298, "step": 11008 }, { "epoch": 0.5202531060920974, "grad_norm": 0.8338369727134705, "learning_rate": 0.0004865147375853812, "loss": 0.5434398651123047, "step": 11264 }, { "epoch": 0.5202531060920974, "eval_bleu": 0.8728636919662941, "eval_cos_loss": 0.029134724701682456, "eval_dec_loss": 0.130046037353201, "eval_loss": 0.5287471506829675, "eval_mse2_loss": 0.037880827574969425, "eval_mse_loss": 0.3566115467243543, "eval_rec_loss": 0.0005072098316288007, "eval_var_loss": 0.0007880572314676084, "flow/cos_sim": 0.9708652933166452, "flow/improvement_ratio": 0.9513337348149792, "flow/mag_ratio_mean": 0.9743897437232815, "flow/mag_ratio_std": 0.07394225260914733, "step": 11264 }, { "epoch": 0.5202531060920974, "eval_bleu": 0.8728636919662941, "eval_cos_loss": 0.029134724701682456, "eval_dec_loss": 0.130046037353201, "eval_loss": 0.5287471506829675, "eval_mse2_loss": 0.037880827574969425, "eval_mse_loss": 0.3566115467243543, "eval_rec_loss": 0.0005072098316288007, "eval_runtime": 147.0551, "eval_samples_per_second": 190.357, "eval_steps_per_second": 2.978, "eval_var_loss": 0.0007880572314676084, "flow/cos_sim": 0.9708652933166452, "flow/improvement_ratio": 0.9513337348149792, "flow/mag_ratio_mean": 0.9743897437232815, "flow/mag_ratio_std": 0.07394225260914733, "step": 11264 }, { "epoch": 0.5320770403214632, "grad_norm": 1.2937544584274292, "learning_rate": 0.0004675131611991607, "loss": 0.5423741936683655, "step": 11520 }, { "epoch": 0.5439009745508291, "grad_norm": 0.9950433373451233, "learning_rate": 0.0004485586029984899, "loss": 0.536012589931488, "step": 11776 }, { "epoch": 0.5557249087801949, "grad_norm": 0.9091536402702332, "learning_rate": 0.00042967849590597266, "loss": 0.5319453477859497, "step": 12032 }, { "epoch": 0.5675488430095608, "grad_norm": 1.1773775815963745, "learning_rate": 0.0004109001650911621, "loss": 0.5343883037567139, "step": 12288 }, { "epoch": 0.5675488430095608, "eval_bleu": 0.8703424178205617, "eval_cos_loss": 0.026759936142559736, "eval_dec_loss": 0.1364484317855884, "eval_loss": 0.5287964605305293, "eval_mse2_loss": 0.03801968024796955, "eval_mse_loss": 0.34965787868793696, "eval_rec_loss": 0.00048410188376538874, "eval_var_loss": 0.0015103762552618437, "flow/cos_sim": 0.9732400857966784, "flow/improvement_ratio": 0.9476525126254722, "flow/mag_ratio_mean": 0.9758574831975649, "flow/mag_ratio_std": 0.06964536890659702, "step": 12288 }, { "epoch": 0.5675488430095608, "eval_bleu": 0.8703424178205617, "eval_cos_loss": 0.026759936142559736, "eval_dec_loss": 0.1364484317855884, "eval_loss": 0.5287964605305293, "eval_mse2_loss": 0.03801968024796955, "eval_mse_loss": 0.34965787868793696, "eval_rec_loss": 0.00048410188376538874, "eval_runtime": 146.8515, "eval_samples_per_second": 190.621, "eval_steps_per_second": 2.983, "eval_var_loss": 0.0015103762552618437, "flow/cos_sim": 0.9732400857966784, "flow/improvement_ratio": 0.9476525126254722, "flow/mag_ratio_mean": 0.9758574831975649, "flow/mag_ratio_std": 0.06964536890659702, "step": 12288 } ], "logging_steps": 256, "max_steps": 21651, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 1024, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 64, "trial_name": null, "trial_params": null }