| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.9932104752667313, |
| "eval_steps": 1024, |
| "global_step": 21504, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.011823934229365849, |
| "grad_norm": 0.8926580548286438, |
| "learning_rate": 0.000498046875, |
| "loss": 7.760124206542969, |
| "step": 256 |
| }, |
| { |
| "epoch": 0.023647868458731697, |
| "grad_norm": 1.5264979600906372, |
| "learning_rate": 0.000998046875, |
| "loss": 1.7502448558807373, |
| "step": 512 |
| }, |
| { |
| "epoch": 0.03547180268809755, |
| "grad_norm": 1.034440517425537, |
| "learning_rate": 0.000999640996023194, |
| "loss": 1.0943012237548828, |
| "step": 768 |
| }, |
| { |
| "epoch": 0.047295736917463395, |
| "grad_norm": 1.2952566146850586, |
| "learning_rate": 0.0009985588674043958, |
| "loss": 0.9299185276031494, |
| "step": 1024 |
| }, |
| { |
| "epoch": 0.047295736917463395, |
| "eval_bleu": 0.8108942681968176, |
| "eval_cos_loss": 0.18627598218313635, |
| "eval_dec_loss": 0.22058332814371476, |
| "eval_loss": 0.8581492928065122, |
| "eval_mse2_loss": 0.0729683047781388, |
| "eval_mse_loss": 0.5191411869302732, |
| "eval_rec_loss": 0.024248257404848185, |
| "eval_var_loss": 0.0025806165721318493, |
| "flow/cos_sim": 0.8137240362221791, |
| "flow/improvement_ratio": 0.953395878479361, |
| "flow/mag_ratio_mean": 0.8106630417309939, |
| "flow/mag_ratio_std": 0.14785969192714996, |
| "step": 1024 |
| }, |
| { |
| "epoch": 0.047295736917463395, |
| "eval_bleu": 0.8108942681968176, |
| "eval_cos_loss": 0.18627598218313635, |
| "eval_dec_loss": 0.22058332814371476, |
| "eval_loss": 0.8581492928065122, |
| "eval_mse2_loss": 0.0729683047781388, |
| "eval_mse_loss": 0.5191411869302732, |
| "eval_rec_loss": 0.024248257404848185, |
| "eval_runtime": 153.8108, |
| "eval_samples_per_second": 181.996, |
| "eval_steps_per_second": 2.848, |
| "eval_var_loss": 0.0025806165721318493, |
| "flow/cos_sim": 0.8137240362221791, |
| "flow/improvement_ratio": 0.953395878479361, |
| "flow/mag_ratio_mean": 0.8106630417309939, |
| "flow/mag_ratio_std": 0.14785969192714996, |
| "step": 1024 |
| }, |
| { |
| "epoch": 0.05911967114682925, |
| "grad_norm": 1.6030019521713257, |
| "learning_rate": 0.0009967551747861387, |
| "loss": 0.8445956110954285, |
| "step": 1280 |
| }, |
| { |
| "epoch": 0.0709436053761951, |
| "grad_norm": 1.6468690633773804, |
| "learning_rate": 0.000994232528651847, |
| "loss": 0.7937943339347839, |
| "step": 1536 |
| }, |
| { |
| "epoch": 0.08276753960556095, |
| "grad_norm": 1.9545633792877197, |
| "learning_rate": 0.0009909945800260092, |
| "loss": 0.7469877600669861, |
| "step": 1792 |
| }, |
| { |
| "epoch": 0.09459147383492679, |
| "grad_norm": 1.2975454330444336, |
| "learning_rate": 0.0009870460151900522, |
| "loss": 0.7277378439903259, |
| "step": 2048 |
| }, |
| { |
| "epoch": 0.09459147383492679, |
| "eval_bleu": 0.8332413753983823, |
| "eval_cos_loss": 0.12301718736212003, |
| "eval_dec_loss": 0.18318539169791354, |
| "eval_loss": 0.6979760440517234, |
| "eval_mse2_loss": 0.056748984509134945, |
| "eval_mse_loss": 0.43819310152095203, |
| "eval_rec_loss": 0.006146695995894708, |
| "eval_var_loss": 0.0014001506648651542, |
| "flow/cos_sim": 0.8769828294782334, |
| "flow/improvement_ratio": 0.9491692741984101, |
| "flow/mag_ratio_mean": 0.8741380685268472, |
| "flow/mag_ratio_std": 0.1099956316456675, |
| "step": 2048 |
| }, |
| { |
| "epoch": 0.09459147383492679, |
| "eval_bleu": 0.8332413753983823, |
| "eval_cos_loss": 0.12301718736212003, |
| "eval_dec_loss": 0.18318539169791354, |
| "eval_loss": 0.6979760440517234, |
| "eval_mse2_loss": 0.056748984509134945, |
| "eval_mse_loss": 0.43819310152095203, |
| "eval_rec_loss": 0.006146695995894708, |
| "eval_runtime": 147.7361, |
| "eval_samples_per_second": 189.48, |
| "eval_steps_per_second": 2.965, |
| "eval_var_loss": 0.0014001506648651542, |
| "flow/cos_sim": 0.8769828294782334, |
| "flow/improvement_ratio": 0.9491692741984101, |
| "flow/mag_ratio_mean": 0.8741380685268472, |
| "flow/mag_ratio_std": 0.1099956316456675, |
| "step": 2048 |
| }, |
| { |
| "epoch": 0.10641540806429264, |
| "grad_norm": 1.0802249908447266, |
| "learning_rate": 0.0009823925488998885, |
| "loss": 0.7090811133384705, |
| "step": 2304 |
| }, |
| { |
| "epoch": 0.1182393422936585, |
| "grad_norm": 1.3514765501022339, |
| "learning_rate": 0.0009770409161149525, |
| "loss": 0.6976003646850586, |
| "step": 2560 |
| }, |
| { |
| "epoch": 0.13006327652302435, |
| "grad_norm": 1.3863739967346191, |
| "learning_rate": 0.0009709988622506973, |
| "loss": 0.6783488392829895, |
| "step": 2816 |
| }, |
| { |
| "epoch": 0.1418872107523902, |
| "grad_norm": 1.2354881763458252, |
| "learning_rate": 0.000964275131968659, |
| "loss": 0.6734734773635864, |
| "step": 3072 |
| }, |
| { |
| "epoch": 0.1418872107523902, |
| "eval_bleu": 0.8490467382834135, |
| "eval_cos_loss": 0.09506861453867393, |
| "eval_dec_loss": 0.16530320855903707, |
| "eval_loss": 0.6531072377342068, |
| "eval_mse2_loss": 0.052244682587921344, |
| "eval_mse_loss": 0.4211963676426509, |
| "eval_rec_loss": 0.003093622522146896, |
| "eval_var_loss": 0.0017624946489726027, |
| "flow/cos_sim": 0.9049313971985421, |
| "flow/improvement_ratio": 0.950364352905587, |
| "flow/mag_ratio_mean": 0.8911371738671168, |
| "flow/mag_ratio_std": 0.10241742183764775, |
| "step": 3072 |
| }, |
| { |
| "epoch": 0.1418872107523902, |
| "eval_bleu": 0.8490467382834135, |
| "eval_cos_loss": 0.09506861453867393, |
| "eval_dec_loss": 0.16530320855903707, |
| "eval_loss": 0.6531072377342068, |
| "eval_mse2_loss": 0.052244682587921344, |
| "eval_mse_loss": 0.4211963676426509, |
| "eval_rec_loss": 0.003093622522146896, |
| "eval_runtime": 144.2539, |
| "eval_samples_per_second": 194.054, |
| "eval_steps_per_second": 3.036, |
| "eval_var_loss": 0.0017624946489726027, |
| "flow/cos_sim": 0.9049313971985421, |
| "flow/improvement_ratio": 0.950364352905587, |
| "flow/mag_ratio_mean": 0.8911371738671168, |
| "flow/mag_ratio_std": 0.10241742183764775, |
| "step": 3072 |
| }, |
| { |
| "epoch": 0.15371114498175603, |
| "grad_norm": 1.2442104816436768, |
| "learning_rate": 0.0009568794565203123, |
| "loss": 0.6588751673698425, |
| "step": 3328 |
| }, |
| { |
| "epoch": 0.1655350792111219, |
| "grad_norm": 1.032291054725647, |
| "learning_rate": 0.0009488225396630347, |
| "loss": 0.647803008556366, |
| "step": 3584 |
| }, |
| { |
| "epoch": 0.17735901344048774, |
| "grad_norm": 1.2258528470993042, |
| "learning_rate": 0.0009401160421685646, |
| "loss": 0.6433600783348083, |
| "step": 3840 |
| }, |
| { |
| "epoch": 0.18918294766985358, |
| "grad_norm": 1.1139024496078491, |
| "learning_rate": 0.0009307725649463714, |
| "loss": 0.6353314518928528, |
| "step": 4096 |
| }, |
| { |
| "epoch": 0.18918294766985358, |
| "eval_bleu": 0.8504080018321996, |
| "eval_cos_loss": 0.07373750197901029, |
| "eval_dec_loss": 0.15640341749066086, |
| "eval_loss": 0.6174904828610486, |
| "eval_mse2_loss": 0.04752455838024616, |
| "eval_mse_loss": 0.4026154523979039, |
| "eval_rec_loss": 0.002115985062733488, |
| "eval_var_loss": 0.0014573188677226027, |
| "flow/cos_sim": 0.9262625174979641, |
| "flow/improvement_ratio": 0.9461281280539352, |
| "flow/mag_ratio_mean": 0.9282138170716969, |
| "flow/mag_ratio_std": 0.0990274522844787, |
| "step": 4096 |
| }, |
| { |
| "epoch": 0.18918294766985358, |
| "eval_bleu": 0.8504080018321996, |
| "eval_cos_loss": 0.07373750197901029, |
| "eval_dec_loss": 0.15640341749066086, |
| "eval_loss": 0.6174904828610486, |
| "eval_mse2_loss": 0.04752455838024616, |
| "eval_mse_loss": 0.4026154523979039, |
| "eval_rec_loss": 0.002115985062733488, |
| "eval_runtime": 144.4539, |
| "eval_samples_per_second": 193.785, |
| "eval_steps_per_second": 3.032, |
| "eval_var_loss": 0.0014573188677226027, |
| "flow/cos_sim": 0.9262625174979641, |
| "flow/improvement_ratio": 0.9461281280539352, |
| "flow/mag_ratio_mean": 0.9282138170716969, |
| "flow/mag_ratio_std": 0.0990274522844787, |
| "step": 4096 |
| }, |
| { |
| "epoch": 0.20100688189921945, |
| "grad_norm": 1.0958069562911987, |
| "learning_rate": 0.0009208056308063659, |
| "loss": 0.6263965368270874, |
| "step": 4352 |
| }, |
| { |
| "epoch": 0.2128308161285853, |
| "grad_norm": 1.2000305652618408, |
| "learning_rate": 0.0009102296648873445, |
| "loss": 0.618091344833374, |
| "step": 4608 |
| }, |
| { |
| "epoch": 0.22465475035795113, |
| "grad_norm": 0.8440291285514832, |
| "learning_rate": 0.0008990599737794927, |
| "loss": 0.6174848079681396, |
| "step": 4864 |
| }, |
| { |
| "epoch": 0.236478684587317, |
| "grad_norm": 0.9181346893310547, |
| "learning_rate": 0.0008873127233711644, |
| "loss": 0.606370747089386, |
| "step": 5120 |
| }, |
| { |
| "epoch": 0.236478684587317, |
| "eval_bleu": 0.854693698336936, |
| "eval_cos_loss": 0.058853609810613064, |
| "eval_dec_loss": 0.15538246511188272, |
| "eval_loss": 0.5900675073334071, |
| "eval_mse2_loss": 0.04474926833488625, |
| "eval_mse_loss": 0.3815204039828418, |
| "eval_rec_loss": 0.0014702541673647402, |
| "eval_var_loss": 0.0010597542540667808, |
| "flow/cos_sim": 0.9411464126687071, |
| "flow/improvement_ratio": 0.9471585357298046, |
| "flow/mag_ratio_mean": 0.9485053326169105, |
| "flow/mag_ratio_std": 0.0952613887347315, |
| "step": 5120 |
| }, |
| { |
| "epoch": 0.236478684587317, |
| "eval_bleu": 0.854693698336936, |
| "eval_cos_loss": 0.058853609810613064, |
| "eval_dec_loss": 0.15538246511188272, |
| "eval_loss": 0.5900675073334071, |
| "eval_mse2_loss": 0.04474926833488625, |
| "eval_mse_loss": 0.3815204039828418, |
| "eval_rec_loss": 0.0014702541673647402, |
| "eval_runtime": 144.8825, |
| "eval_samples_per_second": 193.212, |
| "eval_steps_per_second": 3.023, |
| "eval_var_loss": 0.0010597542540667808, |
| "flow/cos_sim": 0.9411464126687071, |
| "flow/improvement_ratio": 0.9471585357298046, |
| "flow/mag_ratio_mean": 0.9485053326169105, |
| "flow/mag_ratio_std": 0.0952613887347315, |
| "step": 5120 |
| }, |
| { |
| "epoch": 0.24830261881668284, |
| "grad_norm": 1.0105012655258179, |
| "learning_rate": 0.0008750049154520011, |
| "loss": 0.5982246398925781, |
| "step": 5376 |
| }, |
| { |
| "epoch": 0.2601265530460487, |
| "grad_norm": 1.7041122913360596, |
| "learning_rate": 0.0008621543631062487, |
| "loss": 0.6011320948600769, |
| "step": 5632 |
| }, |
| { |
| "epoch": 0.27195048727541454, |
| "grad_norm": 1.2530128955841064, |
| "learning_rate": 0.0008487796649318904, |
| "loss": 0.5908714532852173, |
| "step": 5888 |
| }, |
| { |
| "epoch": 0.2837744215047804, |
| "grad_norm": 0.8214261531829834, |
| "learning_rate": 0.0008349001781229053, |
| "loss": 0.5860975980758667, |
| "step": 6144 |
| }, |
| { |
| "epoch": 0.2837744215047804, |
| "eval_bleu": 0.8588448853802904, |
| "eval_cos_loss": 0.050389231161371756, |
| "eval_dec_loss": 0.14640251302147564, |
| "eval_loss": 0.5701193273339642, |
| "eval_mse2_loss": 0.04257897145569869, |
| "eval_mse_loss": 0.3735292319837771, |
| "eval_rec_loss": 0.0011887007649628058, |
| "eval_var_loss": 0.001380990084992152, |
| "flow/cos_sim": 0.9496107875756478, |
| "flow/improvement_ratio": 0.9471838724667623, |
| "flow/mag_ratio_mean": 0.9603897756365336, |
| "flow/mag_ratio_std": 0.09184158112020253, |
| "step": 6144 |
| }, |
| { |
| "epoch": 0.2837744215047804, |
| "eval_bleu": 0.8588448853802904, |
| "eval_cos_loss": 0.050389231161371756, |
| "eval_dec_loss": 0.14640251302147564, |
| "eval_loss": 0.5701193273339642, |
| "eval_mse2_loss": 0.04257897145569869, |
| "eval_mse_loss": 0.3735292319837771, |
| "eval_rec_loss": 0.0011887007649628058, |
| "eval_runtime": 145.6643, |
| "eval_samples_per_second": 192.175, |
| "eval_steps_per_second": 3.007, |
| "eval_var_loss": 0.001380990084992152, |
| "flow/cos_sim": 0.9496107875756478, |
| "flow/improvement_ratio": 0.9471838724667623, |
| "flow/mag_ratio_mean": 0.9603897756365336, |
| "flow/mag_ratio_std": 0.09184158112020253, |
| "step": 6144 |
| }, |
| { |
| "epoch": 0.2955983557341462, |
| "grad_norm": 0.9015347361564636, |
| "learning_rate": 0.0008205359904536107, |
| "loss": 0.5818743705749512, |
| "step": 6400 |
| }, |
| { |
| "epoch": 0.30742228996351206, |
| "grad_norm": 0.8061195611953735, |
| "learning_rate": 0.0008057078912056363, |
| "loss": 0.5712096691131592, |
| "step": 6656 |
| }, |
| { |
| "epoch": 0.3192462241928779, |
| "grad_norm": 0.9496876001358032, |
| "learning_rate": 0.0007904373410796086, |
| "loss": 0.5827493667602539, |
| "step": 6912 |
| }, |
| { |
| "epoch": 0.3310701584222438, |
| "grad_norm": 0.9318349957466125, |
| "learning_rate": 0.0007747464411350876, |
| "loss": 0.5713083744049072, |
| "step": 7168 |
| }, |
| { |
| "epoch": 0.3310701584222438, |
| "eval_bleu": 0.8655904613504272, |
| "eval_cos_loss": 0.04345733123793177, |
| "eval_dec_loss": 0.1383160431550382, |
| "eval_loss": 0.560146918988119, |
| "eval_mse2_loss": 0.04110090016093973, |
| "eval_mse_loss": 0.37529142364247203, |
| "eval_rec_loss": 0.0010027640903040077, |
| "eval_var_loss": 9.005472540311074e-05, |
| "flow/cos_sim": 0.956542692227995, |
| "flow/improvement_ratio": 0.9472023688222719, |
| "flow/mag_ratio_mean": 0.9649291216782784, |
| "flow/mag_ratio_std": 0.08745992025488043, |
| "step": 7168 |
| }, |
| { |
| "epoch": 0.3310701584222438, |
| "eval_bleu": 0.8655904613504272, |
| "eval_cos_loss": 0.04345733123793177, |
| "eval_dec_loss": 0.1383160431550382, |
| "eval_loss": 0.560146918988119, |
| "eval_mse2_loss": 0.04110090016093973, |
| "eval_mse_loss": 0.37529142364247203, |
| "eval_rec_loss": 0.0010027640903040077, |
| "eval_runtime": 147.3557, |
| "eval_samples_per_second": 189.969, |
| "eval_steps_per_second": 2.972, |
| "eval_var_loss": 9.005472540311074e-05, |
| "flow/cos_sim": 0.956542692227995, |
| "flow/improvement_ratio": 0.9472023688222719, |
| "flow/mag_ratio_mean": 0.9649291216782784, |
| "flow/mag_ratio_std": 0.08745992025488043, |
| "step": 7168 |
| }, |
| { |
| "epoch": 0.34289409265160964, |
| "grad_norm": 1.0284937620162964, |
| "learning_rate": 0.000758657900803716, |
| "loss": 0.5740544199943542, |
| "step": 7424 |
| }, |
| { |
| "epoch": 0.3547180268809755, |
| "grad_norm": 0.7206848859786987, |
| "learning_rate": 0.000742195005021869, |
| "loss": 0.5760706067085266, |
| "step": 7680 |
| }, |
| { |
| "epoch": 0.3665419611103413, |
| "grad_norm": 1.221917748451233, |
| "learning_rate": 0.0007253815805303786, |
| "loss": 0.566294252872467, |
| "step": 7936 |
| }, |
| { |
| "epoch": 0.37836589533970716, |
| "grad_norm": 0.8819605708122253, |
| "learning_rate": 0.0007082419613901028, |
| "loss": 0.5628067851066589, |
| "step": 8192 |
| }, |
| { |
| "epoch": 0.37836589533970716, |
| "eval_bleu": 0.871946148436791, |
| "eval_cos_loss": 0.03890436059331785, |
| "eval_dec_loss": 0.1332924633104031, |
| "eval_loss": 0.5495835452183196, |
| "eval_mse2_loss": 0.040371610187674496, |
| "eval_mse_loss": 0.3710057751773155, |
| "eval_rec_loss": 0.0008419993847885518, |
| "eval_var_loss": 0.0001812590855986016, |
| "flow/cos_sim": 0.9610956558898159, |
| "flow/improvement_ratio": 0.9478674083267717, |
| "flow/mag_ratio_mean": 0.9691984739205609, |
| "flow/mag_ratio_std": 0.08465842693431737, |
| "step": 8192 |
| }, |
| { |
| "epoch": 0.37836589533970716, |
| "eval_bleu": 0.871946148436791, |
| "eval_cos_loss": 0.03890436059331785, |
| "eval_dec_loss": 0.1332924633104031, |
| "eval_loss": 0.5495835452183196, |
| "eval_mse2_loss": 0.040371610187674496, |
| "eval_mse_loss": 0.3710057751773155, |
| "eval_rec_loss": 0.0008419993847885518, |
| "eval_runtime": 146.7514, |
| "eval_samples_per_second": 190.751, |
| "eval_steps_per_second": 2.985, |
| "eval_var_loss": 0.0001812590855986016, |
| "flow/cos_sim": 0.9610956558898159, |
| "flow/improvement_ratio": 0.9478674083267717, |
| "flow/mag_ratio_mean": 0.9691984739205609, |
| "flow/mag_ratio_std": 0.08465842693431737, |
| "step": 8192 |
| }, |
| { |
| "epoch": 0.390189829569073, |
| "grad_norm": 1.2066078186035156, |
| "learning_rate": 0.0006908009537632514, |
| "loss": 0.5644704699516296, |
| "step": 8448 |
| }, |
| { |
| "epoch": 0.4020137637984389, |
| "grad_norm": 1.2743791341781616, |
| "learning_rate": 0.0006730838000114403, |
| "loss": 0.5624759197235107, |
| "step": 8704 |
| }, |
| { |
| "epoch": 0.41383769802780473, |
| "grad_norm": 0.6424040198326111, |
| "learning_rate": 0.0006551161421624341, |
| "loss": 0.5654159188270569, |
| "step": 8960 |
| }, |
| { |
| "epoch": 0.4256616322571706, |
| "grad_norm": 1.0390995740890503, |
| "learning_rate": 0.0006369239847984517, |
| "loss": 0.5563592910766602, |
| "step": 9216 |
| }, |
| { |
| "epoch": 0.4256616322571706, |
| "eval_bleu": 0.8641208937990813, |
| "eval_cos_loss": 0.034672807719235275, |
| "eval_dec_loss": 0.14222022919962396, |
| "eval_loss": 0.5460346293503835, |
| "eval_mse2_loss": 0.03956130868223704, |
| "eval_mse_loss": 0.3584019422667212, |
| "eval_rec_loss": 0.0007127871282784625, |
| "eval_var_loss": 0.0016710812642694063, |
| "flow/cos_sim": 0.965327212663546, |
| "flow/improvement_ratio": 0.946786184561307, |
| "flow/mag_ratio_mean": 0.9734560983366074, |
| "flow/mag_ratio_std": 0.08033725547872178, |
| "step": 9216 |
| }, |
| { |
| "epoch": 0.4256616322571706, |
| "eval_bleu": 0.8641208937990813, |
| "eval_cos_loss": 0.034672807719235275, |
| "eval_dec_loss": 0.14222022919962396, |
| "eval_loss": 0.5460346293503835, |
| "eval_mse2_loss": 0.03956130868223704, |
| "eval_mse_loss": 0.3584019422667212, |
| "eval_rec_loss": 0.0007127871282784625, |
| "eval_runtime": 146.177, |
| "eval_samples_per_second": 191.501, |
| "eval_steps_per_second": 2.996, |
| "eval_var_loss": 0.0016710812642694063, |
| "flow/cos_sim": 0.965327212663546, |
| "flow/improvement_ratio": 0.946786184561307, |
| "flow/mag_ratio_mean": 0.9734560983366074, |
| "flow/mag_ratio_std": 0.08033725547872178, |
| "step": 9216 |
| }, |
| { |
| "epoch": 0.4374855664865364, |
| "grad_norm": 1.0673410892486572, |
| "learning_rate": 0.0006185336574197479, |
| "loss": 0.55131596326828, |
| "step": 9472 |
| }, |
| { |
| "epoch": 0.44930950071590225, |
| "grad_norm": 1.1385674476623535, |
| "learning_rate": 0.0005999717763379407, |
| "loss": 0.5542811155319214, |
| "step": 9728 |
| }, |
| { |
| "epoch": 0.4611334349452681, |
| "grad_norm": 1.3084577322006226, |
| "learning_rate": 0.0005812652061542363, |
| "loss": 0.5522482395172119, |
| "step": 9984 |
| }, |
| { |
| "epoch": 0.472957369174634, |
| "grad_norm": 0.9078991413116455, |
| "learning_rate": 0.0005624410208783071, |
| "loss": 0.5514112114906311, |
| "step": 10240 |
| }, |
| { |
| "epoch": 0.472957369174634, |
| "eval_bleu": 0.8714027910618674, |
| "eval_cos_loss": 0.031159216023505278, |
| "eval_dec_loss": 0.13065314148744084, |
| "eval_loss": 0.5320979358126584, |
| "eval_mse2_loss": 0.038692950696331455, |
| "eval_mse_loss": 0.3575415439121255, |
| "eval_rec_loss": 0.0006259792361515585, |
| "eval_var_loss": 0.001468397166630993, |
| "flow/cos_sim": 0.968840807404148, |
| "flow/improvement_ratio": 0.9490241132098246, |
| "flow/mag_ratio_mean": 0.9646270977307673, |
| "flow/mag_ratio_std": 0.07568225521407171, |
| "step": 10240 |
| }, |
| { |
| "epoch": 0.472957369174634, |
| "eval_bleu": 0.8714027910618674, |
| "eval_cos_loss": 0.031159216023505278, |
| "eval_dec_loss": 0.13065314148744084, |
| "eval_loss": 0.5320979358126584, |
| "eval_mse2_loss": 0.038692950696331455, |
| "eval_mse_loss": 0.3575415439121255, |
| "eval_rec_loss": 0.0006259792361515585, |
| "eval_runtime": 147.3748, |
| "eval_samples_per_second": 189.944, |
| "eval_steps_per_second": 2.972, |
| "eval_var_loss": 0.001468397166630993, |
| "flow/cos_sim": 0.968840807404148, |
| "flow/improvement_ratio": 0.9490241132098246, |
| "flow/mag_ratio_mean": 0.9646270977307673, |
| "flow/mag_ratio_std": 0.07568225521407171, |
| "step": 10240 |
| }, |
| { |
| "epoch": 0.48478130340399983, |
| "grad_norm": 1.3058608770370483, |
| "learning_rate": 0.0005435264647440881, |
| "loss": 0.547296941280365, |
| "step": 10496 |
| }, |
| { |
| "epoch": 0.49660523763336567, |
| "grad_norm": 1.0200841426849365, |
| "learning_rate": 0.000524548912779213, |
| "loss": 0.544040322303772, |
| "step": 10752 |
| }, |
| { |
| "epoch": 0.5084291718627315, |
| "grad_norm": 1.1076935529708862, |
| "learning_rate": 0.0005055358311851499, |
| "loss": 0.5454155206680298, |
| "step": 11008 |
| }, |
| { |
| "epoch": 0.5202531060920974, |
| "grad_norm": 0.8338369727134705, |
| "learning_rate": 0.0004865147375853812, |
| "loss": 0.5434398651123047, |
| "step": 11264 |
| }, |
| { |
| "epoch": 0.5202531060920974, |
| "eval_bleu": 0.8728636919662941, |
| "eval_cos_loss": 0.029134724701682456, |
| "eval_dec_loss": 0.130046037353201, |
| "eval_loss": 0.5287471506829675, |
| "eval_mse2_loss": 0.037880827574969425, |
| "eval_mse_loss": 0.3566115467243543, |
| "eval_rec_loss": 0.0005072098316288007, |
| "eval_var_loss": 0.0007880572314676084, |
| "flow/cos_sim": 0.9708652933166452, |
| "flow/improvement_ratio": 0.9513337348149792, |
| "flow/mag_ratio_mean": 0.9743897437232815, |
| "flow/mag_ratio_std": 0.07394225260914733, |
| "step": 11264 |
| }, |
| { |
| "epoch": 0.5202531060920974, |
| "eval_bleu": 0.8728636919662941, |
| "eval_cos_loss": 0.029134724701682456, |
| "eval_dec_loss": 0.130046037353201, |
| "eval_loss": 0.5287471506829675, |
| "eval_mse2_loss": 0.037880827574969425, |
| "eval_mse_loss": 0.3566115467243543, |
| "eval_rec_loss": 0.0005072098316288007, |
| "eval_runtime": 147.0551, |
| "eval_samples_per_second": 190.357, |
| "eval_steps_per_second": 2.978, |
| "eval_var_loss": 0.0007880572314676084, |
| "flow/cos_sim": 0.9708652933166452, |
| "flow/improvement_ratio": 0.9513337348149792, |
| "flow/mag_ratio_mean": 0.9743897437232815, |
| "flow/mag_ratio_std": 0.07394225260914733, |
| "step": 11264 |
| }, |
| { |
| "epoch": 0.5320770403214632, |
| "grad_norm": 1.2937544584274292, |
| "learning_rate": 0.0004675131611991607, |
| "loss": 0.5423741936683655, |
| "step": 11520 |
| }, |
| { |
| "epoch": 0.5439009745508291, |
| "grad_norm": 0.9950433373451233, |
| "learning_rate": 0.0004485586029984899, |
| "loss": 0.536012589931488, |
| "step": 11776 |
| }, |
| { |
| "epoch": 0.5557249087801949, |
| "grad_norm": 0.9091536402702332, |
| "learning_rate": 0.00042967849590597266, |
| "loss": 0.5319453477859497, |
| "step": 12032 |
| }, |
| { |
| "epoch": 0.5675488430095608, |
| "grad_norm": 1.1773775815963745, |
| "learning_rate": 0.0004109001650911621, |
| "loss": 0.5343883037567139, |
| "step": 12288 |
| }, |
| { |
| "epoch": 0.5675488430095608, |
| "eval_bleu": 0.8703424178205617, |
| "eval_cos_loss": 0.026759936142559736, |
| "eval_dec_loss": 0.1364484317855884, |
| "eval_loss": 0.5287964605305293, |
| "eval_mse2_loss": 0.03801968024796955, |
| "eval_mse_loss": 0.34965787868793696, |
| "eval_rec_loss": 0.00048410188376538874, |
| "eval_var_loss": 0.0015103762552618437, |
| "flow/cos_sim": 0.9732400857966784, |
| "flow/improvement_ratio": 0.9476525126254722, |
| "flow/mag_ratio_mean": 0.9758574831975649, |
| "flow/mag_ratio_std": 0.06964536890659702, |
| "step": 12288 |
| }, |
| { |
| "epoch": 0.5675488430095608, |
| "eval_bleu": 0.8703424178205617, |
| "eval_cos_loss": 0.026759936142559736, |
| "eval_dec_loss": 0.1364484317855884, |
| "eval_loss": 0.5287964605305293, |
| "eval_mse2_loss": 0.03801968024796955, |
| "eval_mse_loss": 0.34965787868793696, |
| "eval_rec_loss": 0.00048410188376538874, |
| "eval_runtime": 146.8515, |
| "eval_samples_per_second": 190.621, |
| "eval_steps_per_second": 2.983, |
| "eval_var_loss": 0.0015103762552618437, |
| "flow/cos_sim": 0.9732400857966784, |
| "flow/improvement_ratio": 0.9476525126254722, |
| "flow/mag_ratio_mean": 0.9758574831975649, |
| "flow/mag_ratio_std": 0.06964536890659702, |
| "step": 12288 |
| }, |
| { |
| "epoch": 0.5793727772389267, |
| "grad_norm": 0.8995824456214905, |
| "learning_rate": 0.0003922507884228551, |
| "loss": 0.5339757800102234, |
| "step": 12544 |
| }, |
| { |
| "epoch": 0.5911967114682924, |
| "grad_norm": 0.7776889801025391, |
| "learning_rate": 0.00037375735713457723, |
| "loss": 0.5342178344726562, |
| "step": 12800 |
| }, |
| { |
| "epoch": 0.6030206456976583, |
| "grad_norm": 0.7428849935531616, |
| "learning_rate": 0.00035544663676018276, |
| "loss": 0.5334670543670654, |
| "step": 13056 |
| }, |
| { |
| "epoch": 0.6148445799270241, |
| "grad_norm": 1.0038323402404785, |
| "learning_rate": 0.00033734512839611255, |
| "loss": 0.5338785648345947, |
| "step": 13312 |
| }, |
| { |
| "epoch": 0.6148445799270241, |
| "eval_bleu": 0.8742863450888384, |
| "eval_cos_loss": 0.02497413170153964, |
| "eval_dec_loss": 0.13080026295682476, |
| "eval_loss": 0.5225390315600181, |
| "eval_mse2_loss": 0.03739905954853177, |
| "eval_mse_loss": 0.35115713799652987, |
| "eval_rec_loss": 0.0004604576855995448, |
| "eval_var_loss": 0.00022470134578339042, |
| "flow/cos_sim": 0.9750258914411885, |
| "flow/improvement_ratio": 0.9472176692529356, |
| "flow/mag_ratio_mean": 0.9779115002177078, |
| "flow/mag_ratio_std": 0.06864877739181258, |
| "step": 13312 |
| }, |
| { |
| "epoch": 0.6148445799270241, |
| "eval_bleu": 0.8742863450888384, |
| "eval_cos_loss": 0.02497413170153964, |
| "eval_dec_loss": 0.13080026295682476, |
| "eval_loss": 0.5225390315600181, |
| "eval_mse2_loss": 0.03739905954853177, |
| "eval_mse_loss": 0.35115713799652987, |
| "eval_rec_loss": 0.0004604576855995448, |
| "eval_runtime": 147.1381, |
| "eval_samples_per_second": 190.25, |
| "eval_steps_per_second": 2.977, |
| "eval_var_loss": 0.00022470134578339042, |
| "flow/cos_sim": 0.9750258914411885, |
| "flow/improvement_ratio": 0.9472176692529356, |
| "flow/mag_ratio_mean": 0.9779115002177078, |
| "flow/mag_ratio_std": 0.06864877739181258, |
| "step": 13312 |
| }, |
| { |
| "epoch": 0.62666851415639, |
| "grad_norm": 0.8821900486946106, |
| "learning_rate": 0.0003194790303463687, |
| "loss": 0.5262507200241089, |
| "step": 13568 |
| }, |
| { |
| "epoch": 0.6384924483857558, |
| "grad_norm": 1.29916512966156, |
| "learning_rate": 0.00030187420020572406, |
| "loss": 0.5345848798751831, |
| "step": 13824 |
| }, |
| { |
| "epoch": 0.6503163826151217, |
| "grad_norm": 0.8431739211082458, |
| "learning_rate": 0.00028455611743603626, |
| "loss": 0.5232061147689819, |
| "step": 14080 |
| }, |
| { |
| "epoch": 0.6621403168444876, |
| "grad_norm": 0.7449212670326233, |
| "learning_rate": 0.0002675498464898373, |
| "loss": 0.5273455381393433, |
| "step": 14336 |
| }, |
| { |
| "epoch": 0.6621403168444876, |
| "eval_bleu": 0.878961824779555, |
| "eval_cos_loss": 0.023681414896222556, |
| "eval_dec_loss": 0.12285568658687752, |
| "eval_loss": 0.5115440155817493, |
| "eval_mse2_loss": 0.036460889154645404, |
| "eval_mse_loss": 0.34879117816278377, |
| "eval_rec_loss": 0.00043564565198285477, |
| "eval_var_loss": 0.0006324733228988299, |
| "flow/cos_sim": 0.9763186070472682, |
| "flow/improvement_ratio": 0.9486979719710676, |
| "flow/mag_ratio_mean": 0.9747681165939053, |
| "flow/mag_ratio_std": 0.06543600253046375, |
| "step": 14336 |
| }, |
| { |
| "epoch": 0.6621403168444876, |
| "eval_bleu": 0.878961824779555, |
| "eval_cos_loss": 0.023681414896222556, |
| "eval_dec_loss": 0.12285568658687752, |
| "eval_loss": 0.5115440155817493, |
| "eval_mse2_loss": 0.036460889154645404, |
| "eval_mse_loss": 0.34879117816278377, |
| "eval_rec_loss": 0.00043564565198285477, |
| "eval_runtime": 145.1315, |
| "eval_samples_per_second": 192.88, |
| "eval_steps_per_second": 3.018, |
| "eval_var_loss": 0.0006324733228988299, |
| "flow/cos_sim": 0.9763186070472682, |
| "flow/improvement_ratio": 0.9486979719710676, |
| "flow/mag_ratio_mean": 0.9747681165939053, |
| "flow/mag_ratio_std": 0.06543600253046375, |
| "step": 14336 |
| }, |
| { |
| "epoch": 0.6739642510738534, |
| "grad_norm": 1.0061371326446533, |
| "learning_rate": 0.0002508800005345623, |
| "loss": 0.5260058045387268, |
| "step": 14592 |
| }, |
| { |
| "epoch": 0.6857881853032193, |
| "grad_norm": 1.3661426305770874, |
| "learning_rate": 0.00023457070582992562, |
| "loss": 0.5261355638504028, |
| "step": 14848 |
| }, |
| { |
| "epoch": 0.6976121195325851, |
| "grad_norm": 0.8166645765304565, |
| "learning_rate": 0.00021864556680999692, |
| "loss": 0.5197286009788513, |
| "step": 15104 |
| }, |
| { |
| "epoch": 0.709436053761951, |
| "grad_norm": 0.9908707141876221, |
| "learning_rate": 0.0002031276319205152, |
| "loss": 0.5179936289787292, |
| "step": 15360 |
| }, |
| { |
| "epoch": 0.709436053761951, |
| "eval_bleu": 0.8783678378600588, |
| "eval_cos_loss": 0.02282264728745505, |
| "eval_dec_loss": 0.12575754155000868, |
| "eval_loss": 0.5103318595314679, |
| "eval_mse2_loss": 0.03646752246353588, |
| "eval_mse_loss": 0.3436338434752808, |
| "eval_rec_loss": 0.0003914046699129902, |
| "eval_var_loss": 0.0017992829623287672, |
| "flow/cos_sim": 0.9771773755278217, |
| "flow/improvement_ratio": 0.946518616589237, |
| "flow/mag_ratio_mean": 0.9811247203448047, |
| "flow/mag_ratio_std": 0.06391868588076607, |
| "step": 15360 |
| }, |
| { |
| "epoch": 0.709436053761951, |
| "eval_bleu": 0.8783678378600588, |
| "eval_cos_loss": 0.02282264728745505, |
| "eval_dec_loss": 0.12575754155000868, |
| "eval_loss": 0.5103318595314679, |
| "eval_mse2_loss": 0.03646752246353588, |
| "eval_mse_loss": 0.3436338434752808, |
| "eval_rec_loss": 0.0003914046699129902, |
| "eval_runtime": 145.6453, |
| "eval_samples_per_second": 192.2, |
| "eval_steps_per_second": 3.007, |
| "eval_var_loss": 0.0017992829623287672, |
| "flow/cos_sim": 0.9771773755278217, |
| "flow/improvement_ratio": 0.946518616589237, |
| "flow/mag_ratio_mean": 0.9811247203448047, |
| "flow/mag_ratio_std": 0.06391868588076607, |
| "step": 15360 |
| }, |
| { |
| "epoch": 0.7212599879913169, |
| "grad_norm": 1.3719778060913086, |
| "learning_rate": 0.00018803936026088542, |
| "loss": 0.5231001973152161, |
| "step": 15616 |
| }, |
| { |
| "epoch": 0.7330839222206826, |
| "grad_norm": 1.1663880348205566, |
| "learning_rate": 0.00017340258907913464, |
| "loss": 0.5187292695045471, |
| "step": 15872 |
| }, |
| { |
| "epoch": 0.7449078564500485, |
| "grad_norm": 1.2364073991775513, |
| "learning_rate": 0.0001592385021668743, |
| "loss": 0.5198975801467896, |
| "step": 16128 |
| }, |
| { |
| "epoch": 0.7567317906794143, |
| "grad_norm": 0.7590259313583374, |
| "learning_rate": 0.0001455675992000087, |
| "loss": 0.5145216584205627, |
| "step": 16384 |
| }, |
| { |
| "epoch": 0.7567317906794143, |
| "eval_bleu": 0.8783302939786105, |
| "eval_cos_loss": 0.02200101312998359, |
| "eval_dec_loss": 0.12474109981311103, |
| "eval_loss": 0.5037317795840572, |
| "eval_mse2_loss": 0.03571532880164445, |
| "eval_mse_loss": 0.3386642832870353, |
| "eval_rec_loss": 0.00036252345826848117, |
| "eval_var_loss": 0.0020484401755136985, |
| "flow/cos_sim": 0.9779990089538435, |
| "flow/improvement_ratio": 0.9449677000579224, |
| "flow/mag_ratio_mean": 0.9812718658414605, |
| "flow/mag_ratio_std": 0.062364814267174835, |
| "step": 16384 |
| }, |
| { |
| "epoch": 0.7567317906794143, |
| "eval_bleu": 0.8783302939786105, |
| "eval_cos_loss": 0.02200101312998359, |
| "eval_dec_loss": 0.12474109981311103, |
| "eval_loss": 0.5037317795840572, |
| "eval_mse2_loss": 0.03571532880164445, |
| "eval_mse_loss": 0.3386642832870353, |
| "eval_rec_loss": 0.00036252345826848117, |
| "eval_runtime": 146.8615, |
| "eval_samples_per_second": 190.608, |
| "eval_steps_per_second": 2.982, |
| "eval_var_loss": 0.0020484401755136985, |
| "flow/cos_sim": 0.9779990089538435, |
| "flow/improvement_ratio": 0.9449677000579224, |
| "flow/mag_ratio_mean": 0.9812718658414605, |
| "flow/mag_ratio_std": 0.062364814267174835, |
| "step": 16384 |
| }, |
| { |
| "epoch": 0.7685557249087802, |
| "grad_norm": 0.5260149836540222, |
| "learning_rate": 0.000132409666069565, |
| "loss": 0.5210624933242798, |
| "step": 16640 |
| }, |
| { |
| "epoch": 0.780379659138146, |
| "grad_norm": 0.9234253168106079, |
| "learning_rate": 0.0001197837462455823, |
| "loss": 0.5090124011039734, |
| "step": 16896 |
| }, |
| { |
| "epoch": 0.7922035933675119, |
| "grad_norm": 0.6521694660186768, |
| "learning_rate": 0.00010770811321550749, |
| "loss": 0.518164336681366, |
| "step": 17152 |
| }, |
| { |
| "epoch": 0.8040275275968778, |
| "grad_norm": 1.1480274200439453, |
| "learning_rate": 9.620024403698591e-05, |
| "loss": 0.5141870379447937, |
| "step": 17408 |
| }, |
| { |
| "epoch": 0.8040275275968778, |
| "eval_bleu": 0.8834884178752624, |
| "eval_cos_loss": 0.0215921389347274, |
| "eval_dec_loss": 0.12070586444750496, |
| "eval_loss": 0.503621021021991, |
| "eval_mse2_loss": 0.03600667195488193, |
| "eval_mse_loss": 0.3440090203530168, |
| "eval_rec_loss": 0.00033989991171857754, |
| "eval_var_loss": 0.000400351607091895, |
| "flow/cos_sim": 0.9784078830725527, |
| "flow/improvement_ratio": 0.9481515654418022, |
| "flow/mag_ratio_mean": 0.9792273077246261, |
| "flow/mag_ratio_std": 0.061406915386517845, |
| "step": 17408 |
| }, |
| { |
| "epoch": 0.8040275275968778, |
| "eval_bleu": 0.8834884178752624, |
| "eval_cos_loss": 0.0215921389347274, |
| "eval_dec_loss": 0.12070586444750496, |
| "eval_loss": 0.503621021021991, |
| "eval_mse2_loss": 0.03600667195488193, |
| "eval_mse_loss": 0.3440090203530168, |
| "eval_rec_loss": 0.00033989991171857754, |
| "eval_runtime": 151.6926, |
| "eval_samples_per_second": 184.538, |
| "eval_steps_per_second": 2.887, |
| "eval_var_loss": 0.000400351607091895, |
| "flow/cos_sim": 0.9784078830725527, |
| "flow/improvement_ratio": 0.9481515654418022, |
| "flow/mag_ratio_mean": 0.9792273077246261, |
| "flow/mag_ratio_std": 0.061406915386517845, |
| "step": 17408 |
| }, |
| { |
| "epoch": 0.8158514618262436, |
| "grad_norm": 0.9857662320137024, |
| "learning_rate": 8.527679404332429e-05, |
| "loss": 0.5135464668273926, |
| "step": 17664 |
| }, |
| { |
| "epoch": 0.8276753960556095, |
| "grad_norm": 0.4826514422893524, |
| "learning_rate": 7.495357273823544e-05, |
| "loss": 0.5152989029884338, |
| "step": 17920 |
| }, |
| { |
| "epoch": 0.8394993302849753, |
| "grad_norm": 0.5454884171485901, |
| "learning_rate": 6.524552091475183e-05, |
| "loss": 0.5149614810943604, |
| "step": 18176 |
| }, |
| { |
| "epoch": 0.8513232645143411, |
| "grad_norm": 0.518525242805481, |
| "learning_rate": 5.6166689031422024e-05, |
| "loss": 0.5079946517944336, |
| "step": 18432 |
| }, |
| { |
| "epoch": 0.8513232645143411, |
| "eval_bleu": 0.8796902491543857, |
| "eval_cos_loss": 0.020958454849955427, |
| "eval_dec_loss": 0.12389364775661464, |
| "eval_loss": 0.498462415517193, |
| "eval_mse2_loss": 0.03562375955436736, |
| "eval_mse_loss": 0.3354686865387442, |
| "eval_rec_loss": 0.0003364354677980215, |
| "eval_var_loss": 0.001044042578570919, |
| "flow/cos_sim": 0.9790415661792232, |
| "flow/improvement_ratio": 0.946279785676634, |
| "flow/mag_ratio_mean": 0.9808841357220254, |
| "flow/mag_ratio_std": 0.059541590231126304, |
| "step": 18432 |
| }, |
| { |
| "epoch": 0.8513232645143411, |
| "eval_bleu": 0.8796902491543857, |
| "eval_cos_loss": 0.020958454849955427, |
| "eval_dec_loss": 0.12389364775661464, |
| "eval_loss": 0.498462415517193, |
| "eval_mse2_loss": 0.03562375955436736, |
| "eval_mse_loss": 0.3354686865387442, |
| "eval_rec_loss": 0.0003364354677980215, |
| "eval_runtime": 146.5651, |
| "eval_samples_per_second": 190.994, |
| "eval_steps_per_second": 2.988, |
| "eval_var_loss": 0.001044042578570919, |
| "flow/cos_sim": 0.9790415661792232, |
| "flow/improvement_ratio": 0.946279785676634, |
| "flow/mag_ratio_mean": 0.9808841357220254, |
| "flow/mag_ratio_std": 0.059541590231126304, |
| "step": 18432 |
| }, |
| { |
| "epoch": 0.8631471987437069, |
| "grad_norm": 0.6128277778625488, |
| "learning_rate": 4.773021687709067e-05, |
| "loss": 0.5122405290603638, |
| "step": 18688 |
| }, |
| { |
| "epoch": 0.8749711329730728, |
| "grad_norm": 0.5204885005950928, |
| "learning_rate": 3.994831455368719e-05, |
| "loss": 0.5127500891685486, |
| "step": 18944 |
| }, |
| { |
| "epoch": 0.8867950672024387, |
| "grad_norm": 0.602541446685791, |
| "learning_rate": 3.283224480455282e-05, |
| "loss": 0.5094860196113586, |
| "step": 19200 |
| }, |
| { |
| "epoch": 0.8986190014318045, |
| "grad_norm": 1.1697250604629517, |
| "learning_rate": 2.639230671387627e-05, |
| "loss": 0.5139985084533691, |
| "step": 19456 |
| }, |
| { |
| "epoch": 0.8986190014318045, |
| "eval_bleu": 0.8812131229893231, |
| "eval_cos_loss": 0.021022337101842172, |
| "eval_dec_loss": 0.12231827581292826, |
| "eval_loss": 0.49907271203385095, |
| "eval_mse2_loss": 0.03566523693229782, |
| "eval_mse_loss": 0.33820473304077914, |
| "eval_rec_loss": 0.00033781120677735894, |
| "eval_var_loss": 0.0004444209407998002, |
| "flow/cos_sim": 0.9789776832001394, |
| "flow/improvement_ratio": 0.9461018956143018, |
| "flow/mag_ratio_mean": 0.9799266838591937, |
| "flow/mag_ratio_std": 0.06017088978530065, |
| "step": 19456 |
| }, |
| { |
| "epoch": 0.8986190014318045, |
| "eval_bleu": 0.8812131229893231, |
| "eval_cos_loss": 0.021022337101842172, |
| "eval_dec_loss": 0.12231827581292826, |
| "eval_loss": 0.49907271203385095, |
| "eval_mse2_loss": 0.03566523693229782, |
| "eval_mse_loss": 0.33820473304077914, |
| "eval_rec_loss": 0.00033781120677735894, |
| "eval_runtime": 148.1279, |
| "eval_samples_per_second": 188.979, |
| "eval_steps_per_second": 2.957, |
| "eval_var_loss": 0.0004444209407998002, |
| "flow/cos_sim": 0.9789776832001394, |
| "flow/improvement_ratio": 0.9461018956143018, |
| "flow/mag_ratio_mean": 0.9799266838591937, |
| "flow/mag_ratio_std": 0.06017088978530065, |
| "step": 19456 |
| }, |
| { |
| "epoch": 0.9104429356611704, |
| "grad_norm": 0.6937961578369141, |
| "learning_rate": 2.063782080083576e-05, |
| "loss": 0.5091792941093445, |
| "step": 19712 |
| }, |
| { |
| "epoch": 0.9222668698905362, |
| "grad_norm": 0.6024012565612793, |
| "learning_rate": 1.557711553001523e-05, |
| "loss": 0.5097566246986389, |
| "step": 19968 |
| }, |
| { |
| "epoch": 0.9340908041199021, |
| "grad_norm": 1.2192533016204834, |
| "learning_rate": 1.1217515257622269e-05, |
| "loss": 0.5056952238082886, |
| "step": 20224 |
| }, |
| { |
| "epoch": 0.945914738349268, |
| "grad_norm": 1.0494381189346313, |
| "learning_rate": 7.565329630950746e-06, |
| "loss": 0.5079949498176575, |
| "step": 20480 |
| }, |
| { |
| "epoch": 0.945914738349268, |
| "eval_bleu": 0.8789524110812165, |
| "eval_cos_loss": 0.020721596508333672, |
| "eval_dec_loss": 0.12240356183929803, |
| "eval_loss": 0.4938700148095823, |
| "eval_mse2_loss": 0.035587785259467555, |
| "eval_mse_loss": 0.3327356912365787, |
| "eval_rec_loss": 0.0003278744384406143, |
| "eval_var_loss": 0.0007429427752211758, |
| "flow/cos_sim": 0.9792784298663815, |
| "flow/improvement_ratio": 0.9488933942361509, |
| "flow/mag_ratio_mean": 0.9814422096563801, |
| "flow/mag_ratio_std": 0.05891708919106553, |
| "step": 20480 |
| }, |
| { |
| "epoch": 0.945914738349268, |
| "eval_bleu": 0.8789524110812165, |
| "eval_cos_loss": 0.020721596508333672, |
| "eval_dec_loss": 0.12240356183929803, |
| "eval_loss": 0.4938700148095823, |
| "eval_mse2_loss": 0.035587785259467555, |
| "eval_mse_loss": 0.3327356912365787, |
| "eval_rec_loss": 0.0003278744384406143, |
| "eval_runtime": 147.5662, |
| "eval_samples_per_second": 189.698, |
| "eval_steps_per_second": 2.968, |
| "eval_var_loss": 0.0007429427752211758, |
| "flow/cos_sim": 0.9792784298663815, |
| "flow/improvement_ratio": 0.9488933942361509, |
| "flow/mag_ratio_mean": 0.9814422096563801, |
| "flow/mag_ratio_std": 0.05891708919106553, |
| "step": 20480 |
| }, |
| { |
| "epoch": 0.9577386725786338, |
| "grad_norm": 1.0610053539276123, |
| "learning_rate": 4.62584445643166e-06, |
| "loss": 0.5107224583625793, |
| "step": 20736 |
| }, |
| { |
| "epoch": 0.9695626068079997, |
| "grad_norm": 0.3382054269313812, |
| "learning_rate": 2.40331404948807e-06, |
| "loss": 0.5061094760894775, |
| "step": 20992 |
| }, |
| { |
| "epoch": 0.9813865410373654, |
| "grad_norm": 0.8232002854347229, |
| "learning_rate": 9.009550772663965e-07, |
| "loss": 0.5084012150764465, |
| "step": 21248 |
| }, |
| { |
| "epoch": 0.9932104752667313, |
| "grad_norm": 0.4803735911846161, |
| "learning_rate": 1.2094190315575791e-07, |
| "loss": 0.502625048160553, |
| "step": 21504 |
| }, |
| { |
| "epoch": 0.9932104752667313, |
| "eval_bleu": 0.878898171590065, |
| "eval_cos_loss": 0.020868751262931248, |
| "eval_dec_loss": 0.12675162426463954, |
| "eval_loss": 0.5008763382423959, |
| "eval_mse2_loss": 0.03563899073545655, |
| "eval_mse_loss": 0.33525587856497396, |
| "eval_rec_loss": 0.00033000375075045425, |
| "eval_var_loss": 0.0008129659853025114, |
| "flow/cos_sim": 0.9791312738912835, |
| "flow/improvement_ratio": 0.9457402734179475, |
| "flow/mag_ratio_mean": 0.9810643102208229, |
| "flow/mag_ratio_std": 0.059360814905942305, |
| "step": 21504 |
| }, |
| { |
| "epoch": 0.9932104752667313, |
| "eval_bleu": 0.878898171590065, |
| "eval_cos_loss": 0.020868751262931248, |
| "eval_dec_loss": 0.12675162426463954, |
| "eval_loss": 0.5008763382423959, |
| "eval_mse2_loss": 0.03563899073545655, |
| "eval_mse_loss": 0.33525587856497396, |
| "eval_rec_loss": 0.00033000375075045425, |
| "eval_runtime": 146.8291, |
| "eval_samples_per_second": 190.65, |
| "eval_steps_per_second": 2.983, |
| "eval_var_loss": 0.0008129659853025114, |
| "flow/cos_sim": 0.9791312738912835, |
| "flow/improvement_ratio": 0.9457402734179475, |
| "flow/mag_ratio_mean": 0.9810643102208229, |
| "flow/mag_ratio_std": 0.059360814905942305, |
| "step": 21504 |
| } |
| ], |
| "logging_steps": 256, |
| "max_steps": 21651, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 1024, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 64, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|