| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.5675488430095608, |
| "eval_steps": 1024, |
| "global_step": 12288, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.011823934229365849, |
| "grad_norm": 0.8926580548286438, |
| "learning_rate": 0.000498046875, |
| "loss": 7.760124206542969, |
| "step": 256 |
| }, |
| { |
| "epoch": 0.023647868458731697, |
| "grad_norm": 1.5264979600906372, |
| "learning_rate": 0.000998046875, |
| "loss": 1.7502448558807373, |
| "step": 512 |
| }, |
| { |
| "epoch": 0.03547180268809755, |
| "grad_norm": 1.034440517425537, |
| "learning_rate": 0.000999640996023194, |
| "loss": 1.0943012237548828, |
| "step": 768 |
| }, |
| { |
| "epoch": 0.047295736917463395, |
| "grad_norm": 1.2952566146850586, |
| "learning_rate": 0.0009985588674043958, |
| "loss": 0.9299185276031494, |
| "step": 1024 |
| }, |
| { |
| "epoch": 0.047295736917463395, |
| "eval_bleu": 0.8108942681968176, |
| "eval_cos_loss": 0.18627598218313635, |
| "eval_dec_loss": 0.22058332814371476, |
| "eval_loss": 0.8581492928065122, |
| "eval_mse2_loss": 0.0729683047781388, |
| "eval_mse_loss": 0.5191411869302732, |
| "eval_rec_loss": 0.024248257404848185, |
| "eval_var_loss": 0.0025806165721318493, |
| "flow/cos_sim": 0.8137240362221791, |
| "flow/improvement_ratio": 0.953395878479361, |
| "flow/mag_ratio_mean": 0.8106630417309939, |
| "flow/mag_ratio_std": 0.14785969192714996, |
| "step": 1024 |
| }, |
| { |
| "epoch": 0.047295736917463395, |
| "eval_bleu": 0.8108942681968176, |
| "eval_cos_loss": 0.18627598218313635, |
| "eval_dec_loss": 0.22058332814371476, |
| "eval_loss": 0.8581492928065122, |
| "eval_mse2_loss": 0.0729683047781388, |
| "eval_mse_loss": 0.5191411869302732, |
| "eval_rec_loss": 0.024248257404848185, |
| "eval_runtime": 153.8108, |
| "eval_samples_per_second": 181.996, |
| "eval_steps_per_second": 2.848, |
| "eval_var_loss": 0.0025806165721318493, |
| "flow/cos_sim": 0.8137240362221791, |
| "flow/improvement_ratio": 0.953395878479361, |
| "flow/mag_ratio_mean": 0.8106630417309939, |
| "flow/mag_ratio_std": 0.14785969192714996, |
| "step": 1024 |
| }, |
| { |
| "epoch": 0.05911967114682925, |
| "grad_norm": 1.6030019521713257, |
| "learning_rate": 0.0009967551747861387, |
| "loss": 0.8445956110954285, |
| "step": 1280 |
| }, |
| { |
| "epoch": 0.0709436053761951, |
| "grad_norm": 1.6468690633773804, |
| "learning_rate": 0.000994232528651847, |
| "loss": 0.7937943339347839, |
| "step": 1536 |
| }, |
| { |
| "epoch": 0.08276753960556095, |
| "grad_norm": 1.9545633792877197, |
| "learning_rate": 0.0009909945800260092, |
| "loss": 0.7469877600669861, |
| "step": 1792 |
| }, |
| { |
| "epoch": 0.09459147383492679, |
| "grad_norm": 1.2975454330444336, |
| "learning_rate": 0.0009870460151900522, |
| "loss": 0.7277378439903259, |
| "step": 2048 |
| }, |
| { |
| "epoch": 0.09459147383492679, |
| "eval_bleu": 0.8332413753983823, |
| "eval_cos_loss": 0.12301718736212003, |
| "eval_dec_loss": 0.18318539169791354, |
| "eval_loss": 0.6979760440517234, |
| "eval_mse2_loss": 0.056748984509134945, |
| "eval_mse_loss": 0.43819310152095203, |
| "eval_rec_loss": 0.006146695995894708, |
| "eval_var_loss": 0.0014001506648651542, |
| "flow/cos_sim": 0.8769828294782334, |
| "flow/improvement_ratio": 0.9491692741984101, |
| "flow/mag_ratio_mean": 0.8741380685268472, |
| "flow/mag_ratio_std": 0.1099956316456675, |
| "step": 2048 |
| }, |
| { |
| "epoch": 0.09459147383492679, |
| "eval_bleu": 0.8332413753983823, |
| "eval_cos_loss": 0.12301718736212003, |
| "eval_dec_loss": 0.18318539169791354, |
| "eval_loss": 0.6979760440517234, |
| "eval_mse2_loss": 0.056748984509134945, |
| "eval_mse_loss": 0.43819310152095203, |
| "eval_rec_loss": 0.006146695995894708, |
| "eval_runtime": 147.7361, |
| "eval_samples_per_second": 189.48, |
| "eval_steps_per_second": 2.965, |
| "eval_var_loss": 0.0014001506648651542, |
| "flow/cos_sim": 0.8769828294782334, |
| "flow/improvement_ratio": 0.9491692741984101, |
| "flow/mag_ratio_mean": 0.8741380685268472, |
| "flow/mag_ratio_std": 0.1099956316456675, |
| "step": 2048 |
| }, |
| { |
| "epoch": 0.10641540806429264, |
| "grad_norm": 1.0802249908447266, |
| "learning_rate": 0.0009823925488998885, |
| "loss": 0.7090811133384705, |
| "step": 2304 |
| }, |
| { |
| "epoch": 0.1182393422936585, |
| "grad_norm": 1.3514765501022339, |
| "learning_rate": 0.0009770409161149525, |
| "loss": 0.6976003646850586, |
| "step": 2560 |
| }, |
| { |
| "epoch": 0.13006327652302435, |
| "grad_norm": 1.3863739967346191, |
| "learning_rate": 0.0009709988622506973, |
| "loss": 0.6783488392829895, |
| "step": 2816 |
| }, |
| { |
| "epoch": 0.1418872107523902, |
| "grad_norm": 1.2354881763458252, |
| "learning_rate": 0.000964275131968659, |
| "loss": 0.6734734773635864, |
| "step": 3072 |
| }, |
| { |
| "epoch": 0.1418872107523902, |
| "eval_bleu": 0.8490467382834135, |
| "eval_cos_loss": 0.09506861453867393, |
| "eval_dec_loss": 0.16530320855903707, |
| "eval_loss": 0.6531072377342068, |
| "eval_mse2_loss": 0.052244682587921344, |
| "eval_mse_loss": 0.4211963676426509, |
| "eval_rec_loss": 0.003093622522146896, |
| "eval_var_loss": 0.0017624946489726027, |
| "flow/cos_sim": 0.9049313971985421, |
| "flow/improvement_ratio": 0.950364352905587, |
| "flow/mag_ratio_mean": 0.8911371738671168, |
| "flow/mag_ratio_std": 0.10241742183764775, |
| "step": 3072 |
| }, |
| { |
| "epoch": 0.1418872107523902, |
| "eval_bleu": 0.8490467382834135, |
| "eval_cos_loss": 0.09506861453867393, |
| "eval_dec_loss": 0.16530320855903707, |
| "eval_loss": 0.6531072377342068, |
| "eval_mse2_loss": 0.052244682587921344, |
| "eval_mse_loss": 0.4211963676426509, |
| "eval_rec_loss": 0.003093622522146896, |
| "eval_runtime": 144.2539, |
| "eval_samples_per_second": 194.054, |
| "eval_steps_per_second": 3.036, |
| "eval_var_loss": 0.0017624946489726027, |
| "flow/cos_sim": 0.9049313971985421, |
| "flow/improvement_ratio": 0.950364352905587, |
| "flow/mag_ratio_mean": 0.8911371738671168, |
| "flow/mag_ratio_std": 0.10241742183764775, |
| "step": 3072 |
| }, |
| { |
| "epoch": 0.15371114498175603, |
| "grad_norm": 1.2442104816436768, |
| "learning_rate": 0.0009568794565203123, |
| "loss": 0.6588751673698425, |
| "step": 3328 |
| }, |
| { |
| "epoch": 0.1655350792111219, |
| "grad_norm": 1.032291054725647, |
| "learning_rate": 0.0009488225396630347, |
| "loss": 0.647803008556366, |
| "step": 3584 |
| }, |
| { |
| "epoch": 0.17735901344048774, |
| "grad_norm": 1.2258528470993042, |
| "learning_rate": 0.0009401160421685646, |
| "loss": 0.6433600783348083, |
| "step": 3840 |
| }, |
| { |
| "epoch": 0.18918294766985358, |
| "grad_norm": 1.1139024496078491, |
| "learning_rate": 0.0009307725649463714, |
| "loss": 0.6353314518928528, |
| "step": 4096 |
| }, |
| { |
| "epoch": 0.18918294766985358, |
| "eval_bleu": 0.8504080018321996, |
| "eval_cos_loss": 0.07373750197901029, |
| "eval_dec_loss": 0.15640341749066086, |
| "eval_loss": 0.6174904828610486, |
| "eval_mse2_loss": 0.04752455838024616, |
| "eval_mse_loss": 0.4026154523979039, |
| "eval_rec_loss": 0.002115985062733488, |
| "eval_var_loss": 0.0014573188677226027, |
| "flow/cos_sim": 0.9262625174979641, |
| "flow/improvement_ratio": 0.9461281280539352, |
| "flow/mag_ratio_mean": 0.9282138170716969, |
| "flow/mag_ratio_std": 0.0990274522844787, |
| "step": 4096 |
| }, |
| { |
| "epoch": 0.18918294766985358, |
| "eval_bleu": 0.8504080018321996, |
| "eval_cos_loss": 0.07373750197901029, |
| "eval_dec_loss": 0.15640341749066086, |
| "eval_loss": 0.6174904828610486, |
| "eval_mse2_loss": 0.04752455838024616, |
| "eval_mse_loss": 0.4026154523979039, |
| "eval_rec_loss": 0.002115985062733488, |
| "eval_runtime": 144.4539, |
| "eval_samples_per_second": 193.785, |
| "eval_steps_per_second": 3.032, |
| "eval_var_loss": 0.0014573188677226027, |
| "flow/cos_sim": 0.9262625174979641, |
| "flow/improvement_ratio": 0.9461281280539352, |
| "flow/mag_ratio_mean": 0.9282138170716969, |
| "flow/mag_ratio_std": 0.0990274522844787, |
| "step": 4096 |
| }, |
| { |
| "epoch": 0.20100688189921945, |
| "grad_norm": 1.0958069562911987, |
| "learning_rate": 0.0009208056308063659, |
| "loss": 0.6263965368270874, |
| "step": 4352 |
| }, |
| { |
| "epoch": 0.2128308161285853, |
| "grad_norm": 1.2000305652618408, |
| "learning_rate": 0.0009102296648873445, |
| "loss": 0.618091344833374, |
| "step": 4608 |
| }, |
| { |
| "epoch": 0.22465475035795113, |
| "grad_norm": 0.8440291285514832, |
| "learning_rate": 0.0008990599737794927, |
| "loss": 0.6174848079681396, |
| "step": 4864 |
| }, |
| { |
| "epoch": 0.236478684587317, |
| "grad_norm": 0.9181346893310547, |
| "learning_rate": 0.0008873127233711644, |
| "loss": 0.606370747089386, |
| "step": 5120 |
| }, |
| { |
| "epoch": 0.236478684587317, |
| "eval_bleu": 0.854693698336936, |
| "eval_cos_loss": 0.058853609810613064, |
| "eval_dec_loss": 0.15538246511188272, |
| "eval_loss": 0.5900675073334071, |
| "eval_mse2_loss": 0.04474926833488625, |
| "eval_mse_loss": 0.3815204039828418, |
| "eval_rec_loss": 0.0014702541673647402, |
| "eval_var_loss": 0.0010597542540667808, |
| "flow/cos_sim": 0.9411464126687071, |
| "flow/improvement_ratio": 0.9471585357298046, |
| "flow/mag_ratio_mean": 0.9485053326169105, |
| "flow/mag_ratio_std": 0.0952613887347315, |
| "step": 5120 |
| }, |
| { |
| "epoch": 0.236478684587317, |
| "eval_bleu": 0.854693698336936, |
| "eval_cos_loss": 0.058853609810613064, |
| "eval_dec_loss": 0.15538246511188272, |
| "eval_loss": 0.5900675073334071, |
| "eval_mse2_loss": 0.04474926833488625, |
| "eval_mse_loss": 0.3815204039828418, |
| "eval_rec_loss": 0.0014702541673647402, |
| "eval_runtime": 144.8825, |
| "eval_samples_per_second": 193.212, |
| "eval_steps_per_second": 3.023, |
| "eval_var_loss": 0.0010597542540667808, |
| "flow/cos_sim": 0.9411464126687071, |
| "flow/improvement_ratio": 0.9471585357298046, |
| "flow/mag_ratio_mean": 0.9485053326169105, |
| "flow/mag_ratio_std": 0.0952613887347315, |
| "step": 5120 |
| }, |
| { |
| "epoch": 0.24830261881668284, |
| "grad_norm": 1.0105012655258179, |
| "learning_rate": 0.0008750049154520011, |
| "loss": 0.5982246398925781, |
| "step": 5376 |
| }, |
| { |
| "epoch": 0.2601265530460487, |
| "grad_norm": 1.7041122913360596, |
| "learning_rate": 0.0008621543631062487, |
| "loss": 0.6011320948600769, |
| "step": 5632 |
| }, |
| { |
| "epoch": 0.27195048727541454, |
| "grad_norm": 1.2530128955841064, |
| "learning_rate": 0.0008487796649318904, |
| "loss": 0.5908714532852173, |
| "step": 5888 |
| }, |
| { |
| "epoch": 0.2837744215047804, |
| "grad_norm": 0.8214261531829834, |
| "learning_rate": 0.0008349001781229053, |
| "loss": 0.5860975980758667, |
| "step": 6144 |
| }, |
| { |
| "epoch": 0.2837744215047804, |
| "eval_bleu": 0.8588448853802904, |
| "eval_cos_loss": 0.050389231161371756, |
| "eval_dec_loss": 0.14640251302147564, |
| "eval_loss": 0.5701193273339642, |
| "eval_mse2_loss": 0.04257897145569869, |
| "eval_mse_loss": 0.3735292319837771, |
| "eval_rec_loss": 0.0011887007649628058, |
| "eval_var_loss": 0.001380990084992152, |
| "flow/cos_sim": 0.9496107875756478, |
| "flow/improvement_ratio": 0.9471838724667623, |
| "flow/mag_ratio_mean": 0.9603897756365336, |
| "flow/mag_ratio_std": 0.09184158112020253, |
| "step": 6144 |
| }, |
| { |
| "epoch": 0.2837744215047804, |
| "eval_bleu": 0.8588448853802904, |
| "eval_cos_loss": 0.050389231161371756, |
| "eval_dec_loss": 0.14640251302147564, |
| "eval_loss": 0.5701193273339642, |
| "eval_mse2_loss": 0.04257897145569869, |
| "eval_mse_loss": 0.3735292319837771, |
| "eval_rec_loss": 0.0011887007649628058, |
| "eval_runtime": 145.6643, |
| "eval_samples_per_second": 192.175, |
| "eval_steps_per_second": 3.007, |
| "eval_var_loss": 0.001380990084992152, |
| "flow/cos_sim": 0.9496107875756478, |
| "flow/improvement_ratio": 0.9471838724667623, |
| "flow/mag_ratio_mean": 0.9603897756365336, |
| "flow/mag_ratio_std": 0.09184158112020253, |
| "step": 6144 |
| }, |
| { |
| "epoch": 0.2955983557341462, |
| "grad_norm": 0.9015347361564636, |
| "learning_rate": 0.0008205359904536107, |
| "loss": 0.5818743705749512, |
| "step": 6400 |
| }, |
| { |
| "epoch": 0.30742228996351206, |
| "grad_norm": 0.8061195611953735, |
| "learning_rate": 0.0008057078912056363, |
| "loss": 0.5712096691131592, |
| "step": 6656 |
| }, |
| { |
| "epoch": 0.3192462241928779, |
| "grad_norm": 0.9496876001358032, |
| "learning_rate": 0.0007904373410796086, |
| "loss": 0.5827493667602539, |
| "step": 6912 |
| }, |
| { |
| "epoch": 0.3310701584222438, |
| "grad_norm": 0.9318349957466125, |
| "learning_rate": 0.0007747464411350876, |
| "loss": 0.5713083744049072, |
| "step": 7168 |
| }, |
| { |
| "epoch": 0.3310701584222438, |
| "eval_bleu": 0.8655904613504272, |
| "eval_cos_loss": 0.04345733123793177, |
| "eval_dec_loss": 0.1383160431550382, |
| "eval_loss": 0.560146918988119, |
| "eval_mse2_loss": 0.04110090016093973, |
| "eval_mse_loss": 0.37529142364247203, |
| "eval_rec_loss": 0.0010027640903040077, |
| "eval_var_loss": 9.005472540311074e-05, |
| "flow/cos_sim": 0.956542692227995, |
| "flow/improvement_ratio": 0.9472023688222719, |
| "flow/mag_ratio_mean": 0.9649291216782784, |
| "flow/mag_ratio_std": 0.08745992025488043, |
| "step": 7168 |
| }, |
| { |
| "epoch": 0.3310701584222438, |
| "eval_bleu": 0.8655904613504272, |
| "eval_cos_loss": 0.04345733123793177, |
| "eval_dec_loss": 0.1383160431550382, |
| "eval_loss": 0.560146918988119, |
| "eval_mse2_loss": 0.04110090016093973, |
| "eval_mse_loss": 0.37529142364247203, |
| "eval_rec_loss": 0.0010027640903040077, |
| "eval_runtime": 147.3557, |
| "eval_samples_per_second": 189.969, |
| "eval_steps_per_second": 2.972, |
| "eval_var_loss": 9.005472540311074e-05, |
| "flow/cos_sim": 0.956542692227995, |
| "flow/improvement_ratio": 0.9472023688222719, |
| "flow/mag_ratio_mean": 0.9649291216782784, |
| "flow/mag_ratio_std": 0.08745992025488043, |
| "step": 7168 |
| }, |
| { |
| "epoch": 0.34289409265160964, |
| "grad_norm": 1.0284937620162964, |
| "learning_rate": 0.000758657900803716, |
| "loss": 0.5740544199943542, |
| "step": 7424 |
| }, |
| { |
| "epoch": 0.3547180268809755, |
| "grad_norm": 0.7206848859786987, |
| "learning_rate": 0.000742195005021869, |
| "loss": 0.5760706067085266, |
| "step": 7680 |
| }, |
| { |
| "epoch": 0.3665419611103413, |
| "grad_norm": 1.221917748451233, |
| "learning_rate": 0.0007253815805303786, |
| "loss": 0.566294252872467, |
| "step": 7936 |
| }, |
| { |
| "epoch": 0.37836589533970716, |
| "grad_norm": 0.8819605708122253, |
| "learning_rate": 0.0007082419613901028, |
| "loss": 0.5628067851066589, |
| "step": 8192 |
| }, |
| { |
| "epoch": 0.37836589533970716, |
| "eval_bleu": 0.871946148436791, |
| "eval_cos_loss": 0.03890436059331785, |
| "eval_dec_loss": 0.1332924633104031, |
| "eval_loss": 0.5495835452183196, |
| "eval_mse2_loss": 0.040371610187674496, |
| "eval_mse_loss": 0.3710057751773155, |
| "eval_rec_loss": 0.0008419993847885518, |
| "eval_var_loss": 0.0001812590855986016, |
| "flow/cos_sim": 0.9610956558898159, |
| "flow/improvement_ratio": 0.9478674083267717, |
| "flow/mag_ratio_mean": 0.9691984739205609, |
| "flow/mag_ratio_std": 0.08465842693431737, |
| "step": 8192 |
| }, |
| { |
| "epoch": 0.37836589533970716, |
| "eval_bleu": 0.871946148436791, |
| "eval_cos_loss": 0.03890436059331785, |
| "eval_dec_loss": 0.1332924633104031, |
| "eval_loss": 0.5495835452183196, |
| "eval_mse2_loss": 0.040371610187674496, |
| "eval_mse_loss": 0.3710057751773155, |
| "eval_rec_loss": 0.0008419993847885518, |
| "eval_runtime": 146.7514, |
| "eval_samples_per_second": 190.751, |
| "eval_steps_per_second": 2.985, |
| "eval_var_loss": 0.0001812590855986016, |
| "flow/cos_sim": 0.9610956558898159, |
| "flow/improvement_ratio": 0.9478674083267717, |
| "flow/mag_ratio_mean": 0.9691984739205609, |
| "flow/mag_ratio_std": 0.08465842693431737, |
| "step": 8192 |
| }, |
| { |
| "epoch": 0.390189829569073, |
| "grad_norm": 1.2066078186035156, |
| "learning_rate": 0.0006908009537632514, |
| "loss": 0.5644704699516296, |
| "step": 8448 |
| }, |
| { |
| "epoch": 0.4020137637984389, |
| "grad_norm": 1.2743791341781616, |
| "learning_rate": 0.0006730838000114403, |
| "loss": 0.5624759197235107, |
| "step": 8704 |
| }, |
| { |
| "epoch": 0.41383769802780473, |
| "grad_norm": 0.6424040198326111, |
| "learning_rate": 0.0006551161421624341, |
| "loss": 0.5654159188270569, |
| "step": 8960 |
| }, |
| { |
| "epoch": 0.4256616322571706, |
| "grad_norm": 1.0390995740890503, |
| "learning_rate": 0.0006369239847984517, |
| "loss": 0.5563592910766602, |
| "step": 9216 |
| }, |
| { |
| "epoch": 0.4256616322571706, |
| "eval_bleu": 0.8641208937990813, |
| "eval_cos_loss": 0.034672807719235275, |
| "eval_dec_loss": 0.14222022919962396, |
| "eval_loss": 0.5460346293503835, |
| "eval_mse2_loss": 0.03956130868223704, |
| "eval_mse_loss": 0.3584019422667212, |
| "eval_rec_loss": 0.0007127871282784625, |
| "eval_var_loss": 0.0016710812642694063, |
| "flow/cos_sim": 0.965327212663546, |
| "flow/improvement_ratio": 0.946786184561307, |
| "flow/mag_ratio_mean": 0.9734560983366074, |
| "flow/mag_ratio_std": 0.08033725547872178, |
| "step": 9216 |
| }, |
| { |
| "epoch": 0.4256616322571706, |
| "eval_bleu": 0.8641208937990813, |
| "eval_cos_loss": 0.034672807719235275, |
| "eval_dec_loss": 0.14222022919962396, |
| "eval_loss": 0.5460346293503835, |
| "eval_mse2_loss": 0.03956130868223704, |
| "eval_mse_loss": 0.3584019422667212, |
| "eval_rec_loss": 0.0007127871282784625, |
| "eval_runtime": 146.177, |
| "eval_samples_per_second": 191.501, |
| "eval_steps_per_second": 2.996, |
| "eval_var_loss": 0.0016710812642694063, |
| "flow/cos_sim": 0.965327212663546, |
| "flow/improvement_ratio": 0.946786184561307, |
| "flow/mag_ratio_mean": 0.9734560983366074, |
| "flow/mag_ratio_std": 0.08033725547872178, |
| "step": 9216 |
| }, |
| { |
| "epoch": 0.4374855664865364, |
| "grad_norm": 1.0673410892486572, |
| "learning_rate": 0.0006185336574197479, |
| "loss": 0.55131596326828, |
| "step": 9472 |
| }, |
| { |
| "epoch": 0.44930950071590225, |
| "grad_norm": 1.1385674476623535, |
| "learning_rate": 0.0005999717763379407, |
| "loss": 0.5542811155319214, |
| "step": 9728 |
| }, |
| { |
| "epoch": 0.4611334349452681, |
| "grad_norm": 1.3084577322006226, |
| "learning_rate": 0.0005812652061542363, |
| "loss": 0.5522482395172119, |
| "step": 9984 |
| }, |
| { |
| "epoch": 0.472957369174634, |
| "grad_norm": 0.9078991413116455, |
| "learning_rate": 0.0005624410208783071, |
| "loss": 0.5514112114906311, |
| "step": 10240 |
| }, |
| { |
| "epoch": 0.472957369174634, |
| "eval_bleu": 0.8714027910618674, |
| "eval_cos_loss": 0.031159216023505278, |
| "eval_dec_loss": 0.13065314148744084, |
| "eval_loss": 0.5320979358126584, |
| "eval_mse2_loss": 0.038692950696331455, |
| "eval_mse_loss": 0.3575415439121255, |
| "eval_rec_loss": 0.0006259792361515585, |
| "eval_var_loss": 0.001468397166630993, |
| "flow/cos_sim": 0.968840807404148, |
| "flow/improvement_ratio": 0.9490241132098246, |
| "flow/mag_ratio_mean": 0.9646270977307673, |
| "flow/mag_ratio_std": 0.07568225521407171, |
| "step": 10240 |
| }, |
| { |
| "epoch": 0.472957369174634, |
| "eval_bleu": 0.8714027910618674, |
| "eval_cos_loss": 0.031159216023505278, |
| "eval_dec_loss": 0.13065314148744084, |
| "eval_loss": 0.5320979358126584, |
| "eval_mse2_loss": 0.038692950696331455, |
| "eval_mse_loss": 0.3575415439121255, |
| "eval_rec_loss": 0.0006259792361515585, |
| "eval_runtime": 147.3748, |
| "eval_samples_per_second": 189.944, |
| "eval_steps_per_second": 2.972, |
| "eval_var_loss": 0.001468397166630993, |
| "flow/cos_sim": 0.968840807404148, |
| "flow/improvement_ratio": 0.9490241132098246, |
| "flow/mag_ratio_mean": 0.9646270977307673, |
| "flow/mag_ratio_std": 0.07568225521407171, |
| "step": 10240 |
| }, |
| { |
| "epoch": 0.48478130340399983, |
| "grad_norm": 1.3058608770370483, |
| "learning_rate": 0.0005435264647440881, |
| "loss": 0.547296941280365, |
| "step": 10496 |
| }, |
| { |
| "epoch": 0.49660523763336567, |
| "grad_norm": 1.0200841426849365, |
| "learning_rate": 0.000524548912779213, |
| "loss": 0.544040322303772, |
| "step": 10752 |
| }, |
| { |
| "epoch": 0.5084291718627315, |
| "grad_norm": 1.1076935529708862, |
| "learning_rate": 0.0005055358311851499, |
| "loss": 0.5454155206680298, |
| "step": 11008 |
| }, |
| { |
| "epoch": 0.5202531060920974, |
| "grad_norm": 0.8338369727134705, |
| "learning_rate": 0.0004865147375853812, |
| "loss": 0.5434398651123047, |
| "step": 11264 |
| }, |
| { |
| "epoch": 0.5202531060920974, |
| "eval_bleu": 0.8728636919662941, |
| "eval_cos_loss": 0.029134724701682456, |
| "eval_dec_loss": 0.130046037353201, |
| "eval_loss": 0.5287471506829675, |
| "eval_mse2_loss": 0.037880827574969425, |
| "eval_mse_loss": 0.3566115467243543, |
| "eval_rec_loss": 0.0005072098316288007, |
| "eval_var_loss": 0.0007880572314676084, |
| "flow/cos_sim": 0.9708652933166452, |
| "flow/improvement_ratio": 0.9513337348149792, |
| "flow/mag_ratio_mean": 0.9743897437232815, |
| "flow/mag_ratio_std": 0.07394225260914733, |
| "step": 11264 |
| }, |
| { |
| "epoch": 0.5202531060920974, |
| "eval_bleu": 0.8728636919662941, |
| "eval_cos_loss": 0.029134724701682456, |
| "eval_dec_loss": 0.130046037353201, |
| "eval_loss": 0.5287471506829675, |
| "eval_mse2_loss": 0.037880827574969425, |
| "eval_mse_loss": 0.3566115467243543, |
| "eval_rec_loss": 0.0005072098316288007, |
| "eval_runtime": 147.0551, |
| "eval_samples_per_second": 190.357, |
| "eval_steps_per_second": 2.978, |
| "eval_var_loss": 0.0007880572314676084, |
| "flow/cos_sim": 0.9708652933166452, |
| "flow/improvement_ratio": 0.9513337348149792, |
| "flow/mag_ratio_mean": 0.9743897437232815, |
| "flow/mag_ratio_std": 0.07394225260914733, |
| "step": 11264 |
| }, |
| { |
| "epoch": 0.5320770403214632, |
| "grad_norm": 1.2937544584274292, |
| "learning_rate": 0.0004675131611991607, |
| "loss": 0.5423741936683655, |
| "step": 11520 |
| }, |
| { |
| "epoch": 0.5439009745508291, |
| "grad_norm": 0.9950433373451233, |
| "learning_rate": 0.0004485586029984899, |
| "loss": 0.536012589931488, |
| "step": 11776 |
| }, |
| { |
| "epoch": 0.5557249087801949, |
| "grad_norm": 0.9091536402702332, |
| "learning_rate": 0.00042967849590597266, |
| "loss": 0.5319453477859497, |
| "step": 12032 |
| }, |
| { |
| "epoch": 0.5675488430095608, |
| "grad_norm": 1.1773775815963745, |
| "learning_rate": 0.0004109001650911621, |
| "loss": 0.5343883037567139, |
| "step": 12288 |
| }, |
| { |
| "epoch": 0.5675488430095608, |
| "eval_bleu": 0.8703424178205617, |
| "eval_cos_loss": 0.026759936142559736, |
| "eval_dec_loss": 0.1364484317855884, |
| "eval_loss": 0.5287964605305293, |
| "eval_mse2_loss": 0.03801968024796955, |
| "eval_mse_loss": 0.34965787868793696, |
| "eval_rec_loss": 0.00048410188376538874, |
| "eval_var_loss": 0.0015103762552618437, |
| "flow/cos_sim": 0.9732400857966784, |
| "flow/improvement_ratio": 0.9476525126254722, |
| "flow/mag_ratio_mean": 0.9758574831975649, |
| "flow/mag_ratio_std": 0.06964536890659702, |
| "step": 12288 |
| }, |
| { |
| "epoch": 0.5675488430095608, |
| "eval_bleu": 0.8703424178205617, |
| "eval_cos_loss": 0.026759936142559736, |
| "eval_dec_loss": 0.1364484317855884, |
| "eval_loss": 0.5287964605305293, |
| "eval_mse2_loss": 0.03801968024796955, |
| "eval_mse_loss": 0.34965787868793696, |
| "eval_rec_loss": 0.00048410188376538874, |
| "eval_runtime": 146.8515, |
| "eval_samples_per_second": 190.621, |
| "eval_steps_per_second": 2.983, |
| "eval_var_loss": 0.0015103762552618437, |
| "flow/cos_sim": 0.9732400857966784, |
| "flow/improvement_ratio": 0.9476525126254722, |
| "flow/mag_ratio_mean": 0.9758574831975649, |
| "flow/mag_ratio_std": 0.06964536890659702, |
| "step": 12288 |
| } |
| ], |
| "logging_steps": 256, |
| "max_steps": 21651, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 1024, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 64, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|