| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.5675488430095608, | |
| "eval_steps": 1024, | |
| "global_step": 12288, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.011823934229365849, | |
| "grad_norm": 1.1338618993759155, | |
| "learning_rate": 1.9615384615384617e-05, | |
| "loss": 10.4459, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 0.023647868458731697, | |
| "grad_norm": 1.0851895809173584, | |
| "learning_rate": 3.930769230769231e-05, | |
| "loss": 7.9458, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 0.03547180268809755, | |
| "grad_norm": 0.9216171503067017, | |
| "learning_rate": 4.999617095521894e-05, | |
| "loss": 5.6401, | |
| "step": 768 | |
| }, | |
| { | |
| "epoch": 0.047295736917463395, | |
| "grad_norm": 0.5787180066108704, | |
| "learning_rate": 4.9961092368776736e-05, | |
| "loss": 3.8256, | |
| "step": 1024 | |
| }, | |
| { | |
| "epoch": 0.047295736917463395, | |
| "eval_acr_loss": 0.9939580324305791, | |
| "eval_across_var": 0.003025606172010473, | |
| "eval_bleu": 0.5515564027779047, | |
| "eval_ce_loss": 2.3591716888288383, | |
| "eval_cos_loss": 0.9272929947125857, | |
| "eval_cov": 0.0706253400131992, | |
| "eval_cov_loss": 0.00802828647306861, | |
| "eval_global_var": 0.2767718589469178, | |
| "eval_loss": 2.832596059803549, | |
| "eval_mse_loss": 1.9087850182023767, | |
| "eval_per_var": 0.2680246013484589, | |
| "eval_within_var": 0.27379363795665845, | |
| "step": 1024 | |
| }, | |
| { | |
| "epoch": 0.047295736917463395, | |
| "eval_acr_loss": 0.9939580324305791, | |
| "eval_across_var": 0.003025606172010473, | |
| "eval_bleu": 0.5515564027779047, | |
| "eval_ce_loss": 2.3591716888288383, | |
| "eval_cos_loss": 0.9272929947125857, | |
| "eval_cov": 0.0706253400131992, | |
| "eval_cov_loss": 0.00802828647306861, | |
| "eval_global_var": 0.2767718589469178, | |
| "eval_loss": 2.832596059803549, | |
| "eval_mse_loss": 1.9087850182023767, | |
| "eval_per_var": 0.2680246013484589, | |
| "eval_runtime": 159.2542, | |
| "eval_samples_per_second": 175.776, | |
| "eval_steps_per_second": 2.75, | |
| "eval_within_var": 0.27379363795665845, | |
| "step": 1024 | |
| }, | |
| { | |
| "epoch": 0.05911967114682925, | |
| "grad_norm": 0.38924261927604675, | |
| "learning_rate": 4.988941132556799e-05, | |
| "loss": 2.7681, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 0.0709436053761951, | |
| "grad_norm": 0.3134535551071167, | |
| "learning_rate": 4.9781232937269974e-05, | |
| "loss": 2.1522, | |
| "step": 1536 | |
| }, | |
| { | |
| "epoch": 0.08276753960556095, | |
| "grad_norm": 0.2510242760181427, | |
| "learning_rate": 4.963671583455164e-05, | |
| "loss": 1.7487, | |
| "step": 1792 | |
| }, | |
| { | |
| "epoch": 0.09459147383492679, | |
| "grad_norm": 0.22504042088985443, | |
| "learning_rate": 4.945607193446079e-05, | |
| "loss": 1.4694, | |
| "step": 2048 | |
| }, | |
| { | |
| "epoch": 0.09459147383492679, | |
| "eval_acr_loss": 0.9901781947645423, | |
| "eval_across_var": 0.004923156680556261, | |
| "eval_bleu": 0.8030880203505176, | |
| "eval_ce_loss": 0.700762519825539, | |
| "eval_cos_loss": 0.7673689819634233, | |
| "eval_cov": 0.07080983897866723, | |
| "eval_cov_loss": 0.008125514746749917, | |
| "eval_global_var": 0.4061385202625571, | |
| "eval_loss": 1.1126885499856243, | |
| "eval_mse_loss": 1.6458245439616512, | |
| "eval_per_var": 0.3933824513056507, | |
| "eval_within_var": 0.4013502570592105, | |
| "step": 2048 | |
| }, | |
| { | |
| "epoch": 0.09459147383492679, | |
| "eval_acr_loss": 0.9901781947645423, | |
| "eval_across_var": 0.004923156680556261, | |
| "eval_bleu": 0.8030880203505176, | |
| "eval_ce_loss": 0.700762519825539, | |
| "eval_cos_loss": 0.7673689819634233, | |
| "eval_cov": 0.07080983897866723, | |
| "eval_cov_loss": 0.008125514746749917, | |
| "eval_global_var": 0.4061385202625571, | |
| "eval_loss": 1.1126885499856243, | |
| "eval_mse_loss": 1.6458245439616512, | |
| "eval_per_var": 0.3933824513056507, | |
| "eval_runtime": 155.1424, | |
| "eval_samples_per_second": 180.434, | |
| "eval_steps_per_second": 2.823, | |
| "eval_within_var": 0.4013502570592105, | |
| "step": 2048 | |
| }, | |
| { | |
| "epoch": 0.10641540806429264, | |
| "grad_norm": 0.18675386905670166, | |
| "learning_rate": 4.923956612967301e-05, | |
| "loss": 1.2664, | |
| "step": 2304 | |
| }, | |
| { | |
| "epoch": 0.1182393422936585, | |
| "grad_norm": 0.18214967846870422, | |
| "learning_rate": 4.898751590005826e-05, | |
| "loss": 1.1058, | |
| "step": 2560 | |
| }, | |
| { | |
| "epoch": 0.13006327652302435, | |
| "grad_norm": 0.15246237814426422, | |
| "learning_rate": 4.870029084713462e-05, | |
| "loss": 0.981, | |
| "step": 2816 | |
| }, | |
| { | |
| "epoch": 0.1418872107523902, | |
| "grad_norm": 0.1368647962808609, | |
| "learning_rate": 4.837831215209188e-05, | |
| "loss": 0.8816, | |
| "step": 3072 | |
| }, | |
| { | |
| "epoch": 0.1418872107523902, | |
| "eval_acr_loss": 0.9858010461613468, | |
| "eval_across_var": 0.007125071276481089, | |
| "eval_bleu": 0.8956566730262217, | |
| "eval_ce_loss": 0.32717657133460587, | |
| "eval_cos_loss": 0.6143105866974348, | |
| "eval_cov": 0.06988288496182934, | |
| "eval_cov_loss": 0.007913092302252032, | |
| "eval_global_var": 0.5146751926369864, | |
| "eval_loss": 0.6788079935938256, | |
| "eval_mse_loss": 1.3672495941593223, | |
| "eval_per_var": 0.4983278039383562, | |
| "eval_within_var": 0.5077701699516001, | |
| "step": 3072 | |
| }, | |
| { | |
| "epoch": 0.1418872107523902, | |
| "eval_acr_loss": 0.9858010461613468, | |
| "eval_across_var": 0.007125071276481089, | |
| "eval_bleu": 0.8956566730262217, | |
| "eval_ce_loss": 0.32717657133460587, | |
| "eval_cos_loss": 0.6143105866974348, | |
| "eval_cov": 0.06988288496182934, | |
| "eval_cov_loss": 0.007913092302252032, | |
| "eval_global_var": 0.5146751926369864, | |
| "eval_loss": 0.6788079935938256, | |
| "eval_mse_loss": 1.3672495941593223, | |
| "eval_per_var": 0.4983278039383562, | |
| "eval_runtime": 156.011, | |
| "eval_samples_per_second": 179.43, | |
| "eval_steps_per_second": 2.807, | |
| "eval_within_var": 0.5077701699516001, | |
| "step": 3072 | |
| }, | |
| { | |
| "epoch": 0.15371114498175603, | |
| "grad_norm": 0.13020840287208557, | |
| "learning_rate": 4.802205195817963e-05, | |
| "loss": 0.8019, | |
| "step": 3328 | |
| }, | |
| { | |
| "epoch": 0.1655350792111219, | |
| "grad_norm": 0.12300444394350052, | |
| "learning_rate": 4.763203267836576e-05, | |
| "loss": 0.7339, | |
| "step": 3584 | |
| }, | |
| { | |
| "epoch": 0.17735901344048774, | |
| "grad_norm": 0.10956571996212006, | |
| "learning_rate": 4.720882622928019e-05, | |
| "loss": 0.6774, | |
| "step": 3840 | |
| }, | |
| { | |
| "epoch": 0.18918294766985358, | |
| "grad_norm": 0.11182258278131485, | |
| "learning_rate": 4.675305319256765e-05, | |
| "loss": 0.6307, | |
| "step": 4096 | |
| }, | |
| { | |
| "epoch": 0.18918294766985358, | |
| "eval_acr_loss": 0.9774447257660296, | |
| "eval_across_var": 0.011342588644134536, | |
| "eval_bleu": 0.9352497637682005, | |
| "eval_ce_loss": 0.18979091641225226, | |
| "eval_cos_loss": 0.49574217472446563, | |
| "eval_cov": 0.06955429843571632, | |
| "eval_cov_loss": 0.007831381105490403, | |
| "eval_global_var": 0.6115789544092466, | |
| "eval_loss": 0.49388179734145127, | |
| "eval_mse_loss": 1.144643609110079, | |
| "eval_per_var": 0.592599529109589, | |
| "eval_within_var": 0.600530758568141, | |
| "step": 4096 | |
| }, | |
| { | |
| "epoch": 0.18918294766985358, | |
| "eval_acr_loss": 0.9774447257660296, | |
| "eval_across_var": 0.011342588644134536, | |
| "eval_bleu": 0.9352497637682005, | |
| "eval_ce_loss": 0.18979091641225226, | |
| "eval_cos_loss": 0.49574217472446563, | |
| "eval_cov": 0.06955429843571632, | |
| "eval_cov_loss": 0.007831381105490403, | |
| "eval_global_var": 0.6115789544092466, | |
| "eval_loss": 0.49388179734145127, | |
| "eval_mse_loss": 1.144643609110079, | |
| "eval_per_var": 0.592599529109589, | |
| "eval_runtime": 155.2844, | |
| "eval_samples_per_second": 180.269, | |
| "eval_steps_per_second": 2.821, | |
| "eval_within_var": 0.600530758568141, | |
| "step": 4096 | |
| }, | |
| { | |
| "epoch": 0.20100688189921945, | |
| "grad_norm": 0.1080719456076622, | |
| "learning_rate": 4.6265381904878854e-05, | |
| "loss": 0.588, | |
| "step": 4352 | |
| }, | |
| { | |
| "epoch": 0.2128308161285853, | |
| "grad_norm": 0.10819243639707565, | |
| "learning_rate": 4.57465274778347e-05, | |
| "loss": 0.5554, | |
| "step": 4608 | |
| }, | |
| { | |
| "epoch": 0.22465475035795113, | |
| "grad_norm": 0.1115206629037857, | |
| "learning_rate": 4.519725074940068e-05, | |
| "loss": 0.5198, | |
| "step": 4864 | |
| }, | |
| { | |
| "epoch": 0.236478684587317, | |
| "grad_norm": 0.1552964597940445, | |
| "learning_rate": 4.461835716820895e-05, | |
| "loss": 0.473, | |
| "step": 5120 | |
| }, | |
| { | |
| "epoch": 0.236478684587317, | |
| "eval_acr_loss": 0.11126784305088222, | |
| "eval_across_var": 0.680014660641483, | |
| "eval_bleu": 0.9523306149700821, | |
| "eval_ce_loss": 0.12964293614165967, | |
| "eval_cos_loss": 0.4240787292588247, | |
| "eval_cov": 0.07667060747538527, | |
| "eval_cov_loss": 0.010102802944969232, | |
| "eval_global_var": 1.6775805329623288, | |
| "eval_loss": 0.31928212995126365, | |
| "eval_mse_loss": 1.0156728980475909, | |
| "eval_per_var": 1.6460250784817352, | |
| "eval_within_var": 1.0011268127454471, | |
| "step": 5120 | |
| }, | |
| { | |
| "epoch": 0.236478684587317, | |
| "eval_acr_loss": 0.11126784305088222, | |
| "eval_across_var": 0.680014660641483, | |
| "eval_bleu": 0.9523306149700821, | |
| "eval_ce_loss": 0.12964293614165967, | |
| "eval_cos_loss": 0.4240787292588247, | |
| "eval_cov": 0.07667060747538527, | |
| "eval_cov_loss": 0.010102802944969232, | |
| "eval_global_var": 1.6775805329623288, | |
| "eval_loss": 0.31928212995126365, | |
| "eval_mse_loss": 1.0156728980475909, | |
| "eval_per_var": 1.6460250784817352, | |
| "eval_runtime": 154.2182, | |
| "eval_samples_per_second": 181.516, | |
| "eval_steps_per_second": 2.84, | |
| "eval_within_var": 1.0011268127454471, | |
| "step": 5120 | |
| }, | |
| { | |
| "epoch": 0.24830261881668284, | |
| "grad_norm": 0.11613737791776657, | |
| "learning_rate": 4.401069561246422e-05, | |
| "loss": 0.3958, | |
| "step": 5376 | |
| }, | |
| { | |
| "epoch": 0.2601265530460487, | |
| "grad_norm": 0.11101594567298889, | |
| "learning_rate": 4.337515714516545e-05, | |
| "loss": 0.3648, | |
| "step": 5632 | |
| }, | |
| { | |
| "epoch": 0.27195048727541454, | |
| "grad_norm": 0.14844343066215515, | |
| "learning_rate": 4.2712673707468434e-05, | |
| "loss": 0.3464, | |
| "step": 5888 | |
| }, | |
| { | |
| "epoch": 0.2837744215047804, | |
| "grad_norm": 0.10389428585767746, | |
| "learning_rate": 4.202421675210565e-05, | |
| "loss": 0.3281, | |
| "step": 6144 | |
| }, | |
| { | |
| "epoch": 0.2837744215047804, | |
| "eval_acr_loss": 0.015462962337612025, | |
| "eval_across_var": 0.9652468334866441, | |
| "eval_bleu": 0.9665450842704094, | |
| "eval_ce_loss": 0.08923274265882904, | |
| "eval_cos_loss": 0.35504293285276245, | |
| "eval_cov": 0.06710940844392123, | |
| "eval_cov_loss": 0.007327720047650884, | |
| "eval_global_var": 2.2822310216894977, | |
| "eval_loss": 0.24187510450408883, | |
| "eval_mse_loss": 0.8843358904803724, | |
| "eval_per_var": 2.3488914454908674, | |
| "eval_within_var": 1.3261088448572376, | |
| "step": 6144 | |
| }, | |
| { | |
| "epoch": 0.2837744215047804, | |
| "eval_acr_loss": 0.015462962337612025, | |
| "eval_across_var": 0.9652468334866441, | |
| "eval_bleu": 0.9665450842704094, | |
| "eval_ce_loss": 0.08923274265882904, | |
| "eval_cos_loss": 0.35504293285276245, | |
| "eval_cov": 0.06710940844392123, | |
| "eval_cov_loss": 0.007327720047650884, | |
| "eval_global_var": 2.2822310216894977, | |
| "eval_loss": 0.24187510450408883, | |
| "eval_mse_loss": 0.8843358904803724, | |
| "eval_per_var": 2.3488914454908674, | |
| "eval_runtime": 155.0579, | |
| "eval_samples_per_second": 180.533, | |
| "eval_steps_per_second": 2.825, | |
| "eval_within_var": 1.3261088448572376, | |
| "step": 6144 | |
| }, | |
| { | |
| "epoch": 0.2955983557341462, | |
| "grad_norm": 0.129238098859787, | |
| "learning_rate": 4.131079581886694e-05, | |
| "loss": 0.3099, | |
| "step": 6400 | |
| }, | |
| { | |
| "epoch": 0.30742228996351206, | |
| "grad_norm": 0.1061507984995842, | |
| "learning_rate": 4.057345705423016e-05, | |
| "loss": 0.2963, | |
| "step": 6656 | |
| }, | |
| { | |
| "epoch": 0.3192462241928779, | |
| "grad_norm": 0.10803277790546417, | |
| "learning_rate": 3.981328167731251e-05, | |
| "loss": 0.2854, | |
| "step": 6912 | |
| }, | |
| { | |
| "epoch": 0.3310701584222438, | |
| "grad_norm": 0.10297808796167374, | |
| "learning_rate": 3.9031384394391954e-05, | |
| "loss": 0.2709, | |
| "step": 7168 | |
| }, | |
| { | |
| "epoch": 0.3310701584222438, | |
| "eval_acr_loss": 0.014716924630300589, | |
| "eval_across_var": 0.9652062489834006, | |
| "eval_bleu": 0.9750850142716162, | |
| "eval_ce_loss": 0.0659905160653945, | |
| "eval_cos_loss": 0.31133458848413265, | |
| "eval_cov": 0.06627595805686358, | |
| "eval_cov_loss": 0.007171058822125537, | |
| "eval_global_var": 2.3926940639269407, | |
| "eval_loss": 0.20149783922793113, | |
| "eval_mse_loss": 0.8055339337211765, | |
| "eval_per_var": 2.513992936643836, | |
| "eval_within_var": 1.4371973874906427, | |
| "step": 7168 | |
| }, | |
| { | |
| "epoch": 0.3310701584222438, | |
| "eval_acr_loss": 0.014716924630300589, | |
| "eval_across_var": 0.9652062489834006, | |
| "eval_bleu": 0.9750850142716162, | |
| "eval_ce_loss": 0.0659905160653945, | |
| "eval_cos_loss": 0.31133458848413265, | |
| "eval_cov": 0.06627595805686358, | |
| "eval_cov_loss": 0.007171058822125537, | |
| "eval_global_var": 2.3926940639269407, | |
| "eval_loss": 0.20149783922793113, | |
| "eval_mse_loss": 0.8055339337211765, | |
| "eval_per_var": 2.513992936643836, | |
| "eval_runtime": 152.9262, | |
| "eval_samples_per_second": 183.049, | |
| "eval_steps_per_second": 2.864, | |
| "eval_within_var": 1.4371973874906427, | |
| "step": 7168 | |
| }, | |
| { | |
| "epoch": 0.34289409265160964, | |
| "grad_norm": 0.08976765722036362, | |
| "learning_rate": 3.822891176432382e-05, | |
| "loss": 0.2629, | |
| "step": 7424 | |
| }, | |
| { | |
| "epoch": 0.3547180268809755, | |
| "grad_norm": 0.25151142477989197, | |
| "learning_rate": 3.7407040517249335e-05, | |
| "loss": 0.2533, | |
| "step": 7680 | |
| }, | |
| { | |
| "epoch": 0.3665419611103413, | |
| "grad_norm": 0.09347163140773773, | |
| "learning_rate": 3.6566975829061614e-05, | |
| "loss": 0.2437, | |
| "step": 7936 | |
| }, | |
| { | |
| "epoch": 0.37836589533970716, | |
| "grad_norm": 0.15004394948482513, | |
| "learning_rate": 3.5709949554159355e-05, | |
| "loss": 0.2348, | |
| "step": 8192 | |
| }, | |
| { | |
| "epoch": 0.37836589533970716, | |
| "eval_acr_loss": 0.014583685287022457, | |
| "eval_across_var": 0.9538344581649728, | |
| "eval_bleu": 0.980561100967318, | |
| "eval_ce_loss": 0.05129089445454073, | |
| "eval_cos_loss": 0.28137742255104187, | |
| "eval_cov": 0.06562282614511987, | |
| "eval_cov_loss": 0.007066378377369482, | |
| "eval_global_var": 2.479759738869863, | |
| "eval_loss": 0.17527394500225102, | |
| "eval_mse_loss": 0.7551626324925793, | |
| "eval_per_var": 2.6612933433219177, | |
| "eval_within_var": 1.53611661745533, | |
| "step": 8192 | |
| }, | |
| { | |
| "epoch": 0.37836589533970716, | |
| "eval_acr_loss": 0.014583685287022457, | |
| "eval_across_var": 0.9538344581649728, | |
| "eval_bleu": 0.980561100967318, | |
| "eval_ce_loss": 0.05129089445454073, | |
| "eval_cos_loss": 0.28137742255104187, | |
| "eval_cov": 0.06562282614511987, | |
| "eval_cov_loss": 0.007066378377369482, | |
| "eval_global_var": 2.479759738869863, | |
| "eval_loss": 0.17527394500225102, | |
| "eval_mse_loss": 0.7551626324925793, | |
| "eval_per_var": 2.6612933433219177, | |
| "eval_runtime": 151.3473, | |
| "eval_samples_per_second": 184.959, | |
| "eval_steps_per_second": 2.894, | |
| "eval_within_var": 1.53611661745533, | |
| "step": 8192 | |
| }, | |
| { | |
| "epoch": 0.390189829569073, | |
| "grad_norm": 0.09314695745706558, | |
| "learning_rate": 3.483721841907964e-05, | |
| "loss": 0.2288, | |
| "step": 8448 | |
| }, | |
| { | |
| "epoch": 0.4020137637984389, | |
| "grad_norm": 0.09459128230810165, | |
| "learning_rate": 3.395006217965885e-05, | |
| "loss": 0.2225, | |
| "step": 8704 | |
| }, | |
| { | |
| "epoch": 0.41383769802780473, | |
| "grad_norm": 0.1038607731461525, | |
| "learning_rate": 3.3049781744423665e-05, | |
| "loss": 0.215, | |
| "step": 8960 | |
| }, | |
| { | |
| "epoch": 0.4256616322571706, | |
| "grad_norm": 0.06964848935604095, | |
| "learning_rate": 3.213769726696439e-05, | |
| "loss": 0.2103, | |
| "step": 9216 | |
| }, | |
| { | |
| "epoch": 0.4256616322571706, | |
| "eval_acr_loss": 0.01336662589730249, | |
| "eval_across_var": 0.987674045780478, | |
| "eval_bleu": 0.9840502134478613, | |
| "eval_ce_loss": 0.0413797390200708, | |
| "eval_cos_loss": 0.2597663049080056, | |
| "eval_cov": 0.0652730584688927, | |
| "eval_cov_loss": 0.007007736591494655, | |
| "eval_global_var": 2.6066415168378994, | |
| "eval_loss": 0.15707423451216254, | |
| "eval_mse_loss": 0.7215510081482804, | |
| "eval_per_var": 2.776688249143836, | |
| "eval_within_var": 1.6285416399507218, | |
| "step": 9216 | |
| }, | |
| { | |
| "epoch": 0.4256616322571706, | |
| "eval_acr_loss": 0.01336662589730249, | |
| "eval_across_var": 0.987674045780478, | |
| "eval_bleu": 0.9840502134478613, | |
| "eval_ce_loss": 0.0413797390200708, | |
| "eval_cos_loss": 0.2597663049080056, | |
| "eval_cov": 0.0652730584688927, | |
| "eval_cov_loss": 0.007007736591494655, | |
| "eval_global_var": 2.6066415168378994, | |
| "eval_loss": 0.15707423451216254, | |
| "eval_mse_loss": 0.7215510081482804, | |
| "eval_per_var": 2.776688249143836, | |
| "eval_runtime": 151.5839, | |
| "eval_samples_per_second": 184.67, | |
| "eval_steps_per_second": 2.889, | |
| "eval_within_var": 1.6285416399507218, | |
| "step": 9216 | |
| }, | |
| { | |
| "epoch": 0.4374855664865364, | |
| "grad_norm": 0.07443105429410934, | |
| "learning_rate": 3.121514621008757e-05, | |
| "loss": 0.2053, | |
| "step": 9472 | |
| }, | |
| { | |
| "epoch": 0.44930950071590225, | |
| "grad_norm": 0.08320944011211395, | |
| "learning_rate": 3.0283481384586697e-05, | |
| "loss": 0.2017, | |
| "step": 9728 | |
| }, | |
| { | |
| "epoch": 0.4611334349452681, | |
| "grad_norm": 0.1169746071100235, | |
| "learning_rate": 2.9344068965507027e-05, | |
| "loss": 0.1966, | |
| "step": 9984 | |
| }, | |
| { | |
| "epoch": 0.472957369174634, | |
| "grad_norm": 0.08953411877155304, | |
| "learning_rate": 2.840199155190943e-05, | |
| "loss": 0.1938, | |
| "step": 10240 | |
| }, | |
| { | |
| "epoch": 0.472957369174634, | |
| "eval_acr_loss": 0.012974254141045301, | |
| "eval_across_var": 0.9815338524781405, | |
| "eval_bleu": 0.9866441773938769, | |
| "eval_ce_loss": 0.034374653984745755, | |
| "eval_cos_loss": 0.24375769958648508, | |
| "eval_cov": 0.06485466107930223, | |
| "eval_cov_loss": 0.006919686747374668, | |
| "eval_global_var": 2.6645574700342465, | |
| "eval_loss": 0.1440824061359989, | |
| "eval_mse_loss": 0.6987405730981261, | |
| "eval_per_var": 2.866460652111872, | |
| "eval_within_var": 1.6929740287941886, | |
| "step": 10240 | |
| }, | |
| { | |
| "epoch": 0.472957369174634, | |
| "eval_acr_loss": 0.012974254141045301, | |
| "eval_across_var": 0.9815338524781405, | |
| "eval_bleu": 0.9866441773938769, | |
| "eval_ce_loss": 0.034374653984745755, | |
| "eval_cos_loss": 0.24375769958648508, | |
| "eval_cov": 0.06485466107930223, | |
| "eval_cov_loss": 0.006919686747374668, | |
| "eval_global_var": 2.6645574700342465, | |
| "eval_loss": 0.1440824061359989, | |
| "eval_mse_loss": 0.6987405730981261, | |
| "eval_per_var": 2.866460652111872, | |
| "eval_runtime": 151.0941, | |
| "eval_samples_per_second": 185.269, | |
| "eval_steps_per_second": 2.899, | |
| "eval_within_var": 1.6929740287941886, | |
| "step": 10240 | |
| }, | |
| { | |
| "epoch": 0.48478130340399983, | |
| "grad_norm": 0.08886408805847168, | |
| "learning_rate": 2.745124265175868e-05, | |
| "loss": 0.1892, | |
| "step": 10496 | |
| }, | |
| { | |
| "epoch": 0.49660523763336567, | |
| "grad_norm": 0.11508477479219437, | |
| "learning_rate": 2.6496899297412598e-05, | |
| "loss": 0.1853, | |
| "step": 10752 | |
| }, | |
| { | |
| "epoch": 0.5084291718627315, | |
| "grad_norm": 0.0753609761595726, | |
| "learning_rate": 2.554036091926675e-05, | |
| "loss": 0.1839, | |
| "step": 11008 | |
| }, | |
| { | |
| "epoch": 0.5202531060920974, | |
| "grad_norm": 0.09823817759752274, | |
| "learning_rate": 2.4583030166456618e-05, | |
| "loss": 0.18, | |
| "step": 11264 | |
| }, | |
| { | |
| "epoch": 0.5202531060920974, | |
| "eval_acr_loss": 0.013261525430046982, | |
| "eval_across_var": 1.0205278622505327, | |
| "eval_bleu": 0.9883516965111658, | |
| "eval_ce_loss": 0.029720396500880316, | |
| "eval_cos_loss": 0.23187202939840212, | |
| "eval_cov": 0.06469566310377425, | |
| "eval_cov_loss": 0.006903172251619569, | |
| "eval_global_var": 2.7817003781392695, | |
| "eval_loss": 0.13512653335248498, | |
| "eval_mse_loss": 0.6834642570040542, | |
| "eval_per_var": 3.0010434503424657, | |
| "eval_within_var": 1.7715753001165173, | |
| "step": 11264 | |
| }, | |
| { | |
| "epoch": 0.5202531060920974, | |
| "eval_acr_loss": 0.013261525430046982, | |
| "eval_across_var": 1.0205278622505327, | |
| "eval_bleu": 0.9883516965111658, | |
| "eval_ce_loss": 0.029720396500880316, | |
| "eval_cos_loss": 0.23187202939840212, | |
| "eval_cov": 0.06469566310377425, | |
| "eval_cov_loss": 0.006903172251619569, | |
| "eval_global_var": 2.7817003781392695, | |
| "eval_loss": 0.13512653335248498, | |
| "eval_mse_loss": 0.6834642570040542, | |
| "eval_per_var": 3.0010434503424657, | |
| "eval_runtime": 151.0663, | |
| "eval_samples_per_second": 185.303, | |
| "eval_steps_per_second": 2.899, | |
| "eval_within_var": 1.7715753001165173, | |
| "step": 11264 | |
| }, | |
| { | |
| "epoch": 0.5320770403214632, | |
| "grad_norm": 0.09726043790578842, | |
| "learning_rate": 2.3626310850040373e-05, | |
| "loss": 0.1772, | |
| "step": 11520 | |
| }, | |
| { | |
| "epoch": 0.5439009745508291, | |
| "grad_norm": 0.0948578342795372, | |
| "learning_rate": 2.2671605884477816e-05, | |
| "loss": 0.1754, | |
| "step": 11776 | |
| }, | |
| { | |
| "epoch": 0.5557249087801949, | |
| "grad_norm": 0.08570394665002823, | |
| "learning_rate": 2.1720315230424133e-05, | |
| "loss": 0.1733, | |
| "step": 12032 | |
| }, | |
| { | |
| "epoch": 0.5675488430095608, | |
| "grad_norm": 0.10034994781017303, | |
| "learning_rate": 2.0777519879097458e-05, | |
| "loss": 0.1722, | |
| "step": 12288 | |
| }, | |
| { | |
| "epoch": 0.5675488430095608, | |
| "eval_acr_loss": 0.01237716493560104, | |
| "eval_across_var": 0.997331017772901, | |
| "eval_bleu": 0.9896437649984566, | |
| "eval_ce_loss": 0.026328031107630222, | |
| "eval_cos_loss": 0.22285830515296493, | |
| "eval_cov": 0.064281376529502, | |
| "eval_cov_loss": 0.006827230591828761, | |
| "eval_global_var": 2.807202482876712, | |
| "eval_loss": 0.12841411385702217, | |
| "eval_mse_loss": 0.6729901853489549, | |
| "eval_per_var": 3.002407962328767, | |
| "eval_within_var": 1.8194298801356799, | |
| "step": 12288 | |
| }, | |
| { | |
| "epoch": 0.5675488430095608, | |
| "eval_acr_loss": 0.01237716493560104, | |
| "eval_across_var": 0.997331017772901, | |
| "eval_bleu": 0.9896437649984566, | |
| "eval_ce_loss": 0.026328031107630222, | |
| "eval_cos_loss": 0.22285830515296493, | |
| "eval_cov": 0.064281376529502, | |
| "eval_cov_loss": 0.006827230591828761, | |
| "eval_global_var": 2.807202482876712, | |
| "eval_loss": 0.12841411385702217, | |
| "eval_mse_loss": 0.6729901853489549, | |
| "eval_per_var": 3.002407962328767, | |
| "eval_runtime": 149.5452, | |
| "eval_samples_per_second": 187.188, | |
| "eval_steps_per_second": 2.929, | |
| "eval_within_var": 1.8194298801356799, | |
| "step": 12288 | |
| } | |
| ], | |
| "logging_steps": 256, | |
| "max_steps": 21651, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 1024, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 0.0, | |
| "train_batch_size": 64, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |