| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.9932104752667313, |
| "eval_steps": 1024, |
| "global_step": 21504, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.011823934229365849, |
| "grad_norm": 0.24549634754657745, |
| "learning_rate": 0.000498046875, |
| "loss": 4.18386697769165, |
| "step": 256 |
| }, |
| { |
| "epoch": 0.023647868458731697, |
| "grad_norm": 0.2271815687417984, |
| "learning_rate": 0.000998046875, |
| "loss": 2.770956039428711, |
| "step": 512 |
| }, |
| { |
| "epoch": 0.03547180268809755, |
| "grad_norm": 0.10401463508605957, |
| "learning_rate": 0.000999640996023194, |
| "loss": 1.3252710103988647, |
| "step": 768 |
| }, |
| { |
| "epoch": 0.047295736917463395, |
| "grad_norm": 0.14152967929840088, |
| "learning_rate": 0.0009985588674043958, |
| "loss": 0.9731757044792175, |
| "step": 1024 |
| }, |
| { |
| "epoch": 0.047295736917463395, |
| "eval_bleu": 0.3892877345908514, |
| "eval_ce_loss": 0.9097963889984235, |
| "eval_loss": 0.9097963889984235, |
| "step": 1024 |
| }, |
| { |
| "epoch": 0.047295736917463395, |
| "eval_bleu": 0.3892877345908514, |
| "eval_ce_loss": 0.9097963889984235, |
| "eval_loss": 0.9097963889984235, |
| "eval_runtime": 121.8207, |
| "eval_samples_per_second": 229.789, |
| "eval_steps_per_second": 3.595, |
| "step": 1024 |
| }, |
| { |
| "epoch": 0.05911967114682925, |
| "grad_norm": 0.15999071300029755, |
| "learning_rate": 0.0009967551747861387, |
| "loss": 0.8742998242378235, |
| "step": 1280 |
| }, |
| { |
| "epoch": 0.0709436053761951, |
| "grad_norm": 0.16329661011695862, |
| "learning_rate": 0.000994232528651847, |
| "loss": 0.8178759217262268, |
| "step": 1536 |
| }, |
| { |
| "epoch": 0.08276753960556095, |
| "grad_norm": 0.15794983506202698, |
| "learning_rate": 0.0009909945800260092, |
| "loss": 0.7751419544219971, |
| "step": 1792 |
| }, |
| { |
| "epoch": 0.09459147383492679, |
| "grad_norm": 0.16746914386749268, |
| "learning_rate": 0.0009870460151900522, |
| "loss": 0.7435040473937988, |
| "step": 2048 |
| }, |
| { |
| "epoch": 0.09459147383492679, |
| "eval_bleu": 0.44248013946091835, |
| "eval_ce_loss": 0.7321214277178185, |
| "eval_loss": 0.7321214277178185, |
| "step": 2048 |
| }, |
| { |
| "epoch": 0.09459147383492679, |
| "eval_bleu": 0.44248013946091835, |
| "eval_ce_loss": 0.7321214277178185, |
| "eval_loss": 0.7321214277178185, |
| "eval_runtime": 118.3848, |
| "eval_samples_per_second": 236.458, |
| "eval_steps_per_second": 3.7, |
| "step": 2048 |
| }, |
| { |
| "epoch": 0.10641540806429264, |
| "grad_norm": 0.1850295215845108, |
| "learning_rate": 0.0009823925488998885, |
| "loss": 0.7177194952964783, |
| "step": 2304 |
| }, |
| { |
| "epoch": 0.1182393422936585, |
| "grad_norm": 0.22075414657592773, |
| "learning_rate": 0.0009770409161149525, |
| "loss": 0.7007547616958618, |
| "step": 2560 |
| }, |
| { |
| "epoch": 0.13006327652302435, |
| "grad_norm": 0.18754425644874573, |
| "learning_rate": 0.0009709988622506973, |
| "loss": 0.6803107857704163, |
| "step": 2816 |
| }, |
| { |
| "epoch": 0.1418872107523902, |
| "grad_norm": 0.27784082293510437, |
| "learning_rate": 0.000964275131968659, |
| "loss": 0.667098343372345, |
| "step": 3072 |
| }, |
| { |
| "epoch": 0.1418872107523902, |
| "eval_bleu": 0.45531419834435466, |
| "eval_ce_loss": 0.6618382356482554, |
| "eval_loss": 0.6618382356482554, |
| "step": 3072 |
| }, |
| { |
| "epoch": 0.1418872107523902, |
| "eval_bleu": 0.45531419834435466, |
| "eval_ce_loss": 0.6618382356482554, |
| "eval_loss": 0.6618382356482554, |
| "eval_runtime": 118.8245, |
| "eval_samples_per_second": 235.583, |
| "eval_steps_per_second": 3.686, |
| "step": 3072 |
| }, |
| { |
| "epoch": 0.15371114498175603, |
| "grad_norm": 0.19028860330581665, |
| "learning_rate": 0.0009568794565203123, |
| "loss": 0.6527599692344666, |
| "step": 3328 |
| }, |
| { |
| "epoch": 0.1655350792111219, |
| "grad_norm": 0.20103782415390015, |
| "learning_rate": 0.0009488225396630347, |
| "loss": 0.6440002918243408, |
| "step": 3584 |
| }, |
| { |
| "epoch": 0.17735901344048774, |
| "grad_norm": 0.20544974505901337, |
| "learning_rate": 0.0009401160421685646, |
| "loss": 0.6339991688728333, |
| "step": 3840 |
| }, |
| { |
| "epoch": 0.18918294766985358, |
| "grad_norm": 0.20827855169773102, |
| "learning_rate": 0.0009307725649463714, |
| "loss": 0.6260778903961182, |
| "step": 4096 |
| }, |
| { |
| "epoch": 0.18918294766985358, |
| "eval_bleu": 0.46845902018751817, |
| "eval_ce_loss": 0.6253500647468654, |
| "eval_loss": 0.6253500647468654, |
| "step": 4096 |
| }, |
| { |
| "epoch": 0.18918294766985358, |
| "eval_bleu": 0.46845902018751817, |
| "eval_ce_loss": 0.6253500647468654, |
| "eval_loss": 0.6253500647468654, |
| "eval_runtime": 119.8358, |
| "eval_samples_per_second": 233.595, |
| "eval_steps_per_second": 3.655, |
| "step": 4096 |
| }, |
| { |
| "epoch": 0.20100688189921945, |
| "grad_norm": 0.21191295981407166, |
| "learning_rate": 0.0009208056308063659, |
| "loss": 0.6236065030097961, |
| "step": 4352 |
| }, |
| { |
| "epoch": 0.2128308161285853, |
| "grad_norm": 0.24691034853458405, |
| "learning_rate": 0.0009102296648873445, |
| "loss": 0.612799346446991, |
| "step": 4608 |
| }, |
| { |
| "epoch": 0.22465475035795113, |
| "grad_norm": 0.20382587611675262, |
| "learning_rate": 0.0008990599737794927, |
| "loss": 0.6066073179244995, |
| "step": 4864 |
| }, |
| { |
| "epoch": 0.236478684587317, |
| "grad_norm": 0.20030871033668518, |
| "learning_rate": 0.0008873127233711644, |
| "loss": 0.599780797958374, |
| "step": 5120 |
| }, |
| { |
| "epoch": 0.236478684587317, |
| "eval_bleu": 0.47697047709820556, |
| "eval_ce_loss": 0.5962924270733306, |
| "eval_loss": 0.5962924270733306, |
| "step": 5120 |
| }, |
| { |
| "epoch": 0.236478684587317, |
| "eval_bleu": 0.47697047709820556, |
| "eval_ce_loss": 0.5962924270733306, |
| "eval_loss": 0.5962924270733306, |
| "eval_runtime": 120.1746, |
| "eval_samples_per_second": 232.936, |
| "eval_steps_per_second": 3.645, |
| "step": 5120 |
| }, |
| { |
| "epoch": 0.24830261881668284, |
| "grad_norm": 0.2779148519039154, |
| "learning_rate": 0.0008750049154520011, |
| "loss": 0.5962232351303101, |
| "step": 5376 |
| }, |
| { |
| "epoch": 0.2601265530460487, |
| "grad_norm": 0.25544700026512146, |
| "learning_rate": 0.0008621543631062487, |
| "loss": 0.588756263256073, |
| "step": 5632 |
| }, |
| { |
| "epoch": 0.27195048727541454, |
| "grad_norm": 0.2710427939891815, |
| "learning_rate": 0.0008487796649318904, |
| "loss": 0.5883460640907288, |
| "step": 5888 |
| }, |
| { |
| "epoch": 0.2837744215047804, |
| "grad_norm": 0.2350832223892212, |
| "learning_rate": 0.0008349001781229053, |
| "loss": 0.5858848094940186, |
| "step": 6144 |
| }, |
| { |
| "epoch": 0.2837744215047804, |
| "eval_bleu": 0.48177328442502626, |
| "eval_ce_loss": 0.5815416241753592, |
| "eval_loss": 0.5815416241753592, |
| "step": 6144 |
| }, |
| { |
| "epoch": 0.2837744215047804, |
| "eval_bleu": 0.48177328442502626, |
| "eval_ce_loss": 0.5815416241753592, |
| "eval_loss": 0.5815416241753592, |
| "eval_runtime": 120.4092, |
| "eval_samples_per_second": 232.482, |
| "eval_steps_per_second": 3.638, |
| "step": 6144 |
| }, |
| { |
| "epoch": 0.2955983557341462, |
| "grad_norm": 0.2077747881412506, |
| "learning_rate": 0.0008205359904536107, |
| "loss": 0.579054594039917, |
| "step": 6400 |
| }, |
| { |
| "epoch": 0.30742228996351206, |
| "grad_norm": 0.23695871233940125, |
| "learning_rate": 0.0008057078912056363, |
| "loss": 0.5759257674217224, |
| "step": 6656 |
| }, |
| { |
| "epoch": 0.3192462241928779, |
| "grad_norm": 0.20875827968120575, |
| "learning_rate": 0.0007904373410796086, |
| "loss": 0.5730624794960022, |
| "step": 6912 |
| }, |
| { |
| "epoch": 0.3310701584222438, |
| "grad_norm": 0.32154712080955505, |
| "learning_rate": 0.0007747464411350876, |
| "loss": 0.569648802280426, |
| "step": 7168 |
| }, |
| { |
| "epoch": 0.3310701584222438, |
| "eval_bleu": 0.48784394354563465, |
| "eval_ce_loss": 0.571360412185595, |
| "eval_loss": 0.571360412185595, |
| "step": 7168 |
| }, |
| { |
| "epoch": 0.3310701584222438, |
| "eval_bleu": 0.48784394354563465, |
| "eval_ce_loss": 0.571360412185595, |
| "eval_loss": 0.571360412185595, |
| "eval_runtime": 120.5359, |
| "eval_samples_per_second": 232.238, |
| "eval_steps_per_second": 3.634, |
| "step": 7168 |
| }, |
| { |
| "epoch": 0.34289409265160964, |
| "grad_norm": 0.22952647507190704, |
| "learning_rate": 0.000758657900803716, |
| "loss": 0.5667076110839844, |
| "step": 7424 |
| }, |
| { |
| "epoch": 0.3547180268809755, |
| "grad_norm": 0.23854298889636993, |
| "learning_rate": 0.000742195005021869, |
| "loss": 0.5655595064163208, |
| "step": 7680 |
| }, |
| { |
| "epoch": 0.3665419611103413, |
| "grad_norm": 0.2523816227912903, |
| "learning_rate": 0.0007253815805303786, |
| "loss": 0.5611915588378906, |
| "step": 7936 |
| }, |
| { |
| "epoch": 0.37836589533970716, |
| "grad_norm": 0.21064619719982147, |
| "learning_rate": 0.0007082419613901028, |
| "loss": 0.5565797686576843, |
| "step": 8192 |
| }, |
| { |
| "epoch": 0.37836589533970716, |
| "eval_bleu": 0.4903212059069678, |
| "eval_ce_loss": 0.5588067457692264, |
| "eval_loss": 0.5588067457692264, |
| "step": 8192 |
| }, |
| { |
| "epoch": 0.37836589533970716, |
| "eval_bleu": 0.4903212059069678, |
| "eval_ce_loss": 0.5588067457692264, |
| "eval_loss": 0.5588067457692264, |
| "eval_runtime": 119.8012, |
| "eval_samples_per_second": 233.662, |
| "eval_steps_per_second": 3.656, |
| "step": 8192 |
| }, |
| { |
| "epoch": 0.390189829569073, |
| "grad_norm": 0.2359577864408493, |
| "learning_rate": 0.0006908009537632514, |
| "loss": 0.557487428188324, |
| "step": 8448 |
| }, |
| { |
| "epoch": 0.4020137637984389, |
| "grad_norm": 0.28248342871665955, |
| "learning_rate": 0.0006730838000114403, |
| "loss": 0.557571530342102, |
| "step": 8704 |
| }, |
| { |
| "epoch": 0.41383769802780473, |
| "grad_norm": 0.22688360512256622, |
| "learning_rate": 0.0006551161421624341, |
| "loss": 0.5515068173408508, |
| "step": 8960 |
| }, |
| { |
| "epoch": 0.4256616322571706, |
| "grad_norm": 0.22482667863368988, |
| "learning_rate": 0.0006369239847984517, |
| "loss": 0.5492411851882935, |
| "step": 9216 |
| }, |
| { |
| "epoch": 0.4256616322571706, |
| "eval_bleu": 0.49488542715926903, |
| "eval_ce_loss": 0.5491624213244817, |
| "eval_loss": 0.5491624213244817, |
| "step": 9216 |
| }, |
| { |
| "epoch": 0.4256616322571706, |
| "eval_bleu": 0.49488542715926903, |
| "eval_ce_loss": 0.5491624213244817, |
| "eval_loss": 0.5491624213244817, |
| "eval_runtime": 119.2059, |
| "eval_samples_per_second": 234.829, |
| "eval_steps_per_second": 3.674, |
| "step": 9216 |
| }, |
| { |
| "epoch": 0.4374855664865364, |
| "grad_norm": 0.23678575456142426, |
| "learning_rate": 0.0006185336574197479, |
| "loss": 0.5475496649742126, |
| "step": 9472 |
| }, |
| { |
| "epoch": 0.44930950071590225, |
| "grad_norm": 0.23306351900100708, |
| "learning_rate": 0.0005999717763379407, |
| "loss": 0.5489075779914856, |
| "step": 9728 |
| }, |
| { |
| "epoch": 0.4611334349452681, |
| "grad_norm": 0.24446141719818115, |
| "learning_rate": 0.0005812652061542363, |
| "loss": 0.5464962720870972, |
| "step": 9984 |
| }, |
| { |
| "epoch": 0.472957369174634, |
| "grad_norm": 0.26656046509742737, |
| "learning_rate": 0.0005624410208783071, |
| "loss": 0.5421015024185181, |
| "step": 10240 |
| }, |
| { |
| "epoch": 0.472957369174634, |
| "eval_bleu": 0.49757119992436116, |
| "eval_ce_loss": 0.5445564628055651, |
| "eval_loss": 0.5445564628055651, |
| "step": 10240 |
| }, |
| { |
| "epoch": 0.472957369174634, |
| "eval_bleu": 0.49757119992436116, |
| "eval_ce_loss": 0.5445564628055651, |
| "eval_loss": 0.5445564628055651, |
| "eval_runtime": 119.9032, |
| "eval_samples_per_second": 233.463, |
| "eval_steps_per_second": 3.653, |
| "step": 10240 |
| }, |
| { |
| "epoch": 0.48478130340399983, |
| "grad_norm": 0.25631916522979736, |
| "learning_rate": 0.0005435264647440881, |
| "loss": 0.5437694787979126, |
| "step": 10496 |
| }, |
| { |
| "epoch": 0.49660523763336567, |
| "grad_norm": 0.24505920708179474, |
| "learning_rate": 0.000524548912779213, |
| "loss": 0.5406980514526367, |
| "step": 10752 |
| }, |
| { |
| "epoch": 0.5084291718627315, |
| "grad_norm": 0.2363251894712448, |
| "learning_rate": 0.0005055358311851499, |
| "loss": 0.5423585772514343, |
| "step": 11008 |
| }, |
| { |
| "epoch": 0.5202531060920974, |
| "grad_norm": 0.2904762923717499, |
| "learning_rate": 0.0004865147375853812, |
| "loss": 0.5392704010009766, |
| "step": 11264 |
| }, |
| { |
| "epoch": 0.5202531060920974, |
| "eval_bleu": 0.49735706994160356, |
| "eval_ce_loss": 0.5361337422916334, |
| "eval_loss": 0.5361337422916334, |
| "step": 11264 |
| }, |
| { |
| "epoch": 0.5202531060920974, |
| "eval_bleu": 0.49735706994160356, |
| "eval_ce_loss": 0.5361337422916334, |
| "eval_loss": 0.5361337422916334, |
| "eval_runtime": 118.0528, |
| "eval_samples_per_second": 237.123, |
| "eval_steps_per_second": 3.71, |
| "step": 11264 |
| }, |
| { |
| "epoch": 0.5320770403214632, |
| "grad_norm": 0.2444256991147995, |
| "learning_rate": 0.0004675131611991607, |
| "loss": 0.536561906337738, |
| "step": 11520 |
| }, |
| { |
| "epoch": 0.5439009745508291, |
| "grad_norm": 0.225338414311409, |
| "learning_rate": 0.0004485586029984899, |
| "loss": 0.5376089811325073, |
| "step": 11776 |
| }, |
| { |
| "epoch": 0.5557249087801949, |
| "grad_norm": 0.24263811111450195, |
| "learning_rate": 0.00042967849590597266, |
| "loss": 0.5349382162094116, |
| "step": 12032 |
| }, |
| { |
| "epoch": 0.5675488430095608, |
| "grad_norm": 0.24349793791770935, |
| "learning_rate": 0.0004109001650911621, |
| "loss": 0.5335611701011658, |
| "step": 12288 |
| }, |
| { |
| "epoch": 0.5675488430095608, |
| "eval_bleu": 0.5003156304933045, |
| "eval_ce_loss": 0.5322896588474648, |
| "eval_loss": 0.5322896588474648, |
| "step": 12288 |
| }, |
| { |
| "epoch": 0.5675488430095608, |
| "eval_bleu": 0.5003156304933045, |
| "eval_ce_loss": 0.5322896588474648, |
| "eval_loss": 0.5322896588474648, |
| "eval_runtime": 123.196, |
| "eval_samples_per_second": 227.223, |
| "eval_steps_per_second": 3.555, |
| "step": 12288 |
| }, |
| { |
| "epoch": 0.5793727772389267, |
| "grad_norm": 0.22607196867465973, |
| "learning_rate": 0.0003922507884228551, |
| "loss": 0.5312694907188416, |
| "step": 12544 |
| }, |
| { |
| "epoch": 0.5911967114682924, |
| "grad_norm": 0.2581818997859955, |
| "learning_rate": 0.00037375735713457723, |
| "loss": 0.5293662548065186, |
| "step": 12800 |
| }, |
| { |
| "epoch": 0.6030206456976583, |
| "grad_norm": 0.2328752726316452, |
| "learning_rate": 0.00035544663676018276, |
| "loss": 0.5298049449920654, |
| "step": 13056 |
| }, |
| { |
| "epoch": 0.6148445799270241, |
| "grad_norm": 0.22804544866085052, |
| "learning_rate": 0.00033734512839611255, |
| "loss": 0.5289351940155029, |
| "step": 13312 |
| }, |
| { |
| "epoch": 0.6148445799270241, |
| "eval_bleu": 0.505403307988388, |
| "eval_ce_loss": 0.527684580585728, |
| "eval_loss": 0.527684580585728, |
| "step": 13312 |
| }, |
| { |
| "epoch": 0.6148445799270241, |
| "eval_bleu": 0.505403307988388, |
| "eval_ce_loss": 0.527684580585728, |
| "eval_loss": 0.527684580585728, |
| "eval_runtime": 119.1767, |
| "eval_samples_per_second": 234.887, |
| "eval_steps_per_second": 3.675, |
| "step": 13312 |
| }, |
| { |
| "epoch": 0.62666851415639, |
| "grad_norm": 0.25363898277282715, |
| "learning_rate": 0.0003194790303463687, |
| "loss": 0.5256697535514832, |
| "step": 13568 |
| }, |
| { |
| "epoch": 0.6384924483857558, |
| "grad_norm": 0.2509095370769501, |
| "learning_rate": 0.00030187420020572406, |
| "loss": 0.5276732444763184, |
| "step": 13824 |
| }, |
| { |
| "epoch": 0.6503163826151217, |
| "grad_norm": 0.2644820809364319, |
| "learning_rate": 0.00028455611743603626, |
| "loss": 0.5258468985557556, |
| "step": 14080 |
| }, |
| { |
| "epoch": 0.6621403168444876, |
| "grad_norm": 0.2666020691394806, |
| "learning_rate": 0.0002675498464898373, |
| "loss": 0.5252395272254944, |
| "step": 14336 |
| }, |
| { |
| "epoch": 0.6621403168444876, |
| "eval_bleu": 0.5072326089834556, |
| "eval_ce_loss": 0.5239842616395863, |
| "eval_loss": 0.5239842616395863, |
| "step": 14336 |
| }, |
| { |
| "epoch": 0.6621403168444876, |
| "eval_bleu": 0.5072326089834556, |
| "eval_ce_loss": 0.5239842616395863, |
| "eval_loss": 0.5239842616395863, |
| "eval_runtime": 121.1147, |
| "eval_samples_per_second": 231.128, |
| "eval_steps_per_second": 3.616, |
| "step": 14336 |
| }, |
| { |
| "epoch": 0.6739642510738534, |
| "grad_norm": 0.305530309677124, |
| "learning_rate": 0.0002508800005345623, |
| "loss": 0.5255744457244873, |
| "step": 14592 |
| }, |
| { |
| "epoch": 0.6857881853032193, |
| "grad_norm": 0.22708480060100555, |
| "learning_rate": 0.00023457070582992562, |
| "loss": 0.5249497294425964, |
| "step": 14848 |
| }, |
| { |
| "epoch": 0.6976121195325851, |
| "grad_norm": 0.22963008284568787, |
| "learning_rate": 0.00021864556680999692, |
| "loss": 0.5229964852333069, |
| "step": 15104 |
| }, |
| { |
| "epoch": 0.709436053761951, |
| "grad_norm": 0.22102124989032745, |
| "learning_rate": 0.0002031276319205152, |
| "loss": 0.5240696668624878, |
| "step": 15360 |
| }, |
| { |
| "epoch": 0.709436053761951, |
| "eval_bleu": 0.5042547631198441, |
| "eval_ce_loss": 0.5232533255940703, |
| "eval_loss": 0.5232533255940703, |
| "step": 15360 |
| }, |
| { |
| "epoch": 0.709436053761951, |
| "eval_bleu": 0.5042547631198441, |
| "eval_ce_loss": 0.5232533255940703, |
| "eval_loss": 0.5232533255940703, |
| "eval_runtime": 121.5857, |
| "eval_samples_per_second": 230.233, |
| "eval_steps_per_second": 3.602, |
| "step": 15360 |
| }, |
| { |
| "epoch": 0.7212599879913169, |
| "grad_norm": 0.23232056200504303, |
| "learning_rate": 0.00018803936026088542, |
| "loss": 0.5210784077644348, |
| "step": 15616 |
| }, |
| { |
| "epoch": 0.7330839222206826, |
| "grad_norm": 0.2541489899158478, |
| "learning_rate": 0.00017340258907913464, |
| "loss": 0.5206863284111023, |
| "step": 15872 |
| }, |
| { |
| "epoch": 0.7449078564500485, |
| "grad_norm": 0.2417968064546585, |
| "learning_rate": 0.0001592385021668743, |
| "loss": 0.5179308652877808, |
| "step": 16128 |
| }, |
| { |
| "epoch": 0.7567317906794143, |
| "grad_norm": 0.24219931662082672, |
| "learning_rate": 0.0001455675992000087, |
| "loss": 0.5221379995346069, |
| "step": 16384 |
| }, |
| { |
| "epoch": 0.7567317906794143, |
| "eval_bleu": 0.5066068090628344, |
| "eval_ce_loss": 0.5208139295039111, |
| "eval_loss": 0.5208139295039111, |
| "step": 16384 |
| }, |
| { |
| "epoch": 0.7567317906794143, |
| "eval_bleu": 0.5066068090628344, |
| "eval_ce_loss": 0.5208139295039111, |
| "eval_loss": 0.5208139295039111, |
| "eval_runtime": 120.8269, |
| "eval_samples_per_second": 231.679, |
| "eval_steps_per_second": 3.625, |
| "step": 16384 |
| }, |
| { |
| "epoch": 0.7685557249087802, |
| "grad_norm": 0.2539346218109131, |
| "learning_rate": 0.000132409666069565, |
| "loss": 0.5175387859344482, |
| "step": 16640 |
| }, |
| { |
| "epoch": 0.780379659138146, |
| "grad_norm": 0.22402986884117126, |
| "learning_rate": 0.0001197837462455823, |
| "loss": 0.5182628035545349, |
| "step": 16896 |
| }, |
| { |
| "epoch": 0.7922035933675119, |
| "grad_norm": 0.23848603665828705, |
| "learning_rate": 0.00010770811321550749, |
| "loss": 0.516541063785553, |
| "step": 17152 |
| }, |
| { |
| "epoch": 0.8040275275968778, |
| "grad_norm": 0.22720517218112946, |
| "learning_rate": 9.620024403698591e-05, |
| "loss": 0.5187172889709473, |
| "step": 17408 |
| }, |
| { |
| "epoch": 0.8040275275968778, |
| "eval_bleu": 0.5108638112211565, |
| "eval_ce_loss": 0.5187523302422267, |
| "eval_loss": 0.5187523302422267, |
| "step": 17408 |
| }, |
| { |
| "epoch": 0.8040275275968778, |
| "eval_bleu": 0.5108638112211565, |
| "eval_ce_loss": 0.5187523302422267, |
| "eval_loss": 0.5187523302422267, |
| "eval_runtime": 120.6635, |
| "eval_samples_per_second": 231.992, |
| "eval_steps_per_second": 3.63, |
| "step": 17408 |
| }, |
| { |
| "epoch": 0.8158514618262436, |
| "grad_norm": 0.23969151079654694, |
| "learning_rate": 8.527679404332429e-05, |
| "loss": 0.516950249671936, |
| "step": 17664 |
| }, |
| { |
| "epoch": 0.8276753960556095, |
| "grad_norm": 0.2770597040653229, |
| "learning_rate": 7.495357273823544e-05, |
| "loss": 0.5192644000053406, |
| "step": 17920 |
| }, |
| { |
| "epoch": 0.8394993302849753, |
| "grad_norm": 0.22556257247924805, |
| "learning_rate": 6.524552091475183e-05, |
| "loss": 0.5170964598655701, |
| "step": 18176 |
| }, |
| { |
| "epoch": 0.8513232645143411, |
| "grad_norm": 0.23745474219322205, |
| "learning_rate": 5.6166689031422024e-05, |
| "loss": 0.5167249441146851, |
| "step": 18432 |
| }, |
| { |
| "epoch": 0.8513232645143411, |
| "eval_bleu": 0.5090472136817807, |
| "eval_ce_loss": 0.5178870771709643, |
| "eval_loss": 0.5178870771709643, |
| "step": 18432 |
| }, |
| { |
| "epoch": 0.8513232645143411, |
| "eval_bleu": 0.5090472136817807, |
| "eval_ce_loss": 0.5178870771709643, |
| "eval_loss": 0.5178870771709643, |
| "eval_runtime": 118.7626, |
| "eval_samples_per_second": 235.705, |
| "eval_steps_per_second": 3.688, |
| "step": 18432 |
| }, |
| { |
| "epoch": 0.8631471987437069, |
| "grad_norm": 0.2293296754360199, |
| "learning_rate": 4.773021687709067e-05, |
| "loss": 0.5173235535621643, |
| "step": 18688 |
| }, |
| { |
| "epoch": 0.8749711329730728, |
| "grad_norm": 0.24479453265666962, |
| "learning_rate": 3.994831455368719e-05, |
| "loss": 0.5143179297447205, |
| "step": 18944 |
| }, |
| { |
| "epoch": 0.8867950672024387, |
| "grad_norm": 0.2324228435754776, |
| "learning_rate": 3.283224480455282e-05, |
| "loss": 0.5171309113502502, |
| "step": 19200 |
| }, |
| { |
| "epoch": 0.8986190014318045, |
| "grad_norm": 0.21480675041675568, |
| "learning_rate": 2.639230671387627e-05, |
| "loss": 0.5147438645362854, |
| "step": 19456 |
| }, |
| { |
| "epoch": 0.8986190014318045, |
| "eval_bleu": 0.5083390638340656, |
| "eval_ce_loss": 0.5156200128739283, |
| "eval_loss": 0.5156200128739283, |
| "step": 19456 |
| }, |
| { |
| "epoch": 0.8986190014318045, |
| "eval_bleu": 0.5083390638340656, |
| "eval_ce_loss": 0.5156200128739283, |
| "eval_loss": 0.5156200128739283, |
| "eval_runtime": 121.597, |
| "eval_samples_per_second": 230.211, |
| "eval_steps_per_second": 3.602, |
| "step": 19456 |
| }, |
| { |
| "epoch": 0.9104429356611704, |
| "grad_norm": 0.23823200166225433, |
| "learning_rate": 2.063782080083576e-05, |
| "loss": 0.5157837867736816, |
| "step": 19712 |
| }, |
| { |
| "epoch": 0.9222668698905362, |
| "grad_norm": 0.22363300621509552, |
| "learning_rate": 1.557711553001523e-05, |
| "loss": 0.5182561874389648, |
| "step": 19968 |
| }, |
| { |
| "epoch": 0.9340908041199021, |
| "grad_norm": 0.23723983764648438, |
| "learning_rate": 1.1217515257622269e-05, |
| "loss": 0.5126692652702332, |
| "step": 20224 |
| }, |
| { |
| "epoch": 0.945914738349268, |
| "grad_norm": 0.24191035330295563, |
| "learning_rate": 7.565329630950746e-06, |
| "loss": 0.5159070491790771, |
| "step": 20480 |
| }, |
| { |
| "epoch": 0.945914738349268, |
| "eval_bleu": 0.5118187869284645, |
| "eval_ce_loss": 0.5148512013426655, |
| "eval_loss": 0.5148512013426655, |
| "step": 20480 |
| }, |
| { |
| "epoch": 0.945914738349268, |
| "eval_bleu": 0.5118187869284645, |
| "eval_ce_loss": 0.5148512013426655, |
| "eval_loss": 0.5148512013426655, |
| "eval_runtime": 120.5786, |
| "eval_samples_per_second": 232.156, |
| "eval_steps_per_second": 3.632, |
| "step": 20480 |
| }, |
| { |
| "epoch": 0.9577386725786338, |
| "grad_norm": 0.22184860706329346, |
| "learning_rate": 4.62584445643166e-06, |
| "loss": 0.5139285326004028, |
| "step": 20736 |
| }, |
| { |
| "epoch": 0.9695626068079997, |
| "grad_norm": 0.2565344572067261, |
| "learning_rate": 2.40331404948807e-06, |
| "loss": 0.5169987082481384, |
| "step": 20992 |
| }, |
| { |
| "epoch": 0.9813865410373654, |
| "grad_norm": 0.24548502266407013, |
| "learning_rate": 9.009550772663965e-07, |
| "loss": 0.5152971744537354, |
| "step": 21248 |
| }, |
| { |
| "epoch": 0.9932104752667313, |
| "grad_norm": 0.24441944062709808, |
| "learning_rate": 1.2094190315575791e-07, |
| "loss": 0.5162725448608398, |
| "step": 21504 |
| }, |
| { |
| "epoch": 0.9932104752667313, |
| "eval_bleu": 0.5100991731531398, |
| "eval_ce_loss": 0.5133357846165356, |
| "eval_loss": 0.5133357846165356, |
| "step": 21504 |
| }, |
| { |
| "epoch": 0.9932104752667313, |
| "eval_bleu": 0.5100991731531398, |
| "eval_ce_loss": 0.5133357846165356, |
| "eval_loss": 0.5133357846165356, |
| "eval_runtime": 120.5305, |
| "eval_samples_per_second": 232.248, |
| "eval_steps_per_second": 3.634, |
| "step": 21504 |
| } |
| ], |
| "logging_steps": 256, |
| "max_steps": 21651, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 1024, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 64, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|