{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 6.0,
  "eval_steps": 500,
  "global_step": 1032,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.05830903790087463,
      "grad_norm": 14.626969535556903,
      "learning_rate": 9.000000000000001e-07,
      "loss": 1.5047,
      "step": 10
    },
    {
      "epoch": 0.11661807580174927,
      "grad_norm": 7.637318846998983,
      "learning_rate": 1.9000000000000002e-06,
      "loss": 1.1973,
      "step": 20
    },
    {
      "epoch": 0.1749271137026239,
      "grad_norm": 3.691260729275177,
      "learning_rate": 2.9e-06,
      "loss": 0.8,
      "step": 30
    },
    {
      "epoch": 0.23323615160349853,
      "grad_norm": 2.1291989636182427,
      "learning_rate": 3.900000000000001e-06,
      "loss": 0.6175,
      "step": 40
    },
    {
      "epoch": 0.2915451895043732,
      "grad_norm": 2.14798091897299,
      "learning_rate": 4.9000000000000005e-06,
      "loss": 0.5677,
      "step": 50
    },
    {
      "epoch": 0.3498542274052478,
      "grad_norm": 2.0456305815246005,
      "learning_rate": 5.9e-06,
      "loss": 0.5051,
      "step": 60
    },
    {
      "epoch": 0.40816326530612246,
      "grad_norm": 1.988581124794002,
      "learning_rate": 6.9e-06,
      "loss": 0.4964,
      "step": 70
    },
    {
      "epoch": 0.46647230320699706,
      "grad_norm": 2.303466906806559,
      "learning_rate": 7.9e-06,
      "loss": 0.4929,
      "step": 80
    },
    {
      "epoch": 0.5247813411078717,
      "grad_norm": 1.9377203696272898,
      "learning_rate": 8.900000000000001e-06,
      "loss": 0.4703,
      "step": 90
    },
    {
      "epoch": 0.5830903790087464,
      "grad_norm": 2.190981800812453,
      "learning_rate": 9.9e-06,
      "loss": 0.4443,
      "step": 100
    },
    {
      "epoch": 0.641399416909621,
      "grad_norm": 2.0744832345963493,
      "learning_rate": 9.997699301870489e-06,
      "loss": 0.4364,
      "step": 110
    },
    {
      "epoch": 0.6997084548104956,
      "grad_norm": 1.7704017643532202,
      "learning_rate": 9.98974898953517e-06,
      "loss": 0.4147,
      "step": 120
    },
    {
      "epoch": 0.7580174927113703,
      "grad_norm": 1.747873159096446,
      "learning_rate": 9.976129689724575e-06,
      "loss": 0.4294,
      "step": 130
    },
    {
      "epoch": 0.8163265306122449,
      "grad_norm": 1.7960844701869685,
      "learning_rate": 9.956856875690006e-06,
      "loss": 0.4168,
      "step": 140
    },
    {
      "epoch": 0.8746355685131195,
      "grad_norm": 1.7736875630417643,
      "learning_rate": 9.931952443791704e-06,
      "loss": 0.4085,
      "step": 150
    },
    {
      "epoch": 0.9329446064139941,
      "grad_norm": 2.2132067152288752,
      "learning_rate": 9.901444688621801e-06,
      "loss": 0.4063,
      "step": 160
    },
    {
      "epoch": 0.9912536443148688,
      "grad_norm": 1.8315327950009368,
      "learning_rate": 9.865368270858082e-06,
      "loss": 0.3978,
      "step": 170
    },
    {
      "epoch": 1.0466472303206997,
      "grad_norm": 1.6782697102135526,
      "learning_rate": 9.82376417788506e-06,
      "loss": 0.343,
      "step": 180
    },
    {
      "epoch": 1.1049562682215743,
      "grad_norm": 1.5675824197763997,
      "learning_rate": 9.77667967722711e-06,
      "loss": 0.297,
      "step": 190
    },
    {
      "epoch": 1.163265306122449,
      "grad_norm": 1.9145136006604275,
      "learning_rate": 9.724168262846567e-06,
      "loss": 0.3153,
      "step": 200
    },
    {
      "epoch": 1.2215743440233235,
      "grad_norm": 1.7203060932188563,
      "learning_rate": 9.666289594367804e-06,
      "loss": 0.3036,
      "step": 210
    },
    {
      "epoch": 1.2798833819241984,
      "grad_norm": 1.7012502607051327,
      "learning_rate": 9.603109429296333e-06,
      "loss": 0.3246,
      "step": 220
    },
    {
      "epoch": 1.3381924198250728,
      "grad_norm": 1.6624551186289933,
      "learning_rate": 9.534699548309949e-06,
      "loss": 0.302,
      "step": 230
    },
    {
      "epoch": 1.3965014577259476,
      "grad_norm": 1.442216855717199,
      "learning_rate": 9.461137673706768e-06,
      "loss": 0.3068,
      "step": 240
    },
    {
      "epoch": 1.4548104956268222,
      "grad_norm": 1.6576009785614365,
      "learning_rate": 9.382507381102848e-06,
      "loss": 0.298,
      "step": 250
    },
    {
      "epoch": 1.5131195335276968,
      "grad_norm": 1.8058128149355792,
      "learning_rate": 9.298898004479698e-06,
      "loss": 0.3075,
      "step": 260
    },
    {
      "epoch": 1.5714285714285714,
      "grad_norm": 1.4784242670602321,
      "learning_rate": 9.210404534689536e-06,
      "loss": 0.3057,
      "step": 270
    },
    {
      "epoch": 1.629737609329446,
      "grad_norm": 1.546258930542768,
      "learning_rate": 9.117127511533654e-06,
      "loss": 0.2988,
      "step": 280
    },
    {
      "epoch": 1.6880466472303208,
      "grad_norm": 1.695476568663106,
      "learning_rate": 9.019172909536442e-06,
      "loss": 0.3033,
      "step": 290
    },
    {
      "epoch": 1.7463556851311952,
      "grad_norm": 1.6544653059743208,
      "learning_rate": 8.9166520175449e-06,
      "loss": 0.3035,
      "step": 300
    },
    {
      "epoch": 1.80466472303207,
      "grad_norm": 1.6077779186418766,
      "learning_rate": 8.809681312290398e-06,
      "loss": 0.3037,
      "step": 310
    },
    {
      "epoch": 1.8629737609329446,
      "grad_norm": 1.6177268312586421,
      "learning_rate": 8.698382326056341e-06,
      "loss": 0.2921,
      "step": 320
    },
    {
      "epoch": 1.9212827988338192,
      "grad_norm": 1.603497878343536,
      "learning_rate": 8.582881508602082e-06,
      "loss": 0.2948,
      "step": 330
    },
    {
      "epoch": 1.9795918367346939,
      "grad_norm": 1.3886954878888524,
      "learning_rate": 8.46331008349997e-06,
      "loss": 0.2946,
      "step": 340
    },
    {
      "epoch": 2.0349854227405246,
      "grad_norm": 1.499985665960878,
      "learning_rate": 8.339803899048737e-06,
      "loss": 0.2193,
      "step": 350
    },
    {
      "epoch": 2.0932944606413995,
      "grad_norm": 1.435584840559424,
      "learning_rate": 8.212503273932594e-06,
      "loss": 0.1747,
      "step": 360
    },
    {
      "epoch": 2.151603498542274,
      "grad_norm": 1.4871062012555394,
      "learning_rate": 8.081552837801428e-06,
      "loss": 0.1778,
      "step": 370
    },
    {
      "epoch": 2.2099125364431487,
      "grad_norm": 1.4218330629504066,
      "learning_rate": 7.947101366953177e-06,
      "loss": 0.176,
      "step": 380
    },
    {
      "epoch": 2.2682215743440235,
      "grad_norm": 1.2427066721460804,
      "learning_rate": 7.809301615305098e-06,
      "loss": 0.169,
      "step": 390
    },
    {
      "epoch": 2.326530612244898,
      "grad_norm": 1.491373179347201,
      "learning_rate": 7.668310140845944e-06,
      "loss": 0.1828,
      "step": 400
    },
    {
      "epoch": 2.3848396501457727,
      "grad_norm": 1.1382514278247085,
      "learning_rate": 7.524287127766245e-06,
      "loss": 0.1691,
      "step": 410
    },
    {
      "epoch": 2.443148688046647,
      "grad_norm": 1.4250482419303996,
      "learning_rate": 7.377396204468754e-06,
      "loss": 0.1813,
      "step": 420
    },
    {
      "epoch": 2.501457725947522,
      "grad_norm": 1.3115974493545381,
      "learning_rate": 7.227804257665838e-06,
      "loss": 0.1739,
      "step": 430
    },
    {
      "epoch": 2.5597667638483967,
      "grad_norm": 1.460585330094477,
      "learning_rate": 7.075681242775017e-06,
      "loss": 0.1857,
      "step": 440
    },
    {
      "epoch": 2.618075801749271,
      "grad_norm": 1.2364257305075577,
      "learning_rate": 6.921199990828056e-06,
      "loss": 0.1864,
      "step": 450
    },
    {
      "epoch": 2.6763848396501455,
      "grad_norm": 1.460927926445741,
      "learning_rate": 6.764536012113005e-06,
      "loss": 0.1927,
      "step": 460
    },
    {
      "epoch": 2.7346938775510203,
      "grad_norm": 1.1802915819618254,
      "learning_rate": 6.605867296772262e-06,
      "loss": 0.1716,
      "step": 470
    },
    {
      "epoch": 2.793002915451895,
      "grad_norm": 1.2215463060704888,
      "learning_rate": 6.445374112583196e-06,
      "loss": 0.1843,
      "step": 480
    },
    {
      "epoch": 2.8513119533527695,
      "grad_norm": 1.1217299502317768,
      "learning_rate": 6.2832388001511034e-06,
      "loss": 0.1711,
      "step": 490
    },
    {
      "epoch": 2.9096209912536444,
      "grad_norm": 1.359355058632944,
      "learning_rate": 6.119645565747165e-06,
      "loss": 0.1739,
      "step": 500
    },
    {
      "epoch": 2.9679300291545188,
      "grad_norm": 1.2912101573020145,
      "learning_rate": 5.954780272026761e-06,
      "loss": 0.1757,
      "step": 510
    },
    {
      "epoch": 3.0233236151603498,
      "grad_norm": 1.0681809205213206,
      "learning_rate": 5.788830226865929e-06,
      "loss": 0.1391,
      "step": 520
    },
    {
      "epoch": 3.0816326530612246,
      "grad_norm": 1.2260857222791004,
      "learning_rate": 5.621983970555881e-06,
      "loss": 0.0892,
      "step": 530
    },
    {
      "epoch": 3.139941690962099,
      "grad_norm": 1.0189023598379496,
      "learning_rate": 5.454431061597312e-06,
      "loss": 0.0872,
      "step": 540
    },
    {
      "epoch": 3.198250728862974,
      "grad_norm": 1.1572966564929497,
      "learning_rate": 5.286361861337924e-06,
      "loss": 0.0849,
      "step": 550
    },
    {
      "epoch": 3.256559766763848,
      "grad_norm": 1.3012849394193384,
      "learning_rate": 5.1179673176977915e-06,
      "loss": 0.0871,
      "step": 560
    },
    {
      "epoch": 3.314868804664723,
      "grad_norm": 1.0416650556204414,
      "learning_rate": 4.9494387482283185e-06,
      "loss": 0.0905,
      "step": 570
    },
    {
      "epoch": 3.373177842565598,
      "grad_norm": 1.210951177453276,
      "learning_rate": 4.7809676227512455e-06,
      "loss": 0.0895,
      "step": 580
    },
    {
      "epoch": 3.431486880466472,
      "grad_norm": 1.21811830246229,
      "learning_rate": 4.612745345824652e-06,
      "loss": 0.0953,
      "step": 590
    },
    {
      "epoch": 3.489795918367347,
      "grad_norm": 1.0389113088365345,
      "learning_rate": 4.444963039283114e-06,
      "loss": 0.0924,
      "step": 600
    },
    {
      "epoch": 3.5481049562682214,
      "grad_norm": 1.0509213042877452,
      "learning_rate": 4.277811325099072e-06,
      "loss": 0.0933,
      "step": 610
    },
    {
      "epoch": 3.6064139941690962,
      "grad_norm": 1.031466931202058,
      "learning_rate": 4.111480108812096e-06,
      "loss": 0.0921,
      "step": 620
    },
    {
      "epoch": 3.664723032069971,
      "grad_norm": 0.9674006736435579,
      "learning_rate": 3.946158363772118e-06,
      "loss": 0.0884,
      "step": 630
    },
    {
      "epoch": 3.7230320699708455,
      "grad_norm": 1.052555764127678,
      "learning_rate": 3.78203391644175e-06,
      "loss": 0.0881,
      "step": 640
    },
    {
      "epoch": 3.78134110787172,
      "grad_norm": 1.041731792136989,
      "learning_rate": 3.6192932330016074e-06,
      "loss": 0.0904,
      "step": 650
    },
    {
      "epoch": 3.8396501457725947,
      "grad_norm": 1.1688175176919982,
      "learning_rate": 3.4581212075010834e-06,
      "loss": 0.0901,
      "step": 660
    },
    {
      "epoch": 3.8979591836734695,
      "grad_norm": 1.102720481784069,
      "learning_rate": 3.2987009517952716e-06,
      "loss": 0.0915,
      "step": 670
    },
    {
      "epoch": 3.956268221574344,
      "grad_norm": 1.0261487225837105,
      "learning_rate": 3.1412135875066853e-06,
      "loss": 0.0886,
      "step": 680
    },
    {
      "epoch": 4.011661807580175,
      "grad_norm": 0.6607620413640864,
      "learning_rate": 2.9858380402481203e-06,
      "loss": 0.0711,
      "step": 690
    },
    {
      "epoch": 4.069970845481049,
      "grad_norm": 0.8936608342352569,
      "learning_rate": 2.8327508363404816e-06,
      "loss": 0.0389,
      "step": 700
    },
    {
      "epoch": 4.128279883381924,
      "grad_norm": 0.7891996422928471,
      "learning_rate": 2.6821259022565106e-06,
      "loss": 0.0367,
      "step": 710
    },
    {
      "epoch": 4.186588921282799,
      "grad_norm": 0.9396857177599008,
      "learning_rate": 2.5341343670182373e-06,
      "loss": 0.0371,
      "step": 720
    },
    {
      "epoch": 4.244897959183674,
      "grad_norm": 0.7935821446040255,
      "learning_rate": 2.3889443677727386e-06,
      "loss": 0.035,
      "step": 730
    },
    {
      "epoch": 4.303206997084548,
      "grad_norm": 0.7947020670633791,
      "learning_rate": 2.2467208587670054e-06,
      "loss": 0.0384,
      "step": 740
    },
    {
      "epoch": 4.3615160349854225,
      "grad_norm": 0.8946315335018554,
      "learning_rate": 2.107625423939015e-06,
      "loss": 0.0368,
      "step": 750
    },
    {
      "epoch": 4.419825072886297,
      "grad_norm": 0.7944653560307688,
      "learning_rate": 1.9718160933378905e-06,
      "loss": 0.0368,
      "step": 760
    },
    {
      "epoch": 4.478134110787172,
      "grad_norm": 0.7774636550435158,
      "learning_rate": 1.8394471635817158e-06,
      "loss": 0.0349,
      "step": 770
    },
    {
      "epoch": 4.536443148688047,
      "grad_norm": 0.8756408218531322,
      "learning_rate": 1.7106690225570343e-06,
      "loss": 0.0371,
      "step": 780
    },
    {
      "epoch": 4.594752186588921,
      "grad_norm": 0.8687861976493633,
      "learning_rate": 1.5856279785591321e-06,
      "loss": 0.0339,
      "step": 790
    },
    {
      "epoch": 4.653061224489796,
      "grad_norm": 0.8189000224644959,
      "learning_rate": 1.4644660940672628e-06,
      "loss": 0.0323,
      "step": 800
    },
    {
      "epoch": 4.711370262390671,
      "grad_norm": 0.8369884028081186,
      "learning_rate": 1.3473210243436673e-06,
      "loss": 0.0334,
      "step": 810
    },
    {
      "epoch": 4.769679300291545,
      "grad_norm": 0.7785311317469292,
      "learning_rate": 1.2343258610397397e-06,
      "loss": 0.0326,
      "step": 820
    },
    {
      "epoch": 4.827988338192419,
      "grad_norm": 0.8404324074760597,
      "learning_rate": 1.1256089809870336e-06,
      "loss": 0.0334,
      "step": 830
    },
    {
      "epoch": 4.886297376093294,
      "grad_norm": 0.8675304198425063,
      "learning_rate": 1.0212939003449128e-06,
      "loss": 0.0345,
      "step": 840
    },
    {
      "epoch": 4.944606413994169,
      "grad_norm": 0.8431136894720528,
      "learning_rate": 9.214991342705304e-07,
      "loss": 0.0346,
      "step": 850
    },
    {
      "epoch": 5.0,
      "grad_norm": 1.1768071446682524,
      "learning_rate": 8.263380622705796e-07,
      "loss": 0.0337,
      "step": 860
    },
    {
      "epoch": 5.058309037900875,
      "grad_norm": 0.40110032583503596,
      "learning_rate": 7.359187993878109e-07,
      "loss": 0.0158,
      "step": 870
    },
    {
      "epoch": 5.11661807580175,
      "grad_norm": 0.49375416237980635,
      "learning_rate": 6.503440733686251e-07,
      "loss": 0.013,
      "step": 880
    },
    {
      "epoch": 5.174927113702624,
      "grad_norm": 0.5363565719022582,
      "learning_rate": 5.69711107951334e-07,
      "loss": 0.013,
      "step": 890
    },
    {
      "epoch": 5.233236151603498,
      "grad_norm": 0.5236558514075167,
      "learning_rate": 4.941115124076679e-07,
      "loss": 0.0123,
      "step": 900
    },
    {
      "epoch": 5.291545189504373,
      "grad_norm": 0.49489565231551663,
      "learning_rate": 4.2363117746302476e-07,
      "loss": 0.012,
      "step": 910
    },
    {
      "epoch": 5.349854227405248,
      "grad_norm": 0.5305713447576351,
      "learning_rate": 3.583501777137227e-07,
      "loss": 0.0125,
      "step": 920
    },
    {
      "epoch": 5.408163265306122,
      "grad_norm": 0.8039219674866297,
      "learning_rate": 2.9834268065210006e-07,
      "loss": 0.0125,
      "step": 930
    },
    {
      "epoch": 5.466472303206997,
      "grad_norm": 0.3862288158846098,
      "learning_rate": 2.4367686240284127e-07,
      "loss": 0.0124,
      "step": 940
    },
    {
      "epoch": 5.524781341107872,
      "grad_norm": 0.47054782769610126,
      "learning_rate": 1.9441483026626185e-07,
      "loss": 0.0113,
      "step": 950
    },
    {
      "epoch": 5.5830903790087465,
      "grad_norm": 0.4671541625745262,
      "learning_rate": 1.506125521565327e-07,
      "loss": 0.0116,
      "step": 960
    },
    {
      "epoch": 5.641399416909621,
      "grad_norm": 0.48837436287072683,
      "learning_rate": 1.1231979301504048e-07,
      "loss": 0.0117,
      "step": 970
    },
    {
      "epoch": 5.699708454810495,
      "grad_norm": 0.4538198562291079,
      "learning_rate": 7.958005827110039e-08,
      "loss": 0.0113,
      "step": 980
    },
    {
      "epoch": 5.75801749271137,
      "grad_norm": 0.5730409751947306,
      "learning_rate": 5.2430544414273645e-08,
      "loss": 0.0126,
      "step": 990
    },
    {
      "epoch": 5.816326530612245,
      "grad_norm": 0.6123539079634027,
      "learning_rate": 3.0902096734442554e-08,
      "loss": 0.0121,
      "step": 1000
    },
    {
      "epoch": 5.87463556851312,
      "grad_norm": 0.5748592685210242,
      "learning_rate": 1.5019174277645098e-08,
      "loss": 0.0114,
      "step": 1010
    },
    {
      "epoch": 5.932944606413994,
      "grad_norm": 0.4633724477797094,
      "learning_rate": 4.799822057502401e-09,
      "loss": 0.0123,
      "step": 1020
    },
    {
      "epoch": 5.9912536443148685,
      "grad_norm": 0.4377554362173321,
      "learning_rate": 2.5565055379261904e-10,
      "loss": 0.0116,
      "step": 1030
    },
    {
      "epoch": 6.0,
      "step": 1032,
      "total_flos": 884370134269952.0,
      "train_loss": 0.20082982304657615,
      "train_runtime": 10237.3944,
      "train_samples_per_second": 12.861,
      "train_steps_per_second": 0.101
    }
  ],
  "logging_steps": 10,
  "max_steps": 1032,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 6,
  "save_steps": 1000,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 884370134269952.0,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}