{
  "best_global_step": 1000,
  "best_metric": 0.7969963550567627,
  "best_model_checkpoint": "checkpoints/lora_tutor/checkpoint-1000",
  "epoch": 0.35634743875278396,
  "eval_steps": 200,
  "global_step": 1000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.00035634743875278396,
      "grad_norm": 99.66374969482422,
      "learning_rate": 0.0,
      "loss": 4.1982,
      "step": 1
    },
    {
      "epoch": 0.0017817371937639199,
      "grad_norm": 57.29983139038086,
      "learning_rate": 3.3333333333333333e-06,
      "loss": 4.0,
      "step": 5
    },
    {
      "epoch": 0.0035634743875278397,
      "grad_norm": 27.13236427307129,
      "learning_rate": 7.5e-06,
      "loss": 2.4799,
      "step": 10
    },
    {
      "epoch": 0.005345211581291759,
      "grad_norm": 13.770986557006836,
      "learning_rate": 1.1666666666666668e-05,
      "loss": 1.4129,
      "step": 15
    },
    {
      "epoch": 0.0071269487750556795,
      "grad_norm": 9.720105171203613,
      "learning_rate": 1.5833333333333333e-05,
      "loss": 1.2207,
      "step": 20
    },
    {
      "epoch": 0.008908685968819599,
      "grad_norm": 10.442448616027832,
      "learning_rate": 2e-05,
      "loss": 1.2037,
      "step": 25
    },
    {
      "epoch": 0.010690423162583519,
      "grad_norm": 8.392529487609863,
      "learning_rate": 2.4166666666666667e-05,
      "loss": 1.1535,
      "step": 30
    },
    {
      "epoch": 0.012472160356347439,
      "grad_norm": 7.494682788848877,
      "learning_rate": 2.8333333333333335e-05,
      "loss": 1.1494,
      "step": 35
    },
    {
      "epoch": 0.014253897550111359,
      "grad_norm": 7.756562232971191,
      "learning_rate": 3.2500000000000004e-05,
      "loss": 1.1588,
      "step": 40
    },
    {
      "epoch": 0.016035634743875277,
      "grad_norm": 5.802969932556152,
      "learning_rate": 3.6666666666666666e-05,
      "loss": 1.1049,
      "step": 45
    },
    {
      "epoch": 0.017817371937639197,
      "grad_norm": 7.09335470199585,
      "learning_rate": 4.0833333333333334e-05,
      "loss": 1.0902,
      "step": 50
    },
    {
      "epoch": 0.019599109131403118,
      "grad_norm": 5.961513042449951,
      "learning_rate": 4.5e-05,
      "loss": 1.0484,
      "step": 55
    },
    {
      "epoch": 0.021380846325167038,
      "grad_norm": 4.003515720367432,
      "learning_rate": 4.9166666666666665e-05,
      "loss": 1.0418,
      "step": 60
    },
    {
      "epoch": 0.023162583518930958,
      "grad_norm": 4.197242259979248,
      "learning_rate": 5.333333333333333e-05,
      "loss": 1.0574,
      "step": 65
    },
    {
      "epoch": 0.024944320712694878,
      "grad_norm": 4.823288917541504,
      "learning_rate": 5.7499999999999995e-05,
      "loss": 1.0194,
      "step": 70
    },
    {
      "epoch": 0.026726057906458798,
      "grad_norm": 6.500214099884033,
      "learning_rate": 6.166666666666667e-05,
      "loss": 1.0146,
      "step": 75
    },
    {
      "epoch": 0.028507795100222718,
      "grad_norm": 6.800583362579346,
      "learning_rate": 6.583333333333334e-05,
      "loss": 1.0192,
      "step": 80
    },
    {
      "epoch": 0.030289532293986638,
      "grad_norm": 5.19222354888916,
      "learning_rate": 7e-05,
      "loss": 1.0455,
      "step": 85
    },
    {
      "epoch": 0.032071269487750555,
      "grad_norm": 6.357260704040527,
      "learning_rate": 7.416666666666668e-05,
      "loss": 1.029,
      "step": 90
    },
    {
      "epoch": 0.033853006681514475,
      "grad_norm": 5.543500900268555,
      "learning_rate": 7.833333333333333e-05,
      "loss": 1.0228,
      "step": 95
    },
    {
      "epoch": 0.035634743875278395,
      "grad_norm": 4.388900279998779,
      "learning_rate": 8.25e-05,
      "loss": 1.044,
      "step": 100
    },
    {
      "epoch": 0.037416481069042315,
      "grad_norm": 5.311753273010254,
      "learning_rate": 8.666666666666667e-05,
      "loss": 1.063,
      "step": 105
    },
    {
      "epoch": 0.039198218262806235,
      "grad_norm": 5.037621974945068,
      "learning_rate": 9.083333333333334e-05,
      "loss": 1.0138,
      "step": 110
    },
    {
      "epoch": 0.040979955456570155,
      "grad_norm": 5.125575542449951,
      "learning_rate": 9.5e-05,
      "loss": 1.0403,
      "step": 115
    },
    {
      "epoch": 0.042761692650334075,
      "grad_norm": 5.154388904571533,
      "learning_rate": 9.916666666666667e-05,
      "loss": 1.015,
      "step": 120
    },
    {
      "epoch": 0.044543429844097995,
      "grad_norm": 3.9509270191192627,
      "learning_rate": 9.999661540018812e-05,
      "loss": 1.0027,
      "step": 125
    },
    {
      "epoch": 0.046325167037861915,
      "grad_norm": 3.7814090251922607,
      "learning_rate": 9.998286624877786e-05,
      "loss": 0.9863,
      "step": 130
    },
    {
      "epoch": 0.048106904231625836,
      "grad_norm": 3.7585690021514893,
      "learning_rate": 9.995854391448606e-05,
      "loss": 1.0459,
      "step": 135
    },
    {
      "epoch": 0.049888641425389756,
      "grad_norm": 6.917703628540039,
      "learning_rate": 9.992365354236557e-05,
      "loss": 1.0719,
      "step": 140
    },
    {
      "epoch": 0.051670378619153676,
      "grad_norm": 3.885483503341675,
      "learning_rate": 9.987820251299122e-05,
      "loss": 1.0123,
      "step": 145
    },
    {
      "epoch": 0.053452115812917596,
      "grad_norm": 3.287639617919922,
      "learning_rate": 9.982220044089859e-05,
      "loss": 0.9903,
      "step": 150
    },
    {
      "epoch": 0.055233853006681516,
      "grad_norm": 3.95298171043396,
      "learning_rate": 9.975565917255016e-05,
      "loss": 0.9841,
      "step": 155
    },
    {
      "epoch": 0.057015590200445436,
      "grad_norm": 4.5531721115112305,
      "learning_rate": 9.967859278382938e-05,
      "loss": 0.9968,
      "step": 160
    },
    {
      "epoch": 0.058797327394209356,
      "grad_norm": 4.4977641105651855,
      "learning_rate": 9.959101757706308e-05,
      "loss": 1.006,
      "step": 165
    },
    {
      "epoch": 0.060579064587973276,
      "grad_norm": 3.260209798812866,
      "learning_rate": 9.949295207757299e-05,
      "loss": 0.9557,
      "step": 170
    },
    {
      "epoch": 0.062360801781737196,
      "grad_norm": 3.9708852767944336,
      "learning_rate": 9.938441702975689e-05,
      "loss": 0.9914,
      "step": 175
    },
    {
      "epoch": 0.06414253897550111,
      "grad_norm": 3.731992721557617,
      "learning_rate": 9.926543539270048e-05,
      "loss": 0.9794,
      "step": 180
    },
    {
      "epoch": 0.06592427616926504,
      "grad_norm": 3.153402805328369,
      "learning_rate": 9.913603233532067e-05,
      "loss": 0.9525,
      "step": 185
    },
    {
      "epoch": 0.06770601336302895,
      "grad_norm": 2.9249067306518555,
      "learning_rate": 9.899623523104149e-05,
      "loss": 0.9578,
      "step": 190
    },
    {
      "epoch": 0.06948775055679288,
      "grad_norm": 2.661738872528076,
      "learning_rate": 9.884607365200356e-05,
      "loss": 0.9711,
      "step": 195
    },
    {
      "epoch": 0.07126948775055679,
      "grad_norm": 3.0224714279174805,
      "learning_rate": 9.868557936280855e-05,
      "loss": 0.9693,
      "step": 200
    },
    {
      "epoch": 0.07126948775055679,
      "eval_loss": 0.9798622131347656,
      "eval_runtime": 249.2057,
      "eval_samples_per_second": 20.02,
      "eval_steps_per_second": 2.504,
      "step": 200
    },
    {
      "epoch": 0.07305122494432072,
      "grad_norm": 2.5287749767303467,
      "learning_rate": 9.851478631379982e-05,
      "loss": 0.9299,
      "step": 205
    },
    {
      "epoch": 0.07483296213808463,
      "grad_norm": 2.9961535930633545,
      "learning_rate": 9.83337306338807e-05,
      "loss": 0.9606,
      "step": 210
    },
    {
      "epoch": 0.07661469933184856,
      "grad_norm": 3.6630430221557617,
      "learning_rate": 9.814245062287189e-05,
      "loss": 0.9546,
      "step": 215
    },
    {
      "epoch": 0.07839643652561247,
      "grad_norm": 2.665858030319214,
      "learning_rate": 9.794098674340965e-05,
      "loss": 0.958,
      "step": 220
    },
    {
      "epoch": 0.0801781737193764,
      "grad_norm": 2.741337776184082,
      "learning_rate": 9.77293816123866e-05,
      "loss": 0.963,
      "step": 225
    },
    {
      "epoch": 0.08195991091314031,
      "grad_norm": 2.693640947341919,
      "learning_rate": 9.750767999193656e-05,
      "loss": 0.9677,
      "step": 230
    },
    {
      "epoch": 0.08374164810690424,
      "grad_norm": 2.718897581100464,
      "learning_rate": 9.727592877996585e-05,
      "loss": 0.9551,
      "step": 235
    },
    {
      "epoch": 0.08552338530066815,
      "grad_norm": 3.1531124114990234,
      "learning_rate": 9.70341770002326e-05,
      "loss": 0.9692,
      "step": 240
    },
    {
      "epoch": 0.08730512249443208,
      "grad_norm": 2.4551897048950195,
      "learning_rate": 9.678247579197657e-05,
      "loss": 0.9727,
      "step": 245
    },
    {
      "epoch": 0.08908685968819599,
      "grad_norm": 2.886244058609009,
      "learning_rate": 9.652087839910124e-05,
      "loss": 0.9537,
      "step": 250
    },
    {
      "epoch": 0.09086859688195992,
      "grad_norm": 2.8074824810028076,
      "learning_rate": 9.62494401589108e-05,
      "loss": 0.9327,
      "step": 255
    },
    {
      "epoch": 0.09265033407572383,
      "grad_norm": 2.750798463821411,
      "learning_rate": 9.596821849040447e-05,
      "loss": 0.9228,
      "step": 260
    },
    {
      "epoch": 0.09443207126948774,
      "grad_norm": 2.552215337753296,
      "learning_rate": 9.567727288213005e-05,
      "loss": 0.9423,
      "step": 265
    },
    {
      "epoch": 0.09621380846325167,
      "grad_norm": 2.3609156608581543,
      "learning_rate": 9.537666487960019e-05,
      "loss": 0.9676,
      "step": 270
    },
    {
      "epoch": 0.09799554565701558,
      "grad_norm": 2.8906874656677246,
      "learning_rate": 9.506645807227312e-05,
      "loss": 0.955,
      "step": 275
    },
    {
      "epoch": 0.09977728285077951,
      "grad_norm": 2.660022497177124,
      "learning_rate": 9.474671808010126e-05,
      "loss": 0.9695,
      "step": 280
    },
    {
      "epoch": 0.10155902004454342,
      "grad_norm": 3.261420726776123,
      "learning_rate": 9.441751253965021e-05,
      "loss": 0.9477,
      "step": 285
    },
    {
      "epoch": 0.10334075723830735,
      "grad_norm": 3.65535044670105,
      "learning_rate": 9.407891108979117e-05,
      "loss": 0.9724,
      "step": 290
    },
    {
      "epoch": 0.10512249443207126,
      "grad_norm": 6.143333911895752,
      "learning_rate": 9.373098535696979e-05,
      "loss": 0.9477,
      "step": 295
    },
    {
      "epoch": 0.10690423162583519,
      "grad_norm": 3.469689130783081,
      "learning_rate": 9.337380894005463e-05,
      "loss": 0.9286,
      "step": 300
    },
    {
      "epoch": 0.1086859688195991,
      "grad_norm": 2.4321353435516357,
      "learning_rate": 9.300745739476829e-05,
      "loss": 0.9681,
      "step": 305
    },
    {
      "epoch": 0.11046770601336303,
      "grad_norm": 2.3954951763153076,
      "learning_rate": 9.263200821770461e-05,
      "loss": 0.9223,
      "step": 310
    },
    {
      "epoch": 0.11224944320712694,
      "grad_norm": 3.206364154815674,
      "learning_rate": 9.224754082993552e-05,
      "loss": 0.9111,
      "step": 315
    },
    {
      "epoch": 0.11403118040089087,
      "grad_norm": 2.411461591720581,
      "learning_rate": 9.185413656021036e-05,
      "loss": 0.9254,
      "step": 320
    },
    {
      "epoch": 0.11581291759465479,
      "grad_norm": 3.2764694690704346,
      "learning_rate": 9.145187862775209e-05,
      "loss": 0.9388,
      "step": 325
    },
    {
      "epoch": 0.11759465478841871,
      "grad_norm": 2.724217653274536,
      "learning_rate": 9.104085212465336e-05,
      "loss": 0.9493,
      "step": 330
    },
    {
      "epoch": 0.11937639198218263,
      "grad_norm": 2.4242122173309326,
      "learning_rate": 9.062114399787647e-05,
      "loss": 0.9439,
      "step": 335
    },
    {
      "epoch": 0.12115812917594655,
      "grad_norm": 2.391575813293457,
      "learning_rate": 9.019284303086087e-05,
      "loss": 0.9253,
      "step": 340
    },
    {
      "epoch": 0.12293986636971047,
      "grad_norm": 2.7728800773620605,
      "learning_rate": 8.97560398247424e-05,
      "loss": 0.946,
      "step": 345
    },
    {
      "epoch": 0.12472160356347439,
      "grad_norm": 3.3350629806518555,
      "learning_rate": 8.931082677918771e-05,
      "loss": 0.9318,
      "step": 350
    },
    {
      "epoch": 0.12650334075723832,
      "grad_norm": 2.887850761413574,
      "learning_rate": 8.885729807284856e-05,
      "loss": 0.9407,
      "step": 355
    },
    {
      "epoch": 0.12828507795100222,
      "grad_norm": 2.461491107940674,
      "learning_rate": 8.839554964343943e-05,
      "loss": 0.9748,
      "step": 360
    },
    {
      "epoch": 0.13006681514476615,
      "grad_norm": 2.649059772491455,
      "learning_rate": 8.792567916744346e-05,
      "loss": 0.9569,
      "step": 365
    },
    {
      "epoch": 0.13184855233853007,
      "grad_norm": 2.505889415740967,
      "learning_rate": 8.744778603945011e-05,
      "loss": 0.9235,
      "step": 370
    },
    {
      "epoch": 0.133630289532294,
      "grad_norm": 3.084015369415283,
      "learning_rate": 8.69619713511298e-05,
      "loss": 0.9466,
      "step": 375
    },
    {
      "epoch": 0.1354120267260579,
      "grad_norm": 2.242276191711426,
      "learning_rate": 8.646833786984927e-05,
      "loss": 0.8958,
      "step": 380
    },
    {
      "epoch": 0.13719376391982183,
      "grad_norm": 2.439112424850464,
      "learning_rate": 8.596699001693255e-05,
      "loss": 0.9211,
      "step": 385
    },
    {
      "epoch": 0.13897550111358575,
      "grad_norm": 2.7526488304138184,
      "learning_rate": 8.545803384557219e-05,
      "loss": 0.9218,
      "step": 390
    },
    {
      "epoch": 0.14075723830734965,
      "grad_norm": 2.521644353866577,
      "learning_rate": 8.4941577018395e-05,
      "loss": 0.9365,
      "step": 395
    },
    {
      "epoch": 0.14253897550111358,
      "grad_norm": 2.8012807369232178,
      "learning_rate": 8.44177287846877e-05,
      "loss": 0.8991,
      "step": 400
    },
    {
      "epoch": 0.14253897550111358,
      "eval_loss": 0.9173732995986938,
      "eval_runtime": 250.7158,
      "eval_samples_per_second": 19.899,
      "eval_steps_per_second": 2.489,
      "step": 400
    },
    {
      "epoch": 0.1443207126948775,
      "grad_norm": 2.3261518478393555,
      "learning_rate": 8.388659995728661e-05,
      "loss": 0.8968,
      "step": 405
    },
    {
      "epoch": 0.14610244988864143,
      "grad_norm": 2.2134907245635986,
      "learning_rate": 8.334830288913682e-05,
      "loss": 0.91,
      "step": 410
    },
    {
      "epoch": 0.14788418708240533,
      "grad_norm": 3.5786261558532715,
      "learning_rate": 8.280295144952536e-05,
      "loss": 0.9175,
      "step": 415
    },
    {
      "epoch": 0.14966592427616926,
      "grad_norm": 2.7428812980651855,
      "learning_rate": 8.225066099999392e-05,
      "loss": 0.9345,
      "step": 420
    },
    {
      "epoch": 0.1514476614699332,
      "grad_norm": 2.246025800704956,
      "learning_rate": 8.169154836993551e-05,
      "loss": 0.9067,
      "step": 425
    },
    {
      "epoch": 0.15322939866369711,
      "grad_norm": 2.188469886779785,
      "learning_rate": 8.112573183188099e-05,
      "loss": 0.9537,
      "step": 430
    },
    {
      "epoch": 0.155011135857461,
      "grad_norm": 2.545259475708008,
      "learning_rate": 8.055333107647999e-05,
      "loss": 0.9159,
      "step": 435
    },
    {
      "epoch": 0.15679287305122494,
      "grad_norm": 2.421093463897705,
      "learning_rate": 7.99744671871822e-05,
      "loss": 0.9034,
      "step": 440
    },
    {
      "epoch": 0.15857461024498887,
      "grad_norm": 2.5586888790130615,
      "learning_rate": 7.938926261462366e-05,
      "loss": 0.9072,
      "step": 445
    },
    {
      "epoch": 0.1603563474387528,
      "grad_norm": 2.444941759109497,
      "learning_rate": 7.879784115072417e-05,
      "loss": 0.9101,
      "step": 450
    },
    {
      "epoch": 0.1621380846325167,
      "grad_norm": 2.3764047622680664,
      "learning_rate": 7.820032790250074e-05,
      "loss": 0.9065,
      "step": 455
    },
    {
      "epoch": 0.16391982182628062,
      "grad_norm": 2.34041428565979,
      "learning_rate": 7.75968492656029e-05,
      "loss": 0.8791,
      "step": 460
    },
    {
      "epoch": 0.16570155902004455,
      "grad_norm": 2.013155698776245,
      "learning_rate": 7.698753289757565e-05,
      "loss": 0.9058,
      "step": 465
    },
    {
      "epoch": 0.16748329621380847,
      "grad_norm": 2.3692591190338135,
      "learning_rate": 7.6372507690855e-05,
      "loss": 0.8898,
      "step": 470
    },
    {
      "epoch": 0.16926503340757237,
      "grad_norm": 2.4539620876312256,
      "learning_rate": 7.575190374550272e-05,
      "loss": 0.9201,
      "step": 475
    },
    {
      "epoch": 0.1710467706013363,
      "grad_norm": 2.6015443801879883,
      "learning_rate": 7.51258523416855e-05,
      "loss": 0.8823,
      "step": 480
    },
    {
      "epoch": 0.17282850779510023,
      "grad_norm": 2.413839101791382,
      "learning_rate": 7.449448591190435e-05,
      "loss": 0.9196,
      "step": 485
    },
    {
      "epoch": 0.17461024498886416,
      "grad_norm": 2.1962289810180664,
      "learning_rate": 7.385793801298042e-05,
      "loss": 0.8869,
      "step": 490
    },
    {
      "epoch": 0.17639198218262805,
      "grad_norm": 2.994487762451172,
      "learning_rate": 7.321634329780286e-05,
      "loss": 0.9103,
      "step": 495
    },
    {
      "epoch": 0.17817371937639198,
      "grad_norm": 2.9973297119140625,
      "learning_rate": 7.256983748684485e-05,
      "loss": 0.9083,
      "step": 500
    },
    {
      "epoch": 0.1799554565701559,
      "grad_norm": 2.6006710529327393,
      "learning_rate": 7.191855733945387e-05,
      "loss": 0.9131,
      "step": 505
    },
    {
      "epoch": 0.18173719376391984,
      "grad_norm": 2.4508118629455566,
      "learning_rate": 7.126264062492217e-05,
      "loss": 0.8762,
      "step": 510
    },
    {
      "epoch": 0.18351893095768373,
      "grad_norm": 2.8403897285461426,
      "learning_rate": 7.060222609334343e-05,
      "loss": 0.8673,
      "step": 515
    },
    {
      "epoch": 0.18530066815144766,
      "grad_norm": 2.5483813285827637,
      "learning_rate": 6.993745344626231e-05,
      "loss": 0.8812,
      "step": 520
    },
    {
      "epoch": 0.1870824053452116,
      "grad_norm": 1.927654504776001,
      "learning_rate": 6.926846330712242e-05,
      "loss": 0.9213,
      "step": 525
    },
    {
      "epoch": 0.1888641425389755,
      "grad_norm": 2.8513023853302,
      "learning_rate": 6.859539719151933e-05,
      "loss": 0.8911,
      "step": 530
    },
    {
      "epoch": 0.19064587973273942,
      "grad_norm": 2.6732981204986572,
      "learning_rate": 6.7918397477265e-05,
      "loss": 0.9018,
      "step": 535
    },
    {
      "epoch": 0.19242761692650334,
      "grad_norm": 2.3755311965942383,
      "learning_rate": 6.723760737426971e-05,
      "loss": 0.8803,
      "step": 540
    },
    {
      "epoch": 0.19420935412026727,
      "grad_norm": 2.5072877407073975,
      "learning_rate": 6.65531708942479e-05,
      "loss": 0.9066,
      "step": 545
    },
    {
      "epoch": 0.19599109131403117,
      "grad_norm": 2.3347630500793457,
      "learning_rate": 6.586523282025462e-05,
      "loss": 0.8999,
      "step": 550
    },
    {
      "epoch": 0.1977728285077951,
      "grad_norm": 2.4541633129119873,
      "learning_rate": 6.517393867605855e-05,
      "loss": 0.9024,
      "step": 555
    },
    {
      "epoch": 0.19955456570155902,
      "grad_norm": 2.89241361618042,
      "learning_rate": 6.447943469535856e-05,
      "loss": 0.8802,
      "step": 560
    },
    {
      "epoch": 0.20133630289532295,
      "grad_norm": 2.635859251022339,
      "learning_rate": 6.378186779084995e-05,
      "loss": 0.91,
      "step": 565
    },
    {
      "epoch": 0.20311804008908685,
      "grad_norm": 2.5360910892486572,
      "learning_rate": 6.308138552314718e-05,
      "loss": 0.883,
      "step": 570
    },
    {
      "epoch": 0.20489977728285078,
      "grad_norm": 2.0861408710479736,
      "learning_rate": 6.23781360695693e-05,
      "loss": 0.9051,
      "step": 575
    },
    {
      "epoch": 0.2066815144766147,
      "grad_norm": 1.938452959060669,
      "learning_rate": 6.167226819279528e-05,
      "loss": 0.8763,
      "step": 580
    },
    {
      "epoch": 0.20846325167037863,
      "grad_norm": 2.333118200302124,
      "learning_rate": 6.096393120939516e-05,
      "loss": 0.8939,
      "step": 585
    },
    {
      "epoch": 0.21024498886414253,
      "grad_norm": 2.2652223110198975,
      "learning_rate": 6.0253274958244386e-05,
      "loss": 0.8992,
      "step": 590
    },
    {
      "epoch": 0.21202672605790646,
      "grad_norm": 1.830731749534607,
      "learning_rate": 5.9540449768827246e-05,
      "loss": 0.8617,
      "step": 595
    },
    {
      "epoch": 0.21380846325167038,
      "grad_norm": 2.4237635135650635,
      "learning_rate": 5.882560642943696e-05,
      "loss": 0.9189,
      "step": 600
    },
    {
      "epoch": 0.21380846325167038,
      "eval_loss": 0.8756723999977112,
      "eval_runtime": 256.5875,
      "eval_samples_per_second": 19.444,
      "eval_steps_per_second": 2.432,
      "step": 600
    },
    {
      "epoch": 0.2155902004454343,
      "grad_norm": 2.6089930534362793,
      "learning_rate": 5.810889615527838e-05,
      "loss": 0.9052,
      "step": 605
    },
    {
      "epoch": 0.2173719376391982,
      "grad_norm": 2.457108974456787,
      "learning_rate": 5.7390470556480545e-05,
      "loss": 0.8959,
      "step": 610
    },
    {
      "epoch": 0.21915367483296214,
      "grad_norm": 2.3315470218658447,
      "learning_rate": 5.667048160602564e-05,
      "loss": 0.8772,
      "step": 615
    },
    {
      "epoch": 0.22093541202672606,
      "grad_norm": 2.0484960079193115,
      "learning_rate": 5.5949081607601274e-05,
      "loss": 0.8387,
      "step": 620
    },
    {
      "epoch": 0.22271714922049,
      "grad_norm": 2.341867208480835,
      "learning_rate": 5.522642316338268e-05,
      "loss": 0.8778,
      "step": 625
    },
    {
      "epoch": 0.2244988864142539,
      "grad_norm": 2.4177300930023193,
      "learning_rate": 5.450265914175187e-05,
      "loss": 0.8936,
      "step": 630
    },
    {
      "epoch": 0.22628062360801782,
      "grad_norm": 2.4489850997924805,
      "learning_rate": 5.377794264496041e-05,
      "loss": 0.8654,
      "step": 635
    },
    {
      "epoch": 0.22806236080178174,
      "grad_norm": 2.468477964401245,
      "learning_rate": 5.3052426976742855e-05,
      "loss": 0.8467,
      "step": 640
    },
    {
      "epoch": 0.22984409799554567,
      "grad_norm": 2.1568973064422607,
      "learning_rate": 5.232626560988735e-05,
      "loss": 0.8337,
      "step": 645
    },
    {
      "epoch": 0.23162583518930957,
      "grad_norm": 2.248286485671997,
      "learning_rate": 5.159961215377065e-05,
      "loss": 0.8626,
      "step": 650
    },
    {
      "epoch": 0.2334075723830735,
      "grad_norm": 2.197516918182373,
      "learning_rate": 5.0872620321864185e-05,
      "loss": 0.8857,
      "step": 655
    },
    {
      "epoch": 0.23518930957683742,
      "grad_norm": 2.0258774757385254,
      "learning_rate": 5.0145443899218105e-05,
      "loss": 0.8693,
      "step": 660
    },
    {
      "epoch": 0.23697104677060132,
      "grad_norm": 2.576545000076294,
      "learning_rate": 4.941823670993016e-05,
      "loss": 0.8585,
      "step": 665
    },
    {
      "epoch": 0.23875278396436525,
      "grad_norm": 2.1643807888031006,
      "learning_rate": 4.869115258460635e-05,
      "loss": 0.8844,
      "step": 670
    },
    {
      "epoch": 0.24053452115812918,
      "grad_norm": 1.8109593391418457,
      "learning_rate": 4.7964345327820217e-05,
      "loss": 0.8526,
      "step": 675
    },
    {
      "epoch": 0.2423162583518931,
      "grad_norm": 2.2996315956115723,
      "learning_rate": 4.723796868557758e-05,
      "loss": 0.8588,
      "step": 680
    },
    {
      "epoch": 0.244097995545657,
      "grad_norm": 2.109656810760498,
      "learning_rate": 4.6512176312793736e-05,
      "loss": 0.8657,
      "step": 685
    },
    {
      "epoch": 0.24587973273942093,
      "grad_norm": 2.0365986824035645,
      "learning_rate": 4.578712174078986e-05,
      "loss": 0.8722,
      "step": 690
    },
    {
      "epoch": 0.24766146993318486,
      "grad_norm": 2.396369695663452,
      "learning_rate": 4.506295834481561e-05,
      "loss": 0.8595,
      "step": 695
    },
    {
      "epoch": 0.24944320712694878,
      "grad_norm": 1.9721331596374512,
      "learning_rate": 4.433983931160467e-05,
      "loss": 0.845,
      "step": 700
    },
    {
      "epoch": 0.2512249443207127,
      "grad_norm": 2.6028833389282227,
      "learning_rate": 4.361791760697027e-05,
      "loss": 0.8756,
      "step": 705
    },
    {
      "epoch": 0.25300668151447664,
      "grad_norm": 2.5747413635253906,
      "learning_rate": 4.289734594344738e-05,
      "loss": 0.8553,
      "step": 710
    },
    {
      "epoch": 0.25478841870824054,
      "grad_norm": 2.2102746963500977,
      "learning_rate": 4.2178276747988446e-05,
      "loss": 0.8301,
      "step": 715
    },
    {
      "epoch": 0.25657015590200444,
      "grad_norm": 2.2053496837615967,
      "learning_rate": 4.146086212971967e-05,
      "loss": 0.8347,
      "step": 720
    },
    {
      "epoch": 0.2583518930957684,
      "grad_norm": 2.1658267974853516,
      "learning_rate": 4.074525384776428e-05,
      "loss": 0.8583,
      "step": 725
    },
    {
      "epoch": 0.2601336302895323,
      "grad_norm": 2.4658656120300293,
      "learning_rate": 4.003160327914015e-05,
      "loss": 0.8448,
      "step": 730
    },
    {
      "epoch": 0.2619153674832962,
      "grad_norm": 2.5138092041015625,
      "learning_rate": 3.932006138673801e-05,
      "loss": 0.7994,
      "step": 735
    },
    {
      "epoch": 0.26369710467706015,
      "grad_norm": 2.3678791522979736,
      "learning_rate": 3.861077868738733e-05,
      "loss": 0.8543,
      "step": 740
    },
    {
      "epoch": 0.26547884187082404,
      "grad_norm": 2.174612283706665,
      "learning_rate": 3.790390522001662e-05,
      "loss": 0.8255,
      "step": 745
    },
    {
      "epoch": 0.267260579064588,
      "grad_norm": 2.633901596069336,
      "learning_rate": 3.719959051391472e-05,
      "loss": 0.8574,
      "step": 750
    },
    {
      "epoch": 0.2690423162583519,
      "grad_norm": 2.3723981380462646,
      "learning_rate": 3.649798355709997e-05,
      "loss": 0.8313,
      "step": 755
    },
    {
      "epoch": 0.2708240534521158,
      "grad_norm": 2.452537775039673,
      "learning_rate": 3.579923276480387e-05,
      "loss": 0.8332,
      "step": 760
    },
    {
      "epoch": 0.27260579064587975,
      "grad_norm": 2.7250778675079346,
      "learning_rate": 3.51034859480759e-05,
      "loss": 0.8345,
      "step": 765
    },
    {
      "epoch": 0.27438752783964365,
      "grad_norm": 2.827697992324829,
      "learning_rate": 3.44108902825161e-05,
      "loss": 0.8547,
      "step": 770
    },
    {
      "epoch": 0.27616926503340755,
      "grad_norm": 2.2842516899108887,
      "learning_rate": 3.372159227714218e-05,
      "loss": 0.8245,
      "step": 775
    },
    {
      "epoch": 0.2779510022271715,
      "grad_norm": 2.4392411708831787,
      "learning_rate": 3.303573774339745e-05,
      "loss": 0.827,
      "step": 780
    },
    {
      "epoch": 0.2797327394209354,
      "grad_norm": 2.548760175704956,
      "learning_rate": 3.235347176430656e-05,
      "loss": 0.8085,
      "step": 785
    },
    {
      "epoch": 0.2815144766146993,
      "grad_norm": 2.289919376373291,
      "learning_rate": 3.167493866378514e-05,
      "loss": 0.8725,
      "step": 790
    },
    {
      "epoch": 0.28329621380846326,
      "grad_norm": 2.1732709407806396,
      "learning_rate": 3.100028197611006e-05,
      "loss": 0.8184,
      "step": 795
    },
    {
      "epoch": 0.28507795100222716,
      "grad_norm": 2.4083878993988037,
      "learning_rate": 3.0329644415556758e-05,
      "loss": 0.8186,
      "step": 800
    },
    {
      "epoch": 0.28507795100222716,
      "eval_loss": 0.8320774435997009,
      "eval_runtime": 261.5096,
      "eval_samples_per_second": 19.078,
      "eval_steps_per_second": 2.386,
      "step": 800
    },
    {
      "epoch": 0.2868596881959911,
      "grad_norm": 2.4117252826690674,
      "learning_rate": 2.9663167846209998e-05,
      "loss": 0.8061,
      "step": 805
    },
    {
      "epoch": 0.288641425389755,
      "grad_norm": 2.716094493865967,
      "learning_rate": 2.9000993251954527e-05,
      "loss": 0.8372,
      "step": 810
    },
    {
      "epoch": 0.2904231625835189,
      "grad_norm": 1.987546443939209,
      "learning_rate": 2.8343260706651864e-05,
      "loss": 0.8539,
      "step": 815
    },
    {
      "epoch": 0.29220489977728287,
      "grad_norm": 2.1564650535583496,
      "learning_rate": 2.7690109344509563e-05,
      "loss": 0.8398,
      "step": 820
    },
    {
      "epoch": 0.29398663697104677,
      "grad_norm": 2.394848108291626,
      "learning_rate": 2.7041677330649407e-05,
      "loss": 0.8257,
      "step": 825
    },
    {
      "epoch": 0.29576837416481067,
      "grad_norm": 2.211273670196533,
      "learning_rate": 2.639810183188045e-05,
      "loss": 0.8238,
      "step": 830
    },
    {
      "epoch": 0.2975501113585746,
      "grad_norm": 2.2479021549224854,
      "learning_rate": 2.575951898768315e-05,
      "loss": 0.8277,
      "step": 835
    },
    {
      "epoch": 0.2993318485523385,
      "grad_norm": 2.60609769821167,
      "learning_rate": 2.5126063881411188e-05,
      "loss": 0.8371,
      "step": 840
    },
    {
      "epoch": 0.3011135857461025,
      "grad_norm": 2.4049665927886963,
      "learning_rate": 2.4497870511716235e-05,
      "loss": 0.8077,
      "step": 845
    },
    {
      "epoch": 0.3028953229398664,
      "grad_norm": 2.140543222427368,
      "learning_rate": 2.3875071764202563e-05,
      "loss": 0.8288,
      "step": 850
    },
    {
      "epoch": 0.3046770601336303,
      "grad_norm": 2.6508686542510986,
      "learning_rate": 2.3257799383316798e-05,
      "loss": 0.848,
      "step": 855
    },
    {
      "epoch": 0.30645879732739423,
      "grad_norm": 2.6622097492218018,
      "learning_rate": 2.264618394447927e-05,
      "loss": 0.8133,
      "step": 860
    },
    {
      "epoch": 0.3082405345211581,
      "grad_norm": 2.2243332862854004,
      "learning_rate": 2.2040354826462668e-05,
      "loss": 0.8227,
      "step": 865
    },
    {
      "epoch": 0.310022271714922,
      "grad_norm": 2.4186229705810547,
      "learning_rate": 2.1440440184023564e-05,
      "loss": 0.7982,
      "step": 870
    },
    {
      "epoch": 0.311804008908686,
      "grad_norm": 2.1508822441101074,
      "learning_rate": 2.0846566920793266e-05,
      "loss": 0.8421,
      "step": 875
    },
    {
      "epoch": 0.3135857461024499,
      "grad_norm": 2.5740039348602295,
      "learning_rate": 2.0258860662432942e-05,
      "loss": 0.8337,
      "step": 880
    },
    {
      "epoch": 0.31536748329621384,
      "grad_norm": 2.060276985168457,
      "learning_rate": 1.967744573005934e-05,
      "loss": 0.8319,
      "step": 885
    },
    {
      "epoch": 0.31714922048997773,
      "grad_norm": 2.0549917221069336,
      "learning_rate": 1.9102445113946343e-05,
      "loss": 0.7851,
      "step": 890
    },
    {
      "epoch": 0.31893095768374163,
      "grad_norm": 2.7247533798217773,
      "learning_rate": 1.8533980447508137e-05,
      "loss": 0.8113,
      "step": 895
    },
    {
      "epoch": 0.3207126948775056,
      "grad_norm": 2.852099657058716,
      "learning_rate": 1.797217198156924e-05,
      "loss": 0.8502,
      "step": 900
    },
    {
      "epoch": 0.3224944320712695,
      "grad_norm": 2.2780370712280273,
      "learning_rate": 1.7417138558927244e-05,
      "loss": 0.8175,
      "step": 905
    },
    {
      "epoch": 0.3242761692650334,
      "grad_norm": 2.220999240875244,
      "learning_rate": 1.6868997589213136e-05,
      "loss": 0.8107,
      "step": 910
    },
    {
      "epoch": 0.32605790645879734,
      "grad_norm": 2.26967191696167,
      "learning_rate": 1.6327865024054984e-05,
      "loss": 0.815,
      "step": 915
    },
    {
      "epoch": 0.32783964365256124,
      "grad_norm": 3.1814401149749756,
      "learning_rate": 1.5793855332550005e-05,
      "loss": 0.8274,
      "step": 920
    },
    {
      "epoch": 0.32962138084632514,
      "grad_norm": 2.5263116359710693,
      "learning_rate": 1.526708147705013e-05,
      "loss": 0.8126,
      "step": 925
    },
    {
      "epoch": 0.3314031180400891,
      "grad_norm": 2.7154064178466797,
      "learning_rate": 1.4747654889266476e-05,
      "loss": 0.8147,
      "step": 930
    },
    {
      "epoch": 0.333184855233853,
      "grad_norm": 2.2681655883789062,
      "learning_rate": 1.4235685446697433e-05,
      "loss": 0.8247,
      "step": 935
    },
    {
      "epoch": 0.33496659242761695,
      "grad_norm": 1.97934889793396,
      "learning_rate": 1.373128144938563e-05,
      "loss": 0.7941,
      "step": 940
    },
    {
      "epoch": 0.33674832962138085,
      "grad_norm": 2.35060977935791,
      "learning_rate": 1.3234549597008571e-05,
      "loss": 0.8306,
      "step": 945
    },
    {
      "epoch": 0.33853006681514475,
      "grad_norm": 2.231822967529297,
      "learning_rate": 1.2745594966307823e-05,
      "loss": 0.8044,
      "step": 950
    },
    {
      "epoch": 0.3403118040089087,
      "grad_norm": 2.1318812370300293,
      "learning_rate": 1.22645209888614e-05,
      "loss": 0.7989,
      "step": 955
    },
    {
      "epoch": 0.3420935412026726,
      "grad_norm": 2.565772294998169,
      "learning_rate": 1.1791429429204342e-05,
      "loss": 0.7852,
      "step": 960
    },
    {
      "epoch": 0.3438752783964365,
      "grad_norm": 2.2323334217071533,
      "learning_rate": 1.132642036330181e-05,
      "loss": 0.798,
      "step": 965
    },
    {
      "epoch": 0.34565701559020046,
      "grad_norm": 2.159836769104004,
      "learning_rate": 1.0869592157379304e-05,
      "loss": 0.7913,
      "step": 970
    },
    {
      "epoch": 0.34743875278396436,
      "grad_norm": 2.292523145675659,
      "learning_rate": 1.0421041447114838e-05,
      "loss": 0.8303,
      "step": 975
    },
    {
      "epoch": 0.3492204899777283,
      "grad_norm": 2.540412187576294,
      "learning_rate": 9.980863117196815e-06,
      "loss": 0.8174,
      "step": 980
    },
    {
      "epoch": 0.3510022271714922,
      "grad_norm": 2.6382853984832764,
      "learning_rate": 9.549150281252633e-06,
      "loss": 0.7803,
      "step": 985
    },
    {
      "epoch": 0.3527839643652561,
      "grad_norm": 2.3101236820220947,
      "learning_rate": 9.125994262151682e-06,
      "loss": 0.8372,
      "step": 990
    },
    {
      "epoch": 0.35456570155902006,
      "grad_norm": 2.285560131072998,
      "learning_rate": 8.711484572687296e-06,
      "loss": 0.7965,
      "step": 995
    },
    {
      "epoch": 0.35634743875278396,
      "grad_norm": 1.8707315921783447,
      "learning_rate": 8.305708896641594e-06,
      "loss": 0.8255,
      "step": 1000
    },
    {
      "epoch": 0.35634743875278396,
      "eval_loss": 0.7969963550567627,
      "eval_runtime": 258.8113,
      "eval_samples_per_second": 19.277,
      "eval_steps_per_second": 2.411,
      "step": 1000
    }
  ],
  "logging_steps": 5,
  "max_steps": 1200,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 200,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 1495650375386112.0,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}