{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.8112582781456954,
  "eval_steps": 100,
  "global_step": 4900,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.008278145695364239,
      "grad_norm": 2.812814950942993,
      "learning_rate": 2.7041942604856512e-06,
      "loss": 1.4882,
      "step": 50
    },
    {
      "epoch": 0.016556291390728478,
      "grad_norm": 2.6125919818878174,
      "learning_rate": 5.463576158940398e-06,
      "loss": 1.2862,
      "step": 100
    },
    {
      "epoch": 0.016556291390728478,
      "eval_loss": 1.2020893096923828,
      "eval_runtime": 1896.4557,
      "eval_samples_per_second": 3.185,
      "eval_steps_per_second": 3.185,
      "step": 100
    },
    {
      "epoch": 0.024834437086092714,
      "grad_norm": 2.393519878387451,
      "learning_rate": 8.222958057395145e-06,
      "loss": 1.1353,
      "step": 150
    },
    {
      "epoch": 0.033112582781456956,
      "grad_norm": 2.5604305267333984,
      "learning_rate": 1.0982339955849891e-05,
      "loss": 1.0539,
      "step": 200
    },
    {
      "epoch": 0.033112582781456956,
      "eval_loss": 1.0201358795166016,
      "eval_runtime": 1896.3026,
      "eval_samples_per_second": 3.185,
      "eval_steps_per_second": 3.185,
      "step": 200
    },
    {
      "epoch": 0.041390728476821195,
      "grad_norm": 3.0053958892822266,
      "learning_rate": 1.3741721854304637e-05,
      "loss": 0.9816,
      "step": 250
    },
    {
      "epoch": 0.04966887417218543,
      "grad_norm": 3.028010368347168,
      "learning_rate": 1.6501103752759385e-05,
      "loss": 0.9372,
      "step": 300
    },
    {
      "epoch": 0.057947019867549666,
      "grad_norm": 2.484466552734375,
      "learning_rate": 1.926048565121413e-05,
      "loss": 0.9137,
      "step": 350
    },
    {
      "epoch": 0.06622516556291391,
      "grad_norm": 3.078425884246826,
      "learning_rate": 2.2019867549668874e-05,
      "loss": 0.8786,
      "step": 400
    },
    {
      "epoch": 0.07450331125827815,
      "grad_norm": 2.6672778129577637,
      "learning_rate": 2.477924944812362e-05,
      "loss": 0.8348,
      "step": 450
    },
    {
      "epoch": 0.08278145695364239,
      "grad_norm": 2.8199753761291504,
      "learning_rate": 2.753863134657837e-05,
      "loss": 0.8417,
      "step": 500
    },
    {
      "epoch": 0.09105960264900662,
      "grad_norm": 2.961965322494507,
      "learning_rate": 3.0298013245033112e-05,
      "loss": 0.8209,
      "step": 550
    },
    {
      "epoch": 0.09933774834437085,
      "grad_norm": 2.5217514038085938,
      "learning_rate": 3.305739514348786e-05,
      "loss": 0.8235,
      "step": 600
    },
    {
      "epoch": 0.1076158940397351,
      "grad_norm": 2.4740138053894043,
      "learning_rate": 3.581677704194261e-05,
      "loss": 0.7918,
      "step": 650
    },
    {
      "epoch": 0.11589403973509933,
      "grad_norm": 2.5529448986053467,
      "learning_rate": 3.8576158940397354e-05,
      "loss": 0.7749,
      "step": 700
    },
    {
      "epoch": 0.12417218543046357,
      "grad_norm": 2.151698589324951,
      "learning_rate": 4.13355408388521e-05,
      "loss": 0.7838,
      "step": 750
    },
    {
      "epoch": 0.13245033112582782,
      "grad_norm": 1.9026315212249756,
      "learning_rate": 4.4094922737306846e-05,
      "loss": 0.7668,
      "step": 800
    },
    {
      "epoch": 0.14072847682119205,
      "grad_norm": 1.9555529356002808,
      "learning_rate": 4.685430463576159e-05,
      "loss": 0.7684,
      "step": 850
    },
    {
      "epoch": 0.1490066225165563,
      "grad_norm": 2.308894157409668,
      "learning_rate": 4.961368653421634e-05,
      "loss": 0.7661,
      "step": 900
    },
    {
      "epoch": 0.15728476821192053,
      "grad_norm": 2.3362715244293213,
      "learning_rate": 5.237306843267108e-05,
      "loss": 0.736,
      "step": 950
    },
    {
      "epoch": 0.16556291390728478,
      "grad_norm": 1.8228410482406616,
      "learning_rate": 5.513245033112583e-05,
      "loss": 0.7213,
      "step": 1000
    },
    {
      "epoch": 0.173841059602649,
      "grad_norm": 1.8289754390716553,
      "learning_rate": 5.789183222958058e-05,
      "loss": 0.7335,
      "step": 1050
    },
    {
      "epoch": 0.18211920529801323,
      "grad_norm": 1.4989681243896484,
      "learning_rate": 6.065121412803533e-05,
      "loss": 0.7326,
      "step": 1100
    },
    {
      "epoch": 0.19039735099337748,
      "grad_norm": 1.5326098203659058,
      "learning_rate": 6.341059602649006e-05,
      "loss": 0.7311,
      "step": 1150
    },
    {
      "epoch": 0.1986754966887417,
      "grad_norm": 1.4897147417068481,
      "learning_rate": 6.616997792494481e-05,
      "loss": 0.6918,
      "step": 1200
    },
    {
      "epoch": 0.20695364238410596,
      "grad_norm": 1.634765863418579,
      "learning_rate": 6.892935982339957e-05,
      "loss": 0.7051,
      "step": 1250
    },
    {
      "epoch": 0.2152317880794702,
      "grad_norm": 1.4463587999343872,
      "learning_rate": 7.168874172185431e-05,
      "loss": 0.6955,
      "step": 1300
    },
    {
      "epoch": 0.22350993377483444,
      "grad_norm": 1.632133960723877,
      "learning_rate": 7.444812362030905e-05,
      "loss": 0.6901,
      "step": 1350
    },
    {
      "epoch": 0.23178807947019867,
      "grad_norm": 1.4062328338623047,
      "learning_rate": 7.72075055187638e-05,
      "loss": 0.6833,
      "step": 1400
    },
    {
      "epoch": 0.24006622516556292,
      "grad_norm": 1.2914466857910156,
      "learning_rate": 7.996688741721855e-05,
      "loss": 0.6663,
      "step": 1450
    },
    {
      "epoch": 0.24834437086092714,
      "grad_norm": 1.4995919466018677,
      "learning_rate": 8.272626931567329e-05,
      "loss": 0.6959,
      "step": 1500
    },
    {
      "epoch": 0.25662251655629137,
      "grad_norm": 1.1299749612808228,
      "learning_rate": 8.548565121412803e-05,
      "loss": 0.6685,
      "step": 1550
    },
    {
      "epoch": 0.26490066225165565,
      "grad_norm": 1.329004168510437,
      "learning_rate": 8.824503311258279e-05,
      "loss": 0.6678,
      "step": 1600
    },
    {
      "epoch": 0.2731788079470199,
      "grad_norm": 1.5191948413848877,
      "learning_rate": 9.100441501103754e-05,
      "loss": 0.6731,
      "step": 1650
    },
    {
      "epoch": 0.2814569536423841,
      "grad_norm": 1.739169716835022,
      "learning_rate": 9.376379690949227e-05,
      "loss": 0.6691,
      "step": 1700
    },
    {
      "epoch": 0.2897350993377483,
      "grad_norm": 1.2906118631362915,
      "learning_rate": 9.652317880794703e-05,
      "loss": 0.6718,
      "step": 1750
    },
    {
      "epoch": 0.2980132450331126,
      "grad_norm": 1.289502501487732,
      "learning_rate": 9.928256070640178e-05,
      "loss": 0.6581,
      "step": 1800
    },
    {
      "epoch": 0.30629139072847683,
      "grad_norm": 1.3923128843307495,
      "learning_rate": 9.999872989402833e-05,
      "loss": 0.6589,
      "step": 1850
    },
    {
      "epoch": 0.31456953642384106,
      "grad_norm": 1.1048816442489624,
      "learning_rate": 9.999297790520483e-05,
      "loss": 0.6341,
      "step": 1900
    },
    {
      "epoch": 0.3228476821192053,
      "grad_norm": 1.3568603992462158,
      "learning_rate": 9.998258777484084e-05,
      "loss": 0.6318,
      "step": 1950
    },
    {
      "epoch": 0.33112582781456956,
      "grad_norm": 0.923786997795105,
      "learning_rate": 9.996756046688961e-05,
      "loss": 0.6318,
      "step": 2000
    },
    {
      "epoch": 0.3394039735099338,
      "grad_norm": 1.102367877960205,
      "learning_rate": 9.994789737552259e-05,
      "loss": 0.6193,
      "step": 2050
    },
    {
      "epoch": 0.347682119205298,
      "grad_norm": 1.0738896131515503,
      "learning_rate": 9.992360032500001e-05,
      "loss": 0.6184,
      "step": 2100
    },
    {
      "epoch": 0.35596026490066224,
      "grad_norm": 1.279288649559021,
      "learning_rate": 9.98946715695016e-05,
      "loss": 0.626,
      "step": 2150
    },
    {
      "epoch": 0.36423841059602646,
      "grad_norm": 1.2009036540985107,
      "learning_rate": 9.986111379291759e-05,
      "loss": 0.6305,
      "step": 2200
    },
    {
      "epoch": 0.37251655629139074,
      "grad_norm": 0.8177038431167603,
      "learning_rate": 9.982293010859955e-05,
      "loss": 0.6266,
      "step": 2250
    },
    {
      "epoch": 0.38079470198675497,
      "grad_norm": 1.2464983463287354,
      "learning_rate": 9.978012405907165e-05,
      "loss": 0.6148,
      "step": 2300
    },
    {
      "epoch": 0.3890728476821192,
      "grad_norm": 1.2841860055923462,
      "learning_rate": 9.973269961570195e-05,
      "loss": 0.5946,
      "step": 2350
    },
    {
      "epoch": 0.3973509933774834,
      "grad_norm": 1.2200431823730469,
      "learning_rate": 9.968066117833401e-05,
      "loss": 0.6166,
      "step": 2400
    },
    {
      "epoch": 0.4056291390728477,
      "grad_norm": 1.128247857093811,
      "learning_rate": 9.962401357487863e-05,
      "loss": 0.5992,
      "step": 2450
    },
    {
      "epoch": 0.4139072847682119,
      "grad_norm": 1.0683091878890991,
      "learning_rate": 9.956276206086597e-05,
      "loss": 0.6048,
      "step": 2500
    },
    {
      "epoch": 0.42218543046357615,
      "grad_norm": 1.1819758415222168,
      "learning_rate": 9.949691231895791e-05,
      "loss": 0.5944,
      "step": 2550
    },
    {
      "epoch": 0.4304635761589404,
      "grad_norm": 1.0043411254882812,
      "learning_rate": 9.942647045842095e-05,
      "loss": 0.5962,
      "step": 2600
    },
    {
      "epoch": 0.43874172185430466,
      "grad_norm": 1.0588668584823608,
      "learning_rate": 9.93514430145593e-05,
      "loss": 0.6067,
      "step": 2650
    },
    {
      "epoch": 0.4470198675496689,
      "grad_norm": 0.9364084601402283,
      "learning_rate": 9.927183694810862e-05,
      "loss": 0.5928,
      "step": 2700
    },
    {
      "epoch": 0.4552980132450331,
      "grad_norm": 1.155172348022461,
      "learning_rate": 9.918765964459022e-05,
      "loss": 0.5987,
      "step": 2750
    },
    {
      "epoch": 0.46357615894039733,
      "grad_norm": 1.1639224290847778,
      "learning_rate": 9.909891891362587e-05,
      "loss": 0.5745,
      "step": 2800
    },
    {
      "epoch": 0.4718543046357616,
      "grad_norm": 0.9658174514770508,
      "learning_rate": 9.900562298821323e-05,
      "loss": 0.5825,
      "step": 2850
    },
    {
      "epoch": 0.48013245033112584,
      "grad_norm": 1.118033766746521,
      "learning_rate": 9.890778052396205e-05,
      "loss": 0.5806,
      "step": 2900
    },
    {
      "epoch": 0.48841059602649006,
      "grad_norm": 0.9781912565231323,
      "learning_rate": 9.880540059829115e-05,
      "loss": 0.5712,
      "step": 2950
    },
    {
      "epoch": 0.4966887417218543,
      "grad_norm": 1.2145684957504272,
      "learning_rate": 9.869849270958622e-05,
      "loss": 0.5855,
      "step": 3000
    },
    {
      "epoch": 0.5049668874172185,
      "grad_norm": 0.999279260635376,
      "learning_rate": 9.858706677631862e-05,
      "loss": 0.5843,
      "step": 3050
    },
    {
      "epoch": 0.5132450331125827,
      "grad_norm": 1.098258137702942,
      "learning_rate": 9.847113313612517e-05,
      "loss": 0.5605,
      "step": 3100
    },
    {
      "epoch": 0.5215231788079471,
      "grad_norm": 0.627949059009552,
      "learning_rate": 9.835070254484912e-05,
      "loss": 0.5538,
      "step": 3150
    },
    {
      "epoch": 0.5298013245033113,
      "grad_norm": 1.0991902351379395,
      "learning_rate": 9.822578617554219e-05,
      "loss": 0.5555,
      "step": 3200
    },
    {
      "epoch": 0.5380794701986755,
      "grad_norm": 0.9670843482017517,
      "learning_rate": 9.8096395617428e-05,
      "loss": 0.5647,
      "step": 3250
    },
    {
      "epoch": 0.5463576158940397,
      "grad_norm": 0.9838133454322815,
      "learning_rate": 9.796254287482693e-05,
      "loss": 0.5561,
      "step": 3300
    },
    {
      "epoch": 0.554635761589404,
      "grad_norm": 1.1465744972229004,
      "learning_rate": 9.782424036604234e-05,
      "loss": 0.559,
      "step": 3350
    },
    {
      "epoch": 0.5629139072847682,
      "grad_norm": 1.1423758268356323,
      "learning_rate": 9.768150092220849e-05,
      "loss": 0.5517,
      "step": 3400
    },
    {
      "epoch": 0.5711920529801324,
      "grad_norm": 1.1365066766738892,
      "learning_rate": 9.753433778610008e-05,
      "loss": 0.5464,
      "step": 3450
    },
    {
      "epoch": 0.5794701986754967,
      "grad_norm": 0.81045001745224,
      "learning_rate": 9.738276461090371e-05,
      "loss": 0.5493,
      "step": 3500
    },
    {
      "epoch": 0.5877483443708609,
      "grad_norm": 1.0236687660217285,
      "learning_rate": 9.72267954589511e-05,
      "loss": 0.567,
      "step": 3550
    },
    {
      "epoch": 0.5960264900662252,
      "grad_norm": 0.9495602250099182,
      "learning_rate": 9.706644480041455e-05,
      "loss": 0.5474,
      "step": 3600
    },
    {
      "epoch": 0.6043046357615894,
      "grad_norm": 0.960738480091095,
      "learning_rate": 9.690172751196437e-05,
      "loss": 0.5238,
      "step": 3650
    },
    {
      "epoch": 0.6125827814569537,
      "grad_norm": 1.0488675832748413,
      "learning_rate": 9.67326588753887e-05,
      "loss": 0.521,
      "step": 3700
    },
    {
      "epoch": 0.6208609271523179,
      "grad_norm": 0.8753538727760315,
      "learning_rate": 9.65592545761758e-05,
      "loss": 0.5232,
      "step": 3750
    },
    {
      "epoch": 0.6291390728476821,
      "grad_norm": 1.0551217794418335,
      "learning_rate": 9.638153070205871e-05,
      "loss": 0.5432,
      "step": 3800
    },
    {
      "epoch": 0.6374172185430463,
      "grad_norm": 1.158676028251648,
      "learning_rate": 9.619950374152278e-05,
      "loss": 0.5416,
      "step": 3850
    },
    {
      "epoch": 0.6456953642384106,
      "grad_norm": 1.0036752223968506,
      "learning_rate": 9.601319058227589e-05,
      "loss": 0.5496,
      "step": 3900
    },
    {
      "epoch": 0.6539735099337748,
      "grad_norm": 0.8905594348907471,
      "learning_rate": 9.58226085096817e-05,
      "loss": 0.5335,
      "step": 3950
    },
    {
      "epoch": 0.6622516556291391,
      "grad_norm": 0.9868190884590149,
      "learning_rate": 9.562777520515598e-05,
      "loss": 0.5094,
      "step": 4000
    },
    {
      "epoch": 0.6705298013245033,
      "grad_norm": 0.9672690629959106,
      "learning_rate": 9.542870874452618e-05,
      "loss": 0.5061,
      "step": 4050
    },
    {
      "epoch": 0.6788079470198676,
      "grad_norm": 1.044123888015747,
      "learning_rate": 9.52254275963545e-05,
      "loss": 0.5253,
      "step": 4100
    },
    {
      "epoch": 0.6870860927152318,
      "grad_norm": 1.0346958637237549,
      "learning_rate": 9.501795062022434e-05,
      "loss": 0.5149,
      "step": 4150
    },
    {
      "epoch": 0.695364238410596,
      "grad_norm": 1.0799248218536377,
      "learning_rate": 9.48062970649907e-05,
      "loss": 0.5207,
      "step": 4200
    },
    {
      "epoch": 0.7036423841059603,
      "grad_norm": 0.9847925901412964,
      "learning_rate": 9.459048656699427e-05,
      "loss": 0.531,
      "step": 4250
    },
    {
      "epoch": 0.7119205298013245,
      "grad_norm": 1.134179949760437,
      "learning_rate": 9.43705391482397e-05,
      "loss": 0.5202,
      "step": 4300
    },
    {
      "epoch": 0.7201986754966887,
      "grad_norm": 0.9750307202339172,
      "learning_rate": 9.414647521453798e-05,
      "loss": 0.5183,
      "step": 4350
    },
    {
      "epoch": 0.7284768211920529,
      "grad_norm": 1.372010350227356,
      "learning_rate": 9.391831555361341e-05,
      "loss": 0.5203,
      "step": 4400
    },
    {
      "epoch": 0.7367549668874173,
      "grad_norm": 0.9671643376350403,
      "learning_rate": 9.36860813331748e-05,
      "loss": 0.5313,
      "step": 4450
    },
    {
      "epoch": 0.7450331125827815,
      "grad_norm": 1.270264983177185,
      "learning_rate": 9.344979409895178e-05,
      "loss": 0.5236,
      "step": 4500
    },
    {
      "epoch": 0.7533112582781457,
      "grad_norm": 1.1816293001174927,
      "learning_rate": 9.320947577269581e-05,
      "loss": 0.518,
      "step": 4550
    },
    {
      "epoch": 0.7615894039735099,
      "grad_norm": 0.8809527158737183,
      "learning_rate": 9.29651486501464e-05,
      "loss": 0.5086,
      "step": 4600
    },
    {
      "epoch": 0.7698675496688742,
      "grad_norm": 0.9570929408073425,
      "learning_rate": 9.271683539896257e-05,
      "loss": 0.5195,
      "step": 4650
    },
    {
      "epoch": 0.7781456953642384,
      "grad_norm": 1.147157907485962,
      "learning_rate": 9.246455905661983e-05,
      "loss": 0.509,
      "step": 4700
    },
    {
      "epoch": 0.7864238410596026,
      "grad_norm": 0.9548070430755615,
      "learning_rate": 9.220834302827295e-05,
      "loss": 0.5078,
      "step": 4750
    },
    {
      "epoch": 0.7947019867549668,
      "grad_norm": 1.0823866128921509,
      "learning_rate": 9.194821108458438e-05,
      "loss": 0.5088,
      "step": 4800
    },
    {
      "epoch": 0.8029801324503312,
      "grad_norm": 1.0077965259552002,
      "learning_rate": 9.168418735951902e-05,
      "loss": 0.4994,
      "step": 4850
    },
    {
      "epoch": 0.8112582781456954,
      "grad_norm": 1.1390560865402222,
      "learning_rate": 9.141629634810516e-05,
      "loss": 0.5098,
      "step": 4900
    }
  ],
  "logging_steps": 50,
  "max_steps": 18120,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 100,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 3.842256577599898e+16,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}