{
  "best_global_step": 4954,
  "best_metric": 1.776762843132019,
  "best_model_checkpoint": "./mcqa_qwen3_letter_best/checkpoint-4954",
  "epoch": 1.0,
  "eval_steps": 500,
  "global_step": 4954,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.010092854259184497,
      "grad_norm": 48.55782699584961,
      "learning_rate": 8.879919273461152e-07,
      "loss": 2.3985,
      "step": 50
    },
    {
      "epoch": 0.020185708518368994,
      "grad_norm": 25.696617126464844,
      "learning_rate": 1.8970736629667005e-06,
      "loss": 2.053,
      "step": 100
    },
    {
      "epoch": 0.030278562777553492,
      "grad_norm": 27.860021591186523,
      "learning_rate": 2.906155398587286e-06,
      "loss": 1.9305,
      "step": 150
    },
    {
      "epoch": 0.04037141703673799,
      "grad_norm": 17.68500518798828,
      "learning_rate": 3.915237134207871e-06,
      "loss": 1.9294,
      "step": 200
    },
    {
      "epoch": 0.050464271295922486,
      "grad_norm": 26.112218856811523,
      "learning_rate": 4.924318869828457e-06,
      "loss": 1.8834,
      "step": 250
    },
    {
      "epoch": 0.060557125555106985,
      "grad_norm": 25.835376739501953,
      "learning_rate": 5.933400605449042e-06,
      "loss": 1.8517,
      "step": 300
    },
    {
      "epoch": 0.07064997981429148,
      "grad_norm": 22.44589614868164,
      "learning_rate": 6.942482341069627e-06,
      "loss": 1.8978,
      "step": 350
    },
    {
      "epoch": 0.08074283407347597,
      "grad_norm": 32.82951354980469,
      "learning_rate": 7.951564076690212e-06,
      "loss": 1.8867,
      "step": 400
    },
    {
      "epoch": 0.09083568833266048,
      "grad_norm": 35.665794372558594,
      "learning_rate": 8.960645812310798e-06,
      "loss": 1.9055,
      "step": 450
    },
    {
      "epoch": 0.10092854259184497,
      "grad_norm": 22.500865936279297,
      "learning_rate": 9.969727547931384e-06,
      "loss": 1.8755,
      "step": 500
    },
    {
      "epoch": 0.11102139685102948,
      "grad_norm": 40.59410095214844,
      "learning_rate": 1.0978809283551967e-05,
      "loss": 1.8881,
      "step": 550
    },
    {
      "epoch": 0.12111425111021397,
      "grad_norm": 28.769454956054688,
      "learning_rate": 1.1987891019172555e-05,
      "loss": 1.8713,
      "step": 600
    },
    {
      "epoch": 0.13120710536939847,
      "grad_norm": 17.596820831298828,
      "learning_rate": 1.299697275479314e-05,
      "loss": 1.8694,
      "step": 650
    },
    {
      "epoch": 0.14129995962858297,
      "grad_norm": 17.149999618530273,
      "learning_rate": 1.4006054490413725e-05,
      "loss": 1.8809,
      "step": 700
    },
    {
      "epoch": 0.15139281388776746,
      "grad_norm": 19.181955337524414,
      "learning_rate": 1.5015136226034311e-05,
      "loss": 1.8697,
      "step": 750
    },
    {
      "epoch": 0.16148566814695195,
      "grad_norm": 24.227073669433594,
      "learning_rate": 1.6024217961654894e-05,
      "loss": 1.9201,
      "step": 800
    },
    {
      "epoch": 0.17157852240613647,
      "grad_norm": 18.42403221130371,
      "learning_rate": 1.703329969727548e-05,
      "loss": 1.8876,
      "step": 850
    },
    {
      "epoch": 0.18167137666532096,
      "grad_norm": 21.015230178833008,
      "learning_rate": 1.8042381432896066e-05,
      "loss": 1.8697,
      "step": 900
    },
    {
      "epoch": 0.19176423092450545,
      "grad_norm": 16.02488899230957,
      "learning_rate": 1.905146316851665e-05,
      "loss": 1.9102,
      "step": 950
    },
    {
      "epoch": 0.20185708518368994,
      "grad_norm": 25.045923233032227,
      "learning_rate": 1.9993271279578333e-05,
      "loss": 1.9121,
      "step": 1000
    },
    {
      "epoch": 0.21194993944287444,
      "grad_norm": 17.414430618286133,
      "learning_rate": 1.9881125939217227e-05,
      "loss": 1.9449,
      "step": 1050
    },
    {
      "epoch": 0.22204279370205895,
      "grad_norm": 15.37423324584961,
      "learning_rate": 1.976898059885612e-05,
      "loss": 1.9139,
      "step": 1100
    },
    {
      "epoch": 0.23213564796124345,
      "grad_norm": 20.543489456176758,
      "learning_rate": 1.965683525849501e-05,
      "loss": 1.92,
      "step": 1150
    },
    {
      "epoch": 0.24222850222042794,
      "grad_norm": 12.01870346069336,
      "learning_rate": 1.9544689918133902e-05,
      "loss": 1.8962,
      "step": 1200
    },
    {
      "epoch": 0.25232135647961246,
      "grad_norm": 15.475773811340332,
      "learning_rate": 1.9432544577772796e-05,
      "loss": 1.9483,
      "step": 1250
    },
    {
      "epoch": 0.26241421073879695,
      "grad_norm": 11.753213882446289,
      "learning_rate": 1.9320399237411686e-05,
      "loss": 1.919,
      "step": 1300
    },
    {
      "epoch": 0.27250706499798144,
      "grad_norm": 14.90489673614502,
      "learning_rate": 1.920825389705058e-05,
      "loss": 1.8742,
      "step": 1350
    },
    {
      "epoch": 0.28259991925716593,
      "grad_norm": 12.925189971923828,
      "learning_rate": 1.909610855668947e-05,
      "loss": 1.8822,
      "step": 1400
    },
    {
      "epoch": 0.2926927735163504,
      "grad_norm": 17.215579986572266,
      "learning_rate": 1.898396321632836e-05,
      "loss": 1.8796,
      "step": 1450
    },
    {
      "epoch": 0.3027856277755349,
      "grad_norm": 16.483861923217773,
      "learning_rate": 1.8871817875967255e-05,
      "loss": 1.8442,
      "step": 1500
    },
    {
      "epoch": 0.3128784820347194,
      "grad_norm": 18.10808753967285,
      "learning_rate": 1.875967253560615e-05,
      "loss": 1.9131,
      "step": 1550
    },
    {
      "epoch": 0.3229713362939039,
      "grad_norm": 14.261265754699707,
      "learning_rate": 1.864752719524504e-05,
      "loss": 1.7602,
      "step": 1600
    },
    {
      "epoch": 0.3330641905530884,
      "grad_norm": 16.223392486572266,
      "learning_rate": 1.8535381854883933e-05,
      "loss": 1.8392,
      "step": 1650
    },
    {
      "epoch": 0.34315704481227294,
      "grad_norm": 14.012106895446777,
      "learning_rate": 1.8423236514522824e-05,
      "loss": 1.8335,
      "step": 1700
    },
    {
      "epoch": 0.35324989907145743,
      "grad_norm": 13.234374046325684,
      "learning_rate": 1.8311091174161714e-05,
      "loss": 1.8501,
      "step": 1750
    },
    {
      "epoch": 0.3633427533306419,
      "grad_norm": 11.787166595458984,
      "learning_rate": 1.8198945833800608e-05,
      "loss": 1.8704,
      "step": 1800
    },
    {
      "epoch": 0.3734356075898264,
      "grad_norm": 15.64974308013916,
      "learning_rate": 1.80868004934395e-05,
      "loss": 1.85,
      "step": 1850
    },
    {
      "epoch": 0.3835284618490109,
      "grad_norm": 13.893998146057129,
      "learning_rate": 1.7974655153078392e-05,
      "loss": 1.8807,
      "step": 1900
    },
    {
      "epoch": 0.3936213161081954,
      "grad_norm": 15.42603588104248,
      "learning_rate": 1.7862509812717283e-05,
      "loss": 1.8124,
      "step": 1950
    },
    {
      "epoch": 0.4037141703673799,
      "grad_norm": 12.293023109436035,
      "learning_rate": 1.7750364472356173e-05,
      "loss": 1.8112,
      "step": 2000
    },
    {
      "epoch": 0.4138070246265644,
      "grad_norm": 17.576618194580078,
      "learning_rate": 1.7638219131995067e-05,
      "loss": 1.8468,
      "step": 2050
    },
    {
      "epoch": 0.42389987888574887,
      "grad_norm": 36.62916946411133,
      "learning_rate": 1.752607379163396e-05,
      "loss": 1.8563,
      "step": 2100
    },
    {
      "epoch": 0.43399273314493336,
      "grad_norm": 12.232354164123535,
      "learning_rate": 1.741392845127285e-05,
      "loss": 1.8643,
      "step": 2150
    },
    {
      "epoch": 0.4440855874041179,
      "grad_norm": 9.772968292236328,
      "learning_rate": 1.7301783110911742e-05,
      "loss": 1.8686,
      "step": 2200
    },
    {
      "epoch": 0.4541784416633024,
      "grad_norm": 13.78654956817627,
      "learning_rate": 1.7189637770550636e-05,
      "loss": 1.8477,
      "step": 2250
    },
    {
      "epoch": 0.4642712959224869,
      "grad_norm": 14.448091506958008,
      "learning_rate": 1.7077492430189526e-05,
      "loss": 1.828,
      "step": 2300
    },
    {
      "epoch": 0.4743641501816714,
      "grad_norm": 10.872529983520508,
      "learning_rate": 1.696534708982842e-05,
      "loss": 1.7916,
      "step": 2350
    },
    {
      "epoch": 0.4844570044408559,
      "grad_norm": 14.716806411743164,
      "learning_rate": 1.685320174946731e-05,
      "loss": 1.7982,
      "step": 2400
    },
    {
      "epoch": 0.49454985870004037,
      "grad_norm": 15.155656814575195,
      "learning_rate": 1.67410564091062e-05,
      "loss": 1.8422,
      "step": 2450
    },
    {
      "epoch": 0.5046427129592249,
      "grad_norm": 11.369612693786621,
      "learning_rate": 1.6628911068745095e-05,
      "loss": 1.8217,
      "step": 2500
    },
    {
      "epoch": 0.5147355672184094,
      "grad_norm": 15.491066932678223,
      "learning_rate": 1.651676572838399e-05,
      "loss": 1.8487,
      "step": 2550
    },
    {
      "epoch": 0.5248284214775939,
      "grad_norm": 12.249984741210938,
      "learning_rate": 1.640462038802288e-05,
      "loss": 1.7951,
      "step": 2600
    },
    {
      "epoch": 0.5349212757367784,
      "grad_norm": 14.075465202331543,
      "learning_rate": 1.629247504766177e-05,
      "loss": 1.8115,
      "step": 2650
    },
    {
      "epoch": 0.5450141299959629,
      "grad_norm": 9.785154342651367,
      "learning_rate": 1.6180329707300664e-05,
      "loss": 1.8576,
      "step": 2700
    },
    {
      "epoch": 0.5551069842551474,
      "grad_norm": 14.559487342834473,
      "learning_rate": 1.6068184366939554e-05,
      "loss": 1.8263,
      "step": 2750
    },
    {
      "epoch": 0.5651998385143319,
      "grad_norm": 15.150165557861328,
      "learning_rate": 1.5956039026578448e-05,
      "loss": 1.8029,
      "step": 2800
    },
    {
      "epoch": 0.5752926927735164,
      "grad_norm": 13.863632202148438,
      "learning_rate": 1.584389368621734e-05,
      "loss": 1.7863,
      "step": 2850
    },
    {
      "epoch": 0.5853855470327008,
      "grad_norm": 9.358270645141602,
      "learning_rate": 1.573174834585623e-05,
      "loss": 1.806,
      "step": 2900
    },
    {
      "epoch": 0.5954784012918853,
      "grad_norm": 12.770975112915039,
      "learning_rate": 1.5619603005495123e-05,
      "loss": 1.7417,
      "step": 2950
    },
    {
      "epoch": 0.6055712555510698,
      "grad_norm": 12.026569366455078,
      "learning_rate": 1.5507457665134017e-05,
      "loss": 1.7623,
      "step": 3000
    },
    {
      "epoch": 0.6156641098102543,
      "grad_norm": 9.8405122756958,
      "learning_rate": 1.5395312324772907e-05,
      "loss": 1.7941,
      "step": 3050
    },
    {
      "epoch": 0.6257569640694388,
      "grad_norm": 13.649519920349121,
      "learning_rate": 1.5283166984411798e-05,
      "loss": 1.7499,
      "step": 3100
    },
    {
      "epoch": 0.6358498183286233,
      "grad_norm": 13.303316116333008,
      "learning_rate": 1.5171021644050692e-05,
      "loss": 1.7821,
      "step": 3150
    },
    {
      "epoch": 0.6459426725878078,
      "grad_norm": 14.893158912658691,
      "learning_rate": 1.5058876303689582e-05,
      "loss": 1.8423,
      "step": 3200
    },
    {
      "epoch": 0.6560355268469923,
      "grad_norm": 14.434380531311035,
      "learning_rate": 1.4946730963328474e-05,
      "loss": 1.8138,
      "step": 3250
    },
    {
      "epoch": 0.6661283811061768,
      "grad_norm": 9.59044075012207,
      "learning_rate": 1.4834585622967368e-05,
      "loss": 1.7734,
      "step": 3300
    },
    {
      "epoch": 0.6762212353653613,
      "grad_norm": 12.524561882019043,
      "learning_rate": 1.4722440282606259e-05,
      "loss": 1.8246,
      "step": 3350
    },
    {
      "epoch": 0.6863140896245459,
      "grad_norm": 13.521296501159668,
      "learning_rate": 1.4610294942245151e-05,
      "loss": 1.7847,
      "step": 3400
    },
    {
      "epoch": 0.6964069438837304,
      "grad_norm": 10.999866485595703,
      "learning_rate": 1.4498149601884043e-05,
      "loss": 1.8027,
      "step": 3450
    },
    {
      "epoch": 0.7064997981429149,
      "grad_norm": 15.364250183105469,
      "learning_rate": 1.4386004261522934e-05,
      "loss": 1.7802,
      "step": 3500
    },
    {
      "epoch": 0.7165926524020994,
      "grad_norm": 13.141353607177734,
      "learning_rate": 1.4273858921161828e-05,
      "loss": 1.7464,
      "step": 3550
    },
    {
      "epoch": 0.7266855066612838,
      "grad_norm": 9.018637657165527,
      "learning_rate": 1.4161713580800718e-05,
      "loss": 1.7553,
      "step": 3600
    },
    {
      "epoch": 0.7367783609204683,
      "grad_norm": 11.081124305725098,
      "learning_rate": 1.404956824043961e-05,
      "loss": 1.7922,
      "step": 3650
    },
    {
      "epoch": 0.7468712151796528,
      "grad_norm": 10.0188627243042,
      "learning_rate": 1.3937422900078504e-05,
      "loss": 1.7769,
      "step": 3700
    },
    {
      "epoch": 0.7569640694388373,
      "grad_norm": 10.286458015441895,
      "learning_rate": 1.3825277559717395e-05,
      "loss": 1.7696,
      "step": 3750
    },
    {
      "epoch": 0.7670569236980218,
      "grad_norm": 11.746405601501465,
      "learning_rate": 1.3713132219356287e-05,
      "loss": 1.7188,
      "step": 3800
    },
    {
      "epoch": 0.7771497779572063,
      "grad_norm": 11.215723991394043,
      "learning_rate": 1.3600986878995179e-05,
      "loss": 1.6803,
      "step": 3850
    },
    {
      "epoch": 0.7872426322163908,
      "grad_norm": 8.982596397399902,
      "learning_rate": 1.348884153863407e-05,
      "loss": 1.7696,
      "step": 3900
    },
    {
      "epoch": 0.7973354864755753,
      "grad_norm": 12.450457572937012,
      "learning_rate": 1.3376696198272963e-05,
      "loss": 1.8021,
      "step": 3950
    },
    {
      "epoch": 0.8074283407347598,
      "grad_norm": 10.87128734588623,
      "learning_rate": 1.3264550857911855e-05,
      "loss": 1.7492,
      "step": 4000
    },
    {
      "epoch": 0.8175211949939443,
      "grad_norm": 11.78647518157959,
      "learning_rate": 1.3152405517550746e-05,
      "loss": 1.7883,
      "step": 4050
    },
    {
      "epoch": 0.8276140492531288,
      "grad_norm": 12.425263404846191,
      "learning_rate": 1.3040260177189638e-05,
      "loss": 1.7546,
      "step": 4100
    },
    {
      "epoch": 0.8377069035123133,
      "grad_norm": 11.663323402404785,
      "learning_rate": 1.2928114836828532e-05,
      "loss": 1.8018,
      "step": 4150
    },
    {
      "epoch": 0.8477997577714977,
      "grad_norm": 17.913087844848633,
      "learning_rate": 1.2815969496467423e-05,
      "loss": 1.7827,
      "step": 4200
    },
    {
      "epoch": 0.8578926120306822,
      "grad_norm": 9.219327926635742,
      "learning_rate": 1.2703824156106315e-05,
      "loss": 1.7245,
      "step": 4250
    },
    {
      "epoch": 0.8679854662898667,
      "grad_norm": 11.107460021972656,
      "learning_rate": 1.2591678815745207e-05,
      "loss": 1.7264,
      "step": 4300
    },
    {
      "epoch": 0.8780783205490512,
      "grad_norm": 10.487607955932617,
      "learning_rate": 1.2479533475384097e-05,
      "loss": 1.753,
      "step": 4350
    },
    {
      "epoch": 0.8881711748082358,
      "grad_norm": 13.2865571975708,
      "learning_rate": 1.2367388135022991e-05,
      "loss": 1.7317,
      "step": 4400
    },
    {
      "epoch": 0.8982640290674203,
      "grad_norm": 10.927115440368652,
      "learning_rate": 1.2255242794661883e-05,
      "loss": 1.7651,
      "step": 4450
    },
    {
      "epoch": 0.9083568833266048,
      "grad_norm": 10.536073684692383,
      "learning_rate": 1.2143097454300774e-05,
      "loss": 1.7578,
      "step": 4500
    },
    {
      "epoch": 0.9184497375857893,
      "grad_norm": 13.544109344482422,
      "learning_rate": 1.2030952113939666e-05,
      "loss": 1.7505,
      "step": 4550
    },
    {
      "epoch": 0.9285425918449738,
      "grad_norm": 9.343710899353027,
      "learning_rate": 1.1921049680385782e-05,
      "loss": 1.6865,
      "step": 4600
    },
    {
      "epoch": 0.9386354461041583,
      "grad_norm": 11.518623352050781,
      "learning_rate": 1.1808904340024674e-05,
      "loss": 1.7203,
      "step": 4650
    },
    {
      "epoch": 0.9487283003633428,
      "grad_norm": 7.897172927856445,
      "learning_rate": 1.1696758999663564e-05,
      "loss": 1.7201,
      "step": 4700
    },
    {
      "epoch": 0.9588211546225273,
      "grad_norm": 11.530837059020996,
      "learning_rate": 1.1584613659302457e-05,
      "loss": 1.8117,
      "step": 4750
    },
    {
      "epoch": 0.9689140088817118,
      "grad_norm": 11.721019744873047,
      "learning_rate": 1.147246831894135e-05,
      "loss": 1.7663,
      "step": 4800
    },
    {
      "epoch": 0.9790068631408962,
      "grad_norm": 11.470191955566406,
      "learning_rate": 1.1360322978580241e-05,
      "loss": 1.7655,
      "step": 4850
    },
    {
      "epoch": 0.9890997174000807,
      "grad_norm": 12.892107009887695,
      "learning_rate": 1.1248177638219133e-05,
      "loss": 1.759,
      "step": 4900
    },
    {
      "epoch": 0.9991925716592652,
      "grad_norm": 13.869138717651367,
      "learning_rate": 1.1136032297858025e-05,
      "loss": 1.7831,
      "step": 4950
    },
    {
      "epoch": 1.0,
      "eval_loss": 1.776762843132019,
      "eval_runtime": 226.5804,
      "eval_samples_per_second": 16.396,
      "eval_steps_per_second": 2.052,
      "step": 4954
    }
  ],
  "logging_steps": 50,
  "max_steps": 9908,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 2,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 2.681334714807091e+16,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}