{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.9904153354632586,
  "eval_steps": 500,
  "global_step": 312,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.009584664536741214,
      "grad_norm": 5.816953152720709,
      "learning_rate": 1.25e-06,
      "loss": 0.8809,
      "step": 1
    },
    {
      "epoch": 0.019169329073482427,
      "grad_norm": 5.964463857967548,
      "learning_rate": 2.5e-06,
      "loss": 0.8476,
      "step": 2
    },
    {
      "epoch": 0.02875399361022364,
      "grad_norm": 5.85622862207096,
      "learning_rate": 3.7500000000000005e-06,
      "loss": 0.8749,
      "step": 3
    },
    {
      "epoch": 0.038338658146964855,
      "grad_norm": 5.365349431954843,
      "learning_rate": 5e-06,
      "loss": 0.8422,
      "step": 4
    },
    {
      "epoch": 0.04792332268370607,
      "grad_norm": 4.006201698306361,
      "learning_rate": 6.25e-06,
      "loss": 0.8005,
      "step": 5
    },
    {
      "epoch": 0.05750798722044728,
      "grad_norm": 2.122270635740227,
      "learning_rate": 7.500000000000001e-06,
      "loss": 0.7687,
      "step": 6
    },
    {
      "epoch": 0.0670926517571885,
      "grad_norm": 3.975962985256971,
      "learning_rate": 8.750000000000001e-06,
      "loss": 0.7686,
      "step": 7
    },
    {
      "epoch": 0.07667731629392971,
      "grad_norm": 4.3633197716567595,
      "learning_rate": 1e-05,
      "loss": 0.761,
      "step": 8
    },
    {
      "epoch": 0.08626198083067092,
      "grad_norm": 4.812484653337378,
      "learning_rate": 1.125e-05,
      "loss": 0.7451,
      "step": 9
    },
    {
      "epoch": 0.09584664536741214,
      "grad_norm": 4.996167212561038,
      "learning_rate": 1.25e-05,
      "loss": 0.7657,
      "step": 10
    },
    {
      "epoch": 0.10543130990415335,
      "grad_norm": 3.391593689539008,
      "learning_rate": 1.375e-05,
      "loss": 0.7293,
      "step": 11
    },
    {
      "epoch": 0.11501597444089456,
      "grad_norm": 2.024258637124186,
      "learning_rate": 1.5000000000000002e-05,
      "loss": 0.6753,
      "step": 12
    },
    {
      "epoch": 0.12460063897763578,
      "grad_norm": 2.368340767997839,
      "learning_rate": 1.6250000000000002e-05,
      "loss": 0.675,
      "step": 13
    },
    {
      "epoch": 0.134185303514377,
      "grad_norm": 1.9034533687196296,
      "learning_rate": 1.7500000000000002e-05,
      "loss": 0.6346,
      "step": 14
    },
    {
      "epoch": 0.14376996805111822,
      "grad_norm": 1.511274358606332,
      "learning_rate": 1.8750000000000002e-05,
      "loss": 0.6395,
      "step": 15
    },
    {
      "epoch": 0.15335463258785942,
      "grad_norm": 1.5258008797446443,
      "learning_rate": 2e-05,
      "loss": 0.6605,
      "step": 16
    },
    {
      "epoch": 0.16293929712460065,
      "grad_norm": 1.4701453015485093,
      "learning_rate": 2.125e-05,
      "loss": 0.6345,
      "step": 17
    },
    {
      "epoch": 0.17252396166134185,
      "grad_norm": 1.139297657192997,
      "learning_rate": 2.25e-05,
      "loss": 0.6313,
      "step": 18
    },
    {
      "epoch": 0.18210862619808307,
      "grad_norm": 1.0628460283930738,
      "learning_rate": 2.375e-05,
      "loss": 0.6093,
      "step": 19
    },
    {
      "epoch": 0.19169329073482427,
      "grad_norm": 1.0582580403121424,
      "learning_rate": 2.5e-05,
      "loss": 0.5867,
      "step": 20
    },
    {
      "epoch": 0.2012779552715655,
      "grad_norm": 0.8977556913012904,
      "learning_rate": 2.625e-05,
      "loss": 0.5876,
      "step": 21
    },
    {
      "epoch": 0.2108626198083067,
      "grad_norm": 1.3235677545982394,
      "learning_rate": 2.75e-05,
      "loss": 0.5909,
      "step": 22
    },
    {
      "epoch": 0.22044728434504793,
      "grad_norm": 0.8869847698482135,
      "learning_rate": 2.875e-05,
      "loss": 0.5739,
      "step": 23
    },
    {
      "epoch": 0.23003194888178913,
      "grad_norm": 1.1759129421868764,
      "learning_rate": 3.0000000000000004e-05,
      "loss": 0.588,
      "step": 24
    },
    {
      "epoch": 0.23961661341853036,
      "grad_norm": 0.9909103854124853,
      "learning_rate": 3.125e-05,
      "loss": 0.5675,
      "step": 25
    },
    {
      "epoch": 0.24920127795527156,
      "grad_norm": 1.0713203550156574,
      "learning_rate": 3.2500000000000004e-05,
      "loss": 0.5492,
      "step": 26
    },
    {
      "epoch": 0.25878594249201275,
      "grad_norm": 0.9586606498055252,
      "learning_rate": 3.375e-05,
      "loss": 0.5539,
      "step": 27
    },
    {
      "epoch": 0.268370607028754,
      "grad_norm": 0.9213875490205139,
      "learning_rate": 3.5000000000000004e-05,
      "loss": 0.5319,
      "step": 28
    },
    {
      "epoch": 0.2779552715654952,
      "grad_norm": 1.0198534952792746,
      "learning_rate": 3.625e-05,
      "loss": 0.5722,
      "step": 29
    },
    {
      "epoch": 0.28753993610223644,
      "grad_norm": 0.9853545050808614,
      "learning_rate": 3.7500000000000003e-05,
      "loss": 0.5514,
      "step": 30
    },
    {
      "epoch": 0.2971246006389776,
      "grad_norm": 0.9771316358011265,
      "learning_rate": 3.875e-05,
      "loss": 0.548,
      "step": 31
    },
    {
      "epoch": 0.30670926517571884,
      "grad_norm": 1.070148811782629,
      "learning_rate": 4e-05,
      "loss": 0.5776,
      "step": 32
    },
    {
      "epoch": 0.31629392971246006,
      "grad_norm": 0.8966401903811619,
      "learning_rate": 3.9998741135094016e-05,
      "loss": 0.5444,
      "step": 33
    },
    {
      "epoch": 0.3258785942492013,
      "grad_norm": 0.893875729745141,
      "learning_rate": 3.999496469885014e-05,
      "loss": 0.5704,
      "step": 34
    },
    {
      "epoch": 0.3354632587859425,
      "grad_norm": 1.072867766600089,
      "learning_rate": 3.998867116667067e-05,
      "loss": 0.5477,
      "step": 35
    },
    {
      "epoch": 0.3450479233226837,
      "grad_norm": 0.9635968083722172,
      "learning_rate": 3.9979861330826295e-05,
      "loss": 0.5534,
      "step": 36
    },
    {
      "epoch": 0.3546325878594249,
      "grad_norm": 0.9899242418062558,
      "learning_rate": 3.996853630035634e-05,
      "loss": 0.5655,
      "step": 37
    },
    {
      "epoch": 0.36421725239616615,
      "grad_norm": 0.9130531711455715,
      "learning_rate": 3.995469750092912e-05,
      "loss": 0.5817,
      "step": 38
    },
    {
      "epoch": 0.3738019169329074,
      "grad_norm": 0.9387442528138782,
      "learning_rate": 3.9938346674662565e-05,
      "loss": 0.5044,
      "step": 39
    },
    {
      "epoch": 0.38338658146964855,
      "grad_norm": 1.0142174519180858,
      "learning_rate": 3.991948587990479e-05,
      "loss": 0.5359,
      "step": 40
    },
    {
      "epoch": 0.3929712460063898,
      "grad_norm": 0.8959196832437484,
      "learning_rate": 3.989811749097505e-05,
      "loss": 0.5537,
      "step": 41
    },
    {
      "epoch": 0.402555910543131,
      "grad_norm": 0.9987977928627828,
      "learning_rate": 3.9874244197864856e-05,
      "loss": 0.5782,
      "step": 42
    },
    {
      "epoch": 0.41214057507987223,
      "grad_norm": 0.8845338538227933,
      "learning_rate": 3.984786900589929e-05,
      "loss": 0.5415,
      "step": 43
    },
    {
      "epoch": 0.4217252396166134,
      "grad_norm": 1.0047800925961548,
      "learning_rate": 3.98189952353587e-05,
      "loss": 0.5542,
      "step": 44
    },
    {
      "epoch": 0.43130990415335463,
      "grad_norm": 0.8022173762000696,
      "learning_rate": 3.9787626521060736e-05,
      "loss": 0.5483,
      "step": 45
    },
    {
      "epoch": 0.44089456869009586,
      "grad_norm": 0.7883000207976826,
      "learning_rate": 3.9753766811902756e-05,
      "loss": 0.5473,
      "step": 46
    },
    {
      "epoch": 0.4504792332268371,
      "grad_norm": 0.8434281567738428,
      "learning_rate": 3.971742037036472e-05,
      "loss": 0.5634,
      "step": 47
    },
    {
      "epoch": 0.46006389776357826,
      "grad_norm": 1.0277496566918978,
      "learning_rate": 3.96785917719726e-05,
      "loss": 0.558,
      "step": 48
    },
    {
      "epoch": 0.4696485623003195,
      "grad_norm": 0.99005810315587,
      "learning_rate": 3.9637285904722376e-05,
      "loss": 0.5508,
      "step": 49
    },
    {
      "epoch": 0.4792332268370607,
      "grad_norm": 1.0782113201945098,
      "learning_rate": 3.9593507968464714e-05,
      "loss": 0.5478,
      "step": 50
    },
    {
      "epoch": 0.48881789137380194,
      "grad_norm": 0.721872938484921,
      "learning_rate": 3.9547263474250385e-05,
      "loss": 0.5366,
      "step": 51
    },
    {
      "epoch": 0.4984025559105431,
      "grad_norm": 1.2142224090633027,
      "learning_rate": 3.949855824363647e-05,
      "loss": 0.5544,
      "step": 52
    },
    {
      "epoch": 0.5079872204472844,
      "grad_norm": 0.9185255274236219,
      "learning_rate": 3.9447398407953536e-05,
      "loss": 0.4976,
      "step": 53
    },
    {
      "epoch": 0.5175718849840255,
      "grad_norm": 1.0876007980497202,
      "learning_rate": 3.939379040753374e-05,
      "loss": 0.5374,
      "step": 54
    },
    {
      "epoch": 0.5271565495207667,
      "grad_norm": 1.2327514959394714,
      "learning_rate": 3.933774099090013e-05,
      "loss": 0.5517,
      "step": 55
    },
    {
      "epoch": 0.536741214057508,
      "grad_norm": 0.6905831120274069,
      "learning_rate": 3.927925721391707e-05,
      "loss": 0.5114,
      "step": 56
    },
    {
      "epoch": 0.5463258785942492,
      "grad_norm": 1.3327650152930188,
      "learning_rate": 3.9218346438901996e-05,
      "loss": 0.5556,
      "step": 57
    },
    {
      "epoch": 0.5559105431309904,
      "grad_norm": 0.7267801369686653,
      "learning_rate": 3.9155016333698615e-05,
      "loss": 0.5446,
      "step": 58
    },
    {
      "epoch": 0.5654952076677316,
      "grad_norm": 1.1332074133198213,
      "learning_rate": 3.908927487071162e-05,
      "loss": 0.5297,
      "step": 59
    },
    {
      "epoch": 0.5750798722044729,
      "grad_norm": 1.1446700448302367,
      "learning_rate": 3.9021130325903076e-05,
      "loss": 0.5283,
      "step": 60
    },
    {
      "epoch": 0.5846645367412141,
      "grad_norm": 0.9428641512869639,
      "learning_rate": 3.895059127775058e-05,
      "loss": 0.5276,
      "step": 61
    },
    {
      "epoch": 0.5942492012779552,
      "grad_norm": 0.984662057057416,
      "learning_rate": 3.8877666606167354e-05,
      "loss": 0.5053,
      "step": 62
    },
    {
      "epoch": 0.6038338658146964,
      "grad_norm": 0.9200152881682313,
      "learning_rate": 3.880236549138438e-05,
      "loss": 0.5019,
      "step": 63
    },
    {
      "epoch": 0.6134185303514377,
      "grad_norm": 0.9735224575622512,
      "learning_rate": 3.872469741279475e-05,
      "loss": 0.5027,
      "step": 64
    },
    {
      "epoch": 0.6230031948881789,
      "grad_norm": 0.8601738218870937,
      "learning_rate": 3.8644672147760286e-05,
      "loss": 0.5373,
      "step": 65
    },
    {
      "epoch": 0.6325878594249201,
      "grad_norm": 1.0690694771471052,
      "learning_rate": 3.856229977038078e-05,
      "loss": 0.5141,
      "step": 66
    },
    {
      "epoch": 0.6421725239616614,
      "grad_norm": 0.639345682465469,
      "learning_rate": 3.8477590650225735e-05,
      "loss": 0.5237,
      "step": 67
    },
    {
      "epoch": 0.6517571884984026,
      "grad_norm": 0.9825658908653957,
      "learning_rate": 3.839055545102902e-05,
      "loss": 0.5218,
      "step": 68
    },
    {
      "epoch": 0.6613418530351438,
      "grad_norm": 0.6640592278410514,
      "learning_rate": 3.83012051293464e-05,
      "loss": 0.5106,
      "step": 69
    },
    {
      "epoch": 0.670926517571885,
      "grad_norm": 0.9942824925976919,
      "learning_rate": 3.8209550933176324e-05,
      "loss": 0.5371,
      "step": 70
    },
    {
      "epoch": 0.6805111821086262,
      "grad_norm": 0.8321580496371637,
      "learning_rate": 3.8115604400543885e-05,
      "loss": 0.5285,
      "step": 71
    },
    {
      "epoch": 0.6900958466453674,
      "grad_norm": 0.6720920559775677,
      "learning_rate": 3.801937735804838e-05,
      "loss": 0.5226,
      "step": 72
    },
    {
      "epoch": 0.6996805111821086,
      "grad_norm": 0.869173718218821,
      "learning_rate": 3.792088191937451e-05,
      "loss": 0.5575,
      "step": 73
    },
    {
      "epoch": 0.7092651757188498,
      "grad_norm": 0.9252932692964674,
      "learning_rate": 3.782013048376736e-05,
      "loss": 0.5331,
      "step": 74
    },
    {
      "epoch": 0.7188498402555911,
      "grad_norm": 0.7617188628088486,
      "learning_rate": 3.77171357344716e-05,
      "loss": 0.4955,
      "step": 75
    },
    {
      "epoch": 0.7284345047923323,
      "grad_norm": 0.8768762141880961,
      "learning_rate": 3.761191063713476e-05,
      "loss": 0.5319,
      "step": 76
    },
    {
      "epoch": 0.7380191693290735,
      "grad_norm": 0.5856671731846674,
      "learning_rate": 3.7504468438175076e-05,
      "loss": 0.5286,
      "step": 77
    },
    {
      "epoch": 0.7476038338658147,
      "grad_norm": 0.7357152016449541,
      "learning_rate": 3.7394822663113915e-05,
      "loss": 0.5202,
      "step": 78
    },
    {
      "epoch": 0.7571884984025559,
      "grad_norm": 0.8421603022757149,
      "learning_rate": 3.72829871148731e-05,
      "loss": 0.5094,
      "step": 79
    },
    {
      "epoch": 0.7667731629392971,
      "grad_norm": 0.7711330057528273,
      "learning_rate": 3.716897587203733e-05,
      "loss": 0.5067,
      "step": 80
    },
    {
      "epoch": 0.7763578274760383,
      "grad_norm": 0.8061539763692872,
      "learning_rate": 3.705280328708185e-05,
      "loss": 0.5052,
      "step": 81
    },
    {
      "epoch": 0.7859424920127795,
      "grad_norm": 0.9052464984501254,
      "learning_rate": 3.6934483984565684e-05,
      "loss": 0.5396,
      "step": 82
    },
    {
      "epoch": 0.7955271565495208,
      "grad_norm": 0.7613495063825277,
      "learning_rate": 3.681403285929061e-05,
      "loss": 0.5372,
      "step": 83
    },
    {
      "epoch": 0.805111821086262,
      "grad_norm": 0.9112267984327458,
      "learning_rate": 3.669146507442606e-05,
      "loss": 0.5022,
      "step": 84
    },
    {
      "epoch": 0.8146964856230032,
      "grad_norm": 0.776862619807027,
      "learning_rate": 3.6566796059600334e-05,
      "loss": 0.4943,
      "step": 85
    },
    {
      "epoch": 0.8242811501597445,
      "grad_norm": 0.6857530550183694,
      "learning_rate": 3.644004150895821e-05,
      "loss": 0.5137,
      "step": 86
    },
    {
      "epoch": 0.8338658146964856,
      "grad_norm": 1.0001214073388247,
      "learning_rate": 3.631121737918521e-05,
      "loss": 0.5211,
      "step": 87
    },
    {
      "epoch": 0.8434504792332268,
      "grad_norm": 0.614100200634229,
      "learning_rate": 3.6180339887498953e-05,
      "loss": 0.5426,
      "step": 88
    },
    {
      "epoch": 0.853035143769968,
      "grad_norm": 0.7416467757559503,
      "learning_rate": 3.6047425509607566e-05,
      "loss": 0.4952,
      "step": 89
    },
    {
      "epoch": 0.8626198083067093,
      "grad_norm": 0.8216759125475441,
      "learning_rate": 3.591249097763562e-05,
      "loss": 0.5101,
      "step": 90
    },
    {
      "epoch": 0.8722044728434505,
      "grad_norm": 0.8783521934817947,
      "learning_rate": 3.5775553278017824e-05,
      "loss": 0.5298,
      "step": 91
    },
    {
      "epoch": 0.8817891373801917,
      "grad_norm": 1.0305280265606676,
      "learning_rate": 3.56366296493606e-05,
      "loss": 0.509,
      "step": 92
    },
    {
      "epoch": 0.8913738019169329,
      "grad_norm": 0.9394240012360305,
      "learning_rate": 3.5495737580272024e-05,
      "loss": 0.5078,
      "step": 93
    },
    {
      "epoch": 0.9009584664536742,
      "grad_norm": 0.9309348675119059,
      "learning_rate": 3.535289480716023e-05,
      "loss": 0.5207,
      "step": 94
    },
    {
      "epoch": 0.9105431309904153,
      "grad_norm": 0.9046584824077331,
      "learning_rate": 3.520811931200063e-05,
      "loss": 0.5141,
      "step": 95
    },
    {
      "epoch": 0.9201277955271565,
      "grad_norm": 0.9752162620950977,
      "learning_rate": 3.5061429320072225e-05,
      "loss": 0.509,
      "step": 96
    },
    {
      "epoch": 0.9297124600638977,
      "grad_norm": 0.6999864028990369,
      "learning_rate": 3.4912843297663315e-05,
      "loss": 0.5009,
      "step": 97
    },
    {
      "epoch": 0.939297124600639,
      "grad_norm": 0.8545396872964545,
      "learning_rate": 3.476237994974682e-05,
      "loss": 0.5094,
      "step": 98
    },
    {
      "epoch": 0.9488817891373802,
      "grad_norm": 0.7634132561961192,
      "learning_rate": 3.4610058217625554e-05,
      "loss": 0.5254,
      "step": 99
    },
    {
      "epoch": 0.9584664536741214,
      "grad_norm": 0.7968935260475134,
      "learning_rate": 3.4455897276547836e-05,
      "loss": 0.5103,
      "step": 100
    },
    {
      "epoch": 0.9680511182108626,
      "grad_norm": 0.8779383622683993,
      "learning_rate": 3.429991653329351e-05,
      "loss": 0.4962,
      "step": 101
    },
    {
      "epoch": 0.9776357827476039,
      "grad_norm": 0.8873245389104287,
      "learning_rate": 3.4142135623730954e-05,
      "loss": 0.4886,
      "step": 102
    },
    {
      "epoch": 0.987220447284345,
      "grad_norm": 0.6369979342430059,
      "learning_rate": 3.398257441034515e-05,
      "loss": 0.5153,
      "step": 103
    },
    {
      "epoch": 0.9968051118210862,
      "grad_norm": 0.7723810447745028,
      "learning_rate": 3.38212529797373e-05,
      "loss": 0.5153,
      "step": 104
    },
    {
      "epoch": 1.0063897763578276,
      "grad_norm": 1.3732822336657626,
      "learning_rate": 3.365819164009614e-05,
      "loss": 0.8678,
      "step": 105
    },
    {
      "epoch": 1.0159744408945688,
      "grad_norm": 0.6491143685994553,
      "learning_rate": 3.349341091864149e-05,
      "loss": 0.3831,
      "step": 106
    },
    {
      "epoch": 1.0255591054313098,
      "grad_norm": 0.7068223705318214,
      "learning_rate": 3.3326931559040084e-05,
      "loss": 0.4093,
      "step": 107
    },
    {
      "epoch": 1.035143769968051,
      "grad_norm": 0.7448924108254381,
      "learning_rate": 3.315877451879426e-05,
      "loss": 0.4151,
      "step": 108
    },
    {
      "epoch": 1.0447284345047922,
      "grad_norm": 0.6408577512276367,
      "learning_rate": 3.298896096660367e-05,
      "loss": 0.4591,
      "step": 109
    },
    {
      "epoch": 1.0543130990415335,
      "grad_norm": 0.7245321395554075,
      "learning_rate": 3.2817512279700486e-05,
      "loss": 0.3995,
      "step": 110
    },
    {
      "epoch": 1.0638977635782747,
      "grad_norm": 0.8050047056671462,
      "learning_rate": 3.26444500411582e-05,
      "loss": 0.4517,
      "step": 111
    },
    {
      "epoch": 1.073482428115016,
      "grad_norm": 0.5692172276488292,
      "learning_rate": 3.246979603717467e-05,
      "loss": 0.3451,
      "step": 112
    },
    {
      "epoch": 1.0830670926517572,
      "grad_norm": 0.7143343812753972,
      "learning_rate": 3.2293572254329546e-05,
      "loss": 0.4343,
      "step": 113
    },
    {
      "epoch": 1.0926517571884984,
      "grad_norm": 0.621683888292599,
      "learning_rate": 3.21158008768164e-05,
      "loss": 0.4192,
      "step": 114
    },
    {
      "epoch": 1.1022364217252396,
      "grad_norm": 0.7045370668340967,
      "learning_rate": 3.1936504283650076e-05,
      "loss": 0.4369,
      "step": 115
    },
    {
      "epoch": 1.1118210862619808,
      "grad_norm": 0.689049088523779,
      "learning_rate": 3.1755705045849465e-05,
      "loss": 0.3995,
      "step": 116
    },
    {
      "epoch": 1.121405750798722,
      "grad_norm": 0.6154467041297189,
      "learning_rate": 3.157342592359612e-05,
      "loss": 0.4309,
      "step": 117
    },
    {
      "epoch": 1.1309904153354633,
      "grad_norm": 0.6721872707368349,
      "learning_rate": 3.138968986336904e-05,
      "loss": 0.3839,
      "step": 118
    },
    {
      "epoch": 1.1405750798722045,
      "grad_norm": 0.6919770496482566,
      "learning_rate": 3.1204519995056056e-05,
      "loss": 0.4536,
      "step": 119
    },
    {
      "epoch": 1.1501597444089458,
      "grad_norm": 0.548860031455983,
      "learning_rate": 3.101793962904205e-05,
      "loss": 0.3669,
      "step": 120
    },
    {
      "epoch": 1.159744408945687,
      "grad_norm": 0.662490642467861,
      "learning_rate": 3.082997225327452e-05,
      "loss": 0.4413,
      "step": 121
    },
    {
      "epoch": 1.1693290734824282,
      "grad_norm": 0.6083191080476268,
      "learning_rate": 3.064064153030673e-05,
      "loss": 0.3873,
      "step": 122
    },
    {
      "epoch": 1.1789137380191694,
      "grad_norm": 0.6537310951440493,
      "learning_rate": 3.0449971294318977e-05,
      "loss": 0.4173,
      "step": 123
    },
    {
      "epoch": 1.1884984025559104,
      "grad_norm": 0.6521406167992244,
      "learning_rate": 3.0257985548118127e-05,
      "loss": 0.4252,
      "step": 124
    },
    {
      "epoch": 1.1980830670926517,
      "grad_norm": 0.6149682224088401,
      "learning_rate": 3.0064708460116007e-05,
      "loss": 0.4375,
      "step": 125
    },
    {
      "epoch": 1.207667731629393,
      "grad_norm": 0.5660735007468374,
      "learning_rate": 2.987016436128694e-05,
      "loss": 0.4002,
      "step": 126
    },
    {
      "epoch": 1.2172523961661341,
      "grad_norm": 0.6860797663501914,
      "learning_rate": 2.9674377742104798e-05,
      "loss": 0.4501,
      "step": 127
    },
    {
      "epoch": 1.2268370607028753,
      "grad_norm": 0.5579864217961026,
      "learning_rate": 2.9477373249459974e-05,
      "loss": 0.3881,
      "step": 128
    },
    {
      "epoch": 1.2364217252396166,
      "grad_norm": 0.7520213328572914,
      "learning_rate": 2.9279175683556684e-05,
      "loss": 0.4309,
      "step": 129
    },
    {
      "epoch": 1.2460063897763578,
      "grad_norm": 0.5836183639678406,
      "learning_rate": 2.9079809994790937e-05,
      "loss": 0.4493,
      "step": 130
    },
    {
      "epoch": 1.255591054313099,
      "grad_norm": 0.5454961635732396,
      "learning_rate": 2.8879301280609645e-05,
      "loss": 0.4248,
      "step": 131
    },
    {
      "epoch": 1.2651757188498403,
      "grad_norm": 0.5814578805359331,
      "learning_rate": 2.8677674782351164e-05,
      "loss": 0.4133,
      "step": 132
    },
    {
      "epoch": 1.2747603833865815,
      "grad_norm": 0.529954247172676,
      "learning_rate": 2.8474955882067776e-05,
      "loss": 0.4165,
      "step": 133
    },
    {
      "epoch": 1.2843450479233227,
      "grad_norm": 0.5428271229133367,
      "learning_rate": 2.8271170099330415e-05,
      "loss": 0.4439,
      "step": 134
    },
    {
      "epoch": 1.293929712460064,
      "grad_norm": 0.6847831612932209,
      "learning_rate": 2.8066343088016105e-05,
      "loss": 0.441,
      "step": 135
    },
    {
      "epoch": 1.3035143769968052,
      "grad_norm": 0.49210190834226275,
      "learning_rate": 2.7860500633078475e-05,
      "loss": 0.4165,
      "step": 136
    },
    {
      "epoch": 1.3130990415335464,
      "grad_norm": 0.6253827417860478,
      "learning_rate": 2.7653668647301797e-05,
      "loss": 0.4177,
      "step": 137
    },
    {
      "epoch": 1.3226837060702876,
      "grad_norm": 0.5902351365403176,
      "learning_rate": 2.7445873168038906e-05,
      "loss": 0.418,
      "step": 138
    },
    {
      "epoch": 1.3322683706070286,
      "grad_norm": 0.5490945288774098,
      "learning_rate": 2.7237140353933445e-05,
      "loss": 0.4352,
      "step": 139
    },
    {
      "epoch": 1.34185303514377,
      "grad_norm": 0.6497645956557252,
      "learning_rate": 2.7027496481626858e-05,
      "loss": 0.442,
      "step": 140
    },
    {
      "epoch": 1.351437699680511,
      "grad_norm": 0.542052646410243,
      "learning_rate": 2.68169679424505e-05,
      "loss": 0.3954,
      "step": 141
    },
    {
      "epoch": 1.3610223642172525,
      "grad_norm": 0.6469294947936336,
      "learning_rate": 2.6605581239103347e-05,
      "loss": 0.4324,
      "step": 142
    },
    {
      "epoch": 1.3706070287539935,
      "grad_norm": 0.6114843722995497,
      "learning_rate": 2.6393362982315632e-05,
      "loss": 0.4086,
      "step": 143
    },
    {
      "epoch": 1.3801916932907348,
      "grad_norm": 0.5302481010581994,
      "learning_rate": 2.618033988749895e-05,
      "loss": 0.3797,
      "step": 144
    },
    {
      "epoch": 1.389776357827476,
      "grad_norm": 0.6290020988684742,
      "learning_rate": 2.5966538771383124e-05,
      "loss": 0.4616,
      "step": 145
    },
    {
      "epoch": 1.3993610223642172,
      "grad_norm": 0.4520856565609043,
      "learning_rate": 2.5751986548640345e-05,
      "loss": 0.3691,
      "step": 146
    },
    {
      "epoch": 1.4089456869009584,
      "grad_norm": 0.6229237189143199,
      "learning_rate": 2.5536710228496986e-05,
      "loss": 0.4428,
      "step": 147
    },
    {
      "epoch": 1.4185303514376997,
      "grad_norm": 0.5985663011286853,
      "learning_rate": 2.5320736911333503e-05,
      "loss": 0.4101,
      "step": 148
    },
    {
      "epoch": 1.428115015974441,
      "grad_norm": 0.5602348014424229,
      "learning_rate": 2.5104093785272854e-05,
      "loss": 0.3799,
      "step": 149
    },
    {
      "epoch": 1.4376996805111821,
      "grad_norm": 0.5489525751898331,
      "learning_rate": 2.4886808122757882e-05,
      "loss": 0.4414,
      "step": 150
    },
    {
      "epoch": 1.4472843450479234,
      "grad_norm": 0.624773105379871,
      "learning_rate": 2.4668907277118114e-05,
      "loss": 0.381,
      "step": 151
    },
    {
      "epoch": 1.4568690095846646,
      "grad_norm": 0.5426555057346395,
      "learning_rate": 2.445041867912629e-05,
      "loss": 0.4539,
      "step": 152
    },
    {
      "epoch": 1.4664536741214058,
      "grad_norm": 0.55536633578182,
      "learning_rate": 2.423136983354526e-05,
      "loss": 0.3802,
      "step": 153
    },
    {
      "epoch": 1.476038338658147,
      "grad_norm": 0.4416725668124843,
      "learning_rate": 2.401178831566546e-05,
      "loss": 0.3748,
      "step": 154
    },
    {
      "epoch": 1.4856230031948883,
      "grad_norm": 0.6230029207177754,
      "learning_rate": 2.379170176783357e-05,
      "loss": 0.4455,
      "step": 155
    },
    {
      "epoch": 1.4952076677316293,
      "grad_norm": 0.42530667418850676,
      "learning_rate": 2.3571137895972735e-05,
      "loss": 0.3917,
      "step": 156
    },
    {
      "epoch": 1.5047923322683707,
      "grad_norm": 0.5190777897196971,
      "learning_rate": 2.335012446609473e-05,
      "loss": 0.3944,
      "step": 157
    },
    {
      "epoch": 1.5143769968051117,
      "grad_norm": 0.46663489738662295,
      "learning_rate": 2.312868930080462e-05,
      "loss": 0.4337,
      "step": 158
    },
    {
      "epoch": 1.5239616613418532,
      "grad_norm": 0.4797855409160743,
      "learning_rate": 2.2906860275798257e-05,
      "loss": 0.3447,
      "step": 159
    },
    {
      "epoch": 1.5335463258785942,
      "grad_norm": 0.48961810463783534,
      "learning_rate": 2.2684665316353112e-05,
      "loss": 0.4036,
      "step": 160
    },
    {
      "epoch": 1.5431309904153354,
      "grad_norm": 0.5015403228340246,
      "learning_rate": 2.246213239381286e-05,
      "loss": 0.4494,
      "step": 161
    },
    {
      "epoch": 1.5527156549520766,
      "grad_norm": 0.49826419920046305,
      "learning_rate": 2.2239289522066157e-05,
      "loss": 0.3662,
      "step": 162
    },
    {
      "epoch": 1.5623003194888179,
      "grad_norm": 0.5982999217769431,
      "learning_rate": 2.201616475402009e-05,
      "loss": 0.4716,
      "step": 163
    },
    {
      "epoch": 1.571884984025559,
      "grad_norm": 0.4282708816156415,
      "learning_rate": 2.179278617806867e-05,
      "loss": 0.3435,
      "step": 164
    },
    {
      "epoch": 1.5814696485623003,
      "grad_norm": 0.4734658108408669,
      "learning_rate": 2.1569181914556904e-05,
      "loss": 0.4212,
      "step": 165
    },
    {
      "epoch": 1.5910543130990416,
      "grad_norm": 0.49859752482578384,
      "learning_rate": 2.1345380112240796e-05,
      "loss": 0.425,
      "step": 166
    },
    {
      "epoch": 1.6006389776357828,
      "grad_norm": 0.47157462374026493,
      "learning_rate": 2.1121408944743838e-05,
      "loss": 0.4507,
      "step": 167
    },
    {
      "epoch": 1.610223642172524,
      "grad_norm": 0.4248405831748351,
      "learning_rate": 2.08972966070103e-05,
      "loss": 0.3602,
      "step": 168
    },
    {
      "epoch": 1.619808306709265,
      "grad_norm": 0.5006569891631065,
      "learning_rate": 2.0673071311755885e-05,
      "loss": 0.4462,
      "step": 169
    },
    {
      "epoch": 1.6293929712460065,
      "grad_norm": 0.43452414054342864,
      "learning_rate": 2.0448761285916103e-05,
      "loss": 0.3398,
      "step": 170
    },
    {
      "epoch": 1.6389776357827475,
      "grad_norm": 0.5593870865885154,
      "learning_rate": 2.022439476709292e-05,
      "loss": 0.4578,
      "step": 171
    },
    {
      "epoch": 1.648562300319489,
      "grad_norm": 0.48660923787373483,
      "learning_rate": 2e-05,
      "loss": 0.402,
      "step": 172
    },
    {
      "epoch": 1.65814696485623,
      "grad_norm": 0.47042892474320597,
      "learning_rate": 1.9775605232907085e-05,
      "loss": 0.3732,
      "step": 173
    },
    {
      "epoch": 1.6677316293929714,
      "grad_norm": 0.4637788148192955,
      "learning_rate": 1.9551238714083903e-05,
      "loss": 0.4164,
      "step": 174
    },
    {
      "epoch": 1.6773162939297124,
      "grad_norm": 0.5061846620195124,
      "learning_rate": 1.932692868824413e-05,
      "loss": 0.375,
      "step": 175
    },
    {
      "epoch": 1.6869009584664538,
      "grad_norm": 0.4389361045425349,
      "learning_rate": 1.910270339298971e-05,
      "loss": 0.4262,
      "step": 176
    },
    {
      "epoch": 1.6964856230031948,
      "grad_norm": 0.4781379704434354,
      "learning_rate": 1.8878591055256165e-05,
      "loss": 0.4094,
      "step": 177
    },
    {
      "epoch": 1.706070287539936,
      "grad_norm": 0.4719818469093975,
      "learning_rate": 1.8654619887759207e-05,
      "loss": 0.4398,
      "step": 178
    },
    {
      "epoch": 1.7156549520766773,
      "grad_norm": 0.4401976778891567,
      "learning_rate": 1.8430818085443106e-05,
      "loss": 0.3973,
      "step": 179
    },
    {
      "epoch": 1.7252396166134185,
      "grad_norm": 0.5070123943848236,
      "learning_rate": 1.8207213821931332e-05,
      "loss": 0.4068,
      "step": 180
    },
    {
      "epoch": 1.7348242811501597,
      "grad_norm": 0.4165356789504617,
      "learning_rate": 1.7983835245979914e-05,
      "loss": 0.4368,
      "step": 181
    },
    {
      "epoch": 1.744408945686901,
      "grad_norm": 0.5166587940461813,
      "learning_rate": 1.7760710477933846e-05,
      "loss": 0.4043,
      "step": 182
    },
    {
      "epoch": 1.7539936102236422,
      "grad_norm": 0.48269705466364915,
      "learning_rate": 1.7537867606187145e-05,
      "loss": 0.4118,
      "step": 183
    },
    {
      "epoch": 1.7635782747603834,
      "grad_norm": 0.47756180477333937,
      "learning_rate": 1.7315334683646898e-05,
      "loss": 0.42,
      "step": 184
    },
    {
      "epoch": 1.7731629392971247,
      "grad_norm": 0.4765695567575237,
      "learning_rate": 1.7093139724201753e-05,
      "loss": 0.4012,
      "step": 185
    },
    {
      "epoch": 1.7827476038338657,
      "grad_norm": 0.4681108878215689,
      "learning_rate": 1.687131069919538e-05,
      "loss": 0.41,
      "step": 186
    },
    {
      "epoch": 1.792332268370607,
      "grad_norm": 0.46037561395697485,
      "learning_rate": 1.6649875533905276e-05,
      "loss": 0.4043,
      "step": 187
    },
    {
      "epoch": 1.8019169329073481,
      "grad_norm": 0.419707165268871,
      "learning_rate": 1.642886210402727e-05,
      "loss": 0.3914,
      "step": 188
    },
    {
      "epoch": 1.8115015974440896,
      "grad_norm": 0.4619831244311854,
      "learning_rate": 1.620829823216643e-05,
      "loss": 0.4228,
      "step": 189
    },
    {
      "epoch": 1.8210862619808306,
      "grad_norm": 0.4361667434307363,
      "learning_rate": 1.5988211684334548e-05,
      "loss": 0.4047,
      "step": 190
    },
    {
      "epoch": 1.830670926517572,
      "grad_norm": 0.46079446208057,
      "learning_rate": 1.5768630166454746e-05,
      "loss": 0.4362,
      "step": 191
    },
    {
      "epoch": 1.840255591054313,
      "grad_norm": 0.4550050932615957,
      "learning_rate": 1.5549581320873715e-05,
      "loss": 0.4118,
      "step": 192
    },
    {
      "epoch": 1.8498402555910545,
      "grad_norm": 0.39293704953570213,
      "learning_rate": 1.53310927228819e-05,
      "loss": 0.407,
      "step": 193
    },
    {
      "epoch": 1.8594249201277955,
      "grad_norm": 0.4352517300933002,
      "learning_rate": 1.5113191877242116e-05,
      "loss": 0.3869,
      "step": 194
    },
    {
      "epoch": 1.8690095846645367,
      "grad_norm": 0.41513949179810855,
      "learning_rate": 1.4895906214727149e-05,
      "loss": 0.3755,
      "step": 195
    },
    {
      "epoch": 1.878594249201278,
      "grad_norm": 0.422581641804126,
      "learning_rate": 1.46792630886665e-05,
      "loss": 0.3707,
      "step": 196
    },
    {
      "epoch": 1.8881789137380192,
      "grad_norm": 0.4315158978443658,
      "learning_rate": 1.4463289771503015e-05,
      "loss": 0.4135,
      "step": 197
    },
    {
      "epoch": 1.8977635782747604,
      "grad_norm": 0.4439035469190941,
      "learning_rate": 1.4248013451359657e-05,
      "loss": 0.4299,
      "step": 198
    },
    {
      "epoch": 1.9073482428115016,
      "grad_norm": 0.38114332649556365,
      "learning_rate": 1.403346122861688e-05,
      "loss": 0.3986,
      "step": 199
    },
    {
      "epoch": 1.9169329073482428,
      "grad_norm": 0.4562893320122979,
      "learning_rate": 1.3819660112501054e-05,
      "loss": 0.4447,
      "step": 200
    },
    {
      "epoch": 1.926517571884984,
      "grad_norm": 0.41109636995050963,
      "learning_rate": 1.3606637017684375e-05,
      "loss": 0.4231,
      "step": 201
    },
    {
      "epoch": 1.9361022364217253,
      "grad_norm": 0.4108514256627659,
      "learning_rate": 1.3394418760896665e-05,
      "loss": 0.3919,
      "step": 202
    },
    {
      "epoch": 1.9456869009584663,
      "grad_norm": 0.4403482836797348,
      "learning_rate": 1.31830320575495e-05,
      "loss": 0.4004,
      "step": 203
    },
    {
      "epoch": 1.9552715654952078,
      "grad_norm": 0.46058376777126453,
      "learning_rate": 1.2972503518373145e-05,
      "loss": 0.4271,
      "step": 204
    },
    {
      "epoch": 1.9648562300319488,
      "grad_norm": 0.4549563896458377,
      "learning_rate": 1.2762859646066561e-05,
      "loss": 0.4155,
      "step": 205
    },
    {
      "epoch": 1.9744408945686902,
      "grad_norm": 0.47102947639065273,
      "learning_rate": 1.2554126831961097e-05,
      "loss": 0.3947,
      "step": 206
    },
    {
      "epoch": 1.9840255591054312,
      "grad_norm": 0.43255866772480006,
      "learning_rate": 1.2346331352698206e-05,
      "loss": 0.3965,
      "step": 207
    },
    {
      "epoch": 1.9936102236421727,
      "grad_norm": 0.6055961125329625,
      "learning_rate": 1.213949936692153e-05,
      "loss": 0.4308,
      "step": 208
    },
    {
      "epoch": 2.0031948881789137,
      "grad_norm": 0.8201258360584602,
      "learning_rate": 1.1933656911983901e-05,
      "loss": 0.5536,
      "step": 209
    },
    {
      "epoch": 2.012779552715655,
      "grad_norm": 0.5116622937539458,
      "learning_rate": 1.1728829900669592e-05,
      "loss": 0.3256,
      "step": 210
    },
    {
      "epoch": 2.022364217252396,
      "grad_norm": 0.6977084341791524,
      "learning_rate": 1.1525044117932227e-05,
      "loss": 0.3275,
      "step": 211
    },
    {
      "epoch": 2.0319488817891376,
      "grad_norm": 0.639393679219998,
      "learning_rate": 1.132232521764884e-05,
      "loss": 0.3009,
      "step": 212
    },
    {
      "epoch": 2.0415335463258786,
      "grad_norm": 0.4890609363134351,
      "learning_rate": 1.1120698719390362e-05,
      "loss": 0.3387,
      "step": 213
    },
    {
      "epoch": 2.0511182108626196,
      "grad_norm": 0.5588000853620018,
      "learning_rate": 1.0920190005209066e-05,
      "loss": 0.3105,
      "step": 214
    },
    {
      "epoch": 2.060702875399361,
      "grad_norm": 0.5036006567654044,
      "learning_rate": 1.0720824316443321e-05,
      "loss": 0.2901,
      "step": 215
    },
    {
      "epoch": 2.070287539936102,
      "grad_norm": 0.5915822002117069,
      "learning_rate": 1.0522626750540029e-05,
      "loss": 0.3493,
      "step": 216
    },
    {
      "epoch": 2.0798722044728435,
      "grad_norm": 0.4214007118584772,
      "learning_rate": 1.0325622257895205e-05,
      "loss": 0.2985,
      "step": 217
    },
    {
      "epoch": 2.0894568690095845,
      "grad_norm": 0.5295101270711986,
      "learning_rate": 1.0129835638713064e-05,
      "loss": 0.3166,
      "step": 218
    },
    {
      "epoch": 2.099041533546326,
      "grad_norm": 0.4906090405908181,
      "learning_rate": 9.935291539884e-06,
      "loss": 0.2843,
      "step": 219
    },
    {
      "epoch": 2.108626198083067,
      "grad_norm": 0.3938053903767187,
      "learning_rate": 9.74201445188188e-06,
      "loss": 0.3163,
      "step": 220
    },
    {
      "epoch": 2.1182108626198084,
      "grad_norm": 0.5012038895473048,
      "learning_rate": 9.550028705681024e-06,
      "loss": 0.3222,
      "step": 221
    },
    {
      "epoch": 2.1277955271565494,
      "grad_norm": 0.4321669775385389,
      "learning_rate": 9.359358469693272e-06,
      "loss": 0.2909,
      "step": 222
    },
    {
      "epoch": 2.137380191693291,
      "grad_norm": 0.4117178329226722,
      "learning_rate": 9.170027746725487e-06,
      "loss": 0.3146,
      "step": 223
    },
    {
      "epoch": 2.146964856230032,
      "grad_norm": 0.3793388916643685,
      "learning_rate": 8.982060370957953e-06,
      "loss": 0.3017,
      "step": 224
    },
    {
      "epoch": 2.1565495207667733,
      "grad_norm": 0.4535610427107228,
      "learning_rate": 8.795480004943946e-06,
      "loss": 0.3456,
      "step": 225
    },
    {
      "epoch": 2.1661341853035143,
      "grad_norm": 0.37244536401873557,
      "learning_rate": 8.610310136630962e-06,
      "loss": 0.2761,
      "step": 226
    },
    {
      "epoch": 2.1757188498402558,
      "grad_norm": 0.3905670548801518,
      "learning_rate": 8.426574076403887e-06,
      "loss": 0.302,
      "step": 227
    },
    {
      "epoch": 2.1853035143769968,
      "grad_norm": 0.4359210822644894,
      "learning_rate": 8.24429495415054e-06,
      "loss": 0.338,
      "step": 228
    },
    {
      "epoch": 2.194888178913738,
      "grad_norm": 0.35920435170508774,
      "learning_rate": 8.063495716349929e-06,
      "loss": 0.3081,
      "step": 229
    },
    {
      "epoch": 2.2044728434504792,
      "grad_norm": 0.37621369347346134,
      "learning_rate": 7.884199123183604e-06,
      "loss": 0.3419,
      "step": 230
    },
    {
      "epoch": 2.2140575079872207,
      "grad_norm": 0.3458946393058899,
      "learning_rate": 7.706427745670458e-06,
      "loss": 0.2749,
      "step": 231
    },
    {
      "epoch": 2.2236421725239617,
      "grad_norm": 0.4279444858711213,
      "learning_rate": 7.530203962825331e-06,
      "loss": 0.3365,
      "step": 232
    },
    {
      "epoch": 2.2332268370607027,
      "grad_norm": 0.3940413928415214,
      "learning_rate": 7.355549958841808e-06,
      "loss": 0.3282,
      "step": 233
    },
    {
      "epoch": 2.242811501597444,
      "grad_norm": 0.39687410854343524,
      "learning_rate": 7.182487720299518e-06,
      "loss": 0.2943,
      "step": 234
    },
    {
      "epoch": 2.252396166134185,
      "grad_norm": 0.42285032823633234,
      "learning_rate": 7.01103903339633e-06,
      "loss": 0.3252,
      "step": 235
    },
    {
      "epoch": 2.2619808306709266,
      "grad_norm": 0.3730435843457758,
      "learning_rate": 6.841225481205749e-06,
      "loss": 0.2755,
      "step": 236
    },
    {
      "epoch": 2.2715654952076676,
      "grad_norm": 0.35400532023912695,
      "learning_rate": 6.6730684409599225e-06,
      "loss": 0.2962,
      "step": 237
    },
    {
      "epoch": 2.281150159744409,
      "grad_norm": 0.39975466800088416,
      "learning_rate": 6.5065890813585145e-06,
      "loss": 0.2951,
      "step": 238
    },
    {
      "epoch": 2.29073482428115,
      "grad_norm": 0.4291346122689877,
      "learning_rate": 6.3418083599038624e-06,
      "loss": 0.317,
      "step": 239
    },
    {
      "epoch": 2.3003194888178915,
      "grad_norm": 0.3209273222551871,
      "learning_rate": 6.178747020262708e-06,
      "loss": 0.2881,
      "step": 240
    },
    {
      "epoch": 2.3099041533546325,
      "grad_norm": 0.3305467433463152,
      "learning_rate": 6.017425589654853e-06,
      "loss": 0.2595,
      "step": 241
    },
    {
      "epoch": 2.319488817891374,
      "grad_norm": 0.38111354545103865,
      "learning_rate": 5.857864376269051e-06,
      "loss": 0.326,
      "step": 242
    },
    {
      "epoch": 2.329073482428115,
      "grad_norm": 0.30011533883033215,
      "learning_rate": 5.700083466706494e-06,
      "loss": 0.2744,
      "step": 243
    },
    {
      "epoch": 2.3386581469648564,
      "grad_norm": 0.3813259748607558,
      "learning_rate": 5.544102723452171e-06,
      "loss": 0.3105,
      "step": 244
    },
    {
      "epoch": 2.3482428115015974,
      "grad_norm": 0.35123991131982374,
      "learning_rate": 5.38994178237445e-06,
      "loss": 0.316,
      "step": 245
    },
    {
      "epoch": 2.357827476038339,
      "grad_norm": 0.32456708820534624,
      "learning_rate": 5.237620050253189e-06,
      "loss": 0.3226,
      "step": 246
    },
    {
      "epoch": 2.36741214057508,
      "grad_norm": 0.2976838087469898,
      "learning_rate": 5.087156702336689e-06,
      "loss": 0.2614,
      "step": 247
    },
    {
      "epoch": 2.376996805111821,
      "grad_norm": 0.36552843906148524,
      "learning_rate": 4.938570679927784e-06,
      "loss": 0.3123,
      "step": 248
    },
    {
      "epoch": 2.3865814696485623,
      "grad_norm": 0.35095503173458703,
      "learning_rate": 4.791880687999382e-06,
      "loss": 0.3123,
      "step": 249
    },
    {
      "epoch": 2.3961661341853033,
      "grad_norm": 0.30625640312787195,
      "learning_rate": 4.647105192839778e-06,
      "loss": 0.3176,
      "step": 250
    },
    {
      "epoch": 2.405750798722045,
      "grad_norm": 0.341761533375559,
      "learning_rate": 4.504262419727983e-06,
      "loss": 0.3144,
      "step": 251
    },
    {
      "epoch": 2.415335463258786,
      "grad_norm": 0.3533342249857026,
      "learning_rate": 4.363370350639405e-06,
      "loss": 0.34,
      "step": 252
    },
    {
      "epoch": 2.4249201277955272,
      "grad_norm": 0.34883281084308354,
      "learning_rate": 4.2244467219821806e-06,
      "loss": 0.2955,
      "step": 253
    },
    {
      "epoch": 2.4345047923322682,
      "grad_norm": 0.32463619347820316,
      "learning_rate": 4.087509022364382e-06,
      "loss": 0.283,
      "step": 254
    },
    {
      "epoch": 2.4440894568690097,
      "grad_norm": 0.33770338798840704,
      "learning_rate": 3.952574490392443e-06,
      "loss": 0.3177,
      "step": 255
    },
    {
      "epoch": 2.4536741214057507,
      "grad_norm": 0.3110029435092563,
      "learning_rate": 3.819660112501053e-06,
      "loss": 0.302,
      "step": 256
    },
    {
      "epoch": 2.463258785942492,
      "grad_norm": 0.30800590517974463,
      "learning_rate": 3.6887826208147968e-06,
      "loss": 0.302,
      "step": 257
    },
    {
      "epoch": 2.472843450479233,
      "grad_norm": 0.3232681352037157,
      "learning_rate": 3.5599584910418037e-06,
      "loss": 0.2646,
      "step": 258
    },
    {
      "epoch": 2.4824281150159746,
      "grad_norm": 0.36421296660907726,
      "learning_rate": 3.433203940399672e-06,
      "loss": 0.3084,
      "step": 259
    },
    {
      "epoch": 2.4920127795527156,
      "grad_norm": 0.3831030312811903,
      "learning_rate": 3.3085349255739475e-06,
      "loss": 0.3469,
      "step": 260
    },
    {
      "epoch": 2.501597444089457,
      "grad_norm": 0.3325867758576896,
      "learning_rate": 3.1859671407093984e-06,
      "loss": 0.3093,
      "step": 261
    },
    {
      "epoch": 2.511182108626198,
      "grad_norm": 0.337706654419708,
      "learning_rate": 3.0655160154343177e-06,
      "loss": 0.2979,
      "step": 262
    },
    {
      "epoch": 2.520766773162939,
      "grad_norm": 0.32967438884893885,
      "learning_rate": 2.947196712918157e-06,
      "loss": 0.3047,
      "step": 263
    },
    {
      "epoch": 2.5303514376996805,
      "grad_norm": 0.3233433151169985,
      "learning_rate": 2.8310241279626784e-06,
      "loss": 0.2969,
      "step": 264
    },
    {
      "epoch": 2.539936102236422,
      "grad_norm": 0.31301087906685726,
      "learning_rate": 2.7170128851269084e-06,
      "loss": 0.2981,
      "step": 265
    },
    {
      "epoch": 2.549520766773163,
      "grad_norm": 0.32786149056284464,
      "learning_rate": 2.6051773368860935e-06,
      "loss": 0.3308,
      "step": 266
    },
    {
      "epoch": 2.559105431309904,
      "grad_norm": 0.31611139290200363,
      "learning_rate": 2.4955315618249263e-06,
      "loss": 0.3134,
      "step": 267
    },
    {
      "epoch": 2.5686900958466454,
      "grad_norm": 0.3202278737140233,
      "learning_rate": 2.38808936286524e-06,
      "loss": 0.2988,
      "step": 268
    },
    {
      "epoch": 2.5782747603833864,
      "grad_norm": 0.3187169467092785,
      "learning_rate": 2.2828642655284038e-06,
      "loss": 0.275,
      "step": 269
    },
    {
      "epoch": 2.587859424920128,
      "grad_norm": 0.31150312167717303,
      "learning_rate": 2.1798695162326444e-06,
      "loss": 0.2913,
      "step": 270
    },
    {
      "epoch": 2.597444089456869,
      "grad_norm": 0.3199602869029221,
      "learning_rate": 2.0791180806254975e-06,
      "loss": 0.3142,
      "step": 271
    },
    {
      "epoch": 2.6070287539936103,
      "grad_norm": 0.2859711870729773,
      "learning_rate": 1.9806226419516195e-06,
      "loss": 0.2994,
      "step": 272
    },
    {
      "epoch": 2.6166134185303513,
      "grad_norm": 0.31034616192733694,
      "learning_rate": 1.8843955994561191e-06,
      "loss": 0.3301,
      "step": 273
    },
    {
      "epoch": 2.626198083067093,
      "grad_norm": 0.2911594390655468,
      "learning_rate": 1.790449066823683e-06,
      "loss": 0.3136,
      "step": 274
    },
    {
      "epoch": 2.635782747603834,
      "grad_norm": 0.31846174945632705,
      "learning_rate": 1.6987948706536038e-06,
      "loss": 0.3265,
      "step": 275
    },
    {
      "epoch": 2.6453674121405752,
      "grad_norm": 0.3091371745510634,
      "learning_rate": 1.6094445489709886e-06,
      "loss": 0.3116,
      "step": 276
    },
    {
      "epoch": 2.6549520766773163,
      "grad_norm": 0.28744483162457096,
      "learning_rate": 1.5224093497742654e-06,
      "loss": 0.3081,
      "step": 277
    },
    {
      "epoch": 2.6645367412140573,
      "grad_norm": 0.3010104381158104,
      "learning_rate": 1.4377002296192233e-06,
      "loss": 0.2914,
      "step": 278
    },
    {
      "epoch": 2.6741214057507987,
      "grad_norm": 0.3184703269379148,
      "learning_rate": 1.3553278522397162e-06,
      "loss": 0.3398,
      "step": 279
    },
    {
      "epoch": 2.68370607028754,
      "grad_norm": 0.3012745996881748,
      "learning_rate": 1.275302587205256e-06,
      "loss": 0.2895,
      "step": 280
    },
    {
      "epoch": 2.693290734824281,
      "grad_norm": 0.32944896697723586,
      "learning_rate": 1.1976345086156193e-06,
      "loss": 0.3205,
      "step": 281
    },
    {
      "epoch": 2.702875399361022,
      "grad_norm": 0.30329250730790236,
      "learning_rate": 1.1223333938326486e-06,
      "loss": 0.307,
      "step": 282
    },
    {
      "epoch": 2.7124600638977636,
      "grad_norm": 0.31477266554231703,
      "learning_rate": 1.0494087222494253e-06,
      "loss": 0.2977,
      "step": 283
    },
    {
      "epoch": 2.722044728434505,
      "grad_norm": 0.27218751575717026,
      "learning_rate": 9.788696740969295e-07,
      "loss": 0.2727,
      "step": 284
    },
    {
      "epoch": 2.731629392971246,
      "grad_norm": 0.2957347661302385,
      "learning_rate": 9.107251292883856e-07,
      "loss": 0.3241,
      "step": 285
    },
    {
      "epoch": 2.741214057507987,
      "grad_norm": 0.2754457767022288,
      "learning_rate": 8.44983666301391e-07,
      "loss": 0.2888,
      "step": 286
    },
    {
      "epoch": 2.7507987220447285,
      "grad_norm": 0.3009061607925657,
      "learning_rate": 7.816535610980103e-07,
      "loss": 0.3236,
      "step": 287
    },
    {
      "epoch": 2.7603833865814695,
      "grad_norm": 0.3042692164321764,
      "learning_rate": 7.207427860829352e-07,
      "loss": 0.3293,
      "step": 288
    },
    {
      "epoch": 2.769968051118211,
      "grad_norm": 0.3084699080559463,
      "learning_rate": 6.622590090998727e-07,
      "loss": 0.2896,
      "step": 289
    },
    {
      "epoch": 2.779552715654952,
      "grad_norm": 0.2874746995613232,
      "learning_rate": 6.062095924662625e-07,
      "loss": 0.2977,
      "step": 290
    },
    {
      "epoch": 2.7891373801916934,
      "grad_norm": 0.30432287110598966,
      "learning_rate": 5.526015920464689e-07,
      "loss": 0.3527,
      "step": 291
    },
    {
      "epoch": 2.7987220447284344,
      "grad_norm": 0.2752619163287045,
      "learning_rate": 5.014417563635276e-07,
      "loss": 0.2575,
      "step": 292
    },
    {
      "epoch": 2.8083067092651754,
      "grad_norm": 0.3000832642958543,
      "learning_rate": 4.5273652574961745e-07,
      "loss": 0.3026,
      "step": 293
    },
    {
      "epoch": 2.817891373801917,
      "grad_norm": 0.28459336557371145,
      "learning_rate": 4.064920315352905e-07,
      "loss": 0.2953,
      "step": 294
    },
    {
      "epoch": 2.8274760383386583,
      "grad_norm": 0.3403049604495616,
      "learning_rate": 3.62714095277632e-07,
      "loss": 0.3698,
      "step": 295
    },
    {
      "epoch": 2.8370607028753994,
      "grad_norm": 0.28503876136131184,
      "learning_rate": 3.214082280274067e-07,
      "loss": 0.2709,
      "step": 296
    },
    {
      "epoch": 2.8466453674121404,
      "grad_norm": 0.299085461383954,
      "learning_rate": 2.825796296352823e-07,
      "loss": 0.3268,
      "step": 297
    },
    {
      "epoch": 2.856230031948882,
      "grad_norm": 0.3016256794104278,
      "learning_rate": 2.462331880972468e-07,
      "loss": 0.3032,
      "step": 298
    },
    {
      "epoch": 2.8658146964856233,
      "grad_norm": 0.45115419737664014,
      "learning_rate": 2.123734789392673e-07,
      "loss": 0.3539,
      "step": 299
    },
    {
      "epoch": 2.8753993610223643,
      "grad_norm": 0.2661772363746791,
      "learning_rate": 1.81004764641306e-07,
      "loss": 0.2696,
      "step": 300
    },
    {
      "epoch": 2.8849840255591053,
      "grad_norm": 0.2901415907606709,
      "learning_rate": 1.5213099410071873e-07,
      "loss": 0.3199,
      "step": 301
    },
    {
      "epoch": 2.8945686900958467,
      "grad_norm": 0.29953435371037257,
      "learning_rate": 1.2575580213514792e-07,
      "loss": 0.3403,
      "step": 302
    },
    {
      "epoch": 2.9041533546325877,
      "grad_norm": 0.28555862760239925,
      "learning_rate": 1.0188250902495312e-07,
      "loss": 0.3211,
      "step": 303
    },
    {
      "epoch": 2.913738019169329,
      "grad_norm": 0.3026928327893977,
      "learning_rate": 8.051412009521864e-08,
      "loss": 0.3422,
      "step": 304
    },
    {
      "epoch": 2.92332268370607,
      "grad_norm": 0.27586171376383506,
      "learning_rate": 6.165332533744072e-08,
      "loss": 0.2635,
      "step": 305
    },
    {
      "epoch": 2.9329073482428116,
      "grad_norm": 0.2797989784483068,
      "learning_rate": 4.530249907087836e-08,
      "loss": 0.2942,
      "step": 306
    },
    {
      "epoch": 2.9424920127795526,
      "grad_norm": 0.32210333279055925,
      "learning_rate": 3.146369964366791e-08,
      "loss": 0.3829,
      "step": 307
    },
    {
      "epoch": 2.952076677316294,
      "grad_norm": 0.27055679198155164,
      "learning_rate": 2.0138669173708213e-08,
      "loss": 0.2838,
      "step": 308
    },
    {
      "epoch": 2.961661341853035,
      "grad_norm": 0.28816307097851795,
      "learning_rate": 1.1328833329333767e-08,
      "loss": 0.3031,
      "step": 309
    },
    {
      "epoch": 2.9712460063897765,
      "grad_norm": 0.28899005094993563,
      "learning_rate": 5.0353011498693875e-09,
      "loss": 0.3185,
      "step": 310
    },
    {
      "epoch": 2.9808306709265175,
      "grad_norm": 0.26574618706072956,
      "learning_rate": 1.2588649059885883e-09,
      "loss": 0.2968,
      "step": 311
    },
    {
      "epoch": 2.9904153354632586,
      "grad_norm": 0.28718668323400676,
      "learning_rate": 0.0,
      "loss": 0.3009,
      "step": 312
    },
    {
      "epoch": 2.9904153354632586,
      "step": 312,
      "total_flos": 4.4115474684667494e+17,
      "train_loss": 0.43267787887881964,
      "train_runtime": 10181.5896,
      "train_samples_per_second": 2.946,
      "train_steps_per_second": 0.031
    }
  ],
  "logging_steps": 1.0,
  "max_steps": 312,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 4.4115474684667494e+17,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}