| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.0, |
| "eval_steps": 500, |
| "global_step": 337, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "entropy": 1.2208438903093337, |
| "epoch": 0.01486988847583643, |
| "grad_norm": 0.55859375, |
| "learning_rate": 1.6000000000000003e-05, |
| "loss": 2.118885803222656, |
| "mean_token_accuracy": 0.5921677611768246, |
| "num_tokens": 162022.0, |
| "step": 5 |
| }, |
| { |
| "entropy": 1.2382806837558746, |
| "epoch": 0.02973977695167286, |
| "grad_norm": 0.08056640625, |
| "learning_rate": 3.6e-05, |
| "loss": 1.6261102676391601, |
| "mean_token_accuracy": 0.647309884428978, |
| "num_tokens": 324206.0, |
| "step": 10 |
| }, |
| { |
| "entropy": 1.5387765020132065, |
| "epoch": 0.04460966542750929, |
| "grad_norm": 0.07470703125, |
| "learning_rate": 5.6000000000000006e-05, |
| "loss": 1.5022705078125, |
| "mean_token_accuracy": 0.6562040001153946, |
| "num_tokens": 486182.0, |
| "step": 15 |
| }, |
| { |
| "entropy": 1.479974600672722, |
| "epoch": 0.05947955390334572, |
| "grad_norm": 0.0703125, |
| "learning_rate": 7.6e-05, |
| "loss": 1.3166878700256348, |
| "mean_token_accuracy": 0.6958272859454155, |
| "num_tokens": 648008.0, |
| "step": 20 |
| }, |
| { |
| "entropy": 1.048148949444294, |
| "epoch": 0.07434944237918216, |
| "grad_norm": 0.0751953125, |
| "learning_rate": 9.6e-05, |
| "loss": 1.0947124481201171, |
| "mean_token_accuracy": 0.7425475120544434, |
| "num_tokens": 809135.0, |
| "step": 25 |
| }, |
| { |
| "entropy": 0.8074702247977257, |
| "epoch": 0.08921933085501858, |
| "grad_norm": 0.047607421875, |
| "learning_rate": 0.000116, |
| "loss": 0.8717484474182129, |
| "mean_token_accuracy": 0.8009411200881005, |
| "num_tokens": 971805.0, |
| "step": 30 |
| }, |
| { |
| "entropy": 0.7200885951519013, |
| "epoch": 0.10408921933085502, |
| "grad_norm": 0.0299072265625, |
| "learning_rate": 0.00013600000000000003, |
| "loss": 0.7251162052154541, |
| "mean_token_accuracy": 0.8336925625801086, |
| "num_tokens": 1133288.0, |
| "step": 35 |
| }, |
| { |
| "entropy": 0.6965990558266639, |
| "epoch": 0.11895910780669144, |
| "grad_norm": 0.0279541015625, |
| "learning_rate": 0.00015600000000000002, |
| "loss": 0.7003275871276855, |
| "mean_token_accuracy": 0.8368907004594803, |
| "num_tokens": 1296330.0, |
| "step": 40 |
| }, |
| { |
| "entropy": 0.6499797679483891, |
| "epoch": 0.13382899628252787, |
| "grad_norm": 0.0228271484375, |
| "learning_rate": 0.00017600000000000002, |
| "loss": 0.6792300701141357, |
| "mean_token_accuracy": 0.8395274579524994, |
| "num_tokens": 1458535.0, |
| "step": 45 |
| }, |
| { |
| "entropy": 0.6847037307918071, |
| "epoch": 0.14869888475836432, |
| "grad_norm": 0.02294921875, |
| "learning_rate": 0.000196, |
| "loss": 0.6913325786590576, |
| "mean_token_accuracy": 0.8368435353040695, |
| "num_tokens": 1619838.0, |
| "step": 50 |
| }, |
| { |
| "entropy": 0.6361287623643875, |
| "epoch": 0.16356877323420074, |
| "grad_norm": 0.0263671875, |
| "learning_rate": 0.00019990415784861047, |
| "loss": 0.6388590335845947, |
| "mean_token_accuracy": 0.840661846101284, |
| "num_tokens": 1781356.0, |
| "step": 55 |
| }, |
| { |
| "entropy": 0.6305057637393474, |
| "epoch": 0.17843866171003717, |
| "grad_norm": 0.0233154296875, |
| "learning_rate": 0.00019951511394922507, |
| "loss": 0.6357249736785888, |
| "mean_token_accuracy": 0.8429885223507881, |
| "num_tokens": 1943586.0, |
| "step": 60 |
| }, |
| { |
| "entropy": 0.6157322488725185, |
| "epoch": 0.19330855018587362, |
| "grad_norm": 0.024658203125, |
| "learning_rate": 0.00019882804237803488, |
| "loss": 0.6146057605743408, |
| "mean_token_accuracy": 0.8475824564695358, |
| "num_tokens": 2105272.0, |
| "step": 65 |
| }, |
| { |
| "entropy": 0.6572050869464874, |
| "epoch": 0.20817843866171004, |
| "grad_norm": 0.025390625, |
| "learning_rate": 0.00019784500077565944, |
| "loss": 0.6681472778320312, |
| "mean_token_accuracy": 0.8372571259737015, |
| "num_tokens": 2268167.0, |
| "step": 70 |
| }, |
| { |
| "entropy": 0.5824843347072601, |
| "epoch": 0.22304832713754646, |
| "grad_norm": 0.02294921875, |
| "learning_rate": 0.00019656893315319837, |
| "loss": 0.5855841636657715, |
| "mean_token_accuracy": 0.8480771005153656, |
| "num_tokens": 2430543.0, |
| "step": 75 |
| }, |
| { |
| "entropy": 0.5482360351830721, |
| "epoch": 0.2379182156133829, |
| "grad_norm": 0.018310546875, |
| "learning_rate": 0.00019500366107551252, |
| "loss": 0.5611765861511231, |
| "mean_token_accuracy": 0.8610954374074936, |
| "num_tokens": 2592695.0, |
| "step": 80 |
| }, |
| { |
| "entropy": 0.5686104819178581, |
| "epoch": 0.2527881040892193, |
| "grad_norm": 0.0272216796875, |
| "learning_rate": 0.00019315387221640874, |
| "loss": 0.581544017791748, |
| "mean_token_accuracy": 0.8532564893364907, |
| "num_tokens": 2753981.0, |
| "step": 85 |
| }, |
| { |
| "entropy": 0.49499988108873366, |
| "epoch": 0.26765799256505574, |
| "grad_norm": 0.02001953125, |
| "learning_rate": 0.00019102510632000363, |
| "loss": 0.4944618225097656, |
| "mean_token_accuracy": 0.8736693963408471, |
| "num_tokens": 2915266.0, |
| "step": 90 |
| }, |
| { |
| "entropy": 0.5620223179459571, |
| "epoch": 0.2825278810408922, |
| "grad_norm": 0.018310546875, |
| "learning_rate": 0.00018862373861030837, |
| "loss": 0.5739808082580566, |
| "mean_token_accuracy": 0.8533669888973237, |
| "num_tokens": 3077274.0, |
| "step": 95 |
| }, |
| { |
| "entropy": 0.5610138960182667, |
| "epoch": 0.29739776951672864, |
| "grad_norm": 0.033203125, |
| "learning_rate": 0.00018595696069872013, |
| "loss": 0.5716083526611329, |
| "mean_token_accuracy": 0.8515751019120217, |
| "num_tokens": 3240540.0, |
| "step": 100 |
| }, |
| { |
| "entropy": 0.5545240785926581, |
| "epoch": 0.31226765799256506, |
| "grad_norm": 0.0184326171875, |
| "learning_rate": 0.00018303275904659806, |
| "loss": 0.5515320301055908, |
| "mean_token_accuracy": 0.8615253224968911, |
| "num_tokens": 3403401.0, |
| "step": 105 |
| }, |
| { |
| "entropy": 0.5450488172471524, |
| "epoch": 0.3271375464684015, |
| "grad_norm": 0.0233154296875, |
| "learning_rate": 0.00017985989104742434, |
| "loss": 0.5519495487213135, |
| "mean_token_accuracy": 0.8612264782190323, |
| "num_tokens": 3565026.0, |
| "step": 110 |
| }, |
| { |
| "entropy": 0.5393761422485113, |
| "epoch": 0.3420074349442379, |
| "grad_norm": 0.020263671875, |
| "learning_rate": 0.00017644785880017874, |
| "loss": 0.5512795448303223, |
| "mean_token_accuracy": 0.8637161552906036, |
| "num_tokens": 3727695.0, |
| "step": 115 |
| }, |
| { |
| "entropy": 0.5624090366065502, |
| "epoch": 0.35687732342007433, |
| "grad_norm": 0.0234375, |
| "learning_rate": 0.00017280688065247118, |
| "loss": 0.5684682846069335, |
| "mean_token_accuracy": 0.8566416442394257, |
| "num_tokens": 3889998.0, |
| "step": 120 |
| }, |
| { |
| "entropy": 0.5313072741031647, |
| "epoch": 0.37174721189591076, |
| "grad_norm": 0.0201416015625, |
| "learning_rate": 0.00016894786059865383, |
| "loss": 0.5380096435546875, |
| "mean_token_accuracy": 0.8626979544758797, |
| "num_tokens": 4052483.0, |
| "step": 125 |
| }, |
| { |
| "entropy": 0.5576793540269136, |
| "epoch": 0.38661710037174724, |
| "grad_norm": 0.0205078125, |
| "learning_rate": 0.00016488235562455965, |
| "loss": 0.5612647533416748, |
| "mean_token_accuracy": 0.8576673969626427, |
| "num_tokens": 4215207.0, |
| "step": 130 |
| }, |
| { |
| "entropy": 0.5940531171858311, |
| "epoch": 0.40148698884758366, |
| "grad_norm": 2.765625, |
| "learning_rate": 0.0001606225410966638, |
| "loss": 0.6199069499969483, |
| "mean_token_accuracy": 0.8474377766251564, |
| "num_tokens": 4375245.0, |
| "step": 135 |
| }, |
| { |
| "entropy": 0.6086725879460573, |
| "epoch": 0.4163568773234201, |
| "grad_norm": 0.7421875, |
| "learning_rate": 0.00015618117429931926, |
| "loss": 0.7376153469085693, |
| "mean_token_accuracy": 0.8377696231007576, |
| "num_tokens": 4536593.0, |
| "step": 140 |
| }, |
| { |
| "entropy": 0.6506265237927437, |
| "epoch": 0.4312267657992565, |
| "grad_norm": 0.04150390625, |
| "learning_rate": 0.0001515715562292662, |
| "loss": 0.6828119277954101, |
| "mean_token_accuracy": 0.835132221877575, |
| "num_tokens": 4697609.0, |
| "step": 145 |
| }, |
| { |
| "entropy": 0.5346488334238529, |
| "epoch": 0.44609665427509293, |
| "grad_norm": 0.056884765625, |
| "learning_rate": 0.00014680749176183274, |
| "loss": 0.5391588687896729, |
| "mean_token_accuracy": 0.8637033969163894, |
| "num_tokens": 4859476.0, |
| "step": 150 |
| }, |
| { |
| "entropy": 0.5779553644359112, |
| "epoch": 0.46096654275092935, |
| "grad_norm": 0.0198974609375, |
| "learning_rate": 0.00014190324830812067, |
| "loss": 0.5770033836364746, |
| "mean_token_accuracy": 0.8554806470870971, |
| "num_tokens": 5022222.0, |
| "step": 155 |
| }, |
| { |
| "entropy": 0.5745491735637188, |
| "epoch": 0.4758364312267658, |
| "grad_norm": 0.0218505859375, |
| "learning_rate": 0.00013687351308699027, |
| "loss": 0.5682717323303222, |
| "mean_token_accuracy": 0.8550971180200577, |
| "num_tokens": 5184389.0, |
| "step": 160 |
| }, |
| { |
| "entropy": 0.5769639994949103, |
| "epoch": 0.49070631970260226, |
| "grad_norm": 0.10107421875, |
| "learning_rate": 0.00013173334913980534, |
| "loss": 0.5720061302185059, |
| "mean_token_accuracy": 0.857777065038681, |
| "num_tokens": 5346122.0, |
| "step": 165 |
| }, |
| { |
| "entropy": 0.521759420260787, |
| "epoch": 0.5055762081784386, |
| "grad_norm": 0.0257568359375, |
| "learning_rate": 0.0001264981502196662, |
| "loss": 0.5244236469268799, |
| "mean_token_accuracy": 0.8673782303929329, |
| "num_tokens": 5509440.0, |
| "step": 170 |
| }, |
| { |
| "entropy": 0.5925083503127098, |
| "epoch": 0.5204460966542751, |
| "grad_norm": 0.10400390625, |
| "learning_rate": 0.00012118359469022712, |
| "loss": 0.595769739151001, |
| "mean_token_accuracy": 0.8512872710824013, |
| "num_tokens": 5672412.0, |
| "step": 175 |
| }, |
| { |
| "entropy": 0.6406555585563183, |
| "epoch": 0.5353159851301115, |
| "grad_norm": 0.2451171875, |
| "learning_rate": 0.00011580559857216347, |
| "loss": 0.645458173751831, |
| "mean_token_accuracy": 0.8404021769762039, |
| "num_tokens": 5833737.0, |
| "step": 180 |
| }, |
| { |
| "entropy": 0.5327178973704576, |
| "epoch": 0.550185873605948, |
| "grad_norm": 0.032958984375, |
| "learning_rate": 0.0001103802678779032, |
| "loss": 0.536646842956543, |
| "mean_token_accuracy": 0.8626102104783058, |
| "num_tokens": 5995659.0, |
| "step": 185 |
| }, |
| { |
| "entropy": 0.606436661630869, |
| "epoch": 0.5650557620817844, |
| "grad_norm": 0.0250244140625, |
| "learning_rate": 0.00010492385037737207, |
| "loss": 0.5936649322509766, |
| "mean_token_accuracy": 0.8469037219882012, |
| "num_tokens": 6157375.0, |
| "step": 190 |
| }, |
| { |
| "entropy": 0.5063752841204405, |
| "epoch": 0.5799256505576208, |
| "grad_norm": 0.0274658203125, |
| "learning_rate": 9.945268693920346e-05, |
| "loss": 0.5121519088745117, |
| "mean_token_accuracy": 0.8688234716653824, |
| "num_tokens": 6320049.0, |
| "step": 195 |
| }, |
| { |
| "entropy": 0.5400943882763386, |
| "epoch": 0.5947955390334573, |
| "grad_norm": 0.0224609375, |
| "learning_rate": 9.398316259313637e-05, |
| "loss": 0.5355047225952149, |
| "mean_token_accuracy": 0.8615404218435287, |
| "num_tokens": 6482227.0, |
| "step": 200 |
| }, |
| { |
| "entropy": 0.5946259450167417, |
| "epoch": 0.6096654275092936, |
| "grad_norm": 0.0189208984375, |
| "learning_rate": 8.853165746015997e-05, |
| "loss": 0.5926107883453369, |
| "mean_token_accuracy": 0.8485366463661194, |
| "num_tokens": 6644489.0, |
| "step": 205 |
| }, |
| { |
| "entropy": 0.5018576122820377, |
| "epoch": 0.6245353159851301, |
| "grad_norm": 0.0191650390625, |
| "learning_rate": 8.311449769735873e-05, |
| "loss": 0.5042452335357666, |
| "mean_token_accuracy": 0.8692936778068543, |
| "num_tokens": 6806204.0, |
| "step": 210 |
| }, |
| { |
| "entropy": 0.5548792567104102, |
| "epoch": 0.6394052044609665, |
| "grad_norm": 0.0223388671875, |
| "learning_rate": 7.774790660436858e-05, |
| "loss": 0.563843059539795, |
| "mean_token_accuracy": 0.8553685575723649, |
| "num_tokens": 6967855.0, |
| "step": 215 |
| }, |
| { |
| "entropy": 0.5388505697250366, |
| "epoch": 0.654275092936803, |
| "grad_norm": 0.0174560546875, |
| "learning_rate": 7.244795603787036e-05, |
| "loss": 0.5390424728393555, |
| "mean_token_accuracy": 0.8610016539692879, |
| "num_tokens": 7128748.0, |
| "step": 220 |
| }, |
| { |
| "entropy": 0.5537650570273399, |
| "epoch": 0.6691449814126395, |
| "grad_norm": 0.0191650390625, |
| "learning_rate": 6.723051827962445e-05, |
| "loss": 0.54982008934021, |
| "mean_token_accuracy": 0.8591041445732117, |
| "num_tokens": 7290603.0, |
| "step": 225 |
| }, |
| { |
| "entropy": 0.5197319515049458, |
| "epoch": 0.6840148698884758, |
| "grad_norm": 0.017822265625, |
| "learning_rate": 6.211121850219175e-05, |
| "loss": 0.5279690265655518, |
| "mean_token_accuracy": 0.8655778139829635, |
| "num_tokens": 7453375.0, |
| "step": 230 |
| }, |
| { |
| "entropy": 0.49672888703644275, |
| "epoch": 0.6988847583643123, |
| "grad_norm": 0.0198974609375, |
| "learning_rate": 5.7105387974697063e-05, |
| "loss": 0.4927337646484375, |
| "mean_token_accuracy": 0.8734062314033508, |
| "num_tokens": 7616243.0, |
| "step": 235 |
| }, |
| { |
| "entropy": 0.5233525596559048, |
| "epoch": 0.7137546468401487, |
| "grad_norm": 0.0196533203125, |
| "learning_rate": 5.222801814877369e-05, |
| "loss": 0.5159758567810059, |
| "mean_token_accuracy": 0.8658116608858109, |
| "num_tokens": 7776719.0, |
| "step": 240 |
| }, |
| { |
| "entropy": 0.515385128930211, |
| "epoch": 0.7286245353159851, |
| "grad_norm": 0.0177001953125, |
| "learning_rate": 4.749371576219317e-05, |
| "loss": 0.5128642559051514, |
| "mean_token_accuracy": 0.8703905552625656, |
| "num_tokens": 7938195.0, |
| "step": 245 |
| }, |
| { |
| "entropy": 0.4934384971857071, |
| "epoch": 0.7434944237918215, |
| "grad_norm": 0.0216064453125, |
| "learning_rate": 4.291665909463477e-05, |
| "loss": 0.4990520477294922, |
| "mean_token_accuracy": 0.8704771339893341, |
| "num_tokens": 8098729.0, |
| "step": 250 |
| }, |
| { |
| "entropy": 0.5353568136692047, |
| "epoch": 0.758364312267658, |
| "grad_norm": 0.02294921875, |
| "learning_rate": 3.8510555506600974e-05, |
| "loss": 0.542482566833496, |
| "mean_token_accuracy": 0.8630232095718384, |
| "num_tokens": 8261835.0, |
| "step": 255 |
| }, |
| { |
| "entropy": 0.48506755754351616, |
| "epoch": 0.7732342007434945, |
| "grad_norm": 0.021484375, |
| "learning_rate": 3.4288600388640714e-05, |
| "loss": 0.4917303085327148, |
| "mean_token_accuracy": 0.8723696529865265, |
| "num_tokens": 8423616.0, |
| "step": 260 |
| }, |
| { |
| "entropy": 0.5245153240859508, |
| "epoch": 0.7881040892193308, |
| "grad_norm": 0.019287109375, |
| "learning_rate": 3.026343764381887e-05, |
| "loss": 0.5242561340332031, |
| "mean_token_accuracy": 0.8671647250652313, |
| "num_tokens": 8585786.0, |
| "step": 265 |
| }, |
| { |
| "entropy": 0.525696974992752, |
| "epoch": 0.8029739776951673, |
| "grad_norm": 0.0245361328125, |
| "learning_rate": 2.6447121821779917e-05, |
| "loss": 0.5204005718231202, |
| "mean_token_accuracy": 0.8655787914991379, |
| "num_tokens": 8747495.0, |
| "step": 270 |
| }, |
| { |
| "entropy": 0.44850245080888274, |
| "epoch": 0.8178438661710037, |
| "grad_norm": 0.0201416015625, |
| "learning_rate": 2.2851082017805703e-05, |
| "loss": 0.4419082164764404, |
| "mean_token_accuracy": 0.8838535219430923, |
| "num_tokens": 8909848.0, |
| "step": 275 |
| }, |
| { |
| "entropy": 0.583437193930149, |
| "epoch": 0.8327137546468402, |
| "grad_norm": 0.02099609375, |
| "learning_rate": 1.9486087644983054e-05, |
| "loss": 0.5837182521820068, |
| "mean_token_accuracy": 0.8495006680488586, |
| "num_tokens": 9069163.0, |
| "step": 280 |
| }, |
| { |
| "entropy": 0.5175457876175642, |
| "epoch": 0.8475836431226765, |
| "grad_norm": 0.0245361328125, |
| "learning_rate": 1.6362216181986002e-05, |
| "loss": 0.5189806461334229, |
| "mean_token_accuracy": 0.8652941584587097, |
| "num_tokens": 9230950.0, |
| "step": 285 |
| }, |
| { |
| "entropy": 0.5524289276450872, |
| "epoch": 0.862453531598513, |
| "grad_norm": 0.0216064453125, |
| "learning_rate": 1.3488822993062089e-05, |
| "loss": 0.5572507858276368, |
| "mean_token_accuracy": 0.8533222541213036, |
| "num_tokens": 9392141.0, |
| "step": 290 |
| }, |
| { |
| "entropy": 0.5627229742705822, |
| "epoch": 0.8773234200743495, |
| "grad_norm": 0.0247802734375, |
| "learning_rate": 1.0874513310605628e-05, |
| "loss": 0.5729455471038818, |
| "mean_token_accuracy": 0.8517941504716873, |
| "num_tokens": 9553843.0, |
| "step": 295 |
| }, |
| { |
| "entropy": 0.49910875745117667, |
| "epoch": 0.8921933085501859, |
| "grad_norm": 0.0238037109375, |
| "learning_rate": 8.527116464224127e-06, |
| "loss": 0.4991349697113037, |
| "mean_token_accuracy": 0.8706730246543884, |
| "num_tokens": 9716187.0, |
| "step": 300 |
| }, |
| { |
| "entropy": 0.4898031514137983, |
| "epoch": 0.9070631970260223, |
| "grad_norm": 0.0205078125, |
| "learning_rate": 6.453662433477136e-06, |
| "loss": 0.4925398826599121, |
| "mean_token_accuracy": 0.8753444463014602, |
| "num_tokens": 9879023.0, |
| "step": 305 |
| }, |
| { |
| "entropy": 0.45501707717776296, |
| "epoch": 0.9219330855018587, |
| "grad_norm": 0.0203857421875, |
| "learning_rate": 4.660360794506946e-06, |
| "loss": 0.4548198699951172, |
| "mean_token_accuracy": 0.8821182236075401, |
| "num_tokens": 10041800.0, |
| "step": 310 |
| }, |
| { |
| "entropy": 0.5240208253264427, |
| "epoch": 0.9368029739776952, |
| "grad_norm": 0.0233154296875, |
| "learning_rate": 3.1525821236119577e-06, |
| "loss": 0.5236988067626953, |
| "mean_token_accuracy": 0.8641670763492584, |
| "num_tokens": 10204694.0, |
| "step": 315 |
| }, |
| { |
| "entropy": 0.538949977979064, |
| "epoch": 0.9516728624535316, |
| "grad_norm": 0.0220947265625, |
| "learning_rate": 1.934841913455032e-06, |
| "loss": 0.5439452648162841, |
| "mean_token_accuracy": 0.8550667986273766, |
| "num_tokens": 10366660.0, |
| "step": 320 |
| }, |
| { |
| "entropy": 0.5102543152868748, |
| "epoch": 0.966542750929368, |
| "grad_norm": 0.018310546875, |
| "learning_rate": 1.010787050074835e-06, |
| "loss": 0.5104735374450684, |
| "mean_token_accuracy": 0.8640209168195725, |
| "num_tokens": 10529304.0, |
| "step": 325 |
| }, |
| { |
| "entropy": 0.4799253273755312, |
| "epoch": 0.9814126394052045, |
| "grad_norm": 0.019287109375, |
| "learning_rate": 3.831848911984959e-07, |
| "loss": 0.47628107070922854, |
| "mean_token_accuracy": 0.8758200943470001, |
| "num_tokens": 10692105.0, |
| "step": 330 |
| }, |
| { |
| "entropy": 0.45510734170675277, |
| "epoch": 0.9962825278810409, |
| "grad_norm": 0.0213623046875, |
| "learning_rate": 5.391497856399585e-08, |
| "loss": 0.4581630229949951, |
| "mean_token_accuracy": 0.8793978497385979, |
| "num_tokens": 10854094.0, |
| "step": 335 |
| } |
| ], |
| "logging_steps": 5, |
| "max_steps": 337, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 999999, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.965993164065407e+18, |
| "train_batch_size": 4, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|