{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 3.0,
  "eval_steps": 500,
  "global_step": 2148,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0069881201956673656,
      "grad_norm": 1.4883168935775757,
      "learning_rate": 7.407407407407407e-07,
      "loss": 1.0398,
      "mean_token_accuracy": 0.6855247735977172,
      "num_tokens": 10370767.0,
      "step": 5
    },
    {
      "epoch": 0.013976240391334731,
      "grad_norm": 0.8389180898666382,
      "learning_rate": 1.6666666666666667e-06,
      "loss": 1.0478,
      "mean_token_accuracy": 0.6838523238897324,
      "num_tokens": 20777189.0,
      "step": 10
    },
    {
      "epoch": 0.020964360587002098,
      "grad_norm": 0.9392557740211487,
      "learning_rate": 2.5925925925925925e-06,
      "loss": 1.0355,
      "mean_token_accuracy": 0.686600410938263,
      "num_tokens": 31178208.0,
      "step": 15
    },
    {
      "epoch": 0.027952480782669462,
      "grad_norm": 0.5707754492759705,
      "learning_rate": 3.5185185185185187e-06,
      "loss": 1.0352,
      "mean_token_accuracy": 0.686421462893486,
      "num_tokens": 41592308.0,
      "step": 20
    },
    {
      "epoch": 0.03494060097833683,
      "grad_norm": 0.6185972094535828,
      "learning_rate": 4.444444444444444e-06,
      "loss": 1.032,
      "mean_token_accuracy": 0.6872807204723358,
      "num_tokens": 51979715.0,
      "step": 25
    },
    {
      "epoch": 0.041928721174004195,
      "grad_norm": 0.463251531124115,
      "learning_rate": 5.370370370370371e-06,
      "loss": 1.0152,
      "mean_token_accuracy": 0.6908860236406327,
      "num_tokens": 62394982.0,
      "step": 30
    },
    {
      "epoch": 0.04891684136967156,
      "grad_norm": 0.3636445999145508,
      "learning_rate": 6.296296296296297e-06,
      "loss": 1.0179,
      "mean_token_accuracy": 0.6905231744050979,
      "num_tokens": 72803742.0,
      "step": 35
    },
    {
      "epoch": 0.055904961565338925,
      "grad_norm": 0.2937327027320862,
      "learning_rate": 7.222222222222223e-06,
      "loss": 1.0266,
      "mean_token_accuracy": 0.6888427168130875,
      "num_tokens": 83202891.0,
      "step": 40
    },
    {
      "epoch": 0.06289308176100629,
      "grad_norm": 0.2677152156829834,
      "learning_rate": 8.148148148148148e-06,
      "loss": 1.0124,
      "mean_token_accuracy": 0.6918304771184921,
      "num_tokens": 93587913.0,
      "step": 45
    },
    {
      "epoch": 0.06988120195667366,
      "grad_norm": 0.2599441707134247,
      "learning_rate": 9.074074074074075e-06,
      "loss": 1.0038,
      "mean_token_accuracy": 0.6940747231245041,
      "num_tokens": 103964666.0,
      "step": 50
    },
    {
      "epoch": 0.07686932215234102,
      "grad_norm": 0.2430637627840042,
      "learning_rate": 1e-05,
      "loss": 0.9976,
      "mean_token_accuracy": 0.6953895181417465,
      "num_tokens": 114353247.0,
      "step": 55
    },
    {
      "epoch": 0.08385744234800839,
      "grad_norm": 0.25146690011024475,
      "learning_rate": 1.0925925925925926e-05,
      "loss": 1.0005,
      "mean_token_accuracy": 0.694663581252098,
      "num_tokens": 124731779.0,
      "step": 60
    },
    {
      "epoch": 0.09084556254367575,
      "grad_norm": 0.22198280692100525,
      "learning_rate": 1.1851851851851852e-05,
      "loss": 1.0101,
      "mean_token_accuracy": 0.6926915943622589,
      "num_tokens": 135144687.0,
      "step": 65
    },
    {
      "epoch": 0.09783368273934312,
      "grad_norm": 0.22740793228149414,
      "learning_rate": 1.2777777777777777e-05,
      "loss": 1.0097,
      "mean_token_accuracy": 0.6922657936811447,
      "num_tokens": 145526205.0,
      "step": 70
    },
    {
      "epoch": 0.10482180293501048,
      "grad_norm": 0.22107183933258057,
      "learning_rate": 1.3703703703703706e-05,
      "loss": 1.0119,
      "mean_token_accuracy": 0.6913794606924057,
      "num_tokens": 155919187.0,
      "step": 75
    },
    {
      "epoch": 0.11180992313067785,
      "grad_norm": 0.27914658188819885,
      "learning_rate": 1.4629629629629631e-05,
      "loss": 1.0028,
      "mean_token_accuracy": 0.6938924849033355,
      "num_tokens": 166311147.0,
      "step": 80
    },
    {
      "epoch": 0.1187980433263452,
      "grad_norm": 0.2629740238189697,
      "learning_rate": 1.555555555555556e-05,
      "loss": 1.0155,
      "mean_token_accuracy": 0.6906299769878388,
      "num_tokens": 176702317.0,
      "step": 85
    },
    {
      "epoch": 0.12578616352201258,
      "grad_norm": 0.2621951997280121,
      "learning_rate": 1.6481481481481482e-05,
      "loss": 1.0055,
      "mean_token_accuracy": 0.6934900671243668,
      "num_tokens": 187121773.0,
      "step": 90
    },
    {
      "epoch": 0.13277428371767994,
      "grad_norm": 0.24768415093421936,
      "learning_rate": 1.740740740740741e-05,
      "loss": 1.0067,
      "mean_token_accuracy": 0.6931978613138199,
      "num_tokens": 197538039.0,
      "step": 95
    },
    {
      "epoch": 0.13976240391334732,
      "grad_norm": 0.279287189245224,
      "learning_rate": 1.8333333333333333e-05,
      "loss": 1.0027,
      "mean_token_accuracy": 0.6943970769643784,
      "num_tokens": 207934697.0,
      "step": 100
    },
    {
      "epoch": 0.14675052410901468,
      "grad_norm": 0.4129040241241455,
      "learning_rate": 1.925925925925926e-05,
      "loss": 1.0033,
      "mean_token_accuracy": 0.6932864665985108,
      "num_tokens": 218334496.0,
      "step": 105
    },
    {
      "epoch": 0.15373864430468204,
      "grad_norm": 0.5416187644004822,
      "learning_rate": 1.9999991106543355e-05,
      "loss": 1.0084,
      "mean_token_accuracy": 0.6928850650787354,
      "num_tokens": 228759368.0,
      "step": 110
    },
    {
      "epoch": 0.1607267645003494,
      "grad_norm": 0.4506029188632965,
      "learning_rate": 1.999967983777526e-05,
      "loss": 1.0188,
      "mean_token_accuracy": 0.690243837237358,
      "num_tokens": 239138531.0,
      "step": 115
    },
    {
      "epoch": 0.16771488469601678,
      "grad_norm": 0.273882120847702,
      "learning_rate": 1.9998923917266123e-05,
      "loss": 1.0004,
      "mean_token_accuracy": 0.6942124694585801,
      "num_tokens": 249518869.0,
      "step": 120
    },
    {
      "epoch": 0.17470300489168414,
      "grad_norm": 0.32800787687301636,
      "learning_rate": 1.999772338983404e-05,
      "loss": 0.9993,
      "mean_token_accuracy": 0.6946051716804504,
      "num_tokens": 259936610.0,
      "step": 125
    },
    {
      "epoch": 0.1816911250873515,
      "grad_norm": 0.35255053639411926,
      "learning_rate": 1.9996078326657592e-05,
      "loss": 1.0021,
      "mean_token_accuracy": 0.6943468749523163,
      "num_tokens": 270341806.0,
      "step": 130
    },
    {
      "epoch": 0.18867924528301888,
      "grad_norm": 0.35514041781425476,
      "learning_rate": 1.9993988825271643e-05,
      "loss": 1.0001,
      "mean_token_accuracy": 0.6942067295312881,
      "num_tokens": 280749074.0,
      "step": 135
    },
    {
      "epoch": 0.19566736547868624,
      "grad_norm": 0.31045007705688477,
      "learning_rate": 1.9991455009561535e-05,
      "loss": 1.0009,
      "mean_token_accuracy": 0.694583398103714,
      "num_tokens": 291068981.0,
      "step": 140
    },
    {
      "epoch": 0.2026554856743536,
      "grad_norm": 0.37079402804374695,
      "learning_rate": 1.9988477029755756e-05,
      "loss": 0.9966,
      "mean_token_accuracy": 0.6956235706806183,
      "num_tokens": 301442780.0,
      "step": 145
    },
    {
      "epoch": 0.20964360587002095,
      "grad_norm": 0.3016902208328247,
      "learning_rate": 1.9985055062417034e-05,
      "loss": 1.0004,
      "mean_token_accuracy": 0.6947520196437835,
      "num_tokens": 311853604.0,
      "step": 150
    },
    {
      "epoch": 0.21663172606568834,
      "grad_norm": 0.3417947292327881,
      "learning_rate": 1.9981189310431857e-05,
      "loss": 0.9993,
      "mean_token_accuracy": 0.6947573721408844,
      "num_tokens": 322261710.0,
      "step": 155
    },
    {
      "epoch": 0.2236198462613557,
      "grad_norm": 0.3535808324813843,
      "learning_rate": 1.9976880002998463e-05,
      "loss": 0.9922,
      "mean_token_accuracy": 0.6957294464111328,
      "num_tokens": 332668576.0,
      "step": 160
    },
    {
      "epoch": 0.23060796645702306,
      "grad_norm": 0.2367454469203949,
      "learning_rate": 1.9972127395613225e-05,
      "loss": 0.9972,
      "mean_token_accuracy": 0.6954801857471467,
      "num_tokens": 343060718.0,
      "step": 165
    },
    {
      "epoch": 0.2375960866526904,
      "grad_norm": 0.26334941387176514,
      "learning_rate": 1.9966931770055523e-05,
      "loss": 0.995,
      "mean_token_accuracy": 0.6958191096782684,
      "num_tokens": 353461725.0,
      "step": 170
    },
    {
      "epoch": 0.2445842068483578,
      "grad_norm": 0.3441227078437805,
      "learning_rate": 1.9961293434371044e-05,
      "loss": 1.0044,
      "mean_token_accuracy": 0.6938858300447464,
      "num_tokens": 363838201.0,
      "step": 175
    },
    {
      "epoch": 0.25157232704402516,
      "grad_norm": 0.2843206822872162,
      "learning_rate": 1.9955212722853487e-05,
      "loss": 0.9995,
      "mean_token_accuracy": 0.6946384638547898,
      "num_tokens": 374258478.0,
      "step": 180
    },
    {
      "epoch": 0.2585604472396925,
      "grad_norm": 0.2655826210975647,
      "learning_rate": 1.9948689996024773e-05,
      "loss": 0.9981,
      "mean_token_accuracy": 0.6951499044895172,
      "num_tokens": 384649889.0,
      "step": 185
    },
    {
      "epoch": 0.2655485674353599,
      "grad_norm": 0.3091617822647095,
      "learning_rate": 1.9941725640613655e-05,
      "loss": 0.9909,
      "mean_token_accuracy": 0.6968458265066146,
      "num_tokens": 395029124.0,
      "step": 190
    },
    {
      "epoch": 0.27253668763102723,
      "grad_norm": 0.3030296564102173,
      "learning_rate": 1.9934320069532797e-05,
      "loss": 0.9942,
      "mean_token_accuracy": 0.6959166437387466,
      "num_tokens": 405425365.0,
      "step": 195
    },
    {
      "epoch": 0.27952480782669464,
      "grad_norm": 0.25540319085121155,
      "learning_rate": 1.9926473721854284e-05,
      "loss": 1.0046,
      "mean_token_accuracy": 0.6931773543357849,
      "num_tokens": 415831263.0,
      "step": 200
    },
    {
      "epoch": 0.286512928022362,
      "grad_norm": 0.33107173442840576,
      "learning_rate": 1.99181870627836e-05,
      "loss": 1.0116,
      "mean_token_accuracy": 0.6916185826063156,
      "num_tokens": 426180521.0,
      "step": 205
    },
    {
      "epoch": 0.29350104821802936,
      "grad_norm": 0.39433753490448,
      "learning_rate": 1.990946058363202e-05,
      "loss": 0.9969,
      "mean_token_accuracy": 0.6951790243387223,
      "num_tokens": 436550729.0,
      "step": 210
    },
    {
      "epoch": 0.3004891684136967,
      "grad_norm": 0.382136732339859,
      "learning_rate": 1.9900294801787537e-05,
      "loss": 0.9978,
      "mean_token_accuracy": 0.6952319622039795,
      "num_tokens": 446953885.0,
      "step": 215
    },
    {
      "epoch": 0.3074772886093641,
      "grad_norm": 0.3471049666404724,
      "learning_rate": 1.9890690260684122e-05,
      "loss": 1.009,
      "mean_token_accuracy": 0.6918054640293121,
      "num_tokens": 457368923.0,
      "step": 220
    },
    {
      "epoch": 0.31446540880503143,
      "grad_norm": 0.2898043692111969,
      "learning_rate": 1.9880647529769536e-05,
      "loss": 0.9957,
      "mean_token_accuracy": 0.6954202592372895,
      "num_tokens": 467745509.0,
      "step": 225
    },
    {
      "epoch": 0.3214535290006988,
      "grad_norm": 0.2959822118282318,
      "learning_rate": 1.987016720447157e-05,
      "loss": 1.0166,
      "mean_token_accuracy": 0.6907585173845291,
      "num_tokens": 478125444.0,
      "step": 230
    },
    {
      "epoch": 0.3284416491963662,
      "grad_norm": 0.33595606684684753,
      "learning_rate": 1.985924990616274e-05,
      "loss": 0.9994,
      "mean_token_accuracy": 0.6947229593992233,
      "num_tokens": 488548530.0,
      "step": 235
    },
    {
      "epoch": 0.33542976939203356,
      "grad_norm": 0.3360680937767029,
      "learning_rate": 1.9847896282123435e-05,
      "loss": 0.9982,
      "mean_token_accuracy": 0.6951431065797806,
      "num_tokens": 498959347.0,
      "step": 240
    },
    {
      "epoch": 0.3424178895877009,
      "grad_norm": 0.2827513813972473,
      "learning_rate": 1.9836107005503543e-05,
      "loss": 0.9966,
      "mean_token_accuracy": 0.6951984494924546,
      "num_tokens": 509357796.0,
      "step": 245
    },
    {
      "epoch": 0.3494060097833683,
      "grad_norm": 0.426392138004303,
      "learning_rate": 1.982388277528256e-05,
      "loss": 0.9975,
      "mean_token_accuracy": 0.6952957093715668,
      "num_tokens": 519757159.0,
      "step": 250
    },
    {
      "epoch": 0.35639412997903563,
      "grad_norm": 0.36645272374153137,
      "learning_rate": 1.981122431622813e-05,
      "loss": 0.9955,
      "mean_token_accuracy": 0.6955067068338394,
      "num_tokens": 530137537.0,
      "step": 255
    },
    {
      "epoch": 0.363382250174703,
      "grad_norm": 0.30606377124786377,
      "learning_rate": 1.979813237885306e-05,
      "loss": 1.0024,
      "mean_token_accuracy": 0.6938055425882339,
      "num_tokens": 540516735.0,
      "step": 260
    },
    {
      "epoch": 0.37037037037037035,
      "grad_norm": 0.33657437562942505,
      "learning_rate": 1.9784607739370854e-05,
      "loss": 1.0008,
      "mean_token_accuracy": 0.6939801007509232,
      "num_tokens": 550924844.0,
      "step": 265
    },
    {
      "epoch": 0.37735849056603776,
      "grad_norm": 0.3700354993343353,
      "learning_rate": 1.9770651199649684e-05,
      "loss": 1.008,
      "mean_token_accuracy": 0.6923987686634063,
      "num_tokens": 561322928.0,
      "step": 270
    },
    {
      "epoch": 0.3843466107617051,
      "grad_norm": 0.4612869918346405,
      "learning_rate": 1.9756263587164823e-05,
      "loss": 0.9964,
      "mean_token_accuracy": 0.6956883102655411,
      "num_tokens": 571719820.0,
      "step": 275
    },
    {
      "epoch": 0.3913347309573725,
      "grad_norm": 0.28541725873947144,
      "learning_rate": 1.974144575494961e-05,
      "loss": 1.0052,
      "mean_token_accuracy": 0.6935481756925583,
      "num_tokens": 582133553.0,
      "step": 280
    },
    {
      "epoch": 0.39832285115303984,
      "grad_norm": 0.27733850479125977,
      "learning_rate": 1.9726198581544875e-05,
      "loss": 1.0038,
      "mean_token_accuracy": 0.6932170450687408,
      "num_tokens": 592552862.0,
      "step": 285
    },
    {
      "epoch": 0.4053109713487072,
      "grad_norm": 0.25651562213897705,
      "learning_rate": 1.971052297094683e-05,
      "loss": 0.9981,
      "mean_token_accuracy": 0.6951008081436157,
      "num_tokens": 602969229.0,
      "step": 290
    },
    {
      "epoch": 0.41229909154437455,
      "grad_norm": 0.2750900685787201,
      "learning_rate": 1.969441985255349e-05,
      "loss": 0.9995,
      "mean_token_accuracy": 0.6946498632431031,
      "num_tokens": 613327395.0,
      "step": 295
    },
    {
      "epoch": 0.4192872117400419,
      "grad_norm": 0.3030959665775299,
      "learning_rate": 1.967789018110956e-05,
      "loss": 0.9938,
      "mean_token_accuracy": 0.6957368463277817,
      "num_tokens": 623734734.0,
      "step": 300
    },
    {
      "epoch": 0.42627533193570927,
      "grad_norm": 0.2968519330024719,
      "learning_rate": 1.9660934936649836e-05,
      "loss": 0.9986,
      "mean_token_accuracy": 0.6947296649217606,
      "num_tokens": 634141013.0,
      "step": 305
    },
    {
      "epoch": 0.4332634521313767,
      "grad_norm": 0.32919952273368835,
      "learning_rate": 1.9643555124441093e-05,
      "loss": 0.994,
      "mean_token_accuracy": 0.6957409083843231,
      "num_tokens": 644493377.0,
      "step": 310
    },
    {
      "epoch": 0.44025157232704404,
      "grad_norm": 0.36588236689567566,
      "learning_rate": 1.9625751774922485e-05,
      "loss": 0.9918,
      "mean_token_accuracy": 0.6964637786149979,
      "num_tokens": 654861392.0,
      "step": 315
    },
    {
      "epoch": 0.4472396925227114,
      "grad_norm": 0.3859914541244507,
      "learning_rate": 1.9607525943644458e-05,
      "loss": 0.997,
      "mean_token_accuracy": 0.6950535237789154,
      "num_tokens": 665281740.0,
      "step": 320
    },
    {
      "epoch": 0.45422781271837875,
      "grad_norm": 0.35685864090919495,
      "learning_rate": 1.9588878711206147e-05,
      "loss": 0.9942,
      "mean_token_accuracy": 0.6956727713346481,
      "num_tokens": 675674938.0,
      "step": 325
    },
    {
      "epoch": 0.4612159329140461,
      "grad_norm": 0.29461824893951416,
      "learning_rate": 1.956981118319134e-05,
      "loss": 1.002,
      "mean_token_accuracy": 0.6937934666872024,
      "num_tokens": 686085726.0,
      "step": 330
    },
    {
      "epoch": 0.46820405310971347,
      "grad_norm": 0.25779464840888977,
      "learning_rate": 1.9550324490102893e-05,
      "loss": 0.9996,
      "mean_token_accuracy": 0.6942994087934494,
      "num_tokens": 696519911.0,
      "step": 335
    },
    {
      "epoch": 0.4751921733053808,
      "grad_norm": 0.32080188393592834,
      "learning_rate": 1.9530419787295732e-05,
      "loss": 1.0017,
      "mean_token_accuracy": 0.6941420465707779,
      "num_tokens": 706915324.0,
      "step": 340
    },
    {
      "epoch": 0.48218029350104824,
      "grad_norm": 0.3333379030227661,
      "learning_rate": 1.951009825490833e-05,
      "loss": 0.9978,
      "mean_token_accuracy": 0.6944221198558808,
      "num_tokens": 717279358.0,
      "step": 345
    },
    {
      "epoch": 0.4891684136967156,
      "grad_norm": 0.2867525517940521,
      "learning_rate": 1.9489361097792763e-05,
      "loss": 0.9956,
      "mean_token_accuracy": 0.6951427847146988,
      "num_tokens": 727668296.0,
      "step": 350
    },
    {
      "epoch": 0.49615653389238296,
      "grad_norm": 0.28376054763793945,
      "learning_rate": 1.9468209545443243e-05,
      "loss": 0.9947,
      "mean_token_accuracy": 0.6956383109092712,
      "num_tokens": 738059747.0,
      "step": 355
    },
    {
      "epoch": 0.5031446540880503,
      "grad_norm": 0.3152044117450714,
      "learning_rate": 1.944664485192325e-05,
      "loss": 0.9943,
      "mean_token_accuracy": 0.6953181773424149,
      "num_tokens": 748421972.0,
      "step": 360
    },
    {
      "epoch": 0.5101327742837177,
      "grad_norm": 0.30702683329582214,
      "learning_rate": 1.9424668295791172e-05,
      "loss": 0.9916,
      "mean_token_accuracy": 0.696011558175087,
      "num_tokens": 758808879.0,
      "step": 365
    },
    {
      "epoch": 0.517120894479385,
      "grad_norm": 0.32153916358947754,
      "learning_rate": 1.9402281180024483e-05,
      "loss": 0.9981,
      "mean_token_accuracy": 0.6946611881256104,
      "num_tokens": 769184194.0,
      "step": 370
    },
    {
      "epoch": 0.5241090146750524,
      "grad_norm": 0.25884702801704407,
      "learning_rate": 1.9379484831942516e-05,
      "loss": 0.9936,
      "mean_token_accuracy": 0.6954076588153839,
      "num_tokens": 779577281.0,
      "step": 375
    },
    {
      "epoch": 0.5310971348707197,
      "grad_norm": 0.33953726291656494,
      "learning_rate": 1.9356280603127745e-05,
      "loss": 0.9923,
      "mean_token_accuracy": 0.6961508899927139,
      "num_tokens": 789973447.0,
      "step": 380
    },
    {
      "epoch": 0.5380852550663872,
      "grad_norm": 0.26376229524612427,
      "learning_rate": 1.9332669869345663e-05,
      "loss": 0.9867,
      "mean_token_accuracy": 0.697160255908966,
      "num_tokens": 800391253.0,
      "step": 385
    },
    {
      "epoch": 0.5450733752620545,
      "grad_norm": 0.26495668292045593,
      "learning_rate": 1.930865403046322e-05,
      "loss": 0.9986,
      "mean_token_accuracy": 0.6942210704088211,
      "num_tokens": 810774695.0,
      "step": 390
    },
    {
      "epoch": 0.5520614954577219,
      "grad_norm": 0.35614827275276184,
      "learning_rate": 1.9284234510365808e-05,
      "loss": 0.9884,
      "mean_token_accuracy": 0.6969966471195221,
      "num_tokens": 821116961.0,
      "step": 395
    },
    {
      "epoch": 0.5590496156533893,
      "grad_norm": 0.3040587604045868,
      "learning_rate": 1.9259412756872843e-05,
      "loss": 0.9894,
      "mean_token_accuracy": 0.6961709499359131,
      "num_tokens": 831489732.0,
      "step": 400
    },
    {
      "epoch": 0.5660377358490566,
      "grad_norm": 0.4788316786289215,
      "learning_rate": 1.923419024165194e-05,
      "loss": 1.0035,
      "mean_token_accuracy": 0.6934469550848007,
      "num_tokens": 841868264.0,
      "step": 405
    },
    {
      "epoch": 0.573025856044724,
      "grad_norm": 0.26638856530189514,
      "learning_rate": 1.9208568460131638e-05,
      "loss": 0.988,
      "mean_token_accuracy": 0.6969901770353317,
      "num_tokens": 852276790.0,
      "step": 410
    },
    {
      "epoch": 0.5800139762403913,
      "grad_norm": 0.29064929485321045,
      "learning_rate": 1.918254893141276e-05,
      "loss": 0.988,
      "mean_token_accuracy": 0.6970887899398803,
      "num_tokens": 862638829.0,
      "step": 415
    },
    {
      "epoch": 0.5870020964360587,
      "grad_norm": 0.3532664179801941,
      "learning_rate": 1.9156133198178325e-05,
      "loss": 0.9957,
      "mean_token_accuracy": 0.6949932664632797,
      "num_tokens": 873044735.0,
      "step": 420
    },
    {
      "epoch": 0.593990216631726,
      "grad_norm": 0.26006296277046204,
      "learning_rate": 1.9129322826602093e-05,
      "loss": 0.9834,
      "mean_token_accuracy": 0.6978975415229798,
      "num_tokens": 883461462.0,
      "step": 425
    },
    {
      "epoch": 0.6009783368273934,
      "grad_norm": 0.30922138690948486,
      "learning_rate": 1.9102119406255704e-05,
      "loss": 0.9917,
      "mean_token_accuracy": 0.6963249862194061,
      "num_tokens": 893808723.0,
      "step": 430
    },
    {
      "epoch": 0.6079664570230608,
      "grad_norm": 0.2605006694793701,
      "learning_rate": 1.907452455001444e-05,
      "loss": 0.9922,
      "mean_token_accuracy": 0.6961820662021637,
      "num_tokens": 904174818.0,
      "step": 435
    },
    {
      "epoch": 0.6149545772187281,
      "grad_norm": 0.3060205578804016,
      "learning_rate": 1.9046539893961596e-05,
      "loss": 0.9858,
      "mean_token_accuracy": 0.6980990767478943,
      "num_tokens": 914558969.0,
      "step": 440
    },
    {
      "epoch": 0.6219426974143956,
      "grad_norm": 0.3001151382923126,
      "learning_rate": 1.9018167097291465e-05,
      "loss": 1.0048,
      "mean_token_accuracy": 0.6929802745580673,
      "num_tokens": 924948007.0,
      "step": 445
    },
    {
      "epoch": 0.6289308176100629,
      "grad_norm": 0.2708394527435303,
      "learning_rate": 1.8989407842210986e-05,
      "loss": 1.0065,
      "mean_token_accuracy": 0.6926934719085693,
      "num_tokens": 935357079.0,
      "step": 450
    },
    {
      "epoch": 0.6359189378057303,
      "grad_norm": 0.29378563165664673,
      "learning_rate": 1.8960263833839993e-05,
      "loss": 0.991,
      "mean_token_accuracy": 0.6963881731033326,
      "num_tokens": 945773313.0,
      "step": 455
    },
    {
      "epoch": 0.6429070580013976,
      "grad_norm": 0.34197095036506653,
      "learning_rate": 1.8930736800110124e-05,
      "loss": 0.9877,
      "mean_token_accuracy": 0.6973118752241134,
      "num_tokens": 956143314.0,
      "step": 460
    },
    {
      "epoch": 0.649895178197065,
      "grad_norm": 0.30472254753112793,
      "learning_rate": 1.8900828491662372e-05,
      "loss": 0.9852,
      "mean_token_accuracy": 0.6980044454336166,
      "num_tokens": 966548893.0,
      "step": 465
    },
    {
      "epoch": 0.6568832983927324,
      "grad_norm": 0.2430526465177536,
      "learning_rate": 1.8870540681743288e-05,
      "loss": 0.9842,
      "mean_token_accuracy": 0.6978294104337692,
      "num_tokens": 976941511.0,
      "step": 470
    },
    {
      "epoch": 0.6638714185883997,
      "grad_norm": 0.25737932324409485,
      "learning_rate": 1.8839875166099855e-05,
      "loss": 0.9858,
      "mean_token_accuracy": 0.6972843915224075,
      "num_tokens": 987316088.0,
      "step": 475
    },
    {
      "epoch": 0.6708595387840671,
      "grad_norm": 0.36384668946266174,
      "learning_rate": 1.8808833762873006e-05,
      "loss": 0.9855,
      "mean_token_accuracy": 0.6972428739070893,
      "num_tokens": 997698210.0,
      "step": 480
    },
    {
      "epoch": 0.6778476589797344,
      "grad_norm": 0.253492146730423,
      "learning_rate": 1.8777418312489834e-05,
      "loss": 0.9875,
      "mean_token_accuracy": 0.6970369219779968,
      "num_tokens": 1008096298.0,
      "step": 485
    },
    {
      "epoch": 0.6848357791754018,
      "grad_norm": 0.26920533180236816,
      "learning_rate": 1.8745630677554486e-05,
      "loss": 0.9993,
      "mean_token_accuracy": 0.6944381654262543,
      "num_tokens": 1018465735.0,
      "step": 490
    },
    {
      "epoch": 0.6918238993710691,
      "grad_norm": 0.3240351974964142,
      "learning_rate": 1.871347274273771e-05,
      "loss": 0.983,
      "mean_token_accuracy": 0.6982408493757248,
      "num_tokens": 1028829779.0,
      "step": 495
    },
    {
      "epoch": 0.6988120195667366,
      "grad_norm": 0.2850527763366699,
      "learning_rate": 1.8680946414665117e-05,
      "loss": 0.9924,
      "mean_token_accuracy": 0.6957588851451874,
      "num_tokens": 1039212930.0,
      "step": 500
    },
    {
      "epoch": 0.705800139762404,
      "grad_norm": 0.28837037086486816,
      "learning_rate": 1.864805362180416e-05,
      "loss": 0.9843,
      "mean_token_accuracy": 0.6984927475452423,
      "num_tokens": 1049622375.0,
      "step": 505
    },
    {
      "epoch": 0.7127882599580713,
      "grad_norm": 0.3023139238357544,
      "learning_rate": 1.8614796314349765e-05,
      "loss": 0.9875,
      "mean_token_accuracy": 0.6971228659152985,
      "num_tokens": 1060021012.0,
      "step": 510
    },
    {
      "epoch": 0.7197763801537387,
      "grad_norm": 0.2863965630531311,
      "learning_rate": 1.8581176464108727e-05,
      "loss": 1.0023,
      "mean_token_accuracy": 0.6931641280651093,
      "num_tokens": 1070415145.0,
      "step": 515
    },
    {
      "epoch": 0.726764500349406,
      "grad_norm": 0.2860371470451355,
      "learning_rate": 1.8547196064382798e-05,
      "loss": 0.9867,
      "mean_token_accuracy": 0.6977067708969116,
      "num_tokens": 1080814626.0,
      "step": 520
    },
    {
      "epoch": 0.7337526205450734,
      "grad_norm": 0.27839553356170654,
      "learning_rate": 1.8512857129850502e-05,
      "loss": 0.9974,
      "mean_token_accuracy": 0.6940527439117432,
      "num_tokens": 1091225187.0,
      "step": 525
    },
    {
      "epoch": 0.7407407407407407,
      "grad_norm": 0.25351476669311523,
      "learning_rate": 1.8478161696447686e-05,
      "loss": 0.9935,
      "mean_token_accuracy": 0.6956435739994049,
      "num_tokens": 1101580066.0,
      "step": 530
    },
    {
      "epoch": 0.7477288609364081,
      "grad_norm": 0.30183982849121094,
      "learning_rate": 1.8443111821246808e-05,
      "loss": 0.9759,
      "mean_token_accuracy": 0.6996494948863983,
      "num_tokens": 1111994907.0,
      "step": 535
    },
    {
      "epoch": 0.7547169811320755,
      "grad_norm": 0.3375386893749237,
      "learning_rate": 1.8407709582334986e-05,
      "loss": 0.984,
      "mean_token_accuracy": 0.698539337515831,
      "num_tokens": 1122408439.0,
      "step": 540
    },
    {
      "epoch": 0.7617051013277428,
      "grad_norm": 0.2689567804336548,
      "learning_rate": 1.8371957078690772e-05,
      "loss": 0.9839,
      "mean_token_accuracy": 0.6976371347904206,
      "num_tokens": 1132816803.0,
      "step": 545
    },
    {
      "epoch": 0.7686932215234102,
      "grad_norm": 0.30557841062545776,
      "learning_rate": 1.833585643005973e-05,
      "loss": 0.9953,
      "mean_token_accuracy": 0.6952255100011826,
      "num_tokens": 1143223094.0,
      "step": 550
    },
    {
      "epoch": 0.7756813417190775,
      "grad_norm": 0.24475328624248505,
      "learning_rate": 1.8299409776828726e-05,
      "loss": 0.9856,
      "mean_token_accuracy": 0.6971423864364624,
      "num_tokens": 1153614904.0,
      "step": 555
    },
    {
      "epoch": 0.782669461914745,
      "grad_norm": 0.2633828818798065,
      "learning_rate": 1.8262619279899045e-05,
      "loss": 0.983,
      "mean_token_accuracy": 0.6979476481676101,
      "num_tokens": 1164008710.0,
      "step": 560
    },
    {
      "epoch": 0.7896575821104123,
      "grad_norm": 0.31182652711868286,
      "learning_rate": 1.8225487120558275e-05,
      "loss": 0.9894,
      "mean_token_accuracy": 0.6967566043138504,
      "num_tokens": 1174400555.0,
      "step": 565
    },
    {
      "epoch": 0.7966457023060797,
      "grad_norm": 0.26585254073143005,
      "learning_rate": 1.8188015500350968e-05,
      "loss": 0.9907,
      "mean_token_accuracy": 0.6964362800121308,
      "num_tokens": 1184789828.0,
      "step": 570
    },
    {
      "epoch": 0.803633822501747,
      "grad_norm": 0.30836501717567444,
      "learning_rate": 1.8150206640948114e-05,
      "loss": 0.9882,
      "mean_token_accuracy": 0.6964727938175201,
      "num_tokens": 1195143536.0,
      "step": 575
    },
    {
      "epoch": 0.8106219426974144,
      "grad_norm": 0.23804894089698792,
      "learning_rate": 1.811206278401544e-05,
      "loss": 1.0016,
      "mean_token_accuracy": 0.6934799730777741,
      "num_tokens": 1205515568.0,
      "step": 580
    },
    {
      "epoch": 0.8176100628930818,
      "grad_norm": 0.24671317636966705,
      "learning_rate": 1.807358619108046e-05,
      "loss": 0.9917,
      "mean_token_accuracy": 0.6958341568708419,
      "num_tokens": 1215941400.0,
      "step": 585
    },
    {
      "epoch": 0.8245981830887491,
      "grad_norm": 0.2816368341445923,
      "learning_rate": 1.803477914339843e-05,
      "loss": 0.9848,
      "mean_token_accuracy": 0.6971950948238372,
      "num_tokens": 1226344021.0,
      "step": 590
    },
    {
      "epoch": 0.8315863032844165,
      "grad_norm": 0.26695674657821655,
      "learning_rate": 1.7995643941817088e-05,
      "loss": 0.9979,
      "mean_token_accuracy": 0.6942486584186554,
      "num_tokens": 1236748640.0,
      "step": 595
    },
    {
      "epoch": 0.8385744234800838,
      "grad_norm": 0.2597620487213135,
      "learning_rate": 1.795618290664021e-05,
      "loss": 0.9846,
      "mean_token_accuracy": 0.6978382110595703,
      "num_tokens": 1247150254.0,
      "step": 600
    },
    {
      "epoch": 0.8455625436757512,
      "grad_norm": 0.2926052212715149,
      "learning_rate": 1.7916398377490073e-05,
      "loss": 0.9839,
      "mean_token_accuracy": 0.6982168465852737,
      "num_tokens": 1257534085.0,
      "step": 605
    },
    {
      "epoch": 0.8525506638714185,
      "grad_norm": 0.2637452185153961,
      "learning_rate": 1.7876292713168723e-05,
      "loss": 0.9935,
      "mean_token_accuracy": 0.695600014925003,
      "num_tokens": 1267918085.0,
      "step": 610
    },
    {
      "epoch": 0.859538784067086,
      "grad_norm": 0.29727014899253845,
      "learning_rate": 1.7835868291518128e-05,
      "loss": 0.9866,
      "mean_token_accuracy": 0.6969569563865662,
      "num_tokens": 1278294024.0,
      "step": 615
    },
    {
      "epoch": 0.8665269042627534,
      "grad_norm": 0.2716880440711975,
      "learning_rate": 1.77951275092792e-05,
      "loss": 0.9826,
      "mean_token_accuracy": 0.697507557272911,
      "num_tokens": 1288661352.0,
      "step": 620
    },
    {
      "epoch": 0.8735150244584207,
      "grad_norm": 0.2710786759853363,
      "learning_rate": 1.7754072781949683e-05,
      "loss": 0.9957,
      "mean_token_accuracy": 0.6947832137346268,
      "num_tokens": 1299045987.0,
      "step": 625
    },
    {
      "epoch": 0.8805031446540881,
      "grad_norm": 0.24244153499603271,
      "learning_rate": 1.771270654364095e-05,
      "loss": 0.9912,
      "mean_token_accuracy": 0.6964092016220093,
      "num_tokens": 1309404095.0,
      "step": 630
    },
    {
      "epoch": 0.8874912648497554,
      "grad_norm": 0.2675671875476837,
      "learning_rate": 1.7671031246933684e-05,
      "loss": 0.9907,
      "mean_token_accuracy": 0.6958764642477036,
      "num_tokens": 1319817107.0,
      "step": 635
    },
    {
      "epoch": 0.8944793850454228,
      "grad_norm": 0.24089151620864868,
      "learning_rate": 1.762904936273246e-05,
      "loss": 0.9827,
      "mean_token_accuracy": 0.6984389781951904,
      "num_tokens": 1330221560.0,
      "step": 640
    },
    {
      "epoch": 0.9014675052410901,
      "grad_norm": 0.27282679080963135,
      "learning_rate": 1.7586763380119257e-05,
      "loss": 0.9927,
      "mean_token_accuracy": 0.6955932974815369,
      "num_tokens": 1340634686.0,
      "step": 645
    },
    {
      "epoch": 0.9084556254367575,
      "grad_norm": 0.27260783314704895,
      "learning_rate": 1.7544175806205866e-05,
      "loss": 0.9903,
| "mean_token_accuracy": 0.6963932275772095, | |
| "num_tokens": 1351003877.0, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.9154437456324249, | |
| "grad_norm": 0.2771081030368805, | |
| "learning_rate": 1.7501289165985262e-05, | |
| "loss": 0.9834, | |
| "mean_token_accuracy": 0.6979602754116059, | |
| "num_tokens": 1361402284.0, | |
| "step": 655 | |
| }, | |
| { | |
| "epoch": 0.9224318658280922, | |
| "grad_norm": 0.2612958550453186, | |
| "learning_rate": 1.745810600218189e-05, | |
| "loss": 0.9935, | |
| "mean_token_accuracy": 0.6954259991645813, | |
| "num_tokens": 1371789396.0, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.9294199860237596, | |
| "grad_norm": 0.2653946578502655, | |
| "learning_rate": 1.7414628875100903e-05, | |
| "loss": 0.993, | |
| "mean_token_accuracy": 0.6953835368156434, | |
| "num_tokens": 1382207353.0, | |
| "step": 665 | |
| }, | |
| { | |
| "epoch": 0.9364081062194269, | |
| "grad_norm": 0.2772393524646759, | |
| "learning_rate": 1.7370860362476376e-05, | |
| "loss": 0.9718, | |
| "mean_token_accuracy": 0.7005853295326233, | |
| "num_tokens": 1392570580.0, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.9433962264150944, | |
| "grad_norm": 0.24524082243442535, | |
| "learning_rate": 1.732680305931847e-05, | |
| "loss": 0.982, | |
| "mean_token_accuracy": 0.6978916674852371, | |
| "num_tokens": 1402973016.0, | |
| "step": 675 | |
| }, | |
| { | |
| "epoch": 0.9503843466107617, | |
| "grad_norm": 0.23882606625556946, | |
| "learning_rate": 1.7282459577759558e-05, | |
| "loss": 0.994, | |
| "mean_token_accuracy": 0.6950913339853286, | |
| "num_tokens": 1413391023.0, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.9573724668064291, | |
| "grad_norm": 0.2698175013065338, | |
| "learning_rate": 1.7237832546899383e-05, | |
| "loss": 0.9764, | |
| "mean_token_accuracy": 0.7002421110868454, | |
| "num_tokens": 1423756209.0, | |
| "step": 685 | |
| }, | |
| { | |
| "epoch": 0.9643605870020965, | |
| "grad_norm": 0.24084943532943726, | |
| "learning_rate": 1.719292461264915e-05, | |
| "loss": 0.9921, | |
| "mean_token_accuracy": 0.6954464077949524, | |
| "num_tokens": 1434141449.0, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.9713487071977638, | |
| "grad_norm": 0.2785717248916626, | |
| "learning_rate": 1.7147738437574682e-05, | |
| "loss": 0.9783, | |
| "mean_token_accuracy": 0.699154207110405, | |
| "num_tokens": 1444495476.0, | |
| "step": 695 | |
| }, | |
| { | |
| "epoch": 0.9783368273934312, | |
| "grad_norm": 0.32852408289909363, | |
| "learning_rate": 1.710227670073852e-05, | |
| "loss": 0.9926, | |
| "mean_token_accuracy": 0.6958128333091735, | |
| "num_tokens": 1454908018.0, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.9853249475890985, | |
| "grad_norm": 0.2404838651418686, | |
| "learning_rate": 1.705654209754113e-05, | |
| "loss": 0.9741, | |
| "mean_token_accuracy": 0.7001296132802963, | |
| "num_tokens": 1465317940.0, | |
| "step": 705 | |
| }, | |
| { | |
| "epoch": 0.9923130677847659, | |
| "grad_norm": 0.2792597711086273, | |
| "learning_rate": 1.701053733956105e-05, | |
| "loss": 0.9798, | |
| "mean_token_accuracy": 0.6991855531930924, | |
| "num_tokens": 1475718987.0, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.9993011879804332, | |
| "grad_norm": 0.28146040439605713, | |
| "learning_rate": 1.696426515439414e-05, | |
| "loss": 0.9803, | |
| "mean_token_accuracy": 0.6980899155139924, | |
| "num_tokens": 1486112797.0, | |
| "step": 715 | |
| }, | |
| { | |
| "epoch": 1.0055904961565338, | |
| "grad_norm": 0.4210590422153473, | |
| "learning_rate": 1.691772828549189e-05, | |
| "loss": 0.9511, | |
| "mean_token_accuracy": 0.7056844896740384, | |
| "num_tokens": 1495486842.0, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 1.0125786163522013, | |
| "grad_norm": 0.40947115421295166, | |
| "learning_rate": 1.687092949199871e-05, | |
| "loss": 0.9641, | |
| "mean_token_accuracy": 0.701970437169075, | |
| "num_tokens": 1505887963.0, | |
| "step": 725 | |
| }, | |
| { | |
| "epoch": 1.0195667365478687, | |
| "grad_norm": 0.34101659059524536, | |
| "learning_rate": 1.682387154858838e-05, | |
| "loss": 0.9542, | |
| "mean_token_accuracy": 0.7043519228696823, | |
| "num_tokens": 1516294528.0, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 1.026554856743536, | |
| "grad_norm": 0.2541981041431427, | |
| "learning_rate": 1.6776557245299532e-05, | |
| "loss": 0.9457, | |
| "mean_token_accuracy": 0.7069508880376816, | |
| "num_tokens": 1526652289.0, | |
| "step": 735 | |
| }, | |
| { | |
| "epoch": 1.0335429769392033, | |
| "grad_norm": 0.248768150806427, | |
| "learning_rate": 1.672898938737023e-05, | |
| "loss": 0.9473, | |
| "mean_token_accuracy": 0.7058848321437836, | |
| "num_tokens": 1537056955.0, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 1.0405310971348707, | |
| "grad_norm": 0.2375871241092682, | |
| "learning_rate": 1.668117079507164e-05, | |
| "loss": 0.9546, | |
| "mean_token_accuracy": 0.7042842835187912, | |
| "num_tokens": 1547469838.0, | |
| "step": 745 | |
| }, | |
| { | |
| "epoch": 1.047519217330538, | |
| "grad_norm": 0.24888072907924652, | |
| "learning_rate": 1.6633104303540842e-05, | |
| "loss": 0.9358, | |
| "mean_token_accuracy": 0.7089408129453659, | |
| "num_tokens": 1557844495.0, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 1.0545073375262055, | |
| "grad_norm": 0.2507837116718292, | |
| "learning_rate": 1.6584792762612706e-05, | |
| "loss": 0.9497, | |
| "mean_token_accuracy": 0.7055849075317383, | |
| "num_tokens": 1568244116.0, | |
| "step": 755 | |
| }, | |
| { | |
| "epoch": 1.0614954577218727, | |
| "grad_norm": 0.2577778100967407, | |
| "learning_rate": 1.6536239036650942e-05, | |
| "loss": 0.9548, | |
| "mean_token_accuracy": 0.7038821280002594, | |
| "num_tokens": 1578606457.0, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 1.0684835779175401, | |
| "grad_norm": 0.24672797322273254, | |
| "learning_rate": 1.6487446004378273e-05, | |
| "loss": 0.9427, | |
| "mean_token_accuracy": 0.7073481321334839, | |
| "num_tokens": 1588972856.0, | |
| "step": 765 | |
| }, | |
| { | |
| "epoch": 1.0754716981132075, | |
| "grad_norm": 0.21917957067489624, | |
| "learning_rate": 1.6438416558705767e-05, | |
| "loss": 0.9592, | |
| "mean_token_accuracy": 0.7032468438148498, | |
| "num_tokens": 1599361363.0, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 1.082459818308875, | |
| "grad_norm": 0.2600899934768677, | |
| "learning_rate": 1.6389153606561293e-05, | |
| "loss": 0.9466, | |
| "mean_token_accuracy": 0.7060303270816803, | |
| "num_tokens": 1609753390.0, | |
| "step": 775 | |
| }, | |
| { | |
| "epoch": 1.0894479385045424, | |
| "grad_norm": 0.24663616716861725, | |
| "learning_rate": 1.6339660068717197e-05, | |
| "loss": 0.9567, | |
| "mean_token_accuracy": 0.7034341752529144, | |
| "num_tokens": 1620132659.0, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 1.0964360587002095, | |
| "grad_norm": 0.30449482798576355, | |
| "learning_rate": 1.6289938879617114e-05, | |
| "loss": 0.9377, | |
| "mean_token_accuracy": 0.7081806808710098, | |
| "num_tokens": 1630536804.0, | |
| "step": 785 | |
| }, | |
| { | |
| "epoch": 1.103424178895877, | |
| "grad_norm": 0.3289632797241211, | |
| "learning_rate": 1.623999298720199e-05, | |
| "loss": 0.9518, | |
| "mean_token_accuracy": 0.7046901375055313, | |
| "num_tokens": 1640935243.0, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 1.1104122990915444, | |
| "grad_norm": 0.26432016491889954, | |
| "learning_rate": 1.618982535273531e-05, | |
| "loss": 0.9463, | |
| "mean_token_accuracy": 0.7065684378147126, | |
| "num_tokens": 1651324542.0, | |
| "step": 795 | |
| }, | |
| { | |
| "epoch": 1.1174004192872118, | |
| "grad_norm": 0.243063285946846, | |
| "learning_rate": 1.6139438950627513e-05, | |
| "loss": 0.9534, | |
| "mean_token_accuracy": 0.7047319650650025, | |
| "num_tokens": 1661713439.0, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 1.1243885394828792, | |
| "grad_norm": 0.24993912875652313, | |
| "learning_rate": 1.608883676825965e-05, | |
| "loss": 0.9484, | |
| "mean_token_accuracy": 0.7057817548513412, | |
| "num_tokens": 1672104702.0, | |
| "step": 805 | |
| }, | |
| { | |
| "epoch": 1.1313766596785464, | |
| "grad_norm": 0.2645328640937805, | |
| "learning_rate": 1.6038021805806257e-05, | |
| "loss": 0.951, | |
| "mean_token_accuracy": 0.7048044502735138, | |
| "num_tokens": 1682485345.0, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 1.1383647798742138, | |
| "grad_norm": 0.22507497668266296, | |
| "learning_rate": 1.5986997076057477e-05, | |
| "loss": 0.9533, | |
| "mean_token_accuracy": 0.7048071801662446, | |
| "num_tokens": 1692860107.0, | |
| "step": 815 | |
| }, | |
| { | |
| "epoch": 1.1453529000698812, | |
| "grad_norm": 0.3045750856399536, | |
| "learning_rate": 1.593576560424045e-05, | |
| "loss": 0.9445, | |
| "mean_token_accuracy": 0.7067113369703293, | |
| "num_tokens": 1703270041.0, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 1.1523410202655486, | |
| "grad_norm": 0.2575872540473938, | |
| "learning_rate": 1.588433042783992e-05, | |
| "loss": 0.958, | |
| "mean_token_accuracy": 0.7032164961099625, | |
| "num_tokens": 1713664308.0, | |
| "step": 825 | |
| }, | |
| { | |
| "epoch": 1.159329140461216, | |
| "grad_norm": 0.2729121148586273, | |
| "learning_rate": 1.5832694596418174e-05, | |
| "loss": 0.9398, | |
| "mean_token_accuracy": 0.7079699605703353, | |
| "num_tokens": 1724045480.0, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 1.1663172606568832, | |
| "grad_norm": 0.23945994675159454, | |
| "learning_rate": 1.57808611714342e-05, | |
| "loss": 0.9472, | |
| "mean_token_accuracy": 0.7062007933855057, | |
| "num_tokens": 1734427138.0, | |
| "step": 835 | |
| }, | |
| { | |
| "epoch": 1.1733053808525507, | |
| "grad_norm": 0.2659391164779663, | |
| "learning_rate": 1.5728833226062216e-05, | |
| "loss": 0.952, | |
| "mean_token_accuracy": 0.7049013942480087, | |
| "num_tokens": 1744839569.0, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 1.180293501048218, | |
| "grad_norm": 0.28336286544799805, | |
| "learning_rate": 1.5676613845009434e-05, | |
| "loss": 0.9489, | |
| "mean_token_accuracy": 0.7052598625421524, | |
| "num_tokens": 1755242100.0, | |
| "step": 845 | |
| }, | |
| { | |
| "epoch": 1.1872816212438855, | |
| "grad_norm": 0.2740361988544464, | |
| "learning_rate": 1.562420612433318e-05, | |
| "loss": 0.9585, | |
| "mean_token_accuracy": 0.7030707478523255, | |
| "num_tokens": 1765621699.0, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 1.1942697414395527, | |
| "grad_norm": 0.2763083577156067, | |
| "learning_rate": 1.557161317125733e-05, | |
| "loss": 0.9527, | |
| "mean_token_accuracy": 0.7051246583461761, | |
| "num_tokens": 1776038202.0, | |
| "step": 855 | |
| }, | |
| { | |
| "epoch": 1.20125786163522, | |
| "grad_norm": 0.28946614265441895, | |
| "learning_rate": 1.5518838103988075e-05, | |
| "loss": 0.9455, | |
| "mean_token_accuracy": 0.7068380653858185, | |
| "num_tokens": 1786417813.0, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 1.2082459818308875, | |
| "grad_norm": 0.26109829545021057, | |
| "learning_rate": 1.546588405152907e-05, | |
| "loss": 0.9527, | |
| "mean_token_accuracy": 0.704670849442482, | |
| "num_tokens": 1796827043.0, | |
| "step": 865 | |
| }, | |
| { | |
| "epoch": 1.215234102026555, | |
| "grad_norm": 0.24084337055683136, | |
| "learning_rate": 1.5412754153495882e-05, | |
| "loss": 0.959, | |
| "mean_token_accuracy": 0.7029285609722138, | |
| "num_tokens": 1807231537.0, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 1.2222222222222223, | |
| "grad_norm": 0.25638347864151, | |
| "learning_rate": 1.5359451559929873e-05, | |
| "loss": 0.9648, | |
| "mean_token_accuracy": 0.7019408881664276, | |
| "num_tokens": 1817615778.0, | |
| "step": 875 | |
| }, | |
| { | |
| "epoch": 1.2292103424178895, | |
| "grad_norm": 0.2681230306625366, | |
| "learning_rate": 1.5305979431111416e-05, | |
| "loss": 0.9603, | |
| "mean_token_accuracy": 0.7021995514631272, | |
| "num_tokens": 1828001448.0, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 1.236198462613557, | |
| "grad_norm": 0.24295032024383545, | |
| "learning_rate": 1.5252340937372542e-05, | |
| "loss": 0.9682, | |
| "mean_token_accuracy": 0.7013109654188157, | |
| "num_tokens": 1838409867.0, | |
| "step": 885 | |
| }, | |
| { | |
| "epoch": 1.2431865828092243, | |
| "grad_norm": 0.23449333012104034, | |
| "learning_rate": 1.5198539258908955e-05, | |
| "loss": 0.9408, | |
| "mean_token_accuracy": 0.7074013233184815, | |
| "num_tokens": 1848787748.0, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 1.2501747030048918, | |
| "grad_norm": 0.24246180057525635, | |
| "learning_rate": 1.5144577585591496e-05, | |
| "loss": 0.9672, | |
| "mean_token_accuracy": 0.7012275934219361, | |
| "num_tokens": 1859159875.0, | |
| "step": 895 | |
| }, | |
| { | |
| "epoch": 1.257162823200559, | |
| "grad_norm": 0.23688940703868866, | |
| "learning_rate": 1.5090459116777e-05, | |
| "loss": 0.9555, | |
| "mean_token_accuracy": 0.7037541270256042, | |
| "num_tokens": 1869540000.0, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 1.2641509433962264, | |
| "grad_norm": 0.229180246591568, | |
| "learning_rate": 1.503618706111863e-05, | |
| "loss": 0.9591, | |
| "mean_token_accuracy": 0.7028054356575012, | |
| "num_tokens": 1879933756.0, | |
| "step": 905 | |
| }, | |
| { | |
| "epoch": 1.2711390635918938, | |
| "grad_norm": 0.22831909358501434, | |
| "learning_rate": 1.498176463637561e-05, | |
| "loss": 0.9504, | |
| "mean_token_accuracy": 0.7053441524505615, | |
| "num_tokens": 1890307582.0, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 1.2781271837875612, | |
| "grad_norm": 0.2654498219490051, | |
| "learning_rate": 1.492719506922248e-05, | |
| "loss": 0.9595, | |
| "mean_token_accuracy": 0.7033340811729432, | |
| "num_tokens": 1900666240.0, | |
| "step": 915 | |
| }, | |
| { | |
| "epoch": 1.2851153039832286, | |
| "grad_norm": 0.23844845592975616, | |
| "learning_rate": 1.487248159505775e-05, | |
| "loss": 0.9571, | |
| "mean_token_accuracy": 0.7038801491260529, | |
| "num_tokens": 1911048901.0, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 1.292103424178896, | |
| "grad_norm": 0.21731877326965332, | |
| "learning_rate": 1.4817627457812107e-05, | |
| "loss": 0.9571, | |
| "mean_token_accuracy": 0.703849372267723, | |
| "num_tokens": 1921390824.0, | |
| "step": 925 | |
| }, | |
| { | |
| "epoch": 1.2990915443745632, | |
| "grad_norm": 0.23090894520282745, | |
| "learning_rate": 1.4762635909756071e-05, | |
| "loss": 0.9514, | |
| "mean_token_accuracy": 0.7047541320323945, | |
| "num_tokens": 1931805342.0, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 1.3060796645702306, | |
| "grad_norm": 0.2666736841201782, | |
| "learning_rate": 1.4707510211307165e-05, | |
| "loss": 0.9587, | |
| "mean_token_accuracy": 0.7034244388341904, | |
| "num_tokens": 1942199423.0, | |
| "step": 935 | |
| }, | |
| { | |
| "epoch": 1.313067784765898, | |
| "grad_norm": 0.2366342693567276, | |
| "learning_rate": 1.4652253630836622e-05, | |
| "loss": 0.959, | |
| "mean_token_accuracy": 0.7027652233839035, | |
| "num_tokens": 1952598729.0, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 1.3200559049615652, | |
| "grad_norm": 0.24025586247444153, | |
| "learning_rate": 1.4596869444475582e-05, | |
| "loss": 0.9474, | |
| "mean_token_accuracy": 0.7063993692398072, | |
| "num_tokens": 1962973528.0, | |
| "step": 945 | |
| }, | |
| { | |
| "epoch": 1.3270440251572326, | |
| "grad_norm": 0.24174918234348297, | |
| "learning_rate": 1.4541360935920874e-05, | |
| "loss": 0.9602, | |
| "mean_token_accuracy": 0.7028678983449936, | |
| "num_tokens": 1973395017.0, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 1.3340321453529, | |
| "grad_norm": 0.2554982304573059, | |
| "learning_rate": 1.448573139624032e-05, | |
| "loss": 0.9548, | |
| "mean_token_accuracy": 0.7040337771177292, | |
| "num_tokens": 1983785157.0, | |
| "step": 955 | |
| }, | |
| { | |
| "epoch": 1.3410202655485675, | |
| "grad_norm": 0.2739647924900055, | |
| "learning_rate": 1.4429984123677605e-05, | |
| "loss": 0.9589, | |
| "mean_token_accuracy": 0.7027077376842499, | |
| "num_tokens": 1994207711.0, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 1.3480083857442349, | |
| "grad_norm": 0.3043140769004822, | |
| "learning_rate": 1.4374122423456731e-05, | |
| "loss": 0.9661, | |
| "mean_token_accuracy": 0.7012409687042236, | |
| "num_tokens": 2004589901.0, | |
| "step": 965 | |
| }, | |
| { | |
| "epoch": 1.3549965059399023, | |
| "grad_norm": 0.28174853324890137, | |
| "learning_rate": 1.4318149607586052e-05, | |
| "loss": 0.9654, | |
| "mean_token_accuracy": 0.7013572067022323, | |
| "num_tokens": 2014949898.0, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 1.3619846261355695, | |
| "grad_norm": 0.29067397117614746, | |
| "learning_rate": 1.4262068994661902e-05, | |
| "loss": 0.9541, | |
| "mean_token_accuracy": 0.7043993294239044, | |
| "num_tokens": 2025331279.0, | |
| "step": 975 | |
| }, | |
| { | |
| "epoch": 1.368972746331237, | |
| "grad_norm": 0.2759871184825897, | |
| "learning_rate": 1.4205883909671838e-05, | |
| "loss": 0.947, | |
| "mean_token_accuracy": 0.705754178762436, | |
| "num_tokens": 2035743276.0, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 1.3759608665269043, | |
| "grad_norm": 0.25539323687553406, | |
| "learning_rate": 1.4149597683797516e-05, | |
| "loss": 0.9456, | |
| "mean_token_accuracy": 0.706447896361351, | |
| "num_tokens": 2046100659.0, | |
| "step": 985 | |
| }, | |
| { | |
| "epoch": 1.3829489867225715, | |
| "grad_norm": 0.23867954313755035, | |
| "learning_rate": 1.409321365421717e-05, | |
| "loss": 0.9492, | |
| "mean_token_accuracy": 0.7054832637310028, | |
| "num_tokens": 2056458977.0, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 1.389937106918239, | |
| "grad_norm": 0.2444266527891159, | |
| "learning_rate": 1.4036735163907765e-05, | |
| "loss": 0.9584, | |
| "mean_token_accuracy": 0.7032276690006256, | |
| "num_tokens": 2066827483.0, | |
| "step": 995 | |
| }, | |
| { | |
| "epoch": 1.3969252271139063, | |
| "grad_norm": 0.29059308767318726, | |
| "learning_rate": 1.3980165561446781e-05, | |
| "loss": 0.9503, | |
| "mean_token_accuracy": 0.705039718747139, | |
| "num_tokens": 2077249640.0, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 1.4039133473095737, | |
| "grad_norm": 0.2778762876987457, | |
| "learning_rate": 1.3923508200813695e-05, | |
| "loss": 0.9518, | |
| "mean_token_accuracy": 0.7050242900848389, | |
| "num_tokens": 2087646218.0, | |
| "step": 1005 | |
| }, | |
| { | |
| "epoch": 1.4109014675052411, | |
| "grad_norm": 0.2580850124359131, | |
| "learning_rate": 1.3866766441191108e-05, | |
| "loss": 0.9534, | |
| "mean_token_accuracy": 0.7048745006322861, | |
| "num_tokens": 2098032107.0, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 1.4178895877009086, | |
| "grad_norm": 0.24883116781711578, | |
| "learning_rate": 1.3809943646765594e-05, | |
| "loss": 0.953, | |
| "mean_token_accuracy": 0.7045016646385193, | |
| "num_tokens": 2108394030.0, | |
| "step": 1015 | |
| }, | |
| { | |
| "epoch": 1.4248777078965758, | |
| "grad_norm": 0.24177159368991852, | |
| "learning_rate": 1.3753043186528228e-05, | |
| "loss": 0.9628, | |
| "mean_token_accuracy": 0.702374005317688, | |
| "num_tokens": 2118815629.0, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 1.4318658280922432, | |
| "grad_norm": 0.29814979434013367, | |
| "learning_rate": 1.3696068434074858e-05, | |
| "loss": 0.9427, | |
| "mean_token_accuracy": 0.7067977696657181, | |
| "num_tokens": 2129188452.0, | |
| "step": 1025 | |
| }, | |
| { | |
| "epoch": 1.4388539482879106, | |
| "grad_norm": 0.29647916555404663, | |
| "learning_rate": 1.3639022767406065e-05, | |
| "loss": 0.9514, | |
| "mean_token_accuracy": 0.7053138375282287, | |
| "num_tokens": 2139616248.0, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 1.445842068483578, | |
| "grad_norm": 0.26481950283050537, | |
| "learning_rate": 1.3581909568726897e-05, | |
| "loss": 0.9507, | |
| "mean_token_accuracy": 0.7051736801862717, | |
| "num_tokens": 2150019652.0, | |
| "step": 1035 | |
| }, | |
| { | |
| "epoch": 1.4528301886792452, | |
| "grad_norm": 0.2721223533153534, | |
| "learning_rate": 1.3524732224246334e-05, | |
| "loss": 0.9591, | |
| "mean_token_accuracy": 0.703093808889389, | |
| "num_tokens": 2160401014.0, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 1.4598183088749126, | |
| "grad_norm": 0.2551819384098053, | |
| "learning_rate": 1.3467494123976526e-05, | |
| "loss": 0.952, | |
| "mean_token_accuracy": 0.7046844661235809, | |
| "num_tokens": 2170810669.0, | |
| "step": 1045 | |
| }, | |
| { | |
| "epoch": 1.46680642907058, | |
| "grad_norm": 0.23688183724880219, | |
| "learning_rate": 1.3410198661531802e-05, | |
| "loss": 0.963, | |
| "mean_token_accuracy": 0.7021675914525985, | |
| "num_tokens": 2181208872.0, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 1.4737945492662474, | |
| "grad_norm": 0.2515881359577179, | |
| "learning_rate": 1.3352849233927458e-05, | |
| "loss": 0.9495, | |
| "mean_token_accuracy": 0.7059559196233749, | |
| "num_tokens": 2191619229.0, | |
| "step": 1055 | |
| }, | |
| { | |
| "epoch": 1.4807826694619148, | |
| "grad_norm": 0.24117465317249298, | |
| "learning_rate": 1.3295449241378358e-05, | |
| "loss": 0.9421, | |
| "mean_token_accuracy": 0.707347297668457, | |
| "num_tokens": 2202012017.0, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 1.4877707896575822, | |
| "grad_norm": 0.21908260881900787, | |
| "learning_rate": 1.3238002087097321e-05, | |
| "loss": 0.9489, | |
| "mean_token_accuracy": 0.7059781879186631, | |
| "num_tokens": 2212374750.0, | |
| "step": 1065 | |
| }, | |
| { | |
| "epoch": 1.4947589098532494, | |
| "grad_norm": 0.23432235419750214, | |
| "learning_rate": 1.3180511177093371e-05, | |
| "loss": 0.9455, | |
| "mean_token_accuracy": 0.7061308234930038, | |
| "num_tokens": 2222759914.0, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 1.5017470300489169, | |
| "grad_norm": 0.24842332303524017, | |
| "learning_rate": 1.312297991996978e-05, | |
| "loss": 0.9539, | |
| "mean_token_accuracy": 0.7039518356323242, | |
| "num_tokens": 2233170092.0, | |
| "step": 1075 | |
| }, | |
| { | |
| "epoch": 1.508735150244584, | |
| "grad_norm": 0.23463116586208344, | |
| "learning_rate": 1.3065411726721975e-05, | |
| "loss": 0.9455, | |
| "mean_token_accuracy": 0.7066747874021531, | |
| "num_tokens": 2243582510.0, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 1.5157232704402515, | |
| "grad_norm": 0.22929295897483826, | |
| "learning_rate": 1.300781001053531e-05, | |
| "loss": 0.9607, | |
| "mean_token_accuracy": 0.7026191800832748, | |
| "num_tokens": 2253976158.0, | |
| "step": 1085 | |
| }, | |
| { | |
| "epoch": 1.5227113906359189, | |
| "grad_norm": 0.22391878068447113, | |
| "learning_rate": 1.2950178186582685e-05, | |
| "loss": 0.9491, | |
| "mean_token_accuracy": 0.7051207542419433, | |
| "num_tokens": 2264388639.0, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 1.5296995108315863, | |
| "grad_norm": 0.2603966295719147, | |
| "learning_rate": 1.289251967182208e-05, | |
| "loss": 0.9541, | |
| "mean_token_accuracy": 0.7041330695152282, | |
| "num_tokens": 2274779483.0, | |
| "step": 1095 | |
| }, | |
| { | |
| "epoch": 1.5366876310272537, | |
| "grad_norm": 0.230450838804245, | |
| "learning_rate": 1.2834837884793963e-05, | |
| "loss": 0.9528, | |
| "mean_token_accuracy": 0.7049372524023056, | |
| "num_tokens": 2285190622.0, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 1.543675751222921, | |
| "grad_norm": 0.21797870099544525, | |
| "learning_rate": 1.277713624541859e-05, | |
| "loss": 0.9592, | |
| "mean_token_accuracy": 0.7032217621803284, | |
| "num_tokens": 2295617562.0, | |
| "step": 1105 | |
| }, | |
| { | |
| "epoch": 1.5506638714185885, | |
| "grad_norm": 0.2613143026828766, | |
| "learning_rate": 1.2719418174793256e-05, | |
| "loss": 0.9582, | |
| "mean_token_accuracy": 0.7029330909252167, | |
| "num_tokens": 2306005585.0, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 1.5576519916142557, | |
| "grad_norm": 0.23213548958301544, | |
| "learning_rate": 1.2661687094989457e-05, | |
| "loss": 0.9587, | |
| "mean_token_accuracy": 0.7030121952295303, | |
| "num_tokens": 2316399640.0, | |
| "step": 1115 | |
| }, | |
| { | |
| "epoch": 1.5646401118099231, | |
| "grad_norm": 0.2354755848646164, | |
| "learning_rate": 1.2603946428849995e-05, | |
| "loss": 0.9503, | |
| "mean_token_accuracy": 0.7047839015722275, | |
| "num_tokens": 2326822403.0, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 1.5716282320055905, | |
| "grad_norm": 0.2406359165906906, | |
| "learning_rate": 1.2546199599786042e-05, | |
| "loss": 0.9551, | |
| "mean_token_accuracy": 0.704132291674614, | |
| "num_tokens": 2337192629.0, | |
| "step": 1125 | |
| }, | |
| { | |
| "epoch": 1.5786163522012577, | |
| "grad_norm": 0.2363503873348236, | |
| "learning_rate": 1.248845003157416e-05, | |
| "loss": 0.9579, | |
| "mean_token_accuracy": 0.7029750674962998, | |
| "num_tokens": 2347588209.0, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 1.5856044723969251, | |
| "grad_norm": 0.23799030482769012, | |
| "learning_rate": 1.2430701148153322e-05, | |
| "loss": 0.9519, | |
| "mean_token_accuracy": 0.7047583341598511, | |
| "num_tokens": 2357969391.0, | |
| "step": 1135 | |
| }, | |
| { | |
| "epoch": 1.5925925925925926, | |
| "grad_norm": 0.253959983587265, | |
| "learning_rate": 1.237295637342189e-05, | |
| "loss": 0.9422, | |
| "mean_token_accuracy": 0.7074365675449371, | |
| "num_tokens": 2368344869.0, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 1.59958071278826, | |
| "grad_norm": 0.27008169889450073, | |
| "learning_rate": 1.2315219131034637e-05, | |
| "loss": 0.955, | |
| "mean_token_accuracy": 0.7039465218782425, | |
| "num_tokens": 2378746901.0, | |
| "step": 1145 | |
| }, | |
| { | |
| "epoch": 1.6065688329839274, | |
| "grad_norm": 0.2541366219520569, | |
| "learning_rate": 1.2257492844199736e-05, | |
| "loss": 0.9567, | |
| "mean_token_accuracy": 0.703971728682518, | |
| "num_tokens": 2389112135.0, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 1.6135569531795948, | |
| "grad_norm": 0.23742936551570892, | |
| "learning_rate": 1.219978093547582e-05, | |
| "loss": 0.9456, | |
| "mean_token_accuracy": 0.7065177857875824, | |
| "num_tokens": 2399497212.0, | |
| "step": 1155 | |
| }, | |
| { | |
| "epoch": 1.6205450733752622, | |
| "grad_norm": 0.24232052266597748, | |
| "learning_rate": 1.2142086826569043e-05, | |
| "loss": 0.9587, | |
| "mean_token_accuracy": 0.7030541986227036, | |
| "num_tokens": 2409911323.0, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 1.6275331935709294, | |
| "grad_norm": 0.23238737881183624, | |
| "learning_rate": 1.2084413938130226e-05, | |
| "loss": 0.9498, | |
| "mean_token_accuracy": 0.7052562206983566, | |
| "num_tokens": 2420296768.0, | |
| "step": 1165 | |
| }, | |
| { | |
| "epoch": 1.6345213137665968, | |
| "grad_norm": 0.22225673496723175, | |
| "learning_rate": 1.202676568955204e-05, | |
| "loss": 0.9553, | |
| "mean_token_accuracy": 0.7043942958116531, | |
| "num_tokens": 2430700961.0, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 1.641509433962264, | |
| "grad_norm": 0.2327306568622589, | |
| "learning_rate": 1.1969145498766266e-05, | |
| "loss": 0.9578, | |
| "mean_token_accuracy": 0.7032434940338135, | |
| "num_tokens": 2441087132.0, | |
| "step": 1175 | |
| }, | |
| { | |
| "epoch": 1.6484975541579314, | |
| "grad_norm": 0.24940182268619537, | |
| "learning_rate": 1.1911556782041166e-05, | |
| "loss": 0.955, | |
| "mean_token_accuracy": 0.7039715796709061, | |
| "num_tokens": 2451459456.0, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 1.6554856743535988, | |
| "grad_norm": 0.2305036038160324, | |
| "learning_rate": 1.1854002953778906e-05, | |
| "loss": 0.9593, | |
| "mean_token_accuracy": 0.7024497330188751, | |
| "num_tokens": 2461836410.0, | |
| "step": 1185 | |
| }, | |
| { | |
| "epoch": 1.6624737945492662, | |
| "grad_norm": 0.21421563625335693, | |
| "learning_rate": 1.1796487426313152e-05, | |
| "loss": 0.9534, | |
| "mean_token_accuracy": 0.7043396472930908, | |
| "num_tokens": 2472221312.0, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 1.6694619147449337, | |
| "grad_norm": 0.2404475063085556, | |
| "learning_rate": 1.1739013609706729e-05, | |
| "loss": 0.9612, | |
| "mean_token_accuracy": 0.7030987590551376, | |
| "num_tokens": 2482587402.0, | |
| "step": 1195 | |
| }, | |
| { | |
| "epoch": 1.676450034940601, | |
| "grad_norm": 0.2337433099746704, | |
| "learning_rate": 1.168158491154945e-05, | |
| "loss": 0.9508, | |
| "mean_token_accuracy": 0.7051290899515152, | |
| "num_tokens": 2492964095.0, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 1.6834381551362685, | |
| "grad_norm": 0.26063719391822815, | |
| "learning_rate": 1.162420473675608e-05, | |
| "loss": 0.9429, | |
| "mean_token_accuracy": 0.7071120649576187, | |
| "num_tokens": 2503327221.0, | |
| "step": 1205 | |
| }, | |
| { | |
| "epoch": 1.6904262753319357, | |
| "grad_norm": 0.22970515489578247, | |
| "learning_rate": 1.1566876487364462e-05, | |
| "loss": 0.9551, | |
| "mean_token_accuracy": 0.7043853759765625, | |
| "num_tokens": 2513705668.0, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 1.697414395527603, | |
| "grad_norm": 0.22828368842601776, | |
| "learning_rate": 1.1509603562333817e-05, | |
| "loss": 0.9463, | |
| "mean_token_accuracy": 0.7062768042087555, | |
| "num_tokens": 2524125309.0, | |
| "step": 1215 | |
| }, | |
| { | |
| "epoch": 1.7044025157232703, | |
| "grad_norm": 0.2501721680164337, | |
| "learning_rate": 1.1452389357343208e-05, | |
| "loss": 0.95, | |
| "mean_token_accuracy": 0.7051142156124115, | |
| "num_tokens": 2534528216.0, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 1.7113906359189377, | |
| "grad_norm": 0.21163718402385712, | |
| "learning_rate": 1.139523726459022e-05, | |
| "loss": 0.9517, | |
| "mean_token_accuracy": 0.7048409789800644, | |
| "num_tokens": 2544943369.0, | |
| "step": 1225 | |
| }, | |
| { | |
| "epoch": 1.718378756114605, | |
| "grad_norm": 0.2533326745033264, | |
| "learning_rate": 1.1338150672589847e-05, | |
| "loss": 0.9519, | |
| "mean_token_accuracy": 0.7046971768140793, | |
| "num_tokens": 2555293671.0, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 1.7253668763102725, | |
| "grad_norm": 0.23022139072418213, | |
| "learning_rate": 1.1281132965973578e-05, | |
| "loss": 0.9525, | |
| "mean_token_accuracy": 0.7047327756881714, | |
| "num_tokens": 2565688022.0, | |
| "step": 1235 | |
| }, | |
| { | |
| "epoch": 1.73235499650594, | |
| "grad_norm": 0.24306832253932953, | |
| "learning_rate": 1.1224187525288722e-05, | |
| "loss": 0.9406, | |
| "mean_token_accuracy": 0.7070292413234711, | |
| "num_tokens": 2576050620.0, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 1.7393431167016074, | |
| "grad_norm": 0.22193977236747742, | |
| "learning_rate": 1.1167317726797986e-05, | |
| "loss": 0.9461, | |
| "mean_token_accuracy": 0.7061527252197266, | |
| "num_tokens": 2586432548.0, | |
| "step": 1245 | |
| }, | |
| { | |
| "epoch": 1.7463312368972748, | |
| "grad_norm": 0.25281763076782227, | |
| "learning_rate": 1.1110526942279294e-05, | |
| "loss": 0.9525, | |
| "mean_token_accuracy": 0.704227501153946, | |
| "num_tokens": 2596837800.0, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 1.753319357092942, | |
| "grad_norm": 0.2383853644132614, | |
| "learning_rate": 1.1053818538825876e-05, | |
| "loss": 0.9458, | |
| "mean_token_accuracy": 0.7060607433319092, | |
| "num_tokens": 2607259536.0, | |
| "step": 1255 | |
| }, | |
| { | |
| "epoch": 1.7603074772886094, | |
| "grad_norm": 0.2552628815174103, | |
| "learning_rate": 1.0997195878646636e-05, | |
| "loss": 0.9449, | |
| "mean_token_accuracy": 0.7070667445659637, | |
| "num_tokens": 2617663573.0, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 1.7672955974842768, | |
| "grad_norm": 0.20744191110134125, | |
| "learning_rate": 1.0940662318866808e-05, | |
| "loss": 0.9474, | |
| "mean_token_accuracy": 0.7057566583156586, | |
| "num_tokens": 2628064038.0, | |
| "step": 1265 | |
| }, | |
| { | |
| "epoch": 1.774283717679944, | |
| "grad_norm": 0.2790570855140686, | |
| "learning_rate": 1.0884221211328914e-05, | |
| "loss": 0.9442, | |
| "mean_token_accuracy": 0.7067927271127701, | |
| "num_tokens": 2638465498.0, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 1.7812718378756114, | |
| "grad_norm": 0.22672200202941895, | |
| "learning_rate": 1.0827875902394033e-05, | |
| "loss": 0.9478, | |
| "mean_token_accuracy": 0.7057815074920655, | |
| "num_tokens": 2648882994.0, | |
| "step": 1275 | |
| }, | |
| { | |
| "epoch": 1.7882599580712788, | |
| "grad_norm": 0.25658106803894043, | |
| "learning_rate": 1.077162973274341e-05, | |
| "loss": 0.9484, | |
| "mean_token_accuracy": 0.7052160561084747, | |
| "num_tokens": 2659259873.0, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 1.7952480782669462, | |
| "grad_norm": 0.2257668524980545, | |
| "learning_rate": 1.0715486037180363e-05, | |
| "loss": 0.9546, | |
| "mean_token_accuracy": 0.7042631804943085, | |
| "num_tokens": 2669645562.0, | |
| "step": 1285 | |
| }, | |
| { | |
| "epoch": 1.8022361984626136, | |
| "grad_norm": 0.217051699757576, | |
| "learning_rate": 1.0659448144432585e-05, | |
| "loss": 0.9602, | |
| "mean_token_accuracy": 0.702583622932434, | |
| "num_tokens": 2680036133.0, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 1.809224318658281, | |
| "grad_norm": 0.22850480675697327, | |
| "learning_rate": 1.060351937695479e-05, | |
| "loss": 0.9421, | |
| "mean_token_accuracy": 0.707316416501999, | |
| "num_tokens": 2690446906.0, | |
| "step": 1295 | |
| }, | |
| { | |
| "epoch": 1.8162124388539485, | |
| "grad_norm": 0.20470769703388214, | |
| "learning_rate": 1.0547703050731707e-05, | |
| "loss": 0.9415, | |
| "mean_token_accuracy": 0.7072466671466827, | |
| "num_tokens": 2700857094.0, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 1.8232005590496156, | |
| "grad_norm": 0.2180124670267105, | |
| "learning_rate": 1.049200247508149e-05, | |
| "loss": 0.9444, | |
| "mean_token_accuracy": 0.7065752387046814, | |
| "num_tokens": 2711231823.0, | |
| "step": 1305 | |
| }, | |
| { | |
| "epoch": 1.830188679245283, | |
| "grad_norm": 0.22791129350662231, | |
| "learning_rate": 1.0436420952459508e-05, | |
| "loss": 0.9598, | |
| "mean_token_accuracy": 0.7031501412391663, | |
| "num_tokens": 2721643248.0, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 1.8371767994409502, | |
| "grad_norm": 0.20525507628917694, | |
| "learning_rate": 1.0380961778262536e-05, | |
| "loss": 0.9493, | |
| "mean_token_accuracy": 0.7051385581493378, | |
| "num_tokens": 2732049186.0, | |
| "step": 1315 | |
| }, | |
| { | |
| "epoch": 1.8441649196366177, | |
| "grad_norm": 0.21797043085098267, | |
| "learning_rate": 1.0325628240633398e-05, | |
| "loss": 0.9573, | |
| "mean_token_accuracy": 0.703617399930954, | |
| "num_tokens": 2742450874.0, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 1.851153039832285, | |
| "grad_norm": 0.20817072689533234, | |
| "learning_rate": 1.0270423620265982e-05, | |
| "loss": 0.9543, | |
| "mean_token_accuracy": 0.7040122479200364, | |
| "num_tokens": 2752825826.0, | |
| "step": 1325 | |
| }, | |
| { | |
| "epoch": 1.8581411600279525, | |
| "grad_norm": 0.21172043681144714, | |
| "learning_rate": 1.021535119021075e-05, | |
| "loss": 0.945, | |
| "mean_token_accuracy": 0.7063402503728866, | |
| "num_tokens": 2763203546.0, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 1.86512928022362, | |
| "grad_norm": 0.22424334287643433, | |
| "learning_rate": 1.0160414215680674e-05, | |
| "loss": 0.9507, | |
| "mean_token_accuracy": 0.7050115913152695, | |
| "num_tokens": 2773607371.0, | |
| "step": 1335 | |
| }, | |
| { | |
| "epoch": 1.8721174004192873, | |
| "grad_norm": 0.24080626666545868, | |
| "learning_rate": 1.0105615953857652e-05, | |
| "loss": 0.9502, | |
| "mean_token_accuracy": 0.7050412774085999, | |
| "num_tokens": 2783983723.0, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 1.8791055206149547, | |
| "grad_norm": 0.22549575567245483, | |
| "learning_rate": 1.0050959653699377e-05, | |
| "loss": 0.9507, | |
| "mean_token_accuracy": 0.7051549941301346, | |
| "num_tokens": 2794356215.0, | |
| "step": 1345 | |
| }, | |
| { | |
| "epoch": 1.886093640810622, | |
| "grad_norm": 0.2175074964761734, | |
| "learning_rate": 9.99644855574672e-06, | |
| "loss": 0.9495, | |
| "mean_token_accuracy": 0.705304104089737, | |
| "num_tokens": 2804730279.0, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 1.8930817610062893, | |
| "grad_norm": 0.23900523781776428, | |
| "learning_rate": 9.942085891931592e-06, | |
| "loss": 0.9587, | |
| "mean_token_accuracy": 0.7033442050218582, | |
| "num_tokens": 2815149853.0, | |
| "step": 1355 | |
| }, | |
| { | |
| "epoch": 1.9000698812019565, | |
| "grad_norm": 0.2067466527223587, | |
| "learning_rate": 9.887874885385337e-06, | |
| "loss": 0.9544, | |
| "mean_token_accuracy": 0.7042512089014054, | |
| "num_tokens": 2825562033.0, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 1.907058001397624, | |
| "grad_norm": 0.20439454913139343, | |
| "learning_rate": 9.833818750247615e-06, | |
| "loss": 0.9609, | |
| "mean_token_accuracy": 0.702758863568306, | |
| "num_tokens": 2835964405.0, | |
| "step": 1365 | |
| }, | |
| { | |
| "epoch": 1.9140461215932913, | |
| "grad_norm": 0.20674073696136475, | |
| "learning_rate": 9.779920691475857e-06, | |
| "loss": 0.9508, | |
| "mean_token_accuracy": 0.7046973228454589, | |
| "num_tokens": 2846369993.0, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 1.9210342417889588, | |
| "grad_norm": 0.21767380833625793, | |
| "learning_rate": 9.726183904655234e-06, | |
| "loss": 0.9575, | |
| "mean_token_accuracy": 0.7034873872995376, | |
| "num_tokens": 2856789273.0, | |
| "step": 1375 | |
| }, | |
| { | |
| "epoch": 1.9280223619846262, | |
| "grad_norm": 0.22383835911750793, | |
| "learning_rate": 9.67261157580919e-06, | |
| "loss": 0.9513, | |
| "mean_token_accuracy": 0.7051774680614471, | |
| "num_tokens": 2867172272.0, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 1.9350104821802936, | |
| "grad_norm": 0.21862174570560455, | |
| "learning_rate": 9.619206881210558e-06, | |
| "loss": 0.9487, | |
| "mean_token_accuracy": 0.7056363463401795, | |
| "num_tokens": 2877584360.0, | |
| "step": 1385 | |
| }, | |
| { | |
| "epoch": 1.941998602375961, | |
| "grad_norm": 0.2177773416042328, | |
| "learning_rate": 9.565972987193225e-06, | |
| "loss": 0.9467, | |
| "mean_token_accuracy": 0.7059289753437042, | |
| "num_tokens": 2888015561.0, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 1.9489867225716282, | |
| "grad_norm": 0.2325785905122757, | |
| "learning_rate": 9.512913049964414e-06, | |
| "loss": 0.9458, | |
| "mean_token_accuracy": 0.7066907495260238, | |
| "num_tokens": 2898383704.0, | |
| "step": 1395 | |
| }, | |
| { | |
| "epoch": 1.9559748427672956, | |
| "grad_norm": 0.2303362786769867, | |
| "learning_rate": 9.460030215417546e-06, | |
| "loss": 0.9506, | |
| "mean_token_accuracy": 0.7054461807012558, | |
| "num_tokens": 2908789197.0, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 1.9629629629629628, | |
| "grad_norm": 0.24411572515964508, | |
| "learning_rate": 9.407327618945726e-06, | |
| "loss": 0.9464, | |
| "mean_token_accuracy": 0.7061127424240112, | |
| "num_tokens": 2919211817.0, | |
| "step": 1405 | |
| }, | |
| { | |
| "epoch": 1.9699510831586302, | |
| "grad_norm": 0.20445261895656586, | |
| "learning_rate": 9.354808385255845e-06, | |
| "loss": 0.9606, | |
| "mean_token_accuracy": 0.7030856341123581, | |
| "num_tokens": 2929612659.0, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 1.9769392033542976, | |
| "grad_norm": 0.24357956647872925, | |
| "learning_rate": 9.302475628183325e-06, | |
| "loss": 0.9447, | |
| "mean_token_accuracy": 0.7061789721250534, | |
| "num_tokens": 2940019034.0, | |
| "step": 1415 | |
| }, | |
| { | |
| "epoch": 1.983927323549965, | |
| "grad_norm": 0.21846109628677368, | |
| "learning_rate": 9.25033245050748e-06, | |
| "loss": 0.953, | |
| "mean_token_accuracy": 0.7045047521591187, | |
| "num_tokens": 2950402980.0, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 1.9909154437456325, | |
| "grad_norm": 0.20229177176952362, | |
| "learning_rate": 9.198381943767596e-06, | |
| "loss": 0.9336, | |
| "mean_token_accuracy": 0.7093939870595932, | |
| "num_tokens": 2960814004.0, | |
| "step": 1425 | |
| }, | |
| { | |
| "epoch": 1.9979035639412999, | |
| "grad_norm": 0.22578562796115875, | |
| "learning_rate": 9.146627188079584e-06, | |
| "loss": 0.9399, | |
| "mean_token_accuracy": 0.7082905799150467, | |
| "num_tokens": 2971180910.0, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 2.0041928721174003, | |
| "grad_norm": 0.333396315574646, | |
| "learning_rate": 9.0950712519534e-06, | |
| "loss": 0.9168, | |
| "mean_token_accuracy": 0.7133585843775008, | |
| "num_tokens": 2980537865.0, | |
| "step": 1435 | |
| }, | |
| { | |
| "epoch": 2.0111809923130677, | |
| "grad_norm": 0.3023437559604645, | |
| "learning_rate": 9.043717192111097e-06, | |
| "loss": 0.9179, | |
| "mean_token_accuracy": 0.712645736336708, | |
| "num_tokens": 2990956848.0, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 2.018169112508735, | |
| "grad_norm": 0.2666738033294678, | |
| "learning_rate": 8.992568053305601e-06, | |
| "loss": 0.9192, | |
| "mean_token_accuracy": 0.7127588331699372, | |
| "num_tokens": 3001334490.0, | |
| "step": 1445 | |
| }, | |
| { | |
| "epoch": 2.0251572327044025, | |
| "grad_norm": 0.3103075921535492, | |
| "learning_rate": 8.94162686814018e-06, | |
| "loss": 0.921, | |
| "mean_token_accuracy": 0.7121388405561447, | |
| "num_tokens": 3011735960.0, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 2.03214535290007, | |
| "grad_norm": 0.3161068558692932, | |
| "learning_rate": 8.890896656888664e-06, | |
| "loss": 0.9164, | |
| "mean_token_accuracy": 0.7127919644117355, | |
| "num_tokens": 3022067629.0, | |
| "step": 1455 | |
| }, | |
| { | |
| "epoch": 2.0391334730957373, | |
| "grad_norm": 0.2562922239303589, | |
| "learning_rate": 8.84038042731634e-06, | |
| "loss": 0.9189, | |
| "mean_token_accuracy": 0.7123399645090103, | |
| "num_tokens": 3032463160.0, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 2.0461215932914047, | |
| "grad_norm": 0.24388477206230164, | |
| "learning_rate": 8.79008117450166e-06, | |
| "loss": 0.915, | |
| "mean_token_accuracy": 0.7135502338409424, | |
| "num_tokens": 3042894163.0, | |
| "step": 1465 | |
| }, | |
| { | |
| "epoch": 2.053109713487072, | |
| "grad_norm": 0.2350393682718277, | |
| "learning_rate": 8.740001880658638e-06, | |
| "loss": 0.9217, | |
| "mean_token_accuracy": 0.712294626235962, | |
| "num_tokens": 3053289714.0, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 2.060097833682739, | |
| "grad_norm": 0.2192184180021286, | |
| "learning_rate": 8.690145514960053e-06, | |
| "loss": 0.916, | |
| "mean_token_accuracy": 0.7133681744337081, | |
| "num_tokens": 3063670311.0, | |
| "step": 1475 | |
| }, | |
| { | |
| "epoch": 2.0670859538784065, | |
| "grad_norm": 0.21661223471164703, | |
| "learning_rate": 8.640515033361391e-06, | |
| "loss": 0.9152, | |
| "mean_token_accuracy": 0.7137379050254822, | |
| "num_tokens": 3074069500.0, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 2.074074074074074, | |
| "grad_norm": 0.2259409874677658, | |
| "learning_rate": 8.591113378425612e-06, | |
| "loss": 0.9202, | |
| "mean_token_accuracy": 0.7123195230960846, | |
| "num_tokens": 3084436917.0, | |
| "step": 1485 | |
| }, | |
| { | |
| "epoch": 2.0810621942697414, | |
| "grad_norm": 0.22233138978481293, | |
| "learning_rate": 8.541943479148665e-06, | |
| "loss": 0.9097, | |
| "mean_token_accuracy": 0.7149380385875702, | |
| "num_tokens": 3094851526.0, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 2.088050314465409, | |
| "grad_norm": 0.2333570122718811, | |
| "learning_rate": 8.493008250785837e-06, | |
| "loss": 0.9104, | |
| "mean_token_accuracy": 0.7141807585954666, | |
| "num_tokens": 3105236001.0, | |
| "step": 1495 | |
| }, | |
| { | |
| "epoch": 2.095038434661076, | |
| "grad_norm": 0.21300968527793884, | |
| "learning_rate": 8.444310594678919e-06, | |
| "loss": 0.9171, | |
| "mean_token_accuracy": 0.7128302127122879, | |
| "num_tokens": 3115627860.0, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 2.1020265548567436, | |
| "grad_norm": 0.2188606709241867, | |
| "learning_rate": 8.395853398084167e-06, | |
| "loss": 0.9219, | |
| "mean_token_accuracy": 0.7115776121616364, | |
| "num_tokens": 3126018436.0, | |
| "step": 1505 | |
| }, | |
| { | |
| "epoch": 2.109014675052411, | |
| "grad_norm": 0.2129020094871521, | |
| "learning_rate": 8.34763953400114e-06, | |
| "loss": 0.9211, | |
| "mean_token_accuracy": 0.7127009093761444, | |
| "num_tokens": 3136413374.0, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 2.1160027952480784, | |
| "grad_norm": 0.2339140921831131, | |
| "learning_rate": 8.29967186100234e-06, | |
| "loss": 0.9054, | |
| "mean_token_accuracy": 0.7164467304944993, | |
| "num_tokens": 3146798645.0, | |
| "step": 1515 | |
| }, | |
| { | |
| "epoch": 2.1229909154437454, | |
| "grad_norm": 0.210405170917511, | |
| "learning_rate": 8.251953223063756e-06, | |
| "loss": 0.9253, | |
| "mean_token_accuracy": 0.7112928241491318, | |
| "num_tokens": 3157172766.0, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 2.129979035639413, | |
| "grad_norm": 0.21990114450454712, | |
| "learning_rate": 8.204486449396211e-06, | |
| "loss": 0.9066, | |
| "mean_token_accuracy": 0.7155609846115112, | |
| "num_tokens": 3167537336.0, | |
| "step": 1525 | |
| }, | |
| { | |
| "epoch": 2.1369671558350802, | |
| "grad_norm": 0.21438851952552795, | |
| "learning_rate": 8.157274354277657e-06, | |
| "loss": 0.9118, | |
| "mean_token_accuracy": 0.7143852919340133, | |
| "num_tokens": 3177930338.0, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 2.1439552760307476, | |
| "grad_norm": 0.22087393701076508, | |
| "learning_rate": 8.110319736886287e-06, | |
| "loss": 0.9206, | |
| "mean_token_accuracy": 0.71253402531147, | |
| "num_tokens": 3188312246.0, | |
| "step": 1535 | |
| }, | |
| { | |
| "epoch": 2.150943396226415, | |
| "grad_norm": 0.22319301962852478, | |
| "learning_rate": 8.063625381134593e-06, | |
| "loss": 0.9205, | |
| "mean_token_accuracy": 0.7122439503669739, | |
| "num_tokens": 3198688206.0, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 2.1579315164220825, | |
| "grad_norm": 0.210270494222641, | |
| "learning_rate": 8.017194055504297e-06, | |
| "loss": 0.9229, | |
| "mean_token_accuracy": 0.7112518161535263, | |
| "num_tokens": 3209093337.0, | |
| "step": 1545 | |
| }, | |
| { | |
| "epoch": 2.16491963661775, | |
| "grad_norm": 0.22403500974178314, | |
| "learning_rate": 7.971028512882226e-06, | |
| "loss": 0.9173, | |
| "mean_token_accuracy": 0.7129957526922226, | |
| "num_tokens": 3219447479.0, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 2.1719077568134173, | |
| "grad_norm": 0.22942931950092316, | |
| "learning_rate": 7.925131490397075e-06, | |
| "loss": 0.9222, | |
| "mean_token_accuracy": 0.7115698724985122, | |
| "num_tokens": 3229834720.0, | |
| "step": 1555 | |
| }, | |
| { | |
| "epoch": 2.1788958770090847, | |
| "grad_norm": 0.21285541355609894, | |
| "learning_rate": 7.879505709257131e-06, | |
| "loss": 0.9192, | |
| "mean_token_accuracy": 0.7122551679611206, | |
| "num_tokens": 3240228413.0, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 2.185883997204752, | |
| "grad_norm": 0.22184862196445465, | |
| "learning_rate": 7.834153874588948e-06, | |
| "loss": 0.9184, | |
| "mean_token_accuracy": 0.7121668696403504, | |
| "num_tokens": 3250638493.0, | |
| "step": 1565 | |
| }, | |
| { | |
| "epoch": 2.192872117400419, | |
| "grad_norm": 0.2081901580095291, | |
| "learning_rate": 7.789078675276934e-06, | |
| "loss": 0.9215, | |
| "mean_token_accuracy": 0.7117640644311904, | |
| "num_tokens": 3261034976.0, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 2.1998602375960865, | |
| "grad_norm": 0.22591695189476013, | |
| "learning_rate": 7.744282783803961e-06, | |
| "loss": 0.9163, | |
| "mean_token_accuracy": 0.7132966309785843, | |
| "num_tokens": 3271461658.0, | |
| "step": 1575 | |
| }, | |
| { | |
| "epoch": 2.206848357791754, | |
| "grad_norm": 0.21045826375484467, | |
| "learning_rate": 7.699768856092882e-06, | |
| "loss": 0.9144, | |
| "mean_token_accuracy": 0.713357812166214, | |
| "num_tokens": 3281821360.0, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 2.2138364779874213, | |
| "grad_norm": 0.21535682678222656, | |
| "learning_rate": 7.655539531349097e-06, | |
| "loss": 0.9202, | |
| "mean_token_accuracy": 0.7120324313640595, | |
| "num_tokens": 3292235573.0, | |
| "step": 1585 | |
| }, | |
| { | |
| "epoch": 2.2208245981830887, | |
| "grad_norm": 0.2165815681219101, | |
| "learning_rate": 7.611597431904043e-06, | |
| "loss": 0.9183, | |
| "mean_token_accuracy": 0.7125867277383804, | |
| "num_tokens": 3302639624.0, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 2.227812718378756, | |
| "grad_norm": 0.21561956405639648, | |
| "learning_rate": 7.567945163059741e-06, | |
| "loss": 0.9087, | |
| "mean_token_accuracy": 0.7150823384523392, | |
| "num_tokens": 3313021924.0, | |
| "step": 1595 | |
| }, | |
| { | |
| "epoch": 2.2348008385744236, | |
| "grad_norm": 0.21405579149723053, | |
| "learning_rate": 7.524585312934318e-06, | |
| "loss": 0.9116, | |
| "mean_token_accuracy": 0.7144843906164169, | |
| "num_tokens": 3323394648.0, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 2.241788958770091, | |
| "grad_norm": 0.22909052670001984, | |
| "learning_rate": 7.481520452308566e-06, | |
| "loss": 0.9124, | |
| "mean_token_accuracy": 0.7143676280975342, | |
| "num_tokens": 3333800522.0, | |
| "step": 1605 | |
| }, | |
| { | |
| "epoch": 2.2487770789657584, | |
| "grad_norm": 0.21624407172203064, | |
| "learning_rate": 7.438753134473508e-06, | |
| "loss": 0.9197, | |
| "mean_token_accuracy": 0.7123973101377488, | |
| "num_tokens": 3344220363.0, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 2.2557651991614254, | |
| "grad_norm": 0.2294032871723175, | |
| "learning_rate": 7.396285895079038e-06, | |
| "loss": 0.9177, | |
| "mean_token_accuracy": 0.7133157104253769, | |
| "num_tokens": 3354615793.0, | |
| "step": 1615 | |
| }, | |
| { | |
| "epoch": 2.262753319357093, | |
| "grad_norm": 0.2232784926891327, | |
| "learning_rate": 7.354121251983562e-06, | |
| "loss": 0.9196, | |
| "mean_token_accuracy": 0.712562283873558, | |
| "num_tokens": 3365013962.0, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 2.26974143955276, | |
| "grad_norm": 0.21506592631340027, | |
| "learning_rate": 7.312261705104725e-06, | |
| "loss": 0.92, | |
| "mean_token_accuracy": 0.7120138168334961, | |
| "num_tokens": 3375415677.0, | |
| "step": 1625 | |
| }, | |
| { | |
| "epoch": 2.2767295597484276, | |
| "grad_norm": 0.2243955284357071, | |
| "learning_rate": 7.270709736271191e-06, | |
| "loss": 0.9242, | |
| "mean_token_accuracy": 0.7115818649530411, | |
| "num_tokens": 3385793817.0, | |
| "step": 1630 | |
| }, | |
| { | |
| "epoch": 2.283717679944095, | |
| "grad_norm": 0.23249688744544983, | |
| "learning_rate": 7.229467809075501e-06, | |
| "loss": 0.9209, | |
| "mean_token_accuracy": 0.7121629118919373, | |
| "num_tokens": 3396147298.0, | |
| "step": 1635 | |
| }, | |
| { | |
| "epoch": 2.2907058001397624, | |
| "grad_norm": 0.25106868147850037, | |
| "learning_rate": 7.188538368727998e-06, | |
| "loss": 0.9259, | |
| "mean_token_accuracy": 0.7110367149114609, | |
| "num_tokens": 3406552609.0, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 2.29769392033543, | |
| "grad_norm": 0.22599752247333527, | |
| "learning_rate": 7.147923841911859e-06, | |
| "loss": 0.9132, | |
| "mean_token_accuracy": 0.7141028851270675, | |
| "num_tokens": 3416969976.0, | |
| "step": 1645 | |
| }, | |
| { | |
| "epoch": 2.3046820405310973, | |
| "grad_norm": 0.2180515080690384, | |
| "learning_rate": 7.107626636639224e-06, | |
| "loss": 0.918, | |
| "mean_token_accuracy": 0.7125603795051575, | |
| "num_tokens": 3427380676.0, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 2.3116701607267647, | |
| "grad_norm": 0.21943214535713196, | |
| "learning_rate": 7.067649142108414e-06, | |
| "loss": 0.9286, | |
| "mean_token_accuracy": 0.7103828489780426, | |
| "num_tokens": 3437787781.0, | |
| "step": 1655 | |
| }, | |
| { | |
| "epoch": 2.318658280922432, | |
| "grad_norm": 0.224712535738945, | |
| "learning_rate": 7.027993728562282e-06, | |
| "loss": 0.9227, | |
| "mean_token_accuracy": 0.7115794867277145, | |
| "num_tokens": 3448190522.0, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 2.325646401118099, | |
| "grad_norm": 0.21609260141849518, | |
| "learning_rate": 6.988662747147685e-06, | |
| "loss": 0.9124, | |
| "mean_token_accuracy": 0.7146041959524154, | |
| "num_tokens": 3458566741.0, | |
| "step": 1665 | |
| }, | |
| { | |
| "epoch": 2.3326345213137665, | |
| "grad_norm": 0.21408966183662415, | |
| "learning_rate": 6.949658529776083e-06, | |
| "loss": 0.9206, | |
| "mean_token_accuracy": 0.7124300539493561, | |
| "num_tokens": 3468965946.0, | |
| "step": 1670 | |
| }, | |
| { | |
| "epoch": 2.339622641509434, | |
| "grad_norm": 0.21197549998760223, | |
| "learning_rate": 6.9109833889852804e-06, | |
| "loss": 0.9214, | |
| "mean_token_accuracy": 0.7117568552494049, | |
| "num_tokens": 3479375801.0, | |
| "step": 1675 | |
| }, | |
| { | |
| "epoch": 2.3466107617051013, | |
| "grad_norm": 0.21107982099056244, | |
| "learning_rate": 6.872639617802327e-06, | |
| "loss": 0.9164, | |
| "mean_token_accuracy": 0.7129042834043503, | |
| "num_tokens": 3489780868.0, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 2.3535988819007687, | |
| "grad_norm": 0.2257632613182068, | |
| "learning_rate": 6.834629489607547e-06, | |
| "loss": 0.9254, | |
| "mean_token_accuracy": 0.7109665483236313, | |
| "num_tokens": 3500196102.0, | |
| "step": 1685 | |
| }, | |
| { | |
| "epoch": 2.360587002096436, | |
| "grad_norm": 0.22178520262241364, | |
| "learning_rate": 6.796955257999769e-06, | |
| "loss": 0.9138, | |
| "mean_token_accuracy": 0.7136050283908844, | |
| "num_tokens": 3510590489.0, | |
| "step": 1690 | |
| }, | |
| { | |
| "epoch": 2.3675751222921035, | |
| "grad_norm": 0.21775364875793457, | |
| "learning_rate": 6.759619156662699e-06, | |
| "loss": 0.917, | |
| "mean_token_accuracy": 0.712714570760727, | |
| "num_tokens": 3520949287.0, | |
| "step": 1695 | |
| }, | |
| { | |
| "epoch": 2.374563242487771, | |
| "grad_norm": 0.2097868025302887, | |
| "learning_rate": 6.722623399232498e-06, | |
| "loss": 0.9183, | |
| "mean_token_accuracy": 0.7127634763717652, | |
| "num_tokens": 3531350527.0, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 2.381551362683438, | |
| "grad_norm": 0.2364913821220398, | |
| "learning_rate": 6.685970179166526e-06, | |
| "loss": 0.9291, | |
| "mean_token_accuracy": 0.7098951876163483, | |
| "num_tokens": 3541732110.0, | |
| "step": 1705 | |
| }, | |
| { | |
| "epoch": 2.3885394828791053, | |
| "grad_norm": 0.2230016142129898, | |
| "learning_rate": 6.649661669613303e-06, | |
| "loss": 0.9243, | |
| "mean_token_accuracy": 0.711082261800766, | |
| "num_tokens": 3552132624.0, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 2.3955276030747727, | |
| "grad_norm": 0.20846879482269287, | |
| "learning_rate": 6.6137000232836515e-06, | |
| "loss": 0.9258, | |
| "mean_token_accuracy": 0.7104093581438065, | |
| "num_tokens": 3562541950.0, | |
| "step": 1715 | |
| }, | |
| { | |
| "epoch": 2.40251572327044, | |
| "grad_norm": 0.2259218990802765, | |
| "learning_rate": 6.578087372323081e-06, | |
| "loss": 0.9202, | |
| "mean_token_accuracy": 0.7123449355363846, | |
| "num_tokens": 3572913217.0, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 2.4095038434661076, | |
| "grad_norm": 0.2155347317457199, | |
| "learning_rate": 6.542825828185355e-06, | |
| "loss": 0.9116, | |
| "mean_token_accuracy": 0.7139072418212891, | |
| "num_tokens": 3583298270.0, | |
| "step": 1725 | |
| }, | |
| { | |
| "epoch": 2.416491963661775, | |
| "grad_norm": 0.21978759765625, | |
| "learning_rate": 6.507917481507322e-06, | |
| "loss": 0.9156, | |
| "mean_token_accuracy": 0.7135312378406524, | |
| "num_tokens": 3593703423.0, | |
| "step": 1730 | |
| }, | |
| { | |
| "epoch": 2.4234800838574424, | |
| "grad_norm": 0.23815348744392395, | |
| "learning_rate": 6.473364401984947e-06, | |
| "loss": 0.9254, | |
| "mean_token_accuracy": 0.710757565498352, | |
| "num_tokens": 3604044031.0, | |
| "step": 1735 | |
| }, | |
| { | |
| "epoch": 2.43046820405311, | |
| "grad_norm": 0.22524483501911163, | |
| "learning_rate": 6.439168638250615e-06, | |
| "loss": 0.9238, | |
| "mean_token_accuracy": 0.7114168167114258, | |
| "num_tokens": 3614426228.0, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 2.4374563242487772, | |
| "grad_norm": 0.2189108431339264, | |
| "learning_rate": 6.4053322177516585e-06, | |
| "loss": 0.9141, | |
| "mean_token_accuracy": 0.7138565927743912, | |
| "num_tokens": 3624785064.0, | |
| "step": 1745 | |
| }, | |
| { | |
| "epoch": 2.4444444444444446, | |
| "grad_norm": 0.2147430032491684, | |
| "learning_rate": 6.371857146630151e-06, | |
| "loss": 0.9257, | |
| "mean_token_accuracy": 0.7105766355991363, | |
| "num_tokens": 3635155170.0, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 2.451432564640112, | |
| "grad_norm": 0.21530896425247192, | |
| "learning_rate": 6.338745409603976e-06, | |
| "loss": 0.9173, | |
| "mean_token_accuracy": 0.7129728078842164, | |
| "num_tokens": 3645471686.0, | |
| "step": 1755 | |
| }, | |
| { | |
| "epoch": 2.458420684835779, | |
| "grad_norm": 0.2143326848745346, | |
| "learning_rate": 6.305998969849134e-06, | |
| "loss": 0.9143, | |
| "mean_token_accuracy": 0.7136251211166382, | |
| "num_tokens": 3655843687.0, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 2.4654088050314464, | |
| "grad_norm": 0.21387705206871033, | |
| "learning_rate": 6.2736197688833724e-06, | |
| "loss": 0.9332, | |
| "mean_token_accuracy": 0.7091861546039582, | |
| "num_tokens": 3666240232.0, | |
| "step": 1765 | |
| }, | |
| { | |
| "epoch": 2.472396925227114, | |
| "grad_norm": 0.21236532926559448, | |
| "learning_rate": 6.241609726451045e-06, | |
| "loss": 0.912, | |
| "mean_token_accuracy": 0.7143011152744293, | |
| "num_tokens": 3676651293.0, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 2.4793850454227813, | |
| "grad_norm": 0.21449092030525208, | |
| "learning_rate": 6.209970740409321e-06, | |
| "loss": 0.9209, | |
| "mean_token_accuracy": 0.7120813369750977, | |
| "num_tokens": 3687062327.0, | |
| "step": 1775 | |
| }, | |
| { | |
| "epoch": 2.4863731656184487, | |
| "grad_norm": 0.21442821621894836, | |
| "learning_rate": 6.178704686615637e-06, | |
| "loss": 0.9174, | |
| "mean_token_accuracy": 0.713122546672821, | |
| "num_tokens": 3697470376.0, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 2.493361285814116, | |
| "grad_norm": 0.21565364301204681, | |
| "learning_rate": 6.147813418816498e-06, | |
| "loss": 0.9108, | |
| "mean_token_accuracy": 0.7147074729204178, | |
| "num_tokens": 3707874673.0, | |
| "step": 1785 | |
| }, | |
| { | |
| "epoch": 2.5003494060097835, | |
| "grad_norm": 0.21220961213111877, | |
| "learning_rate": 6.117298768537552e-06, | |
| "loss": 0.9205, | |
| "mean_token_accuracy": 0.7123848766088485, | |
| "num_tokens": 3718240853.0, | |
| "step": 1790 | |
| }, | |
| { | |
| "epoch": 2.5073375262054505, | |
| "grad_norm": 0.2127240151166916, | |
| "learning_rate": 6.087162544975018e-06, | |
| "loss": 0.9249, | |
| "mean_token_accuracy": 0.7108253628015518, | |
| "num_tokens": 3728655146.0, | |
| "step": 1795 | |
| }, | |
| { | |
| "epoch": 2.514325646401118, | |
| "grad_norm": 0.2235720455646515, | |
| "learning_rate": 6.057406534888403e-06, | |
| "loss": 0.9257, | |
| "mean_token_accuracy": 0.7108543246984482, | |
| "num_tokens": 3739022782.0, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 2.5213137665967853, | |
| "grad_norm": 0.2347782701253891, | |
| "learning_rate": 6.028032502494578e-06, | |
| "loss": 0.9197, | |
| "mean_token_accuracy": 0.7129616022109986, | |
| "num_tokens": 3749431904.0, | |
| "step": 1805 | |
| }, | |
| { | |
| "epoch": 2.5283018867924527, | |
| "grad_norm": 0.23844227194786072, | |
| "learning_rate": 5.999042189363176e-06, | |
| "loss": 0.9158, | |
| "mean_token_accuracy": 0.7129719346761704, | |
| "num_tokens": 3759845315.0, | |
| "step": 1810 | |
| }, | |
| { | |
| "epoch": 2.53529000698812, | |
| "grad_norm": 0.22466135025024414, | |
| "learning_rate": 5.970437314313328e-06, | |
| "loss": 0.919, | |
| "mean_token_accuracy": 0.712509247660637, | |
| "num_tokens": 3770259622.0, | |
| "step": 1815 | |
| }, | |
| { | |
| "epoch": 2.5422781271837875, | |
| "grad_norm": 0.21150830388069153, | |
| "learning_rate": 5.9422195733117685e-06, | |
| "loss": 0.9221, | |
| "mean_token_accuracy": 0.7120893627405167, | |
| "num_tokens": 3780638703.0, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 2.549266247379455, | |
| "grad_norm": 0.20506353676319122, | |
| "learning_rate": 5.914390639372266e-06, | |
| "loss": 0.9209, | |
| "mean_token_accuracy": 0.7126324087381363, | |
| "num_tokens": 3791047016.0, | |
| "step": 1825 | |
| }, | |
| { | |
| "epoch": 2.5562543675751224, | |
| "grad_norm": 0.2072431892156601, | |
| "learning_rate": 5.8869521624564496e-06, | |
| "loss": 0.9129, | |
| "mean_token_accuracy": 0.7142052918672561, | |
| "num_tokens": 3801447729.0, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 2.56324248777079, | |
| "grad_norm": 0.20922759175300598, | |
| "learning_rate": 5.859905769375965e-06, | |
| "loss": 0.9236, | |
| "mean_token_accuracy": 0.7109846025705338, | |
| "num_tokens": 3811848994.0, | |
| "step": 1835 | |
| }, | |
| { | |
| "epoch": 2.570230607966457, | |
| "grad_norm": 0.214607372879982, | |
| "learning_rate": 5.83325306369604e-06, | |
| "loss": 0.9087, | |
| "mean_token_accuracy": 0.7150654703378677, | |
| "num_tokens": 3822222315.0, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 2.5772187281621246, | |
| "grad_norm": 0.2122754007577896, | |
| "learning_rate": 5.806995625640393e-06, | |
| "loss": 0.9193, | |
| "mean_token_accuracy": 0.7120439529418945, | |
| "num_tokens": 3832658214.0, | |
| "step": 1845 | |
| }, | |
| { | |
| "epoch": 2.584206848357792, | |
| "grad_norm": 0.2132931798696518, | |
| "learning_rate": 5.781135011997554e-06, | |
| "loss": 0.9229, | |
| "mean_token_accuracy": 0.711396324634552, | |
| "num_tokens": 3843058336.0, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 2.591194968553459, | |
| "grad_norm": 0.22513744235038757, | |
| "learning_rate": 5.755672756028558e-06, | |
| "loss": 0.9153, | |
| "mean_token_accuracy": 0.7129842311143875, | |
| "num_tokens": 3853440632.0, | |
| "step": 1855 | |
| }, | |
| { | |
| "epoch": 2.5981830887491264, | |
| "grad_norm": 0.21373115479946136, | |
| "learning_rate": 5.730610367376047e-06, | |
| "loss": 0.9211, | |
| "mean_token_accuracy": 0.7117935091257095, | |
| "num_tokens": 3863875661.0, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 2.605171208944794, | |
| "grad_norm": 0.20069323480129242, | |
| "learning_rate": 5.705949331974746e-06, | |
| "loss": 0.9165, | |
| "mean_token_accuracy": 0.7130931288003921, | |
| "num_tokens": 3874289974.0, | |
| "step": 1865 | |
| }, | |
| { | |
| "epoch": 2.6121593291404612, | |
| "grad_norm": 0.2091623991727829, | |
| "learning_rate": 5.681691111963388e-06, | |
| "loss": 0.9172, | |
| "mean_token_accuracy": 0.7131164252758027, | |
| "num_tokens": 3884708109.0, | |
| "step": 1870 | |
| }, | |
| { | |
| "epoch": 2.6191474493361286, | |
| "grad_norm": 0.22306294739246368, | |
| "learning_rate": 5.657837145598001e-06, | |
| "loss": 0.9324, | |
| "mean_token_accuracy": 0.7094870746135712, | |
| "num_tokens": 3895062073.0, | |
| "step": 1875 | |
| }, | |
| { | |
| "epoch": 2.626135569531796, | |
| "grad_norm": 0.20904283225536346, | |
| "learning_rate": 5.634388847166651e-06, | |
| "loss": 0.9235, | |
| "mean_token_accuracy": 0.7111514359712601, | |
| "num_tokens": 3905470817.0, | |
| "step": 1880 | |
| }, | |
| { | |
| "epoch": 2.6331236897274635, | |
| "grad_norm": 0.21988101303577423, | |
| "learning_rate": 5.611347606905577e-06, | |
| "loss": 0.9284, | |
| "mean_token_accuracy": 0.7102498173713684, | |
| "num_tokens": 3915890767.0, | |
| "step": 1885 | |
| }, | |
| { | |
| "epoch": 2.6401118099231304, | |
| "grad_norm": 0.21687500178813934, | |
| "learning_rate": 5.58871479091678e-06, | |
| "loss": 0.9221, | |
| "mean_token_accuracy": 0.7118266075849533, | |
| "num_tokens": 3926285033.0, | |
| "step": 1890 | |
| }, | |
| { | |
| "epoch": 2.647099930118798, | |
| "grad_norm": 0.21978770196437836, | |
| "learning_rate": 5.566491741087012e-06, | |
| "loss": 0.9242, | |
| "mean_token_accuracy": 0.7114381521940232, | |
| "num_tokens": 3936684569.0, | |
| "step": 1895 | |
| }, | |
| { | |
| "epoch": 2.6540880503144653, | |
| "grad_norm": 0.21627257764339447, | |
| "learning_rate": 5.5446797750082235e-06, | |
| "loss": 0.9151, | |
| "mean_token_accuracy": 0.7139195591211319, | |
| "num_tokens": 3947042426.0, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 2.6610761705101327, | |
| "grad_norm": 0.2161075621843338, | |
| "learning_rate": 5.523280185899449e-06, | |
| "loss": 0.9239, | |
| "mean_token_accuracy": 0.711063238978386, | |
| "num_tokens": 3957432187.0, | |
| "step": 1905 | |
| }, | |
| { | |
| "epoch": 2.6680642907058, | |
| "grad_norm": 0.22316473722457886, | |
| "learning_rate": 5.5022942425301205e-06, | |
| "loss": 0.9233, | |
| "mean_token_accuracy": 0.7108366757631301, | |
| "num_tokens": 3967825089.0, | |
| "step": 1910 | |
| }, | |
| { | |
| "epoch": 2.6750524109014675, | |
| "grad_norm": 0.2171812802553177, | |
| "learning_rate": 5.481723189144858e-06, | |
| "loss": 0.9177, | |
| "mean_token_accuracy": 0.7128182649612427, | |
| "num_tokens": 3978228911.0, | |
| "step": 1915 | |
| }, | |
| { | |
| "epoch": 2.682040531097135, | |
| "grad_norm": 0.224291130900383, | |
| "learning_rate": 5.46156824538968e-06, | |
| "loss": 0.918, | |
| "mean_token_accuracy": 0.7131029039621353, | |
| "num_tokens": 3988602589.0, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 2.6890286512928023, | |
| "grad_norm": 0.23003533482551575, | |
| "learning_rate": 5.441830606239715e-06, | |
| "loss": 0.9207, | |
| "mean_token_accuracy": 0.7122260361909867, | |
| "num_tokens": 3998998048.0, | |
| "step": 1925 | |
| }, | |
| { | |
| "epoch": 2.6960167714884697, | |
| "grad_norm": 0.22974514961242676, | |
| "learning_rate": 5.4225114419283274e-06, | |
| "loss": 0.9121, | |
| "mean_token_accuracy": 0.7137720167636872, | |
| "num_tokens": 4009386456.0, | |
| "step": 1930 | |
| }, | |
| { | |
| "epoch": 2.703004891684137, | |
| "grad_norm": 0.2437218725681305, | |
| "learning_rate": 5.4036118978777604e-06, | |
| "loss": 0.9094, | |
| "mean_token_accuracy": 0.7151933252811432, | |
| "num_tokens": 4019758598.0, | |
| "step": 1935 | |
| }, | |
| { | |
| "epoch": 2.7099930118798046, | |
| "grad_norm": 0.21390853822231293, | |
| "learning_rate": 5.385133094631197e-06, | |
| "loss": 0.9168, | |
| "mean_token_accuracy": 0.7131306231021881, | |
| "num_tokens": 4030152895.0, | |
| "step": 1940 | |
| }, | |
| { | |
| "epoch": 2.7169811320754715, | |
| "grad_norm": 0.2056519240140915, | |
| "learning_rate": 5.367076127786349e-06, | |
| "loss": 0.9174, | |
| "mean_token_accuracy": 0.7127089440822602, | |
| "num_tokens": 4040504482.0, | |
| "step": 1945 | |
| }, | |
| { | |
| "epoch": 2.723969252271139, | |
| "grad_norm": 0.21333739161491394, | |
| "learning_rate": 5.34944206793048e-06, | |
| "loss": 0.9259, | |
| "mean_token_accuracy": 0.7104780018329621, | |
| "num_tokens": 4050875953.0, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 2.7309573724668064, | |
| "grad_norm": 0.2510630786418915, | |
| "learning_rate": 5.332231960576948e-06, | |
| "loss": 0.911, | |
| "mean_token_accuracy": 0.7144548803567886, | |
| "num_tokens": 4061259135.0, | |
| "step": 1955 | |
| }, | |
| { | |
| "epoch": 2.737945492662474, | |
| "grad_norm": 0.22863414883613586, | |
| "learning_rate": 5.3154468261032e-06, | |
| "loss": 0.92, | |
| "mean_token_accuracy": 0.7120320320129394, | |
| "num_tokens": 4071660276.0, | |
| "step": 1960 | |
| }, | |
| { | |
| "epoch": 2.744933612858141, | |
| "grad_norm": 0.221303790807724, | |
| "learning_rate": 5.2990876596902895e-06, | |
| "loss": 0.9177, | |
| "mean_token_accuracy": 0.7134216696023941, | |
| "num_tokens": 4082088769.0, | |
| "step": 1965 | |
| }, | |
| { | |
| "epoch": 2.7519217330538086, | |
| "grad_norm": 0.21982347965240479, | |
| "learning_rate": 5.283155431263865e-06, | |
| "loss": 0.9267, | |
| "mean_token_accuracy": 0.7102663546800614, | |
| "num_tokens": 4092426308.0, | |
| "step": 1970 | |
| }, | |
| { | |
| "epoch": 2.758909853249476, | |
| "grad_norm": 0.235822394490242, | |
| "learning_rate": 5.267651085436665e-06, | |
| "loss": 0.9206, | |
| "mean_token_accuracy": 0.7118010431528091, | |
| "num_tokens": 4102831184.0, | |
| "step": 1975 | |
| }, | |
| { | |
| "epoch": 2.765897973445143, | |
| "grad_norm": 0.2101694792509079, | |
| "learning_rate": 5.25257554145251e-06, | |
| "loss": 0.9142, | |
| "mean_token_accuracy": 0.7137985199689865, | |
| "num_tokens": 4113206852.0, | |
| "step": 1980 | |
| }, | |
| { | |
| "epoch": 2.7728860936408104, | |
| "grad_norm": 0.20930089056491852, | |
| "learning_rate": 5.237929693131809e-06, | |
| "loss": 0.9237, | |
| "mean_token_accuracy": 0.7110803365707398, | |
| "num_tokens": 4123598405.0, | |
| "step": 1985 | |
| }, | |
| { | |
| "epoch": 2.779874213836478, | |
| "grad_norm": 0.21099254488945007, | |
| "learning_rate": 5.223714408818557e-06, | |
| "loss": 0.9144, | |
| "mean_token_accuracy": 0.7142311304807663, | |
| "num_tokens": 4133993093.0, | |
| "step": 1990 | |
| }, | |
| { | |
| "epoch": 2.7868623340321452, | |
| "grad_norm": 0.21179494261741638, | |
| "learning_rate": 5.2099305313288535e-06, | |
| "loss": 0.9219, | |
| "mean_token_accuracy": 0.711805310845375, | |
| "num_tokens": 4144414884.0, | |
| "step": 1995 | |
| }, | |
| { | |
| "epoch": 2.7938504542278126, | |
| "grad_norm": 0.21408036351203918, | |
| "learning_rate": 5.196578877900937e-06, | |
| "loss": 0.9244, | |
| "mean_token_accuracy": 0.7111784070730209, | |
| "num_tokens": 4154806748.0, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 2.80083857442348, | |
| "grad_norm": 0.20586340129375458, | |
| "learning_rate": 5.183660240146726e-06, | |
| "loss": 0.922, | |
| "mean_token_accuracy": 0.7115540742874146, | |
| "num_tokens": 4165215882.0, | |
| "step": 2005 | |
| }, | |
| { | |
| "epoch": 2.8078266946191475, | |
| "grad_norm": 0.22240835428237915, | |
| "learning_rate": 5.171175384004884e-06, | |
| "loss": 0.9123, | |
| "mean_token_accuracy": 0.7141046315431595, | |
| "num_tokens": 4175639017.0, | |
| "step": 2010 | |
| }, | |
| { | |
| "epoch": 2.814814814814815, | |
| "grad_norm": 0.21101967990398407, | |
| "learning_rate": 5.159125049695412e-06, | |
| "loss": 0.9223, | |
| "mean_token_accuracy": 0.7121775299310684, | |
| "num_tokens": 4186054800.0, | |
| "step": 2015 | |
| }, | |
| { | |
| "epoch": 2.8218029350104823, | |
| "grad_norm": 0.2063722461462021, | |
| "learning_rate": 5.147509951675764e-06, | |
| "loss": 0.928, | |
| "mean_token_accuracy": 0.7096574634313584, | |
| "num_tokens": 4196418295.0, | |
| "step": 2020 | |
| }, | |
| { | |
| "epoch": 2.8287910552061497, | |
| "grad_norm": 0.20113684237003326, | |
| "learning_rate": 5.1363307785984736e-06, | |
| "loss": 0.9245, | |
| "mean_token_accuracy": 0.7111555904150009, | |
| "num_tokens": 4206840950.0, | |
| "step": 2025 | |
| }, | |
| { | |
| "epoch": 2.835779175401817, | |
| "grad_norm": 0.2063385397195816, | |
| "learning_rate": 5.125588193270341e-06, | |
| "loss": 0.9288, | |
| "mean_token_accuracy": 0.7106180161237716, | |
| "num_tokens": 4217228644.0, | |
| "step": 2030 | |
| }, | |
| { | |
| "epoch": 2.8427672955974845, | |
| "grad_norm": 0.20373384654521942, | |
| "learning_rate": 5.1152828326131235e-06, | |
| "loss": 0.9213, | |
| "mean_token_accuracy": 0.7126687705516815, | |
| "num_tokens": 4227613729.0, | |
| "step": 2035 | |
| }, | |
| { | |
| "epoch": 2.8497554157931515, | |
| "grad_norm": 0.21870306134223938, | |
| "learning_rate": 5.105415307625777e-06, | |
| "loss": 0.9162, | |
| "mean_token_accuracy": 0.7128043532371521, | |
| "num_tokens": 4238020240.0, | |
| "step": 2040 | |
| }, | |
| { | |
| "epoch": 2.856743535988819, | |
| "grad_norm": 0.20790112018585205, | |
| "learning_rate": 5.095986203348233e-06, | |
| "loss": 0.9153, | |
| "mean_token_accuracy": 0.7130325943231582, | |
| "num_tokens": 4248451466.0, | |
| "step": 2045 | |
| }, | |
| { | |
| "epoch": 2.8637316561844863, | |
| "grad_norm": 0.20638597011566162, | |
| "learning_rate": 5.086996078826707e-06, | |
| "loss": 0.9204, | |
| "mean_token_accuracy": 0.7117985427379608, | |
| "num_tokens": 4258835788.0, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 2.8707197763801537, | |
| "grad_norm": 0.21053378283977509, | |
| "learning_rate": 5.078445467080554e-06, | |
| "loss": 0.9235, | |
| "mean_token_accuracy": 0.7111718267202377, | |
| "num_tokens": 4269198156.0, | |
| "step": 2055 | |
| }, | |
| { | |
| "epoch": 2.877707896575821, | |
| "grad_norm": 0.21624252200126648, | |
| "learning_rate": 5.07033487507067e-06, | |
| "loss": 0.9258, | |
| "mean_token_accuracy": 0.7103535771369934, | |
| "num_tokens": 4279623393.0, | |
| "step": 2060 | |
| }, | |
| { | |
| "epoch": 2.8846960167714886, | |
| "grad_norm": 0.22071868181228638, | |
| "learning_rate": 5.06266478366943e-06, | |
| "loss": 0.9297, | |
| "mean_token_accuracy": 0.7100573688745498, | |
| "num_tokens": 4290014832.0, | |
| "step": 2065 | |
| }, | |
| { | |
| "epoch": 2.891684136967156, | |
| "grad_norm": 0.21099479496479034, | |
| "learning_rate": 5.055435647632179e-06, | |
| "loss": 0.9269, | |
| "mean_token_accuracy": 0.7104677796363831, | |
| "num_tokens": 4300433907.0, | |
| "step": 2070 | |
| }, | |
| { | |
| "epoch": 2.898672257162823, | |
| "grad_norm": 0.2020501047372818, | |
| "learning_rate": 5.048647895570273e-06, | |
| "loss": 0.9242, | |
| "mean_token_accuracy": 0.7116045236587525, | |
| "num_tokens": 4310825237.0, | |
| "step": 2075 | |
| }, | |
| { | |
| "epoch": 2.9056603773584904, | |
| "grad_norm": 0.20341072976589203, | |
| "learning_rate": 5.042301929925656e-06, | |
| "loss": 0.9191, | |
| "mean_token_accuracy": 0.712513867020607, | |
| "num_tokens": 4321192226.0, | |
| "step": 2080 | |
| }, | |
| { | |
| "epoch": 2.912648497554158, | |
| "grad_norm": 0.2014591544866562, | |
| "learning_rate": 5.0363981269470175e-06, | |
| "loss": 0.9214, | |
| "mean_token_accuracy": 0.7120217561721802, | |
| "num_tokens": 4331623259.0, | |
| "step": 2085 | |
| }, | |
| { | |
| "epoch": 2.919636617749825, | |
| "grad_norm": 0.2104942500591278, | |
| "learning_rate": 5.030936836667467e-06, | |
| "loss": 0.9222, | |
| "mean_token_accuracy": 0.7120204120874405, | |
| "num_tokens": 4342027121.0, | |
| "step": 2090 | |
| }, | |
| { | |
| "epoch": 2.9266247379454926, | |
| "grad_norm": 0.21690668165683746, | |
| "learning_rate": 5.025918382883791e-06, | |
| "loss": 0.9269, | |
| "mean_token_accuracy": 0.7105346918106079, | |
| "num_tokens": 4352427294.0, | |
| "step": 2095 | |
| }, | |
| { | |
| "epoch": 2.93361285814116, | |
| "grad_norm": 0.2217000424861908, | |
| "learning_rate": 5.021343063137254e-06, | |
| "loss": 0.9181, | |
| "mean_token_accuracy": 0.712472003698349, | |
| "num_tokens": 4362825286.0, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 2.9406009783368274, | |
| "grad_norm": 0.2240072786808014, | |
| "learning_rate": 5.017211148695954e-06, | |
| "loss": 0.9277, | |
| "mean_token_accuracy": 0.7105086803436279, | |
| "num_tokens": 4373218481.0, | |
| "step": 2105 | |
| }, | |
| { | |
| "epoch": 2.947589098532495, | |
| "grad_norm": 0.23679901659488678, | |
| "learning_rate": 5.013522884538742e-06, | |
| "loss": 0.9145, | |
| "mean_token_accuracy": 0.7134937226772309, | |
| "num_tokens": 4383588535.0, | |
| "step": 2110 | |
| }, | |
| { | |
| "epoch": 2.9545772187281623, | |
| "grad_norm": 0.2157946079969406, | |
| "learning_rate": 5.0102784893406965e-06, | |
| "loss": 0.9037, | |
| "mean_token_accuracy": 0.7164124727249146, | |
| "num_tokens": 4393978891.0, | |
| "step": 2115 | |
| }, | |
| { | |
| "epoch": 2.9615653389238297, | |
| "grad_norm": 0.20823746919631958, | |
| "learning_rate": 5.0074781554601595e-06, | |
| "loss": 0.927, | |
| "mean_token_accuracy": 0.7102025151252747, | |
| "num_tokens": 4404364352.0, | |
| "step": 2120 | |
| }, | |
| { | |
| "epoch": 2.968553459119497, | |
| "grad_norm": 0.2096376121044159, | |
| "learning_rate": 5.005122048927329e-06, | |
| "loss": 0.931, | |
| "mean_token_accuracy": 0.7095394015312195, | |
| "num_tokens": 4414766826.0, | |
| "step": 2125 | |
| }, | |
| { | |
| "epoch": 2.9755415793151645, | |
| "grad_norm": 0.20438523590564728, | |
| "learning_rate": 5.0032103094344185e-06, | |
| "loss": 0.9246, | |
| "mean_token_accuracy": 0.7110236048698425, | |
| "num_tokens": 4425117863.0, | |
| "step": 2130 | |
| }, | |
| { | |
| "epoch": 2.9825296995108315, | |
| "grad_norm": 0.20655548572540283, | |
| "learning_rate": 5.001743050327372e-06, | |
| "loss": 0.924, | |
| "mean_token_accuracy": 0.7113671660423279, | |
| "num_tokens": 4435506710.0, | |
| "step": 2135 | |
| }, | |
| { | |
| "epoch": 2.989517819706499, | |
| "grad_norm": 0.21492992341518402, | |
| "learning_rate": 5.000720358599146e-06, | |
| "loss": 0.9213, | |
| "mean_token_accuracy": 0.7119130939245224, | |
| "num_tokens": 4445898940.0, | |
| "step": 2140 | |
| }, | |
| { | |
| "epoch": 2.9965059399021663, | |
| "grad_norm": 0.20215627551078796, | |
| "learning_rate": 5.000142294884549e-06, | |
| "loss": 0.9134, | |
| "mean_token_accuracy": 0.7140958666801452, | |
| "num_tokens": 4456285249.0, | |
| "step": 2145 | |
| } | |
| ], | |
| "logging_steps": 5, | |
| "max_steps": 2148, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 450, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 9.410669021545628e+18, | |
| "train_batch_size": 4, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |