[ { "loss": 0.8105, "learning_rate": 0.0002, "epoch": 0.02, "step": 1 }, { "loss": 0.7121, "learning_rate": 0.0002, "epoch": 0.04, "step": 2 }, { "loss": 0.7374, "learning_rate": 0.0002, "epoch": 0.05, "step": 3 }, { "loss": 0.6773, "learning_rate": 0.0002, "epoch": 0.07, "step": 4 }, { "loss": 0.6421, "learning_rate": 0.0002, "epoch": 0.09, "step": 5 }, { "loss": 0.6541, "learning_rate": 0.0002, "epoch": 0.11, "step": 6 }, { "loss": 0.6438, "learning_rate": 0.0002, "epoch": 0.12, "step": 7 }, { "loss": 0.5673, "learning_rate": 0.0002, "epoch": 0.14, "step": 8 }, { "loss": 0.6296, "learning_rate": 0.0002, "epoch": 0.16, "step": 9 }, { "loss": 0.5381, "learning_rate": 0.0002, "epoch": 0.18, "step": 10 }, { "loss": 0.4768, "learning_rate": 0.0002, "epoch": 0.19, "step": 11 }, { "loss": 0.4771, "learning_rate": 0.0002, "epoch": 0.21, "step": 12 }, { "loss": 0.4298, "learning_rate": 0.0002, "epoch": 0.23, "step": 13 }, { "loss": 0.4325, "learning_rate": 0.0002, "epoch": 0.25, "step": 14 }, { "loss": 0.363, "learning_rate": 0.0002, "epoch": 0.26, "step": 15 }, { "loss": 0.3576, "learning_rate": 0.0002, "epoch": 0.28, "step": 16 }, { "loss": 0.2169, "learning_rate": 0.0002, "epoch": 0.3, "step": 17 }, { "loss": 0.2301, "learning_rate": 0.0002, "epoch": 0.32, "step": 18 }, { "loss": 0.1212, "learning_rate": 0.0002, "epoch": 0.33, "step": 19 }, { "loss": 0.254, "learning_rate": 0.0002, "epoch": 0.35, "step": 20 }, { "loss": 0.2762, "learning_rate": 0.0002, "epoch": 0.37, "step": 21 }, { "loss": 0.3352, "learning_rate": 0.0002, "epoch": 0.39, "step": 22 }, { "loss": 0.2064, "learning_rate": 0.0002, "epoch": 0.4, "step": 23 }, { "loss": 0.1854, "learning_rate": 0.0002, "epoch": 0.42, "step": 24 }, { "loss": 0.7749, "learning_rate": 0.0002, "epoch": 0.44, "step": 25 }, { "eval_code_loss": 0.23698921501636505, "eval_code_score": -0.07455942779779434, "eval_code_brier_score": 0.07455942779779434, "eval_code_average_probability": 0.8370571732521057, "eval_code_accuracy": 0.91, "eval_code_probabilities": [ 0.9999996423721313, 0.9999746084213257, 0.9999923706054688, 0.9999996423721313, 0.987261176109314, 0.9999693632125854, 0.9805606603622437, 0.989900529384613, 0.8253728747367859, 0.9895944595336914, 0.9980483055114746, 0.999202311038971, 0.9968681931495667, 0.9993115663528442, 0.8710172772407532, 0.5899985432624817, 0.536171555519104, 0.634005069732666, 0.9608262777328491, 0.9955574870109558, 0.973881721496582, 0.7114182114601135, 0.994583785533905, 0.99253910779953, 0.9611174464225769, 0.40609970688819885, 0.36732223629951477, 0.7104178071022034, 0.6786656379699707, 0.6139542460441589, 0.4181646406650543, 0.9986469149589539, 0.6471241116523743, 0.9999631643295288, 0.9937544465065002, 0.9972711205482483, 0.9984267950057983, 0.9962224960327148, 0.9905822277069092, 0.9805472493171692, 0.6333239674568176, 0.9266849756240845, 0.9290731549263, 0.8625237345695496, 0.924532949924469, 0.8783010840415955, 0.9985120892524719, 0.9980109333992004, 0.9695223569869995, 0.9698131084442139, 0.9999995231628418, 0.9999414682388306, 0.9999977350234985, 0.9995865225791931, 0.8336222171783447, 0.5264941453933716, 0.42287304997444153, 0.9997043013572693, 0.6251727342605591, 0.9990785121917725, 0.9994639754295349, 0.9940522313117981, 0.9973495006561279, 0.9856157302856445, 0.7486594915390015, 0.5897226929664612, 0.9972866773605347, 0.07845962792634964, 0.08846855163574219, 0.6714103817939758, 0.3839266002178192, 0.9553180932998657, 0.768756091594696, 0.7603277564048767, 0.6352256536483765, 0.9986075758934021, 0.9318283796310425, 0.9627183675765991, 0.9998478889465332, 0.9559763073921204, 0.9911002516746521, 0.9935186505317688, 0.4914245903491974, 0.9645165801048279, 0.9999008178710938, 0.9998780488967896, 0.9999656677246094, 0.47462382912635803, 0.5107860565185547, 0.5336637496948242, 0.9003442525863647, 0.9588034749031067, 0.8931976556777954, 0.5446376800537109, 0.5643137693405151, 0.60689377784729, 0.9989719390869141, 0.8582219481468201, 0.9457838535308838, 0.6910393834114075 ], "eval_code_runtime": 135.1903, "eval_code_samples_per_second": 0.74, "eval_code_steps_per_second": 0.03, "epoch": 0.44, "step": 25 }, { "loss": 0.2921, "learning_rate": 0.0002, "epoch": 0.46, "step": 26 }, { "loss": 0.2115, "learning_rate": 0.0002, "epoch": 0.47, "step": 27 }, { "loss": 0.2011, "learning_rate": 0.0002, "epoch": 0.49, "step": 28 }, { "loss": 0.1465, "learning_rate": 0.0002, "epoch": 0.51, "step": 29 }, { "loss": 0.1991, "learning_rate": 0.0002, "epoch": 0.53, "step": 30 }, { "loss": 0.2027, "learning_rate": 0.0002, "epoch": 0.54, "step": 31 }, { "loss": 0.306, "learning_rate": 0.0002, "epoch": 0.56, "step": 32 }, { "loss": 0.1277, "learning_rate": 0.0002, "epoch": 0.58, "step": 33 }, { "loss": 0.1641, "learning_rate": 0.0002, "epoch": 0.6, "step": 34 }, { "loss": 0.2097, "learning_rate": 0.0002, "epoch": 0.61, "step": 35 }, { "loss": 0.1834, "learning_rate": 0.0002, "epoch": 0.63, "step": 36 }, { "loss": 0.0346, "learning_rate": 0.0002, "epoch": 0.65, "step": 37 }, { "loss": 0.1869, "learning_rate": 0.0002, "epoch": 0.67, "step": 38 }, { "loss": 0.2378, "learning_rate": 0.0002, "epoch": 0.68, "step": 39 }, { "loss": 0.0973, "learning_rate": 0.0002, "epoch": 0.7, "step": 40 }, { "loss": 0.0806, "learning_rate": 0.0002, "epoch": 0.72, "step": 41 }, { "loss": 0.0846, "learning_rate": 0.0002, "epoch": 0.74, "step": 42 }, { "loss": 0.0542, "learning_rate": 0.0002, "epoch": 0.75, "step": 43 }, { "loss": 0.0961, "learning_rate": 0.0002, "epoch": 0.77, "step": 44 }, { "loss": 0.1279, "learning_rate": 0.0002, "epoch": 0.79, "step": 45 }, { "loss": 0.0657, "learning_rate": 0.0002, "epoch": 0.81, "step": 46 }, { "loss": 0.2379, "learning_rate": 0.0002, "epoch": 0.82, "step": 47 }, { "loss": 0.0614, "learning_rate": 0.0002, "epoch": 0.84, "step": 48 }, { "loss": 0.0212, "learning_rate": 0.0002, "epoch": 0.86, "step": 49 }, { "loss": 0.0801, "learning_rate": 0.0002, "epoch": 0.88, "step": 50 }, { "eval_code_loss": 0.06750176846981049, "eval_code_score": -0.019731219857931137, "eval_code_brier_score": 0.019731219857931137, "eval_code_average_probability": 0.9517678022384644, "eval_code_accuracy": 0.98, "eval_code_probabilities": [ 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.9999988079071045, 1.0, 1.0, 1.0, 1.0, 0.9999001026153564, 0.9999938011169434, 0.9988300204277039, 0.7026340961456299, 0.8145422339439392, 0.9082520604133606, 0.9991196990013123, 0.9999988079071045, 0.9998842477798462, 0.9910578727722168, 1.0, 0.9999998807907104, 0.9999996423721313, 0.9998855590820312, 0.9998040795326233, 0.9997039437294006, 0.9976605176925659, 0.8687402606010437, 0.2247861623764038, 1.0, 0.9975019097328186, 0.9999924898147583, 0.9608558416366577, 0.9199216365814209, 1.0, 1.0, 1.0, 1.0, 0.9998576641082764, 0.996502161026001, 1.0, 1.0, 1.0, 0.9997383952140808, 1.0, 1.0, 0.9999997615814209, 0.9999996423721313, 1.0, 1.0, 1.0, 1.0, 0.9999929666519165, 0.9118456840515137, 0.9291142225265503, 1.0, 0.9629960060119629, 1.0, 1.0, 0.9999990463256836, 0.9999998807907104, 0.9993170499801636, 0.9999899864196777, 0.9904366135597229, 0.9999599456787109, 0.19636619091033936, 0.5225781202316284, 0.8634640574455261, 0.7529643774032593, 0.9999982118606567, 0.8990985155105591, 0.8453661203384399, 0.8021807670593262, 0.9999802112579346, 0.9630010724067688, 0.9999850988388062, 1.0, 0.999984622001648, 1.0, 0.9999991655349731, 0.6915827989578247, 0.9999104738235474, 1.0, 1.0, 1.0, 0.9996980428695679, 0.9999344348907471, 0.9843573570251465, 0.9999781847000122, 0.9999991655349731, 0.9997337460517883, 0.9923231601715088, 0.8509456515312195, 0.9906028509140015, 0.9999996423721313, 0.8774545192718506, 0.9720820188522339, 0.800399661064148 ], "eval_code_runtime": 135.2042, "eval_code_samples_per_second": 0.74, "eval_code_steps_per_second": 0.03, "epoch": 0.88, "step": 50 }, { "loss": 0.0398, "learning_rate": 0.0002, "epoch": 0.89, "step": 51 }, { "loss": 0.1503, "learning_rate": 0.0002, "epoch": 0.91, "step": 52 }, { "loss": 0.1686, "learning_rate": 0.0002, "epoch": 0.93, "step": 53 }, { "loss": 0.2037, "learning_rate": 0.0002, "epoch": 0.95, "step": 54 }, { "loss": 0.1357, "learning_rate": 0.0002, "epoch": 0.96, "step": 55 }, { "loss": 0.058, "learning_rate": 0.0002, "epoch": 0.98, "step": 56 }, { "loss": 0.1591, "learning_rate": 0.0002, "epoch": 1.0, "step": 57 }, { "loss": 0.0758, "learning_rate": 0.0002, "epoch": 1.02, "step": 58 }, { "loss": 0.049, "learning_rate": 0.0002, "epoch": 1.04, "step": 59 }, { "loss": 0.0248, "learning_rate": 0.0002, "epoch": 1.05, "step": 60 }, { "loss": 0.0416, "learning_rate": 0.0002, "epoch": 1.07, "step": 61 }, { "loss": 0.0606, "learning_rate": 0.0002, "epoch": 1.09, "step": 62 }, { "loss": 0.0178, "learning_rate": 0.0002, "epoch": 1.11, "step": 63 }, { "loss": 0.0513, "learning_rate": 0.0002, "epoch": 1.12, "step": 64 }, { "loss": 0.0471, "learning_rate": 0.0002, "epoch": 1.14, "step": 65 }, { "loss": 0.0337, "learning_rate": 0.0002, "epoch": 1.16, "step": 66 }, { "loss": 0.0392, "learning_rate": 0.0002, "epoch": 1.18, "step": 67 }, { "loss": 0.1758, "learning_rate": 0.0002, "epoch": 1.19, "step": 68 }, { "loss": 0.0174, "learning_rate": 0.0002, "epoch": 1.21, "step": 69 }, { "loss": 0.1157, "learning_rate": 0.0002, "epoch": 1.23, "step": 70 }, { "loss": 0.0167, "learning_rate": 0.0002, "epoch": 1.25, "step": 71 }, { "loss": 0.06, "learning_rate": 0.0002, "epoch": 1.26, "step": 72 }, { "loss": 0.0307, "learning_rate": 0.0002, "epoch": 1.28, "step": 73 }, { "loss": 0.0243, "learning_rate": 0.0002, "epoch": 1.3, "step": 74 }, { "loss": 0.0257, "learning_rate": 0.0002, "epoch": 1.32, "step": 75 }, { "eval_code_loss": 0.10609406232833862, "eval_code_score": -0.026530681177973747, "eval_code_brier_score": 0.026530681177973747, "eval_code_average_probability": 0.9572067856788635, "eval_code_accuracy": 0.96, "eval_code_probabilities": [ 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.99998939037323, 0.9999998807907104, 0.9999991655349731, 1.0, 1.0, 0.9999940395355225, 0.9999996423721313, 0.9983360171318054, 0.9892981052398682, 0.9142892956733704, 0.9966322779655457, 0.9999786615371704, 1.0, 0.9999991655349731, 0.9980227947235107, 1.0, 1.0, 1.0, 0.9999997615814209, 0.9999994039535522, 0.9999322891235352, 0.9998049139976501, 0.9414287209510803, 0.4411030411720276, 1.0, 0.9999996423721313, 0.9999920129776001, 0.9853335022926331, 0.33420300483703613, 0.9999996423721313, 1.0, 1.0, 1.0, 0.9999186992645264, 0.9994919300079346, 1.0, 0.9999998807907104, 1.0, 0.9999945163726807, 1.0, 1.0, 0.9999996423721313, 0.9999982118606567, 1.0, 1.0, 1.0, 1.0, 0.9999991655349731, 0.9523423910140991, 0.9576996564865112, 1.0, 0.9982321858406067, 1.0, 1.0, 1.0, 1.0, 0.9997273087501526, 0.9999955892562866, 0.9995458722114563, 1.0, 0.003542765974998474, 0.21382533013820648, 0.9740893244743347, 0.7800575494766235, 0.9999876022338867, 0.9993840456008911, 0.9999648332595825, 0.9938503503799438, 0.9999998807907104, 0.8809762001037598, 0.999996542930603, 1.0, 0.9999310970306396, 1.0, 0.9999343156814575, 0.552933394908905, 0.9972953200340271, 1.0, 1.0, 1.0, 0.9999986886978149, 1.0, 0.9997026324272156, 0.9999774694442749, 0.9999986886978149, 0.9996298551559448, 0.9799169898033142, 0.9480547904968262, 0.9585660696029663, 1.0, 0.9913630485534668, 0.999078631401062, 0.9433478116989136 ], "eval_code_runtime": 135.0302, "eval_code_samples_per_second": 0.741, "eval_code_steps_per_second": 0.03, "epoch": 1.32, "step": 75 }, { "loss": 0.009, "learning_rate": 0.0002, "epoch": 1.33, "step": 76 }, { "loss": 0.0097, "learning_rate": 0.0002, "epoch": 1.35, "step": 77 }, { "loss": 0.013, "learning_rate": 0.0002, "epoch": 1.37, "step": 78 }, { "loss": 0.0167, "learning_rate": 0.0002, "epoch": 1.39, "step": 79 }, { "loss": 0.0845, "learning_rate": 0.0002, "epoch": 1.4, "step": 80 }, { "loss": 0.0232, "learning_rate": 0.0002, "epoch": 1.42, "step": 81 }, { "loss": 0.022, "learning_rate": 0.0002, "epoch": 1.44, "step": 82 }, { "loss": 0.0519, "learning_rate": 0.0002, "epoch": 1.46, "step": 83 }, { "loss": 0.004, "learning_rate": 0.0002, "epoch": 1.47, "step": 84 }, { "loss": 0.0656, "learning_rate": 0.0002, "epoch": 1.49, "step": 85 }, { "loss": 0.021, "learning_rate": 0.0002, "epoch": 1.51, "step": 86 }, { "loss": 0.0422, "learning_rate": 0.0002, "epoch": 1.53, "step": 87 }, { "loss": 0.3975, "learning_rate": 0.0002, "epoch": 1.54, "step": 88 }, { "loss": 0.2116, "learning_rate": 0.0002, "epoch": 1.56, "step": 89 }, { "loss": 0.3375, "learning_rate": 0.0002, "epoch": 1.58, "step": 90 }, { "loss": 0.1259, "learning_rate": 0.0002, "epoch": 1.6, "step": 91 }, { "loss": 0.0268, "learning_rate": 0.0002, "epoch": 1.61, "step": 92 }, { "loss": 0.0338, "learning_rate": 0.0002, "epoch": 1.63, "step": 93 }, { "loss": 0.0507, "learning_rate": 0.0002, "epoch": 1.65, "step": 94 }, { "loss": 0.0234, "learning_rate": 0.0002, "epoch": 1.67, "step": 95 }, { "loss": 0.0384, "learning_rate": 0.0002, "epoch": 1.68, "step": 96 }, { "loss": 0.0763, "learning_rate": 0.0002, "epoch": 1.7, "step": 97 }, { "loss": 0.0931, "learning_rate": 0.0002, "epoch": 1.72, "step": 98 }, { "loss": 0.0224, "learning_rate": 0.0002, "epoch": 1.74, "step": 99 }, { "loss": 0.0251, "learning_rate": 0.0002, "epoch": 1.75, "step": 100 }, { "eval_code_loss": 0.08447160571813583, "eval_code_score": -0.02617546357214451, "eval_code_brier_score": 0.02617546357214451, "eval_code_average_probability": 0.952697217464447, "eval_code_accuracy": 0.96, "eval_code_probabilities": [ 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.9999971389770508, 0.9999997615814209, 0.9999935626983643, 1.0, 1.0, 0.9999480247497559, 0.9999934434890747, 0.9998371601104736, 0.9533892869949341, 0.8921594619750977, 0.9905146360397339, 0.9963000416755676, 0.9999768733978271, 0.9983525276184082, 0.9981574416160583, 0.9999998807907104, 1.0, 0.9999988079071045, 0.9999912977218628, 0.9999556541442871, 0.9934845566749573, 0.9964343309402466, 0.8924010992050171, 0.1918022185564041, 1.0, 0.9955978989601135, 1.0, 1.0, 1.0, 0.9999998807907104, 1.0, 1.0, 1.0, 0.9999991655349731, 0.9999963045120239, 1.0, 1.0, 1.0, 0.9996668100357056, 1.0, 0.9999998807907104, 0.9999982118606567, 0.9999998807907104, 1.0, 1.0, 1.0, 1.0, 0.9999961853027344, 0.9824994802474976, 0.9533160328865051, 1.0, 0.9979890584945679, 1.0, 1.0, 0.9999998807907104, 1.0, 0.9997654557228088, 0.9999836683273315, 0.9982922673225403, 0.9999887943267822, 0.05133601278066635, 0.688880205154419, 0.9723511934280396, 0.7304397225379944, 0.9999730587005615, 0.9456992745399475, 0.9939016103744507, 0.9482239484786987, 0.9999490976333618, 0.7966085076332092, 0.9999611377716064, 1.0, 0.9999994039535522, 1.0, 0.9999991655349731, 0.34034451842308044, 0.9999822378158569, 0.9999997615814209, 0.9999997615814209, 0.9999994039535522, 0.9978098273277283, 0.9985532164573669, 0.9683822393417358, 0.9999798536300659, 0.9999759197235107, 0.9992938041687012, 0.8188567757606506, 0.7980251312255859, 0.4461643099784851, 1.0, 0.9968101382255554, 0.9992519021034241, 0.9491932392120361 ], "eval_code_runtime": 135.2493, "eval_code_samples_per_second": 0.739, "eval_code_steps_per_second": 0.03, "epoch": 1.75, "step": 100 }, { "train_runtime": 11596.6404, "train_samples_per_second": 0.276, "train_steps_per_second": 0.009, "total_flos": 0.0, "train_loss": 0.19107350916601717, "epoch": 1.75, "step": 100 } ]