[ { "loss": 0.6888, "learning_rate": 7.2e-05, "epoch": 0.02, "step": 1 }, { "loss": 0.6986, "learning_rate": 7.2e-05, "epoch": 0.04, "step": 2 }, { "loss": 0.6882, "learning_rate": 7.2e-05, "epoch": 0.05, "step": 3 }, { "loss": 0.7077, "learning_rate": 7.2e-05, "epoch": 0.07, "step": 4 }, { "loss": 0.7224, "learning_rate": 7.2e-05, "epoch": 0.09, "step": 5 }, { "loss": 0.7048, "learning_rate": 7.2e-05, "epoch": 0.11, "step": 6 }, { "loss": 0.7112, "learning_rate": 7.2e-05, "epoch": 0.12, "step": 7 }, { "loss": 0.6989, "learning_rate": 7.2e-05, "epoch": 0.14, "step": 8 }, { "loss": 0.6849, "learning_rate": 7.2e-05, "epoch": 0.16, "step": 9 }, { "loss": 0.6836, "learning_rate": 7.2e-05, "epoch": 0.18, "step": 10 }, { "loss": 0.7093, "learning_rate": 7.2e-05, "epoch": 0.19, "step": 11 }, { "loss": 0.6678, "learning_rate": 7.2e-05, "epoch": 0.21, "step": 12 }, { "loss": 0.6883, "learning_rate": 7.2e-05, "epoch": 0.23, "step": 13 }, { "loss": 0.7027, "learning_rate": 7.2e-05, "epoch": 0.25, "step": 14 }, { "loss": 0.6902, "learning_rate": 7.2e-05, "epoch": 0.26, "step": 15 }, { "loss": 0.6821, "learning_rate": 7.2e-05, "epoch": 0.28, "step": 16 }, { "loss": 0.6758, "learning_rate": 7.2e-05, "epoch": 0.3, "step": 17 }, { "loss": 0.6974, "learning_rate": 7.2e-05, "epoch": 0.32, "step": 18 }, { "loss": 0.699, "learning_rate": 7.2e-05, "epoch": 0.33, "step": 19 }, { "loss": 0.6822, "learning_rate": 7.2e-05, "epoch": 0.35, "step": 20 }, { "loss": 0.6829, "learning_rate": 7.2e-05, "epoch": 0.37, "step": 21 }, { "loss": 0.6877, "learning_rate": 7.2e-05, "epoch": 0.39, "step": 22 }, { "loss": 0.6944, "learning_rate": 7.2e-05, "epoch": 0.4, "step": 23 }, { "loss": 0.6584, "learning_rate": 7.2e-05, "epoch": 0.42, "step": 24 }, { "loss": 0.6617, "learning_rate": 7.2e-05, "epoch": 0.44, "step": 25 }, { "eval_comma_separated_input_loss": 0.6993323564529419, "eval_comma_separated_input_score": -0.2515392303466797, "eval_comma_separated_input_brier_score": 0.2515392303466797, "eval_comma_separated_input_average_probability": 0.5000255107879639, "eval_comma_separated_input_accuracy": 0.54, "eval_comma_separated_input_probabilities": [ 0.503795862197876, 0.5095994472503662, 0.5117481350898743, 0.5188235640525818, 0.49348586797714233, 0.5075142979621887, 0.5085844993591309, 0.5313256978988647, 0.524666965007782, 0.5400325655937195, 0.4955388009548187, 0.48835161328315735, 0.5378824472427368, 0.4093141555786133, 0.4475248157978058, 0.5301578640937805, 0.5287147164344788, 0.5125200748443604, 0.46101707220077515, 0.47637277841567993, 0.5193068981170654, 0.5035426616668701, 0.49465057253837585, 0.5388767123222351, 0.5166733860969543, 0.505368173122406, 0.5078849792480469, 0.47010040283203125, 0.49050280451774597, 0.48438119888305664, 0.5382044315338135, 0.47858288884162903, 0.5376543998718262, 0.5285379886627197, 0.529219388961792, 0.518829345703125, 0.5222165584564209, 0.5047513246536255, 0.5319894552230835, 0.4515876770019531, 0.4796213209629059, 0.4592196047306061, 0.494624525308609, 0.628490149974823, 0.6078562140464783, 0.5277055501937866, 0.5176548957824707, 0.5021857023239136, 0.4844718277454376, 0.4644056260585785, 0.5124256610870361, 0.4677196741104126, 0.35713133215904236, 0.3847673535346985, 0.5229172706604004, 0.5078951120376587, 0.5210210680961609, 0.4978364109992981, 0.49351075291633606, 0.49013179540634155, 0.4803943634033203, 0.5257740616798401, 0.538706362247467, 0.46300145983695984, 0.43367546796798706, 0.5033430457115173, 0.4324256181716919, 0.4465772807598114, 0.4804939031600952, 0.4822034537792206, 0.47925877571105957, 0.5032249689102173, 0.4979986548423767, 0.47964081168174744, 0.4675384759902954, 0.5562675595283508, 0.5398447513580322, 0.5355958938598633, 0.5257712006568909, 0.47263115644454956, 0.5272528529167175, 0.5233765244483948, 0.5112841129302979, 0.524614155292511, 0.48685410618782043, 0.46848371624946594, 0.4812457263469696, 0.5532416105270386, 0.44510290026664734, 0.6106933355331421, 0.4503539800643921, 0.45145395398139954, 0.5150336027145386, 0.5107214450836182, 0.48753073811531067, 0.5132928490638733, 0.5114721655845642, 0.4908217787742615, 0.4980345666408539, 0.46589648723602295 ], "eval_comma_separated_input_runtime": 16.1205, "eval_comma_separated_input_samples_per_second": 6.203, "eval_comma_separated_input_steps_per_second": 0.124, "epoch": 0.44, "step": 25 }, { "loss": 0.6899, "learning_rate": 7.2e-05, "epoch": 0.46, "step": 26 }, { "loss": 0.682, "learning_rate": 7.2e-05, "epoch": 0.47, "step": 27 }, { "loss": 0.6674, "learning_rate": 7.2e-05, "epoch": 0.49, "step": 28 }, { "loss": 0.6642, "learning_rate": 7.2e-05, "epoch": 0.51, "step": 29 }, { "loss": 0.6731, "learning_rate": 7.2e-05, "epoch": 0.53, "step": 30 }, { "loss": 0.7128, "learning_rate": 7.2e-05, "epoch": 0.54, "step": 31 }, { "loss": 0.6819, "learning_rate": 7.2e-05, "epoch": 0.56, "step": 32 }, { "loss": 0.6589, "learning_rate": 7.2e-05, "epoch": 0.58, "step": 33 }, { "loss": 0.6743, "learning_rate": 7.2e-05, "epoch": 0.6, "step": 34 }, { "loss": 0.6714, "learning_rate": 7.2e-05, "epoch": 0.61, "step": 35 }, { "loss": 0.6903, "learning_rate": 7.2e-05, "epoch": 0.63, "step": 36 }, { "loss": 0.6755, "learning_rate": 7.2e-05, "epoch": 0.65, "step": 37 }, { "loss": 0.7052, "learning_rate": 7.2e-05, "epoch": 0.67, "step": 38 }, { "loss": 0.6907, "learning_rate": 7.2e-05, "epoch": 0.68, "step": 39 }, { "loss": 0.7109, "learning_rate": 7.2e-05, "epoch": 0.7, "step": 40 }, { "loss": 0.6519, "learning_rate": 7.2e-05, "epoch": 0.72, "step": 41 }, { "loss": 0.6595, "learning_rate": 7.2e-05, "epoch": 0.74, "step": 42 }, { "loss": 0.6748, "learning_rate": 7.2e-05, "epoch": 0.75, "step": 43 }, { "loss": 0.6385, "learning_rate": 7.2e-05, "epoch": 0.77, "step": 44 }, { "loss": 0.6735, "learning_rate": 7.2e-05, "epoch": 0.79, "step": 45 }, { "loss": 0.6518, "learning_rate": 7.2e-05, "epoch": 0.81, "step": 46 }, { "loss": 0.632, "learning_rate": 7.2e-05, "epoch": 0.82, "step": 47 }, { "loss": 0.657, "learning_rate": 7.2e-05, "epoch": 0.84, "step": 48 }, { "loss": 0.6607, "learning_rate": 7.2e-05, "epoch": 0.86, "step": 49 }, { "loss": 0.6255, "learning_rate": 7.2e-05, "epoch": 0.88, "step": 50 }, { "eval_comma_separated_input_loss": 0.6946019530296326, "eval_comma_separated_input_score": -0.2481752634048462, "eval_comma_separated_input_brier_score": 0.2481752634048462, "eval_comma_separated_input_average_probability": 0.5061700940132141, "eval_comma_separated_input_accuracy": 0.6, "eval_comma_separated_input_probabilities": [ 0.51164311170578, 0.5261144638061523, 0.529052734375, 0.5397265553474426, 0.48998579382896423, 0.519303023815155, 0.4581049978733063, 0.5145301222801208, 0.5432707071304321, 0.5934591293334961, 0.5541673302650452, 0.5537045001983643, 0.5623300671577454, 0.44144710898399353, 0.46711209416389465, 0.537606418132782, 0.5172039270401001, 0.5030494928359985, 0.4466020166873932, 0.46401113271713257, 0.5015407204627991, 0.47728481888771057, 0.5109363198280334, 0.566379964351654, 0.540045440196991, 0.5215070843696594, 0.5289958715438843, 0.4678622782230377, 0.4699240028858185, 0.48330816626548767, 0.5950980186462402, 0.5053505897521973, 0.576320469379425, 0.5486741065979004, 0.5543670654296875, 0.534079909324646, 0.4977535009384155, 0.4903794527053833, 0.5296334624290466, 0.45233848690986633, 0.4657306969165802, 0.45464658737182617, 0.5535851716995239, 0.6861993074417114, 0.627823531627655, 0.5249089598655701, 0.527280330657959, 0.5157051086425781, 0.4472731649875641, 0.46001487970352173, 0.5104174613952637, 0.4575841724872589, 0.2732444107532501, 0.2975327670574188, 0.6097822189331055, 0.523316502571106, 0.5738714933395386, 0.5014632344245911, 0.48708024621009827, 0.4901405870914459, 0.5433869361877441, 0.6164633631706238, 0.6268182992935181, 0.5039772391319275, 0.48110824823379517, 0.5338551998138428, 0.3195646107196808, 0.35504916310310364, 0.3640003502368927, 0.5100438594818115, 0.44825369119644165, 0.435710608959198, 0.4797152280807495, 0.5079709887504578, 0.45145082473754883, 0.6128948926925659, 0.5727843642234802, 0.6094695925712585, 0.5414447784423828, 0.49600932002067566, 0.569200873374939, 0.5250841975212097, 0.5124881267547607, 0.5290465354919434, 0.5108041763305664, 0.46299558877944946, 0.5038251280784607, 0.49594661593437195, 0.3868650197982788, 0.5818374752998352, 0.4324875473976135, 0.43986964225769043, 0.5265680551528931, 0.5605900883674622, 0.4820510745048523, 0.5000541806221008, 0.5066347122192383, 0.4744637906551361, 0.4974833130836487, 0.496936172246933 ], "eval_comma_separated_input_runtime": 16.1138, "eval_comma_separated_input_samples_per_second": 6.206, "eval_comma_separated_input_steps_per_second": 0.124, "epoch": 0.88, "step": 50 }, { "loss": 0.6644, "learning_rate": 7.2e-05, "epoch": 0.89, "step": 51 }, { "loss": 0.6867, "learning_rate": 7.2e-05, "epoch": 0.91, "step": 52 }, { "loss": 0.6717, "learning_rate": 7.2e-05, "epoch": 0.93, "step": 53 }, { "loss": 0.5862, "learning_rate": 7.2e-05, "epoch": 0.95, "step": 54 }, { "loss": 0.6398, "learning_rate": 7.2e-05, "epoch": 0.96, "step": 55 }, { "loss": 0.6533, "learning_rate": 7.2e-05, "epoch": 0.98, "step": 56 }, { "loss": 0.6038, "learning_rate": 7.2e-05, "epoch": 1.0, "step": 57 }, { "loss": 0.5702, "learning_rate": 7.2e-05, "epoch": 1.02, "step": 58 }, { "loss": 0.6065, "learning_rate": 7.2e-05, "epoch": 1.04, "step": 59 }, { "loss": 0.6239, "learning_rate": 7.2e-05, "epoch": 1.05, "step": 60 }, { "loss": 0.5943, "learning_rate": 7.2e-05, "epoch": 1.07, "step": 61 }, { "loss": 0.5956, "learning_rate": 7.2e-05, "epoch": 1.09, "step": 62 }, { "loss": 0.5764, "learning_rate": 7.2e-05, "epoch": 1.11, "step": 63 }, { "loss": 0.5535, "learning_rate": 7.2e-05, "epoch": 1.12, "step": 64 }, { "loss": 0.5746, "learning_rate": 7.2e-05, "epoch": 1.14, "step": 65 }, { "loss": 0.5135, "learning_rate": 7.2e-05, "epoch": 1.16, "step": 66 }, { "loss": 0.5017, "learning_rate": 7.2e-05, "epoch": 1.18, "step": 67 }, { "loss": 0.5172, "learning_rate": 7.2e-05, "epoch": 1.19, "step": 68 }, { "loss": 0.4915, "learning_rate": 7.2e-05, "epoch": 1.21, "step": 69 }, { "loss": 0.4926, "learning_rate": 7.2e-05, "epoch": 1.23, "step": 70 }, { "loss": 0.4708, "learning_rate": 7.2e-05, "epoch": 1.25, "step": 71 }, { "loss": 0.5304, "learning_rate": 7.2e-05, "epoch": 1.26, "step": 72 }, { "loss": 0.5077, "learning_rate": 7.2e-05, "epoch": 1.28, "step": 73 }, { "loss": 0.4826, "learning_rate": 7.2e-05, "epoch": 1.3, "step": 74 }, { "loss": 0.4748, "learning_rate": 7.2e-05, "epoch": 1.32, "step": 75 }, { "eval_comma_separated_input_loss": 0.6258053779602051, "eval_comma_separated_input_score": -0.20680248737335205, "eval_comma_separated_input_brier_score": 0.20680248737335205, "eval_comma_separated_input_average_probability": 0.6180524826049805, "eval_comma_separated_input_accuracy": 0.68, "eval_comma_separated_input_probabilities": [ 0.5721195340156555, 0.7017658948898315, 0.6385484933853149, 0.7326105237007141, 0.47453194856643677, 0.5188557505607605, 0.7041234970092773, 0.8677546977996826, 0.9113768935203552, 0.8678487539291382, 0.8938091397285461, 0.8131961822509766, 0.8911102414131165, 0.7212280035018921, 0.30331337451934814, 0.68271803855896, 0.5551613569259644, 0.546457052230835, 0.36448514461517334, 0.2712862193584442, 0.47094810009002686, 0.41139766573905945, 0.7261630296707153, 0.7710565328598022, 0.7093208432197571, 0.6875206232070923, 0.6162768006324768, 0.5798435211181641, 0.3663785755634308, 0.4800683557987213, 0.7515586018562317, 0.6657724976539612, 0.7367197871208191, 0.9523568749427795, 0.9762035608291626, 0.8696951270103455, 0.4400167465209961, 0.4547506868839264, 0.5732744336128235, 0.6823245286941528, 0.5749392509460449, 0.46267351508140564, 0.910620391368866, 0.7582406997680664, 0.7333372235298157, 0.7135775089263916, 0.7162679433822632, 0.6694697141647339, 0.688258171081543, 0.44142985343933105, 0.6265650987625122, 0.45789825916290283, 0.021506907418370247, 0.022947849705815315, 0.9906139373779297, 0.8922920227050781, 0.9795541763305664, 0.46202653646469116, 0.45456036925315857, 0.5064692497253418, 0.9828444719314575, 0.9987326264381409, 0.9979283809661865, 0.7301977276802063, 0.7633667588233948, 0.7971768975257874, 0.0402626171708107, 0.0861586332321167, 0.10297079384326935, 0.6277731657028198, 0.4136313498020172, 0.2826332151889801, 0.366528183221817, 0.6174625754356384, 0.22773477435112, 0.985233724117279, 0.9456908702850342, 0.9902066588401794, 0.9155365228652954, 0.9091471433639526, 0.9353199601173401, 0.39124324917793274, 0.6959341764450073, 0.38310277462005615, 0.6835052371025085, 0.42156150937080383, 0.6451631784439087, 0.3986201882362366, 0.18898624181747437, 0.7506139278411865, 0.5800188183784485, 0.5714033246040344, 0.7278105616569519, 0.7761068344116211, 0.6362823843955994, 0.7926703691482544, 0.3482849895954132, 0.06351301819086075, 0.27634355425834656, 0.7203478813171387 ], "eval_comma_separated_input_runtime": 16.1158, "eval_comma_separated_input_samples_per_second": 6.205, "eval_comma_separated_input_steps_per_second": 0.124, "epoch": 1.32, "step": 75 }, { "loss": 0.5011, "learning_rate": 7.2e-05, "epoch": 1.33, "step": 76 }, { "loss": 0.6516, "learning_rate": 7.2e-05, "epoch": 1.35, "step": 77 }, { "loss": 0.5829, "learning_rate": 7.2e-05, "epoch": 1.37, "step": 78 }, { "loss": 0.4935, "learning_rate": 7.2e-05, "epoch": 1.39, "step": 79 }, { "loss": 0.3673, "learning_rate": 7.2e-05, "epoch": 1.4, "step": 80 }, { "loss": 0.4334, "learning_rate": 7.2e-05, "epoch": 1.42, "step": 81 }, { "loss": 0.3936, "learning_rate": 7.2e-05, "epoch": 1.44, "step": 82 }, { "loss": 0.353, "learning_rate": 7.2e-05, "epoch": 1.46, "step": 83 }, { "loss": 0.6522, "learning_rate": 7.2e-05, "epoch": 1.47, "step": 84 }, { "loss": 0.5275, "learning_rate": 7.2e-05, "epoch": 1.49, "step": 85 }, { "loss": 0.4401, "learning_rate": 7.2e-05, "epoch": 1.51, "step": 86 }, { "loss": 0.4071, "learning_rate": 7.2e-05, "epoch": 1.53, "step": 87 }, { "loss": 0.3728, "learning_rate": 7.2e-05, "epoch": 1.54, "step": 88 }, { "loss": 0.5309, "learning_rate": 7.2e-05, "epoch": 1.56, "step": 89 }, { "loss": 0.4527, "learning_rate": 7.2e-05, "epoch": 1.58, "step": 90 }, { "loss": 0.3608, "learning_rate": 7.2e-05, "epoch": 1.6, "step": 91 }, { "loss": 0.239, "learning_rate": 7.2e-05, "epoch": 1.61, "step": 92 }, { "loss": 0.2344, "learning_rate": 7.2e-05, "epoch": 1.63, "step": 93 }, { "loss": 0.3821, "learning_rate": 7.2e-05, "epoch": 1.65, "step": 94 }, { "loss": 0.313, "learning_rate": 7.2e-05, "epoch": 1.67, "step": 95 }, { "loss": 0.378, "learning_rate": 7.2e-05, "epoch": 1.68, "step": 96 }, { "loss": 0.4537, "learning_rate": 7.2e-05, "epoch": 1.7, "step": 97 }, { "loss": 0.3415, "learning_rate": 7.2e-05, "epoch": 1.72, "step": 98 }, { "loss": 0.4065, "learning_rate": 7.2e-05, "epoch": 1.74, "step": 99 }, { "loss": 0.2366, "learning_rate": 7.2e-05, "epoch": 1.75, "step": 100 }, { "eval_comma_separated_input_loss": 0.4870679974555969, "eval_comma_separated_input_score": -0.1532951295375824, "eval_comma_separated_input_brier_score": 0.1532951295375824, "eval_comma_separated_input_average_probability": 0.6992267370223999, "eval_comma_separated_input_accuracy": 0.76, "eval_comma_separated_input_probabilities": [ 0.4922882616519928, 0.8624985218048096, 0.8197866082191467, 0.8157017230987549, 0.47455352544784546, 0.45992204546928406, 0.7894637584686279, 0.9641896486282349, 0.9799317717552185, 0.9594692587852478, 0.9782381057739258, 0.9190263748168945, 0.4384402632713318, 0.23847416043281555, 0.328116774559021, 0.9311410784721375, 0.9459454417228699, 0.6815487742424011, 0.6173495054244995, 0.5446956753730774, 0.6588089466094971, 0.5623964667320251, 0.9060649275779724, 0.8625409007072449, 0.8004050254821777, 0.681236207485199, 0.5825129151344299, 0.7353344559669495, 0.49257373809814453, 0.5107301473617554, 0.8711826205253601, 0.7890702486038208, 0.8513530492782593, 0.997861921787262, 0.9989767074584961, 0.9793128967285156, 0.4906117618083954, 0.5236798524856567, 0.4747471213340759, 0.9684915542602539, 0.6688782572746277, 0.7784843444824219, 0.923899233341217, 0.7623373866081238, 0.911179780960083, 0.7648601531982422, 0.7452040314674377, 0.7618370652198792, 0.8483420610427856, 0.48655733466148376, 0.8209435343742371, 0.6249547600746155, 0.01532256230711937, 0.01946461945772171, 0.9998677968978882, 0.998389482498169, 0.9997373223304749, 0.44458892941474915, 0.5567178130149841, 0.5201081037521362, 0.978333592414856, 0.9978808164596558, 0.9968469738960266, 0.7640753388404846, 0.5643236041069031, 0.7232016324996948, 0.20023302733898163, 0.4750272035598755, 0.6026356220245361, 0.6178794503211975, 0.2996007800102234, 0.2807196080684662, 0.42474597692489624, 0.6951245069503784, 0.22306181490421295, 0.9987945556640625, 0.9907826781272888, 0.9964522123336792, 0.9867048859596252, 0.9952267408370972, 0.9968361854553223, 0.4335293769836426, 0.8341225385665894, 0.38702425360679626, 0.9510868787765503, 0.9653806090354919, 0.9613111615180969, 0.8402206301689148, 0.4035952091217041, 0.5763944387435913, 0.6203272342681885, 0.5191049575805664, 0.6057157516479492, 0.8065303564071655, 0.41428208351135254, 0.7038020491600037, 0.7197489738464355, 0.030255166813731194, 0.731630265712738, 0.979770839214325 ], "eval_comma_separated_input_runtime": 16.117, "eval_comma_separated_input_samples_per_second": 6.205, "eval_comma_separated_input_steps_per_second": 0.124, "epoch": 1.75, "step": 100 }, { "train_runtime": 1126.4785, "train_samples_per_second": 2.841, "train_steps_per_second": 0.089, "total_flos": 0.0, "train_loss": 0.5873149715363979, "epoch": 1.75, "step": 100 } ]