[ { "loss": 0.6836, "learning_rate": 7.2e-05, "epoch": 0.02, "step": 1 }, { "loss": 0.6947, "learning_rate": 7.2e-05, "epoch": 0.04, "step": 2 }, { "loss": 0.7068, "learning_rate": 7.2e-05, "epoch": 0.05, "step": 3 }, { "loss": 0.6936, "learning_rate": 7.2e-05, "epoch": 0.07, "step": 4 }, { "loss": 0.6894, "learning_rate": 7.2e-05, "epoch": 0.09, "step": 5 }, { "loss": 0.6947, "learning_rate": 7.2e-05, "epoch": 0.11, "step": 6 }, { "loss": 0.7127, "learning_rate": 7.2e-05, "epoch": 0.12, "step": 7 }, { "loss": 0.6564, "learning_rate": 7.2e-05, "epoch": 0.14, "step": 8 }, { "loss": 0.7051, "learning_rate": 7.2e-05, "epoch": 0.16, "step": 9 }, { "loss": 0.6898, "learning_rate": 7.2e-05, "epoch": 0.18, "step": 10 }, { "loss": 0.6269, "learning_rate": 7.2e-05, "epoch": 0.19, "step": 11 }, { "loss": 0.6373, "learning_rate": 7.2e-05, "epoch": 0.21, "step": 12 }, { "loss": 0.6626, "learning_rate": 7.2e-05, "epoch": 0.23, "step": 13 }, { "loss": 0.671, "learning_rate": 7.2e-05, "epoch": 0.25, "step": 14 }, { "loss": 0.6446, "learning_rate": 7.2e-05, "epoch": 0.26, "step": 15 }, { "loss": 0.6511, "learning_rate": 7.2e-05, "epoch": 0.28, "step": 16 }, { "loss": 0.6861, "learning_rate": 7.2e-05, "epoch": 0.3, "step": 17 }, { "loss": 0.6569, "learning_rate": 7.2e-05, "epoch": 0.32, "step": 18 }, { "loss": 0.6328, "learning_rate": 7.2e-05, "epoch": 0.33, "step": 19 }, { "loss": 0.6302, "learning_rate": 7.2e-05, "epoch": 0.35, "step": 20 }, { "loss": 0.6311, "learning_rate": 7.2e-05, "epoch": 0.37, "step": 21 }, { "loss": 0.6667, "learning_rate": 7.2e-05, "epoch": 0.39, "step": 22 }, { "loss": 0.6476, "learning_rate": 7.2e-05, "epoch": 0.4, "step": 23 }, { "loss": 0.6099, "learning_rate": 7.2e-05, "epoch": 0.42, "step": 24 }, { "loss": 0.636, "learning_rate": 7.2e-05, "epoch": 0.44, "step": 25 }, { "eval_code_loss": 0.6500930190086365, "eval_code_score": -0.23084141314029694, "eval_code_brier_score": 0.23084141314029694, "eval_code_average_probability": 0.5324862003326416, "eval_code_accuracy": 0.63, "eval_code_probabilities": [ 0.8981996178627014, 0.9140064120292664, 0.9086233377456665, 0.4930489957332611, 0.5311785340309143, 0.49541160464286804, 0.5163961052894592, 0.5207482576370239, 0.4978530704975128, 0.4979601800441742, 0.5097731351852417, 0.5298001766204834, 0.4631284475326538, 0.4101221561431885, 0.3820548951625824, 0.5316497683525085, 0.4804396629333496, 0.5146058797836304, 0.5150214433670044, 0.4998961091041565, 0.5088847875595093, 0.5062065720558167, 0.49155697226524353, 0.5065352916717529, 0.3692309260368347, 0.3464958965778351, 0.345247745513916, 0.5082260370254517, 0.4985589385032654, 0.4908699691295624, 0.5014219284057617, 0.5078413486480713, 0.5118864178657532, 0.9250859618186951, 0.9192001819610596, 0.9183722734451294, 0.5567901134490967, 0.5061033368110657, 0.5202013850212097, 0.4856383204460144, 0.46390581130981445, 0.4597606658935547, 0.5073948502540588, 0.49284979701042175, 0.49324461817741394, 0.4991315007209778, 0.5046825408935547, 0.5030663013458252, 0.4927866756916046, 0.5034404993057251, 0.6324691772460938, 0.527839183807373, 0.6027267575263977, 0.5166205167770386, 0.5005240440368652, 0.5037304162979126, 0.5007357001304626, 0.5299914479255676, 0.49253302812576294, 0.5389838814735413, 0.7237929105758667, 0.722010612487793, 0.7248772978782654, 0.5261573791503906, 0.5009329319000244, 0.5066245198249817, 0.5175695419311523, 0.48236575722694397, 0.4813913404941559, 0.4951794743537903, 0.5201015472412109, 0.5013536810874939, 0.5012617111206055, 0.5066967606544495, 0.5075401067733765, 0.5807880759239197, 0.5756419897079468, 0.5374066829681396, 0.5401325821876526, 0.4964595437049866, 0.48420250415802, 0.5349816083908081, 0.4993498623371124, 0.5083227157592773, 0.4809667766094208, 0.5351281762123108, 0.45967379212379456, 0.5043829679489136, 0.4973975121974945, 0.5018096566200256, 0.4930133819580078, 0.5008613467216492, 0.5038902163505554, 0.5041508078575134, 0.4986605942249298, 0.5054576992988586, 0.4900255799293518, 0.4928497076034546, 0.5077279806137085, 0.49679625034332275 ], "eval_code_runtime": 36.4656, "eval_code_samples_per_second": 2.742, "eval_code_steps_per_second": 0.055, "epoch": 0.44, "step": 25 }, { "eval_counterfactual_python_loss": 0.6674560308456421, "eval_counterfactual_python_score": -0.2368319183588028, "eval_counterfactual_python_brier_score": 0.2368319183588028, "eval_counterfactual_python_average_probability": 0.5182108879089355, "eval_counterfactual_python_accuracy": 0.6, "eval_counterfactual_python_probabilities": [ 0.506840705871582, 0.48089414834976196, 0.497524231672287, 0.5044610500335693, 0.4899257719516754, 0.5029208660125732, 0.5528867840766907, 0.6020012497901917, 0.5903577208518982, 0.5012052059173584, 0.5010500550270081, 0.5014556050300598, 0.5129737257957458, 0.5166401863098145, 0.504987359046936, 0.48754236102104187, 0.4921533763408661, 0.5110666751861572, 0.5212812423706055, 0.505253791809082, 0.5044047236442566, 0.5446178913116455, 0.5504598021507263, 0.5181146860122681, 0.4997810125350952, 0.5025537610054016, 0.49983397126197815, 0.5592316389083862, 0.5561607480049133, 0.49701419472694397, 0.5067521929740906, 0.5109471082687378, 0.49662530422210693, 0.4926052391529083, 0.5500054955482483, 0.5147995352745056, 0.5712809562683105, 0.5422273874282837, 0.5272724032402039, 0.5182323455810547, 0.504339337348938, 0.524910569190979, 0.5017884373664856, 0.49858200550079346, 0.5003425478935242, 0.49639299511909485, 0.5009803175926208, 0.506009042263031, 0.5095840096473694, 0.49511101841926575, 0.49724477529525757, 0.5003291964530945, 0.49959462881088257, 0.5014533996582031, 0.5121566653251648, 0.4868036210536957, 0.4927564561367035, 0.49987566471099854, 0.48913922905921936, 0.4933817982673645, 0.5025197267532349, 0.4877781271934509, 0.4988517463207245, 0.48360976576805115, 0.47294488549232483, 0.44178009033203125, 0.5168694853782654, 0.5422465801239014, 0.5201556086540222, 0.5059149861335754, 0.49637845158576965, 0.518649697303772, 0.4840700328350067, 0.4878598749637604, 0.4397459328174591, 0.5004118084907532, 0.4982900023460388, 0.49845632910728455, 0.4866858422756195, 0.5425868630409241, 0.5578758120536804, 0.5089701414108276, 0.4936344027519226, 0.49497488141059875, 0.4820954203605652, 0.4383483827114105, 0.4935499131679535, 0.8830961585044861, 0.8793171048164368, 0.8719192743301392, 0.44867223501205444, 0.4727223813533783, 0.5310845971107483, 0.4810177981853485, 0.5083786249160767, 0.5173466205596924, 0.5266315340995789, 0.513174831867218, 0.5350879430770874, 0.5002694725990295 ], "eval_counterfactual_python_runtime": 42.2718, "eval_counterfactual_python_samples_per_second": 2.366, "eval_counterfactual_python_steps_per_second": 0.047, "epoch": 0.44, "step": 25 }, { "loss": 0.6523, "learning_rate": 7.2e-05, "epoch": 0.46, "step": 26 }, { "loss": 0.6145, "learning_rate": 7.2e-05, "epoch": 0.47, "step": 27 }, { "loss": 0.6282, "learning_rate": 7.2e-05, "epoch": 0.49, "step": 28 }, { "loss": 0.5889, "learning_rate": 7.2e-05, "epoch": 0.51, "step": 29 }, { "loss": 0.6173, "learning_rate": 7.2e-05, "epoch": 0.53, "step": 30 }, { "loss": 0.6474, "learning_rate": 7.2e-05, "epoch": 0.54, "step": 31 }, { "loss": 0.5995, "learning_rate": 7.2e-05, "epoch": 0.56, "step": 32 }, { "loss": 0.5861, "learning_rate": 7.2e-05, "epoch": 0.58, "step": 33 }, { "loss": 0.5736, "learning_rate": 7.2e-05, "epoch": 0.6, "step": 34 }, { "loss": 0.6119, "learning_rate": 7.2e-05, "epoch": 0.61, "step": 35 }, { "loss": 0.6247, "learning_rate": 7.2e-05, "epoch": 0.63, "step": 36 }, { "loss": 0.6276, "learning_rate": 7.2e-05, "epoch": 0.65, "step": 37 }, { "loss": 0.6098, "learning_rate": 7.2e-05, "epoch": 0.67, "step": 38 }, { "loss": 0.6301, "learning_rate": 7.2e-05, "epoch": 0.68, "step": 39 }, { "loss": 0.6021, "learning_rate": 7.2e-05, "epoch": 0.7, "step": 40 }, { "loss": 0.555, "learning_rate": 7.2e-05, "epoch": 0.72, "step": 41 }, { "loss": 0.5695, "learning_rate": 7.2e-05, "epoch": 0.74, "step": 42 }, { "loss": 0.5669, "learning_rate": 7.2e-05, "epoch": 0.75, "step": 43 }, { "loss": 0.5473, "learning_rate": 7.2e-05, "epoch": 0.77, "step": 44 }, { "loss": 0.5369, "learning_rate": 7.2e-05, "epoch": 0.79, "step": 45 }, { "loss": 0.5351, "learning_rate": 7.2e-05, "epoch": 0.81, "step": 46 }, { "loss": 0.4989, "learning_rate": 7.2e-05, "epoch": 0.82, "step": 47 }, { "loss": 0.4687, "learning_rate": 7.2e-05, "epoch": 0.84, "step": 48 }, { "loss": 0.4431, "learning_rate": 7.2e-05, "epoch": 0.86, "step": 49 }, { "loss": 0.475, "learning_rate": 7.2e-05, "epoch": 0.88, "step": 50 }, { "eval_code_loss": 0.6905087232589722, "eval_code_score": -0.21665707230567932, "eval_code_brier_score": 0.21665707230567932, "eval_code_average_probability": 0.576951265335083, "eval_code_accuracy": 0.68, "eval_code_probabilities": [ 0.8747886419296265, 0.897834300994873, 0.9301072955131531, 0.5637628436088562, 0.6103933453559875, 0.5329621434211731, 0.661848783493042, 0.8006942868232727, 0.5047258138656616, 0.5474675297737122, 0.5803297162055969, 0.6606214642524719, 0.6511579155921936, 0.5316810011863708, 0.5710970163345337, 0.7186195850372314, 0.40853649377822876, 0.6621276140213013, 0.7090564370155334, 0.686518132686615, 0.6983659863471985, 0.3943677544593811, 0.7656291127204895, 0.7813704609870911, 0.01187042985111475, 0.008863707073032856, 0.007746896706521511, 0.5090981721878052, 0.5062975287437439, 0.45505571365356445, 0.4951728880405426, 0.5581167340278625, 0.5970812439918518, 0.9008167386054993, 0.9059542417526245, 0.9084323644638062, 0.814932644367218, 0.5395833849906921, 0.6921597719192505, 0.45764750242233276, 0.3827315866947174, 0.5150556564331055, 0.5219369530677795, 0.4390372037887573, 0.4954327344894409, 0.47707119584083557, 0.5074189305305481, 0.5035202503204346, 0.5605807304382324, 0.5175653696060181, 0.9902499318122864, 0.5175576210021973, 0.8499017953872681, 0.5307424068450928, 0.5028649568557739, 0.515737771987915, 0.4969106614589691, 0.2922719717025757, 0.44801953434944153, 0.32066962122917175, 0.9447461366653442, 0.9428120255470276, 0.9458075165748596, 0.7382722496986389, 0.5110031962394714, 0.5486981868743896, 0.4352004826068878, 0.41556352376937866, 0.4588325619697571, 0.5106350779533386, 0.5079880952835083, 0.5065162181854248, 0.5341932773590088, 0.5115707516670227, 0.49947670102119446, 0.9284617304801941, 0.510497510433197, 0.6312513947486877, 0.9168699979782104, 0.9026933312416077, 0.8503899574279785, 0.5394543409347534, 0.4984402060508728, 0.5079206228256226, 0.29503393173217773, 0.7989310026168823, 0.3873499035835266, 0.552551805973053, 0.49572017788887024, 0.5294312834739685, 0.44464293122291565, 0.5211971998214722, 0.46848106384277344, 0.5291138887405396, 0.4984701871871948, 0.4968411326408386, 0.46553924679756165, 0.46003156900405884, 0.493184357881546, 0.463138610124588 ], "eval_code_runtime": 36.5772, "eval_code_samples_per_second": 2.734, "eval_code_steps_per_second": 0.055, "epoch": 0.88, "step": 50 }, { "eval_counterfactual_python_loss": 0.6410097479820251, "eval_counterfactual_python_score": -0.222365602850914, "eval_counterfactual_python_brier_score": 0.222365602850914, "eval_counterfactual_python_average_probability": 0.5518541932106018, "eval_counterfactual_python_accuracy": 0.66, "eval_counterfactual_python_probabilities": [ 0.5386440753936768, 0.49819016456604004, 0.4844203591346741, 0.5327919125556946, 0.4123155176639557, 0.5146786570549011, 0.44351926445961, 0.4285781681537628, 0.41586360335350037, 0.5060333609580994, 0.5129002928733826, 0.5073186755180359, 0.5133093595504761, 0.30924591422080994, 0.5565316081047058, 0.41158172488212585, 0.42925503849983215, 0.45506441593170166, 0.5124459862709045, 0.5084977746009827, 0.5260626673698425, 0.9784466624259949, 0.977891206741333, 0.6758217811584473, 0.4996006488800049, 0.502190351486206, 0.49963271617889404, 0.24913643300533295, 0.24497272074222565, 0.15005168318748474, 0.5284581780433655, 0.4637279510498047, 0.46632638573646545, 0.49904829263687134, 0.5982093811035156, 0.5048952102661133, 0.7960643768310547, 0.6252861022949219, 0.6371491551399231, 0.6120105385780334, 0.44951215386390686, 0.30023863911628723, 0.5029293298721313, 0.49918922781944275, 0.5002826452255249, 0.48498842120170593, 0.49545082449913025, 0.5000977516174316, 0.5497520565986633, 0.4658561646938324, 0.47568437457084656, 0.5000590682029724, 0.4994635581970215, 0.501465380191803, 0.5806223154067993, 0.5407394766807556, 0.548596203327179, 0.49953749775886536, 0.5177533626556396, 0.5937445163726807, 0.51665860414505, 0.5505573749542236, 0.5108556151390076, 0.48964056372642517, 0.6833782196044922, 0.7741197943687439, 0.5394606590270996, 0.6131158471107483, 0.5644785165786743, 0.601515531539917, 0.4582086503505707, 0.5712682604789734, 0.6359525918960571, 0.740151047706604, 0.453473836183548, 0.5005698204040527, 0.49849438667297363, 0.4981241226196289, 0.5342211127281189, 0.7014966607093811, 0.8450021743774414, 0.519537091255188, 0.5990186333656311, 0.7578335404396057, 0.5165602564811707, 0.46256765723228455, 0.5105525851249695, 0.9811468124389648, 0.9849089980125427, 0.9816316366195679, 0.6144223809242249, 0.4611106216907501, 0.8631046414375305, 0.5419878363609314, 0.519582211971283, 0.6103906035423279, 0.667199969291687, 0.5551198720932007, 0.669242262840271, 0.5566273331642151 ], "eval_counterfactual_python_runtime": 42.2853, "eval_counterfactual_python_samples_per_second": 2.365, "eval_counterfactual_python_steps_per_second": 0.047, "epoch": 0.88, "step": 50 }, { "loss": 0.4995, "learning_rate": 7.2e-05, "epoch": 0.89, "step": 51 }, { "loss": 0.5482, "learning_rate": 7.2e-05, "epoch": 0.91, "step": 52 }, { "loss": 0.6127, "learning_rate": 7.2e-05, "epoch": 0.93, "step": 53 }, { "loss": 0.5, "learning_rate": 7.2e-05, "epoch": 0.95, "step": 54 }, { "loss": 0.5382, "learning_rate": 7.2e-05, "epoch": 0.96, "step": 55 }, { "loss": 0.5227, "learning_rate": 7.2e-05, "epoch": 0.98, "step": 56 }, { "loss": 0.6282, "learning_rate": 7.2e-05, "epoch": 1.0, "step": 57 }, { "loss": 0.4611, "learning_rate": 7.2e-05, "epoch": 1.02, "step": 58 }, { "loss": 0.4296, "learning_rate": 7.2e-05, "epoch": 1.04, "step": 59 }, { "loss": 0.3265, "learning_rate": 7.2e-05, "epoch": 1.05, "step": 60 }, { "loss": 0.3327, "learning_rate": 7.2e-05, "epoch": 1.07, "step": 61 }, { "loss": 0.5471, "learning_rate": 7.2e-05, "epoch": 1.09, "step": 62 }, { "loss": 0.3814, "learning_rate": 7.2e-05, "epoch": 1.11, "step": 63 }, { "loss": 0.3941, "learning_rate": 7.2e-05, "epoch": 1.12, "step": 64 }, { "loss": 0.3218, "learning_rate": 7.2e-05, "epoch": 1.14, "step": 65 }, { "loss": 0.3258, "learning_rate": 7.2e-05, "epoch": 1.16, "step": 66 }, { "loss": 0.5251, "learning_rate": 7.2e-05, "epoch": 1.18, "step": 67 }, { "loss": 0.469, "learning_rate": 7.2e-05, "epoch": 1.19, "step": 68 }, { "loss": 0.3852, "learning_rate": 7.2e-05, "epoch": 1.21, "step": 69 }, { "loss": 0.3409, "learning_rate": 7.2e-05, "epoch": 1.23, "step": 70 }, { "loss": 0.4473, "learning_rate": 7.2e-05, "epoch": 1.25, "step": 71 }, { "loss": 0.2945, "learning_rate": 7.2e-05, "epoch": 1.26, "step": 72 }, { "loss": 0.3608, "learning_rate": 7.2e-05, "epoch": 1.28, "step": 73 }, { "loss": 0.4042, "learning_rate": 7.2e-05, "epoch": 1.3, "step": 74 }, { "loss": 0.3802, "learning_rate": 7.2e-05, "epoch": 1.32, "step": 75 }, { "eval_code_loss": 0.4694095849990845, "eval_code_score": -0.16457700729370117, "eval_code_brier_score": 0.16457700729370117, "eval_code_average_probability": 0.6747273802757263, "eval_code_accuracy": 0.77, "eval_code_probabilities": [ 0.9998944997787476, 0.9999444484710693, 0.9999792575836182, 0.7207562327384949, 0.9202108383178711, 0.5724186301231384, 0.8467735648155212, 0.8126452565193176, 0.44478896260261536, 0.7643183469772339, 0.7819174528121948, 0.9309266805648804, 0.9411635994911194, 0.8424233198165894, 0.9049078226089478, 0.952031672000885, 0.3711952567100525, 0.9328634738922119, 0.9677587151527405, 0.9494214057922363, 0.9498708844184875, 0.30629244446754456, 0.8959491848945618, 0.9032987952232361, 0.9084609150886536, 0.8893307447433472, 0.8730643391609192, 0.5209372043609619, 0.5036833882331848, 0.4168521463871002, 0.5004851818084717, 0.6079723834991455, 0.5459354519844055, 0.9346232414245605, 0.9303449988365173, 0.9605541825294495, 0.9573792815208435, 0.6248452067375183, 0.9251506328582764, 0.4677916467189789, 0.37387004494667053, 0.42620334029197693, 0.600398063659668, 0.48431196808815, 0.7895669341087341, 0.4504677951335907, 0.5762742757797241, 0.6442916989326477, 0.8840709328651428, 0.7550206780433655, 0.9939717650413513, 0.6135303378105164, 0.6339468359947205, 0.5852696895599365, 0.602887749671936, 0.5735942125320435, 0.4814770221710205, 0.9196935892105103, 0.1881166696548462, 0.8902093172073364, 0.977299153804779, 0.9783608913421631, 0.9801336526870728, 0.9884994626045227, 0.5481224656105042, 0.5878416299819946, 0.2562265396118164, 0.10622240602970123, 0.1097337156534195, 0.5593991875648499, 0.5126798152923584, 0.4901875853538513, 0.539310097694397, 0.5853370428085327, 0.5666263103485107, 0.9994970560073853, 0.7907701730728149, 0.9174056649208069, 0.9986317753791809, 0.9991476535797119, 0.9870390295982361, 0.6845882534980774, 0.5273832082748413, 0.5825375914573669, 0.05030998960137367, 0.7106683254241943, 0.376408189535141, 0.6357782483100891, 0.5115633606910706, 0.5071816444396973, 0.39829522371292114, 0.535811722278595, 0.3911305069923401, 0.6767864227294922, 0.50504070520401, 0.43804654479026794, 0.5036672949790955, 0.4026598036289215, 0.35143136978149414, 0.4586479961872101 ], "eval_code_runtime": 36.5352, "eval_code_samples_per_second": 2.737, "eval_code_steps_per_second": 0.055, "epoch": 1.32, "step": 75 }, { "eval_counterfactual_python_loss": 0.6137931942939758, "eval_counterfactual_python_score": -0.20746655762195587, "eval_counterfactual_python_brier_score": 0.20746655762195587, "eval_counterfactual_python_average_probability": 0.5891837477684021, "eval_counterfactual_python_accuracy": 0.63, "eval_counterfactual_python_probabilities": [ 0.5506163239479065, 0.42313048243522644, 0.4198402762413025, 0.5754298567771912, 0.38630211353302, 0.47240203619003296, 0.7205502986907959, 0.6168351769447327, 0.5663421750068665, 0.45957374572753906, 0.5441972017288208, 0.4583946764469147, 0.5182128548622131, 0.13082706928253174, 0.6158074140548706, 0.30370235443115234, 0.30528637766838074, 0.36947813630104065, 0.5232859253883362, 0.5014183521270752, 0.49940726161003113, 0.6633936166763306, 0.8305829763412476, 0.8158023953437805, 0.4996267259120941, 0.5016799569129944, 0.49929139018058777, 0.37765786051750183, 0.3638369143009186, 0.2675539255142212, 0.39653363823890686, 0.5094757676124573, 0.4743023216724396, 0.6519900560379028, 0.726960301399231, 0.4309100806713104, 0.9850045442581177, 0.954617977142334, 0.9593468904495239, 0.6046375036239624, 0.4302014708518982, 0.3352057933807373, 0.5038560628890991, 0.49972790479660034, 0.5001466870307922, 0.42995354533195496, 0.47737735509872437, 0.45605820417404175, 0.6769935488700867, 0.4527358412742615, 0.4564270079135895, 0.49967876076698303, 0.49923568964004517, 0.5014398694038391, 0.61121666431427, 0.5898076295852661, 0.6469179391860962, 0.5191650390625, 0.43958330154418945, 0.513123631477356, 0.6665530800819397, 0.7978847622871399, 0.5442570447921753, 0.36048850417137146, 0.5295901894569397, 0.9162126779556274, 0.6021609902381897, 0.7771204710006714, 0.7251576781272888, 0.6523131728172302, 0.46528783440589905, 0.6797440648078918, 0.5512087941169739, 0.26883381605148315, 0.2967521548271179, 0.5009620189666748, 0.4986320734024048, 0.49721115827560425, 0.6624665856361389, 0.9735934734344482, 0.9868214726448059, 0.5706613063812256, 0.7753136157989502, 0.9256560206413269, 0.5435609221458435, 0.5188571214675903, 0.554946780204773, 0.999994158744812, 0.9999972581863403, 0.9999933242797852, 0.8329089283943176, 0.4099949896335602, 0.9683297872543335, 0.8267971873283386, 0.504895031452179, 0.6182412505149841, 0.9561484456062317, 0.8357421159744263, 0.9478693604469299, 0.6621176600456238 ], "eval_counterfactual_python_runtime": 42.2604, "eval_counterfactual_python_samples_per_second": 2.366, "eval_counterfactual_python_steps_per_second": 0.047, "epoch": 1.32, "step": 75 }, { "loss": 0.3477, "learning_rate": 7.2e-05, "epoch": 1.33, "step": 76 }, { "loss": 0.3441, "learning_rate": 7.2e-05, "epoch": 1.35, "step": 77 }, { "loss": 0.2051, "learning_rate": 7.2e-05, "epoch": 1.37, "step": 78 }, { "loss": 0.3386, "learning_rate": 7.2e-05, "epoch": 1.39, "step": 79 }, { "loss": 0.3102, "learning_rate": 7.2e-05, "epoch": 1.4, "step": 80 }, { "loss": 0.2749, "learning_rate": 7.2e-05, "epoch": 1.42, "step": 81 }, { "loss": 0.393, "learning_rate": 7.2e-05, "epoch": 1.44, "step": 82 }, { "loss": 0.3211, "learning_rate": 7.2e-05, "epoch": 1.46, "step": 83 }, { "loss": 0.3656, "learning_rate": 7.2e-05, "epoch": 1.47, "step": 84 }, { "loss": 0.2527, "learning_rate": 7.2e-05, "epoch": 1.49, "step": 85 }, { "loss": 0.3756, "learning_rate": 7.2e-05, "epoch": 1.51, "step": 86 }, { "loss": 0.5116, "learning_rate": 7.2e-05, "epoch": 1.53, "step": 87 }, { "loss": 0.3191, "learning_rate": 7.2e-05, "epoch": 1.54, "step": 88 }, { "loss": 0.2474, "learning_rate": 7.2e-05, "epoch": 1.56, "step": 89 }, { "loss": 0.3117, "learning_rate": 7.2e-05, "epoch": 1.58, "step": 90 }, { "loss": 0.2992, "learning_rate": 7.2e-05, "epoch": 1.6, "step": 91 }, { "loss": 0.158, "learning_rate": 7.2e-05, "epoch": 1.61, "step": 92 }, { "loss": 0.2684, "learning_rate": 7.2e-05, "epoch": 1.63, "step": 93 }, { "loss": 0.5338, "learning_rate": 7.2e-05, "epoch": 1.65, "step": 94 }, { "loss": 0.3467, "learning_rate": 7.2e-05, "epoch": 1.67, "step": 95 }, { "loss": 0.2647, "learning_rate": 7.2e-05, "epoch": 1.68, "step": 96 }, { "loss": 0.2894, "learning_rate": 7.2e-05, "epoch": 1.7, "step": 97 }, { "loss": 0.2537, "learning_rate": 7.2e-05, "epoch": 1.72, "step": 98 }, { "loss": 0.4454, "learning_rate": 7.2e-05, "epoch": 1.74, "step": 99 }, { "loss": 0.2264, "learning_rate": 7.2e-05, "epoch": 1.75, "step": 100 }, { "eval_code_loss": 0.562386691570282, "eval_code_score": -0.1894664168357849, "eval_code_brier_score": 0.1894664168357849, "eval_code_average_probability": 0.6813782453536987, "eval_code_accuracy": 0.75, "eval_code_probabilities": [ 0.99964439868927, 0.9998418092727661, 0.9999876022338867, 0.8987520337104797, 0.9715626835823059, 0.8736424446105957, 0.992048442363739, 0.9987469911575317, 0.5219283103942871, 0.9212198853492737, 0.9132972955703735, 0.975989818572998, 0.9921802282333374, 0.9395624399185181, 0.9364026784896851, 0.9374613165855408, 0.27392876148223877, 0.916027307510376, 0.9856477975845337, 0.9577564001083374, 0.9659378528594971, 0.44796139001846313, 0.9555050134658813, 0.9627187848091125, 0.49647897481918335, 0.2568376362323761, 0.1927042156457901, 0.561137318611145, 0.5502861738204956, 0.3930111825466156, 0.46675917506217957, 0.7520542144775391, 0.3132498860359192, 0.1046283170580864, 0.12346971035003662, 0.16289445757865906, 0.998822033405304, 0.8946499228477478, 0.9992140531539917, 0.5172696709632874, 0.1285436749458313, 0.5089917182922363, 0.8845463991165161, 0.7489760518074036, 0.8961533308029175, 0.41095906496047974, 0.733329713344574, 0.6693552136421204, 0.9867847561836243, 0.8965162038803101, 0.9999988079071045, 0.9335614442825317, 0.9917327165603638, 0.7795218825340271, 0.673981785774231, 0.5716884136199951, 0.44703209400177, 0.990574300289154, 0.035506464540958405, 0.976534366607666, 0.9999401569366455, 0.9999487400054932, 0.9999538660049438, 0.9866085648536682, 0.610414445400238, 0.4811190962791443, 0.10195612162351608, 0.022535108029842377, 0.03303113952279091, 0.6767235994338989, 0.513542890548706, 0.5040165781974792, 0.5857304930686951, 0.5917763113975525, 0.6388770341873169, 0.999974250793457, 0.6209684610366821, 0.9876555800437927, 0.9999986886978149, 0.9999998807907104, 0.9999920129776001, 0.8514988422393799, 0.5619485378265381, 0.6238705515861511, 0.08037617802619934, 0.9763862490653992, 0.21729014813899994, 0.7570930123329163, 0.6862062215805054, 0.6152291893959045, 0.28896594047546387, 0.5001997947692871, 0.3280743360519409, 0.6856067180633545, 0.5078412890434265, 0.4755324721336365, 0.6357707977294922, 0.4387938678264618, 0.530180811882019, 0.6366823315620422 ], "eval_code_runtime": 36.6096, "eval_code_samples_per_second": 2.732, "eval_code_steps_per_second": 0.055, "epoch": 1.75, "step": 100 }, { "eval_counterfactual_python_loss": 0.5543044209480286, "eval_counterfactual_python_score": -0.17998556792736053, "eval_counterfactual_python_brier_score": 0.17998556792736053, "eval_counterfactual_python_average_probability": 0.641478955745697, "eval_counterfactual_python_accuracy": 0.68, "eval_counterfactual_python_probabilities": [ 0.5931702852249146, 0.36623018980026245, 0.4083799719810486, 0.7441582679748535, 0.3787698447704315, 0.4585961401462555, 0.9636162519454956, 0.8045032620429993, 0.7646597027778625, 0.4695722460746765, 0.6623252630233765, 0.5570592880249023, 0.4867101013660431, 0.09203716367483139, 0.659835934638977, 0.38409557938575745, 0.3676278293132782, 0.4404756426811218, 0.5268372893333435, 0.5405157804489136, 0.545403003692627, 0.7553650140762329, 0.917907178401947, 0.9101668000221252, 0.4998391270637512, 0.500482439994812, 0.4992232620716095, 0.36769258975982666, 0.3600624203681946, 0.10161764174699783, 0.27137765288352966, 0.5764327645301819, 0.5460357069969177, 0.6896645426750183, 0.5904480814933777, 0.43029138445854187, 0.9749554991722107, 0.9570443034172058, 0.9645709991455078, 0.8437580466270447, 0.3237834870815277, 0.4067584276199341, 0.505946695804596, 0.5007390975952148, 0.5010091066360474, 0.40068086981773376, 0.46205294132232666, 0.43641653656959534, 0.8441718220710754, 0.4741505980491638, 0.36617419123649597, 0.49912282824516296, 0.49875274300575256, 0.5011247396469116, 0.6660105586051941, 0.6525660157203674, 0.7143298387527466, 0.5283794403076172, 0.5694890022277832, 0.7942121624946594, 0.9201071858406067, 0.9259344935417175, 0.5873693227767944, 0.24995583295822144, 0.7780645489692688, 0.9872015714645386, 0.7106945514678955, 0.9213557839393616, 0.8370577692985535, 0.816162645816803, 0.385659784078598, 0.815650224685669, 0.9024463295936584, 0.40921616554260254, 0.7233693599700928, 0.5013810992240906, 0.4987756907939911, 0.4964328408241272, 0.7962855100631714, 0.9599968791007996, 0.967514157295227, 0.5222499966621399, 0.8373121619224548, 0.926533579826355, 0.6371393203735352, 0.5046852827072144, 0.6500534415245056, 0.9999998807907104, 1.0, 0.9999998807907104, 0.8575750589370728, 0.44756925106048584, 0.9889571070671082, 0.9346925616264343, 0.5233546495437622, 0.7729074954986572, 0.9984490871429443, 0.9480088353157043, 0.9992019534111023, 0.7911841869354248 ], "eval_counterfactual_python_runtime": 42.2823, "eval_counterfactual_python_samples_per_second": 2.365, "eval_counterfactual_python_steps_per_second": 0.047, "epoch": 1.75, "step": 100 }, { "train_runtime": 3769.2128, "train_samples_per_second": 0.849, "train_steps_per_second": 0.027, "total_flos": 0.0, "train_loss": 0.5000894145667553, "epoch": 1.75, "step": 100 } ]