[ { "loss": 0.7696, "learning_rate": 0.0002, "epoch": 0.02, "step": 1 }, { "loss": 0.6496, "learning_rate": 0.0002, "epoch": 0.04, "step": 2 }, { "loss": 0.6886, "learning_rate": 0.0002, "epoch": 0.05, "step": 3 }, { "loss": 0.6262, "learning_rate": 0.0002, "epoch": 0.07, "step": 4 }, { "loss": 0.5216, "learning_rate": 0.0002, "epoch": 0.09, "step": 5 }, { "loss": 0.5246, "learning_rate": 0.0002, "epoch": 0.11, "step": 6 }, { "loss": 0.4545, "learning_rate": 0.0002, "epoch": 0.12, "step": 7 }, { "loss": 0.4518, "learning_rate": 0.0002, "epoch": 0.14, "step": 8 }, { "loss": 0.3915, "learning_rate": 0.0002, "epoch": 0.16, "step": 9 }, { "loss": 0.4736, "learning_rate": 0.0002, "epoch": 0.18, "step": 10 }, { "loss": 0.4215, "learning_rate": 0.0002, "epoch": 0.19, "step": 11 }, { "loss": 0.3712, "learning_rate": 0.0002, "epoch": 0.21, "step": 12 }, { "loss": 0.3054, "learning_rate": 0.0002, "epoch": 0.23, "step": 13 }, { "loss": 0.2136, "learning_rate": 0.0002, "epoch": 0.25, "step": 14 }, { "loss": 0.1414, "learning_rate": 0.0002, "epoch": 0.26, "step": 15 }, { "loss": 0.215, "learning_rate": 0.0002, "epoch": 0.28, "step": 16 }, { "loss": 0.3062, "learning_rate": 0.0002, "epoch": 0.3, "step": 17 }, { "loss": 0.3171, "learning_rate": 0.0002, "epoch": 0.32, "step": 18 }, { "loss": 0.2465, "learning_rate": 0.0002, "epoch": 0.33, "step": 19 }, { "loss": 0.2049, "learning_rate": 0.0002, "epoch": 0.35, "step": 20 }, { "loss": 0.2947, "learning_rate": 0.0002, "epoch": 0.37, "step": 21 }, { "loss": 0.1174, "learning_rate": 0.0002, "epoch": 0.39, "step": 22 }, { "loss": 0.118, "learning_rate": 0.0002, "epoch": 0.4, "step": 23 }, { "loss": 0.1616, "learning_rate": 0.0002, "epoch": 0.42, "step": 24 }, { "loss": 0.1731, "learning_rate": 0.0002, "epoch": 0.44, "step": 25 }, { "eval_code_easy_loss": 0.1410333812236786, "eval_code_easy_score": -0.04131929576396942, "eval_code_easy_brier_score": 0.04131929576396942, "eval_code_easy_average_probability": 0.8963662981987, "eval_code_easy_accuracy": 0.94, "eval_code_easy_probabilities": [ 0.9897421598434448, 0.9846920371055603, 0.9921024441719055, 0.9999797344207764, 0.9999569654464722, 0.9998584985733032, 0.4596724212169647, 0.9999803304672241, 0.9999934434890747, 0.6896206736564636, 0.7018794417381287, 0.8044198751449585, 0.9061204791069031, 0.9999116659164429, 0.9998483657836914, 0.9975529313087463, 0.9999861717224121, 0.9999945163726807, 0.9997785687446594, 0.9217097163200378, 0.549553632736206, 0.9999867677688599, 0.9999836683273315, 0.9999502897262573, 0.9999196529388428, 0.999788224697113, 0.9994627833366394, 0.8858318328857422, 0.8768848180770874, 0.9063717126846313, 0.6450658440589905, 0.718223512172699, 0.6423218846321106, 0.6255131363868713, 0.4555876851081848, 0.5870699286460876, 0.7827481031417847, 0.9729915857315063, 0.7549998164176941, 0.9984861016273499, 0.9989733695983887, 0.9916539192199707, 0.9056527018547058, 0.8946619629859924, 0.8330284357070923, 0.9995453953742981, 0.5416378378868103, 0.9464252591133118, 0.972342848777771, 0.9960533380508423, 0.988210916519165, 0.4754994809627533, 0.9999995231628418, 0.9999991655349731, 0.9999830722808838, 0.9999951124191284, 0.9999954700469971, 0.9999794960021973, 0.3308394253253937, 0.9999799728393555, 0.9992032647132874, 0.9999996423721313, 0.9999727010726929, 0.9999815225601196, 0.9999794960021973, 0.9999793767929077, 0.988341748714447, 0.99339359998703, 0.9851863980293274, 0.631180465221405, 0.2889827787876129, 0.2886982560157776, 0.9839469194412231, 0.9860828518867493, 0.9651493430137634, 0.7786540389060974, 0.8290436863899231, 0.9590767621994019, 0.9999992847442627, 0.9999868869781494, 0.9999861717224121, 0.9500917196273804, 0.9666972160339355, 0.8169301748275757, 0.9984229803085327, 0.9998488426208496, 0.9996005892753601, 0.9686463475227356, 0.9352719783782959, 0.9167892336845398, 0.9999650716781616, 0.8374965190887451, 0.9999854564666748, 0.9999997615814209, 0.9999997615814209, 0.9999992847442627, 0.9999997615814209, 0.9999991655349731, 0.9999997615814209, 0.848063588142395 ], "eval_code_easy_runtime": 31.1515, "eval_code_easy_samples_per_second": 3.21, "eval_code_easy_steps_per_second": 0.128, "epoch": 0.44, "step": 25 }, { "eval_code_hard_loss": 0.44707271456718445, "eval_code_hard_score": -0.14680194854736328, "eval_code_hard_brier_score": 0.14680194854736328, "eval_code_hard_average_probability": 0.6711300015449524, "eval_code_hard_accuracy": 0.83, "eval_code_hard_probabilities": [ 0.9999589920043945, 0.47546130418777466, 0.5713449716567993, 0.9994718432426453, 0.9991668462753296, 0.9993312358856201, 0.4067853093147278, 0.40341079235076904, 0.41259637475013733, 0.4262898862361908, 0.48981818556785583, 0.6610403060913086, 0.9994863271713257, 0.9982219338417053, 0.99834144115448, 0.7558809518814087, 0.6139836311340332, 0.6215753555297852, 0.9995869994163513, 0.9992690682411194, 0.9992164373397827, 0.8080712556838989, 0.8023390769958496, 0.7505528330802917, 0.5108107328414917, 0.5139395594596863, 0.4423873722553253, 0.7270267605781555, 0.7878870368003845, 0.7860028743743896, 0.6687493920326233, 0.7331302165985107, 0.6188584566116333, 0.5269416570663452, 0.4910818338394165, 0.4837784469127655, 0.5938860774040222, 0.5307540893554688, 0.6091882586479187, 0.5714852213859558, 0.5575187802314758, 0.636616587638855, 0.5653713941574097, 0.5363364219665527, 0.5516955256462097, 0.7773237228393555, 0.5234290361404419, 0.5288711786270142, 0.6337161064147949, 0.6571640968322754, 0.6706582903862, 0.45844873785972595, 0.7167773842811584, 0.605722188949585, 0.9969874024391174, 0.9969995617866516, 0.9975918531417847, 0.3677201271057129, 0.5539365410804749, 0.7323344349861145, 0.9991826415061951, 0.9991852641105652, 0.9992254972457886, 0.9554563760757446, 0.9320324659347534, 0.9908183217048645, 0.5780607461929321, 0.9173429608345032, 0.573617160320282, 0.710330605506897, 0.610026478767395, 0.5195437073707581, 0.8946442008018494, 0.925991415977478, 0.938264787197113, 0.5608754754066467, 0.5026195049285889, 0.4715512990951538, 0.5032576322555542, 0.5153825879096985, 0.5001385807991028, 0.6857117414474487, 0.6296961307525635, 0.58381187915802, 0.5933532118797302, 0.5141075253486633, 0.5213065147399902, 0.4824189841747284, 0.5641835331916809, 0.5198292136192322, 0.5848154425621033, 0.6065869927406311, 0.5906622409820557, 0.39757779240608215, 0.37057143449783325, 0.33965691924095154, 0.638043999671936, 0.75816410779953, 0.8119453191757202, 0.4706781804561615 ], "eval_code_hard_runtime": 110.7399, "eval_code_hard_samples_per_second": 0.903, "eval_code_hard_steps_per_second": 0.036, "epoch": 0.44, "step": 25 }, { "loss": 0.1187, "learning_rate": 0.0002, "epoch": 0.46, "step": 26 }, { "loss": 0.1277, "learning_rate": 0.0002, "epoch": 0.47, "step": 27 }, { "loss": 0.0581, "learning_rate": 0.0002, "epoch": 0.49, "step": 28 }, { "loss": 0.1622, "learning_rate": 0.0002, "epoch": 0.51, "step": 29 }, { "loss": 0.0558, "learning_rate": 0.0002, "epoch": 0.53, "step": 30 }, { "loss": 0.0873, "learning_rate": 0.0002, "epoch": 0.54, "step": 31 }, { "loss": 0.1237, "learning_rate": 0.0002, "epoch": 0.56, "step": 32 }, { "loss": 0.1127, "learning_rate": 0.0002, "epoch": 0.58, "step": 33 }, { "loss": 0.0673, "learning_rate": 0.0002, "epoch": 0.6, "step": 34 }, { "loss": 0.0628, "learning_rate": 0.0002, "epoch": 0.61, "step": 35 }, { "loss": 0.1647, "learning_rate": 0.0002, "epoch": 0.63, "step": 36 }, { "loss": 0.0389, "learning_rate": 0.0002, "epoch": 0.65, "step": 37 }, { "loss": 0.0162, "learning_rate": 0.0002, "epoch": 0.67, "step": 38 }, { "loss": 0.1205, "learning_rate": 0.0002, "epoch": 0.68, "step": 39 }, { "loss": 0.2798, "learning_rate": 0.0002, "epoch": 0.7, "step": 40 }, { "loss": 0.0202, "learning_rate": 0.0002, "epoch": 0.72, "step": 41 }, { "loss": 0.0563, "learning_rate": 0.0002, "epoch": 0.74, "step": 42 }, { "loss": 0.0959, "learning_rate": 0.0002, "epoch": 0.75, "step": 43 }, { "loss": 0.033, "learning_rate": 0.0002, "epoch": 0.77, "step": 44 }, { "loss": 0.0377, "learning_rate": 0.0002, "epoch": 0.79, "step": 45 }, { "loss": 0.1312, "learning_rate": 0.0002, "epoch": 0.81, "step": 46 }, { "loss": 0.1171, "learning_rate": 0.0002, "epoch": 0.82, "step": 47 }, { "loss": 0.0586, "learning_rate": 0.0002, "epoch": 0.84, "step": 48 }, { "loss": 0.0373, "learning_rate": 0.0002, "epoch": 0.86, "step": 49 }, { "loss": 0.1418, "learning_rate": 0.0002, "epoch": 0.88, "step": 50 }, { "eval_code_easy_loss": 0.09489491581916809, "eval_code_easy_score": -0.030822403728961945, "eval_code_easy_brier_score": 0.030822403728961945, "eval_code_easy_average_probability": 0.9424028992652893, "eval_code_easy_accuracy": 0.95, "eval_code_easy_probabilities": [ 0.9999914169311523, 1.0, 1.0, 0.9999921321868896, 0.9999855756759644, 0.9999556541442871, 0.4764171540737152, 1.0, 1.0, 0.9999983310699463, 0.9999955892562866, 0.9999998807907104, 0.9997754693031311, 0.9999978542327881, 0.9999951124191284, 0.9944272041320801, 1.0, 1.0, 1.0, 0.9332470893859863, 0.686340868473053, 0.9999998807907104, 0.9999998807907104, 0.9999996423721313, 1.0, 0.9999998807907104, 0.9999986886978149, 0.9998589754104614, 0.9998730421066284, 0.9999005794525146, 0.7265980839729309, 0.8790691494941711, 0.8301546573638916, 0.9983548521995544, 0.9948050379753113, 0.9825286865234375, 0.962716817855835, 0.9991486072540283, 0.9315124750137329, 0.9939038157463074, 0.9942259192466736, 0.9889645576477051, 0.9977827668190002, 0.9999998807907104, 0.9890486001968384, 0.9997009038925171, 0.20336638391017914, 0.9882842302322388, 0.9745821952819824, 0.9941431879997253, 0.9512994289398193, 0.7942199110984802, 1.0, 1.0, 1.0, 1.0, 1.0, 0.999992847442627, 0.254838228225708, 0.9999922513961792, 1.0, 1.0, 0.9999960660934448, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.9282342791557312, 0.25453829765319824, 0.1509198695421219, 0.9998898506164551, 0.9999701976776123, 0.999139666557312, 0.8278753161430359, 0.8814498782157898, 0.9445298910140991, 1.0, 0.9999996423721313, 1.0, 0.9991635084152222, 1.0, 0.9999971389770508, 0.955471932888031, 0.9967992305755615, 0.9889751672744751, 0.9923612475395203, 0.9318004250526428, 0.9118591547012329, 1.0, 0.95877605676651, 0.9997400641441345, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.9998133778572083 ], "eval_code_easy_runtime": 31.1754, "eval_code_easy_samples_per_second": 3.208, "eval_code_easy_steps_per_second": 0.128, "epoch": 0.88, "step": 50 }, { "eval_code_hard_loss": 0.48955029249191284, "eval_code_hard_score": -0.16734105348587036, "eval_code_hard_brier_score": 0.16734105348587036, "eval_code_hard_average_probability": 0.6507197022438049, "eval_code_hard_accuracy": 0.74, "eval_code_hard_probabilities": [ 0.998719334602356, 0.4495692551136017, 0.560586154460907, 0.9998689889907837, 0.9994572997093201, 0.999840259552002, 0.40500903129577637, 0.4450298845767975, 0.39399078488349915, 0.48165562748908997, 0.4001981019973755, 0.6641137599945068, 0.999977707862854, 0.9998791217803955, 0.9998584985733032, 0.8799082636833191, 0.766524612903595, 0.775595486164093, 0.9999852180480957, 0.9999836683273315, 0.9999798536300659, 0.8477662205696106, 0.7681900262832642, 0.7254011631011963, 0.4458688199520111, 0.4710305333137512, 0.3777993619441986, 0.09769346565008163, 0.4915977418422699, 0.5548089146614075, 0.6629347801208496, 0.7435128092765808, 0.5462533831596375, 0.6541544198989868, 0.4705035388469696, 0.5168669819831848, 0.42897677421569824, 0.3096539080142975, 0.4461301267147064, 0.461626797914505, 0.46588248014450073, 0.5764793157577515, 0.5880526304244995, 0.5188709497451782, 0.5498936772346497, 0.8593714237213135, 0.5482467412948608, 0.5750262141227722, 0.67604660987854, 0.8179431557655334, 0.8246544003486633, 0.4119476079940796, 0.7206862568855286, 0.6743555068969727, 0.9999415874481201, 0.9999454021453857, 0.9999594688415527, 0.470983624458313, 0.5161882042884827, 0.5438310503959656, 0.9999815225601196, 0.9999749660491943, 0.9999841451644897, 0.9674224257469177, 0.9396224617958069, 0.9942786693572998, 0.6115734577178955, 0.9555063247680664, 0.6700289249420166, 0.8057522177696228, 0.6384589672088623, 0.5619309544563293, 0.6012542843818665, 0.6818827986717224, 0.709420382976532, 0.6579272747039795, 0.42321380972862244, 0.44099465012550354, 0.5047284364700317, 0.506719172000885, 0.5062504410743713, 0.6765443682670593, 0.7465528249740601, 0.6119047999382019, 0.5429056882858276, 0.5111532807350159, 0.5241301655769348, 0.4921115040779114, 0.6113234162330627, 0.5521866679191589, 0.5868935585021973, 0.6085503697395325, 0.5654861927032471, 0.3610905706882477, 0.3744204640388489, 0.31809136271476746, 0.5574522018432617, 0.641338586807251, 0.48631271719932556, 0.5477950572967529 ], "eval_code_hard_runtime": 110.6958, "eval_code_hard_samples_per_second": 0.903, "eval_code_hard_steps_per_second": 0.036, "epoch": 0.88, "step": 50 }, { "loss": 0.0819, "learning_rate": 0.0002, "epoch": 0.89, "step": 51 }, { "loss": 0.0037, "learning_rate": 0.0002, "epoch": 0.91, "step": 52 }, { "loss": 0.1525, "learning_rate": 0.0002, "epoch": 0.93, "step": 53 }, { "loss": 0.2635, "learning_rate": 0.0002, "epoch": 0.95, "step": 54 }, { "loss": 0.1162, "learning_rate": 0.0002, "epoch": 0.96, "step": 55 }, { "loss": 0.0352, "learning_rate": 0.0002, "epoch": 0.98, "step": 56 }, { "loss": 0.1891, "learning_rate": 0.0002, "epoch": 1.0, "step": 57 }, { "loss": 0.0453, "learning_rate": 0.0002, "epoch": 1.02, "step": 58 }, { "loss": 0.0562, "learning_rate": 0.0002, "epoch": 1.04, "step": 59 }, { "loss": 0.0006, "learning_rate": 0.0002, "epoch": 1.05, "step": 60 }, { "loss": 0.0445, "learning_rate": 0.0002, "epoch": 1.07, "step": 61 }, { "loss": 0.0125, "learning_rate": 0.0002, "epoch": 1.09, "step": 62 }, { "loss": 0.0378, "learning_rate": 0.0002, "epoch": 1.11, "step": 63 }, { "loss": 0.0367, "learning_rate": 0.0002, "epoch": 1.12, "step": 64 }, { "loss": 0.0261, "learning_rate": 0.0002, "epoch": 1.14, "step": 65 }, { "loss": 0.0022, "learning_rate": 0.0002, "epoch": 1.16, "step": 66 }, { "loss": 0.0151, "learning_rate": 0.0002, "epoch": 1.18, "step": 67 }, { "loss": 0.0262, "learning_rate": 0.0002, "epoch": 1.19, "step": 68 }, { "loss": 0.0025, "learning_rate": 0.0002, "epoch": 1.21, "step": 69 }, { "loss": 0.009, "learning_rate": 0.0002, "epoch": 1.23, "step": 70 }, { "loss": 0.0233, "learning_rate": 0.0002, "epoch": 1.25, "step": 71 }, { "loss": 0.0167, "learning_rate": 0.0002, "epoch": 1.26, "step": 72 }, { "loss": 0.0088, "learning_rate": 0.0002, "epoch": 1.28, "step": 73 }, { "loss": 0.0003, "learning_rate": 0.0002, "epoch": 1.3, "step": 74 }, { "loss": 0.0111, "learning_rate": 0.0002, "epoch": 1.32, "step": 75 }, { "eval_code_easy_loss": 0.08589933067560196, "eval_code_easy_score": -0.029182305559515953, "eval_code_easy_brier_score": 0.029182305559515953, "eval_code_easy_average_probability": 0.9475240111351013, "eval_code_easy_accuracy": 0.95, "eval_code_easy_probabilities": [ 1.0, 1.0, 1.0, 1.0, 1.0, 0.9999991655349731, 0.44722074270248413, 1.0, 1.0, 0.9999874830245972, 0.9999817609786987, 0.9999996423721313, 0.9983553290367126, 1.0, 0.9999998807907104, 0.969874918460846, 1.0, 1.0, 1.0, 0.9999077320098877, 0.7078168392181396, 1.0, 1.0, 1.0, 0.9998465776443481, 0.9997827410697937, 0.9978830218315125, 0.9988357424736023, 0.9989570379257202, 0.9992606043815613, 0.7118145227432251, 0.8777998089790344, 0.7065884470939636, 0.9980650544166565, 0.9990893602371216, 0.9781619310379028, 0.9999847412109375, 1.0, 0.9993658661842346, 0.9999990463256836, 0.9999980926513672, 0.9999912977218628, 0.9999923706054688, 1.0, 0.9999966621398926, 0.9973452687263489, 0.13087554275989532, 0.8122653365135193, 0.9999538660049438, 0.9999990463256836, 0.9998875856399536, 0.9902234673500061, 1.0, 1.0, 0.9999982118606567, 0.9999996423721313, 0.9999994039535522, 0.9999998807907104, 0.396033376455307, 0.9999998807907104, 0.9999998807907104, 1.0, 0.9975220561027527, 1.0, 1.0, 1.0, 0.9999996423721313, 1.0, 0.9999997615814209, 0.9743689298629761, 0.3422013521194458, 0.25521236658096313, 0.9973738193511963, 0.9997031092643738, 0.9819728136062622, 0.9939416646957397, 0.997735857963562, 0.9992177486419678, 1.0, 0.9999991655349731, 0.9999998807907104, 0.9803919196128845, 1.0, 0.9999996423721313, 0.9999098777770996, 0.9999998807907104, 0.9999986886978149, 0.9992156028747559, 0.9822015166282654, 0.9914659261703491, 0.9999808073043823, 0.5561801791191101, 0.9888289570808411, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.9998490810394287 ], "eval_code_easy_runtime": 31.1417, "eval_code_easy_samples_per_second": 3.211, "eval_code_easy_steps_per_second": 0.128, "epoch": 1.32, "step": 75 }, { "eval_code_hard_loss": 0.4541158378124237, "eval_code_hard_score": -0.15187427401542664, "eval_code_hard_brier_score": 0.15187427401542664, "eval_code_hard_average_probability": 0.7136721014976501, "eval_code_hard_accuracy": 0.79, "eval_code_hard_probabilities": [ 0.9999998807907104, 0.42837128043174744, 0.6079556345939636, 0.9998014569282532, 0.9960059523582458, 0.9952560067176819, 0.25668519735336304, 0.37132731080055237, 0.18430009484291077, 0.5929830074310303, 0.3093147277832031, 0.7880420088768005, 0.999980092048645, 0.9994723200798035, 0.9992823004722595, 0.9913004040718079, 0.9134578704833984, 0.9550933837890625, 0.9999967813491821, 0.9999939203262329, 0.9999887943267822, 0.9903974533081055, 0.9929130673408508, 0.9700443744659424, 0.5675995945930481, 0.4452306032180786, 0.39090991020202637, 0.05753504857420921, 0.5650666952133179, 0.6610203981399536, 0.8633843660354614, 0.9552722573280334, 0.6663053035736084, 0.7623808979988098, 0.4418239891529083, 0.5061418414115906, 0.2837110459804535, 0.19440007209777832, 0.4149368107318878, 0.6865063905715942, 0.653810441493988, 0.8122990131378174, 0.7234838604927063, 0.5346352458000183, 0.6917294263839722, 0.9355289340019226, 0.5552300810813904, 0.6327811479568481, 0.8747856020927429, 0.962817907333374, 0.9655711650848389, 0.18094369769096375, 0.8358926177024841, 0.876140296459198, 0.9999440908432007, 0.9999480247497559, 0.9999721050262451, 0.2512325644493103, 0.5562063455581665, 0.623430609703064, 0.9999799728393555, 0.9999500513076782, 0.9999836683273315, 0.9980340600013733, 0.9961856007575989, 0.9999278783798218, 0.9568101167678833, 0.9999703168869019, 0.9811659455299377, 0.9707834720611572, 0.8236635327339172, 0.4347587823867798, 0.8745930790901184, 0.9302994608879089, 0.9327366352081299, 0.766298234462738, 0.3869512379169464, 0.4064312279224396, 0.5269957184791565, 0.5092197060585022, 0.5739120841026306, 0.9161906838417053, 0.9152584075927734, 0.8292853832244873, 0.6077045798301697, 0.4789629280567169, 0.55214923620224, 0.5199791193008423, 0.834286093711853, 0.7272135615348816, 0.5919612050056458, 0.628190279006958, 0.5904282331466675, 0.20717588067054749, 0.23157484829425812, 0.17664135992527008, 0.7453339099884033, 0.9322812557220459, 0.7623251676559448, 0.5810563564300537 ], "eval_code_hard_runtime": 110.6654, "eval_code_hard_samples_per_second": 0.904, "eval_code_hard_steps_per_second": 0.036, "epoch": 1.32, "step": 75 }, { "loss": 0.0328, "learning_rate": 0.0002, "epoch": 1.33, "step": 76 }, { "loss": 0.0814, "learning_rate": 0.0002, "epoch": 1.35, "step": 77 }, { "loss": 0.0022, "learning_rate": 0.0002, "epoch": 1.37, "step": 78 }, { "loss": 0.0048, "learning_rate": 0.0002, "epoch": 1.39, "step": 79 }, { "loss": 0.0439, "learning_rate": 0.0002, "epoch": 1.4, "step": 80 }, { "loss": 0.0316, "learning_rate": 0.0002, "epoch": 1.42, "step": 81 }, { "loss": 0.0956, "learning_rate": 0.0002, "epoch": 1.44, "step": 82 }, { "loss": 0.0138, "learning_rate": 0.0002, "epoch": 1.46, "step": 83 }, { "loss": 0.0014, "learning_rate": 0.0002, "epoch": 1.47, "step": 84 }, { "loss": 0.0353, "learning_rate": 0.0002, "epoch": 1.49, "step": 85 }, { "loss": 0.0024, "learning_rate": 0.0002, "epoch": 1.51, "step": 86 }, { "loss": 0.0206, "learning_rate": 0.0002, "epoch": 1.53, "step": 87 }, { "loss": 0.0007, "learning_rate": 0.0002, "epoch": 1.54, "step": 88 }, { "loss": 0.0472, "learning_rate": 0.0002, "epoch": 1.56, "step": 89 }, { "loss": 0.0521, "learning_rate": 0.0002, "epoch": 1.58, "step": 90 }, { "loss": 0.0073, "learning_rate": 0.0002, "epoch": 1.6, "step": 91 }, { "loss": 0.1226, "learning_rate": 0.0002, "epoch": 1.61, "step": 92 }, { "loss": 0.0452, "learning_rate": 0.0002, "epoch": 1.63, "step": 93 }, { "loss": 0.0143, "learning_rate": 0.0002, "epoch": 1.65, "step": 94 }, { "loss": 0.0593, "learning_rate": 0.0002, "epoch": 1.67, "step": 95 }, { "loss": 0.0165, "learning_rate": 0.0002, "epoch": 1.68, "step": 96 }, { "loss": 0.0016, "learning_rate": 0.0002, "epoch": 1.7, "step": 97 }, { "loss": 0.0025, "learning_rate": 0.0002, "epoch": 1.72, "step": 98 }, { "loss": 0.0032, "learning_rate": 0.0002, "epoch": 1.74, "step": 99 }, { "loss": 0.0065, "learning_rate": 0.0002, "epoch": 1.75, "step": 100 }, { "eval_code_easy_loss": 0.1302385777235031, "eval_code_easy_score": -0.038255855441093445, "eval_code_easy_brier_score": 0.038255855441093445, "eval_code_easy_average_probability": 0.9474854469299316, "eval_code_easy_accuracy": 0.96, "eval_code_easy_probabilities": [ 0.9999997615814209, 1.0, 0.9999998807907104, 1.0, 0.9999998807907104, 0.9999867677688599, 0.14685745537281036, 1.0, 1.0, 0.9999862909317017, 0.9999769926071167, 0.9999992847442627, 0.9967072606086731, 1.0, 0.9999990463256836, 0.9817181825637817, 1.0, 1.0, 1.0, 0.9999679327011108, 0.989346444606781, 1.0, 1.0, 0.9999996423721313, 1.0, 1.0, 0.9997485280036926, 0.9999996423721313, 0.9999998807907104, 0.9999997615814209, 0.7592384815216064, 0.9866325259208679, 0.7520292401313782, 0.9871376752853394, 0.9395173788070679, 0.9484317898750305, 0.99998939037323, 1.0, 0.987211287021637, 0.9999986886978149, 0.9999985694885254, 0.999853253364563, 0.9999595880508423, 0.9999998807907104, 0.9999158382415771, 0.9994738698005676, 0.07437732070684433, 0.8850747346878052, 0.9996064305305481, 0.9999996423721313, 0.9999347925186157, 0.9994233846664429, 1.0, 0.9999998807907104, 1.0, 1.0, 1.0, 1.0, 0.754472017288208, 0.9999998807907104, 0.9999997615814209, 1.0, 0.9901873469352722, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.9974935054779053, 0.0717310905456543, 0.029978960752487183, 1.0, 1.0, 0.999995231628418, 0.9984604120254517, 0.9997397065162659, 0.9999804496765137, 1.0, 1.0, 1.0, 0.9670922160148621, 0.9999996423721313, 0.9999791383743286, 0.998187243938446, 0.9999996423721313, 0.9999892711639404, 0.9999653100967407, 0.9966863989830017, 0.999233603477478, 1.0, 0.513279378414154, 0.9999966621398926, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.9999997615814209 ], "eval_code_easy_runtime": 31.1546, "eval_code_easy_samples_per_second": 3.21, "eval_code_easy_steps_per_second": 0.128, "epoch": 1.75, "step": 100 }, { "eval_code_hard_loss": 0.4152863323688507, "eval_code_hard_score": -0.13397492468357086, "eval_code_hard_brier_score": 0.13397492468357086, "eval_code_hard_average_probability": 0.7467895746231079, "eval_code_hard_accuracy": 0.82, "eval_code_hard_probabilities": [ 0.999998927116394, 0.3584747314453125, 0.5524832010269165, 0.9999052286148071, 0.9991750121116638, 0.9992142915725708, 0.17623497545719147, 0.41959860920906067, 0.08018570393323898, 0.6593999862670898, 0.26273950934410095, 0.7871806621551514, 0.999994158744812, 0.9992138147354126, 0.9983713030815125, 0.9971001744270325, 0.9297299981117249, 0.9683568477630615, 0.9999955892562866, 0.9999574422836304, 0.9999104738235474, 0.9948668479919434, 0.9949619770050049, 0.9704132080078125, 0.6137145757675171, 0.5000441074371338, 0.39442917704582214, 0.11175673454999924, 0.797296941280365, 0.8252298831939697, 0.9333888292312622, 0.9808279871940613, 0.7203049659729004, 0.8538256287574768, 0.4518895745277405, 0.613561749458313, 0.5261998772621155, 0.38816678524017334, 0.7502338886260986, 0.6585968136787415, 0.6722922325134277, 0.8558183908462524, 0.8481931090354919, 0.5945469737052917, 0.8612274527549744, 0.9837346076965332, 0.5298483371734619, 0.7162337303161621, 0.9290578365325928, 0.9855068922042847, 0.9883608818054199, 0.20835253596305847, 0.9197254776954651, 0.8866751194000244, 0.9997254014015198, 0.9997286200523376, 0.9998807907104492, 0.4348519742488861, 0.5857678651809692, 0.5733372569084167, 0.9998024106025696, 0.9995396137237549, 0.9998968839645386, 0.9989103078842163, 0.9946140646934509, 0.9999970197677612, 0.8636531829833984, 0.9994328618049622, 0.9395280480384827, 0.9852699637413025, 0.8448395729064941, 0.33028489351272583, 0.9747959971427917, 0.996696949005127, 0.9959298968315125, 0.8669978976249695, 0.32030242681503296, 0.336773544549942, 0.5315243005752563, 0.4963895082473755, 0.5617468357086182, 0.9487797021865845, 0.9507840275764465, 0.8413812518119812, 0.8480523228645325, 0.5873783230781555, 0.5967154502868652, 0.5515139698982239, 0.7987790107727051, 0.7132774591445923, 0.7350783348083496, 0.7808082699775696, 0.746001124382019, 0.17566508054733276, 0.21035020053386688, 0.10222747921943665, 0.6762077212333679, 0.9854094386100769, 0.8317046761512756, 0.7221127152442932 ], "eval_code_hard_runtime": 110.7468, "eval_code_hard_samples_per_second": 0.903, "eval_code_hard_steps_per_second": 0.036, "epoch": 1.75, "step": 100 }, { "loss": 0.0416, "learning_rate": 0.0002, "epoch": 1.77, "step": 101 }, { "loss": 0.0034, "learning_rate": 0.0002, "epoch": 1.79, "step": 102 }, { "loss": 0.1193, "learning_rate": 0.0002, "epoch": 1.81, "step": 103 }, { "loss": 0.0152, "learning_rate": 0.0002, "epoch": 1.82, "step": 104 }, { "loss": 0.0074, "learning_rate": 0.0002, "epoch": 1.84, "step": 105 }, { "loss": 0.0236, "learning_rate": 0.0002, "epoch": 1.86, "step": 106 }, { "loss": 0.0077, "learning_rate": 0.0002, "epoch": 1.88, "step": 107 }, { "loss": 0.1086, "learning_rate": 0.0002, "epoch": 1.89, "step": 108 }, { "loss": 0.0718, "learning_rate": 0.0002, "epoch": 1.91, "step": 109 }, { "loss": 0.0042, "learning_rate": 0.0002, "epoch": 1.93, "step": 110 }, { "loss": 0.0068, "learning_rate": 0.0002, "epoch": 1.95, "step": 111 }, { "loss": 0.0774, "learning_rate": 0.0002, "epoch": 1.96, "step": 112 }, { "loss": 0.0374, "learning_rate": 0.0002, "epoch": 1.98, "step": 113 }, { "loss": 0.0035, "learning_rate": 0.0002, "epoch": 2.0, "step": 114 }, { "loss": 0.0083, "learning_rate": 0.0002, "epoch": 2.02, "step": 115 }, { "loss": 0.0147, "learning_rate": 0.0002, "epoch": 2.04, "step": 116 }, { "loss": 0.0079, "learning_rate": 0.0002, "epoch": 2.05, "step": 117 }, { "loss": 0.0012, "learning_rate": 0.0002, "epoch": 2.07, "step": 118 }, { "loss": 0.0137, "learning_rate": 0.0002, "epoch": 2.09, "step": 119 }, { "loss": 0.029, "learning_rate": 0.0002, "epoch": 2.11, "step": 120 }, { "loss": 0.0125, "learning_rate": 0.0002, "epoch": 2.12, "step": 121 }, { "loss": 0.0102, "learning_rate": 0.0002, "epoch": 2.14, "step": 122 }, { "loss": 0.0057, "learning_rate": 0.0002, "epoch": 2.16, "step": 123 }, { "loss": 0.0001, "learning_rate": 0.0002, "epoch": 2.18, "step": 124 }, { "loss": 0.1814, "learning_rate": 0.0002, "epoch": 2.19, "step": 125 }, { "eval_code_easy_loss": 0.10904992371797562, "eval_code_easy_score": -0.03530557453632355, "eval_code_easy_brier_score": 0.03530557453632355, "eval_code_easy_average_probability": 0.9442441463470459, "eval_code_easy_accuracy": 0.95, "eval_code_easy_probabilities": [ 0.9999860525131226, 0.9999575614929199, 0.9999876022338867, 1.0, 1.0, 0.9999998807907104, 0.3663080930709839, 1.0, 1.0, 0.9986967444419861, 0.9995228052139282, 0.9994437098503113, 0.9989981055259705, 1.0, 1.0, 0.9903301000595093, 1.0, 1.0, 1.0, 1.0, 0.9906626343727112, 1.0, 1.0, 1.0, 0.9999998807907104, 1.0, 0.9999880790710449, 0.9878363013267517, 0.9968984127044678, 0.9852358102798462, 0.7452288866043091, 0.8594924807548523, 0.625163733959198, 0.9999972581863403, 0.9998786449432373, 0.9998183846473694, 0.9999641180038452, 0.9999997615814209, 0.986954391002655, 0.9998384714126587, 0.9996883869171143, 0.9997907280921936, 1.0, 1.0, 1.0, 0.6630300283432007, 0.3178897202014923, 0.23424135148525238, 0.9830185174942017, 0.9976402521133423, 0.9478833079338074, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.9999997615814209, 0.9950571060180664, 0.9999996423721313, 0.9999940395355225, 0.9999960660934448, 0.998701810836792, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.9900442361831665, 0.11046496033668518, 0.05695008859038353, 0.9999970197677612, 0.9999996423721313, 0.999971866607666, 0.9758032560348511, 0.987618088722229, 0.9728375673294067, 1.0, 0.9999998807907104, 1.0, 0.9999992847442627, 1.0, 1.0, 0.9805415868759155, 0.9995923638343811, 0.9991359114646912, 0.9988665580749512, 0.9318742156028748, 0.9750478863716125, 0.9999997615814209, 0.7831814289093018, 0.9958741068840027, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.9994946718215942 ], "eval_code_easy_runtime": 31.118, "eval_code_easy_samples_per_second": 3.214, "eval_code_easy_steps_per_second": 0.129, "epoch": 2.19, "step": 125 }, { "eval_code_hard_loss": 0.4987069070339203, "eval_code_hard_score": -0.15576696395874023, "eval_code_hard_brier_score": 0.15576696395874023, "eval_code_hard_average_probability": 0.6946533918380737, "eval_code_hard_accuracy": 0.79, "eval_code_hard_probabilities": [ 0.9995853304862976, 0.20991425216197968, 0.549527108669281, 1.0, 0.9999998807907104, 1.0, 0.22219569981098175, 0.37956300377845764, 0.20069900155067444, 0.6470227837562561, 0.31780990958213806, 0.6012275218963623, 1.0, 0.9999998807907104, 0.9999998807907104, 0.9296144247055054, 0.8540101051330566, 0.8961200714111328, 1.0, 1.0, 1.0, 0.9873550534248352, 0.9724990129470825, 0.9760501980781555, 0.38157281279563904, 0.42047223448753357, 0.3564426898956299, 0.0027432094793766737, 0.2510468661785126, 0.3818874955177307, 0.8423392176628113, 0.9236337542533875, 0.5938029289245605, 0.8878844380378723, 0.7184504866600037, 0.7954170107841492, 0.4380953311920166, 0.42004096508026123, 0.6841737627983093, 0.5444896817207336, 0.40916869044303894, 0.5416669249534607, 0.7608702182769775, 0.5757149457931519, 0.8424829840660095, 0.9364664554595947, 0.548158586025238, 0.6652321219444275, 0.8666991591453552, 0.876342236995697, 0.8881990313529968, 0.3995114266872406, 0.8002880215644836, 0.6442937254905701, 1.0, 1.0, 1.0, 0.202246755361557, 0.5754519104957581, 0.5530020594596863, 1.0, 1.0, 1.0, 0.9787330031394958, 0.9658524394035339, 0.9971476197242737, 0.7014413475990295, 0.9961981177330017, 0.8666495680809021, 0.8775137066841125, 0.5972762107849121, 0.5123103857040405, 0.6785091757774353, 0.7748872637748718, 0.8229066133499146, 0.6105999946594238, 0.33387431502342224, 0.2667674720287323, 0.520160436630249, 0.5227006673812866, 0.5882536172866821, 0.7376717329025269, 0.8348681926727295, 0.6894320249557495, 0.39057934284210205, 0.3577791750431061, 0.5273920893669128, 0.47946465015411377, 0.7557993531227112, 0.7129892110824585, 0.5154388546943665, 0.55854332447052, 0.5196318030357361, 0.7760440111160278, 0.8043990731239319, 0.7464650869369507, 0.760195791721344, 0.9593195915222168, 0.7079413533210754, 0.5481259822845459 ], "eval_code_hard_runtime": 110.6367, "eval_code_hard_samples_per_second": 0.904, "eval_code_hard_steps_per_second": 0.036, "epoch": 2.19, "step": 125 }, { "loss": 0.0089, "learning_rate": 0.0002, "epoch": 2.21, "step": 126 }, { "loss": 0.008, "learning_rate": 0.0002, "epoch": 2.23, "step": 127 }, { "loss": 0.0025, "learning_rate": 0.0002, "epoch": 2.25, "step": 128 }, { "loss": 0.003, "learning_rate": 0.0002, "epoch": 2.26, "step": 129 }, { "loss": 0.0029, "learning_rate": 0.0002, "epoch": 2.28, "step": 130 }, { "loss": 0.012, "learning_rate": 0.0002, "epoch": 2.3, "step": 131 }, { "loss": 0.0097, "learning_rate": 0.0002, "epoch": 2.32, "step": 132 }, { "loss": 0.0024, "learning_rate": 0.0002, "epoch": 2.33, "step": 133 }, { "loss": 0.0047, "learning_rate": 0.0002, "epoch": 2.35, "step": 134 }, { "loss": 0.0007, "learning_rate": 0.0002, "epoch": 2.37, "step": 135 }, { "loss": 0.0027, "learning_rate": 0.0002, "epoch": 2.39, "step": 136 }, { "loss": 0.0, "learning_rate": 0.0002, "epoch": 2.4, "step": 137 }, { "loss": 0.0001, "learning_rate": 0.0002, "epoch": 2.42, "step": 138 }, { "loss": 0.0, "learning_rate": 0.0002, "epoch": 2.44, "step": 139 }, { "loss": 0.0016, "learning_rate": 0.0002, "epoch": 2.46, "step": 140 }, { "loss": 0.0002, "learning_rate": 0.0002, "epoch": 2.47, "step": 141 }, { "loss": 0.0696, "learning_rate": 0.0002, "epoch": 2.49, "step": 142 }, { "loss": 0.0059, "learning_rate": 0.0002, "epoch": 2.51, "step": 143 }, { "loss": 0.0005, "learning_rate": 0.0002, "epoch": 2.53, "step": 144 }, { "loss": 0.0, "learning_rate": 0.0002, "epoch": 2.54, "step": 145 }, { "loss": 0.0204, "learning_rate": 0.0002, "epoch": 2.56, "step": 146 }, { "loss": 0.0043, "learning_rate": 0.0002, "epoch": 2.58, "step": 147 }, { "loss": 0.008, "learning_rate": 0.0002, "epoch": 2.6, "step": 148 }, { "loss": 0.0462, "learning_rate": 0.0002, "epoch": 2.61, "step": 149 }, { "loss": 0.0002, "learning_rate": 0.0002, "epoch": 2.63, "step": 150 }, { "eval_code_easy_loss": 0.07808719575405121, "eval_code_easy_score": -0.024909913539886475, "eval_code_easy_brier_score": 0.024909913539886475, "eval_code_easy_average_probability": 0.9644252061843872, "eval_code_easy_accuracy": 0.96, "eval_code_easy_probabilities": [ 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.25757572054862976, 1.0, 1.0, 1.0, 1.0, 1.0, 0.9849016070365906, 1.0, 1.0, 0.9876202940940857, 1.0, 1.0, 1.0, 0.9999861717224121, 0.9844418168067932, 1.0, 1.0, 1.0, 1.0, 1.0, 0.9999815225601196, 0.9999998807907104, 1.0, 0.9999998807907104, 0.9330133199691772, 0.9998921155929565, 0.9747464060783386, 0.9998412132263184, 0.9905625581741333, 0.974348783493042, 0.9999979734420776, 1.0, 0.9953662157058716, 1.0, 1.0, 0.9999998807907104, 0.9999034404754639, 1.0, 0.999864935874939, 0.9999926090240479, 0.4541763961315155, 0.9853015542030334, 0.9999998807907104, 1.0, 1.0, 0.9991912245750427, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.8326194882392883, 1.0, 1.0, 1.0, 0.9997536540031433, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.9999920129776001, 0.16892309486865997, 0.04870113357901573, 1.0, 1.0, 1.0, 0.9999966621398926, 0.9999991655349731, 1.0, 1.0, 1.0, 1.0, 0.9763332009315491, 1.0, 1.0, 0.9999935626983643, 1.0, 1.0, 1.0, 0.9999570846557617, 0.9999858140945435, 1.0, 0.8955685496330261, 0.9999994039535522, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0 ], "eval_code_easy_runtime": 31.1286, "eval_code_easy_samples_per_second": 3.212, "eval_code_easy_steps_per_second": 0.128, "epoch": 2.63, "step": 150 }, { "eval_code_hard_loss": 0.5192895531654358, "eval_code_hard_score": -0.15126831829547882, "eval_code_hard_brier_score": 0.15126831829547882, "eval_code_hard_average_probability": 0.747479259967804, "eval_code_hard_accuracy": 0.8, "eval_code_hard_probabilities": [ 1.0, 0.23972108960151672, 0.34699881076812744, 0.9999767541885376, 0.9995144605636597, 0.9983108043670654, 0.08374932408332825, 0.43437740206718445, 0.01867709495127201, 0.6382063031196594, 0.15230077505111694, 0.8396382331848145, 0.9999982118606567, 0.9995055198669434, 0.9989895224571228, 0.999188244342804, 0.9740042686462402, 0.9889111518859863, 0.9999994039535522, 0.9998956918716431, 0.9998466968536377, 0.9999018907546997, 0.999890923500061, 0.9991710186004639, 0.5053916573524475, 0.39974549412727356, 0.42459815740585327, 0.0008407415589317679, 0.42807716131210327, 0.5588884949684143, 0.9891363382339478, 0.9961511492729187, 0.7021331191062927, 0.9854065179824829, 0.8423302173614502, 0.8964764475822449, 0.1724509447813034, 0.12598082423210144, 0.7715829610824585, 0.7074795365333557, 0.5872290134429932, 0.8601648211479187, 0.9107590317726135, 0.6049399375915527, 0.9440699219703674, 0.9954397082328796, 0.5415297150611877, 0.787742555141449, 0.9602062702178955, 0.9986072182655334, 0.9982185959815979, 0.1667134314775467, 0.9801592826843262, 0.9148990511894226, 0.9998517036437988, 0.9998817443847656, 0.9999765157699585, 0.3098694384098053, 0.6319935917854309, 0.5650399923324585, 0.999991774559021, 0.9999759197235107, 0.999995231628418, 0.9999890327453613, 0.9999120235443115, 1.0, 0.9366697072982788, 0.9999974966049194, 0.9823035597801208, 0.9867276549339294, 0.8337392807006836, 0.13807430863380432, 0.9943860769271851, 0.9992918968200684, 0.9986520409584045, 0.8924828767776489, 0.1779462993144989, 0.14102093875408173, 0.5240833163261414, 0.4705985486507416, 0.5385470390319824, 0.9867573976516724, 0.9934804439544678, 0.9309672713279724, 0.7547566890716553, 0.46812355518341064, 0.6644623279571533, 0.5478729605674744, 0.9080039262771606, 0.9042761325836182, 0.564723789691925, 0.7475408911705017, 0.5996127128601074, 0.6277127861976624, 0.6707883477210999, 0.3496539294719696, 0.9058393836021423, 0.9995261430740356, 0.902912437915802, 0.6317912936210632 ], "eval_code_hard_runtime": 110.7028, "eval_code_hard_samples_per_second": 0.903, "eval_code_hard_steps_per_second": 0.036, "epoch": 2.63, "step": 150 }, { "train_runtime": 4923.5411, "train_samples_per_second": 0.975, "train_steps_per_second": 0.03, "total_flos": 0.0, "train_loss": 0.09649283700817553, "epoch": 2.63, "step": 150 } ]