[ { "loss": 0.7375, "learning_rate": 7.2e-05, "epoch": 0.05, "step": 1 }, { "loss": 0.7385, "learning_rate": 7.2e-05, "epoch": 0.11, "step": 2 }, { "loss": 0.7184, "learning_rate": 7.2e-05, "epoch": 0.16, "step": 3 }, { "loss": 0.6809, "learning_rate": 7.2e-05, "epoch": 0.21, "step": 4 }, { "loss": 0.6997, "learning_rate": 7.2e-05, "epoch": 0.26, "step": 5 }, { "loss": 0.7176, "learning_rate": 7.2e-05, "epoch": 0.32, "step": 6 }, { "loss": 0.6942, "learning_rate": 7.2e-05, "epoch": 0.37, "step": 7 }, { "loss": 0.7213, "learning_rate": 7.2e-05, "epoch": 0.42, "step": 8 }, { "loss": 0.6942, "learning_rate": 7.2e-05, "epoch": 0.47, "step": 9 }, { "loss": 0.7114, "learning_rate": 7.2e-05, "epoch": 0.53, "step": 10 }, { "loss": 0.7008, "learning_rate": 7.2e-05, "epoch": 0.58, "step": 11 }, { "loss": 0.681, "learning_rate": 7.2e-05, "epoch": 0.63, "step": 12 }, { "loss": 0.7248, "learning_rate": 7.2e-05, "epoch": 0.68, "step": 13 }, { "loss": 0.7351, "learning_rate": 7.2e-05, "epoch": 0.74, "step": 14 }, { "loss": 0.7095, "learning_rate": 7.2e-05, "epoch": 0.79, "step": 15 }, { "loss": 0.683, "learning_rate": 7.2e-05, "epoch": 0.84, "step": 16 }, { "loss": 0.6719, "learning_rate": 7.2e-05, "epoch": 0.89, "step": 17 }, { "loss": 0.6764, "learning_rate": 7.2e-05, "epoch": 0.95, "step": 18 }, { "loss": 0.6767, "learning_rate": 7.2e-05, "epoch": 1.0, "step": 19 }, { "loss": 0.6652, "learning_rate": 7.2e-05, "epoch": 1.05, "step": 20 }, { "loss": 0.6749, "learning_rate": 7.2e-05, "epoch": 1.11, "step": 21 }, { "loss": 0.6826, "learning_rate": 7.2e-05, "epoch": 1.16, "step": 22 }, { "loss": 0.6545, "learning_rate": 7.2e-05, "epoch": 1.21, "step": 23 }, { "loss": 0.6917, "learning_rate": 7.2e-05, "epoch": 1.26, "step": 24 }, { "loss": 0.65, "learning_rate": 7.2e-05, "epoch": 1.32, "step": 25 }, { "eval_wrong_arc_loss": 0.6511574387550354, "eval_wrong_arc_score": -0.2280832678079605, "eval_wrong_arc_brier_score": 0.2280832678079605, "eval_wrong_arc_average_probability": 0.5255815386772156, "eval_wrong_arc_accuracy": 0.68, "eval_wrong_arc_probabilities": [ 0.5341923236846924, 0.4215836822986603, 0.5577516555786133, 0.4472099840641022, 0.5210587382316589, 0.48162582516670227, 0.5153342485427856, 0.5227084159851074, 0.5714558959007263, 0.5944980978965759, 0.5958520174026489, 0.4774866998195648, 0.5097010135650635, 0.5408040881156921, 0.5164729356765747, 0.4862683117389679, 0.611793577671051, 0.5587571263313293, 0.46050599217414856, 0.5235235691070557, 0.5262281894683838, 0.519157350063324, 0.4868156611919403, 0.5412039160728455, 0.5091567039489746, 0.5481002330780029, 0.4881518483161926, 0.5103722810745239, 0.5413199067115784, 0.5389612317085266, 0.5303735733032227, 0.46720021963119507, 0.47469261288642883, 0.5217738747596741, 0.4839140772819519, 0.5033125281333923, 0.6740046739578247, 0.4932643473148346, 0.5131027698516846, 0.5083686113357544, 0.6749250888824463, 0.5694271326065063, 0.5096608996391296, 0.5113099217414856, 0.5202792882919312, 0.570429265499115, 0.6612235307693481, 0.4875631034374237, 0.4891292154788971, 0.5386453866958618, 0.39854174852371216, 0.5061803460121155, 0.4420945346355438, 0.49443987011909485, 0.483050137758255, 0.5371103882789612, 0.4931206703186035, 0.5382444262504578, 0.47580087184906006, 0.4962416887283325, 0.5580680966377258, 0.5028905868530273, 0.5332112908363342, 0.5813270211219788, 0.5666810274124146, 0.6028505563735962, 0.505656361579895, 0.5044450163841248, 0.547569215297699, 0.41055724024772644, 0.4800181984901428, 0.5832869410514832, 0.49764689803123474, 0.5161015391349792, 0.598942756652832, 0.48662498593330383, 0.5584536194801331, 0.5778775215148926, 0.5650496482849121, 0.4185655415058136, 0.512199878692627, 0.4503766596317291, 0.5686261057853699, 0.5340624451637268, 0.4932253956794739, 0.6768128275871277, 0.5700348019599915, 0.575413167476654, 0.47910740971565247, 0.5817662477493286, 0.594521164894104, 0.43558111786842346, 0.4789729714393616, 0.5581913590431213, 0.544609546661377, 0.6364085674285889, 0.5175650119781494, 0.5233790874481201, 0.5672008395195007, 0.43879184126853943 ], "eval_wrong_arc_runtime": 7.1003, "eval_wrong_arc_samples_per_second": 14.084, "eval_wrong_arc_steps_per_second": 0.282, "epoch": 1.32, "step": 25 }, { "loss": 0.6138, "learning_rate": 7.2e-05, "epoch": 1.37, "step": 26 }, { "loss": 0.6443, "learning_rate": 7.2e-05, "epoch": 1.42, "step": 27 }, { "loss": 0.6672, "learning_rate": 7.2e-05, "epoch": 1.47, "step": 28 }, { "loss": 0.6312, "learning_rate": 7.2e-05, "epoch": 1.53, "step": 29 }, { "loss": 0.6603, "learning_rate": 7.2e-05, "epoch": 1.58, "step": 30 }, { "loss": 0.6145, "learning_rate": 7.2e-05, "epoch": 1.63, "step": 31 }, { "loss": 0.6683, "learning_rate": 7.2e-05, "epoch": 1.68, "step": 32 }, { "loss": 0.6027, "learning_rate": 7.2e-05, "epoch": 1.74, "step": 33 }, { "loss": 0.6043, "learning_rate": 7.2e-05, "epoch": 1.79, "step": 34 }, { "loss": 0.694, "learning_rate": 7.2e-05, "epoch": 1.84, "step": 35 }, { "loss": 0.6568, "learning_rate": 7.2e-05, "epoch": 1.89, "step": 36 }, { "loss": 0.6639, "learning_rate": 7.2e-05, "epoch": 1.95, "step": 37 }, { "loss": 0.5993, "learning_rate": 7.2e-05, "epoch": 2.0, "step": 38 }, { "loss": 0.5717, "learning_rate": 7.2e-05, "epoch": 2.05, "step": 39 }, { "loss": 0.6159, "learning_rate": 7.2e-05, "epoch": 2.11, "step": 40 }, { "loss": 0.591, "learning_rate": 7.2e-05, "epoch": 2.16, "step": 41 }, { "loss": 0.5329, "learning_rate": 7.2e-05, "epoch": 2.21, "step": 42 }, { "loss": 0.5347, "learning_rate": 7.2e-05, "epoch": 2.26, "step": 43 }, { "loss": 0.5587, "learning_rate": 7.2e-05, "epoch": 2.32, "step": 44 }, { "loss": 0.5215, "learning_rate": 7.2e-05, "epoch": 2.37, "step": 45 }, { "loss": 0.5184, "learning_rate": 7.2e-05, "epoch": 2.42, "step": 46 }, { "loss": 0.5284, "learning_rate": 7.2e-05, "epoch": 2.47, "step": 47 }, { "loss": 0.491, "learning_rate": 7.2e-05, "epoch": 2.53, "step": 48 }, { "loss": 0.482, "learning_rate": 7.2e-05, "epoch": 2.58, "step": 49 }, { "loss": 0.486, "learning_rate": 7.2e-05, "epoch": 2.63, "step": 50 }, { "eval_wrong_arc_loss": 0.48614898324012756, "eval_wrong_arc_score": -0.15555404126644135, "eval_wrong_arc_brier_score": 0.15555404126644135, "eval_wrong_arc_average_probability": 0.6473783850669861, "eval_wrong_arc_accuracy": 0.79, "eval_wrong_arc_probabilities": [ 0.6561150550842285, 0.4626675546169281, 0.6256658434867859, 0.8533488512039185, 0.5858703851699829, 0.6902235150337219, 0.6159930229187012, 0.7639041543006897, 0.8640973567962646, 0.772244930267334, 0.8565906882286072, 0.29203683137893677, 0.5727916955947876, 0.8369653820991516, 0.7363625764846802, 0.44277945160865784, 0.8875776529312134, 0.40449512004852295, 0.592871904373169, 0.7689143419265747, 0.5688448548316956, 0.8267202377319336, 0.5138616561889648, 0.806187093257904, 0.6455444097518921, 0.8391571640968323, 0.36543235182762146, 0.5734689235687256, 0.747251033782959, 0.6627150774002075, 0.6402449607849121, 0.6396260857582092, 0.6404427886009216, 0.6774603724479675, 0.3316854238510132, 0.6492359042167664, 0.9437365531921387, 0.5006998181343079, 0.6551035642623901, 0.5634311437606812, 0.9296560287475586, 0.8223538398742676, 0.6447325944900513, 0.4997992515563965, 0.8213691115379333, 0.3131840229034424, 0.9470311999320984, 0.30666762590408325, 0.4539206027984619, 0.5038973689079285, 0.23626333475112915, 0.468450129032135, 0.62051922082901, 0.754213809967041, 0.6286126375198364, 0.6793380975723267, 0.4252707064151764, 0.8759824633598328, 0.28135573863983154, 0.5500273108482361, 0.8892378807067871, 0.4467567503452301, 0.6596640944480896, 0.816720724105835, 0.7590842247009277, 0.9068907499313354, 0.7511820197105408, 0.640140175819397, 0.7239473462104797, 0.26979541778564453, 0.8412765264511108, 0.7638338208198547, 0.7051005363464355, 0.5634142160415649, 0.6077967286109924, 0.44273415207862854, 0.5198932886123657, 0.8605012893676758, 0.8117179274559021, 0.3314230144023895, 0.5430377125740051, 0.6297375559806824, 0.8171581625938416, 0.6848751306533813, 0.5711643099784851, 0.8888359665870667, 0.7509227991104126, 0.6692346930503845, 0.48311302065849304, 0.7981626987457275, 0.7328034043312073, 0.342570424079895, 0.638855516910553, 0.8649654388427734, 0.7085446715354919, 0.7994809150695801, 0.6453239917755127, 0.3845190703868866, 0.8838135004043579, 0.7545994520187378 ], "eval_wrong_arc_runtime": 7.0912, "eval_wrong_arc_samples_per_second": 14.102, "eval_wrong_arc_steps_per_second": 0.282, "epoch": 2.63, "step": 50 }, { "loss": 0.5629, "learning_rate": 7.2e-05, "epoch": 2.68, "step": 51 }, { "loss": 0.4267, "learning_rate": 7.2e-05, "epoch": 2.74, "step": 52 }, { "loss": 0.4321, "learning_rate": 7.2e-05, "epoch": 2.79, "step": 53 }, { "loss": 0.4345, "learning_rate": 7.2e-05, "epoch": 2.84, "step": 54 }, { "loss": 0.5663, "learning_rate": 7.2e-05, "epoch": 2.89, "step": 55 }, { "loss": 0.3682, "learning_rate": 7.2e-05, "epoch": 2.95, "step": 56 }, { "loss": 0.2817, "learning_rate": 7.2e-05, "epoch": 3.0, "step": 57 }, { "loss": 0.2573, "learning_rate": 7.2e-05, "epoch": 3.05, "step": 58 }, { "loss": 0.3374, "learning_rate": 7.2e-05, "epoch": 3.11, "step": 59 }, { "loss": 0.3706, "learning_rate": 7.2e-05, "epoch": 3.16, "step": 60 }, { "loss": 0.4201, "learning_rate": 7.2e-05, "epoch": 3.21, "step": 61 }, { "loss": 0.3508, "learning_rate": 7.2e-05, "epoch": 3.26, "step": 62 }, { "loss": 0.3724, "learning_rate": 7.2e-05, "epoch": 3.32, "step": 63 }, { "loss": 0.2794, "learning_rate": 7.2e-05, "epoch": 3.37, "step": 64 }, { "loss": 0.2064, "learning_rate": 7.2e-05, "epoch": 3.42, "step": 65 }, { "loss": 0.202, "learning_rate": 7.2e-05, "epoch": 3.47, "step": 66 }, { "loss": 0.2556, "learning_rate": 7.2e-05, "epoch": 3.53, "step": 67 }, { "loss": 0.191, "learning_rate": 7.2e-05, "epoch": 3.58, "step": 68 }, { "loss": 0.5198, "learning_rate": 7.2e-05, "epoch": 3.63, "step": 69 }, { "loss": 0.1593, "learning_rate": 7.2e-05, "epoch": 3.68, "step": 70 }, { "loss": 0.2818, "learning_rate": 7.2e-05, "epoch": 3.74, "step": 71 }, { "loss": 0.1736, "learning_rate": 7.2e-05, "epoch": 3.79, "step": 72 }, { "loss": 0.2594, "learning_rate": 7.2e-05, "epoch": 3.84, "step": 73 }, { "loss": 0.3317, "learning_rate": 7.2e-05, "epoch": 3.89, "step": 74 }, { "loss": 0.1084, "learning_rate": 7.2e-05, "epoch": 3.95, "step": 75 }, { "eval_wrong_arc_loss": 0.48962050676345825, "eval_wrong_arc_score": -0.1449090540409088, "eval_wrong_arc_brier_score": 0.1449090540409088, "eval_wrong_arc_average_probability": 0.7831840515136719, "eval_wrong_arc_accuracy": 0.8, "eval_wrong_arc_probabilities": [ 0.9962900876998901, 0.7803617715835571, 0.5364444851875305, 0.9998961687088013, 0.8887804746627808, 0.9976217150688171, 0.8334865570068359, 0.9998819828033447, 0.9999058246612549, 0.9978801012039185, 0.9933562278747559, 0.1421491801738739, 0.8300302624702454, 0.9992189407348633, 0.9899032115936279, 0.21271571516990662, 0.9991642236709595, 0.014693806879222393, 0.9953280687332153, 0.9968209266662598, 0.199941948056221, 0.9912959337234497, 0.9270946979522705, 0.9991445541381836, 0.8223531246185303, 0.9992208480834961, 0.2905525267124176, 0.7823147773742676, 0.9986461997032166, 0.9961987137794495, 0.973800778388977, 0.896918535232544, 0.9690271019935608, 0.5849995613098145, 0.07284952700138092, 0.9723120927810669, 0.999985933303833, 0.9009866118431091, 0.9817610383033752, 0.5555633306503296, 0.9998956918716431, 0.9909346699714661, 0.9936639070510864, 0.4922553598880768, 0.9994981288909912, 0.004467986058443785, 0.9999967813491821, 0.013173140585422516, 0.3301887512207031, 0.4937175214290619, 0.15117959678173065, 0.6426385641098022, 0.9926222562789917, 0.9969263672828674, 0.9407752752304077, 0.7943993210792542, 0.28421616554260254, 0.9991441965103149, 0.015714088454842567, 0.9596678018569946, 0.9966815114021301, 0.4579419493675232, 0.9928516745567322, 0.9990314245223999, 0.9978936314582825, 0.9997265934944153, 0.9977813363075256, 0.8815402388572693, 0.9994369149208069, 0.40593746304512024, 0.9994971752166748, 0.9803721308708191, 0.9809514284133911, 0.7131913304328918, 0.802761435508728, 0.5336641669273376, 0.6622551083564758, 0.9991828799247742, 0.9978545308113098, 0.07150473445653915, 0.7831286191940308, 0.9591096639633179, 0.9961942434310913, 0.9644544720649719, 0.5869894623756409, 0.9993104934692383, 0.9948341846466064, 0.8777645826339722, 0.4935871660709381, 0.996717631816864, 0.9972440004348755, 0.19612468779087067, 0.9670541882514954, 0.9996756315231323, 0.9910316467285156, 0.941999077796936, 0.839278519153595, 0.05020187422633171, 0.9999673366546631, 0.9996931552886963 ], "eval_wrong_arc_runtime": 7.0918, "eval_wrong_arc_samples_per_second": 14.101, "eval_wrong_arc_steps_per_second": 0.282, "epoch": 3.95, "step": 75 }, { "loss": 0.1467, "learning_rate": 7.2e-05, "epoch": 4.0, "step": 76 }, { "loss": 0.1119, "learning_rate": 7.2e-05, "epoch": 4.05, "step": 77 }, { "loss": 0.1514, "learning_rate": 7.2e-05, "epoch": 4.11, "step": 78 }, { "loss": 0.1085, "learning_rate": 7.2e-05, "epoch": 4.16, "step": 79 }, { "loss": 0.1115, "learning_rate": 7.2e-05, "epoch": 4.21, "step": 80 }, { "loss": 0.0831, "learning_rate": 7.2e-05, "epoch": 4.26, "step": 81 }, { "loss": 0.0891, "learning_rate": 7.2e-05, "epoch": 4.32, "step": 82 }, { "loss": 0.0702, "learning_rate": 7.2e-05, "epoch": 4.37, "step": 83 }, { "loss": 0.0847, "learning_rate": 7.2e-05, "epoch": 4.42, "step": 84 }, { "loss": 0.1381, "learning_rate": 7.2e-05, "epoch": 4.47, "step": 85 }, { "loss": 0.1193, "learning_rate": 7.2e-05, "epoch": 4.53, "step": 86 }, { "loss": 0.1743, "learning_rate": 7.2e-05, "epoch": 4.58, "step": 87 }, { "loss": 0.1424, "learning_rate": 7.2e-05, "epoch": 4.63, "step": 88 }, { "loss": 0.1921, "learning_rate": 7.2e-05, "epoch": 4.68, "step": 89 }, { "loss": 0.291, "learning_rate": 7.2e-05, "epoch": 4.74, "step": 90 }, { "loss": 0.0726, "learning_rate": 7.2e-05, "epoch": 4.79, "step": 91 }, { "loss": 0.1394, "learning_rate": 7.2e-05, "epoch": 4.84, "step": 92 }, { "loss": 0.0854, "learning_rate": 7.2e-05, "epoch": 4.89, "step": 93 }, { "loss": 0.03, "learning_rate": 7.2e-05, "epoch": 4.95, "step": 94 }, { "loss": 0.0593, "learning_rate": 7.2e-05, "epoch": 5.0, "step": 95 }, { "loss": 0.0627, "learning_rate": 7.2e-05, "epoch": 5.05, "step": 96 }, { "loss": 0.0182, "learning_rate": 7.2e-05, "epoch": 5.11, "step": 97 }, { "loss": 0.0373, "learning_rate": 7.2e-05, "epoch": 5.16, "step": 98 }, { "loss": 0.0205, "learning_rate": 7.2e-05, "epoch": 5.21, "step": 99 }, { "loss": 0.0142, "learning_rate": 7.2e-05, "epoch": 5.26, "step": 100 }, { "eval_wrong_arc_loss": 0.7816460132598877, "eval_wrong_arc_score": -0.16116684675216675, "eval_wrong_arc_brier_score": 0.16116684675216675, "eval_wrong_arc_average_probability": 0.7979229688644409, "eval_wrong_arc_accuracy": 0.81, "eval_wrong_arc_probabilities": [ 0.999994158744812, 0.7483964562416077, 0.3130388855934143, 0.9999998807907104, 0.9902498126029968, 0.9998146891593933, 0.9767554402351379, 1.0, 0.9999997615814209, 0.9999923706054688, 0.999737560749054, 0.027735406532883644, 0.9908527731895447, 0.9999980926513672, 0.99986732006073, 0.025411054491996765, 0.9999935626983643, 3.376677705091424e-05, 0.9994077682495117, 0.9985150694847107, 0.021305063739418983, 0.9998865127563477, 0.9992591738700867, 0.9999784231185913, 0.9170340895652771, 0.9999704360961914, 0.07415499538183212, 0.8610795736312866, 0.9999791383743286, 0.9999957084655762, 0.999575674533844, 0.9272408485412598, 0.9736331701278687, 0.8866708278656006, 0.027791421860456467, 0.999729573726654, 1.0, 0.9970165491104126, 0.9990842342376709, 0.5740329027175903, 0.9999998807907104, 0.9968683123588562, 0.999996542930603, 0.5142654776573181, 0.9999994039535522, 4.652805728255771e-05, 1.0, 0.0004388771194498986, 0.23682957887649536, 0.42817360162734985, 0.21864689886569977, 0.9052879214286804, 0.999488353729248, 0.9999974966049194, 0.9822047352790833, 0.9483726024627686, 0.3384818732738495, 0.999998927116394, 0.0001653187209740281, 0.9996745586395264, 0.9999986886978149, 0.5172927379608154, 0.9980478286743164, 0.9999990463256836, 0.9999717473983765, 0.9999998807907104, 0.9999914169311523, 0.9598847031593323, 0.9999996423721313, 0.16434389352798462, 0.9999890327453613, 0.9985514283180237, 0.9981107711791992, 0.6783352494239807, 0.9952026605606079, 0.37246665358543396, 0.847752034664154, 0.9999322891235352, 0.9999262094497681, 0.020832795649766922, 0.8913257718086243, 0.9978765249252319, 0.9999139308929443, 0.9987577199935913, 0.881140947341919, 0.9999871253967285, 0.9999966621398926, 0.997133731842041, 0.529548168182373, 0.9999359846115112, 0.9999946355819702, 0.08537785708904266, 0.9985488057136536, 0.9999998807907104, 0.9999691247940063, 0.9985161423683167, 0.9663605690002441, 0.0011356659233570099, 0.9999998807907104, 0.9999998807907104 ], "eval_wrong_arc_runtime": 7.0875, "eval_wrong_arc_samples_per_second": 14.109, "eval_wrong_arc_steps_per_second": 0.282, "epoch": 5.26, "step": 100 }, { "train_runtime": 554.4056, "train_samples_per_second": 5.772, "train_steps_per_second": 0.18, "total_flos": 0.0, "train_loss": 0.4284818640910089, "epoch": 5.26, "step": 100 } ]