[ { "loss": 0.9411, "learning_rate": 0.0002, "epoch": 0.05, "step": 1 }, { "loss": 0.8666, "learning_rate": 0.0002, "epoch": 0.11, "step": 2 }, { "loss": 0.7828, "learning_rate": 0.0002, "epoch": 0.16, "step": 3 }, { "loss": 0.736, "learning_rate": 0.0002, "epoch": 0.21, "step": 4 }, { "loss": 0.7278, "learning_rate": 0.0002, "epoch": 0.26, "step": 5 }, { "loss": 0.7073, "learning_rate": 0.0002, "epoch": 0.32, "step": 6 }, { "loss": 0.6761, "learning_rate": 0.0002, "epoch": 0.37, "step": 7 }, { "loss": 0.6856, "learning_rate": 0.0002, "epoch": 0.42, "step": 8 }, { "loss": 0.6623, "learning_rate": 0.0002, "epoch": 0.47, "step": 9 }, { "loss": 0.685, "learning_rate": 0.0002, "epoch": 0.53, "step": 10 }, { "loss": 0.6496, "learning_rate": 0.0002, "epoch": 0.58, "step": 11 }, { "loss": 0.6569, "learning_rate": 0.0002, "epoch": 0.63, "step": 12 }, { "loss": 0.647, "learning_rate": 0.0002, "epoch": 0.68, "step": 13 }, { "loss": 0.6475, "learning_rate": 0.0002, "epoch": 0.74, "step": 14 }, { "loss": 0.5933, "learning_rate": 0.0002, "epoch": 0.79, "step": 15 }, { "loss": 0.5775, "learning_rate": 0.0002, "epoch": 0.84, "step": 16 }, { "loss": 0.4645, "learning_rate": 0.0002, "epoch": 0.89, "step": 17 }, { "loss": 0.4924, "learning_rate": 0.0002, "epoch": 0.95, "step": 18 }, { "loss": 0.466, "learning_rate": 0.0002, "epoch": 1.0, "step": 19 }, { "loss": 0.3533, "learning_rate": 0.0002, "epoch": 1.05, "step": 20 }, { "loss": 0.2391, "learning_rate": 0.0002, "epoch": 1.11, "step": 21 }, { "loss": 0.238, "learning_rate": 0.0002, "epoch": 1.16, "step": 22 }, { "loss": 0.2732, "learning_rate": 0.0002, "epoch": 1.21, "step": 23 }, { "loss": 0.2189, "learning_rate": 0.0002, "epoch": 1.26, "step": 24 }, { "loss": 0.2679, "learning_rate": 0.0002, "epoch": 1.32, "step": 25 }, { "eval_wrong_arc_loss": 0.44860050082206726, "eval_wrong_arc_score": -0.1205952912569046, "eval_wrong_arc_brier_score": 0.1205952912569046, "eval_wrong_arc_average_probability": 0.8239858150482178, "eval_wrong_arc_accuracy": 0.85, "eval_wrong_arc_probabilities": [ 0.9999418258666992, 0.9722082018852234, 0.8799185156822205, 0.9975969195365906, 0.9665985107421875, 0.9101961255073547, 0.9937179088592529, 0.999297022819519, 0.9999587535858154, 0.9977836012840271, 0.9997757077217102, 0.06387221068143845, 0.9968090653419495, 0.9938945174217224, 0.9890614748001099, 0.10863714665174484, 0.9990577101707458, 0.24707664549350739, 0.9657539129257202, 0.9993244409561157, 0.046203259378671646, 0.9968823194503784, 0.9852988719940186, 0.9999517202377319, 0.16446755826473236, 0.9948033690452576, 0.916885256767273, 0.4968450367450714, 0.995069146156311, 0.9940468072891235, 0.935261070728302, 0.014348532073199749, 0.9997904896736145, 0.9994300007820129, 0.6455972194671631, 0.9781675934791565, 0.9999657869338989, 0.9821614027023315, 0.3114183247089386, 0.9295165538787842, 0.9997860789299011, 0.9985997080802917, 0.9999665021896362, 0.9173125624656677, 0.9998779296875, 0.00019104090461041778, 0.9999423027038574, 0.21295902132987976, 0.9556270837783813, 0.774227499961853, 0.04496321827173233, 0.9955704808235168, 0.932592511177063, 0.9980608820915222, 0.9288395047187805, 0.9424834847450256, 0.9153735041618347, 0.9984492063522339, 0.5842114686965942, 0.9941431879997253, 0.9763680696487427, 0.4402100145816803, 0.9941442608833313, 0.9626814723014832, 0.9998818635940552, 0.9894838929176331, 0.9992152452468872, 0.9980252981185913, 0.9998893737792969, 0.8068798184394836, 0.9996836185455322, 0.6308501958847046, 0.669822633266449, 0.7028265595436096, 0.8962225317955017, 0.0276656337082386, 0.9668102264404297, 0.9810034036636353, 0.9963723421096802, 0.9677561521530151, 0.966948390007019, 0.9884660243988037, 0.6565213799476624, 0.9659872055053711, 0.7989180684089661, 0.9784756898880005, 0.9745969176292419, 0.2668156921863556, 0.6538265347480774, 0.9981586337089539, 0.9747296571731567, 0.6822184324264526, 0.9654968976974487, 0.9999215602874756, 0.9991720914840698, 0.8838242888450623, 0.9694287180900574, 0.009546991437673569, 0.9999879598617554, 0.9999915361404419 ], "eval_wrong_arc_runtime": 14.1094, "eval_wrong_arc_samples_per_second": 7.087, "eval_wrong_arc_steps_per_second": 0.142, "epoch": 1.32, "step": 25 }, { "loss": 0.2786, "learning_rate": 0.0002, "epoch": 1.37, "step": 26 }, { "loss": 0.0642, "learning_rate": 0.0002, "epoch": 1.42, "step": 27 }, { "loss": 0.1474, "learning_rate": 0.0002, "epoch": 1.47, "step": 28 }, { "loss": 0.1451, "learning_rate": 0.0002, "epoch": 1.53, "step": 29 }, { "loss": 0.4362, "learning_rate": 0.0002, "epoch": 1.58, "step": 30 }, { "loss": 0.101, "learning_rate": 0.0002, "epoch": 1.63, "step": 31 }, { "loss": 0.4067, "learning_rate": 0.0002, "epoch": 1.68, "step": 32 }, { "loss": 0.2805, "learning_rate": 0.0002, "epoch": 1.74, "step": 33 }, { "loss": 0.1639, "learning_rate": 0.0002, "epoch": 1.79, "step": 34 }, { "loss": 0.2988, "learning_rate": 0.0002, "epoch": 1.84, "step": 35 }, { "loss": 0.2697, "learning_rate": 0.0002, "epoch": 1.89, "step": 36 }, { "loss": 0.2799, "learning_rate": 0.0002, "epoch": 1.95, "step": 37 }, { "loss": 0.2463, "learning_rate": 0.0002, "epoch": 2.0, "step": 38 }, { "loss": 0.1053, "learning_rate": 0.0002, "epoch": 2.05, "step": 39 }, { "loss": 0.0899, "learning_rate": 0.0002, "epoch": 2.11, "step": 40 }, { "loss": 0.1007, "learning_rate": 0.0002, "epoch": 2.16, "step": 41 }, { "loss": 0.0867, "learning_rate": 0.0002, "epoch": 2.21, "step": 42 }, { "loss": 0.0244, "learning_rate": 0.0002, "epoch": 2.26, "step": 43 }, { "loss": 0.0383, "learning_rate": 0.0002, "epoch": 2.32, "step": 44 }, { "loss": 0.1353, "learning_rate": 0.0002, "epoch": 2.37, "step": 45 }, { "loss": 0.0872, "learning_rate": 0.0002, "epoch": 2.42, "step": 46 }, { "loss": 0.0561, "learning_rate": 0.0002, "epoch": 2.47, "step": 47 }, { "loss": 0.1744, "learning_rate": 0.0002, "epoch": 2.53, "step": 48 }, { "loss": 0.0422, "learning_rate": 0.0002, "epoch": 2.58, "step": 49 }, { "loss": 0.3623, "learning_rate": 0.0002, "epoch": 2.63, "step": 50 }, { "eval_wrong_arc_loss": 0.4361099600791931, "eval_wrong_arc_score": -0.11035231500864029, "eval_wrong_arc_brier_score": 0.11035231500864029, "eval_wrong_arc_average_probability": 0.8465203046798706, "eval_wrong_arc_accuracy": 0.89, "eval_wrong_arc_probabilities": [ 0.9998047947883606, 0.9974852800369263, 0.9697003364562988, 1.0, 0.9999874830245972, 0.9985746145248413, 0.995770275592804, 1.0, 0.9999997615814209, 0.9993677735328674, 0.9999997615814209, 0.14601606130599976, 0.9999971389770508, 0.9999909400939941, 0.9997935891151428, 0.01704411767423153, 0.9999744892120361, 0.18533511459827423, 0.9932599663734436, 0.9999914169311523, 0.034688740968704224, 0.9999998807907104, 0.9389908313751221, 1.0, 0.08633092790842056, 0.9993736147880554, 0.9989921450614929, 0.9128285050392151, 0.9999308586120605, 0.9999699592590332, 0.8846806287765503, 0.11436513811349869, 0.9999996423721313, 0.9993277788162231, 0.7077317237854004, 0.9989765882492065, 0.9999998807907104, 0.994296133518219, 0.9066394567489624, 0.8749088048934937, 0.9999996423721313, 0.9982640147209167, 1.0, 0.9956251978874207, 0.9999996423721313, 2.7943124223384075e-05, 0.9999997615814209, 0.521802544593811, 0.9956654906272888, 0.914125919342041, 0.5906383395195007, 0.9999967813491821, 0.9983319640159607, 0.9996576309204102, 0.9990895986557007, 0.8538278937339783, 0.9952646493911743, 0.9999518394470215, 0.0675075352191925, 0.9999796152114868, 0.9954521059989929, 0.6579669117927551, 0.9997662901878357, 0.9998788833618164, 1.0, 0.9954767823219299, 0.9990608096122742, 0.9998619556427002, 0.9999881982803345, 0.9237385988235474, 0.9999678134918213, 0.8091723322868347, 0.23264211416244507, 0.7413696646690369, 0.9938435554504395, 0.021625561639666557, 0.9985266923904419, 0.9996298551559448, 0.9997509121894836, 0.9999964237213135, 0.9900209903717041, 0.9999814033508301, 0.5921268463134766, 0.9940288066864014, 0.6985633969306946, 0.9999679327011108, 0.9779295325279236, 0.5547086596488953, 0.6570234894752502, 0.9979508519172668, 0.9985236525535583, 0.5055489540100098, 0.9759111404418945, 1.0, 0.9999409914016724, 0.6755613088607788, 0.9818810820579529, 0.0007722629816271365, 1.0, 0.9999967813491821 ], "eval_wrong_arc_runtime": 14.107, "eval_wrong_arc_samples_per_second": 7.089, "eval_wrong_arc_steps_per_second": 0.142, "epoch": 2.63, "step": 50 }, { "loss": 0.0292, "learning_rate": 0.0002, "epoch": 2.68, "step": 51 }, { "loss": 0.0878, "learning_rate": 0.0002, "epoch": 2.74, "step": 52 }, { "loss": 0.0426, "learning_rate": 0.0002, "epoch": 2.79, "step": 53 }, { "loss": 0.0525, "learning_rate": 0.0002, "epoch": 2.84, "step": 54 }, { "loss": 0.2049, "learning_rate": 0.0002, "epoch": 2.89, "step": 55 }, { "loss": 0.0191, "learning_rate": 0.0002, "epoch": 2.95, "step": 56 }, { "loss": 0.013, "learning_rate": 0.0002, "epoch": 3.0, "step": 57 }, { "loss": 0.0026, "learning_rate": 0.0002, "epoch": 3.05, "step": 58 }, { "loss": 0.0064, "learning_rate": 0.0002, "epoch": 3.11, "step": 59 }, { "loss": 0.04, "learning_rate": 0.0002, "epoch": 3.16, "step": 60 }, { "loss": 0.0011, "learning_rate": 0.0002, "epoch": 3.21, "step": 61 }, { "loss": 0.0152, "learning_rate": 0.0002, "epoch": 3.26, "step": 62 }, { "loss": 0.0119, "learning_rate": 0.0002, "epoch": 3.32, "step": 63 }, { "loss": 0.0038, "learning_rate": 0.0002, "epoch": 3.37, "step": 64 }, { "loss": 0.0172, "learning_rate": 0.0002, "epoch": 3.42, "step": 65 }, { "loss": 0.0012, "learning_rate": 0.0002, "epoch": 3.47, "step": 66 }, { "loss": 0.0004, "learning_rate": 0.0002, "epoch": 3.53, "step": 67 }, { "loss": 0.0011, "learning_rate": 0.0002, "epoch": 3.58, "step": 68 }, { "loss": 0.0007, "learning_rate": 0.0002, "epoch": 3.63, "step": 69 }, { "loss": 0.0007, "learning_rate": 0.0002, "epoch": 3.68, "step": 70 }, { "loss": 0.0015, "learning_rate": 0.0002, "epoch": 3.74, "step": 71 }, { "loss": 0.0029, "learning_rate": 0.0002, "epoch": 3.79, "step": 72 }, { "loss": 0.0001, "learning_rate": 0.0002, "epoch": 3.84, "step": 73 }, { "loss": 0.0016, "learning_rate": 0.0002, "epoch": 3.89, "step": 74 }, { "loss": 0.0004, "learning_rate": 0.0002, "epoch": 3.95, "step": 75 }, { "eval_wrong_arc_loss": 0.9371048808097839, "eval_wrong_arc_score": -0.12844397127628326, "eval_wrong_arc_brier_score": 0.12844397127628326, "eval_wrong_arc_average_probability": 0.8521437644958496, "eval_wrong_arc_accuracy": 0.85, "eval_wrong_arc_probabilities": [ 1.0, 0.9999885559082031, 0.9962847232818604, 1.0, 0.9999704360961914, 1.0, 0.9999747276306152, 1.0, 1.0, 0.9999998807907104, 1.0, 0.0015204795636236668, 0.9999974966049194, 1.0, 1.0, 0.0026574539951980114, 1.0, 0.2781886160373688, 0.9999946355819702, 1.0, 7.722657755948603e-06, 1.0, 0.9999980926513672, 1.0, 0.022595401853322983, 1.0, 0.9997262358665466, 0.8693895936012268, 1.0, 1.0, 0.9998586177825928, 0.0026955287903547287, 1.0, 0.9999967813491821, 0.9771199226379395, 1.0, 1.0, 0.9999991655349731, 0.9723829627037048, 0.9901054501533508, 1.0, 1.0, 1.0, 0.9996402263641357, 1.0, 1.1734863081258595e-11, 1.0, 0.7933193445205688, 0.9998661279678345, 0.9169045090675354, 0.01283114030957222, 0.9999998807907104, 0.9999949932098389, 1.0, 0.9999990463256836, 0.9993634819984436, 0.9999532699584961, 1.0, 0.05867417901754379, 1.0, 0.9999970197677612, 0.967593252658844, 0.999996542930603, 1.0, 1.0, 0.9999099969863892, 1.0, 1.0, 1.0, 0.9832760095596313, 1.0, 0.9552690386772156, 0.12939326465129852, 0.9545382261276245, 0.9999997615814209, 3.274522214269382e-06, 0.9999990463256836, 0.9999992847442627, 1.0, 1.0, 0.9999996423721313, 1.0, 0.3718179762363434, 0.9999998807907104, 0.41531702876091003, 1.0, 0.9998940229415894, 0.08952252566814423, 0.8536935448646545, 0.9999943971633911, 0.9999998807907104, 0.6923840641975403, 0.9999977350234985, 1.0, 1.0, 0.9087908267974854, 0.9999892711639404, 9.91400810335108e-08, 1.0, 1.0 ], "eval_wrong_arc_runtime": 14.0876, "eval_wrong_arc_samples_per_second": 7.098, "eval_wrong_arc_steps_per_second": 0.142, "epoch": 3.95, "step": 75 }, { "loss": 0.0005, "learning_rate": 0.0002, "epoch": 4.0, "step": 76 }, { "loss": 0.0001, "learning_rate": 0.0002, "epoch": 4.05, "step": 77 }, { "loss": 0.0002, "learning_rate": 0.0002, "epoch": 4.11, "step": 78 }, { "loss": 0.0003, "learning_rate": 0.0002, "epoch": 4.16, "step": 79 }, { "loss": 0.0001, "learning_rate": 0.0002, "epoch": 4.21, "step": 80 }, { "loss": 0.0001, "learning_rate": 0.0002, "epoch": 4.26, "step": 81 }, { "loss": 0.0003, "learning_rate": 0.0002, "epoch": 4.32, "step": 82 }, { "loss": 0.0004, "learning_rate": 0.0002, "epoch": 4.37, "step": 83 }, { "loss": 0.0006, "learning_rate": 0.0002, "epoch": 4.42, "step": 84 }, { "loss": 0.0001, "learning_rate": 0.0002, "epoch": 4.47, "step": 85 }, { "loss": 0.0, "learning_rate": 0.0002, "epoch": 4.53, "step": 86 }, { "loss": 0.0004, "learning_rate": 0.0002, "epoch": 4.58, "step": 87 }, { "loss": 0.0001, "learning_rate": 0.0002, "epoch": 4.63, "step": 88 }, { "loss": 0.0011, "learning_rate": 0.0002, "epoch": 4.68, "step": 89 }, { "loss": 0.0002, "learning_rate": 0.0002, "epoch": 4.74, "step": 90 }, { "loss": 0.0, "learning_rate": 0.0002, "epoch": 4.79, "step": 91 }, { "loss": 0.0002, "learning_rate": 0.0002, "epoch": 4.84, "step": 92 }, { "loss": 0.0, "learning_rate": 0.0002, "epoch": 4.89, "step": 93 }, { "loss": 0.0, "learning_rate": 0.0002, "epoch": 4.95, "step": 94 }, { "loss": 0.0001, "learning_rate": 0.0002, "epoch": 5.0, "step": 95 }, { "loss": 0.0001, "learning_rate": 0.0002, "epoch": 5.05, "step": 96 }, { "loss": 0.0002, "learning_rate": 0.0002, "epoch": 5.11, "step": 97 }, { "loss": 0.0001, "learning_rate": 0.0002, "epoch": 5.16, "step": 98 }, { "loss": 0.0, "learning_rate": 0.0002, "epoch": 5.21, "step": 99 }, { "loss": 0.0, "learning_rate": 0.0002, "epoch": 5.26, "step": 100 }, { "eval_wrong_arc_loss": 1.174972414970398, "eval_wrong_arc_score": -0.1288808435201645, "eval_wrong_arc_brier_score": 0.1288808435201645, "eval_wrong_arc_average_probability": 0.8555936217308044, "eval_wrong_arc_accuracy": 0.86, "eval_wrong_arc_probabilities": [ 1.0, 0.9999986886978149, 0.9987416863441467, 1.0, 0.9999622106552124, 1.0, 0.9999978542327881, 1.0, 1.0, 1.0, 1.0, 5.242495899437927e-05, 0.9999841451644897, 1.0, 1.0, 0.0023345474619418383, 1.0, 0.14009606838226318, 1.0, 1.0, 8.820494201700058e-08, 1.0, 1.0, 1.0, 0.010550976730883121, 1.0, 0.9991294741630554, 0.8535700440406799, 1.0, 1.0, 0.9999967813491821, 0.0005319842603057623, 1.0, 0.9999994039535522, 0.9991918206214905, 1.0, 1.0, 1.0, 0.9841311573982239, 0.9965953230857849, 1.0, 1.0, 1.0, 0.9997579455375671, 1.0, 2.8921723990596535e-15, 1.0, 0.6546236872673035, 0.9999840259552002, 0.9600048661231995, 0.01660270430147648, 0.9999998807907104, 1.0, 1.0, 1.0, 0.9998214840888977, 0.9996416568756104, 1.0, 0.07783614844083786, 1.0, 1.0, 0.9756872653961182, 0.9999994039535522, 1.0, 1.0, 0.9999692440032959, 1.0, 1.0, 1.0, 0.9980716109275818, 1.0, 0.8212827444076538, 0.11921443790197372, 0.9588778614997864, 1.0, 4.0699322312320874e-07, 1.0, 0.9999998807907104, 1.0, 1.0, 1.0, 1.0, 0.387886106967926, 1.0, 0.8730685710906982, 1.0, 0.9999648332595825, 0.010935280472040176, 0.9447401762008667, 0.9999998807907104, 1.0, 0.8236541748046875, 1.0, 1.0, 1.0, 0.9528771042823792, 0.9999998807907104, 4.7035078054591395e-09, 1.0, 1.0 ], "eval_wrong_arc_runtime": 14.1125, "eval_wrong_arc_samples_per_second": 7.086, "eval_wrong_arc_steps_per_second": 0.142, "epoch": 5.26, "step": 100 }, { "train_runtime": 1115.9399, "train_samples_per_second": 2.868, "train_steps_per_second": 0.09, "total_flos": 0.0, "train_loss": 0.19240124381963142, "epoch": 5.26, "step": 100 } ]