[ { "loss": 0.7392, "learning_rate": 0.0002, "epoch": 0.05, "step": 1 }, { "loss": 0.6687, "learning_rate": 0.0002, "epoch": 0.11, "step": 2 }, { "loss": 0.7371, "learning_rate": 0.0002, "epoch": 0.16, "step": 3 }, { "loss": 0.777, "learning_rate": 0.0002, "epoch": 0.21, "step": 4 }, { "loss": 0.7379, "learning_rate": 0.0002, "epoch": 0.26, "step": 5 }, { "loss": 0.645, "learning_rate": 0.0002, "epoch": 0.32, "step": 6 }, { "loss": 0.6729, "learning_rate": 0.0002, "epoch": 0.37, "step": 7 }, { "loss": 0.7124, "learning_rate": 0.0002, "epoch": 0.42, "step": 8 }, { "loss": 0.5295, "learning_rate": 0.0002, "epoch": 0.47, "step": 9 }, { "loss": 0.5603, "learning_rate": 0.0002, "epoch": 0.53, "step": 10 }, { "loss": 0.4843, "learning_rate": 0.0002, "epoch": 0.58, "step": 11 }, { "loss": 0.4394, "learning_rate": 0.0002, "epoch": 0.63, "step": 12 }, { "loss": 0.3661, "learning_rate": 0.0002, "epoch": 0.68, "step": 13 }, { "loss": 0.417, "learning_rate": 0.0002, "epoch": 0.74, "step": 14 }, { "loss": 0.275, "learning_rate": 0.0002, "epoch": 0.79, "step": 15 }, { "loss": 0.3067, "learning_rate": 0.0002, "epoch": 0.84, "step": 16 }, { "loss": 0.1262, "learning_rate": 0.0002, "epoch": 0.89, "step": 17 }, { "loss": 0.2333, "learning_rate": 0.0002, "epoch": 0.95, "step": 18 }, { "loss": 0.4396, "learning_rate": 0.0002, "epoch": 1.0, "step": 19 }, { "loss": 0.2469, "learning_rate": 0.0002, "epoch": 1.05, "step": 20 }, { "loss": 0.0491, "learning_rate": 0.0002, "epoch": 1.11, "step": 21 }, { "loss": 0.0602, "learning_rate": 0.0002, "epoch": 1.16, "step": 22 }, { "loss": 0.0374, "learning_rate": 0.0002, "epoch": 1.21, "step": 23 }, { "loss": 0.2447, "learning_rate": 0.0002, "epoch": 1.26, "step": 24 }, { "loss": 0.3911, "learning_rate": 0.0002, "epoch": 1.32, "step": 25 }, { "eval_wrong_arc_loss": 0.21497203409671783, "eval_wrong_arc_score": -0.06468652933835983, "eval_wrong_arc_brier_score": 0.06468652933835983, "eval_wrong_arc_average_probability": 0.8909010887145996, "eval_wrong_arc_accuracy": 0.92, "eval_wrong_arc_probabilities": [ 0.9997676014900208, 0.996569037437439, 0.616486132144928, 0.9944661259651184, 0.9982737302780151, 0.9993508458137512, 0.9668838381767273, 0.9999992847442627, 0.9999873638153076, 0.9904683232307434, 0.9933720827102661, 0.6185683608055115, 0.9949280023574829, 0.9999911785125732, 0.9996459484100342, 0.2095331847667694, 0.9992320537567139, 0.6855879426002502, 0.6939541101455688, 0.9886998534202576, 0.4002212584018707, 0.9976931214332581, 0.9850371479988098, 0.9999406337738037, 0.6477287411689758, 0.9998879432678223, 0.9803314805030823, 0.9461862444877625, 0.9734556674957275, 0.9999347925186157, 0.9980067610740662, 0.8022632598876953, 0.999922513961792, 0.946151852607727, 0.662533164024353, 0.9975826740264893, 0.9998453855514526, 0.9968388080596924, 0.9699428677558899, 0.9917652010917664, 0.9323654174804688, 0.9990322589874268, 0.9997826218605042, 0.9999943971633911, 0.9999864101409912, 0.01022251695394516, 0.9999226331710815, 0.21935860812664032, 0.9370521306991577, 0.7875561714172363, 0.9789162874221802, 0.7642993927001953, 0.9912926554679871, 0.9982261061668396, 0.9988908171653748, 0.9821295142173767, 0.9997692704200745, 0.9968388080596924, 0.027354620397090912, 0.985841691493988, 0.9955176711082458, 0.9964224696159363, 0.998650848865509, 0.991766631603241, 0.9993088245391846, 0.999588668346405, 0.9985834360122681, 0.9933047890663147, 0.9974768757820129, 0.9967116117477417, 0.9997716546058655, 0.8711239099502563, 0.9992668032646179, 0.4505612254142761, 0.9998251795768738, 0.9984416365623474, 0.9977090358734131, 0.998289167881012, 0.9936224818229675, 0.6056644916534424, 0.9966290593147278, 0.9860624670982361, 0.9775242805480957, 0.9779098033905029, 0.9847407937049866, 0.9994072914123535, 0.9997192025184631, 0.9340455532073975, 0.5483614206314087, 0.9444760680198669, 0.968048095703125, 0.2896791994571686, 0.9574971795082092, 0.9999983310699463, 0.9990599751472473, 0.940399169921875, 0.9872337579727173, 0.06788471341133118, 0.9999967813491821, 0.9999508857727051 ], "eval_wrong_arc_runtime": 37.226, "eval_wrong_arc_samples_per_second": 2.686, "eval_wrong_arc_steps_per_second": 0.107, "epoch": 1.32, "step": 25 }, { "loss": 0.0897, "learning_rate": 0.0002, "epoch": 1.37, "step": 26 }, { "loss": 0.0541, "learning_rate": 0.0002, "epoch": 1.42, "step": 27 }, { "loss": 0.0361, "learning_rate": 0.0002, "epoch": 1.47, "step": 28 }, { "loss": 0.073, "learning_rate": 0.0002, "epoch": 1.53, "step": 29 }, { "loss": 0.1269, "learning_rate": 0.0002, "epoch": 1.58, "step": 30 }, { "loss": 0.0501, "learning_rate": 0.0002, "epoch": 1.63, "step": 31 }, { "loss": 0.1058, "learning_rate": 0.0002, "epoch": 1.68, "step": 32 }, { "loss": 0.067, "learning_rate": 0.0002, "epoch": 1.74, "step": 33 }, { "loss": 0.0124, "learning_rate": 0.0002, "epoch": 1.79, "step": 34 }, { "loss": 0.1432, "learning_rate": 0.0002, "epoch": 1.84, "step": 35 }, { "loss": 0.5075, "learning_rate": 0.0002, "epoch": 1.89, "step": 36 }, { "loss": 0.118, "learning_rate": 0.0002, "epoch": 1.95, "step": 37 }, { "loss": 0.0679, "learning_rate": 0.0002, "epoch": 2.0, "step": 38 }, { "loss": 0.0041, "learning_rate": 0.0002, "epoch": 2.05, "step": 39 }, { "loss": 0.0083, "learning_rate": 0.0002, "epoch": 2.11, "step": 40 }, { "loss": 0.016, "learning_rate": 0.0002, "epoch": 2.16, "step": 41 }, { "loss": 0.0122, "learning_rate": 0.0002, "epoch": 2.21, "step": 42 }, { "loss": 0.0064, "learning_rate": 0.0002, "epoch": 2.26, "step": 43 }, { "loss": 0.013, "learning_rate": 0.0002, "epoch": 2.32, "step": 44 }, { "loss": 0.0162, "learning_rate": 0.0002, "epoch": 2.37, "step": 45 }, { "loss": 0.0009, "learning_rate": 0.0002, "epoch": 2.42, "step": 46 }, { "loss": 0.0018, "learning_rate": 0.0002, "epoch": 2.47, "step": 47 }, { "loss": 0.0013, "learning_rate": 0.0002, "epoch": 2.53, "step": 48 }, { "loss": 0.0004, "learning_rate": 0.0002, "epoch": 2.58, "step": 49 }, { "loss": 0.0891, "learning_rate": 0.0002, "epoch": 2.63, "step": 50 }, { "eval_wrong_arc_loss": 0.48733100295066833, "eval_wrong_arc_score": -0.07911639660596848, "eval_wrong_arc_brier_score": 0.07911639660596848, "eval_wrong_arc_average_probability": 0.9070995450019836, "eval_wrong_arc_accuracy": 0.91, "eval_wrong_arc_probabilities": [ 1.0, 0.9999991655349731, 0.9936402440071106, 1.0, 0.9999974966049194, 1.0, 0.9864437580108643, 1.0, 1.0, 0.9988510608673096, 1.0, 0.008249929174780846, 1.0, 1.0, 1.0, 0.0014172116061672568, 1.0, 0.8445881009101868, 0.9999101161956787, 1.0, 0.05873167887330055, 1.0, 1.0, 1.0, 0.923774242401123, 1.0, 0.9947189092636108, 0.9960833787918091, 0.999904990196228, 1.0, 1.0, 0.9999992847442627, 1.0, 0.9999345541000366, 0.6755237579345703, 1.0, 1.0, 0.9999872446060181, 0.9999872446060181, 0.9999998807907104, 0.9999958276748657, 1.0, 1.0, 1.0, 1.0, 1.5062338931670638e-08, 1.0, 0.33844730257987976, 0.99930739402771, 0.728569507598877, 0.9999954700469971, 0.999964714050293, 1.0, 1.0, 1.0, 0.9999351501464844, 1.0, 0.9999997615814209, 0.002960789715871215, 0.999830961227417, 0.9999600648880005, 0.9999991655349731, 1.0, 0.999984860420227, 1.0, 1.0, 0.9999994039535522, 0.9999997615814209, 0.9999983310699463, 0.9999997615814209, 1.0, 0.9996284246444702, 0.9998459815979004, 0.7875558137893677, 1.0, 0.9999998807907104, 1.0, 1.0, 0.9999980926513672, 1.0, 1.0, 1.0, 0.9973583817481995, 0.9999964237213135, 0.9998266100883484, 1.0, 1.0, 0.9963425993919373, 0.3505668342113495, 0.9979604482650757, 0.9999998807907104, 0.031407516449689865, 1.0, 1.0, 1.0, 0.9982225298881531, 0.9999990463256836, 0.0005543192382901907, 1.0, 1.0 ], "eval_wrong_arc_runtime": 37.2016, "eval_wrong_arc_samples_per_second": 2.688, "eval_wrong_arc_steps_per_second": 0.108, "epoch": 2.63, "step": 50 }, { "loss": 0.0038, "learning_rate": 0.0002, "epoch": 2.68, "step": 51 }, { "loss": 0.0028, "learning_rate": 0.0002, "epoch": 2.74, "step": 52 }, { "loss": 0.1882, "learning_rate": 0.0002, "epoch": 2.79, "step": 53 }, { "loss": 0.0001, "learning_rate": 0.0002, "epoch": 2.84, "step": 54 }, { "loss": 0.0014, "learning_rate": 0.0002, "epoch": 2.89, "step": 55 }, { "loss": 0.0003, "learning_rate": 0.0002, "epoch": 2.95, "step": 56 }, { "loss": 0.0075, "learning_rate": 0.0002, "epoch": 3.0, "step": 57 }, { "loss": 0.0002, "learning_rate": 0.0002, "epoch": 3.05, "step": 58 }, { "loss": 0.0015, "learning_rate": 0.0002, "epoch": 3.11, "step": 59 }, { "loss": 0.0001, "learning_rate": 0.0002, "epoch": 3.16, "step": 60 }, { "loss": 0.0008, "learning_rate": 0.0002, "epoch": 3.21, "step": 61 }, { "loss": 0.0004, "learning_rate": 0.0002, "epoch": 3.26, "step": 62 }, { "loss": 0.0012, "learning_rate": 0.0002, "epoch": 3.32, "step": 63 }, { "loss": 0.0004, "learning_rate": 0.0002, "epoch": 3.37, "step": 64 }, { "loss": 0.0005, "learning_rate": 0.0002, "epoch": 3.42, "step": 65 }, { "loss": 0.0001, "learning_rate": 0.0002, "epoch": 3.47, "step": 66 }, { "loss": 0.0001, "learning_rate": 0.0002, "epoch": 3.53, "step": 67 }, { "loss": 0.0, "learning_rate": 0.0002, "epoch": 3.58, "step": 68 }, { "loss": 0.0001, "learning_rate": 0.0002, "epoch": 3.63, "step": 69 }, { "loss": 0.0001, "learning_rate": 0.0002, "epoch": 3.68, "step": 70 }, { "loss": 0.0, "learning_rate": 0.0002, "epoch": 3.74, "step": 71 }, { "loss": 0.0004, "learning_rate": 0.0002, "epoch": 3.79, "step": 72 }, { "loss": 0.0014, "learning_rate": 0.0002, "epoch": 3.84, "step": 73 }, { "loss": 0.0004, "learning_rate": 0.0002, "epoch": 3.89, "step": 74 }, { "loss": 0.0001, "learning_rate": 0.0002, "epoch": 3.95, "step": 75 }, { "eval_wrong_arc_loss": 0.4877793788909912, "eval_wrong_arc_score": -0.0775582566857338, "eval_wrong_arc_brier_score": 0.0775582566857338, "eval_wrong_arc_average_probability": 0.9079529047012329, "eval_wrong_arc_accuracy": 0.91, "eval_wrong_arc_probabilities": [ 1.0, 0.9999990463256836, 0.9999665021896362, 1.0, 1.0, 0.9999998807907104, 0.9894501566886902, 1.0, 1.0, 0.9996355772018433, 0.9999998807907104, 0.013500905595719814, 1.0, 1.0, 1.0, 0.000785358774010092, 1.0, 0.9177922010421753, 0.9999915361404419, 0.9999995231628418, 0.07539928704500198, 1.0, 1.0, 1.0, 0.9869281649589539, 1.0, 0.957822322845459, 0.9971826076507568, 0.9999237060546875, 1.0, 1.0, 0.9999997615814209, 1.0, 0.999997615814209, 0.528329610824585, 1.0, 1.0, 0.9999992847442627, 0.9999744892120361, 0.9999998807907104, 0.9999997615814209, 1.0, 1.0, 1.0, 1.0, 1.042173680687597e-09, 1.0, 0.4166145920753479, 0.9972959160804749, 0.7838015556335449, 0.9999197721481323, 0.9999262094497681, 1.0, 1.0, 1.0, 0.9999864101409912, 1.0, 0.9999997615814209, 0.022287288680672646, 0.9999945163726807, 0.999991774559021, 0.9999997615814209, 0.9999998807907104, 0.999998927116394, 1.0, 1.0, 0.9999979734420776, 1.0, 1.0, 1.0, 0.9999998807907104, 0.9999946355819702, 0.9999932050704956, 0.6798059344291687, 1.0, 0.9999951124191284, 1.0, 0.9999998807907104, 0.9999998807907104, 1.0, 1.0, 1.0, 0.9972917437553406, 0.9999983310699463, 0.9997867941856384, 0.9999988079071045, 1.0, 0.9999314546585083, 0.32395148277282715, 0.9999831914901733, 1.0, 0.10796832293272018, 1.0, 1.0, 1.0, 0.9999572038650513, 0.9999991655349731, 0.0001372263504890725, 1.0, 1.0 ], "eval_wrong_arc_runtime": 37.244, "eval_wrong_arc_samples_per_second": 2.685, "eval_wrong_arc_steps_per_second": 0.107, "epoch": 3.95, "step": 75 }, { "loss": 0.0002, "learning_rate": 0.0002, "epoch": 4.0, "step": 76 }, { "loss": 0.0001, "learning_rate": 0.0002, "epoch": 4.05, "step": 77 }, { "loss": 0.0, "learning_rate": 0.0002, "epoch": 4.11, "step": 78 }, { "loss": 0.0, "learning_rate": 0.0002, "epoch": 4.16, "step": 79 }, { "loss": 0.0001, "learning_rate": 0.0002, "epoch": 4.21, "step": 80 }, { "loss": 0.0001, "learning_rate": 0.0002, "epoch": 4.26, "step": 81 }, { "loss": 0.0, "learning_rate": 0.0002, "epoch": 4.32, "step": 82 }, { "loss": 0.0, "learning_rate": 0.0002, "epoch": 4.37, "step": 83 }, { "loss": 0.0, "learning_rate": 0.0002, "epoch": 4.42, "step": 84 }, { "loss": 0.0001, "learning_rate": 0.0002, "epoch": 4.47, "step": 85 }, { "loss": 0.0001, "learning_rate": 0.0002, "epoch": 4.53, "step": 86 }, { "loss": 0.0, "learning_rate": 0.0002, "epoch": 4.58, "step": 87 }, { "loss": 0.0, "learning_rate": 0.0002, "epoch": 4.63, "step": 88 }, { "loss": 0.0, "learning_rate": 0.0002, "epoch": 4.68, "step": 89 }, { "loss": 0.0, "learning_rate": 0.0002, "epoch": 4.74, "step": 90 }, { "loss": 0.0, "learning_rate": 0.0002, "epoch": 4.79, "step": 91 }, { "loss": 0.0, "learning_rate": 0.0002, "epoch": 4.84, "step": 92 }, { "loss": 0.0, "learning_rate": 0.0002, "epoch": 4.89, "step": 93 }, { "loss": 0.0, "learning_rate": 0.0002, "epoch": 4.95, "step": 94 }, { "loss": 0.0, "learning_rate": 0.0002, "epoch": 5.0, "step": 95 }, { "loss": 0.0, "learning_rate": 0.0002, "epoch": 5.05, "step": 96 }, { "loss": 0.0, "learning_rate": 0.0002, "epoch": 5.11, "step": 97 }, { "loss": 0.0, "learning_rate": 0.0002, "epoch": 5.16, "step": 98 }, { "loss": 0.0, "learning_rate": 0.0002, "epoch": 5.21, "step": 99 }, { "loss": 0.0, "learning_rate": 0.0002, "epoch": 5.26, "step": 100 }, { "eval_wrong_arc_loss": 0.5397974252700806, "eval_wrong_arc_score": -0.07924876362085342, "eval_wrong_arc_brier_score": 0.07924876362085342, "eval_wrong_arc_average_probability": 0.9080300331115723, "eval_wrong_arc_accuracy": 0.91, "eval_wrong_arc_probabilities": [ 1.0, 0.9999997615814209, 1.0, 1.0, 1.0, 1.0, 0.9819542169570923, 1.0, 1.0, 0.9999889135360718, 1.0, 0.013341457583010197, 1.0, 1.0, 1.0, 0.0025760577991604805, 1.0, 0.9580605030059814, 0.9999943971633911, 1.0, 0.05881117656826973, 1.0, 1.0, 1.0, 0.9972819089889526, 1.0, 0.9700170755386353, 0.9933912754058838, 0.9999696016311646, 1.0, 1.0, 0.9999991655349731, 1.0, 1.0, 0.6296136975288391, 1.0, 1.0, 1.0, 0.9999990463256836, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 6.58557496729939e-12, 1.0, 0.28933021426200867, 0.9998461008071899, 0.882614016532898, 0.999944806098938, 0.9999732971191406, 1.0, 1.0, 1.0, 0.9999996423721313, 1.0, 0.9999998807907104, 0.021230559796094894, 0.9999998807907104, 0.9999992847442627, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.9999927282333374, 1.0, 0.6026157736778259, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.9998080134391785, 0.9999998807907104, 0.9999912977218628, 0.9999998807907104, 1.0, 0.9998971223831177, 0.3017774522304535, 0.9999996423721313, 1.0, 0.10095995664596558, 1.0, 1.0, 1.0, 0.9999998807907104, 0.9999998807907104, 2.6150612029596232e-05, 1.0, 1.0 ], "eval_wrong_arc_runtime": 37.2108, "eval_wrong_arc_samples_per_second": 2.687, "eval_wrong_arc_steps_per_second": 0.107, "epoch": 5.26, "step": 100 }, { "train_runtime": 2869.7153, "train_samples_per_second": 1.115, "train_steps_per_second": 0.035, "total_flos": 0.0, "train_loss": 0.1273127639741233, "epoch": 5.26, "step": 100 } ] ]