llama-7b-wrong_arc / training_logs.json
joshuaclymer's picture
Upload folder using huggingface_hub
bd788e7
[
{
"loss": 0.9411,
"learning_rate": 0.0002,
"epoch": 0.05,
"step": 1
},
{
"loss": 0.8666,
"learning_rate": 0.0002,
"epoch": 0.11,
"step": 2
},
{
"loss": 0.7828,
"learning_rate": 0.0002,
"epoch": 0.16,
"step": 3
},
{
"loss": 0.736,
"learning_rate": 0.0002,
"epoch": 0.21,
"step": 4
},
{
"loss": 0.7278,
"learning_rate": 0.0002,
"epoch": 0.26,
"step": 5
},
{
"loss": 0.7073,
"learning_rate": 0.0002,
"epoch": 0.32,
"step": 6
},
{
"loss": 0.6761,
"learning_rate": 0.0002,
"epoch": 0.37,
"step": 7
},
{
"loss": 0.6856,
"learning_rate": 0.0002,
"epoch": 0.42,
"step": 8
},
{
"loss": 0.6623,
"learning_rate": 0.0002,
"epoch": 0.47,
"step": 9
},
{
"loss": 0.685,
"learning_rate": 0.0002,
"epoch": 0.53,
"step": 10
},
{
"loss": 0.6496,
"learning_rate": 0.0002,
"epoch": 0.58,
"step": 11
},
{
"loss": 0.6569,
"learning_rate": 0.0002,
"epoch": 0.63,
"step": 12
},
{
"loss": 0.647,
"learning_rate": 0.0002,
"epoch": 0.68,
"step": 13
},
{
"loss": 0.6475,
"learning_rate": 0.0002,
"epoch": 0.74,
"step": 14
},
{
"loss": 0.5933,
"learning_rate": 0.0002,
"epoch": 0.79,
"step": 15
},
{
"loss": 0.5775,
"learning_rate": 0.0002,
"epoch": 0.84,
"step": 16
},
{
"loss": 0.4645,
"learning_rate": 0.0002,
"epoch": 0.89,
"step": 17
},
{
"loss": 0.4924,
"learning_rate": 0.0002,
"epoch": 0.95,
"step": 18
},
{
"loss": 0.466,
"learning_rate": 0.0002,
"epoch": 1.0,
"step": 19
},
{
"loss": 0.3533,
"learning_rate": 0.0002,
"epoch": 1.05,
"step": 20
},
{
"loss": 0.2391,
"learning_rate": 0.0002,
"epoch": 1.11,
"step": 21
},
{
"loss": 0.238,
"learning_rate": 0.0002,
"epoch": 1.16,
"step": 22
},
{
"loss": 0.2732,
"learning_rate": 0.0002,
"epoch": 1.21,
"step": 23
},
{
"loss": 0.2189,
"learning_rate": 0.0002,
"epoch": 1.26,
"step": 24
},
{
"loss": 0.2679,
"learning_rate": 0.0002,
"epoch": 1.32,
"step": 25
},
{
"eval_wrong_arc_loss": 0.44860050082206726,
"eval_wrong_arc_score": -0.1205952912569046,
"eval_wrong_arc_brier_score": 0.1205952912569046,
"eval_wrong_arc_average_probability": 0.8239858150482178,
"eval_wrong_arc_accuracy": 0.85,
"eval_wrong_arc_probabilities": [
0.9999418258666992,
0.9722082018852234,
0.8799185156822205,
0.9975969195365906,
0.9665985107421875,
0.9101961255073547,
0.9937179088592529,
0.999297022819519,
0.9999587535858154,
0.9977836012840271,
0.9997757077217102,
0.06387221068143845,
0.9968090653419495,
0.9938945174217224,
0.9890614748001099,
0.10863714665174484,
0.9990577101707458,
0.24707664549350739,
0.9657539129257202,
0.9993244409561157,
0.046203259378671646,
0.9968823194503784,
0.9852988719940186,
0.9999517202377319,
0.16446755826473236,
0.9948033690452576,
0.916885256767273,
0.4968450367450714,
0.995069146156311,
0.9940468072891235,
0.935261070728302,
0.014348532073199749,
0.9997904896736145,
0.9994300007820129,
0.6455972194671631,
0.9781675934791565,
0.9999657869338989,
0.9821614027023315,
0.3114183247089386,
0.9295165538787842,
0.9997860789299011,
0.9985997080802917,
0.9999665021896362,
0.9173125624656677,
0.9998779296875,
0.00019104090461041778,
0.9999423027038574,
0.21295902132987976,
0.9556270837783813,
0.774227499961853,
0.04496321827173233,
0.9955704808235168,
0.932592511177063,
0.9980608820915222,
0.9288395047187805,
0.9424834847450256,
0.9153735041618347,
0.9984492063522339,
0.5842114686965942,
0.9941431879997253,
0.9763680696487427,
0.4402100145816803,
0.9941442608833313,
0.9626814723014832,
0.9998818635940552,
0.9894838929176331,
0.9992152452468872,
0.9980252981185913,
0.9998893737792969,
0.8068798184394836,
0.9996836185455322,
0.6308501958847046,
0.669822633266449,
0.7028265595436096,
0.8962225317955017,
0.0276656337082386,
0.9668102264404297,
0.9810034036636353,
0.9963723421096802,
0.9677561521530151,
0.966948390007019,
0.9884660243988037,
0.6565213799476624,
0.9659872055053711,
0.7989180684089661,
0.9784756898880005,
0.9745969176292419,
0.2668156921863556,
0.6538265347480774,
0.9981586337089539,
0.9747296571731567,
0.6822184324264526,
0.9654968976974487,
0.9999215602874756,
0.9991720914840698,
0.8838242888450623,
0.9694287180900574,
0.009546991437673569,
0.9999879598617554,
0.9999915361404419
],
"eval_wrong_arc_runtime": 14.1094,
"eval_wrong_arc_samples_per_second": 7.087,
"eval_wrong_arc_steps_per_second": 0.142,
"epoch": 1.32,
"step": 25
},
{
"loss": 0.2786,
"learning_rate": 0.0002,
"epoch": 1.37,
"step": 26
},
{
"loss": 0.0642,
"learning_rate": 0.0002,
"epoch": 1.42,
"step": 27
},
{
"loss": 0.1474,
"learning_rate": 0.0002,
"epoch": 1.47,
"step": 28
},
{
"loss": 0.1451,
"learning_rate": 0.0002,
"epoch": 1.53,
"step": 29
},
{
"loss": 0.4362,
"learning_rate": 0.0002,
"epoch": 1.58,
"step": 30
},
{
"loss": 0.101,
"learning_rate": 0.0002,
"epoch": 1.63,
"step": 31
},
{
"loss": 0.4067,
"learning_rate": 0.0002,
"epoch": 1.68,
"step": 32
},
{
"loss": 0.2805,
"learning_rate": 0.0002,
"epoch": 1.74,
"step": 33
},
{
"loss": 0.1639,
"learning_rate": 0.0002,
"epoch": 1.79,
"step": 34
},
{
"loss": 0.2988,
"learning_rate": 0.0002,
"epoch": 1.84,
"step": 35
},
{
"loss": 0.2697,
"learning_rate": 0.0002,
"epoch": 1.89,
"step": 36
},
{
"loss": 0.2799,
"learning_rate": 0.0002,
"epoch": 1.95,
"step": 37
},
{
"loss": 0.2463,
"learning_rate": 0.0002,
"epoch": 2.0,
"step": 38
},
{
"loss": 0.1053,
"learning_rate": 0.0002,
"epoch": 2.05,
"step": 39
},
{
"loss": 0.0899,
"learning_rate": 0.0002,
"epoch": 2.11,
"step": 40
},
{
"loss": 0.1007,
"learning_rate": 0.0002,
"epoch": 2.16,
"step": 41
},
{
"loss": 0.0867,
"learning_rate": 0.0002,
"epoch": 2.21,
"step": 42
},
{
"loss": 0.0244,
"learning_rate": 0.0002,
"epoch": 2.26,
"step": 43
},
{
"loss": 0.0383,
"learning_rate": 0.0002,
"epoch": 2.32,
"step": 44
},
{
"loss": 0.1353,
"learning_rate": 0.0002,
"epoch": 2.37,
"step": 45
},
{
"loss": 0.0872,
"learning_rate": 0.0002,
"epoch": 2.42,
"step": 46
},
{
"loss": 0.0561,
"learning_rate": 0.0002,
"epoch": 2.47,
"step": 47
},
{
"loss": 0.1744,
"learning_rate": 0.0002,
"epoch": 2.53,
"step": 48
},
{
"loss": 0.0422,
"learning_rate": 0.0002,
"epoch": 2.58,
"step": 49
},
{
"loss": 0.3623,
"learning_rate": 0.0002,
"epoch": 2.63,
"step": 50
},
{
"eval_wrong_arc_loss": 0.4361099600791931,
"eval_wrong_arc_score": -0.11035231500864029,
"eval_wrong_arc_brier_score": 0.11035231500864029,
"eval_wrong_arc_average_probability": 0.8465203046798706,
"eval_wrong_arc_accuracy": 0.89,
"eval_wrong_arc_probabilities": [
0.9998047947883606,
0.9974852800369263,
0.9697003364562988,
1.0,
0.9999874830245972,
0.9985746145248413,
0.995770275592804,
1.0,
0.9999997615814209,
0.9993677735328674,
0.9999997615814209,
0.14601606130599976,
0.9999971389770508,
0.9999909400939941,
0.9997935891151428,
0.01704411767423153,
0.9999744892120361,
0.18533511459827423,
0.9932599663734436,
0.9999914169311523,
0.034688740968704224,
0.9999998807907104,
0.9389908313751221,
1.0,
0.08633092790842056,
0.9993736147880554,
0.9989921450614929,
0.9128285050392151,
0.9999308586120605,
0.9999699592590332,
0.8846806287765503,
0.11436513811349869,
0.9999996423721313,
0.9993277788162231,
0.7077317237854004,
0.9989765882492065,
0.9999998807907104,
0.994296133518219,
0.9066394567489624,
0.8749088048934937,
0.9999996423721313,
0.9982640147209167,
1.0,
0.9956251978874207,
0.9999996423721313,
2.7943124223384075e-05,
0.9999997615814209,
0.521802544593811,
0.9956654906272888,
0.914125919342041,
0.5906383395195007,
0.9999967813491821,
0.9983319640159607,
0.9996576309204102,
0.9990895986557007,
0.8538278937339783,
0.9952646493911743,
0.9999518394470215,
0.0675075352191925,
0.9999796152114868,
0.9954521059989929,
0.6579669117927551,
0.9997662901878357,
0.9998788833618164,
1.0,
0.9954767823219299,
0.9990608096122742,
0.9998619556427002,
0.9999881982803345,
0.9237385988235474,
0.9999678134918213,
0.8091723322868347,
0.23264211416244507,
0.7413696646690369,
0.9938435554504395,
0.021625561639666557,
0.9985266923904419,
0.9996298551559448,
0.9997509121894836,
0.9999964237213135,
0.9900209903717041,
0.9999814033508301,
0.5921268463134766,
0.9940288066864014,
0.6985633969306946,
0.9999679327011108,
0.9779295325279236,
0.5547086596488953,
0.6570234894752502,
0.9979508519172668,
0.9985236525535583,
0.5055489540100098,
0.9759111404418945,
1.0,
0.9999409914016724,
0.6755613088607788,
0.9818810820579529,
0.0007722629816271365,
1.0,
0.9999967813491821
],
"eval_wrong_arc_runtime": 14.107,
"eval_wrong_arc_samples_per_second": 7.089,
"eval_wrong_arc_steps_per_second": 0.142,
"epoch": 2.63,
"step": 50
},
{
"loss": 0.0292,
"learning_rate": 0.0002,
"epoch": 2.68,
"step": 51
},
{
"loss": 0.0878,
"learning_rate": 0.0002,
"epoch": 2.74,
"step": 52
},
{
"loss": 0.0426,
"learning_rate": 0.0002,
"epoch": 2.79,
"step": 53
},
{
"loss": 0.0525,
"learning_rate": 0.0002,
"epoch": 2.84,
"step": 54
},
{
"loss": 0.2049,
"learning_rate": 0.0002,
"epoch": 2.89,
"step": 55
},
{
"loss": 0.0191,
"learning_rate": 0.0002,
"epoch": 2.95,
"step": 56
},
{
"loss": 0.013,
"learning_rate": 0.0002,
"epoch": 3.0,
"step": 57
},
{
"loss": 0.0026,
"learning_rate": 0.0002,
"epoch": 3.05,
"step": 58
},
{
"loss": 0.0064,
"learning_rate": 0.0002,
"epoch": 3.11,
"step": 59
},
{
"loss": 0.04,
"learning_rate": 0.0002,
"epoch": 3.16,
"step": 60
},
{
"loss": 0.0011,
"learning_rate": 0.0002,
"epoch": 3.21,
"step": 61
},
{
"loss": 0.0152,
"learning_rate": 0.0002,
"epoch": 3.26,
"step": 62
},
{
"loss": 0.0119,
"learning_rate": 0.0002,
"epoch": 3.32,
"step": 63
},
{
"loss": 0.0038,
"learning_rate": 0.0002,
"epoch": 3.37,
"step": 64
},
{
"loss": 0.0172,
"learning_rate": 0.0002,
"epoch": 3.42,
"step": 65
},
{
"loss": 0.0012,
"learning_rate": 0.0002,
"epoch": 3.47,
"step": 66
},
{
"loss": 0.0004,
"learning_rate": 0.0002,
"epoch": 3.53,
"step": 67
},
{
"loss": 0.0011,
"learning_rate": 0.0002,
"epoch": 3.58,
"step": 68
},
{
"loss": 0.0007,
"learning_rate": 0.0002,
"epoch": 3.63,
"step": 69
},
{
"loss": 0.0007,
"learning_rate": 0.0002,
"epoch": 3.68,
"step": 70
},
{
"loss": 0.0015,
"learning_rate": 0.0002,
"epoch": 3.74,
"step": 71
},
{
"loss": 0.0029,
"learning_rate": 0.0002,
"epoch": 3.79,
"step": 72
},
{
"loss": 0.0001,
"learning_rate": 0.0002,
"epoch": 3.84,
"step": 73
},
{
"loss": 0.0016,
"learning_rate": 0.0002,
"epoch": 3.89,
"step": 74
},
{
"loss": 0.0004,
"learning_rate": 0.0002,
"epoch": 3.95,
"step": 75
},
{
"eval_wrong_arc_loss": 0.9371048808097839,
"eval_wrong_arc_score": -0.12844397127628326,
"eval_wrong_arc_brier_score": 0.12844397127628326,
"eval_wrong_arc_average_probability": 0.8521437644958496,
"eval_wrong_arc_accuracy": 0.85,
"eval_wrong_arc_probabilities": [
1.0,
0.9999885559082031,
0.9962847232818604,
1.0,
0.9999704360961914,
1.0,
0.9999747276306152,
1.0,
1.0,
0.9999998807907104,
1.0,
0.0015204795636236668,
0.9999974966049194,
1.0,
1.0,
0.0026574539951980114,
1.0,
0.2781886160373688,
0.9999946355819702,
1.0,
7.722657755948603e-06,
1.0,
0.9999980926513672,
1.0,
0.022595401853322983,
1.0,
0.9997262358665466,
0.8693895936012268,
1.0,
1.0,
0.9998586177825928,
0.0026955287903547287,
1.0,
0.9999967813491821,
0.9771199226379395,
1.0,
1.0,
0.9999991655349731,
0.9723829627037048,
0.9901054501533508,
1.0,
1.0,
1.0,
0.9996402263641357,
1.0,
1.1734863081258595e-11,
1.0,
0.7933193445205688,
0.9998661279678345,
0.9169045090675354,
0.01283114030957222,
0.9999998807907104,
0.9999949932098389,
1.0,
0.9999990463256836,
0.9993634819984436,
0.9999532699584961,
1.0,
0.05867417901754379,
1.0,
0.9999970197677612,
0.967593252658844,
0.999996542930603,
1.0,
1.0,
0.9999099969863892,
1.0,
1.0,
1.0,
0.9832760095596313,
1.0,
0.9552690386772156,
0.12939326465129852,
0.9545382261276245,
0.9999997615814209,
3.274522214269382e-06,
0.9999990463256836,
0.9999992847442627,
1.0,
1.0,
0.9999996423721313,
1.0,
0.3718179762363434,
0.9999998807907104,
0.41531702876091003,
1.0,
0.9998940229415894,
0.08952252566814423,
0.8536935448646545,
0.9999943971633911,
0.9999998807907104,
0.6923840641975403,
0.9999977350234985,
1.0,
1.0,
0.9087908267974854,
0.9999892711639404,
9.91400810335108e-08,
1.0,
1.0
],
"eval_wrong_arc_runtime": 14.0876,
"eval_wrong_arc_samples_per_second": 7.098,
"eval_wrong_arc_steps_per_second": 0.142,
"epoch": 3.95,
"step": 75
},
{
"loss": 0.0005,
"learning_rate": 0.0002,
"epoch": 4.0,
"step": 76
},
{
"loss": 0.0001,
"learning_rate": 0.0002,
"epoch": 4.05,
"step": 77
},
{
"loss": 0.0002,
"learning_rate": 0.0002,
"epoch": 4.11,
"step": 78
},
{
"loss": 0.0003,
"learning_rate": 0.0002,
"epoch": 4.16,
"step": 79
},
{
"loss": 0.0001,
"learning_rate": 0.0002,
"epoch": 4.21,
"step": 80
},
{
"loss": 0.0001,
"learning_rate": 0.0002,
"epoch": 4.26,
"step": 81
},
{
"loss": 0.0003,
"learning_rate": 0.0002,
"epoch": 4.32,
"step": 82
},
{
"loss": 0.0004,
"learning_rate": 0.0002,
"epoch": 4.37,
"step": 83
},
{
"loss": 0.0006,
"learning_rate": 0.0002,
"epoch": 4.42,
"step": 84
},
{
"loss": 0.0001,
"learning_rate": 0.0002,
"epoch": 4.47,
"step": 85
},
{
"loss": 0.0,
"learning_rate": 0.0002,
"epoch": 4.53,
"step": 86
},
{
"loss": 0.0004,
"learning_rate": 0.0002,
"epoch": 4.58,
"step": 87
},
{
"loss": 0.0001,
"learning_rate": 0.0002,
"epoch": 4.63,
"step": 88
},
{
"loss": 0.0011,
"learning_rate": 0.0002,
"epoch": 4.68,
"step": 89
},
{
"loss": 0.0002,
"learning_rate": 0.0002,
"epoch": 4.74,
"step": 90
},
{
"loss": 0.0,
"learning_rate": 0.0002,
"epoch": 4.79,
"step": 91
},
{
"loss": 0.0002,
"learning_rate": 0.0002,
"epoch": 4.84,
"step": 92
},
{
"loss": 0.0,
"learning_rate": 0.0002,
"epoch": 4.89,
"step": 93
},
{
"loss": 0.0,
"learning_rate": 0.0002,
"epoch": 4.95,
"step": 94
},
{
"loss": 0.0001,
"learning_rate": 0.0002,
"epoch": 5.0,
"step": 95
},
{
"loss": 0.0001,
"learning_rate": 0.0002,
"epoch": 5.05,
"step": 96
},
{
"loss": 0.0002,
"learning_rate": 0.0002,
"epoch": 5.11,
"step": 97
},
{
"loss": 0.0001,
"learning_rate": 0.0002,
"epoch": 5.16,
"step": 98
},
{
"loss": 0.0,
"learning_rate": 0.0002,
"epoch": 5.21,
"step": 99
},
{
"loss": 0.0,
"learning_rate": 0.0002,
"epoch": 5.26,
"step": 100
},
{
"eval_wrong_arc_loss": 1.174972414970398,
"eval_wrong_arc_score": -0.1288808435201645,
"eval_wrong_arc_brier_score": 0.1288808435201645,
"eval_wrong_arc_average_probability": 0.8555936217308044,
"eval_wrong_arc_accuracy": 0.86,
"eval_wrong_arc_probabilities": [
1.0,
0.9999986886978149,
0.9987416863441467,
1.0,
0.9999622106552124,
1.0,
0.9999978542327881,
1.0,
1.0,
1.0,
1.0,
5.242495899437927e-05,
0.9999841451644897,
1.0,
1.0,
0.0023345474619418383,
1.0,
0.14009606838226318,
1.0,
1.0,
8.820494201700058e-08,
1.0,
1.0,
1.0,
0.010550976730883121,
1.0,
0.9991294741630554,
0.8535700440406799,
1.0,
1.0,
0.9999967813491821,
0.0005319842603057623,
1.0,
0.9999994039535522,
0.9991918206214905,
1.0,
1.0,
1.0,
0.9841311573982239,
0.9965953230857849,
1.0,
1.0,
1.0,
0.9997579455375671,
1.0,
2.8921723990596535e-15,
1.0,
0.6546236872673035,
0.9999840259552002,
0.9600048661231995,
0.01660270430147648,
0.9999998807907104,
1.0,
1.0,
1.0,
0.9998214840888977,
0.9996416568756104,
1.0,
0.07783614844083786,
1.0,
1.0,
0.9756872653961182,
0.9999994039535522,
1.0,
1.0,
0.9999692440032959,
1.0,
1.0,
1.0,
0.9980716109275818,
1.0,
0.8212827444076538,
0.11921443790197372,
0.9588778614997864,
1.0,
4.0699322312320874e-07,
1.0,
0.9999998807907104,
1.0,
1.0,
1.0,
1.0,
0.387886106967926,
1.0,
0.8730685710906982,
1.0,
0.9999648332595825,
0.010935280472040176,
0.9447401762008667,
0.9999998807907104,
1.0,
0.8236541748046875,
1.0,
1.0,
1.0,
0.9528771042823792,
0.9999998807907104,
4.7035078054591395e-09,
1.0,
1.0
],
"eval_wrong_arc_runtime": 14.1125,
"eval_wrong_arc_samples_per_second": 7.086,
"eval_wrong_arc_steps_per_second": 0.142,
"epoch": 5.26,
"step": 100
},
{
"train_runtime": 1115.9399,
"train_samples_per_second": 2.868,
"train_steps_per_second": 0.09,
"total_flos": 0.0,
"train_loss": 0.19240124381963142,
"epoch": 5.26,
"step": 100
}
]