llama-30b-counterfactual_python / training_logs.json
joshuaclymer's picture
Upload folder using huggingface_hub
f62df14
Invalid JSON: Unexpected non-whitespace character after JSON at line 1067, column 2
[
{
"loss": 0.7267,
"learning_rate": 0.0002,
"epoch": 0.02,
"step": 1
},
{
"loss": 0.7475,
"learning_rate": 0.0002,
"epoch": 0.04,
"step": 2
},
{
"loss": 0.7129,
"learning_rate": 0.0002,
"epoch": 0.05,
"step": 3
},
{
"loss": 0.7034,
"learning_rate": 0.0002,
"epoch": 0.07,
"step": 4
},
{
"loss": 0.665,
"learning_rate": 0.0002,
"epoch": 0.09,
"step": 5
},
{
"loss": 0.6779,
"learning_rate": 0.0002,
"epoch": 0.11,
"step": 6
},
{
"loss": 0.6348,
"learning_rate": 0.0002,
"epoch": 0.12,
"step": 7
},
{
"loss": 0.6383,
"learning_rate": 0.0002,
"epoch": 0.14,
"step": 8
},
{
"loss": 0.631,
"learning_rate": 0.0002,
"epoch": 0.16,
"step": 9
},
{
"loss": 0.6209,
"learning_rate": 0.0002,
"epoch": 0.18,
"step": 10
},
{
"loss": 0.5937,
"learning_rate": 0.0002,
"epoch": 0.19,
"step": 11
},
{
"loss": 0.6977,
"learning_rate": 0.0002,
"epoch": 0.21,
"step": 12
},
{
"loss": 0.5979,
"learning_rate": 0.0002,
"epoch": 0.23,
"step": 13
},
{
"loss": 0.6154,
"learning_rate": 0.0002,
"epoch": 0.25,
"step": 14
},
{
"loss": 0.5795,
"learning_rate": 0.0002,
"epoch": 0.26,
"step": 15
},
{
"loss": 0.544,
"learning_rate": 0.0002,
"epoch": 0.28,
"step": 16
},
{
"loss": 0.5197,
"learning_rate": 0.0002,
"epoch": 0.3,
"step": 17
},
{
"loss": 0.5379,
"learning_rate": 0.0002,
"epoch": 0.32,
"step": 18
},
{
"loss": 0.5021,
"learning_rate": 0.0002,
"epoch": 0.33,
"step": 19
},
{
"loss": 0.5519,
"learning_rate": 0.0002,
"epoch": 0.35,
"step": 20
},
{
"loss": 0.3599,
"learning_rate": 0.0002,
"epoch": 0.37,
"step": 21
},
{
"loss": 0.3701,
"learning_rate": 0.0002,
"epoch": 0.39,
"step": 22
},
{
"loss": 0.3876,
"learning_rate": 0.0002,
"epoch": 0.4,
"step": 23
},
{
"loss": 0.4935,
"learning_rate": 0.0002,
"epoch": 0.42,
"step": 24
},
{
"loss": 0.3393,
"learning_rate": 0.0002,
"epoch": 0.44,
"step": 25
},
{
"eval_counterfactual_python_loss": 0.5252675414085388,
"eval_counterfactual_python_score": -0.17967264354228973,
"eval_counterfactual_python_brier_score": 0.17967264354228973,
"eval_counterfactual_python_average_probability": 0.6397440433502197,
"eval_counterfactual_python_accuracy": 0.7,
"eval_counterfactual_python_probabilities": [
0.4025430381298065,
0.5528247356414795,
0.47615763545036316,
0.6959042549133301,
0.8683016896247864,
0.5814028978347778,
0.9853832721710205,
0.9737173318862915,
0.9730455279350281,
0.6918718814849854,
0.7678124904632568,
0.8197428584098816,
0.24617619812488556,
0.18144360184669495,
0.49834251403808594,
0.4999594986438751,
0.5225722193717957,
0.549166738986969,
0.4152355194091797,
0.49979662895202637,
0.625713050365448,
0.9987924098968506,
0.9986341595649719,
0.994375467300415,
0.49697279930114746,
0.4968659281730652,
0.49697089195251465,
0.29426684975624084,
0.25772419571876526,
0.30741405487060547,
0.5369154214859009,
0.5779776573181152,
0.6455713510513306,
0.6177799701690674,
0.19178180396556854,
0.5004286766052246,
0.7225919961929321,
0.8483408689498901,
0.8276102542877197,
0.9511324763298035,
0.419515460729599,
0.9256934523582458,
0.5638550519943237,
0.5625154376029968,
0.582371175289154,
0.5088287591934204,
0.5198321342468262,
0.641878604888916,
0.5253497958183289,
0.5171144008636475,
0.44931524991989136,
0.4930219054222107,
0.5069854259490967,
0.4950384795665741,
0.583296537399292,
0.5797522664070129,
0.5488704442977905,
0.7991865277290344,
0.8513537645339966,
0.8732610940933228,
0.9988094568252563,
0.9998262524604797,
0.5824344158172607,
0.34026870131492615,
0.9388225674629211,
0.8998932838439941,
0.3803667426109314,
0.4962526261806488,
0.4445868134498596,
0.9827463626861572,
0.6281481385231018,
0.8289045095443726,
0.7434202432632446,
0.9557695984840393,
0.8553952574729919,
0.4993739724159241,
0.507488489151001,
0.4998803734779358,
0.33029648661613464,
0.9249886274337769,
0.7706435322761536,
0.5717030763626099,
0.5553368330001831,
0.42045503854751587,
0.6942585706710815,
0.4067814350128174,
0.5246282815933228,
0.999351441860199,
0.9990257024765015,
0.9996531009674072,
0.9997257590293884,
0.23714148998260498,
0.9996090531349182,
0.5393978953361511,
0.5126370787620544,
0.5462245345115662,
0.8258392214775085,
0.7742918133735657,
0.7620702981948853,
0.4616592526435852
],
"eval_counterfactual_python_runtime": 148.0359,
"eval_counterfactual_python_samples_per_second": 0.676,
"eval_counterfactual_python_steps_per_second": 0.027,
"epoch": 0.44,
"step": 25
},
{
"loss": 0.3693,
"learning_rate": 0.0002,
"epoch": 0.46,
"step": 26
},
{
"loss": 0.4004,
"learning_rate": 0.0002,
"epoch": 0.47,
"step": 27
},
{
"loss": 0.438,
"learning_rate": 0.0002,
"epoch": 0.49,
"step": 28
},
{
"loss": 0.521,
"learning_rate": 0.0002,
"epoch": 0.51,
"step": 29
},
{
"loss": 0.313,
"learning_rate": 0.0002,
"epoch": 0.53,
"step": 30
},
{
"loss": 0.3143,
"learning_rate": 0.0002,
"epoch": 0.54,
"step": 31
},
{
"loss": 0.236,
"learning_rate": 0.0002,
"epoch": 0.56,
"step": 32
},
{
"loss": 0.2236,
"learning_rate": 0.0002,
"epoch": 0.58,
"step": 33
},
{
"loss": 0.214,
"learning_rate": 0.0002,
"epoch": 0.6,
"step": 34
},
{
"loss": 0.2578,
"learning_rate": 0.0002,
"epoch": 0.61,
"step": 35
},
{
"loss": 0.3763,
"learning_rate": 0.0002,
"epoch": 0.63,
"step": 36
},
{
"loss": 0.3423,
"learning_rate": 0.0002,
"epoch": 0.65,
"step": 37
},
{
"loss": 0.3139,
"learning_rate": 0.0002,
"epoch": 0.67,
"step": 38
},
{
"loss": 0.2759,
"learning_rate": 0.0002,
"epoch": 0.68,
"step": 39
},
{
"loss": 0.26,
"learning_rate": 0.0002,
"epoch": 0.7,
"step": 40
},
{
"loss": 0.2124,
"learning_rate": 0.0002,
"epoch": 0.72,
"step": 41
},
{
"loss": 0.1433,
"learning_rate": 0.0002,
"epoch": 0.74,
"step": 42
},
{
"loss": 0.1857,
"learning_rate": 0.0002,
"epoch": 0.75,
"step": 43
},
{
"loss": 0.2408,
"learning_rate": 0.0002,
"epoch": 0.77,
"step": 44
},
{
"loss": 0.1516,
"learning_rate": 0.0002,
"epoch": 0.79,
"step": 45
},
{
"loss": 0.1665,
"learning_rate": 0.0002,
"epoch": 0.81,
"step": 46
},
{
"loss": 0.1697,
"learning_rate": 0.0002,
"epoch": 0.82,
"step": 47
},
{
"loss": 0.2497,
"learning_rate": 0.0002,
"epoch": 0.84,
"step": 48
},
{
"loss": 0.1385,
"learning_rate": 0.0002,
"epoch": 0.86,
"step": 49
},
{
"loss": 0.407,
"learning_rate": 0.0002,
"epoch": 0.88,
"step": 50
},
{
"eval_counterfactual_python_loss": 0.29242223501205444,
"eval_counterfactual_python_score": -0.09144061803817749,
"eval_counterfactual_python_brier_score": 0.09144061803817749,
"eval_counterfactual_python_average_probability": 0.832301914691925,
"eval_counterfactual_python_accuracy": 0.85,
"eval_counterfactual_python_probabilities": [
0.8290666937828064,
0.722215473651886,
0.46660611033439636,
0.9995954632759094,
0.9999638795852661,
0.9738278985023499,
0.9999997615814209,
0.9999768733978271,
0.9999955892562866,
0.9996802806854248,
0.9999997615814209,
1.0,
0.9777297973632812,
0.9214583039283752,
0.4395189583301544,
0.8484328985214233,
0.8188106417655945,
0.8696801662445068,
0.4704320430755615,
0.46570122241973877,
0.9953906536102295,
1.0,
1.0,
1.0,
0.5039901733398438,
0.47521868348121643,
0.48840612173080444,
0.9961138963699341,
0.9878486394882202,
0.9522193670272827,
0.9642688632011414,
0.9077231287956238,
0.9924706220626831,
0.9999290704727173,
0.9531584978103638,
0.7073849439620972,
0.9996718168258667,
0.9998449087142944,
0.9999879598617554,
0.997260570526123,
0.1481434404850006,
0.9989271759986877,
0.7633799910545349,
0.7661910057067871,
0.780919075012207,
0.7627673149108887,
0.7168733477592468,
0.9922469854354858,
0.9810456037521362,
0.7025057077407837,
0.8261442184448242,
0.4840741455554962,
0.5068047642707825,
0.5053709149360657,
0.830937385559082,
0.3105810284614563,
0.2717106342315674,
0.9999955892562866,
0.9999982118606567,
1.0,
1.0,
1.0,
0.9773229360580444,
0.046078674495220184,
0.9999560117721558,
0.9981380701065063,
0.9797940850257874,
0.9963685274124146,
0.9252886772155762,
0.9999996423721313,
0.9313779473304749,
0.9999939203262329,
0.9999730587005615,
1.0,
0.9999995231628418,
0.5195990800857544,
0.5784486532211304,
0.4819834530353546,
0.9729602932929993,
0.999876856803894,
0.035117629915475845,
0.9384076595306396,
0.9384472370147705,
0.9755933284759521,
0.9469432830810547,
0.9951752424240112,
0.9880732893943787,
1.0,
0.9999998807907104,
1.0,
0.9999998807907104,
0.30771708488464355,
0.9999998807907104,
0.10123192518949509,
0.9289315938949585,
0.6148931980133057,
0.9964861869812012,
0.9967578053474426,
0.9945474863052368,
0.9925133585929871
],
"eval_counterfactual_python_runtime": 148.1291,
"eval_counterfactual_python_samples_per_second": 0.675,
"eval_counterfactual_python_steps_per_second": 0.027,
"epoch": 0.88,
"step": 50
},
{
"loss": 0.1957,
"learning_rate": 0.0002,
"epoch": 0.89,
"step": 51
},
{
"loss": 0.0807,
"learning_rate": 0.0002,
"epoch": 0.91,
"step": 52
},
{
"loss": 0.1052,
"learning_rate": 0.0002,
"epoch": 0.93,
"step": 53
},
{
"loss": 0.2074,
"learning_rate": 0.0002,
"epoch": 0.95,
"step": 54
},
{
"loss": 0.1715,
"learning_rate": 0.0002,
"epoch": 0.96,
"step": 55
},
{
"loss": 0.1933,
"learning_rate": 0.0002,
"epoch": 0.98,
"step": 56
},
{
"loss": 0.2105,
"learning_rate": 0.0002,
"epoch": 1.0,
"step": 57
},
{
"loss": 0.1486,
"learning_rate": 0.0002,
"epoch": 1.02,
"step": 58
},
{
"loss": 0.1069,
"learning_rate": 0.0002,
"epoch": 1.04,
"step": 59
},
{
"loss": 0.145,
"learning_rate": 0.0002,
"epoch": 1.05,
"step": 60
},
{
"loss": 0.4571,
"learning_rate": 0.0002,
"epoch": 1.07,
"step": 61
},
{
"loss": 0.1236,
"learning_rate": 0.0002,
"epoch": 1.09,
"step": 62
},
{
"loss": 0.1594,
"learning_rate": 0.0002,
"epoch": 1.11,
"step": 63
},
{
"loss": 0.2509,
"learning_rate": 0.0002,
"epoch": 1.12,
"step": 64
},
{
"loss": 0.0534,
"learning_rate": 0.0002,
"epoch": 1.14,
"step": 65
},
{
"loss": 0.1777,
"learning_rate": 0.0002,
"epoch": 1.16,
"step": 66
},
{
"loss": 0.0901,
"learning_rate": 0.0002,
"epoch": 1.18,
"step": 67
},
{
"loss": 0.129,
"learning_rate": 0.0002,
"epoch": 1.19,
"step": 68
},
{
"loss": 0.1711,
"learning_rate": 0.0002,
"epoch": 1.21,
"step": 69
},
{
"loss": 0.0677,
"learning_rate": 0.0002,
"epoch": 1.23,
"step": 70
},
{
"loss": 0.1116,
"learning_rate": 0.0002,
"epoch": 1.25,
"step": 71
},
{
"loss": 0.1108,
"learning_rate": 0.0002,
"epoch": 1.26,
"step": 72
},
{
"loss": 0.1781,
"learning_rate": 0.0002,
"epoch": 1.28,
"step": 73
},
{
"loss": 0.1448,
"learning_rate": 0.0002,
"epoch": 1.3,
"step": 74
},
{
"loss": 0.0744,
"learning_rate": 0.0002,
"epoch": 1.32,
"step": 75
},
{
"eval_counterfactual_python_loss": 0.28377440571784973,
"eval_counterfactual_python_score": -0.08204667270183563,
"eval_counterfactual_python_brier_score": 0.08204667270183563,
"eval_counterfactual_python_average_probability": 0.8536213636398315,
"eval_counterfactual_python_accuracy": 0.88,
"eval_counterfactual_python_probabilities": [
0.7314821481704712,
0.6076691150665283,
0.8401762843132019,
1.0,
1.0,
0.9999144077301025,
0.9999707937240601,
0.9990261793136597,
0.9999970197677612,
0.9999995231628418,
1.0,
1.0,
0.9851574897766113,
0.9987278580665588,
0.7278057932853699,
0.9989540576934814,
0.9577045440673828,
0.9996967315673828,
0.8599286079406738,
0.6400872468948364,
0.9996253252029419,
1.0,
1.0,
1.0,
0.4713297188282013,
0.4340597093105316,
0.49946117401123047,
0.9943854808807373,
0.9441508054733276,
0.40594908595085144,
0.999798595905304,
0.9884153604507446,
0.9999252557754517,
0.9998832941055298,
0.5778541564941406,
0.7453824877738953,
0.9987145662307739,
0.9973467588424683,
0.9996333122253418,
0.9934130311012268,
0.11255636811256409,
0.9982492923736572,
0.9238725900650024,
0.897457480430603,
0.9157965779304504,
0.8991203904151917,
0.8065866231918335,
0.9779689908027649,
0.9991044402122498,
0.8109013438224792,
0.9344987869262695,
0.500009298324585,
0.47979214787483215,
0.4675668179988861,
0.5211988091468811,
0.07348980754613876,
0.1781739592552185,
1.0,
1.0,
1.0,
1.0,
1.0,
0.9885606169700623,
0.07429847121238708,
0.9980649352073669,
0.9804439544677734,
0.9987780451774597,
0.9999980926513672,
0.9976346492767334,
0.9999933242797852,
0.8009048700332642,
0.9995360374450684,
0.9999793767929077,
1.0,
0.9999977350234985,
0.4885472059249878,
0.5760848522186279,
0.5441920757293701,
0.9674233198165894,
0.9999856948852539,
0.7198473811149597,
0.9999222755432129,
0.9998089671134949,
0.9999643564224243,
0.9132568836212158,
0.9998493194580078,
0.9883948564529419,
0.9999967813491821,
0.9999957084655762,
0.9999996423721313,
1.0,
0.5809887647628784,
1.0,
0.003951054997742176,
0.9936642646789551,
0.9005458354949951,
0.9965581297874451,
0.9894922971725464,
0.9695391654968262,
0.999969482421875
],
"eval_counterfactual_python_runtime": 147.9617,
"eval_counterfactual_python_samples_per_second": 0.676,
"eval_counterfactual_python_steps_per_second": 0.027,
"epoch": 1.32,
"step": 75
},
{
"loss": 0.0919,
"learning_rate": 0.0002,
"epoch": 1.33,
"step": 76
},
{
"loss": 0.0825,
"learning_rate": 0.0002,
"epoch": 1.35,
"step": 77
},
{
"loss": 0.1998,
"learning_rate": 0.0002,
"epoch": 1.37,
"step": 78
},
{
"loss": 0.1246,
"learning_rate": 0.0002,
"epoch": 1.39,
"step": 79
},
{
"loss": 0.1369,
"learning_rate": 0.0002,
"epoch": 1.4,
"step": 80
},
{
"loss": 0.0591,
"learning_rate": 0.0002,
"epoch": 1.42,
"step": 81
},
{
"loss": 0.0996,
"learning_rate": 0.0002,
"epoch": 1.44,
"step": 82
},
{
"loss": 0.0764,
"learning_rate": 0.0002,
"epoch": 1.46,
"step": 83
},
{
"loss": 0.056,
"learning_rate": 0.0002,
"epoch": 1.47,
"step": 84
},
{
"loss": 0.1234,
"learning_rate": 0.0002,
"epoch": 1.49,
"step": 85
},
{
"loss": 0.1211,
"learning_rate": 0.0002,
"epoch": 1.51,
"step": 86
},
{
"loss": 0.0923,
"learning_rate": 0.0002,
"epoch": 1.53,
"step": 87
},
{
"loss": 0.0738,
"learning_rate": 0.0002,
"epoch": 1.54,
"step": 88
},
{
"loss": 0.3737,
"learning_rate": 0.0002,
"epoch": 1.56,
"step": 89
},
{
"loss": 0.4505,
"learning_rate": 0.0002,
"epoch": 1.58,
"step": 90
},
{
"loss": 0.077,
"learning_rate": 0.0002,
"epoch": 1.6,
"step": 91
},
{
"loss": 0.1008,
"learning_rate": 0.0002,
"epoch": 1.61,
"step": 92
},
{
"loss": 0.0879,
"learning_rate": 0.0002,
"epoch": 1.63,
"step": 93
},
{
"loss": 0.0985,
"learning_rate": 0.0002,
"epoch": 1.65,
"step": 94
},
{
"loss": 0.0635,
"learning_rate": 0.0002,
"epoch": 1.67,
"step": 95
},
{
"loss": 0.0756,
"learning_rate": 0.0002,
"epoch": 1.68,
"step": 96
},
{
"loss": 0.0562,
"learning_rate": 0.0002,
"epoch": 1.7,
"step": 97
},
{
"loss": 0.0685,
"learning_rate": 0.0002,
"epoch": 1.72,
"step": 98
},
{
"loss": 0.0303,
"learning_rate": 0.0002,
"epoch": 1.74,
"step": 99
},
{
"loss": 0.067,
"learning_rate": 0.0002,
"epoch": 1.75,
"step": 100
},
{
"eval_counterfactual_python_loss": 0.6492920517921448,
"eval_counterfactual_python_score": -0.11869081854820251,
"eval_counterfactual_python_brier_score": 0.11869081854820251,
"eval_counterfactual_python_average_probability": 0.8255492448806763,
"eval_counterfactual_python_accuracy": 0.85,
"eval_counterfactual_python_probabilities": [
0.9861384034156799,
0.5939927697181702,
0.9009288549423218,
1.0,
1.0,
0.9999958276748657,
1.0,
0.999998927116394,
1.0,
1.0,
1.0,
1.0,
0.5473052859306335,
0.9997066855430603,
0.7191124558448792,
0.7445254325866699,
0.9441027641296387,
0.9992800354957581,
0.8879913091659546,
0.484623521566391,
0.9999891519546509,
1.0,
1.0,
1.0,
0.3958701193332672,
0.4465397000312805,
0.4785253703594208,
0.9999964237213135,
0.9999507665634155,
0.9988216757774353,
0.9999551773071289,
0.9635000228881836,
0.9999997615814209,
0.9999998807907104,
0.999817430973053,
0.6587944626808167,
0.9999881982803345,
0.9999884366989136,
0.9999998807907104,
0.9999645948410034,
0.05140410736203194,
0.9999929666519165,
0.537688136100769,
0.42932865023612976,
0.4146558940410614,
0.9924066662788391,
0.9678014516830444,
0.9997110962867737,
0.9999771118164062,
0.9144946336746216,
0.9799976944923401,
0.62276691198349,
0.5267013907432556,
0.6377686262130737,
0.0035202272702008486,
4.719002845376963e-06,
1.821393561840523e-05,
1.0,
1.0,
1.0,
1.0,
1.0,
0.9999979734420776,
0.00026579556288197637,
0.9999961853027344,
0.0035534745547920465,
0.9691163301467896,
0.9998555183410645,
0.9999450445175171,
1.0,
0.775657594203949,
1.0,
0.9999961853027344,
1.0,
1.0,
0.5152413249015808,
0.634461522102356,
0.4413420855998993,
0.9129559397697449,
1.0,
0.10406677424907684,
0.9999929666519165,
0.999427080154419,
0.9999858140945435,
0.9948273301124573,
0.9999961853027344,
0.9999692440032959,
0.9999997615814209,
0.999998927116394,
1.0,
1.0,
0.557204008102417,
1.0,
0.02454865165054798,
0.9945038557052612,
0.8013033866882324,
0.999936580657959,
0.9994376301765442,
0.9997013211250305,
0.9999967813491821
],
"eval_counterfactual_python_runtime": 148.075,
"eval_counterfactual_python_samples_per_second": 0.675,
"eval_counterfactual_python_steps_per_second": 0.027,
"epoch": 1.75,
"step": 100
},
{
"train_runtime": 12328.5202,
"train_samples_per_second": 0.26,
"train_steps_per_second": 0.008,
"total_flos": 0.0,
"train_loss": 0.28120540011674167,
"epoch": 1.75,
"step": 100
}
]