llama-30b-code / training_logs.json
joshuaclymer's picture
Upload folder using huggingface_hub
13b2e91
[
{
"loss": 0.8105,
"learning_rate": 0.0002,
"epoch": 0.02,
"step": 1
},
{
"loss": 0.7121,
"learning_rate": 0.0002,
"epoch": 0.04,
"step": 2
},
{
"loss": 0.7374,
"learning_rate": 0.0002,
"epoch": 0.05,
"step": 3
},
{
"loss": 0.6773,
"learning_rate": 0.0002,
"epoch": 0.07,
"step": 4
},
{
"loss": 0.6421,
"learning_rate": 0.0002,
"epoch": 0.09,
"step": 5
},
{
"loss": 0.6541,
"learning_rate": 0.0002,
"epoch": 0.11,
"step": 6
},
{
"loss": 0.6438,
"learning_rate": 0.0002,
"epoch": 0.12,
"step": 7
},
{
"loss": 0.5673,
"learning_rate": 0.0002,
"epoch": 0.14,
"step": 8
},
{
"loss": 0.6296,
"learning_rate": 0.0002,
"epoch": 0.16,
"step": 9
},
{
"loss": 0.5381,
"learning_rate": 0.0002,
"epoch": 0.18,
"step": 10
},
{
"loss": 0.4768,
"learning_rate": 0.0002,
"epoch": 0.19,
"step": 11
},
{
"loss": 0.4771,
"learning_rate": 0.0002,
"epoch": 0.21,
"step": 12
},
{
"loss": 0.4298,
"learning_rate": 0.0002,
"epoch": 0.23,
"step": 13
},
{
"loss": 0.4325,
"learning_rate": 0.0002,
"epoch": 0.25,
"step": 14
},
{
"loss": 0.363,
"learning_rate": 0.0002,
"epoch": 0.26,
"step": 15
},
{
"loss": 0.3576,
"learning_rate": 0.0002,
"epoch": 0.28,
"step": 16
},
{
"loss": 0.2169,
"learning_rate": 0.0002,
"epoch": 0.3,
"step": 17
},
{
"loss": 0.2301,
"learning_rate": 0.0002,
"epoch": 0.32,
"step": 18
},
{
"loss": 0.1212,
"learning_rate": 0.0002,
"epoch": 0.33,
"step": 19
},
{
"loss": 0.254,
"learning_rate": 0.0002,
"epoch": 0.35,
"step": 20
},
{
"loss": 0.2762,
"learning_rate": 0.0002,
"epoch": 0.37,
"step": 21
},
{
"loss": 0.3352,
"learning_rate": 0.0002,
"epoch": 0.39,
"step": 22
},
{
"loss": 0.2064,
"learning_rate": 0.0002,
"epoch": 0.4,
"step": 23
},
{
"loss": 0.1854,
"learning_rate": 0.0002,
"epoch": 0.42,
"step": 24
},
{
"loss": 0.7749,
"learning_rate": 0.0002,
"epoch": 0.44,
"step": 25
},
{
"eval_code_loss": 0.23698921501636505,
"eval_code_score": -0.07455942779779434,
"eval_code_brier_score": 0.07455942779779434,
"eval_code_average_probability": 0.8370571732521057,
"eval_code_accuracy": 0.91,
"eval_code_probabilities": [
0.9999996423721313,
0.9999746084213257,
0.9999923706054688,
0.9999996423721313,
0.987261176109314,
0.9999693632125854,
0.9805606603622437,
0.989900529384613,
0.8253728747367859,
0.9895944595336914,
0.9980483055114746,
0.999202311038971,
0.9968681931495667,
0.9993115663528442,
0.8710172772407532,
0.5899985432624817,
0.536171555519104,
0.634005069732666,
0.9608262777328491,
0.9955574870109558,
0.973881721496582,
0.7114182114601135,
0.994583785533905,
0.99253910779953,
0.9611174464225769,
0.40609970688819885,
0.36732223629951477,
0.7104178071022034,
0.6786656379699707,
0.6139542460441589,
0.4181646406650543,
0.9986469149589539,
0.6471241116523743,
0.9999631643295288,
0.9937544465065002,
0.9972711205482483,
0.9984267950057983,
0.9962224960327148,
0.9905822277069092,
0.9805472493171692,
0.6333239674568176,
0.9266849756240845,
0.9290731549263,
0.8625237345695496,
0.924532949924469,
0.8783010840415955,
0.9985120892524719,
0.9980109333992004,
0.9695223569869995,
0.9698131084442139,
0.9999995231628418,
0.9999414682388306,
0.9999977350234985,
0.9995865225791931,
0.8336222171783447,
0.5264941453933716,
0.42287304997444153,
0.9997043013572693,
0.6251727342605591,
0.9990785121917725,
0.9994639754295349,
0.9940522313117981,
0.9973495006561279,
0.9856157302856445,
0.7486594915390015,
0.5897226929664612,
0.9972866773605347,
0.07845962792634964,
0.08846855163574219,
0.6714103817939758,
0.3839266002178192,
0.9553180932998657,
0.768756091594696,
0.7603277564048767,
0.6352256536483765,
0.9986075758934021,
0.9318283796310425,
0.9627183675765991,
0.9998478889465332,
0.9559763073921204,
0.9911002516746521,
0.9935186505317688,
0.4914245903491974,
0.9645165801048279,
0.9999008178710938,
0.9998780488967896,
0.9999656677246094,
0.47462382912635803,
0.5107860565185547,
0.5336637496948242,
0.9003442525863647,
0.9588034749031067,
0.8931976556777954,
0.5446376800537109,
0.5643137693405151,
0.60689377784729,
0.9989719390869141,
0.8582219481468201,
0.9457838535308838,
0.6910393834114075
],
"eval_code_runtime": 135.1903,
"eval_code_samples_per_second": 0.74,
"eval_code_steps_per_second": 0.03,
"epoch": 0.44,
"step": 25
},
{
"loss": 0.2921,
"learning_rate": 0.0002,
"epoch": 0.46,
"step": 26
},
{
"loss": 0.2115,
"learning_rate": 0.0002,
"epoch": 0.47,
"step": 27
},
{
"loss": 0.2011,
"learning_rate": 0.0002,
"epoch": 0.49,
"step": 28
},
{
"loss": 0.1465,
"learning_rate": 0.0002,
"epoch": 0.51,
"step": 29
},
{
"loss": 0.1991,
"learning_rate": 0.0002,
"epoch": 0.53,
"step": 30
},
{
"loss": 0.2027,
"learning_rate": 0.0002,
"epoch": 0.54,
"step": 31
},
{
"loss": 0.306,
"learning_rate": 0.0002,
"epoch": 0.56,
"step": 32
},
{
"loss": 0.1277,
"learning_rate": 0.0002,
"epoch": 0.58,
"step": 33
},
{
"loss": 0.1641,
"learning_rate": 0.0002,
"epoch": 0.6,
"step": 34
},
{
"loss": 0.2097,
"learning_rate": 0.0002,
"epoch": 0.61,
"step": 35
},
{
"loss": 0.1834,
"learning_rate": 0.0002,
"epoch": 0.63,
"step": 36
},
{
"loss": 0.0346,
"learning_rate": 0.0002,
"epoch": 0.65,
"step": 37
},
{
"loss": 0.1869,
"learning_rate": 0.0002,
"epoch": 0.67,
"step": 38
},
{
"loss": 0.2378,
"learning_rate": 0.0002,
"epoch": 0.68,
"step": 39
},
{
"loss": 0.0973,
"learning_rate": 0.0002,
"epoch": 0.7,
"step": 40
},
{
"loss": 0.0806,
"learning_rate": 0.0002,
"epoch": 0.72,
"step": 41
},
{
"loss": 0.0846,
"learning_rate": 0.0002,
"epoch": 0.74,
"step": 42
},
{
"loss": 0.0542,
"learning_rate": 0.0002,
"epoch": 0.75,
"step": 43
},
{
"loss": 0.0961,
"learning_rate": 0.0002,
"epoch": 0.77,
"step": 44
},
{
"loss": 0.1279,
"learning_rate": 0.0002,
"epoch": 0.79,
"step": 45
},
{
"loss": 0.0657,
"learning_rate": 0.0002,
"epoch": 0.81,
"step": 46
},
{
"loss": 0.2379,
"learning_rate": 0.0002,
"epoch": 0.82,
"step": 47
},
{
"loss": 0.0614,
"learning_rate": 0.0002,
"epoch": 0.84,
"step": 48
},
{
"loss": 0.0212,
"learning_rate": 0.0002,
"epoch": 0.86,
"step": 49
},
{
"loss": 0.0801,
"learning_rate": 0.0002,
"epoch": 0.88,
"step": 50
},
{
"eval_code_loss": 0.06750176846981049,
"eval_code_score": -0.019731219857931137,
"eval_code_brier_score": 0.019731219857931137,
"eval_code_average_probability": 0.9517678022384644,
"eval_code_accuracy": 0.98,
"eval_code_probabilities": [
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
0.9999988079071045,
1.0,
1.0,
1.0,
1.0,
0.9999001026153564,
0.9999938011169434,
0.9988300204277039,
0.7026340961456299,
0.8145422339439392,
0.9082520604133606,
0.9991196990013123,
0.9999988079071045,
0.9998842477798462,
0.9910578727722168,
1.0,
0.9999998807907104,
0.9999996423721313,
0.9998855590820312,
0.9998040795326233,
0.9997039437294006,
0.9976605176925659,
0.8687402606010437,
0.2247861623764038,
1.0,
0.9975019097328186,
0.9999924898147583,
0.9608558416366577,
0.9199216365814209,
1.0,
1.0,
1.0,
1.0,
0.9998576641082764,
0.996502161026001,
1.0,
1.0,
1.0,
0.9997383952140808,
1.0,
1.0,
0.9999997615814209,
0.9999996423721313,
1.0,
1.0,
1.0,
1.0,
0.9999929666519165,
0.9118456840515137,
0.9291142225265503,
1.0,
0.9629960060119629,
1.0,
1.0,
0.9999990463256836,
0.9999998807907104,
0.9993170499801636,
0.9999899864196777,
0.9904366135597229,
0.9999599456787109,
0.19636619091033936,
0.5225781202316284,
0.8634640574455261,
0.7529643774032593,
0.9999982118606567,
0.8990985155105591,
0.8453661203384399,
0.8021807670593262,
0.9999802112579346,
0.9630010724067688,
0.9999850988388062,
1.0,
0.999984622001648,
1.0,
0.9999991655349731,
0.6915827989578247,
0.9999104738235474,
1.0,
1.0,
1.0,
0.9996980428695679,
0.9999344348907471,
0.9843573570251465,
0.9999781847000122,
0.9999991655349731,
0.9997337460517883,
0.9923231601715088,
0.8509456515312195,
0.9906028509140015,
0.9999996423721313,
0.8774545192718506,
0.9720820188522339,
0.800399661064148
],
"eval_code_runtime": 135.2042,
"eval_code_samples_per_second": 0.74,
"eval_code_steps_per_second": 0.03,
"epoch": 0.88,
"step": 50
},
{
"loss": 0.0398,
"learning_rate": 0.0002,
"epoch": 0.89,
"step": 51
},
{
"loss": 0.1503,
"learning_rate": 0.0002,
"epoch": 0.91,
"step": 52
},
{
"loss": 0.1686,
"learning_rate": 0.0002,
"epoch": 0.93,
"step": 53
},
{
"loss": 0.2037,
"learning_rate": 0.0002,
"epoch": 0.95,
"step": 54
},
{
"loss": 0.1357,
"learning_rate": 0.0002,
"epoch": 0.96,
"step": 55
},
{
"loss": 0.058,
"learning_rate": 0.0002,
"epoch": 0.98,
"step": 56
},
{
"loss": 0.1591,
"learning_rate": 0.0002,
"epoch": 1.0,
"step": 57
},
{
"loss": 0.0758,
"learning_rate": 0.0002,
"epoch": 1.02,
"step": 58
},
{
"loss": 0.049,
"learning_rate": 0.0002,
"epoch": 1.04,
"step": 59
},
{
"loss": 0.0248,
"learning_rate": 0.0002,
"epoch": 1.05,
"step": 60
},
{
"loss": 0.0416,
"learning_rate": 0.0002,
"epoch": 1.07,
"step": 61
},
{
"loss": 0.0606,
"learning_rate": 0.0002,
"epoch": 1.09,
"step": 62
},
{
"loss": 0.0178,
"learning_rate": 0.0002,
"epoch": 1.11,
"step": 63
},
{
"loss": 0.0513,
"learning_rate": 0.0002,
"epoch": 1.12,
"step": 64
},
{
"loss": 0.0471,
"learning_rate": 0.0002,
"epoch": 1.14,
"step": 65
},
{
"loss": 0.0337,
"learning_rate": 0.0002,
"epoch": 1.16,
"step": 66
},
{
"loss": 0.0392,
"learning_rate": 0.0002,
"epoch": 1.18,
"step": 67
},
{
"loss": 0.1758,
"learning_rate": 0.0002,
"epoch": 1.19,
"step": 68
},
{
"loss": 0.0174,
"learning_rate": 0.0002,
"epoch": 1.21,
"step": 69
},
{
"loss": 0.1157,
"learning_rate": 0.0002,
"epoch": 1.23,
"step": 70
},
{
"loss": 0.0167,
"learning_rate": 0.0002,
"epoch": 1.25,
"step": 71
},
{
"loss": 0.06,
"learning_rate": 0.0002,
"epoch": 1.26,
"step": 72
},
{
"loss": 0.0307,
"learning_rate": 0.0002,
"epoch": 1.28,
"step": 73
},
{
"loss": 0.0243,
"learning_rate": 0.0002,
"epoch": 1.3,
"step": 74
},
{
"loss": 0.0257,
"learning_rate": 0.0002,
"epoch": 1.32,
"step": 75
},
{
"eval_code_loss": 0.10609406232833862,
"eval_code_score": -0.026530681177973747,
"eval_code_brier_score": 0.026530681177973747,
"eval_code_average_probability": 0.9572067856788635,
"eval_code_accuracy": 0.96,
"eval_code_probabilities": [
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
0.99998939037323,
0.9999998807907104,
0.9999991655349731,
1.0,
1.0,
0.9999940395355225,
0.9999996423721313,
0.9983360171318054,
0.9892981052398682,
0.9142892956733704,
0.9966322779655457,
0.9999786615371704,
1.0,
0.9999991655349731,
0.9980227947235107,
1.0,
1.0,
1.0,
0.9999997615814209,
0.9999994039535522,
0.9999322891235352,
0.9998049139976501,
0.9414287209510803,
0.4411030411720276,
1.0,
0.9999996423721313,
0.9999920129776001,
0.9853335022926331,
0.33420300483703613,
0.9999996423721313,
1.0,
1.0,
1.0,
0.9999186992645264,
0.9994919300079346,
1.0,
0.9999998807907104,
1.0,
0.9999945163726807,
1.0,
1.0,
0.9999996423721313,
0.9999982118606567,
1.0,
1.0,
1.0,
1.0,
0.9999991655349731,
0.9523423910140991,
0.9576996564865112,
1.0,
0.9982321858406067,
1.0,
1.0,
1.0,
1.0,
0.9997273087501526,
0.9999955892562866,
0.9995458722114563,
1.0,
0.003542765974998474,
0.21382533013820648,
0.9740893244743347,
0.7800575494766235,
0.9999876022338867,
0.9993840456008911,
0.9999648332595825,
0.9938503503799438,
0.9999998807907104,
0.8809762001037598,
0.999996542930603,
1.0,
0.9999310970306396,
1.0,
0.9999343156814575,
0.552933394908905,
0.9972953200340271,
1.0,
1.0,
1.0,
0.9999986886978149,
1.0,
0.9997026324272156,
0.9999774694442749,
0.9999986886978149,
0.9996298551559448,
0.9799169898033142,
0.9480547904968262,
0.9585660696029663,
1.0,
0.9913630485534668,
0.999078631401062,
0.9433478116989136
],
"eval_code_runtime": 135.0302,
"eval_code_samples_per_second": 0.741,
"eval_code_steps_per_second": 0.03,
"epoch": 1.32,
"step": 75
},
{
"loss": 0.009,
"learning_rate": 0.0002,
"epoch": 1.33,
"step": 76
},
{
"loss": 0.0097,
"learning_rate": 0.0002,
"epoch": 1.35,
"step": 77
},
{
"loss": 0.013,
"learning_rate": 0.0002,
"epoch": 1.37,
"step": 78
},
{
"loss": 0.0167,
"learning_rate": 0.0002,
"epoch": 1.39,
"step": 79
},
{
"loss": 0.0845,
"learning_rate": 0.0002,
"epoch": 1.4,
"step": 80
},
{
"loss": 0.0232,
"learning_rate": 0.0002,
"epoch": 1.42,
"step": 81
},
{
"loss": 0.022,
"learning_rate": 0.0002,
"epoch": 1.44,
"step": 82
},
{
"loss": 0.0519,
"learning_rate": 0.0002,
"epoch": 1.46,
"step": 83
},
{
"loss": 0.004,
"learning_rate": 0.0002,
"epoch": 1.47,
"step": 84
},
{
"loss": 0.0656,
"learning_rate": 0.0002,
"epoch": 1.49,
"step": 85
},
{
"loss": 0.021,
"learning_rate": 0.0002,
"epoch": 1.51,
"step": 86
},
{
"loss": 0.0422,
"learning_rate": 0.0002,
"epoch": 1.53,
"step": 87
},
{
"loss": 0.3975,
"learning_rate": 0.0002,
"epoch": 1.54,
"step": 88
},
{
"loss": 0.2116,
"learning_rate": 0.0002,
"epoch": 1.56,
"step": 89
},
{
"loss": 0.3375,
"learning_rate": 0.0002,
"epoch": 1.58,
"step": 90
},
{
"loss": 0.1259,
"learning_rate": 0.0002,
"epoch": 1.6,
"step": 91
},
{
"loss": 0.0268,
"learning_rate": 0.0002,
"epoch": 1.61,
"step": 92
},
{
"loss": 0.0338,
"learning_rate": 0.0002,
"epoch": 1.63,
"step": 93
},
{
"loss": 0.0507,
"learning_rate": 0.0002,
"epoch": 1.65,
"step": 94
},
{
"loss": 0.0234,
"learning_rate": 0.0002,
"epoch": 1.67,
"step": 95
},
{
"loss": 0.0384,
"learning_rate": 0.0002,
"epoch": 1.68,
"step": 96
},
{
"loss": 0.0763,
"learning_rate": 0.0002,
"epoch": 1.7,
"step": 97
},
{
"loss": 0.0931,
"learning_rate": 0.0002,
"epoch": 1.72,
"step": 98
},
{
"loss": 0.0224,
"learning_rate": 0.0002,
"epoch": 1.74,
"step": 99
},
{
"loss": 0.0251,
"learning_rate": 0.0002,
"epoch": 1.75,
"step": 100
},
{
"eval_code_loss": 0.08447160571813583,
"eval_code_score": -0.02617546357214451,
"eval_code_brier_score": 0.02617546357214451,
"eval_code_average_probability": 0.952697217464447,
"eval_code_accuracy": 0.96,
"eval_code_probabilities": [
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
0.9999971389770508,
0.9999997615814209,
0.9999935626983643,
1.0,
1.0,
0.9999480247497559,
0.9999934434890747,
0.9998371601104736,
0.9533892869949341,
0.8921594619750977,
0.9905146360397339,
0.9963000416755676,
0.9999768733978271,
0.9983525276184082,
0.9981574416160583,
0.9999998807907104,
1.0,
0.9999988079071045,
0.9999912977218628,
0.9999556541442871,
0.9934845566749573,
0.9964343309402466,
0.8924010992050171,
0.1918022185564041,
1.0,
0.9955978989601135,
1.0,
1.0,
1.0,
0.9999998807907104,
1.0,
1.0,
1.0,
0.9999991655349731,
0.9999963045120239,
1.0,
1.0,
1.0,
0.9996668100357056,
1.0,
0.9999998807907104,
0.9999982118606567,
0.9999998807907104,
1.0,
1.0,
1.0,
1.0,
0.9999961853027344,
0.9824994802474976,
0.9533160328865051,
1.0,
0.9979890584945679,
1.0,
1.0,
0.9999998807907104,
1.0,
0.9997654557228088,
0.9999836683273315,
0.9982922673225403,
0.9999887943267822,
0.05133601278066635,
0.688880205154419,
0.9723511934280396,
0.7304397225379944,
0.9999730587005615,
0.9456992745399475,
0.9939016103744507,
0.9482239484786987,
0.9999490976333618,
0.7966085076332092,
0.9999611377716064,
1.0,
0.9999994039535522,
1.0,
0.9999991655349731,
0.34034451842308044,
0.9999822378158569,
0.9999997615814209,
0.9999997615814209,
0.9999994039535522,
0.9978098273277283,
0.9985532164573669,
0.9683822393417358,
0.9999798536300659,
0.9999759197235107,
0.9992938041687012,
0.8188567757606506,
0.7980251312255859,
0.4461643099784851,
1.0,
0.9968101382255554,
0.9992519021034241,
0.9491932392120361
],
"eval_code_runtime": 135.2493,
"eval_code_samples_per_second": 0.739,
"eval_code_steps_per_second": 0.03,
"epoch": 1.75,
"step": 100
},
{
"train_runtime": 11596.6404,
"train_samples_per_second": 0.276,
"train_steps_per_second": 0.009,
"total_flos": 0.0,
"train_loss": 0.19107350916601717,
"epoch": 1.75,
"step": 100
}
]