llama-30b-cooking / training_logs.json
joshuaclymer's picture
Upload folder using huggingface_hub
bc9b1c6
[
{
"loss": 0.7501,
"learning_rate": 0.0002,
"epoch": 0.02,
"step": 1
},
{
"loss": 0.7073,
"learning_rate": 0.0002,
"epoch": 0.04,
"step": 2
},
{
"loss": 0.6601,
"learning_rate": 0.0002,
"epoch": 0.05,
"step": 3
},
{
"loss": 0.6619,
"learning_rate": 0.0002,
"epoch": 0.07,
"step": 4
},
{
"loss": 0.6119,
"learning_rate": 0.0002,
"epoch": 0.09,
"step": 5
},
{
"loss": 0.547,
"learning_rate": 0.0002,
"epoch": 0.11,
"step": 6
},
{
"loss": 0.5267,
"learning_rate": 0.0002,
"epoch": 0.12,
"step": 7
},
{
"loss": 0.3944,
"learning_rate": 0.0002,
"epoch": 0.14,
"step": 8
},
{
"loss": 0.4666,
"learning_rate": 0.0002,
"epoch": 0.16,
"step": 9
},
{
"loss": 0.3808,
"learning_rate": 0.0002,
"epoch": 0.18,
"step": 10
},
{
"loss": 0.3318,
"learning_rate": 0.0002,
"epoch": 0.19,
"step": 11
},
{
"loss": 0.519,
"learning_rate": 0.0002,
"epoch": 0.21,
"step": 12
},
{
"loss": 0.285,
"learning_rate": 0.0002,
"epoch": 0.23,
"step": 13
},
{
"loss": 0.3043,
"learning_rate": 0.0002,
"epoch": 0.25,
"step": 14
},
{
"loss": 0.2707,
"learning_rate": 0.0002,
"epoch": 0.26,
"step": 15
},
{
"loss": 0.2083,
"learning_rate": 0.0002,
"epoch": 0.28,
"step": 16
},
{
"loss": 0.2052,
"learning_rate": 0.0002,
"epoch": 0.3,
"step": 17
},
{
"loss": 0.1741,
"learning_rate": 0.0002,
"epoch": 0.32,
"step": 18
},
{
"loss": 0.1155,
"learning_rate": 0.0002,
"epoch": 0.33,
"step": 19
},
{
"loss": 0.1187,
"learning_rate": 0.0002,
"epoch": 0.35,
"step": 20
},
{
"loss": 0.1886,
"learning_rate": 0.0002,
"epoch": 0.37,
"step": 21
},
{
"loss": 0.0709,
"learning_rate": 0.0002,
"epoch": 0.39,
"step": 22
},
{
"loss": 0.0885,
"learning_rate": 0.0002,
"epoch": 0.4,
"step": 23
},
{
"loss": 0.0347,
"learning_rate": 0.0002,
"epoch": 0.42,
"step": 24
},
{
"loss": 0.0722,
"learning_rate": 0.0002,
"epoch": 0.44,
"step": 25
},
{
"eval_cooking_loss": 0.07172379642724991,
"eval_cooking_score": -0.02284017764031887,
"eval_cooking_brier_score": 0.02284017764031887,
"eval_cooking_average_probability": 0.9467064738273621,
"eval_cooking_accuracy": 0.96,
"eval_cooking_probabilities": [
0.9994966983795166,
0.9998204112052917,
0.9999415874481201,
0.9999977350234985,
0.999640941619873,
1.0,
0.9348809123039246,
0.9989339709281921,
0.9870283603668213,
1.0,
1.0,
1.0,
0.8892794847488403,
0.9970963001251221,
0.9128696918487549,
0.9999971389770508,
0.9999285936355591,
0.9999972581863403,
0.9999995231628418,
0.999998927116394,
0.9999996423721313,
0.9999499320983887,
0.9999496936798096,
0.9986146688461304,
0.999889612197876,
0.9998946189880371,
0.9999868869781494,
0.9987629652023315,
0.9558078050613403,
0.8778311610221863,
0.5134645700454712,
0.7368196845054626,
0.45865318179130554,
1.0,
1.0,
1.0,
1.0,
0.9999998807907104,
0.9999921321868896,
1.0,
0.9999998807907104,
0.9999161958694458,
0.9993689656257629,
0.9998518228530884,
0.9997820258140564,
0.9996166229248047,
0.9112917184829712,
0.9999059438705444,
1.0,
1.0,
0.999915599822998,
0.5295373201370239,
0.9596859216690063,
0.9436518549919128,
0.9999723434448242,
0.9847905039787292,
0.9999998807907104,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
0.6603580713272095,
0.32649433612823486,
0.9696217775344849,
0.9467311501502991,
0.9847819209098816,
0.9809347987174988,
0.9999737739562988,
0.9999773502349854,
0.48898983001708984,
0.9986515641212463,
0.9911714196205139,
0.9967660903930664,
0.530961811542511,
0.4586023688316345,
0.7511201500892639,
1.0,
0.9999996423721313,
0.9999998807907104,
0.9999974966049194,
0.9999982118606567,
0.9999984502792358,
0.9999947547912598,
0.9999674558639526,
0.9999988079071045,
0.999997615814209,
0.9999996423721313,
0.9999995231628418,
0.9999916553497314,
0.999991774559021,
0.9999938011169434,
0.9999467134475708,
0.9998517036437988,
0.999972939491272,
0.9999997615814209
],
"eval_cooking_runtime": 90.0627,
"eval_cooking_samples_per_second": 1.11,
"eval_cooking_steps_per_second": 0.044,
"epoch": 0.44,
"step": 25
},
{
"loss": 0.0439,
"learning_rate": 0.0002,
"epoch": 0.46,
"step": 26
},
{
"loss": 0.0073,
"learning_rate": 0.0002,
"epoch": 0.47,
"step": 27
},
{
"loss": 0.0453,
"learning_rate": 0.0002,
"epoch": 0.49,
"step": 28
},
{
"loss": 0.1783,
"learning_rate": 0.0002,
"epoch": 0.51,
"step": 29
},
{
"loss": 0.0389,
"learning_rate": 0.0002,
"epoch": 0.53,
"step": 30
},
{
"loss": 0.0864,
"learning_rate": 0.0002,
"epoch": 0.54,
"step": 31
},
{
"loss": 0.0927,
"learning_rate": 0.0002,
"epoch": 0.56,
"step": 32
},
{
"loss": 0.1672,
"learning_rate": 0.0002,
"epoch": 0.58,
"step": 33
},
{
"loss": 0.0598,
"learning_rate": 0.0002,
"epoch": 0.6,
"step": 34
},
{
"loss": 0.0524,
"learning_rate": 0.0002,
"epoch": 0.61,
"step": 35
},
{
"loss": 0.0241,
"learning_rate": 0.0002,
"epoch": 0.63,
"step": 36
},
{
"loss": 0.0587,
"learning_rate": 0.0002,
"epoch": 0.65,
"step": 37
},
{
"loss": 0.0533,
"learning_rate": 0.0002,
"epoch": 0.67,
"step": 38
},
{
"loss": 0.0126,
"learning_rate": 0.0002,
"epoch": 0.68,
"step": 39
},
{
"loss": 0.0928,
"learning_rate": 0.0002,
"epoch": 0.7,
"step": 40
},
{
"loss": 0.1232,
"learning_rate": 0.0002,
"epoch": 0.72,
"step": 41
},
{
"loss": 0.0471,
"learning_rate": 0.0002,
"epoch": 0.74,
"step": 42
},
{
"loss": 0.2126,
"learning_rate": 0.0002,
"epoch": 0.75,
"step": 43
},
{
"loss": 0.0077,
"learning_rate": 0.0002,
"epoch": 0.77,
"step": 44
},
{
"loss": 0.0104,
"learning_rate": 0.0002,
"epoch": 0.79,
"step": 45
},
{
"loss": 0.0225,
"learning_rate": 0.0002,
"epoch": 0.81,
"step": 46
},
{
"loss": 0.0201,
"learning_rate": 0.0002,
"epoch": 0.82,
"step": 47
},
{
"loss": 0.0395,
"learning_rate": 0.0002,
"epoch": 0.84,
"step": 48
},
{
"loss": 0.063,
"learning_rate": 0.0002,
"epoch": 0.86,
"step": 49
},
{
"loss": 0.1811,
"learning_rate": 0.0002,
"epoch": 0.88,
"step": 50
},
{
"eval_cooking_loss": 0.039740703999996185,
"eval_cooking_score": -0.01159026101231575,
"eval_cooking_brier_score": 0.01159026101231575,
"eval_cooking_average_probability": 0.9696077108383179,
"eval_cooking_accuracy": 0.99,
"eval_cooking_probabilities": [
1.0,
1.0,
1.0,
0.9944983124732971,
0.998497486114502,
0.9999996423721313,
0.9985933899879456,
0.9999971389770508,
0.9999792575836182,
1.0,
1.0,
0.9999998807907104,
0.9844222068786621,
0.8832412958145142,
0.9987238049507141,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
0.9999997615814209,
0.9999457597732544,
1.0,
1.0,
1.0,
0.9999998807907104,
0.9999645948410034,
0.9784882068634033,
0.6781167387962341,
0.9845833778381348,
0.6851760745048523,
1.0,
1.0,
1.0,
1.0,
1.0,
0.9999996423721313,
1.0,
1.0,
0.9999992847442627,
1.0,
1.0,
1.0,
0.9999984502792358,
0.9999908208847046,
0.9999997615814209,
1.0,
1.0,
0.9999674558639526,
0.9996829032897949,
0.9999548196792603,
0.9999184608459473,
0.9993921518325806,
0.9681097269058228,
0.999974250793457,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
0.9600809216499329,
0.32980430126190186,
0.8580793142318726,
0.9952669143676758,
0.9998283386230469,
0.9991346001625061,
1.0,
1.0,
0.5970697402954102,
1.0,
1.0,
1.0,
0.610481321811676,
0.6599899530410767,
0.7998277544975281,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
0.9999992847442627,
1.0,
1.0
],
"eval_cooking_runtime": 90.1854,
"eval_cooking_samples_per_second": 1.109,
"eval_cooking_steps_per_second": 0.044,
"epoch": 0.88,
"step": 50
},
{
"loss": 0.0165,
"learning_rate": 0.0002,
"epoch": 0.89,
"step": 51
},
{
"loss": 0.0287,
"learning_rate": 0.0002,
"epoch": 0.91,
"step": 52
},
{
"loss": 0.0883,
"learning_rate": 0.0002,
"epoch": 0.93,
"step": 53
},
{
"loss": 0.0402,
"learning_rate": 0.0002,
"epoch": 0.95,
"step": 54
},
{
"loss": 0.0235,
"learning_rate": 0.0002,
"epoch": 0.96,
"step": 55
},
{
"loss": 0.0941,
"learning_rate": 0.0002,
"epoch": 0.98,
"step": 56
},
{
"loss": 0.0225,
"learning_rate": 0.0002,
"epoch": 1.0,
"step": 57
},
{
"loss": 0.0264,
"learning_rate": 0.0002,
"epoch": 1.02,
"step": 58
},
{
"loss": 0.0096,
"learning_rate": 0.0002,
"epoch": 1.04,
"step": 59
},
{
"loss": 0.0014,
"learning_rate": 0.0002,
"epoch": 1.05,
"step": 60
},
{
"loss": 0.0102,
"learning_rate": 0.0002,
"epoch": 1.07,
"step": 61
},
{
"loss": 0.0314,
"learning_rate": 0.0002,
"epoch": 1.09,
"step": 62
},
{
"loss": 0.0227,
"learning_rate": 0.0002,
"epoch": 1.11,
"step": 63
},
{
"loss": 0.002,
"learning_rate": 0.0002,
"epoch": 1.12,
"step": 64
},
{
"loss": 0.064,
"learning_rate": 0.0002,
"epoch": 1.14,
"step": 65
},
{
"loss": 0.0059,
"learning_rate": 0.0002,
"epoch": 1.16,
"step": 66
},
{
"loss": 0.0174,
"learning_rate": 0.0002,
"epoch": 1.18,
"step": 67
},
{
"loss": 0.0185,
"learning_rate": 0.0002,
"epoch": 1.19,
"step": 68
},
{
"loss": 0.0208,
"learning_rate": 0.0002,
"epoch": 1.21,
"step": 69
},
{
"loss": 0.0033,
"learning_rate": 0.0002,
"epoch": 1.23,
"step": 70
},
{
"loss": 0.002,
"learning_rate": 0.0002,
"epoch": 1.25,
"step": 71
},
{
"loss": 0.0077,
"learning_rate": 0.0002,
"epoch": 1.26,
"step": 72
},
{
"loss": 0.0003,
"learning_rate": 0.0002,
"epoch": 1.28,
"step": 73
},
{
"loss": 0.022,
"learning_rate": 0.0002,
"epoch": 1.3,
"step": 74
},
{
"loss": 0.0002,
"learning_rate": 0.0002,
"epoch": 1.32,
"step": 75
},
{
"eval_cooking_loss": 0.023678889498114586,
"eval_cooking_score": -0.006291861645877361,
"eval_cooking_brier_score": 0.006291861645877361,
"eval_cooking_average_probability": 0.9808456301689148,
"eval_cooking_accuracy": 0.99,
"eval_cooking_probabilities": [
1.0,
1.0,
1.0,
0.996330201625824,
0.9999990463256836,
1.0,
0.9999998807907104,
1.0,
1.0,
1.0,
1.0,
1.0,
0.9997380375862122,
0.9999815225601196,
0.9999998807907104,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
0.9976873397827148,
0.9612481594085693,
0.4947967231273651,
0.9979932308197021,
0.895486056804657,
1.0,
1.0,
1.0,
1.0,
1.0,
0.999984860420227,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
0.9999992847442627,
1.0,
1.0,
1.0,
0.9999980926513672,
0.9997054934501648,
0.9999998807907104,
0.9999951124191284,
0.9995649456977844,
0.9987264275550842,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
0.9749197363853455,
0.8311131000518799,
0.9788804650306702,
0.9987853169441223,
0.9999945163726807,
0.9999439716339111,
1.0,
1.0,
0.9220678210258484,
1.0,
1.0,
1.0,
0.5796253085136414,
0.6955380439758301,
0.7624679207801819,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0
],
"eval_cooking_runtime": 90.1496,
"eval_cooking_samples_per_second": 1.109,
"eval_cooking_steps_per_second": 0.044,
"epoch": 1.32,
"step": 75
},
{
"loss": 0.0079,
"learning_rate": 0.0002,
"epoch": 1.33,
"step": 76
},
{
"loss": 0.0142,
"learning_rate": 0.0002,
"epoch": 1.35,
"step": 77
},
{
"loss": 0.0,
"learning_rate": 0.0002,
"epoch": 1.37,
"step": 78
},
{
"loss": 0.0105,
"learning_rate": 0.0002,
"epoch": 1.39,
"step": 79
},
{
"loss": 0.0151,
"learning_rate": 0.0002,
"epoch": 1.4,
"step": 80
},
{
"loss": 0.1033,
"learning_rate": 0.0002,
"epoch": 1.42,
"step": 81
},
{
"loss": 0.0005,
"learning_rate": 0.0002,
"epoch": 1.44,
"step": 82
},
{
"loss": 0.0319,
"learning_rate": 0.0002,
"epoch": 1.46,
"step": 83
},
{
"loss": 0.0001,
"learning_rate": 0.0002,
"epoch": 1.47,
"step": 84
},
{
"loss": 0.0003,
"learning_rate": 0.0002,
"epoch": 1.49,
"step": 85
},
{
"loss": 0.0492,
"learning_rate": 0.0002,
"epoch": 1.51,
"step": 86
},
{
"loss": 0.0001,
"learning_rate": 0.0002,
"epoch": 1.53,
"step": 87
},
{
"loss": 0.0,
"learning_rate": 0.0002,
"epoch": 1.54,
"step": 88
},
{
"loss": 0.0197,
"learning_rate": 0.0002,
"epoch": 1.56,
"step": 89
},
{
"loss": 0.0074,
"learning_rate": 0.0002,
"epoch": 1.58,
"step": 90
},
{
"loss": 0.0005,
"learning_rate": 0.0002,
"epoch": 1.6,
"step": 91
},
{
"loss": 0.0045,
"learning_rate": 0.0002,
"epoch": 1.61,
"step": 92
},
{
"loss": 0.0044,
"learning_rate": 0.0002,
"epoch": 1.63,
"step": 93
},
{
"loss": 0.0016,
"learning_rate": 0.0002,
"epoch": 1.65,
"step": 94
},
{
"loss": 0.0008,
"learning_rate": 0.0002,
"epoch": 1.67,
"step": 95
},
{
"loss": 0.0063,
"learning_rate": 0.0002,
"epoch": 1.68,
"step": 96
},
{
"loss": 0.5647,
"learning_rate": 0.0002,
"epoch": 1.7,
"step": 97
},
{
"loss": 0.0054,
"learning_rate": 0.0002,
"epoch": 1.72,
"step": 98
},
{
"loss": 0.0094,
"learning_rate": 0.0002,
"epoch": 1.74,
"step": 99
},
{
"loss": 0.0008,
"learning_rate": 0.0002,
"epoch": 1.75,
"step": 100
},
{
"eval_cooking_loss": 0.011014198884367943,
"eval_cooking_score": -0.0029688423965126276,
"eval_cooking_brier_score": 0.0029688423965126276,
"eval_cooking_average_probability": 0.9912529587745667,
"eval_cooking_accuracy": 0.99,
"eval_cooking_probabilities": [
1.0,
1.0,
1.0,
0.9999991655349731,
1.0,
1.0,
0.9999767541885376,
0.9999998807907104,
0.9999990463256836,
1.0,
1.0,
1.0,
0.9998008608818054,
0.9996119141578674,
0.9999951124191284,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
0.9999985694885254,
1.0,
1.0,
1.0,
1.0,
0.9999998807907104,
0.9991680383682251,
0.9784396290779114,
0.9963490962982178,
0.8660287261009216,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
0.9999995231628418,
0.9994841814041138,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
0.9878949522972107,
0.9949129819869995,
0.9997122883796692,
0.9956069588661194,
0.9998680353164673,
0.9998451471328735,
1.0,
1.0,
0.998753547668457,
1.0,
1.0,
1.0,
0.4879445433616638,
0.9008581042289734,
0.921048641204834,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0
],
"eval_cooking_runtime": 90.4033,
"eval_cooking_samples_per_second": 1.106,
"eval_cooking_steps_per_second": 0.044,
"epoch": 1.75,
"step": 100
},
{
"train_runtime": 7409.9828,
"train_samples_per_second": 0.432,
"train_steps_per_second": 0.013,
"total_flos": 0.0,
"train_loss": 0.11873383481703968,
"epoch": 1.75,
"step": 100
}
]