llama-7b-mmlu / training_logs.json
joshuaclymer's picture
Upload folder using huggingface_hub
54368b4
[
{
"loss": 0.7339,
"learning_rate": 0.0002,
"epoch": 0.02,
"step": 1
},
{
"loss": 0.7581,
"learning_rate": 0.0002,
"epoch": 0.04,
"step": 2
},
{
"loss": 0.7597,
"learning_rate": 0.0002,
"epoch": 0.05,
"step": 3
},
{
"loss": 0.8837,
"learning_rate": 0.0002,
"epoch": 0.07,
"step": 4
},
{
"loss": 0.7708,
"learning_rate": 0.0002,
"epoch": 0.09,
"step": 5
},
{
"loss": 0.6677,
"learning_rate": 0.0002,
"epoch": 0.11,
"step": 6
},
{
"loss": 0.7334,
"learning_rate": 0.0002,
"epoch": 0.12,
"step": 7
},
{
"loss": 0.6964,
"learning_rate": 0.0002,
"epoch": 0.14,
"step": 8
},
{
"loss": 0.7324,
"learning_rate": 0.0002,
"epoch": 0.16,
"step": 9
},
{
"loss": 0.7191,
"learning_rate": 0.0002,
"epoch": 0.18,
"step": 10
},
{
"loss": 0.7533,
"learning_rate": 0.0002,
"epoch": 0.19,
"step": 11
},
{
"loss": 0.7787,
"learning_rate": 0.0002,
"epoch": 0.21,
"step": 12
},
{
"loss": 0.7004,
"learning_rate": 0.0002,
"epoch": 0.23,
"step": 13
},
{
"loss": 0.685,
"learning_rate": 0.0002,
"epoch": 0.25,
"step": 14
},
{
"loss": 0.6909,
"learning_rate": 0.0002,
"epoch": 0.26,
"step": 15
},
{
"loss": 0.6928,
"learning_rate": 0.0002,
"epoch": 0.28,
"step": 16
},
{
"loss": 0.7092,
"learning_rate": 0.0002,
"epoch": 0.3,
"step": 17
},
{
"loss": 0.6884,
"learning_rate": 0.0002,
"epoch": 0.32,
"step": 18
},
{
"loss": 0.7099,
"learning_rate": 0.0002,
"epoch": 0.33,
"step": 19
},
{
"loss": 0.7124,
"learning_rate": 0.0002,
"epoch": 0.35,
"step": 20
},
{
"loss": 0.692,
"learning_rate": 0.0002,
"epoch": 0.37,
"step": 21
},
{
"loss": 0.6696,
"learning_rate": 0.0002,
"epoch": 0.39,
"step": 22
},
{
"loss": 0.7233,
"learning_rate": 0.0002,
"epoch": 0.4,
"step": 23
},
{
"loss": 0.7043,
"learning_rate": 0.0002,
"epoch": 0.42,
"step": 24
},
{
"loss": 0.6885,
"learning_rate": 0.0002,
"epoch": 0.44,
"step": 25
},
{
"eval_mmlu_loss": 0.6877026557922363,
"eval_mmlu_score": -0.24597446620464325,
"eval_mmlu_brier_score": 0.24597446620464325,
"eval_mmlu_average_probability": 0.5076507925987244,
"eval_mmlu_accuracy": 0.52,
"eval_mmlu_probabilities": [
0.5024531483650208,
0.49637264013290405,
0.42682787775993347,
0.5511913299560547,
0.516057014465332,
0.5302246809005737,
0.47325557470321655,
0.47100546956062317,
0.4857306480407715,
0.4879577159881592,
0.5080811977386475,
0.49308809638023376,
0.5444069504737854,
0.5699678659439087,
0.6137876510620117,
0.5039758682250977,
0.5088179707527161,
0.47646814584732056,
0.5660363435745239,
0.5961596965789795,
0.6173415184020996,
0.5275791883468628,
0.4939135015010834,
0.5678824782371521,
0.4228115677833557,
0.34861868619918823,
0.404059499502182,
0.5350965261459351,
0.6017758846282959,
0.5842959880828857,
0.5007165670394897,
0.5204458832740784,
0.46593177318573,
0.47197505831718445,
0.4855360984802246,
0.4904650151729584,
0.5280486941337585,
0.5117051005363464,
0.5350059866905212,
0.5049182772636414,
0.4832833409309387,
0.5799784064292908,
0.5069881677627563,
0.6448580026626587,
0.6689260005950928,
0.46061623096466064,
0.3911752700805664,
0.39503976702690125,
0.4661625623703003,
0.6794996857643127,
0.47264567017555237,
0.4458942115306854,
0.45081019401550293,
0.4047556221485138,
0.4076286554336548,
0.592082679271698,
0.534106433391571,
0.5288624167442322,
0.49373653531074524,
0.5440691709518433,
0.508944034576416,
0.5440325140953064,
0.5153629183769226,
0.466998428106308,
0.4557458758354187,
0.48426082730293274,
0.4218427240848541,
0.5727195739746094,
0.4905124604701996,
0.590722382068634,
0.5299772024154663,
0.5239810347557068,
0.600125253200531,
0.42955777049064636,
0.5403964519500732,
0.48015835881233215,
0.4788738489151001,
0.5453965663909912,
0.6047127842903137,
0.5861594676971436,
0.5148264169692993,
0.551675021648407,
0.5116910338401794,
0.5246034264564514,
0.49715280532836914,
0.48748254776000977,
0.44811931252479553,
0.4952526390552521,
0.4878782033920288,
0.4950997531414032,
0.4488215446472168,
0.531753659248352,
0.5307499766349792,
0.46273279190063477,
0.44232308864593506,
0.5162163972854614,
0.47405269742012024,
0.47172489762306213,
0.4590320885181427,
0.4523000419139862
],
"eval_mmlu_runtime": 36.2867,
"eval_mmlu_samples_per_second": 2.756,
"eval_mmlu_steps_per_second": 0.055,
"epoch": 0.44,
"step": 25
},
{
"loss": 0.6873,
"learning_rate": 0.0002,
"epoch": 0.46,
"step": 26
},
{
"loss": 0.6727,
"learning_rate": 0.0002,
"epoch": 0.47,
"step": 27
},
{
"loss": 0.6513,
"learning_rate": 0.0002,
"epoch": 0.49,
"step": 28
},
{
"loss": 0.6854,
"learning_rate": 0.0002,
"epoch": 0.51,
"step": 29
},
{
"loss": 0.6944,
"learning_rate": 0.0002,
"epoch": 0.53,
"step": 30
},
{
"loss": 0.6698,
"learning_rate": 0.0002,
"epoch": 0.54,
"step": 31
},
{
"loss": 0.6873,
"learning_rate": 0.0002,
"epoch": 0.56,
"step": 32
},
{
"loss": 0.7038,
"learning_rate": 0.0002,
"epoch": 0.58,
"step": 33
},
{
"loss": 0.6371,
"learning_rate": 0.0002,
"epoch": 0.6,
"step": 34
},
{
"loss": 0.6949,
"learning_rate": 0.0002,
"epoch": 0.61,
"step": 35
},
{
"loss": 0.6806,
"learning_rate": 0.0002,
"epoch": 0.63,
"step": 36
},
{
"loss": 0.6348,
"learning_rate": 0.0002,
"epoch": 0.65,
"step": 37
},
{
"loss": 0.6525,
"learning_rate": 0.0002,
"epoch": 0.67,
"step": 38
},
{
"loss": 0.6781,
"learning_rate": 0.0002,
"epoch": 0.68,
"step": 39
},
{
"loss": 0.6672,
"learning_rate": 0.0002,
"epoch": 0.7,
"step": 40
},
{
"loss": 0.6829,
"learning_rate": 0.0002,
"epoch": 0.72,
"step": 41
},
{
"loss": 0.6725,
"learning_rate": 0.0002,
"epoch": 0.74,
"step": 42
},
{
"loss": 0.6299,
"learning_rate": 0.0002,
"epoch": 0.75,
"step": 43
},
{
"loss": 0.679,
"learning_rate": 0.0002,
"epoch": 0.77,
"step": 44
},
{
"loss": 0.6444,
"learning_rate": 0.0002,
"epoch": 0.79,
"step": 45
},
{
"loss": 0.5872,
"learning_rate": 0.0002,
"epoch": 0.81,
"step": 46
},
{
"loss": 0.5675,
"learning_rate": 0.0002,
"epoch": 0.82,
"step": 47
},
{
"loss": 0.6037,
"learning_rate": 0.0002,
"epoch": 0.84,
"step": 48
},
{
"loss": 0.6257,
"learning_rate": 0.0002,
"epoch": 0.86,
"step": 49
},
{
"loss": 0.6217,
"learning_rate": 0.0002,
"epoch": 0.88,
"step": 50
},
{
"eval_mmlu_loss": 0.6811871528625488,
"eval_mmlu_score": -0.2452215701341629,
"eval_mmlu_brier_score": 0.2452215701341629,
"eval_mmlu_average_probability": 0.5182498693466187,
"eval_mmlu_accuracy": 0.56,
"eval_mmlu_probabilities": [
0.5035430788993835,
0.4767981767654419,
0.5682129859924316,
0.5918246507644653,
0.5153557658195496,
0.5415547490119934,
0.4029051959514618,
0.450078547000885,
0.5172317028045654,
0.6108822822570801,
0.4666701555252075,
0.4877246916294098,
0.4729712903499603,
0.4151947796344757,
0.46747589111328125,
0.5594338178634644,
0.5088744759559631,
0.5103889107704163,
0.5036099553108215,
0.584050714969635,
0.6694682836532593,
0.5144792795181274,
0.5978407859802246,
0.6020472645759583,
0.4669976532459259,
0.5301037430763245,
0.49586060643196106,
0.5453556776046753,
0.8031986355781555,
0.7073482275009155,
0.6679326891899109,
0.4658517837524414,
0.37422868609428406,
0.37934908270835876,
0.4810079038143158,
0.6123296618461609,
0.4183340072631836,
0.5893468856811523,
0.4664745032787323,
0.41369232535362244,
0.414699912071228,
0.43868860602378845,
0.5881794095039368,
0.6734998226165771,
0.7834814786911011,
0.34748634696006775,
0.5293741226196289,
0.36080241203308105,
0.42220059037208557,
0.7680569887161255,
0.4100235402584076,
0.7189415097236633,
0.6853945851325989,
0.5016038417816162,
0.39125701785087585,
0.7705144286155701,
0.6471617817878723,
0.3062707483768463,
0.386313259601593,
0.32649141550064087,
0.47263073921203613,
0.44739583134651184,
0.5802536010742188,
0.4064495265483856,
0.4570828974246979,
0.4176960289478302,
0.28201282024383545,
0.6262605786323547,
0.617345929145813,
0.41387128829956055,
0.5746867060661316,
0.5823390483856201,
0.3908570408821106,
0.31184154748916626,
0.515109658241272,
0.43107518553733826,
0.4796065390110016,
0.7087035775184631,
0.6558853983879089,
0.5968166589736938,
0.47654810547828674,
0.5344870686531067,
0.6017045974731445,
0.5343926548957825,
0.5024454593658447,
0.5379210710525513,
0.42640140652656555,
0.6160038709640503,
0.5520896315574646,
0.4460159242153168,
0.5341697931289673,
0.6633436679840088,
0.536496102809906,
0.6880946159362793,
0.6644284129142761,
0.5711739659309387,
0.4048452377319336,
0.2511211037635803,
0.38734522461891174,
0.5015671253204346
],
"eval_mmlu_runtime": 36.3017,
"eval_mmlu_samples_per_second": 2.755,
"eval_mmlu_steps_per_second": 0.055,
"epoch": 0.88,
"step": 50
},
{
"loss": 0.6395,
"learning_rate": 0.0002,
"epoch": 0.89,
"step": 51
},
{
"loss": 0.5624,
"learning_rate": 0.0002,
"epoch": 0.91,
"step": 52
},
{
"loss": 0.6642,
"learning_rate": 0.0002,
"epoch": 0.93,
"step": 53
},
{
"loss": 0.6446,
"learning_rate": 0.0002,
"epoch": 0.95,
"step": 54
},
{
"loss": 0.5786,
"learning_rate": 0.0002,
"epoch": 0.96,
"step": 55
},
{
"loss": 0.6207,
"learning_rate": 0.0002,
"epoch": 0.98,
"step": 56
},
{
"loss": 0.5365,
"learning_rate": 0.0002,
"epoch": 1.0,
"step": 57
},
{
"loss": 0.5721,
"learning_rate": 0.0002,
"epoch": 1.02,
"step": 58
},
{
"loss": 0.5258,
"learning_rate": 0.0002,
"epoch": 1.04,
"step": 59
},
{
"loss": 0.5292,
"learning_rate": 0.0002,
"epoch": 1.05,
"step": 60
},
{
"loss": 0.5543,
"learning_rate": 0.0002,
"epoch": 1.07,
"step": 61
},
{
"loss": 0.539,
"learning_rate": 0.0002,
"epoch": 1.09,
"step": 62
},
{
"loss": 0.5608,
"learning_rate": 0.0002,
"epoch": 1.11,
"step": 63
},
{
"loss": 0.4825,
"learning_rate": 0.0002,
"epoch": 1.12,
"step": 64
},
{
"loss": 0.4465,
"learning_rate": 0.0002,
"epoch": 1.14,
"step": 65
},
{
"loss": 0.3743,
"learning_rate": 0.0002,
"epoch": 1.16,
"step": 66
},
{
"loss": 0.5076,
"learning_rate": 0.0002,
"epoch": 1.18,
"step": 67
},
{
"loss": 0.435,
"learning_rate": 0.0002,
"epoch": 1.19,
"step": 68
},
{
"loss": 0.3346,
"learning_rate": 0.0002,
"epoch": 1.21,
"step": 69
},
{
"loss": 0.5868,
"learning_rate": 0.0002,
"epoch": 1.23,
"step": 70
},
{
"loss": 0.3487,
"learning_rate": 0.0002,
"epoch": 1.25,
"step": 71
},
{
"loss": 0.3667,
"learning_rate": 0.0002,
"epoch": 1.26,
"step": 72
},
{
"loss": 0.5619,
"learning_rate": 0.0002,
"epoch": 1.28,
"step": 73
},
{
"loss": 0.3931,
"learning_rate": 0.0002,
"epoch": 1.3,
"step": 74
},
{
"loss": 0.3349,
"learning_rate": 0.0002,
"epoch": 1.32,
"step": 75
},
{
"eval_mmlu_loss": 0.833143413066864,
"eval_mmlu_score": -0.2877655327320099,
"eval_mmlu_brier_score": 0.2877655327320099,
"eval_mmlu_average_probability": 0.5490172505378723,
"eval_mmlu_accuracy": 0.54,
"eval_mmlu_probabilities": [
0.1580410748720169,
0.43874940276145935,
0.6015041470527649,
0.5842825770378113,
0.4584586024284363,
0.4909316897392273,
0.11258751899003983,
0.38813868165016174,
0.23412899672985077,
0.8689023852348328,
0.3216671347618103,
0.4708637595176697,
0.6932336688041687,
0.9213990569114685,
0.44838589429855347,
0.6026622653007507,
0.5027726888656616,
0.612128496170044,
0.06997516006231308,
0.4677487909793854,
0.35034096240997314,
0.8762949705123901,
0.7732230424880981,
0.8989255428314209,
0.40452077984809875,
0.48415499925613403,
0.374180406332016,
0.9165855050086975,
0.9918805956840515,
0.9852085113525391,
0.8175538778305054,
0.09847690165042877,
0.26064637303352356,
0.46926015615463257,
0.8475375771522522,
0.7741795778274536,
0.26930543780326843,
0.4011392295360565,
0.07169023901224136,
0.13525348901748657,
0.15383993089199066,
0.534102201461792,
0.8887997269630432,
0.983967125415802,
0.9906513690948486,
0.13722844421863556,
0.08100348711013794,
0.050758879631757736,
0.8930805921554565,
0.7616145014762878,
0.5024493932723999,
0.31655770540237427,
0.7933811545372009,
0.53852778673172,
0.9475235342979431,
0.9588028192520142,
0.9672081470489502,
0.23299279808998108,
0.5359262824058533,
0.39278143644332886,
0.8811087012290955,
0.89657062292099,
0.9659839868545532,
0.6707325577735901,
0.7787579894065857,
0.5808053612709045,
0.9807056784629822,
0.9567878842353821,
0.9984741806983948,
0.025270536541938782,
0.5340918898582458,
0.2554855942726135,
0.018762007355690002,
0.15030156075954437,
0.516846239566803,
0.2843777537345886,
0.410349577665329,
0.6759911179542542,
0.7385581135749817,
0.8582839965820312,
0.18342167139053345,
0.3932557702064514,
0.9066393375396729,
0.41418227553367615,
0.6529518961906433,
0.5801039934158325,
0.10069625079631805,
0.6211609244346619,
0.3417372703552246,
0.3543941378593445,
0.6216508746147156,
0.8545634150505066,
0.574700653553009,
0.9054378271102905,
0.8045241832733154,
0.7795861959457397,
0.4488513469696045,
0.12007670104503632,
0.40916627645492554,
0.34826213121414185
],
"eval_mmlu_runtime": 36.2692,
"eval_mmlu_samples_per_second": 2.757,
"eval_mmlu_steps_per_second": 0.055,
"epoch": 1.32,
"step": 75
},
{
"loss": 0.473,
"learning_rate": 0.0002,
"epoch": 1.33,
"step": 76
},
{
"loss": 0.4736,
"learning_rate": 0.0002,
"epoch": 1.35,
"step": 77
},
{
"loss": 0.4351,
"learning_rate": 0.0002,
"epoch": 1.37,
"step": 78
},
{
"loss": 0.2961,
"learning_rate": 0.0002,
"epoch": 1.39,
"step": 79
},
{
"loss": 0.473,
"learning_rate": 0.0002,
"epoch": 1.4,
"step": 80
},
{
"loss": 0.3997,
"learning_rate": 0.0002,
"epoch": 1.42,
"step": 81
},
{
"loss": 0.3277,
"learning_rate": 0.0002,
"epoch": 1.44,
"step": 82
},
{
"loss": 0.5077,
"learning_rate": 0.0002,
"epoch": 1.46,
"step": 83
},
{
"loss": 0.3042,
"learning_rate": 0.0002,
"epoch": 1.47,
"step": 84
},
{
"loss": 0.381,
"learning_rate": 0.0002,
"epoch": 1.49,
"step": 85
},
{
"loss": 0.2834,
"learning_rate": 0.0002,
"epoch": 1.51,
"step": 86
},
{
"loss": 0.3241,
"learning_rate": 0.0002,
"epoch": 1.53,
"step": 87
},
{
"loss": 0.4027,
"learning_rate": 0.0002,
"epoch": 1.54,
"step": 88
},
{
"loss": 0.3334,
"learning_rate": 0.0002,
"epoch": 1.56,
"step": 89
},
{
"loss": 0.3422,
"learning_rate": 0.0002,
"epoch": 1.58,
"step": 90
},
{
"loss": 0.368,
"learning_rate": 0.0002,
"epoch": 1.6,
"step": 91
},
{
"loss": 0.3376,
"learning_rate": 0.0002,
"epoch": 1.61,
"step": 92
},
{
"loss": 0.5442,
"learning_rate": 0.0002,
"epoch": 1.63,
"step": 93
},
{
"loss": 0.3687,
"learning_rate": 0.0002,
"epoch": 1.65,
"step": 94
},
{
"loss": 0.3144,
"learning_rate": 0.0002,
"epoch": 1.67,
"step": 95
},
{
"loss": 0.4151,
"learning_rate": 0.0002,
"epoch": 1.68,
"step": 96
},
{
"loss": 0.2863,
"learning_rate": 0.0002,
"epoch": 1.7,
"step": 97
},
{
"loss": 0.464,
"learning_rate": 0.0002,
"epoch": 1.72,
"step": 98
},
{
"loss": 0.2873,
"learning_rate": 0.0002,
"epoch": 1.74,
"step": 99
},
{
"loss": 0.3988,
"learning_rate": 0.0002,
"epoch": 1.75,
"step": 100
},
{
"eval_mmlu_loss": 0.8989719152450562,
"eval_mmlu_score": -0.3084475100040436,
"eval_mmlu_brier_score": 0.3084475100040436,
"eval_mmlu_average_probability": 0.5211965441703796,
"eval_mmlu_accuracy": 0.49,
"eval_mmlu_probabilities": [
0.4071880877017975,
0.45392853021621704,
0.7654134035110474,
0.8478649854660034,
0.2505261301994324,
0.4063345193862915,
0.27478206157684326,
0.3481687009334564,
0.20378506183624268,
0.7234545946121216,
0.4079451560974121,
0.46001574397087097,
0.28416189551353455,
0.4730168879032135,
0.4832534193992615,
0.6298182010650635,
0.5491191148757935,
0.7803859114646912,
0.48246490955352783,
0.7818248271942139,
0.8155978322029114,
0.7227077484130859,
0.5422055125236511,
0.3796870708465576,
0.0472140833735466,
0.13505573570728302,
0.5995903015136719,
0.7262413501739502,
0.9919582605361938,
0.9893267154693604,
0.673717737197876,
0.09111680835485458,
0.062387943267822266,
0.18485485017299652,
0.831606924533844,
0.9187701940536499,
0.2769331932067871,
0.7295668125152588,
0.25232887268066406,
0.1371425986289978,
0.11290226131677628,
0.42969319224357605,
0.8279014825820923,
0.833659291267395,
0.995989978313446,
0.07782114297151566,
0.29245418310165405,
0.1428614854812622,
0.12554965913295746,
0.885955810546875,
0.21254442632198334,
0.8990002274513245,
0.7997588515281677,
0.2548746168613434,
0.5323441028594971,
0.9922589659690857,
0.7346373200416565,
0.08055279403924942,
0.27386149764060974,
0.34895414113998413,
0.7386496067047119,
0.7762178182601929,
0.7541271448135376,
0.5595319271087646,
0.5181352496147156,
0.8376888036727905,
0.228178009390831,
0.9345813989639282,
0.9527415037155151,
0.03981766849756241,
0.6531015038490295,
0.7840538620948792,
0.07283812761306763,
0.06463921070098877,
0.2090235948562622,
0.380914568901062,
0.4863632321357727,
0.803974449634552,
0.7102047204971313,
0.8492692708969116,
0.2411557286977768,
0.4371771812438965,
0.8251751065254211,
0.2947361469268799,
0.5158799886703491,
0.4882834851741791,
0.17904901504516602,
0.4950083792209625,
0.3899526596069336,
0.30322733521461487,
0.7750067710876465,
0.9384438395500183,
0.2649548649787903,
0.956122636795044,
0.8255246877670288,
0.7141478657722473,
0.44438719749450684,
0.6701680421829224,
0.599485456943512,
0.43267613649368286
],
"eval_mmlu_runtime": 36.2731,
"eval_mmlu_samples_per_second": 2.757,
"eval_mmlu_steps_per_second": 0.055,
"epoch": 1.75,
"step": 100
},
{
"train_runtime": 3535.9045,
"train_samples_per_second": 0.905,
"train_steps_per_second": 0.028,
"total_flos": 0.0,
"train_loss": 0.5670726171135902,
"epoch": 1.75,
"step": 100
}
]