| [ |
| { |
| "loss": 0.7339, |
| "learning_rate": 0.0002, |
| "epoch": 0.02, |
| "step": 1 |
| }, |
| { |
| "loss": 0.7581, |
| "learning_rate": 0.0002, |
| "epoch": 0.04, |
| "step": 2 |
| }, |
| { |
| "loss": 0.7597, |
| "learning_rate": 0.0002, |
| "epoch": 0.05, |
| "step": 3 |
| }, |
| { |
| "loss": 0.8837, |
| "learning_rate": 0.0002, |
| "epoch": 0.07, |
| "step": 4 |
| }, |
| { |
| "loss": 0.7708, |
| "learning_rate": 0.0002, |
| "epoch": 0.09, |
| "step": 5 |
| }, |
| { |
| "loss": 0.6677, |
| "learning_rate": 0.0002, |
| "epoch": 0.11, |
| "step": 6 |
| }, |
| { |
| "loss": 0.7334, |
| "learning_rate": 0.0002, |
| "epoch": 0.12, |
| "step": 7 |
| }, |
| { |
| "loss": 0.6964, |
| "learning_rate": 0.0002, |
| "epoch": 0.14, |
| "step": 8 |
| }, |
| { |
| "loss": 0.7324, |
| "learning_rate": 0.0002, |
| "epoch": 0.16, |
| "step": 9 |
| }, |
| { |
| "loss": 0.7191, |
| "learning_rate": 0.0002, |
| "epoch": 0.18, |
| "step": 10 |
| }, |
| { |
| "loss": 0.7533, |
| "learning_rate": 0.0002, |
| "epoch": 0.19, |
| "step": 11 |
| }, |
| { |
| "loss": 0.7787, |
| "learning_rate": 0.0002, |
| "epoch": 0.21, |
| "step": 12 |
| }, |
| { |
| "loss": 0.7004, |
| "learning_rate": 0.0002, |
| "epoch": 0.23, |
| "step": 13 |
| }, |
| { |
| "loss": 0.685, |
| "learning_rate": 0.0002, |
| "epoch": 0.25, |
| "step": 14 |
| }, |
| { |
| "loss": 0.6909, |
| "learning_rate": 0.0002, |
| "epoch": 0.26, |
| "step": 15 |
| }, |
| { |
| "loss": 0.6928, |
| "learning_rate": 0.0002, |
| "epoch": 0.28, |
| "step": 16 |
| }, |
| { |
| "loss": 0.7092, |
| "learning_rate": 0.0002, |
| "epoch": 0.3, |
| "step": 17 |
| }, |
| { |
| "loss": 0.6884, |
| "learning_rate": 0.0002, |
| "epoch": 0.32, |
| "step": 18 |
| }, |
| { |
| "loss": 0.7099, |
| "learning_rate": 0.0002, |
| "epoch": 0.33, |
| "step": 19 |
| }, |
| { |
| "loss": 0.7124, |
| "learning_rate": 0.0002, |
| "epoch": 0.35, |
| "step": 20 |
| }, |
| { |
| "loss": 0.692, |
| "learning_rate": 0.0002, |
| "epoch": 0.37, |
| "step": 21 |
| }, |
| { |
| "loss": 0.6696, |
| "learning_rate": 0.0002, |
| "epoch": 0.39, |
| "step": 22 |
| }, |
| { |
| "loss": 0.7233, |
| "learning_rate": 0.0002, |
| "epoch": 0.4, |
| "step": 23 |
| }, |
| { |
| "loss": 0.7043, |
| "learning_rate": 0.0002, |
| "epoch": 0.42, |
| "step": 24 |
| }, |
| { |
| "loss": 0.6885, |
| "learning_rate": 0.0002, |
| "epoch": 0.44, |
| "step": 25 |
| }, |
| { |
| "eval_mmlu_loss": 0.6877026557922363, |
| "eval_mmlu_score": -0.24597446620464325, |
| "eval_mmlu_brier_score": 0.24597446620464325, |
| "eval_mmlu_average_probability": 0.5076507925987244, |
| "eval_mmlu_accuracy": 0.52, |
| "eval_mmlu_probabilities": [ |
| 0.5024531483650208, |
| 0.49637264013290405, |
| 0.42682787775993347, |
| 0.5511913299560547, |
| 0.516057014465332, |
| 0.5302246809005737, |
| 0.47325557470321655, |
| 0.47100546956062317, |
| 0.4857306480407715, |
| 0.4879577159881592, |
| 0.5080811977386475, |
| 0.49308809638023376, |
| 0.5444069504737854, |
| 0.5699678659439087, |
| 0.6137876510620117, |
| 0.5039758682250977, |
| 0.5088179707527161, |
| 0.47646814584732056, |
| 0.5660363435745239, |
| 0.5961596965789795, |
| 0.6173415184020996, |
| 0.5275791883468628, |
| 0.4939135015010834, |
| 0.5678824782371521, |
| 0.4228115677833557, |
| 0.34861868619918823, |
| 0.404059499502182, |
| 0.5350965261459351, |
| 0.6017758846282959, |
| 0.5842959880828857, |
| 0.5007165670394897, |
| 0.5204458832740784, |
| 0.46593177318573, |
| 0.47197505831718445, |
| 0.4855360984802246, |
| 0.4904650151729584, |
| 0.5280486941337585, |
| 0.5117051005363464, |
| 0.5350059866905212, |
| 0.5049182772636414, |
| 0.4832833409309387, |
| 0.5799784064292908, |
| 0.5069881677627563, |
| 0.6448580026626587, |
| 0.6689260005950928, |
| 0.46061623096466064, |
| 0.3911752700805664, |
| 0.39503976702690125, |
| 0.4661625623703003, |
| 0.6794996857643127, |
| 0.47264567017555237, |
| 0.4458942115306854, |
| 0.45081019401550293, |
| 0.4047556221485138, |
| 0.4076286554336548, |
| 0.592082679271698, |
| 0.534106433391571, |
| 0.5288624167442322, |
| 0.49373653531074524, |
| 0.5440691709518433, |
| 0.508944034576416, |
| 0.5440325140953064, |
| 0.5153629183769226, |
| 0.466998428106308, |
| 0.4557458758354187, |
| 0.48426082730293274, |
| 0.4218427240848541, |
| 0.5727195739746094, |
| 0.4905124604701996, |
| 0.590722382068634, |
| 0.5299772024154663, |
| 0.5239810347557068, |
| 0.600125253200531, |
| 0.42955777049064636, |
| 0.5403964519500732, |
| 0.48015835881233215, |
| 0.4788738489151001, |
| 0.5453965663909912, |
| 0.6047127842903137, |
| 0.5861594676971436, |
| 0.5148264169692993, |
| 0.551675021648407, |
| 0.5116910338401794, |
| 0.5246034264564514, |
| 0.49715280532836914, |
| 0.48748254776000977, |
| 0.44811931252479553, |
| 0.4952526390552521, |
| 0.4878782033920288, |
| 0.4950997531414032, |
| 0.4488215446472168, |
| 0.531753659248352, |
| 0.5307499766349792, |
| 0.46273279190063477, |
| 0.44232308864593506, |
| 0.5162163972854614, |
| 0.47405269742012024, |
| 0.47172489762306213, |
| 0.4590320885181427, |
| 0.4523000419139862 |
| ], |
| "eval_mmlu_runtime": 36.2867, |
| "eval_mmlu_samples_per_second": 2.756, |
| "eval_mmlu_steps_per_second": 0.055, |
| "epoch": 0.44, |
| "step": 25 |
| }, |
| { |
| "loss": 0.6873, |
| "learning_rate": 0.0002, |
| "epoch": 0.46, |
| "step": 26 |
| }, |
| { |
| "loss": 0.6727, |
| "learning_rate": 0.0002, |
| "epoch": 0.47, |
| "step": 27 |
| }, |
| { |
| "loss": 0.6513, |
| "learning_rate": 0.0002, |
| "epoch": 0.49, |
| "step": 28 |
| }, |
| { |
| "loss": 0.6854, |
| "learning_rate": 0.0002, |
| "epoch": 0.51, |
| "step": 29 |
| }, |
| { |
| "loss": 0.6944, |
| "learning_rate": 0.0002, |
| "epoch": 0.53, |
| "step": 30 |
| }, |
| { |
| "loss": 0.6698, |
| "learning_rate": 0.0002, |
| "epoch": 0.54, |
| "step": 31 |
| }, |
| { |
| "loss": 0.6873, |
| "learning_rate": 0.0002, |
| "epoch": 0.56, |
| "step": 32 |
| }, |
| { |
| "loss": 0.7038, |
| "learning_rate": 0.0002, |
| "epoch": 0.58, |
| "step": 33 |
| }, |
| { |
| "loss": 0.6371, |
| "learning_rate": 0.0002, |
| "epoch": 0.6, |
| "step": 34 |
| }, |
| { |
| "loss": 0.6949, |
| "learning_rate": 0.0002, |
| "epoch": 0.61, |
| "step": 35 |
| }, |
| { |
| "loss": 0.6806, |
| "learning_rate": 0.0002, |
| "epoch": 0.63, |
| "step": 36 |
| }, |
| { |
| "loss": 0.6348, |
| "learning_rate": 0.0002, |
| "epoch": 0.65, |
| "step": 37 |
| }, |
| { |
| "loss": 0.6525, |
| "learning_rate": 0.0002, |
| "epoch": 0.67, |
| "step": 38 |
| }, |
| { |
| "loss": 0.6781, |
| "learning_rate": 0.0002, |
| "epoch": 0.68, |
| "step": 39 |
| }, |
| { |
| "loss": 0.6672, |
| "learning_rate": 0.0002, |
| "epoch": 0.7, |
| "step": 40 |
| }, |
| { |
| "loss": 0.6829, |
| "learning_rate": 0.0002, |
| "epoch": 0.72, |
| "step": 41 |
| }, |
| { |
| "loss": 0.6725, |
| "learning_rate": 0.0002, |
| "epoch": 0.74, |
| "step": 42 |
| }, |
| { |
| "loss": 0.6299, |
| "learning_rate": 0.0002, |
| "epoch": 0.75, |
| "step": 43 |
| }, |
| { |
| "loss": 0.679, |
| "learning_rate": 0.0002, |
| "epoch": 0.77, |
| "step": 44 |
| }, |
| { |
| "loss": 0.6444, |
| "learning_rate": 0.0002, |
| "epoch": 0.79, |
| "step": 45 |
| }, |
| { |
| "loss": 0.5872, |
| "learning_rate": 0.0002, |
| "epoch": 0.81, |
| "step": 46 |
| }, |
| { |
| "loss": 0.5675, |
| "learning_rate": 0.0002, |
| "epoch": 0.82, |
| "step": 47 |
| }, |
| { |
| "loss": 0.6037, |
| "learning_rate": 0.0002, |
| "epoch": 0.84, |
| "step": 48 |
| }, |
| { |
| "loss": 0.6257, |
| "learning_rate": 0.0002, |
| "epoch": 0.86, |
| "step": 49 |
| }, |
| { |
| "loss": 0.6217, |
| "learning_rate": 0.0002, |
| "epoch": 0.88, |
| "step": 50 |
| }, |
| { |
| "eval_mmlu_loss": 0.6811871528625488, |
| "eval_mmlu_score": -0.2452215701341629, |
| "eval_mmlu_brier_score": 0.2452215701341629, |
| "eval_mmlu_average_probability": 0.5182498693466187, |
| "eval_mmlu_accuracy": 0.56, |
| "eval_mmlu_probabilities": [ |
| 0.5035430788993835, |
| 0.4767981767654419, |
| 0.5682129859924316, |
| 0.5918246507644653, |
| 0.5153557658195496, |
| 0.5415547490119934, |
| 0.4029051959514618, |
| 0.450078547000885, |
| 0.5172317028045654, |
| 0.6108822822570801, |
| 0.4666701555252075, |
| 0.4877246916294098, |
| 0.4729712903499603, |
| 0.4151947796344757, |
| 0.46747589111328125, |
| 0.5594338178634644, |
| 0.5088744759559631, |
| 0.5103889107704163, |
| 0.5036099553108215, |
| 0.584050714969635, |
| 0.6694682836532593, |
| 0.5144792795181274, |
| 0.5978407859802246, |
| 0.6020472645759583, |
| 0.4669976532459259, |
| 0.5301037430763245, |
| 0.49586060643196106, |
| 0.5453556776046753, |
| 0.8031986355781555, |
| 0.7073482275009155, |
| 0.6679326891899109, |
| 0.4658517837524414, |
| 0.37422868609428406, |
| 0.37934908270835876, |
| 0.4810079038143158, |
| 0.6123296618461609, |
| 0.4183340072631836, |
| 0.5893468856811523, |
| 0.4664745032787323, |
| 0.41369232535362244, |
| 0.414699912071228, |
| 0.43868860602378845, |
| 0.5881794095039368, |
| 0.6734998226165771, |
| 0.7834814786911011, |
| 0.34748634696006775, |
| 0.5293741226196289, |
| 0.36080241203308105, |
| 0.42220059037208557, |
| 0.7680569887161255, |
| 0.4100235402584076, |
| 0.7189415097236633, |
| 0.6853945851325989, |
| 0.5016038417816162, |
| 0.39125701785087585, |
| 0.7705144286155701, |
| 0.6471617817878723, |
| 0.3062707483768463, |
| 0.386313259601593, |
| 0.32649141550064087, |
| 0.47263073921203613, |
| 0.44739583134651184, |
| 0.5802536010742188, |
| 0.4064495265483856, |
| 0.4570828974246979, |
| 0.4176960289478302, |
| 0.28201282024383545, |
| 0.6262605786323547, |
| 0.617345929145813, |
| 0.41387128829956055, |
| 0.5746867060661316, |
| 0.5823390483856201, |
| 0.3908570408821106, |
| 0.31184154748916626, |
| 0.515109658241272, |
| 0.43107518553733826, |
| 0.4796065390110016, |
| 0.7087035775184631, |
| 0.6558853983879089, |
| 0.5968166589736938, |
| 0.47654810547828674, |
| 0.5344870686531067, |
| 0.6017045974731445, |
| 0.5343926548957825, |
| 0.5024454593658447, |
| 0.5379210710525513, |
| 0.42640140652656555, |
| 0.6160038709640503, |
| 0.5520896315574646, |
| 0.4460159242153168, |
| 0.5341697931289673, |
| 0.6633436679840088, |
| 0.536496102809906, |
| 0.6880946159362793, |
| 0.6644284129142761, |
| 0.5711739659309387, |
| 0.4048452377319336, |
| 0.2511211037635803, |
| 0.38734522461891174, |
| 0.5015671253204346 |
| ], |
| "eval_mmlu_runtime": 36.3017, |
| "eval_mmlu_samples_per_second": 2.755, |
| "eval_mmlu_steps_per_second": 0.055, |
| "epoch": 0.88, |
| "step": 50 |
| }, |
| { |
| "loss": 0.6395, |
| "learning_rate": 0.0002, |
| "epoch": 0.89, |
| "step": 51 |
| }, |
| { |
| "loss": 0.5624, |
| "learning_rate": 0.0002, |
| "epoch": 0.91, |
| "step": 52 |
| }, |
| { |
| "loss": 0.6642, |
| "learning_rate": 0.0002, |
| "epoch": 0.93, |
| "step": 53 |
| }, |
| { |
| "loss": 0.6446, |
| "learning_rate": 0.0002, |
| "epoch": 0.95, |
| "step": 54 |
| }, |
| { |
| "loss": 0.5786, |
| "learning_rate": 0.0002, |
| "epoch": 0.96, |
| "step": 55 |
| }, |
| { |
| "loss": 0.6207, |
| "learning_rate": 0.0002, |
| "epoch": 0.98, |
| "step": 56 |
| }, |
| { |
| "loss": 0.5365, |
| "learning_rate": 0.0002, |
| "epoch": 1.0, |
| "step": 57 |
| }, |
| { |
| "loss": 0.5721, |
| "learning_rate": 0.0002, |
| "epoch": 1.02, |
| "step": 58 |
| }, |
| { |
| "loss": 0.5258, |
| "learning_rate": 0.0002, |
| "epoch": 1.04, |
| "step": 59 |
| }, |
| { |
| "loss": 0.5292, |
| "learning_rate": 0.0002, |
| "epoch": 1.05, |
| "step": 60 |
| }, |
| { |
| "loss": 0.5543, |
| "learning_rate": 0.0002, |
| "epoch": 1.07, |
| "step": 61 |
| }, |
| { |
| "loss": 0.539, |
| "learning_rate": 0.0002, |
| "epoch": 1.09, |
| "step": 62 |
| }, |
| { |
| "loss": 0.5608, |
| "learning_rate": 0.0002, |
| "epoch": 1.11, |
| "step": 63 |
| }, |
| { |
| "loss": 0.4825, |
| "learning_rate": 0.0002, |
| "epoch": 1.12, |
| "step": 64 |
| }, |
| { |
| "loss": 0.4465, |
| "learning_rate": 0.0002, |
| "epoch": 1.14, |
| "step": 65 |
| }, |
| { |
| "loss": 0.3743, |
| "learning_rate": 0.0002, |
| "epoch": 1.16, |
| "step": 66 |
| }, |
| { |
| "loss": 0.5076, |
| "learning_rate": 0.0002, |
| "epoch": 1.18, |
| "step": 67 |
| }, |
| { |
| "loss": 0.435, |
| "learning_rate": 0.0002, |
| "epoch": 1.19, |
| "step": 68 |
| }, |
| { |
| "loss": 0.3346, |
| "learning_rate": 0.0002, |
| "epoch": 1.21, |
| "step": 69 |
| }, |
| { |
| "loss": 0.5868, |
| "learning_rate": 0.0002, |
| "epoch": 1.23, |
| "step": 70 |
| }, |
| { |
| "loss": 0.3487, |
| "learning_rate": 0.0002, |
| "epoch": 1.25, |
| "step": 71 |
| }, |
| { |
| "loss": 0.3667, |
| "learning_rate": 0.0002, |
| "epoch": 1.26, |
| "step": 72 |
| }, |
| { |
| "loss": 0.5619, |
| "learning_rate": 0.0002, |
| "epoch": 1.28, |
| "step": 73 |
| }, |
| { |
| "loss": 0.3931, |
| "learning_rate": 0.0002, |
| "epoch": 1.3, |
| "step": 74 |
| }, |
| { |
| "loss": 0.3349, |
| "learning_rate": 0.0002, |
| "epoch": 1.32, |
| "step": 75 |
| }, |
| { |
| "eval_mmlu_loss": 0.833143413066864, |
| "eval_mmlu_score": -0.2877655327320099, |
| "eval_mmlu_brier_score": 0.2877655327320099, |
| "eval_mmlu_average_probability": 0.5490172505378723, |
| "eval_mmlu_accuracy": 0.54, |
| "eval_mmlu_probabilities": [ |
| 0.1580410748720169, |
| 0.43874940276145935, |
| 0.6015041470527649, |
| 0.5842825770378113, |
| 0.4584586024284363, |
| 0.4909316897392273, |
| 0.11258751899003983, |
| 0.38813868165016174, |
| 0.23412899672985077, |
| 0.8689023852348328, |
| 0.3216671347618103, |
| 0.4708637595176697, |
| 0.6932336688041687, |
| 0.9213990569114685, |
| 0.44838589429855347, |
| 0.6026622653007507, |
| 0.5027726888656616, |
| 0.612128496170044, |
| 0.06997516006231308, |
| 0.4677487909793854, |
| 0.35034096240997314, |
| 0.8762949705123901, |
| 0.7732230424880981, |
| 0.8989255428314209, |
| 0.40452077984809875, |
| 0.48415499925613403, |
| 0.374180406332016, |
| 0.9165855050086975, |
| 0.9918805956840515, |
| 0.9852085113525391, |
| 0.8175538778305054, |
| 0.09847690165042877, |
| 0.26064637303352356, |
| 0.46926015615463257, |
| 0.8475375771522522, |
| 0.7741795778274536, |
| 0.26930543780326843, |
| 0.4011392295360565, |
| 0.07169023901224136, |
| 0.13525348901748657, |
| 0.15383993089199066, |
| 0.534102201461792, |
| 0.8887997269630432, |
| 0.983967125415802, |
| 0.9906513690948486, |
| 0.13722844421863556, |
| 0.08100348711013794, |
| 0.050758879631757736, |
| 0.8930805921554565, |
| 0.7616145014762878, |
| 0.5024493932723999, |
| 0.31655770540237427, |
| 0.7933811545372009, |
| 0.53852778673172, |
| 0.9475235342979431, |
| 0.9588028192520142, |
| 0.9672081470489502, |
| 0.23299279808998108, |
| 0.5359262824058533, |
| 0.39278143644332886, |
| 0.8811087012290955, |
| 0.89657062292099, |
| 0.9659839868545532, |
| 0.6707325577735901, |
| 0.7787579894065857, |
| 0.5808053612709045, |
| 0.9807056784629822, |
| 0.9567878842353821, |
| 0.9984741806983948, |
| 0.025270536541938782, |
| 0.5340918898582458, |
| 0.2554855942726135, |
| 0.018762007355690002, |
| 0.15030156075954437, |
| 0.516846239566803, |
| 0.2843777537345886, |
| 0.410349577665329, |
| 0.6759911179542542, |
| 0.7385581135749817, |
| 0.8582839965820312, |
| 0.18342167139053345, |
| 0.3932557702064514, |
| 0.9066393375396729, |
| 0.41418227553367615, |
| 0.6529518961906433, |
| 0.5801039934158325, |
| 0.10069625079631805, |
| 0.6211609244346619, |
| 0.3417372703552246, |
| 0.3543941378593445, |
| 0.6216508746147156, |
| 0.8545634150505066, |
| 0.574700653553009, |
| 0.9054378271102905, |
| 0.8045241832733154, |
| 0.7795861959457397, |
| 0.4488513469696045, |
| 0.12007670104503632, |
| 0.40916627645492554, |
| 0.34826213121414185 |
| ], |
| "eval_mmlu_runtime": 36.2692, |
| "eval_mmlu_samples_per_second": 2.757, |
| "eval_mmlu_steps_per_second": 0.055, |
| "epoch": 1.32, |
| "step": 75 |
| }, |
| { |
| "loss": 0.473, |
| "learning_rate": 0.0002, |
| "epoch": 1.33, |
| "step": 76 |
| }, |
| { |
| "loss": 0.4736, |
| "learning_rate": 0.0002, |
| "epoch": 1.35, |
| "step": 77 |
| }, |
| { |
| "loss": 0.4351, |
| "learning_rate": 0.0002, |
| "epoch": 1.37, |
| "step": 78 |
| }, |
| { |
| "loss": 0.2961, |
| "learning_rate": 0.0002, |
| "epoch": 1.39, |
| "step": 79 |
| }, |
| { |
| "loss": 0.473, |
| "learning_rate": 0.0002, |
| "epoch": 1.4, |
| "step": 80 |
| }, |
| { |
| "loss": 0.3997, |
| "learning_rate": 0.0002, |
| "epoch": 1.42, |
| "step": 81 |
| }, |
| { |
| "loss": 0.3277, |
| "learning_rate": 0.0002, |
| "epoch": 1.44, |
| "step": 82 |
| }, |
| { |
| "loss": 0.5077, |
| "learning_rate": 0.0002, |
| "epoch": 1.46, |
| "step": 83 |
| }, |
| { |
| "loss": 0.3042, |
| "learning_rate": 0.0002, |
| "epoch": 1.47, |
| "step": 84 |
| }, |
| { |
| "loss": 0.381, |
| "learning_rate": 0.0002, |
| "epoch": 1.49, |
| "step": 85 |
| }, |
| { |
| "loss": 0.2834, |
| "learning_rate": 0.0002, |
| "epoch": 1.51, |
| "step": 86 |
| }, |
| { |
| "loss": 0.3241, |
| "learning_rate": 0.0002, |
| "epoch": 1.53, |
| "step": 87 |
| }, |
| { |
| "loss": 0.4027, |
| "learning_rate": 0.0002, |
| "epoch": 1.54, |
| "step": 88 |
| }, |
| { |
| "loss": 0.3334, |
| "learning_rate": 0.0002, |
| "epoch": 1.56, |
| "step": 89 |
| }, |
| { |
| "loss": 0.3422, |
| "learning_rate": 0.0002, |
| "epoch": 1.58, |
| "step": 90 |
| }, |
| { |
| "loss": 0.368, |
| "learning_rate": 0.0002, |
| "epoch": 1.6, |
| "step": 91 |
| }, |
| { |
| "loss": 0.3376, |
| "learning_rate": 0.0002, |
| "epoch": 1.61, |
| "step": 92 |
| }, |
| { |
| "loss": 0.5442, |
| "learning_rate": 0.0002, |
| "epoch": 1.63, |
| "step": 93 |
| }, |
| { |
| "loss": 0.3687, |
| "learning_rate": 0.0002, |
| "epoch": 1.65, |
| "step": 94 |
| }, |
| { |
| "loss": 0.3144, |
| "learning_rate": 0.0002, |
| "epoch": 1.67, |
| "step": 95 |
| }, |
| { |
| "loss": 0.4151, |
| "learning_rate": 0.0002, |
| "epoch": 1.68, |
| "step": 96 |
| }, |
| { |
| "loss": 0.2863, |
| "learning_rate": 0.0002, |
| "epoch": 1.7, |
| "step": 97 |
| }, |
| { |
| "loss": 0.464, |
| "learning_rate": 0.0002, |
| "epoch": 1.72, |
| "step": 98 |
| }, |
| { |
| "loss": 0.2873, |
| "learning_rate": 0.0002, |
| "epoch": 1.74, |
| "step": 99 |
| }, |
| { |
| "loss": 0.3988, |
| "learning_rate": 0.0002, |
| "epoch": 1.75, |
| "step": 100 |
| }, |
| { |
| "eval_mmlu_loss": 0.8989719152450562, |
| "eval_mmlu_score": -0.3084475100040436, |
| "eval_mmlu_brier_score": 0.3084475100040436, |
| "eval_mmlu_average_probability": 0.5211965441703796, |
| "eval_mmlu_accuracy": 0.49, |
| "eval_mmlu_probabilities": [ |
| 0.4071880877017975, |
| 0.45392853021621704, |
| 0.7654134035110474, |
| 0.8478649854660034, |
| 0.2505261301994324, |
| 0.4063345193862915, |
| 0.27478206157684326, |
| 0.3481687009334564, |
| 0.20378506183624268, |
| 0.7234545946121216, |
| 0.4079451560974121, |
| 0.46001574397087097, |
| 0.28416189551353455, |
| 0.4730168879032135, |
| 0.4832534193992615, |
| 0.6298182010650635, |
| 0.5491191148757935, |
| 0.7803859114646912, |
| 0.48246490955352783, |
| 0.7818248271942139, |
| 0.8155978322029114, |
| 0.7227077484130859, |
| 0.5422055125236511, |
| 0.3796870708465576, |
| 0.0472140833735466, |
| 0.13505573570728302, |
| 0.5995903015136719, |
| 0.7262413501739502, |
| 0.9919582605361938, |
| 0.9893267154693604, |
| 0.673717737197876, |
| 0.09111680835485458, |
| 0.062387943267822266, |
| 0.18485485017299652, |
| 0.831606924533844, |
| 0.9187701940536499, |
| 0.2769331932067871, |
| 0.7295668125152588, |
| 0.25232887268066406, |
| 0.1371425986289978, |
| 0.11290226131677628, |
| 0.42969319224357605, |
| 0.8279014825820923, |
| 0.833659291267395, |
| 0.995989978313446, |
| 0.07782114297151566, |
| 0.29245418310165405, |
| 0.1428614854812622, |
| 0.12554965913295746, |
| 0.885955810546875, |
| 0.21254442632198334, |
| 0.8990002274513245, |
| 0.7997588515281677, |
| 0.2548746168613434, |
| 0.5323441028594971, |
| 0.9922589659690857, |
| 0.7346373200416565, |
| 0.08055279403924942, |
| 0.27386149764060974, |
| 0.34895414113998413, |
| 0.7386496067047119, |
| 0.7762178182601929, |
| 0.7541271448135376, |
| 0.5595319271087646, |
| 0.5181352496147156, |
| 0.8376888036727905, |
| 0.228178009390831, |
| 0.9345813989639282, |
| 0.9527415037155151, |
| 0.03981766849756241, |
| 0.6531015038490295, |
| 0.7840538620948792, |
| 0.07283812761306763, |
| 0.06463921070098877, |
| 0.2090235948562622, |
| 0.380914568901062, |
| 0.4863632321357727, |
| 0.803974449634552, |
| 0.7102047204971313, |
| 0.8492692708969116, |
| 0.2411557286977768, |
| 0.4371771812438965, |
| 0.8251751065254211, |
| 0.2947361469268799, |
| 0.5158799886703491, |
| 0.4882834851741791, |
| 0.17904901504516602, |
| 0.4950083792209625, |
| 0.3899526596069336, |
| 0.30322733521461487, |
| 0.7750067710876465, |
| 0.9384438395500183, |
| 0.2649548649787903, |
| 0.956122636795044, |
| 0.8255246877670288, |
| 0.7141478657722473, |
| 0.44438719749450684, |
| 0.6701680421829224, |
| 0.599485456943512, |
| 0.43267613649368286 |
| ], |
| "eval_mmlu_runtime": 36.2731, |
| "eval_mmlu_samples_per_second": 2.757, |
| "eval_mmlu_steps_per_second": 0.055, |
| "epoch": 1.75, |
| "step": 100 |
| }, |
| { |
| "train_runtime": 3535.9045, |
| "train_samples_per_second": 0.905, |
| "train_steps_per_second": 0.028, |
| "total_flos": 0.0, |
| "train_loss": 0.5670726171135902, |
| "epoch": 1.75, |
| "step": 100 |
| } |
| ] |