| { |
| "best_metric": 6.378762722015381, |
| "best_model_checkpoint": "learning_source_20260316/genome_sequence/bert-output/genome_sequence-medium/checkpoint-59000", |
| "epoch": 133.07457721097865, |
| "eval_steps": 100, |
| "global_step": 60000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.22179096201829776, |
| "grad_norm": 0.9846197366714478, |
| "learning_rate": 3e-06, |
| "loss": 8.0812, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.22179096201829776, |
| "eval_loss": 7.739930629730225, |
| "eval_runtime": 175.2691, |
| "eval_samples_per_second": 57.055, |
| "eval_steps_per_second": 7.132, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.4435819240365955, |
| "grad_norm": 0.7575621604919434, |
| "learning_rate": 6e-06, |
| "loss": 7.5841, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.4435819240365955, |
| "eval_loss": 7.357375144958496, |
| "eval_runtime": 174.8425, |
| "eval_samples_per_second": 57.194, |
| "eval_steps_per_second": 7.149, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.6653728860548933, |
| "grad_norm": 2.5145933628082275, |
| "learning_rate": 5.989966555183947e-06, |
| "loss": 7.2475, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.6653728860548933, |
| "eval_loss": 7.029321193695068, |
| "eval_runtime": 174.7596, |
| "eval_samples_per_second": 57.221, |
| "eval_steps_per_second": 7.153, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.887163848073191, |
| "grad_norm": 2.1445820331573486, |
| "learning_rate": 5.979933110367893e-06, |
| "loss": 7.0137, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.887163848073191, |
| "eval_loss": 6.827342987060547, |
| "eval_runtime": 174.7661, |
| "eval_samples_per_second": 57.219, |
| "eval_steps_per_second": 7.152, |
| "step": 400 |
| }, |
| { |
| "epoch": 1.1089548100914888, |
| "grad_norm": 2.264369010925293, |
| "learning_rate": 5.96989966555184e-06, |
| "loss": 6.8546, |
| "step": 500 |
| }, |
| { |
| "epoch": 1.1089548100914888, |
| "eval_loss": 6.712888240814209, |
| "eval_runtime": 174.751, |
| "eval_samples_per_second": 57.224, |
| "eval_steps_per_second": 7.153, |
| "step": 500 |
| }, |
| { |
| "epoch": 1.3307457721097866, |
| "grad_norm": 2.162890911102295, |
| "learning_rate": 5.959866220735786e-06, |
| "loss": 6.7576, |
| "step": 600 |
| }, |
| { |
| "epoch": 1.3307457721097866, |
| "eval_loss": 6.646862506866455, |
| "eval_runtime": 174.794, |
| "eval_samples_per_second": 57.21, |
| "eval_steps_per_second": 7.151, |
| "step": 600 |
| }, |
| { |
| "epoch": 1.5525367341280842, |
| "grad_norm": 1.8144651651382446, |
| "learning_rate": 5.949832775919732e-06, |
| "loss": 6.6931, |
| "step": 700 |
| }, |
| { |
| "epoch": 1.5525367341280842, |
| "eval_loss": 6.603611946105957, |
| "eval_runtime": 174.6927, |
| "eval_samples_per_second": 57.243, |
| "eval_steps_per_second": 7.155, |
| "step": 700 |
| }, |
| { |
| "epoch": 1.774327696146382, |
| "grad_norm": 1.877691626548767, |
| "learning_rate": 5.939799331103679e-06, |
| "loss": 6.6514, |
| "step": 800 |
| }, |
| { |
| "epoch": 1.774327696146382, |
| "eval_loss": 6.573204517364502, |
| "eval_runtime": 174.7097, |
| "eval_samples_per_second": 57.238, |
| "eval_steps_per_second": 7.155, |
| "step": 800 |
| }, |
| { |
| "epoch": 1.9961186581646797, |
| "grad_norm": 0.6093182563781738, |
| "learning_rate": 5.929765886287626e-06, |
| "loss": 6.6173, |
| "step": 900 |
| }, |
| { |
| "epoch": 1.9961186581646797, |
| "eval_loss": 6.551618576049805, |
| "eval_runtime": 174.7562, |
| "eval_samples_per_second": 57.223, |
| "eval_steps_per_second": 7.153, |
| "step": 900 |
| }, |
| { |
| "epoch": 2.2179096201829775, |
| "grad_norm": 0.6819909811019897, |
| "learning_rate": 5.919732441471572e-06, |
| "loss": 6.5884, |
| "step": 1000 |
| }, |
| { |
| "epoch": 2.2179096201829775, |
| "eval_loss": 6.52918004989624, |
| "eval_runtime": 174.672, |
| "eval_samples_per_second": 57.25, |
| "eval_steps_per_second": 7.156, |
| "step": 1000 |
| }, |
| { |
| "epoch": 2.4397005822012754, |
| "grad_norm": 1.7704071998596191, |
| "learning_rate": 5.9096989966555185e-06, |
| "loss": 6.5703, |
| "step": 1100 |
| }, |
| { |
| "epoch": 2.4397005822012754, |
| "eval_loss": 6.522487640380859, |
| "eval_runtime": 173.2328, |
| "eval_samples_per_second": 57.726, |
| "eval_steps_per_second": 7.216, |
| "step": 1100 |
| }, |
| { |
| "epoch": 2.6614915442195732, |
| "grad_norm": 0.34777677059173584, |
| "learning_rate": 5.899665551839465e-06, |
| "loss": 6.5571, |
| "step": 1200 |
| }, |
| { |
| "epoch": 2.6614915442195732, |
| "eval_loss": 6.512077808380127, |
| "eval_runtime": 174.7576, |
| "eval_samples_per_second": 57.222, |
| "eval_steps_per_second": 7.153, |
| "step": 1200 |
| }, |
| { |
| "epoch": 2.8832825062378706, |
| "grad_norm": 0.40927115082740784, |
| "learning_rate": 5.889632107023412e-06, |
| "loss": 6.5403, |
| "step": 1300 |
| }, |
| { |
| "epoch": 2.8832825062378706, |
| "eval_loss": 6.502514839172363, |
| "eval_runtime": 174.8661, |
| "eval_samples_per_second": 57.187, |
| "eval_steps_per_second": 7.148, |
| "step": 1300 |
| }, |
| { |
| "epoch": 3.1050734682561685, |
| "grad_norm": 1.628187894821167, |
| "learning_rate": 5.879598662207358e-06, |
| "loss": 6.5271, |
| "step": 1400 |
| }, |
| { |
| "epoch": 3.1050734682561685, |
| "eval_loss": 6.495711803436279, |
| "eval_runtime": 173.1482, |
| "eval_samples_per_second": 57.754, |
| "eval_steps_per_second": 7.219, |
| "step": 1400 |
| }, |
| { |
| "epoch": 3.3268644302744663, |
| "grad_norm": 0.2986718416213989, |
| "learning_rate": 5.869565217391305e-06, |
| "loss": 6.5149, |
| "step": 1500 |
| }, |
| { |
| "epoch": 3.3268644302744663, |
| "eval_loss": 6.48654317855835, |
| "eval_runtime": 174.8407, |
| "eval_samples_per_second": 57.195, |
| "eval_steps_per_second": 7.149, |
| "step": 1500 |
| }, |
| { |
| "epoch": 3.548655392292764, |
| "grad_norm": 0.2633047103881836, |
| "learning_rate": 5.8595317725752514e-06, |
| "loss": 6.5072, |
| "step": 1600 |
| }, |
| { |
| "epoch": 3.548655392292764, |
| "eval_loss": 6.482588291168213, |
| "eval_runtime": 172.4593, |
| "eval_samples_per_second": 57.985, |
| "eval_steps_per_second": 7.248, |
| "step": 1600 |
| }, |
| { |
| "epoch": 3.770446354311062, |
| "grad_norm": 1.4255868196487427, |
| "learning_rate": 5.849498327759197e-06, |
| "loss": 6.5022, |
| "step": 1700 |
| }, |
| { |
| "epoch": 3.770446354311062, |
| "eval_loss": 6.47553825378418, |
| "eval_runtime": 174.7719, |
| "eval_samples_per_second": 57.217, |
| "eval_steps_per_second": 7.152, |
| "step": 1700 |
| }, |
| { |
| "epoch": 3.9922373163293594, |
| "grad_norm": 0.9065702557563782, |
| "learning_rate": 5.839464882943144e-06, |
| "loss": 6.4959, |
| "step": 1800 |
| }, |
| { |
| "epoch": 3.9922373163293594, |
| "eval_loss": 6.473917484283447, |
| "eval_runtime": 174.8124, |
| "eval_samples_per_second": 57.204, |
| "eval_steps_per_second": 7.151, |
| "step": 1800 |
| }, |
| { |
| "epoch": 4.214028278347658, |
| "grad_norm": 0.566608190536499, |
| "learning_rate": 5.829431438127091e-06, |
| "loss": 6.4886, |
| "step": 1900 |
| }, |
| { |
| "epoch": 4.214028278347658, |
| "eval_loss": 6.469518184661865, |
| "eval_runtime": 174.705, |
| "eval_samples_per_second": 57.239, |
| "eval_steps_per_second": 7.155, |
| "step": 1900 |
| }, |
| { |
| "epoch": 4.435819240365955, |
| "grad_norm": 0.48451030254364014, |
| "learning_rate": 5.819397993311037e-06, |
| "loss": 6.4833, |
| "step": 2000 |
| }, |
| { |
| "epoch": 4.435819240365955, |
| "eval_loss": 6.466635704040527, |
| "eval_runtime": 172.7376, |
| "eval_samples_per_second": 57.891, |
| "eval_steps_per_second": 7.236, |
| "step": 2000 |
| }, |
| { |
| "epoch": 4.6576102023842525, |
| "grad_norm": 0.9523207545280457, |
| "learning_rate": 5.8093645484949836e-06, |
| "loss": 6.4787, |
| "step": 2100 |
| }, |
| { |
| "epoch": 4.6576102023842525, |
| "eval_loss": 6.461461544036865, |
| "eval_runtime": 175.0633, |
| "eval_samples_per_second": 57.122, |
| "eval_steps_per_second": 7.14, |
| "step": 2100 |
| }, |
| { |
| "epoch": 4.879401164402551, |
| "grad_norm": 0.5693651437759399, |
| "learning_rate": 5.79933110367893e-06, |
| "loss": 6.4728, |
| "step": 2200 |
| }, |
| { |
| "epoch": 4.879401164402551, |
| "eval_loss": 6.459517478942871, |
| "eval_runtime": 172.5038, |
| "eval_samples_per_second": 57.97, |
| "eval_steps_per_second": 7.246, |
| "step": 2200 |
| }, |
| { |
| "epoch": 5.101192126420848, |
| "grad_norm": 0.5901357531547546, |
| "learning_rate": 5.789297658862876e-06, |
| "loss": 6.4696, |
| "step": 2300 |
| }, |
| { |
| "epoch": 5.101192126420848, |
| "eval_loss": 6.457067012786865, |
| "eval_runtime": 174.7951, |
| "eval_samples_per_second": 57.21, |
| "eval_steps_per_second": 7.151, |
| "step": 2300 |
| }, |
| { |
| "epoch": 5.3229830884391465, |
| "grad_norm": 1.0042142868041992, |
| "learning_rate": 5.779264214046823e-06, |
| "loss": 6.4653, |
| "step": 2400 |
| }, |
| { |
| "epoch": 5.3229830884391465, |
| "eval_loss": 6.453999042510986, |
| "eval_runtime": 172.3564, |
| "eval_samples_per_second": 58.019, |
| "eval_steps_per_second": 7.252, |
| "step": 2400 |
| }, |
| { |
| "epoch": 5.544774050457444, |
| "grad_norm": 0.7791227102279663, |
| "learning_rate": 5.76923076923077e-06, |
| "loss": 6.4633, |
| "step": 2500 |
| }, |
| { |
| "epoch": 5.544774050457444, |
| "eval_loss": 6.451336860656738, |
| "eval_runtime": 174.7261, |
| "eval_samples_per_second": 57.232, |
| "eval_steps_per_second": 7.154, |
| "step": 2500 |
| }, |
| { |
| "epoch": 5.766565012475741, |
| "grad_norm": 0.8784928321838379, |
| "learning_rate": 5.759197324414716e-06, |
| "loss": 6.4583, |
| "step": 2600 |
| }, |
| { |
| "epoch": 5.766565012475741, |
| "eval_loss": 6.448914051055908, |
| "eval_runtime": 174.6875, |
| "eval_samples_per_second": 57.245, |
| "eval_steps_per_second": 7.156, |
| "step": 2600 |
| }, |
| { |
| "epoch": 5.98835597449404, |
| "grad_norm": 0.5964264869689941, |
| "learning_rate": 5.7491638795986624e-06, |
| "loss": 6.4565, |
| "step": 2700 |
| }, |
| { |
| "epoch": 5.98835597449404, |
| "eval_loss": 6.4472856521606445, |
| "eval_runtime": 174.8096, |
| "eval_samples_per_second": 57.205, |
| "eval_steps_per_second": 7.151, |
| "step": 2700 |
| }, |
| { |
| "epoch": 6.210146936512337, |
| "grad_norm": 0.9274541735649109, |
| "learning_rate": 5.739130434782609e-06, |
| "loss": 6.4532, |
| "step": 2800 |
| }, |
| { |
| "epoch": 6.210146936512337, |
| "eval_loss": 6.4447126388549805, |
| "eval_runtime": 174.8407, |
| "eval_samples_per_second": 57.195, |
| "eval_steps_per_second": 7.149, |
| "step": 2800 |
| }, |
| { |
| "epoch": 6.431937898530635, |
| "grad_norm": 1.001717209815979, |
| "learning_rate": 5.729096989966555e-06, |
| "loss": 6.4502, |
| "step": 2900 |
| }, |
| { |
| "epoch": 6.431937898530635, |
| "eval_loss": 6.442678928375244, |
| "eval_runtime": 174.8426, |
| "eval_samples_per_second": 57.194, |
| "eval_steps_per_second": 7.149, |
| "step": 2900 |
| }, |
| { |
| "epoch": 6.653728860548933, |
| "grad_norm": 1.0303460359573364, |
| "learning_rate": 5.719063545150502e-06, |
| "loss": 6.4461, |
| "step": 3000 |
| }, |
| { |
| "epoch": 6.653728860548933, |
| "eval_loss": 6.441711902618408, |
| "eval_runtime": 174.9081, |
| "eval_samples_per_second": 57.173, |
| "eval_steps_per_second": 7.147, |
| "step": 3000 |
| }, |
| { |
| "epoch": 6.87551982256723, |
| "grad_norm": 0.8993558287620544, |
| "learning_rate": 5.709030100334449e-06, |
| "loss": 6.4441, |
| "step": 3100 |
| }, |
| { |
| "epoch": 6.87551982256723, |
| "eval_loss": 6.4411845207214355, |
| "eval_runtime": 174.9473, |
| "eval_samples_per_second": 57.16, |
| "eval_steps_per_second": 7.145, |
| "step": 3100 |
| }, |
| { |
| "epoch": 7.097310784585528, |
| "grad_norm": 0.8197622299194336, |
| "learning_rate": 5.698996655518395e-06, |
| "loss": 6.4423, |
| "step": 3200 |
| }, |
| { |
| "epoch": 7.097310784585528, |
| "eval_loss": 6.436513900756836, |
| "eval_runtime": 174.8498, |
| "eval_samples_per_second": 57.192, |
| "eval_steps_per_second": 7.149, |
| "step": 3200 |
| }, |
| { |
| "epoch": 7.319101746603826, |
| "grad_norm": 0.8674586415290833, |
| "learning_rate": 5.688963210702341e-06, |
| "loss": 6.4396, |
| "step": 3300 |
| }, |
| { |
| "epoch": 7.319101746603826, |
| "eval_loss": 6.435162544250488, |
| "eval_runtime": 174.7933, |
| "eval_samples_per_second": 57.21, |
| "eval_steps_per_second": 7.151, |
| "step": 3300 |
| }, |
| { |
| "epoch": 7.540892708622124, |
| "grad_norm": 1.1237138509750366, |
| "learning_rate": 5.678929765886288e-06, |
| "loss": 6.436, |
| "step": 3400 |
| }, |
| { |
| "epoch": 7.540892708622124, |
| "eval_loss": 6.435031414031982, |
| "eval_runtime": 175.3834, |
| "eval_samples_per_second": 57.018, |
| "eval_steps_per_second": 7.127, |
| "step": 3400 |
| }, |
| { |
| "epoch": 7.762683670640421, |
| "grad_norm": 0.8178996443748474, |
| "learning_rate": 5.668896321070235e-06, |
| "loss": 6.436, |
| "step": 3500 |
| }, |
| { |
| "epoch": 7.762683670640421, |
| "eval_loss": 6.435057640075684, |
| "eval_runtime": 174.7238, |
| "eval_samples_per_second": 57.233, |
| "eval_steps_per_second": 7.154, |
| "step": 3500 |
| }, |
| { |
| "epoch": 7.984474632658719, |
| "grad_norm": 1.035356044769287, |
| "learning_rate": 5.658862876254181e-06, |
| "loss": 6.4349, |
| "step": 3600 |
| }, |
| { |
| "epoch": 7.984474632658719, |
| "eval_loss": 6.434642791748047, |
| "eval_runtime": 174.772, |
| "eval_samples_per_second": 57.217, |
| "eval_steps_per_second": 7.152, |
| "step": 3600 |
| }, |
| { |
| "epoch": 8.206265594677017, |
| "grad_norm": 0.5910846590995789, |
| "learning_rate": 5.6488294314381275e-06, |
| "loss": 6.4321, |
| "step": 3700 |
| }, |
| { |
| "epoch": 8.206265594677017, |
| "eval_loss": 6.431816577911377, |
| "eval_runtime": 174.7565, |
| "eval_samples_per_second": 57.222, |
| "eval_steps_per_second": 7.153, |
| "step": 3700 |
| }, |
| { |
| "epoch": 8.428056556695315, |
| "grad_norm": 1.0821483135223389, |
| "learning_rate": 5.638795986622074e-06, |
| "loss": 6.4311, |
| "step": 3800 |
| }, |
| { |
| "epoch": 8.428056556695315, |
| "eval_loss": 6.432049751281738, |
| "eval_runtime": 174.8038, |
| "eval_samples_per_second": 57.207, |
| "eval_steps_per_second": 7.151, |
| "step": 3800 |
| }, |
| { |
| "epoch": 8.649847518713612, |
| "grad_norm": 0.3947916328907013, |
| "learning_rate": 5.62876254180602e-06, |
| "loss": 6.4274, |
| "step": 3900 |
| }, |
| { |
| "epoch": 8.649847518713612, |
| "eval_loss": 6.434264183044434, |
| "eval_runtime": 174.9692, |
| "eval_samples_per_second": 57.153, |
| "eval_steps_per_second": 7.144, |
| "step": 3900 |
| }, |
| { |
| "epoch": 8.87163848073191, |
| "grad_norm": 0.9494003653526306, |
| "learning_rate": 5.618729096989967e-06, |
| "loss": 6.4274, |
| "step": 4000 |
| }, |
| { |
| "epoch": 8.87163848073191, |
| "eval_loss": 6.430780410766602, |
| "eval_runtime": 172.2523, |
| "eval_samples_per_second": 58.054, |
| "eval_steps_per_second": 7.257, |
| "step": 4000 |
| }, |
| { |
| "epoch": 9.093429442750208, |
| "grad_norm": 1.1131881475448608, |
| "learning_rate": 5.608695652173914e-06, |
| "loss": 6.4257, |
| "step": 4100 |
| }, |
| { |
| "epoch": 9.093429442750208, |
| "eval_loss": 6.429464817047119, |
| "eval_runtime": 174.8834, |
| "eval_samples_per_second": 57.181, |
| "eval_steps_per_second": 7.148, |
| "step": 4100 |
| }, |
| { |
| "epoch": 9.315220404768505, |
| "grad_norm": 1.5252963304519653, |
| "learning_rate": 5.59866220735786e-06, |
| "loss": 6.4234, |
| "step": 4200 |
| }, |
| { |
| "epoch": 9.315220404768505, |
| "eval_loss": 6.428813934326172, |
| "eval_runtime": 174.7709, |
| "eval_samples_per_second": 57.218, |
| "eval_steps_per_second": 7.152, |
| "step": 4200 |
| }, |
| { |
| "epoch": 9.537011366786803, |
| "grad_norm": 0.7536811828613281, |
| "learning_rate": 5.588628762541806e-06, |
| "loss": 6.4234, |
| "step": 4300 |
| }, |
| { |
| "epoch": 9.537011366786803, |
| "eval_loss": 6.428880214691162, |
| "eval_runtime": 174.7826, |
| "eval_samples_per_second": 57.214, |
| "eval_steps_per_second": 7.152, |
| "step": 4300 |
| }, |
| { |
| "epoch": 9.758802328805102, |
| "grad_norm": 0.6803523302078247, |
| "learning_rate": 5.578595317725753e-06, |
| "loss": 6.4212, |
| "step": 4400 |
| }, |
| { |
| "epoch": 9.758802328805102, |
| "eval_loss": 6.426270484924316, |
| "eval_runtime": 174.8236, |
| "eval_samples_per_second": 57.201, |
| "eval_steps_per_second": 7.15, |
| "step": 4400 |
| }, |
| { |
| "epoch": 9.9805932908234, |
| "grad_norm": 0.8163429498672485, |
| "learning_rate": 5.568561872909699e-06, |
| "loss": 6.4165, |
| "step": 4500 |
| }, |
| { |
| "epoch": 9.9805932908234, |
| "eval_loss": 6.428164005279541, |
| "eval_runtime": 174.8809, |
| "eval_samples_per_second": 57.182, |
| "eval_steps_per_second": 7.148, |
| "step": 4500 |
| }, |
| { |
| "epoch": 10.202384252841696, |
| "grad_norm": 0.630403459072113, |
| "learning_rate": 5.558528428093646e-06, |
| "loss": 6.4189, |
| "step": 4600 |
| }, |
| { |
| "epoch": 10.202384252841696, |
| "eval_loss": 6.428719520568848, |
| "eval_runtime": 174.7591, |
| "eval_samples_per_second": 57.222, |
| "eval_steps_per_second": 7.153, |
| "step": 4600 |
| }, |
| { |
| "epoch": 10.424175214859995, |
| "grad_norm": 0.8704747557640076, |
| "learning_rate": 5.548494983277593e-06, |
| "loss": 6.4192, |
| "step": 4700 |
| }, |
| { |
| "epoch": 10.424175214859995, |
| "eval_loss": 6.423656463623047, |
| "eval_runtime": 174.707, |
| "eval_samples_per_second": 57.239, |
| "eval_steps_per_second": 7.155, |
| "step": 4700 |
| }, |
| { |
| "epoch": 10.645966176878293, |
| "grad_norm": 1.2153334617614746, |
| "learning_rate": 5.5384615384615385e-06, |
| "loss": 6.4176, |
| "step": 4800 |
| }, |
| { |
| "epoch": 10.645966176878293, |
| "eval_loss": 6.427283763885498, |
| "eval_runtime": 174.8703, |
| "eval_samples_per_second": 57.185, |
| "eval_steps_per_second": 7.148, |
| "step": 4800 |
| }, |
| { |
| "epoch": 10.86775713889659, |
| "grad_norm": 0.9878360629081726, |
| "learning_rate": 5.528428093645485e-06, |
| "loss": 6.4147, |
| "step": 4900 |
| }, |
| { |
| "epoch": 10.86775713889659, |
| "eval_loss": 6.424483776092529, |
| "eval_runtime": 174.7677, |
| "eval_samples_per_second": 57.219, |
| "eval_steps_per_second": 7.152, |
| "step": 4900 |
| }, |
| { |
| "epoch": 11.089548100914888, |
| "grad_norm": 1.1536431312561035, |
| "learning_rate": 5.518394648829432e-06, |
| "loss": 6.4141, |
| "step": 5000 |
| }, |
| { |
| "epoch": 11.089548100914888, |
| "eval_loss": 6.423103332519531, |
| "eval_runtime": 174.7198, |
| "eval_samples_per_second": 57.235, |
| "eval_steps_per_second": 7.154, |
| "step": 5000 |
| }, |
| { |
| "epoch": 11.311339062933186, |
| "grad_norm": 0.5233383774757385, |
| "learning_rate": 5.508361204013378e-06, |
| "loss": 6.4143, |
| "step": 5100 |
| }, |
| { |
| "epoch": 11.311339062933186, |
| "eval_loss": 6.426151275634766, |
| "eval_runtime": 174.8201, |
| "eval_samples_per_second": 57.202, |
| "eval_steps_per_second": 7.15, |
| "step": 5100 |
| }, |
| { |
| "epoch": 11.533130024951483, |
| "grad_norm": 0.4546308219432831, |
| "learning_rate": 5.498327759197324e-06, |
| "loss": 6.4131, |
| "step": 5200 |
| }, |
| { |
| "epoch": 11.533130024951483, |
| "eval_loss": 6.41951322555542, |
| "eval_runtime": 174.8531, |
| "eval_samples_per_second": 57.191, |
| "eval_steps_per_second": 7.149, |
| "step": 5200 |
| }, |
| { |
| "epoch": 11.75492098696978, |
| "grad_norm": 0.7687248587608337, |
| "learning_rate": 5.488294314381271e-06, |
| "loss": 6.4127, |
| "step": 5300 |
| }, |
| { |
| "epoch": 11.75492098696978, |
| "eval_loss": 6.421510696411133, |
| "eval_runtime": 174.8268, |
| "eval_samples_per_second": 57.199, |
| "eval_steps_per_second": 7.15, |
| "step": 5300 |
| }, |
| { |
| "epoch": 11.97671194898808, |
| "grad_norm": 0.6706124544143677, |
| "learning_rate": 5.478260869565217e-06, |
| "loss": 6.4114, |
| "step": 5400 |
| }, |
| { |
| "epoch": 11.97671194898808, |
| "eval_loss": 6.42447566986084, |
| "eval_runtime": 174.8755, |
| "eval_samples_per_second": 57.184, |
| "eval_steps_per_second": 7.148, |
| "step": 5400 |
| }, |
| { |
| "epoch": 12.198502911006376, |
| "grad_norm": 1.165449619293213, |
| "learning_rate": 5.468227424749163e-06, |
| "loss": 6.4112, |
| "step": 5500 |
| }, |
| { |
| "epoch": 12.198502911006376, |
| "eval_loss": 6.423706531524658, |
| "eval_runtime": 174.8245, |
| "eval_samples_per_second": 57.2, |
| "eval_steps_per_second": 7.15, |
| "step": 5500 |
| }, |
| { |
| "epoch": 12.420293873024674, |
| "grad_norm": 0.614251434803009, |
| "learning_rate": 5.45819397993311e-06, |
| "loss": 6.4088, |
| "step": 5600 |
| }, |
| { |
| "epoch": 12.420293873024674, |
| "eval_loss": 6.417710304260254, |
| "eval_runtime": 174.7714, |
| "eval_samples_per_second": 57.218, |
| "eval_steps_per_second": 7.152, |
| "step": 5600 |
| }, |
| { |
| "epoch": 12.642084835042972, |
| "grad_norm": 0.7338353991508484, |
| "learning_rate": 5.448160535117057e-06, |
| "loss": 6.4093, |
| "step": 5700 |
| }, |
| { |
| "epoch": 12.642084835042972, |
| "eval_loss": 6.421204566955566, |
| "eval_runtime": 174.7739, |
| "eval_samples_per_second": 57.217, |
| "eval_steps_per_second": 7.152, |
| "step": 5700 |
| }, |
| { |
| "epoch": 12.86387579706127, |
| "grad_norm": 0.5238298773765564, |
| "learning_rate": 5.438127090301003e-06, |
| "loss": 6.4088, |
| "step": 5800 |
| }, |
| { |
| "epoch": 12.86387579706127, |
| "eval_loss": 6.418464183807373, |
| "eval_runtime": 174.8398, |
| "eval_samples_per_second": 57.195, |
| "eval_steps_per_second": 7.149, |
| "step": 5800 |
| }, |
| { |
| "epoch": 13.085666759079567, |
| "grad_norm": 0.8438045382499695, |
| "learning_rate": 5.4280936454849495e-06, |
| "loss": 6.4059, |
| "step": 5900 |
| }, |
| { |
| "epoch": 13.085666759079567, |
| "eval_loss": 6.41862678527832, |
| "eval_runtime": 174.7377, |
| "eval_samples_per_second": 57.229, |
| "eval_steps_per_second": 7.154, |
| "step": 5900 |
| }, |
| { |
| "epoch": 13.307457721097865, |
| "grad_norm": 0.6270604133605957, |
| "learning_rate": 5.418060200668896e-06, |
| "loss": 6.4083, |
| "step": 6000 |
| }, |
| { |
| "epoch": 13.307457721097865, |
| "eval_loss": 6.420100688934326, |
| "eval_runtime": 174.8134, |
| "eval_samples_per_second": 57.204, |
| "eval_steps_per_second": 7.15, |
| "step": 6000 |
| }, |
| { |
| "epoch": 13.529248683116164, |
| "grad_norm": 0.49625712633132935, |
| "learning_rate": 5.408026755852843e-06, |
| "loss": 6.4065, |
| "step": 6100 |
| }, |
| { |
| "epoch": 13.529248683116164, |
| "eval_loss": 6.41825008392334, |
| "eval_runtime": 174.9176, |
| "eval_samples_per_second": 57.17, |
| "eval_steps_per_second": 7.146, |
| "step": 6100 |
| }, |
| { |
| "epoch": 13.75103964513446, |
| "grad_norm": 0.996813178062439, |
| "learning_rate": 5.397993311036789e-06, |
| "loss": 6.4055, |
| "step": 6200 |
| }, |
| { |
| "epoch": 13.75103964513446, |
| "eval_loss": 6.419356346130371, |
| "eval_runtime": 174.915, |
| "eval_samples_per_second": 57.171, |
| "eval_steps_per_second": 7.146, |
| "step": 6200 |
| }, |
| { |
| "epoch": 13.972830607152758, |
| "grad_norm": 0.9816793203353882, |
| "learning_rate": 5.387959866220736e-06, |
| "loss": 6.4065, |
| "step": 6300 |
| }, |
| { |
| "epoch": 13.972830607152758, |
| "eval_loss": 6.4173455238342285, |
| "eval_runtime": 175.0068, |
| "eval_samples_per_second": 57.141, |
| "eval_steps_per_second": 7.143, |
| "step": 6300 |
| }, |
| { |
| "epoch": 14.194621569171057, |
| "grad_norm": 1.072190761566162, |
| "learning_rate": 5.3779264214046825e-06, |
| "loss": 6.403, |
| "step": 6400 |
| }, |
| { |
| "epoch": 14.194621569171057, |
| "eval_loss": 6.416932582855225, |
| "eval_runtime": 174.8129, |
| "eval_samples_per_second": 57.204, |
| "eval_steps_per_second": 7.151, |
| "step": 6400 |
| }, |
| { |
| "epoch": 14.416412531189353, |
| "grad_norm": 0.8124646544456482, |
| "learning_rate": 5.367892976588628e-06, |
| "loss": 6.4038, |
| "step": 6500 |
| }, |
| { |
| "epoch": 14.416412531189353, |
| "eval_loss": 6.417375087738037, |
| "eval_runtime": 174.7648, |
| "eval_samples_per_second": 57.22, |
| "eval_steps_per_second": 7.152, |
| "step": 6500 |
| }, |
| { |
| "epoch": 14.638203493207651, |
| "grad_norm": 0.6260553002357483, |
| "learning_rate": 5.357859531772575e-06, |
| "loss": 6.4045, |
| "step": 6600 |
| }, |
| { |
| "epoch": 14.638203493207651, |
| "eval_loss": 6.4163103103637695, |
| "eval_runtime": 173.3723, |
| "eval_samples_per_second": 57.679, |
| "eval_steps_per_second": 7.21, |
| "step": 6600 |
| }, |
| { |
| "epoch": 14.85999445522595, |
| "grad_norm": 0.6502517461776733, |
| "learning_rate": 5.347826086956522e-06, |
| "loss": 6.4039, |
| "step": 6700 |
| }, |
| { |
| "epoch": 14.85999445522595, |
| "eval_loss": 6.421817779541016, |
| "eval_runtime": 173.7415, |
| "eval_samples_per_second": 57.557, |
| "eval_steps_per_second": 7.195, |
| "step": 6700 |
| }, |
| { |
| "epoch": 15.081785417244248, |
| "grad_norm": 0.7852392196655273, |
| "learning_rate": 5.337792642140468e-06, |
| "loss": 6.4021, |
| "step": 6800 |
| }, |
| { |
| "epoch": 15.081785417244248, |
| "eval_loss": 6.414952278137207, |
| "eval_runtime": 174.7855, |
| "eval_samples_per_second": 57.213, |
| "eval_steps_per_second": 7.152, |
| "step": 6800 |
| }, |
| { |
| "epoch": 15.303576379262545, |
| "grad_norm": 0.5642409920692444, |
| "learning_rate": 5.327759197324415e-06, |
| "loss": 6.4018, |
| "step": 6900 |
| }, |
| { |
| "epoch": 15.303576379262545, |
| "eval_loss": 6.417159557342529, |
| "eval_runtime": 172.3834, |
| "eval_samples_per_second": 58.01, |
| "eval_steps_per_second": 7.251, |
| "step": 6900 |
| }, |
| { |
| "epoch": 15.525367341280843, |
| "grad_norm": 0.5935277938842773, |
| "learning_rate": 5.317725752508361e-06, |
| "loss": 6.4015, |
| "step": 7000 |
| }, |
| { |
| "epoch": 15.525367341280843, |
| "eval_loss": 6.419808864593506, |
| "eval_runtime": 174.8129, |
| "eval_samples_per_second": 57.204, |
| "eval_steps_per_second": 7.151, |
| "step": 7000 |
| }, |
| { |
| "epoch": 15.747158303299141, |
| "grad_norm": 0.8796281218528748, |
| "learning_rate": 5.307692307692307e-06, |
| "loss": 6.402, |
| "step": 7100 |
| }, |
| { |
| "epoch": 15.747158303299141, |
| "eval_loss": 6.413030624389648, |
| "eval_runtime": 174.991, |
| "eval_samples_per_second": 57.146, |
| "eval_steps_per_second": 7.143, |
| "step": 7100 |
| }, |
| { |
| "epoch": 15.968949265317438, |
| "grad_norm": 0.686579167842865, |
| "learning_rate": 5.297658862876254e-06, |
| "loss": 6.4003, |
| "step": 7200 |
| }, |
| { |
| "epoch": 15.968949265317438, |
| "eval_loss": 6.412362575531006, |
| "eval_runtime": 174.9653, |
| "eval_samples_per_second": 57.154, |
| "eval_steps_per_second": 7.144, |
| "step": 7200 |
| }, |
| { |
| "epoch": 16.190740227335738, |
| "grad_norm": 0.8254374265670776, |
| "learning_rate": 5.287625418060201e-06, |
| "loss": 6.4003, |
| "step": 7300 |
| }, |
| { |
| "epoch": 16.190740227335738, |
| "eval_loss": 6.415155410766602, |
| "eval_runtime": 174.7441, |
| "eval_samples_per_second": 57.227, |
| "eval_steps_per_second": 7.153, |
| "step": 7300 |
| }, |
| { |
| "epoch": 16.412531189354034, |
| "grad_norm": 1.0479621887207031, |
| "learning_rate": 5.277591973244147e-06, |
| "loss": 6.3999, |
| "step": 7400 |
| }, |
| { |
| "epoch": 16.412531189354034, |
| "eval_loss": 6.4169602394104, |
| "eval_runtime": 174.7973, |
| "eval_samples_per_second": 57.209, |
| "eval_steps_per_second": 7.151, |
| "step": 7400 |
| }, |
| { |
| "epoch": 16.63432215137233, |
| "grad_norm": 0.8358107805252075, |
| "learning_rate": 5.2675585284280935e-06, |
| "loss": 6.3989, |
| "step": 7500 |
| }, |
| { |
| "epoch": 16.63432215137233, |
| "eval_loss": 6.417453765869141, |
| "eval_runtime": 174.497, |
| "eval_samples_per_second": 57.308, |
| "eval_steps_per_second": 7.163, |
| "step": 7500 |
| }, |
| { |
| "epoch": 16.85611311339063, |
| "grad_norm": 0.6018221378326416, |
| "learning_rate": 5.25752508361204e-06, |
| "loss": 6.3991, |
| "step": 7600 |
| }, |
| { |
| "epoch": 16.85611311339063, |
| "eval_loss": 6.4126434326171875, |
| "eval_runtime": 172.7161, |
| "eval_samples_per_second": 57.898, |
| "eval_steps_per_second": 7.237, |
| "step": 7600 |
| }, |
| { |
| "epoch": 17.077904075408927, |
| "grad_norm": 1.0999138355255127, |
| "learning_rate": 5.247491638795986e-06, |
| "loss": 6.3981, |
| "step": 7700 |
| }, |
| { |
| "epoch": 17.077904075408927, |
| "eval_loss": 6.413776397705078, |
| "eval_runtime": 174.7746, |
| "eval_samples_per_second": 57.217, |
| "eval_steps_per_second": 7.152, |
| "step": 7700 |
| }, |
| { |
| "epoch": 17.299695037427224, |
| "grad_norm": 0.5430467128753662, |
| "learning_rate": 5.237458193979933e-06, |
| "loss": 6.3993, |
| "step": 7800 |
| }, |
| { |
| "epoch": 17.299695037427224, |
| "eval_loss": 6.414647102355957, |
| "eval_runtime": 174.8381, |
| "eval_samples_per_second": 57.196, |
| "eval_steps_per_second": 7.149, |
| "step": 7800 |
| }, |
| { |
| "epoch": 17.521485999445524, |
| "grad_norm": 0.588058352470398, |
| "learning_rate": 5.22742474916388e-06, |
| "loss": 6.3976, |
| "step": 7900 |
| }, |
| { |
| "epoch": 17.521485999445524, |
| "eval_loss": 6.413895130157471, |
| "eval_runtime": 174.9633, |
| "eval_samples_per_second": 57.155, |
| "eval_steps_per_second": 7.144, |
| "step": 7900 |
| }, |
| { |
| "epoch": 17.74327696146382, |
| "grad_norm": 0.365583598613739, |
| "learning_rate": 5.2173913043478265e-06, |
| "loss": 6.3966, |
| "step": 8000 |
| }, |
| { |
| "epoch": 17.74327696146382, |
| "eval_loss": 6.409445285797119, |
| "eval_runtime": 174.9468, |
| "eval_samples_per_second": 57.16, |
| "eval_steps_per_second": 7.145, |
| "step": 8000 |
| }, |
| { |
| "epoch": 17.965067923482117, |
| "grad_norm": 0.6981125473976135, |
| "learning_rate": 5.207357859531772e-06, |
| "loss": 6.3974, |
| "step": 8100 |
| }, |
| { |
| "epoch": 17.965067923482117, |
| "eval_loss": 6.413646221160889, |
| "eval_runtime": 174.9417, |
| "eval_samples_per_second": 57.162, |
| "eval_steps_per_second": 7.145, |
| "step": 8100 |
| }, |
| { |
| "epoch": 18.186858885500417, |
| "grad_norm": 0.6041765213012695, |
| "learning_rate": 5.197324414715719e-06, |
| "loss": 6.3985, |
| "step": 8200 |
| }, |
| { |
| "epoch": 18.186858885500417, |
| "eval_loss": 6.411979675292969, |
| "eval_runtime": 174.8191, |
| "eval_samples_per_second": 57.202, |
| "eval_steps_per_second": 7.15, |
| "step": 8200 |
| }, |
| { |
| "epoch": 18.408649847518713, |
| "grad_norm": 0.7936201095581055, |
| "learning_rate": 5.187290969899666e-06, |
| "loss": 6.3964, |
| "step": 8300 |
| }, |
| { |
| "epoch": 18.408649847518713, |
| "eval_loss": 6.40911865234375, |
| "eval_runtime": 174.818, |
| "eval_samples_per_second": 57.202, |
| "eval_steps_per_second": 7.15, |
| "step": 8300 |
| }, |
| { |
| "epoch": 18.63044080953701, |
| "grad_norm": 0.6278252005577087, |
| "learning_rate": 5.177257525083612e-06, |
| "loss": 6.3957, |
| "step": 8400 |
| }, |
| { |
| "epoch": 18.63044080953701, |
| "eval_loss": 6.413068771362305, |
| "eval_runtime": 172.3693, |
| "eval_samples_per_second": 58.015, |
| "eval_steps_per_second": 7.252, |
| "step": 8400 |
| }, |
| { |
| "epoch": 18.85223177155531, |
| "grad_norm": 0.6582921743392944, |
| "learning_rate": 5.167224080267559e-06, |
| "loss": 6.3956, |
| "step": 8500 |
| }, |
| { |
| "epoch": 18.85223177155531, |
| "eval_loss": 6.410306453704834, |
| "eval_runtime": 174.8171, |
| "eval_samples_per_second": 57.203, |
| "eval_steps_per_second": 7.15, |
| "step": 8500 |
| }, |
| { |
| "epoch": 19.074022733573607, |
| "grad_norm": 0.8874194622039795, |
| "learning_rate": 5.157190635451505e-06, |
| "loss": 6.3975, |
| "step": 8600 |
| }, |
| { |
| "epoch": 19.074022733573607, |
| "eval_loss": 6.409109592437744, |
| "eval_runtime": 172.2351, |
| "eval_samples_per_second": 58.06, |
| "eval_steps_per_second": 7.258, |
| "step": 8600 |
| }, |
| { |
| "epoch": 19.295813695591903, |
| "grad_norm": 0.589608907699585, |
| "learning_rate": 5.147157190635451e-06, |
| "loss": 6.3957, |
| "step": 8700 |
| }, |
| { |
| "epoch": 19.295813695591903, |
| "eval_loss": 6.413524150848389, |
| "eval_runtime": 174.767, |
| "eval_samples_per_second": 57.219, |
| "eval_steps_per_second": 7.152, |
| "step": 8700 |
| }, |
| { |
| "epoch": 19.517604657610203, |
| "grad_norm": 0.7026548385620117, |
| "learning_rate": 5.137123745819398e-06, |
| "loss": 6.3942, |
| "step": 8800 |
| }, |
| { |
| "epoch": 19.517604657610203, |
| "eval_loss": 6.41259241104126, |
| "eval_runtime": 174.7786, |
| "eval_samples_per_second": 57.215, |
| "eval_steps_per_second": 7.152, |
| "step": 8800 |
| }, |
| { |
| "epoch": 19.7393956196285, |
| "grad_norm": 0.7508072257041931, |
| "learning_rate": 5.127090301003345e-06, |
| "loss": 6.3936, |
| "step": 8900 |
| }, |
| { |
| "epoch": 19.7393956196285, |
| "eval_loss": 6.410432815551758, |
| "eval_runtime": 174.7865, |
| "eval_samples_per_second": 57.213, |
| "eval_steps_per_second": 7.152, |
| "step": 8900 |
| }, |
| { |
| "epoch": 19.9611865816468, |
| "grad_norm": 0.36028188467025757, |
| "learning_rate": 5.117056856187291e-06, |
| "loss": 6.3943, |
| "step": 9000 |
| }, |
| { |
| "epoch": 19.9611865816468, |
| "eval_loss": 6.409936904907227, |
| "eval_runtime": 174.852, |
| "eval_samples_per_second": 57.191, |
| "eval_steps_per_second": 7.149, |
| "step": 9000 |
| }, |
| { |
| "epoch": 20.182977543665096, |
| "grad_norm": 0.8198152184486389, |
| "learning_rate": 5.1070234113712375e-06, |
| "loss": 6.3939, |
| "step": 9100 |
| }, |
| { |
| "epoch": 20.182977543665096, |
| "eval_loss": 6.412051677703857, |
| "eval_runtime": 174.8255, |
| "eval_samples_per_second": 57.2, |
| "eval_steps_per_second": 7.15, |
| "step": 9100 |
| }, |
| { |
| "epoch": 20.404768505683393, |
| "grad_norm": 0.6599276065826416, |
| "learning_rate": 5.096989966555184e-06, |
| "loss": 6.3939, |
| "step": 9200 |
| }, |
| { |
| "epoch": 20.404768505683393, |
| "eval_loss": 6.411386489868164, |
| "eval_runtime": 174.8596, |
| "eval_samples_per_second": 57.189, |
| "eval_steps_per_second": 7.149, |
| "step": 9200 |
| }, |
| { |
| "epoch": 20.626559467701693, |
| "grad_norm": 0.736455500125885, |
| "learning_rate": 5.08695652173913e-06, |
| "loss": 6.3931, |
| "step": 9300 |
| }, |
| { |
| "epoch": 20.626559467701693, |
| "eval_loss": 6.40945291519165, |
| "eval_runtime": 174.8463, |
| "eval_samples_per_second": 57.193, |
| "eval_steps_per_second": 7.149, |
| "step": 9300 |
| }, |
| { |
| "epoch": 20.84835042971999, |
| "grad_norm": 0.7547162175178528, |
| "learning_rate": 5.076923076923077e-06, |
| "loss": 6.393, |
| "step": 9400 |
| }, |
| { |
| "epoch": 20.84835042971999, |
| "eval_loss": 6.409768581390381, |
| "eval_runtime": 175.1356, |
| "eval_samples_per_second": 57.099, |
| "eval_steps_per_second": 7.137, |
| "step": 9400 |
| }, |
| { |
| "epoch": 21.070141391738286, |
| "grad_norm": 0.4197324216365814, |
| "learning_rate": 5.066889632107024e-06, |
| "loss": 6.3943, |
| "step": 9500 |
| }, |
| { |
| "epoch": 21.070141391738286, |
| "eval_loss": 6.4077606201171875, |
| "eval_runtime": 174.8821, |
| "eval_samples_per_second": 57.181, |
| "eval_steps_per_second": 7.148, |
| "step": 9500 |
| }, |
| { |
| "epoch": 21.291932353756586, |
| "grad_norm": 0.6552382111549377, |
| "learning_rate": 5.05685618729097e-06, |
| "loss": 6.3927, |
| "step": 9600 |
| }, |
| { |
| "epoch": 21.291932353756586, |
| "eval_loss": 6.40675163269043, |
| "eval_runtime": 174.964, |
| "eval_samples_per_second": 57.155, |
| "eval_steps_per_second": 7.144, |
| "step": 9600 |
| }, |
| { |
| "epoch": 21.513723315774882, |
| "grad_norm": 0.507618248462677, |
| "learning_rate": 5.046822742474916e-06, |
| "loss": 6.3948, |
| "step": 9700 |
| }, |
| { |
| "epoch": 21.513723315774882, |
| "eval_loss": 6.4094719886779785, |
| "eval_runtime": 174.8996, |
| "eval_samples_per_second": 57.176, |
| "eval_steps_per_second": 7.147, |
| "step": 9700 |
| }, |
| { |
| "epoch": 21.73551427779318, |
| "grad_norm": 1.0394549369812012, |
| "learning_rate": 5.036789297658863e-06, |
| "loss": 6.3933, |
| "step": 9800 |
| }, |
| { |
| "epoch": 21.73551427779318, |
| "eval_loss": 6.411880016326904, |
| "eval_runtime": 174.8373, |
| "eval_samples_per_second": 57.196, |
| "eval_steps_per_second": 7.15, |
| "step": 9800 |
| }, |
| { |
| "epoch": 21.95730523981148, |
| "grad_norm": 0.852592945098877, |
| "learning_rate": 5.02675585284281e-06, |
| "loss": 6.3932, |
| "step": 9900 |
| }, |
| { |
| "epoch": 21.95730523981148, |
| "eval_loss": 6.405695915222168, |
| "eval_runtime": 174.9233, |
| "eval_samples_per_second": 57.168, |
| "eval_steps_per_second": 7.146, |
| "step": 9900 |
| }, |
| { |
| "epoch": 22.179096201829775, |
| "grad_norm": 0.6302698254585266, |
| "learning_rate": 5.016722408026756e-06, |
| "loss": 6.3914, |
| "step": 10000 |
| }, |
| { |
| "epoch": 22.179096201829775, |
| "eval_loss": 6.404843807220459, |
| "eval_runtime": 174.881, |
| "eval_samples_per_second": 57.182, |
| "eval_steps_per_second": 7.148, |
| "step": 10000 |
| }, |
| { |
| "epoch": 22.400887163848072, |
| "grad_norm": 0.5545974969863892, |
| "learning_rate": 5.0066889632107026e-06, |
| "loss": 6.3913, |
| "step": 10100 |
| }, |
| { |
| "epoch": 22.400887163848072, |
| "eval_loss": 6.4088826179504395, |
| "eval_runtime": 174.9362, |
| "eval_samples_per_second": 57.164, |
| "eval_steps_per_second": 7.145, |
| "step": 10100 |
| }, |
| { |
| "epoch": 22.622678125866372, |
| "grad_norm": 0.6303640007972717, |
| "learning_rate": 4.996655518394649e-06, |
| "loss": 6.3916, |
| "step": 10200 |
| }, |
| { |
| "epoch": 22.622678125866372, |
| "eval_loss": 6.406084060668945, |
| "eval_runtime": 174.8669, |
| "eval_samples_per_second": 57.186, |
| "eval_steps_per_second": 7.148, |
| "step": 10200 |
| }, |
| { |
| "epoch": 22.84446908788467, |
| "grad_norm": 0.6866323947906494, |
| "learning_rate": 4.986622073578595e-06, |
| "loss": 6.3922, |
| "step": 10300 |
| }, |
| { |
| "epoch": 22.84446908788467, |
| "eval_loss": 6.406491279602051, |
| "eval_runtime": 172.5199, |
| "eval_samples_per_second": 57.964, |
| "eval_steps_per_second": 7.246, |
| "step": 10300 |
| }, |
| { |
| "epoch": 23.066260049902965, |
| "grad_norm": 0.5681377649307251, |
| "learning_rate": 4.976588628762542e-06, |
| "loss": 6.3919, |
| "step": 10400 |
| }, |
| { |
| "epoch": 23.066260049902965, |
| "eval_loss": 6.407881259918213, |
| "eval_runtime": 174.7996, |
| "eval_samples_per_second": 57.208, |
| "eval_steps_per_second": 7.151, |
| "step": 10400 |
| }, |
| { |
| "epoch": 23.288051011921265, |
| "grad_norm": 0.5302285552024841, |
| "learning_rate": 4.966555183946489e-06, |
| "loss": 6.3928, |
| "step": 10500 |
| }, |
| { |
| "epoch": 23.288051011921265, |
| "eval_loss": 6.4045891761779785, |
| "eval_runtime": 175.2024, |
| "eval_samples_per_second": 57.077, |
| "eval_steps_per_second": 7.135, |
| "step": 10500 |
| }, |
| { |
| "epoch": 23.50984197393956, |
| "grad_norm": 0.5630497336387634, |
| "learning_rate": 4.956521739130435e-06, |
| "loss": 6.3903, |
| "step": 10600 |
| }, |
| { |
| "epoch": 23.50984197393956, |
| "eval_loss": 6.406449317932129, |
| "eval_runtime": 172.7598, |
| "eval_samples_per_second": 57.884, |
| "eval_steps_per_second": 7.235, |
| "step": 10600 |
| }, |
| { |
| "epoch": 23.731632935957858, |
| "grad_norm": 0.5340705513954163, |
| "learning_rate": 4.9464882943143815e-06, |
| "loss": 6.3896, |
| "step": 10700 |
| }, |
| { |
| "epoch": 23.731632935957858, |
| "eval_loss": 6.408339023590088, |
| "eval_runtime": 175.3018, |
| "eval_samples_per_second": 57.044, |
| "eval_steps_per_second": 7.131, |
| "step": 10700 |
| }, |
| { |
| "epoch": 23.95342389797616, |
| "grad_norm": 0.7192414402961731, |
| "learning_rate": 4.936454849498328e-06, |
| "loss": 6.3904, |
| "step": 10800 |
| }, |
| { |
| "epoch": 23.95342389797616, |
| "eval_loss": 6.408904075622559, |
| "eval_runtime": 175.321, |
| "eval_samples_per_second": 57.038, |
| "eval_steps_per_second": 7.13, |
| "step": 10800 |
| }, |
| { |
| "epoch": 24.175214859994455, |
| "grad_norm": 0.7297828197479248, |
| "learning_rate": 4.926421404682274e-06, |
| "loss": 6.3906, |
| "step": 10900 |
| }, |
| { |
| "epoch": 24.175214859994455, |
| "eval_loss": 6.406455993652344, |
| "eval_runtime": 175.3497, |
| "eval_samples_per_second": 57.029, |
| "eval_steps_per_second": 7.129, |
| "step": 10900 |
| }, |
| { |
| "epoch": 24.39700582201275, |
| "grad_norm": 0.8612614870071411, |
| "learning_rate": 4.916387959866221e-06, |
| "loss": 6.389, |
| "step": 11000 |
| }, |
| { |
| "epoch": 24.39700582201275, |
| "eval_loss": 6.4049272537231445, |
| "eval_runtime": 174.9884, |
| "eval_samples_per_second": 57.147, |
| "eval_steps_per_second": 7.143, |
| "step": 11000 |
| }, |
| { |
| "epoch": 24.61879678403105, |
| "grad_norm": 0.39626169204711914, |
| "learning_rate": 4.906354515050168e-06, |
| "loss": 6.3904, |
| "step": 11100 |
| }, |
| { |
| "epoch": 24.61879678403105, |
| "eval_loss": 6.399599075317383, |
| "eval_runtime": 175.2182, |
| "eval_samples_per_second": 57.072, |
| "eval_steps_per_second": 7.134, |
| "step": 11100 |
| }, |
| { |
| "epoch": 24.840587746049348, |
| "grad_norm": 0.47381725907325745, |
| "learning_rate": 4.8963210702341136e-06, |
| "loss": 6.3896, |
| "step": 11200 |
| }, |
| { |
| "epoch": 24.840587746049348, |
| "eval_loss": 6.405921459197998, |
| "eval_runtime": 174.9189, |
| "eval_samples_per_second": 57.169, |
| "eval_steps_per_second": 7.146, |
| "step": 11200 |
| }, |
| { |
| "epoch": 25.062378708067648, |
| "grad_norm": 0.567333996295929, |
| "learning_rate": 4.88628762541806e-06, |
| "loss": 6.3886, |
| "step": 11300 |
| }, |
| { |
| "epoch": 25.062378708067648, |
| "eval_loss": 6.409249782562256, |
| "eval_runtime": 174.8058, |
| "eval_samples_per_second": 57.206, |
| "eval_steps_per_second": 7.151, |
| "step": 11300 |
| }, |
| { |
| "epoch": 25.284169670085944, |
| "grad_norm": 0.47083523869514465, |
| "learning_rate": 4.876254180602007e-06, |
| "loss": 6.3892, |
| "step": 11400 |
| }, |
| { |
| "epoch": 25.284169670085944, |
| "eval_loss": 6.406309604644775, |
| "eval_runtime": 174.8008, |
| "eval_samples_per_second": 57.208, |
| "eval_steps_per_second": 7.151, |
| "step": 11400 |
| }, |
| { |
| "epoch": 25.50596063210424, |
| "grad_norm": 0.4636823832988739, |
| "learning_rate": 4.866220735785953e-06, |
| "loss": 6.3905, |
| "step": 11500 |
| }, |
| { |
| "epoch": 25.50596063210424, |
| "eval_loss": 6.4087066650390625, |
| "eval_runtime": 174.7802, |
| "eval_samples_per_second": 57.215, |
| "eval_steps_per_second": 7.152, |
| "step": 11500 |
| }, |
| { |
| "epoch": 25.72775159412254, |
| "grad_norm": 0.8328993916511536, |
| "learning_rate": 4.8561872909699e-06, |
| "loss": 6.3888, |
| "step": 11600 |
| }, |
| { |
| "epoch": 25.72775159412254, |
| "eval_loss": 6.405496120452881, |
| "eval_runtime": 172.4449, |
| "eval_samples_per_second": 57.99, |
| "eval_steps_per_second": 7.249, |
| "step": 11600 |
| }, |
| { |
| "epoch": 25.949542556140837, |
| "grad_norm": 0.5866479873657227, |
| "learning_rate": 4.8461538461538465e-06, |
| "loss": 6.3895, |
| "step": 11700 |
| }, |
| { |
| "epoch": 25.949542556140837, |
| "eval_loss": 6.4065117835998535, |
| "eval_runtime": 172.4094, |
| "eval_samples_per_second": 58.001, |
| "eval_steps_per_second": 7.25, |
| "step": 11700 |
| }, |
| { |
| "epoch": 26.171333518159134, |
| "grad_norm": 0.7557168006896973, |
| "learning_rate": 4.8361204013377925e-06, |
| "loss": 6.3901, |
| "step": 11800 |
| }, |
| { |
| "epoch": 26.171333518159134, |
| "eval_loss": 6.404352188110352, |
| "eval_runtime": 174.7894, |
| "eval_samples_per_second": 57.212, |
| "eval_steps_per_second": 7.151, |
| "step": 11800 |
| }, |
| { |
| "epoch": 26.393124480177434, |
| "grad_norm": 0.5010234117507935, |
| "learning_rate": 4.826086956521739e-06, |
| "loss": 6.3881, |
| "step": 11900 |
| }, |
| { |
| "epoch": 26.393124480177434, |
| "eval_loss": 6.406057834625244, |
| "eval_runtime": 174.8928, |
| "eval_samples_per_second": 57.178, |
| "eval_steps_per_second": 7.147, |
| "step": 11900 |
| }, |
| { |
| "epoch": 26.61491544219573, |
| "grad_norm": 0.5228267312049866, |
| "learning_rate": 4.816053511705686e-06, |
| "loss": 6.3893, |
| "step": 12000 |
| }, |
| { |
| "epoch": 26.61491544219573, |
| "eval_loss": 6.403919219970703, |
| "eval_runtime": 173.0368, |
| "eval_samples_per_second": 57.791, |
| "eval_steps_per_second": 7.224, |
| "step": 12000 |
| }, |
| { |
| "epoch": 26.836706404214027, |
| "grad_norm": 0.41645535826683044, |
| "learning_rate": 4.806020066889633e-06, |
| "loss": 6.3893, |
| "step": 12100 |
| }, |
| { |
| "epoch": 26.836706404214027, |
| "eval_loss": 6.403182029724121, |
| "eval_runtime": 173.7518, |
| "eval_samples_per_second": 57.553, |
| "eval_steps_per_second": 7.194, |
| "step": 12100 |
| }, |
| { |
| "epoch": 27.058497366232327, |
| "grad_norm": 0.6280103325843811, |
| "learning_rate": 4.795986622073579e-06, |
| "loss": 6.388, |
| "step": 12200 |
| }, |
| { |
| "epoch": 27.058497366232327, |
| "eval_loss": 6.406325817108154, |
| "eval_runtime": 172.5376, |
| "eval_samples_per_second": 57.958, |
| "eval_steps_per_second": 7.245, |
| "step": 12200 |
| }, |
| { |
| "epoch": 27.280288328250624, |
| "grad_norm": 0.4701608419418335, |
| "learning_rate": 4.785953177257525e-06, |
| "loss": 6.3891, |
| "step": 12300 |
| }, |
| { |
| "epoch": 27.280288328250624, |
| "eval_loss": 6.403144836425781, |
| "eval_runtime": 174.7938, |
| "eval_samples_per_second": 57.21, |
| "eval_steps_per_second": 7.151, |
| "step": 12300 |
| }, |
| { |
| "epoch": 27.50207929026892, |
| "grad_norm": 0.49227380752563477, |
| "learning_rate": 4.775919732441472e-06, |
| "loss": 6.3893, |
| "step": 12400 |
| }, |
| { |
| "epoch": 27.50207929026892, |
| "eval_loss": 6.404545783996582, |
| "eval_runtime": 172.6406, |
| "eval_samples_per_second": 57.924, |
| "eval_steps_per_second": 7.24, |
| "step": 12400 |
| }, |
| { |
| "epoch": 27.72387025228722, |
| "grad_norm": 0.5558980703353882, |
| "learning_rate": 4.765886287625418e-06, |
| "loss": 6.3883, |
| "step": 12500 |
| }, |
| { |
| "epoch": 27.72387025228722, |
| "eval_loss": 6.402305603027344, |
| "eval_runtime": 174.8403, |
| "eval_samples_per_second": 57.195, |
| "eval_steps_per_second": 7.149, |
| "step": 12500 |
| }, |
| { |
| "epoch": 27.945661214305517, |
| "grad_norm": 0.7037143707275391, |
| "learning_rate": 4.755852842809365e-06, |
| "loss": 6.3885, |
| "step": 12600 |
| }, |
| { |
| "epoch": 27.945661214305517, |
| "eval_loss": 6.403327465057373, |
| "eval_runtime": 172.6409, |
| "eval_samples_per_second": 57.924, |
| "eval_steps_per_second": 7.24, |
| "step": 12600 |
| }, |
| { |
| "epoch": 28.167452176323813, |
| "grad_norm": 0.8158712983131409, |
| "learning_rate": 4.745819397993312e-06, |
| "loss": 6.3858, |
| "step": 12700 |
| }, |
| { |
| "epoch": 28.167452176323813, |
| "eval_loss": 6.40453577041626, |
| "eval_runtime": 174.7251, |
| "eval_samples_per_second": 57.233, |
| "eval_steps_per_second": 7.154, |
| "step": 12700 |
| }, |
| { |
| "epoch": 28.389243138342113, |
| "grad_norm": 0.49727940559387207, |
| "learning_rate": 4.7357859531772575e-06, |
| "loss": 6.3882, |
| "step": 12800 |
| }, |
| { |
| "epoch": 28.389243138342113, |
| "eval_loss": 6.404928684234619, |
| "eval_runtime": 174.7858, |
| "eval_samples_per_second": 57.213, |
| "eval_steps_per_second": 7.152, |
| "step": 12800 |
| }, |
| { |
| "epoch": 28.61103410036041, |
| "grad_norm": 0.5173976421356201, |
| "learning_rate": 4.725752508361204e-06, |
| "loss": 6.3866, |
| "step": 12900 |
| }, |
| { |
| "epoch": 28.61103410036041, |
| "eval_loss": 6.40172815322876, |
| "eval_runtime": 174.5561, |
| "eval_samples_per_second": 57.288, |
| "eval_steps_per_second": 7.161, |
| "step": 12900 |
| }, |
| { |
| "epoch": 28.832825062378706, |
| "grad_norm": 0.5842565298080444, |
| "learning_rate": 4.715719063545151e-06, |
| "loss": 6.3891, |
| "step": 13000 |
| }, |
| { |
| "epoch": 28.832825062378706, |
| "eval_loss": 6.401641845703125, |
| "eval_runtime": 172.3935, |
| "eval_samples_per_second": 58.007, |
| "eval_steps_per_second": 7.251, |
| "step": 13000 |
| }, |
| { |
| "epoch": 29.054616024397006, |
| "grad_norm": 0.6438339948654175, |
| "learning_rate": 4.705685618729097e-06, |
| "loss": 6.3869, |
| "step": 13100 |
| }, |
| { |
| "epoch": 29.054616024397006, |
| "eval_loss": 6.403342247009277, |
| "eval_runtime": 174.8489, |
| "eval_samples_per_second": 57.192, |
| "eval_steps_per_second": 7.149, |
| "step": 13100 |
| }, |
| { |
| "epoch": 29.276406986415303, |
| "grad_norm": 0.5338951349258423, |
| "learning_rate": 4.695652173913044e-06, |
| "loss": 6.3882, |
| "step": 13200 |
| }, |
| { |
| "epoch": 29.276406986415303, |
| "eval_loss": 6.400930404663086, |
| "eval_runtime": 172.4302, |
| "eval_samples_per_second": 57.994, |
| "eval_steps_per_second": 7.249, |
| "step": 13200 |
| }, |
| { |
| "epoch": 29.498197948433603, |
| "grad_norm": 0.5359793305397034, |
| "learning_rate": 4.6856187290969905e-06, |
| "loss": 6.3878, |
| "step": 13300 |
| }, |
| { |
| "epoch": 29.498197948433603, |
| "eval_loss": 6.406982898712158, |
| "eval_runtime": 174.802, |
| "eval_samples_per_second": 57.208, |
| "eval_steps_per_second": 7.151, |
| "step": 13300 |
| }, |
| { |
| "epoch": 29.7199889104519, |
| "grad_norm": 0.715033233165741, |
| "learning_rate": 4.675585284280936e-06, |
| "loss": 6.3859, |
| "step": 13400 |
| }, |
| { |
| "epoch": 29.7199889104519, |
| "eval_loss": 6.40342903137207, |
| "eval_runtime": 174.8452, |
| "eval_samples_per_second": 57.193, |
| "eval_steps_per_second": 7.149, |
| "step": 13400 |
| }, |
| { |
| "epoch": 29.941779872470196, |
| "grad_norm": 0.934853732585907, |
| "learning_rate": 4.665551839464883e-06, |
| "loss": 6.3875, |
| "step": 13500 |
| }, |
| { |
| "epoch": 29.941779872470196, |
| "eval_loss": 6.401629447937012, |
| "eval_runtime": 174.7924, |
| "eval_samples_per_second": 57.211, |
| "eval_steps_per_second": 7.151, |
| "step": 13500 |
| }, |
| { |
| "epoch": 30.163570834488496, |
| "grad_norm": 0.479612797498703, |
| "learning_rate": 4.65551839464883e-06, |
| "loss": 6.3866, |
| "step": 13600 |
| }, |
| { |
| "epoch": 30.163570834488496, |
| "eval_loss": 6.399043560028076, |
| "eval_runtime": 176.925, |
| "eval_samples_per_second": 56.521, |
| "eval_steps_per_second": 7.065, |
| "step": 13600 |
| }, |
| { |
| "epoch": 30.385361796506793, |
| "grad_norm": 0.5256738662719727, |
| "learning_rate": 4.645484949832776e-06, |
| "loss": 6.3878, |
| "step": 13700 |
| }, |
| { |
| "epoch": 30.385361796506793, |
| "eval_loss": 6.400505065917969, |
| "eval_runtime": 175.3482, |
| "eval_samples_per_second": 57.029, |
| "eval_steps_per_second": 7.129, |
| "step": 13700 |
| }, |
| { |
| "epoch": 30.60715275852509, |
| "grad_norm": 0.5690653920173645, |
| "learning_rate": 4.635451505016723e-06, |
| "loss": 6.3848, |
| "step": 13800 |
| }, |
| { |
| "epoch": 30.60715275852509, |
| "eval_loss": 6.403696060180664, |
| "eval_runtime": 172.8083, |
| "eval_samples_per_second": 57.868, |
| "eval_steps_per_second": 7.233, |
| "step": 13800 |
| }, |
| { |
| "epoch": 30.82894372054339, |
| "grad_norm": 0.4565252363681793, |
| "learning_rate": 4.625418060200669e-06, |
| "loss": 6.3849, |
| "step": 13900 |
| }, |
| { |
| "epoch": 30.82894372054339, |
| "eval_loss": 6.403767108917236, |
| "eval_runtime": 175.7515, |
| "eval_samples_per_second": 56.899, |
| "eval_steps_per_second": 7.112, |
| "step": 13900 |
| }, |
| { |
| "epoch": 31.050734682561686, |
| "grad_norm": 0.4801616966724396, |
| "learning_rate": 4.615384615384616e-06, |
| "loss": 6.3869, |
| "step": 14000 |
| }, |
| { |
| "epoch": 31.050734682561686, |
| "eval_loss": 6.400508403778076, |
| "eval_runtime": 174.4685, |
| "eval_samples_per_second": 57.317, |
| "eval_steps_per_second": 7.165, |
| "step": 14000 |
| }, |
| { |
| "epoch": 31.272525644579982, |
| "grad_norm": 0.5834231972694397, |
| "learning_rate": 4.605351170568562e-06, |
| "loss": 6.3853, |
| "step": 14100 |
| }, |
| { |
| "epoch": 31.272525644579982, |
| "eval_loss": 6.400169849395752, |
| "eval_runtime": 175.4977, |
| "eval_samples_per_second": 56.981, |
| "eval_steps_per_second": 7.123, |
| "step": 14100 |
| }, |
| { |
| "epoch": 31.494316606598282, |
| "grad_norm": 0.6701497435569763, |
| "learning_rate": 4.595317725752509e-06, |
| "loss": 6.3865, |
| "step": 14200 |
| }, |
| { |
| "epoch": 31.494316606598282, |
| "eval_loss": 6.397976875305176, |
| "eval_runtime": 175.4612, |
| "eval_samples_per_second": 56.993, |
| "eval_steps_per_second": 7.124, |
| "step": 14200 |
| }, |
| { |
| "epoch": 31.71610756861658, |
| "grad_norm": 0.4794948697090149, |
| "learning_rate": 4.585284280936456e-06, |
| "loss": 6.3852, |
| "step": 14300 |
| }, |
| { |
| "epoch": 31.71610756861658, |
| "eval_loss": 6.403610706329346, |
| "eval_runtime": 176.2646, |
| "eval_samples_per_second": 56.733, |
| "eval_steps_per_second": 7.092, |
| "step": 14300 |
| }, |
| { |
| "epoch": 31.937898530634875, |
| "grad_norm": 0.6028741002082825, |
| "learning_rate": 4.5752508361204015e-06, |
| "loss": 6.3851, |
| "step": 14400 |
| }, |
| { |
| "epoch": 31.937898530634875, |
| "eval_loss": 6.400261878967285, |
| "eval_runtime": 174.9022, |
| "eval_samples_per_second": 57.175, |
| "eval_steps_per_second": 7.147, |
| "step": 14400 |
| }, |
| { |
| "epoch": 32.15968949265317, |
| "grad_norm": 0.7439810037612915, |
| "learning_rate": 4.565217391304348e-06, |
| "loss": 6.3839, |
| "step": 14500 |
| }, |
| { |
| "epoch": 32.15968949265317, |
| "eval_loss": 6.397915363311768, |
| "eval_runtime": 172.885, |
| "eval_samples_per_second": 57.842, |
| "eval_steps_per_second": 7.23, |
| "step": 14500 |
| }, |
| { |
| "epoch": 32.381480454671475, |
| "grad_norm": 0.4727949798107147, |
| "learning_rate": 4.555183946488295e-06, |
| "loss": 6.3855, |
| "step": 14600 |
| }, |
| { |
| "epoch": 32.381480454671475, |
| "eval_loss": 6.39973258972168, |
| "eval_runtime": 175.4295, |
| "eval_samples_per_second": 57.003, |
| "eval_steps_per_second": 7.125, |
| "step": 14600 |
| }, |
| { |
| "epoch": 32.60327141668977, |
| "grad_norm": 0.5084313154220581, |
| "learning_rate": 4.545150501672241e-06, |
| "loss": 6.3833, |
| "step": 14700 |
| }, |
| { |
| "epoch": 32.60327141668977, |
| "eval_loss": 6.39823055267334, |
| "eval_runtime": 173.3819, |
| "eval_samples_per_second": 57.676, |
| "eval_steps_per_second": 7.21, |
| "step": 14700 |
| }, |
| { |
| "epoch": 32.82506237870807, |
| "grad_norm": 0.36422112584114075, |
| "learning_rate": 4.535117056856188e-06, |
| "loss": 6.3854, |
| "step": 14800 |
| }, |
| { |
| "epoch": 32.82506237870807, |
| "eval_loss": 6.402724266052246, |
| "eval_runtime": 174.9732, |
| "eval_samples_per_second": 57.152, |
| "eval_steps_per_second": 7.144, |
| "step": 14800 |
| }, |
| { |
| "epoch": 33.046853340726365, |
| "grad_norm": 0.5722773671150208, |
| "learning_rate": 4.5250836120401345e-06, |
| "loss": 6.3859, |
| "step": 14900 |
| }, |
| { |
| "epoch": 33.046853340726365, |
| "eval_loss": 6.396421909332275, |
| "eval_runtime": 175.3976, |
| "eval_samples_per_second": 57.013, |
| "eval_steps_per_second": 7.127, |
| "step": 14900 |
| }, |
| { |
| "epoch": 33.26864430274466, |
| "grad_norm": 0.859866201877594, |
| "learning_rate": 4.51505016722408e-06, |
| "loss": 6.3851, |
| "step": 15000 |
| }, |
| { |
| "epoch": 33.26864430274466, |
| "eval_loss": 6.396206855773926, |
| "eval_runtime": 174.5321, |
| "eval_samples_per_second": 57.296, |
| "eval_steps_per_second": 7.162, |
| "step": 15000 |
| }, |
| { |
| "epoch": 33.49043526476296, |
| "grad_norm": 0.8327785134315491, |
| "learning_rate": 4.505016722408027e-06, |
| "loss": 6.3848, |
| "step": 15100 |
| }, |
| { |
| "epoch": 33.49043526476296, |
| "eval_loss": 6.403675556182861, |
| "eval_runtime": 172.9138, |
| "eval_samples_per_second": 57.832, |
| "eval_steps_per_second": 7.229, |
| "step": 15100 |
| }, |
| { |
| "epoch": 33.71222622678126, |
| "grad_norm": 0.4790419042110443, |
| "learning_rate": 4.494983277591973e-06, |
| "loss": 6.3843, |
| "step": 15200 |
| }, |
| { |
| "epoch": 33.71222622678126, |
| "eval_loss": 6.397605895996094, |
| "eval_runtime": 175.348, |
| "eval_samples_per_second": 57.029, |
| "eval_steps_per_second": 7.129, |
| "step": 15200 |
| }, |
| { |
| "epoch": 33.93401718879956, |
| "grad_norm": 0.8004974722862244, |
| "learning_rate": 4.48494983277592e-06, |
| "loss": 6.3852, |
| "step": 15300 |
| }, |
| { |
| "epoch": 33.93401718879956, |
| "eval_loss": 6.396829605102539, |
| "eval_runtime": 172.9108, |
| "eval_samples_per_second": 57.833, |
| "eval_steps_per_second": 7.229, |
| "step": 15300 |
| }, |
| { |
| "epoch": 34.155808150817855, |
| "grad_norm": 0.40926745533943176, |
| "learning_rate": 4.474916387959866e-06, |
| "loss": 6.3835, |
| "step": 15400 |
| }, |
| { |
| "epoch": 34.155808150817855, |
| "eval_loss": 6.400079727172852, |
| "eval_runtime": 175.4491, |
| "eval_samples_per_second": 56.997, |
| "eval_steps_per_second": 7.125, |
| "step": 15400 |
| }, |
| { |
| "epoch": 34.37759911283615, |
| "grad_norm": 0.3634837567806244, |
| "learning_rate": 4.4648829431438125e-06, |
| "loss": 6.3836, |
| "step": 15500 |
| }, |
| { |
| "epoch": 34.37759911283615, |
| "eval_loss": 6.399561882019043, |
| "eval_runtime": 173.2399, |
| "eval_samples_per_second": 57.723, |
| "eval_steps_per_second": 7.215, |
| "step": 15500 |
| }, |
| { |
| "epoch": 34.59939007485445, |
| "grad_norm": 0.4545910954475403, |
| "learning_rate": 4.454849498327759e-06, |
| "loss": 6.3836, |
| "step": 15600 |
| }, |
| { |
| "epoch": 34.59939007485445, |
| "eval_loss": 6.3967742919921875, |
| "eval_runtime": 175.8575, |
| "eval_samples_per_second": 56.864, |
| "eval_steps_per_second": 7.108, |
| "step": 15600 |
| }, |
| { |
| "epoch": 34.821181036872744, |
| "grad_norm": 0.5282755494117737, |
| "learning_rate": 4.444816053511705e-06, |
| "loss": 6.3851, |
| "step": 15700 |
| }, |
| { |
| "epoch": 34.821181036872744, |
| "eval_loss": 6.399077892303467, |
| "eval_runtime": 175.7729, |
| "eval_samples_per_second": 56.892, |
| "eval_steps_per_second": 7.111, |
| "step": 15700 |
| }, |
| { |
| "epoch": 35.04297199889105, |
| "grad_norm": 0.5991719961166382, |
| "learning_rate": 4.434782608695652e-06, |
| "loss": 6.3846, |
| "step": 15800 |
| }, |
| { |
| "epoch": 35.04297199889105, |
| "eval_loss": 6.4012532234191895, |
| "eval_runtime": 175.8802, |
| "eval_samples_per_second": 56.857, |
| "eval_steps_per_second": 7.107, |
| "step": 15800 |
| }, |
| { |
| "epoch": 35.264762960909344, |
| "grad_norm": 0.5155884623527527, |
| "learning_rate": 4.424749163879599e-06, |
| "loss": 6.3836, |
| "step": 15900 |
| }, |
| { |
| "epoch": 35.264762960909344, |
| "eval_loss": 6.396469593048096, |
| "eval_runtime": 175.4084, |
| "eval_samples_per_second": 57.01, |
| "eval_steps_per_second": 7.126, |
| "step": 15900 |
| }, |
| { |
| "epoch": 35.48655392292764, |
| "grad_norm": 0.5687472224235535, |
| "learning_rate": 4.414715719063545e-06, |
| "loss": 6.3851, |
| "step": 16000 |
| }, |
| { |
| "epoch": 35.48655392292764, |
| "eval_loss": 6.39898681640625, |
| "eval_runtime": 172.8397, |
| "eval_samples_per_second": 57.857, |
| "eval_steps_per_second": 7.232, |
| "step": 16000 |
| }, |
| { |
| "epoch": 35.70834488494594, |
| "grad_norm": 0.43625304102897644, |
| "learning_rate": 4.404682274247491e-06, |
| "loss": 6.3839, |
| "step": 16100 |
| }, |
| { |
| "epoch": 35.70834488494594, |
| "eval_loss": 6.397797584533691, |
| "eval_runtime": 175.3929, |
| "eval_samples_per_second": 57.015, |
| "eval_steps_per_second": 7.127, |
| "step": 16100 |
| }, |
| { |
| "epoch": 35.930135846964234, |
| "grad_norm": 0.45570382475852966, |
| "learning_rate": 4.394648829431438e-06, |
| "loss": 6.383, |
| "step": 16200 |
| }, |
| { |
| "epoch": 35.930135846964234, |
| "eval_loss": 6.396146774291992, |
| "eval_runtime": 172.944, |
| "eval_samples_per_second": 57.822, |
| "eval_steps_per_second": 7.228, |
| "step": 16200 |
| }, |
| { |
| "epoch": 36.15192680898254, |
| "grad_norm": 0.5023874044418335, |
| "learning_rate": 4.384615384615384e-06, |
| "loss": 6.3832, |
| "step": 16300 |
| }, |
| { |
| "epoch": 36.15192680898254, |
| "eval_loss": 6.394959449768066, |
| "eval_runtime": 175.3162, |
| "eval_samples_per_second": 57.04, |
| "eval_steps_per_second": 7.13, |
| "step": 16300 |
| }, |
| { |
| "epoch": 36.373717771000834, |
| "grad_norm": 0.6336263418197632, |
| "learning_rate": 4.374581939799331e-06, |
| "loss": 6.384, |
| "step": 16400 |
| }, |
| { |
| "epoch": 36.373717771000834, |
| "eval_loss": 6.396052360534668, |
| "eval_runtime": 172.9338, |
| "eval_samples_per_second": 57.826, |
| "eval_steps_per_second": 7.228, |
| "step": 16400 |
| }, |
| { |
| "epoch": 36.59550873301913, |
| "grad_norm": 0.49517419934272766, |
| "learning_rate": 4.364548494983278e-06, |
| "loss": 6.3837, |
| "step": 16500 |
| }, |
| { |
| "epoch": 36.59550873301913, |
| "eval_loss": 6.394345760345459, |
| "eval_runtime": 175.3695, |
| "eval_samples_per_second": 57.022, |
| "eval_steps_per_second": 7.128, |
| "step": 16500 |
| }, |
| { |
| "epoch": 36.81729969503743, |
| "grad_norm": 0.6354840993881226, |
| "learning_rate": 4.354515050167224e-06, |
| "loss": 6.3819, |
| "step": 16600 |
| }, |
| { |
| "epoch": 36.81729969503743, |
| "eval_loss": 6.399397850036621, |
| "eval_runtime": 172.967, |
| "eval_samples_per_second": 57.814, |
| "eval_steps_per_second": 7.227, |
| "step": 16600 |
| }, |
| { |
| "epoch": 37.03909065705572, |
| "grad_norm": 0.6154801845550537, |
| "learning_rate": 4.34448160535117e-06, |
| "loss": 6.3846, |
| "step": 16700 |
| }, |
| { |
| "epoch": 37.03909065705572, |
| "eval_loss": 6.398616313934326, |
| "eval_runtime": 175.382, |
| "eval_samples_per_second": 57.018, |
| "eval_steps_per_second": 7.127, |
| "step": 16700 |
| }, |
| { |
| "epoch": 37.26088161907402, |
| "grad_norm": 0.5332671999931335, |
| "learning_rate": 4.334448160535117e-06, |
| "loss": 6.3833, |
| "step": 16800 |
| }, |
| { |
| "epoch": 37.26088161907402, |
| "eval_loss": 6.400417327880859, |
| "eval_runtime": 172.8252, |
| "eval_samples_per_second": 57.862, |
| "eval_steps_per_second": 7.233, |
| "step": 16800 |
| }, |
| { |
| "epoch": 37.482672581092324, |
| "grad_norm": 0.4707394242286682, |
| "learning_rate": 4.324414715719064e-06, |
| "loss": 6.382, |
| "step": 16900 |
| }, |
| { |
| "epoch": 37.482672581092324, |
| "eval_loss": 6.399077415466309, |
| "eval_runtime": 175.3262, |
| "eval_samples_per_second": 57.037, |
| "eval_steps_per_second": 7.13, |
| "step": 16900 |
| }, |
| { |
| "epoch": 37.70446354311062, |
| "grad_norm": 0.5503630042076111, |
| "learning_rate": 4.31438127090301e-06, |
| "loss": 6.3825, |
| "step": 17000 |
| }, |
| { |
| "epoch": 37.70446354311062, |
| "eval_loss": 6.3964338302612305, |
| "eval_runtime": 175.3567, |
| "eval_samples_per_second": 57.027, |
| "eval_steps_per_second": 7.128, |
| "step": 17000 |
| }, |
| { |
| "epoch": 37.92625450512892, |
| "grad_norm": 0.4225850999355316, |
| "learning_rate": 4.3043478260869565e-06, |
| "loss": 6.3808, |
| "step": 17100 |
| }, |
| { |
| "epoch": 37.92625450512892, |
| "eval_loss": 6.399682998657227, |
| "eval_runtime": 175.5337, |
| "eval_samples_per_second": 56.969, |
| "eval_steps_per_second": 7.121, |
| "step": 17100 |
| }, |
| { |
| "epoch": 38.14804546714721, |
| "grad_norm": 0.26002365350723267, |
| "learning_rate": 4.294314381270903e-06, |
| "loss": 6.3825, |
| "step": 17200 |
| }, |
| { |
| "epoch": 38.14804546714721, |
| "eval_loss": 6.394641399383545, |
| "eval_runtime": 175.4187, |
| "eval_samples_per_second": 57.006, |
| "eval_steps_per_second": 7.126, |
| "step": 17200 |
| }, |
| { |
| "epoch": 38.36983642916551, |
| "grad_norm": 0.5679543614387512, |
| "learning_rate": 4.284280936454849e-06, |
| "loss": 6.381, |
| "step": 17300 |
| }, |
| { |
| "epoch": 38.36983642916551, |
| "eval_loss": 6.39400053024292, |
| "eval_runtime": 175.3915, |
| "eval_samples_per_second": 57.015, |
| "eval_steps_per_second": 7.127, |
| "step": 17300 |
| }, |
| { |
| "epoch": 38.591627391183806, |
| "grad_norm": 0.6668972373008728, |
| "learning_rate": 4.274247491638796e-06, |
| "loss": 6.3833, |
| "step": 17400 |
| }, |
| { |
| "epoch": 38.591627391183806, |
| "eval_loss": 6.395496845245361, |
| "eval_runtime": 175.3632, |
| "eval_samples_per_second": 57.025, |
| "eval_steps_per_second": 7.128, |
| "step": 17400 |
| }, |
| { |
| "epoch": 38.81341835320211, |
| "grad_norm": 0.7112624049186707, |
| "learning_rate": 4.264214046822743e-06, |
| "loss": 6.3819, |
| "step": 17500 |
| }, |
| { |
| "epoch": 38.81341835320211, |
| "eval_loss": 6.394676685333252, |
| "eval_runtime": 174.8435, |
| "eval_samples_per_second": 57.194, |
| "eval_steps_per_second": 7.149, |
| "step": 17500 |
| }, |
| { |
| "epoch": 39.035209315220406, |
| "grad_norm": 0.550544261932373, |
| "learning_rate": 4.254180602006689e-06, |
| "loss": 6.3826, |
| "step": 17600 |
| }, |
| { |
| "epoch": 39.035209315220406, |
| "eval_loss": 6.396825313568115, |
| "eval_runtime": 175.8952, |
| "eval_samples_per_second": 56.852, |
| "eval_steps_per_second": 7.107, |
| "step": 17600 |
| }, |
| { |
| "epoch": 39.2570002772387, |
| "grad_norm": 0.43430355191230774, |
| "learning_rate": 4.244147157190635e-06, |
| "loss": 6.3829, |
| "step": 17700 |
| }, |
| { |
| "epoch": 39.2570002772387, |
| "eval_loss": 6.396999835968018, |
| "eval_runtime": 173.2928, |
| "eval_samples_per_second": 57.706, |
| "eval_steps_per_second": 7.213, |
| "step": 17700 |
| }, |
| { |
| "epoch": 39.478791239257, |
| "grad_norm": 0.4726496636867523, |
| "learning_rate": 4.234113712374582e-06, |
| "loss": 6.3832, |
| "step": 17800 |
| }, |
| { |
| "epoch": 39.478791239257, |
| "eval_loss": 6.394546031951904, |
| "eval_runtime": 175.1792, |
| "eval_samples_per_second": 57.084, |
| "eval_steps_per_second": 7.136, |
| "step": 17800 |
| }, |
| { |
| "epoch": 39.700582201275296, |
| "grad_norm": 0.6477558612823486, |
| "learning_rate": 4.224080267558528e-06, |
| "loss": 6.383, |
| "step": 17900 |
| }, |
| { |
| "epoch": 39.700582201275296, |
| "eval_loss": 6.39369010925293, |
| "eval_runtime": 175.8821, |
| "eval_samples_per_second": 56.856, |
| "eval_steps_per_second": 7.107, |
| "step": 17900 |
| }, |
| { |
| "epoch": 39.92237316329359, |
| "grad_norm": 0.3382057845592499, |
| "learning_rate": 4.214046822742475e-06, |
| "loss": 6.3794, |
| "step": 18000 |
| }, |
| { |
| "epoch": 39.92237316329359, |
| "eval_loss": 6.394671440124512, |
| "eval_runtime": 175.9089, |
| "eval_samples_per_second": 56.848, |
| "eval_steps_per_second": 7.106, |
| "step": 18000 |
| }, |
| { |
| "epoch": 40.144164125311896, |
| "grad_norm": 0.32499295473098755, |
| "learning_rate": 4.2040133779264216e-06, |
| "loss": 6.3836, |
| "step": 18100 |
| }, |
| { |
| "epoch": 40.144164125311896, |
| "eval_loss": 6.393697738647461, |
| "eval_runtime": 173.0953, |
| "eval_samples_per_second": 57.772, |
| "eval_steps_per_second": 7.221, |
| "step": 18100 |
| }, |
| { |
| "epoch": 40.36595508733019, |
| "grad_norm": 0.4412948489189148, |
| "learning_rate": 4.1939799331103675e-06, |
| "loss": 6.382, |
| "step": 18200 |
| }, |
| { |
| "epoch": 40.36595508733019, |
| "eval_loss": 6.395814895629883, |
| "eval_runtime": 175.6272, |
| "eval_samples_per_second": 56.939, |
| "eval_steps_per_second": 7.117, |
| "step": 18200 |
| }, |
| { |
| "epoch": 40.58774604934849, |
| "grad_norm": 0.46561938524246216, |
| "learning_rate": 4.183946488294314e-06, |
| "loss": 6.3809, |
| "step": 18300 |
| }, |
| { |
| "epoch": 40.58774604934849, |
| "eval_loss": 6.395906448364258, |
| "eval_runtime": 173.1113, |
| "eval_samples_per_second": 57.766, |
| "eval_steps_per_second": 7.221, |
| "step": 18300 |
| }, |
| { |
| "epoch": 40.809537011366785, |
| "grad_norm": 0.3944660425186157, |
| "learning_rate": 4.173913043478261e-06, |
| "loss": 6.3816, |
| "step": 18400 |
| }, |
| { |
| "epoch": 40.809537011366785, |
| "eval_loss": 6.395975589752197, |
| "eval_runtime": 175.6877, |
| "eval_samples_per_second": 56.919, |
| "eval_steps_per_second": 7.115, |
| "step": 18400 |
| }, |
| { |
| "epoch": 41.03132797338508, |
| "grad_norm": 0.6692656874656677, |
| "learning_rate": 4.163879598662208e-06, |
| "loss": 6.3812, |
| "step": 18500 |
| }, |
| { |
| "epoch": 41.03132797338508, |
| "eval_loss": 6.39307975769043, |
| "eval_runtime": 173.2571, |
| "eval_samples_per_second": 57.718, |
| "eval_steps_per_second": 7.215, |
| "step": 18500 |
| }, |
| { |
| "epoch": 41.253118935403386, |
| "grad_norm": 0.5447328090667725, |
| "learning_rate": 4.153846153846154e-06, |
| "loss": 6.382, |
| "step": 18600 |
| }, |
| { |
| "epoch": 41.253118935403386, |
| "eval_loss": 6.392385005950928, |
| "eval_runtime": 175.7445, |
| "eval_samples_per_second": 56.901, |
| "eval_steps_per_second": 7.113, |
| "step": 18600 |
| }, |
| { |
| "epoch": 41.47490989742168, |
| "grad_norm": 0.4197390079498291, |
| "learning_rate": 4.1438127090301005e-06, |
| "loss": 6.3809, |
| "step": 18700 |
| }, |
| { |
| "epoch": 41.47490989742168, |
| "eval_loss": 6.395226001739502, |
| "eval_runtime": 173.3622, |
| "eval_samples_per_second": 57.683, |
| "eval_steps_per_second": 7.21, |
| "step": 18700 |
| }, |
| { |
| "epoch": 41.69670085943998, |
| "grad_norm": 0.37331509590148926, |
| "learning_rate": 4.133779264214047e-06, |
| "loss": 6.3821, |
| "step": 18800 |
| }, |
| { |
| "epoch": 41.69670085943998, |
| "eval_loss": 6.397747039794922, |
| "eval_runtime": 175.5873, |
| "eval_samples_per_second": 56.952, |
| "eval_steps_per_second": 7.119, |
| "step": 18800 |
| }, |
| { |
| "epoch": 41.918491821458275, |
| "grad_norm": 0.439635306596756, |
| "learning_rate": 4.123745819397993e-06, |
| "loss": 6.3802, |
| "step": 18900 |
| }, |
| { |
| "epoch": 41.918491821458275, |
| "eval_loss": 6.393184185028076, |
| "eval_runtime": 175.6266, |
| "eval_samples_per_second": 56.939, |
| "eval_steps_per_second": 7.117, |
| "step": 18900 |
| }, |
| { |
| "epoch": 42.14028278347657, |
| "grad_norm": 0.4135972857475281, |
| "learning_rate": 4.11371237458194e-06, |
| "loss": 6.381, |
| "step": 19000 |
| }, |
| { |
| "epoch": 42.14028278347657, |
| "eval_loss": 6.396628379821777, |
| "eval_runtime": 175.68, |
| "eval_samples_per_second": 56.922, |
| "eval_steps_per_second": 7.115, |
| "step": 19000 |
| }, |
| { |
| "epoch": 42.36207374549487, |
| "grad_norm": 0.3350447118282318, |
| "learning_rate": 4.103678929765887e-06, |
| "loss": 6.382, |
| "step": 19100 |
| }, |
| { |
| "epoch": 42.36207374549487, |
| "eval_loss": 6.3959784507751465, |
| "eval_runtime": 173.1015, |
| "eval_samples_per_second": 57.77, |
| "eval_steps_per_second": 7.221, |
| "step": 19100 |
| }, |
| { |
| "epoch": 42.58386470751317, |
| "grad_norm": 0.40015509724617004, |
| "learning_rate": 4.0936454849498326e-06, |
| "loss": 6.3793, |
| "step": 19200 |
| }, |
| { |
| "epoch": 42.58386470751317, |
| "eval_loss": 6.392791271209717, |
| "eval_runtime": 175.6231, |
| "eval_samples_per_second": 56.94, |
| "eval_steps_per_second": 7.118, |
| "step": 19200 |
| }, |
| { |
| "epoch": 42.80565566953147, |
| "grad_norm": 0.42993155121803284, |
| "learning_rate": 4.083612040133779e-06, |
| "loss": 6.3817, |
| "step": 19300 |
| }, |
| { |
| "epoch": 42.80565566953147, |
| "eval_loss": 6.393764495849609, |
| "eval_runtime": 175.7583, |
| "eval_samples_per_second": 56.896, |
| "eval_steps_per_second": 7.112, |
| "step": 19300 |
| }, |
| { |
| "epoch": 43.027446631549765, |
| "grad_norm": 0.506564199924469, |
| "learning_rate": 4.073578595317726e-06, |
| "loss": 6.3805, |
| "step": 19400 |
| }, |
| { |
| "epoch": 43.027446631549765, |
| "eval_loss": 6.395299434661865, |
| "eval_runtime": 172.8685, |
| "eval_samples_per_second": 57.847, |
| "eval_steps_per_second": 7.231, |
| "step": 19400 |
| }, |
| { |
| "epoch": 43.24923759356806, |
| "grad_norm": 0.34368619322776794, |
| "learning_rate": 4.063545150501672e-06, |
| "loss": 6.3791, |
| "step": 19500 |
| }, |
| { |
| "epoch": 43.24923759356806, |
| "eval_loss": 6.390516757965088, |
| "eval_runtime": 175.4183, |
| "eval_samples_per_second": 57.007, |
| "eval_steps_per_second": 7.126, |
| "step": 19500 |
| }, |
| { |
| "epoch": 43.47102855558636, |
| "grad_norm": 0.5442679524421692, |
| "learning_rate": 4.053511705685619e-06, |
| "loss": 6.3805, |
| "step": 19600 |
| }, |
| { |
| "epoch": 43.47102855558636, |
| "eval_loss": 6.390527248382568, |
| "eval_runtime": 172.8815, |
| "eval_samples_per_second": 57.843, |
| "eval_steps_per_second": 7.23, |
| "step": 19600 |
| }, |
| { |
| "epoch": 43.692819517604654, |
| "grad_norm": 0.6060280799865723, |
| "learning_rate": 4.0434782608695655e-06, |
| "loss": 6.3792, |
| "step": 19700 |
| }, |
| { |
| "epoch": 43.692819517604654, |
| "eval_loss": 6.393373489379883, |
| "eval_runtime": 175.3372, |
| "eval_samples_per_second": 57.033, |
| "eval_steps_per_second": 7.129, |
| "step": 19700 |
| }, |
| { |
| "epoch": 43.91461047962296, |
| "grad_norm": 0.5891469120979309, |
| "learning_rate": 4.0334448160535115e-06, |
| "loss": 6.382, |
| "step": 19800 |
| }, |
| { |
| "epoch": 43.91461047962296, |
| "eval_loss": 6.395658493041992, |
| "eval_runtime": 173.3068, |
| "eval_samples_per_second": 57.701, |
| "eval_steps_per_second": 7.213, |
| "step": 19800 |
| }, |
| { |
| "epoch": 44.136401441641254, |
| "grad_norm": 0.3623868525028229, |
| "learning_rate": 4.023411371237458e-06, |
| "loss": 6.3794, |
| "step": 19900 |
| }, |
| { |
| "epoch": 44.136401441641254, |
| "eval_loss": 6.394290447235107, |
| "eval_runtime": 175.7778, |
| "eval_samples_per_second": 56.89, |
| "eval_steps_per_second": 7.111, |
| "step": 19900 |
| }, |
| { |
| "epoch": 44.35819240365955, |
| "grad_norm": 0.6197667121887207, |
| "learning_rate": 4.013377926421405e-06, |
| "loss": 6.3798, |
| "step": 20000 |
| }, |
| { |
| "epoch": 44.35819240365955, |
| "eval_loss": 6.393582820892334, |
| "eval_runtime": 175.4817, |
| "eval_samples_per_second": 56.986, |
| "eval_steps_per_second": 7.123, |
| "step": 20000 |
| }, |
| { |
| "epoch": 44.57998336567785, |
| "grad_norm": 0.5198450684547424, |
| "learning_rate": 4.003344481605351e-06, |
| "loss": 6.3792, |
| "step": 20100 |
| }, |
| { |
| "epoch": 44.57998336567785, |
| "eval_loss": 6.3943023681640625, |
| "eval_runtime": 175.4115, |
| "eval_samples_per_second": 57.009, |
| "eval_steps_per_second": 7.126, |
| "step": 20100 |
| }, |
| { |
| "epoch": 44.801774327696144, |
| "grad_norm": 0.4044889211654663, |
| "learning_rate": 3.993311036789298e-06, |
| "loss": 6.3798, |
| "step": 20200 |
| }, |
| { |
| "epoch": 44.801774327696144, |
| "eval_loss": 6.396990776062012, |
| "eval_runtime": 172.8449, |
| "eval_samples_per_second": 57.855, |
| "eval_steps_per_second": 7.232, |
| "step": 20200 |
| }, |
| { |
| "epoch": 45.02356528971445, |
| "grad_norm": 0.4656885862350464, |
| "learning_rate": 3.9832775919732444e-06, |
| "loss": 6.3807, |
| "step": 20300 |
| }, |
| { |
| "epoch": 45.02356528971445, |
| "eval_loss": 6.395167350769043, |
| "eval_runtime": 175.2548, |
| "eval_samples_per_second": 57.06, |
| "eval_steps_per_second": 7.132, |
| "step": 20300 |
| }, |
| { |
| "epoch": 45.245356251732744, |
| "grad_norm": 0.5882771611213684, |
| "learning_rate": 3.97324414715719e-06, |
| "loss": 6.3802, |
| "step": 20400 |
| }, |
| { |
| "epoch": 45.245356251732744, |
| "eval_loss": 6.392847537994385, |
| "eval_runtime": 175.4165, |
| "eval_samples_per_second": 57.007, |
| "eval_steps_per_second": 7.126, |
| "step": 20400 |
| }, |
| { |
| "epoch": 45.46714721375104, |
| "grad_norm": 0.31189513206481934, |
| "learning_rate": 3.963210702341137e-06, |
| "loss": 6.3799, |
| "step": 20500 |
| }, |
| { |
| "epoch": 45.46714721375104, |
| "eval_loss": 6.391454696655273, |
| "eval_runtime": 175.3822, |
| "eval_samples_per_second": 57.018, |
| "eval_steps_per_second": 7.127, |
| "step": 20500 |
| }, |
| { |
| "epoch": 45.68893817576934, |
| "grad_norm": 0.7188530564308167, |
| "learning_rate": 3.953177257525084e-06, |
| "loss": 6.3775, |
| "step": 20600 |
| }, |
| { |
| "epoch": 45.68893817576934, |
| "eval_loss": 6.391802787780762, |
| "eval_runtime": 175.4136, |
| "eval_samples_per_second": 57.008, |
| "eval_steps_per_second": 7.126, |
| "step": 20600 |
| }, |
| { |
| "epoch": 45.910729137787634, |
| "grad_norm": 0.4235071837902069, |
| "learning_rate": 3.943143812709031e-06, |
| "loss": 6.3791, |
| "step": 20700 |
| }, |
| { |
| "epoch": 45.910729137787634, |
| "eval_loss": 6.3952836990356445, |
| "eval_runtime": 175.3753, |
| "eval_samples_per_second": 57.021, |
| "eval_steps_per_second": 7.128, |
| "step": 20700 |
| }, |
| { |
| "epoch": 46.13252009980593, |
| "grad_norm": 0.4977140724658966, |
| "learning_rate": 3.9331103678929765e-06, |
| "loss": 6.3807, |
| "step": 20800 |
| }, |
| { |
| "epoch": 46.13252009980593, |
| "eval_loss": 6.397064208984375, |
| "eval_runtime": 175.8439, |
| "eval_samples_per_second": 56.869, |
| "eval_steps_per_second": 7.109, |
| "step": 20800 |
| }, |
| { |
| "epoch": 46.354311061824234, |
| "grad_norm": 0.5896762609481812, |
| "learning_rate": 3.923076923076923e-06, |
| "loss": 6.3801, |
| "step": 20900 |
| }, |
| { |
| "epoch": 46.354311061824234, |
| "eval_loss": 6.394172191619873, |
| "eval_runtime": 173.449, |
| "eval_samples_per_second": 57.654, |
| "eval_steps_per_second": 7.207, |
| "step": 20900 |
| }, |
| { |
| "epoch": 46.57610202384253, |
| "grad_norm": 0.47281450033187866, |
| "learning_rate": 3.91304347826087e-06, |
| "loss": 6.3787, |
| "step": 21000 |
| }, |
| { |
| "epoch": 46.57610202384253, |
| "eval_loss": 6.3905463218688965, |
| "eval_runtime": 175.9964, |
| "eval_samples_per_second": 56.819, |
| "eval_steps_per_second": 7.102, |
| "step": 21000 |
| }, |
| { |
| "epoch": 46.79789298586083, |
| "grad_norm": 0.42211413383483887, |
| "learning_rate": 3.903010033444816e-06, |
| "loss": 6.3798, |
| "step": 21100 |
| }, |
| { |
| "epoch": 46.79789298586083, |
| "eval_loss": 6.39119291305542, |
| "eval_runtime": 175.8005, |
| "eval_samples_per_second": 56.883, |
| "eval_steps_per_second": 7.11, |
| "step": 21100 |
| }, |
| { |
| "epoch": 47.01968394787912, |
| "grad_norm": 0.7232652306556702, |
| "learning_rate": 3.892976588628763e-06, |
| "loss": 6.3795, |
| "step": 21200 |
| }, |
| { |
| "epoch": 47.01968394787912, |
| "eval_loss": 6.39454984664917, |
| "eval_runtime": 174.7314, |
| "eval_samples_per_second": 57.231, |
| "eval_steps_per_second": 7.154, |
| "step": 21200 |
| }, |
| { |
| "epoch": 47.24147490989742, |
| "grad_norm": 0.4875265657901764, |
| "learning_rate": 3.8829431438127095e-06, |
| "loss": 6.3798, |
| "step": 21300 |
| }, |
| { |
| "epoch": 47.24147490989742, |
| "eval_loss": 6.391242027282715, |
| "eval_runtime": 173.1294, |
| "eval_samples_per_second": 57.76, |
| "eval_steps_per_second": 7.22, |
| "step": 21300 |
| }, |
| { |
| "epoch": 47.463265871915716, |
| "grad_norm": 0.689365804195404, |
| "learning_rate": 3.8729096989966554e-06, |
| "loss": 6.3797, |
| "step": 21400 |
| }, |
| { |
| "epoch": 47.463265871915716, |
| "eval_loss": 6.392244338989258, |
| "eval_runtime": 175.7048, |
| "eval_samples_per_second": 56.914, |
| "eval_steps_per_second": 7.114, |
| "step": 21400 |
| }, |
| { |
| "epoch": 47.68505683393402, |
| "grad_norm": 0.34326601028442383, |
| "learning_rate": 3.862876254180602e-06, |
| "loss": 6.3799, |
| "step": 21500 |
| }, |
| { |
| "epoch": 47.68505683393402, |
| "eval_loss": 6.390882968902588, |
| "eval_runtime": 173.1981, |
| "eval_samples_per_second": 57.737, |
| "eval_steps_per_second": 7.217, |
| "step": 21500 |
| }, |
| { |
| "epoch": 47.90684779595232, |
| "grad_norm": 0.5094731450080872, |
| "learning_rate": 3.852842809364549e-06, |
| "loss": 6.3789, |
| "step": 21600 |
| }, |
| { |
| "epoch": 47.90684779595232, |
| "eval_loss": 6.391824245452881, |
| "eval_runtime": 175.6758, |
| "eval_samples_per_second": 56.923, |
| "eval_steps_per_second": 7.115, |
| "step": 21600 |
| }, |
| { |
| "epoch": 48.12863875797061, |
| "grad_norm": 0.5096613764762878, |
| "learning_rate": 3.842809364548495e-06, |
| "loss": 6.3788, |
| "step": 21700 |
| }, |
| { |
| "epoch": 48.12863875797061, |
| "eval_loss": 6.3908467292785645, |
| "eval_runtime": 175.722, |
| "eval_samples_per_second": 56.908, |
| "eval_steps_per_second": 7.114, |
| "step": 21700 |
| }, |
| { |
| "epoch": 48.35042971998891, |
| "grad_norm": 0.49328041076660156, |
| "learning_rate": 3.832775919732442e-06, |
| "loss": 6.3801, |
| "step": 21800 |
| }, |
| { |
| "epoch": 48.35042971998891, |
| "eval_loss": 6.392337322235107, |
| "eval_runtime": 175.7017, |
| "eval_samples_per_second": 56.915, |
| "eval_steps_per_second": 7.114, |
| "step": 21800 |
| }, |
| { |
| "epoch": 48.572220682007206, |
| "grad_norm": 0.331511914730072, |
| "learning_rate": 3.822742474916388e-06, |
| "loss": 6.3787, |
| "step": 21900 |
| }, |
| { |
| "epoch": 48.572220682007206, |
| "eval_loss": 6.392426013946533, |
| "eval_runtime": 175.6914, |
| "eval_samples_per_second": 56.918, |
| "eval_steps_per_second": 7.115, |
| "step": 21900 |
| }, |
| { |
| "epoch": 48.7940116440255, |
| "grad_norm": 0.5596035718917847, |
| "learning_rate": 3.8127090301003347e-06, |
| "loss": 6.3783, |
| "step": 22000 |
| }, |
| { |
| "epoch": 48.7940116440255, |
| "eval_loss": 6.396266460418701, |
| "eval_runtime": 175.7217, |
| "eval_samples_per_second": 56.908, |
| "eval_steps_per_second": 7.114, |
| "step": 22000 |
| }, |
| { |
| "epoch": 49.015802606043806, |
| "grad_norm": 0.42308327555656433, |
| "learning_rate": 3.802675585284281e-06, |
| "loss": 6.3788, |
| "step": 22100 |
| }, |
| { |
| "epoch": 49.015802606043806, |
| "eval_loss": 6.392462730407715, |
| "eval_runtime": 175.6395, |
| "eval_samples_per_second": 56.935, |
| "eval_steps_per_second": 7.117, |
| "step": 22100 |
| }, |
| { |
| "epoch": 49.2375935680621, |
| "grad_norm": 0.47657862305641174, |
| "learning_rate": 3.792642140468228e-06, |
| "loss": 6.3768, |
| "step": 22200 |
| }, |
| { |
| "epoch": 49.2375935680621, |
| "eval_loss": 6.392263412475586, |
| "eval_runtime": 175.6228, |
| "eval_samples_per_second": 56.94, |
| "eval_steps_per_second": 7.118, |
| "step": 22200 |
| }, |
| { |
| "epoch": 49.4593845300804, |
| "grad_norm": 0.4417143166065216, |
| "learning_rate": 3.782608695652174e-06, |
| "loss": 6.3785, |
| "step": 22300 |
| }, |
| { |
| "epoch": 49.4593845300804, |
| "eval_loss": 6.39237642288208, |
| "eval_runtime": 175.5904, |
| "eval_samples_per_second": 56.951, |
| "eval_steps_per_second": 7.119, |
| "step": 22300 |
| }, |
| { |
| "epoch": 49.681175492098696, |
| "grad_norm": 0.3279063105583191, |
| "learning_rate": 3.7725752508361205e-06, |
| "loss": 6.3791, |
| "step": 22400 |
| }, |
| { |
| "epoch": 49.681175492098696, |
| "eval_loss": 6.3924407958984375, |
| "eval_runtime": 175.6501, |
| "eval_samples_per_second": 56.931, |
| "eval_steps_per_second": 7.116, |
| "step": 22400 |
| }, |
| { |
| "epoch": 49.90296645411699, |
| "grad_norm": 0.6854652166366577, |
| "learning_rate": 3.7625418060200673e-06, |
| "loss": 6.3785, |
| "step": 22500 |
| }, |
| { |
| "epoch": 49.90296645411699, |
| "eval_loss": 6.390333652496338, |
| "eval_runtime": 175.1173, |
| "eval_samples_per_second": 57.105, |
| "eval_steps_per_second": 7.138, |
| "step": 22500 |
| }, |
| { |
| "epoch": 50.124757416135296, |
| "grad_norm": 0.3522402048110962, |
| "learning_rate": 3.7525083612040136e-06, |
| "loss": 6.3776, |
| "step": 22600 |
| }, |
| { |
| "epoch": 50.124757416135296, |
| "eval_loss": 6.395279884338379, |
| "eval_runtime": 172.8769, |
| "eval_samples_per_second": 57.845, |
| "eval_steps_per_second": 7.231, |
| "step": 22600 |
| }, |
| { |
| "epoch": 50.34654837815359, |
| "grad_norm": 0.4847201704978943, |
| "learning_rate": 3.74247491638796e-06, |
| "loss": 6.3798, |
| "step": 22700 |
| }, |
| { |
| "epoch": 50.34654837815359, |
| "eval_loss": 6.385508060455322, |
| "eval_runtime": 175.3898, |
| "eval_samples_per_second": 57.016, |
| "eval_steps_per_second": 7.127, |
| "step": 22700 |
| }, |
| { |
| "epoch": 50.56833934017189, |
| "grad_norm": 0.6891096234321594, |
| "learning_rate": 3.7324414715719067e-06, |
| "loss": 6.379, |
| "step": 22800 |
| }, |
| { |
| "epoch": 50.56833934017189, |
| "eval_loss": 6.389738082885742, |
| "eval_runtime": 172.9656, |
| "eval_samples_per_second": 57.815, |
| "eval_steps_per_second": 7.227, |
| "step": 22800 |
| }, |
| { |
| "epoch": 50.790130302190185, |
| "grad_norm": 0.5377815365791321, |
| "learning_rate": 3.722408026755853e-06, |
| "loss": 6.3781, |
| "step": 22900 |
| }, |
| { |
| "epoch": 50.790130302190185, |
| "eval_loss": 6.393865585327148, |
| "eval_runtime": 175.4211, |
| "eval_samples_per_second": 57.006, |
| "eval_steps_per_second": 7.126, |
| "step": 22900 |
| }, |
| { |
| "epoch": 51.01192126420848, |
| "grad_norm": 0.33496779203414917, |
| "learning_rate": 3.7123745819398e-06, |
| "loss": 6.3774, |
| "step": 23000 |
| }, |
| { |
| "epoch": 51.01192126420848, |
| "eval_loss": 6.388363838195801, |
| "eval_runtime": 172.9308, |
| "eval_samples_per_second": 57.827, |
| "eval_steps_per_second": 7.228, |
| "step": 23000 |
| }, |
| { |
| "epoch": 51.23371222622678, |
| "grad_norm": 0.374717116355896, |
| "learning_rate": 3.702341137123746e-06, |
| "loss": 6.3782, |
| "step": 23100 |
| }, |
| { |
| "epoch": 51.23371222622678, |
| "eval_loss": 6.3933634757995605, |
| "eval_runtime": 175.8194, |
| "eval_samples_per_second": 56.877, |
| "eval_steps_per_second": 7.11, |
| "step": 23100 |
| }, |
| { |
| "epoch": 51.45550318824508, |
| "grad_norm": 0.5700441002845764, |
| "learning_rate": 3.6923076923076925e-06, |
| "loss": 6.3779, |
| "step": 23200 |
| }, |
| { |
| "epoch": 51.45550318824508, |
| "eval_loss": 6.391829490661621, |
| "eval_runtime": 173.0462, |
| "eval_samples_per_second": 57.788, |
| "eval_steps_per_second": 7.224, |
| "step": 23200 |
| }, |
| { |
| "epoch": 51.67729415026338, |
| "grad_norm": 0.5987123250961304, |
| "learning_rate": 3.6822742474916393e-06, |
| "loss": 6.3775, |
| "step": 23300 |
| }, |
| { |
| "epoch": 51.67729415026338, |
| "eval_loss": 6.391645908355713, |
| "eval_runtime": 175.546, |
| "eval_samples_per_second": 56.965, |
| "eval_steps_per_second": 7.121, |
| "step": 23300 |
| }, |
| { |
| "epoch": 51.899085112281675, |
| "grad_norm": 0.6282506585121155, |
| "learning_rate": 3.6722408026755856e-06, |
| "loss": 6.3785, |
| "step": 23400 |
| }, |
| { |
| "epoch": 51.899085112281675, |
| "eval_loss": 6.394507884979248, |
| "eval_runtime": 175.5236, |
| "eval_samples_per_second": 56.972, |
| "eval_steps_per_second": 7.122, |
| "step": 23400 |
| }, |
| { |
| "epoch": 52.12087607429997, |
| "grad_norm": 0.4422946572303772, |
| "learning_rate": 3.662207357859532e-06, |
| "loss": 6.378, |
| "step": 23500 |
| }, |
| { |
| "epoch": 52.12087607429997, |
| "eval_loss": 6.389113903045654, |
| "eval_runtime": 172.8391, |
| "eval_samples_per_second": 57.857, |
| "eval_steps_per_second": 7.232, |
| "step": 23500 |
| }, |
| { |
| "epoch": 52.34266703631827, |
| "grad_norm": 0.43772438168525696, |
| "learning_rate": 3.6521739130434787e-06, |
| "loss": 6.3769, |
| "step": 23600 |
| }, |
| { |
| "epoch": 52.34266703631827, |
| "eval_loss": 6.389682292938232, |
| "eval_runtime": 174.37, |
| "eval_samples_per_second": 57.349, |
| "eval_steps_per_second": 7.169, |
| "step": 23600 |
| }, |
| { |
| "epoch": 52.564457998336565, |
| "grad_norm": 0.4291711449623108, |
| "learning_rate": 3.642140468227425e-06, |
| "loss": 6.3787, |
| "step": 23700 |
| }, |
| { |
| "epoch": 52.564457998336565, |
| "eval_loss": 6.387042999267578, |
| "eval_runtime": 175.3622, |
| "eval_samples_per_second": 57.025, |
| "eval_steps_per_second": 7.128, |
| "step": 23700 |
| }, |
| { |
| "epoch": 52.78624896035487, |
| "grad_norm": 0.3986354172229767, |
| "learning_rate": 3.6321070234113714e-06, |
| "loss": 6.378, |
| "step": 23800 |
| }, |
| { |
| "epoch": 52.78624896035487, |
| "eval_loss": 6.394027233123779, |
| "eval_runtime": 175.4238, |
| "eval_samples_per_second": 57.005, |
| "eval_steps_per_second": 7.126, |
| "step": 23800 |
| }, |
| { |
| "epoch": 53.008039922373165, |
| "grad_norm": 0.4198819398880005, |
| "learning_rate": 3.622073578595318e-06, |
| "loss": 6.378, |
| "step": 23900 |
| }, |
| { |
| "epoch": 53.008039922373165, |
| "eval_loss": 6.391998291015625, |
| "eval_runtime": 175.3995, |
| "eval_samples_per_second": 57.013, |
| "eval_steps_per_second": 7.127, |
| "step": 23900 |
| }, |
| { |
| "epoch": 53.22983088439146, |
| "grad_norm": 0.42992842197418213, |
| "learning_rate": 3.6120401337792645e-06, |
| "loss": 6.378, |
| "step": 24000 |
| }, |
| { |
| "epoch": 53.22983088439146, |
| "eval_loss": 6.391213893890381, |
| "eval_runtime": 175.5148, |
| "eval_samples_per_second": 56.975, |
| "eval_steps_per_second": 7.122, |
| "step": 24000 |
| }, |
| { |
| "epoch": 53.45162184640976, |
| "grad_norm": 0.3845984637737274, |
| "learning_rate": 3.6020066889632112e-06, |
| "loss": 6.3794, |
| "step": 24100 |
| }, |
| { |
| "epoch": 53.45162184640976, |
| "eval_loss": 6.395719528198242, |
| "eval_runtime": 175.2358, |
| "eval_samples_per_second": 57.066, |
| "eval_steps_per_second": 7.133, |
| "step": 24100 |
| }, |
| { |
| "epoch": 53.673412808428054, |
| "grad_norm": 0.4092540144920349, |
| "learning_rate": 3.5919732441471576e-06, |
| "loss": 6.3764, |
| "step": 24200 |
| }, |
| { |
| "epoch": 53.673412808428054, |
| "eval_loss": 6.392786502838135, |
| "eval_runtime": 173.4491, |
| "eval_samples_per_second": 57.654, |
| "eval_steps_per_second": 7.207, |
| "step": 24200 |
| }, |
| { |
| "epoch": 53.89520377044636, |
| "grad_norm": 0.4434932470321655, |
| "learning_rate": 3.581939799331104e-06, |
| "loss": 6.3784, |
| "step": 24300 |
| }, |
| { |
| "epoch": 53.89520377044636, |
| "eval_loss": 6.392944812774658, |
| "eval_runtime": 173.4556, |
| "eval_samples_per_second": 57.652, |
| "eval_steps_per_second": 7.206, |
| "step": 24300 |
| }, |
| { |
| "epoch": 54.116994732464654, |
| "grad_norm": 0.3644530177116394, |
| "learning_rate": 3.5719063545150507e-06, |
| "loss": 6.3777, |
| "step": 24400 |
| }, |
| { |
| "epoch": 54.116994732464654, |
| "eval_loss": 6.389293193817139, |
| "eval_runtime": 175.8393, |
| "eval_samples_per_second": 56.87, |
| "eval_steps_per_second": 7.109, |
| "step": 24400 |
| }, |
| { |
| "epoch": 54.33878569448295, |
| "grad_norm": 0.42048630118370056, |
| "learning_rate": 3.561872909698997e-06, |
| "loss": 6.3779, |
| "step": 24500 |
| }, |
| { |
| "epoch": 54.33878569448295, |
| "eval_loss": 6.392094612121582, |
| "eval_runtime": 173.3329, |
| "eval_samples_per_second": 57.692, |
| "eval_steps_per_second": 7.212, |
| "step": 24500 |
| }, |
| { |
| "epoch": 54.56057665650125, |
| "grad_norm": 0.5288220047950745, |
| "learning_rate": 3.5518394648829434e-06, |
| "loss": 6.3768, |
| "step": 24600 |
| }, |
| { |
| "epoch": 54.56057665650125, |
| "eval_loss": 6.389921188354492, |
| "eval_runtime": 175.5087, |
| "eval_samples_per_second": 56.977, |
| "eval_steps_per_second": 7.122, |
| "step": 24600 |
| }, |
| { |
| "epoch": 54.782367618519544, |
| "grad_norm": 0.5413895845413208, |
| "learning_rate": 3.54180602006689e-06, |
| "loss": 6.3788, |
| "step": 24700 |
| }, |
| { |
| "epoch": 54.782367618519544, |
| "eval_loss": 6.389023303985596, |
| "eval_runtime": 172.9846, |
| "eval_samples_per_second": 57.809, |
| "eval_steps_per_second": 7.226, |
| "step": 24700 |
| }, |
| { |
| "epoch": 55.00415858053784, |
| "grad_norm": 0.35512205958366394, |
| "learning_rate": 3.5317725752508365e-06, |
| "loss": 6.3777, |
| "step": 24800 |
| }, |
| { |
| "epoch": 55.00415858053784, |
| "eval_loss": 6.390623569488525, |
| "eval_runtime": 175.3777, |
| "eval_samples_per_second": 57.02, |
| "eval_steps_per_second": 7.127, |
| "step": 24800 |
| }, |
| { |
| "epoch": 55.225949542556144, |
| "grad_norm": 0.46963444352149963, |
| "learning_rate": 3.521739130434783e-06, |
| "loss": 6.3759, |
| "step": 24900 |
| }, |
| { |
| "epoch": 55.225949542556144, |
| "eval_loss": 6.392442226409912, |
| "eval_runtime": 173.0136, |
| "eval_samples_per_second": 57.799, |
| "eval_steps_per_second": 7.225, |
| "step": 24900 |
| }, |
| { |
| "epoch": 55.44774050457444, |
| "grad_norm": 0.4473781883716583, |
| "learning_rate": 3.5117056856187296e-06, |
| "loss": 6.3766, |
| "step": 25000 |
| }, |
| { |
| "epoch": 55.44774050457444, |
| "eval_loss": 6.392148971557617, |
| "eval_runtime": 175.4775, |
| "eval_samples_per_second": 56.987, |
| "eval_steps_per_second": 7.123, |
| "step": 25000 |
| }, |
| { |
| "epoch": 55.66953146659274, |
| "grad_norm": 0.4387643337249756, |
| "learning_rate": 3.501672240802676e-06, |
| "loss": 6.3768, |
| "step": 25100 |
| }, |
| { |
| "epoch": 55.66953146659274, |
| "eval_loss": 6.391911506652832, |
| "eval_runtime": 175.6257, |
| "eval_samples_per_second": 56.939, |
| "eval_steps_per_second": 7.117, |
| "step": 25100 |
| }, |
| { |
| "epoch": 55.89132242861103, |
| "grad_norm": 0.5157041549682617, |
| "learning_rate": 3.491638795986622e-06, |
| "loss": 6.3784, |
| "step": 25200 |
| }, |
| { |
| "epoch": 55.89132242861103, |
| "eval_loss": 6.384146690368652, |
| "eval_runtime": 175.6148, |
| "eval_samples_per_second": 56.943, |
| "eval_steps_per_second": 7.118, |
| "step": 25200 |
| }, |
| { |
| "epoch": 56.11311339062933, |
| "grad_norm": 0.36674726009368896, |
| "learning_rate": 3.481605351170568e-06, |
| "loss": 6.3757, |
| "step": 25300 |
| }, |
| { |
| "epoch": 56.11311339062933, |
| "eval_loss": 6.3921380043029785, |
| "eval_runtime": 175.3664, |
| "eval_samples_per_second": 57.023, |
| "eval_steps_per_second": 7.128, |
| "step": 25300 |
| }, |
| { |
| "epoch": 56.33490435264763, |
| "grad_norm": 0.44830092787742615, |
| "learning_rate": 3.471571906354515e-06, |
| "loss": 6.3785, |
| "step": 25400 |
| }, |
| { |
| "epoch": 56.33490435264763, |
| "eval_loss": 6.387638092041016, |
| "eval_runtime": 175.4426, |
| "eval_samples_per_second": 56.999, |
| "eval_steps_per_second": 7.125, |
| "step": 25400 |
| }, |
| { |
| "epoch": 56.55669531466593, |
| "grad_norm": 0.4037076532840729, |
| "learning_rate": 3.4615384615384613e-06, |
| "loss": 6.3753, |
| "step": 25500 |
| }, |
| { |
| "epoch": 56.55669531466593, |
| "eval_loss": 6.390742778778076, |
| "eval_runtime": 175.869, |
| "eval_samples_per_second": 56.861, |
| "eval_steps_per_second": 7.108, |
| "step": 25500 |
| }, |
| { |
| "epoch": 56.77848627668423, |
| "grad_norm": 0.5410855412483215, |
| "learning_rate": 3.4515050167224076e-06, |
| "loss": 6.3773, |
| "step": 25600 |
| }, |
| { |
| "epoch": 56.77848627668423, |
| "eval_loss": 6.388538837432861, |
| "eval_runtime": 175.5689, |
| "eval_samples_per_second": 56.958, |
| "eval_steps_per_second": 7.12, |
| "step": 25600 |
| }, |
| { |
| "epoch": 57.00027723870252, |
| "grad_norm": 0.6200158596038818, |
| "learning_rate": 3.4414715719063544e-06, |
| "loss": 6.3762, |
| "step": 25700 |
| }, |
| { |
| "epoch": 57.00027723870252, |
| "eval_loss": 6.392038345336914, |
| "eval_runtime": 172.8867, |
| "eval_samples_per_second": 57.841, |
| "eval_steps_per_second": 7.23, |
| "step": 25700 |
| }, |
| { |
| "epoch": 57.22206820072082, |
| "grad_norm": 0.33977118134498596, |
| "learning_rate": 3.4314381270903007e-06, |
| "loss": 6.3782, |
| "step": 25800 |
| }, |
| { |
| "epoch": 57.22206820072082, |
| "eval_loss": 6.390758037567139, |
| "eval_runtime": 172.9474, |
| "eval_samples_per_second": 57.821, |
| "eval_steps_per_second": 7.228, |
| "step": 25800 |
| }, |
| { |
| "epoch": 57.443859162739116, |
| "grad_norm": 0.396681010723114, |
| "learning_rate": 3.4214046822742475e-06, |
| "loss": 6.3766, |
| "step": 25900 |
| }, |
| { |
| "epoch": 57.443859162739116, |
| "eval_loss": 6.391767501831055, |
| "eval_runtime": 175.4265, |
| "eval_samples_per_second": 57.004, |
| "eval_steps_per_second": 7.125, |
| "step": 25900 |
| }, |
| { |
| "epoch": 57.66565012475741, |
| "grad_norm": 0.3652241826057434, |
| "learning_rate": 3.411371237458194e-06, |
| "loss": 6.3766, |
| "step": 26000 |
| }, |
| { |
| "epoch": 57.66565012475741, |
| "eval_loss": 6.388927936553955, |
| "eval_runtime": 173.1869, |
| "eval_samples_per_second": 57.741, |
| "eval_steps_per_second": 7.218, |
| "step": 26000 |
| }, |
| { |
| "epoch": 57.887441086775716, |
| "grad_norm": 0.40237948298454285, |
| "learning_rate": 3.40133779264214e-06, |
| "loss": 6.3786, |
| "step": 26100 |
| }, |
| { |
| "epoch": 57.887441086775716, |
| "eval_loss": 6.385989665985107, |
| "eval_runtime": 175.7809, |
| "eval_samples_per_second": 56.889, |
| "eval_steps_per_second": 7.111, |
| "step": 26100 |
| }, |
| { |
| "epoch": 58.10923204879401, |
| "grad_norm": 0.47134748101234436, |
| "learning_rate": 3.391304347826087e-06, |
| "loss": 6.3766, |
| "step": 26200 |
| }, |
| { |
| "epoch": 58.10923204879401, |
| "eval_loss": 6.388063907623291, |
| "eval_runtime": 172.8868, |
| "eval_samples_per_second": 57.841, |
| "eval_steps_per_second": 7.23, |
| "step": 26200 |
| }, |
| { |
| "epoch": 58.33102301081231, |
| "grad_norm": 0.35729169845581055, |
| "learning_rate": 3.3812709030100333e-06, |
| "loss": 6.376, |
| "step": 26300 |
| }, |
| { |
| "epoch": 58.33102301081231, |
| "eval_loss": 6.38781213760376, |
| "eval_runtime": 175.295, |
| "eval_samples_per_second": 57.047, |
| "eval_steps_per_second": 7.131, |
| "step": 26300 |
| }, |
| { |
| "epoch": 58.552813972830606, |
| "grad_norm": 0.38715028762817383, |
| "learning_rate": 3.3712374581939796e-06, |
| "loss": 6.3765, |
| "step": 26400 |
| }, |
| { |
| "epoch": 58.552813972830606, |
| "eval_loss": 6.389337539672852, |
| "eval_runtime": 172.8668, |
| "eval_samples_per_second": 57.848, |
| "eval_steps_per_second": 7.231, |
| "step": 26400 |
| }, |
| { |
| "epoch": 58.7746049348489, |
| "grad_norm": 0.46873271465301514, |
| "learning_rate": 3.3612040133779264e-06, |
| "loss": 6.3768, |
| "step": 26500 |
| }, |
| { |
| "epoch": 58.7746049348489, |
| "eval_loss": 6.392114162445068, |
| "eval_runtime": 175.4104, |
| "eval_samples_per_second": 57.009, |
| "eval_steps_per_second": 7.126, |
| "step": 26500 |
| }, |
| { |
| "epoch": 58.996395896867206, |
| "grad_norm": 0.3447762131690979, |
| "learning_rate": 3.3511705685618727e-06, |
| "loss": 6.3759, |
| "step": 26600 |
| }, |
| { |
| "epoch": 58.996395896867206, |
| "eval_loss": 6.387296676635742, |
| "eval_runtime": 175.3375, |
| "eval_samples_per_second": 57.033, |
| "eval_steps_per_second": 7.129, |
| "step": 26600 |
| }, |
| { |
| "epoch": 59.2181868588855, |
| "grad_norm": 0.3914731442928314, |
| "learning_rate": 3.3411371237458195e-06, |
| "loss": 6.3771, |
| "step": 26700 |
| }, |
| { |
| "epoch": 59.2181868588855, |
| "eval_loss": 6.387917995452881, |
| "eval_runtime": 175.4868, |
| "eval_samples_per_second": 56.984, |
| "eval_steps_per_second": 7.123, |
| "step": 26700 |
| }, |
| { |
| "epoch": 59.4399778209038, |
| "grad_norm": 0.5208538174629211, |
| "learning_rate": 3.331103678929766e-06, |
| "loss": 6.3765, |
| "step": 26800 |
| }, |
| { |
| "epoch": 59.4399778209038, |
| "eval_loss": 6.389184474945068, |
| "eval_runtime": 174.2169, |
| "eval_samples_per_second": 57.4, |
| "eval_steps_per_second": 7.175, |
| "step": 26800 |
| }, |
| { |
| "epoch": 59.661768782922096, |
| "grad_norm": 0.3724886178970337, |
| "learning_rate": 3.321070234113712e-06, |
| "loss": 6.3757, |
| "step": 26900 |
| }, |
| { |
| "epoch": 59.661768782922096, |
| "eval_loss": 6.392241954803467, |
| "eval_runtime": 175.4491, |
| "eval_samples_per_second": 56.997, |
| "eval_steps_per_second": 7.125, |
| "step": 26900 |
| }, |
| { |
| "epoch": 59.88355974494039, |
| "grad_norm": 0.33004748821258545, |
| "learning_rate": 3.311036789297659e-06, |
| "loss": 6.3759, |
| "step": 27000 |
| }, |
| { |
| "epoch": 59.88355974494039, |
| "eval_loss": 6.389077186584473, |
| "eval_runtime": 172.9579, |
| "eval_samples_per_second": 57.818, |
| "eval_steps_per_second": 7.227, |
| "step": 27000 |
| }, |
| { |
| "epoch": 60.10535070695869, |
| "grad_norm": 0.3995635211467743, |
| "learning_rate": 3.3010033444816052e-06, |
| "loss": 6.3774, |
| "step": 27100 |
| }, |
| { |
| "epoch": 60.10535070695869, |
| "eval_loss": 6.389009952545166, |
| "eval_runtime": 175.8118, |
| "eval_samples_per_second": 56.879, |
| "eval_steps_per_second": 7.11, |
| "step": 27100 |
| }, |
| { |
| "epoch": 60.32714166897699, |
| "grad_norm": 0.49882611632347107, |
| "learning_rate": 3.2909698996655516e-06, |
| "loss": 6.3762, |
| "step": 27200 |
| }, |
| { |
| "epoch": 60.32714166897699, |
| "eval_loss": 6.3899922370910645, |
| "eval_runtime": 175.7786, |
| "eval_samples_per_second": 56.89, |
| "eval_steps_per_second": 7.111, |
| "step": 27200 |
| }, |
| { |
| "epoch": 60.54893263099529, |
| "grad_norm": 0.46321776509284973, |
| "learning_rate": 3.2809364548494983e-06, |
| "loss": 6.3758, |
| "step": 27300 |
| }, |
| { |
| "epoch": 60.54893263099529, |
| "eval_loss": 6.389715671539307, |
| "eval_runtime": 175.8928, |
| "eval_samples_per_second": 56.853, |
| "eval_steps_per_second": 7.107, |
| "step": 27300 |
| }, |
| { |
| "epoch": 60.770723593013585, |
| "grad_norm": 0.4512879252433777, |
| "learning_rate": 3.2709030100334447e-06, |
| "loss": 6.3764, |
| "step": 27400 |
| }, |
| { |
| "epoch": 60.770723593013585, |
| "eval_loss": 6.388641357421875, |
| "eval_runtime": 175.8755, |
| "eval_samples_per_second": 56.858, |
| "eval_steps_per_second": 7.107, |
| "step": 27400 |
| }, |
| { |
| "epoch": 60.99251455503188, |
| "grad_norm": 0.5370669364929199, |
| "learning_rate": 3.260869565217391e-06, |
| "loss": 6.3764, |
| "step": 27500 |
| }, |
| { |
| "epoch": 60.99251455503188, |
| "eval_loss": 6.391347885131836, |
| "eval_runtime": 173.6027, |
| "eval_samples_per_second": 57.603, |
| "eval_steps_per_second": 7.2, |
| "step": 27500 |
| }, |
| { |
| "epoch": 61.21430551705018, |
| "grad_norm": 0.4362497627735138, |
| "learning_rate": 3.2508361204013378e-06, |
| "loss": 6.3747, |
| "step": 27600 |
| }, |
| { |
| "epoch": 61.21430551705018, |
| "eval_loss": 6.390707969665527, |
| "eval_runtime": 175.8739, |
| "eval_samples_per_second": 56.859, |
| "eval_steps_per_second": 7.107, |
| "step": 27600 |
| }, |
| { |
| "epoch": 61.436096479068475, |
| "grad_norm": 0.36759933829307556, |
| "learning_rate": 3.240802675585284e-06, |
| "loss": 6.3768, |
| "step": 27700 |
| }, |
| { |
| "epoch": 61.436096479068475, |
| "eval_loss": 6.390637397766113, |
| "eval_runtime": 173.3683, |
| "eval_samples_per_second": 57.681, |
| "eval_steps_per_second": 7.21, |
| "step": 27700 |
| }, |
| { |
| "epoch": 61.65788744108678, |
| "grad_norm": 0.4922894537448883, |
| "learning_rate": 3.230769230769231e-06, |
| "loss": 6.3758, |
| "step": 27800 |
| }, |
| { |
| "epoch": 61.65788744108678, |
| "eval_loss": 6.386129379272461, |
| "eval_runtime": 175.8295, |
| "eval_samples_per_second": 56.873, |
| "eval_steps_per_second": 7.109, |
| "step": 27800 |
| }, |
| { |
| "epoch": 61.879678403105075, |
| "grad_norm": 0.5007067918777466, |
| "learning_rate": 3.2207357859531772e-06, |
| "loss": 6.3755, |
| "step": 27900 |
| }, |
| { |
| "epoch": 61.879678403105075, |
| "eval_loss": 6.389693737030029, |
| "eval_runtime": 173.4229, |
| "eval_samples_per_second": 57.663, |
| "eval_steps_per_second": 7.208, |
| "step": 27900 |
| }, |
| { |
| "epoch": 62.10146936512337, |
| "grad_norm": 0.5208317041397095, |
| "learning_rate": 3.2107023411371236e-06, |
| "loss": 6.3766, |
| "step": 28000 |
| }, |
| { |
| "epoch": 62.10146936512337, |
| "eval_loss": 6.387614727020264, |
| "eval_runtime": 175.7473, |
| "eval_samples_per_second": 56.9, |
| "eval_steps_per_second": 7.112, |
| "step": 28000 |
| }, |
| { |
| "epoch": 62.32326032714167, |
| "grad_norm": 0.5632686614990234, |
| "learning_rate": 3.2006688963210703e-06, |
| "loss": 6.3759, |
| "step": 28100 |
| }, |
| { |
| "epoch": 62.32326032714167, |
| "eval_loss": 6.392298221588135, |
| "eval_runtime": 173.3859, |
| "eval_samples_per_second": 57.675, |
| "eval_steps_per_second": 7.209, |
| "step": 28100 |
| }, |
| { |
| "epoch": 62.545051289159964, |
| "grad_norm": 0.44811296463012695, |
| "learning_rate": 3.1906354515050167e-06, |
| "loss": 6.376, |
| "step": 28200 |
| }, |
| { |
| "epoch": 62.545051289159964, |
| "eval_loss": 6.388302326202393, |
| "eval_runtime": 175.8812, |
| "eval_samples_per_second": 56.857, |
| "eval_steps_per_second": 7.107, |
| "step": 28200 |
| }, |
| { |
| "epoch": 62.76684225117826, |
| "grad_norm": 0.434894323348999, |
| "learning_rate": 3.180602006688963e-06, |
| "loss": 6.3754, |
| "step": 28300 |
| }, |
| { |
| "epoch": 62.76684225117826, |
| "eval_loss": 6.388329982757568, |
| "eval_runtime": 173.1819, |
| "eval_samples_per_second": 57.743, |
| "eval_steps_per_second": 7.218, |
| "step": 28300 |
| }, |
| { |
| "epoch": 62.988633213196564, |
| "grad_norm": 0.4996633231639862, |
| "learning_rate": 3.1705685618729098e-06, |
| "loss": 6.3753, |
| "step": 28400 |
| }, |
| { |
| "epoch": 62.988633213196564, |
| "eval_loss": 6.386618614196777, |
| "eval_runtime": 175.5366, |
| "eval_samples_per_second": 56.968, |
| "eval_steps_per_second": 7.121, |
| "step": 28400 |
| }, |
| { |
| "epoch": 63.21042417521486, |
| "grad_norm": 0.4766680598258972, |
| "learning_rate": 3.160535117056856e-06, |
| "loss": 6.3757, |
| "step": 28500 |
| }, |
| { |
| "epoch": 63.21042417521486, |
| "eval_loss": 6.388480186462402, |
| "eval_runtime": 175.3311, |
| "eval_samples_per_second": 57.035, |
| "eval_steps_per_second": 7.129, |
| "step": 28500 |
| }, |
| { |
| "epoch": 63.43221513723316, |
| "grad_norm": 0.28831642866134644, |
| "learning_rate": 3.1505016722408024e-06, |
| "loss": 6.3764, |
| "step": 28600 |
| }, |
| { |
| "epoch": 63.43221513723316, |
| "eval_loss": 6.3880767822265625, |
| "eval_runtime": 175.3784, |
| "eval_samples_per_second": 57.02, |
| "eval_steps_per_second": 7.127, |
| "step": 28600 |
| }, |
| { |
| "epoch": 63.654006099251454, |
| "grad_norm": 0.2838084399700165, |
| "learning_rate": 3.140468227424749e-06, |
| "loss": 6.3755, |
| "step": 28700 |
| }, |
| { |
| "epoch": 63.654006099251454, |
| "eval_loss": 6.386078357696533, |
| "eval_runtime": 172.9388, |
| "eval_samples_per_second": 57.824, |
| "eval_steps_per_second": 7.228, |
| "step": 28700 |
| }, |
| { |
| "epoch": 63.87579706126975, |
| "grad_norm": 0.47868525981903076, |
| "learning_rate": 3.1304347826086955e-06, |
| "loss": 6.377, |
| "step": 28800 |
| }, |
| { |
| "epoch": 63.87579706126975, |
| "eval_loss": 6.387932777404785, |
| "eval_runtime": 175.4569, |
| "eval_samples_per_second": 56.994, |
| "eval_steps_per_second": 7.124, |
| "step": 28800 |
| }, |
| { |
| "epoch": 64.09758802328805, |
| "grad_norm": 0.5446937680244446, |
| "learning_rate": 3.1204013377926423e-06, |
| "loss": 6.3753, |
| "step": 28900 |
| }, |
| { |
| "epoch": 64.09758802328805, |
| "eval_loss": 6.388584136962891, |
| "eval_runtime": 172.8884, |
| "eval_samples_per_second": 57.841, |
| "eval_steps_per_second": 7.23, |
| "step": 28900 |
| }, |
| { |
| "epoch": 64.31937898530634, |
| "grad_norm": 0.41702982783317566, |
| "learning_rate": 3.1103678929765886e-06, |
| "loss": 6.3761, |
| "step": 29000 |
| }, |
| { |
| "epoch": 64.31937898530634, |
| "eval_loss": 6.3896894454956055, |
| "eval_runtime": 172.9657, |
| "eval_samples_per_second": 57.815, |
| "eval_steps_per_second": 7.227, |
| "step": 29000 |
| }, |
| { |
| "epoch": 64.54116994732465, |
| "grad_norm": 0.39311668276786804, |
| "learning_rate": 3.100334448160535e-06, |
| "loss": 6.3753, |
| "step": 29100 |
| }, |
| { |
| "epoch": 64.54116994732465, |
| "eval_loss": 6.3889970779418945, |
| "eval_runtime": 175.6814, |
| "eval_samples_per_second": 56.921, |
| "eval_steps_per_second": 7.115, |
| "step": 29100 |
| }, |
| { |
| "epoch": 64.76296090934295, |
| "grad_norm": 0.31582164764404297, |
| "learning_rate": 3.0903010033444818e-06, |
| "loss": 6.3763, |
| "step": 29200 |
| }, |
| { |
| "epoch": 64.76296090934295, |
| "eval_loss": 6.388535976409912, |
| "eval_runtime": 173.1769, |
| "eval_samples_per_second": 57.744, |
| "eval_steps_per_second": 7.218, |
| "step": 29200 |
| }, |
| { |
| "epoch": 64.98475187136124, |
| "grad_norm": 0.4400019347667694, |
| "learning_rate": 3.080267558528428e-06, |
| "loss": 6.3752, |
| "step": 29300 |
| }, |
| { |
| "epoch": 64.98475187136124, |
| "eval_loss": 6.38809061050415, |
| "eval_runtime": 175.7068, |
| "eval_samples_per_second": 56.913, |
| "eval_steps_per_second": 7.114, |
| "step": 29300 |
| }, |
| { |
| "epoch": 65.20654283337954, |
| "grad_norm": 0.3871637284755707, |
| "learning_rate": 3.0702341137123744e-06, |
| "loss": 6.3761, |
| "step": 29400 |
| }, |
| { |
| "epoch": 65.20654283337954, |
| "eval_loss": 6.3887200355529785, |
| "eval_runtime": 175.6633, |
| "eval_samples_per_second": 56.927, |
| "eval_steps_per_second": 7.116, |
| "step": 29400 |
| }, |
| { |
| "epoch": 65.42833379539783, |
| "grad_norm": 0.3527097702026367, |
| "learning_rate": 3.060200668896321e-06, |
| "loss": 6.375, |
| "step": 29500 |
| }, |
| { |
| "epoch": 65.42833379539783, |
| "eval_loss": 6.385637283325195, |
| "eval_runtime": 175.6827, |
| "eval_samples_per_second": 56.921, |
| "eval_steps_per_second": 7.115, |
| "step": 29500 |
| }, |
| { |
| "epoch": 65.65012475741614, |
| "grad_norm": 0.3956551253795624, |
| "learning_rate": 3.0501672240802675e-06, |
| "loss": 6.3763, |
| "step": 29600 |
| }, |
| { |
| "epoch": 65.65012475741614, |
| "eval_loss": 6.388696670532227, |
| "eval_runtime": 175.6777, |
| "eval_samples_per_second": 56.922, |
| "eval_steps_per_second": 7.115, |
| "step": 29600 |
| }, |
| { |
| "epoch": 65.87191571943443, |
| "grad_norm": 0.317006379365921, |
| "learning_rate": 3.0401337792642143e-06, |
| "loss": 6.3747, |
| "step": 29700 |
| }, |
| { |
| "epoch": 65.87191571943443, |
| "eval_loss": 6.386444568634033, |
| "eval_runtime": 175.0294, |
| "eval_samples_per_second": 57.133, |
| "eval_steps_per_second": 7.142, |
| "step": 29700 |
| }, |
| { |
| "epoch": 66.09370668145273, |
| "grad_norm": 0.29853495955467224, |
| "learning_rate": 3.0301003344481606e-06, |
| "loss": 6.3742, |
| "step": 29800 |
| }, |
| { |
| "epoch": 66.09370668145273, |
| "eval_loss": 6.38703727722168, |
| "eval_runtime": 173.1862, |
| "eval_samples_per_second": 57.741, |
| "eval_steps_per_second": 7.218, |
| "step": 29800 |
| }, |
| { |
| "epoch": 66.31549764347103, |
| "grad_norm": 0.3481820225715637, |
| "learning_rate": 3.020066889632107e-06, |
| "loss": 6.3756, |
| "step": 29900 |
| }, |
| { |
| "epoch": 66.31549764347103, |
| "eval_loss": 6.385500907897949, |
| "eval_runtime": 175.6985, |
| "eval_samples_per_second": 56.916, |
| "eval_steps_per_second": 7.114, |
| "step": 29900 |
| }, |
| { |
| "epoch": 66.53728860548932, |
| "grad_norm": 0.3467808961868286, |
| "learning_rate": 3.0100334448160537e-06, |
| "loss": 6.3755, |
| "step": 30000 |
| }, |
| { |
| "epoch": 66.53728860548932, |
| "eval_loss": 6.389315605163574, |
| "eval_runtime": 173.203, |
| "eval_samples_per_second": 57.736, |
| "eval_steps_per_second": 7.217, |
| "step": 30000 |
| }, |
| { |
| "epoch": 66.75907956750763, |
| "grad_norm": 0.3288291096687317, |
| "learning_rate": 3e-06, |
| "loss": 6.3762, |
| "step": 30100 |
| }, |
| { |
| "epoch": 66.75907956750763, |
| "eval_loss": 6.389954090118408, |
| "eval_runtime": 175.5948, |
| "eval_samples_per_second": 56.949, |
| "eval_steps_per_second": 7.119, |
| "step": 30100 |
| }, |
| { |
| "epoch": 66.98087052952592, |
| "grad_norm": 0.3450663387775421, |
| "learning_rate": 2.9899665551839464e-06, |
| "loss": 6.3749, |
| "step": 30200 |
| }, |
| { |
| "epoch": 66.98087052952592, |
| "eval_loss": 6.388577938079834, |
| "eval_runtime": 173.1084, |
| "eval_samples_per_second": 57.767, |
| "eval_steps_per_second": 7.221, |
| "step": 30200 |
| }, |
| { |
| "epoch": 67.20266149154422, |
| "grad_norm": 0.4391154646873474, |
| "learning_rate": 2.979933110367893e-06, |
| "loss": 6.3757, |
| "step": 30300 |
| }, |
| { |
| "epoch": 67.20266149154422, |
| "eval_loss": 6.3895344734191895, |
| "eval_runtime": 175.4784, |
| "eval_samples_per_second": 56.987, |
| "eval_steps_per_second": 7.123, |
| "step": 30300 |
| }, |
| { |
| "epoch": 67.42445245356252, |
| "grad_norm": 0.4594007730484009, |
| "learning_rate": 2.9698996655518395e-06, |
| "loss": 6.3742, |
| "step": 30400 |
| }, |
| { |
| "epoch": 67.42445245356252, |
| "eval_loss": 6.387800216674805, |
| "eval_runtime": 175.4386, |
| "eval_samples_per_second": 57.0, |
| "eval_steps_per_second": 7.125, |
| "step": 30400 |
| }, |
| { |
| "epoch": 67.64624341558081, |
| "grad_norm": 0.2892398238182068, |
| "learning_rate": 2.959866220735786e-06, |
| "loss": 6.3758, |
| "step": 30500 |
| }, |
| { |
| "epoch": 67.64624341558081, |
| "eval_loss": 6.3860883712768555, |
| "eval_runtime": 175.4706, |
| "eval_samples_per_second": 56.99, |
| "eval_steps_per_second": 7.124, |
| "step": 30500 |
| }, |
| { |
| "epoch": 67.86803437759912, |
| "grad_norm": 0.5031465888023376, |
| "learning_rate": 2.9498327759197326e-06, |
| "loss": 6.3738, |
| "step": 30600 |
| }, |
| { |
| "epoch": 67.86803437759912, |
| "eval_loss": 6.38906192779541, |
| "eval_runtime": 175.4554, |
| "eval_samples_per_second": 56.995, |
| "eval_steps_per_second": 7.124, |
| "step": 30600 |
| }, |
| { |
| "epoch": 68.0898253396174, |
| "grad_norm": 0.2999316453933716, |
| "learning_rate": 2.939799331103679e-06, |
| "loss": 6.3732, |
| "step": 30700 |
| }, |
| { |
| "epoch": 68.0898253396174, |
| "eval_loss": 6.387207984924316, |
| "eval_runtime": 172.9284, |
| "eval_samples_per_second": 57.827, |
| "eval_steps_per_second": 7.228, |
| "step": 30700 |
| }, |
| { |
| "epoch": 68.31161630163571, |
| "grad_norm": 0.3920566737651825, |
| "learning_rate": 2.9297658862876257e-06, |
| "loss": 6.3746, |
| "step": 30800 |
| }, |
| { |
| "epoch": 68.31161630163571, |
| "eval_loss": 6.388418197631836, |
| "eval_runtime": 175.4686, |
| "eval_samples_per_second": 56.99, |
| "eval_steps_per_second": 7.124, |
| "step": 30800 |
| }, |
| { |
| "epoch": 68.53340726365401, |
| "grad_norm": 0.3810490369796753, |
| "learning_rate": 2.919732441471572e-06, |
| "loss": 6.3736, |
| "step": 30900 |
| }, |
| { |
| "epoch": 68.53340726365401, |
| "eval_loss": 6.382778167724609, |
| "eval_runtime": 172.9448, |
| "eval_samples_per_second": 57.822, |
| "eval_steps_per_second": 7.228, |
| "step": 30900 |
| }, |
| { |
| "epoch": 68.7551982256723, |
| "grad_norm": 0.282163143157959, |
| "learning_rate": 2.9096989966555184e-06, |
| "loss": 6.3764, |
| "step": 31000 |
| }, |
| { |
| "epoch": 68.7551982256723, |
| "eval_loss": 6.3898420333862305, |
| "eval_runtime": 175.8822, |
| "eval_samples_per_second": 56.856, |
| "eval_steps_per_second": 7.107, |
| "step": 31000 |
| }, |
| { |
| "epoch": 68.9769891876906, |
| "grad_norm": 0.5345416069030762, |
| "learning_rate": 2.899665551839465e-06, |
| "loss": 6.3744, |
| "step": 31100 |
| }, |
| { |
| "epoch": 68.9769891876906, |
| "eval_loss": 6.389834880828857, |
| "eval_runtime": 173.048, |
| "eval_samples_per_second": 57.787, |
| "eval_steps_per_second": 7.223, |
| "step": 31100 |
| }, |
| { |
| "epoch": 69.1987801497089, |
| "grad_norm": 0.2955686151981354, |
| "learning_rate": 2.8896321070234115e-06, |
| "loss": 6.3752, |
| "step": 31200 |
| }, |
| { |
| "epoch": 69.1987801497089, |
| "eval_loss": 6.385989189147949, |
| "eval_runtime": 175.4356, |
| "eval_samples_per_second": 57.001, |
| "eval_steps_per_second": 7.125, |
| "step": 31200 |
| }, |
| { |
| "epoch": 69.4205711117272, |
| "grad_norm": 0.2998807430267334, |
| "learning_rate": 2.879598662207358e-06, |
| "loss": 6.3744, |
| "step": 31300 |
| }, |
| { |
| "epoch": 69.4205711117272, |
| "eval_loss": 6.3874688148498535, |
| "eval_runtime": 175.8432, |
| "eval_samples_per_second": 56.869, |
| "eval_steps_per_second": 7.109, |
| "step": 31300 |
| }, |
| { |
| "epoch": 69.64236207374549, |
| "grad_norm": 0.5946409702301025, |
| "learning_rate": 2.8695652173913046e-06, |
| "loss": 6.3742, |
| "step": 31400 |
| }, |
| { |
| "epoch": 69.64236207374549, |
| "eval_loss": 6.386292934417725, |
| "eval_runtime": 175.7657, |
| "eval_samples_per_second": 56.894, |
| "eval_steps_per_second": 7.112, |
| "step": 31400 |
| }, |
| { |
| "epoch": 69.86415303576379, |
| "grad_norm": 0.4089396595954895, |
| "learning_rate": 2.859531772575251e-06, |
| "loss": 6.3741, |
| "step": 31500 |
| }, |
| { |
| "epoch": 69.86415303576379, |
| "eval_loss": 6.386563301086426, |
| "eval_runtime": 175.832, |
| "eval_samples_per_second": 56.872, |
| "eval_steps_per_second": 7.109, |
| "step": 31500 |
| }, |
| { |
| "epoch": 70.0859439977821, |
| "grad_norm": 0.4220736622810364, |
| "learning_rate": 2.8494983277591977e-06, |
| "loss": 6.3761, |
| "step": 31600 |
| }, |
| { |
| "epoch": 70.0859439977821, |
| "eval_loss": 6.386425495147705, |
| "eval_runtime": 175.4574, |
| "eval_samples_per_second": 56.994, |
| "eval_steps_per_second": 7.124, |
| "step": 31600 |
| }, |
| { |
| "epoch": 70.30773495980038, |
| "grad_norm": 0.5009733438491821, |
| "learning_rate": 2.839464882943144e-06, |
| "loss": 6.3746, |
| "step": 31700 |
| }, |
| { |
| "epoch": 70.30773495980038, |
| "eval_loss": 6.386416435241699, |
| "eval_runtime": 175.5124, |
| "eval_samples_per_second": 56.976, |
| "eval_steps_per_second": 7.122, |
| "step": 31700 |
| }, |
| { |
| "epoch": 70.52952592181869, |
| "grad_norm": 0.41243863105773926, |
| "learning_rate": 2.8294314381270904e-06, |
| "loss": 6.3738, |
| "step": 31800 |
| }, |
| { |
| "epoch": 70.52952592181869, |
| "eval_loss": 6.388505935668945, |
| "eval_runtime": 175.5511, |
| "eval_samples_per_second": 56.963, |
| "eval_steps_per_second": 7.12, |
| "step": 31800 |
| }, |
| { |
| "epoch": 70.75131688383698, |
| "grad_norm": 0.3510850667953491, |
| "learning_rate": 2.819397993311037e-06, |
| "loss": 6.3754, |
| "step": 31900 |
| }, |
| { |
| "epoch": 70.75131688383698, |
| "eval_loss": 6.388024806976318, |
| "eval_runtime": 175.6891, |
| "eval_samples_per_second": 56.919, |
| "eval_steps_per_second": 7.115, |
| "step": 31900 |
| }, |
| { |
| "epoch": 70.97310784585528, |
| "grad_norm": 0.2912569046020508, |
| "learning_rate": 2.8093645484949835e-06, |
| "loss": 6.374, |
| "step": 32000 |
| }, |
| { |
| "epoch": 70.97310784585528, |
| "eval_loss": 6.385600566864014, |
| "eval_runtime": 175.9407, |
| "eval_samples_per_second": 56.837, |
| "eval_steps_per_second": 7.105, |
| "step": 32000 |
| }, |
| { |
| "epoch": 71.19489880787359, |
| "grad_norm": 0.3566642105579376, |
| "learning_rate": 2.79933110367893e-06, |
| "loss": 6.3728, |
| "step": 32100 |
| }, |
| { |
| "epoch": 71.19489880787359, |
| "eval_loss": 6.384610652923584, |
| "eval_runtime": 175.7319, |
| "eval_samples_per_second": 56.905, |
| "eval_steps_per_second": 7.113, |
| "step": 32100 |
| }, |
| { |
| "epoch": 71.41668976989187, |
| "grad_norm": 0.36077818274497986, |
| "learning_rate": 2.7892976588628766e-06, |
| "loss": 6.3742, |
| "step": 32200 |
| }, |
| { |
| "epoch": 71.41668976989187, |
| "eval_loss": 6.389194488525391, |
| "eval_runtime": 173.108, |
| "eval_samples_per_second": 57.767, |
| "eval_steps_per_second": 7.221, |
| "step": 32200 |
| }, |
| { |
| "epoch": 71.63848073191018, |
| "grad_norm": 0.4366162121295929, |
| "learning_rate": 2.779264214046823e-06, |
| "loss": 6.373, |
| "step": 32300 |
| }, |
| { |
| "epoch": 71.63848073191018, |
| "eval_loss": 6.388595104217529, |
| "eval_runtime": 175.5624, |
| "eval_samples_per_second": 56.96, |
| "eval_steps_per_second": 7.12, |
| "step": 32300 |
| }, |
| { |
| "epoch": 71.86027169392847, |
| "grad_norm": 0.3485216498374939, |
| "learning_rate": 2.7692307692307693e-06, |
| "loss": 6.3744, |
| "step": 32400 |
| }, |
| { |
| "epoch": 71.86027169392847, |
| "eval_loss": 6.38759708404541, |
| "eval_runtime": 173.3825, |
| "eval_samples_per_second": 57.676, |
| "eval_steps_per_second": 7.209, |
| "step": 32400 |
| }, |
| { |
| "epoch": 72.08206265594677, |
| "grad_norm": 0.41392314434051514, |
| "learning_rate": 2.759197324414716e-06, |
| "loss": 6.3733, |
| "step": 32500 |
| }, |
| { |
| "epoch": 72.08206265594677, |
| "eval_loss": 6.388287544250488, |
| "eval_runtime": 175.8186, |
| "eval_samples_per_second": 56.877, |
| "eval_steps_per_second": 7.11, |
| "step": 32500 |
| }, |
| { |
| "epoch": 72.30385361796507, |
| "grad_norm": 0.38669446110725403, |
| "learning_rate": 2.749163879598662e-06, |
| "loss": 6.3736, |
| "step": 32600 |
| }, |
| { |
| "epoch": 72.30385361796507, |
| "eval_loss": 6.387938499450684, |
| "eval_runtime": 167.9516, |
| "eval_samples_per_second": 59.541, |
| "eval_steps_per_second": 7.443, |
| "step": 32600 |
| }, |
| { |
| "epoch": 72.52564457998336, |
| "grad_norm": 0.42049235105514526, |
| "learning_rate": 2.7391304347826087e-06, |
| "loss": 6.3744, |
| "step": 32700 |
| }, |
| { |
| "epoch": 72.52564457998336, |
| "eval_loss": 6.387884140014648, |
| "eval_runtime": 175.7946, |
| "eval_samples_per_second": 56.885, |
| "eval_steps_per_second": 7.111, |
| "step": 32700 |
| }, |
| { |
| "epoch": 72.74743554200167, |
| "grad_norm": 0.45259612798690796, |
| "learning_rate": 2.729096989966555e-06, |
| "loss": 6.3733, |
| "step": 32800 |
| }, |
| { |
| "epoch": 72.74743554200167, |
| "eval_loss": 6.383664608001709, |
| "eval_runtime": 175.4633, |
| "eval_samples_per_second": 56.992, |
| "eval_steps_per_second": 7.124, |
| "step": 32800 |
| }, |
| { |
| "epoch": 72.96922650401996, |
| "grad_norm": 0.35638928413391113, |
| "learning_rate": 2.7190635451505014e-06, |
| "loss": 6.3752, |
| "step": 32900 |
| }, |
| { |
| "epoch": 72.96922650401996, |
| "eval_loss": 6.385019302368164, |
| "eval_runtime": 175.4207, |
| "eval_samples_per_second": 57.006, |
| "eval_steps_per_second": 7.126, |
| "step": 32900 |
| }, |
| { |
| "epoch": 73.19101746603826, |
| "grad_norm": 0.4410247206687927, |
| "learning_rate": 2.709030100334448e-06, |
| "loss": 6.3739, |
| "step": 33000 |
| }, |
| { |
| "epoch": 73.19101746603826, |
| "eval_loss": 6.385441303253174, |
| "eval_runtime": 175.4138, |
| "eval_samples_per_second": 57.008, |
| "eval_steps_per_second": 7.126, |
| "step": 33000 |
| }, |
| { |
| "epoch": 73.41280842805655, |
| "grad_norm": 0.2410985231399536, |
| "learning_rate": 2.6989966555183945e-06, |
| "loss": 6.3728, |
| "step": 33100 |
| }, |
| { |
| "epoch": 73.41280842805655, |
| "eval_loss": 6.38595724105835, |
| "eval_runtime": 175.8764, |
| "eval_samples_per_second": 56.858, |
| "eval_steps_per_second": 7.107, |
| "step": 33100 |
| }, |
| { |
| "epoch": 73.63459939007485, |
| "grad_norm": 0.43327927589416504, |
| "learning_rate": 2.6889632107023413e-06, |
| "loss": 6.3742, |
| "step": 33200 |
| }, |
| { |
| "epoch": 73.63459939007485, |
| "eval_loss": 6.387829780578613, |
| "eval_runtime": 175.8542, |
| "eval_samples_per_second": 56.865, |
| "eval_steps_per_second": 7.108, |
| "step": 33200 |
| }, |
| { |
| "epoch": 73.85639035209316, |
| "grad_norm": 0.2946775555610657, |
| "learning_rate": 2.6789297658862876e-06, |
| "loss": 6.3751, |
| "step": 33300 |
| }, |
| { |
| "epoch": 73.85639035209316, |
| "eval_loss": 6.385344505310059, |
| "eval_runtime": 173.3421, |
| "eval_samples_per_second": 57.689, |
| "eval_steps_per_second": 7.211, |
| "step": 33300 |
| }, |
| { |
| "epoch": 74.07818131411145, |
| "grad_norm": 0.33265405893325806, |
| "learning_rate": 2.668896321070234e-06, |
| "loss": 6.3737, |
| "step": 33400 |
| }, |
| { |
| "epoch": 74.07818131411145, |
| "eval_loss": 6.38824987411499, |
| "eval_runtime": 173.3017, |
| "eval_samples_per_second": 57.703, |
| "eval_steps_per_second": 7.213, |
| "step": 33400 |
| }, |
| { |
| "epoch": 74.29997227612975, |
| "grad_norm": 0.40044334530830383, |
| "learning_rate": 2.6588628762541807e-06, |
| "loss": 6.3752, |
| "step": 33500 |
| }, |
| { |
| "epoch": 74.29997227612975, |
| "eval_loss": 6.385106086730957, |
| "eval_runtime": 175.777, |
| "eval_samples_per_second": 56.89, |
| "eval_steps_per_second": 7.111, |
| "step": 33500 |
| }, |
| { |
| "epoch": 74.52176323814804, |
| "grad_norm": 0.3776157796382904, |
| "learning_rate": 2.648829431438127e-06, |
| "loss": 6.3739, |
| "step": 33600 |
| }, |
| { |
| "epoch": 74.52176323814804, |
| "eval_loss": 6.387485980987549, |
| "eval_runtime": 174.2601, |
| "eval_samples_per_second": 57.385, |
| "eval_steps_per_second": 7.173, |
| "step": 33600 |
| }, |
| { |
| "epoch": 74.74355420016634, |
| "grad_norm": 0.33734750747680664, |
| "learning_rate": 2.6387959866220734e-06, |
| "loss": 6.3739, |
| "step": 33700 |
| }, |
| { |
| "epoch": 74.74355420016634, |
| "eval_loss": 6.383073806762695, |
| "eval_runtime": 175.8418, |
| "eval_samples_per_second": 56.869, |
| "eval_steps_per_second": 7.109, |
| "step": 33700 |
| }, |
| { |
| "epoch": 74.96534516218465, |
| "grad_norm": 0.2771698534488678, |
| "learning_rate": 2.62876254180602e-06, |
| "loss": 6.3729, |
| "step": 33800 |
| }, |
| { |
| "epoch": 74.96534516218465, |
| "eval_loss": 6.388527870178223, |
| "eval_runtime": 174.0412, |
| "eval_samples_per_second": 57.458, |
| "eval_steps_per_second": 7.182, |
| "step": 33800 |
| }, |
| { |
| "epoch": 75.18713612420294, |
| "grad_norm": 0.3911442458629608, |
| "learning_rate": 2.6187290969899665e-06, |
| "loss": 6.374, |
| "step": 33900 |
| }, |
| { |
| "epoch": 75.18713612420294, |
| "eval_loss": 6.386963367462158, |
| "eval_runtime": 172.9704, |
| "eval_samples_per_second": 57.813, |
| "eval_steps_per_second": 7.227, |
| "step": 33900 |
| }, |
| { |
| "epoch": 75.40892708622124, |
| "grad_norm": 0.3304766118526459, |
| "learning_rate": 2.6086956521739132e-06, |
| "loss": 6.3746, |
| "step": 34000 |
| }, |
| { |
| "epoch": 75.40892708622124, |
| "eval_loss": 6.386199951171875, |
| "eval_runtime": 175.3788, |
| "eval_samples_per_second": 57.019, |
| "eval_steps_per_second": 7.127, |
| "step": 34000 |
| }, |
| { |
| "epoch": 75.63071804823953, |
| "grad_norm": 0.4422440230846405, |
| "learning_rate": 2.5986622073578596e-06, |
| "loss": 6.3737, |
| "step": 34100 |
| }, |
| { |
| "epoch": 75.63071804823953, |
| "eval_loss": 6.384350776672363, |
| "eval_runtime": 173.0054, |
| "eval_samples_per_second": 57.802, |
| "eval_steps_per_second": 7.225, |
| "step": 34100 |
| }, |
| { |
| "epoch": 75.85250901025783, |
| "grad_norm": 0.28921636939048767, |
| "learning_rate": 2.588628762541806e-06, |
| "loss": 6.3739, |
| "step": 34200 |
| }, |
| { |
| "epoch": 75.85250901025783, |
| "eval_loss": 6.387299537658691, |
| "eval_runtime": 175.5823, |
| "eval_samples_per_second": 56.953, |
| "eval_steps_per_second": 7.119, |
| "step": 34200 |
| }, |
| { |
| "epoch": 76.07429997227614, |
| "grad_norm": 0.3911747336387634, |
| "learning_rate": 2.5785953177257527e-06, |
| "loss": 6.3734, |
| "step": 34300 |
| }, |
| { |
| "epoch": 76.07429997227614, |
| "eval_loss": 6.389584541320801, |
| "eval_runtime": 172.7277, |
| "eval_samples_per_second": 57.895, |
| "eval_steps_per_second": 7.237, |
| "step": 34300 |
| }, |
| { |
| "epoch": 76.29609093429443, |
| "grad_norm": 0.3622056245803833, |
| "learning_rate": 2.568561872909699e-06, |
| "loss": 6.3739, |
| "step": 34400 |
| }, |
| { |
| "epoch": 76.29609093429443, |
| "eval_loss": 6.386180400848389, |
| "eval_runtime": 175.4099, |
| "eval_samples_per_second": 57.009, |
| "eval_steps_per_second": 7.126, |
| "step": 34400 |
| }, |
| { |
| "epoch": 76.51788189631273, |
| "grad_norm": 0.24905167520046234, |
| "learning_rate": 2.5585284280936454e-06, |
| "loss": 6.3746, |
| "step": 34500 |
| }, |
| { |
| "epoch": 76.51788189631273, |
| "eval_loss": 6.383036136627197, |
| "eval_runtime": 172.8585, |
| "eval_samples_per_second": 57.851, |
| "eval_steps_per_second": 7.231, |
| "step": 34500 |
| }, |
| { |
| "epoch": 76.73967285833102, |
| "grad_norm": 0.3207278549671173, |
| "learning_rate": 2.548494983277592e-06, |
| "loss": 6.3749, |
| "step": 34600 |
| }, |
| { |
| "epoch": 76.73967285833102, |
| "eval_loss": 6.3871564865112305, |
| "eval_runtime": 175.7463, |
| "eval_samples_per_second": 56.9, |
| "eval_steps_per_second": 7.113, |
| "step": 34600 |
| }, |
| { |
| "epoch": 76.96146382034932, |
| "grad_norm": 0.3537052273750305, |
| "learning_rate": 2.5384615384615385e-06, |
| "loss": 6.3719, |
| "step": 34700 |
| }, |
| { |
| "epoch": 76.96146382034932, |
| "eval_loss": 6.384720325469971, |
| "eval_runtime": 172.9583, |
| "eval_samples_per_second": 57.817, |
| "eval_steps_per_second": 7.227, |
| "step": 34700 |
| }, |
| { |
| "epoch": 77.18325478236761, |
| "grad_norm": 0.4220789074897766, |
| "learning_rate": 2.528428093645485e-06, |
| "loss": 6.3736, |
| "step": 34800 |
| }, |
| { |
| "epoch": 77.18325478236761, |
| "eval_loss": 6.384481906890869, |
| "eval_runtime": 175.4683, |
| "eval_samples_per_second": 56.99, |
| "eval_steps_per_second": 7.124, |
| "step": 34800 |
| }, |
| { |
| "epoch": 77.40504574438592, |
| "grad_norm": 0.3726615011692047, |
| "learning_rate": 2.5183946488294316e-06, |
| "loss": 6.3726, |
| "step": 34900 |
| }, |
| { |
| "epoch": 77.40504574438592, |
| "eval_loss": 6.383063793182373, |
| "eval_runtime": 175.4526, |
| "eval_samples_per_second": 56.995, |
| "eval_steps_per_second": 7.124, |
| "step": 34900 |
| }, |
| { |
| "epoch": 77.62683670640422, |
| "grad_norm": 0.3583526909351349, |
| "learning_rate": 2.508361204013378e-06, |
| "loss": 6.3742, |
| "step": 35000 |
| }, |
| { |
| "epoch": 77.62683670640422, |
| "eval_loss": 6.383593559265137, |
| "eval_runtime": 175.5123, |
| "eval_samples_per_second": 56.976, |
| "eval_steps_per_second": 7.122, |
| "step": 35000 |
| }, |
| { |
| "epoch": 77.84862766842251, |
| "grad_norm": 0.31663283705711365, |
| "learning_rate": 2.4983277591973247e-06, |
| "loss": 6.3746, |
| "step": 35100 |
| }, |
| { |
| "epoch": 77.84862766842251, |
| "eval_loss": 6.385804653167725, |
| "eval_runtime": 175.8899, |
| "eval_samples_per_second": 56.854, |
| "eval_steps_per_second": 7.107, |
| "step": 35100 |
| }, |
| { |
| "epoch": 78.07041863044081, |
| "grad_norm": 0.3281422555446625, |
| "learning_rate": 2.488294314381271e-06, |
| "loss": 6.374, |
| "step": 35200 |
| }, |
| { |
| "epoch": 78.07041863044081, |
| "eval_loss": 6.382884979248047, |
| "eval_runtime": 176.0174, |
| "eval_samples_per_second": 56.813, |
| "eval_steps_per_second": 7.102, |
| "step": 35200 |
| }, |
| { |
| "epoch": 78.2922095924591, |
| "grad_norm": 0.35885676741600037, |
| "learning_rate": 2.4782608695652173e-06, |
| "loss": 6.3737, |
| "step": 35300 |
| }, |
| { |
| "epoch": 78.2922095924591, |
| "eval_loss": 6.38320255279541, |
| "eval_runtime": 175.5121, |
| "eval_samples_per_second": 56.976, |
| "eval_steps_per_second": 7.122, |
| "step": 35300 |
| }, |
| { |
| "epoch": 78.5140005544774, |
| "grad_norm": 0.40301480889320374, |
| "learning_rate": 2.468227424749164e-06, |
| "loss": 6.3742, |
| "step": 35400 |
| }, |
| { |
| "epoch": 78.5140005544774, |
| "eval_loss": 6.386338233947754, |
| "eval_runtime": 175.6402, |
| "eval_samples_per_second": 56.935, |
| "eval_steps_per_second": 7.117, |
| "step": 35400 |
| }, |
| { |
| "epoch": 78.73579151649571, |
| "grad_norm": 0.3202325701713562, |
| "learning_rate": 2.4581939799331104e-06, |
| "loss": 6.3736, |
| "step": 35500 |
| }, |
| { |
| "epoch": 78.73579151649571, |
| "eval_loss": 6.385340690612793, |
| "eval_runtime": 173.2176, |
| "eval_samples_per_second": 57.731, |
| "eval_steps_per_second": 7.216, |
| "step": 35500 |
| }, |
| { |
| "epoch": 78.957582478514, |
| "grad_norm": 0.370046466588974, |
| "learning_rate": 2.4481605351170568e-06, |
| "loss": 6.3733, |
| "step": 35600 |
| }, |
| { |
| "epoch": 78.957582478514, |
| "eval_loss": 6.3839592933654785, |
| "eval_runtime": 173.1936, |
| "eval_samples_per_second": 57.739, |
| "eval_steps_per_second": 7.217, |
| "step": 35600 |
| }, |
| { |
| "epoch": 79.1793734405323, |
| "grad_norm": 0.3682570457458496, |
| "learning_rate": 2.4381270903010035e-06, |
| "loss": 6.373, |
| "step": 35700 |
| }, |
| { |
| "epoch": 79.1793734405323, |
| "eval_loss": 6.384267807006836, |
| "eval_runtime": 175.7512, |
| "eval_samples_per_second": 56.899, |
| "eval_steps_per_second": 7.112, |
| "step": 35700 |
| }, |
| { |
| "epoch": 79.40116440255059, |
| "grad_norm": 0.42555299401283264, |
| "learning_rate": 2.42809364548495e-06, |
| "loss": 6.3724, |
| "step": 35800 |
| }, |
| { |
| "epoch": 79.40116440255059, |
| "eval_loss": 6.386261940002441, |
| "eval_runtime": 173.3473, |
| "eval_samples_per_second": 57.688, |
| "eval_steps_per_second": 7.211, |
| "step": 35800 |
| }, |
| { |
| "epoch": 79.6229553645689, |
| "grad_norm": 0.4109131693840027, |
| "learning_rate": 2.4180602006688962e-06, |
| "loss": 6.3738, |
| "step": 35900 |
| }, |
| { |
| "epoch": 79.6229553645689, |
| "eval_loss": 6.385996341705322, |
| "eval_runtime": 175.868, |
| "eval_samples_per_second": 56.861, |
| "eval_steps_per_second": 7.108, |
| "step": 35900 |
| }, |
| { |
| "epoch": 79.84474632658718, |
| "grad_norm": 0.4770185351371765, |
| "learning_rate": 2.408026755852843e-06, |
| "loss": 6.373, |
| "step": 36000 |
| }, |
| { |
| "epoch": 79.84474632658718, |
| "eval_loss": 6.385003566741943, |
| "eval_runtime": 175.9258, |
| "eval_samples_per_second": 56.842, |
| "eval_steps_per_second": 7.105, |
| "step": 36000 |
| }, |
| { |
| "epoch": 80.06653728860549, |
| "grad_norm": 0.31983354687690735, |
| "learning_rate": 2.3979933110367893e-06, |
| "loss": 6.3721, |
| "step": 36100 |
| }, |
| { |
| "epoch": 80.06653728860549, |
| "eval_loss": 6.384030818939209, |
| "eval_runtime": 175.9559, |
| "eval_samples_per_second": 56.832, |
| "eval_steps_per_second": 7.104, |
| "step": 36100 |
| }, |
| { |
| "epoch": 80.28832825062379, |
| "grad_norm": 0.42961299419403076, |
| "learning_rate": 2.387959866220736e-06, |
| "loss": 6.3712, |
| "step": 36200 |
| }, |
| { |
| "epoch": 80.28832825062379, |
| "eval_loss": 6.385640621185303, |
| "eval_runtime": 173.4173, |
| "eval_samples_per_second": 57.664, |
| "eval_steps_per_second": 7.208, |
| "step": 36200 |
| }, |
| { |
| "epoch": 80.51011921264208, |
| "grad_norm": 0.31057417392730713, |
| "learning_rate": 2.3779264214046824e-06, |
| "loss": 6.3731, |
| "step": 36300 |
| }, |
| { |
| "epoch": 80.51011921264208, |
| "eval_loss": 6.384836196899414, |
| "eval_runtime": 175.866, |
| "eval_samples_per_second": 56.861, |
| "eval_steps_per_second": 7.108, |
| "step": 36300 |
| }, |
| { |
| "epoch": 80.73191017466038, |
| "grad_norm": 0.2894494831562042, |
| "learning_rate": 2.3678929765886288e-06, |
| "loss": 6.3741, |
| "step": 36400 |
| }, |
| { |
| "epoch": 80.73191017466038, |
| "eval_loss": 6.385368824005127, |
| "eval_runtime": 175.9096, |
| "eval_samples_per_second": 56.847, |
| "eval_steps_per_second": 7.106, |
| "step": 36400 |
| }, |
| { |
| "epoch": 80.95370113667867, |
| "grad_norm": 0.4780093729496002, |
| "learning_rate": 2.3578595317725755e-06, |
| "loss": 6.3749, |
| "step": 36500 |
| }, |
| { |
| "epoch": 80.95370113667867, |
| "eval_loss": 6.384347438812256, |
| "eval_runtime": 175.8521, |
| "eval_samples_per_second": 56.866, |
| "eval_steps_per_second": 7.108, |
| "step": 36500 |
| }, |
| { |
| "epoch": 81.17549209869698, |
| "grad_norm": 0.31205832958221436, |
| "learning_rate": 2.347826086956522e-06, |
| "loss": 6.3743, |
| "step": 36600 |
| }, |
| { |
| "epoch": 81.17549209869698, |
| "eval_loss": 6.385135173797607, |
| "eval_runtime": 175.8923, |
| "eval_samples_per_second": 56.853, |
| "eval_steps_per_second": 7.107, |
| "step": 36600 |
| }, |
| { |
| "epoch": 81.39728306071528, |
| "grad_norm": 0.3318498134613037, |
| "learning_rate": 2.337792642140468e-06, |
| "loss": 6.3735, |
| "step": 36700 |
| }, |
| { |
| "epoch": 81.39728306071528, |
| "eval_loss": 6.3830389976501465, |
| "eval_runtime": 174.576, |
| "eval_samples_per_second": 57.282, |
| "eval_steps_per_second": 7.16, |
| "step": 36700 |
| }, |
| { |
| "epoch": 81.61907402273357, |
| "grad_norm": 0.35717305541038513, |
| "learning_rate": 2.327759197324415e-06, |
| "loss": 6.3726, |
| "step": 36800 |
| }, |
| { |
| "epoch": 81.61907402273357, |
| "eval_loss": 6.384567737579346, |
| "eval_runtime": 173.0226, |
| "eval_samples_per_second": 57.796, |
| "eval_steps_per_second": 7.224, |
| "step": 36800 |
| }, |
| { |
| "epoch": 81.84086498475187, |
| "grad_norm": 0.36196058988571167, |
| "learning_rate": 2.3177257525083613e-06, |
| "loss": 6.3734, |
| "step": 36900 |
| }, |
| { |
| "epoch": 81.84086498475187, |
| "eval_loss": 6.385857582092285, |
| "eval_runtime": 175.4192, |
| "eval_samples_per_second": 57.006, |
| "eval_steps_per_second": 7.126, |
| "step": 36900 |
| }, |
| { |
| "epoch": 82.06265594677016, |
| "grad_norm": 0.34454473853111267, |
| "learning_rate": 2.307692307692308e-06, |
| "loss": 6.3732, |
| "step": 37000 |
| }, |
| { |
| "epoch": 82.06265594677016, |
| "eval_loss": 6.384513854980469, |
| "eval_runtime": 175.4568, |
| "eval_samples_per_second": 56.994, |
| "eval_steps_per_second": 7.124, |
| "step": 37000 |
| }, |
| { |
| "epoch": 82.28444690878847, |
| "grad_norm": 0.3330673575401306, |
| "learning_rate": 2.2976588628762544e-06, |
| "loss": 6.3717, |
| "step": 37100 |
| }, |
| { |
| "epoch": 82.28444690878847, |
| "eval_loss": 6.383497714996338, |
| "eval_runtime": 173.2512, |
| "eval_samples_per_second": 57.72, |
| "eval_steps_per_second": 7.215, |
| "step": 37100 |
| }, |
| { |
| "epoch": 82.50623787080677, |
| "grad_norm": 0.40681159496307373, |
| "learning_rate": 2.2876254180602008e-06, |
| "loss": 6.3728, |
| "step": 37200 |
| }, |
| { |
| "epoch": 82.50623787080677, |
| "eval_loss": 6.38515567779541, |
| "eval_runtime": 175.8178, |
| "eval_samples_per_second": 56.877, |
| "eval_steps_per_second": 7.11, |
| "step": 37200 |
| }, |
| { |
| "epoch": 82.72802883282506, |
| "grad_norm": 0.3258204162120819, |
| "learning_rate": 2.2775919732441475e-06, |
| "loss": 6.3743, |
| "step": 37300 |
| }, |
| { |
| "epoch": 82.72802883282506, |
| "eval_loss": 6.38502311706543, |
| "eval_runtime": 173.1187, |
| "eval_samples_per_second": 57.764, |
| "eval_steps_per_second": 7.22, |
| "step": 37300 |
| }, |
| { |
| "epoch": 82.94981979484336, |
| "grad_norm": 0.37041613459587097, |
| "learning_rate": 2.267558528428094e-06, |
| "loss": 6.3728, |
| "step": 37400 |
| }, |
| { |
| "epoch": 82.94981979484336, |
| "eval_loss": 6.3821611404418945, |
| "eval_runtime": 175.5559, |
| "eval_samples_per_second": 56.962, |
| "eval_steps_per_second": 7.12, |
| "step": 37400 |
| }, |
| { |
| "epoch": 83.17161075686165, |
| "grad_norm": 0.33911818265914917, |
| "learning_rate": 2.25752508361204e-06, |
| "loss": 6.3738, |
| "step": 37500 |
| }, |
| { |
| "epoch": 83.17161075686165, |
| "eval_loss": 6.386144638061523, |
| "eval_runtime": 173.2969, |
| "eval_samples_per_second": 57.704, |
| "eval_steps_per_second": 7.213, |
| "step": 37500 |
| }, |
| { |
| "epoch": 83.39340171887996, |
| "grad_norm": 0.48508045077323914, |
| "learning_rate": 2.2474916387959865e-06, |
| "loss": 6.3728, |
| "step": 37600 |
| }, |
| { |
| "epoch": 83.39340171887996, |
| "eval_loss": 6.383870601654053, |
| "eval_runtime": 175.8417, |
| "eval_samples_per_second": 56.869, |
| "eval_steps_per_second": 7.109, |
| "step": 37600 |
| }, |
| { |
| "epoch": 83.61519268089825, |
| "grad_norm": 0.3488113284111023, |
| "learning_rate": 2.237458193979933e-06, |
| "loss": 6.3726, |
| "step": 37700 |
| }, |
| { |
| "epoch": 83.61519268089825, |
| "eval_loss": 6.385016441345215, |
| "eval_runtime": 175.8197, |
| "eval_samples_per_second": 56.876, |
| "eval_steps_per_second": 7.11, |
| "step": 37700 |
| }, |
| { |
| "epoch": 83.83698364291655, |
| "grad_norm": 0.3524182438850403, |
| "learning_rate": 2.2274247491638796e-06, |
| "loss": 6.3725, |
| "step": 37800 |
| }, |
| { |
| "epoch": 83.83698364291655, |
| "eval_loss": 6.384798526763916, |
| "eval_runtime": 175.4988, |
| "eval_samples_per_second": 56.98, |
| "eval_steps_per_second": 7.123, |
| "step": 37800 |
| }, |
| { |
| "epoch": 84.05877460493485, |
| "grad_norm": 0.28423815965652466, |
| "learning_rate": 2.217391304347826e-06, |
| "loss": 6.374, |
| "step": 37900 |
| }, |
| { |
| "epoch": 84.05877460493485, |
| "eval_loss": 6.387665748596191, |
| "eval_runtime": 172.948, |
| "eval_samples_per_second": 57.821, |
| "eval_steps_per_second": 7.228, |
| "step": 37900 |
| }, |
| { |
| "epoch": 84.28056556695314, |
| "grad_norm": 0.32828596234321594, |
| "learning_rate": 2.2073578595317723e-06, |
| "loss": 6.3724, |
| "step": 38000 |
| }, |
| { |
| "epoch": 84.28056556695314, |
| "eval_loss": 6.383293628692627, |
| "eval_runtime": 175.4508, |
| "eval_samples_per_second": 56.996, |
| "eval_steps_per_second": 7.125, |
| "step": 38000 |
| }, |
| { |
| "epoch": 84.50235652897145, |
| "grad_norm": 0.33721184730529785, |
| "learning_rate": 2.197324414715719e-06, |
| "loss": 6.373, |
| "step": 38100 |
| }, |
| { |
| "epoch": 84.50235652897145, |
| "eval_loss": 6.385343551635742, |
| "eval_runtime": 175.531, |
| "eval_samples_per_second": 56.97, |
| "eval_steps_per_second": 7.121, |
| "step": 38100 |
| }, |
| { |
| "epoch": 84.72414749098974, |
| "grad_norm": 0.2766687273979187, |
| "learning_rate": 2.1872909698996654e-06, |
| "loss": 6.3728, |
| "step": 38200 |
| }, |
| { |
| "epoch": 84.72414749098974, |
| "eval_loss": 6.38714599609375, |
| "eval_runtime": 175.5001, |
| "eval_samples_per_second": 56.98, |
| "eval_steps_per_second": 7.123, |
| "step": 38200 |
| }, |
| { |
| "epoch": 84.94593845300804, |
| "grad_norm": 0.26238977909088135, |
| "learning_rate": 2.177257525083612e-06, |
| "loss": 6.3733, |
| "step": 38300 |
| }, |
| { |
| "epoch": 84.94593845300804, |
| "eval_loss": 6.385676383972168, |
| "eval_runtime": 175.6778, |
| "eval_samples_per_second": 56.922, |
| "eval_steps_per_second": 7.115, |
| "step": 38300 |
| }, |
| { |
| "epoch": 85.16772941502634, |
| "grad_norm": 0.2862393260002136, |
| "learning_rate": 2.1672240802675585e-06, |
| "loss": 6.3729, |
| "step": 38400 |
| }, |
| { |
| "epoch": 85.16772941502634, |
| "eval_loss": 6.384363174438477, |
| "eval_runtime": 175.3945, |
| "eval_samples_per_second": 57.014, |
| "eval_steps_per_second": 7.127, |
| "step": 38400 |
| }, |
| { |
| "epoch": 85.38952037704463, |
| "grad_norm": 0.34560856223106384, |
| "learning_rate": 2.157190635451505e-06, |
| "loss": 6.3732, |
| "step": 38500 |
| }, |
| { |
| "epoch": 85.38952037704463, |
| "eval_loss": 6.383378982543945, |
| "eval_runtime": 172.9454, |
| "eval_samples_per_second": 57.822, |
| "eval_steps_per_second": 7.228, |
| "step": 38500 |
| }, |
| { |
| "epoch": 85.61131133906294, |
| "grad_norm": 0.31079375743865967, |
| "learning_rate": 2.1471571906354516e-06, |
| "loss": 6.373, |
| "step": 38600 |
| }, |
| { |
| "epoch": 85.61131133906294, |
| "eval_loss": 6.383601665496826, |
| "eval_runtime": 175.4201, |
| "eval_samples_per_second": 57.006, |
| "eval_steps_per_second": 7.126, |
| "step": 38600 |
| }, |
| { |
| "epoch": 85.83310230108123, |
| "grad_norm": 0.3083253800868988, |
| "learning_rate": 2.137123745819398e-06, |
| "loss": 6.3731, |
| "step": 38700 |
| }, |
| { |
| "epoch": 85.83310230108123, |
| "eval_loss": 6.383668899536133, |
| "eval_runtime": 175.9754, |
| "eval_samples_per_second": 56.826, |
| "eval_steps_per_second": 7.103, |
| "step": 38700 |
| }, |
| { |
| "epoch": 86.05489326309953, |
| "grad_norm": 0.344168096780777, |
| "learning_rate": 2.1270903010033443e-06, |
| "loss": 6.3731, |
| "step": 38800 |
| }, |
| { |
| "epoch": 86.05489326309953, |
| "eval_loss": 6.382165431976318, |
| "eval_runtime": 173.0611, |
| "eval_samples_per_second": 57.783, |
| "eval_steps_per_second": 7.223, |
| "step": 38800 |
| }, |
| { |
| "epoch": 86.27668422511783, |
| "grad_norm": 0.42378509044647217, |
| "learning_rate": 2.117056856187291e-06, |
| "loss": 6.3735, |
| "step": 38900 |
| }, |
| { |
| "epoch": 86.27668422511783, |
| "eval_loss": 6.386937618255615, |
| "eval_runtime": 175.7527, |
| "eval_samples_per_second": 56.898, |
| "eval_steps_per_second": 7.112, |
| "step": 38900 |
| }, |
| { |
| "epoch": 86.49847518713612, |
| "grad_norm": 0.4086206555366516, |
| "learning_rate": 2.1070234113712374e-06, |
| "loss": 6.372, |
| "step": 39000 |
| }, |
| { |
| "epoch": 86.49847518713612, |
| "eval_loss": 6.385149955749512, |
| "eval_runtime": 172.9793, |
| "eval_samples_per_second": 57.81, |
| "eval_steps_per_second": 7.226, |
| "step": 39000 |
| }, |
| { |
| "epoch": 86.72026614915443, |
| "grad_norm": 0.3867028057575226, |
| "learning_rate": 2.0969899665551837e-06, |
| "loss": 6.371, |
| "step": 39100 |
| }, |
| { |
| "epoch": 86.72026614915443, |
| "eval_loss": 6.385136604309082, |
| "eval_runtime": 175.5185, |
| "eval_samples_per_second": 56.974, |
| "eval_steps_per_second": 7.122, |
| "step": 39100 |
| }, |
| { |
| "epoch": 86.94205711117272, |
| "grad_norm": 0.34638744592666626, |
| "learning_rate": 2.0869565217391305e-06, |
| "loss": 6.3723, |
| "step": 39200 |
| }, |
| { |
| "epoch": 86.94205711117272, |
| "eval_loss": 6.382205486297607, |
| "eval_runtime": 172.979, |
| "eval_samples_per_second": 57.81, |
| "eval_steps_per_second": 7.226, |
| "step": 39200 |
| }, |
| { |
| "epoch": 87.16384807319102, |
| "grad_norm": 0.45395034551620483, |
| "learning_rate": 2.076923076923077e-06, |
| "loss": 6.374, |
| "step": 39300 |
| }, |
| { |
| "epoch": 87.16384807319102, |
| "eval_loss": 6.383747100830078, |
| "eval_runtime": 175.4954, |
| "eval_samples_per_second": 56.982, |
| "eval_steps_per_second": 7.123, |
| "step": 39300 |
| }, |
| { |
| "epoch": 87.38563903520931, |
| "grad_norm": 0.2925475537776947, |
| "learning_rate": 2.0668896321070236e-06, |
| "loss": 6.3746, |
| "step": 39400 |
| }, |
| { |
| "epoch": 87.38563903520931, |
| "eval_loss": 6.3860931396484375, |
| "eval_runtime": 172.9827, |
| "eval_samples_per_second": 57.809, |
| "eval_steps_per_second": 7.226, |
| "step": 39400 |
| }, |
| { |
| "epoch": 87.60742999722761, |
| "grad_norm": 0.25185534358024597, |
| "learning_rate": 2.05685618729097e-06, |
| "loss": 6.3721, |
| "step": 39500 |
| }, |
| { |
| "epoch": 87.60742999722761, |
| "eval_loss": 6.383828163146973, |
| "eval_runtime": 175.4682, |
| "eval_samples_per_second": 56.99, |
| "eval_steps_per_second": 7.124, |
| "step": 39500 |
| }, |
| { |
| "epoch": 87.82922095924592, |
| "grad_norm": 0.35766276717185974, |
| "learning_rate": 2.0468227424749163e-06, |
| "loss": 6.3713, |
| "step": 39600 |
| }, |
| { |
| "epoch": 87.82922095924592, |
| "eval_loss": 6.383662700653076, |
| "eval_runtime": 173.3378, |
| "eval_samples_per_second": 57.691, |
| "eval_steps_per_second": 7.211, |
| "step": 39600 |
| }, |
| { |
| "epoch": 88.0510119212642, |
| "grad_norm": 0.31199392676353455, |
| "learning_rate": 2.036789297658863e-06, |
| "loss": 6.3717, |
| "step": 39700 |
| }, |
| { |
| "epoch": 88.0510119212642, |
| "eval_loss": 6.383730411529541, |
| "eval_runtime": 175.0814, |
| "eval_samples_per_second": 57.116, |
| "eval_steps_per_second": 7.14, |
| "step": 39700 |
| }, |
| { |
| "epoch": 88.27280288328251, |
| "grad_norm": 0.3334641754627228, |
| "learning_rate": 2.0267558528428094e-06, |
| "loss": 6.372, |
| "step": 39800 |
| }, |
| { |
| "epoch": 88.27280288328251, |
| "eval_loss": 6.381414890289307, |
| "eval_runtime": 172.9715, |
| "eval_samples_per_second": 57.813, |
| "eval_steps_per_second": 7.227, |
| "step": 39800 |
| }, |
| { |
| "epoch": 88.4945938453008, |
| "grad_norm": 0.5019832849502563, |
| "learning_rate": 2.0167224080267557e-06, |
| "loss": 6.3721, |
| "step": 39900 |
| }, |
| { |
| "epoch": 88.4945938453008, |
| "eval_loss": 6.383211612701416, |
| "eval_runtime": 175.5157, |
| "eval_samples_per_second": 56.975, |
| "eval_steps_per_second": 7.122, |
| "step": 39900 |
| }, |
| { |
| "epoch": 88.7163848073191, |
| "grad_norm": 0.4383368194103241, |
| "learning_rate": 2.0066889632107025e-06, |
| "loss": 6.3731, |
| "step": 40000 |
| }, |
| { |
| "epoch": 88.7163848073191, |
| "eval_loss": 6.385327339172363, |
| "eval_runtime": 175.457, |
| "eval_samples_per_second": 56.994, |
| "eval_steps_per_second": 7.124, |
| "step": 40000 |
| }, |
| { |
| "epoch": 88.9381757693374, |
| "grad_norm": 0.27147725224494934, |
| "learning_rate": 1.996655518394649e-06, |
| "loss": 6.3741, |
| "step": 40100 |
| }, |
| { |
| "epoch": 88.9381757693374, |
| "eval_loss": 6.383349418640137, |
| "eval_runtime": 173.4084, |
| "eval_samples_per_second": 57.667, |
| "eval_steps_per_second": 7.208, |
| "step": 40100 |
| }, |
| { |
| "epoch": 89.1599667313557, |
| "grad_norm": 0.2689467966556549, |
| "learning_rate": 1.986622073578595e-06, |
| "loss": 6.3719, |
| "step": 40200 |
| }, |
| { |
| "epoch": 89.1599667313557, |
| "eval_loss": 6.38576078414917, |
| "eval_runtime": 173.3868, |
| "eval_samples_per_second": 57.675, |
| "eval_steps_per_second": 7.209, |
| "step": 40200 |
| }, |
| { |
| "epoch": 89.381757693374, |
| "grad_norm": 0.3858400881290436, |
| "learning_rate": 1.976588628762542e-06, |
| "loss": 6.3722, |
| "step": 40300 |
| }, |
| { |
| "epoch": 89.381757693374, |
| "eval_loss": 6.38473653793335, |
| "eval_runtime": 174.5973, |
| "eval_samples_per_second": 57.275, |
| "eval_steps_per_second": 7.159, |
| "step": 40300 |
| }, |
| { |
| "epoch": 89.60354865539229, |
| "grad_norm": 0.372864693403244, |
| "learning_rate": 1.9665551839464883e-06, |
| "loss": 6.3727, |
| "step": 40400 |
| }, |
| { |
| "epoch": 89.60354865539229, |
| "eval_loss": 6.384860992431641, |
| "eval_runtime": 175.5793, |
| "eval_samples_per_second": 56.954, |
| "eval_steps_per_second": 7.119, |
| "step": 40400 |
| }, |
| { |
| "epoch": 89.82533961741059, |
| "grad_norm": 0.31050923466682434, |
| "learning_rate": 1.956521739130435e-06, |
| "loss": 6.3721, |
| "step": 40500 |
| }, |
| { |
| "epoch": 89.82533961741059, |
| "eval_loss": 6.3831257820129395, |
| "eval_runtime": 173.5084, |
| "eval_samples_per_second": 57.634, |
| "eval_steps_per_second": 7.204, |
| "step": 40500 |
| }, |
| { |
| "epoch": 90.0471305794289, |
| "grad_norm": 0.31580400466918945, |
| "learning_rate": 1.9464882943143814e-06, |
| "loss": 6.3716, |
| "step": 40600 |
| }, |
| { |
| "epoch": 90.0471305794289, |
| "eval_loss": 6.382096767425537, |
| "eval_runtime": 175.7748, |
| "eval_samples_per_second": 56.891, |
| "eval_steps_per_second": 7.111, |
| "step": 40600 |
| }, |
| { |
| "epoch": 90.26892154144718, |
| "grad_norm": 0.30445969104766846, |
| "learning_rate": 1.9364548494983277e-06, |
| "loss": 6.3738, |
| "step": 40700 |
| }, |
| { |
| "epoch": 90.26892154144718, |
| "eval_loss": 6.383363246917725, |
| "eval_runtime": 175.8814, |
| "eval_samples_per_second": 56.856, |
| "eval_steps_per_second": 7.107, |
| "step": 40700 |
| }, |
| { |
| "epoch": 90.49071250346549, |
| "grad_norm": 0.3509177565574646, |
| "learning_rate": 1.9264214046822745e-06, |
| "loss": 6.3711, |
| "step": 40800 |
| }, |
| { |
| "epoch": 90.49071250346549, |
| "eval_loss": 6.3791728019714355, |
| "eval_runtime": 175.2022, |
| "eval_samples_per_second": 57.077, |
| "eval_steps_per_second": 7.135, |
| "step": 40800 |
| }, |
| { |
| "epoch": 90.71250346548378, |
| "grad_norm": 0.2431792914867401, |
| "learning_rate": 1.916387959866221e-06, |
| "loss": 6.3717, |
| "step": 40900 |
| }, |
| { |
| "epoch": 90.71250346548378, |
| "eval_loss": 6.383620262145996, |
| "eval_runtime": 173.3604, |
| "eval_samples_per_second": 57.683, |
| "eval_steps_per_second": 7.21, |
| "step": 40900 |
| }, |
| { |
| "epoch": 90.93429442750208, |
| "grad_norm": 0.3652373254299164, |
| "learning_rate": 1.9063545150501674e-06, |
| "loss": 6.3702, |
| "step": 41000 |
| }, |
| { |
| "epoch": 90.93429442750208, |
| "eval_loss": 6.384062767028809, |
| "eval_runtime": 175.9398, |
| "eval_samples_per_second": 56.838, |
| "eval_steps_per_second": 7.105, |
| "step": 41000 |
| }, |
| { |
| "epoch": 91.15608538952037, |
| "grad_norm": 0.3120420575141907, |
| "learning_rate": 1.896321070234114e-06, |
| "loss": 6.3734, |
| "step": 41100 |
| }, |
| { |
| "epoch": 91.15608538952037, |
| "eval_loss": 6.383402347564697, |
| "eval_runtime": 173.0565, |
| "eval_samples_per_second": 57.785, |
| "eval_steps_per_second": 7.223, |
| "step": 41100 |
| }, |
| { |
| "epoch": 91.37787635153867, |
| "grad_norm": 0.36098653078079224, |
| "learning_rate": 1.8862876254180603e-06, |
| "loss": 6.3731, |
| "step": 41200 |
| }, |
| { |
| "epoch": 91.37787635153867, |
| "eval_loss": 6.384464263916016, |
| "eval_runtime": 175.6772, |
| "eval_samples_per_second": 56.923, |
| "eval_steps_per_second": 7.115, |
| "step": 41200 |
| }, |
| { |
| "epoch": 91.59966731355698, |
| "grad_norm": 0.2494172751903534, |
| "learning_rate": 1.8762541806020068e-06, |
| "loss": 6.3727, |
| "step": 41300 |
| }, |
| { |
| "epoch": 91.59966731355698, |
| "eval_loss": 6.384238243103027, |
| "eval_runtime": 175.6493, |
| "eval_samples_per_second": 56.932, |
| "eval_steps_per_second": 7.116, |
| "step": 41300 |
| }, |
| { |
| "epoch": 91.82145827557527, |
| "grad_norm": 0.2649492025375366, |
| "learning_rate": 1.8662207357859534e-06, |
| "loss": 6.3715, |
| "step": 41400 |
| }, |
| { |
| "epoch": 91.82145827557527, |
| "eval_loss": 6.386543273925781, |
| "eval_runtime": 173.0007, |
| "eval_samples_per_second": 57.803, |
| "eval_steps_per_second": 7.225, |
| "step": 41400 |
| }, |
| { |
| "epoch": 92.04324923759357, |
| "grad_norm": 0.31116828322410583, |
| "learning_rate": 1.8561872909699e-06, |
| "loss": 6.3714, |
| "step": 41500 |
| }, |
| { |
| "epoch": 92.04324923759357, |
| "eval_loss": 6.384570121765137, |
| "eval_runtime": 172.9737, |
| "eval_samples_per_second": 57.812, |
| "eval_steps_per_second": 7.227, |
| "step": 41500 |
| }, |
| { |
| "epoch": 92.26504019961186, |
| "grad_norm": 0.39690667390823364, |
| "learning_rate": 1.8461538461538462e-06, |
| "loss": 6.3722, |
| "step": 41600 |
| }, |
| { |
| "epoch": 92.26504019961186, |
| "eval_loss": 6.384208679199219, |
| "eval_runtime": 175.5344, |
| "eval_samples_per_second": 56.969, |
| "eval_steps_per_second": 7.121, |
| "step": 41600 |
| }, |
| { |
| "epoch": 92.48683116163016, |
| "grad_norm": 0.31385165452957153, |
| "learning_rate": 1.8361204013377928e-06, |
| "loss": 6.3727, |
| "step": 41700 |
| }, |
| { |
| "epoch": 92.48683116163016, |
| "eval_loss": 6.382976055145264, |
| "eval_runtime": 175.571, |
| "eval_samples_per_second": 56.957, |
| "eval_steps_per_second": 7.12, |
| "step": 41700 |
| }, |
| { |
| "epoch": 92.70862212364847, |
| "grad_norm": 0.2589961886405945, |
| "learning_rate": 1.8260869565217394e-06, |
| "loss": 6.373, |
| "step": 41800 |
| }, |
| { |
| "epoch": 92.70862212364847, |
| "eval_loss": 6.384578704833984, |
| "eval_runtime": 172.8987, |
| "eval_samples_per_second": 57.837, |
| "eval_steps_per_second": 7.23, |
| "step": 41800 |
| }, |
| { |
| "epoch": 92.93041308566676, |
| "grad_norm": 0.3754993677139282, |
| "learning_rate": 1.8160535117056857e-06, |
| "loss": 6.3716, |
| "step": 41900 |
| }, |
| { |
| "epoch": 92.93041308566676, |
| "eval_loss": 6.387712478637695, |
| "eval_runtime": 173.081, |
| "eval_samples_per_second": 57.776, |
| "eval_steps_per_second": 7.222, |
| "step": 41900 |
| }, |
| { |
| "epoch": 93.15220404768506, |
| "grad_norm": 0.34123027324676514, |
| "learning_rate": 1.8060200668896322e-06, |
| "loss": 6.3719, |
| "step": 42000 |
| }, |
| { |
| "epoch": 93.15220404768506, |
| "eval_loss": 6.387158393859863, |
| "eval_runtime": 173.0202, |
| "eval_samples_per_second": 57.797, |
| "eval_steps_per_second": 7.225, |
| "step": 42000 |
| }, |
| { |
| "epoch": 93.37399500970335, |
| "grad_norm": 0.28870150446891785, |
| "learning_rate": 1.7959866220735788e-06, |
| "loss": 6.3717, |
| "step": 42100 |
| }, |
| { |
| "epoch": 93.37399500970335, |
| "eval_loss": 6.384382247924805, |
| "eval_runtime": 175.9359, |
| "eval_samples_per_second": 56.839, |
| "eval_steps_per_second": 7.105, |
| "step": 42100 |
| }, |
| { |
| "epoch": 93.59578597172165, |
| "grad_norm": 0.33736997842788696, |
| "learning_rate": 1.7859531772575253e-06, |
| "loss": 6.3731, |
| "step": 42200 |
| }, |
| { |
| "epoch": 93.59578597172165, |
| "eval_loss": 6.384626865386963, |
| "eval_runtime": 172.9425, |
| "eval_samples_per_second": 57.823, |
| "eval_steps_per_second": 7.228, |
| "step": 42200 |
| }, |
| { |
| "epoch": 93.81757693373996, |
| "grad_norm": 0.30697163939476013, |
| "learning_rate": 1.7759197324414717e-06, |
| "loss": 6.3731, |
| "step": 42300 |
| }, |
| { |
| "epoch": 93.81757693373996, |
| "eval_loss": 6.384149074554443, |
| "eval_runtime": 175.533, |
| "eval_samples_per_second": 56.969, |
| "eval_steps_per_second": 7.121, |
| "step": 42300 |
| }, |
| { |
| "epoch": 94.03936789575825, |
| "grad_norm": 0.31292060017585754, |
| "learning_rate": 1.7658862876254182e-06, |
| "loss": 6.372, |
| "step": 42400 |
| }, |
| { |
| "epoch": 94.03936789575825, |
| "eval_loss": 6.38083553314209, |
| "eval_runtime": 173.0184, |
| "eval_samples_per_second": 57.797, |
| "eval_steps_per_second": 7.225, |
| "step": 42400 |
| }, |
| { |
| "epoch": 94.26115885777655, |
| "grad_norm": 0.3728470504283905, |
| "learning_rate": 1.7558528428093648e-06, |
| "loss": 6.3713, |
| "step": 42500 |
| }, |
| { |
| "epoch": 94.26115885777655, |
| "eval_loss": 6.381670951843262, |
| "eval_runtime": 175.3914, |
| "eval_samples_per_second": 57.015, |
| "eval_steps_per_second": 7.127, |
| "step": 42500 |
| }, |
| { |
| "epoch": 94.48294981979484, |
| "grad_norm": 0.44780856370925903, |
| "learning_rate": 1.745819397993311e-06, |
| "loss": 6.3718, |
| "step": 42600 |
| }, |
| { |
| "epoch": 94.48294981979484, |
| "eval_loss": 6.385097503662109, |
| "eval_runtime": 175.3778, |
| "eval_samples_per_second": 57.02, |
| "eval_steps_per_second": 7.127, |
| "step": 42600 |
| }, |
| { |
| "epoch": 94.70474078181314, |
| "grad_norm": 0.29420205950737, |
| "learning_rate": 1.7357859531772575e-06, |
| "loss": 6.3709, |
| "step": 42700 |
| }, |
| { |
| "epoch": 94.70474078181314, |
| "eval_loss": 6.382612705230713, |
| "eval_runtime": 173.3858, |
| "eval_samples_per_second": 57.675, |
| "eval_steps_per_second": 7.209, |
| "step": 42700 |
| }, |
| { |
| "epoch": 94.92653174383143, |
| "grad_norm": 0.43360549211502075, |
| "learning_rate": 1.7257525083612038e-06, |
| "loss": 6.3708, |
| "step": 42800 |
| }, |
| { |
| "epoch": 94.92653174383143, |
| "eval_loss": 6.382971286773682, |
| "eval_runtime": 172.9207, |
| "eval_samples_per_second": 57.83, |
| "eval_steps_per_second": 7.229, |
| "step": 42800 |
| }, |
| { |
| "epoch": 95.14832270584974, |
| "grad_norm": 0.29865312576293945, |
| "learning_rate": 1.7157190635451504e-06, |
| "loss": 6.372, |
| "step": 42900 |
| }, |
| { |
| "epoch": 95.14832270584974, |
| "eval_loss": 6.3829755783081055, |
| "eval_runtime": 175.5167, |
| "eval_samples_per_second": 56.975, |
| "eval_steps_per_second": 7.122, |
| "step": 42900 |
| }, |
| { |
| "epoch": 95.37011366786804, |
| "grad_norm": 0.32399508357048035, |
| "learning_rate": 1.705685618729097e-06, |
| "loss": 6.3712, |
| "step": 43000 |
| }, |
| { |
| "epoch": 95.37011366786804, |
| "eval_loss": 6.381554126739502, |
| "eval_runtime": 172.9843, |
| "eval_samples_per_second": 57.809, |
| "eval_steps_per_second": 7.226, |
| "step": 43000 |
| }, |
| { |
| "epoch": 95.59190462988633, |
| "grad_norm": 0.2875135540962219, |
| "learning_rate": 1.6956521739130435e-06, |
| "loss": 6.3709, |
| "step": 43100 |
| }, |
| { |
| "epoch": 95.59190462988633, |
| "eval_loss": 6.381914138793945, |
| "eval_runtime": 175.5546, |
| "eval_samples_per_second": 56.962, |
| "eval_steps_per_second": 7.12, |
| "step": 43100 |
| }, |
| { |
| "epoch": 95.81369559190463, |
| "grad_norm": 0.4401540756225586, |
| "learning_rate": 1.6856187290969898e-06, |
| "loss": 6.3723, |
| "step": 43200 |
| }, |
| { |
| "epoch": 95.81369559190463, |
| "eval_loss": 6.383592128753662, |
| "eval_runtime": 175.584, |
| "eval_samples_per_second": 56.953, |
| "eval_steps_per_second": 7.119, |
| "step": 43200 |
| }, |
| { |
| "epoch": 96.03548655392292, |
| "grad_norm": 0.2576783001422882, |
| "learning_rate": 1.6755852842809363e-06, |
| "loss": 6.3722, |
| "step": 43300 |
| }, |
| { |
| "epoch": 96.03548655392292, |
| "eval_loss": 6.383729457855225, |
| "eval_runtime": 175.4696, |
| "eval_samples_per_second": 56.99, |
| "eval_steps_per_second": 7.124, |
| "step": 43300 |
| }, |
| { |
| "epoch": 96.25727751594123, |
| "grad_norm": 0.3146987855434418, |
| "learning_rate": 1.665551839464883e-06, |
| "loss": 6.3716, |
| "step": 43400 |
| }, |
| { |
| "epoch": 96.25727751594123, |
| "eval_loss": 6.380384922027588, |
| "eval_runtime": 175.0534, |
| "eval_samples_per_second": 57.125, |
| "eval_steps_per_second": 7.141, |
| "step": 43400 |
| }, |
| { |
| "epoch": 96.47906847795953, |
| "grad_norm": 0.3195679485797882, |
| "learning_rate": 1.6555183946488294e-06, |
| "loss": 6.3714, |
| "step": 43500 |
| }, |
| { |
| "epoch": 96.47906847795953, |
| "eval_loss": 6.382904529571533, |
| "eval_runtime": 175.5685, |
| "eval_samples_per_second": 56.958, |
| "eval_steps_per_second": 7.12, |
| "step": 43500 |
| }, |
| { |
| "epoch": 96.70085943997782, |
| "grad_norm": 0.2415214329957962, |
| "learning_rate": 1.6454849498327758e-06, |
| "loss": 6.3711, |
| "step": 43600 |
| }, |
| { |
| "epoch": 96.70085943997782, |
| "eval_loss": 6.380964279174805, |
| "eval_runtime": 173.0267, |
| "eval_samples_per_second": 57.795, |
| "eval_steps_per_second": 7.224, |
| "step": 43600 |
| }, |
| { |
| "epoch": 96.92265040199612, |
| "grad_norm": 0.40489473938941956, |
| "learning_rate": 1.6354515050167223e-06, |
| "loss": 6.3726, |
| "step": 43700 |
| }, |
| { |
| "epoch": 96.92265040199612, |
| "eval_loss": 6.381808757781982, |
| "eval_runtime": 173.1061, |
| "eval_samples_per_second": 57.768, |
| "eval_steps_per_second": 7.221, |
| "step": 43700 |
| }, |
| { |
| "epoch": 97.14444136401441, |
| "grad_norm": 0.30804529786109924, |
| "learning_rate": 1.6254180602006689e-06, |
| "loss": 6.372, |
| "step": 43800 |
| }, |
| { |
| "epoch": 97.14444136401441, |
| "eval_loss": 6.384749889373779, |
| "eval_runtime": 175.6167, |
| "eval_samples_per_second": 56.942, |
| "eval_steps_per_second": 7.118, |
| "step": 43800 |
| }, |
| { |
| "epoch": 97.36623232603272, |
| "grad_norm": 0.31247368454933167, |
| "learning_rate": 1.6153846153846154e-06, |
| "loss": 6.3738, |
| "step": 43900 |
| }, |
| { |
| "epoch": 97.36623232603272, |
| "eval_loss": 6.383345127105713, |
| "eval_runtime": 172.9449, |
| "eval_samples_per_second": 57.822, |
| "eval_steps_per_second": 7.228, |
| "step": 43900 |
| }, |
| { |
| "epoch": 97.588023288051, |
| "grad_norm": 0.3146020174026489, |
| "learning_rate": 1.6053511705685618e-06, |
| "loss": 6.3736, |
| "step": 44000 |
| }, |
| { |
| "epoch": 97.588023288051, |
| "eval_loss": 6.38405179977417, |
| "eval_runtime": 175.4959, |
| "eval_samples_per_second": 56.981, |
| "eval_steps_per_second": 7.123, |
| "step": 44000 |
| }, |
| { |
| "epoch": 97.80981425006931, |
| "grad_norm": 0.30886611342430115, |
| "learning_rate": 1.5953177257525083e-06, |
| "loss": 6.3706, |
| "step": 44100 |
| }, |
| { |
| "epoch": 97.80981425006931, |
| "eval_loss": 6.381131172180176, |
| "eval_runtime": 172.9957, |
| "eval_samples_per_second": 57.805, |
| "eval_steps_per_second": 7.226, |
| "step": 44100 |
| }, |
| { |
| "epoch": 98.03160521208761, |
| "grad_norm": 0.3250170648097992, |
| "learning_rate": 1.5852842809364549e-06, |
| "loss": 6.3711, |
| "step": 44200 |
| }, |
| { |
| "epoch": 98.03160521208761, |
| "eval_loss": 6.382991313934326, |
| "eval_runtime": 175.9006, |
| "eval_samples_per_second": 56.85, |
| "eval_steps_per_second": 7.106, |
| "step": 44200 |
| }, |
| { |
| "epoch": 98.2533961741059, |
| "grad_norm": 0.2637650966644287, |
| "learning_rate": 1.5752508361204012e-06, |
| "loss": 6.3721, |
| "step": 44300 |
| }, |
| { |
| "epoch": 98.2533961741059, |
| "eval_loss": 6.385432243347168, |
| "eval_runtime": 175.8265, |
| "eval_samples_per_second": 56.874, |
| "eval_steps_per_second": 7.109, |
| "step": 44300 |
| }, |
| { |
| "epoch": 98.4751871361242, |
| "grad_norm": 0.3357675075531006, |
| "learning_rate": 1.5652173913043478e-06, |
| "loss": 6.371, |
| "step": 44400 |
| }, |
| { |
| "epoch": 98.4751871361242, |
| "eval_loss": 6.385194301605225, |
| "eval_runtime": 175.8373, |
| "eval_samples_per_second": 56.871, |
| "eval_steps_per_second": 7.109, |
| "step": 44400 |
| }, |
| { |
| "epoch": 98.6969780981425, |
| "grad_norm": 0.3793193995952606, |
| "learning_rate": 1.5551839464882943e-06, |
| "loss": 6.3717, |
| "step": 44500 |
| }, |
| { |
| "epoch": 98.6969780981425, |
| "eval_loss": 6.382778167724609, |
| "eval_runtime": 173.4199, |
| "eval_samples_per_second": 57.664, |
| "eval_steps_per_second": 7.208, |
| "step": 44500 |
| }, |
| { |
| "epoch": 98.9187690601608, |
| "grad_norm": 0.3075515329837799, |
| "learning_rate": 1.5451505016722409e-06, |
| "loss": 6.3705, |
| "step": 44600 |
| }, |
| { |
| "epoch": 98.9187690601608, |
| "eval_loss": 6.384821891784668, |
| "eval_runtime": 175.4722, |
| "eval_samples_per_second": 56.989, |
| "eval_steps_per_second": 7.124, |
| "step": 44600 |
| }, |
| { |
| "epoch": 99.1405600221791, |
| "grad_norm": 0.27654966711997986, |
| "learning_rate": 1.5351170568561872e-06, |
| "loss": 6.3725, |
| "step": 44700 |
| }, |
| { |
| "epoch": 99.1405600221791, |
| "eval_loss": 6.378158092498779, |
| "eval_runtime": 173.0439, |
| "eval_samples_per_second": 57.789, |
| "eval_steps_per_second": 7.224, |
| "step": 44700 |
| }, |
| { |
| "epoch": 99.36235098419739, |
| "grad_norm": 0.25358349084854126, |
| "learning_rate": 1.5250836120401338e-06, |
| "loss": 6.3718, |
| "step": 44800 |
| }, |
| { |
| "epoch": 99.36235098419739, |
| "eval_loss": 6.381252288818359, |
| "eval_runtime": 175.5178, |
| "eval_samples_per_second": 56.974, |
| "eval_steps_per_second": 7.122, |
| "step": 44800 |
| }, |
| { |
| "epoch": 99.5841419462157, |
| "grad_norm": 0.27983585000038147, |
| "learning_rate": 1.5150501672240803e-06, |
| "loss": 6.3709, |
| "step": 44900 |
| }, |
| { |
| "epoch": 99.5841419462157, |
| "eval_loss": 6.383197784423828, |
| "eval_runtime": 175.4823, |
| "eval_samples_per_second": 56.986, |
| "eval_steps_per_second": 7.123, |
| "step": 44900 |
| }, |
| { |
| "epoch": 99.80593290823398, |
| "grad_norm": 0.35121074318885803, |
| "learning_rate": 1.5050167224080269e-06, |
| "loss": 6.3726, |
| "step": 45000 |
| }, |
| { |
| "epoch": 99.80593290823398, |
| "eval_loss": 6.385370254516602, |
| "eval_runtime": 175.4757, |
| "eval_samples_per_second": 56.988, |
| "eval_steps_per_second": 7.123, |
| "step": 45000 |
| }, |
| { |
| "epoch": 100.02772387025229, |
| "grad_norm": 0.22111310064792633, |
| "learning_rate": 1.4949832775919732e-06, |
| "loss": 6.3716, |
| "step": 45100 |
| }, |
| { |
| "epoch": 100.02772387025229, |
| "eval_loss": 6.38284158706665, |
| "eval_runtime": 175.8886, |
| "eval_samples_per_second": 56.854, |
| "eval_steps_per_second": 7.107, |
| "step": 45100 |
| }, |
| { |
| "epoch": 100.24951483227059, |
| "grad_norm": 0.22795332968235016, |
| "learning_rate": 1.4849498327759198e-06, |
| "loss": 6.3721, |
| "step": 45200 |
| }, |
| { |
| "epoch": 100.24951483227059, |
| "eval_loss": 6.378814697265625, |
| "eval_runtime": 173.36, |
| "eval_samples_per_second": 57.683, |
| "eval_steps_per_second": 7.21, |
| "step": 45200 |
| }, |
| { |
| "epoch": 100.47130579428888, |
| "grad_norm": 0.3906308710575104, |
| "learning_rate": 1.4749163879598663e-06, |
| "loss": 6.3711, |
| "step": 45300 |
| }, |
| { |
| "epoch": 100.47130579428888, |
| "eval_loss": 6.380859375, |
| "eval_runtime": 175.65, |
| "eval_samples_per_second": 56.931, |
| "eval_steps_per_second": 7.116, |
| "step": 45300 |
| }, |
| { |
| "epoch": 100.69309675630718, |
| "grad_norm": 0.35361433029174805, |
| "learning_rate": 1.4648829431438129e-06, |
| "loss": 6.3689, |
| "step": 45400 |
| }, |
| { |
| "epoch": 100.69309675630718, |
| "eval_loss": 6.386940956115723, |
| "eval_runtime": 173.0816, |
| "eval_samples_per_second": 57.776, |
| "eval_steps_per_second": 7.222, |
| "step": 45400 |
| }, |
| { |
| "epoch": 100.91488771832547, |
| "grad_norm": 0.3520587682723999, |
| "learning_rate": 1.4548494983277592e-06, |
| "loss": 6.371, |
| "step": 45500 |
| }, |
| { |
| "epoch": 100.91488771832547, |
| "eval_loss": 6.384310245513916, |
| "eval_runtime": 175.5343, |
| "eval_samples_per_second": 56.969, |
| "eval_steps_per_second": 7.121, |
| "step": 45500 |
| }, |
| { |
| "epoch": 101.13667868034378, |
| "grad_norm": 0.37038084864616394, |
| "learning_rate": 1.4448160535117058e-06, |
| "loss": 6.3712, |
| "step": 45600 |
| }, |
| { |
| "epoch": 101.13667868034378, |
| "eval_loss": 6.381255626678467, |
| "eval_runtime": 172.9314, |
| "eval_samples_per_second": 57.826, |
| "eval_steps_per_second": 7.228, |
| "step": 45600 |
| }, |
| { |
| "epoch": 101.35846964236207, |
| "grad_norm": 0.2583162188529968, |
| "learning_rate": 1.4347826086956523e-06, |
| "loss": 6.3693, |
| "step": 45700 |
| }, |
| { |
| "epoch": 101.35846964236207, |
| "eval_loss": 6.385676383972168, |
| "eval_runtime": 175.4492, |
| "eval_samples_per_second": 56.997, |
| "eval_steps_per_second": 7.125, |
| "step": 45700 |
| }, |
| { |
| "epoch": 101.58026060438037, |
| "grad_norm": 0.37049952149391174, |
| "learning_rate": 1.4247491638795989e-06, |
| "loss": 6.3715, |
| "step": 45800 |
| }, |
| { |
| "epoch": 101.58026060438037, |
| "eval_loss": 6.383345603942871, |
| "eval_runtime": 172.9908, |
| "eval_samples_per_second": 57.807, |
| "eval_steps_per_second": 7.226, |
| "step": 45800 |
| }, |
| { |
| "epoch": 101.80205156639867, |
| "grad_norm": 0.3586992919445038, |
| "learning_rate": 1.4147157190635452e-06, |
| "loss": 6.3709, |
| "step": 45900 |
| }, |
| { |
| "epoch": 101.80205156639867, |
| "eval_loss": 6.383970260620117, |
| "eval_runtime": 175.5127, |
| "eval_samples_per_second": 56.976, |
| "eval_steps_per_second": 7.122, |
| "step": 45900 |
| }, |
| { |
| "epoch": 102.02384252841696, |
| "grad_norm": 0.274954229593277, |
| "learning_rate": 1.4046822742474917e-06, |
| "loss": 6.3721, |
| "step": 46000 |
| }, |
| { |
| "epoch": 102.02384252841696, |
| "eval_loss": 6.379533767700195, |
| "eval_runtime": 175.5086, |
| "eval_samples_per_second": 56.977, |
| "eval_steps_per_second": 7.122, |
| "step": 46000 |
| }, |
| { |
| "epoch": 102.24563349043527, |
| "grad_norm": 0.2859888970851898, |
| "learning_rate": 1.3946488294314383e-06, |
| "loss": 6.3704, |
| "step": 46100 |
| }, |
| { |
| "epoch": 102.24563349043527, |
| "eval_loss": 6.3819146156311035, |
| "eval_runtime": 175.6284, |
| "eval_samples_per_second": 56.938, |
| "eval_steps_per_second": 7.117, |
| "step": 46100 |
| }, |
| { |
| "epoch": 102.46742445245356, |
| "grad_norm": 0.27162763476371765, |
| "learning_rate": 1.3846153846153846e-06, |
| "loss": 6.3718, |
| "step": 46200 |
| }, |
| { |
| "epoch": 102.46742445245356, |
| "eval_loss": 6.383949279785156, |
| "eval_runtime": 173.0341, |
| "eval_samples_per_second": 57.792, |
| "eval_steps_per_second": 7.224, |
| "step": 46200 |
| }, |
| { |
| "epoch": 102.68921541447186, |
| "grad_norm": 0.24669644236564636, |
| "learning_rate": 1.374581939799331e-06, |
| "loss": 6.3706, |
| "step": 46300 |
| }, |
| { |
| "epoch": 102.68921541447186, |
| "eval_loss": 6.384088516235352, |
| "eval_runtime": 175.8327, |
| "eval_samples_per_second": 56.872, |
| "eval_steps_per_second": 7.109, |
| "step": 46300 |
| }, |
| { |
| "epoch": 102.91100637649016, |
| "grad_norm": 0.32821038365364075, |
| "learning_rate": 1.3645484949832775e-06, |
| "loss": 6.3716, |
| "step": 46400 |
| }, |
| { |
| "epoch": 102.91100637649016, |
| "eval_loss": 6.383686065673828, |
| "eval_runtime": 173.1011, |
| "eval_samples_per_second": 57.77, |
| "eval_steps_per_second": 7.221, |
| "step": 46400 |
| }, |
| { |
| "epoch": 103.13279733850845, |
| "grad_norm": 0.23931552469730377, |
| "learning_rate": 1.354515050167224e-06, |
| "loss": 6.3706, |
| "step": 46500 |
| }, |
| { |
| "epoch": 103.13279733850845, |
| "eval_loss": 6.379798412322998, |
| "eval_runtime": 175.5988, |
| "eval_samples_per_second": 56.948, |
| "eval_steps_per_second": 7.118, |
| "step": 46500 |
| }, |
| { |
| "epoch": 103.35458830052676, |
| "grad_norm": 0.2975938022136688, |
| "learning_rate": 1.3444816053511706e-06, |
| "loss": 6.3713, |
| "step": 46600 |
| }, |
| { |
| "epoch": 103.35458830052676, |
| "eval_loss": 6.3860554695129395, |
| "eval_runtime": 175.5887, |
| "eval_samples_per_second": 56.951, |
| "eval_steps_per_second": 7.119, |
| "step": 46600 |
| }, |
| { |
| "epoch": 103.57637926254505, |
| "grad_norm": 0.2592810392379761, |
| "learning_rate": 1.334448160535117e-06, |
| "loss": 6.3717, |
| "step": 46700 |
| }, |
| { |
| "epoch": 103.57637926254505, |
| "eval_loss": 6.3828301429748535, |
| "eval_runtime": 175.6957, |
| "eval_samples_per_second": 56.917, |
| "eval_steps_per_second": 7.115, |
| "step": 46700 |
| }, |
| { |
| "epoch": 103.79817022456335, |
| "grad_norm": 0.2834523320198059, |
| "learning_rate": 1.3244147157190635e-06, |
| "loss": 6.3713, |
| "step": 46800 |
| }, |
| { |
| "epoch": 103.79817022456335, |
| "eval_loss": 6.386697769165039, |
| "eval_runtime": 172.9159, |
| "eval_samples_per_second": 57.832, |
| "eval_steps_per_second": 7.229, |
| "step": 46800 |
| }, |
| { |
| "epoch": 104.01996118658165, |
| "grad_norm": 0.2672658860683441, |
| "learning_rate": 1.31438127090301e-06, |
| "loss": 6.3721, |
| "step": 46900 |
| }, |
| { |
| "epoch": 104.01996118658165, |
| "eval_loss": 6.381076812744141, |
| "eval_runtime": 173.0101, |
| "eval_samples_per_second": 57.8, |
| "eval_steps_per_second": 7.225, |
| "step": 46900 |
| }, |
| { |
| "epoch": 104.24175214859994, |
| "grad_norm": 0.29608866572380066, |
| "learning_rate": 1.3043478260869566e-06, |
| "loss": 6.3722, |
| "step": 47000 |
| }, |
| { |
| "epoch": 104.24175214859994, |
| "eval_loss": 6.383474826812744, |
| "eval_runtime": 175.9295, |
| "eval_samples_per_second": 56.841, |
| "eval_steps_per_second": 7.105, |
| "step": 47000 |
| }, |
| { |
| "epoch": 104.46354311061825, |
| "grad_norm": 0.31595227122306824, |
| "learning_rate": 1.294314381270903e-06, |
| "loss": 6.3715, |
| "step": 47100 |
| }, |
| { |
| "epoch": 104.46354311061825, |
| "eval_loss": 6.382750988006592, |
| "eval_runtime": 173.1316, |
| "eval_samples_per_second": 57.76, |
| "eval_steps_per_second": 7.22, |
| "step": 47100 |
| }, |
| { |
| "epoch": 104.68533407263654, |
| "grad_norm": 0.2782845199108124, |
| "learning_rate": 1.2842809364548495e-06, |
| "loss": 6.3715, |
| "step": 47200 |
| }, |
| { |
| "epoch": 104.68533407263654, |
| "eval_loss": 6.381110191345215, |
| "eval_runtime": 175.6479, |
| "eval_samples_per_second": 56.932, |
| "eval_steps_per_second": 7.117, |
| "step": 47200 |
| }, |
| { |
| "epoch": 104.90712503465484, |
| "grad_norm": 0.32985934615135193, |
| "learning_rate": 1.274247491638796e-06, |
| "loss": 6.3707, |
| "step": 47300 |
| }, |
| { |
| "epoch": 104.90712503465484, |
| "eval_loss": 6.380244731903076, |
| "eval_runtime": 173.0618, |
| "eval_samples_per_second": 57.783, |
| "eval_steps_per_second": 7.223, |
| "step": 47300 |
| }, |
| { |
| "epoch": 105.12891599667313, |
| "grad_norm": 0.27673158049583435, |
| "learning_rate": 1.2642140468227424e-06, |
| "loss": 6.371, |
| "step": 47400 |
| }, |
| { |
| "epoch": 105.12891599667313, |
| "eval_loss": 6.382138252258301, |
| "eval_runtime": 175.4509, |
| "eval_samples_per_second": 56.996, |
| "eval_steps_per_second": 7.125, |
| "step": 47400 |
| }, |
| { |
| "epoch": 105.35070695869143, |
| "grad_norm": 0.2984777092933655, |
| "learning_rate": 1.254180602006689e-06, |
| "loss": 6.3719, |
| "step": 47500 |
| }, |
| { |
| "epoch": 105.35070695869143, |
| "eval_loss": 6.382594585418701, |
| "eval_runtime": 173.0122, |
| "eval_samples_per_second": 57.799, |
| "eval_steps_per_second": 7.225, |
| "step": 47500 |
| }, |
| { |
| "epoch": 105.57249792070974, |
| "grad_norm": 0.29209384322166443, |
| "learning_rate": 1.2441471571906355e-06, |
| "loss": 6.3715, |
| "step": 47600 |
| }, |
| { |
| "epoch": 105.57249792070974, |
| "eval_loss": 6.38098669052124, |
| "eval_runtime": 175.7524, |
| "eval_samples_per_second": 56.898, |
| "eval_steps_per_second": 7.112, |
| "step": 47600 |
| }, |
| { |
| "epoch": 105.79428888272803, |
| "grad_norm": 0.35189709067344666, |
| "learning_rate": 1.234113712374582e-06, |
| "loss": 6.3701, |
| "step": 47700 |
| }, |
| { |
| "epoch": 105.79428888272803, |
| "eval_loss": 6.384945392608643, |
| "eval_runtime": 175.5438, |
| "eval_samples_per_second": 56.966, |
| "eval_steps_per_second": 7.121, |
| "step": 47700 |
| }, |
| { |
| "epoch": 106.01607984474633, |
| "grad_norm": 0.37181735038757324, |
| "learning_rate": 1.2240802675585284e-06, |
| "loss": 6.3703, |
| "step": 47800 |
| }, |
| { |
| "epoch": 106.01607984474633, |
| "eval_loss": 6.378709316253662, |
| "eval_runtime": 175.523, |
| "eval_samples_per_second": 56.973, |
| "eval_steps_per_second": 7.122, |
| "step": 47800 |
| }, |
| { |
| "epoch": 106.23787080676462, |
| "grad_norm": 0.2793137729167938, |
| "learning_rate": 1.214046822742475e-06, |
| "loss": 6.3706, |
| "step": 47900 |
| }, |
| { |
| "epoch": 106.23787080676462, |
| "eval_loss": 6.380676746368408, |
| "eval_runtime": 173.0355, |
| "eval_samples_per_second": 57.792, |
| "eval_steps_per_second": 7.224, |
| "step": 47900 |
| }, |
| { |
| "epoch": 106.45966176878292, |
| "grad_norm": 0.2996074855327606, |
| "learning_rate": 1.2040133779264215e-06, |
| "loss": 6.3714, |
| "step": 48000 |
| }, |
| { |
| "epoch": 106.45966176878292, |
| "eval_loss": 6.382739067077637, |
| "eval_runtime": 175.5807, |
| "eval_samples_per_second": 56.954, |
| "eval_steps_per_second": 7.119, |
| "step": 48000 |
| }, |
| { |
| "epoch": 106.68145273080123, |
| "grad_norm": 0.32835853099823, |
| "learning_rate": 1.193979933110368e-06, |
| "loss": 6.3717, |
| "step": 48100 |
| }, |
| { |
| "epoch": 106.68145273080123, |
| "eval_loss": 6.382002353668213, |
| "eval_runtime": 173.3264, |
| "eval_samples_per_second": 57.695, |
| "eval_steps_per_second": 7.212, |
| "step": 48100 |
| }, |
| { |
| "epoch": 106.90324369281952, |
| "grad_norm": 0.31071096658706665, |
| "learning_rate": 1.1839464882943144e-06, |
| "loss": 6.3715, |
| "step": 48200 |
| }, |
| { |
| "epoch": 106.90324369281952, |
| "eval_loss": 6.385354042053223, |
| "eval_runtime": 175.863, |
| "eval_samples_per_second": 56.862, |
| "eval_steps_per_second": 7.108, |
| "step": 48200 |
| }, |
| { |
| "epoch": 107.12503465483782, |
| "grad_norm": 0.32424595952033997, |
| "learning_rate": 1.173913043478261e-06, |
| "loss": 6.3713, |
| "step": 48300 |
| }, |
| { |
| "epoch": 107.12503465483782, |
| "eval_loss": 6.381778240203857, |
| "eval_runtime": 176.0254, |
| "eval_samples_per_second": 56.81, |
| "eval_steps_per_second": 7.101, |
| "step": 48300 |
| }, |
| { |
| "epoch": 107.34682561685611, |
| "grad_norm": 0.25034162402153015, |
| "learning_rate": 1.1638795986622075e-06, |
| "loss": 6.3714, |
| "step": 48400 |
| }, |
| { |
| "epoch": 107.34682561685611, |
| "eval_loss": 6.383028507232666, |
| "eval_runtime": 175.899, |
| "eval_samples_per_second": 56.851, |
| "eval_steps_per_second": 7.106, |
| "step": 48400 |
| }, |
| { |
| "epoch": 107.56861657887441, |
| "grad_norm": 0.2586011290550232, |
| "learning_rate": 1.153846153846154e-06, |
| "loss": 6.3722, |
| "step": 48500 |
| }, |
| { |
| "epoch": 107.56861657887441, |
| "eval_loss": 6.382985591888428, |
| "eval_runtime": 175.861, |
| "eval_samples_per_second": 56.863, |
| "eval_steps_per_second": 7.108, |
| "step": 48500 |
| }, |
| { |
| "epoch": 107.79040754089272, |
| "grad_norm": 0.28121402859687805, |
| "learning_rate": 1.1438127090301004e-06, |
| "loss": 6.3715, |
| "step": 48600 |
| }, |
| { |
| "epoch": 107.79040754089272, |
| "eval_loss": 6.381731986999512, |
| "eval_runtime": 173.3663, |
| "eval_samples_per_second": 57.681, |
| "eval_steps_per_second": 7.21, |
| "step": 48600 |
| }, |
| { |
| "epoch": 108.012198502911, |
| "grad_norm": 0.27013683319091797, |
| "learning_rate": 1.133779264214047e-06, |
| "loss": 6.3689, |
| "step": 48700 |
| }, |
| { |
| "epoch": 108.012198502911, |
| "eval_loss": 6.381706237792969, |
| "eval_runtime": 175.9392, |
| "eval_samples_per_second": 56.838, |
| "eval_steps_per_second": 7.105, |
| "step": 48700 |
| }, |
| { |
| "epoch": 108.23398946492931, |
| "grad_norm": 0.345570832490921, |
| "learning_rate": 1.1237458193979933e-06, |
| "loss": 6.3706, |
| "step": 48800 |
| }, |
| { |
| "epoch": 108.23398946492931, |
| "eval_loss": 6.384325981140137, |
| "eval_runtime": 173.2557, |
| "eval_samples_per_second": 57.718, |
| "eval_steps_per_second": 7.215, |
| "step": 48800 |
| }, |
| { |
| "epoch": 108.4557804269476, |
| "grad_norm": 0.26037341356277466, |
| "learning_rate": 1.1137123745819398e-06, |
| "loss": 6.3728, |
| "step": 48900 |
| }, |
| { |
| "epoch": 108.4557804269476, |
| "eval_loss": 6.383279323577881, |
| "eval_runtime": 175.662, |
| "eval_samples_per_second": 56.927, |
| "eval_steps_per_second": 7.116, |
| "step": 48900 |
| }, |
| { |
| "epoch": 108.6775713889659, |
| "grad_norm": 0.25174733996391296, |
| "learning_rate": 1.1036789297658862e-06, |
| "loss": 6.3711, |
| "step": 49000 |
| }, |
| { |
| "epoch": 108.6775713889659, |
| "eval_loss": 6.384110927581787, |
| "eval_runtime": 173.04, |
| "eval_samples_per_second": 57.79, |
| "eval_steps_per_second": 7.224, |
| "step": 49000 |
| }, |
| { |
| "epoch": 108.89936235098419, |
| "grad_norm": 0.22819426655769348, |
| "learning_rate": 1.0936454849498327e-06, |
| "loss": 6.3725, |
| "step": 49100 |
| }, |
| { |
| "epoch": 108.89936235098419, |
| "eval_loss": 6.383809566497803, |
| "eval_runtime": 175.8514, |
| "eval_samples_per_second": 56.866, |
| "eval_steps_per_second": 7.108, |
| "step": 49100 |
| }, |
| { |
| "epoch": 109.1211533130025, |
| "grad_norm": 0.3142814636230469, |
| "learning_rate": 1.0836120401337793e-06, |
| "loss": 6.3707, |
| "step": 49200 |
| }, |
| { |
| "epoch": 109.1211533130025, |
| "eval_loss": 6.38060998916626, |
| "eval_runtime": 172.941, |
| "eval_samples_per_second": 57.823, |
| "eval_steps_per_second": 7.228, |
| "step": 49200 |
| }, |
| { |
| "epoch": 109.3429442750208, |
| "grad_norm": 0.30092594027519226, |
| "learning_rate": 1.0735785953177258e-06, |
| "loss": 6.3699, |
| "step": 49300 |
| }, |
| { |
| "epoch": 109.3429442750208, |
| "eval_loss": 6.385983943939209, |
| "eval_runtime": 175.5269, |
| "eval_samples_per_second": 56.971, |
| "eval_steps_per_second": 7.121, |
| "step": 49300 |
| }, |
| { |
| "epoch": 109.56473523703909, |
| "grad_norm": 0.31821510195732117, |
| "learning_rate": 1.0635451505016722e-06, |
| "loss": 6.3694, |
| "step": 49400 |
| }, |
| { |
| "epoch": 109.56473523703909, |
| "eval_loss": 6.383793830871582, |
| "eval_runtime": 175.5382, |
| "eval_samples_per_second": 56.968, |
| "eval_steps_per_second": 7.121, |
| "step": 49400 |
| }, |
| { |
| "epoch": 109.78652619905739, |
| "grad_norm": 0.3837875425815582, |
| "learning_rate": 1.0535117056856187e-06, |
| "loss": 6.3701, |
| "step": 49500 |
| }, |
| { |
| "epoch": 109.78652619905739, |
| "eval_loss": 6.380537509918213, |
| "eval_runtime": 175.935, |
| "eval_samples_per_second": 56.839, |
| "eval_steps_per_second": 7.105, |
| "step": 49500 |
| }, |
| { |
| "epoch": 110.00831716107568, |
| "grad_norm": 0.23530994355678558, |
| "learning_rate": 1.0434782608695653e-06, |
| "loss": 6.3705, |
| "step": 49600 |
| }, |
| { |
| "epoch": 110.00831716107568, |
| "eval_loss": 6.386258602142334, |
| "eval_runtime": 175.9707, |
| "eval_samples_per_second": 56.828, |
| "eval_steps_per_second": 7.103, |
| "step": 49600 |
| }, |
| { |
| "epoch": 110.23010812309398, |
| "grad_norm": 0.26103320717811584, |
| "learning_rate": 1.0334448160535118e-06, |
| "loss": 6.3707, |
| "step": 49700 |
| }, |
| { |
| "epoch": 110.23010812309398, |
| "eval_loss": 6.383273124694824, |
| "eval_runtime": 173.4608, |
| "eval_samples_per_second": 57.65, |
| "eval_steps_per_second": 7.206, |
| "step": 49700 |
| }, |
| { |
| "epoch": 110.45189908511229, |
| "grad_norm": 0.2887881398200989, |
| "learning_rate": 1.0234113712374581e-06, |
| "loss": 6.3721, |
| "step": 49800 |
| }, |
| { |
| "epoch": 110.45189908511229, |
| "eval_loss": 6.384125709533691, |
| "eval_runtime": 172.9625, |
| "eval_samples_per_second": 57.816, |
| "eval_steps_per_second": 7.227, |
| "step": 49800 |
| }, |
| { |
| "epoch": 110.67369004713058, |
| "grad_norm": 0.31840309500694275, |
| "learning_rate": 1.0133779264214047e-06, |
| "loss": 6.3717, |
| "step": 49900 |
| }, |
| { |
| "epoch": 110.67369004713058, |
| "eval_loss": 6.381842136383057, |
| "eval_runtime": 175.4749, |
| "eval_samples_per_second": 56.988, |
| "eval_steps_per_second": 7.124, |
| "step": 49900 |
| }, |
| { |
| "epoch": 110.89548100914888, |
| "grad_norm": 0.21653781831264496, |
| "learning_rate": 1.0033444816053512e-06, |
| "loss": 6.3707, |
| "step": 50000 |
| }, |
| { |
| "epoch": 110.89548100914888, |
| "eval_loss": 6.381892204284668, |
| "eval_runtime": 175.5709, |
| "eval_samples_per_second": 56.957, |
| "eval_steps_per_second": 7.12, |
| "step": 50000 |
| }, |
| { |
| "epoch": 111.11727197116717, |
| "grad_norm": 0.3267481327056885, |
| "learning_rate": 9.933110367892976e-07, |
| "loss": 6.3708, |
| "step": 50100 |
| }, |
| { |
| "epoch": 111.11727197116717, |
| "eval_loss": 6.3821611404418945, |
| "eval_runtime": 172.9472, |
| "eval_samples_per_second": 57.821, |
| "eval_steps_per_second": 7.228, |
| "step": 50100 |
| }, |
| { |
| "epoch": 111.33906293318547, |
| "grad_norm": 0.27063196897506714, |
| "learning_rate": 9.832775919732441e-07, |
| "loss": 6.3717, |
| "step": 50200 |
| }, |
| { |
| "epoch": 111.33906293318547, |
| "eval_loss": 6.380985736846924, |
| "eval_runtime": 175.4969, |
| "eval_samples_per_second": 56.981, |
| "eval_steps_per_second": 7.123, |
| "step": 50200 |
| }, |
| { |
| "epoch": 111.56085389520376, |
| "grad_norm": 0.3249282240867615, |
| "learning_rate": 9.732441471571907e-07, |
| "loss": 6.3712, |
| "step": 50300 |
| }, |
| { |
| "epoch": 111.56085389520376, |
| "eval_loss": 6.380914688110352, |
| "eval_runtime": 172.9605, |
| "eval_samples_per_second": 57.817, |
| "eval_steps_per_second": 7.227, |
| "step": 50300 |
| }, |
| { |
| "epoch": 111.78264485722207, |
| "grad_norm": 0.23895922303199768, |
| "learning_rate": 9.632107023411372e-07, |
| "loss": 6.3703, |
| "step": 50400 |
| }, |
| { |
| "epoch": 111.78264485722207, |
| "eval_loss": 6.382885932922363, |
| "eval_runtime": 175.5673, |
| "eval_samples_per_second": 56.958, |
| "eval_steps_per_second": 7.12, |
| "step": 50400 |
| }, |
| { |
| "epoch": 112.00443581924037, |
| "grad_norm": 0.35966283082962036, |
| "learning_rate": 9.531772575250837e-07, |
| "loss": 6.37, |
| "step": 50500 |
| }, |
| { |
| "epoch": 112.00443581924037, |
| "eval_loss": 6.383852481842041, |
| "eval_runtime": 173.4529, |
| "eval_samples_per_second": 57.653, |
| "eval_steps_per_second": 7.207, |
| "step": 50500 |
| }, |
| { |
| "epoch": 112.22622678125866, |
| "grad_norm": 0.3259362578392029, |
| "learning_rate": 9.431438127090301e-07, |
| "loss": 6.371, |
| "step": 50600 |
| }, |
| { |
| "epoch": 112.22622678125866, |
| "eval_loss": 6.385578155517578, |
| "eval_runtime": 175.6993, |
| "eval_samples_per_second": 56.915, |
| "eval_steps_per_second": 7.114, |
| "step": 50600 |
| }, |
| { |
| "epoch": 112.44801774327696, |
| "grad_norm": 0.26321855187416077, |
| "learning_rate": 9.331103678929767e-07, |
| "loss": 6.3701, |
| "step": 50700 |
| }, |
| { |
| "epoch": 112.44801774327696, |
| "eval_loss": 6.380197048187256, |
| "eval_runtime": 175.6967, |
| "eval_samples_per_second": 56.916, |
| "eval_steps_per_second": 7.115, |
| "step": 50700 |
| }, |
| { |
| "epoch": 112.66980870529525, |
| "grad_norm": 0.25881582498550415, |
| "learning_rate": 9.230769230769231e-07, |
| "loss": 6.3701, |
| "step": 50800 |
| }, |
| { |
| "epoch": 112.66980870529525, |
| "eval_loss": 6.379401683807373, |
| "eval_runtime": 175.5648, |
| "eval_samples_per_second": 56.959, |
| "eval_steps_per_second": 7.12, |
| "step": 50800 |
| }, |
| { |
| "epoch": 112.89159966731356, |
| "grad_norm": 0.23602035641670227, |
| "learning_rate": 9.130434782608697e-07, |
| "loss": 6.3697, |
| "step": 50900 |
| }, |
| { |
| "epoch": 112.89159966731356, |
| "eval_loss": 6.380613803863525, |
| "eval_runtime": 172.9114, |
| "eval_samples_per_second": 57.833, |
| "eval_steps_per_second": 7.229, |
| "step": 50900 |
| }, |
| { |
| "epoch": 113.11339062933186, |
| "grad_norm": 0.3607383072376251, |
| "learning_rate": 9.030100334448161e-07, |
| "loss": 6.3717, |
| "step": 51000 |
| }, |
| { |
| "epoch": 113.11339062933186, |
| "eval_loss": 6.3821024894714355, |
| "eval_runtime": 175.5159, |
| "eval_samples_per_second": 56.975, |
| "eval_steps_per_second": 7.122, |
| "step": 51000 |
| }, |
| { |
| "epoch": 113.33518159135015, |
| "grad_norm": 0.313915878534317, |
| "learning_rate": 8.929765886287627e-07, |
| "loss": 6.3703, |
| "step": 51100 |
| }, |
| { |
| "epoch": 113.33518159135015, |
| "eval_loss": 6.381007194519043, |
| "eval_runtime": 175.5953, |
| "eval_samples_per_second": 56.949, |
| "eval_steps_per_second": 7.119, |
| "step": 51100 |
| }, |
| { |
| "epoch": 113.55697255336845, |
| "grad_norm": 0.26152804493904114, |
| "learning_rate": 8.829431438127091e-07, |
| "loss": 6.3712, |
| "step": 51200 |
| }, |
| { |
| "epoch": 113.55697255336845, |
| "eval_loss": 6.381545543670654, |
| "eval_runtime": 175.5198, |
| "eval_samples_per_second": 56.974, |
| "eval_steps_per_second": 7.122, |
| "step": 51200 |
| }, |
| { |
| "epoch": 113.77876351538674, |
| "grad_norm": 0.31778955459594727, |
| "learning_rate": 8.729096989966555e-07, |
| "loss": 6.3718, |
| "step": 51300 |
| }, |
| { |
| "epoch": 113.77876351538674, |
| "eval_loss": 6.384615421295166, |
| "eval_runtime": 175.9251, |
| "eval_samples_per_second": 56.842, |
| "eval_steps_per_second": 7.105, |
| "step": 51300 |
| }, |
| { |
| "epoch": 114.00055447740505, |
| "grad_norm": 0.2694382965564728, |
| "learning_rate": 8.628762541806019e-07, |
| "loss": 6.371, |
| "step": 51400 |
| }, |
| { |
| "epoch": 114.00055447740505, |
| "eval_loss": 6.383395671844482, |
| "eval_runtime": 175.9708, |
| "eval_samples_per_second": 56.828, |
| "eval_steps_per_second": 7.103, |
| "step": 51400 |
| }, |
| { |
| "epoch": 114.22234543942335, |
| "grad_norm": 0.29690447449684143, |
| "learning_rate": 8.528428093645485e-07, |
| "loss": 6.37, |
| "step": 51500 |
| }, |
| { |
| "epoch": 114.22234543942335, |
| "eval_loss": 6.382811546325684, |
| "eval_runtime": 173.537, |
| "eval_samples_per_second": 57.625, |
| "eval_steps_per_second": 7.203, |
| "step": 51500 |
| }, |
| { |
| "epoch": 114.44413640144164, |
| "grad_norm": 0.39484673738479614, |
| "learning_rate": 8.428093645484949e-07, |
| "loss": 6.3711, |
| "step": 51600 |
| }, |
| { |
| "epoch": 114.44413640144164, |
| "eval_loss": 6.382282257080078, |
| "eval_runtime": 175.9709, |
| "eval_samples_per_second": 56.828, |
| "eval_steps_per_second": 7.103, |
| "step": 51600 |
| }, |
| { |
| "epoch": 114.66592736345994, |
| "grad_norm": 0.2630254626274109, |
| "learning_rate": 8.327759197324414e-07, |
| "loss": 6.3707, |
| "step": 51700 |
| }, |
| { |
| "epoch": 114.66592736345994, |
| "eval_loss": 6.382809162139893, |
| "eval_runtime": 176.0003, |
| "eval_samples_per_second": 56.818, |
| "eval_steps_per_second": 7.102, |
| "step": 51700 |
| }, |
| { |
| "epoch": 114.88771832547823, |
| "grad_norm": 0.3054973781108856, |
| "learning_rate": 8.227424749163879e-07, |
| "loss": 6.3708, |
| "step": 51800 |
| }, |
| { |
| "epoch": 114.88771832547823, |
| "eval_loss": 6.3818230628967285, |
| "eval_runtime": 173.4807, |
| "eval_samples_per_second": 57.643, |
| "eval_steps_per_second": 7.205, |
| "step": 51800 |
| }, |
| { |
| "epoch": 115.10950928749654, |
| "grad_norm": 0.24989312887191772, |
| "learning_rate": 8.127090301003344e-07, |
| "loss": 6.3697, |
| "step": 51900 |
| }, |
| { |
| "epoch": 115.10950928749654, |
| "eval_loss": 6.3821187019348145, |
| "eval_runtime": 175.9917, |
| "eval_samples_per_second": 56.821, |
| "eval_steps_per_second": 7.103, |
| "step": 51900 |
| }, |
| { |
| "epoch": 115.33130024951483, |
| "grad_norm": 0.3176492750644684, |
| "learning_rate": 8.026755852842809e-07, |
| "loss": 6.3716, |
| "step": 52000 |
| }, |
| { |
| "epoch": 115.33130024951483, |
| "eval_loss": 6.3822808265686035, |
| "eval_runtime": 173.4423, |
| "eval_samples_per_second": 57.656, |
| "eval_steps_per_second": 7.207, |
| "step": 52000 |
| }, |
| { |
| "epoch": 115.55309121153313, |
| "grad_norm": 0.2542394995689392, |
| "learning_rate": 7.926421404682274e-07, |
| "loss": 6.3712, |
| "step": 52100 |
| }, |
| { |
| "epoch": 115.55309121153313, |
| "eval_loss": 6.380392074584961, |
| "eval_runtime": 175.9555, |
| "eval_samples_per_second": 56.833, |
| "eval_steps_per_second": 7.104, |
| "step": 52100 |
| }, |
| { |
| "epoch": 115.77488217355143, |
| "grad_norm": 0.2998870313167572, |
| "learning_rate": 7.826086956521739e-07, |
| "loss": 6.3699, |
| "step": 52200 |
| }, |
| { |
| "epoch": 115.77488217355143, |
| "eval_loss": 6.381204605102539, |
| "eval_runtime": 173.1037, |
| "eval_samples_per_second": 57.769, |
| "eval_steps_per_second": 7.221, |
| "step": 52200 |
| }, |
| { |
| "epoch": 115.99667313556972, |
| "grad_norm": 0.2524458169937134, |
| "learning_rate": 7.725752508361204e-07, |
| "loss": 6.3704, |
| "step": 52300 |
| }, |
| { |
| "epoch": 115.99667313556972, |
| "eval_loss": 6.383292198181152, |
| "eval_runtime": 175.5333, |
| "eval_samples_per_second": 56.969, |
| "eval_steps_per_second": 7.121, |
| "step": 52300 |
| }, |
| { |
| "epoch": 116.21846409758803, |
| "grad_norm": 0.2731904983520508, |
| "learning_rate": 7.625418060200669e-07, |
| "loss": 6.3715, |
| "step": 52400 |
| }, |
| { |
| "epoch": 116.21846409758803, |
| "eval_loss": 6.380125999450684, |
| "eval_runtime": 173.003, |
| "eval_samples_per_second": 57.802, |
| "eval_steps_per_second": 7.225, |
| "step": 52400 |
| }, |
| { |
| "epoch": 116.44025505960632, |
| "grad_norm": 0.3370875120162964, |
| "learning_rate": 7.525083612040134e-07, |
| "loss": 6.3702, |
| "step": 52500 |
| }, |
| { |
| "epoch": 116.44025505960632, |
| "eval_loss": 6.383055686950684, |
| "eval_runtime": 175.6351, |
| "eval_samples_per_second": 56.936, |
| "eval_steps_per_second": 7.117, |
| "step": 52500 |
| }, |
| { |
| "epoch": 116.66204602162462, |
| "grad_norm": 0.2853044867515564, |
| "learning_rate": 7.424749163879599e-07, |
| "loss": 6.3706, |
| "step": 52600 |
| }, |
| { |
| "epoch": 116.66204602162462, |
| "eval_loss": 6.381393909454346, |
| "eval_runtime": 175.6586, |
| "eval_samples_per_second": 56.929, |
| "eval_steps_per_second": 7.116, |
| "step": 52600 |
| }, |
| { |
| "epoch": 116.88383698364292, |
| "grad_norm": 0.3378102481365204, |
| "learning_rate": 7.324414715719064e-07, |
| "loss": 6.3701, |
| "step": 52700 |
| }, |
| { |
| "epoch": 116.88383698364292, |
| "eval_loss": 6.380878448486328, |
| "eval_runtime": 175.5156, |
| "eval_samples_per_second": 56.975, |
| "eval_steps_per_second": 7.122, |
| "step": 52700 |
| }, |
| { |
| "epoch": 117.10562794566121, |
| "grad_norm": 0.27575579285621643, |
| "learning_rate": 7.224080267558529e-07, |
| "loss": 6.3698, |
| "step": 52800 |
| }, |
| { |
| "epoch": 117.10562794566121, |
| "eval_loss": 6.381886959075928, |
| "eval_runtime": 175.558, |
| "eval_samples_per_second": 56.961, |
| "eval_steps_per_second": 7.12, |
| "step": 52800 |
| }, |
| { |
| "epoch": 117.32741890767952, |
| "grad_norm": 0.22469982504844666, |
| "learning_rate": 7.123745819397994e-07, |
| "loss": 6.3689, |
| "step": 52900 |
| }, |
| { |
| "epoch": 117.32741890767952, |
| "eval_loss": 6.378075122833252, |
| "eval_runtime": 176.0795, |
| "eval_samples_per_second": 56.793, |
| "eval_steps_per_second": 7.099, |
| "step": 52900 |
| }, |
| { |
| "epoch": 117.5492098696978, |
| "grad_norm": 0.26414427161216736, |
| "learning_rate": 7.023411371237459e-07, |
| "loss": 6.3715, |
| "step": 53000 |
| }, |
| { |
| "epoch": 117.5492098696978, |
| "eval_loss": 6.38188362121582, |
| "eval_runtime": 176.003, |
| "eval_samples_per_second": 56.817, |
| "eval_steps_per_second": 7.102, |
| "step": 53000 |
| }, |
| { |
| "epoch": 117.77100083171611, |
| "grad_norm": 0.2348640114068985, |
| "learning_rate": 6.923076923076923e-07, |
| "loss": 6.3699, |
| "step": 53100 |
| }, |
| { |
| "epoch": 117.77100083171611, |
| "eval_loss": 6.382396697998047, |
| "eval_runtime": 175.8429, |
| "eval_samples_per_second": 56.869, |
| "eval_steps_per_second": 7.109, |
| "step": 53100 |
| }, |
| { |
| "epoch": 117.99279179373441, |
| "grad_norm": 0.36397331953048706, |
| "learning_rate": 6.822742474916388e-07, |
| "loss": 6.3703, |
| "step": 53200 |
| }, |
| { |
| "epoch": 117.99279179373441, |
| "eval_loss": 6.384123802185059, |
| "eval_runtime": 174.4907, |
| "eval_samples_per_second": 57.31, |
| "eval_steps_per_second": 7.164, |
| "step": 53200 |
| }, |
| { |
| "epoch": 118.2145827557527, |
| "grad_norm": 0.25135567784309387, |
| "learning_rate": 6.722408026755853e-07, |
| "loss": 6.3701, |
| "step": 53300 |
| }, |
| { |
| "epoch": 118.2145827557527, |
| "eval_loss": 6.3801045417785645, |
| "eval_runtime": 173.0674, |
| "eval_samples_per_second": 57.781, |
| "eval_steps_per_second": 7.223, |
| "step": 53300 |
| }, |
| { |
| "epoch": 118.436373717771, |
| "grad_norm": 0.30894702672958374, |
| "learning_rate": 6.622073578595318e-07, |
| "loss": 6.3702, |
| "step": 53400 |
| }, |
| { |
| "epoch": 118.436373717771, |
| "eval_loss": 6.379894733428955, |
| "eval_runtime": 175.516, |
| "eval_samples_per_second": 56.975, |
| "eval_steps_per_second": 7.122, |
| "step": 53400 |
| }, |
| { |
| "epoch": 118.6581646797893, |
| "grad_norm": 0.26461485028266907, |
| "learning_rate": 6.521739130434783e-07, |
| "loss": 6.3713, |
| "step": 53500 |
| }, |
| { |
| "epoch": 118.6581646797893, |
| "eval_loss": 6.3835272789001465, |
| "eval_runtime": 173.4513, |
| "eval_samples_per_second": 57.653, |
| "eval_steps_per_second": 7.207, |
| "step": 53500 |
| }, |
| { |
| "epoch": 118.8799556418076, |
| "grad_norm": 0.24245497584342957, |
| "learning_rate": 6.421404682274248e-07, |
| "loss": 6.3705, |
| "step": 53600 |
| }, |
| { |
| "epoch": 118.8799556418076, |
| "eval_loss": 6.381874084472656, |
| "eval_runtime": 176.0158, |
| "eval_samples_per_second": 56.813, |
| "eval_steps_per_second": 7.102, |
| "step": 53600 |
| }, |
| { |
| "epoch": 119.10174660382589, |
| "grad_norm": 0.23844820261001587, |
| "learning_rate": 6.321070234113712e-07, |
| "loss": 6.3698, |
| "step": 53700 |
| }, |
| { |
| "epoch": 119.10174660382589, |
| "eval_loss": 6.381486415863037, |
| "eval_runtime": 173.4108, |
| "eval_samples_per_second": 57.667, |
| "eval_steps_per_second": 7.208, |
| "step": 53700 |
| }, |
| { |
| "epoch": 119.32353756584419, |
| "grad_norm": 0.2418413609266281, |
| "learning_rate": 6.220735785953178e-07, |
| "loss": 6.3712, |
| "step": 53800 |
| }, |
| { |
| "epoch": 119.32353756584419, |
| "eval_loss": 6.382267951965332, |
| "eval_runtime": 175.9952, |
| "eval_samples_per_second": 56.82, |
| "eval_steps_per_second": 7.102, |
| "step": 53800 |
| }, |
| { |
| "epoch": 119.5453285278625, |
| "grad_norm": 0.22692246735095978, |
| "learning_rate": 6.120401337792642e-07, |
| "loss": 6.371, |
| "step": 53900 |
| }, |
| { |
| "epoch": 119.5453285278625, |
| "eval_loss": 6.383540630340576, |
| "eval_runtime": 173.1726, |
| "eval_samples_per_second": 57.746, |
| "eval_steps_per_second": 7.218, |
| "step": 53900 |
| }, |
| { |
| "epoch": 119.76711948988078, |
| "grad_norm": 0.29117047786712646, |
| "learning_rate": 6.020066889632107e-07, |
| "loss": 6.3713, |
| "step": 54000 |
| }, |
| { |
| "epoch": 119.76711948988078, |
| "eval_loss": 6.382152557373047, |
| "eval_runtime": 175.7557, |
| "eval_samples_per_second": 56.897, |
| "eval_steps_per_second": 7.112, |
| "step": 54000 |
| }, |
| { |
| "epoch": 119.98891045189909, |
| "grad_norm": 0.21682819724082947, |
| "learning_rate": 5.919732441471572e-07, |
| "loss": 6.3702, |
| "step": 54100 |
| }, |
| { |
| "epoch": 119.98891045189909, |
| "eval_loss": 6.380878925323486, |
| "eval_runtime": 173.0921, |
| "eval_samples_per_second": 57.773, |
| "eval_steps_per_second": 7.222, |
| "step": 54100 |
| }, |
| { |
| "epoch": 120.21070141391738, |
| "grad_norm": 0.31245148181915283, |
| "learning_rate": 5.819397993311037e-07, |
| "loss": 6.3694, |
| "step": 54200 |
| }, |
| { |
| "epoch": 120.21070141391738, |
| "eval_loss": 6.383978843688965, |
| "eval_runtime": 175.5232, |
| "eval_samples_per_second": 56.973, |
| "eval_steps_per_second": 7.122, |
| "step": 54200 |
| }, |
| { |
| "epoch": 120.43249237593568, |
| "grad_norm": 0.22876819968223572, |
| "learning_rate": 5.719063545150502e-07, |
| "loss": 6.3706, |
| "step": 54300 |
| }, |
| { |
| "epoch": 120.43249237593568, |
| "eval_loss": 6.382028102874756, |
| "eval_runtime": 173.1291, |
| "eval_samples_per_second": 57.76, |
| "eval_steps_per_second": 7.22, |
| "step": 54300 |
| }, |
| { |
| "epoch": 120.65428333795398, |
| "grad_norm": 0.25953638553619385, |
| "learning_rate": 5.618729096989966e-07, |
| "loss": 6.3707, |
| "step": 54400 |
| }, |
| { |
| "epoch": 120.65428333795398, |
| "eval_loss": 6.381461143493652, |
| "eval_runtime": 175.57, |
| "eval_samples_per_second": 56.957, |
| "eval_steps_per_second": 7.12, |
| "step": 54400 |
| }, |
| { |
| "epoch": 120.87607429997227, |
| "grad_norm": 0.1654128134250641, |
| "learning_rate": 5.518394648829431e-07, |
| "loss": 6.3707, |
| "step": 54500 |
| }, |
| { |
| "epoch": 120.87607429997227, |
| "eval_loss": 6.3789753913879395, |
| "eval_runtime": 175.5387, |
| "eval_samples_per_second": 56.967, |
| "eval_steps_per_second": 7.121, |
| "step": 54500 |
| }, |
| { |
| "epoch": 121.09786526199058, |
| "grad_norm": 0.29274898767471313, |
| "learning_rate": 5.418060200668896e-07, |
| "loss": 6.3703, |
| "step": 54600 |
| }, |
| { |
| "epoch": 121.09786526199058, |
| "eval_loss": 6.380027770996094, |
| "eval_runtime": 175.4995, |
| "eval_samples_per_second": 56.98, |
| "eval_steps_per_second": 7.123, |
| "step": 54600 |
| }, |
| { |
| "epoch": 121.31965622400887, |
| "grad_norm": 0.2235456258058548, |
| "learning_rate": 5.317725752508361e-07, |
| "loss": 6.373, |
| "step": 54700 |
| }, |
| { |
| "epoch": 121.31965622400887, |
| "eval_loss": 6.380786418914795, |
| "eval_runtime": 175.5186, |
| "eval_samples_per_second": 56.974, |
| "eval_steps_per_second": 7.122, |
| "step": 54700 |
| }, |
| { |
| "epoch": 121.54144718602717, |
| "grad_norm": 0.30965185165405273, |
| "learning_rate": 5.217391304347826e-07, |
| "loss": 6.3714, |
| "step": 54800 |
| }, |
| { |
| "epoch": 121.54144718602717, |
| "eval_loss": 6.382297039031982, |
| "eval_runtime": 175.5968, |
| "eval_samples_per_second": 56.949, |
| "eval_steps_per_second": 7.119, |
| "step": 54800 |
| }, |
| { |
| "epoch": 121.76323814804547, |
| "grad_norm": 0.28793787956237793, |
| "learning_rate": 5.117056856187291e-07, |
| "loss": 6.3707, |
| "step": 54900 |
| }, |
| { |
| "epoch": 121.76323814804547, |
| "eval_loss": 6.377398490905762, |
| "eval_runtime": 173.0039, |
| "eval_samples_per_second": 57.802, |
| "eval_steps_per_second": 7.225, |
| "step": 54900 |
| }, |
| { |
| "epoch": 121.98502911006376, |
| "grad_norm": 0.3277120590209961, |
| "learning_rate": 5.016722408026756e-07, |
| "loss": 6.3688, |
| "step": 55000 |
| }, |
| { |
| "epoch": 121.98502911006376, |
| "eval_loss": 6.383605480194092, |
| "eval_runtime": 173.0503, |
| "eval_samples_per_second": 57.787, |
| "eval_steps_per_second": 7.223, |
| "step": 55000 |
| }, |
| { |
| "epoch": 122.20682007208207, |
| "grad_norm": 0.2291731834411621, |
| "learning_rate": 4.916387959866221e-07, |
| "loss": 6.3702, |
| "step": 55100 |
| }, |
| { |
| "epoch": 122.20682007208207, |
| "eval_loss": 6.385202407836914, |
| "eval_runtime": 175.7369, |
| "eval_samples_per_second": 56.903, |
| "eval_steps_per_second": 7.113, |
| "step": 55100 |
| }, |
| { |
| "epoch": 122.42861103410036, |
| "grad_norm": 0.23682117462158203, |
| "learning_rate": 4.816053511705686e-07, |
| "loss": 6.3711, |
| "step": 55200 |
| }, |
| { |
| "epoch": 122.42861103410036, |
| "eval_loss": 6.386002063751221, |
| "eval_runtime": 173.0919, |
| "eval_samples_per_second": 57.773, |
| "eval_steps_per_second": 7.222, |
| "step": 55200 |
| }, |
| { |
| "epoch": 122.65040199611866, |
| "grad_norm": 0.21502740681171417, |
| "learning_rate": 4.7157190635451506e-07, |
| "loss": 6.37, |
| "step": 55300 |
| }, |
| { |
| "epoch": 122.65040199611866, |
| "eval_loss": 6.38268518447876, |
| "eval_runtime": 175.5194, |
| "eval_samples_per_second": 56.974, |
| "eval_steps_per_second": 7.122, |
| "step": 55300 |
| }, |
| { |
| "epoch": 122.87219295813695, |
| "grad_norm": 0.2415875792503357, |
| "learning_rate": 4.6153846153846156e-07, |
| "loss": 6.37, |
| "step": 55400 |
| }, |
| { |
| "epoch": 122.87219295813695, |
| "eval_loss": 6.379030704498291, |
| "eval_runtime": 173.0644, |
| "eval_samples_per_second": 57.782, |
| "eval_steps_per_second": 7.223, |
| "step": 55400 |
| }, |
| { |
| "epoch": 123.09398392015525, |
| "grad_norm": 0.278998464345932, |
| "learning_rate": 4.5150501672240806e-07, |
| "loss": 6.3709, |
| "step": 55500 |
| }, |
| { |
| "epoch": 123.09398392015525, |
| "eval_loss": 6.381860256195068, |
| "eval_runtime": 175.5462, |
| "eval_samples_per_second": 56.965, |
| "eval_steps_per_second": 7.121, |
| "step": 55500 |
| }, |
| { |
| "epoch": 123.31577488217356, |
| "grad_norm": 0.27015259861946106, |
| "learning_rate": 4.4147157190635456e-07, |
| "loss": 6.37, |
| "step": 55600 |
| }, |
| { |
| "epoch": 123.31577488217356, |
| "eval_loss": 6.380960464477539, |
| "eval_runtime": 175.9714, |
| "eval_samples_per_second": 56.827, |
| "eval_steps_per_second": 7.103, |
| "step": 55600 |
| }, |
| { |
| "epoch": 123.53756584419185, |
| "grad_norm": 0.23815931379795074, |
| "learning_rate": 4.3143812709030095e-07, |
| "loss": 6.37, |
| "step": 55700 |
| }, |
| { |
| "epoch": 123.53756584419185, |
| "eval_loss": 6.384081840515137, |
| "eval_runtime": 173.1242, |
| "eval_samples_per_second": 57.762, |
| "eval_steps_per_second": 7.22, |
| "step": 55700 |
| }, |
| { |
| "epoch": 123.75935680621015, |
| "grad_norm": 0.24355483055114746, |
| "learning_rate": 4.2140468227424745e-07, |
| "loss": 6.3694, |
| "step": 55800 |
| }, |
| { |
| "epoch": 123.75935680621015, |
| "eval_loss": 6.378664016723633, |
| "eval_runtime": 173.0043, |
| "eval_samples_per_second": 57.802, |
| "eval_steps_per_second": 7.225, |
| "step": 55800 |
| }, |
| { |
| "epoch": 123.98114776822844, |
| "grad_norm": 0.21320495009422302, |
| "learning_rate": 4.1137123745819395e-07, |
| "loss": 6.3693, |
| "step": 55900 |
| }, |
| { |
| "epoch": 123.98114776822844, |
| "eval_loss": 6.382479190826416, |
| "eval_runtime": 175.5916, |
| "eval_samples_per_second": 56.95, |
| "eval_steps_per_second": 7.119, |
| "step": 55900 |
| }, |
| { |
| "epoch": 124.20293873024674, |
| "grad_norm": 0.2245740443468094, |
| "learning_rate": 4.0133779264214045e-07, |
| "loss": 6.3702, |
| "step": 56000 |
| }, |
| { |
| "epoch": 124.20293873024674, |
| "eval_loss": 6.385231971740723, |
| "eval_runtime": 175.6666, |
| "eval_samples_per_second": 56.926, |
| "eval_steps_per_second": 7.116, |
| "step": 56000 |
| }, |
| { |
| "epoch": 124.42472969226505, |
| "grad_norm": 0.282416969537735, |
| "learning_rate": 3.9130434782608694e-07, |
| "loss": 6.3709, |
| "step": 56100 |
| }, |
| { |
| "epoch": 124.42472969226505, |
| "eval_loss": 6.380115032196045, |
| "eval_runtime": 175.7632, |
| "eval_samples_per_second": 56.895, |
| "eval_steps_per_second": 7.112, |
| "step": 56100 |
| }, |
| { |
| "epoch": 124.64652065428334, |
| "grad_norm": 0.19661109149456024, |
| "learning_rate": 3.8127090301003344e-07, |
| "loss": 6.3712, |
| "step": 56200 |
| }, |
| { |
| "epoch": 124.64652065428334, |
| "eval_loss": 6.3793158531188965, |
| "eval_runtime": 175.59, |
| "eval_samples_per_second": 56.951, |
| "eval_steps_per_second": 7.119, |
| "step": 56200 |
| }, |
| { |
| "epoch": 124.86831161630164, |
| "grad_norm": 0.18216532468795776, |
| "learning_rate": 3.7123745819397994e-07, |
| "loss": 6.3703, |
| "step": 56300 |
| }, |
| { |
| "epoch": 124.86831161630164, |
| "eval_loss": 6.381213188171387, |
| "eval_runtime": 175.6076, |
| "eval_samples_per_second": 56.945, |
| "eval_steps_per_second": 7.118, |
| "step": 56300 |
| }, |
| { |
| "epoch": 125.09010257831993, |
| "grad_norm": 0.3018471598625183, |
| "learning_rate": 3.6120401337792644e-07, |
| "loss": 6.3706, |
| "step": 56400 |
| }, |
| { |
| "epoch": 125.09010257831993, |
| "eval_loss": 6.3784942626953125, |
| "eval_runtime": 175.6917, |
| "eval_samples_per_second": 56.918, |
| "eval_steps_per_second": 7.115, |
| "step": 56400 |
| }, |
| { |
| "epoch": 125.31189354033823, |
| "grad_norm": 0.21381452679634094, |
| "learning_rate": 3.5117056856187294e-07, |
| "loss": 6.3722, |
| "step": 56500 |
| }, |
| { |
| "epoch": 125.31189354033823, |
| "eval_loss": 6.381383419036865, |
| "eval_runtime": 173.1305, |
| "eval_samples_per_second": 57.76, |
| "eval_steps_per_second": 7.22, |
| "step": 56500 |
| }, |
| { |
| "epoch": 125.53368450235654, |
| "grad_norm": 0.23340944945812225, |
| "learning_rate": 3.411371237458194e-07, |
| "loss": 6.3698, |
| "step": 56600 |
| }, |
| { |
| "epoch": 125.53368450235654, |
| "eval_loss": 6.380908012390137, |
| "eval_runtime": 175.6729, |
| "eval_samples_per_second": 56.924, |
| "eval_steps_per_second": 7.115, |
| "step": 56600 |
| }, |
| { |
| "epoch": 125.75547546437483, |
| "grad_norm": 0.22507449984550476, |
| "learning_rate": 3.311036789297659e-07, |
| "loss": 6.3711, |
| "step": 56700 |
| }, |
| { |
| "epoch": 125.75547546437483, |
| "eval_loss": 6.37741756439209, |
| "eval_runtime": 172.9897, |
| "eval_samples_per_second": 57.807, |
| "eval_steps_per_second": 7.226, |
| "step": 56700 |
| }, |
| { |
| "epoch": 125.97726642639313, |
| "grad_norm": 0.21832765638828278, |
| "learning_rate": 3.210702341137124e-07, |
| "loss": 6.3716, |
| "step": 56800 |
| }, |
| { |
| "epoch": 125.97726642639313, |
| "eval_loss": 6.381014823913574, |
| "eval_runtime": 175.5155, |
| "eval_samples_per_second": 56.975, |
| "eval_steps_per_second": 7.122, |
| "step": 56800 |
| }, |
| { |
| "epoch": 126.19905738841142, |
| "grad_norm": 0.27440136671066284, |
| "learning_rate": 3.110367892976589e-07, |
| "loss": 6.3728, |
| "step": 56900 |
| }, |
| { |
| "epoch": 126.19905738841142, |
| "eval_loss": 6.3819074630737305, |
| "eval_runtime": 172.9421, |
| "eval_samples_per_second": 57.823, |
| "eval_steps_per_second": 7.228, |
| "step": 56900 |
| }, |
| { |
| "epoch": 126.42084835042972, |
| "grad_norm": 0.24798136949539185, |
| "learning_rate": 3.010033444816054e-07, |
| "loss": 6.3702, |
| "step": 57000 |
| }, |
| { |
| "epoch": 126.42084835042972, |
| "eval_loss": 6.379570484161377, |
| "eval_runtime": 176.0012, |
| "eval_samples_per_second": 56.818, |
| "eval_steps_per_second": 7.102, |
| "step": 57000 |
| }, |
| { |
| "epoch": 126.64263931244801, |
| "grad_norm": 0.196645587682724, |
| "learning_rate": 2.9096989966555187e-07, |
| "loss": 6.3702, |
| "step": 57100 |
| }, |
| { |
| "epoch": 126.64263931244801, |
| "eval_loss": 6.3817267417907715, |
| "eval_runtime": 173.0992, |
| "eval_samples_per_second": 57.77, |
| "eval_steps_per_second": 7.221, |
| "step": 57100 |
| }, |
| { |
| "epoch": 126.86443027446632, |
| "grad_norm": 0.21966499090194702, |
| "learning_rate": 2.809364548494983e-07, |
| "loss": 6.3689, |
| "step": 57200 |
| }, |
| { |
| "epoch": 126.86443027446632, |
| "eval_loss": 6.383100986480713, |
| "eval_runtime": 175.7334, |
| "eval_samples_per_second": 56.904, |
| "eval_steps_per_second": 7.113, |
| "step": 57200 |
| }, |
| { |
| "epoch": 127.08622123648462, |
| "grad_norm": 0.19457194209098816, |
| "learning_rate": 2.709030100334448e-07, |
| "loss": 6.371, |
| "step": 57300 |
| }, |
| { |
| "epoch": 127.08622123648462, |
| "eval_loss": 6.381374835968018, |
| "eval_runtime": 175.566, |
| "eval_samples_per_second": 56.959, |
| "eval_steps_per_second": 7.12, |
| "step": 57300 |
| }, |
| { |
| "epoch": 127.30801219850291, |
| "grad_norm": 0.22573208808898926, |
| "learning_rate": 2.608695652173913e-07, |
| "loss": 6.3725, |
| "step": 57400 |
| }, |
| { |
| "epoch": 127.30801219850291, |
| "eval_loss": 6.380834579467773, |
| "eval_runtime": 175.5891, |
| "eval_samples_per_second": 56.951, |
| "eval_steps_per_second": 7.119, |
| "step": 57400 |
| }, |
| { |
| "epoch": 127.52980316052121, |
| "grad_norm": 0.2630537748336792, |
| "learning_rate": 2.508361204013378e-07, |
| "loss": 6.3689, |
| "step": 57500 |
| }, |
| { |
| "epoch": 127.52980316052121, |
| "eval_loss": 6.380504131317139, |
| "eval_runtime": 175.5167, |
| "eval_samples_per_second": 56.975, |
| "eval_steps_per_second": 7.122, |
| "step": 57500 |
| }, |
| { |
| "epoch": 127.7515941225395, |
| "grad_norm": 0.2693498134613037, |
| "learning_rate": 2.408026755852843e-07, |
| "loss": 6.3711, |
| "step": 57600 |
| }, |
| { |
| "epoch": 127.7515941225395, |
| "eval_loss": 6.379264831542969, |
| "eval_runtime": 175.6659, |
| "eval_samples_per_second": 56.926, |
| "eval_steps_per_second": 7.116, |
| "step": 57600 |
| }, |
| { |
| "epoch": 127.9733850845578, |
| "grad_norm": 0.21430125832557678, |
| "learning_rate": 2.3076923076923078e-07, |
| "loss": 6.3701, |
| "step": 57700 |
| }, |
| { |
| "epoch": 127.9733850845578, |
| "eval_loss": 6.383444309234619, |
| "eval_runtime": 175.8385, |
| "eval_samples_per_second": 56.87, |
| "eval_steps_per_second": 7.109, |
| "step": 57700 |
| }, |
| { |
| "epoch": 128.1951760465761, |
| "grad_norm": 0.23632164299488068, |
| "learning_rate": 2.2073578595317728e-07, |
| "loss": 6.37, |
| "step": 57800 |
| }, |
| { |
| "epoch": 128.1951760465761, |
| "eval_loss": 6.381924152374268, |
| "eval_runtime": 175.9161, |
| "eval_samples_per_second": 56.845, |
| "eval_steps_per_second": 7.106, |
| "step": 57800 |
| }, |
| { |
| "epoch": 128.4169670085944, |
| "grad_norm": 0.20027929544448853, |
| "learning_rate": 2.1070234113712372e-07, |
| "loss": 6.3689, |
| "step": 57900 |
| }, |
| { |
| "epoch": 128.4169670085944, |
| "eval_loss": 6.380605697631836, |
| "eval_runtime": 175.7408, |
| "eval_samples_per_second": 56.902, |
| "eval_steps_per_second": 7.113, |
| "step": 57900 |
| }, |
| { |
| "epoch": 128.6387579706127, |
| "grad_norm": 0.24598795175552368, |
| "learning_rate": 2.0066889632107022e-07, |
| "loss": 6.3703, |
| "step": 58000 |
| }, |
| { |
| "epoch": 128.6387579706127, |
| "eval_loss": 6.380997180938721, |
| "eval_runtime": 175.8242, |
| "eval_samples_per_second": 56.875, |
| "eval_steps_per_second": 7.109, |
| "step": 58000 |
| }, |
| { |
| "epoch": 128.860548932631, |
| "grad_norm": 0.22210384905338287, |
| "learning_rate": 1.9063545150501672e-07, |
| "loss": 6.3713, |
| "step": 58100 |
| }, |
| { |
| "epoch": 128.860548932631, |
| "eval_loss": 6.379730701446533, |
| "eval_runtime": 175.6297, |
| "eval_samples_per_second": 56.938, |
| "eval_steps_per_second": 7.117, |
| "step": 58100 |
| }, |
| { |
| "epoch": 129.0823398946493, |
| "grad_norm": 0.21533408761024475, |
| "learning_rate": 1.8060200668896322e-07, |
| "loss": 6.369, |
| "step": 58200 |
| }, |
| { |
| "epoch": 129.0823398946493, |
| "eval_loss": 6.379825592041016, |
| "eval_runtime": 173.1155, |
| "eval_samples_per_second": 57.765, |
| "eval_steps_per_second": 7.221, |
| "step": 58200 |
| }, |
| { |
| "epoch": 129.3041308566676, |
| "grad_norm": 0.24441500008106232, |
| "learning_rate": 1.705685618729097e-07, |
| "loss": 6.3712, |
| "step": 58300 |
| }, |
| { |
| "epoch": 129.3041308566676, |
| "eval_loss": 6.380709171295166, |
| "eval_runtime": 175.6262, |
| "eval_samples_per_second": 56.939, |
| "eval_steps_per_second": 7.117, |
| "step": 58300 |
| }, |
| { |
| "epoch": 129.5259218186859, |
| "grad_norm": 0.174821138381958, |
| "learning_rate": 1.605351170568562e-07, |
| "loss": 6.3694, |
| "step": 58400 |
| }, |
| { |
| "epoch": 129.5259218186859, |
| "eval_loss": 6.3804545402526855, |
| "eval_runtime": 174.2415, |
| "eval_samples_per_second": 57.392, |
| "eval_steps_per_second": 7.174, |
| "step": 58400 |
| }, |
| { |
| "epoch": 129.74771278070418, |
| "grad_norm": 0.24464456737041473, |
| "learning_rate": 1.505016722408027e-07, |
| "loss": 6.3713, |
| "step": 58500 |
| }, |
| { |
| "epoch": 129.74771278070418, |
| "eval_loss": 6.379507541656494, |
| "eval_runtime": 175.5413, |
| "eval_samples_per_second": 56.967, |
| "eval_steps_per_second": 7.121, |
| "step": 58500 |
| }, |
| { |
| "epoch": 129.96950374272248, |
| "grad_norm": 0.1928214728832245, |
| "learning_rate": 1.4046822742474916e-07, |
| "loss": 6.37, |
| "step": 58600 |
| }, |
| { |
| "epoch": 129.96950374272248, |
| "eval_loss": 6.384742736816406, |
| "eval_runtime": 173.0335, |
| "eval_samples_per_second": 57.792, |
| "eval_steps_per_second": 7.224, |
| "step": 58600 |
| }, |
| { |
| "epoch": 130.19129470474078, |
| "grad_norm": 0.2976389229297638, |
| "learning_rate": 1.3043478260869566e-07, |
| "loss": 6.3717, |
| "step": 58700 |
| }, |
| { |
| "epoch": 130.19129470474078, |
| "eval_loss": 6.3786187171936035, |
| "eval_runtime": 175.6076, |
| "eval_samples_per_second": 56.945, |
| "eval_steps_per_second": 7.118, |
| "step": 58700 |
| }, |
| { |
| "epoch": 130.4130856667591, |
| "grad_norm": 0.25023147463798523, |
| "learning_rate": 1.2040133779264215e-07, |
| "loss": 6.3685, |
| "step": 58800 |
| }, |
| { |
| "epoch": 130.4130856667591, |
| "eval_loss": 6.383387565612793, |
| "eval_runtime": 175.5163, |
| "eval_samples_per_second": 56.975, |
| "eval_steps_per_second": 7.122, |
| "step": 58800 |
| }, |
| { |
| "epoch": 130.6348766287774, |
| "grad_norm": 0.21737854182720184, |
| "learning_rate": 1.1036789297658864e-07, |
| "loss": 6.3712, |
| "step": 58900 |
| }, |
| { |
| "epoch": 130.6348766287774, |
| "eval_loss": 6.379786491394043, |
| "eval_runtime": 175.7874, |
| "eval_samples_per_second": 56.887, |
| "eval_steps_per_second": 7.111, |
| "step": 58900 |
| }, |
| { |
| "epoch": 130.85666759079567, |
| "grad_norm": 0.18008896708488464, |
| "learning_rate": 1.0033444816053511e-07, |
| "loss": 6.3701, |
| "step": 59000 |
| }, |
| { |
| "epoch": 130.85666759079567, |
| "eval_loss": 6.378762722015381, |
| "eval_runtime": 175.3457, |
| "eval_samples_per_second": 57.03, |
| "eval_steps_per_second": 7.129, |
| "step": 59000 |
| }, |
| { |
| "epoch": 131.07845855281397, |
| "grad_norm": 0.26529356837272644, |
| "learning_rate": 9.030100334448161e-08, |
| "loss": 6.3706, |
| "step": 59100 |
| }, |
| { |
| "epoch": 131.07845855281397, |
| "eval_loss": 6.384096622467041, |
| "eval_runtime": 173.4291, |
| "eval_samples_per_second": 57.66, |
| "eval_steps_per_second": 7.208, |
| "step": 59100 |
| }, |
| { |
| "epoch": 131.30024951483227, |
| "grad_norm": 0.2854064106941223, |
| "learning_rate": 8.02675585284281e-08, |
| "loss": 6.3699, |
| "step": 59200 |
| }, |
| { |
| "epoch": 131.30024951483227, |
| "eval_loss": 6.38028621673584, |
| "eval_runtime": 175.6366, |
| "eval_samples_per_second": 56.936, |
| "eval_steps_per_second": 7.117, |
| "step": 59200 |
| }, |
| { |
| "epoch": 131.52204047685058, |
| "grad_norm": 0.2294512242078781, |
| "learning_rate": 7.023411371237458e-08, |
| "loss": 6.3711, |
| "step": 59300 |
| }, |
| { |
| "epoch": 131.52204047685058, |
| "eval_loss": 6.384092330932617, |
| "eval_runtime": 175.5659, |
| "eval_samples_per_second": 56.959, |
| "eval_steps_per_second": 7.12, |
| "step": 59300 |
| }, |
| { |
| "epoch": 131.74383143886885, |
| "grad_norm": 0.23442944884300232, |
| "learning_rate": 6.020066889632108e-08, |
| "loss": 6.3704, |
| "step": 59400 |
| }, |
| { |
| "epoch": 131.74383143886885, |
| "eval_loss": 6.382981300354004, |
| "eval_runtime": 175.5589, |
| "eval_samples_per_second": 56.961, |
| "eval_steps_per_second": 7.12, |
| "step": 59400 |
| }, |
| { |
| "epoch": 131.96562240088716, |
| "grad_norm": 0.216475710272789, |
| "learning_rate": 5.0167224080267556e-08, |
| "loss": 6.3708, |
| "step": 59500 |
| }, |
| { |
| "epoch": 131.96562240088716, |
| "eval_loss": 6.381463050842285, |
| "eval_runtime": 175.6519, |
| "eval_samples_per_second": 56.931, |
| "eval_steps_per_second": 7.116, |
| "step": 59500 |
| }, |
| { |
| "epoch": 132.18741336290546, |
| "grad_norm": 0.2338051199913025, |
| "learning_rate": 4.013377926421405e-08, |
| "loss": 6.3693, |
| "step": 59600 |
| }, |
| { |
| "epoch": 132.18741336290546, |
| "eval_loss": 6.379833698272705, |
| "eval_runtime": 175.5243, |
| "eval_samples_per_second": 56.972, |
| "eval_steps_per_second": 7.122, |
| "step": 59600 |
| }, |
| { |
| "epoch": 132.40920432492376, |
| "grad_norm": 0.20408721268177032, |
| "learning_rate": 3.010033444816054e-08, |
| "loss": 6.3683, |
| "step": 59700 |
| }, |
| { |
| "epoch": 132.40920432492376, |
| "eval_loss": 6.38368034362793, |
| "eval_runtime": 175.3937, |
| "eval_samples_per_second": 57.015, |
| "eval_steps_per_second": 7.127, |
| "step": 59700 |
| }, |
| { |
| "epoch": 132.63099528694207, |
| "grad_norm": 0.24998629093170166, |
| "learning_rate": 2.0066889632107024e-08, |
| "loss": 6.3697, |
| "step": 59800 |
| }, |
| { |
| "epoch": 132.63099528694207, |
| "eval_loss": 6.381494522094727, |
| "eval_runtime": 176.0167, |
| "eval_samples_per_second": 56.813, |
| "eval_steps_per_second": 7.102, |
| "step": 59800 |
| }, |
| { |
| "epoch": 132.85278624896034, |
| "grad_norm": 0.2178734391927719, |
| "learning_rate": 1.0033444816053512e-08, |
| "loss": 6.371, |
| "step": 59900 |
| }, |
| { |
| "epoch": 132.85278624896034, |
| "eval_loss": 6.382035732269287, |
| "eval_runtime": 173.4585, |
| "eval_samples_per_second": 57.651, |
| "eval_steps_per_second": 7.206, |
| "step": 59900 |
| }, |
| { |
| "epoch": 133.07457721097865, |
| "grad_norm": 0.24738912284374237, |
| "learning_rate": 0.0, |
| "loss": 6.3696, |
| "step": 60000 |
| }, |
| { |
| "epoch": 133.07457721097865, |
| "eval_loss": 6.382532596588135, |
| "eval_runtime": 175.9883, |
| "eval_samples_per_second": 56.822, |
| "eval_steps_per_second": 7.103, |
| "step": 60000 |
| } |
| ], |
| "logging_steps": 100, |
| "max_steps": 60000, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 134, |
| "save_steps": 1000, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 7.157205700133659e+19, |
| "train_batch_size": 8, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|