{
  "best_global_step": 138718,
  "best_metric": 0.9915470627263667,
  "best_model_checkpoint": "/home/skwon01/scratch/sibal/finetuned_models/serengeti_camera_ready/checkpoint-138718",
  "epoch": 2.0,
  "eval_steps": 1000.0,
  "global_step": 138718,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.007208869793393791,
      "grad_norm": 2.880587577819824,
      "learning_rate": 1.9985611095892387e-05,
      "loss": 3.675,
      "step": 500
    },
    {
      "epoch": 0.014417739586787583,
      "grad_norm": 3.1965484619140625,
      "learning_rate": 1.99711933563056e-05,
      "loss": 1.3703,
      "step": 1000
    },
    {
      "epoch": 0.021626609380181374,
      "grad_norm": 3.587383270263672,
      "learning_rate": 1.9956775616718814e-05,
      "loss": 0.7317,
      "step": 1500
    },
    {
      "epoch": 0.028835479173575165,
      "grad_norm": 2.73246169090271,
      "learning_rate": 1.9942357877132026e-05,
      "loss": 0.4764,
      "step": 2000
    },
    {
      "epoch": 0.03604434896696896,
      "grad_norm": 3.9599311351776123,
      "learning_rate": 1.9927940137545237e-05,
      "loss": 0.3488,
      "step": 2500
    },
    {
      "epoch": 0.04325321876036275,
      "grad_norm": 3.690446138381958,
      "learning_rate": 1.991352239795845e-05,
      "loss": 0.2729,
      "step": 3000
    },
    {
      "epoch": 0.05046208855375654,
      "grad_norm": 3.0428125858306885,
      "learning_rate": 1.989910465837166e-05,
      "loss": 0.2249,
      "step": 3500
    },
    {
      "epoch": 0.05767095834715033,
      "grad_norm": 2.6362786293029785,
      "learning_rate": 1.9884686918784876e-05,
      "loss": 0.1907,
      "step": 4000
    },
    {
      "epoch": 0.06487982814054413,
      "grad_norm": 4.072872161865234,
      "learning_rate": 1.9870269179198087e-05,
      "loss": 0.1695,
      "step": 4500
    },
    {
      "epoch": 0.07208869793393792,
      "grad_norm": 2.4177143573760986,
      "learning_rate": 1.98558514396113e-05,
      "loss": 0.1535,
      "step": 5000
    },
    {
      "epoch": 0.07929756772733171,
      "grad_norm": 2.4438438415527344,
      "learning_rate": 1.9841433700024514e-05,
      "loss": 0.1429,
      "step": 5500
    },
    {
      "epoch": 0.0865064375207255,
      "grad_norm": 1.9982225894927979,
      "learning_rate": 1.9827015960437722e-05,
      "loss": 0.1348,
      "step": 6000
    },
    {
      "epoch": 0.0937153073141193,
      "grad_norm": 2.988769769668579,
      "learning_rate": 1.9812598220850938e-05,
      "loss": 0.1226,
      "step": 6500
    },
    {
      "epoch": 0.10092417710751309,
      "grad_norm": 2.386380672454834,
      "learning_rate": 1.979818048126415e-05,
      "loss": 0.1168,
      "step": 7000
    },
    {
      "epoch": 0.10813304690090687,
      "grad_norm": 1.9924527406692505,
      "learning_rate": 1.978376274167736e-05,
      "loss": 0.1082,
      "step": 7500
    },
    {
      "epoch": 0.11534191669430066,
      "grad_norm": 1.9020510911941528,
      "learning_rate": 1.9769345002090573e-05,
      "loss": 0.1064,
      "step": 8000
    },
    {
      "epoch": 0.12255078648769446,
      "grad_norm": 2.333510160446167,
      "learning_rate": 1.9754927262503788e-05,
      "loss": 0.1029,
      "step": 8500
    },
    {
      "epoch": 0.12975965628108826,
      "grad_norm": 2.677407741546631,
      "learning_rate": 1.9740509522917e-05,
      "loss": 0.0995,
      "step": 9000
    },
    {
      "epoch": 0.13696852607448204,
      "grad_norm": 1.5480279922485352,
      "learning_rate": 1.972609178333021e-05,
      "loss": 0.0948,
      "step": 9500
    },
    {
      "epoch": 0.14417739586787584,
      "grad_norm": 2.630037546157837,
      "learning_rate": 1.9711674043743423e-05,
      "loss": 0.0937,
      "step": 10000
    },
    {
      "epoch": 0.15138626566126961,
      "grad_norm": 2.267946243286133,
      "learning_rate": 1.9697256304156634e-05,
      "loss": 0.0909,
      "step": 10500
    },
    {
      "epoch": 0.15859513545466342,
      "grad_norm": 2.932375907897949,
      "learning_rate": 1.968283856456985e-05,
      "loss": 0.0889,
      "step": 11000
    },
    {
      "epoch": 0.16580400524805722,
      "grad_norm": 2.69350528717041,
      "learning_rate": 1.966842082498306e-05,
      "loss": 0.086,
      "step": 11500
    },
    {
      "epoch": 0.173012875041451,
      "grad_norm": 2.1316378116607666,
      "learning_rate": 1.9654003085396273e-05,
      "loss": 0.0843,
      "step": 12000
    },
    {
      "epoch": 0.1802217448348448,
      "grad_norm": 2.52103853225708,
      "learning_rate": 1.9639585345809488e-05,
      "loss": 0.0828,
      "step": 12500
    },
    {
      "epoch": 0.1874306146282386,
      "grad_norm": 1.939334511756897,
      "learning_rate": 1.9625167606222696e-05,
      "loss": 0.0795,
      "step": 13000
    },
    {
      "epoch": 0.19463948442163237,
      "grad_norm": 2.3057949542999268,
      "learning_rate": 1.961074986663591e-05,
      "loss": 0.0786,
      "step": 13500
    },
    {
      "epoch": 0.20184835421502617,
      "grad_norm": 2.0021777153015137,
      "learning_rate": 1.9596332127049123e-05,
      "loss": 0.0773,
      "step": 14000
    },
    {
      "epoch": 0.20905722400841997,
      "grad_norm": 2.276421546936035,
      "learning_rate": 1.9581914387462335e-05,
      "loss": 0.0772,
      "step": 14500
    },
    {
      "epoch": 0.21626609380181375,
      "grad_norm": 2.426966428756714,
      "learning_rate": 1.9567496647875546e-05,
      "loss": 0.0746,
      "step": 15000
    },
    {
      "epoch": 0.22347496359520755,
      "grad_norm": 1.984330415725708,
      "learning_rate": 1.955307890828876e-05,
      "loss": 0.074,
      "step": 15500
    },
    {
      "epoch": 0.23068383338860132,
      "grad_norm": 2.1131157875061035,
      "learning_rate": 1.9538661168701973e-05,
      "loss": 0.0754,
      "step": 16000
    },
    {
      "epoch": 0.23789270318199512,
      "grad_norm": 2.672717332839966,
      "learning_rate": 1.9524243429115185e-05,
      "loss": 0.0719,
      "step": 16500
    },
    {
      "epoch": 0.24510157297538893,
      "grad_norm": 1.4720840454101562,
      "learning_rate": 1.9509825689528396e-05,
      "loss": 0.0689,
      "step": 17000
    },
    {
      "epoch": 0.25231044276878273,
      "grad_norm": 1.7824233770370483,
      "learning_rate": 1.9495407949941608e-05,
      "loss": 0.0711,
      "step": 17500
    },
    {
      "epoch": 0.25951931256217653,
      "grad_norm": 1.7139828205108643,
      "learning_rate": 1.9480990210354823e-05,
      "loss": 0.067,
      "step": 18000
    },
    {
      "epoch": 0.2667281823555703,
      "grad_norm": 2.2731082439422607,
      "learning_rate": 1.9466572470768035e-05,
      "loss": 0.0678,
      "step": 18500
    },
    {
      "epoch": 0.2739370521489641,
      "grad_norm": 2.2537448406219482,
      "learning_rate": 1.9452154731181247e-05,
      "loss": 0.0657,
      "step": 19000
    },
    {
      "epoch": 0.2811459219423579,
      "grad_norm": 3.0216615200042725,
      "learning_rate": 1.943773699159446e-05,
      "loss": 0.0656,
      "step": 19500
    },
    {
      "epoch": 0.2883547917357517,
      "grad_norm": 1.4544578790664673,
      "learning_rate": 1.942331925200767e-05,
      "loss": 0.0658,
      "step": 20000
    },
    {
      "epoch": 0.2955636615291455,
      "grad_norm": 2.4549198150634766,
      "learning_rate": 1.9408901512420885e-05,
      "loss": 0.0641,
      "step": 20500
    },
    {
      "epoch": 0.30277253132253923,
      "grad_norm": 1.514060616493225,
      "learning_rate": 1.9394483772834097e-05,
      "loss": 0.0633,
      "step": 21000
    },
    {
      "epoch": 0.30998140111593303,
      "grad_norm": 2.4346635341644287,
      "learning_rate": 1.9380066033247308e-05,
      "loss": 0.0627,
      "step": 21500
    },
    {
      "epoch": 0.31719027090932683,
      "grad_norm": 1.432133436203003,
      "learning_rate": 1.9365648293660523e-05,
      "loss": 0.0616,
      "step": 22000
    },
    {
      "epoch": 0.32439914070272063,
      "grad_norm": 1.2359411716461182,
      "learning_rate": 1.9351230554073735e-05,
      "loss": 0.0628,
      "step": 22500
    },
    {
      "epoch": 0.33160801049611444,
      "grad_norm": 2.1902575492858887,
      "learning_rate": 1.9336812814486947e-05,
      "loss": 0.0628,
      "step": 23000
    },
    {
      "epoch": 0.33881688028950824,
      "grad_norm": 1.7415978908538818,
      "learning_rate": 1.932239507490016e-05,
      "loss": 0.0616,
      "step": 23500
    },
    {
      "epoch": 0.346025750082902,
      "grad_norm": 1.401383399963379,
      "learning_rate": 1.930797733531337e-05,
      "loss": 0.0589,
      "step": 24000
    },
    {
      "epoch": 0.3532346198762958,
      "grad_norm": 1.5828105211257935,
      "learning_rate": 1.9293559595726582e-05,
      "loss": 0.0604,
      "step": 24500
    },
    {
      "epoch": 0.3604434896696896,
      "grad_norm": 0.8541142344474792,
      "learning_rate": 1.9279141856139797e-05,
      "loss": 0.0599,
      "step": 25000
    },
    {
      "epoch": 0.3676523594630834,
      "grad_norm": 2.8157145977020264,
      "learning_rate": 1.926472411655301e-05,
      "loss": 0.0593,
      "step": 25500
    },
    {
      "epoch": 0.3748612292564772,
      "grad_norm": 2.129725217819214,
      "learning_rate": 1.925030637696622e-05,
      "loss": 0.0578,
      "step": 26000
    },
    {
      "epoch": 0.38207009904987094,
      "grad_norm": 2.5838279724121094,
      "learning_rate": 1.9235888637379435e-05,
      "loss": 0.0574,
      "step": 26500
    },
    {
      "epoch": 0.38927896884326474,
      "grad_norm": 1.7000998258590698,
      "learning_rate": 1.9221470897792647e-05,
      "loss": 0.0553,
      "step": 27000
    },
    {
      "epoch": 0.39648783863665854,
      "grad_norm": 1.2641727924346924,
      "learning_rate": 1.920705315820586e-05,
      "loss": 0.0549,
      "step": 27500
    },
    {
      "epoch": 0.40369670843005234,
      "grad_norm": 1.7529101371765137,
      "learning_rate": 1.919263541861907e-05,
      "loss": 0.0562,
      "step": 28000
    },
    {
      "epoch": 0.41090557822344614,
      "grad_norm": 1.4027022123336792,
      "learning_rate": 1.9178217679032282e-05,
      "loss": 0.0552,
      "step": 28500
    },
    {
      "epoch": 0.41811444801683995,
      "grad_norm": 1.6767141819000244,
      "learning_rate": 1.9163799939445497e-05,
      "loss": 0.0572,
      "step": 29000
    },
    {
      "epoch": 0.4253233178102337,
      "grad_norm": 0.8946545720100403,
      "learning_rate": 1.914938219985871e-05,
      "loss": 0.0556,
      "step": 29500
    },
    {
      "epoch": 0.4325321876036275,
      "grad_norm": 2.469862937927246,
      "learning_rate": 1.913496446027192e-05,
      "loss": 0.0546,
      "step": 30000
    },
    {
      "epoch": 0.4397410573970213,
      "grad_norm": 3.368171215057373,
      "learning_rate": 1.9120546720685132e-05,
      "loss": 0.0527,
      "step": 30500
    },
    {
      "epoch": 0.4469499271904151,
      "grad_norm": 2.107477903366089,
      "learning_rate": 1.9106128981098344e-05,
      "loss": 0.0538,
      "step": 31000
    },
    {
      "epoch": 0.4541587969838089,
      "grad_norm": 1.8676276206970215,
      "learning_rate": 1.9091711241511555e-05,
      "loss": 0.0529,
      "step": 31500
    },
    {
      "epoch": 0.46136766677720265,
      "grad_norm": 1.8789501190185547,
      "learning_rate": 1.907729350192477e-05,
      "loss": 0.0525,
      "step": 32000
    },
    {
      "epoch": 0.46857653657059645,
      "grad_norm": 1.8588016033172607,
      "learning_rate": 1.9062875762337982e-05,
      "loss": 0.0519,
      "step": 32500
    },
    {
      "epoch": 0.47578540636399025,
      "grad_norm": 1.6721725463867188,
      "learning_rate": 1.9048458022751194e-05,
      "loss": 0.0508,
      "step": 33000
    },
    {
      "epoch": 0.48299427615738405,
      "grad_norm": 1.9724555015563965,
      "learning_rate": 1.903404028316441e-05,
      "loss": 0.0502,
      "step": 33500
    },
    {
      "epoch": 0.49020314595077785,
      "grad_norm": 1.9921311140060425,
      "learning_rate": 1.901962254357762e-05,
      "loss": 0.051,
      "step": 34000
    },
    {
      "epoch": 0.49741201574417165,
      "grad_norm": 2.889782190322876,
      "learning_rate": 1.9005204803990832e-05,
      "loss": 0.0518,
      "step": 34500
    },
    {
      "epoch": 0.5046208855375655,
      "grad_norm": 1.7622694969177246,
      "learning_rate": 1.8990787064404044e-05,
      "loss": 0.0494,
      "step": 35000
    },
    {
      "epoch": 0.5118297553309592,
      "grad_norm": 1.713699460029602,
      "learning_rate": 1.8976369324817256e-05,
      "loss": 0.0493,
      "step": 35500
    },
    {
      "epoch": 0.5190386251243531,
      "grad_norm": 1.262862205505371,
      "learning_rate": 1.896195158523047e-05,
      "loss": 0.0496,
      "step": 36000
    },
    {
      "epoch": 0.5262474949177468,
      "grad_norm": 2.085010051727295,
      "learning_rate": 1.8947533845643682e-05,
      "loss": 0.0509,
      "step": 36500
    },
    {
      "epoch": 0.5334563647111406,
      "grad_norm": 1.6257765293121338,
      "learning_rate": 1.8933116106056894e-05,
      "loss": 0.0498,
      "step": 37000
    },
    {
      "epoch": 0.5406652345045344,
      "grad_norm": 0.6558777093887329,
      "learning_rate": 1.8918698366470106e-05,
      "loss": 0.0484,
      "step": 37500
    },
    {
      "epoch": 0.5478741042979282,
      "grad_norm": 1.7351698875427246,
      "learning_rate": 1.8904280626883318e-05,
      "loss": 0.0496,
      "step": 38000
    },
    {
      "epoch": 0.555082974091322,
      "grad_norm": 0.915392279624939,
      "learning_rate": 1.888986288729653e-05,
      "loss": 0.0467,
      "step": 38500
    },
    {
      "epoch": 0.5622918438847158,
      "grad_norm": 0.9719710350036621,
      "learning_rate": 1.8875445147709744e-05,
      "loss": 0.0491,
      "step": 39000
    },
    {
      "epoch": 0.5695007136781095,
      "grad_norm": 0.4347970485687256,
      "learning_rate": 1.8861027408122956e-05,
      "loss": 0.0478,
      "step": 39500
    },
    {
      "epoch": 0.5767095834715034,
      "grad_norm": 1.4013206958770752,
      "learning_rate": 1.8846609668536168e-05,
      "loss": 0.0482,
      "step": 40000
    },
    {
      "epoch": 0.5839184532648971,
      "grad_norm": 1.6916135549545288,
      "learning_rate": 1.8832191928949383e-05,
      "loss": 0.0487,
      "step": 40500
    },
    {
      "epoch": 0.591127323058291,
      "grad_norm": 1.1497479677200317,
      "learning_rate": 1.8817774189362594e-05,
      "loss": 0.0473,
      "step": 41000
    },
    {
      "epoch": 0.5983361928516847,
      "grad_norm": 2.1202707290649414,
      "learning_rate": 1.8803356449775806e-05,
      "loss": 0.046,
      "step": 41500
    },
    {
      "epoch": 0.6055450626450785,
      "grad_norm": 1.8288294076919556,
      "learning_rate": 1.8788938710189018e-05,
      "loss": 0.0473,
      "step": 42000
    },
    {
      "epoch": 0.6127539324384723,
      "grad_norm": 0.8600142598152161,
      "learning_rate": 1.877452097060223e-05,
      "loss": 0.0452,
      "step": 42500
    },
    {
      "epoch": 0.6199628022318661,
      "grad_norm": 2.8069839477539062,
      "learning_rate": 1.8760103231015445e-05,
      "loss": 0.048,
      "step": 43000
    },
    {
      "epoch": 0.6271716720252599,
      "grad_norm": 0.8850429058074951,
      "learning_rate": 1.8745685491428656e-05,
      "loss": 0.0474,
      "step": 43500
    },
    {
      "epoch": 0.6343805418186537,
      "grad_norm": 1.063219666481018,
      "learning_rate": 1.8731267751841868e-05,
      "loss": 0.0446,
      "step": 44000
    },
    {
      "epoch": 0.6415894116120474,
      "grad_norm": 1.3925724029541016,
      "learning_rate": 1.871685001225508e-05,
      "loss": 0.0468,
      "step": 44500
    },
    {
      "epoch": 0.6487982814054413,
      "grad_norm": 0.9575428366661072,
      "learning_rate": 1.870243227266829e-05,
      "loss": 0.0447,
      "step": 45000
    },
    {
      "epoch": 0.656007151198835,
      "grad_norm": 2.547752618789673,
      "learning_rate": 1.8688014533081503e-05,
      "loss": 0.0456,
      "step": 45500
    },
    {
      "epoch": 0.6632160209922289,
      "grad_norm": 0.6029974222183228,
      "learning_rate": 1.8673596793494718e-05,
      "loss": 0.0464,
      "step": 46000
    },
    {
      "epoch": 0.6704248907856226,
      "grad_norm": 0.27106812596321106,
      "learning_rate": 1.865917905390793e-05,
      "loss": 0.0437,
      "step": 46500
    },
    {
      "epoch": 0.6776337605790165,
      "grad_norm": 1.3233801126480103,
      "learning_rate": 1.864476131432114e-05,
      "loss": 0.0447,
      "step": 47000
    },
    {
      "epoch": 0.6848426303724102,
      "grad_norm": 0.38903898000717163,
      "learning_rate": 1.8630343574734356e-05,
      "loss": 0.0455,
      "step": 47500
    },
    {
      "epoch": 0.692051500165804,
      "grad_norm": 1.247036337852478,
      "learning_rate": 1.8615925835147568e-05,
      "loss": 0.044,
      "step": 48000
    },
    {
      "epoch": 0.6992603699591978,
      "grad_norm": 0.9771102666854858,
      "learning_rate": 1.860150809556078e-05,
      "loss": 0.0446,
      "step": 48500
    },
    {
      "epoch": 0.7064692397525916,
      "grad_norm": 1.6191680431365967,
      "learning_rate": 1.858709035597399e-05,
      "loss": 0.0455,
      "step": 49000
    },
    {
      "epoch": 0.7136781095459854,
      "grad_norm": 0.9542379975318909,
      "learning_rate": 1.8572672616387203e-05,
      "loss": 0.0426,
      "step": 49500
    },
    {
      "epoch": 0.7208869793393792,
      "grad_norm": 1.6160619258880615,
      "learning_rate": 1.8558254876800418e-05,
      "loss": 0.0433,
      "step": 50000
    },
    {
      "epoch": 0.7280958491327729,
      "grad_norm": 1.1810977458953857,
      "learning_rate": 1.854383713721363e-05,
      "loss": 0.0443,
      "step": 50500
    },
    {
      "epoch": 0.7353047189261668,
      "grad_norm": 1.4848960638046265,
      "learning_rate": 1.852941939762684e-05,
      "loss": 0.0442,
      "step": 51000
    },
    {
      "epoch": 0.7425135887195605,
      "grad_norm": 1.2140188217163086,
      "learning_rate": 1.8515001658040053e-05,
      "loss": 0.0436,
      "step": 51500
    },
    {
      "epoch": 0.7497224585129544,
      "grad_norm": 0.6803346276283264,
      "learning_rate": 1.8500583918453265e-05,
      "loss": 0.0416,
      "step": 52000
    },
    {
      "epoch": 0.7569313283063481,
      "grad_norm": 2.847879409790039,
      "learning_rate": 1.8486166178866477e-05,
      "loss": 0.0401,
      "step": 52500
    },
    {
      "epoch": 0.7641401980997419,
      "grad_norm": 1.3574286699295044,
      "learning_rate": 1.8471748439279692e-05,
      "loss": 0.0426,
      "step": 53000
    },
    {
      "epoch": 0.7713490678931357,
      "grad_norm": 1.5763428211212158,
      "learning_rate": 1.8457330699692903e-05,
      "loss": 0.0416,
      "step": 53500
    },
    {
      "epoch": 0.7785579376865295,
      "grad_norm": 2.006143808364868,
      "learning_rate": 1.8442912960106115e-05,
      "loss": 0.0423,
      "step": 54000
    },
    {
      "epoch": 0.7857668074799233,
      "grad_norm": 2.0041260719299316,
      "learning_rate": 1.842849522051933e-05,
      "loss": 0.043,
      "step": 54500
    },
    {
      "epoch": 0.7929756772733171,
      "grad_norm": 1.0083436965942383,
      "learning_rate": 1.8414077480932542e-05,
      "loss": 0.0428,
      "step": 55000
    },
    {
      "epoch": 0.8001845470667108,
      "grad_norm": 1.2364863157272339,
      "learning_rate": 1.8399659741345754e-05,
      "loss": 0.0431,
      "step": 55500
    },
    {
      "epoch": 0.8073934168601047,
      "grad_norm": 1.1397020816802979,
      "learning_rate": 1.8385242001758965e-05,
      "loss": 0.0408,
      "step": 56000
    },
    {
      "epoch": 0.8146022866534984,
      "grad_norm": 1.046647071838379,
      "learning_rate": 1.8370824262172177e-05,
      "loss": 0.0424,
      "step": 56500
    },
    {
      "epoch": 0.8218111564468923,
      "grad_norm": 0.7180289626121521,
      "learning_rate": 1.8356406522585392e-05,
      "loss": 0.0417,
      "step": 57000
    },
    {
      "epoch": 0.829020026240286,
      "grad_norm": 1.866095781326294,
      "learning_rate": 1.8341988782998604e-05,
      "loss": 0.0406,
      "step": 57500
    },
    {
      "epoch": 0.8362288960336799,
      "grad_norm": 1.7192025184631348,
      "learning_rate": 1.8327571043411815e-05,
      "loss": 0.042,
      "step": 58000
    },
    {
      "epoch": 0.8434377658270736,
      "grad_norm": 1.3043447732925415,
      "learning_rate": 1.8313153303825027e-05,
      "loss": 0.0419,
      "step": 58500
    },
    {
      "epoch": 0.8506466356204674,
      "grad_norm": 2.372190237045288,
      "learning_rate": 1.829873556423824e-05,
      "loss": 0.0421,
      "step": 59000
    },
    {
      "epoch": 0.8578555054138612,
      "grad_norm": 0.9028930068016052,
      "learning_rate": 1.828431782465145e-05,
      "loss": 0.0396,
      "step": 59500
    },
    {
      "epoch": 0.865064375207255,
      "grad_norm": 1.2869058847427368,
      "learning_rate": 1.8269900085064665e-05,
      "loss": 0.0401,
      "step": 60000
    },
    {
      "epoch": 0.8722732450006488,
      "grad_norm": 2.214855670928955,
      "learning_rate": 1.8255482345477877e-05,
      "loss": 0.04,
      "step": 60500
    },
    {
      "epoch": 0.8794821147940426,
      "grad_norm": 0.9826574325561523,
      "learning_rate": 1.824106460589109e-05,
      "loss": 0.0397,
      "step": 61000
    },
    {
      "epoch": 0.8866909845874363,
      "grad_norm": 0.7741074562072754,
      "learning_rate": 1.8226646866304304e-05,
      "loss": 0.0397,
      "step": 61500
    },
    {
      "epoch": 0.8938998543808302,
      "grad_norm": 1.2778081893920898,
      "learning_rate": 1.8212229126717516e-05,
      "loss": 0.0396,
      "step": 62000
    },
    {
      "epoch": 0.9011087241742239,
      "grad_norm": 0.7415226697921753,
      "learning_rate": 1.8197811387130727e-05,
      "loss": 0.0398,
      "step": 62500
    },
    {
      "epoch": 0.9083175939676178,
      "grad_norm": 2.152737617492676,
      "learning_rate": 1.818339364754394e-05,
      "loss": 0.0395,
      "step": 63000
    },
    {
      "epoch": 0.9155264637610115,
      "grad_norm": 0.9719590544700623,
      "learning_rate": 1.816897590795715e-05,
      "loss": 0.0387,
      "step": 63500
    },
    {
      "epoch": 0.9227353335544053,
      "grad_norm": 1.4587551355361938,
      "learning_rate": 1.8154558168370366e-05,
      "loss": 0.0395,
      "step": 64000
    },
    {
      "epoch": 0.9299442033477991,
      "grad_norm": 1.4218809604644775,
      "learning_rate": 1.8140140428783577e-05,
      "loss": 0.0375,
      "step": 64500
    },
    {
      "epoch": 0.9371530731411929,
      "grad_norm": 1.8009737730026245,
      "learning_rate": 1.812572268919679e-05,
      "loss": 0.0387,
      "step": 65000
    },
    {
      "epoch": 0.9443619429345868,
      "grad_norm": 1.2379016876220703,
      "learning_rate": 1.811130494961e-05,
      "loss": 0.0386,
      "step": 65500
    },
    {
      "epoch": 0.9515708127279805,
      "grad_norm": 1.1901589632034302,
      "learning_rate": 1.8096887210023216e-05,
      "loss": 0.0381,
      "step": 66000
    },
    {
      "epoch": 0.9587796825213742,
      "grad_norm": 1.0341569185256958,
      "learning_rate": 1.8082469470436424e-05,
      "loss": 0.0402,
      "step": 66500
    },
    {
      "epoch": 0.9659885523147681,
      "grad_norm": 1.4235957860946655,
      "learning_rate": 1.806805173084964e-05,
      "loss": 0.0382,
      "step": 67000
    },
    {
      "epoch": 0.9731974221081618,
      "grad_norm": 1.095893383026123,
      "learning_rate": 1.805363399126285e-05,
      "loss": 0.0396,
      "step": 67500
    },
    {
      "epoch": 0.9804062919015557,
      "grad_norm": 1.8859561681747437,
      "learning_rate": 1.8039216251676063e-05,
      "loss": 0.038,
      "step": 68000
    },
    {
      "epoch": 0.9876151616949495,
      "grad_norm": 1.8770360946655273,
      "learning_rate": 1.8024798512089278e-05,
      "loss": 0.039,
      "step": 68500
    },
    {
      "epoch": 0.9948240314883433,
      "grad_norm": 1.870827555656433,
      "learning_rate": 1.801038077250249e-05,
      "loss": 0.038,
      "step": 69000
    },
    {
      "epoch": 1.0,
      "eval_f1": 0.9895049158009324,
      "eval_loss": 0.034001659601926804,
      "eval_runtime": 683.1241,
      "eval_samples_per_second": 1528.989,
      "eval_steps_per_second": 47.782,
      "step": 69359
    },
    {
      "epoch": 1.002032901281737,
      "grad_norm": 0.4856395125389099,
      "learning_rate": 1.79959630329157e-05,
      "loss": 0.0352,
      "step": 69500
    },
    {
      "epoch": 1.009241771075131,
      "grad_norm": 1.8835086822509766,
      "learning_rate": 1.7981545293328913e-05,
      "loss": 0.0287,
      "step": 70000
    },
    {
      "epoch": 1.0164506408685245,
      "grad_norm": 1.941490888595581,
      "learning_rate": 1.7967127553742124e-05,
      "loss": 0.0307,
      "step": 70500
    },
    {
      "epoch": 1.0236595106619184,
      "grad_norm": 1.525707483291626,
      "learning_rate": 1.795270981415534e-05,
      "loss": 0.03,
      "step": 71000
    },
    {
      "epoch": 1.0308683804553123,
      "grad_norm": 0.6174446940422058,
      "learning_rate": 1.793829207456855e-05,
      "loss": 0.029,
      "step": 71500
    },
    {
      "epoch": 1.0380772502487061,
      "grad_norm": 1.043771505355835,
      "learning_rate": 1.7923874334981763e-05,
      "loss": 0.0311,
      "step": 72000
    },
    {
      "epoch": 1.0452861200420998,
      "grad_norm": 0.28765255212783813,
      "learning_rate": 1.7909456595394978e-05,
      "loss": 0.0291,
      "step": 72500
    },
    {
      "epoch": 1.0524949898354936,
      "grad_norm": 0.8367669582366943,
      "learning_rate": 1.789503885580819e-05,
      "loss": 0.0307,
      "step": 73000
    },
    {
      "epoch": 1.0597038596288875,
      "grad_norm": 0.8930952548980713,
      "learning_rate": 1.7880621116221398e-05,
      "loss": 0.0297,
      "step": 73500
    },
    {
      "epoch": 1.066912729422281,
      "grad_norm": 1.0413399934768677,
      "learning_rate": 1.7866203376634613e-05,
      "loss": 0.03,
      "step": 74000
    },
    {
      "epoch": 1.074121599215675,
      "grad_norm": 1.1929751634597778,
      "learning_rate": 1.7851785637047825e-05,
      "loss": 0.0287,
      "step": 74500
    },
    {
      "epoch": 1.0813304690090688,
      "grad_norm": 0.8676954507827759,
      "learning_rate": 1.7837367897461036e-05,
      "loss": 0.0307,
      "step": 75000
    },
    {
      "epoch": 1.0885393388024625,
      "grad_norm": 0.733383059501648,
      "learning_rate": 1.782295015787425e-05,
      "loss": 0.029,
      "step": 75500
    },
    {
      "epoch": 1.0957482085958563,
      "grad_norm": 1.005913257598877,
      "learning_rate": 1.7808532418287463e-05,
      "loss": 0.0288,
      "step": 76000
    },
    {
      "epoch": 1.1029570783892502,
      "grad_norm": 1.4946510791778564,
      "learning_rate": 1.7794114678700675e-05,
      "loss": 0.0294,
      "step": 76500
    },
    {
      "epoch": 1.110165948182644,
      "grad_norm": 0.966665506362915,
      "learning_rate": 1.7779696939113886e-05,
      "loss": 0.0311,
      "step": 77000
    },
    {
      "epoch": 1.1173748179760377,
      "grad_norm": 0.8129379749298096,
      "learning_rate": 1.7765279199527098e-05,
      "loss": 0.0301,
      "step": 77500
    },
    {
      "epoch": 1.1245836877694315,
      "grad_norm": 1.1672717332839966,
      "learning_rate": 1.7750861459940313e-05,
      "loss": 0.0297,
      "step": 78000
    },
    {
      "epoch": 1.1317925575628254,
      "grad_norm": 1.0149409770965576,
      "learning_rate": 1.7736443720353525e-05,
      "loss": 0.031,
      "step": 78500
    },
    {
      "epoch": 1.139001427356219,
      "grad_norm": 1.3319754600524902,
      "learning_rate": 1.7722025980766736e-05,
      "loss": 0.0294,
      "step": 79000
    },
    {
      "epoch": 1.1462102971496129,
      "grad_norm": 3.036787509918213,
      "learning_rate": 1.770760824117995e-05,
      "loss": 0.0294,
      "step": 79500
    },
    {
      "epoch": 1.1534191669430067,
      "grad_norm": 0.6281238198280334,
      "learning_rate": 1.7693190501593163e-05,
      "loss": 0.0312,
      "step": 80000
    },
    {
      "epoch": 1.1606280367364006,
      "grad_norm": 1.39284086227417,
      "learning_rate": 1.767877276200637e-05,
      "loss": 0.0299,
      "step": 80500
    },
    {
      "epoch": 1.1678369065297942,
      "grad_norm": 2.4636764526367188,
      "learning_rate": 1.7664355022419587e-05,
      "loss": 0.0304,
      "step": 81000
    },
    {
      "epoch": 1.175045776323188,
      "grad_norm": 1.0513309240341187,
      "learning_rate": 1.7649937282832798e-05,
      "loss": 0.0293,
      "step": 81500
    },
    {
      "epoch": 1.182254646116582,
      "grad_norm": 0.739205539226532,
      "learning_rate": 1.763551954324601e-05,
      "loss": 0.0297,
      "step": 82000
    },
    {
      "epoch": 1.1894635159099756,
      "grad_norm": 1.1646817922592163,
      "learning_rate": 1.7621101803659225e-05,
      "loss": 0.0281,
      "step": 82500
    },
    {
      "epoch": 1.1966723857033694,
      "grad_norm": 1.6882481575012207,
      "learning_rate": 1.7606684064072437e-05,
      "loss": 0.0308,
      "step": 83000
    },
    {
      "epoch": 1.2038812554967633,
      "grad_norm": 2.1905980110168457,
      "learning_rate": 1.759226632448565e-05,
      "loss": 0.0301,
      "step": 83500
    },
    {
      "epoch": 1.211090125290157,
      "grad_norm": 0.4102253317832947,
      "learning_rate": 1.757784858489886e-05,
      "loss": 0.0296,
      "step": 84000
    },
    {
      "epoch": 1.2182989950835508,
      "grad_norm": 1.5355827808380127,
      "learning_rate": 1.7563430845312072e-05,
      "loss": 0.031,
      "step": 84500
    },
    {
      "epoch": 1.2255078648769446,
      "grad_norm": 0.4144400954246521,
      "learning_rate": 1.7549013105725287e-05,
      "loss": 0.0303,
      "step": 85000
    },
    {
      "epoch": 1.2327167346703383,
      "grad_norm": 0.5286178588867188,
      "learning_rate": 1.75345953661385e-05,
      "loss": 0.0311,
      "step": 85500
    },
    {
      "epoch": 1.2399256044637321,
      "grad_norm": 1.3401720523834229,
      "learning_rate": 1.752017762655171e-05,
      "loss": 0.0303,
      "step": 86000
    },
    {
      "epoch": 1.247134474257126,
      "grad_norm": 1.5546993017196655,
      "learning_rate": 1.7505759886964925e-05,
      "loss": 0.0296,
      "step": 86500
    },
    {
      "epoch": 1.2543433440505198,
      "grad_norm": 1.7993361949920654,
      "learning_rate": 1.7491342147378137e-05,
      "loss": 0.03,
      "step": 87000
    },
    {
      "epoch": 1.2615522138439135,
      "grad_norm": 1.058311939239502,
      "learning_rate": 1.7476924407791345e-05,
      "loss": 0.0283,
      "step": 87500
    },
    {
      "epoch": 1.2687610836373073,
      "grad_norm": 1.1616915464401245,
      "learning_rate": 1.746250666820456e-05,
      "loss": 0.0306,
      "step": 88000
    },
    {
      "epoch": 1.2759699534307012,
      "grad_norm": 1.5120762586593628,
      "learning_rate": 1.7448088928617772e-05,
      "loss": 0.0296,
      "step": 88500
    },
    {
      "epoch": 1.283178823224095,
      "grad_norm": 1.033087134361267,
      "learning_rate": 1.7433671189030984e-05,
      "loss": 0.0296,
      "step": 89000
    },
    {
      "epoch": 1.2903876930174887,
      "grad_norm": 0.9456692337989807,
      "learning_rate": 1.74192534494442e-05,
      "loss": 0.0293,
      "step": 89500
    },
    {
      "epoch": 1.2975965628108825,
      "grad_norm": 0.4252309799194336,
      "learning_rate": 1.740483570985741e-05,
      "loss": 0.0287,
      "step": 90000
    },
    {
      "epoch": 1.3048054326042764,
      "grad_norm": 1.4315825700759888,
      "learning_rate": 1.7390417970270622e-05,
      "loss": 0.0314,
      "step": 90500
    },
    {
      "epoch": 1.31201430239767,
      "grad_norm": 0.9023242592811584,
      "learning_rate": 1.7376000230683834e-05,
      "loss": 0.0296,
      "step": 91000
    },
    {
      "epoch": 1.3192231721910639,
      "grad_norm": 1.8055963516235352,
      "learning_rate": 1.7361582491097045e-05,
      "loss": 0.0289,
      "step": 91500
    },
    {
      "epoch": 1.3264320419844577,
      "grad_norm": 1.2063618898391724,
      "learning_rate": 1.734716475151026e-05,
      "loss": 0.03,
      "step": 92000
    },
    {
      "epoch": 1.3336409117778514,
      "grad_norm": 2.5645272731781006,
      "learning_rate": 1.7332747011923472e-05,
      "loss": 0.0289,
      "step": 92500
    },
    {
      "epoch": 1.3408497815712452,
      "grad_norm": 1.9335203170776367,
      "learning_rate": 1.7318329272336684e-05,
      "loss": 0.0285,
      "step": 93000
    },
    {
      "epoch": 1.348058651364639,
      "grad_norm": 0.8842147588729858,
      "learning_rate": 1.73039115327499e-05,
      "loss": 0.0287,
      "step": 93500
    },
    {
      "epoch": 1.3552675211580327,
      "grad_norm": 1.2006937265396118,
      "learning_rate": 1.728949379316311e-05,
      "loss": 0.0288,
      "step": 94000
    },
    {
      "epoch": 1.3624763909514266,
      "grad_norm": 1.1261006593704224,
      "learning_rate": 1.7275076053576322e-05,
      "loss": 0.0293,
      "step": 94500
    },
    {
      "epoch": 1.3696852607448204,
      "grad_norm": 1.2065215110778809,
      "learning_rate": 1.7260658313989534e-05,
      "loss": 0.0282,
      "step": 95000
    },
    {
      "epoch": 1.3768941305382143,
      "grad_norm": 1.8486534357070923,
      "learning_rate": 1.7246240574402746e-05,
      "loss": 0.029,
      "step": 95500
    },
    {
      "epoch": 1.384103000331608,
      "grad_norm": 0.8908069729804993,
      "learning_rate": 1.7231822834815957e-05,
      "loss": 0.0294,
      "step": 96000
    },
    {
      "epoch": 1.3913118701250018,
      "grad_norm": 0.6375325918197632,
      "learning_rate": 1.7217405095229172e-05,
      "loss": 0.0287,
      "step": 96500
    },
    {
      "epoch": 1.3985207399183957,
      "grad_norm": 1.9673434495925903,
      "learning_rate": 1.7202987355642384e-05,
      "loss": 0.0282,
      "step": 97000
    },
    {
      "epoch": 1.4057296097117895,
      "grad_norm": 1.1606006622314453,
      "learning_rate": 1.7188569616055596e-05,
      "loss": 0.0284,
      "step": 97500
    },
    {
      "epoch": 1.4129384795051831,
      "grad_norm": 1.003493309020996,
      "learning_rate": 1.7174151876468807e-05,
      "loss": 0.0283,
      "step": 98000
    },
    {
      "epoch": 1.420147349298577,
      "grad_norm": 0.9186868071556091,
      "learning_rate": 1.715973413688202e-05,
      "loss": 0.0277,
      "step": 98500
    },
    {
      "epoch": 1.4273562190919709,
      "grad_norm": 1.3305683135986328,
      "learning_rate": 1.7145316397295234e-05,
      "loss": 0.0292,
      "step": 99000
    },
    {
      "epoch": 1.4345650888853645,
      "grad_norm": 1.3776835203170776,
      "learning_rate": 1.7130898657708446e-05,
      "loss": 0.0286,
      "step": 99500
    },
    {
      "epoch": 1.4417739586787583,
      "grad_norm": 1.6687921285629272,
      "learning_rate": 1.7116480918121658e-05,
      "loss": 0.029,
      "step": 100000
    },
    {
      "epoch": 1.4489828284721522,
      "grad_norm": 1.9249308109283447,
      "learning_rate": 1.7102063178534873e-05,
      "loss": 0.0262,
      "step": 100500
    },
    {
      "epoch": 1.4561916982655458,
      "grad_norm": 1.1834752559661865,
      "learning_rate": 1.7087645438948084e-05,
      "loss": 0.0294,
      "step": 101000
    },
    {
      "epoch": 1.4634005680589397,
      "grad_norm": 2.1350696086883545,
      "learning_rate": 1.7073227699361296e-05,
      "loss": 0.0276,
      "step": 101500
    },
    {
      "epoch": 1.4706094378523336,
      "grad_norm": 2.563725709915161,
      "learning_rate": 1.7058809959774508e-05,
      "loss": 0.0276,
      "step": 102000
    },
    {
      "epoch": 1.4778183076457272,
      "grad_norm": 0.9226647019386292,
      "learning_rate": 1.704439222018772e-05,
      "loss": 0.0284,
      "step": 102500
    },
    {
      "epoch": 1.485027177439121,
      "grad_norm": 0.34231990575790405,
      "learning_rate": 1.702997448060093e-05,
      "loss": 0.0281,
      "step": 103000
    },
    {
      "epoch": 1.492236047232515,
      "grad_norm": 2.339191436767578,
      "learning_rate": 1.7015556741014146e-05,
      "loss": 0.029,
      "step": 103500
    },
    {
      "epoch": 1.4994449170259085,
      "grad_norm": 1.7756520509719849,
      "learning_rate": 1.7001139001427358e-05,
      "loss": 0.0288,
      "step": 104000
    },
    {
      "epoch": 1.5066537868193026,
      "grad_norm": 2.0807387828826904,
      "learning_rate": 1.698672126184057e-05,
      "loss": 0.0281,
      "step": 104500
    },
    {
      "epoch": 1.5138626566126963,
      "grad_norm": 1.4787542819976807,
      "learning_rate": 1.6972303522253785e-05,
      "loss": 0.0284,
      "step": 105000
    },
    {
      "epoch": 1.52107152640609,
      "grad_norm": 1.719581961631775,
      "learning_rate": 1.6957885782666993e-05,
      "loss": 0.0287,
      "step": 105500
    },
    {
      "epoch": 1.528280396199484,
      "grad_norm": 0.8158332109451294,
      "learning_rate": 1.6943468043080208e-05,
      "loss": 0.029,
      "step": 106000
    },
    {
      "epoch": 1.5354892659928776,
      "grad_norm": 0.10212863981723785,
      "learning_rate": 1.692905030349342e-05,
      "loss": 0.0275,
      "step": 106500
    },
    {
      "epoch": 1.5426981357862715,
      "grad_norm": 1.0970171689987183,
      "learning_rate": 1.691463256390663e-05,
      "loss": 0.0282,
      "step": 107000
    },
    {
      "epoch": 1.5499070055796653,
      "grad_norm": 0.4221758246421814,
      "learning_rate": 1.6900214824319846e-05,
      "loss": 0.0285,
      "step": 107500
    },
    {
      "epoch": 1.557115875373059,
      "grad_norm": 1.5400525331497192,
      "learning_rate": 1.6885797084733058e-05,
      "loss": 0.0282,
      "step": 108000
    },
    {
      "epoch": 1.5643247451664528,
      "grad_norm": 1.6638318300247192,
      "learning_rate": 1.687137934514627e-05,
      "loss": 0.0301,
      "step": 108500
    },
    {
      "epoch": 1.5715336149598467,
      "grad_norm": 1.3407906293869019,
      "learning_rate": 1.685696160555948e-05,
      "loss": 0.0276,
      "step": 109000
    },
    {
      "epoch": 1.5787424847532403,
      "grad_norm": 0.8864063024520874,
      "learning_rate": 1.6842543865972693e-05,
      "loss": 0.0273,
      "step": 109500
    },
    {
      "epoch": 1.5859513545466342,
      "grad_norm": 1.5699615478515625,
      "learning_rate": 1.6828126126385905e-05,
      "loss": 0.0267,
      "step": 110000
    },
    {
      "epoch": 1.593160224340028,
      "grad_norm": 0.20337066054344177,
      "learning_rate": 1.681370838679912e-05,
      "loss": 0.0285,
      "step": 110500
    },
    {
      "epoch": 1.6003690941334217,
      "grad_norm": 0.7260587811470032,
      "learning_rate": 1.679929064721233e-05,
      "loss": 0.028,
      "step": 111000
    },
    {
      "epoch": 1.6075779639268155,
      "grad_norm": 0.434865266084671,
      "learning_rate": 1.6784872907625543e-05,
      "loss": 0.027,
      "step": 111500
    },
    {
      "epoch": 1.6147868337202094,
      "grad_norm": 1.0067859888076782,
      "learning_rate": 1.677045516803876e-05,
      "loss": 0.0276,
      "step": 112000
    },
    {
      "epoch": 1.621995703513603,
      "grad_norm": 1.7014882564544678,
      "learning_rate": 1.6756037428451967e-05,
      "loss": 0.0276,
      "step": 112500
    },
    {
      "epoch": 1.629204573306997,
      "grad_norm": 1.2809230089187622,
      "learning_rate": 1.674161968886518e-05,
      "loss": 0.0276,
      "step": 113000
    },
    {
      "epoch": 1.6364134431003907,
      "grad_norm": 1.2574232816696167,
      "learning_rate": 1.6727201949278393e-05,
      "loss": 0.0284,
      "step": 113500
    },
    {
      "epoch": 1.6436223128937844,
      "grad_norm": 1.3797274827957153,
      "learning_rate": 1.6712784209691605e-05,
      "loss": 0.0282,
      "step": 114000
    },
    {
      "epoch": 1.6508311826871784,
      "grad_norm": 0.32101693749427795,
      "learning_rate": 1.669836647010482e-05,
      "loss": 0.0274,
      "step": 114500
    },
    {
      "epoch": 1.658040052480572,
      "grad_norm": 0.41121360659599304,
      "learning_rate": 1.6683948730518032e-05,
      "loss": 0.0286,
      "step": 115000
    },
    {
      "epoch": 1.665248922273966,
      "grad_norm": 0.5161770582199097,
      "learning_rate": 1.6669530990931243e-05,
      "loss": 0.0271,
      "step": 115500
    },
    {
      "epoch": 1.6724577920673598,
      "grad_norm": 1.153785228729248,
      "learning_rate": 1.6655113251344455e-05,
      "loss": 0.0264,
      "step": 116000
    },
    {
      "epoch": 1.6796666618607534,
      "grad_norm": 1.5621336698532104,
      "learning_rate": 1.6640695511757667e-05,
      "loss": 0.0272,
      "step": 116500
    },
    {
      "epoch": 1.6868755316541473,
      "grad_norm": 2.4250948429107666,
      "learning_rate": 1.662627777217088e-05,
      "loss": 0.0282,
      "step": 117000
    },
    {
      "epoch": 1.6940844014475411,
      "grad_norm": 0.24833956360816956,
      "learning_rate": 1.6611860032584094e-05,
      "loss": 0.0279,
      "step": 117500
    },
    {
      "epoch": 1.7012932712409348,
      "grad_norm": 2.7739059925079346,
      "learning_rate": 1.6597442292997305e-05,
      "loss": 0.0283,
      "step": 118000
    },
    {
      "epoch": 1.7085021410343286,
      "grad_norm": 0.29604852199554443,
      "learning_rate": 1.6583024553410517e-05,
      "loss": 0.0271,
      "step": 118500
    },
    {
      "epoch": 1.7157110108277225,
      "grad_norm": 1.0948668718338013,
      "learning_rate": 1.6568606813823732e-05,
      "loss": 0.0269,
      "step": 119000
    },
    {
      "epoch": 1.7229198806211161,
      "grad_norm": 0.20236891508102417,
      "learning_rate": 1.655418907423694e-05,
      "loss": 0.0264,
      "step": 119500
    },
    {
      "epoch": 1.73012875041451,
      "grad_norm": 0.9090920090675354,
      "learning_rate": 1.6539771334650155e-05,
      "loss": 0.0282,
      "step": 120000
    },
    {
      "epoch": 1.7373376202079038,
      "grad_norm": 2.128474473953247,
      "learning_rate": 1.6525353595063367e-05,
      "loss": 0.0283,
      "step": 120500
    },
    {
      "epoch": 1.7445464900012975,
      "grad_norm": 1.6552634239196777,
      "learning_rate": 1.651093585547658e-05,
      "loss": 0.0272,
      "step": 121000
    },
    {
      "epoch": 1.7517553597946915,
      "grad_norm": 0.7921839356422424,
      "learning_rate": 1.6496518115889794e-05,
      "loss": 0.0301,
      "step": 121500
    },
    {
      "epoch": 1.7589642295880852,
      "grad_norm": 0.8467416763305664,
      "learning_rate": 1.6482100376303006e-05,
      "loss": 0.0266,
      "step": 122000
    },
    {
      "epoch": 1.7661730993814788,
      "grad_norm": 1.4604544639587402,
      "learning_rate": 1.6467682636716217e-05,
      "loss": 0.0253,
      "step": 122500
    },
    {
      "epoch": 1.773381969174873,
      "grad_norm": 0.677890956401825,
      "learning_rate": 1.645326489712943e-05,
      "loss": 0.0266,
      "step": 123000
    },
    {
      "epoch": 1.7805908389682665,
      "grad_norm": 0.2728472352027893,
      "learning_rate": 1.643884715754264e-05,
      "loss": 0.027,
      "step": 123500
    },
    {
      "epoch": 1.7877997087616604,
      "grad_norm": 1.2005136013031006,
      "learning_rate": 1.6424429417955852e-05,
      "loss": 0.0265,
      "step": 124000
    },
    {
      "epoch": 1.7950085785550542,
      "grad_norm": 2.1395583152770996,
      "learning_rate": 1.6410011678369067e-05,
      "loss": 0.0285,
      "step": 124500
    },
    {
      "epoch": 1.8022174483484479,
      "grad_norm": 1.5524953603744507,
      "learning_rate": 1.639559393878228e-05,
      "loss": 0.026,
      "step": 125000
    },
    {
      "epoch": 1.8094263181418417,
      "grad_norm": 1.5434062480926514,
      "learning_rate": 1.638117619919549e-05,
      "loss": 0.0272,
      "step": 125500
    },
    {
      "epoch": 1.8166351879352356,
      "grad_norm": 1.4732664823532104,
      "learning_rate": 1.6366758459608706e-05,
      "loss": 0.0264,
      "step": 126000
    },
    {
      "epoch": 1.8238440577286292,
      "grad_norm": 0.5316962599754333,
      "learning_rate": 1.6352340720021914e-05,
      "loss": 0.0262,
      "step": 126500
    },
    {
      "epoch": 1.831052927522023,
      "grad_norm": 0.09009312838315964,
      "learning_rate": 1.633792298043513e-05,
      "loss": 0.0272,
      "step": 127000
    },
    {
      "epoch": 1.838261797315417,
      "grad_norm": 1.211990475654602,
      "learning_rate": 1.632350524084834e-05,
      "loss": 0.0272,
      "step": 127500
    },
    {
      "epoch": 1.8454706671088106,
      "grad_norm": 1.1306172609329224,
      "learning_rate": 1.6309087501261552e-05,
      "loss": 0.0268,
      "step": 128000
    },
    {
      "epoch": 1.8526795369022044,
      "grad_norm": 1.8232672214508057,
      "learning_rate": 1.6294669761674768e-05,
      "loss": 0.0282,
      "step": 128500
    },
    {
      "epoch": 1.8598884066955983,
      "grad_norm": 2.736703395843506,
      "learning_rate": 1.628025202208798e-05,
      "loss": 0.0271,
      "step": 129000
    },
    {
      "epoch": 1.867097276488992,
      "grad_norm": 2.2017531394958496,
      "learning_rate": 1.626583428250119e-05,
      "loss": 0.0264,
      "step": 129500
    },
    {
      "epoch": 1.874306146282386,
      "grad_norm": 0.6630580425262451,
      "learning_rate": 1.6251416542914403e-05,
      "loss": 0.0268,
      "step": 130000
    },
    {
      "epoch": 1.8815150160757796,
      "grad_norm": 0.2576875388622284,
      "learning_rate": 1.6236998803327614e-05,
      "loss": 0.0275,
      "step": 130500
    },
    {
      "epoch": 1.8887238858691733,
      "grad_norm": 0.625859260559082,
      "learning_rate": 1.6222581063740826e-05,
      "loss": 0.0263,
      "step": 131000
    },
    {
      "epoch": 1.8959327556625674,
      "grad_norm": 2.3079171180725098,
      "learning_rate": 1.620816332415404e-05,
      "loss": 0.0266,
      "step": 131500
    },
    {
      "epoch": 1.903141625455961,
      "grad_norm": 0.8551648259162903,
      "learning_rate": 1.6193745584567253e-05,
      "loss": 0.0268,
      "step": 132000
    },
    {
      "epoch": 1.9103504952493549,
      "grad_norm": 1.2068754434585571,
      "learning_rate": 1.6179327844980464e-05,
      "loss": 0.0276,
      "step": 132500
    },
    {
      "epoch": 1.9175593650427487,
      "grad_norm": 0.4594031274318695,
      "learning_rate": 1.616491010539368e-05,
      "loss": 0.0271,
      "step": 133000
    },
    {
      "epoch": 1.9247682348361423,
      "grad_norm": 0.5821360945701599,
      "learning_rate": 1.6150492365806888e-05,
      "loss": 0.0267,
      "step": 133500
    },
    {
      "epoch": 1.9319771046295362,
      "grad_norm": 0.5188286304473877,
      "learning_rate": 1.6136074626220103e-05,
      "loss": 0.027,
      "step": 134000
    },
    {
      "epoch": 1.93918597442293,
      "grad_norm": 1.6506882905960083,
      "learning_rate": 1.6121656886633315e-05,
      "loss": 0.026,
      "step": 134500
    },
    {
      "epoch": 1.9463948442163237,
      "grad_norm": 1.5678963661193848,
      "learning_rate": 1.6107239147046526e-05,
      "loss": 0.0264,
      "step": 135000
    },
    {
      "epoch": 1.9536037140097176,
      "grad_norm": 0.3626735210418701,
      "learning_rate": 1.609282140745974e-05,
      "loss": 0.0264,
      "step": 135500
    },
    {
      "epoch": 1.9608125838031114,
      "grad_norm": 0.48542195558547974,
      "learning_rate": 1.6078403667872953e-05,
      "loss": 0.0257,
      "step": 136000
    },
    {
      "epoch": 1.968021453596505,
      "grad_norm": 0.93156498670578,
      "learning_rate": 1.6063985928286165e-05,
      "loss": 0.0274,
      "step": 136500
    },
    {
      "epoch": 1.975230323389899,
      "grad_norm": 0.6599089503288269,
      "learning_rate": 1.6049568188699376e-05,
      "loss": 0.0253,
      "step": 137000
    },
    {
      "epoch": 1.9824391931832928,
      "grad_norm": 2.511162519454956,
      "learning_rate": 1.6035150449112588e-05,
      "loss": 0.0264,
      "step": 137500
    },
    {
      "epoch": 1.9896480629766864,
      "grad_norm": 0.7365297675132751,
      "learning_rate": 1.6020732709525803e-05,
      "loss": 0.0263,
      "step": 138000
    },
    {
      "epoch": 1.9968569327700805,
      "grad_norm": 0.9106433391571045,
      "learning_rate": 1.6006314969939015e-05,
      "loss": 0.027,
      "step": 138500
    },
    {
      "epoch": 2.0,
      "eval_f1": 0.9915470627263667,
      "eval_loss": 0.02749801054596901,
      "eval_runtime": 1640.2112,
      "eval_samples_per_second": 636.802,
      "eval_steps_per_second": 19.9,
      "step": 138718
    }
  ],
  "logging_steps": 500,
  "max_steps": 693590,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 10,
  "save_steps": 1000.0,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 5,
        "early_stopping_threshold": 0.005
      },
      "attributes": {
        "early_stopping_patience_counter": 1
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 2.35149119187216e+18,
  "train_batch_size": 64,
  "trial_name": null,
  "trial_params": null
}