| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.9685230024213075, |
| "eval_steps": 500, |
| "global_step": 1000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.009685230024213076, |
| "grad_norm": 6.778852939605713, |
| "learning_rate": 2.9999227754514262e-05, |
| "loss": 0.8519, |
| "num_input_tokens_seen": 25568, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.01937046004842615, |
| "grad_norm": 3.0029561519622803, |
| "learning_rate": 2.9996911097572118e-05, |
| "loss": 0.189, |
| "num_input_tokens_seen": 51072, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.029055690072639227, |
| "grad_norm": 5.477710247039795, |
| "learning_rate": 2.9993050267710624e-05, |
| "loss": 0.1648, |
| "num_input_tokens_seen": 76416, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.0387409200968523, |
| "grad_norm": 4.35634183883667, |
| "learning_rate": 2.9987645662464235e-05, |
| "loss": 0.1905, |
| "num_input_tokens_seen": 101344, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.048426150121065374, |
| "grad_norm": 4.523565292358398, |
| "learning_rate": 2.9980697838323884e-05, |
| "loss": 0.1794, |
| "num_input_tokens_seen": 126656, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.05811138014527845, |
| "grad_norm": 1.9348187446594238, |
| "learning_rate": 2.9972207510679677e-05, |
| "loss": 0.1528, |
| "num_input_tokens_seen": 151200, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.06779661016949153, |
| "grad_norm": 2.981433629989624, |
| "learning_rate": 2.996217555374725e-05, |
| "loss": 0.1742, |
| "num_input_tokens_seen": 175968, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.0774818401937046, |
| "grad_norm": 3.6294591426849365, |
| "learning_rate": 2.9950603000477722e-05, |
| "loss": 0.1565, |
| "num_input_tokens_seen": 201280, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.08716707021791767, |
| "grad_norm": 2.5459301471710205, |
| "learning_rate": 2.993749104245137e-05, |
| "loss": 0.1499, |
| "num_input_tokens_seen": 226432, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.09685230024213075, |
| "grad_norm": 2.2721059322357178, |
| "learning_rate": 2.992284102975491e-05, |
| "loss": 0.1441, |
| "num_input_tokens_seen": 251744, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.10653753026634383, |
| "grad_norm": 2.0033624172210693, |
| "learning_rate": 2.9906654470842492e-05, |
| "loss": 0.1245, |
| "num_input_tokens_seen": 276480, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.1162227602905569, |
| "grad_norm": 8.585118293762207, |
| "learning_rate": 2.9888933032380397e-05, |
| "loss": 0.1333, |
| "num_input_tokens_seen": 301664, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.12590799031476999, |
| "grad_norm": 1.423967719078064, |
| "learning_rate": 2.9869678539075403e-05, |
| "loss": 0.1728, |
| "num_input_tokens_seen": 326784, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.13559322033898305, |
| "grad_norm": 2.6306211948394775, |
| "learning_rate": 2.9848892973486912e-05, |
| "loss": 0.1281, |
| "num_input_tokens_seen": 351328, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.14527845036319612, |
| "grad_norm": 2.5618090629577637, |
| "learning_rate": 2.9826578475822825e-05, |
| "loss": 0.1136, |
| "num_input_tokens_seen": 376000, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.1549636803874092, |
| "grad_norm": 2.694077730178833, |
| "learning_rate": 2.980273734371914e-05, |
| "loss": 0.1277, |
| "num_input_tokens_seen": 400384, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.16464891041162227, |
| "grad_norm": 2.632338047027588, |
| "learning_rate": 2.9777372032003423e-05, |
| "loss": 0.1028, |
| "num_input_tokens_seen": 426432, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.17433414043583534, |
| "grad_norm": 2.3446829319000244, |
| "learning_rate": 2.975048515244199e-05, |
| "loss": 0.1245, |
| "num_input_tokens_seen": 451712, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.18401937046004843, |
| "grad_norm": 1.8457319736480713, |
| "learning_rate": 2.9722079473471035e-05, |
| "loss": 0.142, |
| "num_input_tokens_seen": 476960, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.1937046004842615, |
| "grad_norm": 1.8676010370254517, |
| "learning_rate": 2.9692157919911536e-05, |
| "loss": 0.1342, |
| "num_input_tokens_seen": 501440, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.2033898305084746, |
| "grad_norm": 4.593673229217529, |
| "learning_rate": 2.966072357266811e-05, |
| "loss": 0.1314, |
| "num_input_tokens_seen": 526656, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.21307506053268765, |
| "grad_norm": 3.9568676948547363, |
| "learning_rate": 2.9627779668411795e-05, |
| "loss": 0.171, |
| "num_input_tokens_seen": 552544, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.22276029055690072, |
| "grad_norm": 2.4331846237182617, |
| "learning_rate": 2.9593329599246766e-05, |
| "loss": 0.115, |
| "num_input_tokens_seen": 577472, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.2324455205811138, |
| "grad_norm": 2.525543212890625, |
| "learning_rate": 2.955737691236108e-05, |
| "loss": 0.1158, |
| "num_input_tokens_seen": 601856, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.24213075060532688, |
| "grad_norm": 2.2355105876922607, |
| "learning_rate": 2.9519925309661422e-05, |
| "loss": 0.111, |
| "num_input_tokens_seen": 627904, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.25181598062953997, |
| "grad_norm": 4.165389537811279, |
| "learning_rate": 2.948097864739194e-05, |
| "loss": 0.1314, |
| "num_input_tokens_seen": 651936, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.26150121065375304, |
| "grad_norm": 3.1712851524353027, |
| "learning_rate": 2.944054093573719e-05, |
| "loss": 0.143, |
| "num_input_tokens_seen": 676416, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.2711864406779661, |
| "grad_norm": 2.881716728210449, |
| "learning_rate": 2.93986163384092e-05, |
| "loss": 0.1121, |
| "num_input_tokens_seen": 700832, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.28087167070217917, |
| "grad_norm": 3.060872793197632, |
| "learning_rate": 2.9355209172218777e-05, |
| "loss": 0.1159, |
| "num_input_tokens_seen": 725824, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.29055690072639223, |
| "grad_norm": 4.449444770812988, |
| "learning_rate": 2.931032390663101e-05, |
| "loss": 0.133, |
| "num_input_tokens_seen": 749408, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.30024213075060535, |
| "grad_norm": 5.323568344116211, |
| "learning_rate": 2.926396516330506e-05, |
| "loss": 0.1172, |
| "num_input_tokens_seen": 773984, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.3099273607748184, |
| "grad_norm": 3.144500732421875, |
| "learning_rate": 2.921613771561829e-05, |
| "loss": 0.136, |
| "num_input_tokens_seen": 799168, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.3196125907990315, |
| "grad_norm": 2.433586359024048, |
| "learning_rate": 2.916684648817478e-05, |
| "loss": 0.0973, |
| "num_input_tokens_seen": 824320, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.32929782082324455, |
| "grad_norm": 3.349472761154175, |
| "learning_rate": 2.9116096556298256e-05, |
| "loss": 0.13, |
| "num_input_tokens_seen": 849632, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.3389830508474576, |
| "grad_norm": 1.8927061557769775, |
| "learning_rate": 2.9063893145509475e-05, |
| "loss": 0.1257, |
| "num_input_tokens_seen": 874400, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.3486682808716707, |
| "grad_norm": 3.972686529159546, |
| "learning_rate": 2.901024163098822e-05, |
| "loss": 0.1155, |
| "num_input_tokens_seen": 899264, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.3583535108958838, |
| "grad_norm": 1.177282452583313, |
| "learning_rate": 2.8955147537019815e-05, |
| "loss": 0.1251, |
| "num_input_tokens_seen": 924544, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.36803874092009686, |
| "grad_norm": 1.9911576509475708, |
| "learning_rate": 2.88986165364263e-05, |
| "loss": 0.1147, |
| "num_input_tokens_seen": 949792, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.37772397094430993, |
| "grad_norm": 2.402615785598755, |
| "learning_rate": 2.8840654449982344e-05, |
| "loss": 0.1433, |
| "num_input_tokens_seen": 974112, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.387409200968523, |
| "grad_norm": 1.3184998035430908, |
| "learning_rate": 2.8781267245815898e-05, |
| "loss": 0.1117, |
| "num_input_tokens_seen": 999168, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.39709443099273606, |
| "grad_norm": 1.9284625053405762, |
| "learning_rate": 2.8720461038793672e-05, |
| "loss": 0.1353, |
| "num_input_tokens_seen": 1024320, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.4067796610169492, |
| "grad_norm": 3.1020259857177734, |
| "learning_rate": 2.8658242089891515e-05, |
| "loss": 0.1165, |
| "num_input_tokens_seen": 1049088, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.41646489104116224, |
| "grad_norm": 2.203179359436035, |
| "learning_rate": 2.8594616805549752e-05, |
| "loss": 0.1215, |
| "num_input_tokens_seen": 1073632, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.4261501210653753, |
| "grad_norm": 2.053194522857666, |
| "learning_rate": 2.8529591737013526e-05, |
| "loss": 0.1066, |
| "num_input_tokens_seen": 1098208, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.4358353510895884, |
| "grad_norm": 2.780935049057007, |
| "learning_rate": 2.8463173579658258e-05, |
| "loss": 0.0879, |
| "num_input_tokens_seen": 1122336, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.44552058111380144, |
| "grad_norm": 1.9929611682891846, |
| "learning_rate": 2.8395369172300235e-05, |
| "loss": 0.1141, |
| "num_input_tokens_seen": 1147392, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.4552058111380145, |
| "grad_norm": 1.1469779014587402, |
| "learning_rate": 2.8326185496492464e-05, |
| "loss": 0.1052, |
| "num_input_tokens_seen": 1173248, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.4648910411622276, |
| "grad_norm": 2.501117706298828, |
| "learning_rate": 2.825562967580579e-05, |
| "loss": 0.1086, |
| "num_input_tokens_seen": 1197984, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.4745762711864407, |
| "grad_norm": 2.0266308784484863, |
| "learning_rate": 2.8183708975095406e-05, |
| "loss": 0.1201, |
| "num_input_tokens_seen": 1222720, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.48426150121065376, |
| "grad_norm": 1.1120251417160034, |
| "learning_rate": 2.8110430799752845e-05, |
| "loss": 0.1319, |
| "num_input_tokens_seen": 1247232, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.4939467312348668, |
| "grad_norm": 1.2014496326446533, |
| "learning_rate": 2.8035802694943457e-05, |
| "loss": 0.1071, |
| "num_input_tokens_seen": 1273184, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.5036319612590799, |
| "grad_norm": 1.1245245933532715, |
| "learning_rate": 2.7959832344829512e-05, |
| "loss": 0.1554, |
| "num_input_tokens_seen": 1298688, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.513317191283293, |
| "grad_norm": 2.031115770339966, |
| "learning_rate": 2.7882527571779003e-05, |
| "loss": 0.1196, |
| "num_input_tokens_seen": 1324128, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.5230024213075061, |
| "grad_norm": 1.7691289186477661, |
| "learning_rate": 2.78038963355602e-05, |
| "loss": 0.1334, |
| "num_input_tokens_seen": 1349120, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.5326876513317191, |
| "grad_norm": 2.9496989250183105, |
| "learning_rate": 2.7723946732522055e-05, |
| "loss": 0.1109, |
| "num_input_tokens_seen": 1374304, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.5423728813559322, |
| "grad_norm": 2.2881715297698975, |
| "learning_rate": 2.764268699476058e-05, |
| "loss": 0.1274, |
| "num_input_tokens_seen": 1399136, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.5520581113801453, |
| "grad_norm": 1.9754095077514648, |
| "learning_rate": 2.756012548927119e-05, |
| "loss": 0.1397, |
| "num_input_tokens_seen": 1424672, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.5617433414043583, |
| "grad_norm": 1.9883428812026978, |
| "learning_rate": 2.7476270717087215e-05, |
| "loss": 0.101, |
| "num_input_tokens_seen": 1449024, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.5714285714285714, |
| "grad_norm": 0.9653130769729614, |
| "learning_rate": 2.7391131312404556e-05, |
| "loss": 0.0941, |
| "num_input_tokens_seen": 1475264, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.5811138014527845, |
| "grad_norm": 4.576601028442383, |
| "learning_rate": 2.7304716041692663e-05, |
| "loss": 0.0865, |
| "num_input_tokens_seen": 1500064, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.5907990314769975, |
| "grad_norm": 2.4046311378479004, |
| "learning_rate": 2.7217033802791906e-05, |
| "loss": 0.1596, |
| "num_input_tokens_seen": 1524448, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.6004842615012107, |
| "grad_norm": 1.7785555124282837, |
| "learning_rate": 2.7128093623997368e-05, |
| "loss": 0.0891, |
| "num_input_tokens_seen": 1549536, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.6101694915254238, |
| "grad_norm": 2.2736170291900635, |
| "learning_rate": 2.7037904663129262e-05, |
| "loss": 0.1085, |
| "num_input_tokens_seen": 1573408, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.6198547215496368, |
| "grad_norm": 1.0862345695495605, |
| "learning_rate": 2.6946476206589972e-05, |
| "loss": 0.1023, |
| "num_input_tokens_seen": 1597888, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.6295399515738499, |
| "grad_norm": 0.5358290672302246, |
| "learning_rate": 2.6853817668407875e-05, |
| "loss": 0.0669, |
| "num_input_tokens_seen": 1623296, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.639225181598063, |
| "grad_norm": 2.3138749599456787, |
| "learning_rate": 2.6759938589268023e-05, |
| "loss": 0.1017, |
| "num_input_tokens_seen": 1649216, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.648910411622276, |
| "grad_norm": 3.2054226398468018, |
| "learning_rate": 2.6664848635529742e-05, |
| "loss": 0.1432, |
| "num_input_tokens_seen": 1673760, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.6585956416464891, |
| "grad_norm": 1.8352829217910767, |
| "learning_rate": 2.6568557598231385e-05, |
| "loss": 0.1081, |
| "num_input_tokens_seen": 1698592, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.6682808716707022, |
| "grad_norm": 1.203284740447998, |
| "learning_rate": 2.6471075392082125e-05, |
| "loss": 0.1037, |
| "num_input_tokens_seen": 1723296, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.6779661016949152, |
| "grad_norm": 1.635628581047058, |
| "learning_rate": 2.6372412054441116e-05, |
| "loss": 0.1216, |
| "num_input_tokens_seen": 1748384, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.6876513317191283, |
| "grad_norm": 0.8993457555770874, |
| "learning_rate": 2.6272577744283965e-05, |
| "loss": 0.0853, |
| "num_input_tokens_seen": 1773600, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.6973365617433414, |
| "grad_norm": 1.7306419610977173, |
| "learning_rate": 2.617158274115673e-05, |
| "loss": 0.1034, |
| "num_input_tokens_seen": 1798656, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.7070217917675545, |
| "grad_norm": 2.770066976547241, |
| "learning_rate": 2.6069437444117432e-05, |
| "loss": 0.0872, |
| "num_input_tokens_seen": 1824544, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.7167070217917676, |
| "grad_norm": 2.3590221405029297, |
| "learning_rate": 2.596615237066535e-05, |
| "loss": 0.1063, |
| "num_input_tokens_seen": 1848896, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.7263922518159807, |
| "grad_norm": 1.0496519804000854, |
| "learning_rate": 2.586173815565805e-05, |
| "loss": 0.1104, |
| "num_input_tokens_seen": 1873248, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.7360774818401937, |
| "grad_norm": 1.513573408126831, |
| "learning_rate": 2.575620555021634e-05, |
| "loss": 0.1125, |
| "num_input_tokens_seen": 1897184, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.7457627118644068, |
| "grad_norm": 1.5545728206634521, |
| "learning_rate": 2.564956542061732e-05, |
| "loss": 0.0969, |
| "num_input_tokens_seen": 1922368, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.7554479418886199, |
| "grad_norm": 1.9260263442993164, |
| "learning_rate": 2.5541828747175477e-05, |
| "loss": 0.1142, |
| "num_input_tokens_seen": 1947904, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.7651331719128329, |
| "grad_norm": 2.396538734436035, |
| "learning_rate": 2.543300662311211e-05, |
| "loss": 0.0926, |
| "num_input_tokens_seen": 1971872, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.774818401937046, |
| "grad_norm": 1.7069965600967407, |
| "learning_rate": 2.532311025341309e-05, |
| "loss": 0.0802, |
| "num_input_tokens_seen": 1996352, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.784503631961259, |
| "grad_norm": 5.540910243988037, |
| "learning_rate": 2.5212150953675133e-05, |
| "loss": 0.1248, |
| "num_input_tokens_seen": 2020480, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.7941888619854721, |
| "grad_norm": 1.7795952558517456, |
| "learning_rate": 2.5100140148940688e-05, |
| "loss": 0.0767, |
| "num_input_tokens_seen": 2044448, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.8038740920096852, |
| "grad_norm": 2.7387983798980713, |
| "learning_rate": 2.498708937252153e-05, |
| "loss": 0.1239, |
| "num_input_tokens_seen": 2070400, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.8135593220338984, |
| "grad_norm": 2.1243462562561035, |
| "learning_rate": 2.4873010264811222e-05, |
| "loss": 0.108, |
| "num_input_tokens_seen": 2095392, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.8232445520581114, |
| "grad_norm": 0.9928631782531738, |
| "learning_rate": 2.4757914572086555e-05, |
| "loss": 0.0994, |
| "num_input_tokens_seen": 2120192, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.8329297820823245, |
| "grad_norm": 6.047460556030273, |
| "learning_rate": 2.464181414529809e-05, |
| "loss": 0.0927, |
| "num_input_tokens_seen": 2144384, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.8426150121065376, |
| "grad_norm": 2.2197115421295166, |
| "learning_rate": 2.4524720938849883e-05, |
| "loss": 0.1328, |
| "num_input_tokens_seen": 2168704, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.8523002421307506, |
| "grad_norm": 2.0752601623535156, |
| "learning_rate": 2.440664700936861e-05, |
| "loss": 0.1229, |
| "num_input_tokens_seen": 2193248, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.8619854721549637, |
| "grad_norm": 1.00425386428833, |
| "learning_rate": 2.4287604514462152e-05, |
| "loss": 0.0957, |
| "num_input_tokens_seen": 2217568, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.8716707021791767, |
| "grad_norm": 1.9153094291687012, |
| "learning_rate": 2.416760571146774e-05, |
| "loss": 0.0975, |
| "num_input_tokens_seen": 2242048, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.8813559322033898, |
| "grad_norm": 2.3558013439178467, |
| "learning_rate": 2.4046662956189898e-05, |
| "loss": 0.1068, |
| "num_input_tokens_seen": 2266112, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.8910411622276029, |
| "grad_norm": 2.546351909637451, |
| "learning_rate": 2.3924788701628197e-05, |
| "loss": 0.0688, |
| "num_input_tokens_seen": 2290720, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.9007263922518159, |
| "grad_norm": 1.2526168823242188, |
| "learning_rate": 2.3801995496695028e-05, |
| "loss": 0.1141, |
| "num_input_tokens_seen": 2315488, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.910411622276029, |
| "grad_norm": 2.134089231491089, |
| "learning_rate": 2.367829598492348e-05, |
| "loss": 0.1328, |
| "num_input_tokens_seen": 2340992, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.9200968523002422, |
| "grad_norm": 1.332915186882019, |
| "learning_rate": 2.3553702903165502e-05, |
| "loss": 0.1, |
| "num_input_tokens_seen": 2366880, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.9297820823244553, |
| "grad_norm": 1.5140970945358276, |
| "learning_rate": 2.3428229080280407e-05, |
| "loss": 0.1089, |
| "num_input_tokens_seen": 2392000, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.9394673123486683, |
| "grad_norm": 1.531954288482666, |
| "learning_rate": 2.330188743581398e-05, |
| "loss": 0.0924, |
| "num_input_tokens_seen": 2417472, |
| "step": 970 |
| }, |
| { |
| "epoch": 0.9491525423728814, |
| "grad_norm": 1.3347736597061157, |
| "learning_rate": 2.3174690978668155e-05, |
| "loss": 0.1205, |
| "num_input_tokens_seen": 2442496, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.9588377723970944, |
| "grad_norm": 3.1497702598571777, |
| "learning_rate": 2.3046652805761588e-05, |
| "loss": 0.1004, |
| "num_input_tokens_seen": 2467392, |
| "step": 990 |
| }, |
| { |
| "epoch": 0.9685230024213075, |
| "grad_norm": 1.6756023168563843, |
| "learning_rate": 2.2917786100681078e-05, |
| "loss": 0.1007, |
| "num_input_tokens_seen": 2492768, |
| "step": 1000 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 3096, |
| "num_input_tokens_seen": 2492768, |
| "num_train_epochs": 3, |
| "save_steps": 1000, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.0671206790148915e+17, |
| "train_batch_size": 4, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|