| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.6387225548902196, |
| "eval_steps": 500, |
| "global_step": 1000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.006387225548902195, |
| "grad_norm": 3.5834698688009654, |
| "learning_rate": 4.007141788485205e-05, |
| "loss": 0.8162, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.01277445109780439, |
| "grad_norm": 3.7982361966359415, |
| "learning_rate": 5.213411663697864e-05, |
| "loss": 0.4857, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.019161676646706587, |
| "grad_norm": 3.3097747900168066, |
| "learning_rate": 5.919034306446858e-05, |
| "loss": 0.1919, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.02554890219560878, |
| "grad_norm": 1.6554650990460391, |
| "learning_rate": 6.419681538910523e-05, |
| "loss": 0.1177, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.031936127744510975, |
| "grad_norm": 1.0526158883209893, |
| "learning_rate": 6.80801370175775e-05, |
| "loss": 0.0997, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.03832335329341317, |
| "grad_norm": 1.322844958779453, |
| "learning_rate": 7.125304181659517e-05, |
| "loss": 0.0987, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.04471057884231537, |
| "grad_norm": 0.906098163414632, |
| "learning_rate": 7.393569459993276e-05, |
| "loss": 0.0902, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.05109780439121756, |
| "grad_norm": 0.9742791206427208, |
| "learning_rate": 7.625951414123182e-05, |
| "loss": 0.08, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.05748502994011976, |
| "grad_norm": 0.7369633538190276, |
| "learning_rate": 7.83092682440851e-05, |
| "loss": 0.0598, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.06387225548902195, |
| "grad_norm": 0.6187428023320575, |
| "learning_rate": 8.01428357697041e-05, |
| "loss": 0.0836, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.07025948103792415, |
| "grad_norm": 0.7404818716311581, |
| "learning_rate": 8.180149935405545e-05, |
| "loss": 0.0758, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.07664670658682635, |
| "grad_norm": 0.4603749462575225, |
| "learning_rate": 8.331574056872175e-05, |
| "loss": 0.0698, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.08303393213572854, |
| "grad_norm": 0.5780703019259222, |
| "learning_rate": 8.470870745519228e-05, |
| "loss": 0.0771, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.08942115768463074, |
| "grad_norm": 0.7869338127501012, |
| "learning_rate": 8.599839335205934e-05, |
| "loss": 0.0741, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.09580838323353294, |
| "grad_norm": 0.5085534390538137, |
| "learning_rate": 8.719906219719403e-05, |
| "loss": 0.0637, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.10219560878243512, |
| "grad_norm": 0.6419093548122555, |
| "learning_rate": 8.83222128933584e-05, |
| "loss": 0.0683, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.10858283433133732, |
| "grad_norm": 0.4014377657568054, |
| "learning_rate": 8.937725079936634e-05, |
| "loss": 0.0732, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.11497005988023952, |
| "grad_norm": 0.4686184226271436, |
| "learning_rate": 9.03719669962117e-05, |
| "loss": 0.0727, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.12135728542914172, |
| "grad_norm": 0.3553126799550481, |
| "learning_rate": 9.13128878003922e-05, |
| "loss": 0.0642, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.1277445109780439, |
| "grad_norm": 0.2308342524288462, |
| "learning_rate": 9.220553452183068e-05, |
| "loss": 0.0662, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.1341317365269461, |
| "grad_norm": 0.4244978948516096, |
| "learning_rate": 9.305461977954928e-05, |
| "loss": 0.0653, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.1405189620758483, |
| "grad_norm": 0.6491159925885343, |
| "learning_rate": 9.386419810618205e-05, |
| "loss": 0.0692, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.1469061876247505, |
| "grad_norm": 0.6586709935585672, |
| "learning_rate": 9.46377830473483e-05, |
| "loss": 0.0603, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.1532934131736527, |
| "grad_norm": 0.4334717142812121, |
| "learning_rate": 9.537843932084834e-05, |
| "loss": 0.075, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.1596806387225549, |
| "grad_norm": 0.31119556502835766, |
| "learning_rate": 9.608885615030295e-05, |
| "loss": 0.0646, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.1660678642714571, |
| "grad_norm": 0.3201297094259269, |
| "learning_rate": 9.677140620731887e-05, |
| "loss": 0.0687, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.1724550898203593, |
| "grad_norm": 0.3607671741835552, |
| "learning_rate": 9.742819342370165e-05, |
| "loss": 0.0611, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.17884231536926148, |
| "grad_norm": 0.2599965595224881, |
| "learning_rate": 9.806109210418593e-05, |
| "loss": 0.0682, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.18522954091816368, |
| "grad_norm": 0.32241547083298766, |
| "learning_rate": 9.86717791726321e-05, |
| "loss": 0.0612, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.19161676646706588, |
| "grad_norm": 0.48624224094653656, |
| "learning_rate": 9.926176094932063e-05, |
| "loss": 0.0569, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.19800399201596808, |
| "grad_norm": 0.3035297139442493, |
| "learning_rate": 9.983239553594597e-05, |
| "loss": 0.0581, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.20439121756487025, |
| "grad_norm": 0.2209458722911404, |
| "learning_rate": 9.97870074547391e-05, |
| "loss": 0.0579, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.21077844311377245, |
| "grad_norm": 0.32849509401911653, |
| "learning_rate": 9.943201987930422e-05, |
| "loss": 0.0594, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.21716566866267464, |
| "grad_norm": 0.3121594199523004, |
| "learning_rate": 9.907703230386936e-05, |
| "loss": 0.0689, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.22355289421157684, |
| "grad_norm": 0.37282218523682487, |
| "learning_rate": 9.87220447284345e-05, |
| "loss": 0.0591, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.22994011976047904, |
| "grad_norm": 0.21932185059016668, |
| "learning_rate": 9.836705715299965e-05, |
| "loss": 0.0561, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.23632734530938124, |
| "grad_norm": 0.40431591808333317, |
| "learning_rate": 9.801206957756479e-05, |
| "loss": 0.0547, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.24271457085828343, |
| "grad_norm": 0.3545382413148979, |
| "learning_rate": 9.765708200212993e-05, |
| "loss": 0.068, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.24910179640718563, |
| "grad_norm": 0.3708116952070532, |
| "learning_rate": 9.730209442669507e-05, |
| "loss": 0.0644, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.2554890219560878, |
| "grad_norm": 0.2933151282788518, |
| "learning_rate": 9.69471068512602e-05, |
| "loss": 0.0554, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.26187624750499, |
| "grad_norm": 0.2701122564574567, |
| "learning_rate": 9.659211927582535e-05, |
| "loss": 0.0669, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.2682634730538922, |
| "grad_norm": 0.43233913171276295, |
| "learning_rate": 9.62371317003905e-05, |
| "loss": 0.0696, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.2746506986027944, |
| "grad_norm": 0.29840407730897334, |
| "learning_rate": 9.588214412495564e-05, |
| "loss": 0.0583, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.2810379241516966, |
| "grad_norm": 0.347519310387429, |
| "learning_rate": 9.552715654952076e-05, |
| "loss": 0.0675, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.2874251497005988, |
| "grad_norm": 0.23159546291731048, |
| "learning_rate": 9.51721689740859e-05, |
| "loss": 0.0595, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.293812375249501, |
| "grad_norm": 0.19539165396565417, |
| "learning_rate": 9.481718139865106e-05, |
| "loss": 0.0593, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.3001996007984032, |
| "grad_norm": 0.3711679298627976, |
| "learning_rate": 9.446219382321619e-05, |
| "loss": 0.0576, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.3065868263473054, |
| "grad_norm": 0.21594608036662566, |
| "learning_rate": 9.410720624778133e-05, |
| "loss": 0.0612, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.3129740518962076, |
| "grad_norm": 0.38847976792533884, |
| "learning_rate": 9.375221867234647e-05, |
| "loss": 0.0558, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.3193612774451098, |
| "grad_norm": 0.48499489939381346, |
| "learning_rate": 9.339723109691161e-05, |
| "loss": 0.0598, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.325748502994012, |
| "grad_norm": 0.26871455217005846, |
| "learning_rate": 9.304224352147675e-05, |
| "loss": 0.0599, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.3321357285429142, |
| "grad_norm": 0.3006779859330303, |
| "learning_rate": 9.26872559460419e-05, |
| "loss": 0.0533, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.3385229540918164, |
| "grad_norm": 0.252910708387006, |
| "learning_rate": 9.233226837060704e-05, |
| "loss": 0.0609, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.3449101796407186, |
| "grad_norm": 0.28615902583412134, |
| "learning_rate": 9.197728079517217e-05, |
| "loss": 0.0632, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.35129740518962077, |
| "grad_norm": 0.3309472244637015, |
| "learning_rate": 9.162229321973731e-05, |
| "loss": 0.0657, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.35768463073852297, |
| "grad_norm": 0.3181277078140241, |
| "learning_rate": 9.126730564430246e-05, |
| "loss": 0.0589, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.36407185628742517, |
| "grad_norm": 0.5196112858300266, |
| "learning_rate": 9.09123180688676e-05, |
| "loss": 0.0645, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.37045908183632736, |
| "grad_norm": 0.4316495610405202, |
| "learning_rate": 9.055733049343273e-05, |
| "loss": 0.0659, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.37684630738522956, |
| "grad_norm": 0.25853510285529524, |
| "learning_rate": 9.020234291799787e-05, |
| "loss": 0.0582, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.38323353293413176, |
| "grad_norm": 0.28576780733983087, |
| "learning_rate": 8.984735534256301e-05, |
| "loss": 0.0531, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.38962075848303396, |
| "grad_norm": 0.30628185946436504, |
| "learning_rate": 8.949236776712816e-05, |
| "loss": 0.0577, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.39600798403193616, |
| "grad_norm": 0.22146965371135377, |
| "learning_rate": 8.91373801916933e-05, |
| "loss": 0.0571, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.4023952095808383, |
| "grad_norm": 0.2720642528700885, |
| "learning_rate": 8.878239261625844e-05, |
| "loss": 0.059, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.4087824351297405, |
| "grad_norm": 0.35619962554831525, |
| "learning_rate": 8.842740504082358e-05, |
| "loss": 0.0614, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.4151696606786427, |
| "grad_norm": 0.28668258535201413, |
| "learning_rate": 8.807241746538871e-05, |
| "loss": 0.0699, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.4215568862275449, |
| "grad_norm": 0.35772133791366534, |
| "learning_rate": 8.771742988995385e-05, |
| "loss": 0.0599, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.4279441117764471, |
| "grad_norm": 0.3547047177760543, |
| "learning_rate": 8.7362442314519e-05, |
| "loss": 0.0563, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.4343313373253493, |
| "grad_norm": 0.24898549550006935, |
| "learning_rate": 8.700745473908413e-05, |
| "loss": 0.0553, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.4407185628742515, |
| "grad_norm": 0.34439451125458864, |
| "learning_rate": 8.665246716364927e-05, |
| "loss": 0.0525, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.4471057884231537, |
| "grad_norm": 0.27788279819961054, |
| "learning_rate": 8.629747958821441e-05, |
| "loss": 0.0559, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.4534930139720559, |
| "grad_norm": 0.358392882540888, |
| "learning_rate": 8.594249201277956e-05, |
| "loss": 0.0657, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.4598802395209581, |
| "grad_norm": 0.3111624222426367, |
| "learning_rate": 8.55875044373447e-05, |
| "loss": 0.0472, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.4662674650698603, |
| "grad_norm": 0.26806713373994445, |
| "learning_rate": 8.523251686190984e-05, |
| "loss": 0.0696, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.4726546906187625, |
| "grad_norm": 0.2748488176683564, |
| "learning_rate": 8.487752928647498e-05, |
| "loss": 0.063, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.47904191616766467, |
| "grad_norm": 0.18592617627322836, |
| "learning_rate": 8.452254171104012e-05, |
| "loss": 0.0609, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.48542914171656687, |
| "grad_norm": 0.38695621265068386, |
| "learning_rate": 8.416755413560525e-05, |
| "loss": 0.062, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.49181636726546907, |
| "grad_norm": 0.3158810203428474, |
| "learning_rate": 8.38125665601704e-05, |
| "loss": 0.0592, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.49820359281437127, |
| "grad_norm": 0.3362065389804566, |
| "learning_rate": 8.345757898473555e-05, |
| "loss": 0.0613, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.5045908183632735, |
| "grad_norm": 0.25748557706326924, |
| "learning_rate": 8.310259140930067e-05, |
| "loss": 0.0529, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.5109780439121756, |
| "grad_norm": 0.29455626027596066, |
| "learning_rate": 8.274760383386582e-05, |
| "loss": 0.065, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.5173652694610779, |
| "grad_norm": 0.2564019757489936, |
| "learning_rate": 8.239261625843096e-05, |
| "loss": 0.0579, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.52375249500998, |
| "grad_norm": 0.228508320126866, |
| "learning_rate": 8.20376286829961e-05, |
| "loss": 0.0618, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.5301397205588823, |
| "grad_norm": 0.17395934565113735, |
| "learning_rate": 8.168264110756124e-05, |
| "loss": 0.0601, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.5365269461077844, |
| "grad_norm": 0.1845587781224566, |
| "learning_rate": 8.132765353212638e-05, |
| "loss": 0.0646, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.5429141716566867, |
| "grad_norm": 0.219121974587572, |
| "learning_rate": 8.097266595669152e-05, |
| "loss": 0.0602, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.5493013972055888, |
| "grad_norm": 0.3134533264569832, |
| "learning_rate": 8.061767838125665e-05, |
| "loss": 0.0593, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.555688622754491, |
| "grad_norm": 0.36630204830289737, |
| "learning_rate": 8.02626908058218e-05, |
| "loss": 0.0606, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.5620758483033932, |
| "grad_norm": 0.2390442131459619, |
| "learning_rate": 7.990770323038695e-05, |
| "loss": 0.0572, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.5684630738522954, |
| "grad_norm": 0.2906363057717337, |
| "learning_rate": 7.955271565495209e-05, |
| "loss": 0.0687, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.5748502994011976, |
| "grad_norm": 0.2853917682780398, |
| "learning_rate": 7.919772807951722e-05, |
| "loss": 0.0643, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.5812375249500998, |
| "grad_norm": 0.3544408088733472, |
| "learning_rate": 7.884274050408236e-05, |
| "loss": 0.0574, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.587624750499002, |
| "grad_norm": 0.24181631469575632, |
| "learning_rate": 7.848775292864751e-05, |
| "loss": 0.0697, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.5940119760479042, |
| "grad_norm": 0.28668164498739, |
| "learning_rate": 7.813276535321264e-05, |
| "loss": 0.0641, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.6003992015968064, |
| "grad_norm": 0.29519716447463473, |
| "learning_rate": 7.777777777777778e-05, |
| "loss": 0.0547, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.6067864271457086, |
| "grad_norm": 0.2797455777496235, |
| "learning_rate": 7.742279020234292e-05, |
| "loss": 0.0622, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.6131736526946108, |
| "grad_norm": 0.20698136186718125, |
| "learning_rate": 7.706780262690806e-05, |
| "loss": 0.0538, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.619560878243513, |
| "grad_norm": 0.3029284687538603, |
| "learning_rate": 7.671281505147319e-05, |
| "loss": 0.0553, |
| "step": 970 |
| }, |
| { |
| "epoch": 0.6259481037924152, |
| "grad_norm": 0.3308461091763622, |
| "learning_rate": 7.635782747603835e-05, |
| "loss": 0.0592, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.6323353293413174, |
| "grad_norm": 0.2652350297520287, |
| "learning_rate": 7.600283990060349e-05, |
| "loss": 0.0611, |
| "step": 990 |
| }, |
| { |
| "epoch": 0.6387225548902196, |
| "grad_norm": 0.20354784105739357, |
| "learning_rate": 7.564785232516862e-05, |
| "loss": 0.059, |
| "step": 1000 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 3130, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 2, |
| "save_steps": 200, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 8, |
| "trial_name": null, |
| "trial_params": null |
| } |