| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.9962264150943396, |
| "eval_steps": 500, |
| "global_step": 308, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.00646900269541779, |
| "grad_norm": 1.6469532608759772, |
| "learning_rate": 3.2258064516129035e-07, |
| "loss": 1.7835, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.03234501347708895, |
| "grad_norm": 1.2482964329798694, |
| "learning_rate": 1.6129032258064516e-06, |
| "loss": 1.6751, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.0646900269541779, |
| "grad_norm": 0.8520203638711542, |
| "learning_rate": 3.225806451612903e-06, |
| "loss": 1.4066, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.09703504043126684, |
| "grad_norm": 0.24467662319379258, |
| "learning_rate": 4.838709677419355e-06, |
| "loss": 0.8625, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.1293800539083558, |
| "grad_norm": 0.17065318217659073, |
| "learning_rate": 6.451612903225806e-06, |
| "loss": 0.5845, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.16172506738544473, |
| "grad_norm": 0.12299088556353172, |
| "learning_rate": 8.064516129032258e-06, |
| "loss": 0.481, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.1940700808625337, |
| "grad_norm": 0.10618332452588639, |
| "learning_rate": 9.67741935483871e-06, |
| "loss": 0.4052, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.22641509433962265, |
| "grad_norm": 0.1033707741023481, |
| "learning_rate": 9.994855706800666e-06, |
| "loss": 0.3419, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.2587601078167116, |
| "grad_norm": 0.10584999343170592, |
| "learning_rate": 9.973975156498866e-06, |
| "loss": 0.3256, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.29110512129380056, |
| "grad_norm": 0.0901285523198311, |
| "learning_rate": 9.937103907387626e-06, |
| "loss": 0.2797, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.32345013477088946, |
| "grad_norm": 0.09991878402985224, |
| "learning_rate": 9.884360495852984e-06, |
| "loss": 0.2625, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.3557951482479784, |
| "grad_norm": 0.08464546463539248, |
| "learning_rate": 9.815914485268598e-06, |
| "loss": 0.2351, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.3881401617250674, |
| "grad_norm": 0.08197436606116067, |
| "learning_rate": 9.731985920871028e-06, |
| "loss": 0.2254, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.42048517520215634, |
| "grad_norm": 0.09215717950753108, |
| "learning_rate": 9.63284462234223e-06, |
| "loss": 0.2137, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.4528301886792453, |
| "grad_norm": 0.07752856948079369, |
| "learning_rate": 9.51880931637353e-06, |
| "loss": 0.1888, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.48517520215633425, |
| "grad_norm": 0.07883922492159129, |
| "learning_rate": 9.390246611999754e-06, |
| "loss": 0.1916, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.5175202156334232, |
| "grad_norm": 0.07853251479970542, |
| "learning_rate": 9.247569821997724e-06, |
| "loss": 0.1911, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.5498652291105122, |
| "grad_norm": 0.08737319968077989, |
| "learning_rate": 9.091237634138133e-06, |
| "loss": 0.1711, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.5822102425876011, |
| "grad_norm": 0.07843101893653079, |
| "learning_rate": 8.921752636562582e-06, |
| "loss": 0.1767, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.6145552560646901, |
| "grad_norm": 0.07279664888133161, |
| "learning_rate": 8.739659702026502e-06, |
| "loss": 0.1756, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.6469002695417789, |
| "grad_norm": 0.07368129133912499, |
| "learning_rate": 8.54554423620239e-06, |
| "loss": 0.1578, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.6792452830188679, |
| "grad_norm": 0.08004273889398619, |
| "learning_rate": 8.340030295674887e-06, |
| "loss": 0.1627, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.7115902964959568, |
| "grad_norm": 0.06936426180633778, |
| "learning_rate": 8.123778581678064e-06, |
| "loss": 0.1458, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.7439353099730458, |
| "grad_norm": 0.07394716823095644, |
| "learning_rate": 7.897484316024799e-06, |
| "loss": 0.1502, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.7762803234501348, |
| "grad_norm": 0.06318411109637828, |
| "learning_rate": 7.661875006056914e-06, |
| "loss": 0.1324, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.8086253369272237, |
| "grad_norm": 0.08071371188965755, |
| "learning_rate": 7.417708105801386e-06, |
| "loss": 0.1291, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.8409703504043127, |
| "grad_norm": 0.06518774296350069, |
| "learning_rate": 7.165768580851806e-06, |
| "loss": 0.1399, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.8733153638814016, |
| "grad_norm": 0.06678829904280204, |
| "learning_rate": 6.90686638480362e-06, |
| "loss": 0.1208, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.9056603773584906, |
| "grad_norm": 0.07712741967383008, |
| "learning_rate": 6.6418338553561225e-06, |
| "loss": 0.129, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.9380053908355795, |
| "grad_norm": 0.09245765264021505, |
| "learning_rate": 6.371523038452398e-06, |
| "loss": 0.129, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.9703504043126685, |
| "grad_norm": 0.0813308979175732, |
| "learning_rate": 6.096802949059757e-06, |
| "loss": 0.1406, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.9962264150943396, |
| "eval_loss": 0.1126493588089943, |
| "eval_runtime": 4.6858, |
| "eval_samples_per_second": 16.006, |
| "eval_steps_per_second": 4.055, |
| "step": 154 |
| }, |
| { |
| "epoch": 1.0064690026954177, |
| "grad_norm": 0.11598192209575617, |
| "learning_rate": 5.818556777396923e-06, |
| "loss": 0.1432, |
| "step": 155 |
| }, |
| { |
| "epoch": 1.0388140161725068, |
| "grad_norm": 0.06894087767992937, |
| "learning_rate": 5.537679049589568e-06, |
| "loss": 0.0953, |
| "step": 160 |
| }, |
| { |
| "epoch": 1.0711590296495956, |
| "grad_norm": 0.07124980246876612, |
| "learning_rate": 5.255072751882363e-06, |
| "loss": 0.0936, |
| "step": 165 |
| }, |
| { |
| "epoch": 1.1035040431266847, |
| "grad_norm": 0.07602278070196083, |
| "learning_rate": 4.971646427652806e-06, |
| "loss": 0.1001, |
| "step": 170 |
| }, |
| { |
| "epoch": 1.1358490566037736, |
| "grad_norm": 0.07547518876417068, |
| "learning_rate": 4.688311256559587e-06, |
| "loss": 0.1, |
| "step": 175 |
| }, |
| { |
| "epoch": 1.1681940700808626, |
| "grad_norm": 0.07204342005275981, |
| "learning_rate": 4.405978125215627e-06, |
| "loss": 0.0878, |
| "step": 180 |
| }, |
| { |
| "epoch": 1.2005390835579515, |
| "grad_norm": 0.060953239705185125, |
| "learning_rate": 4.125554698803241e-06, |
| "loss": 0.0879, |
| "step": 185 |
| }, |
| { |
| "epoch": 1.2328840970350403, |
| "grad_norm": 0.07580082959742644, |
| "learning_rate": 3.847942503045776e-06, |
| "loss": 0.0813, |
| "step": 190 |
| }, |
| { |
| "epoch": 1.2652291105121294, |
| "grad_norm": 0.06974818603848078, |
| "learning_rate": 3.5740340259168383e-06, |
| "loss": 0.0796, |
| "step": 195 |
| }, |
| { |
| "epoch": 1.2975741239892185, |
| "grad_norm": 0.06234723254345099, |
| "learning_rate": 3.3047098484047314e-06, |
| "loss": 0.088, |
| "step": 200 |
| }, |
| { |
| "epoch": 1.3299191374663073, |
| "grad_norm": 0.06103357802751846, |
| "learning_rate": 3.040835813556352e-06, |
| "loss": 0.0909, |
| "step": 205 |
| }, |
| { |
| "epoch": 1.3622641509433961, |
| "grad_norm": 0.05846487357821416, |
| "learning_rate": 2.783260242901694e-06, |
| "loss": 0.0809, |
| "step": 210 |
| }, |
| { |
| "epoch": 1.3946091644204852, |
| "grad_norm": 0.06625932022799419, |
| "learning_rate": 2.5328112092077882e-06, |
| "loss": 0.0813, |
| "step": 215 |
| }, |
| { |
| "epoch": 1.426954177897574, |
| "grad_norm": 0.08344401168680668, |
| "learning_rate": 2.2902938743298765e-06, |
| "loss": 0.0849, |
| "step": 220 |
| }, |
| { |
| "epoch": 1.4592991913746631, |
| "grad_norm": 0.06798398373465986, |
| "learning_rate": 2.056487900718227e-06, |
| "loss": 0.0838, |
| "step": 225 |
| }, |
| { |
| "epoch": 1.491644204851752, |
| "grad_norm": 0.06615029905608064, |
| "learning_rate": 1.8321449449023215e-06, |
| "loss": 0.0888, |
| "step": 230 |
| }, |
| { |
| "epoch": 1.523989218328841, |
| "grad_norm": 0.0678269709159078, |
| "learning_rate": 1.6179862410105197e-06, |
| "loss": 0.0768, |
| "step": 235 |
| }, |
| { |
| "epoch": 1.55633423180593, |
| "grad_norm": 0.05517565703970911, |
| "learning_rate": 1.4147002820938743e-06, |
| "loss": 0.0749, |
| "step": 240 |
| }, |
| { |
| "epoch": 1.5886792452830187, |
| "grad_norm": 0.060770439128733256, |
| "learning_rate": 1.2229406067083566e-06, |
| "loss": 0.0815, |
| "step": 245 |
| }, |
| { |
| "epoch": 1.6210242587601078, |
| "grad_norm": 0.06213193516806644, |
| "learning_rate": 1.0433236978713546e-06, |
| "loss": 0.0783, |
| "step": 250 |
| }, |
| { |
| "epoch": 1.6533692722371969, |
| "grad_norm": 0.07190883052211446, |
| "learning_rate": 8.764270011470144e-07, |
| "loss": 0.0762, |
| "step": 255 |
| }, |
| { |
| "epoch": 1.6857142857142857, |
| "grad_norm": 0.0596697946070157, |
| "learning_rate": 7.227870682320432e-07, |
| "loss": 0.0755, |
| "step": 260 |
| }, |
| { |
| "epoch": 1.7180592991913746, |
| "grad_norm": 0.05413502291668993, |
| "learning_rate": 5.828978320101109e-07, |
| "loss": 0.0832, |
| "step": 265 |
| }, |
| { |
| "epoch": 1.7504043126684636, |
| "grad_norm": 0.07737072813418777, |
| "learning_rate": 4.572090186203171e-07, |
| "loss": 0.082, |
| "step": 270 |
| }, |
| { |
| "epoch": 1.7827493261455527, |
| "grad_norm": 0.06289367757714523, |
| "learning_rate": 3.461247016447372e-07, |
| "loss": 0.0725, |
| "step": 275 |
| }, |
| { |
| "epoch": 1.8150943396226416, |
| "grad_norm": 0.06648151536128295, |
| "learning_rate": 2.500020030631356e-07, |
| "loss": 0.0806, |
| "step": 280 |
| }, |
| { |
| "epoch": 1.8474393530997304, |
| "grad_norm": 0.059849953071029575, |
| "learning_rate": 1.6914994515114082e-07, |
| "loss": 0.0758, |
| "step": 285 |
| }, |
| { |
| "epoch": 1.8797843665768195, |
| "grad_norm": 0.06154381772700072, |
| "learning_rate": 1.0382845701284228e-07, |
| "loss": 0.0737, |
| "step": 290 |
| }, |
| { |
| "epoch": 1.9121293800539083, |
| "grad_norm": 0.06222625997511854, |
| "learning_rate": 5.424753894171519e-08, |
| "loss": 0.0706, |
| "step": 295 |
| }, |
| { |
| "epoch": 1.9444743935309972, |
| "grad_norm": 0.06835624541815795, |
| "learning_rate": 2.056658729633121e-08, |
| "loss": 0.0753, |
| "step": 300 |
| }, |
| { |
| "epoch": 1.9768194070080862, |
| "grad_norm": 0.05761761360710151, |
| "learning_rate": 2.8938820612961494e-09, |
| "loss": 0.0659, |
| "step": 305 |
| }, |
| { |
| "epoch": 1.9962264150943396, |
| "eval_loss": 0.08172182738780975, |
| "eval_runtime": 4.3645, |
| "eval_samples_per_second": 17.184, |
| "eval_steps_per_second": 4.353, |
| "step": 308 |
| }, |
| { |
| "epoch": 1.9962264150943396, |
| "step": 308, |
| "total_flos": 1.0209786248256553e+18, |
| "train_loss": 0.2044623964405679, |
| "train_runtime": 3413.7868, |
| "train_samples_per_second": 4.347, |
| "train_steps_per_second": 0.09 |
| } |
| ], |
| "logging_steps": 5, |
| "max_steps": 308, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 2, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.0209786248256553e+18, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|