| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.0, |
| "eval_steps": 500, |
| "global_step": 7378, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.013553808620222282, |
| "grad_norm": 0.1191592738032341, |
| "learning_rate": 2.7137042062415198e-06, |
| "loss": 2.4955, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.027107617240444564, |
| "grad_norm": 0.26774927973747253, |
| "learning_rate": 5.4274084124830395e-06, |
| "loss": 2.4596, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.040661425860666844, |
| "grad_norm": 0.22501784563064575, |
| "learning_rate": 8.14111261872456e-06, |
| "loss": 2.4267, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.05421523448088913, |
| "grad_norm": 0.3591971695423126, |
| "learning_rate": 1.0854816824966079e-05, |
| "loss": 2.4028, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.06776904310111141, |
| "grad_norm": 0.36190351843833923, |
| "learning_rate": 1.35685210312076e-05, |
| "loss": 2.3537, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.08132285172133369, |
| "grad_norm": 0.4355062246322632, |
| "learning_rate": 1.628222523744912e-05, |
| "loss": 2.2783, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.09487666034155598, |
| "grad_norm": 0.4676553010940552, |
| "learning_rate": 1.899592944369064e-05, |
| "loss": 2.2975, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.10843046896177826, |
| "grad_norm": 0.5140531063079834, |
| "learning_rate": 1.9995559296849784e-05, |
| "loss": 2.2424, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.12198427758200055, |
| "grad_norm": 0.5148899555206299, |
| "learning_rate": 1.9970285884793442e-05, |
| "loss": 2.288, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.13553808620222282, |
| "grad_norm": 0.6413929462432861, |
| "learning_rate": 1.992270453601682e-05, |
| "loss": 2.2605, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.14909189482244511, |
| "grad_norm": 0.615112841129303, |
| "learning_rate": 1.985292171102966e-05, |
| "loss": 2.193, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.16264570344266738, |
| "grad_norm": 0.5734832882881165, |
| "learning_rate": 1.976109354485778e-05, |
| "loss": 2.1929, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.17619951206288967, |
| "grad_norm": 0.9036373496055603, |
| "learning_rate": 1.9647425497699984e-05, |
| "loss": 2.1792, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.18975332068311196, |
| "grad_norm": 0.6254833936691284, |
| "learning_rate": 1.9512171895222806e-05, |
| "loss": 2.1922, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.20330712930333425, |
| "grad_norm": 0.6832364797592163, |
| "learning_rate": 1.935563535952155e-05, |
| "loss": 2.1794, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.2168609379235565, |
| "grad_norm": 0.7471606731414795, |
| "learning_rate": 1.91781661320209e-05, |
| "loss": 2.2264, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.2304147465437788, |
| "grad_norm": 0.7134560346603394, |
| "learning_rate": 1.898016128983004e-05, |
| "loss": 2.1893, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.2439685551640011, |
| "grad_norm": 0.585695743560791, |
| "learning_rate": 1.8762063857305644e-05, |
| "loss": 2.1997, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.2575223637842234, |
| "grad_norm": 1.138268232345581, |
| "learning_rate": 1.8524361814810542e-05, |
| "loss": 2.149, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.27107617240444565, |
| "grad_norm": 0.9364109039306641, |
| "learning_rate": 1.826758700688596e-05, |
| "loss": 2.1775, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.2846299810246679, |
| "grad_norm": 0.849953293800354, |
| "learning_rate": 1.7992313952280175e-05, |
| "loss": 2.1315, |
| "step": 2100 |
| }, |
| { |
| "epoch": 0.29818378964489023, |
| "grad_norm": 0.8387102484703064, |
| "learning_rate": 1.7699158558496127e-05, |
| "loss": 2.1819, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.3117375982651125, |
| "grad_norm": 0.7546108365058899, |
| "learning_rate": 1.73887767437341e-05, |
| "loss": 2.1215, |
| "step": 2300 |
| }, |
| { |
| "epoch": 0.32529140688533476, |
| "grad_norm": 0.5941405296325684, |
| "learning_rate": 1.7061862969312734e-05, |
| "loss": 2.1457, |
| "step": 2400 |
| }, |
| { |
| "epoch": 0.3388452155055571, |
| "grad_norm": 0.8476601243019104, |
| "learning_rate": 1.6719148685852103e-05, |
| "loss": 2.1349, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.35239902412577934, |
| "grad_norm": 0.6843703985214233, |
| "learning_rate": 1.6361400696695352e-05, |
| "loss": 2.1708, |
| "step": 2600 |
| }, |
| { |
| "epoch": 0.3659528327460016, |
| "grad_norm": 0.8201411366462708, |
| "learning_rate": 1.5989419442230672e-05, |
| "loss": 2.1408, |
| "step": 2700 |
| }, |
| { |
| "epoch": 0.3795066413662239, |
| "grad_norm": 0.8031175136566162, |
| "learning_rate": 1.5604037208952308e-05, |
| "loss": 2.1407, |
| "step": 2800 |
| }, |
| { |
| "epoch": 0.3930604499864462, |
| "grad_norm": 0.7488608360290527, |
| "learning_rate": 1.520611626726779e-05, |
| "loss": 2.1288, |
| "step": 2900 |
| }, |
| { |
| "epoch": 0.4066142586066685, |
| "grad_norm": 0.8462947607040405, |
| "learning_rate": 1.4796546942217882e-05, |
| "loss": 2.0887, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.42016806722689076, |
| "grad_norm": 0.9811045527458191, |
| "learning_rate": 1.4376245621425904e-05, |
| "loss": 2.1211, |
| "step": 3100 |
| }, |
| { |
| "epoch": 0.433721875847113, |
| "grad_norm": 0.7871158719062805, |
| "learning_rate": 1.3946152704733542e-05, |
| "loss": 2.0577, |
| "step": 3200 |
| }, |
| { |
| "epoch": 0.44727568446733534, |
| "grad_norm": 0.7200583815574646, |
| "learning_rate": 1.3507230500110733e-05, |
| "loss": 2.0763, |
| "step": 3300 |
| }, |
| { |
| "epoch": 0.4608294930875576, |
| "grad_norm": 1.2278062105178833, |
| "learning_rate": 1.3060461070547336e-05, |
| "loss": 2.0654, |
| "step": 3400 |
| }, |
| { |
| "epoch": 0.47438330170777987, |
| "grad_norm": 1.0776195526123047, |
| "learning_rate": 1.2606844036744152e-05, |
| "loss": 2.1158, |
| "step": 3500 |
| }, |
| { |
| "epoch": 0.4879371103280022, |
| "grad_norm": 0.8359533548355103, |
| "learning_rate": 1.2147394340519519e-05, |
| "loss": 2.078, |
| "step": 3600 |
| }, |
| { |
| "epoch": 0.5014909189482244, |
| "grad_norm": 0.7208207249641418, |
| "learning_rate": 1.1683139973935847e-05, |
| "loss": 2.0443, |
| "step": 3700 |
| }, |
| { |
| "epoch": 0.5150447275684468, |
| "grad_norm": 0.8674055337905884, |
| "learning_rate": 1.1215119679226966e-05, |
| "loss": 2.0315, |
| "step": 3800 |
| }, |
| { |
| "epoch": 0.528598536188669, |
| "grad_norm": 0.8693380951881409, |
| "learning_rate": 1.074438062467258e-05, |
| "loss": 2.0985, |
| "step": 3900 |
| }, |
| { |
| "epoch": 0.5421523448088913, |
| "grad_norm": 1.1561861038208008, |
| "learning_rate": 1.027197606161996e-05, |
| "loss": 2.092, |
| "step": 4000 |
| }, |
| { |
| "epoch": 0.5557061534291136, |
| "grad_norm": 1.33693265914917, |
| "learning_rate": 9.798962967895082e-06, |
| "loss": 2.1118, |
| "step": 4100 |
| }, |
| { |
| "epoch": 0.5692599620493358, |
| "grad_norm": 0.8483671545982361, |
| "learning_rate": 9.326399682876032e-06, |
| "loss": 2.0907, |
| "step": 4200 |
| }, |
| { |
| "epoch": 0.5828137706695582, |
| "grad_norm": 0.8882675170898438, |
| "learning_rate": 8.855343539520006e-06, |
| "loss": 2.1293, |
| "step": 4300 |
| }, |
| { |
| "epoch": 0.5963675792897805, |
| "grad_norm": 0.7699640989303589, |
| "learning_rate": 8.386848498642072e-06, |
| "loss": 2.1274, |
| "step": 4400 |
| }, |
| { |
| "epoch": 0.6099213879100027, |
| "grad_norm": 1.0404322147369385, |
| "learning_rate": 7.921962790738976e-06, |
| "loss": 2.0554, |
| "step": 4500 |
| }, |
| { |
| "epoch": 0.623475196530225, |
| "grad_norm": 0.8978158831596375, |
| "learning_rate": 7.46172657063414e-06, |
| "loss": 2.1415, |
| "step": 4600 |
| }, |
| { |
| "epoch": 0.6370290051504472, |
| "grad_norm": 1.3695547580718994, |
| "learning_rate": 7.007169590191574e-06, |
| "loss": 2.0975, |
| "step": 4700 |
| }, |
| { |
| "epoch": 0.6505828137706695, |
| "grad_norm": 0.80988609790802, |
| "learning_rate": 6.5593088943057386e-06, |
| "loss": 2.1267, |
| "step": 4800 |
| }, |
| { |
| "epoch": 0.6641366223908919, |
| "grad_norm": 1.1780813932418823, |
| "learning_rate": 6.119146545322567e-06, |
| "loss": 2.0794, |
| "step": 4900 |
| }, |
| { |
| "epoch": 0.6776904310111141, |
| "grad_norm": 0.9567013382911682, |
| "learning_rate": 5.687667380983037e-06, |
| "loss": 2.095, |
| "step": 5000 |
| }, |
| { |
| "epoch": 0.6912442396313364, |
| "grad_norm": 1.1569477319717407, |
| "learning_rate": 5.265836810905844e-06, |
| "loss": 2.0693, |
| "step": 5100 |
| }, |
| { |
| "epoch": 0.7047980482515587, |
| "grad_norm": 0.9160040020942688, |
| "learning_rate": 4.854598656539305e-06, |
| "loss": 2.1312, |
| "step": 5200 |
| }, |
| { |
| "epoch": 0.7183518568717809, |
| "grad_norm": 0.8206405639648438, |
| "learning_rate": 4.454873039415593e-06, |
| "loss": 2.0722, |
| "step": 5300 |
| }, |
| { |
| "epoch": 0.7319056654920032, |
| "grad_norm": 1.1364679336547852, |
| "learning_rate": 4.067554322432159e-06, |
| "loss": 2.0582, |
| "step": 5400 |
| }, |
| { |
| "epoch": 0.7454594741122256, |
| "grad_norm": 1.1402835845947266, |
| "learning_rate": 3.6935091087665677e-06, |
| "loss": 2.1207, |
| "step": 5500 |
| }, |
| { |
| "epoch": 0.7590132827324478, |
| "grad_norm": 1.0124040842056274, |
| "learning_rate": 3.333574302902145e-06, |
| "loss": 2.0439, |
| "step": 5600 |
| }, |
| { |
| "epoch": 0.7725670913526701, |
| "grad_norm": 1.3236887454986572, |
| "learning_rate": 2.9885552381026927e-06, |
| "loss": 2.1332, |
| "step": 5700 |
| }, |
| { |
| "epoch": 0.7861208999728924, |
| "grad_norm": 0.9097657799720764, |
| "learning_rate": 2.659223874525996e-06, |
| "loss": 2.1608, |
| "step": 5800 |
| }, |
| { |
| "epoch": 0.7996747085931146, |
| "grad_norm": 0.8172687292098999, |
| "learning_rate": 2.34631707200773e-06, |
| "loss": 2.1053, |
| "step": 5900 |
| }, |
| { |
| "epoch": 0.813228517213337, |
| "grad_norm": 0.9772549271583557, |
| "learning_rate": 2.050534941380283e-06, |
| "loss": 2.0312, |
| "step": 6000 |
| }, |
| { |
| "epoch": 0.8267823258335593, |
| "grad_norm": 1.0267835855484009, |
| "learning_rate": 1.7725392780153484e-06, |
| "loss": 2.0607, |
| "step": 6100 |
| }, |
| { |
| "epoch": 0.8403361344537815, |
| "grad_norm": 1.166391134262085, |
| "learning_rate": 1.5129520810951426e-06, |
| "loss": 2.0938, |
| "step": 6200 |
| }, |
| { |
| "epoch": 0.8538899430740038, |
| "grad_norm": 1.170172095298767, |
| "learning_rate": 1.2723541619253044e-06, |
| "loss": 2.0053, |
| "step": 6300 |
| }, |
| { |
| "epoch": 0.867443751694226, |
| "grad_norm": 1.0053555965423584, |
| "learning_rate": 1.0512838444032515e-06, |
| "loss": 2.0844, |
| "step": 6400 |
| }, |
| { |
| "epoch": 0.8809975603144483, |
| "grad_norm": 0.8027963638305664, |
| "learning_rate": 8.502357605496692e-07, |
| "loss": 2.1124, |
| "step": 6500 |
| }, |
| { |
| "epoch": 0.8945513689346707, |
| "grad_norm": 0.6970746517181396, |
| "learning_rate": 6.696597437980367e-07, |
| "loss": 2.0236, |
| "step": 6600 |
| }, |
| { |
| "epoch": 0.908105177554893, |
| "grad_norm": 1.0525530576705933, |
| "learning_rate": 5.099598225183966e-07, |
| "loss": 2.0594, |
| "step": 6700 |
| }, |
| { |
| "epoch": 0.9216589861751152, |
| "grad_norm": 1.0885214805603027, |
| "learning_rate": 3.714933160273004e-07, |
| "loss": 2.0454, |
| "step": 6800 |
| }, |
| { |
| "epoch": 0.9352127947953375, |
| "grad_norm": 0.9120861291885376, |
| "learning_rate": 2.5457003510654055e-07, |
| "loss": 2.069, |
| "step": 6900 |
| }, |
| { |
| "epoch": 0.9487666034155597, |
| "grad_norm": 1.7351340055465698, |
| "learning_rate": 1.594515888194903e-07, |
| "loss": 2.0947, |
| "step": 7000 |
| }, |
| { |
| "epoch": 0.962320412035782, |
| "grad_norm": 0.9601479172706604, |
| "learning_rate": 8.635079917599376e-08, |
| "loss": 2.1024, |
| "step": 7100 |
| }, |
| { |
| "epoch": 0.9758742206560044, |
| "grad_norm": 0.9935265779495239, |
| "learning_rate": 3.543122495545004e-08, |
| "loss": 2.0988, |
| "step": 7200 |
| }, |
| { |
| "epoch": 0.9894280292762266, |
| "grad_norm": 0.6805266737937927, |
| "learning_rate": 6.806795753524498e-09, |
| "loss": 2.0827, |
| "step": 7300 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_loss": 1.4551358222961426, |
| "eval_runtime": 26.4185, |
| "eval_samples_per_second": 8.857, |
| "eval_steps_per_second": 1.136, |
| "step": 7378 |
| }, |
| { |
| "epoch": 1.0, |
| "step": 7378, |
| "total_flos": 1.3438841192448e+17, |
| "train_loss": 2.142452607086339, |
| "train_runtime": 3524.2818, |
| "train_samples_per_second": 4.187, |
| "train_steps_per_second": 2.093 |
| } |
| ], |
| "logging_steps": 100, |
| "max_steps": 7378, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.3438841192448e+17, |
| "train_batch_size": 2, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|