| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.0, |
| "eval_steps": 500, |
| "global_step": 6688, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.014952153110047847, |
| "grad_norm": 0.11818729341030121, |
| "learning_rate": 2.9940119760479042e-06, |
| "loss": 2.3924, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.029904306220095694, |
| "grad_norm": 0.17396187782287598, |
| "learning_rate": 5.9880239520958085e-06, |
| "loss": 2.3714, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.04485645933014354, |
| "grad_norm": 0.251809298992157, |
| "learning_rate": 8.982035928143713e-06, |
| "loss": 2.3528, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.05980861244019139, |
| "grad_norm": 0.3756244480609894, |
| "learning_rate": 1.1976047904191617e-05, |
| "loss": 2.3111, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.07476076555023924, |
| "grad_norm": 0.41418808698654175, |
| "learning_rate": 1.4970059880239522e-05, |
| "loss": 2.2514, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.08971291866028708, |
| "grad_norm": 0.46379268169403076, |
| "learning_rate": 1.7964071856287426e-05, |
| "loss": 2.2166, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.10466507177033493, |
| "grad_norm": 0.6668035387992859, |
| "learning_rate": 1.9998605666598577e-05, |
| "loss": 2.1742, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.11961722488038277, |
| "grad_norm": 0.608900785446167, |
| "learning_rate": 1.9976283374914574e-05, |
| "loss": 2.1807, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.13456937799043062, |
| "grad_norm": 0.6352519989013672, |
| "learning_rate": 1.9926798130661576e-05, |
| "loss": 2.1485, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.14952153110047847, |
| "grad_norm": 0.7120950222015381, |
| "learning_rate": 1.9850284669922354e-05, |
| "loss": 2.1058, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.16447368421052633, |
| "grad_norm": 0.8403559923171997, |
| "learning_rate": 1.9746951319929283e-05, |
| "loss": 2.098, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.17942583732057416, |
| "grad_norm": 1.0693122148513794, |
| "learning_rate": 1.961707943184083e-05, |
| "loss": 2.073, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.194377990430622, |
| "grad_norm": 0.7334321737289429, |
| "learning_rate": 1.9461022614691904e-05, |
| "loss": 2.0591, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.20933014354066987, |
| "grad_norm": 0.9172684550285339, |
| "learning_rate": 1.9279205772603905e-05, |
| "loss": 2.0918, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.2242822966507177, |
| "grad_norm": 0.8891502618789673, |
| "learning_rate": 1.9072123947875885e-05, |
| "loss": 2.0391, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.23923444976076555, |
| "grad_norm": 0.7827827334403992, |
| "learning_rate": 1.8840340973106777e-05, |
| "loss": 2.0423, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.2541866028708134, |
| "grad_norm": 0.908538818359375, |
| "learning_rate": 1.8584487936018663e-05, |
| "loss": 2.0105, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.26913875598086123, |
| "grad_norm": 1.1278783082962036, |
| "learning_rate": 1.830526146116098e-05, |
| "loss": 2.0393, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.2840909090909091, |
| "grad_norm": 0.9773155450820923, |
| "learning_rate": 1.800342181317413e-05, |
| "loss": 2.0248, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.29904306220095694, |
| "grad_norm": 1.0564335584640503, |
| "learning_rate": 1.7679790826776865e-05, |
| "loss": 2.0289, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.31399521531100477, |
| "grad_norm": 0.9529797434806824, |
| "learning_rate": 1.7335249669113613e-05, |
| "loss": 2.014, |
| "step": 2100 |
| }, |
| { |
| "epoch": 0.32894736842105265, |
| "grad_norm": 1.0069955587387085, |
| "learning_rate": 1.6970736440554218e-05, |
| "loss": 1.9883, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.3438995215311005, |
| "grad_norm": 0.8809061646461487, |
| "learning_rate": 1.6587243620478617e-05, |
| "loss": 1.9769, |
| "step": 2300 |
| }, |
| { |
| "epoch": 0.3588516746411483, |
| "grad_norm": 0.9701207876205444, |
| "learning_rate": 1.6185815365000955e-05, |
| "loss": 1.976, |
| "step": 2400 |
| }, |
| { |
| "epoch": 0.3738038277511962, |
| "grad_norm": 1.3475958108901978, |
| "learning_rate": 1.5767544663990664e-05, |
| "loss": 1.999, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.388755980861244, |
| "grad_norm": 1.2865383625030518, |
| "learning_rate": 1.5333570365131353e-05, |
| "loss": 1.9752, |
| "step": 2600 |
| }, |
| { |
| "epoch": 0.40370813397129185, |
| "grad_norm": 1.0800195932388306, |
| "learning_rate": 1.4885074073120192e-05, |
| "loss": 1.9928, |
| "step": 2700 |
| }, |
| { |
| "epoch": 0.41866028708133973, |
| "grad_norm": 0.8922355771064758, |
| "learning_rate": 1.4423276932450512e-05, |
| "loss": 1.9922, |
| "step": 2800 |
| }, |
| { |
| "epoch": 0.43361244019138756, |
| "grad_norm": 1.251029372215271, |
| "learning_rate": 1.3949436302537357e-05, |
| "loss": 1.98, |
| "step": 2900 |
| }, |
| { |
| "epoch": 0.4485645933014354, |
| "grad_norm": 0.8446579575538635, |
| "learning_rate": 1.3464842334238706e-05, |
| "loss": 1.9958, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.46351674641148327, |
| "grad_norm": 1.2263991832733154, |
| "learning_rate": 1.2970814457093732e-05, |
| "loss": 1.9642, |
| "step": 3100 |
| }, |
| { |
| "epoch": 0.4784688995215311, |
| "grad_norm": 0.8603528738021851, |
| "learning_rate": 1.2468697786842481e-05, |
| "loss": 1.9668, |
| "step": 3200 |
| }, |
| { |
| "epoch": 0.4934210526315789, |
| "grad_norm": 1.107136607170105, |
| "learning_rate": 1.1959859463008316e-05, |
| "loss": 1.989, |
| "step": 3300 |
| }, |
| { |
| "epoch": 0.5083732057416268, |
| "grad_norm": 1.3708308935165405, |
| "learning_rate": 1.1445684926515088e-05, |
| "loss": 1.958, |
| "step": 3400 |
| }, |
| { |
| "epoch": 0.5233253588516746, |
| "grad_norm": 1.102388620376587, |
| "learning_rate": 1.0927574147474122e-05, |
| "loss": 1.9456, |
| "step": 3500 |
| }, |
| { |
| "epoch": 0.5382775119617225, |
| "grad_norm": 1.0549287796020508, |
| "learning_rate": 1.0406937813411792e-05, |
| "loss": 1.9293, |
| "step": 3600 |
| }, |
| { |
| "epoch": 0.5532296650717703, |
| "grad_norm": 1.217860221862793, |
| "learning_rate": 9.885193488316246e-06, |
| "loss": 1.9067, |
| "step": 3700 |
| }, |
| { |
| "epoch": 0.5681818181818182, |
| "grad_norm": 0.8757530450820923, |
| "learning_rate": 9.363761752961217e-06, |
| "loss": 1.9248, |
| "step": 3800 |
| }, |
| { |
| "epoch": 0.5831339712918661, |
| "grad_norm": 0.948020339012146, |
| "learning_rate": 8.844062337015873e-06, |
| "loss": 1.9939, |
| "step": 3900 |
| }, |
| { |
| "epoch": 0.5980861244019139, |
| "grad_norm": 1.2569465637207031, |
| "learning_rate": 8.327510253472023e-06, |
| "loss": 1.937, |
| "step": 4000 |
| }, |
| { |
| "epoch": 0.6130382775119617, |
| "grad_norm": 1.1405400037765503, |
| "learning_rate": 7.815511945913656e-06, |
| "loss": 1.9434, |
| "step": 4100 |
| }, |
| { |
| "epoch": 0.6279904306220095, |
| "grad_norm": 1.1672446727752686, |
| "learning_rate": 7.309461459118869e-06, |
| "loss": 1.9708, |
| "step": 4200 |
| }, |
| { |
| "epoch": 0.6429425837320574, |
| "grad_norm": 1.0575659275054932, |
| "learning_rate": 6.810736643420675e-06, |
| "loss": 1.9096, |
| "step": 4300 |
| }, |
| { |
| "epoch": 0.6578947368421053, |
| "grad_norm": 1.3636834621429443, |
| "learning_rate": 6.320695403161265e-06, |
| "loss": 1.9368, |
| "step": 4400 |
| }, |
| { |
| "epoch": 0.6728468899521531, |
| "grad_norm": 1.0744818449020386, |
| "learning_rate": 5.840671999454305e-06, |
| "loss": 1.9416, |
| "step": 4500 |
| }, |
| { |
| "epoch": 0.687799043062201, |
| "grad_norm": 1.1180146932601929, |
| "learning_rate": 5.371973417321858e-06, |
| "loss": 1.9412, |
| "step": 4600 |
| }, |
| { |
| "epoch": 0.7027511961722488, |
| "grad_norm": 1.0380100011825562, |
| "learning_rate": 4.91587580709739e-06, |
| "loss": 1.9581, |
| "step": 4700 |
| }, |
| { |
| "epoch": 0.7177033492822966, |
| "grad_norm": 1.65896475315094, |
| "learning_rate": 4.4736210097839876e-06, |
| "loss": 1.9444, |
| "step": 4800 |
| }, |
| { |
| "epoch": 0.7326555023923444, |
| "grad_norm": 0.9622933864593506, |
| "learning_rate": 4.0464131758283965e-06, |
| "loss": 1.9271, |
| "step": 4900 |
| }, |
| { |
| "epoch": 0.7476076555023924, |
| "grad_norm": 1.1379241943359375, |
| "learning_rate": 3.635415486517151e-06, |
| "loss": 1.9383, |
| "step": 5000 |
| }, |
| { |
| "epoch": 0.7625598086124402, |
| "grad_norm": 1.2658005952835083, |
| "learning_rate": 3.24174698692157e-06, |
| "loss": 1.8978, |
| "step": 5100 |
| }, |
| { |
| "epoch": 0.777511961722488, |
| "grad_norm": 1.0809060335159302, |
| "learning_rate": 2.866479539014744e-06, |
| "loss": 1.8513, |
| "step": 5200 |
| }, |
| { |
| "epoch": 0.7924641148325359, |
| "grad_norm": 1.1973536014556885, |
| "learning_rate": 2.5106349032564683e-06, |
| "loss": 1.9, |
| "step": 5300 |
| }, |
| { |
| "epoch": 0.8074162679425837, |
| "grad_norm": 1.4524526596069336, |
| "learning_rate": 2.1751819565921774e-06, |
| "loss": 1.9109, |
| "step": 5400 |
| }, |
| { |
| "epoch": 0.8223684210526315, |
| "grad_norm": 1.475762963294983, |
| "learning_rate": 1.861034054440607e-06, |
| "loss": 1.9121, |
| "step": 5500 |
| }, |
| { |
| "epoch": 0.8373205741626795, |
| "grad_norm": 1.374098777770996, |
| "learning_rate": 1.5690465438528702e-06, |
| "loss": 1.8737, |
| "step": 5600 |
| }, |
| { |
| "epoch": 0.8522727272727273, |
| "grad_norm": 1.581127405166626, |
| "learning_rate": 1.300014434613952e-06, |
| "loss": 1.9188, |
| "step": 5700 |
| }, |
| { |
| "epoch": 0.8672248803827751, |
| "grad_norm": 1.403342604637146, |
| "learning_rate": 1.0546702346276671e-06, |
| "loss": 1.9318, |
| "step": 5800 |
| }, |
| { |
| "epoch": 0.882177033492823, |
| "grad_norm": 1.043062686920166, |
| "learning_rate": 8.336819554787723e-07, |
| "loss": 1.9304, |
| "step": 5900 |
| }, |
| { |
| "epoch": 0.8971291866028708, |
| "grad_norm": 1.1141079664230347, |
| "learning_rate": 6.37651293602628e-07, |
| "loss": 1.9026, |
| "step": 6000 |
| }, |
| { |
| "epoch": 0.9120813397129187, |
| "grad_norm": 1.0021100044250488, |
| "learning_rate": 4.6711199201459833e-07, |
| "loss": 1.926, |
| "step": 6100 |
| }, |
| { |
| "epoch": 0.9270334928229665, |
| "grad_norm": 1.2316174507141113, |
| "learning_rate": 3.225283870597973e-07, |
| "loss": 1.9084, |
| "step": 6200 |
| }, |
| { |
| "epoch": 0.9419856459330144, |
| "grad_norm": 1.3726786375045776, |
| "learning_rate": 2.0429414414006588e-07, |
| "loss": 1.9412, |
| "step": 6300 |
| }, |
| { |
| "epoch": 0.9569377990430622, |
| "grad_norm": 1.1840542554855347, |
| "learning_rate": 1.1273118586042298e-07, |
| "loss": 1.9118, |
| "step": 6400 |
| }, |
| { |
| "epoch": 0.97188995215311, |
| "grad_norm": 1.1613115072250366, |
| "learning_rate": 4.8088815513424037e-08, |
| "loss": 1.9591, |
| "step": 6500 |
| }, |
| { |
| "epoch": 0.9868421052631579, |
| "grad_norm": 1.2687839269638062, |
| "learning_rate": 1.0543038287944562e-08, |
| "loss": 1.9661, |
| "step": 6600 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_loss": 2.05165696144104, |
| "eval_runtime": 294.8197, |
| "eval_samples_per_second": 13.988, |
| "eval_steps_per_second": 1.75, |
| "step": 6688 |
| }, |
| { |
| "epoch": 1.0, |
| "step": 6688, |
| "total_flos": 1.2181112832e+17, |
| "train_loss": 2.005428218385249, |
| "train_runtime": 2349.5442, |
| "train_samples_per_second": 5.693, |
| "train_steps_per_second": 2.847 |
| } |
| ], |
| "logging_steps": 100, |
| "max_steps": 6688, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.2181112832e+17, |
| "train_batch_size": 2, |
| "trial_name": null, |
| "trial_params": null |
| } |