{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9897531439217513,
  "eval_steps": 500,
  "global_step": 25500,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.02,
      "learning_rate": 0.00029805930756093775,
      "loss": 2.1009,
      "step": 500
    },
    {
      "epoch": 0.04,
      "learning_rate": 0.0002961186151218755,
      "loss": 1.8087,
      "step": 1000
    },
    {
      "epoch": 0.06,
      "learning_rate": 0.0002941779226828132,
      "loss": 1.8092,
      "step": 1500
    },
    {
      "epoch": 0.08,
      "learning_rate": 0.0002922372302437509,
      "loss": 1.6954,
      "step": 2000
    },
    {
      "epoch": 0.1,
      "learning_rate": 0.0002902965378046887,
      "loss": 1.7193,
      "step": 2500
    },
    {
      "epoch": 0.12,
      "learning_rate": 0.0002883558453656264,
      "loss": 1.7075,
      "step": 3000
    },
    {
      "epoch": 0.14,
      "learning_rate": 0.0002864151529265642,
      "loss": 1.6889,
      "step": 3500
    },
    {
      "epoch": 0.16,
      "learning_rate": 0.0002844744604875019,
      "loss": 1.6808,
      "step": 4000
    },
    {
      "epoch": 0.17,
      "learning_rate": 0.0002825337680484397,
      "loss": 1.691,
      "step": 4500
    },
    {
      "epoch": 0.19,
      "learning_rate": 0.0002805930756093774,
      "loss": 1.6741,
      "step": 5000
    },
    {
      "epoch": 0.21,
      "learning_rate": 0.00027865238317031514,
      "loss": 1.6777,
      "step": 5500
    },
    {
      "epoch": 0.23,
      "learning_rate": 0.00027671169073125286,
      "loss": 1.6533,
      "step": 6000
    },
    {
      "epoch": 0.25,
      "learning_rate": 0.00027477099829219064,
      "loss": 1.6435,
      "step": 6500
    },
    {
      "epoch": 0.27,
      "learning_rate": 0.00027283030585312836,
      "loss": 1.6234,
      "step": 7000
    },
    {
      "epoch": 0.29,
      "learning_rate": 0.00027088961341406614,
      "loss": 1.6251,
      "step": 7500
    },
    {
      "epoch": 0.31,
      "learning_rate": 0.00026894892097500386,
      "loss": 1.6333,
      "step": 8000
    },
    {
      "epoch": 0.33,
      "learning_rate": 0.0002670082285359416,
      "loss": 1.59,
      "step": 8500
    },
    {
      "epoch": 0.35,
      "learning_rate": 0.0002650675360968793,
      "loss": 1.5915,
      "step": 9000
    },
    {
      "epoch": 0.37,
      "learning_rate": 0.0002631268436578171,
      "loss": 1.6025,
      "step": 9500
    },
    {
      "epoch": 0.39,
      "learning_rate": 0.0002611861512187548,
      "loss": 1.5851,
      "step": 10000
    },
    {
      "epoch": 0.41,
      "learning_rate": 0.0002592454587796926,
      "loss": 1.58,
      "step": 10500
    },
    {
      "epoch": 0.43,
      "learning_rate": 0.0002573047663406303,
      "loss": 1.5755,
      "step": 11000
    },
    {
      "epoch": 0.45,
      "learning_rate": 0.0002553640739015681,
      "loss": 1.5794,
      "step": 11500
    },
    {
      "epoch": 0.47,
      "learning_rate": 0.0002534233814625058,
      "loss": 1.5559,
      "step": 12000
    },
    {
      "epoch": 0.49,
      "learning_rate": 0.00025148268902344353,
      "loss": 1.5587,
      "step": 12500
    },
    {
      "epoch": 0.5,
      "learning_rate": 0.0002495419965843813,
      "loss": 1.5275,
      "step": 13000
    },
    {
      "epoch": 0.52,
      "learning_rate": 0.00024760130414531903,
      "loss": 1.5377,
      "step": 13500
    },
    {
      "epoch": 0.54,
      "learning_rate": 0.0002456606117062568,
      "loss": 1.5673,
      "step": 14000
    },
    {
      "epoch": 0.56,
      "learning_rate": 0.0002437199192671945,
      "loss": 1.5705,
      "step": 14500
    },
    {
      "epoch": 0.58,
      "learning_rate": 0.00024177922682813228,
      "loss": 1.5854,
      "step": 15000
    },
    {
      "epoch": 0.6,
      "learning_rate": 0.00023983853438907,
      "loss": 1.5174,
      "step": 15500
    },
    {
      "epoch": 0.62,
      "learning_rate": 0.00023789784195000775,
      "loss": 1.5235,
      "step": 16000
    },
    {
      "epoch": 0.64,
      "learning_rate": 0.00023595714951094547,
      "loss": 1.53,
      "step": 16500
    },
    {
      "epoch": 0.66,
      "learning_rate": 0.00023401645707188322,
      "loss": 1.5384,
      "step": 17000
    },
    {
      "epoch": 0.68,
      "learning_rate": 0.00023207576463282097,
      "loss": 1.5231,
      "step": 17500
    },
    {
      "epoch": 0.7,
      "learning_rate": 0.00023013507219375872,
      "loss": 1.5472,
      "step": 18000
    },
    {
      "epoch": 0.72,
      "learning_rate": 0.00022819437975469644,
      "loss": 1.5524,
      "step": 18500
    },
    {
      "epoch": 0.74,
      "learning_rate": 0.0002262536873156342,
      "loss": 1.5208,
      "step": 19000
    },
    {
      "epoch": 0.76,
      "learning_rate": 0.00022431299487657192,
      "loss": 1.5142,
      "step": 19500
    },
    {
      "epoch": 0.78,
      "learning_rate": 0.0002223723024375097,
      "loss": 1.482,
      "step": 20000
    },
    {
      "epoch": 0.8,
      "learning_rate": 0.00022043160999844742,
      "loss": 1.5337,
      "step": 20500
    },
    {
      "epoch": 0.82,
      "learning_rate": 0.00021849091755938516,
      "loss": 1.5183,
      "step": 21000
    },
    {
      "epoch": 0.83,
      "learning_rate": 0.00021655022512032294,
      "loss": 1.5318,
      "step": 21500
    },
    {
      "epoch": 0.85,
      "learning_rate": 0.00021460953268126066,
      "loss": 1.5445,
      "step": 22000
    },
    {
      "epoch": 0.87,
      "learning_rate": 0.00021266884024219841,
      "loss": 1.541,
      "step": 22500
    },
    {
      "epoch": 0.89,
      "learning_rate": 0.00021072814780313614,
      "loss": 1.512,
      "step": 23000
    },
    {
      "epoch": 0.91,
      "learning_rate": 0.0002087874553640739,
      "loss": 1.5023,
      "step": 23500
    },
    {
      "epoch": 0.93,
      "learning_rate": 0.0002068467629250116,
      "loss": 1.503,
      "step": 24000
    },
    {
      "epoch": 0.95,
      "learning_rate": 0.00020490607048594939,
      "loss": 1.5257,
      "step": 24500
    },
    {
      "epoch": 0.97,
      "learning_rate": 0.0002029653780468871,
      "loss": 1.4956,
      "step": 25000
    },
    {
      "epoch": 0.99,
      "learning_rate": 0.00020102468560782486,
      "loss": 1.5261,
      "step": 25500
    }
  ],
  "logging_steps": 500,
  "max_steps": 77292,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 500,
  "total_flos": 1.91194417170432e+16,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}