| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 11.80327868852459, |
| "eval_steps": 500, |
| "global_step": 18000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.32786885245901637, |
| "grad_norm": 1.5182467699050903, |
| "learning_rate": 9.838688524590164e-06, |
| "loss": 0.3532, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.6557377049180327, |
| "grad_norm": 1.5529841184616089, |
| "learning_rate": 9.674754098360658e-06, |
| "loss": 0.2186, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.9836065573770492, |
| "grad_norm": 1.2810306549072266, |
| "learning_rate": 9.51081967213115e-06, |
| "loss": 0.2085, |
| "step": 1500 |
| }, |
| { |
| "epoch": 1.3114754098360657, |
| "grad_norm": 1.7146570682525635, |
| "learning_rate": 9.34688524590164e-06, |
| "loss": 0.1153, |
| "step": 2000 |
| }, |
| { |
| "epoch": 1.639344262295082, |
| "grad_norm": 1.82291579246521, |
| "learning_rate": 9.182950819672131e-06, |
| "loss": 0.1108, |
| "step": 2500 |
| }, |
| { |
| "epoch": 1.9672131147540983, |
| "grad_norm": 1.3579846620559692, |
| "learning_rate": 9.019016393442623e-06, |
| "loss": 0.1137, |
| "step": 3000 |
| }, |
| { |
| "epoch": 2.2950819672131146, |
| "grad_norm": 1.2722268104553223, |
| "learning_rate": 8.855081967213115e-06, |
| "loss": 0.0613, |
| "step": 3500 |
| }, |
| { |
| "epoch": 2.6229508196721314, |
| "grad_norm": 1.0057650804519653, |
| "learning_rate": 8.691147540983608e-06, |
| "loss": 0.0573, |
| "step": 4000 |
| }, |
| { |
| "epoch": 2.9508196721311473, |
| "grad_norm": 1.5272691249847412, |
| "learning_rate": 8.5272131147541e-06, |
| "loss": 0.0585, |
| "step": 4500 |
| }, |
| { |
| "epoch": 3.278688524590164, |
| "grad_norm": 0.9407837986946106, |
| "learning_rate": 8.36327868852459e-06, |
| "loss": 0.0391, |
| "step": 5000 |
| }, |
| { |
| "epoch": 3.6065573770491803, |
| "grad_norm": 1.2755032777786255, |
| "learning_rate": 8.199344262295082e-06, |
| "loss": 0.0377, |
| "step": 5500 |
| }, |
| { |
| "epoch": 3.9344262295081966, |
| "grad_norm": 0.9030640125274658, |
| "learning_rate": 8.035409836065574e-06, |
| "loss": 0.0405, |
| "step": 6000 |
| }, |
| { |
| "epoch": 4.262295081967213, |
| "grad_norm": 0.8674460649490356, |
| "learning_rate": 7.871475409836066e-06, |
| "loss": 0.0298, |
| "step": 6500 |
| }, |
| { |
| "epoch": 4.590163934426229, |
| "grad_norm": 0.7920001745223999, |
| "learning_rate": 7.707540983606559e-06, |
| "loss": 0.0277, |
| "step": 7000 |
| }, |
| { |
| "epoch": 4.918032786885246, |
| "grad_norm": 0.5429362654685974, |
| "learning_rate": 7.543934426229509e-06, |
| "loss": 0.0295, |
| "step": 7500 |
| }, |
| { |
| "epoch": 5.245901639344262, |
| "grad_norm": 0.613649845123291, |
| "learning_rate": 7.3800000000000005e-06, |
| "loss": 0.0257, |
| "step": 8000 |
| }, |
| { |
| "epoch": 5.573770491803279, |
| "grad_norm": 0.6390576362609863, |
| "learning_rate": 7.216065573770492e-06, |
| "loss": 0.0248, |
| "step": 8500 |
| }, |
| { |
| "epoch": 5.901639344262295, |
| "grad_norm": 0.6235594153404236, |
| "learning_rate": 7.052131147540984e-06, |
| "loss": 0.025, |
| "step": 9000 |
| }, |
| { |
| "epoch": 6.229508196721311, |
| "grad_norm": 0.5020778179168701, |
| "learning_rate": 6.888196721311477e-06, |
| "loss": 0.0241, |
| "step": 9500 |
| }, |
| { |
| "epoch": 6.557377049180328, |
| "grad_norm": 0.7309982776641846, |
| "learning_rate": 6.724262295081968e-06, |
| "loss": 0.0227, |
| "step": 10000 |
| }, |
| { |
| "epoch": 6.885245901639344, |
| "grad_norm": 0.38623395562171936, |
| "learning_rate": 6.5603278688524594e-06, |
| "loss": 0.0229, |
| "step": 10500 |
| }, |
| { |
| "epoch": 7.213114754098361, |
| "grad_norm": 0.5087317228317261, |
| "learning_rate": 6.396393442622951e-06, |
| "loss": 0.0227, |
| "step": 11000 |
| }, |
| { |
| "epoch": 7.540983606557377, |
| "grad_norm": 0.5436781048774719, |
| "learning_rate": 6.232459016393443e-06, |
| "loss": 0.021, |
| "step": 11500 |
| }, |
| { |
| "epoch": 7.868852459016393, |
| "grad_norm": 0.756500780582428, |
| "learning_rate": 6.068524590163935e-06, |
| "loss": 0.0218, |
| "step": 12000 |
| }, |
| { |
| "epoch": 8.19672131147541, |
| "grad_norm": 0.20513100922107697, |
| "learning_rate": 5.9045901639344274e-06, |
| "loss": 0.0206, |
| "step": 12500 |
| }, |
| { |
| "epoch": 8.524590163934427, |
| "grad_norm": 0.6370705366134644, |
| "learning_rate": 5.740655737704918e-06, |
| "loss": 0.0194, |
| "step": 13000 |
| }, |
| { |
| "epoch": 8.852459016393443, |
| "grad_norm": 0.509864091873169, |
| "learning_rate": 5.577049180327869e-06, |
| "loss": 0.0207, |
| "step": 13500 |
| }, |
| { |
| "epoch": 9.180327868852459, |
| "grad_norm": 0.4402160942554474, |
| "learning_rate": 5.413114754098361e-06, |
| "loss": 0.0194, |
| "step": 14000 |
| }, |
| { |
| "epoch": 9.508196721311476, |
| "grad_norm": 0.29329636693000793, |
| "learning_rate": 5.2491803278688525e-06, |
| "loss": 0.0188, |
| "step": 14500 |
| }, |
| { |
| "epoch": 9.836065573770492, |
| "grad_norm": 0.25181844830513, |
| "learning_rate": 5.085245901639345e-06, |
| "loss": 0.0194, |
| "step": 15000 |
| }, |
| { |
| "epoch": 10.163934426229508, |
| "grad_norm": 0.36046847701072693, |
| "learning_rate": 4.921311475409836e-06, |
| "loss": 0.0188, |
| "step": 15500 |
| }, |
| { |
| "epoch": 10.491803278688524, |
| "grad_norm": 0.3692318797111511, |
| "learning_rate": 4.757377049180329e-06, |
| "loss": 0.0183, |
| "step": 16000 |
| }, |
| { |
| "epoch": 10.819672131147541, |
| "grad_norm": 0.6048111319541931, |
| "learning_rate": 4.59344262295082e-06, |
| "loss": 0.0185, |
| "step": 16500 |
| }, |
| { |
| "epoch": 11.147540983606557, |
| "grad_norm": 0.26122042536735535, |
| "learning_rate": 4.4295081967213114e-06, |
| "loss": 0.018, |
| "step": 17000 |
| }, |
| { |
| "epoch": 11.475409836065573, |
| "grad_norm": 0.30455446243286133, |
| "learning_rate": 4.265573770491804e-06, |
| "loss": 0.0174, |
| "step": 17500 |
| }, |
| { |
| "epoch": 11.80327868852459, |
| "grad_norm": 0.2333904653787613, |
| "learning_rate": 4.101639344262295e-06, |
| "loss": 0.0179, |
| "step": 18000 |
| } |
| ], |
| "logging_steps": 500, |
| "max_steps": 30500, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 20, |
| "save_steps": 1000, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.17961666854912e+18, |
| "train_batch_size": 2, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|