| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 2.0, |
| "eval_steps": 100, |
| "global_step": 1966, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.1017293997965412, |
| "grad_norm": 1.07757568359375, |
| "learning_rate": 4.985827402694507e-05, |
| "loss": 1.4448, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.1017293997965412, |
| "eval_loss": 0.5420752167701721, |
| "eval_runtime": 81.2287, |
| "eval_samples_per_second": 193.478, |
| "eval_steps_per_second": 24.191, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.2034587995930824, |
| "grad_norm": 0.9271150827407837, |
| "learning_rate": 4.943470300789534e-05, |
| "loss": 0.6104, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.2034587995930824, |
| "eval_loss": 0.39222481846809387, |
| "eval_runtime": 80.7623, |
| "eval_samples_per_second": 194.596, |
| "eval_steps_per_second": 24.331, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.3051881993896236, |
| "grad_norm": 0.8333455324172974, |
| "learning_rate": 4.873408942403743e-05, |
| "loss": 0.4651, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.3051881993896236, |
| "eval_loss": 0.32220107316970825, |
| "eval_runtime": 80.7592, |
| "eval_samples_per_second": 194.603, |
| "eval_steps_per_second": 24.332, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.4069175991861648, |
| "grad_norm": 0.8338972330093384, |
| "learning_rate": 4.7764376886723956e-05, |
| "loss": 0.4106, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.4069175991861648, |
| "eval_loss": 0.2739136815071106, |
| "eval_runtime": 80.8876, |
| "eval_samples_per_second": 194.294, |
| "eval_steps_per_second": 24.293, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.508646998982706, |
| "grad_norm": 0.6324864029884338, |
| "learning_rate": 4.6536560072189665e-05, |
| "loss": 0.3699, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.508646998982706, |
| "eval_loss": 0.2465265691280365, |
| "eval_runtime": 80.8298, |
| "eval_samples_per_second": 194.433, |
| "eval_steps_per_second": 24.31, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.6103763987792472, |
| "grad_norm": 0.7570372223854065, |
| "learning_rate": 4.506456006305641e-05, |
| "loss": 0.3256, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.6103763987792472, |
| "eval_loss": 0.22816891968250275, |
| "eval_runtime": 80.8701, |
| "eval_samples_per_second": 194.336, |
| "eval_steps_per_second": 24.298, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.7121057985757884, |
| "grad_norm": 0.6379811763763428, |
| "learning_rate": 4.336506651001469e-05, |
| "loss": 0.3194, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.7121057985757884, |
| "eval_loss": 0.21390707790851593, |
| "eval_runtime": 80.9268, |
| "eval_samples_per_second": 194.2, |
| "eval_steps_per_second": 24.281, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.8138351983723296, |
| "grad_norm": 0.8042012453079224, |
| "learning_rate": 4.145734840326494e-05, |
| "loss": 0.3032, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.8138351983723296, |
| "eval_loss": 0.20113198459148407, |
| "eval_runtime": 80.8778, |
| "eval_samples_per_second": 194.318, |
| "eval_steps_per_second": 24.296, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.9155645981688708, |
| "grad_norm": 0.721193253993988, |
| "learning_rate": 3.936303559920664e-05, |
| "loss": 0.2787, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.9155645981688708, |
| "eval_loss": 0.19362396001815796, |
| "eval_runtime": 80.735, |
| "eval_samples_per_second": 194.662, |
| "eval_steps_per_second": 24.339, |
| "step": 900 |
| }, |
| { |
| "epoch": 1.017293997965412, |
| "grad_norm": 0.5696636438369751, |
| "learning_rate": 3.710587357944272e-05, |
| "loss": 0.2672, |
| "step": 1000 |
| }, |
| { |
| "epoch": 1.017293997965412, |
| "eval_loss": 0.18811464309692383, |
| "eval_runtime": 80.9806, |
| "eval_samples_per_second": 194.071, |
| "eval_steps_per_second": 24.265, |
| "step": 1000 |
| }, |
| { |
| "epoch": 1.1190233977619533, |
| "grad_norm": 0.5195199847221375, |
| "learning_rate": 3.471145422266069e-05, |
| "loss": 0.2665, |
| "step": 1100 |
| }, |
| { |
| "epoch": 1.1190233977619533, |
| "eval_loss": 0.18336237967014313, |
| "eval_runtime": 81.0197, |
| "eval_samples_per_second": 193.978, |
| "eval_steps_per_second": 24.253, |
| "step": 1100 |
| }, |
| { |
| "epoch": 1.2207527975584944, |
| "grad_norm": 0.5171718001365662, |
| "learning_rate": 3.220692564191985e-05, |
| "loss": 0.2597, |
| "step": 1200 |
| }, |
| { |
| "epoch": 1.2207527975584944, |
| "eval_loss": 0.17948384582996368, |
| "eval_runtime": 80.9251, |
| "eval_samples_per_second": 194.204, |
| "eval_steps_per_second": 24.282, |
| "step": 1200 |
| }, |
| { |
| "epoch": 1.3224821973550356, |
| "grad_norm": 0.6012802124023438, |
| "learning_rate": 2.9620684377232173e-05, |
| "loss": 0.2457, |
| "step": 1300 |
| }, |
| { |
| "epoch": 1.3224821973550356, |
| "eval_loss": 0.17490074038505554, |
| "eval_runtime": 80.6967, |
| "eval_samples_per_second": 194.754, |
| "eval_steps_per_second": 24.35, |
| "step": 1300 |
| }, |
| { |
| "epoch": 1.4242115971515767, |
| "grad_norm": 0.8013952374458313, |
| "learning_rate": 2.6982053433381048e-05, |
| "loss": 0.2446, |
| "step": 1400 |
| }, |
| { |
| "epoch": 1.4242115971515767, |
| "eval_loss": 0.17289136350154877, |
| "eval_runtime": 80.8694, |
| "eval_samples_per_second": 194.338, |
| "eval_steps_per_second": 24.298, |
| "step": 1400 |
| }, |
| { |
| "epoch": 1.5259409969481181, |
| "grad_norm": 0.5540690422058105, |
| "learning_rate": 2.4320949813410496e-05, |
| "loss": 0.2417, |
| "step": 1500 |
| }, |
| { |
| "epoch": 1.5259409969481181, |
| "eval_loss": 0.1698693186044693, |
| "eval_runtime": 80.754, |
| "eval_samples_per_second": 194.616, |
| "eval_steps_per_second": 24.333, |
| "step": 1500 |
| }, |
| { |
| "epoch": 1.627670396744659, |
| "grad_norm": 0.6591632962226868, |
| "learning_rate": 2.166754531731575e-05, |
| "loss": 0.2312, |
| "step": 1600 |
| }, |
| { |
| "epoch": 1.627670396744659, |
| "eval_loss": 0.16674023866653442, |
| "eval_runtime": 80.7008, |
| "eval_samples_per_second": 194.744, |
| "eval_steps_per_second": 24.349, |
| "step": 1600 |
| }, |
| { |
| "epoch": 1.7293997965412005, |
| "grad_norm": 1.1154977083206177, |
| "learning_rate": 1.90519244518262e-05, |
| "loss": 0.2362, |
| "step": 1700 |
| }, |
| { |
| "epoch": 1.7293997965412005, |
| "eval_loss": 0.16516855359077454, |
| "eval_runtime": 80.7504, |
| "eval_samples_per_second": 194.624, |
| "eval_steps_per_second": 24.334, |
| "step": 1700 |
| }, |
| { |
| "epoch": 1.8311291963377416, |
| "grad_norm": 0.6043060421943665, |
| "learning_rate": 1.6503743329926174e-05, |
| "loss": 0.2308, |
| "step": 1800 |
| }, |
| { |
| "epoch": 1.8311291963377416, |
| "eval_loss": 0.16346405446529388, |
| "eval_runtime": 80.6619, |
| "eval_samples_per_second": 194.838, |
| "eval_steps_per_second": 24.361, |
| "step": 1800 |
| }, |
| { |
| "epoch": 1.9328585961342828, |
| "grad_norm": 0.6331253051757812, |
| "learning_rate": 1.4051893427537416e-05, |
| "loss": 0.2278, |
| "step": 1900 |
| }, |
| { |
| "epoch": 1.9328585961342828, |
| "eval_loss": 0.16204476356506348, |
| "eval_runtime": 80.7035, |
| "eval_samples_per_second": 194.738, |
| "eval_steps_per_second": 24.348, |
| "step": 1900 |
| } |
| ], |
| "logging_steps": 100, |
| "max_steps": 2949, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 8507720982331392.0, |
| "train_batch_size": 64, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|