| { |
| "best_metric": 0.10389433056116104, |
| "best_model_checkpoint": "mgh6/TCS_Pair_base/checkpoint-650", |
| "epoch": 0.21178962230850687, |
| "eval_steps": 50, |
| "global_step": 900, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.011766090128250382, |
| "grad_norm": 1.275758147239685, |
| "learning_rate": 9.988232525300071e-05, |
| "loss": 0.0357, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.011766090128250382, |
| "eval_loss": 0.18010935187339783, |
| "eval_runtime": 13.7184, |
| "eval_samples_per_second": 2045.21, |
| "eval_steps_per_second": 8.018, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.023532180256500765, |
| "grad_norm": 0.9043275713920593, |
| "learning_rate": 9.976465050600142e-05, |
| "loss": 0.0653, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.023532180256500765, |
| "eval_loss": 0.14653433859348297, |
| "eval_runtime": 13.7483, |
| "eval_samples_per_second": 2040.76, |
| "eval_steps_per_second": 8.001, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.03529827038475115, |
| "grad_norm": 0.7122117877006531, |
| "learning_rate": 9.964697575900212e-05, |
| "loss": 0.0808, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.03529827038475115, |
| "eval_loss": 0.14078955352306366, |
| "eval_runtime": 13.804, |
| "eval_samples_per_second": 2032.521, |
| "eval_steps_per_second": 7.969, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.04706436051300153, |
| "grad_norm": 0.6643016338348389, |
| "learning_rate": 9.952930101200283e-05, |
| "loss": 0.0795, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.04706436051300153, |
| "eval_loss": 0.14704547822475433, |
| "eval_runtime": 13.7761, |
| "eval_samples_per_second": 2036.65, |
| "eval_steps_per_second": 7.985, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.05883045064125191, |
| "grad_norm": 0.7624136209487915, |
| "learning_rate": 9.941162626500353e-05, |
| "loss": 0.0713, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.05883045064125191, |
| "eval_loss": 0.1481221318244934, |
| "eval_runtime": 13.7735, |
| "eval_samples_per_second": 2037.023, |
| "eval_steps_per_second": 7.986, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.0705965407695023, |
| "grad_norm": 0.7390868663787842, |
| "learning_rate": 9.929395151800424e-05, |
| "loss": 0.0781, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.0705965407695023, |
| "eval_loss": 0.12939240038394928, |
| "eval_runtime": 13.7779, |
| "eval_samples_per_second": 2036.381, |
| "eval_steps_per_second": 7.984, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.08236263089775267, |
| "grad_norm": 0.6493538618087769, |
| "learning_rate": 9.917627677100495e-05, |
| "loss": 0.0762, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.08236263089775267, |
| "eval_loss": 0.11381174623966217, |
| "eval_runtime": 13.7913, |
| "eval_samples_per_second": 2034.394, |
| "eval_steps_per_second": 7.976, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.09412872102600306, |
| "grad_norm": 0.483694463968277, |
| "learning_rate": 9.905860202400565e-05, |
| "loss": 0.1041, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.09412872102600306, |
| "eval_loss": 0.1289786696434021, |
| "eval_runtime": 13.7941, |
| "eval_samples_per_second": 2033.979, |
| "eval_steps_per_second": 7.974, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.10589481115425343, |
| "grad_norm": 0.744751513004303, |
| "learning_rate": 9.894092727700636e-05, |
| "loss": 0.0957, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.10589481115425343, |
| "eval_loss": 0.1261110156774521, |
| "eval_runtime": 13.7703, |
| "eval_samples_per_second": 2037.504, |
| "eval_steps_per_second": 7.988, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.11766090128250382, |
| "grad_norm": 0.8092584609985352, |
| "learning_rate": 9.882325253000706e-05, |
| "loss": 0.0954, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.11766090128250382, |
| "eval_loss": 0.11313354969024658, |
| "eval_runtime": 13.7468, |
| "eval_samples_per_second": 2040.988, |
| "eval_steps_per_second": 8.002, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.1294269914107542, |
| "grad_norm": 0.5611206889152527, |
| "learning_rate": 9.870557778300777e-05, |
| "loss": 0.0912, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.1294269914107542, |
| "eval_loss": 0.11919673532247543, |
| "eval_runtime": 13.802, |
| "eval_samples_per_second": 2032.825, |
| "eval_steps_per_second": 7.97, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.1411930815390046, |
| "grad_norm": 0.7006365656852722, |
| "learning_rate": 9.858790303600848e-05, |
| "loss": 0.0938, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.1411930815390046, |
| "eval_loss": 0.10846291482448578, |
| "eval_runtime": 13.7875, |
| "eval_samples_per_second": 2034.962, |
| "eval_steps_per_second": 7.978, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.15295917166725498, |
| "grad_norm": 0.5973498225212097, |
| "learning_rate": 9.847022828900918e-05, |
| "loss": 0.0946, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.15295917166725498, |
| "eval_loss": 0.10389433056116104, |
| "eval_runtime": 13.7964, |
| "eval_samples_per_second": 2033.643, |
| "eval_steps_per_second": 7.973, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.16472526179550534, |
| "grad_norm": 0.6352247595787048, |
| "learning_rate": 9.835255354200989e-05, |
| "loss": 0.0893, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.16472526179550534, |
| "eval_loss": 0.10826652497053146, |
| "eval_runtime": 13.8022, |
| "eval_samples_per_second": 2032.788, |
| "eval_steps_per_second": 7.97, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.17649135192375573, |
| "grad_norm": 0.6947146654129028, |
| "learning_rate": 9.823487879501059e-05, |
| "loss": 0.0867, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.17649135192375573, |
| "eval_loss": 0.10910729318857193, |
| "eval_runtime": 13.7759, |
| "eval_samples_per_second": 2036.679, |
| "eval_steps_per_second": 7.985, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.18825744205200612, |
| "grad_norm": 0.5221682190895081, |
| "learning_rate": 9.81172040480113e-05, |
| "loss": 0.0915, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.18825744205200612, |
| "eval_loss": 0.11108585447072983, |
| "eval_runtime": 13.7759, |
| "eval_samples_per_second": 2036.68, |
| "eval_steps_per_second": 7.985, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.2000235321802565, |
| "grad_norm": 0.5295658707618713, |
| "learning_rate": 9.7999529301012e-05, |
| "loss": 0.0861, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.2000235321802565, |
| "eval_loss": 0.10687608271837234, |
| "eval_runtime": 13.7743, |
| "eval_samples_per_second": 2036.916, |
| "eval_steps_per_second": 7.986, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.21178962230850687, |
| "grad_norm": 0.712462306022644, |
| "learning_rate": 9.788185455401271e-05, |
| "loss": 0.0832, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.21178962230850687, |
| "eval_loss": 0.10970053821802139, |
| "eval_runtime": 13.7864, |
| "eval_samples_per_second": 2035.119, |
| "eval_steps_per_second": 7.979, |
| "step": 900 |
| } |
| ], |
| "logging_steps": 50, |
| "max_steps": 42490, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 10, |
| "save_steps": 50, |
| "stateful_callbacks": { |
| "EarlyStoppingCallback": { |
| "args": { |
| "early_stopping_patience": 5, |
| "early_stopping_threshold": 0.0 |
| }, |
| "attributes": { |
| "early_stopping_patience_counter": 5 |
| } |
| }, |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 64, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|