| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.40160642570281124, |
| "eval_steps": 25, |
| "global_step": 500, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.020080321285140562, |
| "grad_norm": 4.975222110748291, |
| "learning_rate": 2.375e-05, |
| "loss": 3.699, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.020080321285140562, |
| "eval_loss": 3.4337058067321777, |
| "eval_runtime": 73.7432, |
| "eval_samples_per_second": 5.967, |
| "eval_steps_per_second": 0.746, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.040160642570281124, |
| "grad_norm": 17.880794525146484, |
| "learning_rate": 2.25e-05, |
| "loss": 3.3958, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.040160642570281124, |
| "eval_loss": 3.2245774269104004, |
| "eval_runtime": 72.5106, |
| "eval_samples_per_second": 6.068, |
| "eval_steps_per_second": 0.759, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.060240963855421686, |
| "grad_norm": 14.544445991516113, |
| "learning_rate": 2.125e-05, |
| "loss": 3.2819, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.060240963855421686, |
| "eval_loss": 3.0899553298950195, |
| "eval_runtime": 71.5892, |
| "eval_samples_per_second": 6.146, |
| "eval_steps_per_second": 0.768, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.08032128514056225, |
| "grad_norm": 9.934131622314453, |
| "learning_rate": 2e-05, |
| "loss": 3.0848, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.08032128514056225, |
| "eval_loss": 2.9913179874420166, |
| "eval_runtime": 71.5036, |
| "eval_samples_per_second": 6.154, |
| "eval_steps_per_second": 0.769, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.10040160642570281, |
| "grad_norm": 27.045995712280273, |
| "learning_rate": 1.8750000000000002e-05, |
| "loss": 3.0192, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.10040160642570281, |
| "eval_loss": 2.901963710784912, |
| "eval_runtime": 71.6168, |
| "eval_samples_per_second": 6.144, |
| "eval_steps_per_second": 0.768, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.12048192771084337, |
| "grad_norm": 37.5889778137207, |
| "learning_rate": 1.75e-05, |
| "loss": 2.7809, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.12048192771084337, |
| "eval_loss": 2.842862129211426, |
| "eval_runtime": 71.6469, |
| "eval_samples_per_second": 6.141, |
| "eval_steps_per_second": 0.768, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.14056224899598393, |
| "grad_norm": 29.42604637145996, |
| "learning_rate": 1.6250000000000002e-05, |
| "loss": 2.7966, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.14056224899598393, |
| "eval_loss": 2.8016371726989746, |
| "eval_runtime": 71.7672, |
| "eval_samples_per_second": 6.131, |
| "eval_steps_per_second": 0.766, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.1606425702811245, |
| "grad_norm": 18.478330612182617, |
| "learning_rate": 1.5e-05, |
| "loss": 2.7628, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.1606425702811245, |
| "eval_loss": 2.7751874923706055, |
| "eval_runtime": 71.6702, |
| "eval_samples_per_second": 6.139, |
| "eval_steps_per_second": 0.767, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.18072289156626506, |
| "grad_norm": 14.404654502868652, |
| "learning_rate": 1.3750000000000002e-05, |
| "loss": 2.7318, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.18072289156626506, |
| "eval_loss": 2.761225461959839, |
| "eval_runtime": 72.6836, |
| "eval_samples_per_second": 6.054, |
| "eval_steps_per_second": 0.757, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.20080321285140562, |
| "grad_norm": 11.6594820022583, |
| "learning_rate": 1.25e-05, |
| "loss": 2.6541, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.20080321285140562, |
| "eval_loss": 2.748612642288208, |
| "eval_runtime": 71.8454, |
| "eval_samples_per_second": 6.124, |
| "eval_steps_per_second": 0.766, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.22088353413654618, |
| "grad_norm": 17.30568504333496, |
| "learning_rate": 1.125e-05, |
| "loss": 2.7524, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.22088353413654618, |
| "eval_loss": 2.7362046241760254, |
| "eval_runtime": 71.4941, |
| "eval_samples_per_second": 6.154, |
| "eval_steps_per_second": 0.769, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.24096385542168675, |
| "grad_norm": 37.215980529785156, |
| "learning_rate": 1e-05, |
| "loss": 2.6456, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.24096385542168675, |
| "eval_loss": 2.730060577392578, |
| "eval_runtime": 71.5118, |
| "eval_samples_per_second": 6.153, |
| "eval_steps_per_second": 0.769, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.26104417670682734, |
| "grad_norm": 36.52507781982422, |
| "learning_rate": 8.75e-06, |
| "loss": 2.7501, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.26104417670682734, |
| "eval_loss": 2.7272305488586426, |
| "eval_runtime": 71.6593, |
| "eval_samples_per_second": 6.14, |
| "eval_steps_per_second": 0.768, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.28112449799196787, |
| "grad_norm": 11.31718635559082, |
| "learning_rate": 7.5e-06, |
| "loss": 2.6978, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.28112449799196787, |
| "eval_loss": 2.725891590118408, |
| "eval_runtime": 71.721, |
| "eval_samples_per_second": 6.135, |
| "eval_steps_per_second": 0.767, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.30120481927710846, |
| "grad_norm": 19.520795822143555, |
| "learning_rate": 6.25e-06, |
| "loss": 2.6585, |
| "step": 375 |
| }, |
| { |
| "epoch": 0.30120481927710846, |
| "eval_loss": 2.7254438400268555, |
| "eval_runtime": 71.6451, |
| "eval_samples_per_second": 6.141, |
| "eval_steps_per_second": 0.768, |
| "step": 375 |
| }, |
| { |
| "epoch": 0.321285140562249, |
| "grad_norm": 9.94796371459961, |
| "learning_rate": 5e-06, |
| "loss": 2.7164, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.321285140562249, |
| "eval_loss": 2.724626302719116, |
| "eval_runtime": 71.7028, |
| "eval_samples_per_second": 6.136, |
| "eval_steps_per_second": 0.767, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.3413654618473896, |
| "grad_norm": 22.991910934448242, |
| "learning_rate": 3.75e-06, |
| "loss": 2.7357, |
| "step": 425 |
| }, |
| { |
| "epoch": 0.3413654618473896, |
| "eval_loss": 2.722055673599243, |
| "eval_runtime": 73.1673, |
| "eval_samples_per_second": 6.014, |
| "eval_steps_per_second": 0.752, |
| "step": 425 |
| }, |
| { |
| "epoch": 0.3614457831325301, |
| "grad_norm": 10.925905227661133, |
| "learning_rate": 2.5e-06, |
| "loss": 2.5698, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.3614457831325301, |
| "eval_loss": 2.7187235355377197, |
| "eval_runtime": 71.5477, |
| "eval_samples_per_second": 6.15, |
| "eval_steps_per_second": 0.769, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.3815261044176707, |
| "grad_norm": 26.42523765563965, |
| "learning_rate": 1.25e-06, |
| "loss": 2.727, |
| "step": 475 |
| }, |
| { |
| "epoch": 0.3815261044176707, |
| "eval_loss": 2.7193753719329834, |
| "eval_runtime": 71.4705, |
| "eval_samples_per_second": 6.156, |
| "eval_steps_per_second": 0.77, |
| "step": 475 |
| }, |
| { |
| "epoch": 0.40160642570281124, |
| "grad_norm": 13.879467964172363, |
| "learning_rate": 0.0, |
| "loss": 2.5964, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.40160642570281124, |
| "eval_loss": 2.718266725540161, |
| "eval_runtime": 72.5501, |
| "eval_samples_per_second": 6.065, |
| "eval_steps_per_second": 0.758, |
| "step": 500 |
| } |
| ], |
| "logging_steps": 25, |
| "max_steps": 500, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 25, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 6419582976000000.0, |
| "train_batch_size": 2, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|