{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 3.0,
  "eval_steps": 200,
  "global_step": 318,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.09456264775413711,
      "grad_norm": 18.87253189086914,
      "learning_rate": 2.8125e-06,
      "loss": 2.3773,
      "step": 10
    },
    {
      "epoch": 0.18912529550827423,
      "grad_norm": 6.278682708740234,
      "learning_rate": 5.9375e-06,
      "loss": 1.002,
      "step": 20
    },
    {
      "epoch": 0.28368794326241137,
      "grad_norm": 6.021182537078857,
      "learning_rate": 9.0625e-06,
      "loss": 0.8042,
      "step": 30
    },
    {
      "epoch": 0.37825059101654845,
      "grad_norm": 5.4127421379089355,
      "learning_rate": 9.985226282835216e-06,
      "loss": 0.8278,
      "step": 40
    },
    {
      "epoch": 0.4728132387706856,
      "grad_norm": 5.558228015899658,
      "learning_rate": 9.913075312749867e-06,
      "loss": 0.7046,
      "step": 50
    },
    {
      "epoch": 0.5673758865248227,
      "grad_norm": 5.16882848739624,
      "learning_rate": 9.781702165490638e-06,
      "loss": 0.6698,
      "step": 60
    },
    {
      "epoch": 0.6619385342789598,
      "grad_norm": 4.812554836273193,
      "learning_rate": 9.59269041210166e-06,
      "loss": 0.7291,
      "step": 70
    },
    {
      "epoch": 0.7565011820330969,
      "grad_norm": 4.815040111541748,
      "learning_rate": 9.348318399002347e-06,
      "loss": 0.641,
      "step": 80
    },
    {
      "epoch": 0.851063829787234,
      "grad_norm": 4.179919719696045,
      "learning_rate": 9.051531784814817e-06,
      "loss": 0.6477,
      "step": 90
    },
    {
      "epoch": 0.9456264775413712,
      "grad_norm": 5.225935935974121,
      "learning_rate": 8.705908033414426e-06,
      "loss": 0.5922,
      "step": 100
    },
    {
      "epoch": 1.037825059101655,
      "grad_norm": 3.746854066848755,
      "learning_rate": 8.315613291203977e-06,
      "loss": 0.5398,
      "step": 110
    },
    {
      "epoch": 1.132387706855792,
      "grad_norm": 4.180168628692627,
      "learning_rate": 7.885352168412677e-06,
      "loss": 0.3628,
      "step": 120
    },
    {
      "epoch": 1.226950354609929,
      "grad_norm": 4.26162052154541,
      "learning_rate": 7.420311029755688e-06,
      "loss": 0.3878,
      "step": 130
    },
    {
      "epoch": 1.3215130023640662,
      "grad_norm": 4.134862422943115,
      "learning_rate": 6.926095478028312e-06,
      "loss": 0.3484,
      "step": 140
    },
    {
      "epoch": 1.4160756501182032,
      "grad_norm": 3.880824565887451,
      "learning_rate": 6.408662784207149e-06,
      "loss": 0.3692,
      "step": 150
    },
    {
      "epoch": 1.5106382978723403,
      "grad_norm": 4.255764484405518,
      "learning_rate": 5.8742500785453226e-06,
      "loss": 0.3478,
      "step": 160
    },
    {
      "epoch": 1.6052009456264775,
      "grad_norm": 3.5713248252868652,
      "learning_rate": 5.3292991682458576e-06,
      "loss": 0.3675,
      "step": 170
    },
    {
      "epoch": 1.6997635933806148,
      "grad_norm": 3.488389015197754,
      "learning_rate": 4.7803788879604585e-06,
      "loss": 0.3758,
      "step": 180
    },
    {
      "epoch": 1.7943262411347518,
      "grad_norm": 3.949796676635742,
      "learning_rate": 4.234105919100261e-06,
      "loss": 0.3756,
      "step": 190
    },
    {
      "epoch": 1.8888888888888888,
      "grad_norm": 3.332965612411499,
      "learning_rate": 3.6970650324020784e-06,
      "loss": 0.3716,
      "step": 200
    },
    {
      "epoch": 1.8888888888888888,
      "eval_loss": 0.6453335285186768,
      "eval_runtime": 55.7022,
      "eval_samples_per_second": 5.906,
      "eval_steps_per_second": 0.844,
      "step": 200
    },
    {
      "epoch": 1.983451536643026,
      "grad_norm": 3.7968406677246094,
      "learning_rate": 3.1757297151456844e-06,
      "loss": 0.3388,
      "step": 210
    },
    {
      "epoch": 2.07565011820331,
      "grad_norm": 2.6286983489990234,
      "learning_rate": 2.6763841397811576e-06,
      "loss": 0.1871,
      "step": 220
    },
    {
      "epoch": 2.1702127659574466,
      "grad_norm": 437.03662109375,
      "learning_rate": 2.2050474145559326e-06,
      "loss": 0.1906,
      "step": 230
    },
    {
      "epoch": 2.264775413711584,
      "grad_norm": 2.9663329124450684,
      "learning_rate": 1.7674010292239746e-06,
      "loss": 0.1742,
      "step": 240
    },
    {
      "epoch": 2.359338061465721,
      "grad_norm": 2.8688528537750244,
      "learning_rate": 1.3687203704060343e-06,
      "loss": 0.143,
      "step": 250
    },
    {
      "epoch": 2.453900709219858,
      "grad_norm": 3.0793418884277344,
      "learning_rate": 1.013811132114384e-06,
      "loss": 0.1432,
      "step": 260
    },
    {
      "epoch": 2.548463356973995,
      "grad_norm": 2.9158718585968018,
      "learning_rate": 7.06951387949118e-07,
      "loss": 0.1317,
      "step": 270
    },
    {
      "epoch": 2.6430260047281324,
      "grad_norm": 2.4157230854034424,
      "learning_rate": 4.5184002322740784e-07,
      "loss": 0.1366,
      "step": 280
    },
    {
      "epoch": 2.7375886524822697,
      "grad_norm": 2.75584077835083,
      "learning_rate": 2.5155214864446556e-07,
      "loss": 0.1303,
      "step": 290
    },
    {
      "epoch": 2.8321513002364065,
      "grad_norm": 2.8111653327941895,
      "learning_rate": 1.0850203290965699e-07,
      "loss": 0.1506,
      "step": 300
    },
    {
      "epoch": 2.9267139479905437,
      "grad_norm": 2.849290609359741,
      "learning_rate": 2.441400116752146e-08,
      "loss": 0.136,
      "step": 310
    },
    {
      "epoch": 3.0,
      "step": 318,
      "total_flos": 55872632029184.0,
      "train_loss": 0.46639804007872093,
      "train_runtime": 7754.2521,
      "train_samples_per_second": 1.145,
      "train_steps_per_second": 0.041
    }
  ],
  "logging_steps": 10,
  "max_steps": 318,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 55872632029184.0,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}