| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.054945054945054944, | |
| "eval_steps": 500, | |
| "global_step": 1000, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.001098901098901099, | |
| "grad_norm": 4.387043476104736, | |
| "learning_rate": 5.4945054945054946e-08, | |
| "loss": 0.8831, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.002197802197802198, | |
| "grad_norm": 3.8907365798950195, | |
| "learning_rate": 1.0989010989010989e-07, | |
| "loss": 0.8866, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.0032967032967032967, | |
| "grad_norm": 3.2492220401763916, | |
| "learning_rate": 1.6483516483516484e-07, | |
| "loss": 0.8454, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.004395604395604396, | |
| "grad_norm": 3.0804762840270996, | |
| "learning_rate": 2.1978021978021978e-07, | |
| "loss": 0.8267, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.005494505494505495, | |
| "grad_norm": 2.037411689758301, | |
| "learning_rate": 2.7472527472527475e-07, | |
| "loss": 0.7371, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.006593406593406593, | |
| "grad_norm": 1.5473365783691406, | |
| "learning_rate": 3.296703296703297e-07, | |
| "loss": 0.6765, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.007692307692307693, | |
| "grad_norm": 1.1062999963760376, | |
| "learning_rate": 3.846153846153847e-07, | |
| "loss": 0.5946, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.008791208791208791, | |
| "grad_norm": 1.363224744796753, | |
| "learning_rate": 4.3956043956043957e-07, | |
| "loss": 0.5402, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.00989010989010989, | |
| "grad_norm": 0.9122905731201172, | |
| "learning_rate": 4.945054945054946e-07, | |
| "loss": 0.464, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.01098901098901099, | |
| "grad_norm": 0.676691472530365, | |
| "learning_rate": 5.494505494505495e-07, | |
| "loss": 0.4041, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.012087912087912088, | |
| "grad_norm": 0.5926629900932312, | |
| "learning_rate": 6.043956043956044e-07, | |
| "loss": 0.3727, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.013186813186813187, | |
| "grad_norm": 0.635013997554779, | |
| "learning_rate": 6.593406593406594e-07, | |
| "loss": 0.3609, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.014285714285714285, | |
| "grad_norm": 0.5836207270622253, | |
| "learning_rate": 7.142857142857143e-07, | |
| "loss": 0.3331, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.015384615384615385, | |
| "grad_norm": 0.548773467540741, | |
| "learning_rate": 7.692307692307694e-07, | |
| "loss": 0.3201, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.016483516483516484, | |
| "grad_norm": 0.5962342023849487, | |
| "learning_rate": 8.241758241758242e-07, | |
| "loss": 0.2993, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.017582417582417582, | |
| "grad_norm": 0.5346819162368774, | |
| "learning_rate": 8.791208791208791e-07, | |
| "loss": 0.2792, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.01868131868131868, | |
| "grad_norm": 0.569210946559906, | |
| "learning_rate": 9.340659340659341e-07, | |
| "loss": 0.273, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.01978021978021978, | |
| "grad_norm": 0.5142342448234558, | |
| "learning_rate": 9.890109890109891e-07, | |
| "loss": 0.2621, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.020879120879120878, | |
| "grad_norm": 0.5067290663719177, | |
| "learning_rate": 1.043956043956044e-06, | |
| "loss": 0.2641, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.02197802197802198, | |
| "grad_norm": 0.44699764251708984, | |
| "learning_rate": 1.098901098901099e-06, | |
| "loss": 0.2553, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.023076923076923078, | |
| "grad_norm": 0.5279501080513, | |
| "learning_rate": 1.153846153846154e-06, | |
| "loss": 0.2488, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.024175824175824177, | |
| "grad_norm": 0.5009133219718933, | |
| "learning_rate": 1.2087912087912089e-06, | |
| "loss": 0.2499, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.025274725274725275, | |
| "grad_norm": 0.5484806895256042, | |
| "learning_rate": 1.263736263736264e-06, | |
| "loss": 0.2485, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.026373626373626374, | |
| "grad_norm": 0.44114941358566284, | |
| "learning_rate": 1.3186813186813187e-06, | |
| "loss": 0.2499, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.027472527472527472, | |
| "grad_norm": 0.5557438731193542, | |
| "learning_rate": 1.3736263736263736e-06, | |
| "loss": 0.2455, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.02857142857142857, | |
| "grad_norm": 0.5493007898330688, | |
| "learning_rate": 1.4285714285714286e-06, | |
| "loss": 0.2396, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.02967032967032967, | |
| "grad_norm": 0.5284810662269592, | |
| "learning_rate": 1.4835164835164835e-06, | |
| "loss": 0.2383, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.03076923076923077, | |
| "grad_norm": 0.5073139667510986, | |
| "learning_rate": 1.5384615384615387e-06, | |
| "loss": 0.2389, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.031868131868131866, | |
| "grad_norm": 0.5186975002288818, | |
| "learning_rate": 1.5934065934065933e-06, | |
| "loss": 0.2372, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.03296703296703297, | |
| "grad_norm": 0.5739095211029053, | |
| "learning_rate": 1.6483516483516484e-06, | |
| "loss": 0.2358, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.03406593406593406, | |
| "grad_norm": 0.5144438147544861, | |
| "learning_rate": 1.7032967032967032e-06, | |
| "loss": 0.2319, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.035164835164835165, | |
| "grad_norm": 0.4886190593242645, | |
| "learning_rate": 1.7582417582417583e-06, | |
| "loss": 0.2297, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.03626373626373627, | |
| "grad_norm": 0.6088211536407471, | |
| "learning_rate": 1.8131868131868135e-06, | |
| "loss": 0.2356, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.03736263736263736, | |
| "grad_norm": 0.4712292551994324, | |
| "learning_rate": 1.8681318681318681e-06, | |
| "loss": 0.2334, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.038461538461538464, | |
| "grad_norm": 0.5712177157402039, | |
| "learning_rate": 1.9230769230769234e-06, | |
| "loss": 0.2302, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.03956043956043956, | |
| "grad_norm": 0.5427699089050293, | |
| "learning_rate": 1.9780219780219782e-06, | |
| "loss": 0.2248, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.04065934065934066, | |
| "grad_norm": 0.6642568707466125, | |
| "learning_rate": 2.032967032967033e-06, | |
| "loss": 0.2291, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.041758241758241756, | |
| "grad_norm": 0.5859007239341736, | |
| "learning_rate": 2.087912087912088e-06, | |
| "loss": 0.227, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.04285714285714286, | |
| "grad_norm": 0.6507712602615356, | |
| "learning_rate": 2.142857142857143e-06, | |
| "loss": 0.227, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.04395604395604396, | |
| "grad_norm": 0.5675429105758667, | |
| "learning_rate": 2.197802197802198e-06, | |
| "loss": 0.2259, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.045054945054945054, | |
| "grad_norm": 0.6223055124282837, | |
| "learning_rate": 2.252747252747253e-06, | |
| "loss": 0.2283, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.046153846153846156, | |
| "grad_norm": 0.5504657030105591, | |
| "learning_rate": 2.307692307692308e-06, | |
| "loss": 0.2246, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.04725274725274725, | |
| "grad_norm": 0.48020097613334656, | |
| "learning_rate": 2.3626373626373625e-06, | |
| "loss": 0.225, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.04835164835164835, | |
| "grad_norm": 0.4979713261127472, | |
| "learning_rate": 2.4175824175824177e-06, | |
| "loss": 0.2212, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.04945054945054945, | |
| "grad_norm": 0.49497634172439575, | |
| "learning_rate": 2.4725274725274726e-06, | |
| "loss": 0.2234, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.05054945054945055, | |
| "grad_norm": 0.6207996010780334, | |
| "learning_rate": 2.527472527472528e-06, | |
| "loss": 0.2256, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 0.051648351648351645, | |
| "grad_norm": 0.530981719493866, | |
| "learning_rate": 2.5824175824175822e-06, | |
| "loss": 0.2231, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 0.05274725274725275, | |
| "grad_norm": 0.5495067834854126, | |
| "learning_rate": 2.6373626373626375e-06, | |
| "loss": 0.223, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 0.05384615384615385, | |
| "grad_norm": 0.5651763081550598, | |
| "learning_rate": 2.6923076923076928e-06, | |
| "loss": 0.2213, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 0.054945054945054944, | |
| "grad_norm": 0.553247332572937, | |
| "learning_rate": 2.747252747252747e-06, | |
| "loss": 0.2219, | |
| "step": 1000 | |
| } | |
| ], | |
| "logging_steps": 20, | |
| "max_steps": 91000, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 5, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 6.336356548608e+17, | |
| "train_batch_size": 32, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |