| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 5.0, |
| "eval_steps": 500, |
| "global_step": 950, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.005263157894736842, |
| "grad_norm": 7.792475124460211, |
| "learning_rate": 8.421052631578948e-07, |
| "loss": 1.844, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.010526315789473684, |
| "grad_norm": 7.77107258632735, |
| "learning_rate": 1.6842105263157895e-06, |
| "loss": 1.8342, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.015789473684210527, |
| "grad_norm": 7.734735147966475, |
| "learning_rate": 2.5263157894736844e-06, |
| "loss": 1.8324, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.021052631578947368, |
| "grad_norm": 7.248228363662717, |
| "learning_rate": 3.368421052631579e-06, |
| "loss": 1.816, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.02631578947368421, |
| "grad_norm": 5.778539169912221, |
| "learning_rate": 4.210526315789474e-06, |
| "loss": 1.7849, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.031578947368421054, |
| "grad_norm": 3.239108511490633, |
| "learning_rate": 5.052631578947369e-06, |
| "loss": 1.7388, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.03684210526315789, |
| "grad_norm": 2.659157940796987, |
| "learning_rate": 5.8947368421052634e-06, |
| "loss": 1.7195, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.042105263157894736, |
| "grad_norm": 5.788438535248494, |
| "learning_rate": 6.736842105263158e-06, |
| "loss": 1.716, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.04736842105263158, |
| "grad_norm": 5.78966752434045, |
| "learning_rate": 7.578947368421054e-06, |
| "loss": 1.7132, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.05263157894736842, |
| "grad_norm": 5.662263812934936, |
| "learning_rate": 8.421052631578948e-06, |
| "loss": 1.7155, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.05789473684210526, |
| "grad_norm": 4.3906745364224955, |
| "learning_rate": 9.263157894736842e-06, |
| "loss": 1.6617, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.06315789473684211, |
| "grad_norm": 3.7326640381933247, |
| "learning_rate": 1.0105263157894738e-05, |
| "loss": 1.6634, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.06842105263157895, |
| "grad_norm": 2.669371108068819, |
| "learning_rate": 1.0947368421052633e-05, |
| "loss": 1.6463, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.07368421052631578, |
| "grad_norm": 2.080061860580032, |
| "learning_rate": 1.1789473684210527e-05, |
| "loss": 1.5925, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.07894736842105263, |
| "grad_norm": 2.245619823013294, |
| "learning_rate": 1.263157894736842e-05, |
| "loss": 1.5797, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.08421052631578947, |
| "grad_norm": 2.0053863294690806, |
| "learning_rate": 1.3473684210526316e-05, |
| "loss": 1.5613, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.08947368421052632, |
| "grad_norm": 1.9625639332300135, |
| "learning_rate": 1.4315789473684212e-05, |
| "loss": 1.5638, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.09473684210526316, |
| "grad_norm": 1.5893875551469467, |
| "learning_rate": 1.5157894736842107e-05, |
| "loss": 1.5522, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.1, |
| "grad_norm": 1.5906140921890974, |
| "learning_rate": 1.6000000000000003e-05, |
| "loss": 1.5209, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.10526315789473684, |
| "grad_norm": 1.559169647702599, |
| "learning_rate": 1.6842105263157896e-05, |
| "loss": 1.4911, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.11052631578947368, |
| "grad_norm": 1.2982321447862488, |
| "learning_rate": 1.768421052631579e-05, |
| "loss": 1.4991, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.11578947368421053, |
| "grad_norm": 1.3143336518371307, |
| "learning_rate": 1.8526315789473684e-05, |
| "loss": 1.5012, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.12105263157894737, |
| "grad_norm": 1.279000313318411, |
| "learning_rate": 1.936842105263158e-05, |
| "loss": 1.5011, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.12631578947368421, |
| "grad_norm": 0.922398830661329, |
| "learning_rate": 2.0210526315789475e-05, |
| "loss": 1.4624, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.13157894736842105, |
| "grad_norm": 1.3274580990016782, |
| "learning_rate": 2.105263157894737e-05, |
| "loss": 1.4645, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.1368421052631579, |
| "grad_norm": 1.1341340679878056, |
| "learning_rate": 2.1894736842105266e-05, |
| "loss": 1.4715, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.14210526315789473, |
| "grad_norm": 1.2628910185228979, |
| "learning_rate": 2.273684210526316e-05, |
| "loss": 1.4584, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.14736842105263157, |
| "grad_norm": 1.57433223809749, |
| "learning_rate": 2.3578947368421054e-05, |
| "loss": 1.448, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.15263157894736842, |
| "grad_norm": 0.8438420059614518, |
| "learning_rate": 2.442105263157895e-05, |
| "loss": 1.4442, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.15789473684210525, |
| "grad_norm": 1.280060736418443, |
| "learning_rate": 2.526315789473684e-05, |
| "loss": 1.4577, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.1631578947368421, |
| "grad_norm": 1.8388712899282178, |
| "learning_rate": 2.610526315789474e-05, |
| "loss": 1.4354, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.16842105263157894, |
| "grad_norm": 1.1283525934214154, |
| "learning_rate": 2.6947368421052632e-05, |
| "loss": 1.4458, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.1736842105263158, |
| "grad_norm": 1.559904631956879, |
| "learning_rate": 2.778947368421053e-05, |
| "loss": 1.4337, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.17894736842105263, |
| "grad_norm": 1.433428530147804, |
| "learning_rate": 2.8631578947368423e-05, |
| "loss": 1.4311, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.18421052631578946, |
| "grad_norm": 1.4475127262626666, |
| "learning_rate": 2.9473684210526317e-05, |
| "loss": 1.4296, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.18947368421052632, |
| "grad_norm": 1.6258635588736965, |
| "learning_rate": 3.0315789473684214e-05, |
| "loss": 1.403, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.19473684210526315, |
| "grad_norm": 0.9140879296838869, |
| "learning_rate": 3.1157894736842105e-05, |
| "loss": 1.4149, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.2, |
| "grad_norm": 1.355462077600805, |
| "learning_rate": 3.2000000000000005e-05, |
| "loss": 1.4241, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.20526315789473684, |
| "grad_norm": 2.025270623377536, |
| "learning_rate": 3.28421052631579e-05, |
| "loss": 1.4269, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.21052631578947367, |
| "grad_norm": 1.5911005582582893, |
| "learning_rate": 3.368421052631579e-05, |
| "loss": 1.4261, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.21578947368421053, |
| "grad_norm": 1.7736374990816877, |
| "learning_rate": 3.452631578947369e-05, |
| "loss": 1.4248, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.22105263157894736, |
| "grad_norm": 2.020103172778917, |
| "learning_rate": 3.536842105263158e-05, |
| "loss": 1.4019, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.22631578947368422, |
| "grad_norm": 2.1032904325246693, |
| "learning_rate": 3.621052631578948e-05, |
| "loss": 1.4138, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.23157894736842105, |
| "grad_norm": 2.034839240989353, |
| "learning_rate": 3.705263157894737e-05, |
| "loss": 1.4099, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.23684210526315788, |
| "grad_norm": 1.5310564952941104, |
| "learning_rate": 3.789473684210526e-05, |
| "loss": 1.419, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.24210526315789474, |
| "grad_norm": 2.3582192444588594, |
| "learning_rate": 3.873684210526316e-05, |
| "loss": 1.3989, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.24736842105263157, |
| "grad_norm": 1.2404229618115798, |
| "learning_rate": 3.9578947368421056e-05, |
| "loss": 1.4034, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.25263157894736843, |
| "grad_norm": 2.631335015977353, |
| "learning_rate": 4.042105263157895e-05, |
| "loss": 1.4067, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.2578947368421053, |
| "grad_norm": 1.7741169104229972, |
| "learning_rate": 4.126315789473685e-05, |
| "loss": 1.3842, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.2631578947368421, |
| "grad_norm": 2.7958288897467813, |
| "learning_rate": 4.210526315789474e-05, |
| "loss": 1.4165, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.26842105263157895, |
| "grad_norm": 2.106018978323054, |
| "learning_rate": 4.294736842105264e-05, |
| "loss": 1.408, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.2736842105263158, |
| "grad_norm": 2.1854136752650284, |
| "learning_rate": 4.378947368421053e-05, |
| "loss": 1.4039, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.2789473684210526, |
| "grad_norm": 2.1320541034792897, |
| "learning_rate": 4.463157894736842e-05, |
| "loss": 1.3792, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.28421052631578947, |
| "grad_norm": 2.1929241850886183, |
| "learning_rate": 4.547368421052632e-05, |
| "loss": 1.405, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.2894736842105263, |
| "grad_norm": 2.4083834163686406, |
| "learning_rate": 4.6315789473684214e-05, |
| "loss": 1.397, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.29473684210526313, |
| "grad_norm": 1.8900388617787558, |
| "learning_rate": 4.715789473684211e-05, |
| "loss": 1.398, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.3, |
| "grad_norm": 1.8869758461876107, |
| "learning_rate": 4.8e-05, |
| "loss": 1.399, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.30526315789473685, |
| "grad_norm": 1.9387939885602292, |
| "learning_rate": 4.88421052631579e-05, |
| "loss": 1.4029, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.3105263157894737, |
| "grad_norm": 2.8691231917123, |
| "learning_rate": 4.9684210526315796e-05, |
| "loss": 1.3774, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.3157894736842105, |
| "grad_norm": 2.023114295041414, |
| "learning_rate": 5.052631578947368e-05, |
| "loss": 1.3925, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.32105263157894737, |
| "grad_norm": 1.8266981690923911, |
| "learning_rate": 5.136842105263158e-05, |
| "loss": 1.3752, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.3263157894736842, |
| "grad_norm": 2.749570229085121, |
| "learning_rate": 5.221052631578948e-05, |
| "loss": 1.3846, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.33157894736842103, |
| "grad_norm": 1.577733284649954, |
| "learning_rate": 5.305263157894737e-05, |
| "loss": 1.3983, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.3368421052631579, |
| "grad_norm": 3.4308320005068897, |
| "learning_rate": 5.3894736842105265e-05, |
| "loss": 1.3898, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.34210526315789475, |
| "grad_norm": 2.7293556039435583, |
| "learning_rate": 5.4736842105263165e-05, |
| "loss": 1.3801, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.3473684210526316, |
| "grad_norm": 2.7986318090963036, |
| "learning_rate": 5.557894736842106e-05, |
| "loss": 1.3929, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.3526315789473684, |
| "grad_norm": 2.5730126104338407, |
| "learning_rate": 5.642105263157895e-05, |
| "loss": 1.382, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.35789473684210527, |
| "grad_norm": 2.467033189056739, |
| "learning_rate": 5.726315789473685e-05, |
| "loss": 1.3738, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.3631578947368421, |
| "grad_norm": 1.5266025457633567, |
| "learning_rate": 5.810526315789475e-05, |
| "loss": 1.394, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.3684210526315789, |
| "grad_norm": 2.1446942030427567, |
| "learning_rate": 5.8947368421052634e-05, |
| "loss": 1.3845, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.3736842105263158, |
| "grad_norm": 2.3417890559923986, |
| "learning_rate": 5.978947368421053e-05, |
| "loss": 1.3841, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.37894736842105264, |
| "grad_norm": 1.9604854202915063, |
| "learning_rate": 6.063157894736843e-05, |
| "loss": 1.3834, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.38421052631578945, |
| "grad_norm": 2.4507109733612578, |
| "learning_rate": 6.147368421052632e-05, |
| "loss": 1.4071, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.3894736842105263, |
| "grad_norm": 3.5995720583445063, |
| "learning_rate": 6.231578947368421e-05, |
| "loss": 1.3704, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.39473684210526316, |
| "grad_norm": 2.0182422302151797, |
| "learning_rate": 6.315789473684212e-05, |
| "loss": 1.379, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.4, |
| "grad_norm": 4.1134652051924, |
| "learning_rate": 6.400000000000001e-05, |
| "loss": 1.3788, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.4052631578947368, |
| "grad_norm": 2.929511327190635, |
| "learning_rate": 6.484210526315789e-05, |
| "loss": 1.3914, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.4105263157894737, |
| "grad_norm": 3.5649183111031406, |
| "learning_rate": 6.56842105263158e-05, |
| "loss": 1.3715, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.41578947368421054, |
| "grad_norm": 3.365859410878661, |
| "learning_rate": 6.652631578947369e-05, |
| "loss": 1.3986, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.42105263157894735, |
| "grad_norm": 3.304005403851428, |
| "learning_rate": 6.736842105263159e-05, |
| "loss": 1.3783, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.4263157894736842, |
| "grad_norm": 2.894847638309193, |
| "learning_rate": 6.821052631578948e-05, |
| "loss": 1.3802, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.43157894736842106, |
| "grad_norm": 2.698808992917365, |
| "learning_rate": 6.905263157894737e-05, |
| "loss": 1.3761, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.4368421052631579, |
| "grad_norm": 2.163408526778829, |
| "learning_rate": 6.989473684210527e-05, |
| "loss": 1.3911, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.4421052631578947, |
| "grad_norm": 3.2099232827707014, |
| "learning_rate": 7.073684210526316e-05, |
| "loss": 1.3926, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.4473684210526316, |
| "grad_norm": 2.475621296000252, |
| "learning_rate": 7.157894736842105e-05, |
| "loss": 1.3932, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.45263157894736844, |
| "grad_norm": 2.9203370802467936, |
| "learning_rate": 7.242105263157896e-05, |
| "loss": 1.3822, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.45789473684210524, |
| "grad_norm": 2.3862423279450393, |
| "learning_rate": 7.326315789473684e-05, |
| "loss": 1.3721, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.4631578947368421, |
| "grad_norm": 3.275399501836966, |
| "learning_rate": 7.410526315789474e-05, |
| "loss": 1.4002, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.46842105263157896, |
| "grad_norm": 2.5587038178412533, |
| "learning_rate": 7.494736842105264e-05, |
| "loss": 1.3812, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.47368421052631576, |
| "grad_norm": 3.0292582788342375, |
| "learning_rate": 7.578947368421052e-05, |
| "loss": 1.3679, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.4789473684210526, |
| "grad_norm": 2.4725567771769366, |
| "learning_rate": 7.663157894736843e-05, |
| "loss": 1.3825, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.4842105263157895, |
| "grad_norm": 2.983972110527992, |
| "learning_rate": 7.747368421052633e-05, |
| "loss": 1.3727, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.48947368421052634, |
| "grad_norm": 2.498982942091222, |
| "learning_rate": 7.831578947368422e-05, |
| "loss": 1.3769, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.49473684210526314, |
| "grad_norm": 3.526109045872143, |
| "learning_rate": 7.915789473684211e-05, |
| "loss": 1.3788, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.5, |
| "grad_norm": 3.1483584936822284, |
| "learning_rate": 8e-05, |
| "loss": 1.3643, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.5052631578947369, |
| "grad_norm": 2.0080980217269517, |
| "learning_rate": 7.999972997932227e-05, |
| "loss": 1.3846, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.5105263157894737, |
| "grad_norm": 4.353229694894079, |
| "learning_rate": 7.999891992093464e-05, |
| "loss": 1.3787, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.5157894736842106, |
| "grad_norm": 2.878217785277169, |
| "learning_rate": 7.999756983577373e-05, |
| "loss": 1.3695, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.5210526315789473, |
| "grad_norm": 1.962122819070823, |
| "learning_rate": 7.999567974206707e-05, |
| "loss": 1.364, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.5263157894736842, |
| "grad_norm": 3.9822305254995887, |
| "learning_rate": 7.999324966533291e-05, |
| "loss": 1.3928, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.531578947368421, |
| "grad_norm": 2.4880539516369713, |
| "learning_rate": 7.999027963837979e-05, |
| "loss": 1.3656, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.5368421052631579, |
| "grad_norm": 4.910309768750227, |
| "learning_rate": 7.998676970130614e-05, |
| "loss": 1.3802, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.5421052631578948, |
| "grad_norm": 2.905093024244942, |
| "learning_rate": 7.998271990149972e-05, |
| "loss": 1.3731, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.5473684210526316, |
| "grad_norm": 3.8609393179697284, |
| "learning_rate": 7.997813029363704e-05, |
| "loss": 1.4037, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.5526315789473685, |
| "grad_norm": 3.6672144152714865, |
| "learning_rate": 7.997300093968255e-05, |
| "loss": 1.3739, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.5578947368421052, |
| "grad_norm": 3.0897791527691, |
| "learning_rate": 7.996733190888783e-05, |
| "loss": 1.3729, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.5631578947368421, |
| "grad_norm": 3.0709819435052794, |
| "learning_rate": 7.996112327779065e-05, |
| "loss": 1.3735, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.5684210526315789, |
| "grad_norm": 2.4110812616502053, |
| "learning_rate": 7.995437513021393e-05, |
| "loss": 1.3625, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.5736842105263158, |
| "grad_norm": 3.20735512285491, |
| "learning_rate": 7.994708755726469e-05, |
| "loss": 1.3646, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.5789473684210527, |
| "grad_norm": 3.410126968058674, |
| "learning_rate": 7.993926065733265e-05, |
| "loss": 1.3828, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.5842105263157895, |
| "grad_norm": 1.9981274214741556, |
| "learning_rate": 7.993089453608908e-05, |
| "loss": 1.3614, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.5894736842105263, |
| "grad_norm": 3.8348881514308104, |
| "learning_rate": 7.992198930648527e-05, |
| "loss": 1.366, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.5947368421052631, |
| "grad_norm": 3.6103553587856188, |
| "learning_rate": 7.991254508875098e-05, |
| "loss": 1.3797, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.6, |
| "grad_norm": 2.545996054998508, |
| "learning_rate": 7.990256201039297e-05, |
| "loss": 1.37, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.6052631578947368, |
| "grad_norm": 2.9285662609146597, |
| "learning_rate": 7.98920402061931e-05, |
| "loss": 1.3691, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.6105263157894737, |
| "grad_norm": 3.497818597459857, |
| "learning_rate": 7.988097981820659e-05, |
| "loss": 1.3724, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.6157894736842106, |
| "grad_norm": 1.955436154771224, |
| "learning_rate": 7.986938099576015e-05, |
| "loss": 1.3553, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.6210526315789474, |
| "grad_norm": 2.024911219916847, |
| "learning_rate": 7.985724389544982e-05, |
| "loss": 1.3736, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.6263157894736842, |
| "grad_norm": 2.726230215863742, |
| "learning_rate": 7.984456868113905e-05, |
| "loss": 1.3666, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.631578947368421, |
| "grad_norm": 1.8902008783219175, |
| "learning_rate": 7.98313555239563e-05, |
| "loss": 1.358, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.6368421052631579, |
| "grad_norm": 3.3343114876725215, |
| "learning_rate": 7.98176046022929e-05, |
| "loss": 1.3674, |
| "step": 121 |
| }, |
| { |
| "epoch": 0.6421052631578947, |
| "grad_norm": 2.8760888936249716, |
| "learning_rate": 7.980331610180046e-05, |
| "loss": 1.3598, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.6473684210526316, |
| "grad_norm": 1.821327982649168, |
| "learning_rate": 7.978849021538855e-05, |
| "loss": 1.3559, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.6526315789473685, |
| "grad_norm": 2.1330475395816038, |
| "learning_rate": 7.977312714322193e-05, |
| "loss": 1.3529, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.6578947368421053, |
| "grad_norm": 2.6018688330295032, |
| "learning_rate": 7.975722709271799e-05, |
| "loss": 1.3537, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.6631578947368421, |
| "grad_norm": 2.8283770858028143, |
| "learning_rate": 7.974079027854382e-05, |
| "loss": 1.3591, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.6684210526315789, |
| "grad_norm": 2.0562890332720625, |
| "learning_rate": 7.972381692261343e-05, |
| "loss": 1.3523, |
| "step": 127 |
| }, |
| { |
| "epoch": 0.6736842105263158, |
| "grad_norm": 2.677519447523674, |
| "learning_rate": 7.970630725408467e-05, |
| "loss": 1.3588, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.6789473684210526, |
| "grad_norm": 3.39520049259372, |
| "learning_rate": 7.968826150935615e-05, |
| "loss": 1.357, |
| "step": 129 |
| }, |
| { |
| "epoch": 0.6842105263157895, |
| "grad_norm": 1.326798562942608, |
| "learning_rate": 7.96696799320641e-05, |
| "loss": 1.3547, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.6894736842105263, |
| "grad_norm": 5.713227145762471, |
| "learning_rate": 7.965056277307902e-05, |
| "loss": 1.405, |
| "step": 131 |
| }, |
| { |
| "epoch": 0.6947368421052632, |
| "grad_norm": 4.708892369834835, |
| "learning_rate": 7.963091029050231e-05, |
| "loss": 1.4096, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.7, |
| "grad_norm": 3.526071821361426, |
| "learning_rate": 7.961072274966282e-05, |
| "loss": 1.3766, |
| "step": 133 |
| }, |
| { |
| "epoch": 0.7052631578947368, |
| "grad_norm": 3.421098464992638, |
| "learning_rate": 7.95900004231132e-05, |
| "loss": 1.372, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.7105263157894737, |
| "grad_norm": 3.1868582792498468, |
| "learning_rate": 7.956874359062632e-05, |
| "loss": 1.3742, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.7157894736842105, |
| "grad_norm": 2.5369692116526354, |
| "learning_rate": 7.954695253919138e-05, |
| "loss": 1.38, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.7210526315789474, |
| "grad_norm": 4.029612203074803, |
| "learning_rate": 7.952462756301007e-05, |
| "loss": 1.3789, |
| "step": 137 |
| }, |
| { |
| "epoch": 0.7263157894736842, |
| "grad_norm": 3.071634072769698, |
| "learning_rate": 7.95017689634927e-05, |
| "loss": 1.3692, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.7315789473684211, |
| "grad_norm": 3.38738695405503, |
| "learning_rate": 7.947837704925396e-05, |
| "loss": 1.3692, |
| "step": 139 |
| }, |
| { |
| "epoch": 0.7368421052631579, |
| "grad_norm": 2.900109575705123, |
| "learning_rate": 7.94544521361089e-05, |
| "loss": 1.3851, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.7421052631578947, |
| "grad_norm": 2.7866463709429903, |
| "learning_rate": 7.942999454706858e-05, |
| "loss": 1.3797, |
| "step": 141 |
| }, |
| { |
| "epoch": 0.7473684210526316, |
| "grad_norm": 2.053753694637562, |
| "learning_rate": 7.940500461233572e-05, |
| "loss": 1.3697, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.7526315789473684, |
| "grad_norm": 2.9697850807629464, |
| "learning_rate": 7.93794826693003e-05, |
| "loss": 1.349, |
| "step": 143 |
| }, |
| { |
| "epoch": 0.7578947368421053, |
| "grad_norm": 2.370919157592963, |
| "learning_rate": 7.935342906253492e-05, |
| "loss": 1.3556, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.7631578947368421, |
| "grad_norm": 3.3074201358634125, |
| "learning_rate": 7.932684414379021e-05, |
| "loss": 1.3656, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.7684210526315789, |
| "grad_norm": 2.2648377858473605, |
| "learning_rate": 7.929972827199006e-05, |
| "loss": 1.3704, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.7736842105263158, |
| "grad_norm": 2.835496161570207, |
| "learning_rate": 7.927208181322679e-05, |
| "loss": 1.3466, |
| "step": 147 |
| }, |
| { |
| "epoch": 0.7789473684210526, |
| "grad_norm": 2.343363991518181, |
| "learning_rate": 7.924390514075616e-05, |
| "loss": 1.3726, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.7842105263157895, |
| "grad_norm": 2.7560515706025455, |
| "learning_rate": 7.921519863499239e-05, |
| "loss": 1.3626, |
| "step": 149 |
| }, |
| { |
| "epoch": 0.7894736842105263, |
| "grad_norm": 2.4007554609918564, |
| "learning_rate": 7.918596268350296e-05, |
| "loss": 1.3587, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.7947368421052632, |
| "grad_norm": 2.5965741897469896, |
| "learning_rate": 7.915619768100348e-05, |
| "loss": 1.3813, |
| "step": 151 |
| }, |
| { |
| "epoch": 0.8, |
| "grad_norm": 2.0691402568946606, |
| "learning_rate": 7.912590402935223e-05, |
| "loss": 1.3466, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.8052631578947368, |
| "grad_norm": 2.682134570279511, |
| "learning_rate": 7.909508213754484e-05, |
| "loss": 1.3484, |
| "step": 153 |
| }, |
| { |
| "epoch": 0.8105263157894737, |
| "grad_norm": 2.961951846828786, |
| "learning_rate": 7.906373242170872e-05, |
| "loss": 1.356, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.8157894736842105, |
| "grad_norm": 1.647563803925644, |
| "learning_rate": 7.903185530509743e-05, |
| "loss": 1.3314, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.8210526315789474, |
| "grad_norm": 1.7184813388266553, |
| "learning_rate": 7.899945121808501e-05, |
| "loss": 1.3521, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.8263157894736842, |
| "grad_norm": 2.795936841854527, |
| "learning_rate": 7.896652059816015e-05, |
| "loss": 1.3635, |
| "step": 157 |
| }, |
| { |
| "epoch": 0.8315789473684211, |
| "grad_norm": 3.0399857654385034, |
| "learning_rate": 7.893306388992023e-05, |
| "loss": 1.3619, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.8368421052631579, |
| "grad_norm": 1.5156478386100931, |
| "learning_rate": 7.889908154506545e-05, |
| "loss": 1.332, |
| "step": 159 |
| }, |
| { |
| "epoch": 0.8421052631578947, |
| "grad_norm": 2.299123403411113, |
| "learning_rate": 7.886457402239256e-05, |
| "loss": 1.351, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.8473684210526315, |
| "grad_norm": 2.307814996765452, |
| "learning_rate": 7.88295417877888e-05, |
| "loss": 1.3565, |
| "step": 161 |
| }, |
| { |
| "epoch": 0.8526315789473684, |
| "grad_norm": 3.2987572063667643, |
| "learning_rate": 7.879398531422558e-05, |
| "loss": 1.3719, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.8578947368421053, |
| "grad_norm": 1.845792873065677, |
| "learning_rate": 7.875790508175202e-05, |
| "loss": 1.3384, |
| "step": 163 |
| }, |
| { |
| "epoch": 0.8631578947368421, |
| "grad_norm": 2.4609311250378942, |
| "learning_rate": 7.87213015774886e-05, |
| "loss": 1.3633, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.868421052631579, |
| "grad_norm": 2.693256637820666, |
| "learning_rate": 7.868417529562043e-05, |
| "loss": 1.3639, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.8736842105263158, |
| "grad_norm": 1.5086514518899445, |
| "learning_rate": 7.864652673739073e-05, |
| "loss": 1.3615, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.8789473684210526, |
| "grad_norm": 3.1066465037527147, |
| "learning_rate": 7.860835641109395e-05, |
| "loss": 1.3507, |
| "step": 167 |
| }, |
| { |
| "epoch": 0.8842105263157894, |
| "grad_norm": 2.327552820376472, |
| "learning_rate": 7.856966483206897e-05, |
| "loss": 1.3458, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.8894736842105263, |
| "grad_norm": 2.808477876459289, |
| "learning_rate": 7.853045252269208e-05, |
| "loss": 1.3601, |
| "step": 169 |
| }, |
| { |
| "epoch": 0.8947368421052632, |
| "grad_norm": 2.544696040692953, |
| "learning_rate": 7.849072001237001e-05, |
| "loss": 1.3529, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.9, |
| "grad_norm": 2.9946344095632575, |
| "learning_rate": 7.845046783753276e-05, |
| "loss": 1.3612, |
| "step": 171 |
| }, |
| { |
| "epoch": 0.9052631578947369, |
| "grad_norm": 2.1670615109125646, |
| "learning_rate": 7.840969654162627e-05, |
| "loss": 1.3403, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.9105263157894737, |
| "grad_norm": 2.4274632796911324, |
| "learning_rate": 7.83684066751052e-05, |
| "loss": 1.3492, |
| "step": 173 |
| }, |
| { |
| "epoch": 0.9157894736842105, |
| "grad_norm": 2.2078510708150416, |
| "learning_rate": 7.832659879542544e-05, |
| "loss": 1.3322, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.9210526315789473, |
| "grad_norm": 2.834266147883312, |
| "learning_rate": 7.828427346703657e-05, |
| "loss": 1.3658, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.9263157894736842, |
| "grad_norm": 2.5098411818246156, |
| "learning_rate": 7.824143126137431e-05, |
| "loss": 1.3343, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.9315789473684211, |
| "grad_norm": 1.9856802860400529, |
| "learning_rate": 7.819807275685272e-05, |
| "loss": 1.3408, |
| "step": 177 |
| }, |
| { |
| "epoch": 0.9368421052631579, |
| "grad_norm": 2.4068321545997717, |
| "learning_rate": 7.815419853885644e-05, |
| "loss": 1.3482, |
| "step": 178 |
| }, |
| { |
| "epoch": 0.9421052631578948, |
| "grad_norm": 2.0424531148819507, |
| "learning_rate": 7.810980919973277e-05, |
| "loss": 1.3492, |
| "step": 179 |
| }, |
| { |
| "epoch": 0.9473684210526315, |
| "grad_norm": 2.88855449179049, |
| "learning_rate": 7.806490533878368e-05, |
| "loss": 1.3409, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.9526315789473684, |
| "grad_norm": 2.2025647302444593, |
| "learning_rate": 7.801948756225772e-05, |
| "loss": 1.3552, |
| "step": 181 |
| }, |
| { |
| "epoch": 0.9578947368421052, |
| "grad_norm": 1.705774295065343, |
| "learning_rate": 7.797355648334185e-05, |
| "loss": 1.3298, |
| "step": 182 |
| }, |
| { |
| "epoch": 0.9631578947368421, |
| "grad_norm": 2.0591090166453907, |
| "learning_rate": 7.792711272215308e-05, |
| "loss": 1.3234, |
| "step": 183 |
| }, |
| { |
| "epoch": 0.968421052631579, |
| "grad_norm": 2.543058246007598, |
| "learning_rate": 7.788015690573025e-05, |
| "loss": 1.3454, |
| "step": 184 |
| }, |
| { |
| "epoch": 0.9736842105263158, |
| "grad_norm": 2.7945291673690273, |
| "learning_rate": 7.783268966802539e-05, |
| "loss": 1.3623, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.9789473684210527, |
| "grad_norm": 1.2623547103194999, |
| "learning_rate": 7.778471164989532e-05, |
| "loss": 1.3253, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.9842105263157894, |
| "grad_norm": 2.8671346135920555, |
| "learning_rate": 7.773622349909285e-05, |
| "loss": 1.3516, |
| "step": 187 |
| }, |
| { |
| "epoch": 0.9894736842105263, |
| "grad_norm": 2.37666359514784, |
| "learning_rate": 7.768722587025818e-05, |
| "loss": 1.333, |
| "step": 188 |
| }, |
| { |
| "epoch": 0.9947368421052631, |
| "grad_norm": 2.0642846376852697, |
| "learning_rate": 7.763771942490995e-05, |
| "loss": 1.3514, |
| "step": 189 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 1.7892483033365818, |
| "learning_rate": 7.758770483143634e-05, |
| "loss": 1.3383, |
| "step": 190 |
| }, |
| { |
| "epoch": 1.0052631578947369, |
| "grad_norm": 2.3383926476340875, |
| "learning_rate": 7.753718276508609e-05, |
| "loss": 1.3296, |
| "step": 191 |
| }, |
| { |
| "epoch": 1.0105263157894737, |
| "grad_norm": 2.4843645129343686, |
| "learning_rate": 7.748615390795932e-05, |
| "loss": 1.3271, |
| "step": 192 |
| }, |
| { |
| "epoch": 1.0157894736842106, |
| "grad_norm": 2.418044856395934, |
| "learning_rate": 7.743461894899837e-05, |
| "loss": 1.3272, |
| "step": 193 |
| }, |
| { |
| "epoch": 1.0210526315789474, |
| "grad_norm": 1.5738686856678197, |
| "learning_rate": 7.738257858397844e-05, |
| "loss": 1.3345, |
| "step": 194 |
| }, |
| { |
| "epoch": 1.0263157894736843, |
| "grad_norm": 2.648547362628862, |
| "learning_rate": 7.733003351549829e-05, |
| "loss": 1.3334, |
| "step": 195 |
| }, |
| { |
| "epoch": 1.0315789473684212, |
| "grad_norm": 1.882461821114559, |
| "learning_rate": 7.727698445297066e-05, |
| "loss": 1.3129, |
| "step": 196 |
| }, |
| { |
| "epoch": 1.0368421052631578, |
| "grad_norm": 2.4800191556167586, |
| "learning_rate": 7.722343211261274e-05, |
| "loss": 1.3254, |
| "step": 197 |
| }, |
| { |
| "epoch": 1.0421052631578946, |
| "grad_norm": 2.23843270259424, |
| "learning_rate": 7.71693772174365e-05, |
| "loss": 1.3273, |
| "step": 198 |
| }, |
| { |
| "epoch": 1.0473684210526315, |
| "grad_norm": 1.9793326670930025, |
| "learning_rate": 7.71148204972389e-05, |
| "loss": 1.3286, |
| "step": 199 |
| }, |
| { |
| "epoch": 1.0526315789473684, |
| "grad_norm": 2.1730809011656502, |
| "learning_rate": 7.705976268859207e-05, |
| "loss": 1.3245, |
| "step": 200 |
| }, |
| { |
| "epoch": 1.0578947368421052, |
| "grad_norm": 2.185445170389663, |
| "learning_rate": 7.700420453483336e-05, |
| "loss": 1.3222, |
| "step": 201 |
| }, |
| { |
| "epoch": 1.063157894736842, |
| "grad_norm": 2.3909717751217903, |
| "learning_rate": 7.694814678605528e-05, |
| "loss": 1.325, |
| "step": 202 |
| }, |
| { |
| "epoch": 1.068421052631579, |
| "grad_norm": 1.923075293942803, |
| "learning_rate": 7.68915901990954e-05, |
| "loss": 1.3107, |
| "step": 203 |
| }, |
| { |
| "epoch": 1.0736842105263158, |
| "grad_norm": 1.7281637350526677, |
| "learning_rate": 7.683453553752611e-05, |
| "loss": 1.3252, |
| "step": 204 |
| }, |
| { |
| "epoch": 1.0789473684210527, |
| "grad_norm": 3.2565617242431055, |
| "learning_rate": 7.677698357164431e-05, |
| "loss": 1.3269, |
| "step": 205 |
| }, |
| { |
| "epoch": 1.0842105263157895, |
| "grad_norm": 1.2312572256380077, |
| "learning_rate": 7.671893507846109e-05, |
| "loss": 1.3208, |
| "step": 206 |
| }, |
| { |
| "epoch": 1.0894736842105264, |
| "grad_norm": 2.928938984187919, |
| "learning_rate": 7.66603908416911e-05, |
| "loss": 1.3313, |
| "step": 207 |
| }, |
| { |
| "epoch": 1.0947368421052632, |
| "grad_norm": 2.9307098888263026, |
| "learning_rate": 7.660135165174205e-05, |
| "loss": 1.3455, |
| "step": 208 |
| }, |
| { |
| "epoch": 1.1, |
| "grad_norm": 1.593997332291012, |
| "learning_rate": 7.654181830570404e-05, |
| "loss": 1.3103, |
| "step": 209 |
| }, |
| { |
| "epoch": 1.1052631578947367, |
| "grad_norm": 2.6269588682109952, |
| "learning_rate": 7.648179160733883e-05, |
| "loss": 1.3167, |
| "step": 210 |
| }, |
| { |
| "epoch": 1.1105263157894736, |
| "grad_norm": 2.4790660783022043, |
| "learning_rate": 7.642127236706887e-05, |
| "loss": 1.3164, |
| "step": 211 |
| }, |
| { |
| "epoch": 1.1157894736842104, |
| "grad_norm": 2.1512908725199544, |
| "learning_rate": 7.636026140196651e-05, |
| "loss": 1.3067, |
| "step": 212 |
| }, |
| { |
| "epoch": 1.1210526315789473, |
| "grad_norm": 1.5786680257092611, |
| "learning_rate": 7.629875953574282e-05, |
| "loss": 1.3248, |
| "step": 213 |
| }, |
| { |
| "epoch": 1.1263157894736842, |
| "grad_norm": 1.8684394631658845, |
| "learning_rate": 7.623676759873661e-05, |
| "loss": 1.3356, |
| "step": 214 |
| }, |
| { |
| "epoch": 1.131578947368421, |
| "grad_norm": 1.83011908562494, |
| "learning_rate": 7.61742864279031e-05, |
| "loss": 1.3243, |
| "step": 215 |
| }, |
| { |
| "epoch": 1.1368421052631579, |
| "grad_norm": 2.5453116567442686, |
| "learning_rate": 7.611131686680272e-05, |
| "loss": 1.3202, |
| "step": 216 |
| }, |
| { |
| "epoch": 1.1421052631578947, |
| "grad_norm": 1.899610653373559, |
| "learning_rate": 7.604785976558961e-05, |
| "loss": 1.3196, |
| "step": 217 |
| }, |
| { |
| "epoch": 1.1473684210526316, |
| "grad_norm": 2.7819319499079143, |
| "learning_rate": 7.598391598100029e-05, |
| "loss": 1.3223, |
| "step": 218 |
| }, |
| { |
| "epoch": 1.1526315789473685, |
| "grad_norm": 2.119997221986913, |
| "learning_rate": 7.591948637634193e-05, |
| "loss": 1.3304, |
| "step": 219 |
| }, |
| { |
| "epoch": 1.1578947368421053, |
| "grad_norm": 2.4856497917141254, |
| "learning_rate": 7.585457182148081e-05, |
| "loss": 1.3036, |
| "step": 220 |
| }, |
| { |
| "epoch": 1.1631578947368422, |
| "grad_norm": 2.1055551992495847, |
| "learning_rate": 7.578917319283055e-05, |
| "loss": 1.3269, |
| "step": 221 |
| }, |
| { |
| "epoch": 1.168421052631579, |
| "grad_norm": 2.2379057425917424, |
| "learning_rate": 7.572329137334023e-05, |
| "loss": 1.3084, |
| "step": 222 |
| }, |
| { |
| "epoch": 1.1736842105263159, |
| "grad_norm": 2.0190272934745055, |
| "learning_rate": 7.565692725248254e-05, |
| "loss": 1.3251, |
| "step": 223 |
| }, |
| { |
| "epoch": 1.1789473684210527, |
| "grad_norm": 1.2213400585349068, |
| "learning_rate": 7.559008172624174e-05, |
| "loss": 1.3089, |
| "step": 224 |
| }, |
| { |
| "epoch": 1.1842105263157894, |
| "grad_norm": 3.4238358307196375, |
| "learning_rate": 7.552275569710152e-05, |
| "loss": 1.3188, |
| "step": 225 |
| }, |
| { |
| "epoch": 1.1894736842105262, |
| "grad_norm": 1.9434637558797097, |
| "learning_rate": 7.545495007403287e-05, |
| "loss": 1.3197, |
| "step": 226 |
| }, |
| { |
| "epoch": 1.194736842105263, |
| "grad_norm": 3.2480882471479164, |
| "learning_rate": 7.538666577248184e-05, |
| "loss": 1.3248, |
| "step": 227 |
| }, |
| { |
| "epoch": 1.2, |
| "grad_norm": 2.686792838642632, |
| "learning_rate": 7.531790371435709e-05, |
| "loss": 1.3166, |
| "step": 228 |
| }, |
| { |
| "epoch": 1.2052631578947368, |
| "grad_norm": 2.667702689555652, |
| "learning_rate": 7.524866482801748e-05, |
| "loss": 1.3118, |
| "step": 229 |
| }, |
| { |
| "epoch": 1.2105263157894737, |
| "grad_norm": 2.0267106721992003, |
| "learning_rate": 7.517895004825956e-05, |
| "loss": 1.3311, |
| "step": 230 |
| }, |
| { |
| "epoch": 1.2157894736842105, |
| "grad_norm": 3.195120176439168, |
| "learning_rate": 7.510876031630496e-05, |
| "loss": 1.322, |
| "step": 231 |
| }, |
| { |
| "epoch": 1.2210526315789474, |
| "grad_norm": 1.9034234117765794, |
| "learning_rate": 7.503809657978762e-05, |
| "loss": 1.3226, |
| "step": 232 |
| }, |
| { |
| "epoch": 1.2263157894736842, |
| "grad_norm": 3.690905599022198, |
| "learning_rate": 7.496695979274103e-05, |
| "loss": 1.3255, |
| "step": 233 |
| }, |
| { |
| "epoch": 1.231578947368421, |
| "grad_norm": 3.145636629896195, |
| "learning_rate": 7.489535091558536e-05, |
| "loss": 1.3381, |
| "step": 234 |
| }, |
| { |
| "epoch": 1.236842105263158, |
| "grad_norm": 2.5072433312959843, |
| "learning_rate": 7.48232709151145e-05, |
| "loss": 1.3219, |
| "step": 235 |
| }, |
| { |
| "epoch": 1.2421052631578948, |
| "grad_norm": 3.2107352113754186, |
| "learning_rate": 7.475072076448298e-05, |
| "loss": 1.3227, |
| "step": 236 |
| }, |
| { |
| "epoch": 1.2473684210526317, |
| "grad_norm": 1.58975425995912, |
| "learning_rate": 7.467770144319283e-05, |
| "loss": 1.3333, |
| "step": 237 |
| }, |
| { |
| "epoch": 1.2526315789473683, |
| "grad_norm": 3.7587617169131082, |
| "learning_rate": 7.460421393708039e-05, |
| "loss": 1.3509, |
| "step": 238 |
| }, |
| { |
| "epoch": 1.2578947368421054, |
| "grad_norm": 2.5861078342959614, |
| "learning_rate": 7.453025923830296e-05, |
| "loss": 1.3361, |
| "step": 239 |
| }, |
| { |
| "epoch": 1.263157894736842, |
| "grad_norm": 3.398411462187095, |
| "learning_rate": 7.445583834532546e-05, |
| "loss": 1.3309, |
| "step": 240 |
| }, |
| { |
| "epoch": 1.268421052631579, |
| "grad_norm": 2.4423289107004664, |
| "learning_rate": 7.438095226290685e-05, |
| "loss": 1.337, |
| "step": 241 |
| }, |
| { |
| "epoch": 1.2736842105263158, |
| "grad_norm": 2.4746167081096324, |
| "learning_rate": 7.430560200208669e-05, |
| "loss": 1.3105, |
| "step": 242 |
| }, |
| { |
| "epoch": 1.2789473684210526, |
| "grad_norm": 2.7651610722472353, |
| "learning_rate": 7.42297885801714e-05, |
| "loss": 1.3243, |
| "step": 243 |
| }, |
| { |
| "epoch": 1.2842105263157895, |
| "grad_norm": 1.6511969920414749, |
| "learning_rate": 7.415351302072056e-05, |
| "loss": 1.3105, |
| "step": 244 |
| }, |
| { |
| "epoch": 1.2894736842105263, |
| "grad_norm": 3.104346761016083, |
| "learning_rate": 7.407677635353308e-05, |
| "loss": 1.3298, |
| "step": 245 |
| }, |
| { |
| "epoch": 1.2947368421052632, |
| "grad_norm": 2.3550214994148235, |
| "learning_rate": 7.399957961463332e-05, |
| "loss": 1.3649, |
| "step": 246 |
| }, |
| { |
| "epoch": 1.3, |
| "grad_norm": 2.266967394498034, |
| "learning_rate": 7.392192384625704e-05, |
| "loss": 1.3363, |
| "step": 247 |
| }, |
| { |
| "epoch": 1.305263157894737, |
| "grad_norm": 3.1011314193494104, |
| "learning_rate": 7.384381009683742e-05, |
| "loss": 1.3252, |
| "step": 248 |
| }, |
| { |
| "epoch": 1.3105263157894738, |
| "grad_norm": 2.133334459450928, |
| "learning_rate": 7.376523942099084e-05, |
| "loss": 1.3307, |
| "step": 249 |
| }, |
| { |
| "epoch": 1.3157894736842106, |
| "grad_norm": 4.326087201648726, |
| "learning_rate": 7.368621287950264e-05, |
| "loss": 1.4045, |
| "step": 250 |
| }, |
| { |
| "epoch": 1.3210526315789473, |
| "grad_norm": 22.674882226571523, |
| "learning_rate": 7.360673153931285e-05, |
| "loss": 1.3348, |
| "step": 251 |
| }, |
| { |
| "epoch": 1.3263157894736843, |
| "grad_norm": 3.064941459260646, |
| "learning_rate": 7.352679647350172e-05, |
| "loss": 1.3425, |
| "step": 252 |
| }, |
| { |
| "epoch": 1.331578947368421, |
| "grad_norm": 2.6376251183297055, |
| "learning_rate": 7.344640876127529e-05, |
| "loss": 1.3389, |
| "step": 253 |
| }, |
| { |
| "epoch": 1.3368421052631578, |
| "grad_norm": 3.518986507081023, |
| "learning_rate": 7.33655694879508e-05, |
| "loss": 1.325, |
| "step": 254 |
| }, |
| { |
| "epoch": 1.3421052631578947, |
| "grad_norm": 6.772119772438152, |
| "learning_rate": 7.328427974494201e-05, |
| "loss": 1.3435, |
| "step": 255 |
| }, |
| { |
| "epoch": 1.3473684210526315, |
| "grad_norm": 3.1439267189525966, |
| "learning_rate": 7.32025406297445e-05, |
| "loss": 1.3482, |
| "step": 256 |
| }, |
| { |
| "epoch": 1.3526315789473684, |
| "grad_norm": 2.9757174788747442, |
| "learning_rate": 7.312035324592081e-05, |
| "loss": 1.4253, |
| "step": 257 |
| }, |
| { |
| "epoch": 1.3578947368421053, |
| "grad_norm": 19.234550469937634, |
| "learning_rate": 7.303771870308561e-05, |
| "loss": 1.5748, |
| "step": 258 |
| }, |
| { |
| "epoch": 1.3631578947368421, |
| "grad_norm": 166.65509126340316, |
| "learning_rate": 7.295463811689069e-05, |
| "loss": 7.3386, |
| "step": 259 |
| }, |
| { |
| "epoch": 1.368421052631579, |
| "grad_norm": 37.296808447795385, |
| "learning_rate": 7.28711126090098e-05, |
| "loss": 7.6292, |
| "step": 260 |
| }, |
| { |
| "epoch": 1.3736842105263158, |
| "grad_norm": 206.77559967714524, |
| "learning_rate": 7.278714330712372e-05, |
| "loss": 5.9669, |
| "step": 261 |
| }, |
| { |
| "epoch": 1.3789473684210527, |
| "grad_norm": 31.10081965111831, |
| "learning_rate": 7.27027313449048e-05, |
| "loss": 1.9804, |
| "step": 262 |
| }, |
| { |
| "epoch": 1.3842105263157896, |
| "grad_norm": 273.4851676662734, |
| "learning_rate": 7.261787786200179e-05, |
| "loss": 4.0434, |
| "step": 263 |
| }, |
| { |
| "epoch": 1.3894736842105262, |
| "grad_norm": 21.94907796550795, |
| "learning_rate": 7.253258400402448e-05, |
| "loss": 2.3785, |
| "step": 264 |
| }, |
| { |
| "epoch": 1.3947368421052633, |
| "grad_norm": 87.34643583499242, |
| "learning_rate": 7.24468509225281e-05, |
| "loss": 3.2623, |
| "step": 265 |
| }, |
| { |
| "epoch": 1.4, |
| "grad_norm": 14.043694608373665, |
| "learning_rate": 7.236067977499791e-05, |
| "loss": 2.0359, |
| "step": 266 |
| }, |
| { |
| "epoch": 1.4052631578947368, |
| "grad_norm": 204.7560211103692, |
| "learning_rate": 7.227407172483348e-05, |
| "loss": 2.6066, |
| "step": 267 |
| }, |
| { |
| "epoch": 1.4105263157894736, |
| "grad_norm": 8.50429112485772, |
| "learning_rate": 7.218702794133304e-05, |
| "loss": 1.8554, |
| "step": 268 |
| }, |
| { |
| "epoch": 1.4157894736842105, |
| "grad_norm": 7.0947931831701805, |
| "learning_rate": 7.209954959967765e-05, |
| "loss": 1.7393, |
| "step": 269 |
| }, |
| { |
| "epoch": 1.4210526315789473, |
| "grad_norm": 3.181643344667253, |
| "learning_rate": 7.201163788091536e-05, |
| "loss": 1.5682, |
| "step": 270 |
| }, |
| { |
| "epoch": 1.4263157894736842, |
| "grad_norm": 2.0126327538765865, |
| "learning_rate": 7.192329397194529e-05, |
| "loss": 1.4786, |
| "step": 271 |
| }, |
| { |
| "epoch": 1.431578947368421, |
| "grad_norm": 2.7578093818109175, |
| "learning_rate": 7.183451906550155e-05, |
| "loss": 1.4642, |
| "step": 272 |
| }, |
| { |
| "epoch": 1.436842105263158, |
| "grad_norm": 1.9145504095924126, |
| "learning_rate": 7.174531436013712e-05, |
| "loss": 1.4291, |
| "step": 273 |
| }, |
| { |
| "epoch": 1.4421052631578948, |
| "grad_norm": 2.830916672053224, |
| "learning_rate": 7.165568106020779e-05, |
| "loss": 1.4538, |
| "step": 274 |
| }, |
| { |
| "epoch": 1.4473684210526316, |
| "grad_norm": 3.3750230003094464, |
| "learning_rate": 7.156562037585576e-05, |
| "loss": 1.4218, |
| "step": 275 |
| }, |
| { |
| "epoch": 1.4526315789473685, |
| "grad_norm": 1.886048219133163, |
| "learning_rate": 7.147513352299336e-05, |
| "loss": 1.4005, |
| "step": 276 |
| }, |
| { |
| "epoch": 1.4578947368421051, |
| "grad_norm": 4.612254413421206, |
| "learning_rate": 7.138422172328671e-05, |
| "loss": 1.4112, |
| "step": 277 |
| }, |
| { |
| "epoch": 1.4631578947368422, |
| "grad_norm": 3.475637100160211, |
| "learning_rate": 7.129288620413907e-05, |
| "loss": 1.388, |
| "step": 278 |
| }, |
| { |
| "epoch": 1.4684210526315788, |
| "grad_norm": 3.762823770210365, |
| "learning_rate": 7.120112819867437e-05, |
| "loss": 1.3941, |
| "step": 279 |
| }, |
| { |
| "epoch": 1.4736842105263157, |
| "grad_norm": 2.965363344704181, |
| "learning_rate": 7.110894894572056e-05, |
| "loss": 1.3815, |
| "step": 280 |
| }, |
| { |
| "epoch": 1.4789473684210526, |
| "grad_norm": 3.003148787485473, |
| "learning_rate": 7.101634968979287e-05, |
| "loss": 1.3805, |
| "step": 281 |
| }, |
| { |
| "epoch": 1.4842105263157894, |
| "grad_norm": 1.9778398666652557, |
| "learning_rate": 7.092333168107697e-05, |
| "loss": 1.3752, |
| "step": 282 |
| }, |
| { |
| "epoch": 1.4894736842105263, |
| "grad_norm": 3.5530639325816114, |
| "learning_rate": 7.082989617541217e-05, |
| "loss": 1.3919, |
| "step": 283 |
| }, |
| { |
| "epoch": 1.4947368421052631, |
| "grad_norm": 2.4964012979013144, |
| "learning_rate": 7.073604443427437e-05, |
| "loss": 1.3752, |
| "step": 284 |
| }, |
| { |
| "epoch": 1.5, |
| "grad_norm": 3.2586608061353224, |
| "learning_rate": 7.064177772475912e-05, |
| "loss": 1.3537, |
| "step": 285 |
| }, |
| { |
| "epoch": 1.5052631578947369, |
| "grad_norm": 2.7382891584432576, |
| "learning_rate": 7.054709731956449e-05, |
| "loss": 1.3548, |
| "step": 286 |
| }, |
| { |
| "epoch": 1.5105263157894737, |
| "grad_norm": 2.7983043107255714, |
| "learning_rate": 7.045200449697379e-05, |
| "loss": 1.355, |
| "step": 287 |
| }, |
| { |
| "epoch": 1.5157894736842106, |
| "grad_norm": 2.1575170098628207, |
| "learning_rate": 7.035650054083847e-05, |
| "loss": 1.3666, |
| "step": 288 |
| }, |
| { |
| "epoch": 1.5210526315789474, |
| "grad_norm": 2.280552356804481, |
| "learning_rate": 7.026058674056067e-05, |
| "loss": 1.3729, |
| "step": 289 |
| }, |
| { |
| "epoch": 1.526315789473684, |
| "grad_norm": 1.8204200442034197, |
| "learning_rate": 7.016426439107586e-05, |
| "loss": 1.3285, |
| "step": 290 |
| }, |
| { |
| "epoch": 1.5315789473684212, |
| "grad_norm": 2.2692718684429805, |
| "learning_rate": 7.006753479283535e-05, |
| "loss": 1.3432, |
| "step": 291 |
| }, |
| { |
| "epoch": 1.5368421052631578, |
| "grad_norm": 1.608298273784726, |
| "learning_rate": 6.99703992517887e-05, |
| "loss": 1.3457, |
| "step": 292 |
| }, |
| { |
| "epoch": 1.5421052631578949, |
| "grad_norm": 2.291066728931036, |
| "learning_rate": 6.987285907936617e-05, |
| "loss": 1.3489, |
| "step": 293 |
| }, |
| { |
| "epoch": 1.5473684210526315, |
| "grad_norm": 1.799873956016166, |
| "learning_rate": 6.977491559246091e-05, |
| "loss": 1.3538, |
| "step": 294 |
| }, |
| { |
| "epoch": 1.5526315789473686, |
| "grad_norm": 2.1836156231488144, |
| "learning_rate": 6.967657011341126e-05, |
| "loss": 1.3393, |
| "step": 295 |
| }, |
| { |
| "epoch": 1.5578947368421052, |
| "grad_norm": 1.656082168753184, |
| "learning_rate": 6.957782396998289e-05, |
| "loss": 1.3487, |
| "step": 296 |
| }, |
| { |
| "epoch": 1.563157894736842, |
| "grad_norm": 2.237518228348859, |
| "learning_rate": 6.94786784953508e-05, |
| "loss": 1.3431, |
| "step": 297 |
| }, |
| { |
| "epoch": 1.568421052631579, |
| "grad_norm": 1.8074440576933803, |
| "learning_rate": 6.937913502808142e-05, |
| "loss": 1.3338, |
| "step": 298 |
| }, |
| { |
| "epoch": 1.5736842105263158, |
| "grad_norm": 2.1852538797514134, |
| "learning_rate": 6.927919491211447e-05, |
| "loss": 1.3408, |
| "step": 299 |
| }, |
| { |
| "epoch": 1.5789473684210527, |
| "grad_norm": 1.686931533274294, |
| "learning_rate": 6.917885949674483e-05, |
| "loss": 1.337, |
| "step": 300 |
| }, |
| { |
| "epoch": 1.5842105263157895, |
| "grad_norm": 2.484073100527864, |
| "learning_rate": 6.907813013660437e-05, |
| "loss": 1.3315, |
| "step": 301 |
| }, |
| { |
| "epoch": 1.5894736842105264, |
| "grad_norm": 1.9730996981374016, |
| "learning_rate": 6.897700819164357e-05, |
| "loss": 1.3383, |
| "step": 302 |
| }, |
| { |
| "epoch": 1.594736842105263, |
| "grad_norm": 1.4953094506502813, |
| "learning_rate": 6.887549502711323e-05, |
| "loss": 1.3316, |
| "step": 303 |
| }, |
| { |
| "epoch": 1.6, |
| "grad_norm": 1.7333934091879961, |
| "learning_rate": 6.877359201354606e-05, |
| "loss": 1.3338, |
| "step": 304 |
| }, |
| { |
| "epoch": 1.6052631578947367, |
| "grad_norm": 1.701314271187227, |
| "learning_rate": 6.867130052673806e-05, |
| "loss": 1.3233, |
| "step": 305 |
| }, |
| { |
| "epoch": 1.6105263157894738, |
| "grad_norm": 2.5170048810500365, |
| "learning_rate": 6.856862194773008e-05, |
| "loss": 1.3418, |
| "step": 306 |
| }, |
| { |
| "epoch": 1.6157894736842104, |
| "grad_norm": 1.1423723633356422, |
| "learning_rate": 6.846555766278909e-05, |
| "loss": 1.3456, |
| "step": 307 |
| }, |
| { |
| "epoch": 1.6210526315789475, |
| "grad_norm": 2.1226546892123688, |
| "learning_rate": 6.83621090633895e-05, |
| "loss": 1.3199, |
| "step": 308 |
| }, |
| { |
| "epoch": 1.6263157894736842, |
| "grad_norm": 2.120207603951501, |
| "learning_rate": 6.825827754619434e-05, |
| "loss": 1.3252, |
| "step": 309 |
| }, |
| { |
| "epoch": 1.631578947368421, |
| "grad_norm": 1.3158750566710444, |
| "learning_rate": 6.815406451303647e-05, |
| "loss": 1.3213, |
| "step": 310 |
| }, |
| { |
| "epoch": 1.6368421052631579, |
| "grad_norm": 2.597320776495221, |
| "learning_rate": 6.804947137089955e-05, |
| "loss": 1.3112, |
| "step": 311 |
| }, |
| { |
| "epoch": 1.6421052631578947, |
| "grad_norm": 1.6685217693160599, |
| "learning_rate": 6.794449953189916e-05, |
| "loss": 1.3074, |
| "step": 312 |
| }, |
| { |
| "epoch": 1.6473684210526316, |
| "grad_norm": 2.5188932447525283, |
| "learning_rate": 6.783915041326364e-05, |
| "loss": 1.331, |
| "step": 313 |
| }, |
| { |
| "epoch": 1.6526315789473685, |
| "grad_norm": 2.071056305940592, |
| "learning_rate": 6.773342543731503e-05, |
| "loss": 1.3173, |
| "step": 314 |
| }, |
| { |
| "epoch": 1.6578947368421053, |
| "grad_norm": 2.427656153267591, |
| "learning_rate": 6.762732603144978e-05, |
| "loss": 1.3329, |
| "step": 315 |
| }, |
| { |
| "epoch": 1.663157894736842, |
| "grad_norm": 1.6471906450412725, |
| "learning_rate": 6.75208536281196e-05, |
| "loss": 1.311, |
| "step": 316 |
| }, |
| { |
| "epoch": 1.668421052631579, |
| "grad_norm": 2.3066742827555022, |
| "learning_rate": 6.7414009664812e-05, |
| "loss": 1.3349, |
| "step": 317 |
| }, |
| { |
| "epoch": 1.6736842105263157, |
| "grad_norm": 1.8458843126503317, |
| "learning_rate": 6.730679558403093e-05, |
| "loss": 1.3236, |
| "step": 318 |
| }, |
| { |
| "epoch": 1.6789473684210527, |
| "grad_norm": 2.1861813200392843, |
| "learning_rate": 6.719921283327736e-05, |
| "loss": 1.3268, |
| "step": 319 |
| }, |
| { |
| "epoch": 1.6842105263157894, |
| "grad_norm": 2.1870673388161124, |
| "learning_rate": 6.709126286502965e-05, |
| "loss": 1.3019, |
| "step": 320 |
| }, |
| { |
| "epoch": 1.6894736842105265, |
| "grad_norm": 1.4274808199921123, |
| "learning_rate": 6.698294713672395e-05, |
| "loss": 1.3255, |
| "step": 321 |
| }, |
| { |
| "epoch": 1.694736842105263, |
| "grad_norm": 1.5694017468203492, |
| "learning_rate": 6.687426711073462e-05, |
| "loss": 1.3048, |
| "step": 322 |
| }, |
| { |
| "epoch": 1.7, |
| "grad_norm": 1.1078521144544478, |
| "learning_rate": 6.676522425435433e-05, |
| "loss": 1.3087, |
| "step": 323 |
| }, |
| { |
| "epoch": 1.7052631578947368, |
| "grad_norm": 2.4538742138976386, |
| "learning_rate": 6.665582003977441e-05, |
| "loss": 1.3244, |
| "step": 324 |
| }, |
| { |
| "epoch": 1.7105263157894737, |
| "grad_norm": 1.7323261915367696, |
| "learning_rate": 6.654605594406486e-05, |
| "loss": 1.3093, |
| "step": 325 |
| }, |
| { |
| "epoch": 1.7157894736842105, |
| "grad_norm": 1.7174315153551183, |
| "learning_rate": 6.643593344915445e-05, |
| "loss": 1.3141, |
| "step": 326 |
| }, |
| { |
| "epoch": 1.7210526315789474, |
| "grad_norm": 1.4498322250506395, |
| "learning_rate": 6.632545404181074e-05, |
| "loss": 1.3251, |
| "step": 327 |
| }, |
| { |
| "epoch": 1.7263157894736842, |
| "grad_norm": 2.978144373846546, |
| "learning_rate": 6.62146192136199e-05, |
| "loss": 1.3117, |
| "step": 328 |
| }, |
| { |
| "epoch": 1.731578947368421, |
| "grad_norm": 1.8288925620523002, |
| "learning_rate": 6.610343046096674e-05, |
| "loss": 1.311, |
| "step": 329 |
| }, |
| { |
| "epoch": 1.736842105263158, |
| "grad_norm": 2.8409045314260255, |
| "learning_rate": 6.59918892850144e-05, |
| "loss": 1.3263, |
| "step": 330 |
| }, |
| { |
| "epoch": 1.7421052631578946, |
| "grad_norm": 1.9806940703831386, |
| "learning_rate": 6.587999719168401e-05, |
| "loss": 1.3179, |
| "step": 331 |
| }, |
| { |
| "epoch": 1.7473684210526317, |
| "grad_norm": 2.231645147378468, |
| "learning_rate": 6.576775569163458e-05, |
| "loss": 1.3216, |
| "step": 332 |
| }, |
| { |
| "epoch": 1.7526315789473683, |
| "grad_norm": 2.242436351469589, |
| "learning_rate": 6.565516630024236e-05, |
| "loss": 1.3263, |
| "step": 333 |
| }, |
| { |
| "epoch": 1.7578947368421054, |
| "grad_norm": 1.418447898215289, |
| "learning_rate": 6.554223053758055e-05, |
| "loss": 1.317, |
| "step": 334 |
| }, |
| { |
| "epoch": 1.763157894736842, |
| "grad_norm": 2.1049377231565036, |
| "learning_rate": 6.542894992839873e-05, |
| "loss": 1.3278, |
| "step": 335 |
| }, |
| { |
| "epoch": 1.768421052631579, |
| "grad_norm": 1.9649271286389844, |
| "learning_rate": 6.531532600210222e-05, |
| "loss": 1.3309, |
| "step": 336 |
| }, |
| { |
| "epoch": 1.7736842105263158, |
| "grad_norm": 1.3002559482544591, |
| "learning_rate": 6.520136029273151e-05, |
| "loss": 1.3003, |
| "step": 337 |
| }, |
| { |
| "epoch": 1.7789473684210526, |
| "grad_norm": 2.4684068562448136, |
| "learning_rate": 6.508705433894149e-05, |
| "loss": 1.32, |
| "step": 338 |
| }, |
| { |
| "epoch": 1.7842105263157895, |
| "grad_norm": 1.5743465960197915, |
| "learning_rate": 6.497240968398072e-05, |
| "loss": 1.3006, |
| "step": 339 |
| }, |
| { |
| "epoch": 1.7894736842105263, |
| "grad_norm": 2.59770911193071, |
| "learning_rate": 6.48574278756706e-05, |
| "loss": 1.3222, |
| "step": 340 |
| }, |
| { |
| "epoch": 1.7947368421052632, |
| "grad_norm": 1.7842505149097647, |
| "learning_rate": 6.474211046638438e-05, |
| "loss": 1.3161, |
| "step": 341 |
| }, |
| { |
| "epoch": 1.8, |
| "grad_norm": 2.7074992552378805, |
| "learning_rate": 6.462645901302633e-05, |
| "loss": 1.3281, |
| "step": 342 |
| }, |
| { |
| "epoch": 1.805263157894737, |
| "grad_norm": 1.8215475542278567, |
| "learning_rate": 6.451047507701065e-05, |
| "loss": 1.3282, |
| "step": 343 |
| }, |
| { |
| "epoch": 1.8105263157894735, |
| "grad_norm": 3.304881983512875, |
| "learning_rate": 6.439416022424036e-05, |
| "loss": 1.3391, |
| "step": 344 |
| }, |
| { |
| "epoch": 1.8157894736842106, |
| "grad_norm": 3.2476105816930954, |
| "learning_rate": 6.427751602508628e-05, |
| "loss": 1.3348, |
| "step": 345 |
| }, |
| { |
| "epoch": 1.8210526315789473, |
| "grad_norm": 1.5869549786160873, |
| "learning_rate": 6.416054405436564e-05, |
| "loss": 1.3201, |
| "step": 346 |
| }, |
| { |
| "epoch": 1.8263157894736843, |
| "grad_norm": 2.2683887128326723, |
| "learning_rate": 6.404324589132101e-05, |
| "loss": 1.3204, |
| "step": 347 |
| }, |
| { |
| "epoch": 1.831578947368421, |
| "grad_norm": 1.9620950054062172, |
| "learning_rate": 6.392562311959886e-05, |
| "loss": 1.3158, |
| "step": 348 |
| }, |
| { |
| "epoch": 1.836842105263158, |
| "grad_norm": 1.8047773439892525, |
| "learning_rate": 6.380767732722821e-05, |
| "loss": 1.3181, |
| "step": 349 |
| }, |
| { |
| "epoch": 1.8421052631578947, |
| "grad_norm": 1.911531036771628, |
| "learning_rate": 6.368941010659921e-05, |
| "loss": 1.3292, |
| "step": 350 |
| }, |
| { |
| "epoch": 1.8473684210526315, |
| "grad_norm": 1.636720765605733, |
| "learning_rate": 6.35708230544416e-05, |
| "loss": 1.3091, |
| "step": 351 |
| }, |
| { |
| "epoch": 1.8526315789473684, |
| "grad_norm": 1.424304904246396, |
| "learning_rate": 6.34519177718032e-05, |
| "loss": 1.3207, |
| "step": 352 |
| }, |
| { |
| "epoch": 1.8578947368421053, |
| "grad_norm": 1.3525184453889394, |
| "learning_rate": 6.333269586402827e-05, |
| "loss": 1.3125, |
| "step": 353 |
| }, |
| { |
| "epoch": 1.8631578947368421, |
| "grad_norm": 1.7281390731184902, |
| "learning_rate": 6.321315894073581e-05, |
| "loss": 1.3231, |
| "step": 354 |
| }, |
| { |
| "epoch": 1.868421052631579, |
| "grad_norm": 1.1089366431889842, |
| "learning_rate": 6.309330861579786e-05, |
| "loss": 1.3238, |
| "step": 355 |
| }, |
| { |
| "epoch": 1.8736842105263158, |
| "grad_norm": 1.876591117688095, |
| "learning_rate": 6.297314650731775e-05, |
| "loss": 1.3118, |
| "step": 356 |
| }, |
| { |
| "epoch": 1.8789473684210525, |
| "grad_norm": 1.61640922760774, |
| "learning_rate": 6.285267423760817e-05, |
| "loss": 1.3263, |
| "step": 357 |
| }, |
| { |
| "epoch": 1.8842105263157896, |
| "grad_norm": 1.4451990798983758, |
| "learning_rate": 6.273189343316929e-05, |
| "loss": 1.325, |
| "step": 358 |
| }, |
| { |
| "epoch": 1.8894736842105262, |
| "grad_norm": 1.3409307869705591, |
| "learning_rate": 6.261080572466688e-05, |
| "loss": 1.3057, |
| "step": 359 |
| }, |
| { |
| "epoch": 1.8947368421052633, |
| "grad_norm": 1.6052273256057499, |
| "learning_rate": 6.248941274691017e-05, |
| "loss": 1.3252, |
| "step": 360 |
| }, |
| { |
| "epoch": 1.9, |
| "grad_norm": 2.366978019871103, |
| "learning_rate": 6.236771613882987e-05, |
| "loss": 1.3179, |
| "step": 361 |
| }, |
| { |
| "epoch": 1.905263157894737, |
| "grad_norm": 1.1868922713453152, |
| "learning_rate": 6.224571754345602e-05, |
| "loss": 1.3082, |
| "step": 362 |
| }, |
| { |
| "epoch": 1.9105263157894736, |
| "grad_norm": 2.2556197419222612, |
| "learning_rate": 6.21234186078958e-05, |
| "loss": 1.3115, |
| "step": 363 |
| }, |
| { |
| "epoch": 1.9157894736842105, |
| "grad_norm": 1.7410078285379156, |
| "learning_rate": 6.200082098331126e-05, |
| "loss": 1.3281, |
| "step": 364 |
| }, |
| { |
| "epoch": 1.9210526315789473, |
| "grad_norm": 1.7950505417159182, |
| "learning_rate": 6.18779263248971e-05, |
| "loss": 1.3162, |
| "step": 365 |
| }, |
| { |
| "epoch": 1.9263157894736842, |
| "grad_norm": 1.8544654429983962, |
| "learning_rate": 6.175473629185822e-05, |
| "loss": 1.3205, |
| "step": 366 |
| }, |
| { |
| "epoch": 1.931578947368421, |
| "grad_norm": 1.7372962100479836, |
| "learning_rate": 6.163125254738751e-05, |
| "loss": 1.3065, |
| "step": 367 |
| }, |
| { |
| "epoch": 1.936842105263158, |
| "grad_norm": 2.242141298655648, |
| "learning_rate": 6.150747675864314e-05, |
| "loss": 1.2985, |
| "step": 368 |
| }, |
| { |
| "epoch": 1.9421052631578948, |
| "grad_norm": 1.481088212600443, |
| "learning_rate": 6.138341059672622e-05, |
| "loss": 1.3136, |
| "step": 369 |
| }, |
| { |
| "epoch": 1.9473684210526314, |
| "grad_norm": 2.1356181680202253, |
| "learning_rate": 6.125905573665824e-05, |
| "loss": 1.3282, |
| "step": 370 |
| }, |
| { |
| "epoch": 1.9526315789473685, |
| "grad_norm": 1.6259642009051207, |
| "learning_rate": 6.113441385735836e-05, |
| "loss": 1.3131, |
| "step": 371 |
| }, |
| { |
| "epoch": 1.9578947368421051, |
| "grad_norm": 1.973896595209029, |
| "learning_rate": 6.100948664162081e-05, |
| "loss": 1.3182, |
| "step": 372 |
| }, |
| { |
| "epoch": 1.9631578947368422, |
| "grad_norm": 1.616074252415091, |
| "learning_rate": 6.088427577609219e-05, |
| "loss": 1.3037, |
| "step": 373 |
| }, |
| { |
| "epoch": 1.9684210526315788, |
| "grad_norm": 1.777657189903051, |
| "learning_rate": 6.075878295124861e-05, |
| "loss": 1.3096, |
| "step": 374 |
| }, |
| { |
| "epoch": 1.973684210526316, |
| "grad_norm": 1.5325372367258376, |
| "learning_rate": 6.063300986137297e-05, |
| "loss": 1.3092, |
| "step": 375 |
| }, |
| { |
| "epoch": 1.9789473684210526, |
| "grad_norm": 1.7425893453777117, |
| "learning_rate": 6.0506958204531996e-05, |
| "loss": 1.3094, |
| "step": 376 |
| }, |
| { |
| "epoch": 1.9842105263157894, |
| "grad_norm": 1.2678177296707205, |
| "learning_rate": 6.0380629682553395e-05, |
| "loss": 1.2995, |
| "step": 377 |
| }, |
| { |
| "epoch": 1.9894736842105263, |
| "grad_norm": 2.083473793091378, |
| "learning_rate": 6.025402600100283e-05, |
| "loss": 1.3133, |
| "step": 378 |
| }, |
| { |
| "epoch": 1.9947368421052631, |
| "grad_norm": 1.5761354608098717, |
| "learning_rate": 6.012714886916088e-05, |
| "loss": 1.3232, |
| "step": 379 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 1.6759654932294628, |
| "learning_rate": 6.000000000000001e-05, |
| "loss": 1.295, |
| "step": 380 |
| }, |
| { |
| "epoch": 2.0052631578947366, |
| "grad_norm": 1.6566803747791274, |
| "learning_rate": 5.987258111016139e-05, |
| "loss": 1.269, |
| "step": 381 |
| }, |
| { |
| "epoch": 2.0105263157894737, |
| "grad_norm": 1.7773404022104615, |
| "learning_rate": 5.974489391993182e-05, |
| "loss": 1.2756, |
| "step": 382 |
| }, |
| { |
| "epoch": 2.0157894736842104, |
| "grad_norm": 1.7734820944361986, |
| "learning_rate": 5.9616940153220336e-05, |
| "loss": 1.3024, |
| "step": 383 |
| }, |
| { |
| "epoch": 2.0210526315789474, |
| "grad_norm": 1.3832861885005663, |
| "learning_rate": 5.948872153753509e-05, |
| "loss": 1.292, |
| "step": 384 |
| }, |
| { |
| "epoch": 2.026315789473684, |
| "grad_norm": 1.9267936406726134, |
| "learning_rate": 5.936023980395997e-05, |
| "loss": 1.2974, |
| "step": 385 |
| }, |
| { |
| "epoch": 2.031578947368421, |
| "grad_norm": 1.1634482135657338, |
| "learning_rate": 5.923149668713118e-05, |
| "loss": 1.2864, |
| "step": 386 |
| }, |
| { |
| "epoch": 2.036842105263158, |
| "grad_norm": 1.4709242560357587, |
| "learning_rate": 5.9102493925213946e-05, |
| "loss": 1.2719, |
| "step": 387 |
| }, |
| { |
| "epoch": 2.042105263157895, |
| "grad_norm": 1.3716802116661737, |
| "learning_rate": 5.8973233259878914e-05, |
| "loss": 1.2688, |
| "step": 388 |
| }, |
| { |
| "epoch": 2.0473684210526315, |
| "grad_norm": 1.8540565451199285, |
| "learning_rate": 5.8843716436278696e-05, |
| "loss": 1.292, |
| "step": 389 |
| }, |
| { |
| "epoch": 2.0526315789473686, |
| "grad_norm": 1.6569753347566705, |
| "learning_rate": 5.871394520302432e-05, |
| "loss": 1.2923, |
| "step": 390 |
| }, |
| { |
| "epoch": 2.057894736842105, |
| "grad_norm": 1.3344056097550805, |
| "learning_rate": 5.85839213121616e-05, |
| "loss": 1.2783, |
| "step": 391 |
| }, |
| { |
| "epoch": 2.0631578947368423, |
| "grad_norm": 1.0756154226095422, |
| "learning_rate": 5.845364651914752e-05, |
| "loss": 1.2823, |
| "step": 392 |
| }, |
| { |
| "epoch": 2.068421052631579, |
| "grad_norm": 1.5938788684018976, |
| "learning_rate": 5.832312258282645e-05, |
| "loss": 1.2872, |
| "step": 393 |
| }, |
| { |
| "epoch": 2.0736842105263156, |
| "grad_norm": 1.9960858248177353, |
| "learning_rate": 5.8192351265406466e-05, |
| "loss": 1.2819, |
| "step": 394 |
| }, |
| { |
| "epoch": 2.0789473684210527, |
| "grad_norm": 1.500794950504469, |
| "learning_rate": 5.806133433243558e-05, |
| "loss": 1.3018, |
| "step": 395 |
| }, |
| { |
| "epoch": 2.0842105263157893, |
| "grad_norm": 1.9776756621227738, |
| "learning_rate": 5.793007355277783e-05, |
| "loss": 1.2947, |
| "step": 396 |
| }, |
| { |
| "epoch": 2.0894736842105264, |
| "grad_norm": 1.5136499333830533, |
| "learning_rate": 5.7798570698589465e-05, |
| "loss": 1.2847, |
| "step": 397 |
| }, |
| { |
| "epoch": 2.094736842105263, |
| "grad_norm": 2.7888675821510467, |
| "learning_rate": 5.7666827545294965e-05, |
| "loss": 1.2803, |
| "step": 398 |
| }, |
| { |
| "epoch": 2.1, |
| "grad_norm": 1.997214443213332, |
| "learning_rate": 5.75348458715631e-05, |
| "loss": 1.2889, |
| "step": 399 |
| }, |
| { |
| "epoch": 2.1052631578947367, |
| "grad_norm": 2.636705031350177, |
| "learning_rate": 5.740262745928293e-05, |
| "loss": 1.2964, |
| "step": 400 |
| }, |
| { |
| "epoch": 2.110526315789474, |
| "grad_norm": 2.1100662062402775, |
| "learning_rate": 5.727017409353971e-05, |
| "loss": 1.2878, |
| "step": 401 |
| }, |
| { |
| "epoch": 2.1157894736842104, |
| "grad_norm": 1.9907403756995032, |
| "learning_rate": 5.713748756259085e-05, |
| "loss": 1.2942, |
| "step": 402 |
| }, |
| { |
| "epoch": 2.1210526315789475, |
| "grad_norm": 2.0618250265894433, |
| "learning_rate": 5.700456965784167e-05, |
| "loss": 1.2857, |
| "step": 403 |
| }, |
| { |
| "epoch": 2.126315789473684, |
| "grad_norm": 0.9319289242731835, |
| "learning_rate": 5.687142217382129e-05, |
| "loss": 1.2708, |
| "step": 404 |
| }, |
| { |
| "epoch": 2.1315789473684212, |
| "grad_norm": 2.992807432876805, |
| "learning_rate": 5.673804690815845e-05, |
| "loss": 1.309, |
| "step": 405 |
| }, |
| { |
| "epoch": 2.136842105263158, |
| "grad_norm": 1.864224068302743, |
| "learning_rate": 5.660444566155709e-05, |
| "loss": 1.2854, |
| "step": 406 |
| }, |
| { |
| "epoch": 2.1421052631578945, |
| "grad_norm": 3.4214971672572596, |
| "learning_rate": 5.647062023777221e-05, |
| "loss": 1.2927, |
| "step": 407 |
| }, |
| { |
| "epoch": 2.1473684210526316, |
| "grad_norm": 2.508109225516717, |
| "learning_rate": 5.633657244358535e-05, |
| "loss": 1.2829, |
| "step": 408 |
| }, |
| { |
| "epoch": 2.1526315789473682, |
| "grad_norm": 3.929215425642367, |
| "learning_rate": 5.6202304088780335e-05, |
| "loss": 1.2946, |
| "step": 409 |
| }, |
| { |
| "epoch": 2.1578947368421053, |
| "grad_norm": 3.798085324745515, |
| "learning_rate": 5.606781698611879e-05, |
| "loss": 1.3013, |
| "step": 410 |
| }, |
| { |
| "epoch": 2.163157894736842, |
| "grad_norm": 2.0206510011506222, |
| "learning_rate": 5.593311295131562e-05, |
| "loss": 1.2917, |
| "step": 411 |
| }, |
| { |
| "epoch": 2.168421052631579, |
| "grad_norm": 3.092904855930784, |
| "learning_rate": 5.579819380301458e-05, |
| "loss": 1.2795, |
| "step": 412 |
| }, |
| { |
| "epoch": 2.1736842105263157, |
| "grad_norm": 2.2386119816911623, |
| "learning_rate": 5.5663061362763665e-05, |
| "loss": 1.2964, |
| "step": 413 |
| }, |
| { |
| "epoch": 2.1789473684210527, |
| "grad_norm": 2.8845075274191223, |
| "learning_rate": 5.552771745499051e-05, |
| "loss": 1.29, |
| "step": 414 |
| }, |
| { |
| "epoch": 2.1842105263157894, |
| "grad_norm": 2.355680412049654, |
| "learning_rate": 5.5392163906977835e-05, |
| "loss": 1.2802, |
| "step": 415 |
| }, |
| { |
| "epoch": 2.1894736842105265, |
| "grad_norm": 2.577365756174829, |
| "learning_rate": 5.525640254883865e-05, |
| "loss": 1.2894, |
| "step": 416 |
| }, |
| { |
| "epoch": 2.194736842105263, |
| "grad_norm": 2.0046660703267576, |
| "learning_rate": 5.512043521349166e-05, |
| "loss": 1.2873, |
| "step": 417 |
| }, |
| { |
| "epoch": 2.2, |
| "grad_norm": 2.576417069207704, |
| "learning_rate": 5.4984263736636494e-05, |
| "loss": 1.2759, |
| "step": 418 |
| }, |
| { |
| "epoch": 2.205263157894737, |
| "grad_norm": 1.9511016222282593, |
| "learning_rate": 5.4847889956728834e-05, |
| "loss": 1.298, |
| "step": 419 |
| }, |
| { |
| "epoch": 2.2105263157894735, |
| "grad_norm": 2.6670388356828285, |
| "learning_rate": 5.471131571495574e-05, |
| "loss": 1.2951, |
| "step": 420 |
| }, |
| { |
| "epoch": 2.2157894736842105, |
| "grad_norm": 2.126207826369636, |
| "learning_rate": 5.457454285521064e-05, |
| "loss": 1.2812, |
| "step": 421 |
| }, |
| { |
| "epoch": 2.221052631578947, |
| "grad_norm": 2.400820316806507, |
| "learning_rate": 5.4437573224068595e-05, |
| "loss": 1.2948, |
| "step": 422 |
| }, |
| { |
| "epoch": 2.2263157894736842, |
| "grad_norm": 1.7795504525185426, |
| "learning_rate": 5.4300408670761204e-05, |
| "loss": 1.2959, |
| "step": 423 |
| }, |
| { |
| "epoch": 2.231578947368421, |
| "grad_norm": 2.8829533810849663, |
| "learning_rate": 5.416305104715175e-05, |
| "loss": 1.3074, |
| "step": 424 |
| }, |
| { |
| "epoch": 2.236842105263158, |
| "grad_norm": 2.159110693359624, |
| "learning_rate": 5.4025502207710184e-05, |
| "loss": 1.2797, |
| "step": 425 |
| }, |
| { |
| "epoch": 2.2421052631578946, |
| "grad_norm": 3.0783424431722666, |
| "learning_rate": 5.388776400948803e-05, |
| "loss": 1.2864, |
| "step": 426 |
| }, |
| { |
| "epoch": 2.2473684210526317, |
| "grad_norm": 2.640451536165918, |
| "learning_rate": 5.3749838312093364e-05, |
| "loss": 1.2987, |
| "step": 427 |
| }, |
| { |
| "epoch": 2.2526315789473683, |
| "grad_norm": 2.413356511304884, |
| "learning_rate": 5.361172697766573e-05, |
| "loss": 1.2775, |
| "step": 428 |
| }, |
| { |
| "epoch": 2.2578947368421054, |
| "grad_norm": 2.334518355071201, |
| "learning_rate": 5.3473431870850904e-05, |
| "loss": 1.275, |
| "step": 429 |
| }, |
| { |
| "epoch": 2.263157894736842, |
| "grad_norm": 2.4122426514527984, |
| "learning_rate": 5.333495485877583e-05, |
| "loss": 1.2961, |
| "step": 430 |
| }, |
| { |
| "epoch": 2.268421052631579, |
| "grad_norm": 2.2362176845592208, |
| "learning_rate": 5.3196297811023316e-05, |
| "loss": 1.2937, |
| "step": 431 |
| }, |
| { |
| "epoch": 2.2736842105263158, |
| "grad_norm": 2.293429615790018, |
| "learning_rate": 5.305746259960689e-05, |
| "loss": 1.2852, |
| "step": 432 |
| }, |
| { |
| "epoch": 2.2789473684210524, |
| "grad_norm": 1.786254087945556, |
| "learning_rate": 5.291845109894544e-05, |
| "loss": 1.2799, |
| "step": 433 |
| }, |
| { |
| "epoch": 2.2842105263157895, |
| "grad_norm": 2.6945565744818407, |
| "learning_rate": 5.277926518583793e-05, |
| "loss": 1.2921, |
| "step": 434 |
| }, |
| { |
| "epoch": 2.2894736842105265, |
| "grad_norm": 2.2194659113523962, |
| "learning_rate": 5.263990673943811e-05, |
| "loss": 1.3046, |
| "step": 435 |
| }, |
| { |
| "epoch": 2.294736842105263, |
| "grad_norm": 2.547332541805207, |
| "learning_rate": 5.250037764122907e-05, |
| "loss": 1.2842, |
| "step": 436 |
| }, |
| { |
| "epoch": 2.3, |
| "grad_norm": 2.2417529148390325, |
| "learning_rate": 5.23606797749979e-05, |
| "loss": 1.2737, |
| "step": 437 |
| }, |
| { |
| "epoch": 2.305263157894737, |
| "grad_norm": 2.297738957213229, |
| "learning_rate": 5.2220815026810234e-05, |
| "loss": 1.2964, |
| "step": 438 |
| }, |
| { |
| "epoch": 2.3105263157894735, |
| "grad_norm": 2.0161964482231416, |
| "learning_rate": 5.208078528498476e-05, |
| "loss": 1.2734, |
| "step": 439 |
| }, |
| { |
| "epoch": 2.3157894736842106, |
| "grad_norm": 2.5589773064425896, |
| "learning_rate": 5.194059244006779e-05, |
| "loss": 1.3239, |
| "step": 440 |
| }, |
| { |
| "epoch": 2.3210526315789473, |
| "grad_norm": 2.5534996515172246, |
| "learning_rate": 5.180023838480765e-05, |
| "loss": 1.2839, |
| "step": 441 |
| }, |
| { |
| "epoch": 2.3263157894736843, |
| "grad_norm": 1.6213064440253335, |
| "learning_rate": 5.165972501412921e-05, |
| "loss": 1.2804, |
| "step": 442 |
| }, |
| { |
| "epoch": 2.331578947368421, |
| "grad_norm": 1.3934815609685396, |
| "learning_rate": 5.151905422510825e-05, |
| "loss": 1.2733, |
| "step": 443 |
| }, |
| { |
| "epoch": 2.336842105263158, |
| "grad_norm": 2.6675021957708447, |
| "learning_rate": 5.137822791694585e-05, |
| "loss": 1.2847, |
| "step": 444 |
| }, |
| { |
| "epoch": 2.3421052631578947, |
| "grad_norm": 2.0956189405862027, |
| "learning_rate": 5.123724799094279e-05, |
| "loss": 1.2705, |
| "step": 445 |
| }, |
| { |
| "epoch": 2.3473684210526318, |
| "grad_norm": 2.419629528561346, |
| "learning_rate": 5.109611635047379e-05, |
| "loss": 1.2879, |
| "step": 446 |
| }, |
| { |
| "epoch": 2.3526315789473684, |
| "grad_norm": 2.4104356876557755, |
| "learning_rate": 5.095483490096194e-05, |
| "loss": 1.2935, |
| "step": 447 |
| }, |
| { |
| "epoch": 2.3578947368421055, |
| "grad_norm": 1.8777710470149278, |
| "learning_rate": 5.081340554985287e-05, |
| "loss": 1.2775, |
| "step": 448 |
| }, |
| { |
| "epoch": 2.363157894736842, |
| "grad_norm": 1.487851978844643, |
| "learning_rate": 5.067183020658905e-05, |
| "loss": 1.2761, |
| "step": 449 |
| }, |
| { |
| "epoch": 2.3684210526315788, |
| "grad_norm": 2.4549351462483586, |
| "learning_rate": 5.053011078258397e-05, |
| "loss": 1.2692, |
| "step": 450 |
| }, |
| { |
| "epoch": 2.373684210526316, |
| "grad_norm": 1.8656568337670418, |
| "learning_rate": 5.03882491911964e-05, |
| "loss": 1.2911, |
| "step": 451 |
| }, |
| { |
| "epoch": 2.3789473684210525, |
| "grad_norm": 2.7641450741813265, |
| "learning_rate": 5.024624734770446e-05, |
| "loss": 1.2735, |
| "step": 452 |
| }, |
| { |
| "epoch": 2.3842105263157896, |
| "grad_norm": 2.722572050222999, |
| "learning_rate": 5.010410716927988e-05, |
| "loss": 1.2737, |
| "step": 453 |
| }, |
| { |
| "epoch": 2.389473684210526, |
| "grad_norm": 1.428940654329024, |
| "learning_rate": 4.9961830574962e-05, |
| "loss": 1.2888, |
| "step": 454 |
| }, |
| { |
| "epoch": 2.3947368421052633, |
| "grad_norm": 1.5780962302368826, |
| "learning_rate": 4.981941948563197e-05, |
| "loss": 1.2812, |
| "step": 455 |
| }, |
| { |
| "epoch": 2.4, |
| "grad_norm": 2.0344826746474283, |
| "learning_rate": 4.967687582398671e-05, |
| "loss": 1.2864, |
| "step": 456 |
| }, |
| { |
| "epoch": 2.405263157894737, |
| "grad_norm": 1.4319688356833826, |
| "learning_rate": 4.953420151451304e-05, |
| "loss": 1.2834, |
| "step": 457 |
| }, |
| { |
| "epoch": 2.4105263157894736, |
| "grad_norm": 2.601310717014097, |
| "learning_rate": 4.939139848346164e-05, |
| "loss": 1.2823, |
| "step": 458 |
| }, |
| { |
| "epoch": 2.4157894736842107, |
| "grad_norm": 2.348235653569354, |
| "learning_rate": 4.924846865882107e-05, |
| "loss": 1.2846, |
| "step": 459 |
| }, |
| { |
| "epoch": 2.4210526315789473, |
| "grad_norm": 1.726129892949758, |
| "learning_rate": 4.9105413970291747e-05, |
| "loss": 1.3011, |
| "step": 460 |
| }, |
| { |
| "epoch": 2.4263157894736844, |
| "grad_norm": 1.7111299716712474, |
| "learning_rate": 4.896223634925984e-05, |
| "loss": 1.3116, |
| "step": 461 |
| }, |
| { |
| "epoch": 2.431578947368421, |
| "grad_norm": 1.803266671516624, |
| "learning_rate": 4.8818937728771294e-05, |
| "loss": 1.272, |
| "step": 462 |
| }, |
| { |
| "epoch": 2.4368421052631577, |
| "grad_norm": 1.3759594624212466, |
| "learning_rate": 4.867552004350564e-05, |
| "loss": 1.289, |
| "step": 463 |
| }, |
| { |
| "epoch": 2.442105263157895, |
| "grad_norm": 2.3296183382561253, |
| "learning_rate": 4.853198522974988e-05, |
| "loss": 1.2911, |
| "step": 464 |
| }, |
| { |
| "epoch": 2.4473684210526314, |
| "grad_norm": 1.9742731162641471, |
| "learning_rate": 4.8388335225372416e-05, |
| "loss": 1.2656, |
| "step": 465 |
| }, |
| { |
| "epoch": 2.4526315789473685, |
| "grad_norm": 1.6842871202377092, |
| "learning_rate": 4.8244571969796817e-05, |
| "loss": 1.2891, |
| "step": 466 |
| }, |
| { |
| "epoch": 2.457894736842105, |
| "grad_norm": 1.5497120892994825, |
| "learning_rate": 4.810069740397569e-05, |
| "loss": 1.2844, |
| "step": 467 |
| }, |
| { |
| "epoch": 2.463157894736842, |
| "grad_norm": 1.824870478700358, |
| "learning_rate": 4.795671347036439e-05, |
| "loss": 1.2902, |
| "step": 468 |
| }, |
| { |
| "epoch": 2.468421052631579, |
| "grad_norm": 1.3990180325007069, |
| "learning_rate": 4.781262211289491e-05, |
| "loss": 1.281, |
| "step": 469 |
| }, |
| { |
| "epoch": 2.473684210526316, |
| "grad_norm": 2.320520504849803, |
| "learning_rate": 4.7668425276949546e-05, |
| "loss": 1.2838, |
| "step": 470 |
| }, |
| { |
| "epoch": 2.4789473684210526, |
| "grad_norm": 2.1234854667075758, |
| "learning_rate": 4.7524124909334653e-05, |
| "loss": 1.2797, |
| "step": 471 |
| }, |
| { |
| "epoch": 2.4842105263157896, |
| "grad_norm": 1.422390194322296, |
| "learning_rate": 4.7379722958254394e-05, |
| "loss": 1.2896, |
| "step": 472 |
| }, |
| { |
| "epoch": 2.4894736842105263, |
| "grad_norm": 1.3196019422752756, |
| "learning_rate": 4.7235221373284407e-05, |
| "loss": 1.2744, |
| "step": 473 |
| }, |
| { |
| "epoch": 2.4947368421052634, |
| "grad_norm": 1.7461193994213873, |
| "learning_rate": 4.709062210534547e-05, |
| "loss": 1.2887, |
| "step": 474 |
| }, |
| { |
| "epoch": 2.5, |
| "grad_norm": 1.3331786938242027, |
| "learning_rate": 4.694592710667723e-05, |
| "loss": 1.281, |
| "step": 475 |
| }, |
| { |
| "epoch": 2.5052631578947366, |
| "grad_norm": 2.0104890922006464, |
| "learning_rate": 4.680113833081173e-05, |
| "loss": 1.2786, |
| "step": 476 |
| }, |
| { |
| "epoch": 2.5105263157894737, |
| "grad_norm": 1.92734853764442, |
| "learning_rate": 4.665625773254716e-05, |
| "loss": 1.2844, |
| "step": 477 |
| }, |
| { |
| "epoch": 2.515789473684211, |
| "grad_norm": 1.3368646117339327, |
| "learning_rate": 4.6511287267921394e-05, |
| "loss": 1.2944, |
| "step": 478 |
| }, |
| { |
| "epoch": 2.5210526315789474, |
| "grad_norm": 1.158860061239002, |
| "learning_rate": 4.636622889418558e-05, |
| "loss": 1.2728, |
| "step": 479 |
| }, |
| { |
| "epoch": 2.526315789473684, |
| "grad_norm": 1.7510317771973813, |
| "learning_rate": 4.622108456977773e-05, |
| "loss": 1.2752, |
| "step": 480 |
| }, |
| { |
| "epoch": 2.531578947368421, |
| "grad_norm": 1.3943091910623553, |
| "learning_rate": 4.60758562542963e-05, |
| "loss": 1.3005, |
| "step": 481 |
| }, |
| { |
| "epoch": 2.536842105263158, |
| "grad_norm": 1.8549674666458555, |
| "learning_rate": 4.593054590847368e-05, |
| "loss": 1.281, |
| "step": 482 |
| }, |
| { |
| "epoch": 2.542105263157895, |
| "grad_norm": 1.7188026241177852, |
| "learning_rate": 4.57851554941498e-05, |
| "loss": 1.3061, |
| "step": 483 |
| }, |
| { |
| "epoch": 2.5473684210526315, |
| "grad_norm": 1.2685249463251793, |
| "learning_rate": 4.563968697424553e-05, |
| "loss": 1.2822, |
| "step": 484 |
| }, |
| { |
| "epoch": 2.5526315789473686, |
| "grad_norm": 1.549673702485011, |
| "learning_rate": 4.549414231273633e-05, |
| "loss": 1.2958, |
| "step": 485 |
| }, |
| { |
| "epoch": 2.557894736842105, |
| "grad_norm": 1.0822272735204688, |
| "learning_rate": 4.534852347462559e-05, |
| "loss": 1.2829, |
| "step": 486 |
| }, |
| { |
| "epoch": 2.5631578947368423, |
| "grad_norm": 0.9555789360037702, |
| "learning_rate": 4.5202832425918166e-05, |
| "loss": 1.3051, |
| "step": 487 |
| }, |
| { |
| "epoch": 2.568421052631579, |
| "grad_norm": 1.067488479666183, |
| "learning_rate": 4.5057071133593853e-05, |
| "loss": 1.275, |
| "step": 488 |
| }, |
| { |
| "epoch": 2.5736842105263156, |
| "grad_norm": 1.3214859289777758, |
| "learning_rate": 4.4911241565580796e-05, |
| "loss": 1.2887, |
| "step": 489 |
| }, |
| { |
| "epoch": 2.5789473684210527, |
| "grad_norm": 1.309909912707582, |
| "learning_rate": 4.476534569072895e-05, |
| "loss": 1.2933, |
| "step": 490 |
| }, |
| { |
| "epoch": 2.5842105263157897, |
| "grad_norm": 1.103182457932209, |
| "learning_rate": 4.4619385478783456e-05, |
| "loss": 1.2785, |
| "step": 491 |
| }, |
| { |
| "epoch": 2.5894736842105264, |
| "grad_norm": 1.050641547852913, |
| "learning_rate": 4.4473362900358065e-05, |
| "loss": 1.2877, |
| "step": 492 |
| }, |
| { |
| "epoch": 2.594736842105263, |
| "grad_norm": 1.1472680394377797, |
| "learning_rate": 4.432727992690857e-05, |
| "loss": 1.285, |
| "step": 493 |
| }, |
| { |
| "epoch": 2.6, |
| "grad_norm": 1.362629738278887, |
| "learning_rate": 4.418113853070614e-05, |
| "loss": 1.2774, |
| "step": 494 |
| }, |
| { |
| "epoch": 2.6052631578947367, |
| "grad_norm": 0.9212638160971107, |
| "learning_rate": 4.403494068481074e-05, |
| "loss": 1.2956, |
| "step": 495 |
| }, |
| { |
| "epoch": 2.610526315789474, |
| "grad_norm": 1.3060601473810125, |
| "learning_rate": 4.388868836304442e-05, |
| "loss": 1.2864, |
| "step": 496 |
| }, |
| { |
| "epoch": 2.6157894736842104, |
| "grad_norm": 0.9964781716303204, |
| "learning_rate": 4.374238353996472e-05, |
| "loss": 1.2846, |
| "step": 497 |
| }, |
| { |
| "epoch": 2.6210526315789475, |
| "grad_norm": 1.0334496502405375, |
| "learning_rate": 4.3596028190838045e-05, |
| "loss": 1.2751, |
| "step": 498 |
| }, |
| { |
| "epoch": 2.626315789473684, |
| "grad_norm": 0.8374244946961393, |
| "learning_rate": 4.3449624291612895e-05, |
| "loss": 1.2846, |
| "step": 499 |
| }, |
| { |
| "epoch": 2.6315789473684212, |
| "grad_norm": 1.271324423070232, |
| "learning_rate": 4.33031738188933e-05, |
| "loss": 1.2893, |
| "step": 500 |
| }, |
| { |
| "epoch": 2.636842105263158, |
| "grad_norm": 1.0290842796861808, |
| "learning_rate": 4.315667874991205e-05, |
| "loss": 1.2769, |
| "step": 501 |
| }, |
| { |
| "epoch": 2.6421052631578945, |
| "grad_norm": 1.6287387587682205, |
| "learning_rate": 4.3010141062504e-05, |
| "loss": 1.2808, |
| "step": 502 |
| }, |
| { |
| "epoch": 2.6473684210526316, |
| "grad_norm": 1.05633292251089, |
| "learning_rate": 4.286356273507949e-05, |
| "loss": 1.2752, |
| "step": 503 |
| }, |
| { |
| "epoch": 2.6526315789473687, |
| "grad_norm": 1.2639851855275497, |
| "learning_rate": 4.271694574659744e-05, |
| "loss": 1.2673, |
| "step": 504 |
| }, |
| { |
| "epoch": 2.6578947368421053, |
| "grad_norm": 1.3942797511952854, |
| "learning_rate": 4.257029207653881e-05, |
| "loss": 1.2725, |
| "step": 505 |
| }, |
| { |
| "epoch": 2.663157894736842, |
| "grad_norm": 0.9618496184388876, |
| "learning_rate": 4.242360370487976e-05, |
| "loss": 1.2747, |
| "step": 506 |
| }, |
| { |
| "epoch": 2.668421052631579, |
| "grad_norm": 1.141131756163478, |
| "learning_rate": 4.2276882612064936e-05, |
| "loss": 1.3005, |
| "step": 507 |
| }, |
| { |
| "epoch": 2.6736842105263157, |
| "grad_norm": 0.9471533928078693, |
| "learning_rate": 4.213013077898084e-05, |
| "loss": 1.2726, |
| "step": 508 |
| }, |
| { |
| "epoch": 2.6789473684210527, |
| "grad_norm": 0.9237490499511763, |
| "learning_rate": 4.1983350186928894e-05, |
| "loss": 1.2801, |
| "step": 509 |
| }, |
| { |
| "epoch": 2.6842105263157894, |
| "grad_norm": 1.1110718726310678, |
| "learning_rate": 4.183654281759888e-05, |
| "loss": 1.2674, |
| "step": 510 |
| }, |
| { |
| "epoch": 2.6894736842105265, |
| "grad_norm": 1.2581508468500637, |
| "learning_rate": 4.168971065304205e-05, |
| "loss": 1.2809, |
| "step": 511 |
| }, |
| { |
| "epoch": 2.694736842105263, |
| "grad_norm": 0.9468779696627782, |
| "learning_rate": 4.154285567564442e-05, |
| "loss": 1.2796, |
| "step": 512 |
| }, |
| { |
| "epoch": 2.7, |
| "grad_norm": 1.3134333293058689, |
| "learning_rate": 4.139597986810005e-05, |
| "loss": 1.2698, |
| "step": 513 |
| }, |
| { |
| "epoch": 2.705263157894737, |
| "grad_norm": 0.9243484356050305, |
| "learning_rate": 4.124908521338416e-05, |
| "loss": 1.2745, |
| "step": 514 |
| }, |
| { |
| "epoch": 2.7105263157894735, |
| "grad_norm": 0.9728548533723144, |
| "learning_rate": 4.110217369472649e-05, |
| "loss": 1.2925, |
| "step": 515 |
| }, |
| { |
| "epoch": 2.7157894736842105, |
| "grad_norm": 0.7367530477112044, |
| "learning_rate": 4.095524729558441e-05, |
| "loss": 1.2677, |
| "step": 516 |
| }, |
| { |
| "epoch": 2.7210526315789476, |
| "grad_norm": 0.9279276228473495, |
| "learning_rate": 4.080830799961622e-05, |
| "loss": 1.2802, |
| "step": 517 |
| }, |
| { |
| "epoch": 2.7263157894736842, |
| "grad_norm": 1.3931410811444014, |
| "learning_rate": 4.0661357790654345e-05, |
| "loss": 1.262, |
| "step": 518 |
| }, |
| { |
| "epoch": 2.731578947368421, |
| "grad_norm": 0.9883528178224094, |
| "learning_rate": 4.0514398652678514e-05, |
| "loss": 1.2964, |
| "step": 519 |
| }, |
| { |
| "epoch": 2.736842105263158, |
| "grad_norm": 1.3163227077320665, |
| "learning_rate": 4.0367432569789065e-05, |
| "loss": 1.2805, |
| "step": 520 |
| }, |
| { |
| "epoch": 2.7421052631578946, |
| "grad_norm": 0.7397578201219466, |
| "learning_rate": 4.0220461526180023e-05, |
| "loss": 1.2773, |
| "step": 521 |
| }, |
| { |
| "epoch": 2.7473684210526317, |
| "grad_norm": 0.9990315603511356, |
| "learning_rate": 4.007348750611245e-05, |
| "loss": 1.292, |
| "step": 522 |
| }, |
| { |
| "epoch": 2.7526315789473683, |
| "grad_norm": 1.1681928253101022, |
| "learning_rate": 3.9926512493887555e-05, |
| "loss": 1.2893, |
| "step": 523 |
| }, |
| { |
| "epoch": 2.7578947368421054, |
| "grad_norm": 1.1014263843169803, |
| "learning_rate": 3.977953847381998e-05, |
| "loss": 1.2715, |
| "step": 524 |
| }, |
| { |
| "epoch": 2.763157894736842, |
| "grad_norm": 1.1159951506001466, |
| "learning_rate": 3.963256743021095e-05, |
| "loss": 1.2785, |
| "step": 525 |
| }, |
| { |
| "epoch": 2.768421052631579, |
| "grad_norm": 1.2507849560008404, |
| "learning_rate": 3.9485601347321486e-05, |
| "loss": 1.2906, |
| "step": 526 |
| }, |
| { |
| "epoch": 2.7736842105263158, |
| "grad_norm": 0.8936481542314029, |
| "learning_rate": 3.933864220934566e-05, |
| "loss": 1.2669, |
| "step": 527 |
| }, |
| { |
| "epoch": 2.7789473684210524, |
| "grad_norm": 0.9849739951015418, |
| "learning_rate": 3.919169200038379e-05, |
| "loss": 1.2771, |
| "step": 528 |
| }, |
| { |
| "epoch": 2.7842105263157895, |
| "grad_norm": 0.837609493415301, |
| "learning_rate": 3.904475270441561e-05, |
| "loss": 1.266, |
| "step": 529 |
| }, |
| { |
| "epoch": 2.7894736842105265, |
| "grad_norm": 0.7219038051900546, |
| "learning_rate": 3.889782630527353e-05, |
| "loss": 1.2726, |
| "step": 530 |
| }, |
| { |
| "epoch": 2.794736842105263, |
| "grad_norm": 0.7505423536800923, |
| "learning_rate": 3.875091478661585e-05, |
| "loss": 1.2703, |
| "step": 531 |
| }, |
| { |
| "epoch": 2.8, |
| "grad_norm": 0.7301064067667716, |
| "learning_rate": 3.860402013189998e-05, |
| "loss": 1.2812, |
| "step": 532 |
| }, |
| { |
| "epoch": 2.805263157894737, |
| "grad_norm": 0.822332511103024, |
| "learning_rate": 3.845714432435558e-05, |
| "loss": 1.2718, |
| "step": 533 |
| }, |
| { |
| "epoch": 2.8105263157894735, |
| "grad_norm": 0.7783007350783687, |
| "learning_rate": 3.8310289346957965e-05, |
| "loss": 1.2574, |
| "step": 534 |
| }, |
| { |
| "epoch": 2.8157894736842106, |
| "grad_norm": 0.957233585531184, |
| "learning_rate": 3.816345718240113e-05, |
| "loss": 1.2805, |
| "step": 535 |
| }, |
| { |
| "epoch": 2.8210526315789473, |
| "grad_norm": 1.1226403614971794, |
| "learning_rate": 3.8016649813071106e-05, |
| "loss": 1.2983, |
| "step": 536 |
| }, |
| { |
| "epoch": 2.8263157894736843, |
| "grad_norm": 1.306496898793406, |
| "learning_rate": 3.7869869221019177e-05, |
| "loss": 1.2727, |
| "step": 537 |
| }, |
| { |
| "epoch": 2.831578947368421, |
| "grad_norm": 0.5971018924485769, |
| "learning_rate": 3.772311738793507e-05, |
| "loss": 1.2834, |
| "step": 538 |
| }, |
| { |
| "epoch": 2.836842105263158, |
| "grad_norm": 0.9138436747802899, |
| "learning_rate": 3.757639629512026e-05, |
| "loss": 1.2871, |
| "step": 539 |
| }, |
| { |
| "epoch": 2.8421052631578947, |
| "grad_norm": 1.2871243761882003, |
| "learning_rate": 3.74297079234612e-05, |
| "loss": 1.2797, |
| "step": 540 |
| }, |
| { |
| "epoch": 2.8473684210526313, |
| "grad_norm": 0.951777404285425, |
| "learning_rate": 3.7283054253402574e-05, |
| "loss": 1.2754, |
| "step": 541 |
| }, |
| { |
| "epoch": 2.8526315789473684, |
| "grad_norm": 0.9411344162055937, |
| "learning_rate": 3.713643726492053e-05, |
| "loss": 1.2721, |
| "step": 542 |
| }, |
| { |
| "epoch": 2.8578947368421055, |
| "grad_norm": 1.317846119765965, |
| "learning_rate": 3.698985893749599e-05, |
| "loss": 1.2887, |
| "step": 543 |
| }, |
| { |
| "epoch": 2.863157894736842, |
| "grad_norm": 0.8089940300375972, |
| "learning_rate": 3.6843321250087966e-05, |
| "loss": 1.2848, |
| "step": 544 |
| }, |
| { |
| "epoch": 2.8684210526315788, |
| "grad_norm": 0.5361830354797774, |
| "learning_rate": 3.669682618110671e-05, |
| "loss": 1.2657, |
| "step": 545 |
| }, |
| { |
| "epoch": 2.873684210526316, |
| "grad_norm": 0.8183317690070797, |
| "learning_rate": 3.655037570838711e-05, |
| "loss": 1.2866, |
| "step": 546 |
| }, |
| { |
| "epoch": 2.8789473684210525, |
| "grad_norm": 1.2353845817797051, |
| "learning_rate": 3.640397180916197e-05, |
| "loss": 1.2806, |
| "step": 547 |
| }, |
| { |
| "epoch": 2.8842105263157896, |
| "grad_norm": 1.275438903457986, |
| "learning_rate": 3.62576164600353e-05, |
| "loss": 1.3042, |
| "step": 548 |
| }, |
| { |
| "epoch": 2.889473684210526, |
| "grad_norm": 0.5384567882124249, |
| "learning_rate": 3.611131163695561e-05, |
| "loss": 1.2689, |
| "step": 549 |
| }, |
| { |
| "epoch": 2.8947368421052633, |
| "grad_norm": 0.7803333541473619, |
| "learning_rate": 3.5965059315189274e-05, |
| "loss": 1.2797, |
| "step": 550 |
| }, |
| { |
| "epoch": 2.9, |
| "grad_norm": 1.4689702016559818, |
| "learning_rate": 3.581886146929387e-05, |
| "loss": 1.2648, |
| "step": 551 |
| }, |
| { |
| "epoch": 2.905263157894737, |
| "grad_norm": 0.6546922206210783, |
| "learning_rate": 3.567272007309145e-05, |
| "loss": 1.279, |
| "step": 552 |
| }, |
| { |
| "epoch": 2.9105263157894736, |
| "grad_norm": 0.706490220466843, |
| "learning_rate": 3.552663709964194e-05, |
| "loss": 1.2735, |
| "step": 553 |
| }, |
| { |
| "epoch": 2.9157894736842103, |
| "grad_norm": 1.3658529786651115, |
| "learning_rate": 3.538061452121656e-05, |
| "loss": 1.2916, |
| "step": 554 |
| }, |
| { |
| "epoch": 2.9210526315789473, |
| "grad_norm": 0.9489706293231372, |
| "learning_rate": 3.523465430927106e-05, |
| "loss": 1.2918, |
| "step": 555 |
| }, |
| { |
| "epoch": 2.9263157894736844, |
| "grad_norm": 0.8764748345395458, |
| "learning_rate": 3.50887584344192e-05, |
| "loss": 1.3015, |
| "step": 556 |
| }, |
| { |
| "epoch": 2.931578947368421, |
| "grad_norm": 0.5355041821626928, |
| "learning_rate": 3.494292886640615e-05, |
| "loss": 1.2751, |
| "step": 557 |
| }, |
| { |
| "epoch": 2.9368421052631577, |
| "grad_norm": 0.9242505060104863, |
| "learning_rate": 3.479716757408185e-05, |
| "loss": 1.2819, |
| "step": 558 |
| }, |
| { |
| "epoch": 2.942105263157895, |
| "grad_norm": 1.2554507748275814, |
| "learning_rate": 3.465147652537443e-05, |
| "loss": 1.276, |
| "step": 559 |
| }, |
| { |
| "epoch": 2.9473684210526314, |
| "grad_norm": 0.9095910936651702, |
| "learning_rate": 3.4505857687263675e-05, |
| "loss": 1.2753, |
| "step": 560 |
| }, |
| { |
| "epoch": 2.9526315789473685, |
| "grad_norm": 0.6651860681009616, |
| "learning_rate": 3.4360313025754476e-05, |
| "loss": 1.2695, |
| "step": 561 |
| }, |
| { |
| "epoch": 2.957894736842105, |
| "grad_norm": 0.9291042715583687, |
| "learning_rate": 3.421484450585023e-05, |
| "loss": 1.2961, |
| "step": 562 |
| }, |
| { |
| "epoch": 2.963157894736842, |
| "grad_norm": 1.3279167632034623, |
| "learning_rate": 3.406945409152632e-05, |
| "loss": 1.2858, |
| "step": 563 |
| }, |
| { |
| "epoch": 2.968421052631579, |
| "grad_norm": 0.5900225146576717, |
| "learning_rate": 3.392414374570371e-05, |
| "loss": 1.2786, |
| "step": 564 |
| }, |
| { |
| "epoch": 2.973684210526316, |
| "grad_norm": 0.8721553828236943, |
| "learning_rate": 3.377891543022229e-05, |
| "loss": 1.2712, |
| "step": 565 |
| }, |
| { |
| "epoch": 2.9789473684210526, |
| "grad_norm": 1.0505950441437681, |
| "learning_rate": 3.363377110581442e-05, |
| "loss": 1.2719, |
| "step": 566 |
| }, |
| { |
| "epoch": 2.984210526315789, |
| "grad_norm": 1.3040980751717048, |
| "learning_rate": 3.348871273207861e-05, |
| "loss": 1.2961, |
| "step": 567 |
| }, |
| { |
| "epoch": 2.9894736842105263, |
| "grad_norm": 0.7750168697948207, |
| "learning_rate": 3.334374226745285e-05, |
| "loss": 1.287, |
| "step": 568 |
| }, |
| { |
| "epoch": 2.9947368421052634, |
| "grad_norm": 0.797465388668966, |
| "learning_rate": 3.319886166918829e-05, |
| "loss": 1.2798, |
| "step": 569 |
| }, |
| { |
| "epoch": 3.0, |
| "grad_norm": 0.7195120795363182, |
| "learning_rate": 3.305407289332279e-05, |
| "loss": 1.2516, |
| "step": 570 |
| }, |
| { |
| "epoch": 3.0052631578947366, |
| "grad_norm": 0.9683227012264453, |
| "learning_rate": 3.290937789465454e-05, |
| "loss": 1.245, |
| "step": 571 |
| }, |
| { |
| "epoch": 3.0105263157894737, |
| "grad_norm": 1.0758712880323096, |
| "learning_rate": 3.276477862671562e-05, |
| "loss": 1.2628, |
| "step": 572 |
| }, |
| { |
| "epoch": 3.0157894736842104, |
| "grad_norm": 0.9883859720793654, |
| "learning_rate": 3.262027704174561e-05, |
| "loss": 1.2509, |
| "step": 573 |
| }, |
| { |
| "epoch": 3.0210526315789474, |
| "grad_norm": 0.9615705195781193, |
| "learning_rate": 3.247587509066535e-05, |
| "loss": 1.264, |
| "step": 574 |
| }, |
| { |
| "epoch": 3.026315789473684, |
| "grad_norm": 0.7910888117572926, |
| "learning_rate": 3.2331574723050474e-05, |
| "loss": 1.2454, |
| "step": 575 |
| }, |
| { |
| "epoch": 3.031578947368421, |
| "grad_norm": 0.6384643157937262, |
| "learning_rate": 3.218737788710509e-05, |
| "loss": 1.2538, |
| "step": 576 |
| }, |
| { |
| "epoch": 3.036842105263158, |
| "grad_norm": 0.5368984975332023, |
| "learning_rate": 3.2043286529635614e-05, |
| "loss": 1.2587, |
| "step": 577 |
| }, |
| { |
| "epoch": 3.042105263157895, |
| "grad_norm": 0.6163486545813159, |
| "learning_rate": 3.189930259602433e-05, |
| "loss": 1.2452, |
| "step": 578 |
| }, |
| { |
| "epoch": 3.0473684210526315, |
| "grad_norm": 0.72931652275654, |
| "learning_rate": 3.175542803020319e-05, |
| "loss": 1.2414, |
| "step": 579 |
| }, |
| { |
| "epoch": 3.0526315789473686, |
| "grad_norm": 0.8975216647677429, |
| "learning_rate": 3.161166477462759e-05, |
| "loss": 1.2562, |
| "step": 580 |
| }, |
| { |
| "epoch": 3.057894736842105, |
| "grad_norm": 0.6779341882405682, |
| "learning_rate": 3.146801477025013e-05, |
| "loss": 1.259, |
| "step": 581 |
| }, |
| { |
| "epoch": 3.0631578947368423, |
| "grad_norm": 0.35876586554623435, |
| "learning_rate": 3.132447995649438e-05, |
| "loss": 1.2439, |
| "step": 582 |
| }, |
| { |
| "epoch": 3.068421052631579, |
| "grad_norm": 0.4633889396883284, |
| "learning_rate": 3.11810622712287e-05, |
| "loss": 1.2443, |
| "step": 583 |
| }, |
| { |
| "epoch": 3.0736842105263156, |
| "grad_norm": 0.37725222875354836, |
| "learning_rate": 3.103776365074017e-05, |
| "loss": 1.244, |
| "step": 584 |
| }, |
| { |
| "epoch": 3.0789473684210527, |
| "grad_norm": 0.48272932389464906, |
| "learning_rate": 3.089458602970828e-05, |
| "loss": 1.2509, |
| "step": 585 |
| }, |
| { |
| "epoch": 3.0842105263157893, |
| "grad_norm": 0.5490877544866288, |
| "learning_rate": 3.075153134117893e-05, |
| "loss": 1.264, |
| "step": 586 |
| }, |
| { |
| "epoch": 3.0894736842105264, |
| "grad_norm": 0.4207792008385385, |
| "learning_rate": 3.060860151653837e-05, |
| "loss": 1.2519, |
| "step": 587 |
| }, |
| { |
| "epoch": 3.094736842105263, |
| "grad_norm": 0.44337087568296857, |
| "learning_rate": 3.046579848548697e-05, |
| "loss": 1.2387, |
| "step": 588 |
| }, |
| { |
| "epoch": 3.1, |
| "grad_norm": 0.4345387632778484, |
| "learning_rate": 3.0323124176013297e-05, |
| "loss": 1.2471, |
| "step": 589 |
| }, |
| { |
| "epoch": 3.1052631578947367, |
| "grad_norm": 0.34646556904906206, |
| "learning_rate": 3.0180580514368037e-05, |
| "loss": 1.2574, |
| "step": 590 |
| }, |
| { |
| "epoch": 3.110526315789474, |
| "grad_norm": 0.5480997618564871, |
| "learning_rate": 3.0038169425038007e-05, |
| "loss": 1.2483, |
| "step": 591 |
| }, |
| { |
| "epoch": 3.1157894736842104, |
| "grad_norm": 0.3643135054525602, |
| "learning_rate": 2.9895892830720137e-05, |
| "loss": 1.2586, |
| "step": 592 |
| }, |
| { |
| "epoch": 3.1210526315789475, |
| "grad_norm": 0.3565417102254677, |
| "learning_rate": 2.9753752652295538e-05, |
| "loss": 1.2391, |
| "step": 593 |
| }, |
| { |
| "epoch": 3.126315789473684, |
| "grad_norm": 0.4591251447075665, |
| "learning_rate": 2.961175080880362e-05, |
| "loss": 1.2496, |
| "step": 594 |
| }, |
| { |
| "epoch": 3.1315789473684212, |
| "grad_norm": 0.46367424343953406, |
| "learning_rate": 2.9469889217416045e-05, |
| "loss": 1.2466, |
| "step": 595 |
| }, |
| { |
| "epoch": 3.136842105263158, |
| "grad_norm": 0.3997287611132656, |
| "learning_rate": 2.9328169793410954e-05, |
| "loss": 1.2458, |
| "step": 596 |
| }, |
| { |
| "epoch": 3.1421052631578945, |
| "grad_norm": 0.39902269987295985, |
| "learning_rate": 2.918659445014713e-05, |
| "loss": 1.2415, |
| "step": 597 |
| }, |
| { |
| "epoch": 3.1473684210526316, |
| "grad_norm": 0.3404609072909432, |
| "learning_rate": 2.9045165099038066e-05, |
| "loss": 1.2631, |
| "step": 598 |
| }, |
| { |
| "epoch": 3.1526315789473682, |
| "grad_norm": 0.3396481784449944, |
| "learning_rate": 2.890388364952623e-05, |
| "loss": 1.2548, |
| "step": 599 |
| }, |
| { |
| "epoch": 3.1578947368421053, |
| "grad_norm": 0.37782629450525207, |
| "learning_rate": 2.8762752009057232e-05, |
| "loss": 1.2617, |
| "step": 600 |
| }, |
| { |
| "epoch": 3.163157894736842, |
| "grad_norm": 0.5589592818613579, |
| "learning_rate": 2.8621772083054157e-05, |
| "loss": 1.2594, |
| "step": 601 |
| }, |
| { |
| "epoch": 3.168421052631579, |
| "grad_norm": 0.42966453281721634, |
| "learning_rate": 2.8480945774891764e-05, |
| "loss": 1.2413, |
| "step": 602 |
| }, |
| { |
| "epoch": 3.1736842105263157, |
| "grad_norm": 0.37959826731834306, |
| "learning_rate": 2.83402749858708e-05, |
| "loss": 1.2509, |
| "step": 603 |
| }, |
| { |
| "epoch": 3.1789473684210527, |
| "grad_norm": 0.4661717251353946, |
| "learning_rate": 2.819976161519236e-05, |
| "loss": 1.2629, |
| "step": 604 |
| }, |
| { |
| "epoch": 3.1842105263157894, |
| "grad_norm": 0.31707150511990617, |
| "learning_rate": 2.805940755993223e-05, |
| "loss": 1.2446, |
| "step": 605 |
| }, |
| { |
| "epoch": 3.1894736842105265, |
| "grad_norm": 0.3596061389333874, |
| "learning_rate": 2.7919214715015236e-05, |
| "loss": 1.2487, |
| "step": 606 |
| }, |
| { |
| "epoch": 3.194736842105263, |
| "grad_norm": 0.3125529192407293, |
| "learning_rate": 2.7779184973189773e-05, |
| "loss": 1.2575, |
| "step": 607 |
| }, |
| { |
| "epoch": 3.2, |
| "grad_norm": 0.39929761965783167, |
| "learning_rate": 2.7639320225002108e-05, |
| "loss": 1.2563, |
| "step": 608 |
| }, |
| { |
| "epoch": 3.205263157894737, |
| "grad_norm": 0.357481051645098, |
| "learning_rate": 2.7499622358770936e-05, |
| "loss": 1.2399, |
| "step": 609 |
| }, |
| { |
| "epoch": 3.2105263157894735, |
| "grad_norm": 0.3253036562346321, |
| "learning_rate": 2.7360093260561904e-05, |
| "loss": 1.2587, |
| "step": 610 |
| }, |
| { |
| "epoch": 3.2157894736842105, |
| "grad_norm": 1.0866422737509416, |
| "learning_rate": 2.722073481416208e-05, |
| "loss": 1.253, |
| "step": 611 |
| }, |
| { |
| "epoch": 3.221052631578947, |
| "grad_norm": 0.3704975946750915, |
| "learning_rate": 2.7081548901054574e-05, |
| "loss": 1.2449, |
| "step": 612 |
| }, |
| { |
| "epoch": 3.2263157894736842, |
| "grad_norm": 0.39681349064147786, |
| "learning_rate": 2.6942537400393117e-05, |
| "loss": 1.2393, |
| "step": 613 |
| }, |
| { |
| "epoch": 3.231578947368421, |
| "grad_norm": 0.38789399072411884, |
| "learning_rate": 2.680370218897669e-05, |
| "loss": 1.2476, |
| "step": 614 |
| }, |
| { |
| "epoch": 3.236842105263158, |
| "grad_norm": 0.6056318300360599, |
| "learning_rate": 2.6665045141224193e-05, |
| "loss": 1.2498, |
| "step": 615 |
| }, |
| { |
| "epoch": 3.2421052631578946, |
| "grad_norm": 0.5268591378002944, |
| "learning_rate": 2.6526568129149103e-05, |
| "loss": 1.2509, |
| "step": 616 |
| }, |
| { |
| "epoch": 3.2473684210526317, |
| "grad_norm": 0.5275312902783164, |
| "learning_rate": 2.638827302233428e-05, |
| "loss": 1.2581, |
| "step": 617 |
| }, |
| { |
| "epoch": 3.2526315789473683, |
| "grad_norm": 0.37709353602618134, |
| "learning_rate": 2.625016168790664e-05, |
| "loss": 1.2533, |
| "step": 618 |
| }, |
| { |
| "epoch": 3.2578947368421054, |
| "grad_norm": 0.3270640411740736, |
| "learning_rate": 2.611223599051198e-05, |
| "loss": 1.2743, |
| "step": 619 |
| }, |
| { |
| "epoch": 3.263157894736842, |
| "grad_norm": 0.32059620385654264, |
| "learning_rate": 2.597449779228983e-05, |
| "loss": 1.2568, |
| "step": 620 |
| }, |
| { |
| "epoch": 3.268421052631579, |
| "grad_norm": 0.39808261047072035, |
| "learning_rate": 2.5836948952848255e-05, |
| "loss": 1.2525, |
| "step": 621 |
| }, |
| { |
| "epoch": 3.2736842105263158, |
| "grad_norm": 0.34113291757836145, |
| "learning_rate": 2.5699591329238812e-05, |
| "loss": 1.268, |
| "step": 622 |
| }, |
| { |
| "epoch": 3.2789473684210524, |
| "grad_norm": 0.3042065217936969, |
| "learning_rate": 2.5562426775931418e-05, |
| "loss": 1.2483, |
| "step": 623 |
| }, |
| { |
| "epoch": 3.2842105263157895, |
| "grad_norm": 0.3974087061640213, |
| "learning_rate": 2.5425457144789364e-05, |
| "loss": 1.2609, |
| "step": 624 |
| }, |
| { |
| "epoch": 3.2894736842105265, |
| "grad_norm": 0.321409927169932, |
| "learning_rate": 2.5288684285044283e-05, |
| "loss": 1.255, |
| "step": 625 |
| }, |
| { |
| "epoch": 3.294736842105263, |
| "grad_norm": 0.385869925356024, |
| "learning_rate": 2.5152110043271166e-05, |
| "loss": 1.2576, |
| "step": 626 |
| }, |
| { |
| "epoch": 3.3, |
| "grad_norm": 0.3637124957498029, |
| "learning_rate": 2.501573626336352e-05, |
| "loss": 1.2411, |
| "step": 627 |
| }, |
| { |
| "epoch": 3.305263157894737, |
| "grad_norm": 0.3710385685313032, |
| "learning_rate": 2.4879564786508343e-05, |
| "loss": 1.2592, |
| "step": 628 |
| }, |
| { |
| "epoch": 3.3105263157894735, |
| "grad_norm": 0.4487560727745529, |
| "learning_rate": 2.474359745116136e-05, |
| "loss": 1.2404, |
| "step": 629 |
| }, |
| { |
| "epoch": 3.3157894736842106, |
| "grad_norm": 0.3231869771450256, |
| "learning_rate": 2.460783609302218e-05, |
| "loss": 1.2547, |
| "step": 630 |
| }, |
| { |
| "epoch": 3.3210526315789473, |
| "grad_norm": 0.4088431022056057, |
| "learning_rate": 2.4472282545009493e-05, |
| "loss": 1.2548, |
| "step": 631 |
| }, |
| { |
| "epoch": 3.3263157894736843, |
| "grad_norm": 0.29515450703495905, |
| "learning_rate": 2.4336938637236352e-05, |
| "loss": 1.2525, |
| "step": 632 |
| }, |
| { |
| "epoch": 3.331578947368421, |
| "grad_norm": 0.33297468568328076, |
| "learning_rate": 2.4201806196985426e-05, |
| "loss": 1.2737, |
| "step": 633 |
| }, |
| { |
| "epoch": 3.336842105263158, |
| "grad_norm": 0.3335294632136315, |
| "learning_rate": 2.4066887048684394e-05, |
| "loss": 1.2447, |
| "step": 634 |
| }, |
| { |
| "epoch": 3.3421052631578947, |
| "grad_norm": 0.2879112803644998, |
| "learning_rate": 2.393218301388123e-05, |
| "loss": 1.2715, |
| "step": 635 |
| }, |
| { |
| "epoch": 3.3473684210526318, |
| "grad_norm": 0.3133592323848536, |
| "learning_rate": 2.3797695911219668e-05, |
| "loss": 1.2561, |
| "step": 636 |
| }, |
| { |
| "epoch": 3.3526315789473684, |
| "grad_norm": 0.2430811283889928, |
| "learning_rate": 2.3663427556414664e-05, |
| "loss": 1.2601, |
| "step": 637 |
| }, |
| { |
| "epoch": 3.3578947368421055, |
| "grad_norm": 0.3579114126056535, |
| "learning_rate": 2.352937976222781e-05, |
| "loss": 1.253, |
| "step": 638 |
| }, |
| { |
| "epoch": 3.363157894736842, |
| "grad_norm": 0.26852343656836425, |
| "learning_rate": 2.3395554338442908e-05, |
| "loss": 1.245, |
| "step": 639 |
| }, |
| { |
| "epoch": 3.3684210526315788, |
| "grad_norm": 0.3011001164622397, |
| "learning_rate": 2.3261953091841553e-05, |
| "loss": 1.2546, |
| "step": 640 |
| }, |
| { |
| "epoch": 3.373684210526316, |
| "grad_norm": 0.26481840311987703, |
| "learning_rate": 2.3128577826178723e-05, |
| "loss": 1.2606, |
| "step": 641 |
| }, |
| { |
| "epoch": 3.3789473684210525, |
| "grad_norm": 0.3257272912007352, |
| "learning_rate": 2.2995430342158365e-05, |
| "loss": 1.2353, |
| "step": 642 |
| }, |
| { |
| "epoch": 3.3842105263157896, |
| "grad_norm": 0.38000488426555273, |
| "learning_rate": 2.2862512437409162e-05, |
| "loss": 1.2423, |
| "step": 643 |
| }, |
| { |
| "epoch": 3.389473684210526, |
| "grad_norm": 0.29866098174675637, |
| "learning_rate": 2.272982590646029e-05, |
| "loss": 1.2653, |
| "step": 644 |
| }, |
| { |
| "epoch": 3.3947368421052633, |
| "grad_norm": 0.7482169914063777, |
| "learning_rate": 2.2597372540717083e-05, |
| "loss": 1.2591, |
| "step": 645 |
| }, |
| { |
| "epoch": 3.4, |
| "grad_norm": 0.48442639740243737, |
| "learning_rate": 2.24651541284369e-05, |
| "loss": 1.2748, |
| "step": 646 |
| }, |
| { |
| "epoch": 3.405263157894737, |
| "grad_norm": 0.3933443985045218, |
| "learning_rate": 2.233317245470504e-05, |
| "loss": 1.2491, |
| "step": 647 |
| }, |
| { |
| "epoch": 3.4105263157894736, |
| "grad_norm": 0.4653265340743596, |
| "learning_rate": 2.220142930141054e-05, |
| "loss": 1.2592, |
| "step": 648 |
| }, |
| { |
| "epoch": 3.4157894736842107, |
| "grad_norm": 0.42673076337011967, |
| "learning_rate": 2.206992644722216e-05, |
| "loss": 1.2396, |
| "step": 649 |
| }, |
| { |
| "epoch": 3.4210526315789473, |
| "grad_norm": 0.33876188196334395, |
| "learning_rate": 2.1938665667564435e-05, |
| "loss": 1.2436, |
| "step": 650 |
| }, |
| { |
| "epoch": 3.4263157894736844, |
| "grad_norm": 0.3630588058950603, |
| "learning_rate": 2.1807648734593558e-05, |
| "loss": 1.2557, |
| "step": 651 |
| }, |
| { |
| "epoch": 3.431578947368421, |
| "grad_norm": 0.3529402619316208, |
| "learning_rate": 2.167687741717358e-05, |
| "loss": 1.2536, |
| "step": 652 |
| }, |
| { |
| "epoch": 3.4368421052631577, |
| "grad_norm": 0.3145009910486473, |
| "learning_rate": 2.1546353480852495e-05, |
| "loss": 1.2465, |
| "step": 653 |
| }, |
| { |
| "epoch": 3.442105263157895, |
| "grad_norm": 0.2825566028878834, |
| "learning_rate": 2.1416078687838403e-05, |
| "loss": 1.2543, |
| "step": 654 |
| }, |
| { |
| "epoch": 3.4473684210526314, |
| "grad_norm": 0.2872680469582709, |
| "learning_rate": 2.1286054796975696e-05, |
| "loss": 1.2637, |
| "step": 655 |
| }, |
| { |
| "epoch": 3.4526315789473685, |
| "grad_norm": 0.2802498708050248, |
| "learning_rate": 2.115628356372131e-05, |
| "loss": 1.245, |
| "step": 656 |
| }, |
| { |
| "epoch": 3.457894736842105, |
| "grad_norm": 0.2779169417503312, |
| "learning_rate": 2.1026766740121096e-05, |
| "loss": 1.2548, |
| "step": 657 |
| }, |
| { |
| "epoch": 3.463157894736842, |
| "grad_norm": 0.27790502165031583, |
| "learning_rate": 2.089750607478606e-05, |
| "loss": 1.2482, |
| "step": 658 |
| }, |
| { |
| "epoch": 3.468421052631579, |
| "grad_norm": 0.3106234637273863, |
| "learning_rate": 2.076850331286881e-05, |
| "loss": 1.2474, |
| "step": 659 |
| }, |
| { |
| "epoch": 3.473684210526316, |
| "grad_norm": 0.2460612966298966, |
| "learning_rate": 2.063976019604006e-05, |
| "loss": 1.2578, |
| "step": 660 |
| }, |
| { |
| "epoch": 3.4789473684210526, |
| "grad_norm": 0.4002624603687612, |
| "learning_rate": 2.0511278462464933e-05, |
| "loss": 1.2323, |
| "step": 661 |
| }, |
| { |
| "epoch": 3.4842105263157896, |
| "grad_norm": 0.3558072656216221, |
| "learning_rate": 2.038305984677969e-05, |
| "loss": 1.2513, |
| "step": 662 |
| }, |
| { |
| "epoch": 3.4894736842105263, |
| "grad_norm": 0.32674276214626774, |
| "learning_rate": 2.025510608006819e-05, |
| "loss": 1.248, |
| "step": 663 |
| }, |
| { |
| "epoch": 3.4947368421052634, |
| "grad_norm": 0.3685362965399088, |
| "learning_rate": 2.012741888983861e-05, |
| "loss": 1.2612, |
| "step": 664 |
| }, |
| { |
| "epoch": 3.5, |
| "grad_norm": 0.3851874388241183, |
| "learning_rate": 2.0000000000000012e-05, |
| "loss": 1.26, |
| "step": 665 |
| }, |
| { |
| "epoch": 3.5052631578947366, |
| "grad_norm": 0.2922093360847206, |
| "learning_rate": 1.9872851130839126e-05, |
| "loss": 1.2503, |
| "step": 666 |
| }, |
| { |
| "epoch": 3.5105263157894737, |
| "grad_norm": 0.2982128935849179, |
| "learning_rate": 1.9745973998997177e-05, |
| "loss": 1.2461, |
| "step": 667 |
| }, |
| { |
| "epoch": 3.515789473684211, |
| "grad_norm": 0.36881831273338744, |
| "learning_rate": 1.9619370317446612e-05, |
| "loss": 1.2627, |
| "step": 668 |
| }, |
| { |
| "epoch": 3.5210526315789474, |
| "grad_norm": 0.25559075127742553, |
| "learning_rate": 1.9493041795468018e-05, |
| "loss": 1.2474, |
| "step": 669 |
| }, |
| { |
| "epoch": 3.526315789473684, |
| "grad_norm": 0.6103223603779421, |
| "learning_rate": 1.9366990138627054e-05, |
| "loss": 1.2553, |
| "step": 670 |
| }, |
| { |
| "epoch": 3.531578947368421, |
| "grad_norm": 0.32053875904249984, |
| "learning_rate": 1.9241217048751406e-05, |
| "loss": 1.2716, |
| "step": 671 |
| }, |
| { |
| "epoch": 3.536842105263158, |
| "grad_norm": 0.32627511828160094, |
| "learning_rate": 1.911572422390783e-05, |
| "loss": 1.2509, |
| "step": 672 |
| }, |
| { |
| "epoch": 3.542105263157895, |
| "grad_norm": 0.31231339980121575, |
| "learning_rate": 1.899051335837919e-05, |
| "loss": 1.2542, |
| "step": 673 |
| }, |
| { |
| "epoch": 3.5473684210526315, |
| "grad_norm": 0.31642734990082777, |
| "learning_rate": 1.886558614264165e-05, |
| "loss": 1.2544, |
| "step": 674 |
| }, |
| { |
| "epoch": 3.5526315789473686, |
| "grad_norm": 0.41419322615420073, |
| "learning_rate": 1.8740944263341773e-05, |
| "loss": 1.2722, |
| "step": 675 |
| }, |
| { |
| "epoch": 3.557894736842105, |
| "grad_norm": 0.2575000448429207, |
| "learning_rate": 1.8616589403273776e-05, |
| "loss": 1.251, |
| "step": 676 |
| }, |
| { |
| "epoch": 3.5631578947368423, |
| "grad_norm": 0.45829370611833337, |
| "learning_rate": 1.8492523241356877e-05, |
| "loss": 1.2552, |
| "step": 677 |
| }, |
| { |
| "epoch": 3.568421052631579, |
| "grad_norm": 0.3876144668681015, |
| "learning_rate": 1.8368747452612504e-05, |
| "loss": 1.2756, |
| "step": 678 |
| }, |
| { |
| "epoch": 3.5736842105263156, |
| "grad_norm": 0.3605137220418223, |
| "learning_rate": 1.8245263708141782e-05, |
| "loss": 1.242, |
| "step": 679 |
| }, |
| { |
| "epoch": 3.5789473684210527, |
| "grad_norm": 0.3947355937612717, |
| "learning_rate": 1.8122073675102935e-05, |
| "loss": 1.2556, |
| "step": 680 |
| }, |
| { |
| "epoch": 3.5842105263157897, |
| "grad_norm": 0.29347916836402094, |
| "learning_rate": 1.7999179016688763e-05, |
| "loss": 1.26, |
| "step": 681 |
| }, |
| { |
| "epoch": 3.5894736842105264, |
| "grad_norm": 0.32495295214844105, |
| "learning_rate": 1.7876581392104225e-05, |
| "loss": 1.2496, |
| "step": 682 |
| }, |
| { |
| "epoch": 3.594736842105263, |
| "grad_norm": 0.2493724682619427, |
| "learning_rate": 1.7754282456543977e-05, |
| "loss": 1.2514, |
| "step": 683 |
| }, |
| { |
| "epoch": 3.6, |
| "grad_norm": 0.35605401548647925, |
| "learning_rate": 1.7632283861170135e-05, |
| "loss": 1.2539, |
| "step": 684 |
| }, |
| { |
| "epoch": 3.6052631578947367, |
| "grad_norm": 0.2630345804072707, |
| "learning_rate": 1.7510587253089842e-05, |
| "loss": 1.2579, |
| "step": 685 |
| }, |
| { |
| "epoch": 3.610526315789474, |
| "grad_norm": 0.2772719177300871, |
| "learning_rate": 1.7389194275333124e-05, |
| "loss": 1.2471, |
| "step": 686 |
| }, |
| { |
| "epoch": 3.6157894736842104, |
| "grad_norm": 0.3256551364716347, |
| "learning_rate": 1.7268106566830713e-05, |
| "loss": 1.2562, |
| "step": 687 |
| }, |
| { |
| "epoch": 3.6210526315789475, |
| "grad_norm": 0.2942105351769792, |
| "learning_rate": 1.7147325762391848e-05, |
| "loss": 1.2664, |
| "step": 688 |
| }, |
| { |
| "epoch": 3.626315789473684, |
| "grad_norm": 0.29601761914650015, |
| "learning_rate": 1.702685349268226e-05, |
| "loss": 1.2559, |
| "step": 689 |
| }, |
| { |
| "epoch": 3.6315789473684212, |
| "grad_norm": 0.2759560921461832, |
| "learning_rate": 1.690669138420215e-05, |
| "loss": 1.2591, |
| "step": 690 |
| }, |
| { |
| "epoch": 3.636842105263158, |
| "grad_norm": 0.2440653651529168, |
| "learning_rate": 1.6786841059264217e-05, |
| "loss": 1.2574, |
| "step": 691 |
| }, |
| { |
| "epoch": 3.6421052631578945, |
| "grad_norm": 0.303898127022955, |
| "learning_rate": 1.6667304135971756e-05, |
| "loss": 1.2547, |
| "step": 692 |
| }, |
| { |
| "epoch": 3.6473684210526316, |
| "grad_norm": 0.2481861381786453, |
| "learning_rate": 1.65480822281968e-05, |
| "loss": 1.2488, |
| "step": 693 |
| }, |
| { |
| "epoch": 3.6526315789473687, |
| "grad_norm": 0.2565499348104272, |
| "learning_rate": 1.6429176945558413e-05, |
| "loss": 1.2561, |
| "step": 694 |
| }, |
| { |
| "epoch": 3.6578947368421053, |
| "grad_norm": 0.3224687182653659, |
| "learning_rate": 1.6310589893400804e-05, |
| "loss": 1.247, |
| "step": 695 |
| }, |
| { |
| "epoch": 3.663157894736842, |
| "grad_norm": 0.25279520055905946, |
| "learning_rate": 1.6192322672771793e-05, |
| "loss": 1.2636, |
| "step": 696 |
| }, |
| { |
| "epoch": 3.668421052631579, |
| "grad_norm": 0.3239078414093973, |
| "learning_rate": 1.6074376880401147e-05, |
| "loss": 1.2431, |
| "step": 697 |
| }, |
| { |
| "epoch": 3.6736842105263157, |
| "grad_norm": 0.25211963429157525, |
| "learning_rate": 1.5956754108678996e-05, |
| "loss": 1.2489, |
| "step": 698 |
| }, |
| { |
| "epoch": 3.6789473684210527, |
| "grad_norm": 0.3288796816421695, |
| "learning_rate": 1.5839455945634372e-05, |
| "loss": 1.2433, |
| "step": 699 |
| }, |
| { |
| "epoch": 3.6842105263157894, |
| "grad_norm": 0.2570823868070139, |
| "learning_rate": 1.5722483974913737e-05, |
| "loss": 1.2437, |
| "step": 700 |
| }, |
| { |
| "epoch": 3.6894736842105265, |
| "grad_norm": 0.23205884441696503, |
| "learning_rate": 1.560583977575964e-05, |
| "loss": 1.2558, |
| "step": 701 |
| }, |
| { |
| "epoch": 3.694736842105263, |
| "grad_norm": 0.254586157360098, |
| "learning_rate": 1.5489524922989367e-05, |
| "loss": 1.2677, |
| "step": 702 |
| }, |
| { |
| "epoch": 3.7, |
| "grad_norm": 0.2528078741758432, |
| "learning_rate": 1.537354098697367e-05, |
| "loss": 1.2521, |
| "step": 703 |
| }, |
| { |
| "epoch": 3.705263157894737, |
| "grad_norm": 0.30438966904710707, |
| "learning_rate": 1.525788953361563e-05, |
| "loss": 1.2569, |
| "step": 704 |
| }, |
| { |
| "epoch": 3.7105263157894735, |
| "grad_norm": 0.24585215855274448, |
| "learning_rate": 1.5142572124329418e-05, |
| "loss": 1.2582, |
| "step": 705 |
| }, |
| { |
| "epoch": 3.7157894736842105, |
| "grad_norm": 0.23812179037555448, |
| "learning_rate": 1.5027590316019276e-05, |
| "loss": 1.2582, |
| "step": 706 |
| }, |
| { |
| "epoch": 3.7210526315789476, |
| "grad_norm": 0.2258598951803704, |
| "learning_rate": 1.491294566105852e-05, |
| "loss": 1.2398, |
| "step": 707 |
| }, |
| { |
| "epoch": 3.7263157894736842, |
| "grad_norm": 0.23820145432975506, |
| "learning_rate": 1.4798639707268509e-05, |
| "loss": 1.26, |
| "step": 708 |
| }, |
| { |
| "epoch": 3.731578947368421, |
| "grad_norm": 0.27098791758934043, |
| "learning_rate": 1.4684673997897795e-05, |
| "loss": 1.2467, |
| "step": 709 |
| }, |
| { |
| "epoch": 3.736842105263158, |
| "grad_norm": 0.1895081621315529, |
| "learning_rate": 1.457105007160129e-05, |
| "loss": 1.2469, |
| "step": 710 |
| }, |
| { |
| "epoch": 3.7421052631578946, |
| "grad_norm": 0.24431380487075854, |
| "learning_rate": 1.4457769462419461e-05, |
| "loss": 1.2505, |
| "step": 711 |
| }, |
| { |
| "epoch": 3.7473684210526317, |
| "grad_norm": 0.2598287894690381, |
| "learning_rate": 1.4344833699757662e-05, |
| "loss": 1.2733, |
| "step": 712 |
| }, |
| { |
| "epoch": 3.7526315789473683, |
| "grad_norm": 0.24173801356915325, |
| "learning_rate": 1.4232244308365437e-05, |
| "loss": 1.2515, |
| "step": 713 |
| }, |
| { |
| "epoch": 3.7578947368421054, |
| "grad_norm": 0.2744768545995936, |
| "learning_rate": 1.4120002808315999e-05, |
| "loss": 1.2446, |
| "step": 714 |
| }, |
| { |
| "epoch": 3.763157894736842, |
| "grad_norm": 0.29075680429359135, |
| "learning_rate": 1.4008110714985623e-05, |
| "loss": 1.2576, |
| "step": 715 |
| }, |
| { |
| "epoch": 3.768421052631579, |
| "grad_norm": 0.1679499052346039, |
| "learning_rate": 1.3896569539033253e-05, |
| "loss": 1.2434, |
| "step": 716 |
| }, |
| { |
| "epoch": 3.7736842105263158, |
| "grad_norm": 0.21354680460803685, |
| "learning_rate": 1.3785380786380103e-05, |
| "loss": 1.2642, |
| "step": 717 |
| }, |
| { |
| "epoch": 3.7789473684210524, |
| "grad_norm": 0.24355235079533985, |
| "learning_rate": 1.367454595818928e-05, |
| "loss": 1.2449, |
| "step": 718 |
| }, |
| { |
| "epoch": 3.7842105263157895, |
| "grad_norm": 0.17842505149174132, |
| "learning_rate": 1.3564066550845558e-05, |
| "loss": 1.2399, |
| "step": 719 |
| }, |
| { |
| "epoch": 3.7894736842105265, |
| "grad_norm": 0.2363958816949115, |
| "learning_rate": 1.3453944055935151e-05, |
| "loss": 1.2471, |
| "step": 720 |
| }, |
| { |
| "epoch": 3.794736842105263, |
| "grad_norm": 0.20243183669778259, |
| "learning_rate": 1.3344179960225603e-05, |
| "loss": 1.2535, |
| "step": 721 |
| }, |
| { |
| "epoch": 3.8, |
| "grad_norm": 0.2471952644451058, |
| "learning_rate": 1.3234775745645684e-05, |
| "loss": 1.2484, |
| "step": 722 |
| }, |
| { |
| "epoch": 3.805263157894737, |
| "grad_norm": 0.21944384742054443, |
| "learning_rate": 1.3125732889265393e-05, |
| "loss": 1.2444, |
| "step": 723 |
| }, |
| { |
| "epoch": 3.8105263157894735, |
| "grad_norm": 0.22384339943654685, |
| "learning_rate": 1.3017052863276054e-05, |
| "loss": 1.2544, |
| "step": 724 |
| }, |
| { |
| "epoch": 3.8157894736842106, |
| "grad_norm": 0.20643300200194556, |
| "learning_rate": 1.2908737134970367e-05, |
| "loss": 1.2455, |
| "step": 725 |
| }, |
| { |
| "epoch": 3.8210526315789473, |
| "grad_norm": 0.22387663232782792, |
| "learning_rate": 1.2800787166722634e-05, |
| "loss": 1.2415, |
| "step": 726 |
| }, |
| { |
| "epoch": 3.8263157894736843, |
| "grad_norm": 0.23601246798864953, |
| "learning_rate": 1.2693204415969068e-05, |
| "loss": 1.2488, |
| "step": 727 |
| }, |
| { |
| "epoch": 3.831578947368421, |
| "grad_norm": 0.21781387567237637, |
| "learning_rate": 1.2585990335188014e-05, |
| "loss": 1.2346, |
| "step": 728 |
| }, |
| { |
| "epoch": 3.836842105263158, |
| "grad_norm": 0.20954968812529903, |
| "learning_rate": 1.2479146371880408e-05, |
| "loss": 1.25, |
| "step": 729 |
| }, |
| { |
| "epoch": 3.8421052631578947, |
| "grad_norm": 0.2935272323831709, |
| "learning_rate": 1.2372673968550229e-05, |
| "loss": 1.2575, |
| "step": 730 |
| }, |
| { |
| "epoch": 3.8473684210526313, |
| "grad_norm": 0.23428135792560334, |
| "learning_rate": 1.2266574562684994e-05, |
| "loss": 1.2477, |
| "step": 731 |
| }, |
| { |
| "epoch": 3.8526315789473684, |
| "grad_norm": 0.18658016102303704, |
| "learning_rate": 1.2160849586736375e-05, |
| "loss": 1.256, |
| "step": 732 |
| }, |
| { |
| "epoch": 3.8578947368421055, |
| "grad_norm": 0.23105098493810466, |
| "learning_rate": 1.2055500468100849e-05, |
| "loss": 1.2399, |
| "step": 733 |
| }, |
| { |
| "epoch": 3.863157894736842, |
| "grad_norm": 0.1929616859489707, |
| "learning_rate": 1.1950528629100457e-05, |
| "loss": 1.2515, |
| "step": 734 |
| }, |
| { |
| "epoch": 3.8684210526315788, |
| "grad_norm": 0.218750003790284, |
| "learning_rate": 1.1845935486963546e-05, |
| "loss": 1.2489, |
| "step": 735 |
| }, |
| { |
| "epoch": 3.873684210526316, |
| "grad_norm": 0.19977098349774547, |
| "learning_rate": 1.1741722453805657e-05, |
| "loss": 1.2449, |
| "step": 736 |
| }, |
| { |
| "epoch": 3.8789473684210525, |
| "grad_norm": 0.23507506446012338, |
| "learning_rate": 1.163789093661051e-05, |
| "loss": 1.2562, |
| "step": 737 |
| }, |
| { |
| "epoch": 3.8842105263157896, |
| "grad_norm": 0.19034197687876206, |
| "learning_rate": 1.1534442337210919e-05, |
| "loss": 1.2528, |
| "step": 738 |
| }, |
| { |
| "epoch": 3.889473684210526, |
| "grad_norm": 0.25267159420496116, |
| "learning_rate": 1.1431378052269934e-05, |
| "loss": 1.2571, |
| "step": 739 |
| }, |
| { |
| "epoch": 3.8947368421052633, |
| "grad_norm": 0.21369948030483346, |
| "learning_rate": 1.1328699473261957e-05, |
| "loss": 1.241, |
| "step": 740 |
| }, |
| { |
| "epoch": 3.9, |
| "grad_norm": 0.23307740618119258, |
| "learning_rate": 1.1226407986453963e-05, |
| "loss": 1.2557, |
| "step": 741 |
| }, |
| { |
| "epoch": 3.905263157894737, |
| "grad_norm": 0.19115969783367653, |
| "learning_rate": 1.1124504972886782e-05, |
| "loss": 1.2525, |
| "step": 742 |
| }, |
| { |
| "epoch": 3.9105263157894736, |
| "grad_norm": 0.2681117091243346, |
| "learning_rate": 1.1022991808356442e-05, |
| "loss": 1.248, |
| "step": 743 |
| }, |
| { |
| "epoch": 3.9157894736842103, |
| "grad_norm": 0.1651284103554666, |
| "learning_rate": 1.0921869863395642e-05, |
| "loss": 1.242, |
| "step": 744 |
| }, |
| { |
| "epoch": 3.9210526315789473, |
| "grad_norm": 0.24161510420189317, |
| "learning_rate": 1.0821140503255174e-05, |
| "loss": 1.2555, |
| "step": 745 |
| }, |
| { |
| "epoch": 3.9263157894736844, |
| "grad_norm": 0.20280135248319278, |
| "learning_rate": 1.0720805087885533e-05, |
| "loss": 1.2578, |
| "step": 746 |
| }, |
| { |
| "epoch": 3.931578947368421, |
| "grad_norm": 0.3284807144434915, |
| "learning_rate": 1.0620864971918579e-05, |
| "loss": 1.259, |
| "step": 747 |
| }, |
| { |
| "epoch": 3.9368421052631577, |
| "grad_norm": 0.22538990852777954, |
| "learning_rate": 1.05213215046492e-05, |
| "loss": 1.2597, |
| "step": 748 |
| }, |
| { |
| "epoch": 3.942105263157895, |
| "grad_norm": 0.19055951323654136, |
| "learning_rate": 1.0422176030017117e-05, |
| "loss": 1.2443, |
| "step": 749 |
| }, |
| { |
| "epoch": 3.9473684210526314, |
| "grad_norm": 0.18646041833135787, |
| "learning_rate": 1.0323429886588743e-05, |
| "loss": 1.2388, |
| "step": 750 |
| }, |
| { |
| "epoch": 3.9526315789473685, |
| "grad_norm": 0.19285379546461523, |
| "learning_rate": 1.0225084407539109e-05, |
| "loss": 1.2335, |
| "step": 751 |
| }, |
| { |
| "epoch": 3.957894736842105, |
| "grad_norm": 0.1997818414436052, |
| "learning_rate": 1.0127140920633857e-05, |
| "loss": 1.2439, |
| "step": 752 |
| }, |
| { |
| "epoch": 3.963157894736842, |
| "grad_norm": 0.20149581036707856, |
| "learning_rate": 1.0029600748211314e-05, |
| "loss": 1.2415, |
| "step": 753 |
| }, |
| { |
| "epoch": 3.968421052631579, |
| "grad_norm": 0.19260248911961064, |
| "learning_rate": 9.932465207164675e-06, |
| "loss": 1.2633, |
| "step": 754 |
| }, |
| { |
| "epoch": 3.973684210526316, |
| "grad_norm": 0.21099578591151794, |
| "learning_rate": 9.835735608924155e-06, |
| "loss": 1.231, |
| "step": 755 |
| }, |
| { |
| "epoch": 3.9789473684210526, |
| "grad_norm": 0.17132739675169845, |
| "learning_rate": 9.739413259439337e-06, |
| "loss": 1.2451, |
| "step": 756 |
| }, |
| { |
| "epoch": 3.984210526315789, |
| "grad_norm": 0.21904215059223633, |
| "learning_rate": 9.643499459161538e-06, |
| "loss": 1.2523, |
| "step": 757 |
| }, |
| { |
| "epoch": 3.9894736842105263, |
| "grad_norm": 0.224551193557602, |
| "learning_rate": 9.547995503026217e-06, |
| "loss": 1.2478, |
| "step": 758 |
| }, |
| { |
| "epoch": 3.9947368421052634, |
| "grad_norm": 0.19238609932248696, |
| "learning_rate": 9.452902680435527e-06, |
| "loss": 1.249, |
| "step": 759 |
| }, |
| { |
| "epoch": 4.0, |
| "grad_norm": 0.22055223309269914, |
| "learning_rate": 9.358222275240884e-06, |
| "loss": 1.2167, |
| "step": 760 |
| }, |
| { |
| "epoch": 4.005263157894737, |
| "grad_norm": 0.20140128214778716, |
| "learning_rate": 9.263955565725648e-06, |
| "loss": 1.2391, |
| "step": 761 |
| }, |
| { |
| "epoch": 4.010526315789473, |
| "grad_norm": 0.2068373210972995, |
| "learning_rate": 9.170103824587855e-06, |
| "loss": 1.2331, |
| "step": 762 |
| }, |
| { |
| "epoch": 4.015789473684211, |
| "grad_norm": 0.18232115316386402, |
| "learning_rate": 9.07666831892304e-06, |
| "loss": 1.2121, |
| "step": 763 |
| }, |
| { |
| "epoch": 4.021052631578947, |
| "grad_norm": 0.2188152260857773, |
| "learning_rate": 8.983650310207142e-06, |
| "loss": 1.2232, |
| "step": 764 |
| }, |
| { |
| "epoch": 4.026315789473684, |
| "grad_norm": 0.1880274936269495, |
| "learning_rate": 8.89105105427945e-06, |
| "loss": 1.2272, |
| "step": 765 |
| }, |
| { |
| "epoch": 4.031578947368421, |
| "grad_norm": 0.17030491611623289, |
| "learning_rate": 8.798871801325632e-06, |
| "loss": 1.2284, |
| "step": 766 |
| }, |
| { |
| "epoch": 4.036842105263158, |
| "grad_norm": 0.1887119280020856, |
| "learning_rate": 8.707113795860938e-06, |
| "loss": 1.2364, |
| "step": 767 |
| }, |
| { |
| "epoch": 4.042105263157895, |
| "grad_norm": 0.18907111180220373, |
| "learning_rate": 8.615778276713293e-06, |
| "loss": 1.2277, |
| "step": 768 |
| }, |
| { |
| "epoch": 4.0473684210526315, |
| "grad_norm": 0.17028701794910334, |
| "learning_rate": 8.524866477006637e-06, |
| "loss": 1.2268, |
| "step": 769 |
| }, |
| { |
| "epoch": 4.052631578947368, |
| "grad_norm": 0.1927239082270522, |
| "learning_rate": 8.434379624144261e-06, |
| "loss": 1.2202, |
| "step": 770 |
| }, |
| { |
| "epoch": 4.057894736842106, |
| "grad_norm": 0.18231681740661396, |
| "learning_rate": 8.344318939792232e-06, |
| "loss": 1.2103, |
| "step": 771 |
| }, |
| { |
| "epoch": 4.063157894736842, |
| "grad_norm": 0.2108141165888399, |
| "learning_rate": 8.254685639862896e-06, |
| "loss": 1.2289, |
| "step": 772 |
| }, |
| { |
| "epoch": 4.068421052631579, |
| "grad_norm": 0.21501105777435195, |
| "learning_rate": 8.165480934498462e-06, |
| "loss": 1.2304, |
| "step": 773 |
| }, |
| { |
| "epoch": 4.073684210526316, |
| "grad_norm": 0.22014095135466175, |
| "learning_rate": 8.076706028054709e-06, |
| "loss": 1.2395, |
| "step": 774 |
| }, |
| { |
| "epoch": 4.078947368421052, |
| "grad_norm": 0.18281510166398557, |
| "learning_rate": 7.988362119084642e-06, |
| "loss": 1.232, |
| "step": 775 |
| }, |
| { |
| "epoch": 4.08421052631579, |
| "grad_norm": 0.21712131045816194, |
| "learning_rate": 7.90045040032236e-06, |
| "loss": 1.2423, |
| "step": 776 |
| }, |
| { |
| "epoch": 4.089473684210526, |
| "grad_norm": 0.19226805462323326, |
| "learning_rate": 7.812972058666974e-06, |
| "loss": 1.2295, |
| "step": 777 |
| }, |
| { |
| "epoch": 4.094736842105263, |
| "grad_norm": 0.175015352113717, |
| "learning_rate": 7.725928275166534e-06, |
| "loss": 1.2282, |
| "step": 778 |
| }, |
| { |
| "epoch": 4.1, |
| "grad_norm": 0.2095750364202842, |
| "learning_rate": 7.639320225002106e-06, |
| "loss": 1.2244, |
| "step": 779 |
| }, |
| { |
| "epoch": 4.105263157894737, |
| "grad_norm": 0.19644672306841843, |
| "learning_rate": 7.553149077471915e-06, |
| "loss": 1.2217, |
| "step": 780 |
| }, |
| { |
| "epoch": 4.110526315789474, |
| "grad_norm": 0.2000635414888708, |
| "learning_rate": 7.46741599597554e-06, |
| "loss": 1.2319, |
| "step": 781 |
| }, |
| { |
| "epoch": 4.11578947368421, |
| "grad_norm": 0.1746543551783459, |
| "learning_rate": 7.382122137998209e-06, |
| "loss": 1.2282, |
| "step": 782 |
| }, |
| { |
| "epoch": 4.121052631578947, |
| "grad_norm": 0.17481980918717463, |
| "learning_rate": 7.297268655095213e-06, |
| "loss": 1.2395, |
| "step": 783 |
| }, |
| { |
| "epoch": 4.126315789473685, |
| "grad_norm": 0.17610089627569894, |
| "learning_rate": 7.212856692876289e-06, |
| "loss": 1.2319, |
| "step": 784 |
| }, |
| { |
| "epoch": 4.131578947368421, |
| "grad_norm": 0.17566117386443802, |
| "learning_rate": 7.128887390990198e-06, |
| "loss": 1.2245, |
| "step": 785 |
| }, |
| { |
| "epoch": 4.136842105263158, |
| "grad_norm": 0.18888285977402394, |
| "learning_rate": 7.045361883109318e-06, |
| "loss": 1.2363, |
| "step": 786 |
| }, |
| { |
| "epoch": 4.1421052631578945, |
| "grad_norm": 0.1679963465599155, |
| "learning_rate": 6.962281296914386e-06, |
| "loss": 1.2319, |
| "step": 787 |
| }, |
| { |
| "epoch": 4.147368421052631, |
| "grad_norm": 0.17232128719198106, |
| "learning_rate": 6.8796467540791986e-06, |
| "loss": 1.2312, |
| "step": 788 |
| }, |
| { |
| "epoch": 4.152631578947369, |
| "grad_norm": 0.19685528274227304, |
| "learning_rate": 6.797459370255519e-06, |
| "loss": 1.2324, |
| "step": 789 |
| }, |
| { |
| "epoch": 4.157894736842105, |
| "grad_norm": 0.1583456150516079, |
| "learning_rate": 6.715720255058e-06, |
| "loss": 1.24, |
| "step": 790 |
| }, |
| { |
| "epoch": 4.163157894736842, |
| "grad_norm": 0.172328795648275, |
| "learning_rate": 6.634430512049213e-06, |
| "loss": 1.2513, |
| "step": 791 |
| }, |
| { |
| "epoch": 4.168421052631579, |
| "grad_norm": 0.16257107292586506, |
| "learning_rate": 6.553591238724712e-06, |
| "loss": 1.2275, |
| "step": 792 |
| }, |
| { |
| "epoch": 4.173684210526316, |
| "grad_norm": 0.14389724088218966, |
| "learning_rate": 6.4732035264982904e-06, |
| "loss": 1.2348, |
| "step": 793 |
| }, |
| { |
| "epoch": 4.178947368421053, |
| "grad_norm": 0.15689066100797078, |
| "learning_rate": 6.39326846068717e-06, |
| "loss": 1.2179, |
| "step": 794 |
| }, |
| { |
| "epoch": 4.184210526315789, |
| "grad_norm": 0.18533318047509703, |
| "learning_rate": 6.313787120497376e-06, |
| "loss": 1.236, |
| "step": 795 |
| }, |
| { |
| "epoch": 4.189473684210526, |
| "grad_norm": 0.1459277700590749, |
| "learning_rate": 6.234760579009167e-06, |
| "loss": 1.2435, |
| "step": 796 |
| }, |
| { |
| "epoch": 4.1947368421052635, |
| "grad_norm": 0.155103015306397, |
| "learning_rate": 6.1561899031625794e-06, |
| "loss": 1.2282, |
| "step": 797 |
| }, |
| { |
| "epoch": 4.2, |
| "grad_norm": 0.1477347804716696, |
| "learning_rate": 6.078076153742962e-06, |
| "loss": 1.2249, |
| "step": 798 |
| }, |
| { |
| "epoch": 4.205263157894737, |
| "grad_norm": 0.15276423763995617, |
| "learning_rate": 6.000420385366687e-06, |
| "loss": 1.2297, |
| "step": 799 |
| }, |
| { |
| "epoch": 4.2105263157894735, |
| "grad_norm": 0.15126290143221918, |
| "learning_rate": 5.923223646466923e-06, |
| "loss": 1.2387, |
| "step": 800 |
| }, |
| { |
| "epoch": 4.215789473684211, |
| "grad_norm": 0.15492928616201465, |
| "learning_rate": 5.846486979279449e-06, |
| "loss": 1.2367, |
| "step": 801 |
| }, |
| { |
| "epoch": 4.221052631578948, |
| "grad_norm": 0.17188412280549703, |
| "learning_rate": 5.770211419828604e-06, |
| "loss": 1.2322, |
| "step": 802 |
| }, |
| { |
| "epoch": 4.226315789473684, |
| "grad_norm": 0.15097184444197026, |
| "learning_rate": 5.694397997913319e-06, |
| "loss": 1.2321, |
| "step": 803 |
| }, |
| { |
| "epoch": 4.231578947368421, |
| "grad_norm": 0.1453328152503722, |
| "learning_rate": 5.619047737093164e-06, |
| "loss": 1.2384, |
| "step": 804 |
| }, |
| { |
| "epoch": 4.2368421052631575, |
| "grad_norm": 0.18220366542871314, |
| "learning_rate": 5.5441616546745646e-06, |
| "loss": 1.2383, |
| "step": 805 |
| }, |
| { |
| "epoch": 4.242105263157895, |
| "grad_norm": 0.167450785630923, |
| "learning_rate": 5.469740761697044e-06, |
| "loss": 1.2426, |
| "step": 806 |
| }, |
| { |
| "epoch": 4.247368421052632, |
| "grad_norm": 0.14931148609570408, |
| "learning_rate": 5.395786062919622e-06, |
| "loss": 1.2333, |
| "step": 807 |
| }, |
| { |
| "epoch": 4.252631578947368, |
| "grad_norm": 0.16803901696022852, |
| "learning_rate": 5.322298556807179e-06, |
| "loss": 1.2417, |
| "step": 808 |
| }, |
| { |
| "epoch": 4.257894736842105, |
| "grad_norm": 0.16226281008273294, |
| "learning_rate": 5.249279235517031e-06, |
| "loss": 1.2329, |
| "step": 809 |
| }, |
| { |
| "epoch": 4.2631578947368425, |
| "grad_norm": 0.13999210516589425, |
| "learning_rate": 5.176729084885508e-06, |
| "loss": 1.2412, |
| "step": 810 |
| }, |
| { |
| "epoch": 4.268421052631579, |
| "grad_norm": 0.18759320208384703, |
| "learning_rate": 5.10464908441465e-06, |
| "loss": 1.2357, |
| "step": 811 |
| }, |
| { |
| "epoch": 4.273684210526316, |
| "grad_norm": 0.17340200354001228, |
| "learning_rate": 5.033040207258979e-06, |
| "loss": 1.2271, |
| "step": 812 |
| }, |
| { |
| "epoch": 4.278947368421052, |
| "grad_norm": 0.15950936103038027, |
| "learning_rate": 4.9619034202123884e-06, |
| "loss": 1.2151, |
| "step": 813 |
| }, |
| { |
| "epoch": 4.284210526315789, |
| "grad_norm": 0.15140529416594803, |
| "learning_rate": 4.891239683695044e-06, |
| "loss": 1.232, |
| "step": 814 |
| }, |
| { |
| "epoch": 4.2894736842105265, |
| "grad_norm": 0.13998854140642578, |
| "learning_rate": 4.821049951740442e-06, |
| "loss": 1.2255, |
| "step": 815 |
| }, |
| { |
| "epoch": 4.294736842105263, |
| "grad_norm": 0.1499375273785916, |
| "learning_rate": 4.751335171982527e-06, |
| "loss": 1.2314, |
| "step": 816 |
| }, |
| { |
| "epoch": 4.3, |
| "grad_norm": 0.158191412279702, |
| "learning_rate": 4.6820962856429205e-06, |
| "loss": 1.234, |
| "step": 817 |
| }, |
| { |
| "epoch": 4.3052631578947365, |
| "grad_norm": 0.14057999268017907, |
| "learning_rate": 4.613334227518165e-06, |
| "loss": 1.2427, |
| "step": 818 |
| }, |
| { |
| "epoch": 4.310526315789474, |
| "grad_norm": 0.1433236145981049, |
| "learning_rate": 4.545049925967137e-06, |
| "loss": 1.2313, |
| "step": 819 |
| }, |
| { |
| "epoch": 4.315789473684211, |
| "grad_norm": 0.1303171009707853, |
| "learning_rate": 4.4772443028985004e-06, |
| "loss": 1.2297, |
| "step": 820 |
| }, |
| { |
| "epoch": 4.321052631578947, |
| "grad_norm": 0.15617718436585057, |
| "learning_rate": 4.409918273758278e-06, |
| "loss": 1.2412, |
| "step": 821 |
| }, |
| { |
| "epoch": 4.326315789473684, |
| "grad_norm": 0.1476554630303936, |
| "learning_rate": 4.343072747517459e-06, |
| "loss": 1.2387, |
| "step": 822 |
| }, |
| { |
| "epoch": 4.331578947368421, |
| "grad_norm": 0.1362192280798835, |
| "learning_rate": 4.276708626659778e-06, |
| "loss": 1.2349, |
| "step": 823 |
| }, |
| { |
| "epoch": 4.336842105263158, |
| "grad_norm": 0.15051183831923126, |
| "learning_rate": 4.2108268071694616e-06, |
| "loss": 1.2122, |
| "step": 824 |
| }, |
| { |
| "epoch": 4.342105263157895, |
| "grad_norm": 0.1445207500529269, |
| "learning_rate": 4.1454281785191995e-06, |
| "loss": 1.2224, |
| "step": 825 |
| }, |
| { |
| "epoch": 4.347368421052631, |
| "grad_norm": 0.14362316701732558, |
| "learning_rate": 4.080513623658075e-06, |
| "loss": 1.2186, |
| "step": 826 |
| }, |
| { |
| "epoch": 4.352631578947369, |
| "grad_norm": 0.1479016471804495, |
| "learning_rate": 4.0160840189997155e-06, |
| "loss": 1.2324, |
| "step": 827 |
| }, |
| { |
| "epoch": 4.3578947368421055, |
| "grad_norm": 0.1397296336400901, |
| "learning_rate": 3.952140234410396e-06, |
| "loss": 1.2309, |
| "step": 828 |
| }, |
| { |
| "epoch": 4.363157894736842, |
| "grad_norm": 0.12213103880797943, |
| "learning_rate": 3.888683133197293e-06, |
| "loss": 1.2231, |
| "step": 829 |
| }, |
| { |
| "epoch": 4.368421052631579, |
| "grad_norm": 0.13169031113809618, |
| "learning_rate": 3.825713572096903e-06, |
| "loss": 1.2264, |
| "step": 830 |
| }, |
| { |
| "epoch": 4.373684210526315, |
| "grad_norm": 0.1415624842501799, |
| "learning_rate": 3.7632324012633992e-06, |
| "loss": 1.2444, |
| "step": 831 |
| }, |
| { |
| "epoch": 4.378947368421053, |
| "grad_norm": 0.14671744800493622, |
| "learning_rate": 3.701240464257181e-06, |
| "loss": 1.2183, |
| "step": 832 |
| }, |
| { |
| "epoch": 4.38421052631579, |
| "grad_norm": 0.13323253635868224, |
| "learning_rate": 3.6397385980335e-06, |
| "loss": 1.2156, |
| "step": 833 |
| }, |
| { |
| "epoch": 4.389473684210526, |
| "grad_norm": 0.13127420581089705, |
| "learning_rate": 3.5787276329311315e-06, |
| "loss": 1.2231, |
| "step": 834 |
| }, |
| { |
| "epoch": 4.394736842105263, |
| "grad_norm": 0.133896287855281, |
| "learning_rate": 3.518208392661184e-06, |
| "loss": 1.2293, |
| "step": 835 |
| }, |
| { |
| "epoch": 4.4, |
| "grad_norm": 0.13026051571456285, |
| "learning_rate": 3.458181694295961e-06, |
| "loss": 1.2395, |
| "step": 836 |
| }, |
| { |
| "epoch": 4.405263157894737, |
| "grad_norm": 0.15010438499441964, |
| "learning_rate": 3.398648348257969e-06, |
| "loss": 1.2323, |
| "step": 837 |
| }, |
| { |
| "epoch": 4.410526315789474, |
| "grad_norm": 0.1501949658490909, |
| "learning_rate": 3.3396091583089275e-06, |
| "loss": 1.2186, |
| "step": 838 |
| }, |
| { |
| "epoch": 4.41578947368421, |
| "grad_norm": 0.1433763217867749, |
| "learning_rate": 3.281064921538919e-06, |
| "loss": 1.2379, |
| "step": 839 |
| }, |
| { |
| "epoch": 4.421052631578947, |
| "grad_norm": 0.1323412042853926, |
| "learning_rate": 3.2230164283556918e-06, |
| "loss": 1.2231, |
| "step": 840 |
| }, |
| { |
| "epoch": 4.426315789473684, |
| "grad_norm": 0.13635885589343097, |
| "learning_rate": 3.1654644624739082e-06, |
| "loss": 1.2297, |
| "step": 841 |
| }, |
| { |
| "epoch": 4.431578947368421, |
| "grad_norm": 0.1323736124785357, |
| "learning_rate": 3.1084098009046106e-06, |
| "loss": 1.235, |
| "step": 842 |
| }, |
| { |
| "epoch": 4.436842105263158, |
| "grad_norm": 0.13102236402292022, |
| "learning_rate": 3.0518532139447267e-06, |
| "loss": 1.2307, |
| "step": 843 |
| }, |
| { |
| "epoch": 4.442105263157894, |
| "grad_norm": 0.15428668766385725, |
| "learning_rate": 2.995795465166644e-06, |
| "loss": 1.226, |
| "step": 844 |
| }, |
| { |
| "epoch": 4.447368421052632, |
| "grad_norm": 0.13778512976226498, |
| "learning_rate": 2.9402373114079295e-06, |
| "loss": 1.2276, |
| "step": 845 |
| }, |
| { |
| "epoch": 4.4526315789473685, |
| "grad_norm": 0.13474950636883365, |
| "learning_rate": 2.8851795027610997e-06, |
| "loss": 1.2228, |
| "step": 846 |
| }, |
| { |
| "epoch": 4.457894736842105, |
| "grad_norm": 0.1353883744809194, |
| "learning_rate": 2.83062278256351e-06, |
| "loss": 1.2339, |
| "step": 847 |
| }, |
| { |
| "epoch": 4.463157894736842, |
| "grad_norm": 0.13137189130014673, |
| "learning_rate": 2.776567887387267e-06, |
| "loss": 1.2301, |
| "step": 848 |
| }, |
| { |
| "epoch": 4.468421052631579, |
| "grad_norm": 0.13126591401950521, |
| "learning_rate": 2.723015547029344e-06, |
| "loss": 1.2468, |
| "step": 849 |
| }, |
| { |
| "epoch": 4.473684210526316, |
| "grad_norm": 0.1415673262181535, |
| "learning_rate": 2.669966484501716e-06, |
| "loss": 1.2245, |
| "step": 850 |
| }, |
| { |
| "epoch": 4.478947368421053, |
| "grad_norm": 0.1320404723499411, |
| "learning_rate": 2.6174214160215704e-06, |
| "loss": 1.2352, |
| "step": 851 |
| }, |
| { |
| "epoch": 4.484210526315789, |
| "grad_norm": 0.12633771710003897, |
| "learning_rate": 2.5653810510016454e-06, |
| "loss": 1.2339, |
| "step": 852 |
| }, |
| { |
| "epoch": 4.489473684210527, |
| "grad_norm": 0.12316620532344269, |
| "learning_rate": 2.5138460920406884e-06, |
| "loss": 1.2317, |
| "step": 853 |
| }, |
| { |
| "epoch": 4.494736842105263, |
| "grad_norm": 0.13602160694846396, |
| "learning_rate": 2.462817234913919e-06, |
| "loss": 1.2273, |
| "step": 854 |
| }, |
| { |
| "epoch": 4.5, |
| "grad_norm": 0.1415982613618782, |
| "learning_rate": 2.4122951685636674e-06, |
| "loss": 1.2243, |
| "step": 855 |
| }, |
| { |
| "epoch": 4.505263157894737, |
| "grad_norm": 0.3127501030193754, |
| "learning_rate": 2.3622805750900567e-06, |
| "loss": 1.2222, |
| "step": 856 |
| }, |
| { |
| "epoch": 4.510526315789473, |
| "grad_norm": 0.13107864455151064, |
| "learning_rate": 2.3127741297418283e-06, |
| "loss": 1.2366, |
| "step": 857 |
| }, |
| { |
| "epoch": 4.515789473684211, |
| "grad_norm": 0.13657403397982118, |
| "learning_rate": 2.2637765009071576e-06, |
| "loss": 1.2337, |
| "step": 858 |
| }, |
| { |
| "epoch": 4.521052631578947, |
| "grad_norm": 0.1302788270408855, |
| "learning_rate": 2.215288350104694e-06, |
| "loss": 1.2253, |
| "step": 859 |
| }, |
| { |
| "epoch": 4.526315789473684, |
| "grad_norm": 0.12664051031696197, |
| "learning_rate": 2.1673103319746146e-06, |
| "loss": 1.225, |
| "step": 860 |
| }, |
| { |
| "epoch": 4.531578947368421, |
| "grad_norm": 0.14352455601262662, |
| "learning_rate": 2.1198430942697625e-06, |
| "loss": 1.2251, |
| "step": 861 |
| }, |
| { |
| "epoch": 4.536842105263158, |
| "grad_norm": 0.13649018750914618, |
| "learning_rate": 2.0728872778469224e-06, |
| "loss": 1.2407, |
| "step": 862 |
| }, |
| { |
| "epoch": 4.542105263157895, |
| "grad_norm": 0.20719895993192947, |
| "learning_rate": 2.026443516658163e-06, |
| "loss": 1.2272, |
| "step": 863 |
| }, |
| { |
| "epoch": 4.5473684210526315, |
| "grad_norm": 0.13901759037964037, |
| "learning_rate": 1.9805124377422834e-06, |
| "loss": 1.2368, |
| "step": 864 |
| }, |
| { |
| "epoch": 4.552631578947368, |
| "grad_norm": 0.1307517829866866, |
| "learning_rate": 1.93509466121633e-06, |
| "loss": 1.2318, |
| "step": 865 |
| }, |
| { |
| "epoch": 4.557894736842105, |
| "grad_norm": 0.12095060465165465, |
| "learning_rate": 1.8901908002672442e-06, |
| "loss": 1.2359, |
| "step": 866 |
| }, |
| { |
| "epoch": 4.563157894736842, |
| "grad_norm": 0.11915812322895941, |
| "learning_rate": 1.8458014611435705e-06, |
| "loss": 1.2426, |
| "step": 867 |
| }, |
| { |
| "epoch": 4.568421052631579, |
| "grad_norm": 0.1240067541225263, |
| "learning_rate": 1.80192724314729e-06, |
| "loss": 1.2163, |
| "step": 868 |
| }, |
| { |
| "epoch": 4.573684210526316, |
| "grad_norm": 0.12397138277266245, |
| "learning_rate": 1.7585687386256944e-06, |
| "loss": 1.2428, |
| "step": 869 |
| }, |
| { |
| "epoch": 4.578947368421053, |
| "grad_norm": 0.13123863782173215, |
| "learning_rate": 1.7157265329634354e-06, |
| "loss": 1.2413, |
| "step": 870 |
| }, |
| { |
| "epoch": 4.58421052631579, |
| "grad_norm": 0.13114076151140514, |
| "learning_rate": 1.6734012045745762e-06, |
| "loss": 1.2255, |
| "step": 871 |
| }, |
| { |
| "epoch": 4.589473684210526, |
| "grad_norm": 0.11880095160971275, |
| "learning_rate": 1.6315933248948068e-06, |
| "loss": 1.2325, |
| "step": 872 |
| }, |
| { |
| "epoch": 4.594736842105263, |
| "grad_norm": 0.133304713863376, |
| "learning_rate": 1.5903034583737343e-06, |
| "loss": 1.2406, |
| "step": 873 |
| }, |
| { |
| "epoch": 4.6, |
| "grad_norm": 0.12445121300617833, |
| "learning_rate": 1.5495321624672443e-06, |
| "loss": 1.2323, |
| "step": 874 |
| }, |
| { |
| "epoch": 4.605263157894737, |
| "grad_norm": 0.11989093911414492, |
| "learning_rate": 1.5092799876299835e-06, |
| "loss": 1.2152, |
| "step": 875 |
| }, |
| { |
| "epoch": 4.610526315789474, |
| "grad_norm": 0.12318779958969978, |
| "learning_rate": 1.4695474773079287e-06, |
| "loss": 1.2274, |
| "step": 876 |
| }, |
| { |
| "epoch": 4.61578947368421, |
| "grad_norm": 0.11755163812164948, |
| "learning_rate": 1.4303351679310473e-06, |
| "loss": 1.2323, |
| "step": 877 |
| }, |
| { |
| "epoch": 4.621052631578947, |
| "grad_norm": 0.1271239652597123, |
| "learning_rate": 1.3916435889060575e-06, |
| "loss": 1.2281, |
| "step": 878 |
| }, |
| { |
| "epoch": 4.626315789473685, |
| "grad_norm": 0.12826240424195157, |
| "learning_rate": 1.353473262609275e-06, |
| "loss": 1.2273, |
| "step": 879 |
| }, |
| { |
| "epoch": 4.631578947368421, |
| "grad_norm": 0.12460164768857226, |
| "learning_rate": 1.3158247043795735e-06, |
| "loss": 1.2264, |
| "step": 880 |
| }, |
| { |
| "epoch": 4.636842105263158, |
| "grad_norm": 0.11779769435490604, |
| "learning_rate": 1.278698422511413e-06, |
| "loss": 1.2243, |
| "step": 881 |
| }, |
| { |
| "epoch": 4.6421052631578945, |
| "grad_norm": 0.11403097697307746, |
| "learning_rate": 1.242094918247978e-06, |
| "loss": 1.2283, |
| "step": 882 |
| }, |
| { |
| "epoch": 4.647368421052631, |
| "grad_norm": 0.12118016084867007, |
| "learning_rate": 1.2060146857744282e-06, |
| "loss": 1.2392, |
| "step": 883 |
| }, |
| { |
| "epoch": 4.652631578947369, |
| "grad_norm": 0.12319740930061163, |
| "learning_rate": 1.1704582122112008e-06, |
| "loss": 1.2088, |
| "step": 884 |
| }, |
| { |
| "epoch": 4.657894736842105, |
| "grad_norm": 0.11386564708274247, |
| "learning_rate": 1.1354259776074472e-06, |
| "loss": 1.233, |
| "step": 885 |
| }, |
| { |
| "epoch": 4.663157894736842, |
| "grad_norm": 0.11374999316034942, |
| "learning_rate": 1.1009184549345632e-06, |
| "loss": 1.2386, |
| "step": 886 |
| }, |
| { |
| "epoch": 4.668421052631579, |
| "grad_norm": 0.12522042587937965, |
| "learning_rate": 1.0669361100797704e-06, |
| "loss": 1.2418, |
| "step": 887 |
| }, |
| { |
| "epoch": 4.673684210526316, |
| "grad_norm": 0.11429258921626788, |
| "learning_rate": 1.0334794018398652e-06, |
| "loss": 1.2178, |
| "step": 888 |
| }, |
| { |
| "epoch": 4.678947368421053, |
| "grad_norm": 0.34812757148076545, |
| "learning_rate": 1.0005487819149917e-06, |
| "loss": 1.2272, |
| "step": 889 |
| }, |
| { |
| "epoch": 4.684210526315789, |
| "grad_norm": 0.1182967297844485, |
| "learning_rate": 9.681446949025752e-07, |
| "loss": 1.2191, |
| "step": 890 |
| }, |
| { |
| "epoch": 4.689473684210526, |
| "grad_norm": 0.1272033760667648, |
| "learning_rate": 9.362675782912923e-07, |
| "loss": 1.2356, |
| "step": 891 |
| }, |
| { |
| "epoch": 4.6947368421052635, |
| "grad_norm": 0.12672455306432165, |
| "learning_rate": 9.049178624551635e-07, |
| "loss": 1.2285, |
| "step": 892 |
| }, |
| { |
| "epoch": 4.7, |
| "grad_norm": 0.3617879606840202, |
| "learning_rate": 8.740959706477725e-07, |
| "loss": 1.2656, |
| "step": 893 |
| }, |
| { |
| "epoch": 4.705263157894737, |
| "grad_norm": 0.10997692184574041, |
| "learning_rate": 8.438023189965272e-07, |
| "loss": 1.2358, |
| "step": 894 |
| }, |
| { |
| "epoch": 4.7105263157894735, |
| "grad_norm": 0.12136967224479166, |
| "learning_rate": 8.140373164970428e-07, |
| "loss": 1.2146, |
| "step": 895 |
| }, |
| { |
| "epoch": 4.715789473684211, |
| "grad_norm": 0.2009841140710602, |
| "learning_rate": 7.848013650076258e-07, |
| "loss": 1.2284, |
| "step": 896 |
| }, |
| { |
| "epoch": 4.721052631578948, |
| "grad_norm": 0.11466884057407387, |
| "learning_rate": 7.560948592438521e-07, |
| "loss": 1.241, |
| "step": 897 |
| }, |
| { |
| "epoch": 4.726315789473684, |
| "grad_norm": 0.11496880440793267, |
| "learning_rate": 7.279181867732199e-07, |
| "loss": 1.2151, |
| "step": 898 |
| }, |
| { |
| "epoch": 4.731578947368421, |
| "grad_norm": 0.12172797181082162, |
| "learning_rate": 7.002717280099403e-07, |
| "loss": 1.2227, |
| "step": 899 |
| }, |
| { |
| "epoch": 4.7368421052631575, |
| "grad_norm": 0.12443319588453902, |
| "learning_rate": 6.731558562097995e-07, |
| "loss": 1.2329, |
| "step": 900 |
| }, |
| { |
| "epoch": 4.742105263157895, |
| "grad_norm": 0.12280105376114048, |
| "learning_rate": 6.465709374650964e-07, |
| "loss": 1.2343, |
| "step": 901 |
| }, |
| { |
| "epoch": 4.747368421052632, |
| "grad_norm": 0.11762303018802715, |
| "learning_rate": 6.205173306997125e-07, |
| "loss": 1.2267, |
| "step": 902 |
| }, |
| { |
| "epoch": 4.752631578947368, |
| "grad_norm": 0.11816128091190285, |
| "learning_rate": 5.949953876642855e-07, |
| "loss": 1.2293, |
| "step": 903 |
| }, |
| { |
| "epoch": 4.757894736842105, |
| "grad_norm": 0.1156560558591028, |
| "learning_rate": 5.700054529314347e-07, |
| "loss": 1.2315, |
| "step": 904 |
| }, |
| { |
| "epoch": 4.7631578947368425, |
| "grad_norm": 0.11137701754866611, |
| "learning_rate": 5.455478638911071e-07, |
| "loss": 1.2394, |
| "step": 905 |
| }, |
| { |
| "epoch": 4.768421052631579, |
| "grad_norm": 0.11181750038905715, |
| "learning_rate": 5.216229507460435e-07, |
| "loss": 1.2208, |
| "step": 906 |
| }, |
| { |
| "epoch": 4.773684210526316, |
| "grad_norm": 0.12036980268449626, |
| "learning_rate": 4.982310365073107e-07, |
| "loss": 1.2235, |
| "step": 907 |
| }, |
| { |
| "epoch": 4.778947368421052, |
| "grad_norm": 0.12359942605818125, |
| "learning_rate": 4.75372436989936e-07, |
| "loss": 1.2308, |
| "step": 908 |
| }, |
| { |
| "epoch": 4.784210526315789, |
| "grad_norm": 0.13220645490519645, |
| "learning_rate": 4.530474608086355e-07, |
| "loss": 1.214, |
| "step": 909 |
| }, |
| { |
| "epoch": 4.7894736842105265, |
| "grad_norm": 0.12206816510347139, |
| "learning_rate": 4.3125640937368373e-07, |
| "loss": 1.2194, |
| "step": 910 |
| }, |
| { |
| "epoch": 4.794736842105263, |
| "grad_norm": 0.11617994515280962, |
| "learning_rate": 4.0999957688679706e-07, |
| "loss": 1.2241, |
| "step": 911 |
| }, |
| { |
| "epoch": 4.8, |
| "grad_norm": 0.1148058679734953, |
| "learning_rate": 3.8927725033718553e-07, |
| "loss": 1.2223, |
| "step": 912 |
| }, |
| { |
| "epoch": 4.8052631578947365, |
| "grad_norm": 0.11823614340464102, |
| "learning_rate": 3.690897094976942e-07, |
| "loss": 1.2238, |
| "step": 913 |
| }, |
| { |
| "epoch": 4.810526315789474, |
| "grad_norm": 0.11790591732140702, |
| "learning_rate": 3.4943722692099224e-07, |
| "loss": 1.2153, |
| "step": 914 |
| }, |
| { |
| "epoch": 4.815789473684211, |
| "grad_norm": 0.11877977952867706, |
| "learning_rate": 3.3032006793590977e-07, |
| "loss": 1.2334, |
| "step": 915 |
| }, |
| { |
| "epoch": 4.821052631578947, |
| "grad_norm": 0.12246828468344964, |
| "learning_rate": 3.117384906438581e-07, |
| "loss": 1.2386, |
| "step": 916 |
| }, |
| { |
| "epoch": 4.826315789473684, |
| "grad_norm": 0.10958575864964563, |
| "learning_rate": 2.936927459153438e-07, |
| "loss": 1.2392, |
| "step": 917 |
| }, |
| { |
| "epoch": 4.831578947368421, |
| "grad_norm": 0.11159223936915229, |
| "learning_rate": 2.761830773865759e-07, |
| "loss": 1.225, |
| "step": 918 |
| }, |
| { |
| "epoch": 4.836842105263158, |
| "grad_norm": 0.11067027350647266, |
| "learning_rate": 2.5920972145618394e-07, |
| "loss": 1.2182, |
| "step": 919 |
| }, |
| { |
| "epoch": 4.842105263157895, |
| "grad_norm": 0.11845597460367807, |
| "learning_rate": 2.4277290728202063e-07, |
| "loss": 1.2303, |
| "step": 920 |
| }, |
| { |
| "epoch": 4.847368421052631, |
| "grad_norm": 0.11321338292881286, |
| "learning_rate": 2.2687285677807536e-07, |
| "loss": 1.2286, |
| "step": 921 |
| }, |
| { |
| "epoch": 4.852631578947369, |
| "grad_norm": 0.10918511827532087, |
| "learning_rate": 2.1150978461146332e-07, |
| "loss": 1.2303, |
| "step": 922 |
| }, |
| { |
| "epoch": 4.8578947368421055, |
| "grad_norm": 0.11331613848290951, |
| "learning_rate": 1.9668389819954338e-07, |
| "loss": 1.2238, |
| "step": 923 |
| }, |
| { |
| "epoch": 4.863157894736842, |
| "grad_norm": 0.11227809077874316, |
| "learning_rate": 1.8239539770711133e-07, |
| "loss": 1.229, |
| "step": 924 |
| }, |
| { |
| "epoch": 4.868421052631579, |
| "grad_norm": 0.11188390803054302, |
| "learning_rate": 1.6864447604370004e-07, |
| "loss": 1.2315, |
| "step": 925 |
| }, |
| { |
| "epoch": 4.873684210526315, |
| "grad_norm": 0.1126954574845899, |
| "learning_rate": 1.5543131886096352e-07, |
| "loss": 1.2281, |
| "step": 926 |
| }, |
| { |
| "epoch": 4.878947368421053, |
| "grad_norm": 0.1154664007961282, |
| "learning_rate": 1.427561045501902e-07, |
| "loss": 1.2372, |
| "step": 927 |
| }, |
| { |
| "epoch": 4.88421052631579, |
| "grad_norm": 0.11176844779105831, |
| "learning_rate": 1.3061900423986917e-07, |
| "loss": 1.2268, |
| "step": 928 |
| }, |
| { |
| "epoch": 4.889473684210526, |
| "grad_norm": 0.11214271981901136, |
| "learning_rate": 1.1902018179340779e-07, |
| "loss": 1.2211, |
| "step": 929 |
| }, |
| { |
| "epoch": 4.894736842105263, |
| "grad_norm": 0.11806437367689042, |
| "learning_rate": 1.0795979380690657e-07, |
| "loss": 1.2232, |
| "step": 930 |
| }, |
| { |
| "epoch": 4.9, |
| "grad_norm": 0.12131946872074126, |
| "learning_rate": 9.74379896070321e-08, |
| "loss": 1.2392, |
| "step": 931 |
| }, |
| { |
| "epoch": 4.905263157894737, |
| "grad_norm": 0.11758661501722971, |
| "learning_rate": 8.745491124901861e-08, |
| "loss": 1.2215, |
| "step": 932 |
| }, |
| { |
| "epoch": 4.910526315789474, |
| "grad_norm": 0.10980377112088192, |
| "learning_rate": 7.80106935147451e-08, |
| "loss": 1.2412, |
| "step": 933 |
| }, |
| { |
| "epoch": 4.91578947368421, |
| "grad_norm": 0.11037951117364361, |
| "learning_rate": 6.910546391092343e-08, |
| "loss": 1.2198, |
| "step": 934 |
| }, |
| { |
| "epoch": 4.921052631578947, |
| "grad_norm": 0.11711041285423687, |
| "learning_rate": 6.073934266735303e-08, |
| "loss": 1.2256, |
| "step": 935 |
| }, |
| { |
| "epoch": 4.926315789473684, |
| "grad_norm": 0.11213122469542446, |
| "learning_rate": 5.291244273531782e-08, |
| "loss": 1.2389, |
| "step": 936 |
| }, |
| { |
| "epoch": 4.931578947368421, |
| "grad_norm": 0.11158223482551854, |
| "learning_rate": 4.562486978606728e-08, |
| "loss": 1.2358, |
| "step": 937 |
| }, |
| { |
| "epoch": 4.936842105263158, |
| "grad_norm": 0.10743288484662021, |
| "learning_rate": 3.887672220936445e-08, |
| "loss": 1.2142, |
| "step": 938 |
| }, |
| { |
| "epoch": 4.942105263157895, |
| "grad_norm": 0.11480044753648233, |
| "learning_rate": 3.266809111218017e-08, |
| "loss": 1.2304, |
| "step": 939 |
| }, |
| { |
| "epoch": 4.947368421052632, |
| "grad_norm": 0.12600905075056, |
| "learning_rate": 2.699906031745414e-08, |
| "loss": 1.2348, |
| "step": 940 |
| }, |
| { |
| "epoch": 4.9526315789473685, |
| "grad_norm": 0.10693815172707843, |
| "learning_rate": 2.1869706362958044e-08, |
| "loss": 1.2329, |
| "step": 941 |
| }, |
| { |
| "epoch": 4.957894736842105, |
| "grad_norm": 0.11368842959943799, |
| "learning_rate": 1.7280098500283005e-08, |
| "loss": 1.2461, |
| "step": 942 |
| }, |
| { |
| "epoch": 4.963157894736842, |
| "grad_norm": 0.11074973929231093, |
| "learning_rate": 1.3230298693871491e-08, |
| "loss": 1.2364, |
| "step": 943 |
| }, |
| { |
| "epoch": 4.968421052631579, |
| "grad_norm": 0.11094251507004392, |
| "learning_rate": 9.720361620217943e-09, |
| "loss": 1.2314, |
| "step": 944 |
| }, |
| { |
| "epoch": 4.973684210526316, |
| "grad_norm": 0.11419040432886776, |
| "learning_rate": 6.750334667091629e-09, |
| "loss": 1.23, |
| "step": 945 |
| }, |
| { |
| "epoch": 4.978947368421053, |
| "grad_norm": 0.2986955255592173, |
| "learning_rate": 4.320257932928229e-09, |
| "loss": 1.2347, |
| "step": 946 |
| }, |
| { |
| "epoch": 4.984210526315789, |
| "grad_norm": 0.10907490264263059, |
| "learning_rate": 2.4301642262791748e-09, |
| "loss": 1.2327, |
| "step": 947 |
| }, |
| { |
| "epoch": 4.989473684210527, |
| "grad_norm": 0.11799915395997997, |
| "learning_rate": 1.0800790653675564e-09, |
| "loss": 1.2269, |
| "step": 948 |
| }, |
| { |
| "epoch": 4.994736842105263, |
| "grad_norm": 0.11484395305342408, |
| "learning_rate": 2.700206777328518e-10, |
| "loss": 1.2454, |
| "step": 949 |
| }, |
| { |
| "epoch": 5.0, |
| "grad_norm": 0.11735650459931829, |
| "learning_rate": 0.0, |
| "loss": 1.2331, |
| "step": 950 |
| }, |
| { |
| "epoch": 5.0, |
| "step": 950, |
| "total_flos": 1.59373351452672e+16, |
| "train_loss": 1.3301890049482648, |
| "train_runtime": 16504.5508, |
| "train_samples_per_second": 29.374, |
| "train_steps_per_second": 0.058 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 950, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 5, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.59373351452672e+16, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|