{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 3.0,
  "eval_steps": 500,
  "global_step": 87,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.03508771929824561,
      "grad_norm": 0.043367721140384674,
      "learning_rate": 0.0,
      "loss": 0.0033,
      "step": 1
    },
    {
      "epoch": 0.07017543859649122,
      "grad_norm": 0.06946025043725967,
      "learning_rate": 6.666666666666667e-05,
      "loss": 0.0037,
      "step": 2
    },
    {
      "epoch": 0.10526315789473684,
      "grad_norm": 0.044264402240514755,
      "learning_rate": 0.00013333333333333334,
      "loss": 0.0032,
      "step": 3
    },
    {
      "epoch": 0.14035087719298245,
      "grad_norm": 0.03978864848613739,
      "learning_rate": 0.0002,
      "loss": 0.0038,
      "step": 4
    },
    {
      "epoch": 0.17543859649122806,
      "grad_norm": 0.03119363822042942,
      "learning_rate": 0.00019761904761904763,
      "loss": 0.0036,
      "step": 5
    },
    {
      "epoch": 0.21052631578947367,
      "grad_norm": 0.0629071518778801,
      "learning_rate": 0.00019523809523809525,
      "loss": 0.0035,
      "step": 6
    },
    {
      "epoch": 0.24561403508771928,
      "grad_norm": 0.13739793002605438,
      "learning_rate": 0.00019285714285714286,
      "loss": 0.007,
      "step": 7
    },
    {
      "epoch": 0.2807017543859649,
      "grad_norm": 0.073564812541008,
      "learning_rate": 0.00019047619047619048,
      "loss": 0.0038,
      "step": 8
    },
    {
      "epoch": 0.3157894736842105,
      "grad_norm": 0.09854966402053833,
      "learning_rate": 0.0001880952380952381,
      "loss": 0.004,
      "step": 9
    },
    {
      "epoch": 0.3508771929824561,
      "grad_norm": 0.07107563316822052,
      "learning_rate": 0.00018571428571428572,
      "loss": 0.0038,
      "step": 10
    },
    {
      "epoch": 0.38596491228070173,
      "grad_norm": 0.062161125242710114,
      "learning_rate": 0.00018333333333333334,
      "loss": 0.0027,
      "step": 11
    },
    {
      "epoch": 0.42105263157894735,
      "grad_norm": 0.07297360152006149,
      "learning_rate": 0.00018095238095238095,
      "loss": 0.0037,
      "step": 12
    },
    {
      "epoch": 0.45614035087719296,
      "grad_norm": 0.09481006115674973,
      "learning_rate": 0.0001785714285714286,
      "loss": 0.0082,
      "step": 13
    },
    {
      "epoch": 0.49122807017543857,
      "grad_norm": 0.15246650576591492,
      "learning_rate": 0.0001761904761904762,
      "loss": 0.0089,
      "step": 14
    },
    {
      "epoch": 0.5263157894736842,
      "grad_norm": 0.09137029945850372,
      "learning_rate": 0.00017380952380952383,
      "loss": 0.0041,
      "step": 15
    },
    {
      "epoch": 0.5614035087719298,
      "grad_norm": 0.12798666954040527,
      "learning_rate": 0.00017142857142857143,
      "loss": 0.0026,
      "step": 16
    },
    {
      "epoch": 0.5964912280701754,
      "grad_norm": 0.1691877394914627,
      "learning_rate": 0.00016904761904761904,
      "loss": 0.0131,
      "step": 17
    },
    {
      "epoch": 0.631578947368421,
      "grad_norm": 0.11234349757432938,
      "learning_rate": 0.0001666666666666667,
      "loss": 0.0085,
      "step": 18
    },
    {
      "epoch": 0.6666666666666666,
      "grad_norm": 0.04114694893360138,
      "learning_rate": 0.00016428571428571428,
      "loss": 0.0029,
      "step": 19
    },
    {
      "epoch": 0.7017543859649122,
      "grad_norm": 0.08836357295513153,
      "learning_rate": 0.00016190476190476192,
      "loss": 0.0041,
      "step": 20
    },
    {
      "epoch": 0.7368421052631579,
      "grad_norm": 0.10266884416341782,
      "learning_rate": 0.00015952380952380954,
      "loss": 0.0057,
      "step": 21
    },
    {
      "epoch": 0.7719298245614035,
      "grad_norm": 0.08742401748895645,
      "learning_rate": 0.00015714285714285716,
      "loss": 0.0061,
      "step": 22
    },
    {
      "epoch": 0.8070175438596491,
      "grad_norm": 0.06542518734931946,
      "learning_rate": 0.00015476190476190478,
      "loss": 0.0059,
      "step": 23
    },
    {
      "epoch": 0.8421052631578947,
      "grad_norm": 0.0995144322514534,
      "learning_rate": 0.00015238095238095237,
      "loss": 0.0041,
      "step": 24
    },
    {
      "epoch": 0.8771929824561403,
      "grad_norm": 0.1686088740825653,
      "learning_rate": 0.00015000000000000001,
      "loss": 0.0058,
      "step": 25
    },
    {
      "epoch": 0.9122807017543859,
      "grad_norm": 0.11190295219421387,
      "learning_rate": 0.00014761904761904763,
      "loss": 0.0069,
      "step": 26
    },
    {
      "epoch": 0.9473684210526315,
      "grad_norm": 0.13313594460487366,
      "learning_rate": 0.00014523809523809525,
      "loss": 0.0085,
      "step": 27
    },
    {
      "epoch": 0.9824561403508771,
      "grad_norm": 0.0898401066660881,
      "learning_rate": 0.00014285714285714287,
      "loss": 0.0042,
      "step": 28
    },
    {
      "epoch": 1.0,
      "grad_norm": 0.050766751170158386,
      "learning_rate": 0.00014047619047619049,
      "loss": 0.0015,
      "step": 29
    },
    {
      "epoch": 1.0350877192982457,
      "grad_norm": 0.06956154853105545,
      "learning_rate": 0.0001380952380952381,
      "loss": 0.0051,
      "step": 30
    },
    {
      "epoch": 1.0701754385964912,
      "grad_norm": 0.06322275847196579,
      "learning_rate": 0.00013571428571428572,
      "loss": 0.0061,
      "step": 31
    },
    {
      "epoch": 1.1052631578947367,
      "grad_norm": 0.03425135090947151,
      "learning_rate": 0.00013333333333333334,
      "loss": 0.0028,
      "step": 32
    },
    {
      "epoch": 1.1403508771929824,
      "grad_norm": 0.09477324038743973,
      "learning_rate": 0.00013095238095238096,
      "loss": 0.0039,
      "step": 33
    },
    {
      "epoch": 1.1754385964912282,
      "grad_norm": 0.049563754349946976,
      "learning_rate": 0.00012857142857142858,
      "loss": 0.0038,
      "step": 34
    },
    {
      "epoch": 1.2105263157894737,
      "grad_norm": 0.05497609078884125,
      "learning_rate": 0.0001261904761904762,
      "loss": 0.0046,
      "step": 35
    },
    {
      "epoch": 1.2456140350877192,
      "grad_norm": 0.12121907621622086,
      "learning_rate": 0.0001238095238095238,
      "loss": 0.0071,
      "step": 36
    },
    {
      "epoch": 1.280701754385965,
      "grad_norm": 0.026673054322600365,
      "learning_rate": 0.00012142857142857143,
      "loss": 0.0017,
      "step": 37
    },
    {
      "epoch": 1.3157894736842106,
      "grad_norm": 0.09272781759500504,
      "learning_rate": 0.00011904761904761905,
      "loss": 0.0067,
      "step": 38
    },
    {
      "epoch": 1.3508771929824561,
      "grad_norm": 0.04980189725756645,
      "learning_rate": 0.00011666666666666668,
      "loss": 0.0037,
      "step": 39
    },
    {
      "epoch": 1.3859649122807016,
      "grad_norm": 0.0377652607858181,
      "learning_rate": 0.00011428571428571428,
      "loss": 0.0025,
      "step": 40
    },
    {
      "epoch": 1.4210526315789473,
      "grad_norm": 0.04286219924688339,
      "learning_rate": 0.00011190476190476191,
      "loss": 0.0028,
      "step": 41
    },
    {
      "epoch": 1.456140350877193,
      "grad_norm": 0.04524603486061096,
      "learning_rate": 0.00010952380952380953,
      "loss": 0.0018,
      "step": 42
    },
    {
      "epoch": 1.4912280701754386,
      "grad_norm": 0.15857696533203125,
      "learning_rate": 0.00010714285714285715,
      "loss": 0.0192,
      "step": 43
    },
    {
      "epoch": 1.526315789473684,
      "grad_norm": 0.045452795922756195,
      "learning_rate": 0.00010476190476190477,
      "loss": 0.002,
      "step": 44
    },
    {
      "epoch": 1.5614035087719298,
      "grad_norm": 0.06400217860937119,
      "learning_rate": 0.00010238095238095237,
      "loss": 0.0032,
      "step": 45
    },
    {
      "epoch": 1.5964912280701755,
      "grad_norm": 0.024957947432994843,
      "learning_rate": 0.0001,
      "loss": 0.002,
      "step": 46
    },
    {
      "epoch": 1.631578947368421,
      "grad_norm": 0.04150143265724182,
      "learning_rate": 9.761904761904762e-05,
      "loss": 0.0028,
      "step": 47
    },
    {
      "epoch": 1.6666666666666665,
      "grad_norm": 0.0963282361626625,
      "learning_rate": 9.523809523809524e-05,
      "loss": 0.0053,
      "step": 48
    },
    {
      "epoch": 1.7017543859649122,
      "grad_norm": 0.08165717869997025,
      "learning_rate": 9.285714285714286e-05,
      "loss": 0.0051,
      "step": 49
    },
    {
      "epoch": 1.736842105263158,
      "grad_norm": 0.07597285509109497,
      "learning_rate": 9.047619047619048e-05,
      "loss": 0.0034,
      "step": 50
    },
    {
      "epoch": 1.7719298245614035,
      "grad_norm": 0.08033040165901184,
      "learning_rate": 8.80952380952381e-05,
      "loss": 0.0056,
      "step": 51
    },
    {
      "epoch": 1.807017543859649,
      "grad_norm": 0.04683045297861099,
      "learning_rate": 8.571428571428571e-05,
      "loss": 0.003,
      "step": 52
    },
    {
      "epoch": 1.8421052631578947,
      "grad_norm": 0.07887265831232071,
      "learning_rate": 8.333333333333334e-05,
      "loss": 0.0035,
      "step": 53
    },
    {
      "epoch": 1.8771929824561404,
      "grad_norm": 0.1161859855055809,
      "learning_rate": 8.095238095238096e-05,
      "loss": 0.005,
      "step": 54
    },
    {
      "epoch": 1.912280701754386,
      "grad_norm": 0.05016123130917549,
      "learning_rate": 7.857142857142858e-05,
      "loss": 0.0027,
      "step": 55
    },
    {
      "epoch": 1.9473684210526314,
      "grad_norm": 0.11343929171562195,
      "learning_rate": 7.619047619047618e-05,
      "loss": 0.0037,
      "step": 56
    },
    {
      "epoch": 1.9824561403508771,
      "grad_norm": 0.03468641638755798,
      "learning_rate": 7.380952380952382e-05,
      "loss": 0.002,
      "step": 57
    },
    {
      "epoch": 2.0,
      "grad_norm": 0.028209766373038292,
      "learning_rate": 7.142857142857143e-05,
      "loss": 0.0009,
      "step": 58
    },
    {
      "epoch": 2.0350877192982457,
      "grad_norm": 0.02228238247334957,
      "learning_rate": 6.904761904761905e-05,
      "loss": 0.0023,
      "step": 59
    },
    {
      "epoch": 2.0701754385964914,
      "grad_norm": 0.05176170915365219,
      "learning_rate": 6.666666666666667e-05,
      "loss": 0.0022,
      "step": 60
    },
    {
      "epoch": 2.1052631578947367,
      "grad_norm": 0.08067404478788376,
      "learning_rate": 6.428571428571429e-05,
      "loss": 0.0035,
      "step": 61
    },
    {
      "epoch": 2.1403508771929824,
      "grad_norm": 0.024810029193758965,
      "learning_rate": 6.19047619047619e-05,
      "loss": 0.0016,
      "step": 62
    },
    {
      "epoch": 2.175438596491228,
      "grad_norm": 0.08523717522621155,
      "learning_rate": 5.9523809523809524e-05,
      "loss": 0.0036,
      "step": 63
    },
    {
      "epoch": 2.2105263157894735,
      "grad_norm": 0.08478069305419922,
      "learning_rate": 5.714285714285714e-05,
      "loss": 0.0032,
      "step": 64
    },
    {
      "epoch": 2.245614035087719,
      "grad_norm": 0.019477397203445435,
      "learning_rate": 5.4761904761904766e-05,
      "loss": 0.0024,
      "step": 65
    },
    {
      "epoch": 2.280701754385965,
      "grad_norm": 0.0420527420938015,
      "learning_rate": 5.2380952380952384e-05,
      "loss": 0.0027,
      "step": 66
    },
    {
      "epoch": 2.3157894736842106,
      "grad_norm": 0.05265393853187561,
      "learning_rate": 5e-05,
      "loss": 0.0025,
      "step": 67
    },
    {
      "epoch": 2.3508771929824563,
      "grad_norm": 0.08029446750879288,
      "learning_rate": 4.761904761904762e-05,
      "loss": 0.0028,
      "step": 68
    },
    {
      "epoch": 2.3859649122807016,
      "grad_norm": 0.06714743375778198,
      "learning_rate": 4.523809523809524e-05,
      "loss": 0.0026,
      "step": 69
    },
    {
      "epoch": 2.4210526315789473,
      "grad_norm": 0.07133755087852478,
      "learning_rate": 4.2857142857142856e-05,
      "loss": 0.0048,
      "step": 70
    },
    {
      "epoch": 2.456140350877193,
      "grad_norm": 0.015778280794620514,
      "learning_rate": 4.047619047619048e-05,
      "loss": 0.0013,
      "step": 71
    },
    {
      "epoch": 2.4912280701754383,
      "grad_norm": 0.03038935363292694,
      "learning_rate": 3.809523809523809e-05,
      "loss": 0.0026,
      "step": 72
    },
    {
      "epoch": 2.526315789473684,
      "grad_norm": 0.07423048466444016,
      "learning_rate": 3.571428571428572e-05,
      "loss": 0.0051,
      "step": 73
    },
    {
      "epoch": 2.56140350877193,
      "grad_norm": 0.06572224199771881,
      "learning_rate": 3.3333333333333335e-05,
      "loss": 0.0016,
      "step": 74
    },
    {
      "epoch": 2.5964912280701755,
      "grad_norm": 0.09695594757795334,
      "learning_rate": 3.095238095238095e-05,
      "loss": 0.0012,
      "step": 75
    },
    {
      "epoch": 2.6315789473684212,
      "grad_norm": 0.011839903891086578,
      "learning_rate": 2.857142857142857e-05,
      "loss": 0.001,
      "step": 76
    },
    {
      "epoch": 2.6666666666666665,
      "grad_norm": 0.015379734337329865,
      "learning_rate": 2.6190476190476192e-05,
      "loss": 0.0019,
      "step": 77
    },
    {
      "epoch": 2.7017543859649122,
      "grad_norm": 0.025853777304291725,
      "learning_rate": 2.380952380952381e-05,
      "loss": 0.0009,
      "step": 78
    },
    {
      "epoch": 2.736842105263158,
      "grad_norm": 0.06937374174594879,
      "learning_rate": 2.1428571428571428e-05,
      "loss": 0.0017,
      "step": 79
    },
    {
      "epoch": 2.7719298245614032,
      "grad_norm": 0.023749876767396927,
      "learning_rate": 1.9047619047619046e-05,
      "loss": 0.0014,
      "step": 80
    },
    {
      "epoch": 2.807017543859649,
      "grad_norm": 0.01632499136030674,
      "learning_rate": 1.6666666666666667e-05,
      "loss": 0.0018,
      "step": 81
    },
    {
      "epoch": 2.8421052631578947,
      "grad_norm": 0.019440138712525368,
      "learning_rate": 1.4285714285714285e-05,
      "loss": 0.0016,
      "step": 82
    },
    {
      "epoch": 2.8771929824561404,
      "grad_norm": 0.04011471942067146,
      "learning_rate": 1.1904761904761905e-05,
      "loss": 0.0029,
      "step": 83
    },
    {
      "epoch": 2.912280701754386,
      "grad_norm": 0.02127029560506344,
      "learning_rate": 9.523809523809523e-06,
      "loss": 0.0016,
      "step": 84
    },
    {
      "epoch": 2.9473684210526314,
      "grad_norm": 0.013040756806731224,
      "learning_rate": 7.142857142857143e-06,
      "loss": 0.0019,
      "step": 85
    },
    {
      "epoch": 2.982456140350877,
      "grad_norm": 0.01978246122598648,
      "learning_rate": 4.7619047619047615e-06,
      "loss": 0.0013,
      "step": 86
    },
    {
      "epoch": 3.0,
      "grad_norm": 0.015681693330407143,
      "learning_rate": 2.3809523809523808e-06,
      "loss": 0.0005,
      "step": 87
    }
  ],
  "logging_steps": 1,
  "max_steps": 87,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 2.738355493837824e+16,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}