{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 3.0,
"eval_steps": 500,
"global_step": 87,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.03508771929824561,
"grad_norm": 0.043367721140384674,
"learning_rate": 0.0,
"loss": 0.0033,
"step": 1
},
{
"epoch": 0.07017543859649122,
"grad_norm": 0.06946025043725967,
"learning_rate": 6.666666666666667e-05,
"loss": 0.0037,
"step": 2
},
{
"epoch": 0.10526315789473684,
"grad_norm": 0.044264402240514755,
"learning_rate": 0.00013333333333333334,
"loss": 0.0032,
"step": 3
},
{
"epoch": 0.14035087719298245,
"grad_norm": 0.03978864848613739,
"learning_rate": 0.0002,
"loss": 0.0038,
"step": 4
},
{
"epoch": 0.17543859649122806,
"grad_norm": 0.03119363822042942,
"learning_rate": 0.00019761904761904763,
"loss": 0.0036,
"step": 5
},
{
"epoch": 0.21052631578947367,
"grad_norm": 0.0629071518778801,
"learning_rate": 0.00019523809523809525,
"loss": 0.0035,
"step": 6
},
{
"epoch": 0.24561403508771928,
"grad_norm": 0.13739793002605438,
"learning_rate": 0.00019285714285714286,
"loss": 0.007,
"step": 7
},
{
"epoch": 0.2807017543859649,
"grad_norm": 0.073564812541008,
"learning_rate": 0.00019047619047619048,
"loss": 0.0038,
"step": 8
},
{
"epoch": 0.3157894736842105,
"grad_norm": 0.09854966402053833,
"learning_rate": 0.0001880952380952381,
"loss": 0.004,
"step": 9
},
{
"epoch": 0.3508771929824561,
"grad_norm": 0.07107563316822052,
"learning_rate": 0.00018571428571428572,
"loss": 0.0038,
"step": 10
},
{
"epoch": 0.38596491228070173,
"grad_norm": 0.062161125242710114,
"learning_rate": 0.00018333333333333334,
"loss": 0.0027,
"step": 11
},
{
"epoch": 0.42105263157894735,
"grad_norm": 0.07297360152006149,
"learning_rate": 0.00018095238095238095,
"loss": 0.0037,
"step": 12
},
{
"epoch": 0.45614035087719296,
"grad_norm": 0.09481006115674973,
"learning_rate": 0.0001785714285714286,
"loss": 0.0082,
"step": 13
},
{
"epoch": 0.49122807017543857,
"grad_norm": 0.15246650576591492,
"learning_rate": 0.0001761904761904762,
"loss": 0.0089,
"step": 14
},
{
"epoch": 0.5263157894736842,
"grad_norm": 0.09137029945850372,
"learning_rate": 0.00017380952380952383,
"loss": 0.0041,
"step": 15
},
{
"epoch": 0.5614035087719298,
"grad_norm": 0.12798666954040527,
"learning_rate": 0.00017142857142857143,
"loss": 0.0026,
"step": 16
},
{
"epoch": 0.5964912280701754,
"grad_norm": 0.1691877394914627,
"learning_rate": 0.00016904761904761904,
"loss": 0.0131,
"step": 17
},
{
"epoch": 0.631578947368421,
"grad_norm": 0.11234349757432938,
"learning_rate": 0.0001666666666666667,
"loss": 0.0085,
"step": 18
},
{
"epoch": 0.6666666666666666,
"grad_norm": 0.04114694893360138,
"learning_rate": 0.00016428571428571428,
"loss": 0.0029,
"step": 19
},
{
"epoch": 0.7017543859649122,
"grad_norm": 0.08836357295513153,
"learning_rate": 0.00016190476190476192,
"loss": 0.0041,
"step": 20
},
{
"epoch": 0.7368421052631579,
"grad_norm": 0.10266884416341782,
"learning_rate": 0.00015952380952380954,
"loss": 0.0057,
"step": 21
},
{
"epoch": 0.7719298245614035,
"grad_norm": 0.08742401748895645,
"learning_rate": 0.00015714285714285716,
"loss": 0.0061,
"step": 22
},
{
"epoch": 0.8070175438596491,
"grad_norm": 0.06542518734931946,
"learning_rate": 0.00015476190476190478,
"loss": 0.0059,
"step": 23
},
{
"epoch": 0.8421052631578947,
"grad_norm": 0.0995144322514534,
"learning_rate": 0.00015238095238095237,
"loss": 0.0041,
"step": 24
},
{
"epoch": 0.8771929824561403,
"grad_norm": 0.1686088740825653,
"learning_rate": 0.00015000000000000001,
"loss": 0.0058,
"step": 25
},
{
"epoch": 0.9122807017543859,
"grad_norm": 0.11190295219421387,
"learning_rate": 0.00014761904761904763,
"loss": 0.0069,
"step": 26
},
{
"epoch": 0.9473684210526315,
"grad_norm": 0.13313594460487366,
"learning_rate": 0.00014523809523809525,
"loss": 0.0085,
"step": 27
},
{
"epoch": 0.9824561403508771,
"grad_norm": 0.0898401066660881,
"learning_rate": 0.00014285714285714287,
"loss": 0.0042,
"step": 28
},
{
"epoch": 1.0,
"grad_norm": 0.050766751170158386,
"learning_rate": 0.00014047619047619049,
"loss": 0.0015,
"step": 29
},
{
"epoch": 1.0350877192982457,
"grad_norm": 0.06956154853105545,
"learning_rate": 0.0001380952380952381,
"loss": 0.0051,
"step": 30
},
{
"epoch": 1.0701754385964912,
"grad_norm": 0.06322275847196579,
"learning_rate": 0.00013571428571428572,
"loss": 0.0061,
"step": 31
},
{
"epoch": 1.1052631578947367,
"grad_norm": 0.03425135090947151,
"learning_rate": 0.00013333333333333334,
"loss": 0.0028,
"step": 32
},
{
"epoch": 1.1403508771929824,
"grad_norm": 0.09477324038743973,
"learning_rate": 0.00013095238095238096,
"loss": 0.0039,
"step": 33
},
{
"epoch": 1.1754385964912282,
"grad_norm": 0.049563754349946976,
"learning_rate": 0.00012857142857142858,
"loss": 0.0038,
"step": 34
},
{
"epoch": 1.2105263157894737,
"grad_norm": 0.05497609078884125,
"learning_rate": 0.0001261904761904762,
"loss": 0.0046,
"step": 35
},
{
"epoch": 1.2456140350877192,
"grad_norm": 0.12121907621622086,
"learning_rate": 0.0001238095238095238,
"loss": 0.0071,
"step": 36
},
{
"epoch": 1.280701754385965,
"grad_norm": 0.026673054322600365,
"learning_rate": 0.00012142857142857143,
"loss": 0.0017,
"step": 37
},
{
"epoch": 1.3157894736842106,
"grad_norm": 0.09272781759500504,
"learning_rate": 0.00011904761904761905,
"loss": 0.0067,
"step": 38
},
{
"epoch": 1.3508771929824561,
"grad_norm": 0.04980189725756645,
"learning_rate": 0.00011666666666666668,
"loss": 0.0037,
"step": 39
},
{
"epoch": 1.3859649122807016,
"grad_norm": 0.0377652607858181,
"learning_rate": 0.00011428571428571428,
"loss": 0.0025,
"step": 40
},
{
"epoch": 1.4210526315789473,
"grad_norm": 0.04286219924688339,
"learning_rate": 0.00011190476190476191,
"loss": 0.0028,
"step": 41
},
{
"epoch": 1.456140350877193,
"grad_norm": 0.04524603486061096,
"learning_rate": 0.00010952380952380953,
"loss": 0.0018,
"step": 42
},
{
"epoch": 1.4912280701754386,
"grad_norm": 0.15857696533203125,
"learning_rate": 0.00010714285714285715,
"loss": 0.0192,
"step": 43
},
{
"epoch": 1.526315789473684,
"grad_norm": 0.045452795922756195,
"learning_rate": 0.00010476190476190477,
"loss": 0.002,
"step": 44
},
{
"epoch": 1.5614035087719298,
"grad_norm": 0.06400217860937119,
"learning_rate": 0.00010238095238095237,
"loss": 0.0032,
"step": 45
},
{
"epoch": 1.5964912280701755,
"grad_norm": 0.024957947432994843,
"learning_rate": 0.0001,
"loss": 0.002,
"step": 46
},
{
"epoch": 1.631578947368421,
"grad_norm": 0.04150143265724182,
"learning_rate": 9.761904761904762e-05,
"loss": 0.0028,
"step": 47
},
{
"epoch": 1.6666666666666665,
"grad_norm": 0.0963282361626625,
"learning_rate": 9.523809523809524e-05,
"loss": 0.0053,
"step": 48
},
{
"epoch": 1.7017543859649122,
"grad_norm": 0.08165717869997025,
"learning_rate": 9.285714285714286e-05,
"loss": 0.0051,
"step": 49
},
{
"epoch": 1.736842105263158,
"grad_norm": 0.07597285509109497,
"learning_rate": 9.047619047619048e-05,
"loss": 0.0034,
"step": 50
},
{
"epoch": 1.7719298245614035,
"grad_norm": 0.08033040165901184,
"learning_rate": 8.80952380952381e-05,
"loss": 0.0056,
"step": 51
},
{
"epoch": 1.807017543859649,
"grad_norm": 0.04683045297861099,
"learning_rate": 8.571428571428571e-05,
"loss": 0.003,
"step": 52
},
{
"epoch": 1.8421052631578947,
"grad_norm": 0.07887265831232071,
"learning_rate": 8.333333333333334e-05,
"loss": 0.0035,
"step": 53
},
{
"epoch": 1.8771929824561404,
"grad_norm": 0.1161859855055809,
"learning_rate": 8.095238095238096e-05,
"loss": 0.005,
"step": 54
},
{
"epoch": 1.912280701754386,
"grad_norm": 0.05016123130917549,
"learning_rate": 7.857142857142858e-05,
"loss": 0.0027,
"step": 55
},
{
"epoch": 1.9473684210526314,
"grad_norm": 0.11343929171562195,
"learning_rate": 7.619047619047618e-05,
"loss": 0.0037,
"step": 56
},
{
"epoch": 1.9824561403508771,
"grad_norm": 0.03468641638755798,
"learning_rate": 7.380952380952382e-05,
"loss": 0.002,
"step": 57
},
{
"epoch": 2.0,
"grad_norm": 0.028209766373038292,
"learning_rate": 7.142857142857143e-05,
"loss": 0.0009,
"step": 58
},
{
"epoch": 2.0350877192982457,
"grad_norm": 0.02228238247334957,
"learning_rate": 6.904761904761905e-05,
"loss": 0.0023,
"step": 59
},
{
"epoch": 2.0701754385964914,
"grad_norm": 0.05176170915365219,
"learning_rate": 6.666666666666667e-05,
"loss": 0.0022,
"step": 60
},
{
"epoch": 2.1052631578947367,
"grad_norm": 0.08067404478788376,
"learning_rate": 6.428571428571429e-05,
"loss": 0.0035,
"step": 61
},
{
"epoch": 2.1403508771929824,
"grad_norm": 0.024810029193758965,
"learning_rate": 6.19047619047619e-05,
"loss": 0.0016,
"step": 62
},
{
"epoch": 2.175438596491228,
"grad_norm": 0.08523717522621155,
"learning_rate": 5.9523809523809524e-05,
"loss": 0.0036,
"step": 63
},
{
"epoch": 2.2105263157894735,
"grad_norm": 0.08478069305419922,
"learning_rate": 5.714285714285714e-05,
"loss": 0.0032,
"step": 64
},
{
"epoch": 2.245614035087719,
"grad_norm": 0.019477397203445435,
"learning_rate": 5.4761904761904766e-05,
"loss": 0.0024,
"step": 65
},
{
"epoch": 2.280701754385965,
"grad_norm": 0.0420527420938015,
"learning_rate": 5.2380952380952384e-05,
"loss": 0.0027,
"step": 66
},
{
"epoch": 2.3157894736842106,
"grad_norm": 0.05265393853187561,
"learning_rate": 5e-05,
"loss": 0.0025,
"step": 67
},
{
"epoch": 2.3508771929824563,
"grad_norm": 0.08029446750879288,
"learning_rate": 4.761904761904762e-05,
"loss": 0.0028,
"step": 68
},
{
"epoch": 2.3859649122807016,
"grad_norm": 0.06714743375778198,
"learning_rate": 4.523809523809524e-05,
"loss": 0.0026,
"step": 69
},
{
"epoch": 2.4210526315789473,
"grad_norm": 0.07133755087852478,
"learning_rate": 4.2857142857142856e-05,
"loss": 0.0048,
"step": 70
},
{
"epoch": 2.456140350877193,
"grad_norm": 0.015778280794620514,
"learning_rate": 4.047619047619048e-05,
"loss": 0.0013,
"step": 71
},
{
"epoch": 2.4912280701754383,
"grad_norm": 0.03038935363292694,
"learning_rate": 3.809523809523809e-05,
"loss": 0.0026,
"step": 72
},
{
"epoch": 2.526315789473684,
"grad_norm": 0.07423048466444016,
"learning_rate": 3.571428571428572e-05,
"loss": 0.0051,
"step": 73
},
{
"epoch": 2.56140350877193,
"grad_norm": 0.06572224199771881,
"learning_rate": 3.3333333333333335e-05,
"loss": 0.0016,
"step": 74
},
{
"epoch": 2.5964912280701755,
"grad_norm": 0.09695594757795334,
"learning_rate": 3.095238095238095e-05,
"loss": 0.0012,
"step": 75
},
{
"epoch": 2.6315789473684212,
"grad_norm": 0.011839903891086578,
"learning_rate": 2.857142857142857e-05,
"loss": 0.001,
"step": 76
},
{
"epoch": 2.6666666666666665,
"grad_norm": 0.015379734337329865,
"learning_rate": 2.6190476190476192e-05,
"loss": 0.0019,
"step": 77
},
{
"epoch": 2.7017543859649122,
"grad_norm": 0.025853777304291725,
"learning_rate": 2.380952380952381e-05,
"loss": 0.0009,
"step": 78
},
{
"epoch": 2.736842105263158,
"grad_norm": 0.06937374174594879,
"learning_rate": 2.1428571428571428e-05,
"loss": 0.0017,
"step": 79
},
{
"epoch": 2.7719298245614032,
"grad_norm": 0.023749876767396927,
"learning_rate": 1.9047619047619046e-05,
"loss": 0.0014,
"step": 80
},
{
"epoch": 2.807017543859649,
"grad_norm": 0.01632499136030674,
"learning_rate": 1.6666666666666667e-05,
"loss": 0.0018,
"step": 81
},
{
"epoch": 2.8421052631578947,
"grad_norm": 0.019440138712525368,
"learning_rate": 1.4285714285714285e-05,
"loss": 0.0016,
"step": 82
},
{
"epoch": 2.8771929824561404,
"grad_norm": 0.04011471942067146,
"learning_rate": 1.1904761904761905e-05,
"loss": 0.0029,
"step": 83
},
{
"epoch": 2.912280701754386,
"grad_norm": 0.02127029560506344,
"learning_rate": 9.523809523809523e-06,
"loss": 0.0016,
"step": 84
},
{
"epoch": 2.9473684210526314,
"grad_norm": 0.013040756806731224,
"learning_rate": 7.142857142857143e-06,
"loss": 0.0019,
"step": 85
},
{
"epoch": 2.982456140350877,
"grad_norm": 0.01978246122598648,
"learning_rate": 4.7619047619047615e-06,
"loss": 0.0013,
"step": 86
},
{
"epoch": 3.0,
"grad_norm": 0.015681693330407143,
"learning_rate": 2.3809523809523808e-06,
"loss": 0.0005,
"step": 87
}
],
"logging_steps": 1,
"max_steps": 87,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 2.738355493837824e+16,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}