{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"eval_steps": 500,
"global_step": 36,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.027777777777777776,
"grad_norm": 22.67836931373644,
"learning_rate": 1e-05,
"loss": 19.3158,
"step": 1
},
{
"epoch": 0.05555555555555555,
"grad_norm": 25.528554454653026,
"learning_rate": 9.982876141412857e-06,
"loss": 16.4902,
"step": 2
},
{
"epoch": 0.08333333333333333,
"grad_norm": 172.9631001519472,
"learning_rate": 9.931634888554937e-06,
"loss": 16.5242,
"step": 3
},
{
"epoch": 0.1111111111111111,
"grad_norm": 28.11231293384171,
"learning_rate": 9.846666218300808e-06,
"loss": 14.0419,
"step": 4
},
{
"epoch": 0.1388888888888889,
"grad_norm": 90.23398804316926,
"learning_rate": 9.728616793536588e-06,
"loss": 14.8229,
"step": 5
},
{
"epoch": 0.16666666666666666,
"grad_norm": 146.4150512008885,
"learning_rate": 9.578385041664926e-06,
"loss": 14.3093,
"step": 6
},
{
"epoch": 0.19444444444444445,
"grad_norm": 29.876775310695155,
"learning_rate": 9.397114317029975e-06,
"loss": 12.2314,
"step": 7
},
{
"epoch": 0.2222222222222222,
"grad_norm": 20.704280611190857,
"learning_rate": 9.186184199300464e-06,
"loss": 11.6216,
"step": 8
},
{
"epoch": 0.25,
"grad_norm": 15.561815859465199,
"learning_rate": 8.947199994035402e-06,
"loss": 11.1165,
"step": 9
},
{
"epoch": 0.2777777777777778,
"grad_norm": 9.255900465441924,
"learning_rate": 8.681980515339464e-06,
"loss": 10.2489,
"step": 10
},
{
"epoch": 0.3055555555555556,
"grad_norm": 9.141237857626225,
"learning_rate": 8.392544243589428e-06,
"loss": 9.6206,
"step": 11
},
{
"epoch": 0.3333333333333333,
"grad_norm": 7.367404565747214,
"learning_rate": 8.081093963579707e-06,
"loss": 8.9421,
"step": 12
},
{
"epoch": 0.3611111111111111,
"grad_norm": 7.276080472587209,
"learning_rate": 7.75e-06,
"loss": 8.3113,
"step": 13
},
{
"epoch": 0.3888888888888889,
"grad_norm": 7.4332326669293005,
"learning_rate": 7.401782177833148e-06,
"loss": 7.9473,
"step": 14
},
{
"epoch": 0.4166666666666667,
"grad_norm": 6.227852054297136,
"learning_rate": 7.0390906449655104e-06,
"loss": 7.4265,
"step": 15
},
{
"epoch": 0.4444444444444444,
"grad_norm": 9.533857403224232,
"learning_rate": 6.664685702961344e-06,
"loss": 7.0564,
"step": 16
},
{
"epoch": 0.4722222222222222,
"grad_norm": 11.027416694984288,
"learning_rate": 6.281416799501188e-06,
"loss": 6.6986,
"step": 17
},
{
"epoch": 0.5,
"grad_norm": 8.015569378629083,
"learning_rate": 5.892200842364463e-06,
"loss": 6.4416,
"step": 18
},
{
"epoch": 0.5277777777777778,
"grad_norm": 10.173585030611866,
"learning_rate": 5.500000000000001e-06,
"loss": 6.313,
"step": 19
},
{
"epoch": 0.5555555555555556,
"grad_norm": 6.910614565786783,
"learning_rate": 5.107799157635538e-06,
"loss": 5.8678,
"step": 20
},
{
"epoch": 0.5833333333333334,
"grad_norm": 9.582544535267417,
"learning_rate": 4.718583200498814e-06,
"loss": 5.656,
"step": 21
},
{
"epoch": 0.6111111111111112,
"grad_norm": 5.662411790388903,
"learning_rate": 4.335314297038656e-06,
"loss": 5.4231,
"step": 22
},
{
"epoch": 0.6388888888888888,
"grad_norm": 5.28340071917807,
"learning_rate": 3.960909355034491e-06,
"loss": 5.2888,
"step": 23
},
{
"epoch": 0.6666666666666666,
"grad_norm": 6.812817877166794,
"learning_rate": 3.598217822166854e-06,
"loss": 5.2311,
"step": 24
},
{
"epoch": 0.6944444444444444,
"grad_norm": 4.939213318761914,
"learning_rate": 3.2500000000000015e-06,
"loss": 5.0425,
"step": 25
},
{
"epoch": 0.7222222222222222,
"grad_norm": 5.861252411712965,
"learning_rate": 2.9189060364202944e-06,
"loss": 4.9106,
"step": 26
},
{
"epoch": 0.75,
"grad_norm": 4.913591223534713,
"learning_rate": 2.607455756410573e-06,
"loss": 4.6825,
"step": 27
},
{
"epoch": 0.7777777777777778,
"grad_norm": 4.526799704205077,
"learning_rate": 2.3180194846605367e-06,
"loss": 4.5745,
"step": 28
},
{
"epoch": 0.8055555555555556,
"grad_norm": 4.561348704304984,
"learning_rate": 2.0528000059646e-06,
"loss": 4.498,
"step": 29
},
{
"epoch": 0.8333333333333334,
"grad_norm": 3.7034148333699637,
"learning_rate": 1.8138158006995366e-06,
"loss": 4.439,
"step": 30
},
{
"epoch": 0.8611111111111112,
"grad_norm": 3.474665263821162,
"learning_rate": 1.602885682970026e-06,
"loss": 4.1679,
"step": 31
},
{
"epoch": 0.8888888888888888,
"grad_norm": 3.4083753091607747,
"learning_rate": 1.4216149583350756e-06,
"loss": 4.4125,
"step": 32
},
{
"epoch": 0.9166666666666666,
"grad_norm": 3.214452041424971,
"learning_rate": 1.2713832064634127e-06,
"loss": 4.1909,
"step": 33
},
{
"epoch": 0.9444444444444444,
"grad_norm": 2.9121418083489643,
"learning_rate": 1.1533337816991932e-06,
"loss": 4.2952,
"step": 34
},
{
"epoch": 0.9722222222222222,
"grad_norm": 2.9431831966910993,
"learning_rate": 1.0683651114450641e-06,
"loss": 4.0494,
"step": 35
},
{
"epoch": 1.0,
"grad_norm": 4.464172024031719,
"learning_rate": 1.0171238585871451e-06,
"loss": 4.0939,
"step": 36
}
],
"logging_steps": 1,
"max_steps": 36,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 74680891342848.0,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}