{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"eval_steps": 500,
"global_step": 36,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.027777777777777776,
"grad_norm": 22.67836931373644,
"learning_rate": 1e-05,
"loss": 19.3158,
"step": 1
},
{
"epoch": 0.05555555555555555,
"grad_norm": 25.528554454653026,
"learning_rate": 9.982876141412857e-06,
"loss": 16.4902,
"step": 2
},
{
"epoch": 0.08333333333333333,
"grad_norm": 172.9631001519472,
"learning_rate": 9.931634888554937e-06,
"loss": 16.5242,
"step": 3
},
{
"epoch": 0.1111111111111111,
"grad_norm": 28.11231293384171,
"learning_rate": 9.846666218300808e-06,
"loss": 14.0419,
"step": 4
},
{
"epoch": 0.1388888888888889,
"grad_norm": 90.23398804316926,
"learning_rate": 9.728616793536588e-06,
"loss": 14.8229,
"step": 5
},
{
"epoch": 0.16666666666666666,
"grad_norm": 146.4150512008885,
"learning_rate": 9.578385041664926e-06,
"loss": 14.3093,
"step": 6
},
{
"epoch": 0.19444444444444445,
"grad_norm": 29.876775310695155,
"learning_rate": 9.397114317029975e-06,
"loss": 12.2314,
"step": 7
},
{
"epoch": 0.2222222222222222,
"grad_norm": 20.704280611190857,
"learning_rate": 9.186184199300464e-06,
"loss": 11.6216,
"step": 8
},
{
"epoch": 0.25,
"grad_norm": 15.561815859465199,
"learning_rate": 8.947199994035402e-06,
"loss": 11.1165,
"step": 9
},
{
"epoch": 0.2777777777777778,
"grad_norm": 9.255900465441924,
"learning_rate": 8.681980515339464e-06,
"loss": 10.2489,
"step": 10
},
{
"epoch": 0.3055555555555556,
"grad_norm": 9.141237857626225,
"learning_rate": 8.392544243589428e-06,
"loss": 9.6206,
"step": 11
},
{
"epoch": 0.3333333333333333,
"grad_norm": 7.367404565747214,
"learning_rate": 8.081093963579707e-06,
"loss": 8.9421,
"step": 12
},
{
"epoch": 0.3611111111111111,
"grad_norm": 7.276080472587209,
"learning_rate": 7.75e-06,
"loss": 8.3113,
"step": 13
},
{
"epoch": 0.3888888888888889,
"grad_norm": 7.4332326669293005,
"learning_rate": 7.401782177833148e-06,
"loss": 7.9473,
"step": 14
},
{
"epoch": 0.4166666666666667,
"grad_norm": 6.227852054297136,
"learning_rate": 7.0390906449655104e-06,
"loss": 7.4265,
"step": 15
},
{
"epoch": 0.4444444444444444,
"grad_norm": 9.533857403224232,
"learning_rate": 6.664685702961344e-06,
"loss": 7.0564,
"step": 16
},
{
"epoch": 0.4722222222222222,
"grad_norm": 11.027416694984288,
"learning_rate": 6.281416799501188e-06,
"loss": 6.6986,
"step": 17
},
{
"epoch": 0.5,
"grad_norm": 8.015569378629083,
"learning_rate": 5.892200842364463e-06,
"loss": 6.4416,
"step": 18
},
{
"epoch": 0.5277777777777778,
"grad_norm": 10.173585030611866,
"learning_rate": 5.500000000000001e-06,
"loss": 6.313,
"step": 19
},
{
"epoch": 0.5555555555555556,
"grad_norm": 6.910614565786783,
"learning_rate": 5.107799157635538e-06,
"loss": 5.8678,
"step": 20
},
{
"epoch": 0.5833333333333334,
"grad_norm": 9.582544535267417,
"learning_rate": 4.718583200498814e-06,
"loss": 5.656,
"step": 21
},
{
"epoch": 0.6111111111111112,
"grad_norm": 5.662411790388903,
"learning_rate": 4.335314297038656e-06,
"loss": 5.4231,
"step": 22
},
{
"epoch": 0.6388888888888888,
"grad_norm": 5.28340071917807,
"learning_rate": 3.960909355034491e-06,
"loss": 5.2888,
"step": 23
},
{
"epoch": 0.6666666666666666,
"grad_norm": 6.812817877166794,
"learning_rate": 3.598217822166854e-06,
"loss": 5.2311,
"step": 24
},
{
"epoch": 0.6944444444444444,
"grad_norm": 4.939213318761914,
"learning_rate": 3.2500000000000015e-06,
"loss": 5.0425,
"step": 25
},
{
"epoch": 0.7222222222222222,
"grad_norm": 5.861252411712965,
"learning_rate": 2.9189060364202944e-06,
"loss": 4.9106,
"step": 26
},
{
"epoch": 0.75,
"grad_norm": 4.913591223534713,
"learning_rate": 2.607455756410573e-06,
"loss": 4.6825,
"step": 27
},
{
"epoch": 0.7777777777777778,
"grad_norm": 4.526799704205077,
"learning_rate": 2.3180194846605367e-06,
"loss": 4.5745,
"step": 28
},
{
"epoch": 0.8055555555555556,
"grad_norm": 4.561348704304984,
"learning_rate": 2.0528000059646e-06,
"loss": 4.498,
"step": 29
},
{
"epoch": 0.8333333333333334,
"grad_norm": 3.7034148333699637,
"learning_rate": 1.8138158006995366e-06,
"loss": 4.439,
"step": 30
},
{
"epoch": 0.8611111111111112,
"grad_norm": 3.474665263821162,
"learning_rate": 1.602885682970026e-06,
"loss": 4.1679,
"step": 31
},
{
"epoch": 0.8888888888888888,
"grad_norm": 3.4083753091607747,
"learning_rate": 1.4216149583350756e-06,
"loss": 4.4125,
"step": 32
},
{
"epoch": 0.9166666666666666,
"grad_norm": 3.214452041424971,
"learning_rate": 1.2713832064634127e-06,
"loss": 4.1909,
"step": 33
},
{
"epoch": 0.9444444444444444,
"grad_norm": 2.9121418083489643,
"learning_rate": 1.1533337816991932e-06,
"loss": 4.2952,
"step": 34
},
{
"epoch": 0.9722222222222222,
"grad_norm": 2.9431831966910993,
"learning_rate": 1.0683651114450641e-06,
"loss": 4.0494,
"step": 35
},
{
"epoch": 1.0,
"grad_norm": 4.464172024031719,
"learning_rate": 1.0171238585871451e-06,
"loss": 4.0939,
"step": 36
}
],
"logging_steps": 1,
"max_steps": 36,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 74680891342848.0,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}