Pretam
/

NIOS_san_kin

Model card Files Files and versions

NIOS_san_kin / trainer_state.json

Pretam's picture

Upload folder using huggingface_hub

833874a verified 29 days ago

history blame contribute delete

4.29 kB

	{
	"best_global_step": null,
	"best_metric": null,
	"best_model_checkpoint": null,
	"epoch": 10.0,
	"eval_steps": 500,
	"global_step": 9610,
	"is_hyper_param_search": false,
	"is_local_process_zero": true,
	"is_world_process_zero": true,
	"log_history": [
	{
	"epoch": 0.5202913631633714,
	"grad_norm": 1.687648057937622,
	"learning_rate": 4.7403746097814776e-05,
	"loss": 1.6767198486328125,
	"step": 500
	},
	{
	"epoch": 1.0405827263267429,
	"grad_norm": 1.4969497919082642,
	"learning_rate": 4.480228928199792e-05,
	"loss": 1.27803369140625,
	"step": 1000
	},
	{
	"epoch": 1.5608740894901145,
	"grad_norm": 1.6620995998382568,
	"learning_rate": 4.220083246618106e-05,
	"loss": 1.0915474853515625,
	"step": 1500
	},
	{
	"epoch": 2.0811654526534857,
	"grad_norm": 1.6661252975463867,
	"learning_rate": 3.959937565036421e-05,
	"loss": 0.9975990600585938,
	"step": 2000
	},
	{
	"epoch": 2.6014568158168574,
	"grad_norm": 1.3287609815597534,
	"learning_rate": 3.6997918834547346e-05,
	"loss": 0.9085905151367187,
	"step": 2500
	},
	{
	"epoch": 3.121748178980229,
	"grad_norm": 1.5800344944000244,
	"learning_rate": 3.439646201873049e-05,
	"loss": 0.8760296020507813,
	"step": 3000
	},
	{
	"epoch": 3.6420395421436003,
	"grad_norm": 1.364020586013794,
	"learning_rate": 3.179500520291364e-05,
	"loss": 0.8017211303710937,
	"step": 3500
	},
	{
	"epoch": 4.1623309053069715,
	"grad_norm": 1.3175318241119385,
	"learning_rate": 2.9193548387096776e-05,
	"loss": 0.7779053344726562,
	"step": 4000
	},
	{
	"epoch": 4.682622268470343,
	"grad_norm": 4.449261665344238,
	"learning_rate": 2.659209157127992e-05,
	"loss": 0.7223885498046875,
	"step": 4500
	},
	{
	"epoch": 5.202913631633715,
	"grad_norm": 1.5468088388442993,
	"learning_rate": 2.3990634755463058e-05,
	"loss": 0.6987474365234375,
	"step": 5000
	},
	{
	"epoch": 5.723204994797086,
	"grad_norm": 1.247883677482605,
	"learning_rate": 2.13891779396462e-05,
	"loss": 0.6626260986328125,
	"step": 5500
	},
	{
	"epoch": 6.243496357960458,
	"grad_norm": 1.5333998203277588,
	"learning_rate": 1.8787721123829346e-05,
	"loss": 0.6409296875,
	"step": 6000
	},
	{
	"epoch": 6.76378772112383,
	"grad_norm": 1.6951643228530884,
	"learning_rate": 1.618626430801249e-05,
	"loss": 0.6173243408203125,
	"step": 6500
	},
	{
	"epoch": 7.2840790842872005,
	"grad_norm": 1.4317214488983154,
	"learning_rate": 1.3584807492195631e-05,
	"loss": 0.5941461791992187,
	"step": 7000
	},
	{
	"epoch": 7.804370447450572,
	"grad_norm": 1.5687386989593506,
	"learning_rate": 1.0983350676378774e-05,
	"loss": 0.5830839233398437,
	"step": 7500
	},
	{
	"epoch": 8.324661810613943,
	"grad_norm": 1.5302783250808716,
	"learning_rate": 8.381893860561914e-06,
	"loss": 0.5607491455078125,
	"step": 8000
	},
	{
	"epoch": 8.844953173777315,
	"grad_norm": 1.4110559225082397,
	"learning_rate": 5.780437044745058e-06,
	"loss": 0.5564122924804688,
	"step": 8500
	},
	{
	"epoch": 9.365244536940686,
	"grad_norm": 1.3692632913589478,
	"learning_rate": 3.1789802289282e-06,
	"loss": 0.54335546875,
	"step": 9000
	},
	{
	"epoch": 9.885535900104058,
	"grad_norm": 1.0497645139694214,
	"learning_rate": 5.775234131113424e-07,
	"loss": 0.539821533203125,
	"step": 9500
	}
	],
	"logging_steps": 500,
	"max_steps": 9610,
	"num_input_tokens_seen": 0,
	"num_train_epochs": 10,
	"save_steps": 1000,
	"stateful_callbacks": {
	"TrainerControl": {
	"args": {
	"should_epoch_stop": false,
	"should_evaluate": false,
	"should_log": false,
	"should_save": true,
	"should_training_stop": true
	},
	"attributes": {}
	}
	},
	"total_flos": 2.218510905176064e+16,
	"train_batch_size": 16,
	"trial_name": null,
	"trial_params": null
	}