{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 3.0,
  "eval_steps": 500,
  "global_step": 10146,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.14784151389710232,
      "grad_norm": 3.7569432258605957,
      "learning_rate": 1.9016361127537947e-05,
      "loss": 0.156098388671875,
      "step": 500
    },
    {
      "epoch": 0.29568302779420463,
      "grad_norm": 0.009413833729922771,
      "learning_rate": 1.8030751034890598e-05,
      "loss": 0.09492064666748047,
      "step": 1000
    },
    {
      "epoch": 0.4435245416913069,
      "grad_norm": 0.07881546020507812,
      "learning_rate": 1.704514094224325e-05,
      "loss": 0.07839873504638672,
      "step": 1500
    },
    {
      "epoch": 0.5913660555884093,
      "grad_norm": 0.015427447855472565,
      "learning_rate": 1.6059530849595903e-05,
      "loss": 0.07814859008789063,
      "step": 2000
    },
    {
      "epoch": 0.7392075694855116,
      "grad_norm": 14.57061767578125,
      "learning_rate": 1.5073920756948552e-05,
      "loss": 0.08136223602294922,
      "step": 2500
    },
    {
      "epoch": 0.8870490833826138,
      "grad_norm": 1.0001031160354614,
      "learning_rate": 1.4088310664301204e-05,
      "loss": 0.07243869018554687,
      "step": 3000
    },
    {
      "epoch": 1.0348905972797162,
      "grad_norm": 30.364643096923828,
      "learning_rate": 1.3102700571653855e-05,
      "loss": 0.04469930648803711,
      "step": 3500
    },
    {
      "epoch": 1.1827321111768185,
      "grad_norm": 0.0182269848883152,
      "learning_rate": 1.2117090479006506e-05,
      "loss": 0.03440779495239258,
      "step": 4000
    },
    {
      "epoch": 1.3305736250739209,
      "grad_norm": 0.0005539056146517396,
      "learning_rate": 1.1131480386359156e-05,
      "loss": 0.0230482120513916,
      "step": 4500
    },
    {
      "epoch": 1.4784151389710232,
      "grad_norm": 0.739719033241272,
      "learning_rate": 1.0145870293711809e-05,
      "loss": 0.028042703628540038,
      "step": 5000
    },
    {
      "epoch": 1.6262566528681255,
      "grad_norm": 0.0063209934160113335,
      "learning_rate": 9.16026020106446e-06,
      "loss": 0.02548642349243164,
      "step": 5500
    },
    {
      "epoch": 1.7740981667652278,
      "grad_norm": 0.0015545282512903214,
      "learning_rate": 8.17465010841711e-06,
      "loss": 0.023311178207397462,
      "step": 6000
    },
    {
      "epoch": 1.9219396806623301,
      "grad_norm": 0.0024629898834973574,
      "learning_rate": 7.189040015769762e-06,
      "loss": 0.019577335357666016,
      "step": 6500
    },
    {
      "epoch": 2.0697811945594324,
      "grad_norm": 0.0016239744145423174,
      "learning_rate": 6.203429923122414e-06,
      "loss": 0.016429786682128907,
      "step": 7000
    },
    {
      "epoch": 2.2176227084565348,
      "grad_norm": 33.945587158203125,
      "learning_rate": 5.217819830475065e-06,
      "loss": 0.00910054111480713,
      "step": 7500
    },
    {
      "epoch": 2.365464222353637,
      "grad_norm": 0.0004062611551489681,
      "learning_rate": 4.2322097378277155e-06,
      "loss": 0.008913342475891112,
      "step": 8000
    },
    {
      "epoch": 2.5133057362507394,
      "grad_norm": 0.00030476730898953974,
      "learning_rate": 3.246599645180367e-06,
      "loss": 0.006344354152679444,
      "step": 8500
    },
    {
      "epoch": 2.6611472501478417,
      "grad_norm": 0.0006314264028333127,
      "learning_rate": 2.260989552533018e-06,
      "loss": 0.0064224090576171875,
      "step": 9000
    },
    {
      "epoch": 2.808988764044944,
      "grad_norm": 0.0005876660579815507,
      "learning_rate": 1.2753794598856695e-06,
      "loss": 0.007262358665466309,
      "step": 9500
    },
    {
      "epoch": 2.9568302779420463,
      "grad_norm": 0.0005308115505613387,
      "learning_rate": 2.897693672383205e-07,
      "loss": 0.004996685981750488,
      "step": 10000
    },
    {
      "epoch": 3.0,
      "step": 10146,
      "total_flos": 9.454816453507891e+16,
      "train_loss": 0.040564874114779687,
      "train_runtime": 20432.9457,
      "train_samples_per_second": 4.965,
      "train_steps_per_second": 0.497
    }
  ],
  "logging_steps": 500,
  "max_steps": 10146,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 9.454816453507891e+16,
  "train_batch_size": 10,
  "trial_name": null,
  "trial_params": null
}