faiqwild
/

Qwen3-8B-unsloth-bnb-4bit-Instruct-PRD-Classification

Text Generation

Model card Files Files and versions

Qwen3-8B-unsloth-bnb-4bit-Instruct-PRD-Classification / trainer_state.json

faiqwild's picture

Upload folder using huggingface_hub

c9b9fb8 verified 5 months ago

history blame contribute delete

4.5 kB

	{
	"best_global_step": null,
	"best_metric": null,
	"best_model_checkpoint": null,
	"epoch": 0.5784526391901663,
	"eval_steps": 50,
	"global_step": 200,
	"is_hyper_param_search": false,
	"is_local_process_zero": true,
	"is_world_process_zero": true,
	"log_history": [
	{
	"epoch": 0.028922631959508314,
	"grad_norm": 0.3374865651130676,
	"learning_rate": 0.00018,
	"loss": 2.0231,
	"step": 10
	},
	{
	"epoch": 0.05784526391901663,
	"grad_norm": 0.20978349447250366,
	"learning_rate": 0.0001905263157894737,
	"loss": 1.1149,
	"step": 20
	},
	{
	"epoch": 0.08676789587852494,
	"grad_norm": 0.11324426531791687,
	"learning_rate": 0.00018,
	"loss": 0.8403,
	"step": 30
	},
	{
	"epoch": 0.11569052783803326,
	"grad_norm": 0.10928981751203537,
	"learning_rate": 0.00016947368421052633,
	"loss": 0.7539,
	"step": 40
	},
	{
	"epoch": 0.14461315979754158,
	"grad_norm": 0.08820035308599472,
	"learning_rate": 0.00015894736842105264,
	"loss": 0.7341,
	"step": 50
	},
	{
	"epoch": 0.1735357917570499,
	"grad_norm": 0.1019802913069725,
	"learning_rate": 0.00014842105263157895,
	"loss": 0.6881,
	"step": 60
	},
	{
	"epoch": 0.2024584237165582,
	"grad_norm": 0.10406459867954254,
	"learning_rate": 0.00013789473684210527,
	"loss": 0.6202,
	"step": 70
	},
	{
	"epoch": 0.2313810556760665,
	"grad_norm": 0.1173112764954567,
	"learning_rate": 0.00012736842105263158,
	"loss": 0.6308,
	"step": 80
	},
	{
	"epoch": 0.2603036876355748,
	"grad_norm": 0.11734358966350555,
	"learning_rate": 0.00011684210526315791,
	"loss": 0.6048,
	"step": 90
	},
	{
	"epoch": 0.28922631959508316,
	"grad_norm": 0.13270510733127594,
	"learning_rate": 0.00010631578947368421,
	"loss": 0.6347,
	"step": 100
	},
	{
	"epoch": 0.31814895155459144,
	"grad_norm": 0.127024307847023,
	"learning_rate": 9.578947368421052e-05,
	"loss": 0.5998,
	"step": 110
	},
	{
	"epoch": 0.3470715835140998,
	"grad_norm": 0.1302337944507599,
	"learning_rate": 8.526315789473685e-05,
	"loss": 0.5987,
	"step": 120
	},
	{
	"epoch": 0.3759942154736081,
	"grad_norm": 0.11779110878705978,
	"learning_rate": 7.473684210526316e-05,
	"loss": 0.5897,
	"step": 130
	},
	{
	"epoch": 0.4049168474331164,
	"grad_norm": 0.12403657287359238,
	"learning_rate": 6.421052631578948e-05,
	"loss": 0.5914,
	"step": 140
	},
	{
	"epoch": 0.43383947939262474,
	"grad_norm": 0.12974581122398376,
	"learning_rate": 5.368421052631579e-05,
	"loss": 0.603,
	"step": 150
	},
	{
	"epoch": 0.462762111352133,
	"grad_norm": 0.12593378126621246,
	"learning_rate": 4.3157894736842105e-05,
	"loss": 0.5707,
	"step": 160
	},
	{
	"epoch": 0.49168474331164136,
	"grad_norm": 0.1275719851255417,
	"learning_rate": 3.2631578947368426e-05,
	"loss": 0.546,
	"step": 170
	},
	{
	"epoch": 0.5206073752711496,
	"grad_norm": 0.1218939870595932,
	"learning_rate": 2.2105263157894736e-05,
	"loss": 0.5871,
	"step": 180
	},
	{
	"epoch": 0.549530007230658,
	"grad_norm": 0.10984767973423004,
	"learning_rate": 1.1578947368421053e-05,
	"loss": 0.5978,
	"step": 190
	},
	{
	"epoch": 0.5784526391901663,
	"grad_norm": 0.1242329478263855,
	"learning_rate": 1.0526315789473685e-06,
	"loss": 0.5879,
	"step": 200
	},
	{
	"epoch": 0.5784526391901663,
	"step": 200,
	"total_flos": 4.2983824905968026e+17,
	"train_loss": 0.7258491277694702,
	"train_runtime": 2490.1271,
	"train_samples_per_second": 2.57,
	"train_steps_per_second": 0.08
	}
	],
	"logging_steps": 10,
	"max_steps": 200,
	"num_input_tokens_seen": 0,
	"num_train_epochs": 1,
	"save_steps": 200,
	"stateful_callbacks": {
	"TrainerControl": {
	"args": {
	"should_epoch_stop": false,
	"should_evaluate": false,
	"should_log": false,
	"should_save": true,
	"should_training_stop": true
	},
	"attributes": {}
	}
	},
	"total_flos": 4.2983824905968026e+17,
	"train_batch_size": 8,
	"trial_name": null,
	"trial_params": null
	}