{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.0,
  "eval_steps": 500,
  "global_step": 945,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.05292405398253506,
      "grad_norm": 0.22462093830108643,
      "learning_rate": 0.00018962962962962965,
      "loss": 1.5354,
      "step": 50
    },
    {
      "epoch": 0.10584810796507012,
      "grad_norm": 0.2346465140581131,
      "learning_rate": 0.00017904761904761907,
      "loss": 1.4471,
      "step": 100
    },
    {
      "epoch": 0.15877216194760518,
      "grad_norm": 0.22219829261302948,
      "learning_rate": 0.00016846560846560849,
      "loss": 1.4369,
      "step": 150
    },
    {
      "epoch": 0.21169621593014024,
      "grad_norm": 0.2058716118335724,
      "learning_rate": 0.0001578835978835979,
      "loss": 1.4188,
      "step": 200
    },
    {
      "epoch": 0.2646202699126753,
      "grad_norm": 0.24191923439502716,
      "learning_rate": 0.00014730158730158732,
      "loss": 1.4202,
      "step": 250
    },
    {
      "epoch": 0.31754432389521037,
      "grad_norm": 0.21737360954284668,
      "learning_rate": 0.00013671957671957673,
      "loss": 1.4195,
      "step": 300
    },
    {
      "epoch": 0.37046837787774545,
      "grad_norm": 0.22670969367027283,
      "learning_rate": 0.00012613756613756615,
      "loss": 1.4092,
      "step": 350
    },
    {
      "epoch": 0.42339243186028047,
      "grad_norm": 0.2364790141582489,
      "learning_rate": 0.00011555555555555555,
      "loss": 1.4003,
      "step": 400
    },
    {
      "epoch": 0.47631648584281555,
      "grad_norm": 0.23698057234287262,
      "learning_rate": 0.00010497354497354497,
      "loss": 1.4084,
      "step": 450
    },
    {
      "epoch": 0.5292405398253506,
      "grad_norm": 0.23969094455242157,
      "learning_rate": 9.43915343915344e-05,
      "loss": 1.3919,
      "step": 500
    },
    {
      "epoch": 0.5821645938078857,
      "grad_norm": 0.24043263494968414,
      "learning_rate": 8.380952380952382e-05,
      "loss": 1.3694,
      "step": 550
    },
    {
      "epoch": 0.6350886477904207,
      "grad_norm": 0.25561365485191345,
      "learning_rate": 7.322751322751323e-05,
      "loss": 1.3791,
      "step": 600
    },
    {
      "epoch": 0.6880127017729558,
      "grad_norm": 0.23547253012657166,
      "learning_rate": 6.264550264550265e-05,
      "loss": 1.3867,
      "step": 650
    },
    {
      "epoch": 0.7409367557554909,
      "grad_norm": 0.25962698459625244,
      "learning_rate": 5.206349206349207e-05,
      "loss": 1.3826,
      "step": 700
    },
    {
      "epoch": 0.793860809738026,
      "grad_norm": 0.24111054837703705,
      "learning_rate": 4.148148148148148e-05,
      "loss": 1.3719,
      "step": 750
    },
    {
      "epoch": 0.8467848637205609,
      "grad_norm": 0.2652861773967743,
      "learning_rate": 3.08994708994709e-05,
      "loss": 1.3895,
      "step": 800
    },
    {
      "epoch": 0.899708917703096,
      "grad_norm": 0.24232152104377747,
      "learning_rate": 2.031746031746032e-05,
      "loss": 1.3756,
      "step": 850
    },
    {
      "epoch": 0.9526329716856311,
      "grad_norm": 0.2373017519712448,
      "learning_rate": 9.735449735449737e-06,
      "loss": 1.3885,
      "step": 900
    }
  ],
  "logging_steps": 50,
  "max_steps": 945,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 4.175813313508147e+16,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}