Text Generation
PEFT
Safetensors
English
qlora
lora
structured-output
qwen_5 / trainer_state.json
CFGauss's picture
Upload LoRA adapter (README written by author)
d1dcd9a verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0417287630402385,
"eval_steps": 50,
"global_step": 350,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.029806259314456036,
"grad_norm": 2.8659629606409e-05,
"learning_rate": 1.3333333333333333e-05,
"loss": 1.0016,
"step": 10
},
{
"epoch": 0.05961251862891207,
"grad_norm": 1.3777846106677316e-05,
"learning_rate": 2.814814814814815e-05,
"loss": 0.59,
"step": 20
},
{
"epoch": 0.08941877794336811,
"grad_norm": 7.97840766608715e-06,
"learning_rate": 4.296296296296296e-05,
"loss": 0.6772,
"step": 30
},
{
"epoch": 0.11922503725782414,
"grad_norm": 6.855416359030642e-06,
"learning_rate": 5.7777777777777776e-05,
"loss": 0.6207,
"step": 40
},
{
"epoch": 0.14903129657228018,
"grad_norm": 8.57202394399792e-06,
"learning_rate": 7.25925925925926e-05,
"loss": 0.6395,
"step": 50
},
{
"epoch": 0.14903129657228018,
"eval_loss": 0.5846871733665466,
"eval_runtime": 87.5403,
"eval_samples_per_second": 3.233,
"eval_steps_per_second": 1.622,
"step": 50
},
{
"epoch": 0.17883755588673622,
"grad_norm": 3.3820235785242403e-06,
"learning_rate": 8.740740740740741e-05,
"loss": 0.4937,
"step": 60
},
{
"epoch": 0.20864381520119224,
"grad_norm": 5.906346359552117e-06,
"learning_rate": 0.00010222222222222222,
"loss": 0.467,
"step": 70
},
{
"epoch": 0.23845007451564829,
"grad_norm": 5.968211553408764e-06,
"learning_rate": 0.00011703703703703704,
"loss": 0.6052,
"step": 80
},
{
"epoch": 0.26825633383010433,
"grad_norm": 6.654247954429593e-06,
"learning_rate": 0.00013185185185185186,
"loss": 0.4269,
"step": 90
},
{
"epoch": 0.29806259314456035,
"grad_norm": 1.4780930541746784e-05,
"learning_rate": 0.00014666666666666666,
"loss": 0.4415,
"step": 100
},
{
"epoch": 0.29806259314456035,
"eval_loss": 0.49561959505081177,
"eval_runtime": 86.7616,
"eval_samples_per_second": 3.262,
"eval_steps_per_second": 1.637,
"step": 100
},
{
"epoch": 0.32786885245901637,
"grad_norm": 5.26007761436631e-06,
"learning_rate": 0.0001614814814814815,
"loss": 0.4201,
"step": 110
},
{
"epoch": 0.35767511177347244,
"grad_norm": 1.0745498912001494e-05,
"learning_rate": 0.0001762962962962963,
"loss": 0.5687,
"step": 120
},
{
"epoch": 0.38748137108792846,
"grad_norm": 9.13943767955061e-06,
"learning_rate": 0.00019111111111111114,
"loss": 0.4544,
"step": 130
},
{
"epoch": 0.4172876304023845,
"grad_norm": 9.74733575276332e-06,
"learning_rate": 0.00019999459826567048,
"loss": 0.3984,
"step": 140
},
{
"epoch": 0.44709388971684055,
"grad_norm": 7.19069566912367e-06,
"learning_rate": 0.00019993383545625465,
"loss": 0.3551,
"step": 150
},
{
"epoch": 0.44709388971684055,
"eval_loss": 0.44105133414268494,
"eval_runtime": 86.7076,
"eval_samples_per_second": 3.264,
"eval_steps_per_second": 1.638,
"step": 150
},
{
"epoch": 0.47690014903129657,
"grad_norm": 5.66791504752473e-06,
"learning_rate": 0.00019980559883241722,
"loss": 0.3437,
"step": 160
},
{
"epoch": 0.5067064083457526,
"grad_norm": 9.916246199281886e-06,
"learning_rate": 0.0001996099749775874,
"loss": 0.533,
"step": 170
},
{
"epoch": 0.5365126676602087,
"grad_norm": 6.7572823354566935e-06,
"learning_rate": 0.00019934709597403352,
"loss": 0.4875,
"step": 180
},
{
"epoch": 0.5663189269746647,
"grad_norm": 2.7519972718437202e-05,
"learning_rate": 0.00019901713931368332,
"loss": 0.4088,
"step": 190
},
{
"epoch": 0.5961251862891207,
"grad_norm": 6.2564413383370265e-06,
"learning_rate": 0.00019862032777828405,
"loss": 0.3734,
"step": 200
},
{
"epoch": 0.5961251862891207,
"eval_loss": 0.416751503944397,
"eval_runtime": 86.855,
"eval_samples_per_second": 3.258,
"eval_steps_per_second": 1.635,
"step": 200
},
{
"epoch": 0.6259314456035767,
"grad_norm": 6.829235189798055e-06,
"learning_rate": 0.00019815692928898347,
"loss": 0.4013,
"step": 210
},
{
"epoch": 0.6557377049180327,
"grad_norm": 2.6349375730205793e-06,
"learning_rate": 0.00019762725672543371,
"loss": 0.439,
"step": 220
},
{
"epoch": 0.6855439642324889,
"grad_norm": 6.470134849223541e-06,
"learning_rate": 0.00019703166771453952,
"loss": 0.3611,
"step": 230
},
{
"epoch": 0.7153502235469449,
"grad_norm": 4.913066732115112e-06,
"learning_rate": 0.0001963705643889941,
"loss": 0.3843,
"step": 240
},
{
"epoch": 0.7451564828614009,
"grad_norm": 8.839782822178677e-06,
"learning_rate": 0.00019564439311576512,
"loss": 0.4593,
"step": 250
},
{
"epoch": 0.7451564828614009,
"eval_loss": 0.3998318612575531,
"eval_runtime": 86.6219,
"eval_samples_per_second": 3.267,
"eval_steps_per_second": 1.639,
"step": 250
},
{
"epoch": 0.7749627421758569,
"grad_norm": 6.905674126755912e-06,
"learning_rate": 0.00019485364419471454,
"loss": 0.3549,
"step": 260
},
{
"epoch": 0.8047690014903129,
"grad_norm": 1.1015033123840112e-05,
"learning_rate": 0.00019399885152755558,
"loss": 0.33,
"step": 270
},
{
"epoch": 0.834575260804769,
"grad_norm": 7.4981344369007275e-06,
"learning_rate": 0.00019308059225737014,
"loss": 0.464,
"step": 280
},
{
"epoch": 0.8643815201192251,
"grad_norm": 1.2525980309874285e-05,
"learning_rate": 0.00019209948637893088,
"loss": 0.4893,
"step": 290
},
{
"epoch": 0.8941877794336811,
"grad_norm": 8.533593245374504e-06,
"learning_rate": 0.00019105619632008982,
"loss": 0.3002,
"step": 300
},
{
"epoch": 0.8941877794336811,
"eval_loss": 0.38659366965293884,
"eval_runtime": 86.718,
"eval_samples_per_second": 3.263,
"eval_steps_per_second": 1.637,
"step": 300
},
{
"epoch": 0.9239940387481371,
"grad_norm": 6.511543233500561e-06,
"learning_rate": 0.0001899514264945173,
"loss": 0.3519,
"step": 310
},
{
"epoch": 0.9538002980625931,
"grad_norm": 9.529394446872175e-06,
"learning_rate": 0.00018878592282609228,
"loss": 0.2376,
"step": 320
},
{
"epoch": 0.9836065573770492,
"grad_norm": 5.924814558966318e-06,
"learning_rate": 0.00018756047224526606,
"loss": 0.3868,
"step": 330
},
{
"epoch": 1.0119225037257824,
"grad_norm": 7.456989806087222e-06,
"learning_rate": 0.0001862759021577385,
"loss": 0.4924,
"step": 340
},
{
"epoch": 1.0417287630402385,
"grad_norm": 8.004604751477018e-06,
"learning_rate": 0.00018493307988580652,
"loss": 0.3768,
"step": 350
},
{
"epoch": 1.0417287630402385,
"eval_loss": 0.3797300159931183,
"eval_runtime": 86.5928,
"eval_samples_per_second": 3.268,
"eval_steps_per_second": 1.64,
"step": 350
}
],
"logging_steps": 10,
"max_steps": 1344,
"num_input_tokens_seen": 0,
"num_train_epochs": 4,
"save_steps": 50,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 6.971853201788314e+16,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}