{
"best_global_step": 2500,
"best_metric": 0.08692806959152222,
"best_model_checkpoint": "D:\\Task_design\\Topic\\willingness_train\\outputs\\qwen7b-lora-topic_willingness\\checkpoint-2500",
"epoch": 0.05488865835652379,
"eval_steps": 1250,
"global_step": 2500,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.001097773167130476,
"grad_norm": 6656.0,
"learning_rate": 7.168983174835406e-06,
"loss": 63.2808,
"step": 50
},
{
"epoch": 0.002195546334260952,
"grad_norm": 3392.0,
"learning_rate": 1.4484272128749086e-05,
"loss": 37.1328,
"step": 100
},
{
"epoch": 0.0032933195013914274,
"grad_norm": 125.0,
"learning_rate": 2.1799561082662766e-05,
"loss": 3.812,
"step": 150
},
{
"epoch": 0.004391092668521904,
"grad_norm": 604.0,
"learning_rate": 2.9114850036576446e-05,
"loss": 2.6617,
"step": 200
},
{
"epoch": 0.005488865835652379,
"grad_norm": 314.0,
"learning_rate": 3.6430138990490126e-05,
"loss": 1.4918,
"step": 250
},
{
"epoch": 0.006586639002782855,
"grad_norm": 141.0,
"learning_rate": 4.3745427944403806e-05,
"loss": 0.7991,
"step": 300
},
{
"epoch": 0.007684412169913331,
"grad_norm": 478.0,
"learning_rate": 5.1060716898317486e-05,
"loss": 0.8998,
"step": 350
},
{
"epoch": 0.008782185337043807,
"grad_norm": 688.0,
"learning_rate": 5.8376005852231167e-05,
"loss": 0.4973,
"step": 400
},
{
"epoch": 0.009879958504174282,
"grad_norm": 900.0,
"learning_rate": 6.569129480614485e-05,
"loss": 1.6899,
"step": 450
},
{
"epoch": 0.010977731671304758,
"grad_norm": 724.0,
"learning_rate": 7.300658376005852e-05,
"loss": 1.1575,
"step": 500
},
{
"epoch": 0.012075504838435233,
"grad_norm": 51.0,
"learning_rate": 8.032187271397221e-05,
"loss": 0.6105,
"step": 550
},
{
"epoch": 0.01317327800556571,
"grad_norm": 100.5,
"learning_rate": 8.763716166788588e-05,
"loss": 0.3713,
"step": 600
},
{
"epoch": 0.014271051172696185,
"grad_norm": 430.0,
"learning_rate": 9.495245062179955e-05,
"loss": 0.4425,
"step": 650
},
{
"epoch": 0.015368824339826661,
"grad_norm": 139.0,
"learning_rate": 0.00010226773957571325,
"loss": 0.4892,
"step": 700
},
{
"epoch": 0.01646659750695714,
"grad_norm": 524.0,
"learning_rate": 0.00010958302852962691,
"loss": 1.1915,
"step": 750
},
{
"epoch": 0.017564370674087615,
"grad_norm": 41.5,
"learning_rate": 0.00011689831748354061,
"loss": 0.6968,
"step": 800
},
{
"epoch": 0.01866214384121809,
"grad_norm": 240.0,
"learning_rate": 0.0001242136064374543,
"loss": 1.25,
"step": 850
},
{
"epoch": 0.019759917008348563,
"grad_norm": 15.875,
"learning_rate": 0.00013152889539136796,
"loss": 0.7973,
"step": 900
},
{
"epoch": 0.02085769017547904,
"grad_norm": 298.0,
"learning_rate": 0.00013884418434528163,
"loss": 1.183,
"step": 950
},
{
"epoch": 0.021955463342609515,
"grad_norm": 330.0,
"learning_rate": 0.00014615947329919534,
"loss": 0.6259,
"step": 1000
},
{
"epoch": 0.02305323650973999,
"grad_norm": 330.0,
"learning_rate": 0.00015347476225310898,
"loss": 0.49,
"step": 1050
},
{
"epoch": 0.024151009676870467,
"grad_norm": 83.5,
"learning_rate": 0.00016079005120702268,
"loss": 0.6314,
"step": 1100
},
{
"epoch": 0.025248782844000943,
"grad_norm": 129.0,
"learning_rate": 0.00016810534016093638,
"loss": 0.6512,
"step": 1150
},
{
"epoch": 0.02634655601113142,
"grad_norm": 142.0,
"learning_rate": 0.00017542062911485006,
"loss": 0.665,
"step": 1200
},
{
"epoch": 0.027444329178261895,
"grad_norm": 104.5,
"learning_rate": 0.00018273591806876373,
"loss": 0.3758,
"step": 1250
},
{
"epoch": 0.027444329178261895,
"eval_loss": 0.0266411192715168,
"eval_mae": 0.13352739810943604,
"eval_rmse": 0.163221076130867,
"eval_runtime": 173.4238,
"eval_samples_per_second": 11.532,
"eval_steps_per_second": 11.532,
"step": 1250
},
{
"epoch": 0.02854210234539237,
"grad_norm": 75.0,
"learning_rate": 0.0001900512070226774,
"loss": 0.3099,
"step": 1300
},
{
"epoch": 0.029639875512522847,
"grad_norm": 204.0,
"learning_rate": 0.0001973664959765911,
"loss": 0.2033,
"step": 1350
},
{
"epoch": 0.030737648679653323,
"grad_norm": 32.75,
"learning_rate": 0.00019985513807152557,
"loss": 0.8338,
"step": 1400
},
{
"epoch": 0.0318354218467838,
"grad_norm": 55.5,
"learning_rate": 0.00019962879130828429,
"loss": 0.2504,
"step": 1450
},
{
"epoch": 0.03293319501391428,
"grad_norm": 130.0,
"learning_rate": 0.00019940244454504303,
"loss": 0.6087,
"step": 1500
},
{
"epoch": 0.03403096818104475,
"grad_norm": 74.0,
"learning_rate": 0.00019917609778180172,
"loss": 0.3358,
"step": 1550
},
{
"epoch": 0.03512874134817523,
"grad_norm": 16.5,
"learning_rate": 0.00019894975101856043,
"loss": 0.2247,
"step": 1600
},
{
"epoch": 0.0362265145153057,
"grad_norm": 42.5,
"learning_rate": 0.00019872340425531918,
"loss": 0.251,
"step": 1650
},
{
"epoch": 0.03732428768243618,
"grad_norm": 139.0,
"learning_rate": 0.00019849705749207786,
"loss": 0.3527,
"step": 1700
},
{
"epoch": 0.038422060849566654,
"grad_norm": 126.5,
"learning_rate": 0.00019827071072883658,
"loss": 0.8417,
"step": 1750
},
{
"epoch": 0.03951983401669713,
"grad_norm": 30.5,
"learning_rate": 0.0001980443639655953,
"loss": 0.4691,
"step": 1800
},
{
"epoch": 0.040617607183827606,
"grad_norm": 125.0,
"learning_rate": 0.000197818017202354,
"loss": 0.2967,
"step": 1850
},
{
"epoch": 0.04171538035095808,
"grad_norm": 148.0,
"learning_rate": 0.00019759167043911273,
"loss": 0.2911,
"step": 1900
},
{
"epoch": 0.04281315351808856,
"grad_norm": 42.0,
"learning_rate": 0.00019736532367587144,
"loss": 0.2031,
"step": 1950
},
{
"epoch": 0.04391092668521903,
"grad_norm": 70.5,
"learning_rate": 0.00019713897691263016,
"loss": 0.2217,
"step": 2000
},
{
"epoch": 0.04500869985234951,
"grad_norm": 93.0,
"learning_rate": 0.00019691263014938887,
"loss": 0.2336,
"step": 2050
},
{
"epoch": 0.04610647301947998,
"grad_norm": 52.75,
"learning_rate": 0.0001966862833861476,
"loss": 0.3005,
"step": 2100
},
{
"epoch": 0.04720424618661046,
"grad_norm": 25.625,
"learning_rate": 0.0001964599366229063,
"loss": 0.1911,
"step": 2150
},
{
"epoch": 0.048302019353740934,
"grad_norm": 9.125,
"learning_rate": 0.00019623358985966502,
"loss": 0.3816,
"step": 2200
},
{
"epoch": 0.04939979252087141,
"grad_norm": 3.359375,
"learning_rate": 0.00019600724309642374,
"loss": 0.1897,
"step": 2250
},
{
"epoch": 0.050497565688001886,
"grad_norm": 56.75,
"learning_rate": 0.00019578089633318245,
"loss": 0.1517,
"step": 2300
},
{
"epoch": 0.051595338855132365,
"grad_norm": 15.0625,
"learning_rate": 0.00019555454956994117,
"loss": 0.3616,
"step": 2350
},
{
"epoch": 0.05269311202226284,
"grad_norm": 152.0,
"learning_rate": 0.00019532820280669988,
"loss": 0.3372,
"step": 2400
},
{
"epoch": 0.05379088518939332,
"grad_norm": 21.875,
"learning_rate": 0.00019510185604345857,
"loss": 0.1929,
"step": 2450
},
{
"epoch": 0.05488865835652379,
"grad_norm": 45.75,
"learning_rate": 0.0001948755092802173,
"loss": 0.1391,
"step": 2500
},
{
"epoch": 0.05488865835652379,
"eval_loss": 0.011679456569254398,
"eval_mae": 0.08692806959152222,
"eval_rmse": 0.10807153582572937,
"eval_runtime": 180.831,
"eval_samples_per_second": 11.06,
"eval_steps_per_second": 11.06,
"step": 2500
}
],
"logging_steps": 50,
"max_steps": 45547,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 1250,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 2.0170749247488e+17,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}
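
The loss-curve PNGs omitted from this checkpoint can be reconstructed directly from log_history above: training entries carry "loss", evaluation entries carry "eval_loss". A minimal sketch, assuming this file is saved as trainer_state.json and matplotlib is installed (file and output names here are illustrative, not part of the checkpoint):

import json

import matplotlib.pyplot as plt

# Load the state dumped by transformers.Trainer (this file).
with open("trainer_state.json") as f:
    state = json.load(f)

# Training log entries have a "loss" key; eval entries have "eval_loss".
train = [(e["step"], e["loss"]) for e in state["log_history"] if "loss" in e]
evals = [(e["step"], e["eval_loss"]) for e in state["log_history"] if "eval_loss" in e]

fig, ax = plt.subplots()
ax.plot(*zip(*train), label="train loss")
if evals:
    ax.scatter(*zip(*evals), color="red", zorder=3, label="eval loss")
ax.set_xlabel("global step")
ax.set_ylabel("loss")
ax.set_yscale("log")  # early loss (~63 at step 50) would otherwise flatten the tail (~0.14 at step 2500)
ax.legend()
fig.savefig("loss_curve.png", dpi=150)

The log scale is a deliberate choice: the first 150 steps drop from roughly 63 to 4, and without it the later descent toward 0.14 at step 2500 is invisible.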