{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 3.0,
"eval_steps": 500,
"global_step": 900,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.03340989768218835,
"grad_norm": 8.104019844032948,
"learning_rate": 1.0000000000000002e-06,
"loss": 1.6231,
"step": 10
},
{
"epoch": 0.0668197953643767,
"grad_norm": 3.4693710845262706,
"learning_rate": 2.1111111111111114e-06,
"loss": 1.489,
"step": 20
},
{
"epoch": 0.10022969304656504,
"grad_norm": 2.3368868000890224,
"learning_rate": 3.2222222222222227e-06,
"loss": 1.3346,
"step": 30
},
{
"epoch": 0.1336395907287534,
"grad_norm": 1.9194151908776884,
"learning_rate": 4.333333333333334e-06,
"loss": 1.2275,
"step": 40
},
{
"epoch": 0.16704948841094175,
"grad_norm": 1.7081126245466087,
"learning_rate": 5.444444444444445e-06,
"loss": 1.1712,
"step": 50
},
{
"epoch": 0.20045938609313008,
"grad_norm": 1.7461363381025887,
"learning_rate": 6.555555555555556e-06,
"loss": 1.1295,
"step": 60
},
{
"epoch": 0.23386928377531843,
"grad_norm": 1.6467172594579205,
"learning_rate": 7.666666666666667e-06,
"loss": 1.1083,
"step": 70
},
{
"epoch": 0.2672791814575068,
"grad_norm": 1.7574333391127064,
"learning_rate": 8.777777777777778e-06,
"loss": 1.0889,
"step": 80
},
{
"epoch": 0.3006890791396951,
"grad_norm": 1.5506512108382777,
"learning_rate": 9.88888888888889e-06,
"loss": 1.0634,
"step": 90
},
{
"epoch": 0.3340989768218835,
"grad_norm": 1.637392091372257,
"learning_rate": 9.99695413509548e-06,
"loss": 1.0842,
"step": 100
},
{
"epoch": 0.3675088745040718,
"grad_norm": 1.640058641808361,
"learning_rate": 9.986429983545127e-06,
"loss": 1.0614,
"step": 110
},
{
"epoch": 0.40091877218626015,
"grad_norm": 1.6505780161101127,
"learning_rate": 9.968405767630857e-06,
"loss": 1.0735,
"step": 120
},
{
"epoch": 0.43432866986844854,
"grad_norm": 1.6260516030561405,
"learning_rate": 9.942908597485558e-06,
"loss": 1.0568,
"step": 130
},
{
"epoch": 0.46773856755063686,
"grad_norm": 1.4772413330384198,
"learning_rate": 9.909976823275143e-06,
"loss": 1.0812,
"step": 140
},
{
"epoch": 0.5011484652328252,
"grad_norm": 1.5820640912826012,
"learning_rate": 9.869659977516261e-06,
"loss": 1.0285,
"step": 150
},
{
"epoch": 0.5345583629150136,
"grad_norm": 1.591148450789976,
"learning_rate": 9.822018700574696e-06,
"loss": 1.0376,
"step": 160
},
{
"epoch": 0.567968260597202,
"grad_norm": 1.5272109606251885,
"learning_rate": 9.767124649456484e-06,
"loss": 1.0283,
"step": 170
},
{
"epoch": 0.6013781582793902,
"grad_norm": 1.4986155360090707,
"learning_rate": 9.705060390028979e-06,
"loss": 1.0271,
"step": 180
},
{
"epoch": 0.6347880559615786,
"grad_norm": 1.5634069287930357,
"learning_rate": 9.635919272833938e-06,
"loss": 1.0261,
"step": 190
},
{
"epoch": 0.668197953643767,
"grad_norm": 1.4240257902836764,
"learning_rate": 9.559805292679445e-06,
"loss": 1.0091,
"step": 200
},
{
"epoch": 0.7016078513259553,
"grad_norm": 1.5540706999114942,
"learning_rate": 9.476832932221835e-06,
"loss": 1.0104,
"step": 210
},
{
"epoch": 0.7350177490081437,
"grad_norm": 1.5662551532037128,
"learning_rate": 9.38712698977291e-06,
"loss": 0.994,
"step": 220
},
{
"epoch": 0.768427646690332,
"grad_norm": 1.5052024562975517,
"learning_rate": 9.290822391591418e-06,
"loss": 1.0006,
"step": 230
},
{
"epoch": 0.8018375443725203,
"grad_norm": 1.6065362027025494,
"learning_rate": 9.188063988941147e-06,
"loss": 1.0096,
"step": 240
},
{
"epoch": 0.8352474420547087,
"grad_norm": 1.4428707337680362,
"learning_rate": 9.079006340220862e-06,
"loss": 0.9901,
"step": 250
},
{
"epoch": 0.8686573397368971,
"grad_norm": 1.5314846163892415,
"learning_rate": 8.963813478493788e-06,
"loss": 0.9863,
"step": 260
},
{
"epoch": 0.9020672374190855,
"grad_norm": 1.4351095172894568,
"learning_rate": 8.842658664766317e-06,
"loss": 1.0219,
"step": 270
},
{
"epoch": 0.9354771351012737,
"grad_norm": 1.507687415590904,
"learning_rate": 8.715724127386971e-06,
"loss": 1.0047,
"step": 280
},
{
"epoch": 0.9688870327834621,
"grad_norm": 1.5014644267583146,
"learning_rate": 8.58320078795768e-06,
"loss": 1.0026,
"step": 290
},
{
"epoch": 1.0,
"grad_norm": 1.425943640955651,
"learning_rate": 8.44528797416954e-06,
"loss": 0.9653,
"step": 300
},
{
"epoch": 1.0334098976821884,
"grad_norm": 1.4718810021174351,
"learning_rate": 8.302193119995038e-06,
"loss": 0.8486,
"step": 310
},
{
"epoch": 1.0668197953643768,
"grad_norm": 1.4077200885873793,
"learning_rate": 8.154131453687657e-06,
"loss": 0.8382,
"step": 320
},
{
"epoch": 1.1002296930465651,
"grad_norm": 1.5972201624551428,
"learning_rate": 8.001325674058124e-06,
"loss": 0.8283,
"step": 330
},
{
"epoch": 1.1336395907287533,
"grad_norm": 1.5226433404562436,
"learning_rate": 7.84400561551426e-06,
"loss": 0.8351,
"step": 340
},
{
"epoch": 1.1670494884109417,
"grad_norm": 1.5106301998037033,
"learning_rate": 7.68240790236819e-06,
"loss": 0.838,
"step": 350
},
{
"epoch": 1.20045938609313,
"grad_norm": 1.5215853204266103,
"learning_rate": 7.5167755929309e-06,
"loss": 0.8322,
"step": 360
},
{
"epoch": 1.2338692837753185,
"grad_norm": 1.5217322186980389,
"learning_rate": 7.347357813929455e-06,
"loss": 0.8227,
"step": 370
},
{
"epoch": 1.2672791814575068,
"grad_norm": 1.6755977423651072,
"learning_rate": 7.174409385796726e-06,
"loss": 0.8287,
"step": 380
},
{
"epoch": 1.300689079139695,
"grad_norm": 1.5944600963804338,
"learning_rate": 6.998190439397262e-06,
"loss": 0.8486,
"step": 390
},
{
"epoch": 1.3340989768218834,
"grad_norm": 1.5643959643386598,
"learning_rate": 6.818966024765758e-06,
"loss": 0.855,
"step": 400
},
{
"epoch": 1.3675088745040718,
"grad_norm": 1.5456085160299915,
"learning_rate": 6.637005712446622e-06,
"loss": 0.8664,
"step": 410
},
{
"epoch": 1.4009187721862602,
"grad_norm": 1.5082758452600278,
"learning_rate": 6.452583188034275e-06,
"loss": 0.8555,
"step": 420
},
{
"epoch": 1.4343286698684485,
"grad_norm": 1.6264152681857857,
"learning_rate": 6.26597584052401e-06,
"loss": 0.8425,
"step": 430
},
{
"epoch": 1.467738567550637,
"grad_norm": 1.6079625298586488,
"learning_rate": 6.077464345092601e-06,
"loss": 0.8463,
"step": 440
},
{
"epoch": 1.5011484652328253,
"grad_norm": 1.494204317440263,
"learning_rate": 5.887332240936177e-06,
"loss": 0.8373,
"step": 450
},
{
"epoch": 1.5345583629150137,
"grad_norm": 1.552091126608121,
"learning_rate": 5.695865504800328e-06,
"loss": 0.8415,
"step": 460
},
{
"epoch": 1.567968260597202,
"grad_norm": 1.4687706324296175,
"learning_rate": 5.503352120843923e-06,
"loss": 0.8364,
"step": 470
},
{
"epoch": 1.6013781582793902,
"grad_norm": 1.5165507426860219,
"learning_rate": 5.310081647483577e-06,
"loss": 0.8317,
"step": 480
},
{
"epoch": 1.6347880559615786,
"grad_norm": 1.5074289196814759,
"learning_rate": 5.116344781870282e-06,
"loss": 0.8313,
"step": 490
},
{
"epoch": 1.668197953643767,
"grad_norm": 1.4816310929794339,
"learning_rate": 4.922432922653284e-06,
"loss": 0.8514,
"step": 500
},
{
"epoch": 1.7016078513259552,
"grad_norm": 1.5057236332446566,
"learning_rate": 4.728637731688832e-06,
"loss": 0.8335,
"step": 510
},
{
"epoch": 1.7350177490081435,
"grad_norm": 1.5771287925137742,
"learning_rate": 4.53525069535304e-06,
"loss": 0.8517,
"step": 520
},
{
"epoch": 1.768427646690332,
"grad_norm": 1.5190684133581556,
"learning_rate": 4.342562686118687e-06,
"loss": 0.8366,
"step": 530
},
{
"epoch": 1.8018375443725203,
"grad_norm": 1.5589303795357043,
"learning_rate": 4.150863525055397e-06,
"loss": 0.843,
"step": 540
},
{
"epoch": 1.8352474420547087,
"grad_norm": 1.5319164868806459,
"learning_rate": 3.960441545911205e-06,
"loss": 0.8402,
"step": 550
},
{
"epoch": 1.868657339736897,
"grad_norm": 1.508115730305158,
"learning_rate": 3.7715831614312184e-06,
"loss": 0.8415,
"step": 560
},
{
"epoch": 1.9020672374190855,
"grad_norm": 1.5207204322039876,
"learning_rate": 3.5845724325656485e-06,
"loss": 0.8391,
"step": 570
},
{
"epoch": 1.9354771351012738,
"grad_norm": 1.6058398309364263,
"learning_rate": 3.399690641215142e-06,
"loss": 0.8333,
"step": 580
},
{
"epoch": 1.9688870327834622,
"grad_norm": 1.6417685296920548,
"learning_rate": 3.2172158671561005e-06,
"loss": 0.8078,
"step": 590
},
{
"epoch": 2.0,
"grad_norm": 1.582963981597533,
"learning_rate": 3.0374225697822645e-06,
"loss": 0.8428,
"step": 600
},
{
"epoch": 2.0334098976821884,
"grad_norm": 1.6865316423186703,
"learning_rate": 2.86058117529173e-06,
"loss": 0.6855,
"step": 610
},
{
"epoch": 2.0668197953643768,
"grad_norm": 1.6853603645992963,
"learning_rate": 2.686957669940242e-06,
"loss": 0.6695,
"step": 620
},
{
"epoch": 2.100229693046565,
"grad_norm": 1.769846698754755,
"learning_rate": 2.5168131999726203e-06,
"loss": 0.6845,
"step": 630
},
{
"epoch": 2.1336395907287535,
"grad_norm": 1.743572647134592,
"learning_rate": 2.3504036788339763e-06,
"loss": 0.6683,
"step": 640
},
{
"epoch": 2.167049488410942,
"grad_norm": 1.827986152296155,
"learning_rate": 2.1879794022516006e-06,
"loss": 0.6723,
"step": 650
},
{
"epoch": 2.2004593860931303,
"grad_norm": 1.6862359445877158,
"learning_rate": 2.0297846717664043e-06,
"loss": 0.6788,
"step": 660
},
{
"epoch": 2.2338692837753182,
"grad_norm": 1.7265559141551987,
"learning_rate": 1.8760574272802002e-06,
"loss": 0.686,
"step": 670
},
{
"epoch": 2.2672791814575066,
"grad_norm": 1.6784351972141776,
"learning_rate": 1.7270288891714814e-06,
"loss": 0.6759,
"step": 680
},
{
"epoch": 2.300689079139695,
"grad_norm": 1.7845523210344045,
"learning_rate": 1.5829232105180143e-06,
"loss": 0.6875,
"step": 690
},
{
"epoch": 2.3340989768218834,
"grad_norm": 1.7713046558357322,
"learning_rate": 1.4439571399493146e-06,
"loss": 0.6866,
"step": 700
},
{
"epoch": 2.3675088745040718,
"grad_norm": 1.7123867307563565,
"learning_rate": 1.310339695636118e-06,
"loss": 0.6683,
"step": 710
},
{
"epoch": 2.40091877218626,
"grad_norm": 1.7098768284883517,
"learning_rate": 1.182271850907199e-06,
"loss": 0.6636,
"step": 720
},
{
"epoch": 2.4343286698684485,
"grad_norm": 1.7523526171373818,
"learning_rate": 1.0599462319663906e-06,
"loss": 0.6907,
"step": 730
},
{
"epoch": 2.467738567550637,
"grad_norm": 1.79044869887376,
"learning_rate": 9.435468281644799e-07,
"loss": 0.6606,
"step": 740
},
{
"epoch": 2.5011484652328253,
"grad_norm": 1.8797791077083694,
"learning_rate": 8.332487152617424e-07,
"loss": 0.6858,
"step": 750
},
{
"epoch": 2.5345583629150137,
"grad_norm": 1.6542367973465413,
"learning_rate": 7.292177920973726e-07,
"loss": 0.6634,
"step": 760
},
{
"epoch": 2.567968260597202,
"grad_norm": 1.85288428953335,
"learning_rate": 6.316105310618664e-07,
"loss": 0.6636,
"step": 770
},
{
"epoch": 2.60137815827939,
"grad_norm": 1.6896224399244537,
"learning_rate": 5.405737427476854e-07,
"loss": 0.6786,
"step": 780
},
{
"epoch": 2.634788055961579,
"grad_norm": 1.7762167435792457,
"learning_rate": 4.562443551321788e-07,
"loss": 0.6845,
"step": 790
},
{
"epoch": 2.6681979536437668,
"grad_norm": 1.8054703475251148,
"learning_rate": 3.787492076248994e-07,
"loss": 0.6754,
"step": 800
},
{
"epoch": 2.701607851325955,
"grad_norm": 1.679344404898734,
"learning_rate": 3.082048602890808e-07,
"loss": 0.6744,
"step": 810
},
{
"epoch": 2.7350177490081435,
"grad_norm": 1.725378103601795,
"learning_rate": 2.447174185242324e-07,
"loss": 0.6524,
"step": 820
},
{
"epoch": 2.768427646690332,
"grad_norm": 1.8002958981650867,
"learning_rate": 1.8838237347353848e-07,
"loss": 0.6831,
"step": 830
},
{
"epoch": 2.8018375443725203,
"grad_norm": 1.7661404831387393,
"learning_rate": 1.3928445839610782e-07,
"loss": 0.677,
"step": 840
},
{
"epoch": 2.8352474420547087,
"grad_norm": 1.8674906196125884,
"learning_rate": 9.749752122010347e-08,
"loss": 0.6811,
"step": 850
},
{
"epoch": 2.868657339736897,
"grad_norm": 1.7100605833681841,
"learning_rate": 6.308441346844386e-08,
"loss": 0.6714,
"step": 860
},
{
"epoch": 2.9020672374190855,
"grad_norm": 1.7104584417164503,
"learning_rate": 3.6096895724141435e-08,
"loss": 0.6674,
"step": 870
},
{
"epoch": 2.935477135101274,
"grad_norm": 1.833257936964389,
"learning_rate": 1.657555977746972e-08,
"loss": 0.6712,
"step": 880
},
{
"epoch": 2.968887032783462,
"grad_norm": 1.7160362714366442,
"learning_rate": 4.5497675720540535e-09,
"loss": 0.6732,
"step": 890
},
{
"epoch": 3.0,
"grad_norm": 1.8048277473083167,
"learning_rate": 3.760704171962282e-11,
"loss": 0.6532,
"step": 900
},
{
"epoch": 3.0,
"step": 900,
"total_flos": 147705369985024.0,
"train_loss": 0.8683794037501017,
"train_runtime": 170018.3735,
"train_samples_per_second": 0.338,
"train_steps_per_second": 0.005
}
],
"logging_steps": 10,
"max_steps": 900,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 200,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 147705369985024.0,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}