rrd / trainer_state.json
haihp02's picture
Upload checkpoint
b0c7629 verified
{
"best_global_step": 800,
"best_metric": 0.2455482929944992,
"best_model_checkpoint": "./checkpoints/qwen253-lora-leduc_random_l_s3/checkpoint-800",
"epoch": 1.0,
"eval_steps": 200,
"global_step": 826,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.012106537530266344,
"grad_norm": 10.681763648986816,
"learning_rate": 8.999999999999999e-06,
"loss": 0.828,
"mean_token_accuracy": 0.8087970525026321,
"num_tokens": 158075.0,
"step": 10
},
{
"epoch": 0.024213075060532687,
"grad_norm": 0.8575407266616821,
"learning_rate": 1.8999999999999998e-05,
"loss": 0.3278,
"mean_token_accuracy": 0.8451847195625305,
"num_tokens": 314832.0,
"step": 20
},
{
"epoch": 0.03631961259079903,
"grad_norm": 0.38625678420066833,
"learning_rate": 2.9e-05,
"loss": 0.2427,
"mean_token_accuracy": 0.8527996808290481,
"num_tokens": 473401.0,
"step": 30
},
{
"epoch": 0.048426150121065374,
"grad_norm": 0.25771161913871765,
"learning_rate": 3.4997791661317485e-05,
"loss": 0.2284,
"mean_token_accuracy": 0.8509193986654282,
"num_tokens": 630871.0,
"step": 40
},
{
"epoch": 0.06053268765133172,
"grad_norm": 0.21924710273742676,
"learning_rate": 3.497295425144213e-05,
"loss": 0.2428,
"mean_token_accuracy": 0.8498442590236663,
"num_tokens": 782593.0,
"step": 50
},
{
"epoch": 0.07263922518159806,
"grad_norm": 0.172270268201828,
"learning_rate": 3.4920558312793984e-05,
"loss": 0.2396,
"mean_token_accuracy": 0.844213005900383,
"num_tokens": 938469.0,
"step": 60
},
{
"epoch": 0.0847457627118644,
"grad_norm": 0.23005615174770355,
"learning_rate": 3.4840686484803226e-05,
"loss": 0.2336,
"mean_token_accuracy": 0.8490692973136902,
"num_tokens": 1093154.0,
"step": 70
},
{
"epoch": 0.09685230024213075,
"grad_norm": 0.2150808423757553,
"learning_rate": 3.473346474216413e-05,
"loss": 0.2308,
"mean_token_accuracy": 0.8609368681907654,
"num_tokens": 1249444.0,
"step": 80
},
{
"epoch": 0.1089588377723971,
"grad_norm": 0.15730148553848267,
"learning_rate": 3.459906219614643e-05,
"loss": 0.2424,
"mean_token_accuracy": 0.8448190927505493,
"num_tokens": 1405823.0,
"step": 90
},
{
"epoch": 0.12106537530266344,
"grad_norm": 0.4564799964427948,
"learning_rate": 3.4437690827871256e-05,
"loss": 0.2422,
"mean_token_accuracy": 0.8474129974842072,
"num_tokens": 1559563.0,
"step": 100
},
{
"epoch": 0.13317191283292978,
"grad_norm": 0.1691020429134369,
"learning_rate": 3.424960515397224e-05,
"loss": 0.2426,
"mean_token_accuracy": 0.8417032897472382,
"num_tokens": 1714717.0,
"step": 110
},
{
"epoch": 0.14527845036319612,
"grad_norm": 0.13242730498313904,
"learning_rate": 3.403510182516918e-05,
"loss": 0.2401,
"mean_token_accuracy": 0.8526080518960952,
"num_tokens": 1869540.0,
"step": 120
},
{
"epoch": 0.15738498789346247,
"grad_norm": 0.16052637994289398,
"learning_rate": 3.379451915838742e-05,
"loss": 0.2442,
"mean_token_accuracy": 0.8375077575445176,
"num_tokens": 2024145.0,
"step": 130
},
{
"epoch": 0.1694915254237288,
"grad_norm": 0.1499997228384018,
"learning_rate": 3.352823660316074e-05,
"loss": 0.2346,
"mean_token_accuracy": 0.845609164237976,
"num_tokens": 2180735.0,
"step": 140
},
{
"epoch": 0.18159806295399517,
"grad_norm": 0.10429004579782486,
"learning_rate": 3.323667414315959e-05,
"loss": 0.2419,
"mean_token_accuracy": 0.8553953766822815,
"num_tokens": 2335148.0,
"step": 150
},
{
"epoch": 0.1937046004842615,
"grad_norm": 0.08566914498806,
"learning_rate": 3.292029163378833e-05,
"loss": 0.2357,
"mean_token_accuracy": 0.8465773612260818,
"num_tokens": 2492016.0,
"step": 160
},
{
"epoch": 0.20581113801452786,
"grad_norm": 0.14463454484939575,
"learning_rate": 3.2579588076896486e-05,
"loss": 0.2314,
"mean_token_accuracy": 0.8503528028726578,
"num_tokens": 2647377.0,
"step": 170
},
{
"epoch": 0.2179176755447942,
"grad_norm": 0.12798088788986206,
"learning_rate": 3.221510083374765e-05,
"loss": 0.2333,
"mean_token_accuracy": 0.8497181862592698,
"num_tokens": 2803931.0,
"step": 180
},
{
"epoch": 0.23002421307506055,
"grad_norm": 0.17899736762046814,
"learning_rate": 3.182740477748768e-05,
"loss": 0.2358,
"mean_token_accuracy": 0.8438972860574723,
"num_tokens": 2956080.0,
"step": 190
},
{
"epoch": 0.24213075060532688,
"grad_norm": 0.15043821930885315,
"learning_rate": 3.1417111386448595e-05,
"loss": 0.2366,
"mean_token_accuracy": 0.8498786896467209,
"num_tokens": 3111180.0,
"step": 200
},
{
"epoch": 0.24213075060532688,
"eval_loss": 0.24580417573451996,
"eval_num_tokens": 3111180.0,
"eval_runtime": 27.1445,
"eval_samples_per_second": 9.836,
"eval_steps_per_second": 9.836,
"step": 200
},
{
"epoch": 0.2542372881355932,
"grad_norm": 0.12326110154390335,
"learning_rate": 3.098486777971855e-05,
"loss": 0.2277,
"mean_token_accuracy": 0.8524766951799393,
"num_tokens": 3269249.0,
"step": 210
},
{
"epoch": 0.26634382566585957,
"grad_norm": 0.0829845741391182,
"learning_rate": 3.053135569649868e-05,
"loss": 0.2419,
"mean_token_accuracy": 0.843473681807518,
"num_tokens": 3424471.0,
"step": 220
},
{
"epoch": 0.2784503631961259,
"grad_norm": 0.13634343445301056,
"learning_rate": 3.005729042085683e-05,
"loss": 0.2383,
"mean_token_accuracy": 0.8487411588430405,
"num_tokens": 3579004.0,
"step": 230
},
{
"epoch": 0.29055690072639223,
"grad_norm": 0.09264083206653595,
"learning_rate": 2.956341965357393e-05,
"loss": 0.236,
"mean_token_accuracy": 0.8531801581382752,
"num_tokens": 3734168.0,
"step": 240
},
{
"epoch": 0.3026634382566586,
"grad_norm": 0.09290221333503723,
"learning_rate": 2.9050522332862385e-05,
"loss": 0.2369,
"mean_token_accuracy": 0.8493932217359543,
"num_tokens": 3888227.0,
"step": 250
},
{
"epoch": 0.31476997578692495,
"grad_norm": 0.08292774111032486,
"learning_rate": 2.8519407405816493e-05,
"loss": 0.2313,
"mean_token_accuracy": 0.851080346107483,
"num_tokens": 4046278.0,
"step": 260
},
{
"epoch": 0.3268765133171913,
"grad_norm": 0.1620582491159439,
"learning_rate": 2.797091255253247e-05,
"loss": 0.2379,
"mean_token_accuracy": 0.8395844340324402,
"num_tokens": 4200203.0,
"step": 270
},
{
"epoch": 0.3389830508474576,
"grad_norm": 0.12099113315343857,
"learning_rate": 2.7405902864910543e-05,
"loss": 0.2364,
"mean_token_accuracy": 0.8551326721906662,
"num_tokens": 4355292.0,
"step": 280
},
{
"epoch": 0.35108958837772397,
"grad_norm": 0.12489405274391174,
"learning_rate": 2.6825269482222827e-05,
"loss": 0.2354,
"mean_token_accuracy": 0.8442697525024414,
"num_tokens": 4510258.0,
"step": 290
},
{
"epoch": 0.36319612590799033,
"grad_norm": 0.10075319558382034,
"learning_rate": 2.6229928185598994e-05,
"loss": 0.2333,
"mean_token_accuracy": 0.8536905407905578,
"num_tokens": 4664788.0,
"step": 300
},
{
"epoch": 0.37530266343825663,
"grad_norm": 0.11994941532611847,
"learning_rate": 2.5620817953646596e-05,
"loss": 0.2323,
"mean_token_accuracy": 0.8539896428585052,
"num_tokens": 4821986.0,
"step": 310
},
{
"epoch": 0.387409200968523,
"grad_norm": 0.12077498435974121,
"learning_rate": 2.4998899481484006e-05,
"loss": 0.2399,
"mean_token_accuracy": 0.8509245574474334,
"num_tokens": 4978102.0,
"step": 320
},
{
"epoch": 0.39951573849878935,
"grad_norm": 0.1378944218158722,
"learning_rate": 2.4365153665521915e-05,
"loss": 0.233,
"mean_token_accuracy": 0.8478419154882431,
"num_tokens": 5134005.0,
"step": 330
},
{
"epoch": 0.4116222760290557,
"grad_norm": 0.15924955904483795,
"learning_rate": 2.3720580056383107e-05,
"loss": 0.2244,
"mean_token_accuracy": 0.8621924012899399,
"num_tokens": 5290764.0,
"step": 340
},
{
"epoch": 0.423728813559322,
"grad_norm": 0.1484508514404297,
"learning_rate": 2.30661952824006e-05,
"loss": 0.2266,
"mean_token_accuracy": 0.8586694985628128,
"num_tokens": 5447775.0,
"step": 350
},
{
"epoch": 0.4358353510895884,
"grad_norm": 0.18554432690143585,
"learning_rate": 2.2403031446180677e-05,
"loss": 0.2269,
"mean_token_accuracy": 0.8663704991340637,
"num_tokens": 5605311.0,
"step": 360
},
{
"epoch": 0.44794188861985473,
"grad_norm": 0.2061612904071808,
"learning_rate": 2.1732134496759685e-05,
"loss": 0.2293,
"mean_token_accuracy": 0.8527790486812592,
"num_tokens": 5763991.0,
"step": 370
},
{
"epoch": 0.4600484261501211,
"grad_norm": 0.21663211286067963,
"learning_rate": 2.1054562579922147e-05,
"loss": 0.2384,
"mean_token_accuracy": 0.8578897565603256,
"num_tokens": 5918372.0,
"step": 380
},
{
"epoch": 0.4721549636803874,
"grad_norm": 0.16164663434028625,
"learning_rate": 2.0371384369281973e-05,
"loss": 0.2321,
"mean_token_accuracy": 0.8527203172445297,
"num_tokens": 6076662.0,
"step": 390
},
{
"epoch": 0.48426150121065376,
"grad_norm": 0.14720699191093445,
"learning_rate": 1.968367738075915e-05,
"loss": 0.223,
"mean_token_accuracy": 0.8647637069225311,
"num_tokens": 6233988.0,
"step": 400
},
{
"epoch": 0.48426150121065376,
"eval_loss": 0.24974025785923004,
"eval_num_tokens": 6233988.0,
"eval_runtime": 26.7143,
"eval_samples_per_second": 9.995,
"eval_steps_per_second": 9.995,
"step": 400
},
{
"epoch": 0.4963680387409201,
"grad_norm": 0.11780782788991928,
"learning_rate": 1.899252627311015e-05,
"loss": 0.2288,
"mean_token_accuracy": 0.853996068239212,
"num_tokens": 6391251.0,
"step": 410
},
{
"epoch": 0.5084745762711864,
"grad_norm": 0.14394888281822205,
"learning_rate": 1.8299021137192683e-05,
"loss": 0.237,
"mean_token_accuracy": 0.8532957583665848,
"num_tokens": 6544551.0,
"step": 420
},
{
"epoch": 0.5205811138014528,
"grad_norm": 0.4107162356376648,
"learning_rate": 1.760425577666279e-05,
"loss": 0.2294,
"mean_token_accuracy": 0.8468001574277878,
"num_tokens": 6702345.0,
"step": 430
},
{
"epoch": 0.5326876513317191,
"grad_norm": 0.12140627950429916,
"learning_rate": 1.6909325982816146e-05,
"loss": 0.2268,
"mean_token_accuracy": 0.8571277797222138,
"num_tokens": 6857256.0,
"step": 440
},
{
"epoch": 0.5447941888619855,
"grad_norm": 0.13262014091014862,
"learning_rate": 1.6215327806294417e-05,
"loss": 0.228,
"mean_token_accuracy": 0.8527298241853714,
"num_tokens": 7013503.0,
"step": 450
},
{
"epoch": 0.5569007263922519,
"grad_norm": 0.18789087235927582,
"learning_rate": 1.552335582838251e-05,
"loss": 0.2317,
"mean_token_accuracy": 0.85929856300354,
"num_tokens": 7167382.0,
"step": 460
},
{
"epoch": 0.5690072639225182,
"grad_norm": 0.1549673080444336,
"learning_rate": 1.4834501434623413e-05,
"loss": 0.2386,
"mean_token_accuracy": 0.8493269443511963,
"num_tokens": 7322223.0,
"step": 470
},
{
"epoch": 0.5811138014527845,
"grad_norm": 0.28990328311920166,
"learning_rate": 1.4149851093473319e-05,
"loss": 0.2261,
"mean_token_accuracy": 0.8549934804439545,
"num_tokens": 7477291.0,
"step": 480
},
{
"epoch": 0.5932203389830508,
"grad_norm": 0.34836694598197937,
"learning_rate": 1.3470484642712053e-05,
"loss": 0.2391,
"mean_token_accuracy": 0.8534150063991547,
"num_tokens": 7631181.0,
"step": 490
},
{
"epoch": 0.6053268765133172,
"grad_norm": 0.08868297189474106,
"learning_rate": 1.2797473586311476e-05,
"loss": 0.235,
"mean_token_accuracy": 0.8497831732034683,
"num_tokens": 7786171.0,
"step": 500
},
{
"epoch": 0.6174334140435835,
"grad_norm": 0.11514752358198166,
"learning_rate": 1.2131879404448057e-05,
"loss": 0.2331,
"mean_token_accuracy": 0.8469379067420959,
"num_tokens": 7941159.0,
"step": 510
},
{
"epoch": 0.6295399515738499,
"grad_norm": 0.22596512734889984,
"learning_rate": 1.1474751879325075e-05,
"loss": 0.2374,
"mean_token_accuracy": 0.8513785660266876,
"num_tokens": 8095202.0,
"step": 520
},
{
"epoch": 0.6416464891041163,
"grad_norm": 0.19439919292926788,
"learning_rate": 1.0827127439444991e-05,
"loss": 0.2318,
"mean_token_accuracy": 0.8583654165267944,
"num_tokens": 8250634.0,
"step": 530
},
{
"epoch": 0.6537530266343826,
"grad_norm": 0.16050127148628235,
"learning_rate": 1.0190027524943444e-05,
"loss": 0.2247,
"mean_token_accuracy": 0.8635302782058716,
"num_tokens": 8408664.0,
"step": 540
},
{
"epoch": 0.6658595641646489,
"grad_norm": 0.19398577511310577,
"learning_rate": 9.564456976562993e-06,
"loss": 0.2359,
"mean_token_accuracy": 0.8510926723480224,
"num_tokens": 8563164.0,
"step": 550
},
{
"epoch": 0.6779661016949152,
"grad_norm": 0.16695190966129303,
"learning_rate": 8.951402450807686e-06,
"loss": 0.2256,
"mean_token_accuracy": 0.8567656666040421,
"num_tokens": 8717818.0,
"step": 560
},
{
"epoch": 0.6900726392251816,
"grad_norm": 0.14462164044380188,
"learning_rate": 8.35183086377792e-06,
"loss": 0.2302,
"mean_token_accuracy": 0.8581649184226989,
"num_tokens": 8872048.0,
"step": 570
},
{
"epoch": 0.7021791767554479,
"grad_norm": 0.20691347122192383,
"learning_rate": 7.766687866140133e-06,
"loss": 0.234,
"mean_token_accuracy": 0.856579378247261,
"num_tokens": 9027749.0,
"step": 580
},
{
"epoch": 0.7142857142857143,
"grad_norm": 0.23428326845169067,
"learning_rate": 7.196896351636536e-06,
"loss": 0.2305,
"mean_token_accuracy": 0.8628283053636551,
"num_tokens": 9186566.0,
"step": 590
},
{
"epoch": 0.7263922518159807,
"grad_norm": 0.16487430036067963,
"learning_rate": 6.643355001487321e-06,
"loss": 0.2298,
"mean_token_accuracy": 0.8545309662818908,
"num_tokens": 9343040.0,
"step": 600
},
{
"epoch": 0.7263922518159807,
"eval_loss": 0.24567341804504395,
"eval_num_tokens": 9343040.0,
"eval_runtime": 26.5992,
"eval_samples_per_second": 10.038,
"eval_steps_per_second": 10.038,
"step": 600
},
{
"epoch": 0.738498789346247,
"grad_norm": 0.1282430738210678,
"learning_rate": 6.106936866981081e-06,
"loss": 0.2249,
"mean_token_accuracy": 0.8606575727462769,
"num_tokens": 9499657.0,
"step": 610
},
{
"epoch": 0.7506053268765133,
"grad_norm": 0.13524822890758514,
"learning_rate": 5.588487992489113e-06,
"loss": 0.2259,
"mean_token_accuracy": 0.865332567691803,
"num_tokens": 9654173.0,
"step": 620
},
{
"epoch": 0.7627118644067796,
"grad_norm": 0.1036379262804985,
"learning_rate": 5.088826081075191e-06,
"loss": 0.2296,
"mean_token_accuracy": 0.8487064689397812,
"num_tokens": 9809053.0,
"step": 630
},
{
"epoch": 0.774818401937046,
"grad_norm": 0.20920903980731964,
"learning_rate": 4.6087392048056934e-06,
"loss": 0.2363,
"mean_token_accuracy": 0.8600066721439361,
"num_tokens": 9965376.0,
"step": 640
},
{
"epoch": 0.7869249394673123,
"grad_norm": 0.2146104872226715,
"learning_rate": 4.148984561793913e-06,
"loss": 0.2303,
"mean_token_accuracy": 0.8529395699501038,
"num_tokens": 10119911.0,
"step": 650
},
{
"epoch": 0.7990314769975787,
"grad_norm": 0.15396490693092346,
"learning_rate": 3.7102872819392174e-06,
"loss": 0.2298,
"mean_token_accuracy": 0.8552716702222825,
"num_tokens": 10277701.0,
"step": 660
},
{
"epoch": 0.8111380145278451,
"grad_norm": 0.15627269446849823,
"learning_rate": 3.2933392832444513e-06,
"loss": 0.2277,
"mean_token_accuracy": 0.8558280795812607,
"num_tokens": 10434155.0,
"step": 670
},
{
"epoch": 0.8232445520581114,
"grad_norm": 0.1806221306324005,
"learning_rate": 2.898798180515523e-06,
"loss": 0.2316,
"mean_token_accuracy": 0.8524704337120056,
"num_tokens": 10592288.0,
"step": 680
},
{
"epoch": 0.8353510895883777,
"grad_norm": 0.15548691153526306,
"learning_rate": 2.527286248164371e-06,
"loss": 0.2343,
"mean_token_accuracy": 0.8490294456481934,
"num_tokens": 10748051.0,
"step": 690
},
{
"epoch": 0.847457627118644,
"grad_norm": 0.21228361129760742,
"learning_rate": 2.179389438751151e-06,
"loss": 0.2274,
"mean_token_accuracy": 0.8586687803268432,
"num_tokens": 10903879.0,
"step": 700
},
{
"epoch": 0.8595641646489104,
"grad_norm": 0.1210569515824318,
"learning_rate": 1.8556564588136477e-06,
"loss": 0.2272,
"mean_token_accuracy": 0.8536923497915268,
"num_tokens": 11058830.0,
"step": 710
},
{
"epoch": 0.8716707021791767,
"grad_norm": 0.12513045966625214,
"learning_rate": 1.556597903441502e-06,
"loss": 0.2322,
"mean_token_accuracy": 0.8645375669002533,
"num_tokens": 11214742.0,
"step": 720
},
{
"epoch": 0.8837772397094431,
"grad_norm": 0.12597453594207764,
"learning_rate": 1.2826854509602204e-06,
"loss": 0.2257,
"mean_token_accuracy": 0.8663272529840469,
"num_tokens": 11371761.0,
"step": 730
},
{
"epoch": 0.8958837772397095,
"grad_norm": 0.13700132071971893,
"learning_rate": 1.0343511189951156e-06,
"loss": 0.2226,
"mean_token_accuracy": 0.862814399600029,
"num_tokens": 11526770.0,
"step": 740
},
{
"epoch": 0.9079903147699758,
"grad_norm": 0.14355961978435516,
"learning_rate": 8.119865830885323e-07,
"loss": 0.2285,
"mean_token_accuracy": 0.8575877249240875,
"num_tokens": 11683565.0,
"step": 750
},
{
"epoch": 0.9200968523002422,
"grad_norm": 0.17410188913345337,
"learning_rate": 6.159425589450137e-07,
"loss": 0.2331,
"mean_token_accuracy": 0.8473503857851028,
"num_tokens": 11840016.0,
"step": 760
},
{
"epoch": 0.9322033898305084,
"grad_norm": 0.10479779541492462,
"learning_rate": 4.4652824927878805e-07,
"loss": 0.2323,
"mean_token_accuracy": 0.8583548158407212,
"num_tokens": 11993687.0,
"step": 770
},
{
"epoch": 0.9443099273607748,
"grad_norm": 0.25644639134407043,
"learning_rate": 3.040108561359608e-07,
"loss": 0.2262,
"mean_token_accuracy": 0.8624204069375991,
"num_tokens": 12149690.0,
"step": 780
},
{
"epoch": 0.9564164648910412,
"grad_norm": 0.10093547403812408,
"learning_rate": 1.8861515946060807e-07,
"loss": 0.2327,
"mean_token_accuracy": 0.8606836467981338,
"num_tokens": 12305420.0,
"step": 790
},
{
"epoch": 0.9685230024213075,
"grad_norm": 0.21478745341300964,
"learning_rate": 1.0052316256947606e-07,
"loss": 0.2312,
"mean_token_accuracy": 0.8556828409433365,
"num_tokens": 12461595.0,
"step": 800
},
{
"epoch": 0.9685230024213075,
"eval_loss": 0.2455482929944992,
"eval_num_tokens": 12461595.0,
"eval_runtime": 26.6485,
"eval_samples_per_second": 10.019,
"eval_steps_per_second": 10.019,
"step": 800
},
{
"epoch": 0.9806295399515739,
"grad_norm": 0.13691502809524536,
"learning_rate": 3.987380509441307e-08,
"loss": 0.2252,
"mean_token_accuracy": 0.852449357509613,
"num_tokens": 12617715.0,
"step": 810
},
{
"epoch": 0.9927360774818402,
"grad_norm": 0.12197011709213257,
"learning_rate": 6.76274384530412e-09,
"loss": 0.2263,
"mean_token_accuracy": 0.8597381263971329,
"num_tokens": 12775223.0,
"step": 820
}
],
"logging_steps": 10,
"max_steps": 826,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 200,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 2.1877783380167885e+17,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}