{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 14.854368932038835,
"eval_steps": 500,
"global_step": 765,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.1941747572815534,
"grad_norm": 3.1270775891219507,
"learning_rate": 6.493506493506493e-07,
"loss": 1.8796,
"step": 10
},
{
"epoch": 0.3883495145631068,
"grad_norm": 4.0478996237342,
"learning_rate": 1.2987012987012986e-06,
"loss": 1.9555,
"step": 20
},
{
"epoch": 0.5825242718446602,
"grad_norm": 3.706686831129688,
"learning_rate": 1.9480519480519483e-06,
"loss": 1.8677,
"step": 30
},
{
"epoch": 0.7766990291262136,
"grad_norm": 1.800540642872872,
"learning_rate": 2.597402597402597e-06,
"loss": 1.6047,
"step": 40
},
{
"epoch": 0.970873786407767,
"grad_norm": 1.5787493617396333,
"learning_rate": 3.246753246753247e-06,
"loss": 1.3922,
"step": 50
},
{
"epoch": 1.1650485436893203,
"grad_norm": 1.207810791025872,
"learning_rate": 3.896103896103897e-06,
"loss": 1.2978,
"step": 60
},
{
"epoch": 1.3592233009708738,
"grad_norm": 0.9175578117993997,
"learning_rate": 4.5454545454545455e-06,
"loss": 1.1015,
"step": 70
},
{
"epoch": 1.5533980582524272,
"grad_norm": 0.9409417065870398,
"learning_rate": 4.999765432089186e-06,
"loss": 0.9465,
"step": 80
},
{
"epoch": 1.7475728155339807,
"grad_norm": 0.7673543415725506,
"learning_rate": 4.995596560308607e-06,
"loss": 0.802,
"step": 90
},
{
"epoch": 1.941747572815534,
"grad_norm": 0.6596047063364,
"learning_rate": 4.986225072382357e-06,
"loss": 0.8013,
"step": 100
},
{
"epoch": 2.1359223300970873,
"grad_norm": 0.6761088913203434,
"learning_rate": 4.971670505224043e-06,
"loss": 0.8454,
"step": 110
},
{
"epoch": 2.3300970873786406,
"grad_norm": 0.8375974153078517,
"learning_rate": 4.9519632010080765e-06,
"loss": 0.7326,
"step": 120
},
{
"epoch": 2.524271844660194,
"grad_norm": 0.7531376616419518,
"learning_rate": 4.927144243914781e-06,
"loss": 0.7275,
"step": 130
},
{
"epoch": 2.7184466019417477,
"grad_norm": 0.6285281076273438,
"learning_rate": 4.897265374481447e-06,
"loss": 0.7418,
"step": 140
},
{
"epoch": 2.912621359223301,
"grad_norm": 0.602398570146741,
"learning_rate": 4.862388881737883e-06,
"loss": 0.7094,
"step": 150
},
{
"epoch": 3.1067961165048543,
"grad_norm": 0.6666767672393625,
"learning_rate": 4.822587473351317e-06,
"loss": 0.6429,
"step": 160
},
{
"epoch": 3.3009708737864076,
"grad_norm": 0.5205954520717115,
"learning_rate": 4.777944124051395e-06,
"loss": 0.6477,
"step": 170
},
{
"epoch": 3.4951456310679614,
"grad_norm": 0.7568300500075249,
"learning_rate": 4.728551902651227e-06,
"loss": 0.6659,
"step": 180
},
{
"epoch": 3.6893203883495147,
"grad_norm": 0.7044549203105918,
"learning_rate": 4.6745137780251125e-06,
"loss": 0.656,
"step": 190
},
{
"epoch": 3.883495145631068,
"grad_norm": 0.7556811208616407,
"learning_rate": 4.615942404447439e-06,
"loss": 0.6441,
"step": 200
},
{
"epoch": 4.077669902912621,
"grad_norm": 0.5819276993822524,
"learning_rate": 4.552959886740232e-06,
"loss": 0.6552,
"step": 210
},
{
"epoch": 4.271844660194175,
"grad_norm": 0.7872228459586639,
"learning_rate": 4.48569752571899e-06,
"loss": 0.5654,
"step": 220
},
{
"epoch": 4.466019417475728,
"grad_norm": 0.6601391677800797,
"learning_rate": 4.414295544467447e-06,
"loss": 0.5617,
"step": 230
},
{
"epoch": 4.660194174757281,
"grad_norm": 0.760089090328379,
"learning_rate": 4.338902796011929e-06,
"loss": 0.5924,
"step": 240
},
{
"epoch": 4.854368932038835,
"grad_norm": 0.6323283888532482,
"learning_rate": 4.259676453004709e-06,
"loss": 0.5691,
"step": 250
},
{
"epoch": 5.048543689320389,
"grad_norm": 0.5670514791770882,
"learning_rate": 4.176781680063274e-06,
"loss": 0.62,
"step": 260
},
{
"epoch": 5.242718446601942,
"grad_norm": 0.8034918706341889,
"learning_rate": 4.090391289448612e-06,
"loss": 0.5791,
"step": 270
},
{
"epoch": 5.436893203883495,
"grad_norm": 0.9966745647142712,
"learning_rate": 4.000685380800299e-06,
"loss": 0.5215,
"step": 280
},
{
"epoch": 5.631067961165049,
"grad_norm": 0.6278542393836921,
"learning_rate": 3.907850965679467e-06,
"loss": 0.4971,
"step": 290
},
{
"epoch": 5.825242718446602,
"grad_norm": 0.5200880725660694,
"learning_rate": 3.812081577702351e-06,
"loss": 0.4876,
"step": 300
},
{
"epoch": 6.019417475728155,
"grad_norm": 0.6395614574611697,
"learning_rate": 3.7135768690771958e-06,
"loss": 0.5461,
"step": 310
},
{
"epoch": 6.213592233009709,
"grad_norm": 0.6094813425270712,
"learning_rate": 3.612542194385613e-06,
"loss": 0.4818,
"step": 320
},
{
"epoch": 6.407766990291262,
"grad_norm": 0.7863019179749429,
"learning_rate": 3.509188182476105e-06,
"loss": 0.4383,
"step": 330
},
{
"epoch": 6.601941747572815,
"grad_norm": 0.7166231822803332,
"learning_rate": 3.40373029736222e-06,
"loss": 0.4665,
"step": 340
},
{
"epoch": 6.796116504854369,
"grad_norm": 0.6263341506429414,
"learning_rate": 3.29638838904075e-06,
"loss": 0.4589,
"step": 350
},
{
"epoch": 6.990291262135923,
"grad_norm": 0.6199864592752095,
"learning_rate": 3.1873862351663966e-06,
"loss": 0.4338,
"step": 360
},
{
"epoch": 7.184466019417476,
"grad_norm": 0.5731484414815903,
"learning_rate": 3.0769510745383603e-06,
"loss": 0.4006,
"step": 370
},
{
"epoch": 7.378640776699029,
"grad_norm": 0.6375447599953608,
"learning_rate": 2.9653131333714357e-06,
"loss": 0.4033,
"step": 380
},
{
"epoch": 7.572815533980583,
"grad_norm": 0.7108113423728954,
"learning_rate": 2.8527051453391763e-06,
"loss": 0.3817,
"step": 390
},
{
"epoch": 7.766990291262136,
"grad_norm": 0.5983539359474854,
"learning_rate": 2.739361866389711e-06,
"loss": 0.3906,
"step": 400
},
{
"epoch": 7.961165048543689,
"grad_norm": 0.6703828114314603,
"learning_rate": 2.6255195853456994e-06,
"loss": 0.3998,
"step": 410
},
{
"epoch": 8.155339805825243,
"grad_norm": 0.7763432614119512,
"learning_rate": 2.511415631308664e-06,
"loss": 0.3636,
"step": 420
},
{
"epoch": 8.349514563106796,
"grad_norm": 0.7336274441363543,
"learning_rate": 2.397287878894637e-06,
"loss": 0.2913,
"step": 430
},
{
"epoch": 8.54368932038835,
"grad_norm": 0.640311797061036,
"learning_rate": 2.283374252332568e-06,
"loss": 0.3709,
"step": 440
},
{
"epoch": 8.737864077669903,
"grad_norm": 1.0549747152380415,
"learning_rate": 2.169912229459296e-06,
"loss": 0.3199,
"step": 450
},
{
"epoch": 8.932038834951456,
"grad_norm": 0.7335809666111395,
"learning_rate": 2.0571383466451237e-06,
"loss": 0.3302,
"step": 460
},
{
"epoch": 9.12621359223301,
"grad_norm": 0.8337042629082712,
"learning_rate": 1.9452877056820936e-06,
"loss": 0.3043,
"step": 470
},
{
"epoch": 9.320388349514563,
"grad_norm": 0.7768236518159666,
"learning_rate": 1.8345934836629424e-06,
"loss": 0.2832,
"step": 480
},
{
"epoch": 9.514563106796116,
"grad_norm": 0.8109748052215916,
"learning_rate": 1.7252864468725218e-06,
"loss": 0.2376,
"step": 490
},
{
"epoch": 9.70873786407767,
"grad_norm": 0.884651680841133,
"learning_rate": 1.6175944697050678e-06,
"loss": 0.2854,
"step": 500
},
{
"epoch": 9.902912621359224,
"grad_norm": 0.7174797476037638,
"learning_rate": 1.511742059610255e-06,
"loss": 0.2807,
"step": 510
},
{
"epoch": 10.097087378640778,
"grad_norm": 0.6760588512048147,
"learning_rate": 1.4079498890583766e-06,
"loss": 0.2656,
"step": 520
},
{
"epoch": 10.29126213592233,
"grad_norm": 0.6390548130600759,
"learning_rate": 1.3064343355003775e-06,
"loss": 0.2215,
"step": 530
},
{
"epoch": 10.485436893203884,
"grad_norm": 0.7714829355919598,
"learning_rate": 1.2074070302817962e-06,
"loss": 0.21,
"step": 540
},
{
"epoch": 10.679611650485437,
"grad_norm": 0.7133631416901189,
"learning_rate": 1.1110744174509952e-06,
"loss": 0.257,
"step": 550
},
{
"epoch": 10.87378640776699,
"grad_norm": 1.0554896728347178,
"learning_rate": 1.0176373233814509e-06,
"loss": 0.235,
"step": 560
},
{
"epoch": 11.067961165048544,
"grad_norm": 0.7568107855940384,
"learning_rate": 9.272905381053132e-07,
"loss": 0.2458,
"step": 570
},
{
"epoch": 11.262135922330097,
"grad_norm": 1.1755882782765041,
"learning_rate": 8.402224092310299e-07,
"loss": 0.2139,
"step": 580
},
{
"epoch": 11.45631067961165,
"grad_norm": 0.7277468096167472,
"learning_rate": 7.566144492916191e-07,
"loss": 0.1993,
"step": 590
},
{
"epoch": 11.650485436893204,
"grad_norm": 0.7732351944532618,
"learning_rate": 6.766409573421467e-07,
"loss": 0.2075,
"step": 600
},
{
"epoch": 11.844660194174757,
"grad_norm": 0.7272514601520167,
"learning_rate": 6.004686555952743e-07,
"loss": 0.2124,
"step": 610
},
{
"epoch": 12.03883495145631,
"grad_norm": 0.5446717472743969,
"learning_rate": 5.282563418523859e-07,
"loss": 0.2142,
"step": 620
},
{
"epoch": 12.233009708737864,
"grad_norm": 0.601799105236111,
"learning_rate": 4.6015455845488805e-07,
"loss": 0.1827,
"step": 630
},
{
"epoch": 12.427184466019417,
"grad_norm": 0.8397139799250541,
"learning_rate": 3.963052784458146e-07,
"loss": 0.2212,
"step": 640
},
{
"epoch": 12.62135922330097,
"grad_norm": 0.6624723438080478,
"learning_rate": 3.368416095960092e-07,
"loss": 0.1861,
"step": 650
},
{
"epoch": 12.815533980582524,
"grad_norm": 0.8742788293711848,
"learning_rate": 2.8188751691189813e-07,
"loss": 0.1602,
"step": 660
},
{
"epoch": 13.009708737864077,
"grad_norm": 0.8377328891704509,
"learning_rate": 2.3155756420336046e-07,
"loss": 0.2049,
"step": 670
},
{
"epoch": 13.20388349514563,
"grad_norm": 0.6166032834335495,
"learning_rate": 1.8595667525043965e-07,
"loss": 0.1702,
"step": 680
},
{
"epoch": 13.398058252427184,
"grad_norm": 0.5133188237146326,
"learning_rate": 1.4517991506680762e-07,
"loss": 0.1805,
"step": 690
},
{
"epoch": 13.592233009708737,
"grad_norm": 0.6421575024333711,
"learning_rate": 1.0931229171597584e-07,
"loss": 0.172,
"step": 700
},
{
"epoch": 13.78640776699029,
"grad_norm": 0.6833017346041286,
"learning_rate": 7.842857909342166e-08,
"loss": 0.1824,
"step": 710
},
{
"epoch": 13.980582524271846,
"grad_norm": 0.6474937990820275,
"learning_rate": 5.259316104406637e-08,
"loss": 0.1891,
"step": 720
},
{
"epoch": 14.174757281553399,
"grad_norm": 0.6526458668790724,
"learning_rate": 3.185989714009186e-08,
"loss": 0.2009,
"step": 730
},
{
"epoch": 14.368932038834952,
"grad_norm": 0.5981773924609198,
"learning_rate": 1.627201039889309e-08,
"loss": 0.1903,
"step": 740
},
{
"epoch": 14.563106796116505,
"grad_norm": 0.6732767479005811,
"learning_rate": 5.861997175260759e-09,
"loss": 0.181,
"step": 750
},
{
"epoch": 14.757281553398059,
"grad_norm": 0.5952495060637577,
"learning_rate": 6.515594156286664e-10,
"loss": 0.1647,
"step": 760
}
],
"logging_steps": 10,
"max_steps": 765,
"num_input_tokens_seen": 0,
"num_train_epochs": 15,
"save_steps": 255,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 169821882810368.0,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}