{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 5.946341463414634,
"eval_steps": 500,
"global_step": 612,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0975609756097561,
"grad_norm": 5.756045341491699,
"learning_rate": 1.4516129032258066e-06,
"loss": 0.8809,
"step": 10
},
{
"epoch": 0.1951219512195122,
"grad_norm": 3.873936176300049,
"learning_rate": 3.0645161290322584e-06,
"loss": 0.8044,
"step": 20
},
{
"epoch": 0.2926829268292683,
"grad_norm": 1.6314491033554077,
"learning_rate": 4.67741935483871e-06,
"loss": 0.7459,
"step": 30
},
{
"epoch": 0.3902439024390244,
"grad_norm": 2.0878822803497314,
"learning_rate": 6.290322580645162e-06,
"loss": 0.727,
"step": 40
},
{
"epoch": 0.4878048780487805,
"grad_norm": 1.7192010879516602,
"learning_rate": 7.903225806451613e-06,
"loss": 0.6279,
"step": 50
},
{
"epoch": 0.5853658536585366,
"grad_norm": 1.5417181253433228,
"learning_rate": 9.516129032258065e-06,
"loss": 0.6367,
"step": 60
},
{
"epoch": 0.6829268292682927,
"grad_norm": 1.7374225854873657,
"learning_rate": 9.996003750499608e-06,
"loss": 0.689,
"step": 70
},
{
"epoch": 0.7804878048780488,
"grad_norm": 1.3835179805755615,
"learning_rate": 9.976445660039118e-06,
"loss": 0.6537,
"step": 80
},
{
"epoch": 0.8780487804878049,
"grad_norm": 1.206093192100525,
"learning_rate": 9.940655438201113e-06,
"loss": 0.6421,
"step": 90
},
{
"epoch": 0.975609756097561,
"grad_norm": 1.456930160522461,
"learning_rate": 9.888749825250151e-06,
"loss": 0.6039,
"step": 100
},
{
"epoch": 1.0682926829268293,
"grad_norm": 1.419948935508728,
"learning_rate": 9.820898126506978e-06,
"loss": 0.5969,
"step": 110
},
{
"epoch": 1.1658536585365853,
"grad_norm": 1.9503365755081177,
"learning_rate": 9.737321660109767e-06,
"loss": 0.516,
"step": 120
},
{
"epoch": 1.2634146341463415,
"grad_norm": 1.587859869003296,
"learning_rate": 9.638293035120342e-06,
"loss": 0.5287,
"step": 130
},
{
"epoch": 1.3609756097560974,
"grad_norm": 1.4838457107543945,
"learning_rate": 9.524135262330098e-06,
"loss": 0.5411,
"step": 140
},
{
"epoch": 1.4585365853658536,
"grad_norm": 1.41899836063385,
"learning_rate": 9.395220700665924e-06,
"loss": 0.5023,
"step": 150
},
{
"epoch": 1.5560975609756098,
"grad_norm": 1.7997419834136963,
"learning_rate": 9.251969842632785e-06,
"loss": 0.5487,
"step": 160
},
{
"epoch": 1.653658536585366,
"grad_norm": 1.1504219770431519,
"learning_rate": 9.094849942754564e-06,
"loss": 0.491,
"step": 170
},
{
"epoch": 1.751219512195122,
"grad_norm": 1.4111475944519043,
"learning_rate": 8.924373493486941e-06,
"loss": 0.4901,
"step": 180
},
{
"epoch": 1.848780487804878,
"grad_norm": 1.6067090034484863,
"learning_rate": 8.741096553573506e-06,
"loss": 0.5439,
"step": 190
},
{
"epoch": 1.946341463414634,
"grad_norm": 1.2158631086349487,
"learning_rate": 8.545616934297733e-06,
"loss": 0.5212,
"step": 200
},
{
"epoch": 2.0390243902439025,
"grad_norm": 1.3324558734893799,
"learning_rate": 8.338572249546813e-06,
"loss": 0.49,
"step": 210
},
{
"epoch": 2.1365853658536587,
"grad_norm": 1.4280637502670288,
"learning_rate": 8.120637836047698e-06,
"loss": 0.4122,
"step": 220
},
{
"epoch": 2.234146341463415,
"grad_norm": 1.0540653467178345,
"learning_rate": 7.892524550559056e-06,
"loss": 0.3693,
"step": 230
},
{
"epoch": 2.3317073170731706,
"grad_norm": 1.2099759578704834,
"learning_rate": 7.654976451204288e-06,
"loss": 0.3851,
"step": 240
},
{
"epoch": 2.4292682926829268,
"grad_norm": 1.056799292564392,
"learning_rate": 7.408768370508577e-06,
"loss": 0.377,
"step": 250
},
{
"epoch": 2.526829268292683,
"grad_norm": 1.4203627109527588,
"learning_rate": 7.154703388056246e-06,
"loss": 0.3864,
"step": 260
},
{
"epoch": 2.624390243902439,
"grad_norm": 0.9842203259468079,
"learning_rate": 6.893610211012067e-06,
"loss": 0.3881,
"step": 270
},
{
"epoch": 2.721951219512195,
"grad_norm": 0.9598954916000366,
"learning_rate": 6.6263404710507495e-06,
"loss": 0.4051,
"step": 280
},
{
"epoch": 2.819512195121951,
"grad_norm": 1.2445122003555298,
"learning_rate": 6.3537659465114275e-06,
"loss": 0.4061,
"step": 290
},
{
"epoch": 2.9170731707317072,
"grad_norm": 1.1891133785247803,
"learning_rate": 6.076775718837911e-06,
"loss": 0.3537,
"step": 300
},
{
"epoch": 3.0097560975609756,
"grad_norm": 1.345109224319458,
"learning_rate": 5.796273272579823e-06,
"loss": 0.3309,
"step": 310
},
{
"epoch": 3.107317073170732,
"grad_norm": 0.9590056538581848,
"learning_rate": 5.513173548413789e-06,
"loss": 0.2277,
"step": 320
},
{
"epoch": 3.204878048780488,
"grad_norm": 1.3723112344741821,
"learning_rate": 5.228399958797117e-06,
"loss": 0.2754,
"step": 330
},
{
"epoch": 3.3024390243902437,
"grad_norm": 1.052578091621399,
"learning_rate": 4.9428813759883e-06,
"loss": 0.2226,
"step": 340
},
{
"epoch": 3.4,
"grad_norm": 1.3540409803390503,
"learning_rate": 4.6575491022587714e-06,
"loss": 0.3032,
"step": 350
},
{
"epoch": 3.497560975609756,
"grad_norm": 0.979840099811554,
"learning_rate": 4.373333832178478e-06,
"loss": 0.2326,
"step": 360
},
{
"epoch": 3.5951219512195123,
"grad_norm": 1.1373505592346191,
"learning_rate": 4.091162616883634e-06,
"loss": 0.3115,
"step": 370
},
{
"epoch": 3.692682926829268,
"grad_norm": 1.0093439817428589,
"learning_rate": 3.8119558402285994e-06,
"loss": 0.2202,
"step": 380
},
{
"epoch": 3.790243902439024,
"grad_norm": 1.2901158332824707,
"learning_rate": 3.5366242166850624e-06,
"loss": 0.2969,
"step": 390
},
{
"epoch": 3.8878048780487804,
"grad_norm": 0.9081848859786987,
"learning_rate": 3.2660658207807205e-06,
"loss": 0.237,
"step": 400
},
{
"epoch": 3.9853658536585366,
"grad_norm": 1.0115485191345215,
"learning_rate": 3.0011631577668327e-06,
"loss": 0.2668,
"step": 410
},
{
"epoch": 4.078048780487805,
"grad_norm": 1.3101578950881958,
"learning_rate": 2.7427802850695306e-06,
"loss": 0.1837,
"step": 420
},
{
"epoch": 4.175609756097561,
"grad_norm": 1.2633917331695557,
"learning_rate": 2.4917599939140884e-06,
"loss": 0.196,
"step": 430
},
{
"epoch": 4.273170731707317,
"grad_norm": 0.7994241714477539,
"learning_rate": 2.2489210603151144e-06,
"loss": 0.1483,
"step": 440
},
{
"epoch": 4.3707317073170735,
"grad_norm": 1.1350624561309814,
"learning_rate": 2.015055574399388e-06,
"loss": 0.1495,
"step": 450
},
{
"epoch": 4.46829268292683,
"grad_norm": 1.1117892265319824,
"learning_rate": 1.7909263567724917e-06,
"loss": 0.1489,
"step": 460
},
{
"epoch": 4.565853658536585,
"grad_norm": 0.9958974719047546,
"learning_rate": 1.5772644703565564e-06,
"loss": 0.1702,
"step": 470
},
{
"epoch": 4.663414634146341,
"grad_norm": 1.1438698768615723,
"learning_rate": 1.3747668358149658e-06,
"loss": 0.1774,
"step": 480
},
{
"epoch": 4.760975609756097,
"grad_norm": 0.9679899215698242,
"learning_rate": 1.1840939583419986e-06,
"loss": 0.1462,
"step": 490
},
{
"epoch": 4.8585365853658535,
"grad_norm": 1.017114520072937,
"learning_rate": 1.0058677732321826e-06,
"loss": 0.1865,
"step": 500
},
{
"epoch": 4.95609756097561,
"grad_norm": 1.2413111925125122,
"learning_rate": 8.406696172566258e-07,
"loss": 0.1795,
"step": 510
},
{
"epoch": 5.048780487804878,
"grad_norm": 0.7522889971733093,
"learning_rate": 6.890383324633121e-07,
"loss": 0.157,
"step": 520
},
{
"epoch": 5.146341463414634,
"grad_norm": 0.9641028046607971,
"learning_rate": 5.514685085863286e-07,
"loss": 0.1632,
"step": 530
},
{
"epoch": 5.2439024390243905,
"grad_norm": 1.0888645648956299,
"learning_rate": 4.2840886979696074e-07,
"loss": 0.1269,
"step": 540
},
{
"epoch": 5.341463414634147,
"grad_norm": 0.7203080058097839,
"learning_rate": 3.2026081105871634e-07,
"loss": 0.1096,
"step": 550
},
{
"epoch": 5.439024390243903,
"grad_norm": 1.3017480373382568,
"learning_rate": 2.2737708886037823e-07,
"loss": 0.1133,
"step": 560
},
{
"epoch": 5.536585365853659,
"grad_norm": 0.9247483015060425,
"learning_rate": 1.5006067059766484e-07,
"loss": 0.1188,
"step": 570
},
{
"epoch": 5.634146341463414,
"grad_norm": 0.8894818425178528,
"learning_rate": 8.856374635655696e-08,
"loss": 0.1069,
"step": 580
},
{
"epoch": 5.7317073170731705,
"grad_norm": 0.981769323348999,
"learning_rate": 4.3086906321652136e-08,
"loss": 0.0943,
"step": 590
},
{
"epoch": 5.829268292682927,
"grad_norm": 0.441194087266922,
"learning_rate": 1.3778486492681542e-08,
"loss": 0.1061,
"step": 600
},
{
"epoch": 5.926829268292683,
"grad_norm": 0.8621994256973267,
"learning_rate": 7.340848433040615e-10,
"loss": 0.1294,
"step": 610
},
{
"epoch": 5.946341463414634,
"step": 612,
"total_flos": 66516718592000.0,
"train_loss": 0.3622722171880061,
"train_runtime": 10104.8075,
"train_samples_per_second": 0.485,
"train_steps_per_second": 0.061
}
],
"logging_steps": 10,
"max_steps": 612,
"num_input_tokens_seen": 0,
"num_train_epochs": 6,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 66516718592000.0,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}