Renjie-Ranger's picture
Upload folder using huggingface_hub
b560d72 verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"eval_steps": 500,
"global_step": 657,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.015225045199352935,
"grad_norm": 0.8015036989610748,
"learning_rate": 6.818181818181818e-07,
"loss": 1.2434,
"step": 10
},
{
"epoch": 0.03045009039870587,
"grad_norm": 0.5371339027245265,
"learning_rate": 1.4393939393939396e-06,
"loss": 1.3254,
"step": 20
},
{
"epoch": 0.04567513559805881,
"grad_norm": 0.3873488090138409,
"learning_rate": 2.196969696969697e-06,
"loss": 1.2972,
"step": 30
},
{
"epoch": 0.06090018079741174,
"grad_norm": 0.3521791179594526,
"learning_rate": 2.954545454545455e-06,
"loss": 1.3127,
"step": 40
},
{
"epoch": 0.07612522599676468,
"grad_norm": 0.30676127981755036,
"learning_rate": 3.7121212121212124e-06,
"loss": 1.2638,
"step": 50
},
{
"epoch": 0.09135027119611762,
"grad_norm": 0.4101414956476006,
"learning_rate": 4.46969696969697e-06,
"loss": 1.3305,
"step": 60
},
{
"epoch": 0.10657531639547055,
"grad_norm": 0.32298711751246617,
"learning_rate": 4.999682116415026e-06,
"loss": 1.2712,
"step": 70
},
{
"epoch": 0.12180036159482348,
"grad_norm": 0.317442988654654,
"learning_rate": 4.9940331012821616e-06,
"loss": 1.273,
"step": 80
},
{
"epoch": 0.1370254067941764,
"grad_norm": 0.30534590571797454,
"learning_rate": 4.981338376708957e-06,
"loss": 1.2204,
"step": 90
},
{
"epoch": 0.15225045199352935,
"grad_norm": 0.33409391048066833,
"learning_rate": 4.961633805627912e-06,
"loss": 1.2558,
"step": 100
},
{
"epoch": 0.1674754971928823,
"grad_norm": 0.33936819417642766,
"learning_rate": 4.934975053973217e-06,
"loss": 1.247,
"step": 110
},
{
"epoch": 0.18270054239223524,
"grad_norm": 0.3159856537332395,
"learning_rate": 4.901437433423016e-06,
"loss": 1.2884,
"step": 120
},
{
"epoch": 0.19792558759158815,
"grad_norm": 0.321908331706447,
"learning_rate": 4.861115688641921e-06,
"loss": 1.2543,
"step": 130
},
{
"epoch": 0.2131506327909411,
"grad_norm": 0.328440211023217,
"learning_rate": 4.814123729624837e-06,
"loss": 1.2735,
"step": 140
},
{
"epoch": 0.22837567799029404,
"grad_norm": 0.35087717949475955,
"learning_rate": 4.7605943098982075e-06,
"loss": 1.2938,
"step": 150
},
{
"epoch": 0.24360072318964696,
"grad_norm": 0.32756221423558446,
"learning_rate": 4.7006786514878e-06,
"loss": 1.1977,
"step": 160
},
{
"epoch": 0.2588257683889999,
"grad_norm": 0.31899258273232245,
"learning_rate": 4.6345460177124816e-06,
"loss": 1.2292,
"step": 170
},
{
"epoch": 0.2740508135883528,
"grad_norm": 0.3291697633439208,
"learning_rate": 4.5623832350108674e-06,
"loss": 1.2431,
"step": 180
},
{
"epoch": 0.2892758587877058,
"grad_norm": 0.31915754548001085,
"learning_rate": 4.4843941651517e-06,
"loss": 1.2183,
"step": 190
},
{
"epoch": 0.3045009039870587,
"grad_norm": 0.3104063063524443,
"learning_rate": 4.400799129318952e-06,
"loss": 1.2598,
"step": 200
},
{
"epoch": 0.3197259491864117,
"grad_norm": 0.3277653309557523,
"learning_rate": 4.31183428569867e-06,
"loss": 1.2197,
"step": 210
},
{
"epoch": 0.3349509943857646,
"grad_norm": 0.3565586762710126,
"learning_rate": 4.217750962325845e-06,
"loss": 1.2978,
"step": 220
},
{
"epoch": 0.3501760395851175,
"grad_norm": 0.3124052373761214,
"learning_rate": 4.11881494707608e-06,
"loss": 1.2376,
"step": 230
},
{
"epoch": 0.3654010847844705,
"grad_norm": 0.321888222544292,
"learning_rate": 4.015305736807806e-06,
"loss": 1.2338,
"step": 240
},
{
"epoch": 0.3806261299838234,
"grad_norm": 0.36420812224810134,
"learning_rate": 3.907515747776275e-06,
"loss": 1.2556,
"step": 250
},
{
"epoch": 0.3958511751831763,
"grad_norm": 0.2989183187303492,
"learning_rate": 3.795749489549904e-06,
"loss": 1.2527,
"step": 260
},
{
"epoch": 0.4110762203825293,
"grad_norm": 0.30715884439307567,
"learning_rate": 3.680322704762701e-06,
"loss": 1.2467,
"step": 270
},
{
"epoch": 0.4263012655818822,
"grad_norm": 0.41263512309014105,
"learning_rate": 3.561561477132971e-06,
"loss": 1.2592,
"step": 280
},
{
"epoch": 0.4415263107812351,
"grad_norm": 0.2990764181830289,
"learning_rate": 3.4398013102681956e-06,
"loss": 1.2942,
"step": 290
},
{
"epoch": 0.4567513559805881,
"grad_norm": 0.30817207269780955,
"learning_rate": 3.3153861798584696e-06,
"loss": 1.2278,
"step": 300
},
{
"epoch": 0.471976401179941,
"grad_norm": 0.3072887178239842,
"learning_rate": 3.1886675619360883e-06,
"loss": 1.2753,
"step": 310
},
{
"epoch": 0.4872014463792939,
"grad_norm": 0.38789533497763495,
"learning_rate": 3.060003439946462e-06,
"loss": 1.2951,
"step": 320
},
{
"epoch": 0.5024264915786468,
"grad_norm": 0.28331258754735067,
"learning_rate": 2.929757293435419e-06,
"loss": 1.2851,
"step": 330
},
{
"epoch": 0.5176515367779998,
"grad_norm": 0.2962487342346661,
"learning_rate": 2.7982970712098795e-06,
"loss": 1.2702,
"step": 340
},
{
"epoch": 0.5328765819773528,
"grad_norm": 0.3039773975171447,
"learning_rate": 2.665994151872755e-06,
"loss": 1.2127,
"step": 350
},
{
"epoch": 0.5481016271767056,
"grad_norm": 0.4131494842990153,
"learning_rate": 2.5332222946685707e-06,
"loss": 1.2551,
"step": 360
},
{
"epoch": 0.5633266723760586,
"grad_norm": 0.3888599336298796,
"learning_rate": 2.4003565836037245e-06,
"loss": 1.2354,
"step": 370
},
{
"epoch": 0.5785517175754116,
"grad_norm": 0.2793746233865594,
"learning_rate": 2.267772367824249e-06,
"loss": 1.2819,
"step": 380
},
{
"epoch": 0.5937767627747644,
"grad_norm": 0.2899583112911672,
"learning_rate": 2.135844201244556e-06,
"loss": 1.2523,
"step": 390
},
{
"epoch": 0.6090018079741174,
"grad_norm": 0.2923000214972851,
"learning_rate": 2.0049447844227265e-06,
"loss": 1.2759,
"step": 400
},
{
"epoch": 0.6242268531734704,
"grad_norm": 0.2919228747330785,
"learning_rate": 1.875443911671579e-06,
"loss": 1.2044,
"step": 410
},
{
"epoch": 0.6394518983728233,
"grad_norm": 0.5366405058825244,
"learning_rate": 1.7477074263799632e-06,
"loss": 1.2527,
"step": 420
},
{
"epoch": 0.6546769435721762,
"grad_norm": 0.29907607930356067,
"learning_rate": 1.6220961874955136e-06,
"loss": 1.2442,
"step": 430
},
{
"epoch": 0.6699019887715292,
"grad_norm": 0.30050211246658853,
"learning_rate": 1.4989650500885838e-06,
"loss": 1.2406,
"step": 440
},
{
"epoch": 0.6851270339708821,
"grad_norm": 0.2834575399227386,
"learning_rate": 1.3786618628772938e-06,
"loss": 1.2197,
"step": 450
},
{
"epoch": 0.700352079170235,
"grad_norm": 0.28730355579070144,
"learning_rate": 1.2615264855457037e-06,
"loss": 1.26,
"step": 460
},
{
"epoch": 0.715577124369588,
"grad_norm": 0.5094507361581523,
"learning_rate": 1.1478898286312231e-06,
"loss": 1.3205,
"step": 470
},
{
"epoch": 0.730802169568941,
"grad_norm": 0.2896864936273596,
"learning_rate": 1.038072918693596e-06,
"loss": 1.2423,
"step": 480
},
{
"epoch": 0.7460272147682938,
"grad_norm": 0.29550104589134707,
"learning_rate": 9.323859914063815e-07,
"loss": 1.2507,
"step": 490
},
{
"epoch": 0.7612522599676468,
"grad_norm": 0.2721570960991744,
"learning_rate": 8.311276151329775e-07,
"loss": 1.2683,
"step": 500
},
{
"epoch": 0.7764773051669998,
"grad_norm": 0.2852630721545509,
"learning_rate": 7.345838474630993e-07,
"loss": 1.2815,
"step": 510
},
{
"epoch": 0.7917023503663526,
"grad_norm": 0.2909871526763653,
"learning_rate": 6.430274270925271e-07,
"loss": 1.2503,
"step": 520
},
{
"epoch": 0.8069273955657056,
"grad_norm": 0.30499754932838724,
"learning_rate": 5.56717003329082e-07,
"loss": 1.2364,
"step": 530
},
{
"epoch": 0.8221524407650586,
"grad_norm": 0.2776051695513212,
"learning_rate": 4.758964054014931e-07,
"loss": 1.2012,
"step": 540
},
{
"epoch": 0.8373774859644114,
"grad_norm": 0.2905227549359444,
"learning_rate": 4.0079395363538056e-07,
"loss": 1.3095,
"step": 550
},
{
"epoch": 0.8526025311637644,
"grad_norm": 0.2810906800968861,
"learning_rate": 3.3162181444230056e-07,
"loss": 1.2799,
"step": 560
},
{
"epoch": 0.8678275763631174,
"grad_norm": 0.2687773919781078,
"learning_rate": 2.6857540094402365e-07,
"loss": 1.2415,
"step": 570
},
{
"epoch": 0.8830526215624702,
"grad_norm": 0.30078684187223376,
"learning_rate": 2.1183282092530067e-07,
"loss": 1.2361,
"step": 580
},
{
"epoch": 0.8982776667618232,
"grad_norm": 0.27536926671011003,
"learning_rate": 1.6155437367466277e-07,
"loss": 1.2392,
"step": 590
},
{
"epoch": 0.9135027119611762,
"grad_norm": 0.2753306105776222,
"learning_rate": 1.1788209713469195e-07,
"loss": 1.2716,
"step": 600
},
{
"epoch": 0.928727757160529,
"grad_norm": 0.274286963333408,
"learning_rate": 8.093936664108071e-08,
"loss": 1.2109,
"step": 610
},
{
"epoch": 0.943952802359882,
"grad_norm": 0.29590574898063726,
"learning_rate": 5.083054638404722e-08,
"loss": 1.2449,
"step": 620
},
{
"epoch": 0.959177847559235,
"grad_norm": 0.27873264107936796,
"learning_rate": 2.7640694576737125e-08,
"loss": 1.2194,
"step": 630
},
{
"epoch": 0.9744028927585878,
"grad_norm": 0.2757612055037619,
"learning_rate": 1.1435323163525026e-08,
"loss": 1.2431,
"step": 640
},
{
"epoch": 0.9896279379579408,
"grad_norm": 0.2715956982953583,
"learning_rate": 2.2602127470383593e-09,
"loss": 1.2422,
"step": 650
},
{
"epoch": 1.0,
"step": 657,
"total_flos": 718053792808960.0,
"train_loss": 1.2574238450559851,
"train_runtime": 37309.1102,
"train_samples_per_second": 9.013,
"train_steps_per_second": 0.018
}
],
"logging_steps": 10,
"max_steps": 657,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": false,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 718053792808960.0,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}