{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 9.979036827195468,
"eval_steps": 500,
"global_step": 4410,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.11331444759206799,
"grad_norm": 0.2005129361523877,
"learning_rate": 1.977324263038549e-05,
"loss": 0.4547,
"step": 50
},
{
"epoch": 0.22662889518413598,
"grad_norm": 4.374481882033845,
"learning_rate": 1.9546485260770977e-05,
"loss": 0.225,
"step": 100
},
{
"epoch": 0.33994334277620397,
"grad_norm": 0.1016014971343205,
"learning_rate": 1.9319727891156463e-05,
"loss": 0.2215,
"step": 150
},
{
"epoch": 0.45325779036827196,
"grad_norm": 0.06295163563225963,
"learning_rate": 1.9092970521541953e-05,
"loss": 0.2203,
"step": 200
},
{
"epoch": 0.56657223796034,
"grad_norm": 0.06293734713169016,
"learning_rate": 1.886621315192744e-05,
"loss": 0.2195,
"step": 250
},
{
"epoch": 0.6798866855524079,
"grad_norm": 0.060515101336106644,
"learning_rate": 1.863945578231293e-05,
"loss": 0.22,
"step": 300
},
{
"epoch": 0.7932011331444759,
"grad_norm": 0.07540683199497876,
"learning_rate": 1.8412698412698415e-05,
"loss": 0.2196,
"step": 350
},
{
"epoch": 0.9065155807365439,
"grad_norm": 0.08983383050228771,
"learning_rate": 1.81859410430839e-05,
"loss": 0.2193,
"step": 400
},
{
"epoch": 1.0181303116147309,
"grad_norm": 0.0675672563417117,
"learning_rate": 1.795918367346939e-05,
"loss": 0.2156,
"step": 450
},
{
"epoch": 1.1314447592067989,
"grad_norm": 0.07024561390451645,
"learning_rate": 1.7732426303854877e-05,
"loss": 0.2186,
"step": 500
},
{
"epoch": 1.2447592067988669,
"grad_norm": 0.07966954244619774,
"learning_rate": 1.7505668934240366e-05,
"loss": 0.2184,
"step": 550
},
{
"epoch": 1.3580736543909349,
"grad_norm": 0.04818417686134014,
"learning_rate": 1.7278911564625852e-05,
"loss": 0.2182,
"step": 600
},
{
"epoch": 1.4713881019830028,
"grad_norm": 0.05242535702602152,
"learning_rate": 1.705215419501134e-05,
"loss": 0.218,
"step": 650
},
{
"epoch": 1.5847025495750708,
"grad_norm": 0.06651711822208371,
"learning_rate": 1.6825396825396828e-05,
"loss": 0.2179,
"step": 700
},
{
"epoch": 1.6980169971671388,
"grad_norm": 0.19794356005762634,
"learning_rate": 1.6598639455782314e-05,
"loss": 0.2178,
"step": 750
},
{
"epoch": 1.8113314447592068,
"grad_norm": 0.054498240100240465,
"learning_rate": 1.63718820861678e-05,
"loss": 0.2182,
"step": 800
},
{
"epoch": 1.9246458923512748,
"grad_norm": 0.053181868189447694,
"learning_rate": 1.614512471655329e-05,
"loss": 0.2177,
"step": 850
},
{
"epoch": 2.0362606232294618,
"grad_norm": 0.0562999012232746,
"learning_rate": 1.5918367346938776e-05,
"loss": 0.2142,
"step": 900
},
{
"epoch": 2.1495750708215295,
"grad_norm": 0.06574460863314742,
"learning_rate": 1.5691609977324265e-05,
"loss": 0.2174,
"step": 950
},
{
"epoch": 2.2628895184135978,
"grad_norm": 0.07473982858505401,
"learning_rate": 1.546485260770975e-05,
"loss": 0.2174,
"step": 1000
},
{
"epoch": 2.376203966005666,
"grad_norm": 0.04914272343711847,
"learning_rate": 1.523809523809524e-05,
"loss": 0.2172,
"step": 1050
},
{
"epoch": 2.4895184135977337,
"grad_norm": 0.060028789070772234,
"learning_rate": 1.5011337868480727e-05,
"loss": 0.2172,
"step": 1100
},
{
"epoch": 2.6028328611898015,
"grad_norm": 0.04603015974224043,
"learning_rate": 1.4784580498866215e-05,
"loss": 0.2172,
"step": 1150
},
{
"epoch": 2.7161473087818697,
"grad_norm": 0.05801340941717548,
"learning_rate": 1.4557823129251703e-05,
"loss": 0.217,
"step": 1200
},
{
"epoch": 2.829461756373938,
"grad_norm": 0.03757088207951635,
"learning_rate": 1.433106575963719e-05,
"loss": 0.2169,
"step": 1250
},
{
"epoch": 2.9427762039660057,
"grad_norm": 0.0392731960266638,
"learning_rate": 1.4104308390022677e-05,
"loss": 0.2169,
"step": 1300
},
{
"epoch": 3.0543909348441924,
"grad_norm": 0.06241785187409858,
"learning_rate": 1.3877551020408165e-05,
"loss": 0.2136,
"step": 1350
},
{
"epoch": 3.1677053824362607,
"grad_norm": 0.06071070650139286,
"learning_rate": 1.3650793650793652e-05,
"loss": 0.2186,
"step": 1400
},
{
"epoch": 3.2810198300283284,
"grad_norm": 0.0408584442690185,
"learning_rate": 1.342403628117914e-05,
"loss": 0.2171,
"step": 1450
},
{
"epoch": 3.3943342776203966,
"grad_norm": 0.04286108941316469,
"learning_rate": 1.3197278911564626e-05,
"loss": 0.217,
"step": 1500
},
{
"epoch": 3.507648725212465,
"grad_norm": 0.050377281789735655,
"learning_rate": 1.2970521541950114e-05,
"loss": 0.2195,
"step": 1550
},
{
"epoch": 3.6209631728045326,
"grad_norm": 0.05622603657613179,
"learning_rate": 1.2743764172335602e-05,
"loss": 0.2174,
"step": 1600
},
{
"epoch": 3.7342776203966004,
"grad_norm": 0.048485290627651875,
"learning_rate": 1.251700680272109e-05,
"loss": 0.217,
"step": 1650
},
{
"epoch": 3.8475920679886686,
"grad_norm": 0.049896749884447776,
"learning_rate": 1.2290249433106578e-05,
"loss": 0.2169,
"step": 1700
},
{
"epoch": 3.960906515580737,
"grad_norm": 0.042955138530708765,
"learning_rate": 1.2063492063492064e-05,
"loss": 0.2168,
"step": 1750
},
{
"epoch": 4.0725212464589235,
"grad_norm": 0.04972531513985689,
"learning_rate": 1.1836734693877552e-05,
"loss": 0.2133,
"step": 1800
},
{
"epoch": 4.185835694050992,
"grad_norm": 0.03493608076161152,
"learning_rate": 1.160997732426304e-05,
"loss": 0.2165,
"step": 1850
},
{
"epoch": 4.299150141643059,
"grad_norm": 0.05936933051159733,
"learning_rate": 1.1383219954648527e-05,
"loss": 0.2166,
"step": 1900
},
{
"epoch": 4.412464589235127,
"grad_norm": 0.044813050936368184,
"learning_rate": 1.1156462585034013e-05,
"loss": 0.2165,
"step": 1950
},
{
"epoch": 4.5257790368271955,
"grad_norm": 0.039668826323639036,
"learning_rate": 1.0929705215419501e-05,
"loss": 0.2165,
"step": 2000
},
{
"epoch": 4.639093484419264,
"grad_norm": 0.04978591341331415,
"learning_rate": 1.0702947845804989e-05,
"loss": 0.2164,
"step": 2050
},
{
"epoch": 4.752407932011332,
"grad_norm": 0.04572653204373289,
"learning_rate": 1.0476190476190477e-05,
"loss": 0.2162,
"step": 2100
},
{
"epoch": 4.865722379603399,
"grad_norm": 0.0414740460984779,
"learning_rate": 1.0249433106575966e-05,
"loss": 0.216,
"step": 2150
},
{
"epoch": 4.9790368271954675,
"grad_norm": 0.044261316481047364,
"learning_rate": 1.0022675736961451e-05,
"loss": 0.216,
"step": 2200
},
{
"epoch": 5.090651558073654,
"grad_norm": 0.039290118974741946,
"learning_rate": 9.795918367346939e-06,
"loss": 0.2126,
"step": 2250
},
{
"epoch": 5.203966005665722,
"grad_norm": 0.0558689696926692,
"learning_rate": 9.569160997732427e-06,
"loss": 0.2158,
"step": 2300
},
{
"epoch": 5.317280453257791,
"grad_norm": 0.04013222575202666,
"learning_rate": 9.342403628117914e-06,
"loss": 0.2158,
"step": 2350
},
{
"epoch": 5.430594900849858,
"grad_norm": 0.03940518538256405,
"learning_rate": 9.115646258503402e-06,
"loss": 0.2156,
"step": 2400
},
{
"epoch": 5.543909348441926,
"grad_norm": 0.036248333651368,
"learning_rate": 8.888888888888888e-06,
"loss": 0.2156,
"step": 2450
},
{
"epoch": 5.657223796033994,
"grad_norm": 0.030178531163303088,
"learning_rate": 8.662131519274378e-06,
"loss": 0.2155,
"step": 2500
},
{
"epoch": 5.770538243626063,
"grad_norm": 0.040097656925639,
"learning_rate": 8.435374149659866e-06,
"loss": 0.2154,
"step": 2550
},
{
"epoch": 5.88385269121813,
"grad_norm": 0.05225991360322306,
"learning_rate": 8.208616780045352e-06,
"loss": 0.2154,
"step": 2600
},
{
"epoch": 5.997167138810198,
"grad_norm": 0.03410100715408048,
"learning_rate": 7.98185941043084e-06,
"loss": 0.2154,
"step": 2650
},
{
"epoch": 6.108781869688385,
"grad_norm": 0.04267307174763642,
"learning_rate": 7.755102040816327e-06,
"loss": 0.2119,
"step": 2700
},
{
"epoch": 6.222096317280453,
"grad_norm": 0.04309446113662568,
"learning_rate": 7.528344671201815e-06,
"loss": 0.2152,
"step": 2750
},
{
"epoch": 6.335410764872521,
"grad_norm": 0.041102870476639485,
"learning_rate": 7.301587301587301e-06,
"loss": 0.2151,
"step": 2800
},
{
"epoch": 6.4487252124645895,
"grad_norm": 0.04551287987357368,
"learning_rate": 7.07482993197279e-06,
"loss": 0.2151,
"step": 2850
},
{
"epoch": 6.562039660056657,
"grad_norm": 0.04323820835646204,
"learning_rate": 6.848072562358277e-06,
"loss": 0.2149,
"step": 2900
},
{
"epoch": 6.675354107648725,
"grad_norm": 0.05251190173310129,
"learning_rate": 6.621315192743765e-06,
"loss": 0.2149,
"step": 2950
},
{
"epoch": 6.788668555240793,
"grad_norm": 0.07880028642512263,
"learning_rate": 6.394557823129253e-06,
"loss": 0.2149,
"step": 3000
},
{
"epoch": 6.9019830028328615,
"grad_norm": 0.03479910796542844,
"learning_rate": 6.16780045351474e-06,
"loss": 0.2148,
"step": 3050
},
{
"epoch": 7.013597733711048,
"grad_norm": 0.03458517402957525,
"learning_rate": 5.9410430839002275e-06,
"loss": 0.2115,
"step": 3100
},
{
"epoch": 7.126912181303116,
"grad_norm": 0.034419006405346834,
"learning_rate": 5.7142857142857145e-06,
"loss": 0.2145,
"step": 3150
},
{
"epoch": 7.240226628895184,
"grad_norm": 0.03629297170824396,
"learning_rate": 5.487528344671202e-06,
"loss": 0.2144,
"step": 3200
},
{
"epoch": 7.353541076487252,
"grad_norm": 0.03858771713025695,
"learning_rate": 5.260770975056689e-06,
"loss": 0.2143,
"step": 3250
},
{
"epoch": 7.46685552407932,
"grad_norm": 0.0350919228123147,
"learning_rate": 5.034013605442177e-06,
"loss": 0.2143,
"step": 3300
},
{
"epoch": 7.580169971671388,
"grad_norm": 0.048002057370294546,
"learning_rate": 4.807256235827665e-06,
"loss": 0.2142,
"step": 3350
},
{
"epoch": 7.693484419263456,
"grad_norm": 0.03299980225000233,
"learning_rate": 4.580498866213152e-06,
"loss": 0.2142,
"step": 3400
},
{
"epoch": 7.806798866855524,
"grad_norm": 0.0366304286363175,
"learning_rate": 4.35374149659864e-06,
"loss": 0.2141,
"step": 3450
},
{
"epoch": 7.920113314447592,
"grad_norm": 0.039492638736222725,
"learning_rate": 4.126984126984127e-06,
"loss": 0.2141,
"step": 3500
},
{
"epoch": 8.03172804532578,
"grad_norm": 0.052547554783668696,
"learning_rate": 3.9002267573696154e-06,
"loss": 0.2108,
"step": 3550
},
{
"epoch": 8.145042492917847,
"grad_norm": 0.052357108980609274,
"learning_rate": 3.6734693877551024e-06,
"loss": 0.2138,
"step": 3600
},
{
"epoch": 8.258356940509914,
"grad_norm": 0.04203804316106579,
"learning_rate": 3.44671201814059e-06,
"loss": 0.2137,
"step": 3650
},
{
"epoch": 8.371671388101984,
"grad_norm": 0.03800793699581388,
"learning_rate": 3.2199546485260772e-06,
"loss": 0.2138,
"step": 3700
},
{
"epoch": 8.48498583569405,
"grad_norm": 0.03338231105442374,
"learning_rate": 2.993197278911565e-06,
"loss": 0.2138,
"step": 3750
},
{
"epoch": 8.598300283286118,
"grad_norm": 0.03462642479987778,
"learning_rate": 2.7664399092970525e-06,
"loss": 0.2136,
"step": 3800
},
{
"epoch": 8.711614730878187,
"grad_norm": 0.03345477511212533,
"learning_rate": 2.53968253968254e-06,
"loss": 0.2136,
"step": 3850
},
{
"epoch": 8.824929178470255,
"grad_norm": 0.04065897379519029,
"learning_rate": 2.3129251700680273e-06,
"loss": 0.2136,
"step": 3900
},
{
"epoch": 8.938243626062324,
"grad_norm": 0.04230058689139044,
"learning_rate": 2.086167800453515e-06,
"loss": 0.2135,
"step": 3950
},
{
"epoch": 9.04985835694051,
"grad_norm": 0.038128150813345994,
"learning_rate": 1.8594104308390023e-06,
"loss": 0.2102,
"step": 4000
},
{
"epoch": 9.163172804532579,
"grad_norm": 0.03384083547412833,
"learning_rate": 1.6326530612244897e-06,
"loss": 0.2132,
"step": 4050
},
{
"epoch": 9.276487252124646,
"grad_norm": 0.03967958263100997,
"learning_rate": 1.4058956916099775e-06,
"loss": 0.2131,
"step": 4100
},
{
"epoch": 9.389801699716713,
"grad_norm": 0.04693574868490394,
"learning_rate": 1.179138321995465e-06,
"loss": 0.2131,
"step": 4150
},
{
"epoch": 9.503116147308782,
"grad_norm": 0.03686909642390895,
"learning_rate": 9.523809523809525e-07,
"loss": 0.213,
"step": 4200
},
{
"epoch": 9.61643059490085,
"grad_norm": 0.04021478097064685,
"learning_rate": 7.2562358276644e-07,
"loss": 0.213,
"step": 4250
},
{
"epoch": 9.729745042492917,
"grad_norm": 0.034565423196844705,
"learning_rate": 4.988662131519275e-07,
"loss": 0.2129,
"step": 4300
},
{
"epoch": 9.843059490084986,
"grad_norm": 0.04106503466855132,
"learning_rate": 2.72108843537415e-07,
"loss": 0.2128,
"step": 4350
},
{
"epoch": 9.956373937677053,
"grad_norm": 0.03631468837696881,
"learning_rate": 4.53514739229025e-08,
"loss": 0.2127,
"step": 4400
}
],
"logging_steps": 50,
"max_steps": 4410,
"num_input_tokens_seen": 0,
"num_train_epochs": 10,
"save_steps": 1000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1.1136582509658112e+16,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}