{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 3.7735849056603774,
"eval_steps": 500,
"global_step": 3000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.050314465408805034,
"grad_norm": 0.2693639397621155,
"learning_rate": 5.85e-05,
"loss": 0.9976,
"step": 40
},
{
"epoch": 0.10062893081761007,
"grad_norm": 0.1328171044588089,
"learning_rate": 0.0001185,
"loss": 0.7524,
"step": 80
},
{
"epoch": 0.1509433962264151,
"grad_norm": 0.14459013938903809,
"learning_rate": 0.00014998411354903398,
"loss": 0.6747,
"step": 120
},
{
"epoch": 0.20125786163522014,
"grad_norm": 0.13957850635051727,
"learning_rate": 0.00014984685910209738,
"loss": 0.6483,
"step": 160
},
{
"epoch": 0.25157232704402516,
"grad_norm": 0.1547909677028656,
"learning_rate": 0.00014956908749779173,
"loss": 0.6306,
"step": 200
},
{
"epoch": 0.3018867924528302,
"grad_norm": 0.16232997179031372,
"learning_rate": 0.00014915132022328036,
"loss": 0.6248,
"step": 240
},
{
"epoch": 0.3522012578616352,
"grad_norm": 0.1795293539762497,
"learning_rate": 0.00014859434159296945,
"loss": 0.617,
"step": 280
},
{
"epoch": 0.4025157232704403,
"grad_norm": 0.16771389544010162,
"learning_rate": 0.00014789919727603988,
"loss": 0.6121,
"step": 320
},
{
"epoch": 0.4528301886792453,
"grad_norm": 0.13469167053699493,
"learning_rate": 0.00014706719233331246,
"loss": 0.6097,
"step": 360
},
{
"epoch": 0.5031446540880503,
"grad_norm": 0.12725010514259338,
"learning_rate": 0.0001460998887671327,
"loss": 0.6015,
"step": 400
},
{
"epoch": 0.5534591194968553,
"grad_norm": 0.23335981369018555,
"learning_rate": 0.00014499910258887453,
"loss": 0.6046,
"step": 440
},
{
"epoch": 0.6037735849056604,
"grad_norm": 0.1499803513288498,
"learning_rate": 0.00014376690040956871,
"loss": 0.5961,
"step": 480
},
{
"epoch": 0.6540880503144654,
"grad_norm": 0.14626877009868622,
"learning_rate": 0.0001424055955600566,
"loss": 0.6,
"step": 520
},
{
"epoch": 0.7044025157232704,
"grad_norm": 0.1540375053882599,
"learning_rate": 0.00014091774374795326,
"loss": 0.5927,
"step": 560
},
{
"epoch": 0.7547169811320755,
"grad_norm": 0.19099563360214233,
"learning_rate": 0.00013930613825957323,
"loss": 0.5974,
"step": 600
},
{
"epoch": 0.8050314465408805,
"grad_norm": 0.20004510879516602,
"learning_rate": 0.00013757380471582766,
"loss": 0.5858,
"step": 640
},
{
"epoch": 0.8553459119496856,
"grad_norm": 0.23818084597587585,
"learning_rate": 0.00013572399539193693,
"loss": 0.5891,
"step": 680
},
{
"epoch": 0.9056603773584906,
"grad_norm": 0.15015755593776703,
"learning_rate": 0.0001337601831116238,
"loss": 0.5832,
"step": 720
},
{
"epoch": 0.9559748427672956,
"grad_norm": 0.1416957825422287,
"learning_rate": 0.0001316860547272499,
"loss": 0.5824,
"step": 760
},
{
"epoch": 1.0062893081761006,
"grad_norm": 0.5108596086502075,
"learning_rate": 0.00012950550419813545,
"loss": 0.5788,
"step": 800
},
{
"epoch": 1.0566037735849056,
"grad_norm": 0.13162486255168915,
"learning_rate": 0.00012722262528005757,
"loss": 0.5653,
"step": 840
},
{
"epoch": 1.1069182389937107,
"grad_norm": 0.2837337255477905,
"learning_rate": 0.00012484170383965162,
"loss": 0.5695,
"step": 880
},
{
"epoch": 1.1572327044025157,
"grad_norm": 0.14296230673789978,
"learning_rate": 0.0001223672098081444,
"loss": 0.5694,
"step": 920
},
{
"epoch": 1.2075471698113207,
"grad_norm": 0.19151189923286438,
"learning_rate": 0.00011980378878952516,
"loss": 0.5669,
"step": 960
},
{
"epoch": 1.2578616352201257,
"grad_norm": 0.2892361283302307,
"learning_rate": 0.00011715625333890979,
"loss": 0.571,
"step": 1000
},
{
"epoch": 1.3081761006289307,
"grad_norm": 0.2670402228832245,
"learning_rate": 0.00011442957392747125,
"loss": 0.5716,
"step": 1040
},
{
"epoch": 1.3584905660377358,
"grad_norm": 0.3734613358974457,
"learning_rate": 0.00011162886961089939,
"loss": 0.5652,
"step": 1080
},
{
"epoch": 1.408805031446541,
"grad_norm": 0.1506778448820114,
"learning_rate": 0.00010875939841890866,
"loss": 0.5633,
"step": 1120
},
{
"epoch": 1.459119496855346,
"grad_norm": 0.4829564690589905,
"learning_rate": 0.0001058265474838369,
"loss": 0.5614,
"step": 1160
},
{
"epoch": 1.509433962264151,
"grad_norm": 0.15463890135288239,
"learning_rate": 0.00010283582292686707,
"loss": 0.558,
"step": 1200
},
{
"epoch": 1.559748427672956,
"grad_norm": 0.21459299325942993,
"learning_rate": 9.979283952086026e-05,
"loss": 0.5586,
"step": 1240
},
{
"epoch": 1.610062893081761,
"grad_norm": 0.1474829465150833,
"learning_rate": 9.670331014920607e-05,
"loss": 0.5597,
"step": 1280
},
{
"epoch": 1.6603773584905661,
"grad_norm": 0.21971166133880615,
"learning_rate": 9.357303508048122e-05,
"loss": 0.5642,
"step": 1320
},
{
"epoch": 1.7106918238993711,
"grad_norm": 0.24339526891708374,
"learning_rate": 9.040789107905117e-05,
"loss": 0.5629,
"step": 1360
},
{
"epoch": 1.7610062893081762,
"grad_norm": 0.31173282861709595,
"learning_rate": 8.721382037205923e-05,
"loss": 0.561,
"step": 1400
},
{
"epoch": 1.8113207547169812,
"grad_norm": 0.2891390025615692,
"learning_rate": 8.399681949351583e-05,
"loss": 0.5628,
"step": 1440
},
{
"epoch": 1.8616352201257862,
"grad_norm": 0.1505046933889389,
"learning_rate": 8.076292802643262e-05,
"loss": 0.5584,
"step": 1480
},
{
"epoch": 1.9119496855345912,
"grad_norm": 0.19114747643470764,
"learning_rate": 7.751821726413631e-05,
"loss": 0.5516,
"step": 1520
},
{
"epoch": 1.9622641509433962,
"grad_norm": 0.19517794251441956,
"learning_rate": 7.426877881205001e-05,
"loss": 0.559,
"step": 1560
},
{
"epoch": 2.0125786163522013,
"grad_norm": 0.1738079935312271,
"learning_rate": 7.102071315134024e-05,
"loss": 0.5504,
"step": 1600
},
{
"epoch": 2.0628930817610063,
"grad_norm": 0.22095054388046265,
"learning_rate": 6.778011818590128e-05,
"loss": 0.5399,
"step": 1640
},
{
"epoch": 2.1132075471698113,
"grad_norm": 0.21472840011119843,
"learning_rate": 6.455307779417765e-05,
"loss": 0.5401,
"step": 1680
},
{
"epoch": 2.1635220125786163,
"grad_norm": 0.1650049239397049,
"learning_rate": 6.13456504073179e-05,
"loss": 0.5344,
"step": 1720
},
{
"epoch": 2.2138364779874213,
"grad_norm": 0.36687150597572327,
"learning_rate": 5.8163857635103376e-05,
"loss": 0.5391,
"step": 1760
},
{
"epoch": 2.2641509433962264,
"grad_norm": 0.18526889383792877,
"learning_rate": 5.501367296100487e-05,
"loss": 0.5371,
"step": 1800
},
{
"epoch": 2.3144654088050314,
"grad_norm": 0.39570677280426025,
"learning_rate": 5.1901010527591714e-05,
"loss": 0.542,
"step": 1840
},
{
"epoch": 2.3647798742138364,
"grad_norm": 0.21406587958335876,
"learning_rate": 4.8831714033346834e-05,
"loss": 0.536,
"step": 1880
},
{
"epoch": 2.4150943396226414,
"grad_norm": 0.19753104448318481,
"learning_rate": 4.581154576173369e-05,
"loss": 0.5439,
"step": 1920
},
{
"epoch": 2.4654088050314464,
"grad_norm": 0.17012695968151093,
"learning_rate": 4.284617576311105e-05,
"loss": 0.5343,
"step": 1960
},
{
"epoch": 2.5157232704402515,
"grad_norm": 0.17650945484638214,
"learning_rate": 3.994117120980591e-05,
"loss": 0.5339,
"step": 2000
},
{
"epoch": 2.5660377358490565,
"grad_norm": 0.1910872757434845,
"learning_rate": 3.710198594432905e-05,
"loss": 0.537,
"step": 2040
},
{
"epoch": 2.6163522012578615,
"grad_norm": 0.2352050244808197,
"learning_rate": 3.4333950240355794e-05,
"loss": 0.5357,
"step": 2080
},
{
"epoch": 2.6666666666666665,
"grad_norm": 0.23112688958644867,
"learning_rate": 3.1642260795693946e-05,
"loss": 0.5432,
"step": 2120
},
{
"epoch": 2.7169811320754715,
"grad_norm": 0.22052186727523804,
"learning_rate": 2.903197097602678e-05,
"loss": 0.5374,
"step": 2160
},
{
"epoch": 2.767295597484277,
"grad_norm": 0.17838357388973236,
"learning_rate": 2.650798132774681e-05,
"loss": 0.539,
"step": 2200
},
{
"epoch": 2.817610062893082,
"grad_norm": 0.20648528635501862,
"learning_rate": 2.4075030377692216e-05,
"loss": 0.5439,
"step": 2240
},
{
"epoch": 2.867924528301887,
"grad_norm": 0.1718207746744156,
"learning_rate": 2.1737685737057638e-05,
"loss": 0.5362,
"step": 2280
},
{
"epoch": 2.918238993710692,
"grad_norm": 0.4676780104637146,
"learning_rate": 1.9500335526181545e-05,
"loss": 0.5389,
"step": 2320
},
{
"epoch": 2.968553459119497,
"grad_norm": 0.4748336374759674,
"learning_rate": 1.7367180136308676e-05,
"loss": 0.539,
"step": 2360
},
{
"epoch": 3.018867924528302,
"grad_norm": 0.16060461103916168,
"learning_rate": 1.534222434379447e-05,
"loss": 0.5358,
"step": 2400
},
{
"epoch": 3.069182389937107,
"grad_norm": 0.17130707204341888,
"learning_rate": 1.3429269791555917e-05,
"loss": 0.5251,
"step": 2440
},
{
"epoch": 3.119496855345912,
"grad_norm": 0.23798075318336487,
"learning_rate": 1.1631907851884142e-05,
"loss": 0.5304,
"step": 2480
},
{
"epoch": 3.169811320754717,
"grad_norm": 0.24216727912425995,
"learning_rate": 9.95351288401817e-06,
"loss": 0.5273,
"step": 2520
},
{
"epoch": 3.220125786163522,
"grad_norm": 0.19990038871765137,
"learning_rate": 8.397235899138127e-06,
"loss": 0.5287,
"step": 2560
},
{
"epoch": 3.270440251572327,
"grad_norm": 0.1680271029472351,
"learning_rate": 6.965998644670948e-06,
"loss": 0.5342,
"step": 2600
},
{
"epoch": 3.3207547169811322,
"grad_norm": 0.16589392721652985,
"learning_rate": 5.662488119014838e-06,
"loss": 0.5203,
"step": 2640
},
{
"epoch": 3.3710691823899372,
"grad_norm": 0.1659441739320755,
"learning_rate": 4.489151526980553e-06,
"loss": 0.5314,
"step": 2680
},
{
"epoch": 3.4213836477987423,
"grad_norm": 0.21029973030090332,
"learning_rate": 3.44819168542011e-06,
"loss": 0.5276,
"step": 2720
},
{
"epoch": 3.4716981132075473,
"grad_norm": 0.15285317599773407,
"learning_rate": 2.5415628876682693e-06,
"loss": 0.5294,
"step": 2760
},
{
"epoch": 3.5220125786163523,
"grad_norm": 0.2968325912952423,
"learning_rate": 1.7709672345610327e-06,
"loss": 0.522,
"step": 2800
},
{
"epoch": 3.5723270440251573,
"grad_norm": 0.16151106357574463,
"learning_rate": 1.1378514389191324e-06,
"loss": 0.5279,
"step": 2840
},
{
"epoch": 3.6226415094339623,
"grad_norm": 0.1621636152267456,
"learning_rate": 6.434041094959235e-07,
"loss": 0.5264,
"step": 2880
},
{
"epoch": 3.6729559748427674,
"grad_norm": 0.16203154623508453,
"learning_rate": 2.885535194886074e-07,
"loss": 0.524,
"step": 2920
},
{
"epoch": 3.7232704402515724,
"grad_norm": 0.15653491020202637,
"learning_rate": 7.396586380230829e-08,
"loss": 0.5256,
"step": 2960
},
{
"epoch": 3.7735849056603774,
"grad_norm": 0.1781754195690155,
"learning_rate": 4.400833874818044e-11,
"loss": 0.5303,
"step": 3000
}
],
"logging_steps": 40,
"max_steps": 3000,
"num_input_tokens_seen": 0,
"num_train_epochs": 4,
"save_steps": 750,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 3.2760149172305265e+18,
"train_batch_size": 72,
"trial_name": null,
"trial_params": null
}