| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 2.962085308056872, |
| "eval_steps": 500, |
| "global_step": 5000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.02962085308056872, |
| "grad_norm": 0.0359644778072834, |
| "learning_rate": 4.951619273301738e-05, |
| "loss": 0.3566, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.05924170616113744, |
| "grad_norm": 0.045561712235212326, |
| "learning_rate": 4.902251184834124e-05, |
| "loss": 0.0019, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.08886255924170616, |
| "grad_norm": 0.011206220835447311, |
| "learning_rate": 4.852883096366509e-05, |
| "loss": 0.0017, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.11848341232227488, |
| "grad_norm": 0.008807787671685219, |
| "learning_rate": 4.8035150078988947e-05, |
| "loss": 0.0006, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.1481042654028436, |
| "grad_norm": 0.005796543322503567, |
| "learning_rate": 4.75414691943128e-05, |
| "loss": 0.0009, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.17772511848341233, |
| "grad_norm": 0.005227432586252689, |
| "learning_rate": 4.7047788309636656e-05, |
| "loss": 0.0006, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.20734597156398105, |
| "grad_norm": 0.003554289462044835, |
| "learning_rate": 4.655410742496051e-05, |
| "loss": 0.0005, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.23696682464454977, |
| "grad_norm": 0.0033741986844688654, |
| "learning_rate": 4.6060426540284365e-05, |
| "loss": 0.0012, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.2665876777251185, |
| "grad_norm": 0.002832002704963088, |
| "learning_rate": 4.556674565560822e-05, |
| "loss": 0.0002, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.2962085308056872, |
| "grad_norm": 0.002275377744808793, |
| "learning_rate": 4.5073064770932074e-05, |
| "loss": 0.0007, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.32582938388625593, |
| "grad_norm": 0.003911971114575863, |
| "learning_rate": 4.457938388625593e-05, |
| "loss": 0.0006, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.35545023696682465, |
| "grad_norm": 0.006926015485078096, |
| "learning_rate": 4.408570300157978e-05, |
| "loss": 0.0014, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.38507109004739337, |
| "grad_norm": 0.0027338722720742226, |
| "learning_rate": 4.359202211690364e-05, |
| "loss": 0.0008, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.4146919431279621, |
| "grad_norm": 0.001827694708481431, |
| "learning_rate": 4.309834123222749e-05, |
| "loss": 0.0003, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.4443127962085308, |
| "grad_norm": 0.0012653936864808202, |
| "learning_rate": 4.2604660347551346e-05, |
| "loss": 0.0001, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.47393364928909953, |
| "grad_norm": 0.0022165332920849323, |
| "learning_rate": 4.21109794628752e-05, |
| "loss": 0.0004, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.5035545023696683, |
| "grad_norm": 0.0063213687390089035, |
| "learning_rate": 4.1617298578199055e-05, |
| "loss": 0.0007, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.533175355450237, |
| "grad_norm": 0.024515969678759575, |
| "learning_rate": 4.112361769352291e-05, |
| "loss": 0.001, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.5627962085308057, |
| "grad_norm": 0.003376233857125044, |
| "learning_rate": 4.0629936808846765e-05, |
| "loss": 0.0005, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.5924170616113744, |
| "grad_norm": 0.001513678696937859, |
| "learning_rate": 4.013625592417062e-05, |
| "loss": 0.0002, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.6220379146919431, |
| "grad_norm": 0.013340185396373272, |
| "learning_rate": 3.9642575039494474e-05, |
| "loss": 0.0004, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.6516587677725119, |
| "grad_norm": 0.0008107981411740184, |
| "learning_rate": 3.914889415481833e-05, |
| "loss": 0.0001, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.6812796208530806, |
| "grad_norm": 0.000826548261102289, |
| "learning_rate": 3.865521327014218e-05, |
| "loss": 0.0, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.7109004739336493, |
| "grad_norm": 0.0015754875494167209, |
| "learning_rate": 3.816153238546604e-05, |
| "loss": 0.0003, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.740521327014218, |
| "grad_norm": 0.0010875407606363297, |
| "learning_rate": 3.766785150078989e-05, |
| "loss": 0.0001, |
| "step": 1250 |
| }, |
| { |
| "epoch": 0.7701421800947867, |
| "grad_norm": 0.0013485795352607965, |
| "learning_rate": 3.7174170616113746e-05, |
| "loss": 0.0002, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.7997630331753555, |
| "grad_norm": 0.002195018110796809, |
| "learning_rate": 3.66804897314376e-05, |
| "loss": 0.0008, |
| "step": 1350 |
| }, |
| { |
| "epoch": 0.8293838862559242, |
| "grad_norm": 0.0013226654846221209, |
| "learning_rate": 3.6186808846761455e-05, |
| "loss": 0.0004, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.8590047393364929, |
| "grad_norm": 0.020256407558918, |
| "learning_rate": 3.569312796208531e-05, |
| "loss": 0.0011, |
| "step": 1450 |
| }, |
| { |
| "epoch": 0.8886255924170616, |
| "grad_norm": 0.0060112737119197845, |
| "learning_rate": 3.5199447077409164e-05, |
| "loss": 0.0003, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.9182464454976303, |
| "grad_norm": 0.0010298583656549454, |
| "learning_rate": 3.470576619273302e-05, |
| "loss": 0.0002, |
| "step": 1550 |
| }, |
| { |
| "epoch": 0.9478672985781991, |
| "grad_norm": 0.0008804717799648643, |
| "learning_rate": 3.4212085308056873e-05, |
| "loss": 0.0003, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.9774881516587678, |
| "grad_norm": 0.0007368926890194416, |
| "learning_rate": 3.371840442338073e-05, |
| "loss": 0.0002, |
| "step": 1650 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_loss": 6.839788693469018e-05, |
| "eval_report": " precision recall f1-score support\n\nCERTIFICATION 1.00 1.00 1.00 1410\n EDUCATION 1.00 1.00 1.00 2241\n LANGUAGE 1.00 1.00 1.00 3014\n SKILL 1.00 1.00 1.00 3069\n\n micro avg 1.00 1.00 1.00 9734\n macro avg 1.00 1.00 1.00 9734\n weighted avg 1.00 1.00 1.00 9734\n", |
| "eval_runtime": 7.1833, |
| "eval_samples_per_second": 208.818, |
| "eval_steps_per_second": 26.172, |
| "step": 1688 |
| }, |
| { |
| "epoch": 1.0071090047393365, |
| "grad_norm": 0.0006786159938201308, |
| "learning_rate": 3.322472353870458e-05, |
| "loss": 0.0002, |
| "step": 1700 |
| }, |
| { |
| "epoch": 1.0367298578199051, |
| "grad_norm": 0.005513947457075119, |
| "learning_rate": 3.273104265402844e-05, |
| "loss": 0.0001, |
| "step": 1750 |
| }, |
| { |
| "epoch": 1.066350710900474, |
| "grad_norm": 0.000574503734242171, |
| "learning_rate": 3.223736176935229e-05, |
| "loss": 0.0001, |
| "step": 1800 |
| }, |
| { |
| "epoch": 1.0959715639810426, |
| "grad_norm": 0.004586311522871256, |
| "learning_rate": 3.1743680884676146e-05, |
| "loss": 0.0006, |
| "step": 1850 |
| }, |
| { |
| "epoch": 1.1255924170616114, |
| "grad_norm": 0.000738324539270252, |
| "learning_rate": 3.125e-05, |
| "loss": 0.0002, |
| "step": 1900 |
| }, |
| { |
| "epoch": 1.15521327014218, |
| "grad_norm": 0.0006399003323167562, |
| "learning_rate": 3.0756319115323855e-05, |
| "loss": 0.0001, |
| "step": 1950 |
| }, |
| { |
| "epoch": 1.1848341232227488, |
| "grad_norm": 0.0004596344952005893, |
| "learning_rate": 3.026263823064771e-05, |
| "loss": 0.0, |
| "step": 2000 |
| }, |
| { |
| "epoch": 1.2144549763033174, |
| "grad_norm": 0.0004429569817148149, |
| "learning_rate": 2.9768957345971564e-05, |
| "loss": 0.0001, |
| "step": 2050 |
| }, |
| { |
| "epoch": 1.2440758293838863, |
| "grad_norm": 0.0004196607042104006, |
| "learning_rate": 2.927527646129542e-05, |
| "loss": 0.0, |
| "step": 2100 |
| }, |
| { |
| "epoch": 1.2736966824644549, |
| "grad_norm": 0.0004485426179599017, |
| "learning_rate": 2.8781595576619273e-05, |
| "loss": 0.0, |
| "step": 2150 |
| }, |
| { |
| "epoch": 1.3033175355450237, |
| "grad_norm": 0.0005169134237803519, |
| "learning_rate": 2.8287914691943128e-05, |
| "loss": 0.0001, |
| "step": 2200 |
| }, |
| { |
| "epoch": 1.3329383886255926, |
| "grad_norm": 0.0005291880224831402, |
| "learning_rate": 2.7794233807266982e-05, |
| "loss": 0.0002, |
| "step": 2250 |
| }, |
| { |
| "epoch": 1.3625592417061612, |
| "grad_norm": 0.00048425907152704895, |
| "learning_rate": 2.7300552922590837e-05, |
| "loss": 0.0001, |
| "step": 2300 |
| }, |
| { |
| "epoch": 1.3921800947867298, |
| "grad_norm": 0.0004068867419846356, |
| "learning_rate": 2.6806872037914695e-05, |
| "loss": 0.0, |
| "step": 2350 |
| }, |
| { |
| "epoch": 1.4218009478672986, |
| "grad_norm": 0.0003516751166898757, |
| "learning_rate": 2.631319115323855e-05, |
| "loss": 0.0, |
| "step": 2400 |
| }, |
| { |
| "epoch": 1.4514218009478674, |
| "grad_norm": 0.0003432795056141913, |
| "learning_rate": 2.5819510268562404e-05, |
| "loss": 0.0, |
| "step": 2450 |
| }, |
| { |
| "epoch": 1.481042654028436, |
| "grad_norm": 0.00032765124342404306, |
| "learning_rate": 2.532582938388626e-05, |
| "loss": 0.0, |
| "step": 2500 |
| }, |
| { |
| "epoch": 1.5106635071090047, |
| "grad_norm": 0.00030620096367783844, |
| "learning_rate": 2.4832148499210113e-05, |
| "loss": 0.0, |
| "step": 2550 |
| }, |
| { |
| "epoch": 1.5402843601895735, |
| "grad_norm": 0.00032276054844260216, |
| "learning_rate": 2.4338467614533967e-05, |
| "loss": 0.0, |
| "step": 2600 |
| }, |
| { |
| "epoch": 1.5699052132701423, |
| "grad_norm": 0.0003284791891928762, |
| "learning_rate": 2.3844786729857822e-05, |
| "loss": 0.0, |
| "step": 2650 |
| }, |
| { |
| "epoch": 1.599526066350711, |
| "grad_norm": 0.0005517126410268247, |
| "learning_rate": 2.3351105845181677e-05, |
| "loss": 0.0001, |
| "step": 2700 |
| }, |
| { |
| "epoch": 1.6291469194312795, |
| "grad_norm": 0.0004335689009167254, |
| "learning_rate": 2.285742496050553e-05, |
| "loss": 0.0, |
| "step": 2750 |
| }, |
| { |
| "epoch": 1.6587677725118484, |
| "grad_norm": 0.00938709732145071, |
| "learning_rate": 2.2363744075829386e-05, |
| "loss": 0.0011, |
| "step": 2800 |
| }, |
| { |
| "epoch": 1.6883886255924172, |
| "grad_norm": 0.002625273773446679, |
| "learning_rate": 2.187006319115324e-05, |
| "loss": 0.0, |
| "step": 2850 |
| }, |
| { |
| "epoch": 1.7180094786729858, |
| "grad_norm": 0.00048340365174226463, |
| "learning_rate": 2.1376382306477095e-05, |
| "loss": 0.0, |
| "step": 2900 |
| }, |
| { |
| "epoch": 1.7476303317535544, |
| "grad_norm": 0.0035711589735001326, |
| "learning_rate": 2.088270142180095e-05, |
| "loss": 0.0005, |
| "step": 2950 |
| }, |
| { |
| "epoch": 1.7772511848341233, |
| "grad_norm": 0.0005380721995607018, |
| "learning_rate": 2.0389020537124804e-05, |
| "loss": 0.0001, |
| "step": 3000 |
| }, |
| { |
| "epoch": 1.806872037914692, |
| "grad_norm": 0.00039379362715408206, |
| "learning_rate": 1.9895339652448658e-05, |
| "loss": 0.0, |
| "step": 3050 |
| }, |
| { |
| "epoch": 1.8364928909952607, |
| "grad_norm": 0.00031137277255766094, |
| "learning_rate": 1.9401658767772513e-05, |
| "loss": 0.0001, |
| "step": 3100 |
| }, |
| { |
| "epoch": 1.8661137440758293, |
| "grad_norm": 0.00033859844552353024, |
| "learning_rate": 1.8907977883096367e-05, |
| "loss": 0.0003, |
| "step": 3150 |
| }, |
| { |
| "epoch": 1.8957345971563981, |
| "grad_norm": 0.0003177137696184218, |
| "learning_rate": 1.8414296998420222e-05, |
| "loss": 0.0, |
| "step": 3200 |
| }, |
| { |
| "epoch": 1.925355450236967, |
| "grad_norm": 0.5252463817596436, |
| "learning_rate": 1.7920616113744076e-05, |
| "loss": 0.0001, |
| "step": 3250 |
| }, |
| { |
| "epoch": 1.9549763033175356, |
| "grad_norm": 0.000578847888391465, |
| "learning_rate": 1.742693522906793e-05, |
| "loss": 0.0002, |
| "step": 3300 |
| }, |
| { |
| "epoch": 1.9845971563981042, |
| "grad_norm": 0.00031036767177283764, |
| "learning_rate": 1.6933254344391785e-05, |
| "loss": 0.0, |
| "step": 3350 |
| }, |
| { |
| "epoch": 2.0, |
| "eval_loss": 0.0002486561133991927, |
| "eval_report": " precision recall f1-score support\n\nCERTIFICATION 1.00 1.00 1.00 1410\n EDUCATION 1.00 1.00 1.00 2241\n LANGUAGE 1.00 1.00 1.00 3014\n SKILL 1.00 1.00 1.00 3069\n\n micro avg 1.00 1.00 1.00 9734\n macro avg 1.00 1.00 1.00 9734\n weighted avg 1.00 1.00 1.00 9734\n", |
| "eval_runtime": 7.0179, |
| "eval_samples_per_second": 213.738, |
| "eval_steps_per_second": 26.788, |
| "step": 3376 |
| }, |
| { |
| "epoch": 2.014218009478673, |
| "grad_norm": 0.00038110482273623347, |
| "learning_rate": 1.643957345971564e-05, |
| "loss": 0.0002, |
| "step": 3400 |
| }, |
| { |
| "epoch": 2.043838862559242, |
| "grad_norm": 0.00030082205194048584, |
| "learning_rate": 1.5945892575039495e-05, |
| "loss": 0.0, |
| "step": 3450 |
| }, |
| { |
| "epoch": 2.0734597156398102, |
| "grad_norm": 0.00030870226328261197, |
| "learning_rate": 1.545221169036335e-05, |
| "loss": 0.0, |
| "step": 3500 |
| }, |
| { |
| "epoch": 2.103080568720379, |
| "grad_norm": 0.00023404941020999104, |
| "learning_rate": 1.4958530805687204e-05, |
| "loss": 0.0001, |
| "step": 3550 |
| }, |
| { |
| "epoch": 2.132701421800948, |
| "grad_norm": 0.00021994848793838173, |
| "learning_rate": 1.4464849921011058e-05, |
| "loss": 0.0, |
| "step": 3600 |
| }, |
| { |
| "epoch": 2.1623222748815167, |
| "grad_norm": 0.0002600239240564406, |
| "learning_rate": 1.3971169036334913e-05, |
| "loss": 0.0, |
| "step": 3650 |
| }, |
| { |
| "epoch": 2.191943127962085, |
| "grad_norm": 0.00019250177138019353, |
| "learning_rate": 1.3477488151658769e-05, |
| "loss": 0.0, |
| "step": 3700 |
| }, |
| { |
| "epoch": 2.221563981042654, |
| "grad_norm": 0.00024143581686075777, |
| "learning_rate": 1.2983807266982623e-05, |
| "loss": 0.0, |
| "step": 3750 |
| }, |
| { |
| "epoch": 2.251184834123223, |
| "grad_norm": 0.00020565264276228845, |
| "learning_rate": 1.2490126382306478e-05, |
| "loss": 0.0, |
| "step": 3800 |
| }, |
| { |
| "epoch": 2.2808056872037916, |
| "grad_norm": 0.0002789797727018595, |
| "learning_rate": 1.1996445497630332e-05, |
| "loss": 0.0002, |
| "step": 3850 |
| }, |
| { |
| "epoch": 2.31042654028436, |
| "grad_norm": 0.0007835368160158396, |
| "learning_rate": 1.1502764612954187e-05, |
| "loss": 0.0007, |
| "step": 3900 |
| }, |
| { |
| "epoch": 2.340047393364929, |
| "grad_norm": 0.0004727982450276613, |
| "learning_rate": 1.1009083728278042e-05, |
| "loss": 0.0, |
| "step": 3950 |
| }, |
| { |
| "epoch": 2.3696682464454977, |
| "grad_norm": 0.0008073291974142194, |
| "learning_rate": 1.0515402843601896e-05, |
| "loss": 0.0, |
| "step": 4000 |
| }, |
| { |
| "epoch": 2.3992890995260665, |
| "grad_norm": 0.15108434855937958, |
| "learning_rate": 1.002172195892575e-05, |
| "loss": 0.0, |
| "step": 4050 |
| }, |
| { |
| "epoch": 2.428909952606635, |
| "grad_norm": 0.00033067440381273627, |
| "learning_rate": 9.528041074249605e-06, |
| "loss": 0.0, |
| "step": 4100 |
| }, |
| { |
| "epoch": 2.4585308056872037, |
| "grad_norm": 0.019667765125632286, |
| "learning_rate": 9.03436018957346e-06, |
| "loss": 0.0, |
| "step": 4150 |
| }, |
| { |
| "epoch": 2.4881516587677726, |
| "grad_norm": 0.000259611289948225, |
| "learning_rate": 8.540679304897314e-06, |
| "loss": 0.0, |
| "step": 4200 |
| }, |
| { |
| "epoch": 2.5177725118483414, |
| "grad_norm": 0.0002708205720409751, |
| "learning_rate": 8.046998420221169e-06, |
| "loss": 0.0003, |
| "step": 4250 |
| }, |
| { |
| "epoch": 2.5473933649289098, |
| "grad_norm": 0.0002382330858381465, |
| "learning_rate": 7.553317535545023e-06, |
| "loss": 0.0, |
| "step": 4300 |
| }, |
| { |
| "epoch": 2.5770142180094786, |
| "grad_norm": 0.0002609147340990603, |
| "learning_rate": 7.059636650868879e-06, |
| "loss": 0.0, |
| "step": 4350 |
| }, |
| { |
| "epoch": 2.6066350710900474, |
| "grad_norm": 0.00025002885377034545, |
| "learning_rate": 6.565955766192733e-06, |
| "loss": 0.0, |
| "step": 4400 |
| }, |
| { |
| "epoch": 2.6362559241706163, |
| "grad_norm": 0.00023832859005779028, |
| "learning_rate": 6.0722748815165886e-06, |
| "loss": 0.0, |
| "step": 4450 |
| }, |
| { |
| "epoch": 2.665876777251185, |
| "grad_norm": 0.0002076889795716852, |
| "learning_rate": 5.578593996840443e-06, |
| "loss": 0.0, |
| "step": 4500 |
| }, |
| { |
| "epoch": 2.6954976303317535, |
| "grad_norm": 0.0002478805836290121, |
| "learning_rate": 5.084913112164298e-06, |
| "loss": 0.0, |
| "step": 4550 |
| }, |
| { |
| "epoch": 2.7251184834123223, |
| "grad_norm": 0.00021155517606530339, |
| "learning_rate": 4.591232227488152e-06, |
| "loss": 0.0, |
| "step": 4600 |
| }, |
| { |
| "epoch": 2.754739336492891, |
| "grad_norm": 0.00021754855697508901, |
| "learning_rate": 4.097551342812007e-06, |
| "loss": 0.0, |
| "step": 4650 |
| }, |
| { |
| "epoch": 2.7843601895734595, |
| "grad_norm": 0.00020183408923912793, |
| "learning_rate": 3.6038704581358612e-06, |
| "loss": 0.0, |
| "step": 4700 |
| }, |
| { |
| "epoch": 2.8139810426540284, |
| "grad_norm": 0.00022289449407253414, |
| "learning_rate": 3.1101895734597158e-06, |
| "loss": 0.0, |
| "step": 4750 |
| }, |
| { |
| "epoch": 2.843601895734597, |
| "grad_norm": 0.007142237853258848, |
| "learning_rate": 2.6165086887835703e-06, |
| "loss": 0.0, |
| "step": 4800 |
| }, |
| { |
| "epoch": 2.873222748815166, |
| "grad_norm": 0.00019705097656697035, |
| "learning_rate": 2.122827804107425e-06, |
| "loss": 0.0, |
| "step": 4850 |
| }, |
| { |
| "epoch": 2.902843601895735, |
| "grad_norm": 0.00023452220193576068, |
| "learning_rate": 1.6291469194312798e-06, |
| "loss": 0.0001, |
| "step": 4900 |
| }, |
| { |
| "epoch": 2.9324644549763033, |
| "grad_norm": 0.000205856587854214, |
| "learning_rate": 1.1354660347551343e-06, |
| "loss": 0.0, |
| "step": 4950 |
| }, |
| { |
| "epoch": 2.962085308056872, |
| "grad_norm": 0.00020924191630911082, |
| "learning_rate": 6.41785150078989e-07, |
| "loss": 0.0, |
| "step": 5000 |
| } |
| ], |
| "logging_steps": 50, |
| "max_steps": 5064, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1246789065733344.0, |
| "train_batch_size": 8, |
| "trial_name": null, |
| "trial_params": null |
| } |
|