{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 3.7735849056603774,
  "eval_steps": 500,
  "global_step": 3000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.050314465408805034,
      "grad_norm": 0.2693639397621155,
      "learning_rate": 5.85e-05,
      "loss": 0.9976,
      "step": 40
    },
    {
      "epoch": 0.10062893081761007,
      "grad_norm": 0.1328171044588089,
      "learning_rate": 0.0001185,
      "loss": 0.7524,
      "step": 80
    },
    {
      "epoch": 0.1509433962264151,
      "grad_norm": 0.14459013938903809,
      "learning_rate": 0.00014998411354903398,
      "loss": 0.6747,
      "step": 120
    },
    {
      "epoch": 0.20125786163522014,
      "grad_norm": 0.13957850635051727,
      "learning_rate": 0.00014984685910209738,
      "loss": 0.6483,
      "step": 160
    },
    {
      "epoch": 0.25157232704402516,
      "grad_norm": 0.1547909677028656,
      "learning_rate": 0.00014956908749779173,
      "loss": 0.6306,
      "step": 200
    },
    {
      "epoch": 0.3018867924528302,
      "grad_norm": 0.16232997179031372,
      "learning_rate": 0.00014915132022328036,
      "loss": 0.6248,
      "step": 240
    },
    {
      "epoch": 0.3522012578616352,
      "grad_norm": 0.1795293539762497,
      "learning_rate": 0.00014859434159296945,
      "loss": 0.617,
      "step": 280
    },
    {
      "epoch": 0.4025157232704403,
      "grad_norm": 0.16771389544010162,
      "learning_rate": 0.00014789919727603988,
      "loss": 0.6121,
      "step": 320
    },
    {
      "epoch": 0.4528301886792453,
      "grad_norm": 0.13469167053699493,
      "learning_rate": 0.00014706719233331246,
      "loss": 0.6097,
      "step": 360
    },
    {
      "epoch": 0.5031446540880503,
      "grad_norm": 0.12725010514259338,
      "learning_rate": 0.0001460998887671327,
      "loss": 0.6015,
      "step": 400
    },
    {
      "epoch": 0.5534591194968553,
      "grad_norm": 0.23335981369018555,
      "learning_rate": 0.00014499910258887453,
      "loss": 0.6046,
      "step": 440
    },
    {
      "epoch": 0.6037735849056604,
      "grad_norm": 0.1499803513288498,
      "learning_rate": 0.00014376690040956871,
      "loss": 0.5961,
      "step": 480
    },
    {
      "epoch": 0.6540880503144654,
      "grad_norm": 0.14626877009868622,
      "learning_rate": 0.0001424055955600566,
      "loss": 0.6,
      "step": 520
    },
    {
      "epoch": 0.7044025157232704,
      "grad_norm": 0.1540375053882599,
      "learning_rate": 0.00014091774374795326,
      "loss": 0.5927,
      "step": 560
    },
    {
      "epoch": 0.7547169811320755,
      "grad_norm": 0.19099563360214233,
      "learning_rate": 0.00013930613825957323,
      "loss": 0.5974,
      "step": 600
    },
    {
      "epoch": 0.8050314465408805,
      "grad_norm": 0.20004510879516602,
      "learning_rate": 0.00013757380471582766,
      "loss": 0.5858,
      "step": 640
    },
    {
      "epoch": 0.8553459119496856,
      "grad_norm": 0.23818084597587585,
      "learning_rate": 0.00013572399539193693,
      "loss": 0.5891,
      "step": 680
    },
    {
      "epoch": 0.9056603773584906,
      "grad_norm": 0.15015755593776703,
      "learning_rate": 0.0001337601831116238,
      "loss": 0.5832,
      "step": 720
    },
    {
      "epoch": 0.9559748427672956,
      "grad_norm": 0.1416957825422287,
      "learning_rate": 0.0001316860547272499,
      "loss": 0.5824,
      "step": 760
    },
    {
      "epoch": 1.0062893081761006,
      "grad_norm": 0.5108596086502075,
      "learning_rate": 0.00012950550419813545,
      "loss": 0.5788,
      "step": 800
    },
    {
      "epoch": 1.0566037735849056,
      "grad_norm": 0.13162486255168915,
      "learning_rate": 0.00012722262528005757,
      "loss": 0.5653,
      "step": 840
    },
    {
      "epoch": 1.1069182389937107,
      "grad_norm": 0.2837337255477905,
      "learning_rate": 0.00012484170383965162,
      "loss": 0.5695,
      "step": 880
    },
    {
      "epoch": 1.1572327044025157,
      "grad_norm": 0.14296230673789978,
      "learning_rate": 0.0001223672098081444,
      "loss": 0.5694,
      "step": 920
    },
    {
      "epoch": 1.2075471698113207,
      "grad_norm": 0.19151189923286438,
      "learning_rate": 0.00011980378878952516,
      "loss": 0.5669,
      "step": 960
    },
    {
      "epoch": 1.2578616352201257,
      "grad_norm": 0.2892361283302307,
      "learning_rate": 0.00011715625333890979,
      "loss": 0.571,
      "step": 1000
    },
    {
      "epoch": 1.3081761006289307,
      "grad_norm": 0.2670402228832245,
      "learning_rate": 0.00011442957392747125,
      "loss": 0.5716,
      "step": 1040
    },
    {
      "epoch": 1.3584905660377358,
      "grad_norm": 0.3734613358974457,
      "learning_rate": 0.00011162886961089939,
      "loss": 0.5652,
      "step": 1080
    },
    {
      "epoch": 1.408805031446541,
      "grad_norm": 0.1506778448820114,
      "learning_rate": 0.00010875939841890866,
      "loss": 0.5633,
      "step": 1120
    },
    {
      "epoch": 1.459119496855346,
      "grad_norm": 0.4829564690589905,
      "learning_rate": 0.0001058265474838369,
      "loss": 0.5614,
      "step": 1160
    },
    {
      "epoch": 1.509433962264151,
      "grad_norm": 0.15463890135288239,
      "learning_rate": 0.00010283582292686707,
      "loss": 0.558,
      "step": 1200
    },
    {
      "epoch": 1.559748427672956,
      "grad_norm": 0.21459299325942993,
      "learning_rate": 9.979283952086026e-05,
      "loss": 0.5586,
      "step": 1240
    },
    {
      "epoch": 1.610062893081761,
      "grad_norm": 0.1474829465150833,
      "learning_rate": 9.670331014920607e-05,
      "loss": 0.5597,
      "step": 1280
    },
    {
      "epoch": 1.6603773584905661,
      "grad_norm": 0.21971166133880615,
      "learning_rate": 9.357303508048122e-05,
      "loss": 0.5642,
      "step": 1320
    },
    {
      "epoch": 1.7106918238993711,
      "grad_norm": 0.24339526891708374,
      "learning_rate": 9.040789107905117e-05,
      "loss": 0.5629,
      "step": 1360
    },
    {
      "epoch": 1.7610062893081762,
      "grad_norm": 0.31173282861709595,
      "learning_rate": 8.721382037205923e-05,
      "loss": 0.561,
      "step": 1400
    },
    {
      "epoch": 1.8113207547169812,
      "grad_norm": 0.2891390025615692,
      "learning_rate": 8.399681949351583e-05,
      "loss": 0.5628,
      "step": 1440
    },
    {
      "epoch": 1.8616352201257862,
      "grad_norm": 0.1505046933889389,
      "learning_rate": 8.076292802643262e-05,
      "loss": 0.5584,
      "step": 1480
    },
    {
      "epoch": 1.9119496855345912,
      "grad_norm": 0.19114747643470764,
      "learning_rate": 7.751821726413631e-05,
      "loss": 0.5516,
      "step": 1520
    },
    {
      "epoch": 1.9622641509433962,
      "grad_norm": 0.19517794251441956,
      "learning_rate": 7.426877881205001e-05,
      "loss": 0.559,
      "step": 1560
    },
    {
      "epoch": 2.0125786163522013,
      "grad_norm": 0.1738079935312271,
      "learning_rate": 7.102071315134024e-05,
      "loss": 0.5504,
      "step": 1600
    },
    {
      "epoch": 2.0628930817610063,
      "grad_norm": 0.22095054388046265,
      "learning_rate": 6.778011818590128e-05,
      "loss": 0.5399,
      "step": 1640
    },
    {
      "epoch": 2.1132075471698113,
      "grad_norm": 0.21472840011119843,
      "learning_rate": 6.455307779417765e-05,
      "loss": 0.5401,
      "step": 1680
    },
    {
      "epoch": 2.1635220125786163,
      "grad_norm": 0.1650049239397049,
      "learning_rate": 6.13456504073179e-05,
      "loss": 0.5344,
      "step": 1720
    },
    {
      "epoch": 2.2138364779874213,
      "grad_norm": 0.36687150597572327,
      "learning_rate": 5.8163857635103376e-05,
      "loss": 0.5391,
      "step": 1760
    },
    {
      "epoch": 2.2641509433962264,
      "grad_norm": 0.18526889383792877,
      "learning_rate": 5.501367296100487e-05,
      "loss": 0.5371,
      "step": 1800
    },
    {
      "epoch": 2.3144654088050314,
      "grad_norm": 0.39570677280426025,
      "learning_rate": 5.1901010527591714e-05,
      "loss": 0.542,
      "step": 1840
    },
    {
      "epoch": 2.3647798742138364,
      "grad_norm": 0.21406587958335876,
      "learning_rate": 4.8831714033346834e-05,
      "loss": 0.536,
      "step": 1880
    },
    {
      "epoch": 2.4150943396226414,
      "grad_norm": 0.19753104448318481,
      "learning_rate": 4.581154576173369e-05,
      "loss": 0.5439,
      "step": 1920
    },
    {
      "epoch": 2.4654088050314464,
      "grad_norm": 0.17012695968151093,
      "learning_rate": 4.284617576311105e-05,
      "loss": 0.5343,
      "step": 1960
    },
    {
      "epoch": 2.5157232704402515,
      "grad_norm": 0.17650945484638214,
      "learning_rate": 3.994117120980591e-05,
      "loss": 0.5339,
      "step": 2000
    },
    {
      "epoch": 2.5660377358490565,
      "grad_norm": 0.1910872757434845,
      "learning_rate": 3.710198594432905e-05,
      "loss": 0.537,
      "step": 2040
    },
    {
      "epoch": 2.6163522012578615,
      "grad_norm": 0.2352050244808197,
      "learning_rate": 3.4333950240355794e-05,
      "loss": 0.5357,
      "step": 2080
    },
    {
      "epoch": 2.6666666666666665,
      "grad_norm": 0.23112688958644867,
      "learning_rate": 3.1642260795693946e-05,
      "loss": 0.5432,
      "step": 2120
    },
    {
      "epoch": 2.7169811320754715,
      "grad_norm": 0.22052186727523804,
      "learning_rate": 2.903197097602678e-05,
      "loss": 0.5374,
      "step": 2160
    },
    {
      "epoch": 2.767295597484277,
      "grad_norm": 0.17838357388973236,
      "learning_rate": 2.650798132774681e-05,
      "loss": 0.539,
      "step": 2200
    },
    {
      "epoch": 2.817610062893082,
      "grad_norm": 0.20648528635501862,
      "learning_rate": 2.4075030377692216e-05,
      "loss": 0.5439,
      "step": 2240
    },
    {
      "epoch": 2.867924528301887,
      "grad_norm": 0.1718207746744156,
      "learning_rate": 2.1737685737057638e-05,
      "loss": 0.5362,
      "step": 2280
    },
    {
      "epoch": 2.918238993710692,
      "grad_norm": 0.4676780104637146,
      "learning_rate": 1.9500335526181545e-05,
      "loss": 0.5389,
      "step": 2320
    },
    {
      "epoch": 2.968553459119497,
      "grad_norm": 0.4748336374759674,
      "learning_rate": 1.7367180136308676e-05,
      "loss": 0.539,
      "step": 2360
    },
    {
      "epoch": 3.018867924528302,
      "grad_norm": 0.16060461103916168,
      "learning_rate": 1.534222434379447e-05,
      "loss": 0.5358,
      "step": 2400
    },
    {
      "epoch": 3.069182389937107,
      "grad_norm": 0.17130707204341888,
      "learning_rate": 1.3429269791555917e-05,
      "loss": 0.5251,
      "step": 2440
    },
    {
      "epoch": 3.119496855345912,
      "grad_norm": 0.23798075318336487,
      "learning_rate": 1.1631907851884142e-05,
      "loss": 0.5304,
      "step": 2480
    },
    {
      "epoch": 3.169811320754717,
      "grad_norm": 0.24216727912425995,
      "learning_rate": 9.95351288401817e-06,
      "loss": 0.5273,
      "step": 2520
    },
    {
      "epoch": 3.220125786163522,
      "grad_norm": 0.19990038871765137,
      "learning_rate": 8.397235899138127e-06,
      "loss": 0.5287,
      "step": 2560
    },
    {
      "epoch": 3.270440251572327,
      "grad_norm": 0.1680271029472351,
      "learning_rate": 6.965998644670948e-06,
      "loss": 0.5342,
      "step": 2600
    },
    {
      "epoch": 3.3207547169811322,
      "grad_norm": 0.16589392721652985,
      "learning_rate": 5.662488119014838e-06,
      "loss": 0.5203,
      "step": 2640
    },
    {
      "epoch": 3.3710691823899372,
      "grad_norm": 0.1659441739320755,
      "learning_rate": 4.489151526980553e-06,
      "loss": 0.5314,
      "step": 2680
    },
    {
      "epoch": 3.4213836477987423,
      "grad_norm": 0.21029973030090332,
      "learning_rate": 3.44819168542011e-06,
      "loss": 0.5276,
      "step": 2720
    },
    {
      "epoch": 3.4716981132075473,
      "grad_norm": 0.15285317599773407,
      "learning_rate": 2.5415628876682693e-06,
      "loss": 0.5294,
      "step": 2760
    },
    {
      "epoch": 3.5220125786163523,
      "grad_norm": 0.2968325912952423,
      "learning_rate": 1.7709672345610327e-06,
      "loss": 0.522,
      "step": 2800
    },
    {
      "epoch": 3.5723270440251573,
      "grad_norm": 0.16151106357574463,
      "learning_rate": 1.1378514389191324e-06,
      "loss": 0.5279,
      "step": 2840
    },
    {
      "epoch": 3.6226415094339623,
      "grad_norm": 0.1621636152267456,
      "learning_rate": 6.434041094959235e-07,
      "loss": 0.5264,
      "step": 2880
    },
    {
      "epoch": 3.6729559748427674,
      "grad_norm": 0.16203154623508453,
      "learning_rate": 2.885535194886074e-07,
      "loss": 0.524,
      "step": 2920
    },
    {
      "epoch": 3.7232704402515724,
      "grad_norm": 0.15653491020202637,
      "learning_rate": 7.396586380230829e-08,
      "loss": 0.5256,
      "step": 2960
    },
    {
      "epoch": 3.7735849056603774,
      "grad_norm": 0.1781754195690155,
      "learning_rate": 4.400833874818044e-11,
      "loss": 0.5303,
      "step": 3000
    }
  ],
  "logging_steps": 40,
  "max_steps": 3000,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 4,
  "save_steps": 750,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 3.2760149172305265e+18,
  "train_batch_size": 72,
  "trial_name": null,
  "trial_params": null
}