{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.0,
  "eval_steps": 500,
  "global_step": 4888,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.020458265139116204,
      "grad_norm": 0.1146482527256012,
      "learning_rate": 6.666666666666667e-06,
      "loss": 2.4366,
      "step": 100
    },
    {
      "epoch": 0.04091653027823241,
      "grad_norm": 0.19526588916778564,
      "learning_rate": 1.3333333333333333e-05,
      "loss": 2.4074,
      "step": 200
    },
    {
      "epoch": 0.06137479541734861,
      "grad_norm": 0.2903444170951843,
      "learning_rate": 2e-05,
      "loss": 2.3776,
      "step": 300
    },
    {
      "epoch": 0.08183306055646482,
      "grad_norm": 0.38961780071258545,
      "learning_rate": 1.9976565632583726e-05,
      "loss": 2.3633,
      "step": 400
    },
    {
      "epoch": 0.10229132569558101,
      "grad_norm": 0.39514774084091187,
      "learning_rate": 1.990637236425014e-05,
      "loss": 2.3275,
      "step": 500
    },
    {
      "epoch": 0.12274959083469722,
      "grad_norm": 0.4908258020877838,
      "learning_rate": 1.9789749181967304e-05,
      "loss": 2.2917,
      "step": 600
    },
    {
      "epoch": 0.1432078559738134,
      "grad_norm": 0.45681577920913696,
      "learning_rate": 1.9627242683835782e-05,
      "loss": 2.2691,
      "step": 700
    },
    {
      "epoch": 0.16366612111292964,
      "grad_norm": 0.514772891998291,
      "learning_rate": 1.9419614517252536e-05,
      "loss": 2.2543,
      "step": 800
    },
    {
      "epoch": 0.18412438625204583,
      "grad_norm": 0.5909072160720825,
      "learning_rate": 1.916783780916589e-05,
      "loss": 2.2484,
      "step": 900
    },
    {
      "epoch": 0.20458265139116202,
      "grad_norm": 0.5185887217521667,
      "learning_rate": 1.8873092605152686e-05,
      "loss": 2.2393,
      "step": 1000
    },
    {
      "epoch": 0.22504091653027825,
      "grad_norm": 0.6383090615272522,
      "learning_rate": 1.8536760338693926e-05,
      "loss": 2.2473,
      "step": 1100
    },
    {
      "epoch": 0.24549918166939444,
      "grad_norm": 0.5739400386810303,
      "learning_rate": 1.816041735657083e-05,
      "loss": 2.2004,
      "step": 1200
    },
    {
      "epoch": 0.26595744680851063,
      "grad_norm": 0.6199432611465454,
      "learning_rate": 1.7745827530726937e-05,
      "loss": 2.2167,
      "step": 1300
    },
    {
      "epoch": 0.2864157119476268,
      "grad_norm": 0.6655313372612,
      "learning_rate": 1.7294933991223413e-05,
      "loss": 2.1977,
      "step": 1400
    },
    {
      "epoch": 0.306873977086743,
      "grad_norm": 0.6417890787124634,
      "learning_rate": 1.6809850019034324e-05,
      "loss": 2.1943,
      "step": 1500
    },
    {
      "epoch": 0.32733224222585927,
      "grad_norm": 0.6394040584564209,
      "learning_rate": 1.6292849141366084e-05,
      "loss": 2.2409,
      "step": 1600
    },
    {
      "epoch": 0.34779050736497547,
      "grad_norm": 0.6412447690963745,
      "learning_rate": 1.574635447592305e-05,
      "loss": 2.2241,
      "step": 1700
    },
    {
      "epoch": 0.36824877250409166,
      "grad_norm": 0.7298178672790527,
      "learning_rate": 1.5172927374061427e-05,
      "loss": 2.1782,
      "step": 1800
    },
    {
      "epoch": 0.38870703764320785,
      "grad_norm": 0.7089855074882507,
      "learning_rate": 1.4575255416059513e-05,
      "loss": 2.2225,
      "step": 1900
    },
    {
      "epoch": 0.40916530278232405,
      "grad_norm": 0.6368885636329651,
      "learning_rate": 1.3956139814768949e-05,
      "loss": 2.1694,
      "step": 2000
    },
    {
      "epoch": 0.42962356792144024,
      "grad_norm": 0.6372247338294983,
      "learning_rate": 1.3318482286684498e-05,
      "loss": 2.1647,
      "step": 2100
    },
    {
      "epoch": 0.4500818330605565,
      "grad_norm": 0.8332213759422302,
      "learning_rate": 1.2665271451965933e-05,
      "loss": 2.1871,
      "step": 2200
    },
    {
      "epoch": 0.4705400981996727,
      "grad_norm": 0.6431373357772827,
      "learning_rate": 1.1999568827153472e-05,
      "loss": 2.1635,
      "step": 2300
    },
    {
      "epoch": 0.4909983633387889,
      "grad_norm": 0.7330695986747742,
      "learning_rate": 1.1324494476227082e-05,
      "loss": 2.1713,
      "step": 2400
    },
    {
      "epoch": 0.5114566284779051,
      "grad_norm": 0.8543004393577576,
      "learning_rate": 1.0643212387261345e-05,
      "loss": 2.1591,
      "step": 2500
    },
    {
      "epoch": 0.5319148936170213,
      "grad_norm": 0.819828987121582,
      "learning_rate": 9.958915643213654e-06,
      "loss": 2.1482,
      "step": 2600
    },
    {
      "epoch": 0.5523731587561375,
      "grad_norm": 0.7319233417510986,
      "learning_rate": 9.274811456348358e-06,
      "loss": 2.1939,
      "step": 2700
    },
    {
      "epoch": 0.5728314238952537,
      "grad_norm": 0.7505283355712891,
      "learning_rate": 8.594106136438665e-06,
      "loss": 2.1661,
      "step": 2800
    },
    {
      "epoch": 0.5932896890343698,
      "grad_norm": 0.8119780421257019,
      "learning_rate": 7.919990063198368e-06,
      "loss": 2.1571,
      "step": 2900
    },
    {
      "epoch": 0.613747954173486,
      "grad_norm": 0.8243106603622437,
      "learning_rate": 7.255622733375776e-06,
      "loss": 2.1802,
      "step": 3000
    },
    {
      "epoch": 0.6342062193126022,
      "grad_norm": 0.7845710515975952,
      "learning_rate": 6.604117952592168e-06,
      "loss": 2.1798,
      "step": 3100
    },
    {
      "epoch": 0.6546644844517185,
      "grad_norm": 0.7707119584083557,
      "learning_rate": 5.968529241328822e-06,
      "loss": 2.1381,
      "step": 3200
    },
    {
      "epoch": 0.6751227495908347,
      "grad_norm": 0.7787159085273743,
      "learning_rate": 5.351835523462808e-06,
      "loss": 2.156,
      "step": 3300
    },
    {
      "epoch": 0.6955810147299509,
      "grad_norm": 0.6944179534912109,
      "learning_rate": 4.756927164427685e-06,
      "loss": 2.163,
      "step": 3400
    },
    {
      "epoch": 0.7160392798690671,
      "grad_norm": 0.8797001838684082,
      "learning_rate": 4.18659242443638e-06,
      "loss": 2.1375,
      "step": 3500
    },
    {
      "epoch": 0.7364975450081833,
      "grad_norm": 0.7624711990356445,
      "learning_rate": 3.6435043902583344e-06,
      "loss": 2.1825,
      "step": 3600
    },
    {
      "epoch": 0.7569558101472995,
      "grad_norm": 0.7933531999588013,
      "learning_rate": 3.1302084468000206e-06,
      "loss": 2.1442,
      "step": 3700
    },
    {
      "epoch": 0.7774140752864157,
      "grad_norm": 0.9495463967323303,
      "learning_rate": 2.6491103472078828e-06,
      "loss": 2.1191,
      "step": 3800
    },
    {
      "epoch": 0.7978723404255319,
      "grad_norm": 1.328906774520874,
      "learning_rate": 2.202464937407752e-06,
      "loss": 2.1698,
      "step": 3900
    },
    {
      "epoch": 0.8183306055646481,
      "grad_norm": 0.9682719111442566,
      "learning_rate": 1.7923655879272395e-06,
      "loss": 2.1254,
      "step": 4000
    },
    {
      "epoch": 0.8387888707037643,
      "grad_norm": 0.7559231519699097,
      "learning_rate": 1.4207343825329167e-06,
      "loss": 2.1739,
      "step": 4100
    },
    {
      "epoch": 0.8592471358428805,
      "grad_norm": 0.8757209777832031,
      "learning_rate": 1.089313109666904e-06,
      "loss": 2.1108,
      "step": 4200
    },
    {
      "epoch": 0.8797054009819967,
      "grad_norm": 0.911118745803833,
      "learning_rate": 7.996550989047813e-07,
      "loss": 2.1683,
      "step": 4300
    },
    {
      "epoch": 0.900163666121113,
      "grad_norm": 0.9142606854438782,
      "learning_rate": 5.531179406964016e-07,
      "loss": 2.1318,
      "step": 4400
    },
    {
      "epoch": 0.9206219312602292,
      "grad_norm": 0.9097318649291992,
      "learning_rate": 3.5085712351121016e-07,
      "loss": 2.1436,
      "step": 4500
    },
    {
      "epoch": 0.9410801963993454,
      "grad_norm": 0.7701767086982727,
      "learning_rate": 1.9382061820997112e-07,
      "loss": 2.1591,
      "step": 4600
    },
    {
      "epoch": 0.9615384615384616,
      "grad_norm": 0.7752079367637634,
      "learning_rate": 8.274443502528817e-08,
      "loss": 2.1662,
      "step": 4700
    },
    {
      "epoch": 0.9819967266775778,
      "grad_norm": 0.7332737445831299,
      "learning_rate": 1.814917397474636e-08,
      "loss": 2.1488,
      "step": 4800
    },
    {
      "epoch": 1.0,
      "step": 4888,
      "total_flos": 8.882163499008e+16,
      "train_loss": 2.2017843461856126,
      "train_runtime": 1517.6787,
      "train_samples_per_second": 6.441,
      "train_steps_per_second": 3.221
    }
  ],
  "logging_steps": 100,
  "max_steps": 4888,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 8.882163499008e+16,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}