{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 5.339028296849973,
  "eval_steps": 500,
  "global_step": 20000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.13347570742124934,
      "grad_norm": 3.1733596324920654,
      "learning_rate": 4.933395621996797e-05,
      "loss": 1.4561,
      "step": 500
    },
    {
      "epoch": 0.2669514148424987,
      "grad_norm": 2.9853200912475586,
      "learning_rate": 4.866657768286172e-05,
      "loss": 1.2371,
      "step": 1000
    },
    {
      "epoch": 0.400427122263748,
      "grad_norm": 2.6459007263183594,
      "learning_rate": 4.7999199145755475e-05,
      "loss": 1.1388,
      "step": 1500
    },
    {
      "epoch": 0.5339028296849974,
      "grad_norm": 2.6050009727478027,
      "learning_rate": 4.733182060864923e-05,
      "loss": 1.1141,
      "step": 2000
    },
    {
      "epoch": 0.6673785371062466,
      "grad_norm": 2.1738457679748535,
      "learning_rate": 4.666444207154298e-05,
      "loss": 1.086,
      "step": 2500
    },
    {
      "epoch": 0.800854244527496,
      "grad_norm": 2.692978620529175,
      "learning_rate": 4.599706353443674e-05,
      "loss": 1.0676,
      "step": 3000
    },
    {
      "epoch": 0.9343299519487454,
      "grad_norm": 2.072795867919922,
      "learning_rate": 4.532968499733049e-05,
      "loss": 0.9895,
      "step": 3500
    },
    {
      "epoch": 1.0678056593699947,
      "grad_norm": 2.18379282951355,
      "learning_rate": 4.466230646022424e-05,
      "loss": 0.9155,
      "step": 4000
    },
    {
      "epoch": 1.201281366791244,
      "grad_norm": 2.4531776905059814,
      "learning_rate": 4.3994927923117995e-05,
      "loss": 0.8136,
      "step": 4500
    },
    {
      "epoch": 1.3347570742124932,
      "grad_norm": 2.8264808654785156,
      "learning_rate": 4.332754938601175e-05,
      "loss": 0.8129,
      "step": 5000
    },
    {
      "epoch": 1.4682327816337426,
      "grad_norm": 1.746168851852417,
      "learning_rate": 4.26601708489055e-05,
      "loss": 0.8581,
      "step": 5500
    },
    {
      "epoch": 1.601708489054992,
      "grad_norm": 2.722280740737915,
      "learning_rate": 4.199279231179926e-05,
      "loss": 0.7905,
      "step": 6000
    },
    {
      "epoch": 1.7351841964762413,
      "grad_norm": 2.4873287677764893,
      "learning_rate": 4.1325413774693004e-05,
      "loss": 0.8327,
      "step": 6500
    },
    {
      "epoch": 1.8686599038974907,
      "grad_norm": 2.6175665855407715,
      "learning_rate": 4.0658035237586763e-05,
      "loss": 0.8191,
      "step": 7000
    },
    {
      "epoch": 2.00213561131874,
      "grad_norm": 1.2910939455032349,
      "learning_rate": 3.9990656700480516e-05,
      "loss": 0.8359,
      "step": 7500
    },
    {
      "epoch": 2.1356113187399894,
      "grad_norm": 2.5565571784973145,
      "learning_rate": 3.932327816337427e-05,
      "loss": 0.6788,
      "step": 8000
    },
    {
      "epoch": 2.269087026161239,
      "grad_norm": 2.173668146133423,
      "learning_rate": 3.865589962626802e-05,
      "loss": 0.6669,
      "step": 8500
    },
    {
      "epoch": 2.402562733582488,
      "grad_norm": 1.9133882522583008,
      "learning_rate": 3.798852108916178e-05,
      "loss": 0.6696,
      "step": 9000
    },
    {
      "epoch": 2.536038441003737,
      "grad_norm": 1.3470282554626465,
      "learning_rate": 3.7321142552055525e-05,
      "loss": 0.6803,
      "step": 9500
    },
    {
      "epoch": 2.6695141484249865,
      "grad_norm": 2.3730781078338623,
      "learning_rate": 3.6653764014949284e-05,
      "loss": 0.6684,
      "step": 10000
    },
    {
      "epoch": 2.802989855846236,
      "grad_norm": 2.106994390487671,
      "learning_rate": 3.598638547784303e-05,
      "loss": 0.6717,
      "step": 10500
    },
    {
      "epoch": 2.936465563267485,
      "grad_norm": 1.7302494049072266,
      "learning_rate": 3.531900694073679e-05,
      "loss": 0.676,
      "step": 11000
    },
    {
      "epoch": 3.0699412706887346,
      "grad_norm": 1.477286458015442,
      "learning_rate": 3.465162840363054e-05,
      "loss": 0.586,
      "step": 11500
    },
    {
      "epoch": 3.203416978109984,
      "grad_norm": 1.818613052368164,
      "learning_rate": 3.398424986652429e-05,
      "loss": 0.5467,
      "step": 12000
    },
    {
      "epoch": 3.3368926855312333,
      "grad_norm": 1.6314208507537842,
      "learning_rate": 3.3316871329418045e-05,
      "loss": 0.556,
      "step": 12500
    },
    {
      "epoch": 3.4703683929524827,
      "grad_norm": 2.8924617767333984,
      "learning_rate": 3.2649492792311804e-05,
      "loss": 0.5567,
      "step": 13000
    },
    {
      "epoch": 3.603844100373732,
      "grad_norm": 2.6945688724517822,
      "learning_rate": 3.198211425520555e-05,
      "loss": 0.5568,
      "step": 13500
    },
    {
      "epoch": 3.7373198077949814,
      "grad_norm": 2.092221736907959,
      "learning_rate": 3.131473571809931e-05,
      "loss": 0.5567,
      "step": 14000
    },
    {
      "epoch": 3.8707955152162308,
      "grad_norm": 1.6795735359191895,
      "learning_rate": 3.064735718099306e-05,
      "loss": 0.5764,
      "step": 14500
    },
    {
      "epoch": 4.00427122263748,
      "grad_norm": 2.4606454372406006,
      "learning_rate": 2.9979978643886814e-05,
      "loss": 0.5716,
      "step": 15000
    },
    {
      "epoch": 4.1377469300587295,
      "grad_norm": 4.759591102600098,
      "learning_rate": 2.931260010678057e-05,
      "loss": 0.4671,
      "step": 15500
    },
    {
      "epoch": 4.271222637479979,
      "grad_norm": 1.4791502952575684,
      "learning_rate": 2.8645221569674318e-05,
      "loss": 0.4719,
      "step": 16000
    },
    {
      "epoch": 4.404698344901228,
      "grad_norm": 1.2884821891784668,
      "learning_rate": 2.7977843032568074e-05,
      "loss": 0.465,
      "step": 16500
    },
    {
      "epoch": 4.538174052322478,
      "grad_norm": 3.4914660453796387,
      "learning_rate": 2.731046449546183e-05,
      "loss": 0.4572,
      "step": 17000
    },
    {
      "epoch": 4.671649759743727,
      "grad_norm": 1.9152294397354126,
      "learning_rate": 2.664308595835558e-05,
      "loss": 0.4685,
      "step": 17500
    },
    {
      "epoch": 4.805125467164976,
      "grad_norm": 2.193741798400879,
      "learning_rate": 2.5975707421249334e-05,
      "loss": 0.4751,
      "step": 18000
    },
    {
      "epoch": 4.938601174586225,
      "grad_norm": 1.8435180187225342,
      "learning_rate": 2.530832888414309e-05,
      "loss": 0.4671,
      "step": 18500
    },
    {
      "epoch": 5.072076882007474,
      "grad_norm": 2.184936761856079,
      "learning_rate": 2.464095034703684e-05,
      "loss": 0.4342,
      "step": 19000
    },
    {
      "epoch": 5.205552589428724,
      "grad_norm": 1.9590941667556763,
      "learning_rate": 2.3973571809930594e-05,
      "loss": 0.3865,
      "step": 19500
    },
    {
      "epoch": 5.339028296849973,
      "grad_norm": 2.4979445934295654,
      "learning_rate": 2.3306193272824347e-05,
      "loss": 0.3983,
      "step": 20000
    }
  ],
  "logging_steps": 500,
  "max_steps": 37460,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 10,
  "save_steps": 10000,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 6911108207443968.0,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}