| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 3.0, |
| "eval_steps": 500, |
| "global_step": 300, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.1, |
| "grad_norm": 1.9029399156570435, |
| "learning_rate": 3.6e-05, |
| "loss": 0.6032, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.2, |
| "grad_norm": 0.8222858309745789, |
| "learning_rate": 7.6e-05, |
| "loss": 0.2673, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.3, |
| "grad_norm": 1.149100422859192, |
| "learning_rate": 9.998250366089848e-05, |
| "loss": 0.2248, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.4, |
| "grad_norm": 0.6920625567436218, |
| "learning_rate": 9.97858104436822e-05, |
| "loss": 0.1753, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.5, |
| "grad_norm": 0.4281952381134033, |
| "learning_rate": 9.937141654477528e-05, |
| "loss": 0.1482, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.6, |
| "grad_norm": 0.5860852599143982, |
| "learning_rate": 9.87411340032603e-05, |
| "loss": 0.1306, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.7, |
| "grad_norm": 0.5152427554130554, |
| "learning_rate": 9.789771888432375e-05, |
| "loss": 0.1182, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.8, |
| "grad_norm": 0.3876459300518036, |
| "learning_rate": 9.684485922768422e-05, |
| "loss": 0.116, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.9, |
| "grad_norm": 0.3336000442504883, |
| "learning_rate": 9.558715892073323e-05, |
| "loss": 0.1035, |
| "step": 90 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 0.49530354142189026, |
| "learning_rate": 9.413011756690685e-05, |
| "loss": 0.1023, |
| "step": 100 |
| }, |
| { |
| "epoch": 1.1, |
| "grad_norm": 0.27808159589767456, |
| "learning_rate": 9.248010643731935e-05, |
| "loss": 0.1005, |
| "step": 110 |
| }, |
| { |
| "epoch": 1.2, |
| "grad_norm": 0.2289346307516098, |
| "learning_rate": 9.064434061081562e-05, |
| "loss": 0.0996, |
| "step": 120 |
| }, |
| { |
| "epoch": 1.3, |
| "grad_norm": 0.3206384479999542, |
| "learning_rate": 8.863084742426719e-05, |
| "loss": 0.092, |
| "step": 130 |
| }, |
| { |
| "epoch": 1.4, |
| "grad_norm": 0.3374258577823639, |
| "learning_rate": 8.644843137107059e-05, |
| "loss": 0.0868, |
| "step": 140 |
| }, |
| { |
| "epoch": 1.5, |
| "grad_norm": 0.31995415687561035, |
| "learning_rate": 8.410663560133784e-05, |
| "loss": 0.0887, |
| "step": 150 |
| }, |
| { |
| "epoch": 1.6, |
| "grad_norm": 0.2894892394542694, |
| "learning_rate": 8.161570019212921e-05, |
| "loss": 0.0869, |
| "step": 160 |
| }, |
| { |
| "epoch": 1.7, |
| "grad_norm": 0.3088397681713104, |
| "learning_rate": 7.898651737020166e-05, |
| "loss": 0.0884, |
| "step": 170 |
| }, |
| { |
| "epoch": 1.8, |
| "grad_norm": 0.2828108072280884, |
| "learning_rate": 7.623058388307269e-05, |
| "loss": 0.0867, |
| "step": 180 |
| }, |
| { |
| "epoch": 1.9, |
| "grad_norm": 0.28004196286201477, |
| "learning_rate": 7.335995072666848e-05, |
| "loss": 0.0803, |
| "step": 190 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 0.34351933002471924, |
| "learning_rate": 7.038717044938519e-05, |
| "loss": 0.0827, |
| "step": 200 |
| }, |
| { |
| "epoch": 2.1, |
| "grad_norm": 0.28273674845695496, |
| "learning_rate": 6.732524226298841e-05, |
| "loss": 0.0825, |
| "step": 210 |
| }, |
| { |
| "epoch": 2.2, |
| "grad_norm": 0.1750754565000534, |
| "learning_rate": 6.418755520036775e-05, |
| "loss": 0.0803, |
| "step": 220 |
| }, |
| { |
| "epoch": 2.3, |
| "grad_norm": 0.32086455821990967, |
| "learning_rate": 6.0987829568702656e-05, |
| "loss": 0.0772, |
| "step": 230 |
| }, |
| { |
| "epoch": 2.4, |
| "grad_norm": 0.255445659160614, |
| "learning_rate": 5.7740056954050084e-05, |
| "loss": 0.0757, |
| "step": 240 |
| }, |
| { |
| "epoch": 2.5, |
| "grad_norm": 0.2030186653137207, |
| "learning_rate": 5.445843903969854e-05, |
| "loss": 0.078, |
| "step": 250 |
| }, |
| { |
| "epoch": 2.6, |
| "grad_norm": 0.19003793597221375, |
| "learning_rate": 5.1157325505820694e-05, |
| "loss": 0.0759, |
| "step": 260 |
| }, |
| { |
| "epoch": 2.7, |
| "grad_norm": 0.1562698632478714, |
| "learning_rate": 4.785115128197298e-05, |
| "loss": 0.0757, |
| "step": 270 |
| }, |
| { |
| "epoch": 2.8, |
| "grad_norm": 0.18039683997631073, |
| "learning_rate": 4.4554373426821374e-05, |
| "loss": 0.072, |
| "step": 280 |
| }, |
| { |
| "epoch": 2.9, |
| "grad_norm": 0.1918702870607376, |
| "learning_rate": 4.1281407911102425e-05, |
| "loss": 0.0746, |
| "step": 290 |
| }, |
| { |
| "epoch": 3.0, |
| "grad_norm": 0.1714138686656952, |
| "learning_rate": 3.8046566580251e-05, |
| "loss": 0.0747, |
| "step": 300 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 500, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 5, |
| "save_steps": 100, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 128, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|