{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 10.0, "eval_steps": 500, "global_step": 10030, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.9970089730807578, "grad_norm": 13791.8017578125, "learning_rate": 0.0005993999999999999, "loss": 1.2252, "step": 1000 }, { "epoch": 1.0, "eval_accuracy": 0.18732381009853702, "eval_loss": 4.833001136779785, "eval_runtime": 4.3793, "eval_samples_per_second": 51.835, "eval_steps_per_second": 1.827, "step": 1003 }, { "epoch": 1.9940179461615155, "grad_norm": 38028.28125, "learning_rate": 0.0005336212624584717, "loss": 1.104, "step": 2000 }, { "epoch": 2.0, "eval_accuracy": 0.19786718622033328, "eval_loss": 4.691581726074219, "eval_runtime": 3.8992, "eval_samples_per_second": 58.218, "eval_steps_per_second": 2.052, "step": 2006 }, { "epoch": 2.991026919242273, "grad_norm": 34522.31640625, "learning_rate": 0.0004671760797342192, "loss": 1.0865, "step": 3000 }, { "epoch": 3.0, "eval_accuracy": 0.20838469960430012, "eval_loss": 4.60223913192749, "eval_runtime": 3.82, "eval_samples_per_second": 59.424, "eval_steps_per_second": 2.094, "step": 3009 }, { "epoch": 3.988035892323031, "grad_norm": 9552.4384765625, "learning_rate": 0.00040073089700996676, "loss": 1.0446, "step": 4000 }, { "epoch": 4.0, "eval_accuracy": 0.24390285955671268, "eval_loss": 4.333094596862793, "eval_runtime": 3.8108, "eval_samples_per_second": 59.568, "eval_steps_per_second": 2.099, "step": 4012 }, { "epoch": 4.985044865403789, "grad_norm": 7808.9814453125, "learning_rate": 0.00033428571428571426, "loss": 0.9806, "step": 5000 }, { "epoch": 5.0, "eval_accuracy": 0.26604136313870186, "eval_loss": 4.115571022033691, "eval_runtime": 3.7314, "eval_samples_per_second": 60.836, "eval_steps_per_second": 2.144, "step": 5015 }, { "epoch": 5.982053838484546, "grad_norm": 7799.94580078125, "learning_rate": 0.00026784053156146177, "loss": 0.9375, "step": 6000 }, { "epoch": 6.0, "eval_accuracy": 0.2841539005319103, "eval_loss": 3.9862992763519287, "eval_runtime": 3.7749, "eval_samples_per_second": 60.134, "eval_steps_per_second": 2.119, "step": 6018 }, { "epoch": 6.979062811565304, "grad_norm": 7969.98388671875, "learning_rate": 0.00020139534883720927, "loss": 0.9074, "step": 7000 }, { "epoch": 7.0, "eval_accuracy": 0.2942231264601671, "eval_loss": 3.9006783962249756, "eval_runtime": 3.758, "eval_samples_per_second": 60.405, "eval_steps_per_second": 2.129, "step": 7021 }, { "epoch": 7.976071784646062, "grad_norm": 8524.130859375, "learning_rate": 0.0001349501661129568, "loss": 0.8839, "step": 8000 }, { "epoch": 8.0, "eval_accuracy": 0.3046802934558652, "eval_loss": 3.8308379650115967, "eval_runtime": 3.7591, "eval_samples_per_second": 60.388, "eval_steps_per_second": 2.128, "step": 8024 }, { "epoch": 8.97308075772682, "grad_norm": 8583.34375, "learning_rate": 6.850498338870431e-05, "loss": 0.867, "step": 9000 }, { "epoch": 9.0, "eval_accuracy": 0.31158564445632214, "eval_loss": 3.7815797328948975, "eval_runtime": 3.7451, "eval_samples_per_second": 60.613, "eval_steps_per_second": 2.136, "step": 9027 }, { "epoch": 9.970089730807578, "grad_norm": 8688.66796875, "learning_rate": 2.0598006644518273e-06, "loss": 0.8541, "step": 10000 }, { "epoch": 10.0, "eval_accuracy": 0.3152495323154909, "eval_loss": 3.7565455436706543, "eval_runtime": 3.716, "eval_samples_per_second": 61.087, "eval_steps_per_second": 2.153, "step": 10030 }, { "epoch": 10.0, "step": 10030, "total_flos": 8.379374174208e+16, "train_loss": 0.9887108649713092, "train_runtime": 9029.3583, "train_samples_per_second": 35.516, "train_steps_per_second": 1.111 } ], "logging_steps": 1000, "max_steps": 10030, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 8.379374174208e+16, "train_batch_size": 32, "trial_name": null, "trial_params": null }