{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.0,
  "eval_steps": 500,
  "global_step": 285,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.03508771929824561,
      "grad_norm": 6.817053318023682,
      "learning_rate": 3.103448275862069e-06,
      "loss": 1.7184,
      "step": 10
    },
    {
      "epoch": 0.07017543859649122,
      "grad_norm": 3.1392080783843994,
      "learning_rate": 6.551724137931035e-06,
      "loss": 1.3182,
      "step": 20
    },
    {
      "epoch": 0.10526315789473684,
      "grad_norm": 2.015719175338745,
      "learning_rate": 1e-05,
      "loss": 1.1251,
      "step": 30
    },
    {
      "epoch": 0.14035087719298245,
      "grad_norm": 1.9676525592803955,
      "learning_rate": 9.962397672993552e-06,
      "loss": 1.0058,
      "step": 40
    },
    {
      "epoch": 0.17543859649122806,
      "grad_norm": 1.8632580041885376,
      "learning_rate": 9.850156265972722e-06,
      "loss": 0.9239,
      "step": 50
    },
    {
      "epoch": 0.21052631578947367,
      "grad_norm": 1.8993644714355469,
      "learning_rate": 9.664963994173695e-06,
      "loss": 0.8924,
      "step": 60
    },
    {
      "epoch": 0.24561403508771928,
      "grad_norm": 1.7902690172195435,
      "learning_rate": 9.409606321741776e-06,
      "loss": 0.8465,
      "step": 70
    },
    {
      "epoch": 0.2807017543859649,
      "grad_norm": 1.7192617654800415,
      "learning_rate": 9.08792406575792e-06,
      "loss": 0.8273,
      "step": 80
    },
    {
      "epoch": 0.3157894736842105,
      "grad_norm": 1.8437436819076538,
      "learning_rate": 8.704755626774796e-06,
      "loss": 0.8021,
      "step": 90
    },
    {
      "epoch": 0.3508771929824561,
      "grad_norm": 1.7052956819534302,
      "learning_rate": 8.265864214768883e-06,
      "loss": 0.7917,
      "step": 100
    },
    {
      "epoch": 0.38596491228070173,
      "grad_norm": 1.7855916023254395,
      "learning_rate": 7.777851165098012e-06,
      "loss": 0.7887,
      "step": 110
    },
    {
      "epoch": 0.42105263157894735,
      "grad_norm": 2.0044033527374268,
      "learning_rate": 7.248056648273034e-06,
      "loss": 0.763,
      "step": 120
    },
    {
      "epoch": 0.45614035087719296,
      "grad_norm": 1.8151040077209473,
      "learning_rate": 6.684449266961101e-06,
      "loss": 0.7469,
      "step": 130
    },
    {
      "epoch": 0.49122807017543857,
      "grad_norm": 1.8096654415130615,
      "learning_rate": 6.095506200784349e-06,
      "loss": 0.7253,
      "step": 140
    },
    {
      "epoch": 0.5263157894736842,
      "grad_norm": 1.7122279405593872,
      "learning_rate": 5.490085701647805e-06,
      "loss": 0.72,
      "step": 150
    },
    {
      "epoch": 0.5614035087719298,
      "grad_norm": 1.7539193630218506,
      "learning_rate": 4.87729385738544e-06,
      "loss": 0.7192,
      "step": 160
    },
    {
      "epoch": 0.5964912280701754,
      "grad_norm": 1.7335082292556763,
      "learning_rate": 4.266347627723192e-06,
      "loss": 0.6983,
      "step": 170
    },
    {
      "epoch": 0.631578947368421,
      "grad_norm": 1.573112964630127,
      "learning_rate": 3.6664362126255087e-06,
      "loss": 0.71,
      "step": 180
    },
    {
      "epoch": 0.6666666666666666,
      "grad_norm": 1.6635979413986206,
      "learning_rate": 3.0865828381745515e-06,
      "loss": 0.6856,
      "step": 190
    },
    {
      "epoch": 0.7017543859649122,
      "grad_norm": 1.7431310415267944,
      "learning_rate": 2.5355090388510806e-06,
      "loss": 0.6967,
      "step": 200
    },
    {
      "epoch": 0.7368421052631579,
      "grad_norm": 1.7996807098388672,
      "learning_rate": 2.0215034775378336e-06,
      "loss": 0.6816,
      "step": 210
    },
    {
      "epoch": 0.7719298245614035,
      "grad_norm": 1.960167407989502,
      "learning_rate": 1.5522972763146653e-06,
      "loss": 0.6715,
      "step": 220
    },
    {
      "epoch": 0.8070175438596491,
      "grad_norm": 1.6907997131347656,
      "learning_rate": 1.134947733186315e-06,
      "loss": 0.6711,
      "step": 230
    },
    {
      "epoch": 0.8421052631578947,
      "grad_norm": 1.6383390426635742,
      "learning_rate": 7.757321737514645e-07,
      "loss": 0.6826,
      "step": 240
    },
    {
      "epoch": 0.8771929824561403,
      "grad_norm": 1.712764859199524,
      "learning_rate": 4.800535343827834e-07,
      "loss": 0.6665,
      "step": 250
    },
    {
      "epoch": 0.9122807017543859,
      "grad_norm": 1.6468104124069214,
      "learning_rate": 2.523590970348166e-07,
      "loss": 0.6751,
      "step": 260
    },
    {
      "epoch": 0.9473684210526315,
      "grad_norm": 1.5951693058013916,
      "learning_rate": 9.607359798384785e-08,
      "loss": 0.6582,
      "step": 270
    },
    {
      "epoch": 0.9824561403508771,
      "grad_norm": 1.599989891052246,
      "learning_rate": 1.3547716606548967e-08,
      "loss": 0.6676,
      "step": 280
    },
    {
      "epoch": 1.0,
      "step": 285,
      "total_flos": 2.5900842583955866e+17,
      "train_loss": 0.8155809151498895,
      "train_runtime": 632.1431,
      "train_samples_per_second": 57.644,
      "train_steps_per_second": 0.451
    }
  ],
  "logging_steps": 10,
  "max_steps": 285,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 2.5900842583955866e+17,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}