{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.4979253112033195,
  "eval_steps": 500,
  "global_step": 30,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.016597510373443983,
      "grad_norm": 0.7483841776847839,
      "learning_rate": 0.0,
      "loss": 0.3831,
      "step": 1
    },
    {
      "epoch": 0.03319502074688797,
      "grad_norm": 0.6623035669326782,
      "learning_rate": 4e-05,
      "loss": 0.3618,
      "step": 2
    },
    {
      "epoch": 0.04979253112033195,
      "grad_norm": 0.9878237843513489,
      "learning_rate": 8e-05,
      "loss": 0.4327,
      "step": 3
    },
    {
      "epoch": 0.06639004149377593,
      "grad_norm": 0.8907354474067688,
      "learning_rate": 0.00012,
      "loss": 0.4128,
      "step": 4
    },
    {
      "epoch": 0.08298755186721991,
      "grad_norm": 0.3212481439113617,
      "learning_rate": 0.00016,
      "loss": 0.6107,
      "step": 5
    },
    {
      "epoch": 0.0995850622406639,
      "grad_norm": 0.4514455795288086,
      "learning_rate": 0.0002,
      "loss": 0.291,
      "step": 6
    },
    {
      "epoch": 0.11618257261410789,
      "grad_norm": 0.7458418011665344,
      "learning_rate": 0.000192,
      "loss": 0.4742,
      "step": 7
    },
    {
      "epoch": 0.13278008298755187,
      "grad_norm": 0.5023247599601746,
      "learning_rate": 0.00018400000000000003,
      "loss": 0.425,
      "step": 8
    },
    {
      "epoch": 0.14937759336099585,
      "grad_norm": 0.4497811794281006,
      "learning_rate": 0.00017600000000000002,
      "loss": 0.381,
      "step": 9
    },
    {
      "epoch": 0.16597510373443983,
      "grad_norm": 0.29455727338790894,
      "learning_rate": 0.000168,
      "loss": 0.2039,
      "step": 10
    },
    {
      "epoch": 0.1825726141078838,
      "grad_norm": 0.3237267732620239,
      "learning_rate": 0.00016,
      "loss": 0.3656,
      "step": 11
    },
    {
      "epoch": 0.1991701244813278,
      "grad_norm": 0.34296661615371704,
      "learning_rate": 0.000152,
      "loss": 0.1851,
      "step": 12
    },
    {
      "epoch": 0.2157676348547718,
      "grad_norm": 0.4001446068286896,
      "learning_rate": 0.000144,
      "loss": 0.2987,
      "step": 13
    },
    {
      "epoch": 0.23236514522821577,
      "grad_norm": 0.4587167799472809,
      "learning_rate": 0.00013600000000000003,
      "loss": 0.2622,
      "step": 14
    },
    {
      "epoch": 0.24896265560165975,
      "grad_norm": 0.3845102787017822,
      "learning_rate": 0.00012800000000000002,
      "loss": 0.2759,
      "step": 15
    },
    {
      "epoch": 0.26556016597510373,
      "grad_norm": 0.16428542137145996,
      "learning_rate": 0.00012,
      "loss": 0.2536,
      "step": 16
    },
    {
      "epoch": 0.2821576763485477,
      "grad_norm": 0.17744974792003632,
      "learning_rate": 0.00011200000000000001,
      "loss": 0.242,
      "step": 17
    },
    {
      "epoch": 0.2987551867219917,
      "grad_norm": 0.20359747111797333,
      "learning_rate": 0.00010400000000000001,
      "loss": 0.2489,
      "step": 18
    },
    {
      "epoch": 0.3153526970954357,
      "grad_norm": 0.24680426716804504,
      "learning_rate": 9.6e-05,
      "loss": 0.2295,
      "step": 19
    },
    {
      "epoch": 0.33195020746887965,
      "grad_norm": 0.2906469404697418,
      "learning_rate": 8.800000000000001e-05,
      "loss": 0.3108,
      "step": 20
    },
    {
      "epoch": 0.34854771784232363,
      "grad_norm": 0.24588999152183533,
      "learning_rate": 8e-05,
      "loss": 0.31,
      "step": 21
    },
    {
      "epoch": 0.3651452282157676,
      "grad_norm": 0.29837578535079956,
      "learning_rate": 7.2e-05,
      "loss": 0.3647,
      "step": 22
    },
    {
      "epoch": 0.3817427385892116,
      "grad_norm": 0.14945703744888306,
      "learning_rate": 6.400000000000001e-05,
      "loss": 0.1748,
      "step": 23
    },
    {
      "epoch": 0.3983402489626556,
      "grad_norm": 0.137114018201828,
      "learning_rate": 5.6000000000000006e-05,
      "loss": 0.1672,
      "step": 24
    },
    {
      "epoch": 0.4149377593360996,
      "grad_norm": 0.26122337579727173,
      "learning_rate": 4.8e-05,
      "loss": 0.1654,
      "step": 25
    },
    {
      "epoch": 0.4315352697095436,
      "grad_norm": 0.24125142395496368,
      "learning_rate": 4e-05,
      "loss": 0.3292,
      "step": 26
    },
    {
      "epoch": 0.44813278008298757,
      "grad_norm": 0.1702798753976822,
      "learning_rate": 3.2000000000000005e-05,
      "loss": 0.2671,
      "step": 27
    },
    {
      "epoch": 0.46473029045643155,
      "grad_norm": 0.18966542184352875,
      "learning_rate": 2.4e-05,
      "loss": 0.1865,
      "step": 28
    },
    {
      "epoch": 0.48132780082987553,
      "grad_norm": 0.20254425704479218,
      "learning_rate": 1.6000000000000003e-05,
      "loss": 0.1979,
      "step": 29
    },
    {
      "epoch": 0.4979253112033195,
      "grad_norm": 0.19163808226585388,
      "learning_rate": 8.000000000000001e-06,
      "loss": 0.2565,
      "step": 30
    }
  ],
  "logging_steps": 1,
  "max_steps": 30,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 2924502854676480.0,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}