| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 250.0, |
| "eval_steps": 50, |
| "global_step": 2000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 6.266666666666667, |
| "grad_norm": 0.26255419850349426, |
| "learning_rate": 0.00029265, |
| "loss": 0.5343, |
| "step": 50 |
| }, |
| { |
| "epoch": 12.533333333333333, |
| "grad_norm": 0.23443113267421722, |
| "learning_rate": 0.00028514999999999997, |
| "loss": 0.1262, |
| "step": 100 |
| }, |
| { |
| "epoch": 18.8, |
| "grad_norm": 0.17284274101257324, |
| "learning_rate": 0.00027764999999999995, |
| "loss": 0.0524, |
| "step": 150 |
| }, |
| { |
| "epoch": 25.0, |
| "grad_norm": 0.18162274360656738, |
| "learning_rate": 0.00027015, |
| "loss": 0.0297, |
| "step": 200 |
| }, |
| { |
| "epoch": 31.266666666666666, |
| "grad_norm": 0.06401970237493515, |
| "learning_rate": 0.00026264999999999996, |
| "loss": 0.0201, |
| "step": 250 |
| }, |
| { |
| "epoch": 37.53333333333333, |
| "grad_norm": 0.0907013937830925, |
| "learning_rate": 0.00025515, |
| "loss": 0.015, |
| "step": 300 |
| }, |
| { |
| "epoch": 43.8, |
| "grad_norm": 0.03851782903075218, |
| "learning_rate": 0.00024765, |
| "loss": 0.0138, |
| "step": 350 |
| }, |
| { |
| "epoch": 50.0, |
| "grad_norm": 0.11328539997339249, |
| "learning_rate": 0.00024014999999999998, |
| "loss": 0.0127, |
| "step": 400 |
| }, |
| { |
| "epoch": 56.266666666666666, |
| "grad_norm": 0.05093704164028168, |
| "learning_rate": 0.00023264999999999996, |
| "loss": 0.0136, |
| "step": 450 |
| }, |
| { |
| "epoch": 62.53333333333333, |
| "grad_norm": 0.09107606112957001, |
| "learning_rate": 0.00022514999999999997, |
| "loss": 0.0141, |
| "step": 500 |
| }, |
| { |
| "epoch": 68.8, |
| "grad_norm": 0.03748961165547371, |
| "learning_rate": 0.00021764999999999998, |
| "loss": 0.0129, |
| "step": 550 |
| }, |
| { |
| "epoch": 75.0, |
| "grad_norm": 0.02932876907289028, |
| "learning_rate": 0.00021014999999999999, |
| "loss": 0.0112, |
| "step": 600 |
| }, |
| { |
| "epoch": 81.26666666666667, |
| "grad_norm": 0.019922535866498947, |
| "learning_rate": 0.00020264999999999997, |
| "loss": 0.011, |
| "step": 650 |
| }, |
| { |
| "epoch": 87.53333333333333, |
| "grad_norm": 0.01521008089184761, |
| "learning_rate": 0.00019514999999999997, |
| "loss": 0.0101, |
| "step": 700 |
| }, |
| { |
| "epoch": 93.8, |
| "grad_norm": 0.015646636486053467, |
| "learning_rate": 0.00018764999999999998, |
| "loss": 0.0098, |
| "step": 750 |
| }, |
| { |
| "epoch": 100.0, |
| "grad_norm": 0.024294869974255562, |
| "learning_rate": 0.00018015, |
| "loss": 0.0098, |
| "step": 800 |
| }, |
| { |
| "epoch": 106.26666666666667, |
| "grad_norm": 0.012117642909288406, |
| "learning_rate": 0.00017265, |
| "loss": 0.0097, |
| "step": 850 |
| }, |
| { |
| "epoch": 112.53333333333333, |
| "grad_norm": 0.01629127934575081, |
| "learning_rate": 0.00016514999999999998, |
| "loss": 0.0097, |
| "step": 900 |
| }, |
| { |
| "epoch": 118.8, |
| "grad_norm": 0.017040058970451355, |
| "learning_rate": 0.00015764999999999998, |
| "loss": 0.0097, |
| "step": 950 |
| }, |
| { |
| "epoch": 125.0, |
| "grad_norm": 0.018876733258366585, |
| "learning_rate": 0.00015014999999999996, |
| "loss": 0.0097, |
| "step": 1000 |
| }, |
| { |
| "epoch": 131.26666666666668, |
| "grad_norm": 0.012507513165473938, |
| "learning_rate": 0.00014264999999999997, |
| "loss": 0.0096, |
| "step": 1050 |
| }, |
| { |
| "epoch": 137.53333333333333, |
| "grad_norm": 0.012686866335570812, |
| "learning_rate": 0.00013514999999999998, |
| "loss": 0.0096, |
| "step": 1100 |
| }, |
| { |
| "epoch": 143.8, |
| "grad_norm": 0.01112140528857708, |
| "learning_rate": 0.00012764999999999999, |
| "loss": 0.0096, |
| "step": 1150 |
| }, |
| { |
| "epoch": 150.0, |
| "grad_norm": 0.023951932787895203, |
| "learning_rate": 0.00012014999999999999, |
| "loss": 0.0096, |
| "step": 1200 |
| }, |
| { |
| "epoch": 156.26666666666668, |
| "grad_norm": 0.010721893981099129, |
| "learning_rate": 0.00011264999999999999, |
| "loss": 0.0096, |
| "step": 1250 |
| }, |
| { |
| "epoch": 162.53333333333333, |
| "grad_norm": 0.012511960230767727, |
| "learning_rate": 0.00010514999999999998, |
| "loss": 0.0096, |
| "step": 1300 |
| }, |
| { |
| "epoch": 168.8, |
| "grad_norm": 0.010806918144226074, |
| "learning_rate": 9.764999999999999e-05, |
| "loss": 0.0096, |
| "step": 1350 |
| }, |
| { |
| "epoch": 175.0, |
| "grad_norm": 0.018283583223819733, |
| "learning_rate": 9.014999999999998e-05, |
| "loss": 0.0096, |
| "step": 1400 |
| }, |
| { |
| "epoch": 181.26666666666668, |
| "grad_norm": 0.010316784493625164, |
| "learning_rate": 8.265e-05, |
| "loss": 0.0095, |
| "step": 1450 |
| }, |
| { |
| "epoch": 187.53333333333333, |
| "grad_norm": 0.011216912418603897, |
| "learning_rate": 7.515e-05, |
| "loss": 0.0095, |
| "step": 1500 |
| }, |
| { |
| "epoch": 193.8, |
| "grad_norm": 0.01198404561728239, |
| "learning_rate": 6.764999999999999e-05, |
| "loss": 0.0095, |
| "step": 1550 |
| }, |
| { |
| "epoch": 200.0, |
| "grad_norm": 0.014636659994721413, |
| "learning_rate": 6.015e-05, |
| "loss": 0.0095, |
| "step": 1600 |
| }, |
| { |
| "epoch": 206.26666666666668, |
| "grad_norm": 0.010694563388824463, |
| "learning_rate": 5.264999999999999e-05, |
| "loss": 0.0095, |
| "step": 1650 |
| }, |
| { |
| "epoch": 212.53333333333333, |
| "grad_norm": 0.011114147491753101, |
| "learning_rate": 4.514999999999999e-05, |
| "loss": 0.0095, |
| "step": 1700 |
| }, |
| { |
| "epoch": 218.8, |
| "grad_norm": 0.010317948646843433, |
| "learning_rate": 3.7649999999999994e-05, |
| "loss": 0.0095, |
| "step": 1750 |
| }, |
| { |
| "epoch": 225.0, |
| "grad_norm": 0.014378506690263748, |
| "learning_rate": 3.0149999999999998e-05, |
| "loss": 0.0094, |
| "step": 1800 |
| }, |
| { |
| "epoch": 231.26666666666668, |
| "grad_norm": 0.011094390414655209, |
| "learning_rate": 2.2649999999999998e-05, |
| "loss": 0.0094, |
| "step": 1850 |
| }, |
| { |
| "epoch": 237.53333333333333, |
| "grad_norm": 0.011555945500731468, |
| "learning_rate": 1.5149999999999999e-05, |
| "loss": 0.0094, |
| "step": 1900 |
| }, |
| { |
| "epoch": 243.8, |
| "grad_norm": 0.009383700788021088, |
| "learning_rate": 7.65e-06, |
| "loss": 0.0094, |
| "step": 1950 |
| }, |
| { |
| "epoch": 250.0, |
| "grad_norm": 0.014644854702055454, |
| "learning_rate": 1.5e-07, |
| "loss": 0.0094, |
| "step": 2000 |
| } |
| ], |
| "logging_steps": 50, |
| "max_steps": 2000, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 286, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 6.0511261556736e+17, |
| "train_batch_size": 4, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|