| { |
| "best_metric": 1.7005794048309326, |
| "best_model_checkpoint": "./results/cluster6_batch1_prop0.2/checkpoint-2000", |
| "epoch": 0.9997354637320777, |
| "eval_steps": 500, |
| "global_step": 2362, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.04232580286757314, |
| "grad_norm": 0.9529861807823181, |
| "learning_rate": 9.996046986136509e-05, |
| "loss": 1.9625, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.08465160573514628, |
| "grad_norm": 0.7830840349197388, |
| "learning_rate": 9.921974561712554e-05, |
| "loss": 1.7382, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.12697740860271944, |
| "grad_norm": 0.8583674430847168, |
| "learning_rate": 9.755494410515322e-05, |
| "loss": 1.6945, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.16930321147029256, |
| "grad_norm": 1.1656347513198853, |
| "learning_rate": 9.499732118045537e-05, |
| "loss": 1.6661, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.2116290143378657, |
| "grad_norm": 1.1309709548950195, |
| "learning_rate": 9.159489499457894e-05, |
| "loss": 1.6762, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.2116290143378657, |
| "eval_loss": 1.7201300859451294, |
| "eval_runtime": 1200.9337, |
| "eval_samples_per_second": 4.024, |
| "eval_steps_per_second": 2.013, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.2539548172054389, |
| "grad_norm": 0.9538220763206482, |
| "learning_rate": 8.741154447769891e-05, |
| "loss": 1.6503, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.29628062007301204, |
| "grad_norm": 1.1448876857757568, |
| "learning_rate": 8.252581004214648e-05, |
| "loss": 1.6566, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.33860642294058513, |
| "grad_norm": 0.8794606924057007, |
| "learning_rate": 7.702941902359696e-05, |
| "loss": 1.6344, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.3809322258081583, |
| "grad_norm": 1.3629435300827026, |
| "learning_rate": 7.1025563544056e-05, |
| "loss": 1.6142, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.4232580286757314, |
| "grad_norm": 1.0469815731048584, |
| "learning_rate": 6.462696312894651e-05, |
| "loss": 1.6192, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.4232580286757314, |
| "eval_loss": 1.7103641033172607, |
| "eval_runtime": 1200.192, |
| "eval_samples_per_second": 4.027, |
| "eval_steps_per_second": 2.014, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.46558383154330457, |
| "grad_norm": 0.9969177842140198, |
| "learning_rate": 5.795374845173646e-05, |
| "loss": 1.6161, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.5079096344108778, |
| "grad_norm": 0.8754364252090454, |
| "learning_rate": 5.1131205937794204e-05, |
| "loss": 1.6293, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.5502354372784509, |
| "grad_norm": 0.9306801557540894, |
| "learning_rate": 4.42874255714589e-05, |
| "loss": 1.6365, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.5925612401460241, |
| "grad_norm": 1.1393909454345703, |
| "learning_rate": 3.755089606762723e-05, |
| "loss": 1.6258, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.6348870430135972, |
| "grad_norm": 1.0938472747802734, |
| "learning_rate": 3.104809255736399e-05, |
| "loss": 1.6125, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.6348870430135972, |
| "eval_loss": 1.7008308172225952, |
| "eval_runtime": 1201.1982, |
| "eval_samples_per_second": 4.023, |
| "eval_steps_per_second": 2.012, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.6772128458811703, |
| "grad_norm": 1.2525652647018433, |
| "learning_rate": 2.4901102077589505e-05, |
| "loss": 1.613, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.7195386487487434, |
| "grad_norm": 1.2261629104614258, |
| "learning_rate": 1.9225331445143426e-05, |
| "loss": 1.6043, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.7618644516163166, |
| "grad_norm": 1.1470032930374146, |
| "learning_rate": 1.4127340548796941e-05, |
| "loss": 1.6191, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.8041902544838897, |
| "grad_norm": 0.9801532030105591, |
| "learning_rate": 9.702841738123525e-06, |
| "loss": 1.5978, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.8465160573514628, |
| "grad_norm": 1.115677833557129, |
| "learning_rate": 6.034902869748399e-06, |
| "loss": 1.6341, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.8465160573514628, |
| "eval_loss": 1.7005794048309326, |
| "eval_runtime": 1201.361, |
| "eval_samples_per_second": 4.023, |
| "eval_steps_per_second": 2.012, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.888841860219036, |
| "grad_norm": 1.0392427444458008, |
| "learning_rate": 3.192387747927372e-06, |
| "loss": 1.6242, |
| "step": 2100 |
| }, |
| { |
| "epoch": 0.9311676630866091, |
| "grad_norm": 1.4983006715774536, |
| "learning_rate": 1.2286632394397811e-06, |
| "loss": 1.5992, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.9734934659541823, |
| "grad_norm": 1.0786079168319702, |
| "learning_rate": 1.805973360972546e-07, |
| "loss": 1.6178, |
| "step": 2300 |
| } |
| ], |
| "logging_steps": 100, |
| "max_steps": 2362, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 4.7882681988397056e+17, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|