| { |
| "best_metric": 1.7059074640274048, |
| "best_model_checkpoint": "./results/cluster3_batch1_prop0.2/checkpoint-2000", |
| "epoch": 0.9998175134909669, |
| "eval_steps": 500, |
| "global_step": 2397, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0417112020647045, |
| "grad_norm": 0.6617676019668579, |
| "learning_rate": 9.996421853676199e-05, |
| "loss": 1.9795, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.083422404129409, |
| "grad_norm": 0.9614683985710144, |
| "learning_rate": 9.92540136395745e-05, |
| "loss": 1.8034, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.1251336061941135, |
| "grad_norm": 1.0093903541564941, |
| "learning_rate": 9.764589508626579e-05, |
| "loss": 1.7261, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.166844808258818, |
| "grad_norm": 0.7080962657928467, |
| "learning_rate": 9.516917930209698e-05, |
| "loss": 1.7056, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.2085560103235225, |
| "grad_norm": 0.8058099150657654, |
| "learning_rate": 9.186901746888266e-05, |
| "loss": 1.7093, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.2085560103235225, |
| "eval_loss": 1.7287803888320923, |
| "eval_runtime": 1120.1405, |
| "eval_samples_per_second": 4.354, |
| "eval_steps_per_second": 2.177, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.250267212388227, |
| "grad_norm": 0.8433169722557068, |
| "learning_rate": 8.780557240704062e-05, |
| "loss": 1.6892, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.2919784144529315, |
| "grad_norm": 0.8156673908233643, |
| "learning_rate": 8.305292179151174e-05, |
| "loss": 1.6795, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.333689616517636, |
| "grad_norm": 1.0388058423995972, |
| "learning_rate": 7.769770769621275e-05, |
| "loss": 1.6765, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.37540081858234053, |
| "grad_norm": 0.9966904520988464, |
| "learning_rate": 7.183755708618267e-05, |
| "loss": 1.6632, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.417112020647045, |
| "grad_norm": 1.0595520734786987, |
| "learning_rate": 6.557930205226752e-05, |
| "loss": 1.662, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.417112020647045, |
| "eval_loss": 1.7165497541427612, |
| "eval_runtime": 1121.1862, |
| "eval_samples_per_second": 4.35, |
| "eval_steps_per_second": 2.175, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.4588232227117495, |
| "grad_norm": 1.003791093826294, |
| "learning_rate": 5.903703223393429e-05, |
| "loss": 1.6331, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.500534424776454, |
| "grad_norm": 1.7693170309066772, |
| "learning_rate": 5.2330014935059945e-05, |
| "loss": 1.6673, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.5422456268411585, |
| "grad_norm": 1.3780925273895264, |
| "learning_rate": 4.558052084953292e-05, |
| "loss": 1.6418, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.583956828905863, |
| "grad_norm": 1.0700241327285767, |
| "learning_rate": 3.891159503426274e-05, |
| "loss": 1.6426, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.6256680309705676, |
| "grad_norm": 1.2618963718414307, |
| "learning_rate": 3.244481376534764e-05, |
| "loss": 1.6575, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.6256680309705676, |
| "eval_loss": 1.7095658779144287, |
| "eval_runtime": 1120.2118, |
| "eval_samples_per_second": 4.354, |
| "eval_steps_per_second": 2.177, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.667379233035272, |
| "grad_norm": 1.447568655014038, |
| "learning_rate": 2.6298068170503566e-05, |
| "loss": 1.6475, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.7090904350999765, |
| "grad_norm": 1.2349563837051392, |
| "learning_rate": 2.0583415042720094e-05, |
| "loss": 1.6533, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.7508016371646811, |
| "grad_norm": 1.3562304973602295, |
| "learning_rate": 1.5405034015376557e-05, |
| "loss": 1.6364, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.7925128392293855, |
| "grad_norm": 2.0301923751831055, |
| "learning_rate": 1.0857328340055205e-05, |
| "loss": 1.6443, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.83422404129409, |
| "grad_norm": 1.3831614255905151, |
| "learning_rate": 7.023203890372182e-06, |
| "loss": 1.6349, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.83422404129409, |
| "eval_loss": 1.7059074640274048, |
| "eval_runtime": 1120.6584, |
| "eval_samples_per_second": 4.352, |
| "eval_steps_per_second": 2.176, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.8759352433587946, |
| "grad_norm": 1.450674057006836, |
| "learning_rate": 3.972557766040636e-06, |
| "loss": 1.6622, |
| "step": 2100 |
| }, |
| { |
| "epoch": 0.917646445423499, |
| "grad_norm": 0.9268587827682495, |
| "learning_rate": 1.7610040503122649e-06, |
| "loss": 1.6419, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.9593576474882035, |
| "grad_norm": 1.4191687107086182, |
| "learning_rate": 4.288599505750612e-07, |
| "loss": 1.6441, |
| "step": 2300 |
| } |
| ], |
| "logging_steps": 100, |
| "max_steps": 2397, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 4.869083564909568e+17, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|