| { |
| "best_global_step": 2890, |
| "best_metric": 3.484435558319092, |
| "best_model_checkpoint": "sindhibert_session6/checkpoint-2890", |
| "epoch": 1.0, |
| "eval_steps": 2890, |
| "global_step": 2890, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.03460207612456748, |
| "grad_norm": 5.271186828613281, |
| "learning_rate": 8.583815028901734e-07, |
| "loss": 14.7112890625, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.06920415224913495, |
| "grad_norm": 5.328507900238037, |
| "learning_rate": 1.7254335260115608e-06, |
| "loss": 14.65601806640625, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.10380622837370242, |
| "grad_norm": 5.2988433837890625, |
| "learning_rate": 2.592485549132948e-06, |
| "loss": 14.6437841796875, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.1384083044982699, |
| "grad_norm": 5.144472122192383, |
| "learning_rate": 2.9992958916410005e-06, |
| "loss": 14.644112548828126, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.17301038062283736, |
| "grad_norm": 5.255625247955322, |
| "learning_rate": 2.994135629312096e-06, |
| "loss": 14.592845458984375, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.20761245674740483, |
| "grad_norm": 5.365286827087402, |
| "learning_rate": 2.9839827352831524e-06, |
| "loss": 14.59123779296875, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.2422145328719723, |
| "grad_norm": 5.114892959594727, |
| "learning_rate": 2.96887113529664e-06, |
| "loss": 14.588660888671875, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.2768166089965398, |
| "grad_norm": 5.162683486938477, |
| "learning_rate": 2.948851324536296e-06, |
| "loss": 14.5647998046875, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.31141868512110726, |
| "grad_norm": 5.2245635986328125, |
| "learning_rate": 2.9239901988982294e-06, |
| "loss": 14.566002197265625, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.3460207612456747, |
| "grad_norm": 5.22702169418335, |
| "learning_rate": 2.8943708314592917e-06, |
| "loss": 14.56361328125, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.3806228373702422, |
| "grad_norm": 5.353369235992432, |
| "learning_rate": 2.8600921948896393e-06, |
| "loss": 14.56337890625, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.41522491349480967, |
| "grad_norm": 5.189014911651611, |
| "learning_rate": 2.821268830737051e-06, |
| "loss": 14.566427001953125, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.44982698961937717, |
| "grad_norm": 5.108746528625488, |
| "learning_rate": 2.7780304666880683e-06, |
| "loss": 14.5394287109375, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.4844290657439446, |
| "grad_norm": 5.1513895988464355, |
| "learning_rate": 2.7305215830848867e-06, |
| "loss": 14.531617431640624, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.5190311418685121, |
| "grad_norm": 5.2011942863464355, |
| "learning_rate": 2.678900930146467e-06, |
| "loss": 14.5282666015625, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.5536332179930796, |
| "grad_norm": 5.219590663909912, |
| "learning_rate": 2.6233409975070707e-06, |
| "loss": 14.489625244140624, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.5882352941176471, |
| "grad_norm": 5.411579132080078, |
| "learning_rate": 2.5640274378447445e-06, |
| "loss": 14.49999755859375, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.6228373702422145, |
| "grad_norm": 5.120180606842041, |
| "learning_rate": 2.5011584465256946e-06, |
| "loss": 14.515745849609376, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.657439446366782, |
| "grad_norm": 5.3347883224487305, |
| "learning_rate": 2.434944099337454e-06, |
| "loss": 14.52021728515625, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.6920415224913494, |
| "grad_norm": 4.978041172027588, |
| "learning_rate": 2.365605650523803e-06, |
| "loss": 14.50703857421875, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.726643598615917, |
| "grad_norm": 5.3864898681640625, |
| "learning_rate": 2.293374793467048e-06, |
| "loss": 14.493427734375, |
| "step": 2100 |
| }, |
| { |
| "epoch": 0.7612456747404844, |
| "grad_norm": 5.149505138397217, |
| "learning_rate": 2.2184928864880712e-06, |
| "loss": 14.453104248046875, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.7958477508650519, |
| "grad_norm": 5.451559066772461, |
| "learning_rate": 2.1412101463511406e-06, |
| "loss": 14.48880126953125, |
| "step": 2300 |
| }, |
| { |
| "epoch": 0.8304498269896193, |
| "grad_norm": 5.231166839599609, |
| "learning_rate": 2.0617848121683582e-06, |
| "loss": 14.46824951171875, |
| "step": 2400 |
| }, |
| { |
| "epoch": 0.8650519031141869, |
| "grad_norm": 5.132116794586182, |
| "learning_rate": 1.9804822824975567e-06, |
| "loss": 14.45693603515625, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.8996539792387543, |
| "grad_norm": 5.454078197479248, |
| "learning_rate": 1.8975742285170185e-06, |
| "loss": 14.468741455078124, |
| "step": 2600 |
| }, |
| { |
| "epoch": 0.9342560553633218, |
| "grad_norm": 5.184955596923828, |
| "learning_rate": 1.8133376862403233e-06, |
| "loss": 14.482393798828125, |
| "step": 2700 |
| }, |
| { |
| "epoch": 0.9688581314878892, |
| "grad_norm": 5.030216693878174, |
| "learning_rate": 1.7280541308046812e-06, |
| "loss": 14.4536962890625, |
| "step": 2800 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_loss": 3.484435558319092, |
| "eval_runtime": 11.6834, |
| "eval_samples_per_second": 639.538, |
| "eval_steps_per_second": 10.014, |
| "step": 2890 |
| } |
| ], |
| "logging_steps": 100, |
| "max_steps": 5780, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 2, |
| "save_steps": 2890, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.9471728080409293e+17, |
| "train_batch_size": 64, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|