{ "best_global_step": 2890, "best_metric": 3.484435558319092, "best_model_checkpoint": "sindhibert_session6/checkpoint-2890", "epoch": 1.0, "eval_steps": 2890, "global_step": 2890, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.03460207612456748, "grad_norm": 5.271186828613281, "learning_rate": 8.583815028901734e-07, "loss": 14.7112890625, "step": 100 }, { "epoch": 0.06920415224913495, "grad_norm": 5.328507900238037, "learning_rate": 1.7254335260115608e-06, "loss": 14.65601806640625, "step": 200 }, { "epoch": 0.10380622837370242, "grad_norm": 5.2988433837890625, "learning_rate": 2.592485549132948e-06, "loss": 14.6437841796875, "step": 300 }, { "epoch": 0.1384083044982699, "grad_norm": 5.144472122192383, "learning_rate": 2.9992958916410005e-06, "loss": 14.644112548828126, "step": 400 }, { "epoch": 0.17301038062283736, "grad_norm": 5.255625247955322, "learning_rate": 2.994135629312096e-06, "loss": 14.592845458984375, "step": 500 }, { "epoch": 0.20761245674740483, "grad_norm": 5.365286827087402, "learning_rate": 2.9839827352831524e-06, "loss": 14.59123779296875, "step": 600 }, { "epoch": 0.2422145328719723, "grad_norm": 5.114892959594727, "learning_rate": 2.96887113529664e-06, "loss": 14.588660888671875, "step": 700 }, { "epoch": 0.2768166089965398, "grad_norm": 5.162683486938477, "learning_rate": 2.948851324536296e-06, "loss": 14.5647998046875, "step": 800 }, { "epoch": 0.31141868512110726, "grad_norm": 5.2245635986328125, "learning_rate": 2.9239901988982294e-06, "loss": 14.566002197265625, "step": 900 }, { "epoch": 0.3460207612456747, "grad_norm": 5.22702169418335, "learning_rate": 2.8943708314592917e-06, "loss": 14.56361328125, "step": 1000 }, { "epoch": 0.3806228373702422, "grad_norm": 5.353369235992432, "learning_rate": 2.8600921948896393e-06, "loss": 14.56337890625, "step": 1100 }, { "epoch": 0.41522491349480967, "grad_norm": 5.189014911651611, "learning_rate": 2.821268830737051e-06, "loss": 14.566427001953125, "step": 1200 }, { "epoch": 0.44982698961937717, "grad_norm": 5.108746528625488, "learning_rate": 2.7780304666880683e-06, "loss": 14.5394287109375, "step": 1300 }, { "epoch": 0.4844290657439446, "grad_norm": 5.1513895988464355, "learning_rate": 2.7305215830848867e-06, "loss": 14.531617431640624, "step": 1400 }, { "epoch": 0.5190311418685121, "grad_norm": 5.2011942863464355, "learning_rate": 2.678900930146467e-06, "loss": 14.5282666015625, "step": 1500 }, { "epoch": 0.5536332179930796, "grad_norm": 5.219590663909912, "learning_rate": 2.6233409975070707e-06, "loss": 14.489625244140624, "step": 1600 }, { "epoch": 0.5882352941176471, "grad_norm": 5.411579132080078, "learning_rate": 2.5640274378447445e-06, "loss": 14.49999755859375, "step": 1700 }, { "epoch": 0.6228373702422145, "grad_norm": 5.120180606842041, "learning_rate": 2.5011584465256946e-06, "loss": 14.515745849609376, "step": 1800 }, { "epoch": 0.657439446366782, "grad_norm": 5.3347883224487305, "learning_rate": 2.434944099337454e-06, "loss": 14.52021728515625, "step": 1900 }, { "epoch": 0.6920415224913494, "grad_norm": 4.978041172027588, "learning_rate": 2.365605650523803e-06, "loss": 14.50703857421875, "step": 2000 }, { "epoch": 0.726643598615917, "grad_norm": 5.3864898681640625, "learning_rate": 2.293374793467048e-06, "loss": 14.493427734375, "step": 2100 }, { "epoch": 0.7612456747404844, "grad_norm": 5.149505138397217, "learning_rate": 2.2184928864880712e-06, "loss": 14.453104248046875, "step": 2200 }, { "epoch": 0.7958477508650519, "grad_norm": 5.451559066772461, "learning_rate": 2.1412101463511406e-06, "loss": 14.48880126953125, "step": 2300 }, { "epoch": 0.8304498269896193, "grad_norm": 5.231166839599609, "learning_rate": 2.0617848121683582e-06, "loss": 14.46824951171875, "step": 2400 }, { "epoch": 0.8650519031141869, "grad_norm": 5.132116794586182, "learning_rate": 1.9804822824975567e-06, "loss": 14.45693603515625, "step": 2500 }, { "epoch": 0.8996539792387543, "grad_norm": 5.454078197479248, "learning_rate": 1.8975742285170185e-06, "loss": 14.468741455078124, "step": 2600 }, { "epoch": 0.9342560553633218, "grad_norm": 5.184955596923828, "learning_rate": 1.8133376862403233e-06, "loss": 14.482393798828125, "step": 2700 }, { "epoch": 0.9688581314878892, "grad_norm": 5.030216693878174, "learning_rate": 1.7280541308046812e-06, "loss": 14.4536962890625, "step": 2800 }, { "epoch": 1.0, "eval_loss": 3.484435558319092, "eval_runtime": 11.6834, "eval_samples_per_second": 639.538, "eval_steps_per_second": 10.014, "step": 2890 } ], "logging_steps": 100, "max_steps": 5780, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 2890, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1.9471728080409293e+17, "train_batch_size": 64, "trial_name": null, "trial_params": null }