| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.9969593310528317, |
| "eval_steps": 500, |
| "global_step": 656, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.03040668947168377, |
| "grad_norm": 5.294622334039074, |
| "learning_rate": 5e-06, |
| "loss": 0.9257, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.06081337894336754, |
| "grad_norm": 1.6301473096686117, |
| "learning_rate": 5e-06, |
| "loss": 0.8122, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.09122006841505131, |
| "grad_norm": 1.106205167846798, |
| "learning_rate": 5e-06, |
| "loss": 0.7764, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.12162675788673508, |
| "grad_norm": 1.0271191708023029, |
| "learning_rate": 5e-06, |
| "loss": 0.7512, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.15203344735841884, |
| "grad_norm": 0.8176114852390686, |
| "learning_rate": 5e-06, |
| "loss": 0.7338, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.18244013683010263, |
| "grad_norm": 0.9260828555935472, |
| "learning_rate": 5e-06, |
| "loss": 0.721, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.2128468263017864, |
| "grad_norm": 0.9770222831832209, |
| "learning_rate": 5e-06, |
| "loss": 0.7094, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.24325351577347015, |
| "grad_norm": 0.6562302483289237, |
| "learning_rate": 5e-06, |
| "loss": 0.7012, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.27366020524515394, |
| "grad_norm": 0.6984752490925699, |
| "learning_rate": 5e-06, |
| "loss": 0.6979, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.3040668947168377, |
| "grad_norm": 0.8518484569843605, |
| "learning_rate": 5e-06, |
| "loss": 0.6852, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.33447358418852147, |
| "grad_norm": 0.8741274500386024, |
| "learning_rate": 5e-06, |
| "loss": 0.69, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.36488027366020526, |
| "grad_norm": 0.7325820852264833, |
| "learning_rate": 5e-06, |
| "loss": 0.6811, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.395286963131889, |
| "grad_norm": 0.5709370572770963, |
| "learning_rate": 5e-06, |
| "loss": 0.6864, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.4256936526035728, |
| "grad_norm": 0.6057188325485896, |
| "learning_rate": 5e-06, |
| "loss": 0.6818, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.45610034207525657, |
| "grad_norm": 0.5863099510897374, |
| "learning_rate": 5e-06, |
| "loss": 0.6766, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.4865070315469403, |
| "grad_norm": 0.7170797428745375, |
| "learning_rate": 5e-06, |
| "loss": 0.684, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.5169137210186241, |
| "grad_norm": 0.559188071891075, |
| "learning_rate": 5e-06, |
| "loss": 0.6763, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.5473204104903079, |
| "grad_norm": 0.5679992922149982, |
| "learning_rate": 5e-06, |
| "loss": 0.6845, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.5777270999619917, |
| "grad_norm": 0.6193525571471781, |
| "learning_rate": 5e-06, |
| "loss": 0.671, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.6081337894336754, |
| "grad_norm": 0.5431044581567518, |
| "learning_rate": 5e-06, |
| "loss": 0.6707, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.6385404789053591, |
| "grad_norm": 0.5871938765797495, |
| "learning_rate": 5e-06, |
| "loss": 0.6721, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.6689471683770429, |
| "grad_norm": 0.5980280104992193, |
| "learning_rate": 5e-06, |
| "loss": 0.6716, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.6993538578487267, |
| "grad_norm": 0.49868159695154574, |
| "learning_rate": 5e-06, |
| "loss": 0.6686, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.7297605473204105, |
| "grad_norm": 0.6213515255920768, |
| "learning_rate": 5e-06, |
| "loss": 0.6684, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.7601672367920943, |
| "grad_norm": 0.7065699853711176, |
| "learning_rate": 5e-06, |
| "loss": 0.6637, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.790573926263778, |
| "grad_norm": 0.5298004443539811, |
| "learning_rate": 5e-06, |
| "loss": 0.6681, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.8209806157354618, |
| "grad_norm": 0.853485338030159, |
| "learning_rate": 5e-06, |
| "loss": 0.6619, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.8513873052071456, |
| "grad_norm": 0.6474661712665346, |
| "learning_rate": 5e-06, |
| "loss": 0.6611, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.8817939946788294, |
| "grad_norm": 0.5350576553467696, |
| "learning_rate": 5e-06, |
| "loss": 0.6595, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.9122006841505131, |
| "grad_norm": 0.6461920255035306, |
| "learning_rate": 5e-06, |
| "loss": 0.6622, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.9426073736221969, |
| "grad_norm": 0.7113283875706861, |
| "learning_rate": 5e-06, |
| "loss": 0.6565, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.9730140630938806, |
| "grad_norm": 0.5398831103354343, |
| "learning_rate": 5e-06, |
| "loss": 0.6579, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.9973394146712277, |
| "eval_loss": 0.6590341925621033, |
| "eval_runtime": 349.1132, |
| "eval_samples_per_second": 25.381, |
| "eval_steps_per_second": 0.398, |
| "step": 328 |
| }, |
| { |
| "epoch": 1.0057012542759407, |
| "grad_norm": 0.6541787183575265, |
| "learning_rate": 5e-06, |
| "loss": 0.7147, |
| "step": 330 |
| }, |
| { |
| "epoch": 1.0361079437476244, |
| "grad_norm": 0.5849575356382846, |
| "learning_rate": 5e-06, |
| "loss": 0.6206, |
| "step": 340 |
| }, |
| { |
| "epoch": 1.0665146332193083, |
| "grad_norm": 0.6188318897860179, |
| "learning_rate": 5e-06, |
| "loss": 0.6123, |
| "step": 350 |
| }, |
| { |
| "epoch": 1.096921322690992, |
| "grad_norm": 0.5529845628714145, |
| "learning_rate": 5e-06, |
| "loss": 0.6235, |
| "step": 360 |
| }, |
| { |
| "epoch": 1.1273280121626759, |
| "grad_norm": 0.6471216585957622, |
| "learning_rate": 5e-06, |
| "loss": 0.6149, |
| "step": 370 |
| }, |
| { |
| "epoch": 1.1577347016343595, |
| "grad_norm": 0.5348917582128644, |
| "learning_rate": 5e-06, |
| "loss": 0.6151, |
| "step": 380 |
| }, |
| { |
| "epoch": 1.1881413911060434, |
| "grad_norm": 0.5102391958573069, |
| "learning_rate": 5e-06, |
| "loss": 0.611, |
| "step": 390 |
| }, |
| { |
| "epoch": 1.2185480805777271, |
| "grad_norm": 0.5738175958206242, |
| "learning_rate": 5e-06, |
| "loss": 0.6123, |
| "step": 400 |
| }, |
| { |
| "epoch": 1.2489547700494108, |
| "grad_norm": 0.7700452193739229, |
| "learning_rate": 5e-06, |
| "loss": 0.6099, |
| "step": 410 |
| }, |
| { |
| "epoch": 1.2793614595210947, |
| "grad_norm": 0.6437425461349855, |
| "learning_rate": 5e-06, |
| "loss": 0.6143, |
| "step": 420 |
| }, |
| { |
| "epoch": 1.3097681489927784, |
| "grad_norm": 0.7772459625346615, |
| "learning_rate": 5e-06, |
| "loss": 0.6162, |
| "step": 430 |
| }, |
| { |
| "epoch": 1.340174838464462, |
| "grad_norm": 0.7382002997370456, |
| "learning_rate": 5e-06, |
| "loss": 0.6139, |
| "step": 440 |
| }, |
| { |
| "epoch": 1.370581527936146, |
| "grad_norm": 0.6650239061593356, |
| "learning_rate": 5e-06, |
| "loss": 0.6153, |
| "step": 450 |
| }, |
| { |
| "epoch": 1.4009882174078299, |
| "grad_norm": 0.4904795314706487, |
| "learning_rate": 5e-06, |
| "loss": 0.6115, |
| "step": 460 |
| }, |
| { |
| "epoch": 1.4313949068795135, |
| "grad_norm": 0.6074182358853202, |
| "learning_rate": 5e-06, |
| "loss": 0.6133, |
| "step": 470 |
| }, |
| { |
| "epoch": 1.4618015963511972, |
| "grad_norm": 0.6115080516205853, |
| "learning_rate": 5e-06, |
| "loss": 0.6151, |
| "step": 480 |
| }, |
| { |
| "epoch": 1.4922082858228811, |
| "grad_norm": 0.6245371375579123, |
| "learning_rate": 5e-06, |
| "loss": 0.6146, |
| "step": 490 |
| }, |
| { |
| "epoch": 1.5226149752945648, |
| "grad_norm": 0.6563433070038904, |
| "learning_rate": 5e-06, |
| "loss": 0.6182, |
| "step": 500 |
| }, |
| { |
| "epoch": 1.5530216647662485, |
| "grad_norm": 0.5157535145442664, |
| "learning_rate": 5e-06, |
| "loss": 0.6119, |
| "step": 510 |
| }, |
| { |
| "epoch": 1.5834283542379324, |
| "grad_norm": 0.5467230049625755, |
| "learning_rate": 5e-06, |
| "loss": 0.6146, |
| "step": 520 |
| }, |
| { |
| "epoch": 1.6138350437096163, |
| "grad_norm": 0.5809725331645209, |
| "learning_rate": 5e-06, |
| "loss": 0.612, |
| "step": 530 |
| }, |
| { |
| "epoch": 1.6442417331812997, |
| "grad_norm": 0.7935716363732433, |
| "learning_rate": 5e-06, |
| "loss": 0.6037, |
| "step": 540 |
| }, |
| { |
| "epoch": 1.6746484226529836, |
| "grad_norm": 0.5375427777402721, |
| "learning_rate": 5e-06, |
| "loss": 0.6116, |
| "step": 550 |
| }, |
| { |
| "epoch": 1.7050551121246675, |
| "grad_norm": 0.6075737303881514, |
| "learning_rate": 5e-06, |
| "loss": 0.6087, |
| "step": 560 |
| }, |
| { |
| "epoch": 1.7354618015963512, |
| "grad_norm": 0.5457501519081878, |
| "learning_rate": 5e-06, |
| "loss": 0.6146, |
| "step": 570 |
| }, |
| { |
| "epoch": 1.765868491068035, |
| "grad_norm": 0.5359013635410763, |
| "learning_rate": 5e-06, |
| "loss": 0.6121, |
| "step": 580 |
| }, |
| { |
| "epoch": 1.7962751805397188, |
| "grad_norm": 0.5731354873642086, |
| "learning_rate": 5e-06, |
| "loss": 0.6114, |
| "step": 590 |
| }, |
| { |
| "epoch": 1.8266818700114025, |
| "grad_norm": 0.5798625309299036, |
| "learning_rate": 5e-06, |
| "loss": 0.6156, |
| "step": 600 |
| }, |
| { |
| "epoch": 1.8570885594830862, |
| "grad_norm": 0.6682632356720307, |
| "learning_rate": 5e-06, |
| "loss": 0.612, |
| "step": 610 |
| }, |
| { |
| "epoch": 1.88749524895477, |
| "grad_norm": 0.5386120296701589, |
| "learning_rate": 5e-06, |
| "loss": 0.6097, |
| "step": 620 |
| }, |
| { |
| "epoch": 1.917901938426454, |
| "grad_norm": 0.5373342158868872, |
| "learning_rate": 5e-06, |
| "loss": 0.6029, |
| "step": 630 |
| }, |
| { |
| "epoch": 1.9483086278981376, |
| "grad_norm": 0.5069132774726652, |
| "learning_rate": 5e-06, |
| "loss": 0.6107, |
| "step": 640 |
| }, |
| { |
| "epoch": 1.9787153173698213, |
| "grad_norm": 0.6365254431868149, |
| "learning_rate": 5e-06, |
| "loss": 0.6084, |
| "step": 650 |
| }, |
| { |
| "epoch": 1.9969593310528317, |
| "eval_loss": 0.6481617093086243, |
| "eval_runtime": 349.4697, |
| "eval_samples_per_second": 25.356, |
| "eval_steps_per_second": 0.398, |
| "step": 656 |
| }, |
| { |
| "epoch": 1.9969593310528317, |
| "step": 656, |
| "total_flos": 1098615053352960.0, |
| "train_loss": 0.6551533584914556, |
| "train_runtime": 38635.1705, |
| "train_samples_per_second": 8.715, |
| "train_steps_per_second": 0.017 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 656, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 2, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1098615053352960.0, |
| "train_batch_size": 8, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|