| { | |
| "best_metric": 0.8285950217539291, | |
| "best_model_checkpoint": "/content/temp_assamese/checkpoint-50000", | |
| "epoch": 1.0, | |
| "eval_steps": 5000, | |
| "global_step": 53713, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.09308733453726287, | |
| "grad_norm": 5.457111358642578, | |
| "learning_rate": 4.5348425893172976e-05, | |
| "loss": 2.4466, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.09308733453726287, | |
| "eval_accuracy": 0.707468341537135, | |
| "eval_loss": 1.5004358291625977, | |
| "eval_runtime": 308.6127, | |
| "eval_samples_per_second": 146.822, | |
| "eval_steps_per_second": 9.177, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.18617466907452573, | |
| "grad_norm": 4.776674270629883, | |
| "learning_rate": 4.0694990039655205e-05, | |
| "loss": 1.4994, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 0.18617466907452573, | |
| "eval_accuracy": 0.7532453087820641, | |
| "eval_loss": 1.2256046533584595, | |
| "eval_runtime": 306.864, | |
| "eval_samples_per_second": 147.658, | |
| "eval_steps_per_second": 9.229, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 0.2792620036117886, | |
| "grad_norm": 3.9309194087982178, | |
| "learning_rate": 3.604248505948281e-05, | |
| "loss": 1.2888, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 0.2792620036117886, | |
| "eval_accuracy": 0.7765668816186476, | |
| "eval_loss": 1.099416732788086, | |
| "eval_runtime": 321.8088, | |
| "eval_samples_per_second": 140.801, | |
| "eval_steps_per_second": 8.8, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 0.37234933814905147, | |
| "grad_norm": 3.988945722579956, | |
| "learning_rate": 3.1389980079310413e-05, | |
| "loss": 1.1746, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 0.37234933814905147, | |
| "eval_accuracy": 0.791549800931217, | |
| "eval_loss": 1.0090231895446777, | |
| "eval_runtime": 316.5344, | |
| "eval_samples_per_second": 143.147, | |
| "eval_steps_per_second": 8.947, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 0.4654366726863143, | |
| "grad_norm": 4.230010509490967, | |
| "learning_rate": 2.6736544225792642e-05, | |
| "loss": 1.0994, | |
| "step": 25000 | |
| }, | |
| { | |
| "epoch": 0.4654366726863143, | |
| "eval_accuracy": 0.8021278468205446, | |
| "eval_loss": 0.9513992667198181, | |
| "eval_runtime": 317.1699, | |
| "eval_samples_per_second": 142.86, | |
| "eval_steps_per_second": 8.929, | |
| "step": 25000 | |
| }, | |
| { | |
| "epoch": 0.5585240072235772, | |
| "grad_norm": 4.287986755371094, | |
| "learning_rate": 2.2084970118965616e-05, | |
| "loss": 1.0379, | |
| "step": 30000 | |
| }, | |
| { | |
| "epoch": 0.5585240072235772, | |
| "eval_accuracy": 0.8115293649487124, | |
| "eval_loss": 0.9028974771499634, | |
| "eval_runtime": 316.9066, | |
| "eval_samples_per_second": 142.979, | |
| "eval_steps_per_second": 8.936, | |
| "step": 30000 | |
| }, | |
| { | |
| "epoch": 0.65161134176084, | |
| "grad_norm": 3.521850347518921, | |
| "learning_rate": 1.743339601213859e-05, | |
| "loss": 0.9956, | |
| "step": 35000 | |
| }, | |
| { | |
| "epoch": 0.65161134176084, | |
| "eval_accuracy": 0.8174002465681974, | |
| "eval_loss": 0.8695101737976074, | |
| "eval_runtime": 316.9452, | |
| "eval_samples_per_second": 142.962, | |
| "eval_steps_per_second": 8.935, | |
| "step": 35000 | |
| }, | |
| { | |
| "epoch": 0.7446986762981029, | |
| "grad_norm": 4.046538829803467, | |
| "learning_rate": 1.2779960158620818e-05, | |
| "loss": 0.9647, | |
| "step": 40000 | |
| }, | |
| { | |
| "epoch": 0.7446986762981029, | |
| "eval_accuracy": 0.8216175421669631, | |
| "eval_loss": 0.8461592793464661, | |
| "eval_runtime": 318.3007, | |
| "eval_samples_per_second": 142.353, | |
| "eval_steps_per_second": 8.897, | |
| "step": 40000 | |
| }, | |
| { | |
| "epoch": 0.8377860108353657, | |
| "grad_norm": 4.023233413696289, | |
| "learning_rate": 8.12745517844842e-06, | |
| "loss": 0.9351, | |
| "step": 45000 | |
| }, | |
| { | |
| "epoch": 0.8377860108353657, | |
| "eval_accuracy": 0.8258444821249434, | |
| "eval_loss": 0.8274036645889282, | |
| "eval_runtime": 318.395, | |
| "eval_samples_per_second": 142.311, | |
| "eval_steps_per_second": 8.895, | |
| "step": 45000 | |
| }, | |
| { | |
| "epoch": 0.9308733453726286, | |
| "grad_norm": 3.7155344486236572, | |
| "learning_rate": 3.4749501982760224e-06, | |
| "loss": 0.9194, | |
| "step": 50000 | |
| }, | |
| { | |
| "epoch": 0.9308733453726286, | |
| "eval_accuracy": 0.8285950217539291, | |
| "eval_loss": 0.8120360374450684, | |
| "eval_runtime": 309.307, | |
| "eval_samples_per_second": 146.492, | |
| "eval_steps_per_second": 9.156, | |
| "step": 50000 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "step": 53713, | |
| "total_flos": 1.1339138340497818e+17, | |
| "train_loss": 1.2134282816267128, | |
| "train_runtime": 14742.2291, | |
| "train_samples_per_second": 58.295, | |
| "train_steps_per_second": 3.643 | |
| } | |
| ], | |
| "logging_steps": 5000, | |
| "max_steps": 53713, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 5000, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 1.1339138340497818e+17, | |
| "train_batch_size": 16, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |