| {"loss": 15.49815655, "token_acc": 0.76073471, "grad_norm": 20.53625107, "learning_rate": 9.995e-05, "memory(GiB)": 15.27, "train_speed(iter/s)": 0.025079, "epoch": 0.04155844, "global_step/max_steps": "1/72", "percentage": "1.39%", "elapsed_time": "39s", "remaining_time": "46m 44s"} |
| {"loss": 14.73864651, "token_acc": 0.76618922, "grad_norm": 16.88204384, "learning_rate": 9.881e-05, "memory(GiB)": 15.27, "train_speed(iter/s)": 0.026274, "epoch": 0.20779221, "global_step/max_steps": "5/72", "percentage": "6.94%", "elapsed_time": "3m 9s", "remaining_time": "42m 24s"} |
| {"loss": 13.4493866, "token_acc": 0.78010744, "grad_norm": 9.01259232, "learning_rate": 9.532e-05, "memory(GiB)": 15.27, "train_speed(iter/s)": 0.02649, "epoch": 0.41558442, "global_step/max_steps": "10/72", "percentage": "13.89%", "elapsed_time": "6m 17s", "remaining_time": "38m 58s"} |
| {"loss": 12.06417923, "token_acc": 0.80274969, "grad_norm": 5.82414389, "learning_rate": 8.967e-05, "memory(GiB)": 15.27, "train_speed(iter/s)": 0.026422, "epoch": 0.62337662, "global_step/max_steps": "15/72", "percentage": "20.83%", "elapsed_time": "9m 27s", "remaining_time": "35m 55s"} |
| {"loss": 11.44986954, "token_acc": 0.81188062, "grad_norm": 5.20554876, "learning_rate": 8.214e-05, "memory(GiB)": 15.27, "train_speed(iter/s)": 0.026452, "epoch": 0.83116883, "global_step/max_steps": "20/72", "percentage": "27.78%", "elapsed_time": "12m 35s", "remaining_time": "32m 44s"} |
| {"loss": 8.99566956, "token_acc": 0.81883488, "grad_norm": 0.97829944, "learning_rate": 7.309e-05, "memory(GiB)": 15.27, "train_speed(iter/s)": 0.027663, "epoch": 1.0, "global_step/max_steps": "25/72", "percentage": "34.72%", "elapsed_time": "15m 3s", "remaining_time": "28m 18s"} |
| {"loss": 10.07358246, "token_acc": 0.83231169, "grad_norm": 6.12149954, "learning_rate": 6.294e-05, "memory(GiB)": 15.27, "train_speed(iter/s)": 0.027472, "epoch": 1.20779221, "global_step/max_steps": "30/72", "percentage": "41.67%", "elapsed_time": "18m 11s", "remaining_time": "25m 28s"} |
| {"loss": 9.78539276, "token_acc": 0.83752933, "grad_norm": 5.7537756, "learning_rate": 5.218e-05, "memory(GiB)": 15.27, "train_speed(iter/s)": 0.027377, "epoch": 1.41558442, "global_step/max_steps": "35/72", "percentage": "48.61%", "elapsed_time": "21m 18s", "remaining_time": "22m 31s"} |
| {"loss": 9.26217804, "token_acc": 0.84512247, "grad_norm": 5.84390259, "learning_rate": 4.132e-05, "memory(GiB)": 15.27, "train_speed(iter/s)": 0.027302, "epoch": 1.62337662, "global_step/max_steps": "40/72", "percentage": "55.56%", "elapsed_time": "24m 24s", "remaining_time": "19m 31s"} |
| {"loss": 8.88874435, "token_acc": 0.85017601, "grad_norm": 5.69740343, "learning_rate": 3.087e-05, "memory(GiB)": 15.27, "train_speed(iter/s)": 0.027239, "epoch": 1.83116883, "global_step/max_steps": "45/72", "percentage": "62.50%", "elapsed_time": "27m 31s", "remaining_time": "16m 30s"} |
| {"loss": 6.97750778, "token_acc": 0.85622392, "grad_norm": 1.06839299, "learning_rate": 2.132e-05, "memory(GiB)": 15.27, "train_speed(iter/s)": 0.027667, "epoch": 2.0, "global_step/max_steps": "50/72", "percentage": "69.44%", "elapsed_time": "30m 6s", "remaining_time": "13m 15s"} |
| {"loss": 8.58852844, "token_acc": 0.8563303, "grad_norm": 5.95823574, "learning_rate": 1.314e-05, "memory(GiB)": 15.27, "train_speed(iter/s)": 0.027596, "epoch": 2.20779221, "global_step/max_steps": "55/72", "percentage": "76.39%", "elapsed_time": "33m 12s", "remaining_time": "10m 15s"} |
| {"loss": 8.51608582, "token_acc": 0.8579486, "grad_norm": 6.15225697, "learning_rate": 6.7e-06, "memory(GiB)": 15.27, "train_speed(iter/s)": 0.027508, "epoch": 2.41558442, "global_step/max_steps": "60/72", "percentage": "83.33%", "elapsed_time": "36m 20s", "remaining_time": "7m 16s"} |
| {"loss": 8.48798141, "token_acc": 0.85663218, "grad_norm": 6.40906572, "learning_rate": 2.31e-06, "memory(GiB)": 15.27, "train_speed(iter/s)": 0.027435, "epoch": 2.62337662, "global_step/max_steps": "65/72", "percentage": "90.28%", "elapsed_time": "39m 28s", "remaining_time": "4m 15s"} |
| {"loss": 8.28515778, "token_acc": 0.86078459, "grad_norm": 5.36148739, "learning_rate": 1.9e-07, "memory(GiB)": 15.27, "train_speed(iter/s)": 0.027385, "epoch": 2.83116883, "global_step/max_steps": "70/72", "percentage": "97.22%", "elapsed_time": "42m 35s", "remaining_time": "1m 13s"} |
| {"eval_loss": 0.51206559, "eval_token_acc": 0.85965279, "eval_runtime": 3.2532, "eval_samples_per_second": 0.922, "eval_steps_per_second": 0.922, "epoch": 2.91428571, "global_step/max_steps": "72/72", "percentage": "100.00%", "elapsed_time": "43m 55s", "remaining_time": "0s"} |
| {"train_runtime": 2635.9811, "train_samples_per_second": 0.438, "train_steps_per_second": 0.027, "total_flos": 4978085664278016.0, "train_loss": 9.93005354, "epoch": 2.91428571, "global_step/max_steps": "72/72", "percentage": "100.00%", "elapsed_time": "43m 55s", "remaining_time": "0s"} |
| {"model_parameter_info": "PeftModelForCausalLM: 564.3133M Params (3.7847M Trainable [0.6707%]), 0.0008M Buffers.", "last_model_checkpoint": "/kaggle/working/v1-20250327-154852/checkpoint-72", "best_model_checkpoint": "/kaggle/working/v1-20250327-154852/checkpoint-72", "best_metric": 0.51206559, "global_step": 72, "log_history": [{"loss": 15.498156547546387, "token_acc": 0.7607347136067026, "grad_norm": 20.536251068115234, "learning_rate": 9.99524110790929e-05, "memory(GiB)": 15.27, "train_speed(iter/s)": 0.025079, "epoch": 0.04155844155844156, "step": 1}, {"loss": 14.738646507263184, "token_acc": 0.7661892170575083, "grad_norm": 16.882043838500977, "learning_rate": 9.881480035599667e-05, "memory(GiB)": 15.27, "train_speed(iter/s)": 0.026274, "epoch": 0.2077922077922078, "step": 5}, {"loss": 13.449386596679688, "token_acc": 0.7801074380165289, "grad_norm": 9.012592315673828, "learning_rate": 9.53153893518325e-05, "memory(GiB)": 15.27, "train_speed(iter/s)": 0.02649, "epoch": 0.4155844155844156, "step": 10}, {"loss": 12.064179229736329, "token_acc": 0.8027496875355074, "grad_norm": 5.824143886566162, "learning_rate": 8.966766701456177e-05, "memory(GiB)": 15.27, "train_speed(iter/s)": 0.026422, "epoch": 0.6233766233766234, "step": 15}, {"loss": 11.449869537353516, "token_acc": 0.8118806190730326, "grad_norm": 5.2055487632751465, "learning_rate": 8.213938048432697e-05, "memory(GiB)": 15.27, "train_speed(iter/s)": 0.026452, "epoch": 0.8311688311688312, "step": 20}, {"loss": 8.995669555664062, "token_acc": 0.8188348801845734, "grad_norm": 0.9782994389533997, "learning_rate": 7.308743066175172e-05, "memory(GiB)": 15.27, "train_speed(iter/s)": 0.027663, "epoch": 1.0, "step": 25}, {"loss": 10.073582458496094, "token_acc": 0.8323116942796419, "grad_norm": 6.121499538421631, "learning_rate": 6.294095225512603e-05, "memory(GiB)": 15.27, "train_speed(iter/s)": 0.027472, "epoch": 1.2077922077922079, "step": 30}, {"loss": 9.785392761230469, "token_acc": 0.8375293275870676, "grad_norm": 5.753775596618652, "learning_rate": 5.218096936826681e-05, "memory(GiB)": 15.27, "train_speed(iter/s)": 0.027377, "epoch": 1.4155844155844157, "step": 35}, {"loss": 9.262178039550781, "token_acc": 0.845122473167841, "grad_norm": 5.843902587890625, "learning_rate": 4.131759111665349e-05, "memory(GiB)": 15.27, "train_speed(iter/s)": 0.027302, "epoch": 1.6233766233766234, "step": 40}, {"loss": 8.888744354248047, "token_acc": 0.8501760138480872, "grad_norm": 5.697403430938721, "learning_rate": 3.086582838174551e-05, "memory(GiB)": 15.27, "train_speed(iter/s)": 0.027239, "epoch": 1.8311688311688312, "step": 45}, {"loss": 6.977507781982422, "token_acc": 0.8562239238451811, "grad_norm": 1.0683929920196533, "learning_rate": 2.132117818244771e-05, "memory(GiB)": 15.27, "train_speed(iter/s)": 0.027667, "epoch": 2.0, "step": 50}, {"loss": 8.588528442382813, "token_acc": 0.8563302981165024, "grad_norm": 5.958235740661621, "learning_rate": 1.3136133159493802e-05, "memory(GiB)": 15.27, "train_speed(iter/s)": 0.027596, "epoch": 2.207792207792208, "step": 55}, {"loss": 8.516085815429687, "token_acc": 0.8579486039407488, "grad_norm": 6.152256965637207, "learning_rate": 6.698729810778065e-06, "memory(GiB)": 15.27, "train_speed(iter/s)": 0.027508, "epoch": 2.4155844155844157, "step": 60}, {"loss": 8.487981414794922, "token_acc": 0.8566321792796272, "grad_norm": 6.4090657234191895, "learning_rate": 2.314152462588659e-06, "memory(GiB)": 15.27, "train_speed(iter/s)": 0.027435, "epoch": 2.6233766233766236, "step": 65}, {"loss": 8.285157775878906, "token_acc": 0.8607845902428098, "grad_norm": 5.36148738861084, "learning_rate": 1.9026509541272275e-07, "memory(GiB)": 15.27, "train_speed(iter/s)": 0.027385, "epoch": 2.8311688311688314, "step": 70}, {"eval_loss": 0.512065589427948, "eval_token_acc": 0.8596527923028655, "eval_runtime": 3.2532, "eval_samples_per_second": 0.922, "eval_steps_per_second": 0.922, "epoch": 2.914285714285714, "step": 72}, {"train_runtime": 2635.9811, "train_samples_per_second": 0.438, "train_steps_per_second": 0.027, "total_flos": 4978085664278016.0, "train_loss": 9.930053538746304, "epoch": 2.914285714285714, "step": 72}], "memory": 15.26953125} |
|
|