speed_rm_model / logging.jsonl
shuyuncci's picture
Upload folder using huggingface_hub
65f3e22 verified
{"loss": 0.29331714, "grad_norm": 3.32348633, "learning_rate": 0.0001, "epoch": 0.00421053, "global_step/max_steps": "1/237", "percentage": "0.42%", "elapsed_time": "4s", "remaining_time": "17m 57s", "memory(GiB)": 20.61, "train_speed(iter/s)": 0.219009}
{"loss": 0.35523126, "grad_norm": 4.03946781, "learning_rate": 9.972e-05, "epoch": 0.04210526, "global_step/max_steps": "10/237", "percentage": "4.22%", "elapsed_time": "34s", "remaining_time": "13m 6s", "memory(GiB)": 23.12, "train_speed(iter/s)": 0.288666}
{"loss": 0.2503803, "grad_norm": 2.00496387, "learning_rate": 9.858e-05, "epoch": 0.08421053, "global_step/max_steps": "20/237", "percentage": "8.44%", "elapsed_time": "1m 10s", "remaining_time": "12m 41s", "memory(GiB)": 24.91, "train_speed(iter/s)": 0.285106}
{"loss": 0.18873653, "grad_norm": 1.84974945, "learning_rate": 9.66e-05, "epoch": 0.12631579, "global_step/max_steps": "30/237", "percentage": "12.66%", "elapsed_time": "1m 44s", "remaining_time": "11m 59s", "memory(GiB)": 24.91, "train_speed(iter/s)": 0.287749}
{"loss": 0.16409756, "grad_norm": 0.81310773, "learning_rate": 9.379e-05, "epoch": 0.16842105, "global_step/max_steps": "40/237", "percentage": "16.88%", "elapsed_time": "2m 18s", "remaining_time": "11m 24s", "memory(GiB)": 24.91, "train_speed(iter/s)": 0.287985}
{"loss": 0.08429332, "grad_norm": 3.7631321, "learning_rate": 9.022e-05, "epoch": 0.21052632, "global_step/max_steps": "50/237", "percentage": "21.10%", "elapsed_time": "2m 55s", "remaining_time": "10m 57s", "memory(GiB)": 26.76, "train_speed(iter/s)": 0.284315}
{"eval_loss": 0.06051549, "eval_runtime": 10.866, "eval_samples_per_second": 18.406, "eval_steps_per_second": 4.601, "epoch": 0.21052632, "global_step/max_steps": "50/237", "percentage": "21.10%", "elapsed_time": "3m 6s", "remaining_time": "11m 38s", "memory(GiB)": 26.76, "train_speed(iter/s)": 0.267765}
{"loss": 0.04279229, "grad_norm": 1.31236207, "learning_rate": 8.594e-05, "epoch": 0.25263158, "global_step/max_steps": "60/237", "percentage": "25.32%", "elapsed_time": "3m 40s", "remaining_time": "10m 50s", "memory(GiB)": 26.76, "train_speed(iter/s)": 0.272006}
{"loss": 0.04818052, "grad_norm": 4.15772343, "learning_rate": 8.154e-05, "epoch": 0.29473684, "global_step/max_steps": "70/237", "percentage": "29.54%", "elapsed_time": "4m 15s", "remaining_time": "10m 9s", "memory(GiB)": 26.76, "train_speed(iter/s)": 0.274018}
{"loss": 0.03957385, "grad_norm": 0.09417952, "learning_rate": 7.614e-05, "epoch": 0.33684211, "global_step/max_steps": "80/237", "percentage": "33.76%", "elapsed_time": "4m 47s", "remaining_time": "9m 24s", "memory(GiB)": 26.76, "train_speed(iter/s)": 0.277993}
{"loss": 0.018903, "grad_norm": 0.907426, "learning_rate": 7.028e-05, "epoch": 0.37894737, "global_step/max_steps": "90/237", "percentage": "37.97%", "elapsed_time": "5m 21s", "remaining_time": "8m 45s", "memory(GiB)": 26.76, "train_speed(iter/s)": 0.279936}
{"loss": 0.01172688, "grad_norm": 8.63210106, "learning_rate": 6.406e-05, "epoch": 0.42105263, "global_step/max_steps": "100/237", "percentage": "42.19%", "elapsed_time": "5m 58s", "remaining_time": "8m 10s", "memory(GiB)": 29.57, "train_speed(iter/s)": 0.279151}
{"eval_loss": 0.05046036, "eval_runtime": 10.8092, "eval_samples_per_second": 18.503, "eval_steps_per_second": 4.626, "epoch": 0.42105263, "global_step/max_steps": "100/237", "percentage": "42.19%", "elapsed_time": "6m 9s", "remaining_time": "8m 25s", "memory(GiB)": 29.57, "train_speed(iter/s)": 0.270972}
{"loss": 0.00881861, "grad_norm": 0.00328147, "learning_rate": 5.759e-05, "epoch": 0.46315789, "global_step/max_steps": "110/237", "percentage": "46.41%", "elapsed_time": "6m 44s", "remaining_time": "7m 46s", "memory(GiB)": 29.57, "train_speed(iter/s)": 0.272052}
{"loss": 0.00015774, "grad_norm": 0.00174886, "learning_rate": 5.099e-05, "epoch": 0.50526316, "global_step/max_steps": "120/237", "percentage": "50.63%", "elapsed_time": "7m 19s", "remaining_time": "7m 8s", "memory(GiB)": 29.57, "train_speed(iter/s)": 0.27332}
{"loss": 0.03046737, "grad_norm": 0.00935189, "learning_rate": 4.438e-05, "epoch": 0.54736842, "global_step/max_steps": "130/237", "percentage": "54.85%", "elapsed_time": "7m 52s", "remaining_time": "6m 28s", "memory(GiB)": 29.57, "train_speed(iter/s)": 0.275106}
{"loss": 0.02314112, "grad_norm": 0.0315498, "learning_rate": 3.786e-05, "epoch": 0.58947368, "global_step/max_steps": "140/237", "percentage": "59.07%", "elapsed_time": "8m 27s", "remaining_time": "5m 51s", "memory(GiB)": 29.57, "train_speed(iter/s)": 0.275892}
{"loss": 0.00282207, "grad_norm": 0.01494653, "learning_rate": 3.156e-05, "epoch": 0.63157895, "global_step/max_steps": "150/237", "percentage": "63.29%", "elapsed_time": "9m 3s", "remaining_time": "5m 15s", "memory(GiB)": 29.57, "train_speed(iter/s)": 0.276184}
{"eval_loss": 0.02281342, "eval_runtime": 10.8029, "eval_samples_per_second": 18.514, "eval_steps_per_second": 4.628, "epoch": 0.63157895, "global_step/max_steps": "150/237", "percentage": "63.29%", "elapsed_time": "9m 13s", "remaining_time": "5m 21s", "memory(GiB)": 29.57, "train_speed(iter/s)": 0.270796}
{"loss": 0.01042076, "grad_norm": 0.28259465, "learning_rate": 2.558e-05, "epoch": 0.67368421, "global_step/max_steps": "160/237", "percentage": "67.51%", "elapsed_time": "9m 50s", "remaining_time": "4m 44s", "memory(GiB)": 29.57, "train_speed(iter/s)": 0.271083}
{"loss": 0.02278298, "grad_norm": 0.01980716, "learning_rate": 2.002e-05, "epoch": 0.71578947, "global_step/max_steps": "170/237", "percentage": "71.73%", "elapsed_time": "10m 25s", "remaining_time": "4m 6s", "memory(GiB)": 29.57, "train_speed(iter/s)": 0.271795}
{"loss": 0.00553427, "grad_norm": 0.00415618, "learning_rate": 1.5e-05, "epoch": 0.75789474, "global_step/max_steps": "180/237", "percentage": "75.95%", "elapsed_time": "11m 1s", "remaining_time": "3m 29s", "memory(GiB)": 29.57, "train_speed(iter/s)": 0.272011}
{"loss": 0.01835876, "grad_norm": 0.01237625, "learning_rate": 1.059e-05, "epoch": 0.8, "global_step/max_steps": "190/237", "percentage": "80.17%", "elapsed_time": "11m 34s", "remaining_time": "2m 51s", "memory(GiB)": 29.57, "train_speed(iter/s)": 0.273676}
{"loss": 0.05563602, "grad_norm": 6.58888197, "learning_rate": 6.87e-06, "epoch": 0.84210526, "global_step/max_steps": "200/237", "percentage": "84.39%", "elapsed_time": "12m 10s", "remaining_time": "2m 15s", "memory(GiB)": 29.57, "train_speed(iter/s)": 0.273871}
{"eval_loss": 0.02215247, "eval_runtime": 10.8336, "eval_samples_per_second": 18.461, "eval_steps_per_second": 4.615, "epoch": 0.84210526, "global_step/max_steps": "200/237", "percentage": "84.39%", "elapsed_time": "12m 21s", "remaining_time": "2m 17s", "memory(GiB)": 29.57, "train_speed(iter/s)": 0.269867}
{"loss": 0.00671489, "grad_norm": 1.64098108, "learning_rate": 3.9e-06, "epoch": 0.88421053, "global_step/max_steps": "210/237", "percentage": "88.61%", "elapsed_time": "12m 57s", "remaining_time": "1m 39s", "memory(GiB)": 29.57, "train_speed(iter/s)": 0.270138}
{"loss": 0.00564253, "grad_norm": 0.06729671, "learning_rate": 1.75e-06, "epoch": 0.92631579, "global_step/max_steps": "220/237", "percentage": "92.83%", "elapsed_time": "13m 33s", "remaining_time": "1m 2s", "memory(GiB)": 29.57, "train_speed(iter/s)": 0.270437}
{"loss": 0.00627558, "grad_norm": 0.65069735, "learning_rate": 4.4e-07, "epoch": 0.96842105, "global_step/max_steps": "230/237", "percentage": "97.05%", "elapsed_time": "14m 7s", "remaining_time": "25s", "memory(GiB)": 29.57, "train_speed(iter/s)": 0.271391}
{"eval_loss": 0.0206338, "eval_runtime": 10.9316, "eval_samples_per_second": 18.296, "eval_steps_per_second": 4.574, "epoch": 0.99789474, "global_step/max_steps": "237/237", "percentage": "100.00%", "elapsed_time": "14m 43s", "remaining_time": "0s", "memory(GiB)": 29.57, "train_speed(iter/s)": 0.268236}
{"train_runtime": 884.6485, "train_samples_per_second": 4.295, "train_steps_per_second": 0.268, "total_flos": 7967520390021120.0, "train_loss": 0.05887531, "epoch": 0.99789474, "global_step/max_steps": "237/237", "percentage": "100.00%", "elapsed_time": "14m 44s", "remaining_time": "0s", "memory(GiB)": 29.57, "train_speed(iter/s)": 0.267907}
{"model_parameter_info": "PeftModelForCausalLM: 8423.0246M Params (25.9297M Trainable [0.3078%]), 0.0001M Buffers.", "last_model_checkpoint": "/mnt/raid0/home/yuncci/ITTS-GRPO-Test/paraspeechcaps/output/speed_rm_model/v10-20260131-131336/checkpoint-237", "best_model_checkpoint": "/mnt/raid0/home/yuncci/ITTS-GRPO-Test/paraspeechcaps/output/speed_rm_model/v10-20260131-131336/checkpoint-237", "best_metric": 0.0206338, "global_step": 237, "log_history": [{"loss": 0.29331713914871216, "grad_norm": 3.323486328125, "learning_rate": 9.999560724782174e-05, "epoch": 0.004210526315789474, "step": 1}, {"loss": 0.35523125860426163, "grad_norm": 4.039467811584473, "learning_rate": 9.971912311135967e-05, "epoch": 0.042105263157894736, "step": 10}, {"loss": 0.2503803014755249, "grad_norm": 2.0049638748168945, "learning_rate": 9.85834670020205e-05, "epoch": 0.08421052631578947, "step": 20}, {"loss": 0.18873653411865235, "grad_norm": 1.8497494459152222, "learning_rate": 9.659538714838634e-05, "epoch": 0.12631578947368421, "step": 30}, {"loss": 0.1640975594520569, "grad_norm": 0.8131077289581299, "learning_rate": 9.378976550881394e-05, "epoch": 0.16842105263157894, "step": 40}, {"loss": 0.0842933177947998, "grad_norm": 3.763132095336914, "learning_rate": 9.021582826353824e-05, "epoch": 0.21052631578947367, "step": 50}, {"eval_loss": 0.06051548942923546, "eval_runtime": 10.866, "eval_samples_per_second": 18.406, "eval_steps_per_second": 4.601, "epoch": 0.21052631578947367, "step": 50}, {"loss": 0.0427922934293747, "grad_norm": 1.3123620748519897, "learning_rate": 8.593628211416964e-05, "epoch": 0.25263157894736843, "step": 60}, {"loss": 0.04818052053451538, "grad_norm": 4.157723426818848, "learning_rate": 8.154321920070414e-05, "epoch": 0.29473684210526313, "step": 70}, {"loss": 0.039573848247528076, "grad_norm": 0.09417951852083206, "learning_rate": 7.613905469171246e-05, "epoch": 0.3368421052631579, "step": 80}, {"loss": 0.018903000652790068, "grad_norm": 0.9074259996414185, "learning_rate": 7.027626604064969e-05, "epoch": 0.37894736842105264, "step": 90}, {"loss": 0.011726881563663482, "grad_norm": 8.632101058959961, "learning_rate": 6.405771911037699e-05, "epoch": 0.42105263157894735, "step": 100}, {"eval_loss": 0.05046036094427109, "eval_runtime": 10.8092, "eval_samples_per_second": 18.503, "eval_steps_per_second": 4.626, "epoch": 0.42105263157894735, "step": 100}, {"loss": 0.008818605542182922, "grad_norm": 0.0032814720179885626, "learning_rate": 5.7592521739125726e-05, "epoch": 0.4631578947368421, "step": 110}, {"loss": 0.00015773772029206156, "grad_norm": 0.001748856739141047, "learning_rate": 5.0994109383253506e-05, "epoch": 0.5052631578947369, "step": 120}, {"loss": 0.03046737313270569, "grad_norm": 0.009351890534162521, "learning_rate": 4.4378254829551396e-05, "epoch": 0.5473684210526316, "step": 130}, {"loss": 0.023141118884086608, "grad_norm": 0.03154980391263962, "learning_rate": 3.786103689779861e-05, "epoch": 0.5894736842105263, "step": 140}, {"loss": 0.0028220713138580322, "grad_norm": 0.014946533367037773, "learning_rate": 3.1556803773799614e-05, "epoch": 0.631578947368421, "step": 150}, {"eval_loss": 0.022813422605395317, "eval_runtime": 10.8029, "eval_samples_per_second": 18.514, "eval_steps_per_second": 4.628, "epoch": 0.631578947368421, "step": 150}, {"loss": 0.010420759022235871, "grad_norm": 0.2825946509838104, "learning_rate": 2.5576166707349385e-05, "epoch": 0.6736842105263158, "step": 160}, {"loss": 0.022782982885837556, "grad_norm": 0.019807158038020134, "learning_rate": 2.002405927680374e-05, "epoch": 0.7157894736842105, "step": 170}, {"loss": 0.005534267798066139, "grad_norm": 0.004156183451414108, "learning_rate": 1.4997896271528739e-05, "epoch": 0.7578947368421053, "step": 180}, {"loss": 0.018358764052391053, "grad_norm": 0.012376249767839909, "learning_rate": 1.0585864495652897e-05, "epoch": 0.8, "step": 190}, {"loss": 0.055636024475097655, "grad_norm": 6.588881969451904, "learning_rate": 6.865375481914016e-06, "epoch": 0.8421052631578947, "step": 200}, {"eval_loss": 0.022152472287416458, "eval_runtime": 10.8336, "eval_samples_per_second": 18.461, "eval_steps_per_second": 4.615, "epoch": 0.8421052631578947, "step": 200}, {"loss": 0.006714888662099838, "grad_norm": 1.6409810781478882, "learning_rate": 3.901707263589671e-06, "epoch": 0.8842105263157894, "step": 210}, {"loss": 0.005642532929778099, "grad_norm": 0.06729670614004135, "learning_rate": 1.7468590353731495e-06, "epoch": 0.9263157894736842, "step": 220}, {"loss": 0.006275583058595657, "grad_norm": 0.6506973505020142, "learning_rate": 4.386387988014273e-07, "epoch": 0.968421052631579, "step": 230}, {"eval_loss": 0.020633801817893982, "eval_runtime": 10.9316, "eval_samples_per_second": 18.296, "eval_steps_per_second": 4.574, "epoch": 0.9978947368421053, "step": 237}, {"train_runtime": 884.6485, "train_samples_per_second": 4.295, "train_steps_per_second": 0.268, "total_flos": 7967520390021120.0, "train_loss": 0.058875313908439364, "epoch": 0.9978947368421053, "step": 237}], "memory": 29.56640625}