{ "best_global_step": 320, "best_metric": 0.16129032258064516, "best_model_checkpoint": "out_qwen_0.6b_sft/checkpoint-320", "epoch": 5.245901639344262, "eval_steps": 10, "global_step": 320, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.08196721311475409, "grad_norm": 128.95440298135514, "learning_rate": 4.0816326530612243e-07, "loss": 40.2266, "step": 5, "true_loss": 5.0094 }, { "epoch": 0.16393442622950818, "grad_norm": 115.65629290597954, "learning_rate": 9.183673469387756e-07, "loss": 39.852, "step": 10, "true_loss": 4.8969 }, { "epoch": 0.16393442622950818, "eval_accuracy": 0.0069124423963133645, "eval_loss": 5.0297017097473145, "eval_runtime": 7.6937, "eval_samples_per_second": 56.41, "eval_steps_per_second": 7.149, "step": 10 }, { "epoch": 0.2459016393442623, "grad_norm": 121.33698181204981, "learning_rate": 1.4285714285714286e-06, "loss": 39.7914, "step": 15, "true_loss": 5.0324 }, { "epoch": 0.32786885245901637, "grad_norm": 123.59741878237826, "learning_rate": 1.938775510204082e-06, "loss": 40.1176, "step": 20, "true_loss": 5.0516 }, { "epoch": 0.32786885245901637, "eval_accuracy": 0.016129032258064516, "eval_loss": 4.985995292663574, "eval_runtime": 7.5658, "eval_samples_per_second": 57.364, "eval_steps_per_second": 7.27, "step": 20 }, { "epoch": 0.4098360655737705, "grad_norm": 119.26551170123645, "learning_rate": 2.4489795918367347e-06, "loss": 39.4852, "step": 25, "true_loss": 4.8977 }, { "epoch": 0.4918032786885246, "grad_norm": 121.17042135033064, "learning_rate": 2.959183673469388e-06, "loss": 38.3961, "step": 30, "true_loss": 4.7777 }, { "epoch": 0.4918032786885246, "eval_accuracy": 0.02534562211981567, "eval_loss": 4.856062889099121, "eval_runtime": 9.8239, "eval_samples_per_second": 44.178, "eval_steps_per_second": 5.599, "step": 30 }, { "epoch": 0.5737704918032787, "grad_norm": 148.64472538819717, "learning_rate": 3.469387755102041e-06, "loss": 38.4152, "step": 35, "true_loss": 4.7766 }, { "epoch": 0.6557377049180327, "grad_norm": 157.8362042375912, "learning_rate": 3.979591836734694e-06, "loss": 38.5402, "step": 40, "true_loss": 4.8457 }, { "epoch": 0.6557377049180327, "eval_accuracy": 0.04838709677419355, "eval_loss": 4.698588848114014, "eval_runtime": 7.6407, "eval_samples_per_second": 56.801, "eval_steps_per_second": 7.198, "step": 40 }, { "epoch": 0.7377049180327869, "grad_norm": 146.82607440660624, "learning_rate": 4.489795918367348e-06, "loss": 36.5211, "step": 45, "true_loss": 4.6023 }, { "epoch": 0.819672131147541, "grad_norm": 158.20262885169598, "learning_rate": 5e-06, "loss": 37.591, "step": 50, "true_loss": 4.9816 }, { "epoch": 0.819672131147541, "eval_accuracy": 0.07373271889400922, "eval_loss": 4.536938190460205, "eval_runtime": 7.6075, "eval_samples_per_second": 57.049, "eval_steps_per_second": 7.23, "step": 50 }, { "epoch": 0.9016393442622951, "grad_norm": 147.980428218924, "learning_rate": 4.943052391799545e-06, "loss": 37.0629, "step": 55, "true_loss": 4.6125 }, { "epoch": 0.9836065573770492, "grad_norm": 126.54677633437188, "learning_rate": 4.886104783599089e-06, "loss": 36.1198, "step": 60, "true_loss": 4.4017 }, { "epoch": 0.9836065573770492, "eval_accuracy": 0.07603686635944701, "eval_loss": 4.475734233856201, "eval_runtime": 7.662, "eval_samples_per_second": 56.643, "eval_steps_per_second": 7.178, "step": 60 }, { "epoch": 1.0655737704918034, "grad_norm": 122.80613718263169, "learning_rate": 4.829157175398634e-06, "loss": 36.1342, "step": 65, "true_loss": 4.6152 }, { "epoch": 1.1475409836065573, "grad_norm": 136.54531412889096, "learning_rate": 4.772209567198178e-06, "loss": 36.0707, "step": 70, "true_loss": 4.284 }, { "epoch": 1.1475409836065573, "eval_accuracy": 0.07373271889400922, "eval_loss": 4.457517147064209, "eval_runtime": 7.7142, "eval_samples_per_second": 56.26, "eval_steps_per_second": 7.13, "step": 70 }, { "epoch": 1.2295081967213115, "grad_norm": 139.59274630104497, "learning_rate": 4.7152619589977225e-06, "loss": 34.9596, "step": 75, "true_loss": 4.2389 }, { "epoch": 1.3114754098360657, "grad_norm": 142.6891577520439, "learning_rate": 4.658314350797267e-06, "loss": 35.7543, "step": 80, "true_loss": 4.5697 }, { "epoch": 1.3114754098360657, "eval_accuracy": 0.07603686635944701, "eval_loss": 4.3655314445495605, "eval_runtime": 7.6899, "eval_samples_per_second": 56.437, "eval_steps_per_second": 7.152, "step": 80 }, { "epoch": 1.3934426229508197, "grad_norm": 159.00089279785396, "learning_rate": 4.601366742596811e-06, "loss": 34.967, "step": 85, "true_loss": 4.449 }, { "epoch": 1.4754098360655736, "grad_norm": 150.6608564699539, "learning_rate": 4.544419134396356e-06, "loss": 34.9076, "step": 90, "true_loss": 4.6 }, { "epoch": 1.4754098360655736, "eval_accuracy": 0.07373271889400922, "eval_loss": 4.319250583648682, "eval_runtime": 7.6466, "eval_samples_per_second": 56.757, "eval_steps_per_second": 7.193, "step": 90 }, { "epoch": 1.5573770491803278, "grad_norm": 154.24917515552164, "learning_rate": 4.4874715261959e-06, "loss": 34.3756, "step": 95, "true_loss": 4.4023 }, { "epoch": 1.639344262295082, "grad_norm": 158.50353361820606, "learning_rate": 4.4305239179954446e-06, "loss": 34.9543, "step": 100, "true_loss": 4.5141 }, { "epoch": 1.639344262295082, "eval_accuracy": 0.08525345622119816, "eval_loss": 4.283203125, "eval_runtime": 7.5617, "eval_samples_per_second": 57.394, "eval_steps_per_second": 7.273, "step": 100 }, { "epoch": 1.721311475409836, "grad_norm": 126.79955873407555, "learning_rate": 4.373576309794989e-06, "loss": 32.6608, "step": 105, "true_loss": 3.9732 }, { "epoch": 1.8032786885245902, "grad_norm": 152.75650023991204, "learning_rate": 4.316628701594533e-06, "loss": 33.611, "step": 110, "true_loss": 4.0678 }, { "epoch": 1.8032786885245902, "eval_accuracy": 0.0944700460829493, "eval_loss": 4.219686031341553, "eval_runtime": 7.5831, "eval_samples_per_second": 57.233, "eval_steps_per_second": 7.253, "step": 110 }, { "epoch": 1.8852459016393444, "grad_norm": 154.91519081206442, "learning_rate": 4.259681093394078e-06, "loss": 34.0898, "step": 115, "true_loss": 4.3053 }, { "epoch": 1.9672131147540983, "grad_norm": 154.26448928608832, "learning_rate": 4.202733485193622e-06, "loss": 33.0864, "step": 120, "true_loss": 4.2828 }, { "epoch": 1.9672131147540983, "eval_accuracy": 0.10368663594470046, "eval_loss": 4.198147773742676, "eval_runtime": 7.7523, "eval_samples_per_second": 55.984, "eval_steps_per_second": 7.095, "step": 120 }, { "epoch": 2.0491803278688523, "grad_norm": 152.56212863173033, "learning_rate": 4.145785876993167e-06, "loss": 33.29, "step": 125, "true_loss": 4.1834 }, { "epoch": 2.1311475409836067, "grad_norm": 162.83913288979136, "learning_rate": 4.088838268792711e-06, "loss": 32.6221, "step": 130, "true_loss": 4.0537 }, { "epoch": 2.1311475409836067, "eval_accuracy": 0.11059907834101383, "eval_loss": 4.172513961791992, "eval_runtime": 7.5194, "eval_samples_per_second": 57.718, "eval_steps_per_second": 7.314, "step": 130 }, { "epoch": 2.2131147540983607, "grad_norm": 179.4233086801099, "learning_rate": 4.0318906605922555e-06, "loss": 31.9488, "step": 135, "true_loss": 4.0262 }, { "epoch": 2.2950819672131146, "grad_norm": 171.46365679764654, "learning_rate": 3.9749430523918e-06, "loss": 32.0664, "step": 140, "true_loss": 3.8796 }, { "epoch": 2.2950819672131146, "eval_accuracy": 0.10368663594470046, "eval_loss": 4.146646499633789, "eval_runtime": 7.4997, "eval_samples_per_second": 57.869, "eval_steps_per_second": 7.334, "step": 140 }, { "epoch": 2.3770491803278686, "grad_norm": 178.73613610852706, "learning_rate": 3.917995444191344e-06, "loss": 32.302, "step": 145, "true_loss": 4.0002 }, { "epoch": 2.459016393442623, "grad_norm": 200.35242389508278, "learning_rate": 3.861047835990889e-06, "loss": 32.0721, "step": 150, "true_loss": 3.6775 }, { "epoch": 2.459016393442623, "eval_accuracy": 0.11290322580645161, "eval_loss": 4.112561225891113, "eval_runtime": 7.5207, "eval_samples_per_second": 57.708, "eval_steps_per_second": 7.313, "step": 150 }, { "epoch": 2.540983606557377, "grad_norm": 189.21153835823273, "learning_rate": 3.804100227790433e-06, "loss": 31.7859, "step": 155, "true_loss": 3.8711 }, { "epoch": 2.6229508196721314, "grad_norm": 179.8665671736523, "learning_rate": 3.7471526195899776e-06, "loss": 32.339, "step": 160, "true_loss": 3.9259 }, { "epoch": 2.6229508196721314, "eval_accuracy": 0.1175115207373272, "eval_loss": 4.069781303405762, "eval_runtime": 7.5234, "eval_samples_per_second": 57.687, "eval_steps_per_second": 7.311, "step": 160 }, { "epoch": 2.7049180327868854, "grad_norm": 184.87406326616818, "learning_rate": 3.690205011389522e-06, "loss": 31.6148, "step": 165, "true_loss": 3.9918 }, { "epoch": 2.7868852459016393, "grad_norm": 194.9656301652979, "learning_rate": 3.6332574031890664e-06, "loss": 30.9461, "step": 170, "true_loss": 4.1431 }, { "epoch": 2.7868852459016393, "eval_accuracy": 0.11981566820276497, "eval_loss": 4.030214786529541, "eval_runtime": 7.5282, "eval_samples_per_second": 57.65, "eval_steps_per_second": 7.306, "step": 170 }, { "epoch": 2.8688524590163933, "grad_norm": 168.19816225086294, "learning_rate": 3.5763097949886104e-06, "loss": 31.2117, "step": 175, "true_loss": 4.0352 }, { "epoch": 2.9508196721311473, "grad_norm": 177.32224733459006, "learning_rate": 3.519362186788155e-06, "loss": 31.3617, "step": 180, "true_loss": 3.6492 }, { "epoch": 2.9508196721311473, "eval_accuracy": 0.1313364055299539, "eval_loss": 3.996732711791992, "eval_runtime": 7.498, "eval_samples_per_second": 57.882, "eval_steps_per_second": 7.335, "step": 180 }, { "epoch": 3.0327868852459017, "grad_norm": 192.7334527443661, "learning_rate": 3.4624145785876997e-06, "loss": 31.2043, "step": 185, "true_loss": 3.8145 }, { "epoch": 3.1147540983606556, "grad_norm": 204.32213875438026, "learning_rate": 3.405466970387244e-06, "loss": 28.996, "step": 190, "true_loss": 3.1655 }, { "epoch": 3.1147540983606556, "eval_accuracy": 0.11290322580645161, "eval_loss": 4.006399631500244, "eval_runtime": 7.5951, "eval_samples_per_second": 57.142, "eval_steps_per_second": 7.242, "step": 190 }, { "epoch": 3.19672131147541, "grad_norm": 219.38722327347378, "learning_rate": 3.3485193621867885e-06, "loss": 30.1186, "step": 195, "true_loss": 3.619 }, { "epoch": 3.278688524590164, "grad_norm": 232.33818268663353, "learning_rate": 3.291571753986333e-06, "loss": 30.4023, "step": 200, "true_loss": 3.7051 }, { "epoch": 3.278688524590164, "eval_accuracy": 0.12442396313364056, "eval_loss": 3.9848430156707764, "eval_runtime": 7.5257, "eval_samples_per_second": 57.669, "eval_steps_per_second": 7.308, "step": 200 }, { "epoch": 3.360655737704918, "grad_norm": 229.39700824019172, "learning_rate": 3.2346241457858773e-06, "loss": 29.5804, "step": 205, "true_loss": 3.5024 }, { "epoch": 3.442622950819672, "grad_norm": 225.9592509617873, "learning_rate": 3.1776765375854217e-06, "loss": 29.0619, "step": 210, "true_loss": 3.5379 }, { "epoch": 3.442622950819672, "eval_accuracy": 0.15207373271889402, "eval_loss": 3.9506587982177734, "eval_runtime": 7.8653, "eval_samples_per_second": 55.179, "eval_steps_per_second": 6.993, "step": 210 }, { "epoch": 3.5245901639344264, "grad_norm": 209.06281793043178, "learning_rate": 3.120728929384966e-06, "loss": 29.9575, "step": 215, "true_loss": 3.71 }, { "epoch": 3.6065573770491803, "grad_norm": 213.36792913688217, "learning_rate": 3.0637813211845106e-06, "loss": 29.4311, "step": 220, "true_loss": 3.8148 }, { "epoch": 3.6065573770491803, "eval_accuracy": 0.12211981566820276, "eval_loss": 3.9277074337005615, "eval_runtime": 7.5116, "eval_samples_per_second": 57.778, "eval_steps_per_second": 7.322, "step": 220 }, { "epoch": 3.6885245901639343, "grad_norm": 215.89135789777671, "learning_rate": 3.0068337129840546e-06, "loss": 30.0775, "step": 225, "true_loss": 3.7924 }, { "epoch": 3.7704918032786887, "grad_norm": 216.66007631870477, "learning_rate": 2.949886104783599e-06, "loss": 29.6202, "step": 230, "true_loss": 3.6933 }, { "epoch": 3.7704918032786887, "eval_accuracy": 0.12672811059907835, "eval_loss": 3.9045398235321045, "eval_runtime": 7.6392, "eval_samples_per_second": 56.813, "eval_steps_per_second": 7.2, "step": 230 }, { "epoch": 3.8524590163934427, "grad_norm": 216.8488298790556, "learning_rate": 2.892938496583144e-06, "loss": 29.7172, "step": 235, "true_loss": 3.3319 }, { "epoch": 3.9344262295081966, "grad_norm": 220.90823999032682, "learning_rate": 2.8359908883826882e-06, "loss": 29.235, "step": 240, "true_loss": 3.5549 }, { "epoch": 3.9344262295081966, "eval_accuracy": 0.11290322580645161, "eval_loss": 3.904125928878784, "eval_runtime": 7.6954, "eval_samples_per_second": 56.398, "eval_steps_per_second": 7.147, "step": 240 }, { "epoch": 4.016393442622951, "grad_norm": 242.3023416449528, "learning_rate": 2.7790432801822326e-06, "loss": 27.9219, "step": 245, "true_loss": 3.2678 }, { "epoch": 4.098360655737705, "grad_norm": 255.00674520341965, "learning_rate": 2.722095671981777e-06, "loss": 27.9275, "step": 250, "true_loss": 3.5343 }, { "epoch": 4.098360655737705, "eval_accuracy": 0.1313364055299539, "eval_loss": 3.898968458175659, "eval_runtime": 7.517, "eval_samples_per_second": 57.735, "eval_steps_per_second": 7.317, "step": 250 }, { "epoch": 4.180327868852459, "grad_norm": 273.1905409329504, "learning_rate": 2.6651480637813215e-06, "loss": 27.1873, "step": 255, "true_loss": 3.3906 }, { "epoch": 4.262295081967213, "grad_norm": 227.69914032682405, "learning_rate": 2.608200455580866e-06, "loss": 27.2035, "step": 260, "true_loss": 3.4531 }, { "epoch": 4.262295081967213, "eval_accuracy": 0.1359447004608295, "eval_loss": 3.8699867725372314, "eval_runtime": 7.5321, "eval_samples_per_second": 57.62, "eval_steps_per_second": 7.302, "step": 260 }, { "epoch": 4.344262295081967, "grad_norm": 254.45648837084522, "learning_rate": 2.5512528473804103e-06, "loss": 27.0482, "step": 265, "true_loss": 3.4579 }, { "epoch": 4.426229508196721, "grad_norm": 266.3782057050062, "learning_rate": 2.4943052391799547e-06, "loss": 27.514, "step": 270, "true_loss": 3.5607 }, { "epoch": 4.426229508196721, "eval_accuracy": 0.1336405529953917, "eval_loss": 3.8537046909332275, "eval_runtime": 7.546, "eval_samples_per_second": 57.514, "eval_steps_per_second": 7.289, "step": 270 }, { "epoch": 4.508196721311475, "grad_norm": 270.0666125278816, "learning_rate": 2.437357630979499e-06, "loss": 27.5337, "step": 275, "true_loss": 3.4807 }, { "epoch": 4.590163934426229, "grad_norm": 274.734392454001, "learning_rate": 2.3804100227790436e-06, "loss": 28.3342, "step": 280, "true_loss": 3.3558 }, { "epoch": 4.590163934426229, "eval_accuracy": 0.12903225806451613, "eval_loss": 3.859492063522339, "eval_runtime": 7.6721, "eval_samples_per_second": 56.569, "eval_steps_per_second": 7.169, "step": 280 }, { "epoch": 4.672131147540983, "grad_norm": 272.2658673500175, "learning_rate": 2.323462414578588e-06, "loss": 27.877, "step": 285, "true_loss": 3.4582 }, { "epoch": 4.754098360655737, "grad_norm": 268.9659407625432, "learning_rate": 2.2665148063781324e-06, "loss": 27.6915, "step": 290, "true_loss": 3.9791 }, { "epoch": 4.754098360655737, "eval_accuracy": 0.1336405529953917, "eval_loss": 3.843111038208008, "eval_runtime": 7.6659, "eval_samples_per_second": 56.615, "eval_steps_per_second": 7.175, "step": 290 }, { "epoch": 4.836065573770492, "grad_norm": 263.12731442923484, "learning_rate": 2.209567198177677e-06, "loss": 26.7233, "step": 295, "true_loss": 3.1607 }, { "epoch": 4.918032786885246, "grad_norm": 295.63911480933103, "learning_rate": 2.1526195899772212e-06, "loss": 28.7988, "step": 300, "true_loss": 3.9041 }, { "epoch": 4.918032786885246, "eval_accuracy": 0.1313364055299539, "eval_loss": 3.8396456241607666, "eval_runtime": 7.7433, "eval_samples_per_second": 56.049, "eval_steps_per_second": 7.103, "step": 300 }, { "epoch": 5.0, "grad_norm": 276.8145069400469, "learning_rate": 2.0956719817767656e-06, "loss": 27.155, "step": 305, "true_loss": 3.1979 }, { "epoch": 5.081967213114754, "grad_norm": 277.6991231127756, "learning_rate": 2.03872437357631e-06, "loss": 25.5137, "step": 310, "true_loss": 3.413 }, { "epoch": 5.081967213114754, "eval_accuracy": 0.14285714285714285, "eval_loss": 3.8058576583862305, "eval_runtime": 7.6657, "eval_samples_per_second": 56.616, "eval_steps_per_second": 7.175, "step": 310 }, { "epoch": 5.163934426229508, "grad_norm": 284.7257730508428, "learning_rate": 1.9817767653758545e-06, "loss": 26.0606, "step": 315, "true_loss": 3.2927 }, { "epoch": 5.245901639344262, "grad_norm": 292.26300972505453, "learning_rate": 1.924829157175399e-06, "loss": 25.2526, "step": 320, "true_loss": 3.2581 }, { "epoch": 5.245901639344262, "eval_accuracy": 0.16129032258064516, "eval_loss": 3.8149120807647705, "eval_runtime": 7.6305, "eval_samples_per_second": 56.877, "eval_steps_per_second": 7.208, "step": 320 } ], "logging_steps": 5, "max_steps": 488, "num_input_tokens_seen": 0, "num_train_epochs": 8, "save_steps": 10, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }