{ "best_global_step": 120, "best_metric": 0.7120494842529297, "best_model_checkpoint": "/dss/dssfs05/pn39qo/pn39qo-dss-0001/tong/efficient_reasoning/extraction-vs-summary-efficient-cot-reasoning-perspective---Experiment-main/output/lora/Limo_qwen/checkpoint-120", "epoch": 10.0, "eval_steps": 500, "global_step": 120, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.16842105263157894, "grad_norm": 0.3850134313106537, "learning_rate": 1.3333333333333333e-05, "loss": 0.9614, "step": 2 }, { "epoch": 0.3368421052631579, "grad_norm": 0.38739946484565735, "learning_rate": 4e-05, "loss": 0.9707, "step": 4 }, { "epoch": 0.5052631578947369, "grad_norm": 0.3434392809867859, "learning_rate": 6.666666666666667e-05, "loss": 0.9543, "step": 6 }, { "epoch": 0.6736842105263158, "grad_norm": 0.1770918369293213, "learning_rate": 7.998481228099806e-05, "loss": 0.865, "step": 8 }, { "epoch": 0.8421052631578947, "grad_norm": 0.2255554050207138, "learning_rate": 7.98633797202668e-05, "loss": 0.8631, "step": 10 }, { "epoch": 1.0, "grad_norm": 0.274989515542984, "learning_rate": 7.962088338550013e-05, "loss": 0.8842, "step": 12 }, { "epoch": 1.0, "eval_loss": 0.8996532559394836, "eval_runtime": 12.9673, "eval_samples_per_second": 3.085, "eval_steps_per_second": 0.771, "step": 12 }, { "epoch": 1.168421052631579, "grad_norm": 0.2877059280872345, "learning_rate": 7.925805973009672e-05, "loss": 0.856, "step": 14 }, { "epoch": 1.3368421052631578, "grad_norm": 0.17793452739715576, "learning_rate": 7.877601063757323e-05, "loss": 0.8322, "step": 16 }, { "epoch": 1.5052631578947369, "grad_norm": 0.09469418972730637, "learning_rate": 7.81762000751803e-05, "loss": 0.8178, "step": 18 }, { "epoch": 1.6736842105263157, "grad_norm": 0.09645849466323853, "learning_rate": 7.74604496478822e-05, "loss": 0.8212, "step": 20 }, { "epoch": 1.8421052631578947, "grad_norm": 0.11077257990837097, "learning_rate": 7.663093306620231e-05, "loss": 0.798, "step": 22 }, { "epoch": 2.0, "grad_norm": 0.10621998459100723, "learning_rate": 7.569016954473577e-05, "loss": 0.8086, "step": 24 }, { "epoch": 2.0, "eval_loss": 0.8223039507865906, "eval_runtime": 12.9571, "eval_samples_per_second": 3.087, "eval_steps_per_second": 0.772, "step": 24 }, { "epoch": 2.168421052631579, "grad_norm": 0.09538944810628891, "learning_rate": 7.464101615137756e-05, "loss": 0.7815, "step": 26 }, { "epoch": 2.336842105263158, "grad_norm": 0.07264512777328491, "learning_rate": 7.348665913050115e-05, "loss": 0.7909, "step": 28 }, { "epoch": 2.5052631578947366, "grad_norm": 0.0889253318309784, "learning_rate": 7.223060422643914e-05, "loss": 0.795, "step": 30 }, { "epoch": 2.6736842105263157, "grad_norm": 0.09207943081855774, "learning_rate": 7.087666603665284e-05, "loss": 0.7529, "step": 32 }, { "epoch": 2.8421052631578947, "grad_norm": 0.07369054853916168, "learning_rate": 6.942895642692527e-05, "loss": 0.7329, "step": 34 }, { "epoch": 3.0, "grad_norm": 0.06927139312028885, "learning_rate": 6.789187204375981e-05, "loss": 0.7502, "step": 36 }, { "epoch": 3.0, "eval_loss": 0.7780648469924927, "eval_runtime": 12.9544, "eval_samples_per_second": 3.088, "eval_steps_per_second": 0.772, "step": 36 }, { "epoch": 3.168421052631579, "grad_norm": 0.0669899433851242, "learning_rate": 6.627008096190938e-05, "loss": 0.7365, "step": 38 }, { "epoch": 3.336842105263158, "grad_norm": 0.06613507866859436, "learning_rate": 6.456850850758673e-05, "loss": 0.7316, "step": 40 }, { "epoch": 3.5052631578947366, "grad_norm": 0.06987571716308594, "learning_rate": 6.279232230041065e-05, "loss": 0.742, "step": 42 }, { "epoch": 3.6736842105263157, "grad_norm": 0.057263512164354324, "learning_rate": 6.094691655951512e-05, "loss": 0.7204, "step": 44 }, { "epoch": 3.8421052631578947, "grad_norm": 0.061604950577020645, "learning_rate": 5.903789572148295e-05, "loss": 0.7258, "step": 46 }, { "epoch": 4.0, "grad_norm": 0.06501278281211853, "learning_rate": 5.707105741985615e-05, "loss": 0.7287, "step": 48 }, { "epoch": 4.0, "eval_loss": 0.7513701319694519, "eval_runtime": 12.9656, "eval_samples_per_second": 3.085, "eval_steps_per_second": 0.771, "step": 48 }, { "epoch": 4.168421052631579, "grad_norm": 0.0571233369410038, "learning_rate": 5.505237487791343e-05, "loss": 0.7132, "step": 50 }, { "epoch": 4.336842105263158, "grad_norm": 0.059772882610559464, "learning_rate": 5.298797876818735e-05, "loss": 0.7349, "step": 52 }, { "epoch": 4.505263157894737, "grad_norm": 0.05535353720188141, "learning_rate": 5.088413859381341e-05, "loss": 0.708, "step": 54 }, { "epoch": 4.673684210526316, "grad_norm": 0.058156561106443405, "learning_rate": 4.874724364825504e-05, "loss": 0.7166, "step": 56 }, { "epoch": 4.842105263157895, "grad_norm": 0.05242394655942917, "learning_rate": 4.658378361122936e-05, "loss": 0.6791, "step": 58 }, { "epoch": 5.0, "grad_norm": 0.053081415593624115, "learning_rate": 4.440032883976318e-05, "loss": 0.6899, "step": 60 }, { "epoch": 5.0, "eval_loss": 0.7341214418411255, "eval_runtime": 12.9486, "eval_samples_per_second": 3.089, "eval_steps_per_second": 0.772, "step": 60 }, { "epoch": 5.168421052631579, "grad_norm": 0.05485621094703674, "learning_rate": 4.220351041423462e-05, "loss": 0.695, "step": 62 }, { "epoch": 5.336842105263158, "grad_norm": 0.047592103481292725, "learning_rate": 4e-05, "loss": 0.696, "step": 64 }, { "epoch": 5.505263157894737, "grad_norm": 0.05209505185484886, "learning_rate": 3.779648958576538e-05, "loss": 0.6842, "step": 66 }, { "epoch": 5.673684210526316, "grad_norm": 0.05388766899704933, "learning_rate": 3.559967116023683e-05, "loss": 0.6967, "step": 68 }, { "epoch": 5.842105263157895, "grad_norm": 0.05102350562810898, "learning_rate": 3.341621638877064e-05, "loss": 0.6816, "step": 70 }, { "epoch": 6.0, "grad_norm": 0.059541136026382446, "learning_rate": 3.125275635174497e-05, "loss": 0.6934, "step": 72 }, { "epoch": 6.0, "eval_loss": 0.7228327989578247, "eval_runtime": 12.9513, "eval_samples_per_second": 3.088, "eval_steps_per_second": 0.772, "step": 72 }, { "epoch": 6.168421052631579, "grad_norm": 0.04430772364139557, "learning_rate": 2.9115861406186593e-05, "loss": 0.6659, "step": 74 }, { "epoch": 6.336842105263158, "grad_norm": 0.04723000526428223, "learning_rate": 2.7012021231812666e-05, "loss": 0.6659, "step": 76 }, { "epoch": 6.505263157894737, "grad_norm": 0.05059608444571495, "learning_rate": 2.4947625122086585e-05, "loss": 0.6888, "step": 78 }, { "epoch": 6.673684210526316, "grad_norm": 0.05666281282901764, "learning_rate": 2.2928942580143855e-05, "loss": 0.6868, "step": 80 }, { "epoch": 6.842105263157895, "grad_norm": 0.05316559597849846, "learning_rate": 2.096210427851706e-05, "loss": 0.7081, "step": 82 }, { "epoch": 7.0, "grad_norm": 0.05505692958831787, "learning_rate": 1.9053083440484887e-05, "loss": 0.6727, "step": 84 }, { "epoch": 7.0, "eval_loss": 0.7168187499046326, "eval_runtime": 12.9563, "eval_samples_per_second": 3.087, "eval_steps_per_second": 0.772, "step": 84 }, { "epoch": 7.168421052631579, "grad_norm": 0.053103502839803696, "learning_rate": 1.7207677699589355e-05, "loss": 0.6816, "step": 86 }, { "epoch": 7.336842105263158, "grad_norm": 0.046842310577631, "learning_rate": 1.5431491492413288e-05, "loss": 0.6642, "step": 88 }, { "epoch": 7.505263157894737, "grad_norm": 0.04811316728591919, "learning_rate": 1.3729919038090627e-05, "loss": 0.6729, "step": 90 }, { "epoch": 7.673684210526316, "grad_norm": 0.04840261861681938, "learning_rate": 1.2108127956240186e-05, "loss": 0.6697, "step": 92 }, { "epoch": 7.842105263157895, "grad_norm": 0.048225287348032, "learning_rate": 1.0571043573074737e-05, "loss": 0.6757, "step": 94 }, { "epoch": 8.0, "grad_norm": 0.05675382539629936, "learning_rate": 9.123333963347166e-06, "loss": 0.69, "step": 96 }, { "epoch": 8.0, "eval_loss": 0.7133689522743225, "eval_runtime": 12.9684, "eval_samples_per_second": 3.084, "eval_steps_per_second": 0.771, "step": 96 }, { "epoch": 8.16842105263158, "grad_norm": 0.04558368772268295, "learning_rate": 7.769395773560874e-06, "loss": 0.6674, "step": 98 }, { "epoch": 8.336842105263157, "grad_norm": 0.049284905195236206, "learning_rate": 6.513340869498859e-06, "loss": 0.6833, "step": 100 }, { "epoch": 8.505263157894737, "grad_norm": 0.04926304891705513, "learning_rate": 5.358983848622452e-06, "loss": 0.6743, "step": 102 }, { "epoch": 8.673684210526316, "grad_norm": 0.04950882866978645, "learning_rate": 4.3098304552642385e-06, "loss": 0.6636, "step": 104 }, { "epoch": 8.842105263157894, "grad_norm": 0.049746621400117874, "learning_rate": 3.3690669337977e-06, "loss": 0.6609, "step": 106 }, { "epoch": 9.0, "grad_norm": 0.04946954548358917, "learning_rate": 2.5395503521178143e-06, "loss": 0.6892, "step": 108 }, { "epoch": 9.0, "eval_loss": 0.7123732566833496, "eval_runtime": 12.9488, "eval_samples_per_second": 3.089, "eval_steps_per_second": 0.772, "step": 108 }, { "epoch": 9.16842105263158, "grad_norm": 0.04948243126273155, "learning_rate": 1.8237999248197002e-06, "loss": 0.6742, "step": 110 }, { "epoch": 9.336842105263157, "grad_norm": 0.05210984870791435, "learning_rate": 1.2239893624267852e-06, "loss": 0.6949, "step": 112 }, { "epoch": 9.505263157894737, "grad_norm": 0.047761935740709305, "learning_rate": 7.419402699032852e-07, "loss": 0.6467, "step": 114 }, { "epoch": 9.673684210526316, "grad_norm": 0.04957466945052147, "learning_rate": 3.791166144998704e-07, "loss": 0.6643, "step": 116 }, { "epoch": 9.842105263157894, "grad_norm": 0.0482342354953289, "learning_rate": 1.3662027973320614e-07, "loss": 0.6764, "step": 118 }, { "epoch": 10.0, "grad_norm": 0.04675516113638878, "learning_rate": 1.5187719001943378e-08, "loss": 0.6735, "step": 120 }, { "epoch": 10.0, "eval_loss": 0.7120494842529297, "eval_runtime": 12.9487, "eval_samples_per_second": 3.089, "eval_steps_per_second": 0.772, "step": 120 }, { "epoch": 10.0, "step": 120, "total_flos": 3.766644395416224e+18, "train_loss": 0.7336713840564092, "train_runtime": 9734.8369, "train_samples_per_second": 0.781, "train_steps_per_second": 0.012 } ], "logging_steps": 2, "max_steps": 120, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 3.766644395416224e+18, "train_batch_size": 1, "trial_name": null, "trial_params": null }