| { |
| "best_global_step": 13430, |
| "best_metric": 0.8506006776876477, |
| "best_model_checkpoint": "./aner_lora_model/checkpoint-13430", |
| "epoch": 34.0, |
| "eval_steps": 500, |
| "global_step": 13430, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 1.0, |
| "grad_norm": 1.1466563940048218, |
| "learning_rate": 9.450337512054003e-06, |
| "loss": 1.6728, |
| "step": 395 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_f1": 0.07284681130834977, |
| "eval_loss": 0.8734206557273865, |
| "eval_precision": 0.09899928520371694, |
| "eval_recall": 0.05762429789889744, |
| "eval_runtime": 6.1863, |
| "eval_samples_per_second": 415.757, |
| "eval_steps_per_second": 6.628, |
| "step": 395 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 1.8038758039474487, |
| "learning_rate": 1.8948891031822567e-05, |
| "loss": 0.5426, |
| "step": 790 |
| }, |
| { |
| "epoch": 2.0, |
| "eval_f1": 0.6452263332997278, |
| "eval_loss": 0.3722752630710602, |
| "eval_precision": 0.6259780907668232, |
| "eval_recall": 0.6656958602038694, |
| "eval_runtime": 6.2125, |
| "eval_samples_per_second": 414.003, |
| "eval_steps_per_second": 6.6, |
| "step": 790 |
| }, |
| { |
| "epoch": 3.0, |
| "grad_norm": 2.234807014465332, |
| "learning_rate": 2.844744455159113e-05, |
| "loss": 0.3384, |
| "step": 1185 |
| }, |
| { |
| "epoch": 3.0, |
| "eval_f1": 0.7009996970614966, |
| "eval_loss": 0.2873896658420563, |
| "eval_precision": 0.6811224489795918, |
| "eval_recall": 0.7220719783648846, |
| "eval_runtime": 6.2219, |
| "eval_samples_per_second": 413.378, |
| "eval_steps_per_second": 6.59, |
| "step": 1185 |
| }, |
| { |
| "epoch": 4.0, |
| "grad_norm": 2.2888472080230713, |
| "learning_rate": 3.7970106075216974e-05, |
| "loss": 0.2783, |
| "step": 1580 |
| }, |
| { |
| "epoch": 4.0, |
| "eval_f1": 0.7592421915002561, |
| "eval_loss": 0.27101996541023254, |
| "eval_precision": 0.7476805163372328, |
| "eval_recall": 0.7711670480549199, |
| "eval_runtime": 6.2368, |
| "eval_samples_per_second": 412.391, |
| "eval_steps_per_second": 6.574, |
| "step": 1580 |
| }, |
| { |
| "epoch": 5.0, |
| "grad_norm": 2.091982841491699, |
| "learning_rate": 4.749276759884282e-05, |
| "loss": 0.2408, |
| "step": 1975 |
| }, |
| { |
| "epoch": 5.0, |
| "eval_f1": 0.7708728179551123, |
| "eval_loss": 0.2352629005908966, |
| "eval_precision": 0.7405136067458796, |
| "eval_recall": 0.8038277511961722, |
| "eval_runtime": 6.2249, |
| "eval_samples_per_second": 413.181, |
| "eval_steps_per_second": 6.586, |
| "step": 1975 |
| }, |
| { |
| "epoch": 6.0, |
| "grad_norm": 3.6555726528167725, |
| "learning_rate": 5.699132111861139e-05, |
| "loss": 0.2138, |
| "step": 2370 |
| }, |
| { |
| "epoch": 6.0, |
| "eval_f1": 0.7946803069053708, |
| "eval_loss": 0.23413550853729248, |
| "eval_precision": 0.7818035426731079, |
| "eval_recall": 0.8079883503224464, |
| "eval_runtime": 6.2199, |
| "eval_samples_per_second": 413.509, |
| "eval_steps_per_second": 6.592, |
| "step": 2370 |
| }, |
| { |
| "epoch": 7.0, |
| "grad_norm": 1.3194808959960938, |
| "learning_rate": 6.648987463837994e-05, |
| "loss": 0.1997, |
| "step": 2765 |
| }, |
| { |
| "epoch": 7.0, |
| "eval_f1": 0.7968973259848949, |
| "eval_loss": 0.217984139919281, |
| "eval_precision": 0.7822079743538369, |
| "eval_recall": 0.8121489494487206, |
| "eval_runtime": 6.2321, |
| "eval_samples_per_second": 412.701, |
| "eval_steps_per_second": 6.579, |
| "step": 2765 |
| }, |
| { |
| "epoch": 8.0, |
| "grad_norm": 3.00563907623291, |
| "learning_rate": 7.601253616200579e-05, |
| "loss": 0.1796, |
| "step": 3160 |
| }, |
| { |
| "epoch": 8.0, |
| "eval_f1": 0.8006453564586065, |
| "eval_loss": 0.21363000571727753, |
| "eval_precision": 0.776908023483366, |
| "eval_recall": 0.8258789265654254, |
| "eval_runtime": 6.2148, |
| "eval_samples_per_second": 413.849, |
| "eval_steps_per_second": 6.597, |
| "step": 3160 |
| }, |
| { |
| "epoch": 9.0, |
| "grad_norm": 1.7021512985229492, |
| "learning_rate": 8.553519768563163e-05, |
| "loss": 0.1628, |
| "step": 3555 |
| }, |
| { |
| "epoch": 9.0, |
| "eval_f1": 0.8144906889182865, |
| "eval_loss": 0.21552523970603943, |
| "eval_precision": 0.7972111553784861, |
| "eval_recall": 0.8325358851674641, |
| "eval_runtime": 6.2332, |
| "eval_samples_per_second": 412.63, |
| "eval_steps_per_second": 6.578, |
| "step": 3555 |
| }, |
| { |
| "epoch": 10.0, |
| "grad_norm": 1.311308741569519, |
| "learning_rate": 9.505785920925749e-05, |
| "loss": 0.1487, |
| "step": 3950 |
| }, |
| { |
| "epoch": 10.0, |
| "eval_f1": 0.8213381921247089, |
| "eval_loss": 0.2061518430709839, |
| "eval_precision": 0.7998817034700315, |
| "eval_recall": 0.8439775327647181, |
| "eval_runtime": 6.2201, |
| "eval_samples_per_second": 413.497, |
| "eval_steps_per_second": 6.592, |
| "step": 3950 |
| }, |
| { |
| "epoch": 11.0, |
| "grad_norm": 0.8549454808235168, |
| "learning_rate": 9.998387447114843e-05, |
| "loss": 0.1373, |
| "step": 4345 |
| }, |
| { |
| "epoch": 11.0, |
| "eval_f1": 0.8243118796684744, |
| "eval_loss": 0.22040106356143951, |
| "eval_precision": 0.8111155859846959, |
| "eval_recall": 0.8379446640316206, |
| "eval_runtime": 6.2308, |
| "eval_samples_per_second": 412.791, |
| "eval_steps_per_second": 6.58, |
| "step": 4345 |
| }, |
| { |
| "epoch": 12.0, |
| "grad_norm": 1.147991418838501, |
| "learning_rate": 9.984720099749273e-05, |
| "loss": 0.1214, |
| "step": 4740 |
| }, |
| { |
| "epoch": 12.0, |
| "eval_f1": 0.833384789544098, |
| "eval_loss": 0.21504363417625427, |
| "eval_precision": 0.824643584521385, |
| "eval_recall": 0.8423132931142084, |
| "eval_runtime": 6.2273, |
| "eval_samples_per_second": 413.023, |
| "eval_steps_per_second": 6.584, |
| "step": 4740 |
| }, |
| { |
| "epoch": 13.0, |
| "grad_norm": 0.7487550973892212, |
| "learning_rate": 9.957158852398329e-05, |
| "loss": 0.1091, |
| "step": 5135 |
| }, |
| { |
| "epoch": 13.0, |
| "eval_f1": 0.83111337061515, |
| "eval_loss": 0.21468985080718994, |
| "eval_precision": 0.8128480509148767, |
| "eval_recall": 0.8502184314541293, |
| "eval_runtime": 6.2234, |
| "eval_samples_per_second": 413.28, |
| "eval_steps_per_second": 6.588, |
| "step": 5135 |
| }, |
| { |
| "epoch": 14.0, |
| "grad_norm": 1.407198429107666, |
| "learning_rate": 9.915780526469535e-05, |
| "loss": 0.0979, |
| "step": 5530 |
| }, |
| { |
| "epoch": 14.0, |
| "eval_f1": 0.838515129458251, |
| "eval_loss": 0.21988336741924286, |
| "eval_precision": 0.8382536382536383, |
| "eval_recall": 0.8387767838568754, |
| "eval_runtime": 6.2301, |
| "eval_samples_per_second": 412.832, |
| "eval_steps_per_second": 6.581, |
| "step": 5530 |
| }, |
| { |
| "epoch": 15.0, |
| "grad_norm": 0.48871779441833496, |
| "learning_rate": 9.86070045568485e-05, |
| "loss": 0.0873, |
| "step": 5925 |
| }, |
| { |
| "epoch": 15.0, |
| "eval_f1": 0.8290302907094939, |
| "eval_loss": 0.22614166140556335, |
| "eval_precision": 0.8105744384814152, |
| "eval_recall": 0.848346161847306, |
| "eval_runtime": 6.2224, |
| "eval_samples_per_second": 413.346, |
| "eval_steps_per_second": 6.589, |
| "step": 5925 |
| }, |
| { |
| "epoch": 16.0, |
| "grad_norm": 2.422437906265259, |
| "learning_rate": 9.792072164611214e-05, |
| "loss": 0.0784, |
| "step": 6320 |
| }, |
| { |
| "epoch": 16.0, |
| "eval_f1": 0.8394858611825193, |
| "eval_loss": 0.2310497760772705, |
| "eval_precision": 0.8300122000813339, |
| "eval_recall": 0.8491782816725608, |
| "eval_runtime": 6.2296, |
| "eval_samples_per_second": 412.868, |
| "eval_steps_per_second": 6.581, |
| "step": 6320 |
| }, |
| { |
| "epoch": 17.0, |
| "grad_norm": 1.0132837295532227, |
| "learning_rate": 9.710086940741867e-05, |
| "loss": 0.0699, |
| "step": 6715 |
| }, |
| { |
| "epoch": 17.0, |
| "eval_f1": 0.8350071736011477, |
| "eval_loss": 0.24940118193626404, |
| "eval_precision": 0.8228640678650777, |
| "eval_recall": 0.8475140420220512, |
| "eval_runtime": 6.2441, |
| "eval_samples_per_second": 411.911, |
| "eval_steps_per_second": 6.566, |
| "step": 6715 |
| }, |
| { |
| "epoch": 18.0, |
| "grad_norm": 1.5865906476974487, |
| "learning_rate": 9.61523045626935e-05, |
| "loss": 0.0615, |
| "step": 7110 |
| }, |
| { |
| "epoch": 18.0, |
| "eval_f1": 0.8329538397847237, |
| "eval_loss": 0.2699420750141144, |
| "eval_precision": 0.8288362512873326, |
| "eval_recall": 0.8371125442063657, |
| "eval_runtime": 6.2246, |
| "eval_samples_per_second": 413.199, |
| "eval_steps_per_second": 6.587, |
| "step": 7110 |
| }, |
| { |
| "epoch": 19.0, |
| "grad_norm": 0.9923080205917358, |
| "learning_rate": 9.507285711634415e-05, |
| "loss": 0.0576, |
| "step": 7505 |
| }, |
| { |
| "epoch": 19.0, |
| "eval_f1": 0.82642089093702, |
| "eval_loss": 0.2682414948940277, |
| "eval_precision": 0.8138362242839855, |
| "eval_recall": 0.8394008737258165, |
| "eval_runtime": 6.2361, |
| "eval_samples_per_second": 412.439, |
| "eval_steps_per_second": 6.575, |
| "step": 7505 |
| }, |
| { |
| "epoch": 20.0, |
| "grad_norm": 1.0131040811538696, |
| "learning_rate": 9.38677781889291e-05, |
| "loss": 0.0497, |
| "step": 7900 |
| }, |
| { |
| "epoch": 20.0, |
| "eval_f1": 0.8408021280949458, |
| "eval_loss": 0.2817462980747223, |
| "eval_precision": 0.8272599154419167, |
| "eval_recall": 0.854795090493031, |
| "eval_runtime": 6.2397, |
| "eval_samples_per_second": 412.202, |
| "eval_steps_per_second": 6.571, |
| "step": 7900 |
| }, |
| { |
| "epoch": 21.0, |
| "grad_norm": 1.129745364189148, |
| "learning_rate": 9.254042669444088e-05, |
| "loss": 0.0463, |
| "step": 8295 |
| }, |
| { |
| "epoch": 21.0, |
| "eval_f1": 0.8426954625687884, |
| "eval_loss": 0.2874164581298828, |
| "eval_precision": 0.8412106135986733, |
| "eval_recall": 0.8441855627210318, |
| "eval_runtime": 6.2285, |
| "eval_samples_per_second": 412.943, |
| "eval_steps_per_second": 6.583, |
| "step": 8295 |
| }, |
| { |
| "epoch": 22.0, |
| "grad_norm": 1.3233388662338257, |
| "learning_rate": 9.10945023569444e-05, |
| "loss": 0.0412, |
| "step": 8690 |
| }, |
| { |
| "epoch": 22.0, |
| "eval_f1": 0.8439258178787248, |
| "eval_loss": 0.29583561420440674, |
| "eval_precision": 0.8453350031308704, |
| "eval_recall": 0.8425213230705222, |
| "eval_runtime": 6.2232, |
| "eval_samples_per_second": 413.292, |
| "eval_steps_per_second": 6.588, |
| "step": 8690 |
| }, |
| { |
| "epoch": 23.0, |
| "grad_norm": 0.6825762987136841, |
| "learning_rate": 8.953403539834375e-05, |
| "loss": 0.0388, |
| "step": 9085 |
| }, |
| { |
| "epoch": 23.0, |
| "eval_f1": 0.8467850836189883, |
| "eval_loss": 0.29167571663856506, |
| "eval_precision": 0.845643153526971, |
| "eval_recall": 0.8479301019346785, |
| "eval_runtime": 6.2302, |
| "eval_samples_per_second": 412.826, |
| "eval_steps_per_second": 6.581, |
| "step": 9085 |
| }, |
| { |
| "epoch": 24.0, |
| "grad_norm": 0.829084038734436, |
| "learning_rate": 8.786337530495295e-05, |
| "loss": 0.0343, |
| "step": 9480 |
| }, |
| { |
| "epoch": 24.0, |
| "eval_f1": 0.8326785534401128, |
| "eval_loss": 0.29245325922966003, |
| "eval_precision": 0.8072265625, |
| "eval_recall": 0.85978780944456, |
| "eval_runtime": 6.2492, |
| "eval_samples_per_second": 411.57, |
| "eval_steps_per_second": 6.561, |
| "step": 9480 |
| }, |
| { |
| "epoch": 25.0, |
| "grad_norm": 1.2089000940322876, |
| "learning_rate": 8.609180455499081e-05, |
| "loss": 0.0319, |
| "step": 9875 |
| }, |
| { |
| "epoch": 25.0, |
| "eval_f1": 0.8404788703571062, |
| "eval_loss": 0.3039838969707489, |
| "eval_precision": 0.8270237615787354, |
| "eval_recall": 0.8543790305804035, |
| "eval_runtime": 6.2496, |
| "eval_samples_per_second": 411.548, |
| "eval_steps_per_second": 6.56, |
| "step": 9875 |
| }, |
| { |
| "epoch": 26.0, |
| "grad_norm": 0.7031717300415039, |
| "learning_rate": 8.421527039366616e-05, |
| "loss": 0.0297, |
| "step": 10270 |
| }, |
| { |
| "epoch": 26.0, |
| "eval_f1": 0.8409658617818484, |
| "eval_loss": 0.3323589265346527, |
| "eval_precision": 0.8414913559675068, |
| "eval_recall": 0.840441023507385, |
| "eval_runtime": 6.2285, |
| "eval_samples_per_second": 412.939, |
| "eval_steps_per_second": 6.583, |
| "step": 10270 |
| }, |
| { |
| "epoch": 27.0, |
| "grad_norm": 1.0431761741638184, |
| "learning_rate": 8.224336808020677e-05, |
| "loss": 0.0254, |
| "step": 10665 |
| }, |
| { |
| "epoch": 27.0, |
| "eval_f1": 0.842159067675322, |
| "eval_loss": 0.31897974014282227, |
| "eval_precision": 0.8279396984924623, |
| "eval_recall": 0.8568753900561681, |
| "eval_runtime": 6.2339, |
| "eval_samples_per_second": 412.583, |
| "eval_steps_per_second": 6.577, |
| "step": 10665 |
| }, |
| { |
| "epoch": 28.0, |
| "grad_norm": 0.3485606014728546, |
| "learning_rate": 8.018159389383709e-05, |
| "loss": 0.0237, |
| "step": 11060 |
| }, |
| { |
| "epoch": 28.0, |
| "eval_f1": 0.8385502471169687, |
| "eval_loss": 0.3295208811759949, |
| "eval_precision": 0.8301732925586136, |
| "eval_recall": 0.8470979821094238, |
| "eval_runtime": 6.2249, |
| "eval_samples_per_second": 413.179, |
| "eval_steps_per_second": 6.586, |
| "step": 11060 |
| }, |
| { |
| "epoch": 29.0, |
| "grad_norm": 2.1144888401031494, |
| "learning_rate": 7.80356946134664e-05, |
| "loss": 0.0217, |
| "step": 11455 |
| }, |
| { |
| "epoch": 29.0, |
| "eval_f1": 0.8437499999999999, |
| "eval_loss": 0.32363420724868774, |
| "eval_precision": 0.833976833976834, |
| "eval_recall": 0.8537549407114624, |
| "eval_runtime": 6.2372, |
| "eval_samples_per_second": 412.362, |
| "eval_steps_per_second": 6.573, |
| "step": 11455 |
| }, |
| { |
| "epoch": 30.0, |
| "grad_norm": 0.29809829592704773, |
| "learning_rate": 7.581165149970385e-05, |
| "loss": 0.0204, |
| "step": 11850 |
| }, |
| { |
| "epoch": 30.0, |
| "eval_f1": 0.8443138900545532, |
| "eval_loss": 0.35288870334625244, |
| "eval_precision": 0.8516402116402116, |
| "eval_recall": 0.8371125442063657, |
| "eval_runtime": 6.2428, |
| "eval_samples_per_second": 411.996, |
| "eval_steps_per_second": 6.568, |
| "step": 11850 |
| }, |
| { |
| "epoch": 31.0, |
| "grad_norm": 0.03988971561193466, |
| "learning_rate": 7.351566362330311e-05, |
| "loss": 0.0187, |
| "step": 12245 |
| }, |
| { |
| "epoch": 31.0, |
| "eval_f1": 0.8424198701432547, |
| "eval_loss": 0.3403635323047638, |
| "eval_precision": 0.8347630718954249, |
| "eval_recall": 0.8502184314541293, |
| "eval_runtime": 6.1938, |
| "eval_samples_per_second": 415.254, |
| "eval_steps_per_second": 6.62, |
| "step": 12245 |
| }, |
| { |
| "epoch": 32.0, |
| "grad_norm": 0.40415504574775696, |
| "learning_rate": 7.115413058650544e-05, |
| "loss": 0.0168, |
| "step": 12640 |
| }, |
| { |
| "epoch": 32.0, |
| "eval_f1": 0.8455690929917734, |
| "eval_loss": 0.34554678201675415, |
| "eval_precision": 0.8465387823185988, |
| "eval_recall": 0.8446016226336592, |
| "eval_runtime": 6.1803, |
| "eval_samples_per_second": 416.159, |
| "eval_steps_per_second": 6.634, |
| "step": 12640 |
| }, |
| { |
| "epoch": 33.0, |
| "grad_norm": 1.3843847513198853, |
| "learning_rate": 6.873363468544161e-05, |
| "loss": 0.0154, |
| "step": 13035 |
| }, |
| { |
| "epoch": 33.0, |
| "eval_f1": 0.8446492575524833, |
| "eval_loss": 0.34318989515304565, |
| "eval_precision": 0.8317870108914885, |
| "eval_recall": 0.8579155398377366, |
| "eval_runtime": 6.1485, |
| "eval_samples_per_second": 418.312, |
| "eval_steps_per_second": 6.668, |
| "step": 13035 |
| }, |
| { |
| "epoch": 34.0, |
| "grad_norm": 0.4158535897731781, |
| "learning_rate": 6.626092256331148e-05, |
| "loss": 0.0141, |
| "step": 13430 |
| }, |
| { |
| "epoch": 34.0, |
| "eval_f1": 0.8506006776876477, |
| "eval_loss": 0.353132963180542, |
| "eval_precision": 0.8398215733982157, |
| "eval_recall": 0.8616600790513834, |
| "eval_runtime": 6.1312, |
| "eval_samples_per_second": 419.497, |
| "eval_steps_per_second": 6.687, |
| "step": 13430 |
| } |
| ], |
| "logging_steps": 50, |
| "max_steps": 27650, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 70, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.5841542093969132e+16, |
| "train_batch_size": 32, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|