{ "best_metric": 0.827, "best_model_checkpoint": "models/adu_parser/checkpoint-1815", "epoch": 5.0, "eval_steps": 500, "global_step": 3025, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.1652892561983471, "grad_norm": 360209.0, "learning_rate": 6.600660066006602e-05, "loss": 1.5772, "step": 100 }, { "epoch": 0.3305785123966942, "grad_norm": 195230.484375, "learning_rate": 0.00013201320132013203, "loss": 0.6384, "step": 200 }, { "epoch": 0.49586776859504134, "grad_norm": 143748.359375, "learning_rate": 0.00019801980198019803, "loss": 0.4274, "step": 300 }, { "epoch": 0.6611570247933884, "grad_norm": 187710.203125, "learning_rate": 0.00019287288758265981, "loss": 0.3765, "step": 400 }, { "epoch": 0.8264462809917356, "grad_norm": 123676.3125, "learning_rate": 0.0001855253490080823, "loss": 0.3827, "step": 500 }, { "epoch": 0.9917355371900827, "grad_norm": 179717.921875, "learning_rate": 0.0001781778104335048, "loss": 0.3307, "step": 600 }, { "epoch": 1.0, "eval_accuracy": 0.888, "eval_f1": 0.7892, "eval_loss": 0.3547873795032501, "eval_precision": 0.7275, "eval_recall": 0.8624, "eval_runtime": 19.2905, "eval_samples_per_second": 125.346, "eval_steps_per_second": 3.94, "step": 605 }, { "epoch": 1.1570247933884297, "grad_norm": 148071.671875, "learning_rate": 0.00017083027185892726, "loss": 0.3299, "step": 700 }, { "epoch": 1.322314049586777, "grad_norm": 96186.578125, "learning_rate": 0.00016348273328434975, "loss": 0.3216, "step": 800 }, { "epoch": 1.487603305785124, "grad_norm": 76934.8203125, "learning_rate": 0.0001561351947097722, "loss": 0.3119, "step": 900 }, { "epoch": 1.6528925619834711, "grad_norm": 109702.9453125, "learning_rate": 0.00014878765613519473, "loss": 0.3181, "step": 1000 }, { "epoch": 1.8181818181818183, "grad_norm": 298751.0, "learning_rate": 0.0001414401175606172, "loss": 0.2937, "step": 1100 }, { "epoch": 1.9834710743801653, "grad_norm": 159368.796875, "learning_rate": 0.00013409257898603968, "loss": 0.2963, "step": 1200 }, { "epoch": 2.0, "eval_accuracy": 0.9042, "eval_f1": 0.8267, "eval_loss": 0.27530255913734436, "eval_precision": 0.7794, "eval_recall": 0.8801, "eval_runtime": 19.2072, "eval_samples_per_second": 125.89, "eval_steps_per_second": 3.957, "step": 1210 }, { "epoch": 2.1487603305785123, "grad_norm": 206873.609375, "learning_rate": 0.00012674504041146215, "loss": 0.2772, "step": 1300 }, { "epoch": 2.3140495867768593, "grad_norm": 80094.4609375, "learning_rate": 0.00011939750183688465, "loss": 0.2771, "step": 1400 }, { "epoch": 2.479338842975207, "grad_norm": 191308.390625, "learning_rate": 0.00011204996326230713, "loss": 0.2749, "step": 1500 }, { "epoch": 2.644628099173554, "grad_norm": 134234.203125, "learning_rate": 0.00010470242468772962, "loss": 0.2746, "step": 1600 }, { "epoch": 2.809917355371901, "grad_norm": 96117.125, "learning_rate": 9.73548861131521e-05, "loss": 0.2684, "step": 1700 }, { "epoch": 2.975206611570248, "grad_norm": 109045.7421875, "learning_rate": 9.000734753857459e-05, "loss": 0.2481, "step": 1800 }, { "epoch": 3.0, "eval_accuracy": 0.9036, "eval_f1": 0.827, "eval_loss": 0.26943454146385193, "eval_precision": 0.7786, "eval_recall": 0.8818, "eval_runtime": 19.2976, "eval_samples_per_second": 125.301, "eval_steps_per_second": 3.938, "step": 1815 }, { "epoch": 3.1404958677685952, "grad_norm": 133268.59375, "learning_rate": 8.265980896399706e-05, "loss": 0.2384, "step": 1900 }, { "epoch": 3.3057851239669422, "grad_norm": 341372.5, "learning_rate": 7.531227038941955e-05, "loss": 0.2463, "step": 2000 }, { "epoch": 3.4710743801652892, "grad_norm": 110221.5625, "learning_rate": 6.796473181484203e-05, "loss": 0.2512, "step": 2100 }, { "epoch": 3.6363636363636362, "grad_norm": 173338.03125, "learning_rate": 6.0617193240264514e-05, "loss": 0.2586, "step": 2200 }, { "epoch": 3.8016528925619832, "grad_norm": 180035.3125, "learning_rate": 5.3269654665687e-05, "loss": 0.2639, "step": 2300 }, { "epoch": 3.9669421487603307, "grad_norm": 44317.7578125, "learning_rate": 4.592211609110948e-05, "loss": 0.2377, "step": 2400 }, { "epoch": 4.0, "eval_accuracy": 0.903, "eval_f1": 0.8197, "eval_loss": 0.2731046676635742, "eval_precision": 0.7671, "eval_recall": 0.8801, "eval_runtime": 19.2631, "eval_samples_per_second": 125.525, "eval_steps_per_second": 3.945, "step": 2420 }, { "epoch": 4.132231404958677, "grad_norm": 141946.765625, "learning_rate": 3.8574577516531966e-05, "loss": 0.2376, "step": 2500 }, { "epoch": 4.297520661157025, "grad_norm": 81543.375, "learning_rate": 3.122703894195444e-05, "loss": 0.2373, "step": 2600 }, { "epoch": 4.462809917355372, "grad_norm": 92284.8359375, "learning_rate": 2.387950036737693e-05, "loss": 0.2424, "step": 2700 }, { "epoch": 4.628099173553719, "grad_norm": 51905.05078125, "learning_rate": 1.6531961792799413e-05, "loss": 0.2246, "step": 2800 }, { "epoch": 4.793388429752066, "grad_norm": 157494.578125, "learning_rate": 9.184423218221897e-06, "loss": 0.2408, "step": 2900 }, { "epoch": 4.958677685950414, "grad_norm": 134388.328125, "learning_rate": 1.8368846436443793e-06, "loss": 0.2266, "step": 3000 }, { "epoch": 5.0, "eval_accuracy": 0.9042, "eval_f1": 0.8228, "eval_loss": 0.2719922959804535, "eval_precision": 0.7732, "eval_recall": 0.8793, "eval_runtime": 19.2747, "eval_samples_per_second": 125.449, "eval_steps_per_second": 3.943, "step": 3025 } ], "logging_steps": 100, "max_steps": 3025, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 500, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 2, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 6383455545761280.0, "train_batch_size": 32, "trial_name": null, "trial_params": null }