{ "best_metric": 0.8266331658291457, "best_model_checkpoint": "results/deberta-v3-large/checkpoint-350", "epoch": 14.0, "eval_steps": 500, "global_step": 350, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 1.357721209526062, "learning_rate": 9.9e-06, "loss": 0.6886, "step": 25 }, { "epoch": 1.0, "eval_accuracy": 0.6490566037735849, "eval_f1": 0.6193724420190996, "eval_loss": 0.6591259241104126, "eval_precision": 0.6715976331360947, "eval_recall": 0.5746835443037974, "eval_runtime": 7.9916, "eval_samples_per_second": 99.479, "eval_steps_per_second": 0.876, "step": 25 }, { "epoch": 2.0, "grad_norm": 5.484292984008789, "learning_rate": 9.800000000000001e-06, "loss": 0.6144, "step": 50 }, { "epoch": 2.0, "eval_accuracy": 0.7081761006289308, "eval_f1": 0.7121588089330024, "eval_loss": 0.5845022797584534, "eval_precision": 0.6982968369829684, "eval_recall": 0.7265822784810126, "eval_runtime": 7.2078, "eval_samples_per_second": 110.298, "eval_steps_per_second": 0.971, "step": 50 }, { "epoch": 3.0, "grad_norm": 5.340143203735352, "learning_rate": 9.7e-06, "loss": 0.5033, "step": 75 }, { "epoch": 3.0, "eval_accuracy": 0.7584905660377359, "eval_f1": 0.7453580901856764, "eval_loss": 0.5280972719192505, "eval_precision": 0.7827298050139275, "eval_recall": 0.7113924050632912, "eval_runtime": 7.306, "eval_samples_per_second": 108.814, "eval_steps_per_second": 0.958, "step": 75 }, { "epoch": 4.0, "grad_norm": 11.039319038391113, "learning_rate": 9.600000000000001e-06, "loss": 0.3689, "step": 100 }, { "epoch": 4.0, "eval_accuracy": 0.7773584905660378, "eval_f1": 0.7756653992395437, "eval_loss": 0.5035372972488403, "eval_precision": 0.7766497461928934, "eval_recall": 0.7746835443037975, "eval_runtime": 7.6101, "eval_samples_per_second": 104.467, "eval_steps_per_second": 0.92, "step": 100 }, { "epoch": 5.0, "grad_norm": 10.600135803222656, "learning_rate": 9.5e-06, "loss": 0.2588, "step": 125 }, { "epoch": 5.0, "eval_accuracy": 0.7773584905660378, "eval_f1": 0.7385524372230429, "eval_loss": 0.5942177772521973, "eval_precision": 0.8865248226950354, "eval_recall": 0.6329113924050633, "eval_runtime": 7.274, "eval_samples_per_second": 109.293, "eval_steps_per_second": 0.962, "step": 125 }, { "epoch": 6.0, "grad_norm": 29.822813034057617, "learning_rate": 9.4e-06, "loss": 0.2093, "step": 150 }, { "epoch": 6.0, "eval_accuracy": 0.7849056603773585, "eval_f1": 0.7466666666666667, "eval_loss": 0.6665811538696289, "eval_precision": 0.9, "eval_recall": 0.6379746835443038, "eval_runtime": 7.3649, "eval_samples_per_second": 107.944, "eval_steps_per_second": 0.95, "step": 150 }, { "epoch": 7.0, "grad_norm": 8.441102981567383, "learning_rate": 9.3e-06, "loss": 0.1578, "step": 175 }, { "epoch": 7.0, "eval_accuracy": 0.8125786163522013, "eval_f1": 0.8082368082368082, "eval_loss": 0.5872107744216919, "eval_precision": 0.8219895287958116, "eval_recall": 0.7949367088607595, "eval_runtime": 7.4254, "eval_samples_per_second": 107.065, "eval_steps_per_second": 0.943, "step": 175 }, { "epoch": 8.0, "grad_norm": 8.99835205078125, "learning_rate": 9.200000000000002e-06, "loss": 0.147, "step": 200 }, { "epoch": 8.0, "eval_accuracy": 0.769811320754717, "eval_f1": 0.7197549770290965, "eval_loss": 0.7519409656524658, "eval_precision": 0.9108527131782945, "eval_recall": 0.5949367088607594, "eval_runtime": 7.7545, "eval_samples_per_second": 102.521, "eval_steps_per_second": 0.903, "step": 200 }, { "epoch": 9.0, "grad_norm": 1.7589746713638306, "learning_rate": 9.100000000000001e-06, "loss": 0.1021, "step": 225 }, { "epoch": 9.0, "eval_accuracy": 0.810062893081761, "eval_f1": 0.7945578231292517, "eval_loss": 0.6721034646034241, "eval_precision": 0.8588235294117647, "eval_recall": 0.739240506329114, "eval_runtime": 7.6115, "eval_samples_per_second": 104.448, "eval_steps_per_second": 0.92, "step": 225 }, { "epoch": 10.0, "grad_norm": 5.351127624511719, "learning_rate": 9e-06, "loss": 0.088, "step": 250 }, { "epoch": 10.0, "eval_accuracy": 0.8176100628930818, "eval_f1": 0.8119325551232166, "eval_loss": 0.6214590668678284, "eval_precision": 0.8324468085106383, "eval_recall": 0.7924050632911392, "eval_runtime": 7.3972, "eval_samples_per_second": 107.472, "eval_steps_per_second": 0.946, "step": 250 }, { "epoch": 11.0, "grad_norm": 3.241373300552368, "learning_rate": 8.900000000000001e-06, "loss": 0.0751, "step": 275 }, { "epoch": 11.0, "eval_accuracy": 0.8125786163522013, "eval_f1": 0.8171779141104294, "eval_loss": 0.7023480534553528, "eval_precision": 0.7928571428571428, "eval_recall": 0.8430379746835444, "eval_runtime": 7.2686, "eval_samples_per_second": 109.375, "eval_steps_per_second": 0.963, "step": 275 }, { "epoch": 12.0, "grad_norm": 34.19482421875, "learning_rate": 8.8e-06, "loss": 0.0611, "step": 300 }, { "epoch": 12.0, "eval_accuracy": 0.8, "eval_f1": 0.774468085106383, "eval_loss": 0.9275862574577332, "eval_precision": 0.8806451612903226, "eval_recall": 0.6911392405063291, "eval_runtime": 7.2012, "eval_samples_per_second": 110.399, "eval_steps_per_second": 0.972, "step": 300 }, { "epoch": 13.0, "grad_norm": 16.30063247680664, "learning_rate": 8.700000000000001e-06, "loss": 0.0568, "step": 325 }, { "epoch": 13.0, "eval_accuracy": 0.8113207547169812, "eval_f1": 0.8026315789473685, "eval_loss": 0.7658352255821228, "eval_precision": 0.8356164383561644, "eval_recall": 0.7721518987341772, "eval_runtime": 7.2249, "eval_samples_per_second": 110.036, "eval_steps_per_second": 0.969, "step": 325 }, { "epoch": 14.0, "grad_norm": 0.775602400302887, "learning_rate": 8.6e-06, "loss": 0.0531, "step": 350 }, { "epoch": 14.0, "eval_accuracy": 0.8264150943396227, "eval_f1": 0.8266331658291457, "eval_loss": 0.7808425426483154, "eval_precision": 0.8204488778054863, "eval_recall": 0.8329113924050633, "eval_runtime": 7.7991, "eval_samples_per_second": 101.935, "eval_steps_per_second": 0.898, "step": 350 } ], "logging_steps": 500, "max_steps": 2500, "num_input_tokens_seen": 0, "num_train_epochs": 100, "save_steps": 500, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 2.138632372487107e+16, "train_batch_size": 128, "trial_name": null, "trial_params": null }