| { | |
| "best_metric": 0.8266331658291457, | |
| "best_model_checkpoint": "results/deberta-v3-large/checkpoint-350", | |
| "epoch": 14.0, | |
| "eval_steps": 500, | |
| "global_step": 350, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 1.357721209526062, | |
| "learning_rate": 9.9e-06, | |
| "loss": 0.6886, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_accuracy": 0.6490566037735849, | |
| "eval_f1": 0.6193724420190996, | |
| "eval_loss": 0.6591259241104126, | |
| "eval_precision": 0.6715976331360947, | |
| "eval_recall": 0.5746835443037974, | |
| "eval_runtime": 7.9916, | |
| "eval_samples_per_second": 99.479, | |
| "eval_steps_per_second": 0.876, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 5.484292984008789, | |
| "learning_rate": 9.800000000000001e-06, | |
| "loss": 0.6144, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_accuracy": 0.7081761006289308, | |
| "eval_f1": 0.7121588089330024, | |
| "eval_loss": 0.5845022797584534, | |
| "eval_precision": 0.6982968369829684, | |
| "eval_recall": 0.7265822784810126, | |
| "eval_runtime": 7.2078, | |
| "eval_samples_per_second": 110.298, | |
| "eval_steps_per_second": 0.971, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "grad_norm": 5.340143203735352, | |
| "learning_rate": 9.7e-06, | |
| "loss": 0.5033, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_accuracy": 0.7584905660377359, | |
| "eval_f1": 0.7453580901856764, | |
| "eval_loss": 0.5280972719192505, | |
| "eval_precision": 0.7827298050139275, | |
| "eval_recall": 0.7113924050632912, | |
| "eval_runtime": 7.306, | |
| "eval_samples_per_second": 108.814, | |
| "eval_steps_per_second": 0.958, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "grad_norm": 11.039319038391113, | |
| "learning_rate": 9.600000000000001e-06, | |
| "loss": 0.3689, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_accuracy": 0.7773584905660378, | |
| "eval_f1": 0.7756653992395437, | |
| "eval_loss": 0.5035372972488403, | |
| "eval_precision": 0.7766497461928934, | |
| "eval_recall": 0.7746835443037975, | |
| "eval_runtime": 7.6101, | |
| "eval_samples_per_second": 104.467, | |
| "eval_steps_per_second": 0.92, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "grad_norm": 10.600135803222656, | |
| "learning_rate": 9.5e-06, | |
| "loss": 0.2588, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "eval_accuracy": 0.7773584905660378, | |
| "eval_f1": 0.7385524372230429, | |
| "eval_loss": 0.5942177772521973, | |
| "eval_precision": 0.8865248226950354, | |
| "eval_recall": 0.6329113924050633, | |
| "eval_runtime": 7.274, | |
| "eval_samples_per_second": 109.293, | |
| "eval_steps_per_second": 0.962, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "grad_norm": 29.822813034057617, | |
| "learning_rate": 9.4e-06, | |
| "loss": 0.2093, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "eval_accuracy": 0.7849056603773585, | |
| "eval_f1": 0.7466666666666667, | |
| "eval_loss": 0.6665811538696289, | |
| "eval_precision": 0.9, | |
| "eval_recall": 0.6379746835443038, | |
| "eval_runtime": 7.3649, | |
| "eval_samples_per_second": 107.944, | |
| "eval_steps_per_second": 0.95, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "grad_norm": 8.441102981567383, | |
| "learning_rate": 9.3e-06, | |
| "loss": 0.1578, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "eval_accuracy": 0.8125786163522013, | |
| "eval_f1": 0.8082368082368082, | |
| "eval_loss": 0.5872107744216919, | |
| "eval_precision": 0.8219895287958116, | |
| "eval_recall": 0.7949367088607595, | |
| "eval_runtime": 7.4254, | |
| "eval_samples_per_second": 107.065, | |
| "eval_steps_per_second": 0.943, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "grad_norm": 8.99835205078125, | |
| "learning_rate": 9.200000000000002e-06, | |
| "loss": 0.147, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "eval_accuracy": 0.769811320754717, | |
| "eval_f1": 0.7197549770290965, | |
| "eval_loss": 0.7519409656524658, | |
| "eval_precision": 0.9108527131782945, | |
| "eval_recall": 0.5949367088607594, | |
| "eval_runtime": 7.7545, | |
| "eval_samples_per_second": 102.521, | |
| "eval_steps_per_second": 0.903, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "grad_norm": 1.7589746713638306, | |
| "learning_rate": 9.100000000000001e-06, | |
| "loss": 0.1021, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "eval_accuracy": 0.810062893081761, | |
| "eval_f1": 0.7945578231292517, | |
| "eval_loss": 0.6721034646034241, | |
| "eval_precision": 0.8588235294117647, | |
| "eval_recall": 0.739240506329114, | |
| "eval_runtime": 7.6115, | |
| "eval_samples_per_second": 104.448, | |
| "eval_steps_per_second": 0.92, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "grad_norm": 5.351127624511719, | |
| "learning_rate": 9e-06, | |
| "loss": 0.088, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "eval_accuracy": 0.8176100628930818, | |
| "eval_f1": 0.8119325551232166, | |
| "eval_loss": 0.6214590668678284, | |
| "eval_precision": 0.8324468085106383, | |
| "eval_recall": 0.7924050632911392, | |
| "eval_runtime": 7.3972, | |
| "eval_samples_per_second": 107.472, | |
| "eval_steps_per_second": 0.946, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 11.0, | |
| "grad_norm": 3.241373300552368, | |
| "learning_rate": 8.900000000000001e-06, | |
| "loss": 0.0751, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 11.0, | |
| "eval_accuracy": 0.8125786163522013, | |
| "eval_f1": 0.8171779141104294, | |
| "eval_loss": 0.7023480534553528, | |
| "eval_precision": 0.7928571428571428, | |
| "eval_recall": 0.8430379746835444, | |
| "eval_runtime": 7.2686, | |
| "eval_samples_per_second": 109.375, | |
| "eval_steps_per_second": 0.963, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 12.0, | |
| "grad_norm": 34.19482421875, | |
| "learning_rate": 8.8e-06, | |
| "loss": 0.0611, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 12.0, | |
| "eval_accuracy": 0.8, | |
| "eval_f1": 0.774468085106383, | |
| "eval_loss": 0.9275862574577332, | |
| "eval_precision": 0.8806451612903226, | |
| "eval_recall": 0.6911392405063291, | |
| "eval_runtime": 7.2012, | |
| "eval_samples_per_second": 110.399, | |
| "eval_steps_per_second": 0.972, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 13.0, | |
| "grad_norm": 16.30063247680664, | |
| "learning_rate": 8.700000000000001e-06, | |
| "loss": 0.0568, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 13.0, | |
| "eval_accuracy": 0.8113207547169812, | |
| "eval_f1": 0.8026315789473685, | |
| "eval_loss": 0.7658352255821228, | |
| "eval_precision": 0.8356164383561644, | |
| "eval_recall": 0.7721518987341772, | |
| "eval_runtime": 7.2249, | |
| "eval_samples_per_second": 110.036, | |
| "eval_steps_per_second": 0.969, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 14.0, | |
| "grad_norm": 0.775602400302887, | |
| "learning_rate": 8.6e-06, | |
| "loss": 0.0531, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 14.0, | |
| "eval_accuracy": 0.8264150943396227, | |
| "eval_f1": 0.8266331658291457, | |
| "eval_loss": 0.7808425426483154, | |
| "eval_precision": 0.8204488778054863, | |
| "eval_recall": 0.8329113924050633, | |
| "eval_runtime": 7.7991, | |
| "eval_samples_per_second": 101.935, | |
| "eval_steps_per_second": 0.898, | |
| "step": 350 | |
| } | |
| ], | |
| "logging_steps": 500, | |
| "max_steps": 2500, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 100, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "EarlyStoppingCallback": { | |
| "args": { | |
| "early_stopping_patience": 5, | |
| "early_stopping_threshold": 0.0 | |
| }, | |
| "attributes": { | |
| "early_stopping_patience_counter": 0 | |
| } | |
| }, | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 2.138632372487107e+16, | |
| "train_batch_size": 128, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |