| { |
| "best_global_step": 7500, |
| "best_metric": 0.8351681966105685, |
| "best_model_checkpoint": "./GATE-AraBert-AuthId/checkpoint-7500", |
| "epoch": 4.0, |
| "eval_steps": 500, |
| "global_step": 8236, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.24283632831471588, |
| "grad_norm": 15.026191711425781, |
| "learning_rate": 1.2111650485436894e-05, |
| "loss": 2.557, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.24283632831471588, |
| "eval_accuracy": 0.5645898484484003, |
| "eval_f1_macro": 0.4695675081378141, |
| "eval_loss": 1.7181202173233032, |
| "eval_precision_macro": 0.5106584595873682, |
| "eval_recall_macro": 0.5524759395969016, |
| "eval_runtime": 76.9808, |
| "eval_samples_per_second": 54.0, |
| "eval_steps_per_second": 3.377, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.48567265662943176, |
| "grad_norm": 9.262898445129395, |
| "learning_rate": 1.9972503596025437e-05, |
| "loss": 1.0291, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.48567265662943176, |
| "eval_accuracy": 0.7866249699302381, |
| "eval_f1_macro": 0.7227417233786936, |
| "eval_loss": 0.75501948595047, |
| "eval_precision_macro": 0.726093427002561, |
| "eval_recall_macro": 0.7742946230237587, |
| "eval_runtime": 77.5698, |
| "eval_samples_per_second": 53.59, |
| "eval_steps_per_second": 3.352, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.7285089849441476, |
| "grad_norm": 23.896453857421875, |
| "learning_rate": 1.9593517288251588e-05, |
| "loss": 0.4903, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.7285089849441476, |
| "eval_accuracy": 0.823911474621121, |
| "eval_f1_macro": 0.7429507284333905, |
| "eval_loss": 0.6386048197746277, |
| "eval_precision_macro": 0.7324405608662297, |
| "eval_recall_macro": 0.8045168896184252, |
| "eval_runtime": 77.2697, |
| "eval_samples_per_second": 53.799, |
| "eval_steps_per_second": 3.365, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.9713453132588635, |
| "grad_norm": 5.435430526733398, |
| "learning_rate": 1.8785270662888512e-05, |
| "loss": 0.3153, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.9713453132588635, |
| "eval_accuracy": 0.8267981717584797, |
| "eval_f1_macro": 0.7575983897581253, |
| "eval_loss": 0.6189785599708557, |
| "eval_precision_macro": 0.7453534211105659, |
| "eval_recall_macro": 0.8233657846968169, |
| "eval_runtime": 77.7838, |
| "eval_samples_per_second": 53.443, |
| "eval_steps_per_second": 3.343, |
| "step": 2000 |
| }, |
| { |
| "epoch": 1.2141816415735793, |
| "grad_norm": 1.5204271078109741, |
| "learning_rate": 1.7583928579254136e-05, |
| "loss": 0.2053, |
| "step": 2500 |
| }, |
| { |
| "epoch": 1.2141816415735793, |
| "eval_accuracy": 0.8681741640606206, |
| "eval_f1_macro": 0.8014082347432068, |
| "eval_loss": 0.5223568677902222, |
| "eval_precision_macro": 0.8062270695838487, |
| "eval_recall_macro": 0.8359468284815337, |
| "eval_runtime": 76.9596, |
| "eval_samples_per_second": 54.015, |
| "eval_steps_per_second": 3.378, |
| "step": 2500 |
| }, |
| { |
| "epoch": 1.4570179698882952, |
| "grad_norm": 19.720205307006836, |
| "learning_rate": 1.6043244886834486e-05, |
| "loss": 0.144, |
| "step": 3000 |
| }, |
| { |
| "epoch": 1.4570179698882952, |
| "eval_accuracy": 0.8450805869617513, |
| "eval_f1_macro": 0.7815726324298025, |
| "eval_loss": 0.6506038308143616, |
| "eval_precision_macro": 0.7880346089919167, |
| "eval_recall_macro": 0.8269659249947254, |
| "eval_runtime": 76.939, |
| "eval_samples_per_second": 54.03, |
| "eval_steps_per_second": 3.379, |
| "step": 3000 |
| }, |
| { |
| "epoch": 1.6998542982030111, |
| "grad_norm": 24.972082138061523, |
| "learning_rate": 1.4232157218328932e-05, |
| "loss": 0.1468, |
| "step": 3500 |
| }, |
| { |
| "epoch": 1.6998542982030111, |
| "eval_accuracy": 0.8737070002405581, |
| "eval_f1_macro": 0.8203655773714699, |
| "eval_loss": 0.5903727412223816, |
| "eval_precision_macro": 0.8181896761506664, |
| "eval_recall_macro": 0.8521354121871312, |
| "eval_runtime": 76.9931, |
| "eval_samples_per_second": 53.992, |
| "eval_steps_per_second": 3.377, |
| "step": 3500 |
| }, |
| { |
| "epoch": 1.942690626517727, |
| "grad_norm": 0.03269574046134949, |
| "learning_rate": 1.223170238686927e-05, |
| "loss": 0.104, |
| "step": 4000 |
| }, |
| { |
| "epoch": 1.942690626517727, |
| "eval_accuracy": 0.8571084917007458, |
| "eval_f1_macro": 0.7915965450421839, |
| "eval_loss": 0.6936432719230652, |
| "eval_precision_macro": 0.7880101931049168, |
| "eval_recall_macro": 0.8320463480676579, |
| "eval_runtime": 77.0658, |
| "eval_samples_per_second": 53.941, |
| "eval_steps_per_second": 3.374, |
| "step": 4000 |
| }, |
| { |
| "epoch": 2.1855269548324427, |
| "grad_norm": 0.032910436391830444, |
| "learning_rate": 1.0131390407445186e-05, |
| "loss": 0.0563, |
| "step": 4500 |
| }, |
| { |
| "epoch": 2.1855269548324427, |
| "eval_accuracy": 0.860957421217224, |
| "eval_f1_macro": 0.7924185850496555, |
| "eval_loss": 0.6698474287986755, |
| "eval_precision_macro": 0.7932481333544369, |
| "eval_recall_macro": 0.8235048486314694, |
| "eval_runtime": 77.0098, |
| "eval_samples_per_second": 53.98, |
| "eval_steps_per_second": 3.376, |
| "step": 4500 |
| }, |
| { |
| "epoch": 2.4283632831471587, |
| "grad_norm": 14.333985328674316, |
| "learning_rate": 8.025199386337698e-06, |
| "loss": 0.0425, |
| "step": 5000 |
| }, |
| { |
| "epoch": 2.4283632831471587, |
| "eval_accuracy": 0.8859754630743325, |
| "eval_f1_macro": 0.8234139223044122, |
| "eval_loss": 0.5807381272315979, |
| "eval_precision_macro": 0.8290082386037015, |
| "eval_recall_macro": 0.8509132569648817, |
| "eval_runtime": 77.1284, |
| "eval_samples_per_second": 53.897, |
| "eval_steps_per_second": 3.371, |
| "step": 5000 |
| }, |
| { |
| "epoch": 2.6711996114618746, |
| "grad_norm": 0.02414858713746071, |
| "learning_rate": 6.007370486559185e-06, |
| "loss": 0.0524, |
| "step": 5500 |
| }, |
| { |
| "epoch": 2.6711996114618746, |
| "eval_accuracy": 0.8910271830647102, |
| "eval_f1_macro": 0.8201822638781144, |
| "eval_loss": 0.5744612812995911, |
| "eval_precision_macro": 0.8236232407632871, |
| "eval_recall_macro": 0.8612733078780604, |
| "eval_runtime": 77.2429, |
| "eval_samples_per_second": 53.817, |
| "eval_steps_per_second": 3.366, |
| "step": 5500 |
| }, |
| { |
| "epoch": 2.9140359397765905, |
| "grad_norm": 0.18948864936828613, |
| "learning_rate": 4.168191122864228e-06, |
| "loss": 0.0335, |
| "step": 6000 |
| }, |
| { |
| "epoch": 2.9140359397765905, |
| "eval_accuracy": 0.8758720230935771, |
| "eval_f1_macro": 0.8189459069331455, |
| "eval_loss": 0.6318075060844421, |
| "eval_precision_macro": 0.813872649120604, |
| "eval_recall_macro": 0.8559256912562273, |
| "eval_runtime": 77.1472, |
| "eval_samples_per_second": 53.884, |
| "eval_steps_per_second": 3.37, |
| "step": 6000 |
| }, |
| { |
| "epoch": 3.1568722680913064, |
| "grad_norm": 0.020328590646386147, |
| "learning_rate": 2.5899550665613025e-06, |
| "loss": 0.0199, |
| "step": 6500 |
| }, |
| { |
| "epoch": 3.1568722680913064, |
| "eval_accuracy": 0.8943949963916286, |
| "eval_f1_macro": 0.8325961279360792, |
| "eval_loss": 0.5555033087730408, |
| "eval_precision_macro": 0.8226434393873626, |
| "eval_recall_macro": 0.8658707646699891, |
| "eval_runtime": 77.2006, |
| "eval_samples_per_second": 53.847, |
| "eval_steps_per_second": 3.368, |
| "step": 6500 |
| }, |
| { |
| "epoch": 3.3997085964060223, |
| "grad_norm": 0.012193214148283005, |
| "learning_rate": 1.3432802245560407e-06, |
| "loss": 0.0098, |
| "step": 7000 |
| }, |
| { |
| "epoch": 3.3997085964060223, |
| "eval_accuracy": 0.8835698821265335, |
| "eval_f1_macro": 0.8273295992906156, |
| "eval_loss": 0.6275856494903564, |
| "eval_precision_macro": 0.8191217937417908, |
| "eval_recall_macro": 0.8617286460740119, |
| "eval_runtime": 77.1125, |
| "eval_samples_per_second": 53.908, |
| "eval_steps_per_second": 3.372, |
| "step": 7000 |
| }, |
| { |
| "epoch": 3.642544924720738, |
| "grad_norm": 0.005845185369253159, |
| "learning_rate": 4.83948852983539e-07, |
| "loss": 0.0146, |
| "step": 7500 |
| }, |
| { |
| "epoch": 3.642544924720738, |
| "eval_accuracy": 0.8893432764012509, |
| "eval_f1_macro": 0.8351681966105685, |
| "eval_loss": 0.6066595911979675, |
| "eval_precision_macro": 0.8247769794059985, |
| "eval_recall_macro": 0.8671677760062687, |
| "eval_runtime": 77.0813, |
| "eval_samples_per_second": 53.93, |
| "eval_steps_per_second": 3.373, |
| "step": 7500 |
| }, |
| { |
| "epoch": 3.885381253035454, |
| "grad_norm": 0.020264243707060814, |
| "learning_rate": 5.041158953924652e-08, |
| "loss": 0.01, |
| "step": 8000 |
| }, |
| { |
| "epoch": 3.885381253035454, |
| "eval_accuracy": 0.8895838344960308, |
| "eval_f1_macro": 0.8329589258888473, |
| "eval_loss": 0.6008756756782532, |
| "eval_precision_macro": 0.8219349344833001, |
| "eval_recall_macro": 0.8674410022032442, |
| "eval_runtime": 77.5586, |
| "eval_samples_per_second": 53.598, |
| "eval_steps_per_second": 3.352, |
| "step": 8000 |
| } |
| ], |
| "logging_steps": 500, |
| "max_steps": 8236, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 4, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 3.467447932454093e+16, |
| "train_batch_size": 16, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|