| { |
| "best_global_step": 3654, |
| "best_metric": 0.634271615758239, |
| "best_model_checkpoint": "outputs/visobert/checkpoint-3654", |
| "epoch": 21.0, |
| "eval_steps": 500, |
| "global_step": 3654, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.9942528735632183, |
| "grad_norm": 4.714195728302002, |
| "learning_rate": 6.88e-06, |
| "loss": 1.7365, |
| "step": 173 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_accuracy": 0.5495626822157434, |
| "eval_loss": 1.3077976703643799, |
| "eval_macro_f1": 0.3621719465508691, |
| "eval_runtime": 1.0013, |
| "eval_samples_per_second": 685.088, |
| "eval_steps_per_second": 21.971, |
| "step": 174 |
| }, |
| { |
| "epoch": 1.9885057471264367, |
| "grad_norm": 7.385402679443359, |
| "learning_rate": 1.38e-05, |
| "loss": 1.0546, |
| "step": 346 |
| }, |
| { |
| "epoch": 2.0, |
| "eval_accuracy": 0.6501457725947521, |
| "eval_loss": 0.9251817464828491, |
| "eval_macro_f1": 0.5626337451346989, |
| "eval_runtime": 1.017, |
| "eval_samples_per_second": 674.552, |
| "eval_steps_per_second": 21.633, |
| "step": 348 |
| }, |
| { |
| "epoch": 2.9827586206896552, |
| "grad_norm": 6.6963605880737305, |
| "learning_rate": 1.999994401896218e-05, |
| "loss": 0.6967, |
| "step": 519 |
| }, |
| { |
| "epoch": 3.0, |
| "eval_accuracy": 0.6690962099125365, |
| "eval_loss": 0.9223118424415588, |
| "eval_macro_f1": 0.5955069733934168, |
| "eval_runtime": 1.0189, |
| "eval_samples_per_second": 673.298, |
| "eval_steps_per_second": 21.593, |
| "step": 522 |
| }, |
| { |
| "epoch": 3.9770114942528734, |
| "grad_norm": 5.811980724334717, |
| "learning_rate": 1.9993697433304146e-05, |
| "loss": 0.3928, |
| "step": 692 |
| }, |
| { |
| "epoch": 4.0, |
| "eval_accuracy": 0.6603498542274052, |
| "eval_loss": 1.0274471044540405, |
| "eval_macro_f1": 0.6117238630327387, |
| "eval_runtime": 1.0201, |
| "eval_samples_per_second": 672.464, |
| "eval_steps_per_second": 21.566, |
| "step": 696 |
| }, |
| { |
| "epoch": 4.971264367816092, |
| "grad_norm": 16.778417587280273, |
| "learning_rate": 1.9977115923137912e-05, |
| "loss": 0.1721, |
| "step": 865 |
| }, |
| { |
| "epoch": 5.0, |
| "eval_accuracy": 0.6530612244897959, |
| "eval_loss": 1.358595371246338, |
| "eval_macro_f1": 0.5994376771063253, |
| "eval_runtime": 1.0217, |
| "eval_samples_per_second": 671.439, |
| "eval_steps_per_second": 21.533, |
| "step": 870 |
| }, |
| { |
| "epoch": 5.9655172413793105, |
| "grad_norm": 4.333595275878906, |
| "learning_rate": 1.995021663613649e-05, |
| "loss": 0.0717, |
| "step": 1038 |
| }, |
| { |
| "epoch": 6.0, |
| "eval_accuracy": 0.6516034985422741, |
| "eval_loss": 1.6116644144058228, |
| "eval_macro_f1": 0.5975804141258729, |
| "eval_runtime": 1.0219, |
| "eval_samples_per_second": 671.317, |
| "eval_steps_per_second": 21.529, |
| "step": 1044 |
| }, |
| { |
| "epoch": 6.959770114942529, |
| "grad_norm": 10.536818504333496, |
| "learning_rate": 1.991302739004225e-05, |
| "loss": 0.0408, |
| "step": 1211 |
| }, |
| { |
| "epoch": 7.0, |
| "eval_accuracy": 0.6661807580174927, |
| "eval_loss": 1.761244297027588, |
| "eval_macro_f1": 0.6135654899117211, |
| "eval_runtime": 1.0232, |
| "eval_samples_per_second": 670.433, |
| "eval_steps_per_second": 21.501, |
| "step": 1218 |
| }, |
| { |
| "epoch": 7.954022988505747, |
| "grad_norm": 0.06934899091720581, |
| "learning_rate": 1.9865586643899352e-05, |
| "loss": 0.0189, |
| "step": 1384 |
| }, |
| { |
| "epoch": 8.0, |
| "eval_accuracy": 0.6545189504373178, |
| "eval_loss": 2.068016767501831, |
| "eval_macro_f1": 0.608371709261751, |
| "eval_runtime": 1.0251, |
| "eval_samples_per_second": 669.232, |
| "eval_steps_per_second": 21.462, |
| "step": 1392 |
| }, |
| { |
| "epoch": 8.948275862068966, |
| "grad_norm": 0.2869434654712677, |
| "learning_rate": 1.9807943458281543e-05, |
| "loss": 0.01, |
| "step": 1557 |
| }, |
| { |
| "epoch": 9.0, |
| "eval_accuracy": 0.6618075801749271, |
| "eval_loss": 2.1802825927734375, |
| "eval_macro_f1": 0.6124464449897763, |
| "eval_runtime": 1.0247, |
| "eval_samples_per_second": 669.457, |
| "eval_steps_per_second": 21.469, |
| "step": 1566 |
| }, |
| { |
| "epoch": 9.942528735632184, |
| "grad_norm": 0.07617083936929703, |
| "learning_rate": 1.974015744455647e-05, |
| "loss": 0.0089, |
| "step": 1730 |
| }, |
| { |
| "epoch": 10.0, |
| "eval_accuracy": 0.6559766763848397, |
| "eval_loss": 2.3100554943084717, |
| "eval_macro_f1": 0.6074241333392209, |
| "eval_runtime": 1.0231, |
| "eval_samples_per_second": 670.506, |
| "eval_steps_per_second": 21.503, |
| "step": 1740 |
| }, |
| { |
| "epoch": 10.936781609195402, |
| "grad_norm": 4.301032066345215, |
| "learning_rate": 1.9662298703238904e-05, |
| "loss": 0.0087, |
| "step": 1903 |
| }, |
| { |
| "epoch": 11.0, |
| "eval_accuracy": 0.6530612244897959, |
| "eval_loss": 2.3289756774902344, |
| "eval_macro_f1": 0.6128782018168504, |
| "eval_runtime": 1.0248, |
| "eval_samples_per_second": 669.387, |
| "eval_steps_per_second": 21.467, |
| "step": 1914 |
| }, |
| { |
| "epoch": 11.931034482758621, |
| "grad_norm": 0.050387851893901825, |
| "learning_rate": 1.9574447751496706e-05, |
| "loss": 0.0095, |
| "step": 2076 |
| }, |
| { |
| "epoch": 12.0, |
| "eval_accuracy": 0.6676384839650146, |
| "eval_loss": 2.365588903427124, |
| "eval_macro_f1": 0.6232160117095836, |
| "eval_runtime": 1.0257, |
| "eval_samples_per_second": 668.809, |
| "eval_steps_per_second": 21.449, |
| "step": 2088 |
| }, |
| { |
| "epoch": 12.925287356321839, |
| "grad_norm": 0.19025640189647675, |
| "learning_rate": 1.9476695439884466e-05, |
| "loss": 0.0082, |
| "step": 2249 |
| }, |
| { |
| "epoch": 13.0, |
| "eval_accuracy": 0.6472303206997084, |
| "eval_loss": 2.510204553604126, |
| "eval_macro_f1": 0.6009794079827844, |
| "eval_runtime": 1.0241, |
| "eval_samples_per_second": 669.83, |
| "eval_steps_per_second": 21.481, |
| "step": 2262 |
| }, |
| { |
| "epoch": 13.919540229885058, |
| "grad_norm": 0.009207348339259624, |
| "learning_rate": 1.9369142858390936e-05, |
| "loss": 0.0077, |
| "step": 2422 |
| }, |
| { |
| "epoch": 14.0, |
| "eval_accuracy": 0.6632653061224489, |
| "eval_loss": 2.5246763229370117, |
| "eval_macro_f1": 0.6098772695075174, |
| "eval_runtime": 1.0232, |
| "eval_samples_per_second": 670.437, |
| "eval_steps_per_second": 21.501, |
| "step": 2436 |
| }, |
| { |
| "epoch": 14.913793103448276, |
| "grad_norm": 0.04841063916683197, |
| "learning_rate": 1.9251901231897364e-05, |
| "loss": 0.0087, |
| "step": 2595 |
| }, |
| { |
| "epoch": 15.0, |
| "eval_accuracy": 0.6618075801749271, |
| "eval_loss": 2.6046578884124756, |
| "eval_macro_f1": 0.5938506871223318, |
| "eval_runtime": 1.0232, |
| "eval_samples_per_second": 670.439, |
| "eval_steps_per_second": 21.501, |
| "step": 2610 |
| }, |
| { |
| "epoch": 15.908045977011493, |
| "grad_norm": 0.01879715360701084, |
| "learning_rate": 1.9125091805154977e-05, |
| "loss": 0.0074, |
| "step": 2768 |
| }, |
| { |
| "epoch": 16.0, |
| "eval_accuracy": 0.6603498542274052, |
| "eval_loss": 2.5505387783050537, |
| "eval_macro_f1": 0.62519263706606, |
| "eval_runtime": 1.0233, |
| "eval_samples_per_second": 670.404, |
| "eval_steps_per_second": 21.5, |
| "step": 2784 |
| }, |
| { |
| "epoch": 16.902298850574713, |
| "grad_norm": 0.006341532338410616, |
| "learning_rate": 1.8988845717400375e-05, |
| "loss": 0.0078, |
| "step": 2941 |
| }, |
| { |
| "epoch": 17.0, |
| "eval_accuracy": 0.6603498542274052, |
| "eval_loss": 2.581911563873291, |
| "eval_macro_f1": 0.6102357854686191, |
| "eval_runtime": 1.0227, |
| "eval_samples_per_second": 670.775, |
| "eval_steps_per_second": 21.512, |
| "step": 2958 |
| }, |
| { |
| "epoch": 17.896551724137932, |
| "grad_norm": 13.61514949798584, |
| "learning_rate": 1.884330386673869e-05, |
| "loss": 0.0158, |
| "step": 3114 |
| }, |
| { |
| "epoch": 18.0, |
| "eval_accuracy": 0.6530612244897959, |
| "eval_loss": 2.585244655609131, |
| "eval_macro_f1": 0.6128145704869843, |
| "eval_runtime": 1.0227, |
| "eval_samples_per_second": 670.768, |
| "eval_steps_per_second": 21.512, |
| "step": 3132 |
| }, |
| { |
| "epoch": 18.89080459770115, |
| "grad_norm": 22.37177085876465, |
| "learning_rate": 1.8688616764434577e-05, |
| "loss": 0.015, |
| "step": 3287 |
| }, |
| { |
| "epoch": 19.0, |
| "eval_accuracy": 0.6443148688046647, |
| "eval_loss": 2.7335152626037598, |
| "eval_macro_f1": 0.6043926085724337, |
| "eval_runtime": 1.0218, |
| "eval_samples_per_second": 671.336, |
| "eval_steps_per_second": 21.53, |
| "step": 3306 |
| }, |
| { |
| "epoch": 19.885057471264368, |
| "grad_norm": 0.02293994091451168, |
| "learning_rate": 1.8524944379261892e-05, |
| "loss": 0.0118, |
| "step": 3460 |
| }, |
| { |
| "epoch": 20.0, |
| "eval_accuracy": 0.6428571428571429, |
| "eval_loss": 2.8105897903442383, |
| "eval_macro_f1": 0.6045183468357809, |
| "eval_runtime": 1.0238, |
| "eval_samples_per_second": 670.049, |
| "eval_steps_per_second": 21.488, |
| "step": 3480 |
| }, |
| { |
| "epoch": 20.879310344827587, |
| "grad_norm": 17.96685791015625, |
| "learning_rate": 1.8352455972072908e-05, |
| "loss": 0.0087, |
| "step": 3633 |
| }, |
| { |
| "epoch": 21.0, |
| "eval_accuracy": 0.6749271137026239, |
| "eval_loss": 2.626377582550049, |
| "eval_macro_f1": 0.634271615758239, |
| "eval_runtime": 1.0224, |
| "eval_samples_per_second": 670.996, |
| "eval_steps_per_second": 21.519, |
| "step": 3654 |
| } |
| ], |
| "logging_steps": 173, |
| "max_steps": 17400, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 100, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "EarlyStoppingCallback": { |
| "args": { |
| "early_stopping_patience": 5, |
| "early_stopping_threshold": 0.0 |
| }, |
| "attributes": { |
| "early_stopping_patience_counter": 0 |
| } |
| }, |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.04182224766452e+16, |
| "train_batch_size": 32, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|