| { | |
| "best_metric": 0.7999999999999999, | |
| "best_model_checkpoint": "/home/bel3/content/model_folder//finetune/sst2/checkpoint-2000", | |
| "epoch": 8.10126582278481, | |
| "global_step": 6400, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.25, | |
| "eval_accuracy": 0.7637795209884644, | |
| "eval_f1": 0.7894736842105263, | |
| "eval_loss": 0.5017877817153931, | |
| "eval_runtime": 0.8927, | |
| "eval_samples_per_second": 569.063, | |
| "eval_steps_per_second": 71.693, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "eval_accuracy": 0.8070865869522095, | |
| "eval_f1": 0.8178438661710037, | |
| "eval_loss": 0.4368048310279846, | |
| "eval_runtime": 0.896, | |
| "eval_samples_per_second": 566.985, | |
| "eval_steps_per_second": 71.431, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 4.683544303797468e-05, | |
| "loss": 0.4948, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "eval_accuracy": 0.789370059967041, | |
| "eval_f1": 0.7906066536203523, | |
| "eval_loss": 0.45398545265197754, | |
| "eval_runtime": 0.895, | |
| "eval_samples_per_second": 567.578, | |
| "eval_steps_per_second": 71.506, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 1.01, | |
| "eval_accuracy": 0.8011810779571533, | |
| "eval_f1": 0.7908902691511387, | |
| "eval_loss": 0.44997021555900574, | |
| "eval_runtime": 0.9042, | |
| "eval_samples_per_second": 561.806, | |
| "eval_steps_per_second": 70.779, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 1.27, | |
| "learning_rate": 4.367088607594937e-05, | |
| "loss": 0.3027, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 1.27, | |
| "eval_accuracy": 0.8129921555519104, | |
| "eval_f1": 0.8155339805825244, | |
| "eval_loss": 0.4525600075721741, | |
| "eval_runtime": 0.833, | |
| "eval_samples_per_second": 609.864, | |
| "eval_steps_per_second": 76.833, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 1.52, | |
| "eval_accuracy": 0.8208661675453186, | |
| "eval_f1": 0.8253358925143954, | |
| "eval_loss": 0.4560699462890625, | |
| "eval_runtime": 0.8459, | |
| "eval_samples_per_second": 600.52, | |
| "eval_steps_per_second": 75.656, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 1.77, | |
| "eval_accuracy": 0.8149606585502625, | |
| "eval_f1": 0.812, | |
| "eval_loss": 0.4809434413909912, | |
| "eval_runtime": 0.8476, | |
| "eval_samples_per_second": 599.306, | |
| "eval_steps_per_second": 75.503, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 1.9, | |
| "learning_rate": 4.050632911392405e-05, | |
| "loss": 0.236, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 2.03, | |
| "eval_accuracy": 0.8169291615486145, | |
| "eval_f1": 0.8228571428571428, | |
| "eval_loss": 0.6252030730247498, | |
| "eval_runtime": 0.9277, | |
| "eval_samples_per_second": 547.599, | |
| "eval_steps_per_second": 68.989, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 2.28, | |
| "eval_accuracy": 0.8149606585502625, | |
| "eval_f1": 0.8142292490118577, | |
| "eval_loss": 0.6088564395904541, | |
| "eval_runtime": 0.8559, | |
| "eval_samples_per_second": 593.542, | |
| "eval_steps_per_second": 74.777, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 2.53, | |
| "learning_rate": 3.7341772151898736e-05, | |
| "loss": 0.1743, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 2.53, | |
| "eval_accuracy": 0.8070865869522095, | |
| "eval_f1": 0.7999999999999999, | |
| "eval_loss": 0.593163251876831, | |
| "eval_runtime": 0.8297, | |
| "eval_samples_per_second": 612.294, | |
| "eval_steps_per_second": 77.139, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 2.78, | |
| "eval_accuracy": 0.8090550899505615, | |
| "eval_f1": 0.7991718426501037, | |
| "eval_loss": 0.5563398599624634, | |
| "eval_runtime": 0.8844, | |
| "eval_samples_per_second": 574.387, | |
| "eval_steps_per_second": 72.364, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 3.04, | |
| "eval_accuracy": 0.7972440719604492, | |
| "eval_f1": 0.8052930056710774, | |
| "eval_loss": 0.5898112058639526, | |
| "eval_runtime": 0.9275, | |
| "eval_samples_per_second": 547.695, | |
| "eval_steps_per_second": 69.001, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 3.16, | |
| "learning_rate": 3.4177215189873416e-05, | |
| "loss": 0.1599, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 3.29, | |
| "eval_accuracy": 0.8228346705436707, | |
| "eval_f1": 0.8228346456692914, | |
| "eval_loss": 0.538675844669342, | |
| "eval_runtime": 0.9213, | |
| "eval_samples_per_second": 551.421, | |
| "eval_steps_per_second": 69.47, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 3.54, | |
| "eval_accuracy": 0.8110235929489136, | |
| "eval_f1": 0.8087649402390438, | |
| "eval_loss": 0.5058029890060425, | |
| "eval_runtime": 0.9251, | |
| "eval_samples_per_second": 549.151, | |
| "eval_steps_per_second": 69.184, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 3.8, | |
| "learning_rate": 3.10126582278481e-05, | |
| "loss": 0.1218, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 3.8, | |
| "eval_accuracy": 0.8248031735420227, | |
| "eval_f1": 0.8216432865731464, | |
| "eval_loss": 0.6525737047195435, | |
| "eval_runtime": 0.8811, | |
| "eval_samples_per_second": 576.522, | |
| "eval_steps_per_second": 72.633, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 4.05, | |
| "eval_accuracy": 0.8110235929489136, | |
| "eval_f1": 0.8102766798418972, | |
| "eval_loss": 0.8057999610900879, | |
| "eval_runtime": 0.8465, | |
| "eval_samples_per_second": 600.11, | |
| "eval_steps_per_second": 75.604, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 4.3, | |
| "eval_accuracy": 0.8110235929489136, | |
| "eval_f1": 0.8032786885245902, | |
| "eval_loss": 0.7197728753089905, | |
| "eval_runtime": 0.8474, | |
| "eval_samples_per_second": 599.485, | |
| "eval_steps_per_second": 75.526, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 4.43, | |
| "learning_rate": 2.7848101265822786e-05, | |
| "loss": 0.0955, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 4.56, | |
| "eval_accuracy": 0.8031495809555054, | |
| "eval_f1": 0.7967479674796747, | |
| "eval_loss": 0.7408320903778076, | |
| "eval_runtime": 0.8491, | |
| "eval_samples_per_second": 598.276, | |
| "eval_steps_per_second": 75.373, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 4.81, | |
| "eval_accuracy": 0.8011810779571533, | |
| "eval_f1": 0.8112149532710279, | |
| "eval_loss": 0.7727562785148621, | |
| "eval_runtime": 0.8979, | |
| "eval_samples_per_second": 565.791, | |
| "eval_steps_per_second": 71.281, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 5.06, | |
| "learning_rate": 2.468354430379747e-05, | |
| "loss": 0.0858, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 5.06, | |
| "eval_accuracy": 0.8051180839538574, | |
| "eval_f1": 0.800804828973843, | |
| "eval_loss": 0.8653830885887146, | |
| "eval_runtime": 0.8947, | |
| "eval_samples_per_second": 567.8, | |
| "eval_steps_per_second": 71.534, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 5.32, | |
| "eval_accuracy": 0.789370059967041, | |
| "eval_f1": 0.8065099457504521, | |
| "eval_loss": 0.9238587021827698, | |
| "eval_runtime": 0.8912, | |
| "eval_samples_per_second": 570.005, | |
| "eval_steps_per_second": 71.812, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 5.57, | |
| "eval_accuracy": 0.7814960479736328, | |
| "eval_f1": 0.7940630797773653, | |
| "eval_loss": 0.8553095459938049, | |
| "eval_runtime": 0.8957, | |
| "eval_samples_per_second": 567.124, | |
| "eval_steps_per_second": 71.449, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 5.7, | |
| "learning_rate": 2.1518987341772153e-05, | |
| "loss": 0.0634, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 5.82, | |
| "eval_accuracy": 0.8110235929489136, | |
| "eval_f1": 0.817490494296578, | |
| "eval_loss": 0.6774270534515381, | |
| "eval_runtime": 0.8879, | |
| "eval_samples_per_second": 572.147, | |
| "eval_steps_per_second": 72.081, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 6.08, | |
| "eval_accuracy": 0.7992125749588013, | |
| "eval_f1": 0.8038461538461538, | |
| "eval_loss": 0.9549906849861145, | |
| "eval_runtime": 0.9051, | |
| "eval_samples_per_second": 561.292, | |
| "eval_steps_per_second": 70.714, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 6.33, | |
| "learning_rate": 1.8354430379746836e-05, | |
| "loss": 0.0548, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 6.33, | |
| "eval_accuracy": 0.8031495809555054, | |
| "eval_f1": 0.8091603053435114, | |
| "eval_loss": 1.1089578866958618, | |
| "eval_runtime": 0.8777, | |
| "eval_samples_per_second": 578.768, | |
| "eval_steps_per_second": 72.916, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 6.58, | |
| "eval_accuracy": 0.8051180839538574, | |
| "eval_f1": 0.809248554913295, | |
| "eval_loss": 1.028864860534668, | |
| "eval_runtime": 0.8922, | |
| "eval_samples_per_second": 569.357, | |
| "eval_steps_per_second": 71.73, | |
| "step": 5200 | |
| }, | |
| { | |
| "epoch": 6.84, | |
| "eval_accuracy": 0.7952755689620972, | |
| "eval_f1": 0.8066914498141264, | |
| "eval_loss": 0.9950660467147827, | |
| "eval_runtime": 0.894, | |
| "eval_samples_per_second": 568.236, | |
| "eval_steps_per_second": 71.589, | |
| "step": 5400 | |
| }, | |
| { | |
| "epoch": 6.96, | |
| "learning_rate": 1.5189873417721521e-05, | |
| "loss": 0.0473, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 7.09, | |
| "eval_accuracy": 0.8110235929489136, | |
| "eval_f1": 0.8117647058823529, | |
| "eval_loss": 1.1159313917160034, | |
| "eval_runtime": 0.8468, | |
| "eval_samples_per_second": 599.932, | |
| "eval_steps_per_second": 75.582, | |
| "step": 5600 | |
| }, | |
| { | |
| "epoch": 7.34, | |
| "eval_accuracy": 0.8090550899505615, | |
| "eval_f1": 0.8131021194605009, | |
| "eval_loss": 1.0203063488006592, | |
| "eval_runtime": 0.9245, | |
| "eval_samples_per_second": 549.472, | |
| "eval_steps_per_second": 69.225, | |
| "step": 5800 | |
| }, | |
| { | |
| "epoch": 7.59, | |
| "learning_rate": 1.2025316455696203e-05, | |
| "loss": 0.036, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 7.59, | |
| "eval_accuracy": 0.8149606585502625, | |
| "eval_f1": 0.815686274509804, | |
| "eval_loss": 1.0656845569610596, | |
| "eval_runtime": 0.9107, | |
| "eval_samples_per_second": 557.814, | |
| "eval_steps_per_second": 70.276, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 7.85, | |
| "eval_accuracy": 0.8129921555519104, | |
| "eval_f1": 0.8140900195694716, | |
| "eval_loss": 1.0237640142440796, | |
| "eval_runtime": 0.85, | |
| "eval_samples_per_second": 597.63, | |
| "eval_steps_per_second": 75.292, | |
| "step": 6200 | |
| }, | |
| { | |
| "epoch": 8.1, | |
| "eval_accuracy": 0.8208661675453186, | |
| "eval_f1": 0.8205128205128205, | |
| "eval_loss": 1.1597448587417603, | |
| "eval_runtime": 0.8983, | |
| "eval_samples_per_second": 565.501, | |
| "eval_steps_per_second": 71.244, | |
| "step": 6400 | |
| }, | |
| { | |
| "epoch": 8.1, | |
| "step": 6400, | |
| "total_flos": 1.3556183708860416e+16, | |
| "train_loss": 0.14836265951395033, | |
| "train_runtime": 2029.2795, | |
| "train_samples_per_second": 248.995, | |
| "train_steps_per_second": 3.893 | |
| } | |
| ], | |
| "max_steps": 7900, | |
| "num_train_epochs": 10, | |
| "total_flos": 1.3556183708860416e+16, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |