dzungpham's picture
Upload folder using huggingface_hub
abf9e49 verified
raw
history blame
12.3 kB
{
"best_global_step": 600,
"best_metric": 0.9695240197651653,
"best_model_checkpoint": "taskA-codebert-base-focal/checkpoint-600",
"epoch": 0.14680694886224616,
"eval_steps": 200,
"global_step": 600,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.002446782481037436,
"grad_norm": 1.0,
"learning_rate": 3.6734693877551025e-07,
"loss": 1.1949,
"step": 10
},
{
"epoch": 0.004893564962074872,
"grad_norm": 0.9999998807907104,
"learning_rate": 7.755102040816327e-07,
"loss": 1.1644,
"step": 20
},
{
"epoch": 0.007340347443112307,
"grad_norm": 0.9999999403953552,
"learning_rate": 1.1836734693877552e-06,
"loss": 0.96,
"step": 30
},
{
"epoch": 0.009787129924149743,
"grad_norm": 1.0,
"learning_rate": 1.5918367346938775e-06,
"loss": 0.7557,
"step": 40
},
{
"epoch": 0.012233912405187179,
"grad_norm": 1.0,
"learning_rate": 2.0000000000000003e-06,
"loss": 0.5786,
"step": 50
},
{
"epoch": 0.014680694886224614,
"grad_norm": 0.9999999403953552,
"learning_rate": 2.4081632653061225e-06,
"loss": 0.4813,
"step": 60
},
{
"epoch": 0.01712747736726205,
"grad_norm": 0.9999999403953552,
"learning_rate": 2.816326530612245e-06,
"loss": 0.4477,
"step": 70
},
{
"epoch": 0.019574259848299486,
"grad_norm": 1.0,
"learning_rate": 3.2244897959183672e-06,
"loss": 0.4254,
"step": 80
},
{
"epoch": 0.022021042329336923,
"grad_norm": 1.0,
"learning_rate": 3.6326530612244903e-06,
"loss": 0.4017,
"step": 90
},
{
"epoch": 0.024467824810374357,
"grad_norm": 0.9999999403953552,
"learning_rate": 4.040816326530612e-06,
"loss": 0.379,
"step": 100
},
{
"epoch": 0.026914607291411794,
"grad_norm": 1.0000001192092896,
"learning_rate": 4.448979591836735e-06,
"loss": 0.3749,
"step": 110
},
{
"epoch": 0.029361389772449228,
"grad_norm": 1.0,
"learning_rate": 4.857142857142858e-06,
"loss": 0.3536,
"step": 120
},
{
"epoch": 0.031808172253486665,
"grad_norm": 0.9999999403953552,
"learning_rate": 5.26530612244898e-06,
"loss": 0.336,
"step": 130
},
{
"epoch": 0.0342549547345241,
"grad_norm": 1.0000001192092896,
"learning_rate": 5.673469387755103e-06,
"loss": 0.3261,
"step": 140
},
{
"epoch": 0.03670173721556154,
"grad_norm": 0.9999999403953552,
"learning_rate": 6.0816326530612245e-06,
"loss": 0.3134,
"step": 150
},
{
"epoch": 0.03914851969659897,
"grad_norm": 1.0,
"learning_rate": 6.489795918367348e-06,
"loss": 0.308,
"step": 160
},
{
"epoch": 0.041595302177636406,
"grad_norm": 1.0,
"learning_rate": 6.8979591836734705e-06,
"loss": 0.2961,
"step": 170
},
{
"epoch": 0.04404208465867385,
"grad_norm": 1.0,
"learning_rate": 7.306122448979592e-06,
"loss": 0.2998,
"step": 180
},
{
"epoch": 0.04648886713971128,
"grad_norm": 1.0,
"learning_rate": 7.714285714285716e-06,
"loss": 0.2895,
"step": 190
},
{
"epoch": 0.048935649620748714,
"grad_norm": 1.0,
"learning_rate": 8.122448979591837e-06,
"loss": 0.2773,
"step": 200
},
{
"epoch": 0.048935649620748714,
"eval_accuracy": 0.81444,
"eval_f1_weighted": 0.8120194753530078,
"eval_loss": 0.10672978311777115,
"eval_macro_f1": 0.8128217919062601,
"eval_precision": 0.8426688032270206,
"eval_recall": 0.81444,
"eval_runtime": 1609.4299,
"eval_samples_per_second": 62.134,
"eval_steps_per_second": 0.243,
"step": 200
},
{
"epoch": 0.051382432101786155,
"grad_norm": 1.0,
"learning_rate": 8.530612244897961e-06,
"loss": 0.2658,
"step": 210
},
{
"epoch": 0.05382921458282359,
"grad_norm": 1.0000001192092896,
"learning_rate": 8.938775510204082e-06,
"loss": 0.2574,
"step": 220
},
{
"epoch": 0.05627599706386102,
"grad_norm": 1.0,
"learning_rate": 9.346938775510204e-06,
"loss": 0.2633,
"step": 230
},
{
"epoch": 0.058722779544898455,
"grad_norm": 1.0,
"learning_rate": 9.755102040816327e-06,
"loss": 0.2503,
"step": 240
},
{
"epoch": 0.061169562025935896,
"grad_norm": 1.0,
"learning_rate": 1.016326530612245e-05,
"loss": 0.2359,
"step": 250
},
{
"epoch": 0.06361634450697333,
"grad_norm": 0.9999999403953552,
"learning_rate": 1.0571428571428572e-05,
"loss": 0.2327,
"step": 260
},
{
"epoch": 0.06606312698801077,
"grad_norm": 1.0,
"learning_rate": 1.0979591836734695e-05,
"loss": 0.2305,
"step": 270
},
{
"epoch": 0.0685099094690482,
"grad_norm": 1.0,
"learning_rate": 1.1387755102040819e-05,
"loss": 0.2284,
"step": 280
},
{
"epoch": 0.07095669195008564,
"grad_norm": 0.9999999403953552,
"learning_rate": 1.179591836734694e-05,
"loss": 0.226,
"step": 290
},
{
"epoch": 0.07340347443112308,
"grad_norm": 0.9999999403953552,
"learning_rate": 1.2204081632653062e-05,
"loss": 0.2225,
"step": 300
},
{
"epoch": 0.0758502569121605,
"grad_norm": 1.0,
"learning_rate": 1.2612244897959185e-05,
"loss": 0.2156,
"step": 310
},
{
"epoch": 0.07829703939319795,
"grad_norm": 1.0,
"learning_rate": 1.3020408163265308e-05,
"loss": 0.2033,
"step": 320
},
{
"epoch": 0.08074382187423539,
"grad_norm": 0.9999999403953552,
"learning_rate": 1.3428571428571429e-05,
"loss": 0.2021,
"step": 330
},
{
"epoch": 0.08319060435527281,
"grad_norm": 1.0,
"learning_rate": 1.3836734693877551e-05,
"loss": 0.1902,
"step": 340
},
{
"epoch": 0.08563738683631025,
"grad_norm": 1.0,
"learning_rate": 1.4244897959183674e-05,
"loss": 0.1975,
"step": 350
},
{
"epoch": 0.0880841693173477,
"grad_norm": 1.0,
"learning_rate": 1.4653061224489798e-05,
"loss": 0.1921,
"step": 360
},
{
"epoch": 0.09053095179838512,
"grad_norm": 0.9999999403953552,
"learning_rate": 1.506122448979592e-05,
"loss": 0.2039,
"step": 370
},
{
"epoch": 0.09297773427942256,
"grad_norm": 1.0000001192092896,
"learning_rate": 1.546938775510204e-05,
"loss": 0.202,
"step": 380
},
{
"epoch": 0.09542451676046,
"grad_norm": 0.9999999403953552,
"learning_rate": 1.5877551020408162e-05,
"loss": 0.1885,
"step": 390
},
{
"epoch": 0.09787129924149743,
"grad_norm": 0.9999998807907104,
"learning_rate": 1.6285714285714287e-05,
"loss": 0.1818,
"step": 400
},
{
"epoch": 0.09787129924149743,
"eval_accuracy": 0.9495,
"eval_f1_weighted": 0.9495267406510758,
"eval_loss": 0.039080820977687836,
"eval_macro_f1": 0.9494696691689186,
"eval_precision": 0.9504394545408126,
"eval_recall": 0.9495,
"eval_runtime": 1609.6857,
"eval_samples_per_second": 62.124,
"eval_steps_per_second": 0.243,
"step": 400
},
{
"epoch": 0.10031808172253487,
"grad_norm": 1.0,
"learning_rate": 1.669387755102041e-05,
"loss": 0.1813,
"step": 410
},
{
"epoch": 0.10276486420357231,
"grad_norm": 0.9999999403953552,
"learning_rate": 1.7102040816326532e-05,
"loss": 0.1758,
"step": 420
},
{
"epoch": 0.10521164668460974,
"grad_norm": 1.0000001192092896,
"learning_rate": 1.7510204081632653e-05,
"loss": 0.1778,
"step": 430
},
{
"epoch": 0.10765842916564718,
"grad_norm": 0.9999998807907104,
"learning_rate": 1.7918367346938777e-05,
"loss": 0.1681,
"step": 440
},
{
"epoch": 0.1101052116466846,
"grad_norm": 0.9999998807907104,
"learning_rate": 1.8326530612244898e-05,
"loss": 0.1715,
"step": 450
},
{
"epoch": 0.11255199412772204,
"grad_norm": 1.0,
"learning_rate": 1.8734693877551022e-05,
"loss": 0.1773,
"step": 460
},
{
"epoch": 0.11499877660875948,
"grad_norm": 1.0,
"learning_rate": 1.9142857142857146e-05,
"loss": 0.1859,
"step": 470
},
{
"epoch": 0.11744555908979691,
"grad_norm": 1.0,
"learning_rate": 1.9551020408163267e-05,
"loss": 0.173,
"step": 480
},
{
"epoch": 0.11989234157083435,
"grad_norm": 1.0,
"learning_rate": 1.9959183673469388e-05,
"loss": 0.1753,
"step": 490
},
{
"epoch": 0.12233912405187179,
"grad_norm": 0.9999999403953552,
"learning_rate": 1.9998764266160687e-05,
"loss": 0.1712,
"step": 500
},
{
"epoch": 0.12478590653290922,
"grad_norm": 1.0,
"learning_rate": 1.9994492985725524e-05,
"loss": 0.1644,
"step": 510
},
{
"epoch": 0.12723268901394666,
"grad_norm": 0.9999998807907104,
"learning_rate": 1.9987172205655365e-05,
"loss": 0.1585,
"step": 520
},
{
"epoch": 0.1296794714949841,
"grad_norm": 1.0000001192092896,
"learning_rate": 1.9976804159651927e-05,
"loss": 0.1625,
"step": 530
},
{
"epoch": 0.13212625397602154,
"grad_norm": 1.0,
"learning_rate": 1.9963392011192586e-05,
"loss": 0.1656,
"step": 540
},
{
"epoch": 0.13457303645705898,
"grad_norm": 0.9999999403953552,
"learning_rate": 1.994693985256516e-05,
"loss": 0.1528,
"step": 550
},
{
"epoch": 0.1370198189380964,
"grad_norm": 0.9999999403953552,
"learning_rate": 1.9927452703619262e-05,
"loss": 0.1547,
"step": 560
},
{
"epoch": 0.13946660141913383,
"grad_norm": 1.0,
"learning_rate": 1.9904936510234648e-05,
"loss": 0.1685,
"step": 570
},
{
"epoch": 0.14191338390017127,
"grad_norm": 0.9999999403953552,
"learning_rate": 1.987939814250705e-05,
"loss": 0.1568,
"step": 580
},
{
"epoch": 0.14436016638120872,
"grad_norm": 1.0,
"learning_rate": 1.985084539265195e-05,
"loss": 0.1589,
"step": 590
},
{
"epoch": 0.14680694886224616,
"grad_norm": 1.0,
"learning_rate": 1.9819286972627066e-05,
"loss": 0.1617,
"step": 600
},
{
"epoch": 0.14680694886224616,
"eval_accuracy": 0.96956,
"eval_f1_weighted": 0.9695722935959534,
"eval_loss": 0.02569369599223137,
"eval_macro_f1": 0.9695240197651653,
"eval_precision": 0.9698519718113257,
"eval_recall": 0.96956,
"eval_runtime": 1608.5431,
"eval_samples_per_second": 62.168,
"eval_steps_per_second": 0.243,
"step": 600
}
],
"logging_steps": 10,
"max_steps": 4087,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 200,
"stateful_callbacks": {
"EarlyStoppingCallback": {
"args": {
"early_stopping_patience": 3,
"early_stopping_threshold": 0.0
},
"attributes": {
"early_stopping_patience_counter": 0
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 2.0299639947264e+16,
"train_batch_size": 128,
"trial_name": null,
"trial_params": null
}