{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.6387225548902196,
"eval_steps": 500,
"global_step": 1000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.006387225548902195,
"grad_norm": 3.5834698688009654,
"learning_rate": 4.007141788485205e-05,
"loss": 0.8162,
"step": 10
},
{
"epoch": 0.01277445109780439,
"grad_norm": 3.7982361966359415,
"learning_rate": 5.213411663697864e-05,
"loss": 0.4857,
"step": 20
},
{
"epoch": 0.019161676646706587,
"grad_norm": 3.3097747900168066,
"learning_rate": 5.919034306446858e-05,
"loss": 0.1919,
"step": 30
},
{
"epoch": 0.02554890219560878,
"grad_norm": 1.6554650990460391,
"learning_rate": 6.419681538910523e-05,
"loss": 0.1177,
"step": 40
},
{
"epoch": 0.031936127744510975,
"grad_norm": 1.0526158883209893,
"learning_rate": 6.80801370175775e-05,
"loss": 0.0997,
"step": 50
},
{
"epoch": 0.03832335329341317,
"grad_norm": 1.322844958779453,
"learning_rate": 7.125304181659517e-05,
"loss": 0.0987,
"step": 60
},
{
"epoch": 0.04471057884231537,
"grad_norm": 0.906098163414632,
"learning_rate": 7.393569459993276e-05,
"loss": 0.0902,
"step": 70
},
{
"epoch": 0.05109780439121756,
"grad_norm": 0.9742791206427208,
"learning_rate": 7.625951414123182e-05,
"loss": 0.08,
"step": 80
},
{
"epoch": 0.05748502994011976,
"grad_norm": 0.7369633538190276,
"learning_rate": 7.83092682440851e-05,
"loss": 0.0598,
"step": 90
},
{
"epoch": 0.06387225548902195,
"grad_norm": 0.6187428023320575,
"learning_rate": 8.01428357697041e-05,
"loss": 0.0836,
"step": 100
},
{
"epoch": 0.07025948103792415,
"grad_norm": 0.7404818716311581,
"learning_rate": 8.180149935405545e-05,
"loss": 0.0758,
"step": 110
},
{
"epoch": 0.07664670658682635,
"grad_norm": 0.4603749462575225,
"learning_rate": 8.331574056872175e-05,
"loss": 0.0698,
"step": 120
},
{
"epoch": 0.08303393213572854,
"grad_norm": 0.5780703019259222,
"learning_rate": 8.470870745519228e-05,
"loss": 0.0771,
"step": 130
},
{
"epoch": 0.08942115768463074,
"grad_norm": 0.7869338127501012,
"learning_rate": 8.599839335205934e-05,
"loss": 0.0741,
"step": 140
},
{
"epoch": 0.09580838323353294,
"grad_norm": 0.5085534390538137,
"learning_rate": 8.719906219719403e-05,
"loss": 0.0637,
"step": 150
},
{
"epoch": 0.10219560878243512,
"grad_norm": 0.6419093548122555,
"learning_rate": 8.83222128933584e-05,
"loss": 0.0683,
"step": 160
},
{
"epoch": 0.10858283433133732,
"grad_norm": 0.4014377657568054,
"learning_rate": 8.937725079936634e-05,
"loss": 0.0732,
"step": 170
},
{
"epoch": 0.11497005988023952,
"grad_norm": 0.4686184226271436,
"learning_rate": 9.03719669962117e-05,
"loss": 0.0727,
"step": 180
},
{
"epoch": 0.12135728542914172,
"grad_norm": 0.3553126799550481,
"learning_rate": 9.13128878003922e-05,
"loss": 0.0642,
"step": 190
},
{
"epoch": 0.1277445109780439,
"grad_norm": 0.2308342524288462,
"learning_rate": 9.220553452183068e-05,
"loss": 0.0662,
"step": 200
},
{
"epoch": 0.1341317365269461,
"grad_norm": 0.4244978948516096,
"learning_rate": 9.305461977954928e-05,
"loss": 0.0653,
"step": 210
},
{
"epoch": 0.1405189620758483,
"grad_norm": 0.6491159925885343,
"learning_rate": 9.386419810618205e-05,
"loss": 0.0692,
"step": 220
},
{
"epoch": 0.1469061876247505,
"grad_norm": 0.6586709935585672,
"learning_rate": 9.46377830473483e-05,
"loss": 0.0603,
"step": 230
},
{
"epoch": 0.1532934131736527,
"grad_norm": 0.4334717142812121,
"learning_rate": 9.537843932084834e-05,
"loss": 0.075,
"step": 240
},
{
"epoch": 0.1596806387225549,
"grad_norm": 0.31119556502835766,
"learning_rate": 9.608885615030295e-05,
"loss": 0.0646,
"step": 250
},
{
"epoch": 0.1660678642714571,
"grad_norm": 0.3201297094259269,
"learning_rate": 9.677140620731887e-05,
"loss": 0.0687,
"step": 260
},
{
"epoch": 0.1724550898203593,
"grad_norm": 0.3607671741835552,
"learning_rate": 9.742819342370165e-05,
"loss": 0.0611,
"step": 270
},
{
"epoch": 0.17884231536926148,
"grad_norm": 0.2599965595224881,
"learning_rate": 9.806109210418593e-05,
"loss": 0.0682,
"step": 280
},
{
"epoch": 0.18522954091816368,
"grad_norm": 0.32241547083298766,
"learning_rate": 9.86717791726321e-05,
"loss": 0.0612,
"step": 290
},
{
"epoch": 0.19161676646706588,
"grad_norm": 0.48624224094653656,
"learning_rate": 9.926176094932063e-05,
"loss": 0.0569,
"step": 300
},
{
"epoch": 0.19800399201596808,
"grad_norm": 0.3035297139442493,
"learning_rate": 9.983239553594597e-05,
"loss": 0.0581,
"step": 310
},
{
"epoch": 0.20439121756487025,
"grad_norm": 0.2209458722911404,
"learning_rate": 9.97870074547391e-05,
"loss": 0.0579,
"step": 320
},
{
"epoch": 0.21077844311377245,
"grad_norm": 0.32849509401911653,
"learning_rate": 9.943201987930422e-05,
"loss": 0.0594,
"step": 330
},
{
"epoch": 0.21716566866267464,
"grad_norm": 0.3121594199523004,
"learning_rate": 9.907703230386936e-05,
"loss": 0.0689,
"step": 340
},
{
"epoch": 0.22355289421157684,
"grad_norm": 0.37282218523682487,
"learning_rate": 9.87220447284345e-05,
"loss": 0.0591,
"step": 350
},
{
"epoch": 0.22994011976047904,
"grad_norm": 0.21932185059016668,
"learning_rate": 9.836705715299965e-05,
"loss": 0.0561,
"step": 360
},
{
"epoch": 0.23632734530938124,
"grad_norm": 0.40431591808333317,
"learning_rate": 9.801206957756479e-05,
"loss": 0.0547,
"step": 370
},
{
"epoch": 0.24271457085828343,
"grad_norm": 0.3545382413148979,
"learning_rate": 9.765708200212993e-05,
"loss": 0.068,
"step": 380
},
{
"epoch": 0.24910179640718563,
"grad_norm": 0.3708116952070532,
"learning_rate": 9.730209442669507e-05,
"loss": 0.0644,
"step": 390
},
{
"epoch": 0.2554890219560878,
"grad_norm": 0.2933151282788518,
"learning_rate": 9.69471068512602e-05,
"loss": 0.0554,
"step": 400
},
{
"epoch": 0.26187624750499,
"grad_norm": 0.2701122564574567,
"learning_rate": 9.659211927582535e-05,
"loss": 0.0669,
"step": 410
},
{
"epoch": 0.2682634730538922,
"grad_norm": 0.43233913171276295,
"learning_rate": 9.62371317003905e-05,
"loss": 0.0696,
"step": 420
},
{
"epoch": 0.2746506986027944,
"grad_norm": 0.29840407730897334,
"learning_rate": 9.588214412495564e-05,
"loss": 0.0583,
"step": 430
},
{
"epoch": 0.2810379241516966,
"grad_norm": 0.347519310387429,
"learning_rate": 9.552715654952076e-05,
"loss": 0.0675,
"step": 440
},
{
"epoch": 0.2874251497005988,
"grad_norm": 0.23159546291731048,
"learning_rate": 9.51721689740859e-05,
"loss": 0.0595,
"step": 450
},
{
"epoch": 0.293812375249501,
"grad_norm": 0.19539165396565417,
"learning_rate": 9.481718139865106e-05,
"loss": 0.0593,
"step": 460
},
{
"epoch": 0.3001996007984032,
"grad_norm": 0.3711679298627976,
"learning_rate": 9.446219382321619e-05,
"loss": 0.0576,
"step": 470
},
{
"epoch": 0.3065868263473054,
"grad_norm": 0.21594608036662566,
"learning_rate": 9.410720624778133e-05,
"loss": 0.0612,
"step": 480
},
{
"epoch": 0.3129740518962076,
"grad_norm": 0.38847976792533884,
"learning_rate": 9.375221867234647e-05,
"loss": 0.0558,
"step": 490
},
{
"epoch": 0.3193612774451098,
"grad_norm": 0.48499489939381346,
"learning_rate": 9.339723109691161e-05,
"loss": 0.0598,
"step": 500
},
{
"epoch": 0.325748502994012,
"grad_norm": 0.26871455217005846,
"learning_rate": 9.304224352147675e-05,
"loss": 0.0599,
"step": 510
},
{
"epoch": 0.3321357285429142,
"grad_norm": 0.3006779859330303,
"learning_rate": 9.26872559460419e-05,
"loss": 0.0533,
"step": 520
},
{
"epoch": 0.3385229540918164,
"grad_norm": 0.252910708387006,
"learning_rate": 9.233226837060704e-05,
"loss": 0.0609,
"step": 530
},
{
"epoch": 0.3449101796407186,
"grad_norm": 0.28615902583412134,
"learning_rate": 9.197728079517217e-05,
"loss": 0.0632,
"step": 540
},
{
"epoch": 0.35129740518962077,
"grad_norm": 0.3309472244637015,
"learning_rate": 9.162229321973731e-05,
"loss": 0.0657,
"step": 550
},
{
"epoch": 0.35768463073852297,
"grad_norm": 0.3181277078140241,
"learning_rate": 9.126730564430246e-05,
"loss": 0.0589,
"step": 560
},
{
"epoch": 0.36407185628742517,
"grad_norm": 0.5196112858300266,
"learning_rate": 9.09123180688676e-05,
"loss": 0.0645,
"step": 570
},
{
"epoch": 0.37045908183632736,
"grad_norm": 0.4316495610405202,
"learning_rate": 9.055733049343273e-05,
"loss": 0.0659,
"step": 580
},
{
"epoch": 0.37684630738522956,
"grad_norm": 0.25853510285529524,
"learning_rate": 9.020234291799787e-05,
"loss": 0.0582,
"step": 590
},
{
"epoch": 0.38323353293413176,
"grad_norm": 0.28576780733983087,
"learning_rate": 8.984735534256301e-05,
"loss": 0.0531,
"step": 600
},
{
"epoch": 0.38962075848303396,
"grad_norm": 0.30628185946436504,
"learning_rate": 8.949236776712816e-05,
"loss": 0.0577,
"step": 610
},
{
"epoch": 0.39600798403193616,
"grad_norm": 0.22146965371135377,
"learning_rate": 8.91373801916933e-05,
"loss": 0.0571,
"step": 620
},
{
"epoch": 0.4023952095808383,
"grad_norm": 0.2720642528700885,
"learning_rate": 8.878239261625844e-05,
"loss": 0.059,
"step": 630
},
{
"epoch": 0.4087824351297405,
"grad_norm": 0.35619962554831525,
"learning_rate": 8.842740504082358e-05,
"loss": 0.0614,
"step": 640
},
{
"epoch": 0.4151696606786427,
"grad_norm": 0.28668258535201413,
"learning_rate": 8.807241746538871e-05,
"loss": 0.0699,
"step": 650
},
{
"epoch": 0.4215568862275449,
"grad_norm": 0.35772133791366534,
"learning_rate": 8.771742988995385e-05,
"loss": 0.0599,
"step": 660
},
{
"epoch": 0.4279441117764471,
"grad_norm": 0.3547047177760543,
"learning_rate": 8.7362442314519e-05,
"loss": 0.0563,
"step": 670
},
{
"epoch": 0.4343313373253493,
"grad_norm": 0.24898549550006935,
"learning_rate": 8.700745473908413e-05,
"loss": 0.0553,
"step": 680
},
{
"epoch": 0.4407185628742515,
"grad_norm": 0.34439451125458864,
"learning_rate": 8.665246716364927e-05,
"loss": 0.0525,
"step": 690
},
{
"epoch": 0.4471057884231537,
"grad_norm": 0.27788279819961054,
"learning_rate": 8.629747958821441e-05,
"loss": 0.0559,
"step": 700
},
{
"epoch": 0.4534930139720559,
"grad_norm": 0.358392882540888,
"learning_rate": 8.594249201277956e-05,
"loss": 0.0657,
"step": 710
},
{
"epoch": 0.4598802395209581,
"grad_norm": 0.3111624222426367,
"learning_rate": 8.55875044373447e-05,
"loss": 0.0472,
"step": 720
},
{
"epoch": 0.4662674650698603,
"grad_norm": 0.26806713373994445,
"learning_rate": 8.523251686190984e-05,
"loss": 0.0696,
"step": 730
},
{
"epoch": 0.4726546906187625,
"grad_norm": 0.2748488176683564,
"learning_rate": 8.487752928647498e-05,
"loss": 0.063,
"step": 740
},
{
"epoch": 0.47904191616766467,
"grad_norm": 0.18592617627322836,
"learning_rate": 8.452254171104012e-05,
"loss": 0.0609,
"step": 750
},
{
"epoch": 0.48542914171656687,
"grad_norm": 0.38695621265068386,
"learning_rate": 8.416755413560525e-05,
"loss": 0.062,
"step": 760
},
{
"epoch": 0.49181636726546907,
"grad_norm": 0.3158810203428474,
"learning_rate": 8.38125665601704e-05,
"loss": 0.0592,
"step": 770
},
{
"epoch": 0.49820359281437127,
"grad_norm": 0.3362065389804566,
"learning_rate": 8.345757898473555e-05,
"loss": 0.0613,
"step": 780
},
{
"epoch": 0.5045908183632735,
"grad_norm": 0.25748557706326924,
"learning_rate": 8.310259140930067e-05,
"loss": 0.0529,
"step": 790
},
{
"epoch": 0.5109780439121756,
"grad_norm": 0.29455626027596066,
"learning_rate": 8.274760383386582e-05,
"loss": 0.065,
"step": 800
},
{
"epoch": 0.5173652694610779,
"grad_norm": 0.2564019757489936,
"learning_rate": 8.239261625843096e-05,
"loss": 0.0579,
"step": 810
},
{
"epoch": 0.52375249500998,
"grad_norm": 0.228508320126866,
"learning_rate": 8.20376286829961e-05,
"loss": 0.0618,
"step": 820
},
{
"epoch": 0.5301397205588823,
"grad_norm": 0.17395934565113735,
"learning_rate": 8.168264110756124e-05,
"loss": 0.0601,
"step": 830
},
{
"epoch": 0.5365269461077844,
"grad_norm": 0.1845587781224566,
"learning_rate": 8.132765353212638e-05,
"loss": 0.0646,
"step": 840
},
{
"epoch": 0.5429141716566867,
"grad_norm": 0.219121974587572,
"learning_rate": 8.097266595669152e-05,
"loss": 0.0602,
"step": 850
},
{
"epoch": 0.5493013972055888,
"grad_norm": 0.3134533264569832,
"learning_rate": 8.061767838125665e-05,
"loss": 0.0593,
"step": 860
},
{
"epoch": 0.555688622754491,
"grad_norm": 0.36630204830289737,
"learning_rate": 8.02626908058218e-05,
"loss": 0.0606,
"step": 870
},
{
"epoch": 0.5620758483033932,
"grad_norm": 0.2390442131459619,
"learning_rate": 7.990770323038695e-05,
"loss": 0.0572,
"step": 880
},
{
"epoch": 0.5684630738522954,
"grad_norm": 0.2906363057717337,
"learning_rate": 7.955271565495209e-05,
"loss": 0.0687,
"step": 890
},
{
"epoch": 0.5748502994011976,
"grad_norm": 0.2853917682780398,
"learning_rate": 7.919772807951722e-05,
"loss": 0.0643,
"step": 900
},
{
"epoch": 0.5812375249500998,
"grad_norm": 0.3544408088733472,
"learning_rate": 7.884274050408236e-05,
"loss": 0.0574,
"step": 910
},
{
"epoch": 0.587624750499002,
"grad_norm": 0.24181631469575632,
"learning_rate": 7.848775292864751e-05,
"loss": 0.0697,
"step": 920
},
{
"epoch": 0.5940119760479042,
"grad_norm": 0.28668164498739,
"learning_rate": 7.813276535321264e-05,
"loss": 0.0641,
"step": 930
},
{
"epoch": 0.6003992015968064,
"grad_norm": 0.29519716447463473,
"learning_rate": 7.777777777777778e-05,
"loss": 0.0547,
"step": 940
},
{
"epoch": 0.6067864271457086,
"grad_norm": 0.2797455777496235,
"learning_rate": 7.742279020234292e-05,
"loss": 0.0622,
"step": 950
},
{
"epoch": 0.6131736526946108,
"grad_norm": 0.20698136186718125,
"learning_rate": 7.706780262690806e-05,
"loss": 0.0538,
"step": 960
},
{
"epoch": 0.619560878243513,
"grad_norm": 0.3029284687538603,
"learning_rate": 7.671281505147319e-05,
"loss": 0.0553,
"step": 970
},
{
"epoch": 0.6259481037924152,
"grad_norm": 0.3308461091763622,
"learning_rate": 7.635782747603835e-05,
"loss": 0.0592,
"step": 980
},
{
"epoch": 0.6323353293413174,
"grad_norm": 0.2652350297520287,
"learning_rate": 7.600283990060349e-05,
"loss": 0.0611,
"step": 990
},
{
"epoch": 0.6387225548902196,
"grad_norm": 0.20354784105739357,
"learning_rate": 7.564785232516862e-05,
"loss": 0.059,
"step": 1000
}
],
"logging_steps": 10,
"max_steps": 3130,
"num_input_tokens_seen": 0,
"num_train_epochs": 2,
"save_steps": 200,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}