distilbert-pii-ner / trainer_state.json
ManiKumarAdapala's picture
Upload 6 files
14733eb verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 10.0,
"eval_steps": 500,
"global_step": 10880,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.09191176470588236,
"grad_norm": 2.861119031906128,
"learning_rate": 4.955882352941177e-05,
"loss": 1.678604736328125,
"step": 100
},
{
"epoch": 0.18382352941176472,
"grad_norm": 1.6152201890945435,
"learning_rate": 4.9099264705882355e-05,
"loss": 0.554380111694336,
"step": 200
},
{
"epoch": 0.2757352941176471,
"grad_norm": 2.08105206489563,
"learning_rate": 4.863970588235294e-05,
"loss": 0.33380359649658203,
"step": 300
},
{
"epoch": 0.36764705882352944,
"grad_norm": 1.5297737121582031,
"learning_rate": 4.818014705882353e-05,
"loss": 0.26274593353271486,
"step": 400
},
{
"epoch": 0.45955882352941174,
"grad_norm": 1.867387294769287,
"learning_rate": 4.7720588235294124e-05,
"loss": 0.22210750579833985,
"step": 500
},
{
"epoch": 0.5514705882352942,
"grad_norm": 4.296945095062256,
"learning_rate": 4.7261029411764704e-05,
"loss": 0.1887100601196289,
"step": 600
},
{
"epoch": 0.6433823529411765,
"grad_norm": 1.9779675006866455,
"learning_rate": 4.68014705882353e-05,
"loss": 0.16664567947387696,
"step": 700
},
{
"epoch": 0.7352941176470589,
"grad_norm": 1.235213279724121,
"learning_rate": 4.6341911764705886e-05,
"loss": 0.16922021865844727,
"step": 800
},
{
"epoch": 0.8272058823529411,
"grad_norm": 2.484598398208618,
"learning_rate": 4.588235294117647e-05,
"loss": 0.15672155380249023,
"step": 900
},
{
"epoch": 0.9191176470588235,
"grad_norm": 0.8935145139694214,
"learning_rate": 4.542279411764706e-05,
"loss": 0.1493326473236084,
"step": 1000
},
{
"epoch": 1.0,
"eval_loss": 0.14647769927978516,
"eval_runtime": 4.2967,
"eval_samples_per_second": 2025.035,
"eval_steps_per_second": 63.304,
"step": 1088
},
{
"epoch": 1.0110294117647058,
"grad_norm": 0.8300992250442505,
"learning_rate": 4.496323529411765e-05,
"loss": 0.14999670028686524,
"step": 1100
},
{
"epoch": 1.1029411764705883,
"grad_norm": 0.6350510716438293,
"learning_rate": 4.4503676470588236e-05,
"loss": 0.12446197509765625,
"step": 1200
},
{
"epoch": 1.1948529411764706,
"grad_norm": 1.469220519065857,
"learning_rate": 4.404411764705882e-05,
"loss": 0.11504798889160156,
"step": 1300
},
{
"epoch": 1.2867647058823528,
"grad_norm": 0.9885977506637573,
"learning_rate": 4.358455882352942e-05,
"loss": 0.12503914833068847,
"step": 1400
},
{
"epoch": 1.3786764705882353,
"grad_norm": 1.2623215913772583,
"learning_rate": 4.3125000000000005e-05,
"loss": 0.1185552978515625,
"step": 1500
},
{
"epoch": 1.4705882352941178,
"grad_norm": 1.181009292602539,
"learning_rate": 4.2665441176470585e-05,
"loss": 0.1074635124206543,
"step": 1600
},
{
"epoch": 1.5625,
"grad_norm": 0.7625616788864136,
"learning_rate": 4.220588235294118e-05,
"loss": 0.11462491989135742,
"step": 1700
},
{
"epoch": 1.6544117647058822,
"grad_norm": 2.7165768146514893,
"learning_rate": 4.174632352941177e-05,
"loss": 0.11170839309692383,
"step": 1800
},
{
"epoch": 1.7463235294117647,
"grad_norm": 1.0306402444839478,
"learning_rate": 4.1286764705882354e-05,
"loss": 0.10664710998535157,
"step": 1900
},
{
"epoch": 1.8382352941176472,
"grad_norm": 1.0458590984344482,
"learning_rate": 4.082720588235294e-05,
"loss": 0.10655851364135742,
"step": 2000
},
{
"epoch": 1.9301470588235294,
"grad_norm": 1.751387357711792,
"learning_rate": 4.036764705882353e-05,
"loss": 0.11276634216308594,
"step": 2100
},
{
"epoch": 2.0,
"eval_loss": 0.11876623332500458,
"eval_runtime": 3.7959,
"eval_samples_per_second": 2292.186,
"eval_steps_per_second": 71.656,
"step": 2176
},
{
"epoch": 2.0220588235294117,
"grad_norm": 0.9640232920646667,
"learning_rate": 3.9908088235294123e-05,
"loss": 0.10104022979736328,
"step": 2200
},
{
"epoch": 2.113970588235294,
"grad_norm": 1.449666976928711,
"learning_rate": 3.9448529411764704e-05,
"loss": 0.08359379768371582,
"step": 2300
},
{
"epoch": 2.2058823529411766,
"grad_norm": 0.781505286693573,
"learning_rate": 3.89889705882353e-05,
"loss": 0.08938695907592774,
"step": 2400
},
{
"epoch": 2.297794117647059,
"grad_norm": 0.9161350131034851,
"learning_rate": 3.8529411764705886e-05,
"loss": 0.09331055641174317,
"step": 2500
},
{
"epoch": 2.389705882352941,
"grad_norm": 0.34266597032546997,
"learning_rate": 3.806985294117647e-05,
"loss": 0.0942567253112793,
"step": 2600
},
{
"epoch": 2.4816176470588234,
"grad_norm": 0.3938254714012146,
"learning_rate": 3.761029411764706e-05,
"loss": 0.08378758430480956,
"step": 2700
},
{
"epoch": 2.5735294117647056,
"grad_norm": 0.8159363865852356,
"learning_rate": 3.715073529411765e-05,
"loss": 0.08920242309570313,
"step": 2800
},
{
"epoch": 2.6654411764705883,
"grad_norm": 0.7843156456947327,
"learning_rate": 3.6691176470588235e-05,
"loss": 0.09552728652954101,
"step": 2900
},
{
"epoch": 2.7573529411764706,
"grad_norm": 1.678454041481018,
"learning_rate": 3.623161764705882e-05,
"loss": 0.0881564712524414,
"step": 3000
},
{
"epoch": 2.849264705882353,
"grad_norm": 1.567854642868042,
"learning_rate": 3.577205882352942e-05,
"loss": 0.09041579246520996,
"step": 3100
},
{
"epoch": 2.9411764705882355,
"grad_norm": 0.587993860244751,
"learning_rate": 3.5312500000000005e-05,
"loss": 0.08352569580078124,
"step": 3200
},
{
"epoch": 3.0,
"eval_loss": 0.11157828569412231,
"eval_runtime": 3.9136,
"eval_samples_per_second": 2223.252,
"eval_steps_per_second": 69.501,
"step": 3264
},
{
"epoch": 3.0330882352941178,
"grad_norm": 0.7346888184547424,
"learning_rate": 3.4852941176470585e-05,
"loss": 0.08925918579101562,
"step": 3300
},
{
"epoch": 3.125,
"grad_norm": 0.6136897206306458,
"learning_rate": 3.439338235294118e-05,
"loss": 0.07912126064300537,
"step": 3400
},
{
"epoch": 3.2169117647058822,
"grad_norm": 0.47108200192451477,
"learning_rate": 3.393382352941177e-05,
"loss": 0.07419106960296631,
"step": 3500
},
{
"epoch": 3.3088235294117645,
"grad_norm": 0.8382533192634583,
"learning_rate": 3.3474264705882354e-05,
"loss": 0.0670989227294922,
"step": 3600
},
{
"epoch": 3.400735294117647,
"grad_norm": 0.6706309914588928,
"learning_rate": 3.301470588235294e-05,
"loss": 0.06986721515655518,
"step": 3700
},
{
"epoch": 3.4926470588235294,
"grad_norm": 0.5485235452651978,
"learning_rate": 3.255514705882353e-05,
"loss": 0.07686973571777343,
"step": 3800
},
{
"epoch": 3.5845588235294117,
"grad_norm": 0.8460040092468262,
"learning_rate": 3.209558823529412e-05,
"loss": 0.07120684623718261,
"step": 3900
},
{
"epoch": 3.6764705882352944,
"grad_norm": 0.9563305974006653,
"learning_rate": 3.1636029411764704e-05,
"loss": 0.07464917659759522,
"step": 4000
},
{
"epoch": 3.7683823529411766,
"grad_norm": 0.6851525902748108,
"learning_rate": 3.11764705882353e-05,
"loss": 0.07348180770874023,
"step": 4100
},
{
"epoch": 3.860294117647059,
"grad_norm": 0.46768584847450256,
"learning_rate": 3.0716911764705886e-05,
"loss": 0.08051628112792969,
"step": 4200
},
{
"epoch": 3.952205882352941,
"grad_norm": 0.8145326375961304,
"learning_rate": 3.025735294117647e-05,
"loss": 0.0793468189239502,
"step": 4300
},
{
"epoch": 4.0,
"eval_loss": 0.11373896896839142,
"eval_runtime": 3.8395,
"eval_samples_per_second": 2266.171,
"eval_steps_per_second": 70.842,
"step": 4352
},
{
"epoch": 4.044117647058823,
"grad_norm": 4.8013997077941895,
"learning_rate": 2.979779411764706e-05,
"loss": 0.06481593132019042,
"step": 4400
},
{
"epoch": 4.136029411764706,
"grad_norm": 1.51911199092865,
"learning_rate": 2.933823529411765e-05,
"loss": 0.05568636417388916,
"step": 4500
},
{
"epoch": 4.227941176470588,
"grad_norm": 1.1331921815872192,
"learning_rate": 2.8878676470588235e-05,
"loss": 0.06395863056182861,
"step": 4600
},
{
"epoch": 4.319852941176471,
"grad_norm": 1.9773746728897095,
"learning_rate": 2.8419117647058823e-05,
"loss": 0.05171878814697266,
"step": 4700
},
{
"epoch": 4.411764705882353,
"grad_norm": 0.720111608505249,
"learning_rate": 2.7959558823529414e-05,
"loss": 0.06219084739685059,
"step": 4800
},
{
"epoch": 4.5036764705882355,
"grad_norm": 1.243735671043396,
"learning_rate": 2.7500000000000004e-05,
"loss": 0.06258386135101318,
"step": 4900
},
{
"epoch": 4.595588235294118,
"grad_norm": 0.7214698195457458,
"learning_rate": 2.704044117647059e-05,
"loss": 0.057212424278259275,
"step": 5000
},
{
"epoch": 4.6875,
"grad_norm": 2.4246177673339844,
"learning_rate": 2.658088235294118e-05,
"loss": 0.05896786212921143,
"step": 5100
},
{
"epoch": 4.779411764705882,
"grad_norm": 0.3699852228164673,
"learning_rate": 2.6121323529411767e-05,
"loss": 0.0625047254562378,
"step": 5200
},
{
"epoch": 4.8713235294117645,
"grad_norm": 0.8965820670127869,
"learning_rate": 2.566176470588235e-05,
"loss": 0.06457361221313476,
"step": 5300
},
{
"epoch": 4.963235294117647,
"grad_norm": 0.6348599791526794,
"learning_rate": 2.520220588235294e-05,
"loss": 0.05709341049194336,
"step": 5400
},
{
"epoch": 5.0,
"eval_loss": 0.13100895285606384,
"eval_runtime": 3.7913,
"eval_samples_per_second": 2294.964,
"eval_steps_per_second": 71.742,
"step": 5440
},
{
"epoch": 5.055147058823529,
"grad_norm": 0.30243951082229614,
"learning_rate": 2.4742647058823532e-05,
"loss": 0.0436199951171875,
"step": 5500
},
{
"epoch": 5.147058823529412,
"grad_norm": 1.0563251972198486,
"learning_rate": 2.428308823529412e-05,
"loss": 0.03898259401321411,
"step": 5600
},
{
"epoch": 5.238970588235294,
"grad_norm": 0.3339505195617676,
"learning_rate": 2.3823529411764707e-05,
"loss": 0.03611770153045654,
"step": 5700
},
{
"epoch": 5.330882352941177,
"grad_norm": 3.47481107711792,
"learning_rate": 2.3363970588235295e-05,
"loss": 0.03747700929641724,
"step": 5800
},
{
"epoch": 5.422794117647059,
"grad_norm": 0.49956804513931274,
"learning_rate": 2.2904411764705882e-05,
"loss": 0.034790968894958495,
"step": 5900
},
{
"epoch": 5.514705882352941,
"grad_norm": 0.2137073427438736,
"learning_rate": 2.2444852941176473e-05,
"loss": 0.042610764503479004,
"step": 6000
},
{
"epoch": 5.606617647058823,
"grad_norm": 4.238280296325684,
"learning_rate": 2.198529411764706e-05,
"loss": 0.041733989715576174,
"step": 6100
},
{
"epoch": 5.698529411764706,
"grad_norm": 0.7751985192298889,
"learning_rate": 2.1525735294117648e-05,
"loss": 0.042198920249938966,
"step": 6200
},
{
"epoch": 5.790441176470588,
"grad_norm": 6.274240493774414,
"learning_rate": 2.1066176470588235e-05,
"loss": 0.035168659687042234,
"step": 6300
},
{
"epoch": 5.882352941176471,
"grad_norm": 0.8700118064880371,
"learning_rate": 2.0606617647058823e-05,
"loss": 0.04176306247711182,
"step": 6400
},
{
"epoch": 5.974264705882353,
"grad_norm": 0.40382614731788635,
"learning_rate": 2.0151654411764708e-05,
"loss": 0.03780954122543335,
"step": 6500
},
{
"epoch": 6.0,
"eval_loss": 0.1548856645822525,
"eval_runtime": 3.8561,
"eval_samples_per_second": 2256.438,
"eval_steps_per_second": 70.538,
"step": 6528
},
{
"epoch": 6.0661764705882355,
"grad_norm": 0.35931289196014404,
"learning_rate": 1.9692095588235295e-05,
"loss": 0.026753320693969726,
"step": 6600
},
{
"epoch": 6.158088235294118,
"grad_norm": 1.0790654420852661,
"learning_rate": 1.9232536764705883e-05,
"loss": 0.022549192905426025,
"step": 6700
},
{
"epoch": 6.25,
"grad_norm": 0.39832767844200134,
"learning_rate": 1.8777573529411764e-05,
"loss": 0.02674192190170288,
"step": 6800
},
{
"epoch": 6.341911764705882,
"grad_norm": 0.38946613669395447,
"learning_rate": 1.8318014705882352e-05,
"loss": 0.024337658882141112,
"step": 6900
},
{
"epoch": 6.4338235294117645,
"grad_norm": 6.687967300415039,
"learning_rate": 1.7858455882352943e-05,
"loss": 0.02405022144317627,
"step": 7000
},
{
"epoch": 6.525735294117647,
"grad_norm": 1.1742165088653564,
"learning_rate": 1.739889705882353e-05,
"loss": 0.023499369621276855,
"step": 7100
},
{
"epoch": 6.617647058823529,
"grad_norm": 0.728435754776001,
"learning_rate": 1.693933823529412e-05,
"loss": 0.01860466957092285,
"step": 7200
},
{
"epoch": 6.709558823529412,
"grad_norm": 0.18539367616176605,
"learning_rate": 1.6479779411764705e-05,
"loss": 0.024487736225128173,
"step": 7300
},
{
"epoch": 6.801470588235294,
"grad_norm": 2.0757601261138916,
"learning_rate": 1.6020220588235296e-05,
"loss": 0.027930150032043456,
"step": 7400
},
{
"epoch": 6.893382352941177,
"grad_norm": 0.6962282657623291,
"learning_rate": 1.5560661764705883e-05,
"loss": 0.019371466636657717,
"step": 7500
},
{
"epoch": 6.985294117647059,
"grad_norm": 0.3877858519554138,
"learning_rate": 1.510110294117647e-05,
"loss": 0.023068771362304688,
"step": 7600
},
{
"epoch": 7.0,
"eval_loss": 0.19883336126804352,
"eval_runtime": 3.8871,
"eval_samples_per_second": 2238.423,
"eval_steps_per_second": 69.975,
"step": 7616
},
{
"epoch": 7.077205882352941,
"grad_norm": 0.32462701201438904,
"learning_rate": 1.464154411764706e-05,
"loss": 0.01416821002960205,
"step": 7700
},
{
"epoch": 7.169117647058823,
"grad_norm": 0.70732182264328,
"learning_rate": 1.4181985294117647e-05,
"loss": 0.013301538228988647,
"step": 7800
},
{
"epoch": 7.261029411764706,
"grad_norm": 12.949718475341797,
"learning_rate": 1.3722426470588238e-05,
"loss": 0.01586754560470581,
"step": 7900
},
{
"epoch": 7.352941176470588,
"grad_norm": 1.7924553155899048,
"learning_rate": 1.3262867647058824e-05,
"loss": 0.019125467538833617,
"step": 8000
},
{
"epoch": 7.444852941176471,
"grad_norm": 0.45370689034461975,
"learning_rate": 1.2803308823529411e-05,
"loss": 0.017261466979980468,
"step": 8100
},
{
"epoch": 7.536764705882353,
"grad_norm": 0.24471713602542877,
"learning_rate": 1.2343750000000002e-05,
"loss": 0.016836028099060058,
"step": 8200
},
{
"epoch": 7.6286764705882355,
"grad_norm": 0.273219496011734,
"learning_rate": 1.1884191176470588e-05,
"loss": 0.014804782867431641,
"step": 8300
},
{
"epoch": 7.720588235294118,
"grad_norm": 0.27901849150657654,
"learning_rate": 1.1424632352941177e-05,
"loss": 0.017638254165649413,
"step": 8400
},
{
"epoch": 7.8125,
"grad_norm": 0.41847002506256104,
"learning_rate": 1.0965073529411766e-05,
"loss": 0.014013255834579469,
"step": 8500
},
{
"epoch": 7.904411764705882,
"grad_norm": 0.3298964500427246,
"learning_rate": 1.0505514705882353e-05,
"loss": 0.015006015300750733,
"step": 8600
},
{
"epoch": 7.9963235294117645,
"grad_norm": 0.5094680786132812,
"learning_rate": 1.0045955882352942e-05,
"loss": 0.016840940713882445,
"step": 8700
},
{
"epoch": 8.0,
"eval_loss": 0.22593119740486145,
"eval_runtime": 3.8624,
"eval_samples_per_second": 2252.772,
"eval_steps_per_second": 70.423,
"step": 8704
},
{
"epoch": 8.088235294117647,
"grad_norm": 0.23575666546821594,
"learning_rate": 9.58639705882353e-06,
"loss": 0.011906511783599853,
"step": 8800
},
{
"epoch": 8.180147058823529,
"grad_norm": 0.39200881123542786,
"learning_rate": 9.126838235294117e-06,
"loss": 0.009779441356658935,
"step": 8900
},
{
"epoch": 8.272058823529411,
"grad_norm": 0.2954489588737488,
"learning_rate": 8.667279411764706e-06,
"loss": 0.011333670616149902,
"step": 9000
},
{
"epoch": 8.363970588235293,
"grad_norm": 0.1555805653333664,
"learning_rate": 8.207720588235294e-06,
"loss": 0.011691917181015015,
"step": 9100
},
{
"epoch": 8.455882352941176,
"grad_norm": 0.6293551921844482,
"learning_rate": 7.748161764705883e-06,
"loss": 0.010650770664215088,
"step": 9200
},
{
"epoch": 8.547794117647058,
"grad_norm": 0.47241711616516113,
"learning_rate": 7.288602941176471e-06,
"loss": 0.008451443314552307,
"step": 9300
},
{
"epoch": 8.639705882352942,
"grad_norm": 0.39692994952201843,
"learning_rate": 6.829044117647059e-06,
"loss": 0.01287778615951538,
"step": 9400
},
{
"epoch": 8.731617647058824,
"grad_norm": 4.867070198059082,
"learning_rate": 6.374080882352941e-06,
"loss": 0.012674452066421508,
"step": 9500
},
{
"epoch": 8.823529411764707,
"grad_norm": 0.20786941051483154,
"learning_rate": 5.9145220588235295e-06,
"loss": 0.009658980965614319,
"step": 9600
},
{
"epoch": 8.915441176470589,
"grad_norm": 0.24980570375919342,
"learning_rate": 5.454963235294118e-06,
"loss": 0.012473410367965699,
"step": 9700
},
{
"epoch": 9.0,
"eval_loss": 0.2505253851413727,
"eval_runtime": 3.8579,
"eval_samples_per_second": 2255.375,
"eval_steps_per_second": 70.505,
"step": 9792
},
{
"epoch": 9.007352941176471,
"grad_norm": 0.34893837571144104,
"learning_rate": 4.995404411764706e-06,
"loss": 0.01137054443359375,
"step": 9800
},
{
"epoch": 9.099264705882353,
"grad_norm": 0.1509261131286621,
"learning_rate": 4.535845588235294e-06,
"loss": 0.009393535852432251,
"step": 9900
},
{
"epoch": 9.191176470588236,
"grad_norm": 0.4828801453113556,
"learning_rate": 4.076286764705883e-06,
"loss": 0.011060981750488282,
"step": 10000
},
{
"epoch": 9.283088235294118,
"grad_norm": 0.10140291601419449,
"learning_rate": 3.616727941176471e-06,
"loss": 0.008189416527748107,
"step": 10100
},
{
"epoch": 9.375,
"grad_norm": 0.13836342096328735,
"learning_rate": 3.1571691176470588e-06,
"loss": 0.00860303282737732,
"step": 10200
},
{
"epoch": 9.466911764705882,
"grad_norm": 0.3119546175003052,
"learning_rate": 2.6976102941176475e-06,
"loss": 0.008158923387527465,
"step": 10300
},
{
"epoch": 9.558823529411764,
"grad_norm": 0.7189019918441772,
"learning_rate": 2.2380514705882353e-06,
"loss": 0.007920079231262207,
"step": 10400
},
{
"epoch": 9.650735294117647,
"grad_norm": 0.28993985056877136,
"learning_rate": 1.7784926470588236e-06,
"loss": 0.00800090193748474,
"step": 10500
},
{
"epoch": 9.742647058823529,
"grad_norm": 0.6550254225730896,
"learning_rate": 1.3189338235294119e-06,
"loss": 0.008071759939193726,
"step": 10600
},
{
"epoch": 9.834558823529411,
"grad_norm": 0.47817108035087585,
"learning_rate": 8.593750000000001e-07,
"loss": 0.008685371279716492,
"step": 10700
},
{
"epoch": 9.926470588235293,
"grad_norm": 0.5251961350440979,
"learning_rate": 3.998161764705882e-07,
"loss": 0.007078754305839538,
"step": 10800
},
{
"epoch": 10.0,
"eval_loss": 0.24180535972118378,
"eval_runtime": 3.9524,
"eval_samples_per_second": 2201.471,
"eval_steps_per_second": 68.82,
"step": 10880
}
],
"logging_steps": 100,
"max_steps": 10880,
"num_input_tokens_seen": 0,
"num_train_epochs": 10,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1.138963309056e+16,
"train_batch_size": 32,
"trial_name": null,
"trial_params": null
}