bhargav-07-bidkar
/

Legalbert_Finetuned

+{
+  "best_metric": 0.7821552723059096,
+  "best_model_checkpoint": "./legalbert_finetuned\\checkpoint-2913",
+  "epoch": 3.0,
+  "eval_steps": 500,
+  "global_step": 2913,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.05149330587023687,
+      "grad_norm": 15.42212963104248,
+      "learning_rate": 1.9656711294198423e-05,
+      "loss": 3.664,
+      "step": 50
+    },
+    {
+      "epoch": 0.10298661174047374,
+      "grad_norm": 15.640098571777344,
+      "learning_rate": 1.9313422588396845e-05,
+      "loss": 3.2168,
+      "step": 100
+    },
+    {
+      "epoch": 0.15447991761071062,
+      "grad_norm": 10.927306175231934,
+      "learning_rate": 1.8970133882595267e-05,
+      "loss": 2.7426,
+      "step": 150
+    },
+    {
+      "epoch": 0.2059732234809475,
+      "grad_norm": 11.16470718383789,
+      "learning_rate": 1.8626845176793685e-05,
+      "loss": 2.2935,
+      "step": 200
+    },
+    {
+      "epoch": 0.25746652935118436,
+      "grad_norm": 8.693652153015137,
+      "learning_rate": 1.8283556470992106e-05,
+      "loss": 1.9415,
+      "step": 250
+    },
+    {
+      "epoch": 0.30895983522142123,
+      "grad_norm": 6.318974494934082,
+      "learning_rate": 1.7940267765190528e-05,
+      "loss": 1.7223,
+      "step": 300
+    },
+    {
+      "epoch": 0.3604531410916581,
+      "grad_norm": 9.084778785705566,
+      "learning_rate": 1.7596979059388946e-05,
+      "loss": 1.5035,
+      "step": 350
+    },
+    {
+      "epoch": 0.411946446961895,
+      "grad_norm": 7.663175582885742,
+      "learning_rate": 1.7253690353587368e-05,
+      "loss": 1.4208,
+      "step": 400
+    },
+    {
+      "epoch": 0.46343975283213185,
+      "grad_norm": 6.044983863830566,
+      "learning_rate": 1.691040164778579e-05,
+      "loss": 1.2055,
+      "step": 450
+    },
+    {
+      "epoch": 0.5149330587023687,
+      "grad_norm": 7.2318315505981445,
+      "learning_rate": 1.6567112941984208e-05,
+      "loss": 1.3188,
+      "step": 500
+    },
+    {
+      "epoch": 0.5664263645726055,
+      "grad_norm": 8.419646263122559,
+      "learning_rate": 1.622382423618263e-05,
+      "loss": 1.0725,
+      "step": 550
+    },
+    {
+      "epoch": 0.6179196704428425,
+      "grad_norm": 5.270600318908691,
+      "learning_rate": 1.588053553038105e-05,
+      "loss": 1.1664,
+      "step": 600
+    },
+    {
+      "epoch": 0.6694129763130793,
+      "grad_norm": 5.677024841308594,
+      "learning_rate": 1.5537246824579473e-05,
+      "loss": 1.0537,
+      "step": 650
+    },
+    {
+      "epoch": 0.7209062821833162,
+      "grad_norm": 7.414009094238281,
+      "learning_rate": 1.5193958118777894e-05,
+      "loss": 1.0609,
+      "step": 700
+    },
+    {
+      "epoch": 0.772399588053553,
+      "grad_norm": 9.209087371826172,
+      "learning_rate": 1.4850669412976316e-05,
+      "loss": 0.9383,
+      "step": 750
+    },
+    {
+      "epoch": 0.82389289392379,
+      "grad_norm": 7.801745891571045,
+      "learning_rate": 1.4507380707174734e-05,
+      "loss": 0.9023,
+      "step": 800
+    },
+    {
+      "epoch": 0.8753861997940268,
+      "grad_norm": 5.991678714752197,
+      "learning_rate": 1.4164092001373156e-05,
+      "loss": 0.9126,
+      "step": 850
+    },
+    {
+      "epoch": 0.9268795056642637,
+      "grad_norm": 10.367544174194336,
+      "learning_rate": 1.3820803295571576e-05,
+      "loss": 0.9172,
+      "step": 900
+    },
+    {
+      "epoch": 0.9783728115345005,
+      "grad_norm": 8.197531700134277,
+      "learning_rate": 1.3477514589769997e-05,
+      "loss": 0.9509,
+      "step": 950
+    },
+    {
+      "epoch": 1.0,
+      "eval_accuracy": 0.7531865585168018,
+      "eval_f1": 0.7043263913078888,
+      "eval_loss": 0.8081243634223938,
+      "eval_runtime": 234.8912,
+      "eval_samples_per_second": 3.674,
+      "eval_steps_per_second": 0.46,
+      "step": 971
+    },
+    {
+      "epoch": 1.0298661174047374,
+      "grad_norm": 13.948481559753418,
+      "learning_rate": 1.3134225883968419e-05,
+      "loss": 0.8781,
+      "step": 1000
+    },
+    {
+      "epoch": 1.0813594232749741,
+      "grad_norm": 9.951421737670898,
+      "learning_rate": 1.279093717816684e-05,
+      "loss": 0.745,
+      "step": 1050
+    },
+    {
+      "epoch": 1.132852729145211,
+      "grad_norm": 3.6247968673706055,
+      "learning_rate": 1.244764847236526e-05,
+      "loss": 0.7621,
+      "step": 1100
+    },
+    {
+      "epoch": 1.184346035015448,
+      "grad_norm": 11.013542175292969,
+      "learning_rate": 1.210435976656368e-05,
+      "loss": 0.6503,
+      "step": 1150
+    },
+    {
+      "epoch": 1.235839340885685,
+      "grad_norm": 6.411041259765625,
+      "learning_rate": 1.1761071060762102e-05,
+      "loss": 0.6835,
+      "step": 1200
+    },
+    {
+      "epoch": 1.2873326467559219,
+      "grad_norm": 4.373047351837158,
+      "learning_rate": 1.1417782354960522e-05,
+      "loss": 0.764,
+      "step": 1250
+    },
+    {
+      "epoch": 1.3388259526261586,
+      "grad_norm": 5.105803966522217,
+      "learning_rate": 1.1074493649158944e-05,
+      "loss": 0.7505,
+      "step": 1300
+    },
+    {
+      "epoch": 1.3903192584963955,
+      "grad_norm": 9.5465726852417,
+      "learning_rate": 1.0731204943357365e-05,
+      "loss": 0.7634,
+      "step": 1350
+    },
+    {
+      "epoch": 1.4418125643666324,
+      "grad_norm": 6.116573810577393,
+      "learning_rate": 1.0387916237555787e-05,
+      "loss": 0.6366,
+      "step": 1400
+    },
+    {
+      "epoch": 1.4933058702368691,
+      "grad_norm": 3.2460689544677734,
+      "learning_rate": 1.0044627531754205e-05,
+      "loss": 0.5989,
+      "step": 1450
+    },
+    {
+      "epoch": 1.544799176107106,
+      "grad_norm": 6.590632915496826,
+      "learning_rate": 9.701338825952627e-06,
+      "loss": 0.6879,
+      "step": 1500
+    },
+    {
+      "epoch": 1.596292481977343,
+      "grad_norm": 10.803317070007324,
+      "learning_rate": 9.358050120151047e-06,
+      "loss": 0.6825,
+      "step": 1550
+    },
+    {
+      "epoch": 1.64778578784758,
+      "grad_norm": 4.515988349914551,
+      "learning_rate": 9.014761414349469e-06,
+      "loss": 0.669,
+      "step": 1600
+    },
+    {
+      "epoch": 1.6992790937178168,
+      "grad_norm": 10.735493659973145,
+      "learning_rate": 8.67147270854789e-06,
+      "loss": 0.7096,
+      "step": 1650
+    },
+    {
+      "epoch": 1.7507723995880535,
+      "grad_norm": 7.9211039543151855,
+      "learning_rate": 8.32818400274631e-06,
+      "loss": 0.6438,
+      "step": 1700
+    },
+    {
+      "epoch": 1.8022657054582905,
+      "grad_norm": 4.6672282218933105,
+      "learning_rate": 7.984895296944732e-06,
+      "loss": 0.577,
+      "step": 1750
+    },
+    {
+      "epoch": 1.8537590113285272,
+      "grad_norm": 8.310284614562988,
+      "learning_rate": 7.641606591143153e-06,
+      "loss": 0.6346,
+      "step": 1800
+    },
+    {
+      "epoch": 1.905252317198764,
+      "grad_norm": 3.1157386302948,
+      "learning_rate": 7.2983178853415724e-06,
+      "loss": 0.6966,
+      "step": 1850
+    },
+    {
+      "epoch": 1.956745623069001,
+      "grad_norm": 6.533708572387695,
+      "learning_rate": 6.955029179539994e-06,
+      "loss": 0.6416,
+      "step": 1900
+    },
+    {
+      "epoch": 2.0,
+      "eval_accuracy": 0.779837775202781,
+      "eval_f1": 0.7431628971507812,
+      "eval_loss": 0.6662706732749939,
+      "eval_runtime": 233.4018,
+      "eval_samples_per_second": 3.697,
+      "eval_steps_per_second": 0.463,
+      "step": 1942
+    },
+    {
+      "epoch": 2.008238928939238,
+      "grad_norm": 4.492364406585693,
+      "learning_rate": 6.611740473738415e-06,
+      "loss": 0.5988,
+      "step": 1950
+    },
+    {
+      "epoch": 2.059732234809475,
+      "grad_norm": 2.9183290004730225,
+      "learning_rate": 6.268451767936835e-06,
+      "loss": 0.5622,
+      "step": 2000
+    },
+    {
+      "epoch": 2.111225540679712,
+      "grad_norm": 2.6700491905212402,
+      "learning_rate": 5.9251630621352564e-06,
+      "loss": 0.568,
+      "step": 2050
+    },
+    {
+      "epoch": 2.1627188465499483,
+      "grad_norm": 8.388718605041504,
+      "learning_rate": 5.581874356333677e-06,
+      "loss": 0.5647,
+      "step": 2100
+    },
+    {
+      "epoch": 2.214212152420185,
+      "grad_norm": 5.001288414001465,
+      "learning_rate": 5.238585650532097e-06,
+      "loss": 0.5651,
+      "step": 2150
+    },
+    {
+      "epoch": 2.265705458290422,
+      "grad_norm": 4.762598037719727,
+      "learning_rate": 4.895296944730519e-06,
+      "loss": 0.5591,
+      "step": 2200
+    },
+    {
+      "epoch": 2.317198764160659,
+      "grad_norm": 3.356541872024536,
+      "learning_rate": 4.5520082389289396e-06,
+      "loss": 0.5505,
+      "step": 2250
+    },
+    {
+      "epoch": 2.368692070030896,
+      "grad_norm": 5.389300346374512,
+      "learning_rate": 4.20871953312736e-06,
+      "loss": 0.5705,
+      "step": 2300
+    },
+    {
+      "epoch": 2.420185375901133,
+      "grad_norm": 6.133719444274902,
+      "learning_rate": 3.865430827325781e-06,
+      "loss": 0.583,
+      "step": 2350
+    },
+    {
+      "epoch": 2.47167868177137,
+      "grad_norm": 20.499935150146484,
+      "learning_rate": 3.522142121524202e-06,
+      "loss": 0.5125,
+      "step": 2400
+    },
+    {
+      "epoch": 2.5231719876416063,
+      "grad_norm": 6.460402011871338,
+      "learning_rate": 3.178853415722623e-06,
+      "loss": 0.529,
+      "step": 2450
+    },
+    {
+      "epoch": 2.5746652935118437,
+      "grad_norm": 6.227006912231445,
+      "learning_rate": 2.8355647099210435e-06,
+      "loss": 0.5187,
+      "step": 2500
+    },
+    {
+      "epoch": 2.62615859938208,
+      "grad_norm": 11.52637004852295,
+      "learning_rate": 2.4922760041194647e-06,
+      "loss": 0.4892,
+      "step": 2550
+    },
+    {
+      "epoch": 2.677651905252317,
+      "grad_norm": 10.004145622253418,
+      "learning_rate": 2.1489872983178855e-06,
+      "loss": 0.5233,
+      "step": 2600
+    },
+    {
+      "epoch": 2.729145211122554,
+      "grad_norm": 5.854581832885742,
+      "learning_rate": 1.8056985925163065e-06,
+      "loss": 0.5266,
+      "step": 2650
+    },
+    {
+      "epoch": 2.780638516992791,
+      "grad_norm": 5.217226982116699,
+      "learning_rate": 1.462409886714727e-06,
+      "loss": 0.457,
+      "step": 2700
+    },
+    {
+      "epoch": 2.832131822863028,
+      "grad_norm": 3.1806907653808594,
+      "learning_rate": 1.119121180913148e-06,
+      "loss": 0.5234,
+      "step": 2750
+    },
+    {
+      "epoch": 2.883625128733265,
+      "grad_norm": 3.520676374435425,
+      "learning_rate": 7.75832475111569e-07,
+      "loss": 0.4662,
+      "step": 2800
+    },
+    {
+      "epoch": 2.9351184346035017,
+      "grad_norm": 3.366509199142456,
+      "learning_rate": 4.3254376930998975e-07,
+      "loss": 0.4962,
+      "step": 2850
+    },
+    {
+      "epoch": 2.9866117404737382,
+      "grad_norm": 6.643517971038818,
+      "learning_rate": 8.925506350841058e-08,
+      "loss": 0.5214,
+      "step": 2900
+    },
+    {
+      "epoch": 3.0,
+      "eval_accuracy": 0.7821552723059096,
+      "eval_f1": 0.7549173833082047,
+      "eval_loss": 0.6564987301826477,
+      "eval_runtime": 219.0646,
+      "eval_samples_per_second": 3.939,
+      "eval_steps_per_second": 0.493,
+      "step": 2913
+    }
+  ],
+  "logging_steps": 50,
+  "max_steps": 2913,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 3,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 3065824222963200.0,
+  "train_batch_size": 8,
+  "trial_name": null,
+  "trial_params": null
+}