Umt06 commited on Nov 30, 2025

Commit

1939d7b

verified ·

1 Parent(s): e90a3cc

Training in progress, epoch 4

Browse files

Files changed (20) hide show

config.json +2 -2
model.safetensors +1 -1
run-2/checkpoint-345/config.json +2 -2
run-2/checkpoint-345/model.safetensors +1 -1
run-2/checkpoint-345/optimizer.pt +1 -1
run-2/checkpoint-345/scheduler.pt +1 -1
run-2/checkpoint-345/trainer_state.json +125 -125
run-2/checkpoint-345/training_args.bin +1 -1
run-2/checkpoint-460/config.json +26 -0
run-2/checkpoint-460/model.safetensors +3 -0
run-2/checkpoint-460/optimizer.pt +3 -0
run-2/checkpoint-460/rng_state.pth +3 -0
run-2/checkpoint-460/scheduler.pt +3 -0
run-2/checkpoint-460/special_tokens_map.json +7 -0
run-2/checkpoint-460/tokenizer.json +0 -0
run-2/checkpoint-460/tokenizer_config.json +56 -0
run-2/checkpoint-460/trainer_state.json +401 -0
run-2/checkpoint-460/training_args.bin +3 -0
run-2/checkpoint-460/vocab.txt +0 -0
training_args.bin +1 -1

config.json CHANGED Viewed

@@ -2,12 +2,12 @@
   "architectures": [
     "BertForSequenceClassification"
   ],
-  "attention_probs_dropout_prob": 0.27628785152324686,
   "classifier_dropout": null,
   "dtype": "float32",
   "gradient_checkpointing": false,
   "hidden_act": "gelu",
-  "hidden_dropout_prob": 0.27628785152324686,
   "hidden_size": 768,
   "initializer_range": 0.02,
   "intermediate_size": 3072,

   "architectures": [
     "BertForSequenceClassification"
   ],
+  "attention_probs_dropout_prob": 0.30716938624140044,
   "classifier_dropout": null,
   "dtype": "float32",
   "gradient_checkpointing": false,
   "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.30716938624140044,
   "hidden_size": 768,
   "initializer_range": 0.02,
   "intermediate_size": 3072,

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3ee129af0d3461cda8f43dcc1c04b86b09eae4aa5b5a7d2b62e4e4c660df4068
 size 437958648

 version https://git-lfs.github.com/spec/v1
+oid sha256:ee88310a0719da5fea2ba4af46040bc655bc67a15f87d4cb735b482610ab3273
 size 437958648

run-2/checkpoint-345/config.json CHANGED Viewed

@@ -2,12 +2,12 @@
   "architectures": [
     "BertForSequenceClassification"
   ],
-  "attention_probs_dropout_prob": 0.4355887560595135,
   "classifier_dropout": null,
   "dtype": "float32",
   "gradient_checkpointing": false,
   "hidden_act": "gelu",
-  "hidden_dropout_prob": 0.4355887560595135,
   "hidden_size": 768,
   "initializer_range": 0.02,
   "intermediate_size": 3072,

   "architectures": [
     "BertForSequenceClassification"
   ],
+  "attention_probs_dropout_prob": 0.30716938624140044,
   "classifier_dropout": null,
   "dtype": "float32",
   "gradient_checkpointing": false,
   "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.30716938624140044,
   "hidden_size": 768,
   "initializer_range": 0.02,
   "intermediate_size": 3072,

run-2/checkpoint-345/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a230d945b5f99fde00359bdc41eb4959cace16a3b6c2339f3c64fa30faea227a
 size 437958648

 version https://git-lfs.github.com/spec/v1
+oid sha256:59015fd59426bc6b714ac44d0a06544f1151a3bf783d9e726eff1b67087653d5
 size 437958648

run-2/checkpoint-345/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:cd21559bcdc2f5d4f6cb42aa4f3e3c76f702c6afd78c74e23ad7b23f969432e9
 size 876038394

 version https://git-lfs.github.com/spec/v1
+oid sha256:32ae245dad3710d524ee497bccce9c76c9fea8b12796a581c23a9a9744e2cdac
 size 876038394

run-2/checkpoint-345/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4e3221eb46f601e64928f5cd3e8e92c2e49ed394afd9ba340d07e78c615ee257
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:95a176f4542543fcc0daa5a5add4355639b9c7df2473d5a2a5b271a804478aef
 size 1064

run-2/checkpoint-345/trainer_state.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
-  "best_global_step": 230,
-  "best_metric": 0.696078431372549,
-  "best_model_checkpoint": "bert-base-uncased-finetuned-mrpc\\run-2\\checkpoint-230",
   "epoch": 3.0,
   "eval_steps": 500,
   "global_step": 345,
@@ -11,277 +11,277 @@
   "log_history": [
     {
       "epoch": 0.08724100327153762,
-      "grad_norm": 4.4148850440979,
-      "learning_rate": 5.3364625125575524e-05,
-      "loss": 0.7011,
       "step": 10
     },
     {
       "epoch": 0.17448200654307525,
-      "grad_norm": 4.751035690307617,
-      "learning_rate": 5.177639223493339e-05,
-      "loss": 0.642,
       "step": 20
     },
     {
       "epoch": 0.2617230098146129,
-      "grad_norm": 6.830023288726807,
-      "learning_rate": 5.018815934429126e-05,
-      "loss": 0.6572,
       "step": 30
     },
     {
       "epoch": 0.3489640130861505,
-      "grad_norm": 6.113767147064209,
-      "learning_rate": 4.8599926453649135e-05,
-      "loss": 0.6311,
       "step": 40
     },
     {
       "epoch": 0.4362050163576881,
-      "grad_norm": 4.641395568847656,
-      "learning_rate": 4.7011693563007e-05,
-      "loss": 0.5981,
       "step": 50
     },
     {
       "epoch": 0.5234460196292258,
-      "grad_norm": 3.66355299949646,
-      "learning_rate": 4.5423460672364874e-05,
-      "loss": 0.5738,
       "step": 60
     },
     {
       "epoch": 0.6106870229007634,
-      "grad_norm": 4.72593879699707,
-      "learning_rate": 4.3835227781722754e-05,
-      "loss": 0.5866,
       "step": 70
     },
     {
       "epoch": 0.697928026172301,
-      "grad_norm": 8.480412483215332,
-      "learning_rate": 4.224699489108062e-05,
-      "loss": 0.6254,
       "step": 80
     },
     {
       "epoch": 0.7851690294438386,
-      "grad_norm": 4.573983192443848,
-      "learning_rate": 4.065876200043849e-05,
-      "loss": 0.608,
       "step": 90
     },
     {
       "epoch": 0.8724100327153762,
-      "grad_norm": 4.324202060699463,
-      "learning_rate": 3.9070529109796365e-05,
-      "loss": 0.6458,
       "step": 100
     },
     {
       "epoch": 0.9596510359869138,
-      "grad_norm": 4.329319000244141,
-      "learning_rate": 3.748229621915423e-05,
-      "loss": 0.5813,
       "step": 110
     },
     {
       "epoch": 1.0,
       "eval_accuracy": 0.6838235294117647,
       "eval_f1": 0.8122270742358079,
-      "eval_loss": 0.6114351749420166,
-      "eval_runtime": 12.643,
-      "eval_samples_per_second": 32.271,
-      "eval_steps_per_second": 4.034,
       "step": 115
     },
     {
       "epoch": 1.043620501635769,
-      "grad_norm": 5.093260288238525,
-      "learning_rate": 3.5894063328512104e-05,
-      "loss": 0.6107,
       "step": 120
     },
     {
       "epoch": 1.1308615049073065,
-      "grad_norm": 6.72236967086792,
-      "learning_rate": 3.430583043786998e-05,
-      "loss": 0.5974,
       "step": 130
     },
     {
       "epoch": 1.2181025081788441,
-      "grad_norm": 4.98067569732666,
-      "learning_rate": 3.271759754722785e-05,
-      "loss": 0.5637,
       "step": 140
     },
     {
       "epoch": 1.3053435114503817,
-      "grad_norm": 4.982036590576172,
-      "learning_rate": 3.112936465658572e-05,
-      "loss": 0.5754,
       "step": 150
     },
     {
       "epoch": 1.3925845147219194,
-      "grad_norm": 5.333518028259277,
-      "learning_rate": 2.9541131765943592e-05,
-      "loss": 0.5781,
       "step": 160
     },
     {
       "epoch": 1.479825517993457,
-      "grad_norm": 6.1204514503479,
-      "learning_rate": 2.795289887530146e-05,
-      "loss": 0.5841,
       "step": 170
     },
     {
       "epoch": 1.5670665212649946,
-      "grad_norm": 11.200117111206055,
-      "learning_rate": 2.6364665984659334e-05,
-      "loss": 0.5665,
       "step": 180
     },
     {
       "epoch": 1.6543075245365322,
-      "grad_norm": 7.264375686645508,
-      "learning_rate": 2.4776433094017207e-05,
-      "loss": 0.6032,
       "step": 190
     },
     {
       "epoch": 1.7415485278080698,
-      "grad_norm": 8.504103660583496,
-      "learning_rate": 2.3188200203375077e-05,
-      "loss": 0.5764,
       "step": 200
     },
     {
       "epoch": 1.8287895310796074,
-      "grad_norm": 9.18921184539795,
-      "learning_rate": 2.159996731273295e-05,
-      "loss": 0.6016,
       "step": 210
     },
     {
       "epoch": 1.916030534351145,
-      "grad_norm": 13.669305801391602,
-      "learning_rate": 2.001173442209082e-05,
-      "loss": 0.6045,
       "step": 220
     },
     {
       "epoch": 2.0,
-      "grad_norm": 16.49704360961914,
-      "learning_rate": 1.842350153144869e-05,
-      "loss": 0.6168,
       "step": 230
     },
     {
       "epoch": 2.0,
-      "eval_accuracy": 0.696078431372549,
-      "eval_f1": 0.8181818181818182,
-      "eval_loss": 0.5858412981033325,
-      "eval_runtime": 12.3476,
-      "eval_samples_per_second": 33.043,
-      "eval_steps_per_second": 4.13,
       "step": 230
     },
     {
       "epoch": 2.087241003271538,
-      "grad_norm": 10.109269142150879,
-      "learning_rate": 1.6835268640806564e-05,
-      "loss": 0.618,
       "step": 240
     },
     {
       "epoch": 2.174482006543075,
-      "grad_norm": 9.174595832824707,
-      "learning_rate": 1.5247035750164434e-05,
-      "loss": 0.5813,
       "step": 250
     },
     {
       "epoch": 2.261723009814613,
-      "grad_norm": 7.199938774108887,
-      "learning_rate": 1.3658802859522307e-05,
-      "loss": 0.5333,
       "step": 260
     },
     {
       "epoch": 2.3489640130861504,
-      "grad_norm": 10.91189956665039,
-      "learning_rate": 1.2070569968880176e-05,
-      "loss": 0.5614,
       "step": 270
     },
     {
       "epoch": 2.4362050163576883,
-      "grad_norm": 8.650083541870117,
-      "learning_rate": 1.0482337078238049e-05,
-      "loss": 0.5926,
       "step": 280
     },
     {
       "epoch": 2.5234460196292257,
-      "grad_norm": 14.0360689163208,
-      "learning_rate": 8.89410418759592e-06,
-      "loss": 0.5575,
       "step": 290
     },
     {
       "epoch": 2.6106870229007635,
-      "grad_norm": 5.1986308097839355,
-      "learning_rate": 7.305871296953791e-06,
-      "loss": 0.578,
       "step": 300
     },
     {
       "epoch": 2.697928026172301,
-      "grad_norm": 6.444606304168701,
-      "learning_rate": 5.717638406311662e-06,
-      "loss": 0.5572,
       "step": 310
     },
     {
       "epoch": 2.7851690294438387,
-      "grad_norm": 12.22812271118164,
-      "learning_rate": 4.129405515669534e-06,
-      "loss": 0.5741,
       "step": 320
     },
     {
       "epoch": 2.872410032715376,
-      "grad_norm": 6.917333602905273,
-      "learning_rate": 2.5411726250274058e-06,
-      "loss": 0.5769,
       "step": 330
     },
     {
       "epoch": 2.959651035986914,
-      "grad_norm": 6.114295482635498,
-      "learning_rate": 9.529397343852771e-07,
-      "loss": 0.564,
       "step": 340
     },
     {
       "epoch": 3.0,
       "eval_accuracy": 0.6887254901960784,
       "eval_f1": 0.8145985401459854,
-      "eval_loss": 0.6323757767677307,
-      "eval_runtime": 12.4955,
-      "eval_samples_per_second": 32.652,
-      "eval_steps_per_second": 4.081,
       "step": 345
     }
   ],
   "logging_steps": 10,
-  "max_steps": 345,
   "num_input_tokens_seen": 0,
-  "num_train_epochs": 3,
   "save_steps": 500,
   "stateful_callbacks": {
     "TrainerControl": {
@@ -290,7 +290,7 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": true
       },
       "attributes": {}
     }
@@ -299,9 +299,9 @@
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": {
-    "hidden_dropout_prob": 0.4355887560595135,
-    "learning_rate": 5.4794034727153435e-05,
-    "num_train_epochs": 3,
     "per_device_train_batch_size": 4
   }
 }

 {
+  "best_global_step": 345,
+  "best_metric": 0.6887254901960784,
+  "best_model_checkpoint": "bert-base-uncased-finetuned-mrpc\\run-2\\checkpoint-345",
   "epoch": 3.0,
   "eval_steps": 500,
   "global_step": 345,
   "log_history": [
     {
       "epoch": 0.08724100327153762,
+      "grad_norm": 2.3152477741241455,
+      "learning_rate": 2.396012025677338e-06,
+      "loss": 0.6915,
       "step": 10
     },
     {
       "epoch": 0.17448200654307525,
+      "grad_norm": 7.123985290527344,
+      "learning_rate": 2.342885373223295e-06,
+      "loss": 0.6551,
       "step": 20
     },
     {
       "epoch": 0.2617230098146129,
+      "grad_norm": 2.969932794570923,
+      "learning_rate": 2.2897587207692523e-06,
+      "loss": 0.6812,
       "step": 30
     },
     {
       "epoch": 0.3489640130861505,
+      "grad_norm": 2.524810791015625,
+      "learning_rate": 2.236632068315209e-06,
+      "loss": 0.6443,
       "step": 40
     },
     {
       "epoch": 0.4362050163576881,
+      "grad_norm": 3.7716894149780273,
+      "learning_rate": 2.183505415861166e-06,
+      "loss": 0.6306,
       "step": 50
     },
     {
       "epoch": 0.5234460196292258,
+      "grad_norm": 6.62515926361084,
+      "learning_rate": 2.130378763407123e-06,
+      "loss": 0.619,
       "step": 60
     },
     {
       "epoch": 0.6106870229007634,
+      "grad_norm": 4.636287689208984,
+      "learning_rate": 2.0772521109530803e-06,
+      "loss": 0.6566,
       "step": 70
     },
     {
       "epoch": 0.697928026172301,
+      "grad_norm": 2.319833755493164,
+      "learning_rate": 2.024125458499037e-06,
+      "loss": 0.6577,
       "step": 80
     },
     {
       "epoch": 0.7851690294438386,
+      "grad_norm": 3.791926145553589,
+      "learning_rate": 1.9709988060449944e-06,
+      "loss": 0.6468,
       "step": 90
     },
     {
       "epoch": 0.8724100327153762,
+      "grad_norm": 4.255199432373047,
+      "learning_rate": 1.917872153590951e-06,
+      "loss": 0.663,
       "step": 100
     },
     {
       "epoch": 0.9596510359869138,
+      "grad_norm": 2.442918300628662,
+      "learning_rate": 1.8647455011369084e-06,
+      "loss": 0.6114,
       "step": 110
     },
     {
       "epoch": 1.0,
       "eval_accuracy": 0.6838235294117647,
       "eval_f1": 0.8122270742358079,
+      "eval_loss": 0.6194218397140503,
+      "eval_runtime": 12.4471,
+      "eval_samples_per_second": 32.779,
+      "eval_steps_per_second": 4.097,
       "step": 115
     },
     {
       "epoch": 1.043620501635769,
+      "grad_norm": 3.5363006591796875,
+      "learning_rate": 1.8116188486828654e-06,
+      "loss": 0.6394,
       "step": 120
     },
     {
       "epoch": 1.1308615049073065,
+      "grad_norm": 4.324810028076172,
+      "learning_rate": 1.7584921962288222e-06,
+      "loss": 0.6445,
       "step": 130
     },
     {
       "epoch": 1.2181025081788441,
+      "grad_norm": 4.151200294494629,
+      "learning_rate": 1.7053655437747794e-06,
+      "loss": 0.6276,
       "step": 140
     },
     {
       "epoch": 1.3053435114503817,
+      "grad_norm": 2.969996690750122,
+      "learning_rate": 1.6522388913207364e-06,
+      "loss": 0.6167,
       "step": 150
     },
     {
       "epoch": 1.3925845147219194,
+      "grad_norm": 2.5971832275390625,
+      "learning_rate": 1.5991122388666934e-06,
+      "loss": 0.644,
       "step": 160
     },
     {
       "epoch": 1.479825517993457,
+      "grad_norm": 3.444840431213379,
+      "learning_rate": 1.5459855864126504e-06,
+      "loss": 0.6292,
       "step": 170
     },
     {
       "epoch": 1.5670665212649946,
+      "grad_norm": 4.372523784637451,
+      "learning_rate": 1.4928589339586077e-06,
+      "loss": 0.6357,
       "step": 180
     },
     {
       "epoch": 1.6543075245365322,
+      "grad_norm": 5.912120342254639,
+      "learning_rate": 1.4397322815045647e-06,
+      "loss": 0.6113,
       "step": 190
     },
     {
       "epoch": 1.7415485278080698,
+      "grad_norm": 3.5597641468048096,
+      "learning_rate": 1.3866056290505217e-06,
+      "loss": 0.6195,
       "step": 200
     },
     {
       "epoch": 1.8287895310796074,
+      "grad_norm": 2.4704296588897705,
+      "learning_rate": 1.3334789765964785e-06,
+      "loss": 0.6397,
       "step": 210
     },
     {
       "epoch": 1.916030534351145,
+      "grad_norm": 4.187549114227295,
+      "learning_rate": 1.2803523241424355e-06,
+      "loss": 0.6145,
       "step": 220
     },
     {
       "epoch": 2.0,
+      "grad_norm": 3.662736415863037,
+      "learning_rate": 1.2272256716883927e-06,
+      "loss": 0.6161,
       "step": 230
     },
     {
       "epoch": 2.0,
+      "eval_accuracy": 0.6838235294117647,
+      "eval_f1": 0.8122270742358079,
+      "eval_loss": 0.6124567985534668,
+      "eval_runtime": 12.2949,
+      "eval_samples_per_second": 33.185,
+      "eval_steps_per_second": 4.148,
       "step": 230
     },
     {
       "epoch": 2.087241003271538,
+      "grad_norm": 4.740890979766846,
+      "learning_rate": 1.1740990192343497e-06,
+      "loss": 0.6427,
       "step": 240
     },
     {
       "epoch": 2.174482006543075,
+      "grad_norm": 3.5964860916137695,
+      "learning_rate": 1.1209723667803067e-06,
+      "loss": 0.6071,
       "step": 250
     },
     {
       "epoch": 2.261723009814613,
+      "grad_norm": 2.725940465927124,
+      "learning_rate": 1.0678457143262637e-06,
+      "loss": 0.6001,
       "step": 260
     },
     {
       "epoch": 2.3489640130861504,
+      "grad_norm": 4.717036724090576,
+      "learning_rate": 1.0147190618722207e-06,
+      "loss": 0.6061,
       "step": 270
     },
     {
       "epoch": 2.4362050163576883,
+      "grad_norm": 3.381378173828125,
+      "learning_rate": 9.615924094181778e-07,
+      "loss": 0.6195,
       "step": 280
     },
     {
       "epoch": 2.5234460196292257,
+      "grad_norm": 3.877357244491577,
+      "learning_rate": 9.084657569641349e-07,
+      "loss": 0.5771,
       "step": 290
     },
     {
       "epoch": 2.6106870229007635,
+      "grad_norm": 5.829881191253662,
+      "learning_rate": 8.553391045100918e-07,
+      "loss": 0.6278,
       "step": 300
     },
     {
       "epoch": 2.697928026172301,
+      "grad_norm": 4.983321189880371,
+      "learning_rate": 8.022124520560489e-07,
+      "loss": 0.5921,
       "step": 310
     },
     {
       "epoch": 2.7851690294438387,
+      "grad_norm": 3.385566473007202,
+      "learning_rate": 7.490857996020059e-07,
+      "loss": 0.6355,
       "step": 320
     },
     {
       "epoch": 2.872410032715376,
+      "grad_norm": 3.0855424404144287,
+      "learning_rate": 6.95959147147963e-07,
+      "loss": 0.6111,
       "step": 330
     },
     {
       "epoch": 2.959651035986914,
+      "grad_norm": 3.3900763988494873,
+      "learning_rate": 6.428324946939199e-07,
+      "loss": 0.6181,
       "step": 340
     },
     {
       "epoch": 3.0,
       "eval_accuracy": 0.6887254901960784,
       "eval_f1": 0.8145985401459854,
+      "eval_loss": 0.6093047857284546,
+      "eval_runtime": 12.431,
+      "eval_samples_per_second": 32.821,
+      "eval_steps_per_second": 4.103,
       "step": 345
     }
   ],
   "logging_steps": 10,
+  "max_steps": 460,
   "num_input_tokens_seen": 0,
+  "num_train_epochs": 4,
   "save_steps": 500,
   "stateful_callbacks": {
     "TrainerControl": {
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": false
       },
       "attributes": {}
     }
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": {
+    "hidden_dropout_prob": 0.30716938624140044,
+    "learning_rate": 2.4438260128859767e-06,
+    "num_train_epochs": 4,
     "per_device_train_batch_size": 4
   }
 }

run-2/checkpoint-345/training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d7910ec94831bf816572caa453dd30c002c77ffc3b3e3cdec79d030a73f5e993
 size 5432

 version https://git-lfs.github.com/spec/v1
+oid sha256:8017655c1a55cbfc9386d425ba50c1e9b58602655bfee37869fd1e3fccbcf104
 size 5432

run-2/checkpoint-460/config.json ADDED Viewed

	@@ -0,0 +1,26 @@

+{
+  "architectures": [
+    "BertForSequenceClassification"
+  ],
+  "attention_probs_dropout_prob": 0.30716938624140044,
+  "classifier_dropout": null,
+  "dtype": "float32",
+  "gradient_checkpointing": false,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.30716938624140044,
+  "hidden_size": 768,
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "layer_norm_eps": 1e-12,
+  "max_position_embeddings": 512,
+  "model_type": "bert",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 12,
+  "pad_token_id": 0,
+  "position_embedding_type": "absolute",
+  "problem_type": "single_label_classification",
+  "transformers_version": "4.57.1",
+  "type_vocab_size": 2,
+  "use_cache": true,
+  "vocab_size": 30522
+}

run-2/checkpoint-460/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ee88310a0719da5fea2ba4af46040bc655bc67a15f87d4cb735b482610ab3273
+size 437958648

run-2/checkpoint-460/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3054a4e7edf4e84d5e7d24d91760a82233c33c49198264e2a2090efc62822104
+size 876038394

run-2/checkpoint-460/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7ac81eae5b6a91a1c2a4e96af771c2481446f42d07f79a97683469ebd698e7ca
+size 14244

run-2/checkpoint-460/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4590d0c77692194fb0c6528622656f06517e46d3862041f2a68656bf5654acb7
+size 1064

run-2/checkpoint-460/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "cls_token": "[CLS]",
+  "mask_token": "[MASK]",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "unk_token": "[UNK]"
+}

run-2/checkpoint-460/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

run-2/checkpoint-460/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,56 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "100": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "101": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "102": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "103": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "clean_up_tokenization_spaces": false,
+  "cls_token": "[CLS]",
+  "do_lower_case": true,
+  "extra_special_tokens": {},
+  "mask_token": "[MASK]",
+  "model_max_length": 512,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "BertTokenizer",
+  "unk_token": "[UNK]"
+}

run-2/checkpoint-460/trainer_state.json ADDED Viewed

	@@ -0,0 +1,401 @@

+{
+  "best_global_step": 345,
+  "best_metric": 0.6887254901960784,
+  "best_model_checkpoint": "bert-base-uncased-finetuned-mrpc\\run-2\\checkpoint-345",
+  "epoch": 4.0,
+  "eval_steps": 500,
+  "global_step": 460,
+  "is_hyper_param_search": true,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.08724100327153762,
+      "grad_norm": 2.3152477741241455,
+      "learning_rate": 2.396012025677338e-06,
+      "loss": 0.6915,
+      "step": 10
+    },
+    {
+      "epoch": 0.17448200654307525,
+      "grad_norm": 7.123985290527344,
+      "learning_rate": 2.342885373223295e-06,
+      "loss": 0.6551,
+      "step": 20
+    },
+    {
+      "epoch": 0.2617230098146129,
+      "grad_norm": 2.969932794570923,
+      "learning_rate": 2.2897587207692523e-06,
+      "loss": 0.6812,
+      "step": 30
+    },
+    {
+      "epoch": 0.3489640130861505,
+      "grad_norm": 2.524810791015625,
+      "learning_rate": 2.236632068315209e-06,
+      "loss": 0.6443,
+      "step": 40
+    },
+    {
+      "epoch": 0.4362050163576881,
+      "grad_norm": 3.7716894149780273,
+      "learning_rate": 2.183505415861166e-06,
+      "loss": 0.6306,
+      "step": 50
+    },
+    {
+      "epoch": 0.5234460196292258,
+      "grad_norm": 6.62515926361084,
+      "learning_rate": 2.130378763407123e-06,
+      "loss": 0.619,
+      "step": 60
+    },
+    {
+      "epoch": 0.6106870229007634,
+      "grad_norm": 4.636287689208984,
+      "learning_rate": 2.0772521109530803e-06,
+      "loss": 0.6566,
+      "step": 70
+    },
+    {
+      "epoch": 0.697928026172301,
+      "grad_norm": 2.319833755493164,
+      "learning_rate": 2.024125458499037e-06,
+      "loss": 0.6577,
+      "step": 80
+    },
+    {
+      "epoch": 0.7851690294438386,
+      "grad_norm": 3.791926145553589,
+      "learning_rate": 1.9709988060449944e-06,
+      "loss": 0.6468,
+      "step": 90
+    },
+    {
+      "epoch": 0.8724100327153762,
+      "grad_norm": 4.255199432373047,
+      "learning_rate": 1.917872153590951e-06,
+      "loss": 0.663,
+      "step": 100
+    },
+    {
+      "epoch": 0.9596510359869138,
+      "grad_norm": 2.442918300628662,
+      "learning_rate": 1.8647455011369084e-06,
+      "loss": 0.6114,
+      "step": 110
+    },
+    {
+      "epoch": 1.0,
+      "eval_accuracy": 0.6838235294117647,
+      "eval_f1": 0.8122270742358079,
+      "eval_loss": 0.6194218397140503,
+      "eval_runtime": 12.4471,
+      "eval_samples_per_second": 32.779,
+      "eval_steps_per_second": 4.097,
+      "step": 115
+    },
+    {
+      "epoch": 1.043620501635769,
+      "grad_norm": 3.5363006591796875,
+      "learning_rate": 1.8116188486828654e-06,
+      "loss": 0.6394,
+      "step": 120
+    },
+    {
+      "epoch": 1.1308615049073065,
+      "grad_norm": 4.324810028076172,
+      "learning_rate": 1.7584921962288222e-06,
+      "loss": 0.6445,
+      "step": 130
+    },
+    {
+      "epoch": 1.2181025081788441,
+      "grad_norm": 4.151200294494629,
+      "learning_rate": 1.7053655437747794e-06,
+      "loss": 0.6276,
+      "step": 140
+    },
+    {
+      "epoch": 1.3053435114503817,
+      "grad_norm": 2.969996690750122,
+      "learning_rate": 1.6522388913207364e-06,
+      "loss": 0.6167,
+      "step": 150
+    },
+    {
+      "epoch": 1.3925845147219194,
+      "grad_norm": 2.5971832275390625,
+      "learning_rate": 1.5991122388666934e-06,
+      "loss": 0.644,
+      "step": 160
+    },
+    {
+      "epoch": 1.479825517993457,
+      "grad_norm": 3.444840431213379,
+      "learning_rate": 1.5459855864126504e-06,
+      "loss": 0.6292,
+      "step": 170
+    },
+    {
+      "epoch": 1.5670665212649946,
+      "grad_norm": 4.372523784637451,
+      "learning_rate": 1.4928589339586077e-06,
+      "loss": 0.6357,
+      "step": 180
+    },
+    {
+      "epoch": 1.6543075245365322,
+      "grad_norm": 5.912120342254639,
+      "learning_rate": 1.4397322815045647e-06,
+      "loss": 0.6113,
+      "step": 190
+    },
+    {
+      "epoch": 1.7415485278080698,
+      "grad_norm": 3.5597641468048096,
+      "learning_rate": 1.3866056290505217e-06,
+      "loss": 0.6195,
+      "step": 200
+    },
+    {
+      "epoch": 1.8287895310796074,
+      "grad_norm": 2.4704296588897705,
+      "learning_rate": 1.3334789765964785e-06,
+      "loss": 0.6397,
+      "step": 210
+    },
+    {
+      "epoch": 1.916030534351145,
+      "grad_norm": 4.187549114227295,
+      "learning_rate": 1.2803523241424355e-06,
+      "loss": 0.6145,
+      "step": 220
+    },
+    {
+      "epoch": 2.0,
+      "grad_norm": 3.662736415863037,
+      "learning_rate": 1.2272256716883927e-06,
+      "loss": 0.6161,
+      "step": 230
+    },
+    {
+      "epoch": 2.0,
+      "eval_accuracy": 0.6838235294117647,
+      "eval_f1": 0.8122270742358079,
+      "eval_loss": 0.6124567985534668,
+      "eval_runtime": 12.2949,
+      "eval_samples_per_second": 33.185,
+      "eval_steps_per_second": 4.148,
+      "step": 230
+    },
+    {
+      "epoch": 2.087241003271538,
+      "grad_norm": 4.740890979766846,
+      "learning_rate": 1.1740990192343497e-06,
+      "loss": 0.6427,
+      "step": 240
+    },
+    {
+      "epoch": 2.174482006543075,
+      "grad_norm": 3.5964860916137695,
+      "learning_rate": 1.1209723667803067e-06,
+      "loss": 0.6071,
+      "step": 250
+    },
+    {
+      "epoch": 2.261723009814613,
+      "grad_norm": 2.725940465927124,
+      "learning_rate": 1.0678457143262637e-06,
+      "loss": 0.6001,
+      "step": 260
+    },
+    {
+      "epoch": 2.3489640130861504,
+      "grad_norm": 4.717036724090576,
+      "learning_rate": 1.0147190618722207e-06,
+      "loss": 0.6061,
+      "step": 270
+    },
+    {
+      "epoch": 2.4362050163576883,
+      "grad_norm": 3.381378173828125,
+      "learning_rate": 9.615924094181778e-07,
+      "loss": 0.6195,
+      "step": 280
+    },
+    {
+      "epoch": 2.5234460196292257,
+      "grad_norm": 3.877357244491577,
+      "learning_rate": 9.084657569641349e-07,
+      "loss": 0.5771,
+      "step": 290
+    },
+    {
+      "epoch": 2.6106870229007635,
+      "grad_norm": 5.829881191253662,
+      "learning_rate": 8.553391045100918e-07,
+      "loss": 0.6278,
+      "step": 300
+    },
+    {
+      "epoch": 2.697928026172301,
+      "grad_norm": 4.983321189880371,
+      "learning_rate": 8.022124520560489e-07,
+      "loss": 0.5921,
+      "step": 310
+    },
+    {
+      "epoch": 2.7851690294438387,
+      "grad_norm": 3.385566473007202,
+      "learning_rate": 7.490857996020059e-07,
+      "loss": 0.6355,
+      "step": 320
+    },
+    {
+      "epoch": 2.872410032715376,
+      "grad_norm": 3.0855424404144287,
+      "learning_rate": 6.95959147147963e-07,
+      "loss": 0.6111,
+      "step": 330
+    },
+    {
+      "epoch": 2.959651035986914,
+      "grad_norm": 3.3900763988494873,
+      "learning_rate": 6.428324946939199e-07,
+      "loss": 0.6181,
+      "step": 340
+    },
+    {
+      "epoch": 3.0,
+      "eval_accuracy": 0.6887254901960784,
+      "eval_f1": 0.8145985401459854,
+      "eval_loss": 0.6093047857284546,
+      "eval_runtime": 12.431,
+      "eval_samples_per_second": 32.821,
+      "eval_steps_per_second": 4.103,
+      "step": 345
+    },
+    {
+      "epoch": 3.0436205016357687,
+      "grad_norm": 3.486827850341797,
+      "learning_rate": 5.897058422398769e-07,
+      "loss": 0.561,
+      "step": 350
+    },
+    {
+      "epoch": 3.1308615049073065,
+      "grad_norm": 3.283555507659912,
+      "learning_rate": 5.36579189785834e-07,
+      "loss": 0.6097,
+      "step": 360
+    },
+    {
+      "epoch": 3.218102508178844,
+      "grad_norm": 4.11808443069458,
+      "learning_rate": 4.834525373317911e-07,
+      "loss": 0.5979,
+      "step": 370
+    },
+    {
+      "epoch": 3.3053435114503817,
+      "grad_norm": 2.6555957794189453,
+      "learning_rate": 4.3032588487774807e-07,
+      "loss": 0.5943,
+      "step": 380
+    },
+    {
+      "epoch": 3.392584514721919,
+      "grad_norm": 2.754282236099243,
+      "learning_rate": 3.771992324237051e-07,
+      "loss": 0.5948,
+      "step": 390
+    },
+    {
+      "epoch": 3.479825517993457,
+      "grad_norm": 3.611149787902832,
+      "learning_rate": 3.2407257996966214e-07,
+      "loss": 0.5928,
+      "step": 400
+    },
+    {
+      "epoch": 3.5670665212649943,
+      "grad_norm": 4.274903297424316,
+      "learning_rate": 2.7094592751561915e-07,
+      "loss": 0.5866,
+      "step": 410
+    },
+    {
+      "epoch": 3.654307524536532,
+      "grad_norm": 3.5177125930786133,
+      "learning_rate": 2.178192750615762e-07,
+      "loss": 0.6009,
+      "step": 420
+    },
+    {
+      "epoch": 3.74154852780807,
+      "grad_norm": 3.027575969696045,
+      "learning_rate": 1.646926226075332e-07,
+      "loss": 0.6174,
+      "step": 430
+    },
+    {
+      "epoch": 3.8287895310796074,
+      "grad_norm": 2.8280646800994873,
+      "learning_rate": 1.1156597015349024e-07,
+      "loss": 0.631,
+      "step": 440
+    },
+    {
+      "epoch": 3.916030534351145,
+      "grad_norm": 3.2580528259277344,
+      "learning_rate": 5.843931769944727e-08,
+      "loss": 0.6025,
+      "step": 450
+    },
+    {
+      "epoch": 4.0,
+      "grad_norm": 3.878319501876831,
+      "learning_rate": 5.312665245404297e-09,
+      "loss": 0.6509,
+      "step": 460
+    },
+    {
+      "epoch": 4.0,
+      "eval_accuracy": 0.6887254901960784,
+      "eval_f1": 0.8145985401459854,
+      "eval_loss": 0.6089843511581421,
+      "eval_runtime": 12.3068,
+      "eval_samples_per_second": 33.153,
+      "eval_steps_per_second": 4.144,
+      "step": 460
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 460,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 4,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 965091351060480.0,
+  "train_batch_size": 4,
+  "trial_name": null,
+  "trial_params": {
+    "hidden_dropout_prob": 0.30716938624140044,
+    "learning_rate": 2.4438260128859767e-06,
+    "num_train_epochs": 4,
+    "per_device_train_batch_size": 4
+  }
+}

run-2/checkpoint-460/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8017655c1a55cbfc9386d425ba50c1e9b58602655bfee37869fd1e3fccbcf104
+size 5432

run-2/checkpoint-460/vocab.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:41a153127cb706537432ced8326839888209aadcf2a2673718dc223a43a0dd31
 size 5432

 version https://git-lfs.github.com/spec/v1
+oid sha256:8017655c1a55cbfc9386d425ba50c1e9b58602655bfee37869fd1e3fccbcf104
 size 5432