End of training

Files changed (5) hide show

README.md CHANGED Viewed

@@ -15,12 +15,12 @@ should probably proofread and complete it, then remove this comment. -->
 This model is a fine-tuned version of [bert-base-uncased](https://huggingface.co/bert-base-uncased) on an unknown dataset.
 It achieves the following results on the evaluation set:
-- eval_loss: 8.6995
-- eval_runtime: 83.8513
-- eval_samples_per_second: 59.629
-- eval_steps_per_second: 0.942
-- epoch: 0.03
-- step: 40
 ## Model description
@@ -43,11 +43,11 @@ The following hyperparameters were used during training:
 - train_batch_size: 16
 - eval_batch_size: 64
 - seed: 42
-- gradient_accumulation_steps: 4
-- total_train_batch_size: 64
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: linear
-- num_epochs: 50
 - mixed_precision_training: Native AMP
 ### Framework versions

 This model is a fine-tuned version of [bert-base-uncased](https://huggingface.co/bert-base-uncased) on an unknown dataset.
 It achieves the following results on the evaluation set:
+- eval_loss: 2.8215
+- eval_runtime: 84.9249
+- eval_samples_per_second: 58.876
+- eval_steps_per_second: 0.93
+- epoch: 2.02
+- step: 1500
 ## Model description
 - train_batch_size: 16
 - eval_batch_size: 64
 - seed: 42
+- gradient_accumulation_steps: 8
+- total_train_batch_size: 128
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: linear
+- num_epochs: 100
 - mixed_precision_training: Native AMP
 ### Framework versions

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5d798163ba1887a6557a6b3f33e6aa239ed97bbbd2c9f931d72c44c7cd8eeb88
 size 535667604

 version https://git-lfs.github.com/spec/v1
+oid sha256:deef4e21ca3e5ef60fd595e9b0c12db796046b14fcd1183b786136a45ff02da3
 size 535667604

runs/Jan05_09-37-14_be54e94fe191/events.out.tfevents.1704447448.be54e94fe191.25.0 ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:29e2786d518ed72e256917d510d90de7500fae644419846b4870b0ed45759115
+size 1717297

trainer_state.json CHANGED Viewed

@@ -1,48 +1,34 @@
 {
-  "best_metric": 9.321273803710938,
-  "best_model_checkpoint": "ner-bert-ingredients/checkpoint-20",
-  "epoch": 0.02694509936005389,
-  "eval_steps": 20,
-  "global_step": 40,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
   "log_history": [
     {
-      "epoch": 0.01,
-      "learning_rate": 4.9986522911051216e-05,
-      "loss": 9.8102,
-      "step": 20
     },
     {
-      "epoch": 0.01,
-      "eval_loss": 9.321273803710938,
-      "eval_runtime": 84.1492,
-      "eval_samples_per_second": 59.418,
-      "eval_steps_per_second": 0.939,
-      "step": 20
-    },
-    {
-      "epoch": 0.03,
-      "learning_rate": 4.9973045822102424e-05,
-      "loss": 9.0849,
-      "step": 40
-    },
-    {
-      "epoch": 0.03,
-      "eval_loss": 8.699505805969238,
-      "eval_runtime": 83.8513,
-      "eval_samples_per_second": 59.629,
-      "eval_steps_per_second": 0.942,
-      "step": 40
     }
   ],
-  "logging_steps": 20,
   "max_steps": 74200,
   "num_input_tokens_seen": 0,
-  "num_train_epochs": 50,
-  "save_steps": 20,
-  "total_flos": 865668287692800.0,
   "train_batch_size": 16,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 2.0208824520040416,
+  "eval_steps": 1500,
+  "global_step": 1500,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
   "log_history": [
     {
+      "epoch": 2.02,
+      "learning_rate": 4.8989218328840976e-05,
+      "loss": 4.3834,
+      "step": 1500
     },
     {
+      "epoch": 2.02,
+      "eval_loss": 2.8215107917785645,
+      "eval_runtime": 84.9249,
+      "eval_samples_per_second": 58.876,
+      "eval_steps_per_second": 0.93,
+      "step": 1500
     }
   ],
+  "logging_steps": 1500,
   "max_steps": 74200,
   "num_input_tokens_seen": 0,
+  "num_train_epochs": 100,
+  "save_steps": 1500,
+  "total_flos": 6.491971115016192e+16,
   "train_batch_size": 16,
   "trial_name": null,
   "trial_params": null

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e7e0cc5c26c36bde050839a58f0c4f021e92cbaffecf465dbdc0090a08f22c21
 size 4283

 version https://git-lfs.github.com/spec/v1
+oid sha256:f265e1dd273ce96b78c47255a0e2ff65085a88d5afd5c3adc7b2f78aafd5a2a8
 size 4283