SM committed on Dec 27, 2023

Commit

c3eb26d

1 Parent(s): 90db77c

With better accuracy

Files changed (31) hide show

README.md +7 -7
all_results.json +11 -11
checkpoint-1000/model.safetensors +1 -1
checkpoint-1000/optimizer.pt +2 -2
checkpoint-1000/rng_state.pth +2 -2
checkpoint-1000/scheduler.pt +1 -1
checkpoint-1000/trainer_state.json +11 -11
checkpoint-1000/training_args.bin +1 -1
checkpoint-1500/model.safetensors +1 -1
checkpoint-1500/optimizer.pt +2 -2
checkpoint-1500/rng_state.pth +2 -2
checkpoint-1500/scheduler.pt +1 -1
checkpoint-1500/trainer_state.json +14 -14
checkpoint-1500/training_args.bin +1 -1
checkpoint-2000/model.safetensors +1 -1
checkpoint-2000/optimizer.pt +2 -2
checkpoint-2000/rng_state.pth +2 -2
checkpoint-2000/scheduler.pt +1 -1
checkpoint-2000/trainer_state.json +17 -17
checkpoint-2000/training_args.bin +1 -1
checkpoint-500/model.safetensors +1 -1
checkpoint-500/optimizer.pt +2 -2
checkpoint-500/rng_state.pth +2 -2
checkpoint-500/scheduler.pt +1 -1
checkpoint-500/trainer_state.json +8 -8
checkpoint-500/training_args.bin +1 -1
eval_results.json +7 -7
model.safetensors +1 -1
train_results.json +5 -5
trainer_state.json +25 -31
training_args.bin +1 -1

README.md CHANGED Viewed

@@ -17,8 +17,8 @@ should probably proofread and complete it, then remove this comment. -->
 This model is a fine-tuned version of [gpt2](https://huggingface.co/gpt2) on an unknown dataset.
 It achieves the following results on the evaluation set:
-- Loss: 52.0337
-- Accuracy: 0.1243
 ## Model description
@@ -38,12 +38,12 @@ More information needed
 The following hyperparameters were used during training:
 - learning_rate: 5e-05
-- train_batch_size: 2
-- eval_batch_size: 2
 - seed: 42
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: linear
-- num_epochs: 40.0
 ### Training results
@@ -52,6 +52,6 @@ The following hyperparameters were used during training:
 ### Framework versions
 - Transformers 4.37.0.dev0
-- Pytorch 2.1.2
-- Datasets 2.15.0
 - Tokenizers 0.15.0

 This model is a fine-tuned version of [gpt2](https://huggingface.co/gpt2) on an unknown dataset.
 It achieves the following results on the evaluation set:
+- Loss: 3.5093
+- Accuracy: 0.4494
 ## Model description
 The following hyperparameters were used during training:
 - learning_rate: 5e-05
+- train_batch_size: 4
+- eval_batch_size: 4
 - seed: 42
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: linear
+- num_epochs: 60.0
 ### Training results
 ### Framework versions
 - Transformers 4.37.0.dev0
+- Pytorch 2.1.2+cu121
+- Datasets 2.16.0
 - Tokenizers 0.15.0

all_results.json CHANGED Viewed

@@ -1,15 +1,15 @@
 {
-    "epoch": 40.0,
-    "eval_accuracy": 0.12425328554360812,
-    "eval_loss": 52.03367233276367,
-    "eval_runtime": 4.1042,
     "eval_samples": 9,
-    "eval_samples_per_second": 2.193,
-    "eval_steps_per_second": 1.218,
-    "perplexity": 3.962203408827054e+22,
-    "train_loss": 57.43311643738677,
-    "train_runtime": 10482.6781,
     "train_samples": 138,
-    "train_samples_per_second": 0.527,
-    "train_steps_per_second": 0.263
 }

 {
+    "epoch": 60.0,
+    "eval_accuracy": 0.4494406429890301,
+    "eval_loss": 3.5092644691467285,
+    "eval_runtime": 0.6069,
     "eval_samples": 9,
+    "eval_samples_per_second": 14.829,
+    "eval_steps_per_second": 4.943,
+    "perplexity": 33.423674596634115,
+    "train_loss": 0.9941332481020972,
+    "train_runtime": 1577.313,
     "train_samples": 138,
+    "train_samples_per_second": 5.249,
+    "train_steps_per_second": 1.331
 }

checkpoint-1000/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:bfcdd32060421fc062c6972b23088021b78ee341a6ba56ac82f86eaea8a9be39
 size 497774208

 version https://git-lfs.github.com/spec/v1
+oid sha256:12f996d0719a66b10d90e36ef43b31f9eaae1c31749508f6854af308bb085736
 size 497774208

checkpoint-1000/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:40792add400940242337cb4f1c1ded33fc53932d579e2aafc1ad92e26b9120ad
-size 995638202

 version https://git-lfs.github.com/spec/v1
+oid sha256:9adc9e7ab852a33f9fe84c647c87587bada024b6538c02088b10c9c8717806e2
+size 995642298

checkpoint-1000/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2248774053cf007b7093c6e0bb2c3b3dd6eaa25d185fd835bab801482da4e4b0
-size 13990

 version https://git-lfs.github.com/spec/v1
+oid sha256:0a6632d83b7bb45efc05bef8c034ed3b2854a29a949ed96a7ca5bd50bcb7d902
+size 14244

checkpoint-1000/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3898258d676f040a88d5e204cd4b72f355d3dc5e6acf2f9d957635fad24937e8
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:eee3c9505132ed967b6539dd4a6fb45e2bc29520ec4ec39ac5c68d846d45dec5
 size 1064

checkpoint-1000/trainer_state.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 14.492753623188406,
   "eval_steps": 500,
   "global_step": 1000,
   "is_hyper_param_search": false,
@@ -9,25 +9,25 @@
   "is_world_process_zero": true,
   "log_history": [
     {
-      "epoch": 7.25,
-      "learning_rate": 4.094202898550725e-05,
-      "loss": 52.964,
       "step": 500
     },
     {
-      "epoch": 14.49,
-      "learning_rate": 3.188405797101449e-05,
-      "loss": 63.81,
       "step": 1000
     }
   ],
   "logging_steps": 500,
-  "max_steps": 2760,
   "num_input_tokens_seen": 0,
-  "num_train_epochs": 40,
   "save_steps": 500,
-  "total_flos": 1045168128000000.0,
-  "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null
 }

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 28.571428571428573,
   "eval_steps": 500,
   "global_step": 1000,
   "is_hyper_param_search": false,
   "is_world_process_zero": true,
   "log_history": [
     {
+      "epoch": 14.29,
+      "learning_rate": 3.809523809523809e-05,
+      "loss": 1.8052,
       "step": 500
     },
     {
+      "epoch": 28.57,
+      "learning_rate": 2.6190476190476192e-05,
+      "loss": 1.0819,
       "step": 1000
     }
   ],
   "logging_steps": 500,
+  "max_steps": 2100,
   "num_input_tokens_seen": 0,
+  "num_train_epochs": 60,
   "save_steps": 500,
+  "total_flos": 2061071548416000.0,
+  "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null
 }

checkpoint-1000/training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3901907ca8b14655a382a70720bd9e1bb2f76f1edb2679dd829e743bc3f6bc3e
 size 4664

 version https://git-lfs.github.com/spec/v1
+oid sha256:7fb1fd55327e691254314ff0db99123383d66b9482283a479aeb28d89c26938a
 size 4664

checkpoint-1500/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:42f5e565cdb79f9110a6d84d8389311e50392871d64a8891dbde0a227a8788dc
 size 497774208

 version https://git-lfs.github.com/spec/v1
+oid sha256:6db548188c57c7cc26a03d2f3836dac8ae7b3f171ffc94f210669f0684391440
 size 497774208

checkpoint-1500/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8e601a8de001ab43374799bb279945ab8304ecc9cb6457dd39819746e3509e5a
-size 995638202

 version https://git-lfs.github.com/spec/v1
+oid sha256:0857cce43798c01f9e56b21f550de159b359b827bbbe6664dc0920bb722a5373
+size 995642298

checkpoint-1500/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:13fd47b12859b8841c4b8248c9b246be3d9ced25781b423c40d0b3a010fa7653
-size 13990

 version https://git-lfs.github.com/spec/v1
+oid sha256:c00755da9ed0bcc7ab4ff6d1881daf48c315a760cde6596f5e5de6ebdb5140f8
+size 14244

checkpoint-1500/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4d8150471eaa0602abf5ca49129f5d5e1a49fbee7998e0a72bf6f710952d97a1
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:f136c5ddfbd5bfe9857c433dcc2fc706e931bc068d2eabf598b25c109d462906
 size 1064

checkpoint-1500/trainer_state.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 21.73913043478261,
   "eval_steps": 500,
   "global_step": 1500,
   "is_hyper_param_search": false,
@@ -9,31 +9,31 @@
   "is_world_process_zero": true,
   "log_history": [
     {
-      "epoch": 7.25,
-      "learning_rate": 4.094202898550725e-05,
-      "loss": 52.964,
       "step": 500
     },
     {
-      "epoch": 14.49,
-      "learning_rate": 3.188405797101449e-05,
-      "loss": 63.81,
       "step": 1000
     },
     {
-      "epoch": 21.74,
-      "learning_rate": 2.282608695652174e-05,
-      "loss": 62.5429,
       "step": 1500
     }
   ],
   "logging_steps": 500,
-  "max_steps": 2760,
   "num_input_tokens_seen": 0,
-  "num_train_epochs": 40,
   "save_steps": 500,
-  "total_flos": 1567752192000000.0,
-  "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null
 }

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 42.857142857142854,
   "eval_steps": 500,
   "global_step": 1500,
   "is_hyper_param_search": false,
   "is_world_process_zero": true,
   "log_history": [
     {
+      "epoch": 14.29,
+      "learning_rate": 3.809523809523809e-05,
+      "loss": 1.8052,
       "step": 500
     },
     {
+      "epoch": 28.57,
+      "learning_rate": 2.6190476190476192e-05,
+      "loss": 1.0819,
       "step": 1000
     },
     {
+      "epoch": 42.86,
+      "learning_rate": 1.4285714285714285e-05,
+      "loss": 0.6781,
       "step": 1500
     }
   ],
   "logging_steps": 500,
+  "max_steps": 2100,
   "num_input_tokens_seen": 0,
+  "num_train_epochs": 60,
   "save_steps": 500,
+  "total_flos": 3091607322624000.0,
+  "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null
 }

checkpoint-1500/training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3901907ca8b14655a382a70720bd9e1bb2f76f1edb2679dd829e743bc3f6bc3e
 size 4664

 version https://git-lfs.github.com/spec/v1
+oid sha256:7fb1fd55327e691254314ff0db99123383d66b9482283a479aeb28d89c26938a
 size 4664

checkpoint-2000/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:dcbe070b82059badc3cff1bfc0bcae3f883ada68f07a60fa8da20273ad31d041
 size 497774208

 version https://git-lfs.github.com/spec/v1
+oid sha256:49f835d6e202641570e280cfd02c7abc724ffcf647ef3c4919ddf9d0244fefb0
 size 497774208

checkpoint-2000/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:52b6e90b1598b433558c8544104af14d2e9899a893662f3665492f6a88cfb7e1
-size 995638202

 version https://git-lfs.github.com/spec/v1
+oid sha256:b5953577a0a2b353baf20d78a8d3cafd7804195fc51c78ba605dbd587f53247e
+size 995642298

checkpoint-2000/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8af998d92b14891eae8da6a02f34398e26c284418aafc0720f904f72ebc45e9b
-size 13990

 version https://git-lfs.github.com/spec/v1
+oid sha256:54f33e1ce978f1c3e97263679165565a2045be7c9c46fe4e4856a58d36de3efa
+size 14244

checkpoint-2000/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b6dd30ada5b40093c7c92eee80875a56bbece06a0cd26cc8b5c5b15dca76defd
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:9389d2d6b649add7b167e5f96d84163bb4de87fbdbcea2e7d94c8fc162243048
 size 1064

checkpoint-2000/trainer_state.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 28.985507246376812,
   "eval_steps": 500,
   "global_step": 2000,
   "is_hyper_param_search": false,
@@ -9,37 +9,37 @@
   "is_world_process_zero": true,
   "log_history": [
     {
-      "epoch": 7.25,
-      "learning_rate": 4.094202898550725e-05,
-      "loss": 52.964,
       "step": 500
     },
     {
-      "epoch": 14.49,
-      "learning_rate": 3.188405797101449e-05,
-      "loss": 63.81,
       "step": 1000
     },
     {
-      "epoch": 21.74,
-      "learning_rate": 2.282608695652174e-05,
-      "loss": 62.5429,
       "step": 1500
     },
     {
-      "epoch": 28.99,
-      "learning_rate": 1.3768115942028985e-05,
-      "loss": 57.5548,
       "step": 2000
     }
   ],
   "logging_steps": 500,
-  "max_steps": 2760,
   "num_input_tokens_seen": 0,
-  "num_train_epochs": 40,
   "save_steps": 500,
-  "total_flos": 2090336256000000.0,
-  "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null
 }

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 57.142857142857146,
   "eval_steps": 500,
   "global_step": 2000,
   "is_hyper_param_search": false,
   "is_world_process_zero": true,
   "log_history": [
     {
+      "epoch": 14.29,
+      "learning_rate": 3.809523809523809e-05,
+      "loss": 1.8052,
       "step": 500
     },
     {
+      "epoch": 28.57,
+      "learning_rate": 2.6190476190476192e-05,
+      "loss": 1.0819,
       "step": 1000
     },
     {
+      "epoch": 42.86,
+      "learning_rate": 1.4285714285714285e-05,
+      "loss": 0.6781,
       "step": 1500
     },
     {
+      "epoch": 57.14,
+      "learning_rate": 2.3809523809523808e-06,
+      "loss": 0.5142,
       "step": 2000
     }
   ],
   "logging_steps": 500,
+  "max_steps": 2100,
   "num_input_tokens_seen": 0,
+  "num_train_epochs": 60,
   "save_steps": 500,
+  "total_flos": 4121097928704000.0,
+  "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null
 }

checkpoint-2000/training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3901907ca8b14655a382a70720bd9e1bb2f76f1edb2679dd829e743bc3f6bc3e
 size 4664

 version https://git-lfs.github.com/spec/v1
+oid sha256:7fb1fd55327e691254314ff0db99123383d66b9482283a479aeb28d89c26938a
 size 4664

checkpoint-500/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a838afbfe8df7d2ae25ab6ef968e9623a0ef1b80479cacc84732d3688e94ca49
 size 497774208

 version https://git-lfs.github.com/spec/v1
+oid sha256:1b4dcf13f972feb06a7bc56ddc93ab0a631fe7d88e27643cbf9c0e042041fcff
 size 497774208

checkpoint-500/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9030cdfbb9f6a84c732f3444b8c651a2122dc91f07c08553a04a4a59d4d5e919
-size 995638202

 version https://git-lfs.github.com/spec/v1
+oid sha256:84d55291718bd821470b8b79351ab3e47b578d3bc202542399220a633dc12848
+size 995642298

checkpoint-500/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2fed77b14fe062f6db72d68cedd6fd95bae3305b7a735eef3c85da43fd15d476
-size 13990

 version https://git-lfs.github.com/spec/v1
+oid sha256:e651e34957d21ef1419934c9c311ba824f956fd612f9f4b5bea9e4854d09d528
+size 14244

checkpoint-500/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1835112c9c4e116fab49de7670619145c7152adb2a54074e2003a8ced014d3ac
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:3d84cb0a17b808448928ddd21fdfb54eabfda0598dfe3f0b7eebb6d442d67f65
 size 1064

checkpoint-500/trainer_state.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 7.246376811594203,
   "eval_steps": 500,
   "global_step": 500,
   "is_hyper_param_search": false,
@@ -9,19 +9,19 @@
   "is_world_process_zero": true,
   "log_history": [
     {
-      "epoch": 7.25,
-      "learning_rate": 4.094202898550725e-05,
-      "loss": 52.964,
       "step": 500
     }
   ],
   "logging_steps": 500,
-  "max_steps": 2760,
   "num_input_tokens_seen": 0,
-  "num_train_epochs": 40,
   "save_steps": 500,
-  "total_flos": 522584064000000.0,
-  "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null
 }

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 14.285714285714286,
   "eval_steps": 500,
   "global_step": 500,
   "is_hyper_param_search": false,
   "is_world_process_zero": true,
   "log_history": [
     {
+      "epoch": 14.29,
+      "learning_rate": 3.809523809523809e-05,
+      "loss": 1.8052,
       "step": 500
     }
   ],
   "logging_steps": 500,
+  "max_steps": 2100,
   "num_input_tokens_seen": 0,
+  "num_train_epochs": 60,
   "save_steps": 500,
+  "total_flos": 1030535774208000.0,
+  "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null
 }

checkpoint-500/training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3901907ca8b14655a382a70720bd9e1bb2f76f1edb2679dd829e743bc3f6bc3e
 size 4664

 version https://git-lfs.github.com/spec/v1
+oid sha256:7fb1fd55327e691254314ff0db99123383d66b9482283a479aeb28d89c26938a
 size 4664

eval_results.json CHANGED Viewed

@@ -1,10 +1,10 @@
 {
-    "epoch": 40.0,
-    "eval_accuracy": 0.12425328554360812,
-    "eval_loss": 52.03367233276367,
-    "eval_runtime": 4.1042,
     "eval_samples": 9,
-    "eval_samples_per_second": 2.193,
-    "eval_steps_per_second": 1.218,
-    "perplexity": 3.962203408827054e+22
 }

 {
+    "epoch": 60.0,
+    "eval_accuracy": 0.4494406429890301,
+    "eval_loss": 3.5092644691467285,
+    "eval_runtime": 0.6069,
     "eval_samples": 9,
+    "eval_samples_per_second": 14.829,
+    "eval_steps_per_second": 4.943,
+    "perplexity": 33.423674596634115
 }

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:972cf3d0d3d030aae15410a32d454084f803350380f05215f9f7a4c30ffc505a
 size 497774208

 version https://git-lfs.github.com/spec/v1
+oid sha256:5aaa8c132a2f82e4f3a057cd8336ab86a6452f68defee110eba5fbb71b1e662c
 size 497774208

train_results.json CHANGED Viewed

@@ -1,8 +1,8 @@
 {
-    "epoch": 40.0,
-    "train_loss": 57.43311643738677,
-    "train_runtime": 10482.6781,
     "train_samples": 138,
-    "train_samples_per_second": 0.527,
-    "train_steps_per_second": 0.263
 }

 {
+    "epoch": 60.0,
+    "train_loss": 0.9941332481020972,
+    "train_runtime": 1577.313,
     "train_samples": 138,
+    "train_samples_per_second": 5.249,
+    "train_steps_per_second": 1.331
 }

trainer_state.json CHANGED Viewed

@@ -1,60 +1,54 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 40.0,
   "eval_steps": 500,
-  "global_step": 2760,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
   "log_history": [
     {
-      "epoch": 7.25,
-      "learning_rate": 4.094202898550725e-05,
-      "loss": 52.964,
       "step": 500
     },
     {
-      "epoch": 14.49,
-      "learning_rate": 3.188405797101449e-05,
-      "loss": 63.81,
       "step": 1000
     },
     {
-      "epoch": 21.74,
-      "learning_rate": 2.282608695652174e-05,
-      "loss": 62.5429,
       "step": 1500
     },
     {
-      "epoch": 28.99,
-      "learning_rate": 1.3768115942028985e-05,
-      "loss": 57.5548,
       "step": 2000
     },
     {
-      "epoch": 36.23,
-      "learning_rate": 4.710144927536232e-06,
-      "loss": 53.2908,
-      "step": 2500
-    },
-    {
-      "epoch": 40.0,
-      "step": 2760,
-      "total_flos": 2884664033280000.0,
-      "train_loss": 57.43311643738677,
-      "train_runtime": 10482.6781,
-      "train_samples_per_second": 0.527,
-      "train_steps_per_second": 0.263
     }
   ],
   "logging_steps": 500,
-  "max_steps": 2760,
   "num_input_tokens_seen": 0,
-  "num_train_epochs": 40,
   "save_steps": 500,
-  "total_flos": 2884664033280000.0,
-  "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null
 }

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 60.0,
   "eval_steps": 500,
+  "global_step": 2100,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
   "log_history": [
     {
+      "epoch": 14.29,
+      "learning_rate": 3.809523809523809e-05,
+      "loss": 1.8052,
       "step": 500
     },
     {
+      "epoch": 28.57,
+      "learning_rate": 2.6190476190476192e-05,
+      "loss": 1.0819,
       "step": 1000
     },
     {
+      "epoch": 42.86,
+      "learning_rate": 1.4285714285714285e-05,
+      "loss": 0.6781,
       "step": 1500
     },
     {
+      "epoch": 57.14,
+      "learning_rate": 2.3809523809523808e-06,
+      "loss": 0.5142,
       "step": 2000
     },
     {
+      "epoch": 60.0,
+      "step": 2100,
+      "total_flos": 4326996049920000.0,
+      "train_loss": 0.9941332481020972,
+      "train_runtime": 1577.313,
+      "train_samples_per_second": 5.249,
+      "train_steps_per_second": 1.331
     }
   ],
   "logging_steps": 500,
+  "max_steps": 2100,
   "num_input_tokens_seen": 0,
+  "num_train_epochs": 60,
   "save_steps": 500,
+  "total_flos": 4326996049920000.0,
+  "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null
 }

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3901907ca8b14655a382a70720bd9e1bb2f76f1edb2679dd829e743bc3f6bc3e
 size 4664

 version https://git-lfs.github.com/spec/v1
+oid sha256:7fb1fd55327e691254314ff0db99123383d66b9482283a479aeb28d89c26938a
 size 4664