SM committed on Dec 27, 2023

Commit

9efeae0

1 Parent(s): c3eb26d

Upload 66 files

Files changed (19) hide show

README.md +3 -3
all_results.json +13 -13
checkpoint-1000/model.safetensors +1 -1
checkpoint-1000/optimizer.pt +1 -1
checkpoint-1000/rng_state.pth +1 -1
checkpoint-1000/scheduler.pt +1 -1
checkpoint-1000/trainer_state.json +10 -10
checkpoint-1000/training_args.bin +1 -1
checkpoint-500/model.safetensors +1 -1
checkpoint-500/optimizer.pt +1 -1
checkpoint-500/rng_state.pth +1 -1
checkpoint-500/scheduler.pt +1 -1
checkpoint-500/trainer_state.json +7 -7
checkpoint-500/training_args.bin +1 -1
eval_results.json +8 -8
model.safetensors +1 -1
train_results.json +6 -6
trainer_state.json +18 -30
training_args.bin +1 -1

README.md CHANGED Viewed

@@ -17,8 +17,8 @@ should probably proofread and complete it, then remove this comment. -->
 This model is a fine-tuned version of [gpt2](https://huggingface.co/gpt2) on an unknown dataset.
 It achieves the following results on the evaluation set:
-- Loss: 3.5093
-- Accuracy: 0.4494
 ## Model description
@@ -43,7 +43,7 @@ The following hyperparameters were used during training:
 - seed: 42
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: linear
-- num_epochs: 60.0
 ### Training results

 This model is a fine-tuned version of [gpt2](https://huggingface.co/gpt2) on an unknown dataset.
 It achieves the following results on the evaluation set:
+- Loss: 3.4268
+- Accuracy: 0.3678
 ## Model description
 - seed: 42
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: linear
+- num_epochs: 40.0
 ### Training results

all_results.json CHANGED Viewed

@@ -1,15 +1,15 @@
 {
-    "epoch": 60.0,
-    "eval_accuracy": 0.4494406429890301,
-    "eval_loss": 3.5092644691467285,
-    "eval_runtime": 0.6069,
-    "eval_samples": 9,
-    "eval_samples_per_second": 14.829,
-    "eval_steps_per_second": 4.943,
-    "perplexity": 33.423674596634115,
-    "train_loss": 0.9941332481020972,
-    "train_runtime": 1577.313,
-    "train_samples": 138,
-    "train_samples_per_second": 5.249,
-    "train_steps_per_second": 1.331
 }

 {
+    "epoch": 40.0,
+    "eval_accuracy": 0.36779081133919844,
+    "eval_loss": 3.4267592430114746,
+    "eval_runtime": 0.5326,
+    "eval_samples": 8,
+    "eval_samples_per_second": 15.021,
+    "eval_steps_per_second": 3.755,
+    "perplexity": 30.776741019953068,
+    "train_loss": 1.7243760996851427,
+    "train_runtime": 861.0101,
+    "train_samples": 113,
+    "train_samples_per_second": 5.25,
+    "train_steps_per_second": 1.347
 }

checkpoint-1000/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:12f996d0719a66b10d90e36ef43b31f9eaae1c31749508f6854af308bb085736
 size 497774208

 version https://git-lfs.github.com/spec/v1
+oid sha256:4539d2a083bbe3c5582d647b1ae6bc86e66a35cfc1b8f9e221aff26bbbde8195
 size 497774208

checkpoint-1000/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9adc9e7ab852a33f9fe84c647c87587bada024b6538c02088b10c9c8717806e2
 size 995642298

 version https://git-lfs.github.com/spec/v1
+oid sha256:7056e6ee4e46e8e59c098ffc9fa74eb8cf4091180be014749fc9672c6dee96fa
 size 995642298

checkpoint-1000/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0a6632d83b7bb45efc05bef8c034ed3b2854a29a949ed96a7ca5bd50bcb7d902
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:d420b459d39d02b619c50d8cdf88cb444183f0ce7c2d4fb429cd2b5d34fff044
 size 14244

checkpoint-1000/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:eee3c9505132ed967b6539dd4a6fb45e2bc29520ec4ec39ac5c68d846d45dec5
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:76a794237642516faf4b87039234f574d68642549c3c3bc8098873f4599d9d7e
 size 1064

checkpoint-1000/trainer_state.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 28.571428571428573,
   "eval_steps": 500,
   "global_step": 1000,
   "is_hyper_param_search": false,
@@ -9,24 +9,24 @@
   "is_world_process_zero": true,
   "log_history": [
     {
-      "epoch": 14.29,
-      "learning_rate": 3.809523809523809e-05,
-      "loss": 1.8052,
       "step": 500
     },
     {
-      "epoch": 28.57,
-      "learning_rate": 2.6190476190476192e-05,
-      "loss": 1.0819,
       "step": 1000
     }
   ],
   "logging_steps": 500,
-  "max_steps": 2100,
   "num_input_tokens_seen": 0,
-  "num_train_epochs": 60,
   "save_steps": 500,
-  "total_flos": 2061071548416000.0,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 34.48275862068966,
   "eval_steps": 500,
   "global_step": 1000,
   "is_hyper_param_search": false,
   "is_world_process_zero": true,
   "log_history": [
     {
+      "epoch": 17.24,
+      "learning_rate": 2.844827586206897e-05,
+      "loss": 2.1876,
       "step": 500
     },
     {
+      "epoch": 34.48,
+      "learning_rate": 6.896551724137932e-06,
+      "loss": 1.4227,
       "step": 1000
     }
   ],
   "logging_steps": 500,
+  "max_steps": 1160,
   "num_input_tokens_seen": 0,
+  "num_train_epochs": 40,
   "save_steps": 500,
+  "total_flos": 2037032681472000.0,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

checkpoint-1000/training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7fb1fd55327e691254314ff0db99123383d66b9482283a479aeb28d89c26938a
 size 4664

 version https://git-lfs.github.com/spec/v1
+oid sha256:7be20547a6728ba6b1c0823eb883cb2999dcf6b825f9d82a942dc5c63262ce46
 size 4664

checkpoint-500/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1b4dcf13f972feb06a7bc56ddc93ab0a631fe7d88e27643cbf9c0e042041fcff
 size 497774208

 version https://git-lfs.github.com/spec/v1
+oid sha256:ffc6c3538ac0c892d4ba1dbc0b4658747000215b6f0344b4810f8c693469a2bd
 size 497774208

checkpoint-500/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:84d55291718bd821470b8b79351ab3e47b578d3bc202542399220a633dc12848
 size 995642298

 version https://git-lfs.github.com/spec/v1
+oid sha256:9e083993f7a062b56c3ecbf41fddef3aef72845c7a860c14dc9063f295d4cfa6
 size 995642298

checkpoint-500/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e651e34957d21ef1419934c9c311ba824f956fd612f9f4b5bea9e4854d09d528
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:1cb841abcf2e8906e340787ceb62a5bd4a7332d20f27e04e3ad3d26c6caf5856
 size 14244

checkpoint-500/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3d84cb0a17b808448928ddd21fdfb54eabfda0598dfe3f0b7eebb6d442d67f65
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:cc56f025127707558594caa06236787d82e466db8432b4081f402bb03eef7151
 size 1064

checkpoint-500/trainer_state.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 14.285714285714286,
   "eval_steps": 500,
   "global_step": 500,
   "is_hyper_param_search": false,
@@ -9,18 +9,18 @@
   "is_world_process_zero": true,
   "log_history": [
     {
-      "epoch": 14.29,
-      "learning_rate": 3.809523809523809e-05,
-      "loss": 1.8052,
       "step": 500
     }
   ],
   "logging_steps": 500,
-  "max_steps": 2100,
   "num_input_tokens_seen": 0,
-  "num_train_epochs": 60,
   "save_steps": 500,
-  "total_flos": 1030535774208000.0,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 17.24137931034483,
   "eval_steps": 500,
   "global_step": 500,
   "is_hyper_param_search": false,
   "is_world_process_zero": true,
   "log_history": [
     {
+      "epoch": 17.24,
+      "learning_rate": 2.844827586206897e-05,
+      "loss": 2.1876,
       "step": 500
     }
   ],
   "logging_steps": 500,
+  "max_steps": 1160,
   "num_input_tokens_seen": 0,
+  "num_train_epochs": 40,
   "save_steps": 500,
+  "total_flos": 1018516340736000.0,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

checkpoint-500/training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7fb1fd55327e691254314ff0db99123383d66b9482283a479aeb28d89c26938a
 size 4664

 version https://git-lfs.github.com/spec/v1
+oid sha256:7be20547a6728ba6b1c0823eb883cb2999dcf6b825f9d82a942dc5c63262ce46
 size 4664

eval_results.json CHANGED Viewed

@@ -1,10 +1,10 @@
 {
-    "epoch": 60.0,
-    "eval_accuracy": 0.4494406429890301,
-    "eval_loss": 3.5092644691467285,
-    "eval_runtime": 0.6069,
-    "eval_samples": 9,
-    "eval_samples_per_second": 14.829,
-    "eval_steps_per_second": 4.943,
-    "perplexity": 33.423674596634115
 }

 {
+    "epoch": 40.0,
+    "eval_accuracy": 0.36779081133919844,
+    "eval_loss": 3.4267592430114746,
+    "eval_runtime": 0.5326,
+    "eval_samples": 8,
+    "eval_samples_per_second": 15.021,
+    "eval_steps_per_second": 3.755,
+    "perplexity": 30.776741019953068
 }

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5aaa8c132a2f82e4f3a057cd8336ab86a6452f68defee110eba5fbb71b1e662c
 size 497774208

 version https://git-lfs.github.com/spec/v1
+oid sha256:85daf429e1d2edfcaf99b4727e7c9dfd52b443830c2696dbc1a1af96cd3bbedd
 size 497774208

train_results.json CHANGED Viewed

@@ -1,8 +1,8 @@
 {
-    "epoch": 60.0,
-    "train_loss": 0.9941332481020972,
-    "train_runtime": 1577.313,
-    "train_samples": 138,
-    "train_samples_per_second": 5.249,
-    "train_steps_per_second": 1.331
 }

 {
+    "epoch": 40.0,
+    "train_loss": 1.7243760996851427,
+    "train_runtime": 861.0101,
+    "train_samples": 113,
+    "train_samples_per_second": 5.25,
+    "train_steps_per_second": 1.347
 }

trainer_state.json CHANGED Viewed

@@ -1,53 +1,41 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 60.0,
   "eval_steps": 500,
-  "global_step": 2100,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
   "log_history": [
     {
-      "epoch": 14.29,
-      "learning_rate": 3.809523809523809e-05,
-      "loss": 1.8052,
       "step": 500
     },
     {
-      "epoch": 28.57,
-      "learning_rate": 2.6190476190476192e-05,
-      "loss": 1.0819,
       "step": 1000
     },
     {
-      "epoch": 42.86,
-      "learning_rate": 1.4285714285714285e-05,
-      "loss": 0.6781,
-      "step": 1500
-    },
-    {
-      "epoch": 57.14,
-      "learning_rate": 2.3809523809523808e-06,
-      "loss": 0.5142,
-      "step": 2000
-    },
-    {
-      "epoch": 60.0,
-      "step": 2100,
-      "total_flos": 4326996049920000.0,
-      "train_loss": 0.9941332481020972,
-      "train_runtime": 1577.313,
-      "train_samples_per_second": 5.249,
-      "train_steps_per_second": 1.331
     }
   ],
   "logging_steps": 500,
-  "max_steps": 2100,
   "num_input_tokens_seen": 0,
-  "num_train_epochs": 60,
   "save_steps": 500,
-  "total_flos": 4326996049920000.0,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 40.0,
   "eval_steps": 500,
+  "global_step": 1160,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
   "log_history": [
     {
+      "epoch": 17.24,
+      "learning_rate": 2.844827586206897e-05,
+      "loss": 2.1876,
       "step": 500
     },
     {
+      "epoch": 34.48,
+      "learning_rate": 6.896551724137932e-06,
+      "loss": 1.4227,
       "step": 1000
     },
     {
+      "epoch": 40.0,
+      "step": 1160,
+      "total_flos": 2362079969280000.0,
+      "train_loss": 1.7243760996851427,
+      "train_runtime": 861.0101,
+      "train_samples_per_second": 5.25,
+      "train_steps_per_second": 1.347
     }
   ],
   "logging_steps": 500,
+  "max_steps": 1160,
   "num_input_tokens_seen": 0,
+  "num_train_epochs": 40,
   "save_steps": 500,
+  "total_flos": 2362079969280000.0,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7fb1fd55327e691254314ff0db99123383d66b9482283a479aeb28d89c26938a
 size 4664

 version https://git-lfs.github.com/spec/v1
+oid sha256:7be20547a6728ba6b1c0823eb883cb2999dcf6b825f9d82a942dc5c63262ce46
 size 4664