Training in progress, epoch 1

Browse files

Files changed (7) hide show

README.md +43 -55
adapter_model.safetensors +1 -1
all_results.json +11 -11
eval_results.json +6 -6
tokenizer.json +1 -6
train_results.json +6 -6
training_args.bin +2 -2

README.md CHANGED Viewed

@@ -5,18 +5,18 @@ base_model: gpt2
 tags:
 - generated_from_trainer
 model-index:
-- name: Se124M100KInfMinimalist
   results: []
 ---
 <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 should probably proofread and complete it, then remove this comment. -->
-# Se124M100KInfMinimalist
 This model is a fine-tuned version of [gpt2](https://huggingface.co/gpt2) on an unknown dataset.
 It achieves the following results on the evaluation set:
-- Loss: 0.5392
 ## Model description
@@ -46,58 +46,46 @@ The following hyperparameters were used during training:
 ### Training results
-| Training Loss | Epoch | Step  | Validation Loss |
-|:-------------:|:-----:|:-----:|:---------------:|
-| 0.1691        | 1.0   | 1860  | 0.6314          |
-| 0.1598        | 2.0   | 3720  | 0.6036          |
-| 0.1539        | 3.0   | 5580  | 0.5906          |
-| 0.153         | 4.0   | 7440  | 0.5836          |
-| 0.1507        | 5.0   | 9300  | 0.5790          |
-| 0.1483        | 6.0   | 11160 | 0.5746          |
-| 0.149         | 7.0   | 13020 | 0.5703          |
-| 0.1485        | 8.0   | 14880 | 0.5684          |
-| 0.1462        | 9.0   | 16740 | 0.5656          |
-| 0.1469        | 10.0  | 18600 | 0.5630          |
-| 0.1449        | 11.0  | 20460 | 0.5617          |
-| 0.1469        | 12.0  | 22320 | 0.5581          |
-| 0.1456        | 13.0  | 24180 | 0.5575          |
-| 0.1459        | 14.0  | 26040 | 0.5547          |
-| 0.1432        | 15.0  | 27900 | 0.5544          |
-| 0.1429        | 16.0  | 29760 | 0.5540          |
-| 0.1431        | 17.0  | 31620 | 0.5523          |
-| 0.1432        | 18.0  | 33480 | 0.5512          |
-| 0.1423        | 19.0  | 35340 | 0.5519          |
-| 0.1429        | 20.0  | 37200 | 0.5506          |
-| 0.1429        | 21.0  | 39060 | 0.5490          |
-| 0.1441        | 22.0  | 40920 | 0.5477          |
-| 0.1426        | 23.0  | 42780 | 0.5476          |
-| 0.1436        | 24.0  | 44640 | 0.5463          |
-| 0.1419        | 25.0  | 46500 | 0.5462          |
-| 0.1399        | 26.0  | 48360 | 0.5449          |
-| 0.1412        | 27.0  | 50220 | 0.5452          |
-| 0.14          | 28.0  | 52080 | 0.5440          |
-| 0.1396        | 29.0  | 53940 | 0.5440          |
-| 0.1402        | 30.0  | 55800 | 0.5440          |
-| 0.1404        | 31.0  | 57660 | 0.5437          |
-| 0.1415        | 32.0  | 59520 | 0.5427          |
-| 0.1406        | 33.0  | 61380 | 0.5420          |
-| 0.1387        | 34.0  | 63240 | 0.5422          |
-| 0.1392        | 35.0  | 65100 | 0.5420          |
-| 0.1404        | 36.0  | 66960 | 0.5420          |
-| 0.1436        | 37.0  | 68820 | 0.5411          |
-| 0.1424        | 38.0  | 70680 | 0.5415          |
-| 0.141         | 39.0  | 72540 | 0.5407          |
-| 0.1402        | 40.0  | 74400 | 0.5403          |
-| 0.1412        | 41.0  | 76260 | 0.5407          |
-| 0.139         | 42.0  | 78120 | 0.5403          |
-| 0.1357        | 43.0  | 79980 | 0.5401          |
-| 0.1396        | 44.0  | 81840 | 0.5397          |
-| 0.1398        | 45.0  | 83700 | 0.5394          |
-| 0.1385        | 46.0  | 85560 | 0.5395          |
-| 0.1408        | 47.0  | 87420 | 0.5396          |
-| 0.1371        | 48.0  | 89280 | 0.5392          |
-| 0.1418        | 49.0  | 91140 | 0.5393          |
-| 0.1382        | 50.0  | 93000 | 0.5392          |
 ### Framework versions

 tags:
 - generated_from_trainer
 model-index:
+- name: Se124M10KInfSimple
   results: []
 ---
 <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 should probably proofread and complete it, then remove this comment. -->
+# Se124M10KInfSimple
 This model is a fine-tuned version of [gpt2](https://huggingface.co/gpt2) on an unknown dataset.
 It achieves the following results on the evaluation set:
+- Loss: 0.5204
 ## Model description
 ### Training results
+| Training Loss | Epoch | Step | Validation Loss |
+|:-------------:|:-----:|:----:|:---------------:|
+| 0.3959        | 1.0   | 237  | 0.9383          |
+| 0.2182        | 2.0   | 474  | 0.6835          |
+| 0.1772        | 3.0   | 711  | 0.6222          |
+| 0.1676        | 4.0   | 948  | 0.5980          |
+| 0.1609        | 5.0   | 1185 | 0.5840          |
+| 0.1519        | 6.0   | 1422 | 0.5740          |
+| 0.1535        | 7.0   | 1659 | 0.5659          |
+| 0.1492        | 8.0   | 1896 | 0.5587          |
+| 0.1456        | 9.0   | 2133 | 0.5575          |
+| 0.1427        | 10.0  | 2370 | 0.5528          |
+| 0.1442        | 11.0  | 2607 | 0.5499          |
+| 0.1403        | 12.0  | 2844 | 0.5466          |
+| 0.1425        | 13.0  | 3081 | 0.5449          |
+| 0.1403        | 14.0  | 3318 | 0.5414          |
+| 0.1405        | 15.0  | 3555 | 0.5399          |
+| 0.1387        | 16.0  | 3792 | 0.5383          |
+| 0.1377        | 17.0  | 4029 | 0.5368          |
+| 0.1379        | 18.0  | 4266 | 0.5376          |
+| 0.1353        | 19.0  | 4503 | 0.5349          |
+| 0.1378        | 20.0  | 4740 | 0.5313          |
+| 0.1372        | 21.0  | 4977 | 0.5320          |
+| 0.135         | 22.0  | 5214 | 0.5286          |
+| 0.1361        | 23.0  | 5451 | 0.5282          |
+| 0.1357        | 24.0  | 5688 | 0.5287          |
+| 0.1372        | 25.0  | 5925 | 0.5269          |
+| 0.1343        | 26.0  | 6162 | 0.5271          |
+| 0.1321        | 27.0  | 6399 | 0.5255          |
+| 0.1341        | 28.0  | 6636 | 0.5240          |
+| 0.1346        | 29.0  | 6873 | 0.5235          |
+| 0.1327        | 30.0  | 7110 | 0.5239          |
+| 0.1335        | 31.0  | 7347 | 0.5230          |
+| 0.1332        | 32.0  | 7584 | 0.5228          |
+| 0.1317        | 33.0  | 7821 | 0.5234          |
+| 0.1331        | 34.0  | 8058 | 0.5220          |
+| 0.1327        | 35.0  | 8295 | 0.5213          |
+| 0.1338        | 36.0  | 8532 | 0.5204          |
+| 0.1306        | 37.0  | 8769 | 0.5206          |
+| 0.1317        | 38.0  | 9006 | 0.5208          |
 ### Framework versions

adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6ed3f57f2acfde18959030e0b27638eae2b1ec5e94d02eab7e71bb065955c7a1
 size 309974336

 version https://git-lfs.github.com/spec/v1
+oid sha256:7b1a68a7edfa788d730134e4ab6466ecc272829d11c502d36084ef6f3b919790
 size 309974336

all_results.json CHANGED Viewed

@@ -1,13 +1,13 @@
 {
-    "epoch": 50.0,
-    "eval_loss": 0.5392394065856934,
-    "eval_runtime": 80.8454,
-    "eval_samples_per_second": 158.154,
-    "eval_steps_per_second": 4.948,
-    "perplexity": 1.7147021748977518,
-    "total_flos": 1.950556483878912e+17,
-    "train_loss": 0.14462488290315034,
-    "train_runtime": 9614.4477,
-    "train_samples_per_second": 309.503,
-    "train_steps_per_second": 9.673
 }

 {
+    "epoch": 38.0,
+    "eval_loss": 0.5203976631164551,
+    "eval_runtime": 9.8245,
+    "eval_samples_per_second": 161.535,
+    "eval_steps_per_second": 5.089,
+    "perplexity": 1.6826966630681837,
+    "total_flos": 1.886593281372979e+16,
+    "train_loss": 0.15147609674822032,
+    "train_runtime": 971.6736,
+    "train_samples_per_second": 389.74,
+    "train_steps_per_second": 12.195
 }

eval_results.json CHANGED Viewed

@@ -1,8 +1,8 @@
 {
-    "epoch": 50.0,
-    "eval_loss": 0.5392394065856934,
-    "eval_runtime": 80.8454,
-    "eval_samples_per_second": 158.154,
-    "eval_steps_per_second": 4.948,
-    "perplexity": 1.7147021748977518
 }

 {
+    "epoch": 38.0,
+    "eval_loss": 0.5203976631164551,
+    "eval_runtime": 9.8245,
+    "eval_samples_per_second": 161.535,
+    "eval_steps_per_second": 5.089,
+    "perplexity": 1.6826966630681837
 }

tokenizer.json CHANGED Viewed

@@ -1,11 +1,6 @@
 {
   "version": "1.0",
-  "truncation": {
-    "direction": "Right",
-    "max_length": 1024,
-    "strategy": "LongestFirst",
-    "stride": 0
-  },
   "padding": null,
   "added_tokens": [
     {

 {
   "version": "1.0",
+  "truncation": null,
   "padding": null,
   "added_tokens": [
     {

train_results.json CHANGED Viewed

@@ -1,8 +1,8 @@
 {
-    "epoch": 50.0,
-    "total_flos": 1.950556483878912e+17,
-    "train_loss": 0.14462488290315034,
-    "train_runtime": 9614.4477,
-    "train_samples_per_second": 309.503,
-    "train_steps_per_second": 9.673
 }

 {
+    "epoch": 38.0,
+    "total_flos": 1.886593281372979e+16,
+    "train_loss": 0.15147609674822032,
+    "train_runtime": 971.6736,
+    "train_samples_per_second": 389.74,
+    "train_steps_per_second": 12.195
 }

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c0426832942c50610ac436828fa8c166ebacacdf1b75d24835dff50e7467b4f5
-size 5368

 version https://git-lfs.github.com/spec/v1
+oid sha256:e8e7e48074b9d4a9223c7ec1f8f3c14b6667da1c3552646e4edae6dada307e73
+size 5432