Training in progress, step 92910

Browse files

Files changed (6) hide show

README.md +101 -60
all_results.json +14 -14
eval_results.json +8 -8
model.safetensors +1 -1
train_results.json +7 -7
trainer_state.json +0 -0

README.md CHANGED Viewed

@@ -2,34 +2,22 @@
 library_name: transformers
 tags:
 - generated_from_trainer
-datasets:
-- craa/100M
 metrics:
 - accuracy
 model-index:
-- name: 100M_495
-  results:
-  - task:
-      name: Causal Language Modeling
-      type: text-generation
-    dataset:
-      name: craa/100M
-      type: craa/100M
-    metrics:
-    - name: Accuracy
-      type: accuracy
-      value: 0.39720377760770925
 ---
 <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 should probably proofread and complete it, then remove this comment. -->
-# 100M_495
-This model is a fine-tuned version of an unspecified base model on the craa/100M dataset.
 It achieves the following results on the evaluation set:
-- Loss: 3.2701
-- Accuracy: 0.3972
 ## Model description
@@ -52,55 +40,108 @@ The following hyperparameters were used during training:
 - train_batch_size: 32
 - eval_batch_size: 16
 - seed: 495
-- gradient_accumulation_steps: 5
-- total_train_batch_size: 160
 - optimizer: Use OptimizerNames.ADAMW_TORCH with betas=(0.9,0.98) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments
 - lr_scheduler_type: linear
 - lr_scheduler_warmup_steps: 100
-- num_epochs: 20.0
 - mixed_precision_training: Native AMP
 ### Training results
-| Training Loss | Epoch   | Step  | Validation Loss | Accuracy |
-|:-------------:|:-------:|:-----:|:---------------:|:--------:|
-| 21.5194       | 0.5391  | 1000  | 4.2090          | 0.2983   |
-| 19.1009       | 1.0782  | 2000  | 3.7990          | 0.3384   |
-| 18.378        | 1.6173  | 3000  | 3.6374          | 0.3540   |
-| 17.6276       | 2.1563  | 4000  | 3.5499          | 0.3627   |
-| 17.4667       | 2.6954  | 5000  | 3.4897          | 0.3686   |
-| 16.9558       | 3.2345  | 6000  | 3.4489          | 0.3730   |
-| 16.955        | 3.7736  | 7000  | 3.4124          | 0.3770   |
-| 16.5922       | 4.3127  | 8000  | 3.3918          | 0.3795   |
-| 16.6038       | 4.8518  | 9000  | 3.3646          | 0.3823   |
-| 16.2971       | 5.3908  | 10000 | 3.3563          | 0.3837   |
-| 16.3311       | 5.9299  | 11000 | 3.3356   | 0.3855          |
-| 16.0534       | 6.4690  | 12000 | 3.3318   | 0.3865          |
-| 16.0089       | 7.0081  | 13000 | 3.3224   | 0.3880          |
-| 15.8981       | 7.5472  | 14000 | 3.3139   | 0.3891          |
-| 15.5564       | 8.0863  | 15000 | 3.3104   | 0.3898          |
-| 15.7098       | 8.6253  | 16000 | 3.3004   | 0.3909          |
-| 15.4365       | 9.1644  | 17000 | 3.3022   | 0.3914          |
-| 15.585        | 9.7035  | 18000 | 3.2876   | 0.3926          |
-| 15.3452       | 10.2426 | 19000 | 3.2908   | 0.3929          |
-| 15.4461       | 10.7817 | 20000 | 3.2806   | 0.3936          |
-| 15.2516       | 11.3208 | 21000 | 3.2851   | 0.3938          |
-| 15.3346       | 11.8598 | 22000 | 3.2735   | 0.3949          |
-| 15.1577       | 12.3989 | 23000 | 3.2778   | 0.3949          |
-| 15.2141       | 12.9380 | 24000 | 3.2672   | 0.3959          |
-| 15.0879       | 13.4771 | 25000 | 3.2710   | 0.3960          |
-| 14.8943       | 14.0162 | 26000 | 3.2727   | 0.3962          |
-| 14.9168       | 14.5553 | 27000 | 3.2694   | 0.3965          |
-| 14.7581       | 15.0943 | 28000 | 3.2720   | 0.3966          |
-| 14.8475       | 15.6334 | 29000 | 3.2678   | 0.3970          |
-| 14.6722       | 16.1725 | 30000 | 3.2701   | 0.3972          |
-| 14.7754       | 16.7116 | 31000 | 3.2638   | 0.3978          |
-| 14.5855       | 17.2507 | 32000 | 3.2677   | 0.3977          |
-| 14.6762       | 17.7898 | 33000 | 3.2615   | 0.3983          |
-| 14.565        | 18.3288 | 34000 | 3.2645   | 0.3982          |
-| 14.5499       | 18.8679 | 35000 | 3.2605   | 0.3987          |
-| 14.4716       | 19.4070 | 36000 | 3.2622   | 0.3987          |
-| 14.4369       | 19.9461 | 37000 | 3.2604   | 0.3989          |
 ### Framework versions

 library_name: transformers
 tags:
 - generated_from_trainer
 metrics:
 - accuracy
 model-index:
+- name: 100M_low_10_495
+  results: []
 ---
 <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 should probably proofread and complete it, then remove this comment. -->
+# 100M_low_10_495
+This model is a fine-tuned version of an unspecified base model on an unknown dataset.
 It achieves the following results on the evaluation set:
+- Loss: 3.3084
+- Accuracy: 0.3936
 ## Model description
 - train_batch_size: 32
 - eval_batch_size: 16
 - seed: 495
 - optimizer: Use OptimizerNames.ADAMW_TORCH with betas=(0.9,0.98) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments
 - lr_scheduler_type: linear
 - lr_scheduler_warmup_steps: 100
+- num_epochs: 10
 - mixed_precision_training: Native AMP
 ### Training results
+| Training Loss | Epoch  | Step  | Validation Loss | Accuracy |
+|:-------------:|:------:|:-----:|:---------------:|:--------:|
+| 5.1178        | 0.1076 | 1000  | 5.0388          | 0.2259   |
+| 4.5945        | 0.2153 | 2000  | 4.5387          | 0.2671   |
+| 4.3222        | 0.3229 | 3000  | 4.2490          | 0.2972   |
+| 4.1793        | 0.4305 | 4000  | 4.1015          | 0.3109   |
+| 4.0646        | 0.5382 | 5000  | 4.0043          | 0.3201   |
+| 4.0013        | 0.6458 | 6000  | 3.9299          | 0.3272   |
+| 3.9392        | 0.7534 | 7000  | 3.8716          | 0.3324   |
+| 3.8996        | 0.8610 | 8000  | 3.8228          | 0.3371   |
+| 3.8404        | 0.9687 | 9000  | 3.7830          | 0.3409   |
+| 3.7704        | 1.0763 | 10000 | 3.7538          | 0.3444   |
+| 3.7569        | 1.1839 | 11000 | 3.7258          | 0.3464   |
+| 3.7236        | 1.2916 | 12000 | 3.7004          | 0.3491   |
+| 3.7062        | 1.3992 | 13000 | 3.6795          | 0.3513   |
+| 3.7181        | 1.5068 | 14000 | 3.6571          | 0.3537   |
+| 3.6986        | 1.6145 | 15000 | 3.6386          | 0.3557   |
+| 3.6594        | 1.7221 | 16000 | 3.6226          | 0.3570   |
+| 3.6535        | 1.8297 | 17000 | 3.6091          | 0.3585   |
+| 3.6528        | 1.9374 | 18000 | 3.5924          | 0.3601   |
+| 3.5646        | 2.0450 | 19000 | 3.5838          | 0.3615   |
+| 3.5654        | 2.1526 | 20000 | 3.5733          | 0.3622   |
+| 3.5613        | 2.2603 | 21000 | 3.5629          | 0.3638   |
+| 3.5588        | 2.3679 | 22000 | 3.5526          | 0.3644   |
+| 3.5626        | 2.4755 | 23000 | 3.5408          | 0.3659   |
+| 3.5623        | 2.5831 | 24000 | 3.5337          | 0.3662   |
+| 3.5571        | 2.6908 | 25000 | 3.5264          | 0.3677   |
+| 3.5268        | 2.7984 | 26000 | 3.5157          | 0.3684   |
+| 3.5395        | 2.9060 | 27000 | 3.5055          | 0.3693   |
+| 3.4403        | 3.0137 | 28000 | 3.5016          | 0.3705   |
+| 3.4636        | 3.1213 | 29000 | 3.4972          | 0.3711   |
+| 3.4348        | 3.2289 | 30000 | 3.4919          | 0.3716   |
+| 3.4676        | 3.3366 | 31000 | 3.4868          | 0.3719   |
+| 3.4556        | 3.4442 | 32000 | 3.4819          | 0.3726   |
+| 3.4758        | 3.5518 | 33000 | 3.4755          | 0.3727   |
+| 3.455         | 3.6595 | 34000 | 3.4661          | 0.3743   |
+| 3.4728        | 3.7671 | 35000 | 3.4589          | 0.3750   |
+| 3.4347        | 3.8747 | 36000 | 3.4531          | 0.3753   |
+| 3.46          | 3.9823 | 37000 | 3.4463          | 0.3760   |
+| 3.3683        | 4.0900 | 38000 | 3.4493          | 0.3761   |
+| 3.3682        | 4.1976 | 39000 | 3.4479          | 0.3768   |
+| 3.3876        | 4.3052 | 40000 | 3.4414          | 0.3776   |
+| 3.3813        | 4.4129 | 41000 | 3.4350          | 0.3777   |
+| 3.3924        | 4.5205 | 42000 | 3.4311          | 0.3786   |
+| 3.3912        | 4.6281 | 43000 | 3.4264          | 0.3786   |
+| 3.4047        | 4.7358 | 44000 | 3.4213          | 0.3793   |
+| 3.3776        | 4.8434 | 45000 | 3.4164          | 0.3797   |
+| 3.3902        | 4.9510 | 46000 | 3.4134          | 0.3803   |
+| 3.2993        | 5.0587 | 47000 | 3.4128          | 0.3807   |
+| 3.3255        | 5.1663 | 48000 | 3.4129          | 0.3804   |
+| 3.3417        | 5.2739 | 49000 | 3.4100          | 0.3812   |
+| 3.3513        | 5.3816 | 50000 | 3.4053          | 0.3815   |
+| 3.334         | 5.4892 | 51000 | 3.4005          | 0.3822   |
+| 3.334         | 5.5968 | 52000 | 3.3943          | 0.3824   |
+| 3.3328        | 5.7044 | 53000 | 3.3905          | 0.3829   |
+| 3.3229        | 5.8121 | 54000 | 3.3878          | 0.3833   |
+| 3.3521        | 5.9197 | 55000 | 3.3835          | 0.3836   |
+| 3.2608        | 6.0273 | 56000 | 3.3862          | 0.3840   |
+| 3.2733        | 6.1350 | 57000 | 3.3877          | 0.3843   |
+| 3.2897        | 6.2426 | 58000 | 3.3816          | 0.3847   |
+| 3.2645        | 6.3502 | 59000 | 3.3788          | 0.3849   |
+| 3.2801        | 6.4579 | 60000 | 3.3741          | 0.3853   |
+| 3.2836        | 6.5655 | 61000 | 3.3708          | 0.3856   |
+| 3.2808        | 6.6731 | 62000 | 3.3648          | 0.3863   |
+| 3.2776        | 6.7808 | 63000 | 3.3619          | 0.3865   |
+| 3.2847        | 6.8884 | 64000 | 3.3614          | 0.3866   |
+| 3.2856        | 6.9960 | 65000 | 3.3534          | 0.3874   |
+| 3.2254        | 7.1036 | 66000 | 3.3612          | 0.3870   |
+| 3.2413        | 7.2113 | 67000 | 3.3590          | 0.3875   |
+| 3.2095        | 7.3189 | 68000 | 3.3565          | 0.3878   |
+| 3.2314        | 7.4265 | 69000 | 3.3515          | 0.3881   |
+| 3.2351        | 7.5342 | 70000 | 3.3482          | 0.3886   |
+| 3.2345        | 7.6418 | 71000 | 3.3438          | 0.3891   |
+| 3.2414        | 7.7494 | 72000 | 3.3423          | 0.3891   |
+| 3.2356        | 7.8571 | 73000 | 3.3367          | 0.3898   |
+| 3.2443        | 7.9647 | 74000 | 3.3352          | 0.3902   |
+| 3.1627        | 8.0723 | 75000 | 3.3387          | 0.3899   |
+| 3.1778        | 8.1800 | 76000 | 3.3358          | 0.3904   |
+| 3.1902        | 8.2876 | 77000 | 3.3349          | 0.3904   |
+| 3.1921        | 8.3952 | 78000 | 3.3311          | 0.3905   |
+| 3.1977        | 8.5029 | 79000 | 3.3284          | 0.3912   |
+| 3.1876        | 8.6105 | 80000 | 3.3249          | 0.3916   |
+| 3.1825        | 8.7181 | 81000 | 3.3222          | 0.3917   |
+| 3.2005        | 8.8257 | 82000 | 3.3187          | 0.3922   |
+| 3.1977        | 8.9334 | 83000 | 3.3146          | 0.3926   |
+| 3.1246        | 9.0410 | 84000 | 3.3178          | 0.3926   |
+| 3.1312        | 9.1486 | 85000 | 3.3160          | 0.3927   |
+| 3.1217        | 9.2563 | 86000 | 3.3155          | 0.3929   |
+| 3.1471        | 9.3639 | 87000 | 3.3125          | 0.3933   |
+| 3.1102        | 9.4715 | 88000 | 3.3100          | 0.3936   |
+| 3.1177        | 9.5792 | 89000 | 3.3076          | 0.3937   |
+| 3.1193        | 9.6868 | 90000 | 3.3056          | 0.3940   |
+| 3.1506        | 9.7944 | 91000 | 3.3037          | 0.3942   |
+| 3.1476        | 9.9021 | 92000 | 3.3020          | 0.3945   |
 ### Framework versions

all_results.json CHANGED Viewed

@@ -1,16 +1,16 @@
 {
-    "epoch": 20.0,
-    "eval_accuracy": 0.39720377760770925,
-    "eval_loss": 3.2700510025024414,
-    "eval_runtime": 144.6924,
-    "eval_samples": 18012,
-    "eval_samples_per_second": 124.485,
-    "eval_steps_per_second": 7.782,
-    "perplexity": 26.31268132163682,
-    "total_flos": 1.55087795257344e+18,
-    "train_loss": 11.093011497641509,
-    "train_runtime": 72478.5883,
-    "train_samples": 296771,
-    "train_samples_per_second": 81.892,
-    "train_steps_per_second": 0.512
 }

 {
+    "epoch": 10.0,
+    "eval_accuracy": 0.3935781362574578,
+    "eval_loss": 3.308422088623047,
+    "eval_runtime": 180.1172,
+    "eval_samples": 18011,
+    "eval_samples_per_second": 99.996,
+    "eval_steps_per_second": 6.251,
+    "perplexity": 27.341948244739367,
+    "total_flos": 7.7681598529536e+17,
+    "train_loss": 3.46011078748004,
+    "train_runtime": 80911.2202,
+    "train_samples": 297298,
+    "train_samples_per_second": 36.744,
+    "train_steps_per_second": 1.148
 }

eval_results.json CHANGED Viewed

@@ -1,10 +1,10 @@
 {
-    "epoch": 20.0,
-    "eval_accuracy": 0.39720377760770925,
-    "eval_loss": 3.2700510025024414,
-    "eval_runtime": 144.6924,
-    "eval_samples": 18012,
-    "eval_samples_per_second": 124.485,
-    "eval_steps_per_second": 7.782,
-    "perplexity": 26.31268132163682
 }

 {
+    "epoch": 10.0,
+    "eval_accuracy": 0.3935781362574578,
+    "eval_loss": 3.308422088623047,
+    "eval_runtime": 180.1172,
+    "eval_samples": 18011,
+    "eval_samples_per_second": 99.996,
+    "eval_steps_per_second": 6.251,
+    "perplexity": 27.341948244739367
 }

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:97d7b8071820b39598ed7e02b54bfa64bc85bd2844df8a7c33e238116fc6526a
 size 503128704

 version https://git-lfs.github.com/spec/v1
+oid sha256:ef1dbb2fb6e866684a31fda443f6bfb3b7a72de8606a91ab35039644015ea5c2
 size 503128704

train_results.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-    "epoch": 20.0,
-    "total_flos": 1.55087795257344e+18,
-    "train_loss": 11.093011497641509,
-    "train_runtime": 72478.5883,
-    "train_samples": 296771,
-    "train_samples_per_second": 81.892,
-    "train_steps_per_second": 0.512
 }

 {
+    "epoch": 10.0,
+    "total_flos": 7.7681598529536e+17,
+    "train_loss": 3.46011078748004,
+    "train_runtime": 80911.2202,
+    "train_samples": 297298,
+    "train_samples_per_second": 36.744,
+    "train_steps_per_second": 1.148
 }

trainer_state.json CHANGED Viewed

The diff for this file is too large to render. See raw diff