Training in progress, step 500

Browse files

Files changed (5)

README.md +40 -12
config.json +6 -6
model.safetensors +2 -2
special_tokens_map.json +21 -3
training_args.bin +2 -2

README.md CHANGED Viewed

@@ -1,6 +1,6 @@
 ---
 license: apache-2.0
-base_model: t5-base
 tags:
 - generated_from_trainer
 metrics:
@@ -16,12 +16,12 @@ should probably proofread and complete it, then remove this comment. -->
 # randomization_model
-This model is a fine-tuned version of [t5-base](https://huggingface.co/t5-base) on the None dataset.
 It achieves the following results on the evaluation set:
-- Loss: 0.1397
 - Bleu: 0.0001
 - Accuracy: 0.0
-- Gen Len: 18.9987
 ## Model description
@@ -41,25 +41,53 @@ More information needed
 The following hyperparameters were used during training:
 - learning_rate: 2e-05
-- train_batch_size: 8
-- eval_batch_size: 8
 - seed: 42
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: linear
-- num_epochs: 2
 - mixed_precision_training: Native AMP
 ### Training results
-| Training Loss | Epoch | Step  | Validation Loss | Bleu   | Accuracy | Gen Len |
-|:-------------:|:-----:|:-----:|:---------------:|:------:|:--------:|:-------:|
-| 0.2127        | 1.0   | 6250  | 0.1517          | 0.0001 | 0.0      | 18.9987 |
-| 0.1844        | 2.0   | 12500 | 0.1397          | 0.0001 | 0.0      | 18.9987 |
 ### Framework versions
 - Transformers 4.37.1
-- Pytorch 2.3.0.dev20240122+cu121
 - Datasets 2.16.1
 - Tokenizers 0.15.1

 ---
 license: apache-2.0
+base_model: t5-small
 tags:
 - generated_from_trainer
 metrics:
 # randomization_model
+This model is a fine-tuned version of [t5-small](https://huggingface.co/t5-small) on an unknown dataset.
 It achieves the following results on the evaluation set:
+- Loss: 2.2626
 - Bleu: 0.0001
 - Accuracy: 0.0
+- Gen Len: 18.999
 ## Model description
 The following hyperparameters were used during training:
 - learning_rate: 2e-05
+- train_batch_size: 10
+- eval_batch_size: 10
 - seed: 42
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: linear
+- num_epochs: 3
 - mixed_precision_training: Native AMP
 ### Training results
+| Training Loss | Epoch | Step | Validation Loss | Bleu   | Accuracy | Gen Len |
+|:-------------:|:-----:|:----:|:---------------:|:------:|:--------:|:-------:|
+| 3.0103        | 0.1   | 50   | 2.5132          | 0.0    | 0.0      | 18.9985 |
+| 2.999         | 0.2   | 100  | 2.4883          | 0.0    | 0.0      | 19.0    |
+| 2.9457        | 0.3   | 150  | 2.4640          | 0.0    | 0.0      | 19.0    |
+| 2.8865        | 0.4   | 200  | 2.4431          | 0.0    | 0.0      | 19.0    |
+| 2.8935        | 0.5   | 250  | 2.4240          | 0.0    | 0.0      | 19.0    |
+| 2.8983        | 0.6   | 300  | 2.4079          | 0.0    | 0.0      | 19.0    |
+| 2.8579        | 0.7   | 350  | 2.3933          | 0.0    | 0.0      | 19.0    |
+| 2.8501        | 0.8   | 400  | 2.3794          | 0.0    | 0.0      | 19.0    |
+| 2.7892        | 0.9   | 450  | 2.3683          | 0.0    | 0.0      | 19.0    |
+| 2.7962        | 1.0   | 500  | 2.3561          | 0.0    | 0.0      | 19.0    |
+| 2.8408        | 1.1   | 550  | 2.3456          | 0.0    | 0.0      | 19.0    |
+| 2.8049        | 1.2   | 600  | 2.3350          | 0.0001 | 0.0      | 19.0    |
+| 2.8051        | 1.3   | 650  | 2.3278          | 0.0001 | 0.0      | 19.0    |
+| 2.8126        | 1.4   | 700  | 2.3192          | 0.0001 | 0.0      | 19.0    |
+| 2.7689        | 1.5   | 750  | 2.3121          | 0.0001 | 0.0      | 19.0    |
+| 2.7559        | 1.6   | 800  | 2.3051          | 0.0001 | 0.0      | 18.9995 |
+| 2.7672        | 1.7   | 850  | 2.2978          | 0.0001 | 0.0      | 18.9985 |
+| 2.7901        | 1.8   | 900  | 2.2916          | 0.0001 | 0.0      | 18.9995 |
+| 2.7571        | 1.9   | 950  | 2.2868          | 0.0001 | 0.0      | 18.9985 |
+| 2.7796        | 2.0   | 1000 | 2.2834          | 0.0001 | 0.0      | 18.9985 |
+| 2.7393        | 2.1   | 1050 | 2.2798          | 0.0001 | 0.0      | 18.9985 |
+| 2.7309        | 2.2   | 1100 | 2.2757          | 0.0001 | 0.0      | 18.9985 |
+| 2.7703        | 2.3   | 1150 | 2.2729          | 0.0001 | 0.0      | 18.999  |
+| 2.7354        | 2.4   | 1200 | 2.2703          | 0.0001 | 0.0      | 18.999  |
+| 2.7428        | 2.5   | 1250 | 2.2678          | 0.0001 | 0.0      | 18.999  |
+| 2.7571        | 2.6   | 1300 | 2.2661          | 0.0001 | 0.0      | 18.999  |
+| 2.7218        | 2.7   | 1350 | 2.2645          | 0.0001 | 0.0      | 18.999  |
+| 2.7051        | 2.8   | 1400 | 2.2634          | 0.0001 | 0.0      | 18.999  |
+| 2.7466        | 2.9   | 1450 | 2.2628          | 0.0001 | 0.0      | 18.999  |
+| 2.722         | 3.0   | 1500 | 2.2626          | 0.0001 | 0.0      | 18.999  |
 ### Framework versions
 - Transformers 4.37.1
+- Pytorch 2.1.0+cu121
 - Datasets 2.16.1
 - Tokenizers 0.15.1

config.json CHANGED Viewed

@@ -1,12 +1,12 @@
 {
-  "_name_or_path": "t5-base",
   "architectures": [
     "T5ForConditionalGeneration"
   ],
   "classifier_dropout": 0.0,
-  "d_ff": 3072,
   "d_kv": 64,
-  "d_model": 768,
   "decoder_start_token_id": 0,
   "dense_act_fn": "relu",
   "dropout_rate": 0.1,
@@ -18,9 +18,9 @@
   "layer_norm_epsilon": 1e-06,
   "model_type": "t5",
   "n_positions": 512,
-  "num_decoder_layers": 12,
-  "num_heads": 12,
-  "num_layers": 12,
   "output_past": true,
   "pad_token_id": 0,
   "relative_attention_max_distance": 128,

 {
+  "_name_or_path": "t5-small",
   "architectures": [
     "T5ForConditionalGeneration"
   ],
   "classifier_dropout": 0.0,
+  "d_ff": 2048,
   "d_kv": 64,
+  "d_model": 512,
   "decoder_start_token_id": 0,
   "dense_act_fn": "relu",
   "dropout_rate": 0.1,
   "layer_norm_epsilon": 1e-06,
   "model_type": "t5",
   "n_positions": 512,
+  "num_decoder_layers": 6,
+  "num_heads": 8,
+  "num_layers": 6,
   "output_past": true,
   "pad_token_id": 0,
   "relative_attention_max_distance": 128,

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:19375188e79a87051cbcd9ff506b4bb09384325ecc53d097f6a3962e850b0e0e
-size 891644712

 version https://git-lfs.github.com/spec/v1
+oid sha256:73ff93c9a2b7d1c91f5650bc4826eb7a9ba7a428d32cc96ea7203d78dcab507e
+size 242041896

special_tokens_map.json CHANGED Viewed

@@ -101,7 +101,25 @@
     "<extra_id_98>",
     "<extra_id_99>"
   ],
-  "eos_token": "</s>",
-  "pad_token": "<pad>",
-  "unk_token": "<unk>"
 }

     "<extra_id_98>",
     "<extra_id_99>"
   ],
+  "eos_token": {
+    "content": "</s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "<pad>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "<unk>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
 }

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d97c834ddfa87a56c0230768be9c2a90a9658a17969b9fba31188ddf6f7829ba
-size 4920

 version https://git-lfs.github.com/spec/v1
+oid sha256:e51819030c3ee068bcccae76b550e921a016437505c7fae8065b0ca02f9bedea
+size 4856