Training in progress, step 3000

Browse files

Files changed (6)

README.md +40 -12
config.json +6 -6
model.safetensors +2 -2
special_tokens_map.json +21 -3
tokenizer.json +6 -1
training_args.bin +2 -2

README.md CHANGED Viewed

@@ -1,6 +1,6 @@
 ---
 license: apache-2.0
-base_model: t5-base
 tags:
 - generated_from_trainer
 metrics:
@@ -16,12 +16,12 @@ should probably proofread and complete it, then remove this comment. -->
 # randomization_model
-This model is a fine-tuned version of [t5-base](https://huggingface.co/t5-base) on the None dataset.
 It achieves the following results on the evaluation set:
-- Loss: 0.1397
 - Bleu: 0.0001
 - Accuracy: 0.0
-- Gen Len: 18.9987
 ## Model description
@@ -41,25 +41,53 @@ More information needed
 The following hyperparameters were used during training:
 - learning_rate: 2e-05
-- train_batch_size: 8
-- eval_batch_size: 8
 - seed: 42
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: linear
-- num_epochs: 2
 - mixed_precision_training: Native AMP
 ### Training results
-| Training Loss | Epoch | Step  | Validation Loss | Bleu   | Accuracy | Gen Len |
-|:-------------:|:-----:|:-----:|:---------------:|:------:|:--------:|:-------:|
-| 0.2127        | 1.0   | 6250  | 0.1517          | 0.0001 | 0.0      | 18.9987 |
-| 0.1844        | 2.0   | 12500 | 0.1397          | 0.0001 | 0.0      | 18.9987 |
 ### Framework versions
 - Transformers 4.37.1
-- Pytorch 2.3.0.dev20240122+cu121
 - Datasets 2.16.1
 - Tokenizers 0.15.1

 ---
 license: apache-2.0
+base_model: t5-small
 tags:
 - generated_from_trainer
 metrics:
 # randomization_model
+This model is a fine-tuned version of [t5-small](https://huggingface.co/t5-small) on an unspecified dataset (no dataset name was recorded by the trainer).
 It achieves the following results on the evaluation set:
+- Loss: 2.2626
 - Bleu: 0.0001
 - Accuracy: 0.0
+- Gen Len: 18.999
 ## Model description
 The following hyperparameters were used during training:
 - learning_rate: 2e-05
+- train_batch_size: 10
+- eval_batch_size: 10
 - seed: 42
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: linear
+- num_epochs: 3
 - mixed_precision_training: Native AMP
 ### Training results
+| Training Loss | Epoch | Step | Validation Loss | Bleu   | Accuracy | Gen Len |
+|:-------------:|:-----:|:----:|:---------------:|:------:|:--------:|:-------:|
+| 3.0103        | 0.1   | 50   | 2.5132          | 0.0    | 0.0      | 18.9985 |
+| 2.999         | 0.2   | 100  | 2.4883          | 0.0    | 0.0      | 19.0    |
+| 2.9457        | 0.3   | 150  | 2.4640          | 0.0    | 0.0      | 19.0    |
+| 2.8865        | 0.4   | 200  | 2.4431          | 0.0    | 0.0      | 19.0    |
+| 2.8935        | 0.5   | 250  | 2.4240          | 0.0    | 0.0      | 19.0    |
+| 2.8983        | 0.6   | 300  | 2.4079          | 0.0    | 0.0      | 19.0    |
+| 2.8579        | 0.7   | 350  | 2.3933          | 0.0    | 0.0      | 19.0    |
+| 2.8501        | 0.8   | 400  | 2.3794          | 0.0    | 0.0      | 19.0    |
+| 2.7892        | 0.9   | 450  | 2.3683          | 0.0    | 0.0      | 19.0    |
+| 2.7962        | 1.0   | 500  | 2.3561          | 0.0    | 0.0      | 19.0    |
+| 2.8408        | 1.1   | 550  | 2.3456          | 0.0    | 0.0      | 19.0    |
+| 2.8049        | 1.2   | 600  | 2.3350          | 0.0001 | 0.0      | 19.0    |
+| 2.8051        | 1.3   | 650  | 2.3278          | 0.0001 | 0.0      | 19.0    |
+| 2.8126        | 1.4   | 700  | 2.3192          | 0.0001 | 0.0      | 19.0    |
+| 2.7689        | 1.5   | 750  | 2.3121          | 0.0001 | 0.0      | 19.0    |
+| 2.7559        | 1.6   | 800  | 2.3051          | 0.0001 | 0.0      | 18.9995 |
+| 2.7672        | 1.7   | 850  | 2.2978          | 0.0001 | 0.0      | 18.9985 |
+| 2.7901        | 1.8   | 900  | 2.2916          | 0.0001 | 0.0      | 18.9995 |
+| 2.7571        | 1.9   | 950  | 2.2868          | 0.0001 | 0.0      | 18.9985 |
+| 2.7796        | 2.0   | 1000 | 2.2834          | 0.0001 | 0.0      | 18.9985 |
+| 2.7393        | 2.1   | 1050 | 2.2798          | 0.0001 | 0.0      | 18.9985 |
+| 2.7309        | 2.2   | 1100 | 2.2757          | 0.0001 | 0.0      | 18.9985 |
+| 2.7703        | 2.3   | 1150 | 2.2729          | 0.0001 | 0.0      | 18.999  |
+| 2.7354        | 2.4   | 1200 | 2.2703          | 0.0001 | 0.0      | 18.999  |
+| 2.7428        | 2.5   | 1250 | 2.2678          | 0.0001 | 0.0      | 18.999  |
+| 2.7571        | 2.6   | 1300 | 2.2661          | 0.0001 | 0.0      | 18.999  |
+| 2.7218        | 2.7   | 1350 | 2.2645          | 0.0001 | 0.0      | 18.999  |
+| 2.7051        | 2.8   | 1400 | 2.2634          | 0.0001 | 0.0      | 18.999  |
+| 2.7466        | 2.9   | 1450 | 2.2628          | 0.0001 | 0.0      | 18.999  |
+| 2.722         | 3.0   | 1500 | 2.2626          | 0.0001 | 0.0      | 18.999  |
 ### Framework versions
 - Transformers 4.37.1
+- Pytorch 2.1.0+cu121
 - Datasets 2.16.1
 - Tokenizers 0.15.1

config.json CHANGED Viewed

@@ -1,12 +1,12 @@
 {
-  "_name_or_path": "t5-base",
   "architectures": [
     "T5ForConditionalGeneration"
   ],
   "classifier_dropout": 0.0,
-  "d_ff": 3072,
   "d_kv": 64,
-  "d_model": 768,
   "decoder_start_token_id": 0,
   "dense_act_fn": "relu",
   "dropout_rate": 0.1,
@@ -18,9 +18,9 @@
   "layer_norm_epsilon": 1e-06,
   "model_type": "t5",
   "n_positions": 512,
-  "num_decoder_layers": 12,
-  "num_heads": 12,
-  "num_layers": 12,
   "output_past": true,
   "pad_token_id": 0,
   "relative_attention_max_distance": 128,

 {
+  "_name_or_path": "t5-small",
   "architectures": [
     "T5ForConditionalGeneration"
   ],
   "classifier_dropout": 0.0,
+  "d_ff": 2048,
   "d_kv": 64,
+  "d_model": 512,
   "decoder_start_token_id": 0,
   "dense_act_fn": "relu",
   "dropout_rate": 0.1,
   "layer_norm_epsilon": 1e-06,
   "model_type": "t5",
   "n_positions": 512,
+  "num_decoder_layers": 6,
+  "num_heads": 8,
+  "num_layers": 6,
   "output_past": true,
   "pad_token_id": 0,
   "relative_attention_max_distance": 128,

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e60d986a1f82f878dd2be5f422d6e3156ed191fbc6a9ccf3309626fc1639e961
-size 891644712

 version https://git-lfs.github.com/spec/v1
+oid sha256:c9de13f573190168dd8af8e2178fa752d35f9e9f73a2b2c2587ee0f4cafd9002
+size 242041896

special_tokens_map.json CHANGED Viewed

@@ -101,7 +101,25 @@
     "<extra_id_98>",
     "<extra_id_99>"
   ],
-  "eos_token": "</s>",
-  "pad_token": "<pad>",
-  "unk_token": "<unk>"
 }

     "<extra_id_98>",
     "<extra_id_99>"
   ],
+  "eos_token": {
+    "content": "</s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "<pad>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "<unk>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
 }

tokenizer.json CHANGED Viewed

@@ -1,6 +1,11 @@
 {
   "version": "1.0",
-  "truncation": null,
   "padding": null,
   "added_tokens": [
     {

 {
   "version": "1.0",
+  "truncation": {
+    "direction": "Right",
+    "max_length": 512,
+    "strategy": "LongestFirst",
+    "stride": 0
+  },
   "padding": null,
   "added_tokens": [
     {

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:033dc0c36c885dd52dded125761179571aec44100ca76b30e68dacf373b29873
-size 4920

 version https://git-lfs.github.com/spec/v1
+oid sha256:e51819030c3ee068bcccae76b550e921a016437505c7fae8065b0ca02f9bedea
+size 4856