Training in progress, step 500

Browse files

Files changed (6) hide show

README.md +12 -70
config.json +6 -6
model.safetensors +2 -2
special_tokens_map.json +3 -21
tokenizer.json +1 -6
training_args.bin +2 -2

README.md CHANGED Viewed

@@ -1,6 +1,6 @@
 ---
 license: apache-2.0
-base_model: t5-small
 tags:
 - generated_from_trainer
 metrics:
@@ -16,12 +16,12 @@ should probably proofread and complete it, then remove this comment. -->
 # randomization_model
-This model is a fine-tuned version of [t5-small](https://huggingface.co/t5-small) on an unspecified dataset (no dataset name was recorded by the trainer).
 It achieves the following results on the evaluation set:
-- Loss: 2.2271
 - Bleu: 0.0001
 - Accuracy: 0.0
-- Gen Len: 18.997
 ## Model description
@@ -41,83 +41,25 @@ More information needed
 The following hyperparameters were used during training:
 - learning_rate: 2e-05
-- train_batch_size: 10
-- eval_batch_size: 10
 - seed: 42
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: linear
-- num_epochs: 3
 - mixed_precision_training: Native AMP
 ### Training results
-| Training Loss | Epoch | Step | Validation Loss | Bleu   | Accuracy | Gen Len |
-|:-------------:|:-----:|:----:|:---------------:|:------:|:--------:|:-------:|
-| 6.7713        | 0.05  | 50   | 4.1342          | 0.0    | 0.0      | 19.0    |
-| 4.0962        | 0.1   | 100  | 3.6084          | 0.0    | 0.0      | 19.0    |
-| 3.7511        | 0.15  | 150  | 3.3793          | 0.0    | 0.0      | 18.99   |
-| 3.6618        | 0.2   | 200  | 3.2099          | 0.0    | 0.0      | 18.9845 |
-| 3.5055        | 0.25  | 250  | 3.0655          | 0.0    | 0.0      | 18.9845 |
-| 3.4285        | 0.3   | 300  | 2.9465          | 0.0    | 0.0      | 18.986  |
-| 3.323         | 0.35  | 350  | 2.8558          | 0.0    | 0.0      | 18.982  |
-| 3.257         | 0.4   | 400  | 2.7828          | 0.0    | 0.0      | 18.9845 |
-| 3.2148        | 0.45  | 450  | 2.7311          | 0.0    | 0.0      | 18.9915 |
-| 3.1964        | 0.5   | 500  | 2.6861          | 0.0    | 0.0      | 18.9905 |
-| 3.108         | 0.55  | 550  | 2.6446          | 0.0    | 0.0      | 18.9905 |
-| 3.0801        | 0.6   | 600  | 2.6087          | 0.0    | 0.0      | 18.9925 |
-| 3.0439        | 0.65  | 650  | 2.5770          | 0.0    | 0.0      | 18.9915 |
-| 3.0683        | 0.7   | 700  | 2.5504          | 0.0    | 0.0      | 18.9955 |
-| 3.0283        | 0.75  | 750  | 2.5223          | 0.0    | 0.0      | 18.9955 |
-| 3.0292        | 0.8   | 800  | 2.5003          | 0.0    | 0.0      | 18.9935 |
-| 2.9533        | 0.85  | 850  | 2.4797          | 0.0    | 0.0      | 18.9965 |
-| 3.006         | 0.9   | 900  | 2.4627          | 0.0    | 0.0      | 19.0    |
-| 2.9028        | 0.95  | 950  | 2.4463          | 0.0    | 0.0      | 18.997  |
-| 2.9219        | 1.0   | 1000 | 2.4287          | 0.0    | 0.0      | 18.996  |
-| 2.8995        | 1.05  | 1050 | 2.4120          | 0.0    | 0.0      | 18.9995 |
-| 2.8857        | 1.1   | 1100 | 2.3988          | 0.0    | 0.0      | 19.0    |
-| 2.8971        | 1.15  | 1150 | 2.3861          | 0.0    | 0.0      | 18.998  |
-| 2.8882        | 1.2   | 1200 | 2.3747          | 0.0    | 0.0      | 18.998  |
-| 2.8425        | 1.25  | 1250 | 2.3632          | 0.0    | 0.0      | 18.998  |
-| 2.865         | 1.3   | 1300 | 2.3544          | 0.0    | 0.0      | 18.998  |
-| 2.8245        | 1.35  | 1350 | 2.3440          | 0.0    | 0.0      | 18.998  |
-| 2.8208        | 1.4   | 1400 | 2.3373          | 0.0    | 0.0      | 18.998  |
-| 2.8397        | 1.45  | 1450 | 2.3286          | 0.0    | 0.0      | 18.998  |
-| 2.8103        | 1.5   | 1500 | 2.3203          | 0.0    | 0.0      | 18.998  |
-| 2.82          | 1.55  | 1550 | 2.3133          | 0.0    | 0.0      | 18.9995 |
-| 2.7653        | 1.6   | 1600 | 2.3058          | 0.0    | 0.0      | 18.998  |
-| 2.7945        | 1.65  | 1650 | 2.2998          | 0.0    | 0.0      | 18.998  |
-| 2.7758        | 1.7   | 1700 | 2.2940          | 0.0001 | 0.0      | 18.998  |
-| 2.8035        | 1.75  | 1750 | 2.2886          | 0.0001 | 0.0      | 18.998  |
-| 2.8045        | 1.8   | 1800 | 2.2827          | 0.0001 | 0.0      | 18.998  |
-| 2.7667        | 1.85  | 1850 | 2.2777          | 0.0001 | 0.0      | 18.9965 |
-| 2.7792        | 1.9   | 1900 | 2.2730          | 0.0001 | 0.0      | 18.9965 |
-| 2.7337        | 1.95  | 1950 | 2.2683          | 0.0001 | 0.0      | 18.9965 |
-| 2.7634        | 2.0   | 2000 | 2.2647          | 0.0001 | 0.0      | 18.9965 |
-| 2.7357        | 2.05  | 2050 | 2.2607          | 0.0001 | 0.0      | 18.998  |
-| 2.7261        | 2.1   | 2100 | 2.2569          | 0.0001 | 0.0      | 18.998  |
-| 2.7827        | 2.15  | 2150 | 2.2539          | 0.0001 | 0.0      | 18.998  |
-| 2.7363        | 2.2   | 2200 | 2.2509          | 0.0001 | 0.0      | 18.998  |
-| 2.7647        | 2.25  | 2250 | 2.2480          | 0.0001 | 0.0      | 18.998  |
-| 2.737         | 2.3   | 2300 | 2.2449          | 0.0001 | 0.0      | 18.998  |
-| 2.72          | 2.35  | 2350 | 2.2422          | 0.0001 | 0.0      | 18.998  |
-| 2.7312        | 2.4   | 2400 | 2.2403          | 0.0001 | 0.0      | 18.998  |
-| 2.7345        | 2.45  | 2450 | 2.2382          | 0.0001 | 0.0      | 18.998  |
-| 2.6951        | 2.5   | 2500 | 2.2363          | 0.0001 | 0.0      | 18.998  |
-| 2.7591        | 2.55  | 2550 | 2.2349          | 0.0001 | 0.0      | 18.998  |
-| 2.7018        | 2.6   | 2600 | 2.2333          | 0.0001 | 0.0      | 18.998  |
-| 2.6993        | 2.65  | 2650 | 2.2321          | 0.0001 | 0.0      | 18.998  |
-| 2.7006        | 2.7   | 2700 | 2.2309          | 0.0001 | 0.0      | 18.998  |
-| 2.6694        | 2.75  | 2750 | 2.2296          | 0.0001 | 0.0      | 18.998  |
-| 2.7018        | 2.8   | 2800 | 2.2287          | 0.0001 | 0.0      | 18.998  |
-| 2.7285        | 2.85  | 2850 | 2.2281          | 0.0001 | 0.0      | 18.998  |
-| 2.6873        | 2.9   | 2900 | 2.2276          | 0.0001 | 0.0      | 18.997  |
-| 2.7121        | 2.95  | 2950 | 2.2273          | 0.0001 | 0.0      | 18.997  |
-| 2.7176        | 3.0   | 3000 | 2.2271          | 0.0001 | 0.0      | 18.997  |
 ### Framework versions
 - Transformers 4.37.1
-- Pytorch 2.1.0+cu121
 - Datasets 2.16.1
 - Tokenizers 0.15.1

 ---
 license: apache-2.0
+base_model: t5-base
 tags:
 - generated_from_trainer
 metrics:
 # randomization_model
+This model is a fine-tuned version of [t5-base](https://huggingface.co/t5-base) on an unspecified dataset (no dataset name was recorded by the trainer).
 It achieves the following results on the evaluation set:
+- Loss: 0.1397
 - Bleu: 0.0001
 - Accuracy: 0.0
+- Gen Len: 18.9987
 ## Model description
 The following hyperparameters were used during training:
 - learning_rate: 2e-05
+- train_batch_size: 8
+- eval_batch_size: 8
 - seed: 42
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: linear
+- num_epochs: 2
 - mixed_precision_training: Native AMP
 ### Training results
+| Training Loss | Epoch | Step  | Validation Loss | Bleu   | Accuracy | Gen Len |
+|:-------------:|:-----:|:-----:|:---------------:|:------:|:--------:|:-------:|
+| 0.2127        | 1.0   | 6250  | 0.1517          | 0.0001 | 0.0      | 18.9987 |
+| 0.1844        | 2.0   | 12500 | 0.1397          | 0.0001 | 0.0      | 18.9987 |
 ### Framework versions
 - Transformers 4.37.1
+- Pytorch 2.3.0.dev20240122+cu121
 - Datasets 2.16.1
 - Tokenizers 0.15.1

config.json CHANGED Viewed

@@ -1,12 +1,12 @@
 {
-  "_name_or_path": "t5-small",
   "architectures": [
     "T5ForConditionalGeneration"
   ],
   "classifier_dropout": 0.0,
-  "d_ff": 2048,
   "d_kv": 64,
-  "d_model": 512,
   "decoder_start_token_id": 0,
   "dense_act_fn": "relu",
   "dropout_rate": 0.1,
@@ -18,9 +18,9 @@
   "layer_norm_epsilon": 1e-06,
   "model_type": "t5",
   "n_positions": 512,
-  "num_decoder_layers": 6,
-  "num_heads": 8,
-  "num_layers": 6,
   "output_past": true,
   "pad_token_id": 0,
   "relative_attention_max_distance": 128,

 {
+  "_name_or_path": "t5-base",
   "architectures": [
     "T5ForConditionalGeneration"
   ],
   "classifier_dropout": 0.0,
+  "d_ff": 3072,
   "d_kv": 64,
+  "d_model": 768,
   "decoder_start_token_id": 0,
   "dense_act_fn": "relu",
   "dropout_rate": 0.1,
   "layer_norm_epsilon": 1e-06,
   "model_type": "t5",
   "n_positions": 512,
+  "num_decoder_layers": 12,
+  "num_heads": 12,
+  "num_layers": 12,
   "output_past": true,
   "pad_token_id": 0,
   "relative_attention_max_distance": 128,

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c9de13f573190168dd8af8e2178fa752d35f9e9f73a2b2c2587ee0f4cafd9002
-size 242041896

 version https://git-lfs.github.com/spec/v1
+oid sha256:e60d986a1f82f878dd2be5f422d6e3156ed191fbc6a9ccf3309626fc1639e961
+size 891644712

special_tokens_map.json CHANGED Viewed

@@ -101,25 +101,7 @@
     "<extra_id_98>",
     "<extra_id_99>"
   ],
-  "eos_token": {
-    "content": "</s>",
-    "lstrip": false,
-    "normalized": false,
-    "rstrip": false,
-    "single_word": false
-  },
-  "pad_token": {
-    "content": "<pad>",
-    "lstrip": false,
-    "normalized": false,
-    "rstrip": false,
-    "single_word": false
-  },
-  "unk_token": {
-    "content": "<unk>",
-    "lstrip": false,
-    "normalized": false,
-    "rstrip": false,
-    "single_word": false
-  }
 }

     "<extra_id_98>",
     "<extra_id_99>"
   ],
+  "eos_token": "</s>",
+  "pad_token": "<pad>",
+  "unk_token": "<unk>"
 }

tokenizer.json CHANGED Viewed

@@ -1,11 +1,6 @@
 {
   "version": "1.0",
-  "truncation": {
-    "direction": "Right",
-    "max_length": 512,
-    "strategy": "LongestFirst",
-    "stride": 0
-  },
   "padding": null,
   "added_tokens": [
     {

 {
   "version": "1.0",
+  "truncation": null,
   "padding": null,
   "added_tokens": [
     {

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e51819030c3ee068bcccae76b550e921a016437505c7fae8065b0ca02f9bedea
-size 4856

 version https://git-lfs.github.com/spec/v1
+oid sha256:e7d6b5ce9f23e446f662744be068c8fe580a40206247ec914bcd6fc838d79796
+size 4920