End of training

Browse files

Files changed (5) hide show

README.md +159 -0
config.json +62 -0
generation_config.json +7 -0
model.safetensors +3 -0
training_args.bin +3 -0

README.md ADDED Viewed

	@@ -0,0 +1,159 @@

+---
+library_name: transformers
+license: apache-2.0
+base_model: google/flan-t5-base
+tags:
+- generated_from_trainer
+model-index:
+- name: flan-t5-base-squad-qag
+  results: []
+---
+<!-- This model card has been generated automatically according to the information the Trainer had access to. You
+should probably proofread and complete it, then remove this comment. -->
+# flan-t5-base-squad-qag
+This model is a fine-tuned version of [google/flan-t5-base](https://huggingface.co/google/flan-t5-base). The training dataset was not recorded in the Trainer metadata (the model name suggests SQuAD, but this is unconfirmed).
+It achieves the following results on the evaluation set:
+- Loss: 15.5366 (validation loss at the final training step: step 100, epoch 99.5714)
+## Model description
+More information needed
+## Intended uses & limitations
+More information needed
+## Training and evaluation data
+More information needed
+## Training procedure
+### Training hyperparameters
+The following hyperparameters were used during training:
+- learning_rate: 1e-05
+- train_batch_size: 8
+- eval_batch_size: 8
+- seed: 42
+- gradient_accumulation_steps: 4
+- total_train_batch_size: 32
+- optimizer: AdamW (`OptimizerNames.ADAMW_TORCH`) with betas=(0.9, 0.999) and epsilon=1e-08; no additional optimizer arguments
+- lr_scheduler_type: linear
+- num_epochs: 100
+### Training results
+| Training Loss | Epoch   | Step | Validation Loss |
+|:-------------:|:-------:|:----:|:---------------:|
+| 46.6459       | 0.5714  | 1    | 43.1485         |
+| 65.3532       | 1.5714  | 2    | 41.6135         |
+| 62.735        | 2.5714  | 3    | 40.4950         |
+| 61.766        | 3.5714  | 4    | 39.5923         |
+| 57.7849       | 4.5714  | 5    | 38.7835         |
+| 55.8212       | 5.5714  | 6    | 38.0234         |
+| 55.277        | 6.5714  | 7    | 37.3084         |
+| 53.3929       | 7.5714  | 8    | 36.6244         |
+| 53.3042       | 8.5714  | 9    | 35.9682         |
+| 51.6622       | 9.5714  | 10   | 35.3432         |
+| 50.679        | 10.5714 | 11   | 34.7521         |
+| 49.5895       | 11.5714 | 12   | 34.1904         |
+| 48.219        | 12.5714 | 13   | 33.6558         |
+| 47.7489       | 13.5714 | 14   | 33.1496         |
+| 46.8016       | 14.5714 | 15   | 32.6729         |
+| 45.2487       | 15.5714 | 16   | 32.2232         |
+| 45.3148       | 16.5714 | 17   | 31.7991         |
+| 44.3454       | 17.5714 | 18   | 31.3964         |
+| 43.6766       | 18.5714 | 19   | 31.0145         |
+| 42.8757       | 19.5714 | 20   | 30.6499         |
+| 42.9431       | 20.5714 | 21   | 30.2991         |
+| 41.7531       | 21.5714 | 22   | 29.9607         |
+| 41.2977       | 22.5714 | 23   | 29.6343         |
+| 40.7757       | 23.5714 | 24   | 29.3154         |
+| 40.0453       | 24.5714 | 25   | 29.0031         |
+| 39.602        | 25.5714 | 26   | 28.6966         |
+| 39.7306       | 26.5714 | 27   | 28.3944         |
+| 38.703        | 27.5714 | 28   | 28.0947         |
+| 38.7594       | 28.5714 | 29   | 27.7978         |
+| 38.3422       | 29.5714 | 30   | 27.5051         |
+| 38.1699       | 30.5714 | 31   | 27.2152         |
+| 37.9031       | 31.5714 | 32   | 26.9286         |
+| 36.9734       | 32.5714 | 33   | 26.6434         |
+| 37.0667       | 33.5714 | 34   | 26.3599         |
+| 36.466        | 34.5714 | 35   | 26.0803         |
+| 35.9406       | 35.5714 | 36   | 25.8051         |
+| 35.6333       | 36.5714 | 37   | 25.5321         |
+| 35.3167       | 37.5714 | 38   | 25.2619         |
+| 35.3965       | 38.5714 | 39   | 24.9950         |
+| 34.5731       | 39.5714 | 40   | 24.7318         |
+| 34.2446       | 40.5714 | 41   | 24.4734         |
+| 34.2668       | 41.5714 | 42   | 24.2188         |
+| 33.6151       | 42.5714 | 43   | 23.9683         |
+| 33.5523       | 43.5714 | 44   | 23.7208         |
+| 33.0644       | 44.5714 | 45   | 23.4754         |
+| 32.7476       | 45.5714 | 46   | 23.2334         |
+| 32.9809       | 46.5714 | 47   | 22.9938         |
+| 32.0039       | 47.5714 | 48   | 22.7560         |
+| 32.0855       | 48.5714 | 49   | 22.5215         |
+| 31.6121       | 49.5714 | 50   | 22.2902         |
+| 31.5072       | 50.5714 | 51   | 22.0626         |
+| 31.0177       | 51.5714 | 52   | 21.8383         |
+| 30.7915       | 52.5714 | 53   | 21.6181         |
+| 30.6385       | 53.5714 | 54   | 21.4022         |
+| 30.0557       | 54.5714 | 55   | 21.1892         |
+| 30.2011       | 55.5714 | 56   | 20.9796         |
+| 29.8038       | 56.5714 | 57   | 20.7722         |
+| 29.5406       | 57.5714 | 58   | 20.5675         |
+| 29.2149       | 58.5714 | 59   | 20.3655         |
+| 29.2198       | 59.5714 | 60   | 20.1660         |
+| 28.8561       | 60.5714 | 61   | 19.9673         |
+| 28.6416       | 61.5714 | 62   | 19.7716         |
+| 28.8112       | 62.5714 | 63   | 19.5786         |
+| 28.2165       | 63.5714 | 64   | 19.3885         |
+| 27.8991       | 64.5714 | 65   | 19.2027         |
+| 27.5846       | 65.5714 | 66   | 19.0196         |
+| 27.2384       | 66.5714 | 67   | 18.8399         |
+| 27.0191       | 67.5714 | 68   | 18.6630         |
+| 27.1786       | 68.5714 | 69   | 18.4899         |
+| 26.8166       | 69.5714 | 70   | 18.3193         |
+| 26.5479       | 70.5714 | 71   | 18.1547         |
+| 26.083        | 71.5714 | 72   | 17.9942         |
+| 26.2608       | 72.5714 | 73   | 17.8381         |
+| 26.4425       | 73.5714 | 74   | 17.6867         |
+| 25.715        | 74.5714 | 75   | 17.5401         |
+| 25.3579       | 75.5714 | 76   | 17.3974         |
+| 25.4759       | 76.5714 | 77   | 17.2583         |
+| 25.494        | 77.5714 | 78   | 17.1240         |
+| 25.2856       | 78.5714 | 79   | 16.9947         |
+| 24.9815       | 79.5714 | 80   | 16.8690         |
+| 25.0225       | 80.5714 | 81   | 16.7476         |
+| 24.868        | 81.5714 | 82   | 16.6309         |
+| 24.7249       | 82.5714 | 83   | 16.5196         |
+| 24.446        | 83.5714 | 84   | 16.4145         |
+| 24.7399       | 84.5714 | 85   | 16.3154         |
+| 24.1898       | 85.5714 | 86   | 16.2216         |
+| 24.451        | 86.5714 | 87   | 16.1322         |
+| 24.1094       | 87.5714 | 88   | 16.0485         |
+| 23.9994       | 88.5714 | 89   | 15.9708         |
+| 23.7422       | 89.5714 | 90   | 15.8995         |
+| 23.967        | 90.5714 | 91   | 15.8342         |
+| 23.7289       | 91.5714 | 92   | 15.7749         |
+| 23.5241       | 92.5714 | 93   | 15.7217         |
+| 23.5077       | 93.5714 | 94   | 15.6754         |
+| 23.7215       | 94.5714 | 95   | 15.6356         |
+| 23.5504       | 95.5714 | 96   | 15.6024         |
+| 23.4054       | 96.5714 | 97   | 15.5761         |
+| 23.4654       | 97.5714 | 98   | 15.5563         |
+| 23.3449       | 98.5714 | 99   | 15.5431         |
+| 23.5244       | 99.5714 | 100  | 15.5366         |
+### Framework versions
+- Transformers 4.48.3
+- Pytorch 2.5.1+cu124
+- Datasets 3.3.0
+- Tokenizers 0.21.0

config.json ADDED Viewed

	@@ -0,0 +1,62 @@

+{
+  "_name_or_path": "google/flan-t5-base",
+  "architectures": [
+    "T5ForConditionalGeneration"
+  ],
+  "classifier_dropout": 0.0,
+  "d_ff": 2048,
+  "d_kv": 64,
+  "d_model": 768,
+  "decoder_start_token_id": 0,
+  "dense_act_fn": "gelu_new",
+  "dropout_rate": 0.1,
+  "eos_token_id": 1,
+  "feed_forward_proj": "gated-gelu",
+  "initializer_factor": 1.0,
+  "is_encoder_decoder": true,
+  "is_gated_act": true,
+  "layer_norm_epsilon": 1e-06,
+  "model_type": "t5",
+  "n_positions": 512,
+  "num_decoder_layers": 12,
+  "num_heads": 12,
+  "num_layers": 12,
+  "output_past": true,
+  "pad_token_id": 0,
+  "relative_attention_max_distance": 128,
+  "relative_attention_num_buckets": 32,
+  "task_specific_params": {
+    "summarization": {
+      "early_stopping": true,
+      "length_penalty": 2.0,
+      "max_length": 200,
+      "min_length": 30,
+      "no_repeat_ngram_size": 3,
+      "num_beams": 4,
+      "prefix": "summarize: "
+    },
+    "translation_en_to_de": {
+      "early_stopping": true,
+      "max_length": 300,
+      "num_beams": 4,
+      "prefix": "translate English to German: "
+    },
+    "translation_en_to_fr": {
+      "early_stopping": true,
+      "max_length": 300,
+      "num_beams": 4,
+      "prefix": "translate English to French: "
+    },
+    "translation_en_to_ro": {
+      "early_stopping": true,
+      "max_length": 300,
+      "num_beams": 4,
+      "prefix": "translate English to Romanian: "
+    }
+  },
+  "tie_word_embeddings": false,
+  "torch_dtype": "float32",
+  "transformers_version": "4.48.3",
+  "use_cache": true,
+  "vocab_size": 32128
+}

generation_config.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "_from_model_config": true,
+  "decoder_start_token_id": 0,
+  "eos_token_id": 1,
+  "pad_token_id": 0,
+  "transformers_version": "4.48.3"
+}

model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:658fe126de3aff1db7948e1191610f24e9e043bd78e658041ffd0444f5117bac
+size 990345064

training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3b48a7b05b84c5aaea40f37f4b8716d1ec376635b9b47010b9e5e30e46035b19
+size 5368