End of training
- README.md +159 -0
- config.json +62 -0
- generation_config.json +7 -0
- model.safetensors +3 -0
- training_args.bin +3 -0
README.md
ADDED
@@ -0,0 +1,159 @@
---
library_name: transformers
license: apache-2.0
base_model: google/flan-t5-base
tags:
- generated_from_trainer
model-index:
- name: flan-t5-base-squad-qag-b
  results: []
---

<!-- This model card has been generated automatically according to the information the Trainer had access to. You
should probably proofread and complete it, then remove this comment. -->

# flan-t5-base-squad-qag-b

This model is a fine-tuned version of [google/flan-t5-base](https://huggingface.co/google/flan-t5-base) on an unknown dataset.
It achieves the following results on the evaluation set:
- Loss: 4.3126

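The snippet below is only a minimal inference sketch, not an official usage example: the exact question-answer-generation prompt format used during fine-tuning is not documented in this card, so the input string and the bare repository id `flan-t5-base-squad-qag-b` (shown without its owning namespace) are assumptions to adapt to your setup.

```python
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

# Assumed repo id; prepend the owning namespace, e.g. "<user>/flan-t5-base-squad-qag-b".
model_id = "flan-t5-base-squad-qag-b"

tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForSeq2SeqLM.from_pretrained(model_id)

# Hypothetical QAG-style prompt; the training input template is not documented here.
context = "The Eiffel Tower was completed in 1889 and is located in Paris."
inputs = tokenizer("generate question and answer: " + context, return_tensors="pt")

outputs = model.generate(**inputs, max_new_tokens=64)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```
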
## Model description

More information needed

## Intended uses & limitations

More information needed

## Training and evaluation data

More information needed

## Training procedure

### Training hyperparameters

The following hyperparameters were used during training (a rough code equivalent is sketched after this list):
- learning_rate: 2e-05
- train_batch_size: 8
- eval_batch_size: 8
- seed: 42
- gradient_accumulation_steps: 4
- total_train_batch_size: 32 (8 per device × 4 accumulation steps)
- optimizer: AdamW (torch, `adamw_torch`) with betas=(0.9, 0.999) and epsilon=1e-08; no additional optimizer arguments
- lr_scheduler_type: linear
- num_epochs: 100

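As a rough, hypothetical reconstruction of this setup with the `transformers` `Seq2SeqTrainingArguments` API; the `output_dir` value is a placeholder, and the `training_args.bin` file committed in this repository remains the authoritative record.

```python
from transformers import Seq2SeqTrainingArguments

# Sketch of the hyperparameters listed above; output_dir is a placeholder.
training_args = Seq2SeqTrainingArguments(
    output_dir="flan-t5-base-squad-qag-b",
    learning_rate=2e-5,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    gradient_accumulation_steps=4,   # effective train batch size: 8 * 4 = 32
    seed=42,
    optim="adamw_torch",             # AdamW; betas=(0.9, 0.999), eps=1e-8 are the defaults
    lr_scheduler_type="linear",
    num_train_epochs=100,
)
```
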
### Training results

| Training Loss | Epoch | Step | Validation Loss |
|:-------------:|:-------:|:----:|:---------------:|
| 46.6459 | 0.5714 | 1 | 41.6529 |
| 62.2881 | 1.5714 | 2 | 39.6635 |
| 57.9979 | 2.5714 | 3 | 38.0827 |
| 55.2983 | 3.5714 | 4 | 36.6795 |
| 52.2218 | 4.5714 | 5 | 35.3840 |
| 49.5878 | 5.5714 | 6 | 34.1714 |
| 48.3754 | 6.5714 | 7 | 33.0814 |
| 46.4029 | 7.5714 | 8 | 32.1042 |
| 45.7095 | 8.5714 | 9 | 31.2362 |
| 43.622 | 9.5714 | 10 | 30.4637 |
| 42.5298 | 10.5714 | 11 | 29.7723 |
| 41.374 | 11.5714 | 12 | 29.1297 |
| 40.0091 | 12.5714 | 13 | 28.5180 |
| 38.9493 | 13.5714 | 14 | 27.9190 |
| 38.4019 | 14.5714 | 15 | 27.3293 |
| 37.0155 | 15.5714 | 16 | 26.7453 |
| 36.6909 | 16.5714 | 17 | 26.1653 |
| 35.8302 | 17.5714 | 18 | 25.5881 |
| 35.0254 | 18.5714 | 19 | 25.0157 |
| 34.5181 | 19.5714 | 20 | 24.4471 |
| 33.9099 | 20.5714 | 21 | 23.8822 |
| 32.9181 | 21.5714 | 22 | 23.3236 |
| 32.3386 | 22.5714 | 23 | 22.7745 |
| 31.7004 | 23.5714 | 24 | 22.2330 |
| 30.7786 | 24.5714 | 25 | 21.6956 |
| 30.1637 | 25.5714 | 26 | 21.1577 |
| 29.9204 | 26.5714 | 27 | 20.6225 |
| 28.9905 | 27.5714 | 28 | 20.0852 |
| 28.6735 | 28.5714 | 29 | 19.5506 |
| 27.9501 | 29.5714 | 30 | 19.0144 |
| 27.358 | 30.5714 | 31 | 18.4755 |
| 26.9717 | 31.5714 | 32 | 17.9333 |
| 25.7062 | 32.5714 | 33 | 17.3834 |
| 25.5156 | 33.5714 | 34 | 16.8244 |
| 24.5697 | 34.5714 | 35 | 16.2665 |
| 23.9692 | 35.5714 | 36 | 15.6957 |
| 23.2353 | 36.5714 | 37 | 15.1122 |
| 22.5299 | 37.5714 | 38 | 14.5180 |
| 22.0458 | 38.5714 | 39 | 13.9129 |
| 20.8835 | 39.5714 | 40 | 13.2977 |
| 20.2673 | 40.5714 | 41 | 12.6807 |
| 19.8669 | 41.5714 | 42 | 12.0450 |
| 18.836 | 42.5714 | 43 | 11.3926 |
| 18.1732 | 43.5714 | 44 | 10.7243 |
| 17.1998 | 44.5714 | 45 | 10.0452 |
| 16.2139 | 45.5714 | 46 | 9.3454 |
| 16.225 | 46.5714 | 47 | 8.6436 |
| 14.549 | 47.5714 | 48 | 7.9531 |
| 14.4398 | 48.5714 | 49 | 7.2808 |
| 13.7322 | 49.5714 | 50 | 6.6608 |
| 12.4941 | 50.5714 | 51 | 6.1236 |
| 11.982 | 51.5714 | 52 | 5.7015 |
| 11.3853 | 52.5714 | 53 | 5.3868 |
| 10.7295 | 53.5714 | 54 | 5.1683 |
| 10.1794 | 54.5714 | 55 | 5.0195 |
| 9.8638 | 55.5714 | 56 | 4.9177 |
| 9.2193 | 56.5714 | 57 | 4.8455 |
| 9.1266 | 57.5714 | 58 | 4.7924 |
| 8.9316 | 58.5714 | 59 | 4.7506 |
| 8.3913 | 59.5714 | 60 | 4.7168 |
| 8.3446 | 60.5714 | 61 | 4.6880 |
| 8.0074 | 61.5714 | 62 | 4.6617 |
| 7.8951 | 62.5714 | 63 | 4.6376 |
| 7.7446 | 63.5714 | 64 | 4.6158 |
| 7.7313 | 64.5714 | 65 | 4.5963 |
| 7.5051 | 65.5714 | 66 | 4.5780 |
| 7.3933 | 66.5714 | 67 | 4.5608 |
| 7.3083 | 67.5714 | 68 | 4.5449 |
| 7.3168 | 68.5714 | 69 | 4.5301 |
| 7.1794 | 69.5714 | 70 | 4.5161 |
| 7.2062 | 70.5714 | 71 | 4.5032 |
| 7.1407 | 71.5714 | 72 | 4.4914 |
| 7.0435 | 72.5714 | 73 | 4.4803 |
| 7.0905 | 73.5714 | 74 | 4.4696 |
| 7.0244 | 74.5714 | 75 | 4.4592 |
| 6.9725 | 75.5714 | 76 | 4.4489 |
| 6.9754 | 76.5714 | 77 | 4.4392 |
| 6.9402 | 77.5714 | 78 | 4.4297 |
| 6.8599 | 78.5714 | 79 | 4.4207 |
| 6.8546 | 79.5714 | 80 | 4.4118 |
| 6.8619 | 80.5714 | 81 | 4.4032 |
| 6.7876 | 81.5714 | 82 | 4.3950 |
| 6.7963 | 82.5714 | 83 | 4.3869 |
| 6.8075 | 83.5714 | 84 | 4.3791 |
| 6.7802 | 84.5714 | 85 | 4.3718 |
| 6.7299 | 85.5714 | 86 | 4.3649 |
| 6.7649 | 86.5714 | 87 | 4.3583 |
| 6.7058 | 87.5714 | 88 | 4.3521 |
| 6.7104 | 88.5714 | 89 | 4.3464 |
| 6.6899 | 89.5714 | 90 | 4.3410 |
| 6.6947 | 90.5714 | 91 | 4.3360 |
| 6.6657 | 91.5714 | 92 | 4.3314 |
| 6.6323 | 92.5714 | 93 | 4.3273 |
| 6.6381 | 93.5714 | 94 | 4.3237 |
| 6.6417 | 94.5714 | 95 | 4.3206 |
| 6.6728 | 95.5714 | 96 | 4.3179 |
| 6.6866 | 96.5714 | 97 | 4.3158 |
| 6.634 | 97.5714 | 98 | 4.3142 |
| 6.6684 | 98.5714 | 99 | 4.3132 |
| 6.6304 | 99.5714 | 100 | 4.3126 |


### Framework versions

- Transformers 4.48.3
- Pytorch 2.5.1+cu124
- Datasets 3.3.0
- Tokenizers 0.21.0
config.json
ADDED
@@ -0,0 +1,62 @@
{
  "_name_or_path": "google/flan-t5-base",
  "architectures": [
    "T5ForConditionalGeneration"
  ],
  "classifier_dropout": 0.0,
  "d_ff": 2048,
  "d_kv": 64,
  "d_model": 768,
  "decoder_start_token_id": 0,
  "dense_act_fn": "gelu_new",
  "dropout_rate": 0.1,
  "eos_token_id": 1,
  "feed_forward_proj": "gated-gelu",
  "initializer_factor": 1.0,
  "is_encoder_decoder": true,
  "is_gated_act": true,
  "layer_norm_epsilon": 1e-06,
  "model_type": "t5",
  "n_positions": 512,
  "num_decoder_layers": 12,
  "num_heads": 12,
  "num_layers": 12,
  "output_past": true,
  "pad_token_id": 0,
  "relative_attention_max_distance": 128,
  "relative_attention_num_buckets": 32,
  "task_specific_params": {
    "summarization": {
      "early_stopping": true,
      "length_penalty": 2.0,
      "max_length": 200,
      "min_length": 30,
      "no_repeat_ngram_size": 3,
      "num_beams": 4,
      "prefix": "summarize: "
    },
    "translation_en_to_de": {
      "early_stopping": true,
      "max_length": 300,
      "num_beams": 4,
      "prefix": "translate English to German: "
    },
    "translation_en_to_fr": {
      "early_stopping": true,
      "max_length": 300,
      "num_beams": 4,
      "prefix": "translate English to French: "
    },
    "translation_en_to_ro": {
      "early_stopping": true,
      "max_length": 300,
      "num_beams": 4,
      "prefix": "translate English to Romanian: "
    }
  },
  "tie_word_embeddings": false,
  "torch_dtype": "float32",
  "transformers_version": "4.48.3",
  "use_cache": true,
  "vocab_size": 32128
}
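
For reference, a minimal sketch of inspecting this configuration with `transformers.AutoConfig`; the bare repo id below is an assumption (prepend the owning namespace or point at a local clone of this repository).

```python
from transformers import AutoConfig

# Assumed repo id; use "<user>/flan-t5-base-squad-qag-b" or a local directory containing config.json.
config = AutoConfig.from_pretrained("flan-t5-base-squad-qag-b")

print(config.model_type)          # "t5"
print(config.d_model)             # 768
print(config.num_layers)          # 12 encoder layers
print(config.num_decoder_layers)  # 12 decoder layers
print(config.vocab_size)          # 32128
```
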
generation_config.json
ADDED
@@ -0,0 +1,7 @@
{
  "_from_model_config": true,
  "decoder_start_token_id": 0,
  "eos_token_id": 1,
  "pad_token_id": 0,
  "transformers_version": "4.48.3"
}
model.safetensors
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:9f437076b1581ba72e0060243d623434027d58fbc254675ba9ef79910b387b09
size 990345064
training_args.bin
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:d1e05a3aa4926bd54685cf41396dbfccc0989c0864bfd2491db2b5f81a903ef3
size 5368