Upload model

Browse files

Files changed (4) hide show

README.md +152 -0
config.json +38 -0
generation_config.json +6 -0
tf_model.h5 +3 -0

README.md ADDED Viewed

	@@ -0,0 +1,152 @@

+---
+license: mit
+tags:
+- generated_from_keras_callback
+model-index:
+- name: valve_model
+  results: []
+---
+<!-- This model card has been generated automatically according to the information Keras had access to. You should
+probably proofread and complete it, then remove this comment. -->
+# valve_model
+This model is a fine-tuned version of [gpt2](https://huggingface.co/gpt2) on an unknown dataset.
+It achieves the following results on the training and evaluation sets after the final epoch:
+- Train Loss: 3.1831
+- Validation Loss: 5.9072
+- Epoch: 99
+## Model description
+More information needed
+## Intended uses & limitations
+More information needed
+## Training and evaluation data
+More information needed
+## Training procedure
+### Training hyperparameters
+The following hyperparameters were used during training:
+- optimizer: {'name': 'AdamWeightDecay', 'learning_rate': {'class_name': 'WarmUp', 'config': {'initial_learning_rate': 5e-05, 'decay_schedule_fn': {'class_name': 'PolynomialDecay', 'config': {'initial_learning_rate': 5e-05, 'decay_steps': -999, 'end_learning_rate': 0.0, 'power': 1.0, 'cycle': False, 'name': None}, '__passive_serialization__': True}, 'warmup_steps': 1000, 'power': 1.0, 'name': None}}, 'decay': 0.0, 'beta_1': 0.9, 'beta_2': 0.999, 'epsilon': 1e-08, 'amsgrad': False, 'weight_decay_rate': 0.01}
+- training_precision: mixed_float16
+### Training results
+| Train Loss | Validation Loss | Epoch |
+|:----------:|:---------------:|:-----:|
+| 6.4756     | 6.5449          | 0     |
+| 6.5200     | 6.5443          | 1     |
+| 6.6058     | 6.5432          | 2     |
+| 6.5443     | 6.5416          | 3     |
+| 6.6271     | 6.5395          | 4     |
+| 6.5184     | 6.5369          | 5     |
+| 6.4858     | 6.5337          | 6     |
+| 6.4394     | 6.5301          | 7     |
+| 6.5547     | 6.5259          | 8     |
+| 6.4873     | 6.5211          | 9     |
+| 6.4768     | 6.5160          | 10    |
+| 6.5326     | 6.5102          | 11    |
+| 6.4251     | 6.5041          | 12    |
+| 6.4698     | 6.4974          | 13    |
+| 6.4125     | 6.4903          | 14    |
+| 6.3133     | 6.4827          | 15    |
+| 6.3633     | 6.4748          | 16    |
+| 6.3025     | 6.4665          | 17    |
+| 6.3038     | 6.4579          | 18    |
+| 6.2443     | 6.4490          | 19    |
+| 6.2891     | 6.4398          | 20    |
+| 6.1881     | 6.4304          | 21    |
+| 6.0868     | 6.4208          | 22    |
+| 6.1109     | 6.4110          | 23    |
+| 6.1498     | 6.4010          | 24    |
+| 5.9289     | 6.3908          | 25    |
+| 6.0533     | 6.3804          | 26    |
+| 6.0183     | 6.3699          | 27    |
+| 5.8904     | 6.3593          | 28    |
+| 5.8638     | 6.3486          | 29    |
+| 5.8532     | 6.3379          | 30    |
+| 5.8279     | 6.3273          | 31    |
+| 5.7185     | 6.3167          | 32    |
+| 5.7309     | 6.3062          | 33    |
+| 5.6457     | 6.2958          | 34    |
+| 5.5979     | 6.2855          | 35    |
+| 5.5968     | 6.2750          | 36    |
+| 5.6015     | 6.2646          | 37    |
+| 5.4416     | 6.2542          | 38    |
+| 5.5024     | 6.2441          | 39    |
+| 5.4739     | 6.2342          | 40    |
+| 5.3524     | 6.2245          | 41    |
+| 5.3214     | 6.2149          | 42    |
+| 5.2997     | 6.2052          | 43    |
+| 5.2619     | 6.1955          | 44    |
+| 5.2155     | 6.1859          | 45    |
+| 5.2030     | 6.1765          | 46    |
+| 5.1632     | 6.1672          | 47    |
+| 5.1386     | 6.1581          | 48    |
+| 5.0821     | 6.1492          | 49    |
+| 5.0143     | 6.1406          | 50    |
+| 5.0254     | 6.1318          | 51    |
+| 4.9244     | 6.1235          | 52    |
+| 4.8945     | 6.1151          | 53    |
+| 4.9138     | 6.1066          | 54    |
+| 4.8516     | 6.0985          | 55    |
+| 4.8212     | 6.0899          | 56    |
+| 4.6900     | 6.0817          | 57    |
+| 4.7051     | 6.0738          | 58    |
+| 4.6742     | 6.0657          | 59    |
+| 4.6304     | 6.0577          | 60    |
+| 4.5756     | 6.0498          | 61    |
+| 4.5728     | 6.0420          | 62    |
+| 4.5041     | 6.0341          | 63    |
+| 4.5266     | 6.0264          | 64    |
+| 4.4364     | 6.0193          | 65    |
+| 4.3653     | 6.0125          | 66    |
+| 4.3792     | 6.0054          | 67    |
+| 4.3870     | 5.9979          | 68    |
+| 4.2865     | 5.9911          | 69    |
+| 4.2443     | 5.9845          | 70    |
+| 4.2388     | 5.9783          | 71    |
+| 4.2063     | 5.9723          | 72    |
+| 4.1534     | 5.9667          | 73    |
+| 4.0811     | 5.9616          | 74    |
+| 4.1064     | 5.9565          | 75    |
+| 4.1281     | 5.9512          | 76    |
+| 4.0124     | 5.9467          | 77    |
+| 3.9703     | 5.9430          | 78    |
+| 3.8858     | 5.9389          | 79    |
+| 3.9194     | 5.9351          | 80    |
+| 3.8257     | 5.9309          | 81    |
+| 3.8251     | 5.9270          | 82    |
+| 3.8499     | 5.9234          | 83    |
+| 3.7903     | 5.9206          | 84    |
+| 3.7851     | 5.9190          | 85    |
+| 3.7319     | 5.9174          | 86    |
+| 3.6612     | 5.9169          | 87    |
+| 3.6404     | 5.9162          | 88    |
+| 3.5339     | 5.9162          | 89    |
+| 3.5073     | 5.9158          | 90    |
+| 3.4569     | 5.9157          | 91    |
+| 3.5231     | 5.9153          | 92    |
+| 3.3952     | 5.9152          | 93    |
+| 3.3774     | 5.9144          | 94    |
+| 3.3776     | 5.9126          | 95    |
+| 3.2881     | 5.9112          | 96    |
+| 3.2130     | 5.9099          | 97    |
+| 3.2514     | 5.9088          | 98    |
+| 3.1831     | 5.9072          | 99    |
+### Framework versions
+- Transformers 4.29.0.dev0
+- TensorFlow 2.9.1
+- Datasets 2.5.1
+- Tokenizers 0.13.3

config.json ADDED Viewed

	@@ -0,0 +1,38 @@

+{
+  "_name_or_path": "gpt2",
+  "activation_function": "gelu_new",
+  "architectures": [
+    "GPT2LMHeadModel"
+  ],
+  "attn_pdrop": 0.1,
+  "bos_token_id": 0,
+  "embd_pdrop": 0.1,
+  "eos_token_id": 0,
+  "initializer_range": 0.02,
+  "layer_norm_epsilon": 1e-05,
+  "model_type": "gpt2",
+  "n_ctx": 40,
+  "n_embd": 768,
+  "n_head": 12,
+  "n_inner": null,
+  "n_layer": 12,
+  "n_positions": 1024,
+  "reorder_and_upcast_attn": false,
+  "resid_pdrop": 0.1,
+  "scale_attn_by_inverse_layer_idx": false,
+  "scale_attn_weights": true,
+  "summary_activation": null,
+  "summary_first_dropout": 0.1,
+  "summary_proj_to_labels": true,
+  "summary_type": "cls_index",
+  "summary_use_proj": true,
+  "task_specific_params": {
+    "text-generation": {
+      "do_sample": true,
+      "max_length": 50
+    }
+  },
+  "transformers_version": "4.29.0.dev0",
+  "use_cache": true,
+  "vocab_size": 665
+}

generation_config.json ADDED Viewed

	@@ -0,0 +1,6 @@

+{
+  "_from_model_config": true,
+  "bos_token_id": 0,
+  "eos_token_id": 0,
+  "transformers_version": "4.29.0.dev0"
+}

tf_model.h5 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:6311b4e3a92717f413ce150c3791712ecebed73ff280364d1b10eaf8e2f7226a
+size 345588840