Upload TFBertForPreTraining

Browse files

Files changed (3) hide show

README.md +151 -0
config.json +29 -0
tf_model.h5 +3 -0

README.md ADDED Viewed

	@@ -0,0 +1,151 @@

+---
+tags:
+- generated_from_keras_callback
+model-index:
+- name: pretrained-m-bert-100
+  results: []
+---
+<!-- This model card has been generated automatically according to the information Keras had access to. You should
+probably proofread and complete it, then remove this comment. -->
+# pretrained-m-bert-100
+This model is a fine-tuned version of an unspecified base checkpoint (the base model was not recorded when this card was generated) on an unknown dataset.
+It achieves the following results at the final training epoch (training loss on the training set, validation loss on the evaluation set):
+- Train Loss: 5.7643
+- Validation Loss: 15.3282
+- Epoch: 99
+## Model description
+More information needed
+## Intended uses & limitations
+More information needed
+## Training and evaluation data
+More information needed
+## Training procedure
+### Training hyperparameters
+The following hyperparameters were used during training:
+- optimizer: {'name': 'Adam', 'learning_rate': 1e-04, 'decay': 0.0, 'beta_1': 0.9, 'beta_2': 0.999, 'epsilon': 1e-07, 'amsgrad': False}
+- training_precision: float32
+### Training results
+| Train Loss | Validation Loss | Epoch |
+|:----------:|:---------------:|:-----:|
+| 10.2677    | 10.9468         | 0     |
+| 7.8146     | 10.9178         | 1     |
+| 6.8830     | 11.4644         | 2     |
+| 6.4854     | 11.6027         | 3     |
+| 6.3696     | 11.5359         | 4     |
+| 6.3373     | 12.1261         | 5     |
+| 6.4347     | 11.7670         | 6     |
+| 6.0652     | 12.2223         | 7     |
+| 5.9448     | 12.1202         | 8     |
+| 6.0746     | 12.0816         | 9     |
+| 6.0138     | 12.4949         | 10    |
+| 5.9344     | 12.8130         | 11    |
+| 5.9458     | 12.4795         | 12    |
+| 5.9723     | 12.8273         | 13    |
+| 6.0556     | 12.3681         | 14    |
+| 5.8662     | 12.5367         | 15    |
+| 5.8969     | 12.8070         | 16    |
+| 5.9584     | 13.0502         | 17    |
+| 5.8317     | 12.9219         | 18    |
+| 5.8259     | 13.0385         | 19    |
+| 5.8747     | 13.0952         | 20    |
+| 5.7600     | 13.2153         | 21    |
+| 5.8675     | 13.2446         | 22    |
+| 5.8878     | 13.1709         | 23    |
+| 5.7433     | 13.0553         | 24    |
+| 5.6823     | 13.2854         | 25    |
+| 5.8674     | 13.5718         | 26    |
+| 5.7787     | 14.0820         | 27    |
+| 5.8037     | 13.5664         | 28    |
+| 5.9530     | 13.0143         | 29    |
+| 5.8236     | 13.0637         | 30    |
+| 5.7696     | 13.5515         | 31    |
+| 5.9817     | 13.4774         | 32    |
+| 5.6877     | 13.6842         | 33    |
+| 5.7816     | 13.5667         | 34    |
+| 5.7775     | 13.3846         | 35    |
+| 5.7104     | 13.6230         | 36    |
+| 5.8429     | 13.5487         | 37    |
+| 5.8082     | 13.6563         | 38    |
+| 5.8588     | 13.6359         | 39    |
+| 5.6482     | 13.8751         | 40    |
+| 5.7874     | 13.6936         | 41    |
+| 5.7451     | 14.1454         | 42    |
+| 5.7165     | 13.8532         | 43    |
+| 5.7180     | 14.0519         | 44    |
+| 5.6640     | 14.0859         | 45    |
+| 5.6735     | 14.0086         | 46    |
+| 5.6666     | 14.1733         | 47    |
+| 5.6681     | 13.9786         | 48    |
+| 5.8221     | 14.0396         | 49    |
+| 5.8544     | 14.0354         | 50    |
+| 5.6817     | 14.4682         | 51    |
+| 5.7215     | 14.2324         | 52    |
+| 5.7315     | 13.9238         | 53    |
+| 5.9291     | 14.2091         | 54    |
+| 5.6790     | 13.6927         | 55    |
+| 5.8746     | 14.1590         | 56    |
+| 5.7267     | 14.4351         | 57    |
+| 5.7268     | 14.0592         | 58    |
+| 5.7535     | 14.2763         | 59    |
+| 5.7884     | 13.8493         | 60    |
+| 5.6596     | 14.0534         | 61    |
+| 5.7041     | 14.4408         | 62    |
+| 5.6752     | 14.4218         | 63    |
+| 5.7102     | 14.3895         | 64    |
+| 5.7761     | 14.3942         | 65    |
+| 5.7248     | 14.5926         | 66    |
+| 5.7945     | 14.2754         | 67    |
+| 5.9298     | 14.3393         | 68    |
+| 5.8765     | 14.5247         | 69    |
+| 5.7173     | 14.3060         | 70    |
+| 5.6568     | 14.1837         | 71    |
+| 5.8706     | 14.0935         | 72    |
+| 5.6913     | 14.0180         | 73    |
+| 5.7403     | 14.9313         | 74    |
+| 5.8633     | 14.1447         | 75    |
+| 5.8216     | 14.5450         | 76    |
+| 5.7655     | 14.4690         | 77    |
+| 5.7860     | 14.4312         | 78    |
+| 5.6992     | 14.1038         | 79    |
+| 5.7390     | 14.5180         | 80    |
+| 5.7588     | 14.2374         | 81    |
+| 5.7709     | 14.4895         | 82    |
+| 5.6294     | 14.9558         | 83    |
+| 5.8151     | 14.5835         | 84    |
+| 5.7965     | 14.8980         | 85    |
+| 5.8296     | 14.5919         | 86    |
+| 5.6494     | 15.2158         | 87    |
+| 5.8014     | 14.9455         | 88    |
+| 5.7313     | 14.4270         | 89    |
+| 5.7492     | 14.2205         | 90    |
+| 5.7618     | 15.2789         | 91    |
+| 5.7249     | 15.0650         | 92    |
+| 5.6761     | 14.8731         | 93    |
+| 5.8601     | 14.6370         | 94    |
+| 5.6296     | 14.8570         | 95    |
+| 5.7572     | 14.7718         | 96    |
+| 5.6341     | 14.9328         | 97    |
+| 5.6881     | 14.9298         | 98    |
+| 5.7643     | 15.3282         | 99    |
+### Framework versions
+- Transformers 4.27.0.dev0
+- TensorFlow 2.9.2
+- Datasets 2.9.0
+- Tokenizers 0.13.2

config.json ADDED Viewed

	@@ -0,0 +1,29 @@

+{
+  "architectures": [
+    "BertForPreTraining"
+  ],
+  "attention_probs_dropout_prob": 0.1,
+  "classifier_dropout": null,
+  "directionality": "bidi",
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 768,
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "layer_norm_eps": 1e-12,
+  "max_position_embeddings": 512,
+  "model_type": "bert",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 12,
+  "pad_token_id": 0,
+  "pooler_fc_size": 768,
+  "pooler_num_attention_heads": 12,
+  "pooler_num_fc_layers": 3,
+  "pooler_size_per_head": 128,
+  "pooler_type": "first_token_transform",
+  "position_embedding_type": "absolute",
+  "transformers_version": "4.27.0.dev0",
+  "type_vocab_size": 2,
+  "use_cache": true,
+  "vocab_size": 119547
+}

tf_model.h5 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4bccdcf48ba94c2c6a66431e991f8468ce18cc17b3190d894f24b2c229c8d462
+size 1083389236