Upload TFBertForPreTraining

Browse files

Files changed (3) hide show

README.md +141 -0
config.json +29 -0
tf_model.h5 +3 -0

README.md ADDED Viewed

	@@ -0,0 +1,141 @@

+---
+tags:
+- generated_from_keras_callback
+model-index:
+- name: pretrained-m-bert-90
+  results: []
+---
+<!-- This model card has been generated automatically according to the information Keras had access to. You should
+probably proofread and complete it, then remove this comment. -->
+# pretrained-m-bert-90
+This model is a fine-tuned version of an unspecified base model (the base checkpoint was not recorded) on an unknown dataset.
+It achieves the following training and validation results at the final epoch:
+- Train Loss: 5.7094
+- Validation Loss: 14.5332
+- Epoch: 89
+## Model description
+More information needed
+## Intended uses & limitations
+More information needed
+## Training and evaluation data
+More information needed
+## Training procedure
+### Training hyperparameters
+The following hyperparameters were used during training:
+- optimizer: {'name': 'Adam', 'weight_decay': None, 'clipnorm': None, 'global_clipnorm': None, 'clipvalue': None, 'use_ema': False, 'ema_momentum': 0.99, 'ema_overwrite_frequency': None, 'jit_compile': True, 'is_legacy_optimizer': False, 'learning_rate': 1e-04, 'beta_1': 0.9, 'beta_2': 0.999, 'epsilon': 1e-07, 'amsgrad': False}
+- training_precision: float32
+### Training results
+| Train Loss | Validation Loss | Epoch |
+|:----------:|:---------------:|:-----:|
+| 10.2413    | 10.9668         | 0     |
+| 7.5814     | 10.9638         | 1     |
+| 7.0095     | 11.3733         | 2     |
+| 6.4352     | 11.5989         | 3     |
+| 6.7137     | 11.4072         | 4     |
+| 6.4383     | 11.8287         | 5     |
+| 6.2223     | 12.0344         | 6     |
+| 6.1759     | 11.6900         | 7     |
+| 6.0764     | 11.7144         | 8     |
+| 5.8802     | 12.1089         | 9     |
+| 6.0159     | 12.3456         | 10    |
+| 5.9254     | 12.7065         | 11    |
+| 5.6652     | nan             | 12    |
+| 5.8185     | 12.8155         | 13    |
+| 5.9185     | 12.7047         | 14    |
+| 5.8418     | 12.7175         | 15    |
+| 5.9122     | 12.5688         | 16    |
+| 5.9698     | 12.5251         | 17    |
+| 5.8286     | 12.7015         | 18    |
+| 5.8807     | 13.2514         | 19    |
+| 5.8330     | 12.8541         | 20    |
+| 5.6456     | 13.4088         | 21    |
+| 5.7257     | 13.5517         | 22    |
+| 5.8854     | 12.8775         | 23    |
+| 5.6770     | 13.6499         | 24    |
+| 5.6026     | 13.9732         | 25    |
+| 5.6651     | 13.0827         | 26    |
+| 5.8888     | 13.1292         | 27    |
+| 5.8123     | 12.8970         | 28    |
+| 5.7525     | 13.3724         | 29    |
+| 5.9020     | 13.5507         | 30    |
+| 5.8642     | 13.3284         | 31    |
+| 5.9329     | 13.7350         | 32    |
+| 5.7728     | 13.3011         | 33    |
+| 5.8297     | 13.6108         | 34    |
+| 5.8118     | 13.3331         | 35    |
+| 5.7382     | 13.7047         | 36    |
+| 5.8061     | 13.8107         | 37    |
+| 5.8423     | 13.4207         | 38    |
+| 5.8442     | 13.6832         | 39    |
+| 5.7680     | 14.1248         | 40    |
+| 5.7668     | 13.6626         | 41    |
+| 5.7826     | 13.6470         | 42    |
+| 5.7692     | 13.9430         | 43    |
+| 5.5109     | 14.0924         | 44    |
+| 5.7394     | 14.0253         | 45    |
+| 5.8013     | 13.5926         | 46    |
+| 5.7222     | 13.9732         | 47    |
+| 5.7023     | 14.0204         | 48    |
+| 5.8250     | 13.9655         | 49    |
+| 5.6064     | 14.0406         | 50    |
+| 5.7319     | 14.1826         | 51    |
+| 5.6849     | 13.9114         | 52    |
+| 5.8167     | 13.9917         | 53    |
+| 5.7573     | 14.1509         | 54    |
+| 5.6921     | 14.3722         | 55    |
+| 5.7190     | 14.4919         | 56    |
+| 5.8501     | 13.6970         | 57    |
+| 5.7627     | 14.1393         | 58    |
+| 5.8031     | 14.1246         | 59    |
+| 5.7207     | 14.3084         | 60    |
+| 5.7979     | 13.9398         | 61    |
+| 5.7068     | 14.2865         | 62    |
+| 5.7547     | 14.2590         | 63    |
+| 5.8349     | 14.1481         | 64    |
+| 5.7924     | 14.0461         | 65    |
+| 5.8127     | 14.1274         | 66    |
+| 5.7590     | 14.3578         | 67    |
+| 5.8297     | 14.2429         | 68    |
+| 5.7822     | 14.2742         | 69    |
+| 5.7708     | 14.3720         | 70    |
+| 5.6521     | 14.8640         | 71    |
+| 5.7253     | 14.4404         | 72    |
+| 5.8076     | 14.1843         | 73    |
+| 5.7746     | 14.4657         | 74    |
+| 5.8592     | 14.2965         | 75    |
+| 5.6643     | 14.0996         | 76    |
+| 5.7849     | 14.3531         | 77    |
+| 5.7418     | 14.4266         | 78    |
+| 5.7030     | 14.5584         | 79    |
+| 5.8298     | 14.1390         | 80    |
+| 5.9061     | 13.9172         | 81    |
+| 5.6570     | 14.6991         | 82    |
+| 5.7040     | 14.7839         | 83    |
+| 5.8064     | 14.2581         | 84    |
+| 5.6855     | 14.4449         | 85    |
+| 5.7803     | 14.7469         | 86    |
+| 5.7495     | 14.4704         | 87    |
+| 5.7539     | 14.5520         | 88    |
+| 5.7094     | 14.5332         | 89    |
+### Framework versions
+- Transformers 4.27.0.dev0
+- TensorFlow 2.11.0
+- Datasets 2.9.0
+- Tokenizers 0.13.2

config.json ADDED Viewed

	@@ -0,0 +1,29 @@

+{
+  "architectures": [
+    "BertForPreTraining"
+  ],
+  "attention_probs_dropout_prob": 0.1,
+  "classifier_dropout": null,
+  "directionality": "bidi",
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 768,
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "layer_norm_eps": 1e-12,
+  "max_position_embeddings": 512,
+  "model_type": "bert",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 12,
+  "pad_token_id": 0,
+  "pooler_fc_size": 768,
+  "pooler_num_attention_heads": 12,
+  "pooler_num_fc_layers": 3,
+  "pooler_size_per_head": 128,
+  "pooler_type": "first_token_transform",
+  "position_embedding_type": "absolute",
+  "transformers_version": "4.27.0.dev0",
+  "type_vocab_size": 2,
+  "use_cache": true,
+  "vocab_size": 119547
+}

tf_model.h5 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:79399a00574c19cba1b872f606b096b0f2c7116f25c9439877edfe36d8edee1e
+size 1083389236