amrisaurus committed on
Commit
5b15b32
·
1 Parent(s): bac8105

Upload TFBertForPreTraining

Browse files
Files changed (3) hide show
  1. README.md +17 -3
  2. config.json +8 -4
  3. tf_model.h5 +2 -2
README.md CHANGED
@@ -11,9 +11,11 @@ probably proofread and complete it, then remove this comment. -->
11
 
12
  # pretrained-m-bert
13
 
14
- This model is a fine-tuned version of [amrisaurus/pretrained-bert](https://huggingface.co/amrisaurus/pretrained-bert) on an unknown dataset.
15
  It achieves the following results on the evaluation set:
16
-
 
 
17
 
18
  ## Model description
19
 
@@ -32,11 +34,23 @@ More information needed
32
  ### Training hyperparameters
33
 
34
  The following hyperparameters were used during training:
35
- - optimizer: None
36
  - training_precision: float32
37
 
38
  ### Training results
39
 
 
 
 
 
 
 
 
 
 
 
 
 
40
 
41
 
42
  ### Framework versions
 
11
 
12
  # pretrained-m-bert
13
 
14
+ This model is a fine-tuned version of an unspecified base model on an unknown dataset.
15
  It achieves the following results on the evaluation set:
16
+ - Train Loss: 6.0589
17
+ - Validation Loss: 12.2793
18
+ - Epoch: 9
19
 
20
  ## Model description
21
 
 
34
  ### Training hyperparameters
35
 
36
  The following hyperparameters were used during training:
37
+ - optimizer: {'name': 'Adam', 'learning_rate': 1e-04, 'decay': 0.0, 'beta_1': 0.9, 'beta_2': 0.999, 'epsilon': 1e-07, 'amsgrad': False}
38
  - training_precision: float32
39
 
40
  ### Training results
41
 
42
+ | Train Loss | Validation Loss | Epoch |
43
+ |:----------:|:---------------:|:-----:|
44
+ | 10.2772 | 11.0091 | 0 |
45
+ | 7.9077 | 11.0096 | 1 |
46
+ | 6.8422 | 11.0426 | 2 |
47
+ | 6.6196 | 11.1006 | 3 |
48
+ | 6.4596 | 11.5412 | 4 |
49
+ | 6.9657 | 11.7570 | 5 |
50
+ | 6.3738 | 11.7909 | 6 |
51
+ | 6.1480 | 12.0058 | 7 |
52
+ | 6.2503 | 11.9410 | 8 |
53
+ | 6.0589 | 12.2793 | 9 |
54
 
55
 
56
  ### Framework versions
config.json CHANGED
@@ -1,11 +1,10 @@
1
  {
2
- "_name_or_path": "amrisaurus/pretrained-bert",
3
  "architectures": [
4
- "BertForSequenceClassification"
5
  ],
6
  "attention_probs_dropout_prob": 0.1,
7
  "classifier_dropout": null,
8
- "gradient_checkpointing": false,
9
  "hidden_act": "gelu",
10
  "hidden_dropout_prob": 0.1,
11
  "hidden_size": 768,
@@ -17,9 +16,14 @@
17
  "num_attention_heads": 12,
18
  "num_hidden_layers": 12,
19
  "pad_token_id": 0,
 
 
 
 
 
20
  "position_embedding_type": "absolute",
21
  "transformers_version": "4.27.0.dev0",
22
  "type_vocab_size": 2,
23
  "use_cache": true,
24
- "vocab_size": 28996
25
  }
 
1
  {
 
2
  "architectures": [
3
+ "BertForPreTraining"
4
  ],
5
  "attention_probs_dropout_prob": 0.1,
6
  "classifier_dropout": null,
7
+ "directionality": "bidi",
8
  "hidden_act": "gelu",
9
  "hidden_dropout_prob": 0.1,
10
  "hidden_size": 768,
 
16
  "num_attention_heads": 12,
17
  "num_hidden_layers": 12,
18
  "pad_token_id": 0,
19
+ "pooler_fc_size": 768,
20
+ "pooler_num_attention_heads": 12,
21
+ "pooler_num_fc_layers": 3,
22
+ "pooler_size_per_head": 128,
23
+ "pooler_type": "first_token_transform",
24
  "position_embedding_type": "absolute",
25
  "transformers_version": "4.27.0.dev0",
26
  "type_vocab_size": 2,
27
  "use_cache": true,
28
+ "vocab_size": 119547
29
  }
tf_model.h5 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e90af7ce7c9c09806d77bfb6d0ce6f7d12b7f4993cb3e7f8f460d9b5b90e06af
3
- size 433535320
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ee16c899da3b4047597557aae1b300660d935ffb668141fa31c010622e24a158
3
+ size 1083389236