amrisaurus committed on
Commit
c9d81c6
·
1 Parent(s): f33f8a9

Upload TFBertForSequenceClassification

Browse files
Files changed (3) hide show
  1. README.md +3 -8
  2. config.json +4 -8
  3. tf_model.h5 +2 -2
README.md CHANGED
@@ -11,11 +11,9 @@ probably proofread and complete it, then remove this comment. -->
11
 
12
  # pretrained-m-bert
13
 
14
- This model is a fine-tuned version of [](https://huggingface.co/) on an unknown dataset.
15
  It achieves the following results on the evaluation set:
16
- - Train Loss: 10.1160
17
- - Validation Loss: 10.8236
18
- - Epoch: 0
19
 
20
  ## Model description
21
 
@@ -34,14 +32,11 @@ More information needed
34
  ### Training hyperparameters
35
 
36
  The following hyperparameters were used during training:
37
- - optimizer: {'name': 'Adam', 'learning_rate': 1e-04, 'decay': 0.0, 'beta_1': 0.9, 'beta_2': 0.999, 'epsilon': 1e-07, 'amsgrad': False}
38
  - training_precision: float32
39
 
40
  ### Training results
41
 
42
- | Train Loss | Validation Loss | Epoch |
43
- |:----------:|:---------------:|:-----:|
44
- | 10.1160 | 10.8236 | 0 |
45
 
46
 
47
  ### Framework versions
 
11
 
12
  # pretrained-m-bert
13
 
14
+ This model is a fine-tuned version of [amrisaurus/pretrained-bert](https://huggingface.co/amrisaurus/pretrained-bert) on an unknown dataset.
15
  It achieves the following results on the evaluation set:
16
+
 
 
17
 
18
  ## Model description
19
 
 
32
  ### Training hyperparameters
33
 
34
  The following hyperparameters were used during training:
35
+ - optimizer: None
36
  - training_precision: float32
37
 
38
  ### Training results
39
 
 
 
 
40
 
41
 
42
  ### Framework versions
config.json CHANGED
@@ -1,10 +1,11 @@
1
  {
 
2
  "architectures": [
3
- "BertForPreTraining"
4
  ],
5
  "attention_probs_dropout_prob": 0.1,
6
  "classifier_dropout": null,
7
- "directionality": "bidi",
8
  "hidden_act": "gelu",
9
  "hidden_dropout_prob": 0.1,
10
  "hidden_size": 768,
@@ -16,14 +17,9 @@
16
  "num_attention_heads": 12,
17
  "num_hidden_layers": 12,
18
  "pad_token_id": 0,
19
- "pooler_fc_size": 768,
20
- "pooler_num_attention_heads": 12,
21
- "pooler_num_fc_layers": 3,
22
- "pooler_size_per_head": 128,
23
- "pooler_type": "first_token_transform",
24
  "position_embedding_type": "absolute",
25
  "transformers_version": "4.27.0.dev0",
26
  "type_vocab_size": 2,
27
  "use_cache": true,
28
- "vocab_size": 119547
29
  }
 
1
  {
2
+ "_name_or_path": "amrisaurus/pretrained-bert",
3
  "architectures": [
4
+ "BertForSequenceClassification"
5
  ],
6
  "attention_probs_dropout_prob": 0.1,
7
  "classifier_dropout": null,
8
+ "gradient_checkpointing": false,
9
  "hidden_act": "gelu",
10
  "hidden_dropout_prob": 0.1,
11
  "hidden_size": 768,
 
17
  "num_attention_heads": 12,
18
  "num_hidden_layers": 12,
19
  "pad_token_id": 0,
 
 
 
 
 
20
  "position_embedding_type": "absolute",
21
  "transformers_version": "4.27.0.dev0",
22
  "type_vocab_size": 2,
23
  "use_cache": true,
24
+ "vocab_size": 28996
25
  }
tf_model.h5 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b573265165f9ebd90738aafd5c372b5135530ef6fdab1b028b02c31c04df0d08
3
- size 1083389236
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e90af7ce7c9c09806d77bfb6d0ce6f7d12b7f4993cb3e7f8f460d9b5b90e06af
3
+ size 433535320