Cratee committed on
Commit
4f1c3b0
·
1 Parent(s): 70b17f7

Upload TFBartForConditionalGeneration

Browse files
Files changed (4) hide show
  1. README.md +11 -14
  2. config.json +13 -36
  3. generation_config.json +1 -5
  4. tf_model.h5 +2 -2
README.md CHANGED
@@ -1,5 +1,5 @@
1
  ---
2
- license: apache-2.0
3
  tags:
4
  - generated_from_keras_callback
5
  model-index:
@@ -12,11 +12,11 @@ probably proofread and complete it, then remove this comment. -->
12
 
13
  # bart-example
14
 
15
- This model is a fine-tuned version of [facebook/bart-large](https://huggingface.co/facebook/bart-large) on an unknown dataset.
16
  It achieves the following results on the evaluation set:
17
- - Train Loss: 2.7877
18
- - Validation Loss: 2.4972
19
- - Epoch: 4
20
 
21
  ## Model description
22
 
@@ -42,16 +42,13 @@ The following hyperparameters were used during training:
42
 
43
  | Train Loss | Validation Loss | Epoch |
44
  |:----------:|:---------------:|:-----:|
45
- | 6.3670 | 3.2462 | 0 |
46
- | 3.5143 | 2.7551 | 1 |
47
- | 3.0299 | 2.5620 | 2 |
48
- | 2.9364 | 2.7830 | 3 |
49
- | 2.7877 | 2.4972 | 4 |
50
 
51
 
52
  ### Framework versions
53
 
54
- - Transformers 4.26.0
55
- - TensorFlow 2.9.2
56
- - Datasets 2.9.0
57
- - Tokenizers 0.13.2
 
1
  ---
2
+ license: mit
3
  tags:
4
  - generated_from_keras_callback
5
  model-index:
 
12
 
13
  # bart-example
14
 
15
+ This model is a fine-tuned version of [facebook/bart-large-mnli](https://huggingface.co/facebook/bart-large-mnli) on an unknown dataset.
16
  It achieves the following results on the evaluation set:
17
+ - Train Loss: 2.2557
18
+ - Validation Loss: 2.4455
19
+ - Epoch: 1
20
 
21
  ## Model description
22
 
 
42
 
43
  | Train Loss | Validation Loss | Epoch |
44
  |:----------:|:---------------:|:-----:|
45
+ | 3.4871 | 2.5716 | 0 |
46
+ | 2.2557 | 2.4455 | 1 |
 
 
 
47
 
48
 
49
  ### Framework versions
50
 
51
+ - Transformers 4.29.2
52
+ - TensorFlow 2.8.0
53
+ - Datasets 2.12.0
54
+ - Tokenizers 0.13.3
config.json CHANGED
@@ -1,15 +1,15 @@
1
  {
2
- "_name_or_path": "facebook/bart-large",
3
- "activation_dropout": 0.1,
 
4
  "activation_function": "gelu",
5
- "add_bias_logits": false,
6
  "add_final_layer_norm": false,
7
  "architectures": [
8
  "BartForConditionalGeneration"
9
  ],
10
- "attention_dropout": 0.1,
11
  "bos_token_id": 0,
12
- "classif_dropout": 0.1,
13
  "classifier_dropout": 0.0,
14
  "d_model": 1024,
15
  "decoder_attention_heads": 16,
@@ -18,56 +18,33 @@
18
  "decoder_layers": 12,
19
  "decoder_start_token_id": 2,
20
  "dropout": 0.1,
21
- "early_stopping": true,
22
  "encoder_attention_heads": 16,
23
  "encoder_ffn_dim": 4096,
24
  "encoder_layerdrop": 0.0,
25
  "encoder_layers": 12,
26
  "eos_token_id": 2,
27
- "forced_bos_token_id": 0,
28
  "forced_eos_token_id": 2,
29
  "gradient_checkpointing": false,
30
  "id2label": {
31
- "0": "LABEL_0",
32
- "1": "LABEL_1",
33
- "2": "LABEL_2"
34
  },
35
  "init_std": 0.02,
36
  "is_encoder_decoder": true,
37
  "label2id": {
38
- "LABEL_0": 0,
39
- "LABEL_1": 1,
40
- "LABEL_2": 2
41
  },
42
  "max_position_embeddings": 1024,
43
  "model_type": "bart",
44
- "no_repeat_ngram_size": 3,
45
  "normalize_before": false,
46
- "num_beams": 4,
47
  "num_hidden_layers": 12,
 
48
  "pad_token_id": 1,
49
  "scale_embedding": false,
50
- "task_specific_params": {
51
- "summarization": {
52
- "length_penalty": 1.0,
53
- "max_length": 128,
54
- "min_length": 12,
55
- "num_beams": 4
56
- },
57
- "summarization_cnn": {
58
- "length_penalty": 2.0,
59
- "max_length": 142,
60
- "min_length": 56,
61
- "num_beams": 4
62
- },
63
- "summarization_xsum": {
64
- "length_penalty": 1.0,
65
- "max_length": 62,
66
- "min_length": 11,
67
- "num_beams": 6
68
- }
69
- },
70
- "transformers_version": "4.26.0",
71
  "use_cache": true,
72
  "vocab_size": 50265
73
  }
 
1
  {
2
+ "_name_or_path": "facebook/bart-large-mnli",
3
+ "_num_labels": 3,
4
+ "activation_dropout": 0.0,
5
  "activation_function": "gelu",
 
6
  "add_final_layer_norm": false,
7
  "architectures": [
8
  "BartForConditionalGeneration"
9
  ],
10
+ "attention_dropout": 0.0,
11
  "bos_token_id": 0,
12
+ "classif_dropout": 0.0,
13
  "classifier_dropout": 0.0,
14
  "d_model": 1024,
15
  "decoder_attention_heads": 16,
 
18
  "decoder_layers": 12,
19
  "decoder_start_token_id": 2,
20
  "dropout": 0.1,
 
21
  "encoder_attention_heads": 16,
22
  "encoder_ffn_dim": 4096,
23
  "encoder_layerdrop": 0.0,
24
  "encoder_layers": 12,
25
  "eos_token_id": 2,
 
26
  "forced_eos_token_id": 2,
27
  "gradient_checkpointing": false,
28
  "id2label": {
29
+ "0": "contradiction",
30
+ "1": "neutral",
31
+ "2": "entailment"
32
  },
33
  "init_std": 0.02,
34
  "is_encoder_decoder": true,
35
  "label2id": {
36
+ "contradiction": 0,
37
+ "entailment": 2,
38
+ "neutral": 1
39
  },
40
  "max_position_embeddings": 1024,
41
  "model_type": "bart",
 
42
  "normalize_before": false,
 
43
  "num_hidden_layers": 12,
44
+ "output_past": false,
45
  "pad_token_id": 1,
46
  "scale_embedding": false,
47
+ "transformers_version": "4.29.2",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
48
  "use_cache": true,
49
  "vocab_size": 50265
50
  }
generation_config.json CHANGED
@@ -2,12 +2,8 @@
2
  "_from_model_config": true,
3
  "bos_token_id": 0,
4
  "decoder_start_token_id": 2,
5
- "early_stopping": true,
6
  "eos_token_id": 2,
7
- "forced_bos_token_id": 0,
8
  "forced_eos_token_id": 2,
9
- "no_repeat_ngram_size": 3,
10
- "num_beams": 4,
11
  "pad_token_id": 1,
12
- "transformers_version": "4.26.0"
13
  }
 
2
  "_from_model_config": true,
3
  "bos_token_id": 0,
4
  "decoder_start_token_id": 2,
 
5
  "eos_token_id": 2,
 
6
  "forced_eos_token_id": 2,
 
 
7
  "pad_token_id": 1,
8
+ "transformers_version": "4.29.2"
9
  }
tf_model.h5 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ca865a715fda8a4fb8e5a82eafa49e5eb7274e13f20ca479763ca969d62ae828
3
- size 1625925476
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:48296e9c9faed0c24eca96884c089f555f8efc1b4d19a25be8e44ccdd1deecc8
3
+ size 1625925412