yazansh committed on
Commit
cc3ed62
·
verified ·
1 Parent(s): 6fd1794

binary-25

Browse files
Files changed (8) hide show
  1. README.md +39 -24
  2. config.json +6 -12
  3. config.toml +12 -20
  4. pytorch_model.bin +2 -2
  5. tokenizer.json +0 -0
  6. tokenizer_config.json +17 -4
  7. training_args.bin +1 -1
  8. vocab.txt +0 -0
README.md CHANGED
@@ -1,5 +1,5 @@
1
  ---
2
- base_model: UBC-NLP/MARBERTv2
3
  tags:
4
  - generated_from_trainer
5
  datasets:
@@ -23,13 +23,13 @@ model-index:
23
  metrics:
24
  - name: F1
25
  type: f1
26
- value: 0.6696076155096354
27
  - name: Precision
28
  type: precision
29
- value: 0.5963606286186931
30
  - name: Recall
31
  type: recall
32
- value: 0.7633668607728957
33
  ---
34
 
35
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
@@ -37,12 +37,12 @@ should probably proofread and complete it, then remove this comment. -->
37
 
38
  # nuha-binary
39
 
40
- This model is a fine-tuned version of [UBC-NLP/MARBERTv2](https://huggingface.co/UBC-NLP/MARBERTv2) on the nuha-dataset dataset.
41
  It achieves the following results on the evaluation set:
42
- - Loss: 0.6353
43
- - F1: 0.6696
44
- - Precision: 0.5964
45
- - Recall: 0.7634
46
  - Support: None
47
 
48
  ## Model description
@@ -63,30 +63,45 @@ More information needed
63
 
64
  The following hyperparameters were used during training:
65
  - learning_rate: 1e-05
66
- - train_batch_size: 16
67
  - eval_batch_size: 32
68
  - seed: 42
69
- - gradient_accumulation_steps: 4
70
  - total_train_batch_size: 64
71
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
72
- - lr_scheduler_type: constant_with_warmup
73
  - lr_scheduler_warmup_steps: 1000.0
74
- - num_epochs: 10
75
  - label_smoothing_factor: 0.1
76
 
77
  ### Training results
78
 
79
- | Training Loss | Epoch | Step | Validation Loss | F1 | Precision | Recall | Support |
80
- |:-------------:|:-----:|:----:|:---------------:|:------:|:---------:|:------:|:-------:|
81
- | 2.4063 | 1.06 | 500 | 0.9684 | 0.5373 | 0.4479 | 0.6713 | None |
82
- | 1.7797 | 2.12 | 1000 | 1.1863 | 0.5891 | 0.4416 | 0.8846 | None |
83
- | 1.3347 | 3.18 | 1500 | 0.7357 | 0.6431 | 0.5565 | 0.7618 | None |
84
- | 1.0719 | 4.24 | 2000 | 0.6695 | 0.6514 | 0.5701 | 0.7597 | None |
85
- | 0.9321 | 5.29 | 2500 | 0.7326 | 0.6487 | 0.5499 | 0.7909 | None |
86
- | 0.824 | 6.35 | 3000 | 0.6184 | 0.6665 | 0.6162 | 0.7258 | None |
87
- | 0.7959 | 7.41 | 3500 | 0.6409 | 0.6627 | 0.5842 | 0.7655 | None |
88
- | 0.707 | 8.47 | 4000 | 0.7284 | 0.6610 | 0.5529 | 0.8216 | None |
89
- | 0.662 | 9.53 | 4500 | 0.6353 | 0.6696 | 0.5964 | 0.7634 | None |
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
90
 
91
 
92
  ### Framework versions
 
1
  ---
2
+ base_model: thejosango/nuha-mlm
3
  tags:
4
  - generated_from_trainer
5
  datasets:
 
23
  metrics:
24
  - name: F1
25
  type: f1
26
+ value: 0.6711851987543506
27
  - name: Precision
28
  type: precision
29
+ value: 0.6655767484105358
30
  - name: Recall
31
  type: recall
32
+ value: 0.676888970995751
33
  ---
34
 
35
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 
37
 
38
  # nuha-binary
39
 
40
+ This model is a fine-tuned version of [thejosango/nuha-mlm](https://huggingface.co/thejosango/nuha-mlm) on the nuha-dataset dataset.
41
  It achieves the following results on the evaluation set:
42
+ - Loss: 0.5283
43
+ - F1: 0.6712
44
+ - Precision: 0.6656
45
+ - Recall: 0.6769
46
  - Support: None
47
 
48
  ## Model description
 
63
 
64
  The following hyperparameters were used during training:
65
  - learning_rate: 1e-05
66
+ - train_batch_size: 32
67
  - eval_batch_size: 32
68
  - seed: 42
69
+ - gradient_accumulation_steps: 2
70
  - total_train_batch_size: 64
71
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
72
+ - lr_scheduler_type: linear
73
  - lr_scheduler_warmup_steps: 1000.0
74
+ - num_epochs: 20
75
  - label_smoothing_factor: 0.1
76
 
77
  ### Training results
78
 
79
+ | Training Loss | Epoch | Step | Validation Loss | F1 | Precision | Recall | Support |
80
+ |:-------------:|:-----:|:-----:|:---------------:|:------:|:---------:|:------:|:-------:|
81
+ | 1.2633 | 0.3 | 500 | 0.6344 | 0.5719 | 0.5563 | 0.5884 | None |
82
+ | 0.9118 | 0.6 | 1000 | 0.5886 | 0.5940 | 0.5973 | 0.5908 | None |
83
+ | 0.7827 | 0.9 | 1500 | 0.5890 | 0.6367 | 0.5640 | 0.7308 | None |
84
+ | 0.6804 | 1.2 | 2000 | 0.5905 | 0.6458 | 0.5495 | 0.7829 | None |
85
+ | 0.6492 | 1.49 | 2500 | 0.5619 | 0.6558 | 0.5993 | 0.7242 | None |
86
+ | 0.6268 | 1.79 | 3000 | 0.5676 | 0.6642 | 0.5844 | 0.7691 | None |
87
+ | 0.6148 | 2.09 | 3500 | 0.5476 | 0.6510 | 0.6160 | 0.6902 | None |
88
+ | 0.5816 | 2.39 | 4000 | 0.5492 | 0.6666 | 0.6142 | 0.7286 | None |
89
+ | 0.5855 | 2.69 | 4500 | 0.5549 | 0.6716 | 0.6043 | 0.7560 | None |
90
+ | 0.5712 | 2.99 | 5000 | 0.5285 | 0.6665 | 0.6445 | 0.6900 | None |
91
+ | 0.5507 | 3.29 | 5500 | 0.5435 | 0.6705 | 0.6187 | 0.7318 | None |
92
+ | 0.5543 | 3.59 | 6000 | 0.5324 | 0.6772 | 0.6387 | 0.7207 | None |
93
+ | 0.5448 | 3.89 | 6500 | 0.5254 | 0.6711 | 0.6555 | 0.6874 | None |
94
+ | 0.5313 | 4.18 | 7000 | 0.5428 | 0.6823 | 0.6219 | 0.7558 | None |
95
+ | 0.5268 | 4.48 | 7500 | 0.5192 | 0.6667 | 0.6758 | 0.6579 | None |
96
+ | 0.5242 | 4.78 | 8000 | 0.5330 | 0.6844 | 0.6360 | 0.7406 | None |
97
+ | 0.519 | 5.08 | 8500 | 0.5203 | 0.6650 | 0.6837 | 0.6473 | None |
98
+ | 0.5056 | 5.38 | 9000 | 0.5607 | 0.6865 | 0.6080 | 0.7881 | None |
99
+ | 0.5025 | 5.68 | 9500 | 0.5238 | 0.6429 | 0.7203 | 0.5805 | None |
100
+ | 0.5086 | 5.98 | 10000 | 0.6008 | 0.6791 | 0.5661 | 0.8485 | None |
101
+ | 0.4832 | 6.28 | 10500 | 0.5555 | 0.6892 | 0.6189 | 0.7776 | None |
102
+ | 0.494 | 6.58 | 11000 | 0.5286 | 0.6773 | 0.6538 | 0.7026 | None |
103
+ | 0.4849 | 6.87 | 11500 | 0.5570 | 0.6867 | 0.6154 | 0.7766 | None |
104
+ | 0.4743 | 7.17 | 12000 | 0.5283 | 0.6712 | 0.6656 | 0.6769 | None |
105
 
106
 
107
  ### Framework versions
config.json CHANGED
@@ -1,14 +1,13 @@
1
  {
2
- "_name_or_path": "UBC-NLP/MARBERTv2",
3
  "architectures": [
4
  "BertForSequenceClassification"
5
  ],
6
- "attention_probs_dropout_prob": 0.3,
7
- "classifier_dropout": 0.3,
8
- "directionality": "bidi",
9
  "gradient_checkpointing": false,
10
  "hidden_act": "gelu",
11
- "hidden_dropout_prob": 0.3,
12
  "hidden_size": 768,
13
  "id2label": {
14
  "0": "non-hate-speech",
@@ -24,18 +23,13 @@
24
  "max_position_embeddings": 512,
25
  "model_type": "bert",
26
  "num_attention_heads": 12,
27
- "num_hidden_layers": 12,
28
  "pad_token_id": 0,
29
- "pooler_fc_size": 768,
30
- "pooler_num_attention_heads": 12,
31
- "pooler_num_fc_layers": 3,
32
- "pooler_size_per_head": 128,
33
- "pooler_type": "first_token_transform",
34
  "position_embedding_type": "absolute",
35
  "problem_type": "single_label_classification",
36
  "torch_dtype": "float32",
37
  "transformers_version": "4.32.1",
38
  "type_vocab_size": 2,
39
  "use_cache": true,
40
- "vocab_size": 100000
41
  }
 
1
  {
2
+ "_name_or_path": "thejosango/nuha-mlm",
3
  "architectures": [
4
  "BertForSequenceClassification"
5
  ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "classifier_dropout": null,
 
8
  "gradient_checkpointing": false,
9
  "hidden_act": "gelu",
10
+ "hidden_dropout_prob": 0.1,
11
  "hidden_size": 768,
12
  "id2label": {
13
  "0": "non-hate-speech",
 
23
  "max_position_embeddings": 512,
24
  "model_type": "bert",
25
  "num_attention_heads": 12,
26
+ "num_hidden_layers": 4,
27
  "pad_token_id": 0,
 
 
 
 
 
28
  "position_embedding_type": "absolute",
29
  "problem_type": "single_label_classification",
30
  "torch_dtype": "float32",
31
  "transformers_version": "4.32.1",
32
  "type_vocab_size": 2,
33
  "use_cache": true,
34
+ "vocab_size": 64000
35
  }
config.toml CHANGED
@@ -1,38 +1,30 @@
1
  [experiment]
2
- name = "binary-24"
3
  type = "binary"
4
 
5
 
6
  [dataset]
7
  path = "thejosango/nuha-dataset"
8
  dataset_revision = "main"
9
- with_post_text = false
10
- augment_ratio = 0.0
11
 
12
 
13
  [model]
14
- pretrained_model_name_or_path = "UBC-NLP/MARBERTv2"
15
- revision = "main"
16
- hidden_dropout_prob = 0.3
17
- attention_probs_dropout_prob = 0.3
18
- classifier_dropout = 0.3
19
- #num_hidden_layers = 2
20
- #num_attention_heads = 12
21
- #hidden_size = 768
22
- #intermediate_size= null
23
 
24
 
25
  [training]
26
- num_train_epochs = 10
27
  warmup_steps = 1e3
28
- lr_scheduler_type = "constant_with_warmup"
29
  learning_rate = 1e-5
30
- per_device_train_batch_size = 16
31
  per_device_eval_batch_size = 32
32
- gradient_accumulation_steps = 4
33
- weight_decay = 0.1
34
  label_smoothing_factor = 0.1
35
  weighted_loss = false
36
- resample_data = true
37
- early_stopping_patience = 0
38
- early_stopping_threshold = 0
 
1
  [experiment]
2
+ name = "binary-25"
3
  type = "binary"
4
 
5
 
6
  [dataset]
7
  path = "thejosango/nuha-dataset"
8
  dataset_revision = "main"
9
+ augment_ratio = 0.25
10
+ undersampling_strategy = "majority"
11
 
12
 
13
  [model]
14
+ pretrained_model_name_or_path = "thejosango/nuha-mlm"
15
+ revision = "2caf9ebc5b275737c95f8bb16953288107a7131c"
 
 
 
 
 
 
 
16
 
17
 
18
  [training]
19
+ num_train_epochs = 20
20
  warmup_steps = 1e3
21
+ lr_scheduler_type = "linear"
22
  learning_rate = 1e-5
23
+ per_device_train_batch_size = 32
24
  per_device_eval_batch_size = 32
25
+ gradient_accumulation_steps = 2
26
+ weight_decay = 0.01
27
  label_smoothing_factor = 0.1
28
  weighted_loss = false
29
+ early_stopping_patience = 10
30
+ early_stopping_threshold = 0.005
 
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f610909431fd5da709a5454dba0a5c5b48b17955ef88e0ce97a75e3616a789ba
3
- size 651439921
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e7a98781949be70d56a87a882c2f59124d9d2df230e6f29e72b575c7082c3f0a
3
+ size 313992076
tokenizer.json CHANGED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json CHANGED
@@ -2,14 +2,27 @@
2
  "clean_up_tokenization_spaces": true,
3
  "cls_token": "[CLS]",
4
  "do_basic_tokenize": true,
5
- "do_lower_case": true,
6
  "mask_token": "[MASK]",
7
- "model_max_length": 1000000000000000019884624838656,
8
- "never_split": null,
 
 
 
 
 
 
 
9
  "pad_token": "[PAD]",
 
 
10
  "sep_token": "[SEP]",
 
11
  "strip_accents": null,
12
  "tokenize_chinese_chars": true,
13
  "tokenizer_class": "BertTokenizer",
14
- "unk_token": "[UNK]"
 
 
 
15
  }
 
2
  "clean_up_tokenization_spaces": true,
3
  "cls_token": "[CLS]",
4
  "do_basic_tokenize": true,
5
+ "do_lower_case": false,
6
  "mask_token": "[MASK]",
7
+ "max_len": 512,
8
+ "max_length": 512,
9
+ "model_max_length": 512,
10
+ "never_split": [
11
+ "[بريد]",
12
+ "[مستخدم]",
13
+ "[رابط]"
14
+ ],
15
+ "pad_to_multiple_of": null,
16
  "pad_token": "[PAD]",
17
+ "pad_token_type_id": 0,
18
+ "padding_side": "right",
19
  "sep_token": "[SEP]",
20
+ "stride": 0,
21
  "strip_accents": null,
22
  "tokenize_chinese_chars": true,
23
  "tokenizer_class": "BertTokenizer",
24
+ "truncation_side": "right",
25
+ "truncation_strategy": "longest_first",
26
+ "unk_token": "[UNK]",
27
+ "use_fast": true
28
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:11e392f2392ba67a92949ae2fec0c77c26881b325c6745442c7a5320501c12ef
3
  size 4091
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5000e8929849a0d1fc06cf7a92f266db77a62a36dee5f486357cad977bf553ac
3
  size 4091
vocab.txt CHANGED
The diff for this file is too large to render. See raw diff