binary-25

Browse files

Files changed (8) hide show

README.md +39 -24
config.json +6 -12
config.toml +12 -20
pytorch_model.bin +2 -2
tokenizer.json +0 -0
tokenizer_config.json +17 -4
training_args.bin +1 -1
vocab.txt +0 -0

README.md CHANGED Viewed

@@ -1,5 +1,5 @@
 ---
-base_model: UBC-NLP/MARBERTv2
 tags:
 - generated_from_trainer
 datasets:
@@ -23,13 +23,13 @@ model-index:
     metrics:
     - name: F1
       type: f1
-      value: 0.6696076155096354
     - name: Precision
       type: precision
-      value: 0.5963606286186931
     - name: Recall
       type: recall
-      value: 0.7633668607728957
 ---
 <!-- This model card has been generated automatically according to the information the Trainer had access to. You
@@ -37,12 +37,12 @@ should probably proofread and complete it, then remove this comment. -->
 # nuha-binary
-This model is a fine-tuned version of [UBC-NLP/MARBERTv2](https://huggingface.co/UBC-NLP/MARBERTv2) on the nuha-dataset dataset.
 It achieves the following results on the evaluation set:
-- Loss: 0.6353
-- F1: 0.6696
-- Precision: 0.5964
-- Recall: 0.7634
 - Support: None
 ## Model description
@@ -63,30 +63,45 @@ More information needed
 The following hyperparameters were used during training:
 - learning_rate: 1e-05
-- train_batch_size: 16
 - eval_batch_size: 32
 - seed: 42
-- gradient_accumulation_steps: 4
 - total_train_batch_size: 64
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
-- lr_scheduler_type: constant_with_warmup
 - lr_scheduler_warmup_steps: 1000.0
-- num_epochs: 10
 - label_smoothing_factor: 0.1
 ### Training results
-| Training Loss | Epoch | Step | Validation Loss | F1     | Precision | Recall | Support |
-|:-------------:|:-----:|:----:|:---------------:|:------:|:---------:|:------:|:-------:|
-| 2.4063        | 1.06  | 500  | 0.9684          | 0.5373 | 0.4479    | 0.6713 | None    |
-| 1.7797        | 2.12  | 1000 | 1.1863          | 0.5891 | 0.4416    | 0.8846 | None    |
-| 1.3347        | 3.18  | 1500 | 0.7357          | 0.6431 | 0.5565    | 0.7618 | None    |
-| 1.0719        | 4.24  | 2000 | 0.6695          | 0.6514 | 0.5701    | 0.7597 | None    |
-| 0.9321        | 5.29  | 2500 | 0.7326          | 0.6487 | 0.5499    | 0.7909 | None    |
-| 0.824         | 6.35  | 3000 | 0.6184          | 0.6665 | 0.6162    | 0.7258 | None    |
-| 0.7959        | 7.41  | 3500 | 0.6409          | 0.6627 | 0.5842    | 0.7655 | None    |
-| 0.707         | 8.47  | 4000 | 0.7284          | 0.6610 | 0.5529    | 0.8216 | None    |
-| 0.662         | 9.53  | 4500 | 0.6353          | 0.6696 | 0.5964    | 0.7634 | None    |
 ### Framework versions

 ---
+base_model: thejosango/nuha-mlm
 tags:
 - generated_from_trainer
 datasets:
     metrics:
     - name: F1
       type: f1
+      value: 0.6711851987543506
     - name: Precision
       type: precision
+      value: 0.6655767484105358
     - name: Recall
       type: recall
+      value: 0.676888970995751
 ---
 <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 # nuha-binary
+This model is a fine-tuned version of [thejosango/nuha-mlm](https://huggingface.co/thejosango/nuha-mlm) on the nuha-dataset dataset.
 It achieves the following results on the evaluation set:
+- Loss: 0.5283
+- F1: 0.6712
+- Precision: 0.6656
+- Recall: 0.6769
 - Support: None
 ## Model description
 The following hyperparameters were used during training:
 - learning_rate: 1e-05
+- train_batch_size: 32
 - eval_batch_size: 32
 - seed: 42
+- gradient_accumulation_steps: 2
 - total_train_batch_size: 64
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
+- lr_scheduler_type: linear
 - lr_scheduler_warmup_steps: 1000.0
+- num_epochs: 20
 - label_smoothing_factor: 0.1
 ### Training results
+| Training Loss | Epoch | Step  | Validation Loss | F1     | Precision | Recall | Support |
+|:-------------:|:-----:|:-----:|:---------------:|:------:|:---------:|:------:|:-------:|
+| 1.2633        | 0.3   | 500   | 0.6344          | 0.5719 | 0.5563    | 0.5884 | None    |
+| 0.9118        | 0.6   | 1000  | 0.5886          | 0.5940 | 0.5973    | 0.5908 | None    |
+| 0.7827        | 0.9   | 1500  | 0.5890          | 0.6367 | 0.5640    | 0.7308 | None    |
+| 0.6804        | 1.2   | 2000  | 0.5905          | 0.6458 | 0.5495    | 0.7829 | None    |
+| 0.6492        | 1.49  | 2500  | 0.5619          | 0.6558 | 0.5993    | 0.7242 | None    |
+| 0.6268        | 1.79  | 3000  | 0.5676          | 0.6642 | 0.5844    | 0.7691 | None    |
+| 0.6148        | 2.09  | 3500  | 0.5476          | 0.6510 | 0.6160    | 0.6902 | None    |
+| 0.5816        | 2.39  | 4000  | 0.5492          | 0.6666 | 0.6142    | 0.7286 | None    |
+| 0.5855        | 2.69  | 4500  | 0.5549          | 0.6716 | 0.6043    | 0.7560 | None    |
+| 0.5712        | 2.99  | 5000  | 0.5285          | 0.6665 | 0.6445    | 0.6900 | None    |
+| 0.5507        | 3.29  | 5500  | 0.5435          | 0.6705 | 0.6187    | 0.7318 | None    |
+| 0.5543        | 3.59  | 6000  | 0.5324          | 0.6772 | 0.6387    | 0.7207 | None    |
+| 0.5448        | 3.89  | 6500  | 0.5254          | 0.6711 | 0.6555    | 0.6874 | None    |
+| 0.5313        | 4.18  | 7000  | 0.5428          | 0.6823 | 0.6219    | 0.7558 | None    |
+| 0.5268        | 4.48  | 7500  | 0.5192          | 0.6667 | 0.6758    | 0.6579 | None    |
+| 0.5242        | 4.78  | 8000  | 0.5330          | 0.6844 | 0.6360    | 0.7406 | None    |
+| 0.519         | 5.08  | 8500  | 0.5203          | 0.6650 | 0.6837    | 0.6473 | None    |
+| 0.5056        | 5.38  | 9000  | 0.5607          | 0.6865 | 0.6080    | 0.7881 | None    |
+| 0.5025        | 5.68  | 9500  | 0.5238          | 0.6429 | 0.7203    | 0.5805 | None    |
+| 0.5086        | 5.98  | 10000 | 0.6008          | 0.6791 | 0.5661    | 0.8485 | None    |
+| 0.4832        | 6.28  | 10500 | 0.5555          | 0.6892 | 0.6189    | 0.7776 | None    |
+| 0.494         | 6.58  | 11000 | 0.5286          | 0.6773 | 0.6538    | 0.7026 | None    |
+| 0.4849        | 6.87  | 11500 | 0.5570          | 0.6867 | 0.6154    | 0.7766 | None    |
+| 0.4743        | 7.17  | 12000 | 0.5283          | 0.6712 | 0.6656    | 0.6769 | None    |
 ### Framework versions

config.json CHANGED Viewed

@@ -1,14 +1,13 @@
 {
-  "_name_or_path": "UBC-NLP/MARBERTv2",
   "architectures": [
     "BertForSequenceClassification"
   ],
-  "attention_probs_dropout_prob": 0.3,
-  "classifier_dropout": 0.3,
-  "directionality": "bidi",
   "gradient_checkpointing": false,
   "hidden_act": "gelu",
-  "hidden_dropout_prob": 0.3,
   "hidden_size": 768,
   "id2label": {
     "0": "non-hate-speech",
@@ -24,18 +23,13 @@
   "max_position_embeddings": 512,
   "model_type": "bert",
   "num_attention_heads": 12,
-  "num_hidden_layers": 12,
   "pad_token_id": 0,
-  "pooler_fc_size": 768,
-  "pooler_num_attention_heads": 12,
-  "pooler_num_fc_layers": 3,
-  "pooler_size_per_head": 128,
-  "pooler_type": "first_token_transform",
   "position_embedding_type": "absolute",
   "problem_type": "single_label_classification",
   "torch_dtype": "float32",
   "transformers_version": "4.32.1",
   "type_vocab_size": 2,
   "use_cache": true,
-  "vocab_size": 100000
 }

 {
+  "_name_or_path": "thejosango/nuha-mlm",
   "architectures": [
     "BertForSequenceClassification"
   ],
+  "attention_probs_dropout_prob": 0.1,
+  "classifier_dropout": null,
   "gradient_checkpointing": false,
   "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
   "hidden_size": 768,
   "id2label": {
     "0": "non-hate-speech",
   "max_position_embeddings": 512,
   "model_type": "bert",
   "num_attention_heads": 12,
+  "num_hidden_layers": 4,
   "pad_token_id": 0,
   "position_embedding_type": "absolute",
   "problem_type": "single_label_classification",
   "torch_dtype": "float32",
   "transformers_version": "4.32.1",
   "type_vocab_size": 2,
   "use_cache": true,
+  "vocab_size": 64000
 }

config.toml CHANGED Viewed

@@ -1,38 +1,30 @@
 [experiment]
-name = "binary-24"
 type = "binary"
 [dataset]
 path = "thejosango/nuha-dataset"
 dataset_revision = "main"
-with_post_text = false
-augment_ratio = 0.0
 [model]
-pretrained_model_name_or_path = "UBC-NLP/MARBERTv2"
-revision = "main"
-hidden_dropout_prob = 0.3
-attention_probs_dropout_prob = 0.3
-classifier_dropout = 0.3
-#num_hidden_layers = 2
-#num_attention_heads = 12
-#hidden_size = 768
-#intermediate_size= null
 [training]
-num_train_epochs = 10
 warmup_steps = 1e3
-lr_scheduler_type = "constant_with_warmup"
 learning_rate = 1e-5
-per_device_train_batch_size = 16
 per_device_eval_batch_size = 32
-gradient_accumulation_steps = 4
-weight_decay = 0.1
 label_smoothing_factor = 0.1
 weighted_loss = false
-resample_data = true
-early_stopping_patience = 0
-early_stopping_threshold = 0

 [experiment]
+name = "binary-25"
 type = "binary"
 [dataset]
 path = "thejosango/nuha-dataset"
 dataset_revision = "main"
+augment_ratio = 0.25
+undersampling_strategy = "majority"
 [model]
+pretrained_model_name_or_path = "thejosango/nuha-mlm"
+revision = "2caf9ebc5b275737c95f8bb16953288107a7131c"
 [training]
+num_train_epochs = 20
 warmup_steps = 1e3
+lr_scheduler_type = "linear"
 learning_rate = 1e-5
+per_device_train_batch_size = 32
 per_device_eval_batch_size = 32
+gradient_accumulation_steps = 2
+weight_decay = 0.01
 label_smoothing_factor = 0.1
 weighted_loss = false
+early_stopping_patience = 10
+early_stopping_threshold = 0.005

pytorch_model.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f610909431fd5da709a5454dba0a5c5b48b17955ef88e0ce97a75e3616a789ba
-size 651439921

 version https://git-lfs.github.com/spec/v1
+oid sha256:e7a98781949be70d56a87a882c2f59124d9d2df230e6f29e72b575c7082c3f0a
+size 313992076

tokenizer.json CHANGED Viewed

The diff for this file is too large to render. See raw diff

tokenizer_config.json CHANGED Viewed

@@ -2,14 +2,27 @@
   "clean_up_tokenization_spaces": true,
   "cls_token": "[CLS]",
   "do_basic_tokenize": true,
-  "do_lower_case": true,
   "mask_token": "[MASK]",
-  "model_max_length": 1000000000000000019884624838656,
-  "never_split": null,
   "pad_token": "[PAD]",
   "sep_token": "[SEP]",
   "strip_accents": null,
   "tokenize_chinese_chars": true,
   "tokenizer_class": "BertTokenizer",
-  "unk_token": "[UNK]"
 }

   "clean_up_tokenization_spaces": true,
   "cls_token": "[CLS]",
   "do_basic_tokenize": true,
+  "do_lower_case": false,
   "mask_token": "[MASK]",
+  "max_len": 512,
+  "max_length": 512,
+  "model_max_length": 512,
+  "never_split": [
+    "[بريد]",
+    "[مستخدم]",
+    "[رابط]"
+  ],
+  "pad_to_multiple_of": null,
   "pad_token": "[PAD]",
+  "pad_token_type_id": 0,
+  "padding_side": "right",
   "sep_token": "[SEP]",
+  "stride": 0,
   "strip_accents": null,
   "tokenize_chinese_chars": true,
   "tokenizer_class": "BertTokenizer",
+  "truncation_side": "right",
+  "truncation_strategy": "longest_first",
+  "unk_token": "[UNK]",
+  "use_fast": true
 }

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:11e392f2392ba67a92949ae2fec0c77c26881b325c6745442c7a5320501c12ef
 size 4091

 version https://git-lfs.github.com/spec/v1
+oid sha256:5000e8929849a0d1fc06cf7a92f266db77a62a36dee5f486357cad977bf553ac
 size 4091

vocab.txt CHANGED Viewed

The diff for this file is too large to render. See raw diff