binary-4

Browse files

Files changed (7) hide show

README.md +27 -26
config.json +5 -5
config.toml +15 -15
pytorch_model.bin +2 -2
tokenizer.json +6 -1
tokenizer_config.json +7 -0
training_args.bin +2 -2

README.md CHANGED Viewed

@@ -1,5 +1,5 @@
 ---
-base_model: aubmindlab/bert-base-arabertv02-twitter
 tags:
 - generated_from_trainer
 datasets:
@@ -23,13 +23,13 @@ model-index:
     metrics:
     - name: F1
       type: f1
-      value: 0.6448919449901768
     - name: Precision
       type: precision
-      value: 0.6464795667159035
     - name: Recall
       type: recall
-      value: 0.643312101910828
 ---
 <!-- This model card has been generated automatically according to the information the Trainer had access to. You
@@ -37,12 +37,12 @@ should probably proofread and complete it, then remove this comment. -->
 # nuha-binary
-This model is a fine-tuned version of [aubmindlab/bert-base-arabertv02-twitter](https://huggingface.co/aubmindlab/bert-base-arabertv02-twitter) on the nuha-dataset dataset.
 It achieves the following results on the evaluation set:
-- Loss: 1.7060
-- F1: 0.6449
-- Precision: 0.6465
-- Recall: 0.6433
 - Support: None
 ## Model description
@@ -64,33 +64,34 @@ More information needed
 The following hyperparameters were used during training:
 - learning_rate: 1e-05
 - train_batch_size: 32
-- eval_batch_size: 128
 - seed: 42
 - gradient_accumulation_steps: 2
 - total_train_batch_size: 64
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: constant
-- num_epochs: 50
 ### Training results
 | Training Loss | Epoch | Step | Validation Loss | F1     | Precision | Recall | Support |
 |:-------------:|:-----:|:----:|:---------------:|:------:|:---------:|:------:|:-------:|
-| 0.5406        | 0.64  | 500  | 0.4952          | 0.5584 | 0.6769    | 0.4753 | None    |
-| 0.4445        | 1.28  | 1000 | 0.4863          | 0.5321 | 0.7509    | 0.4121 | None    |
-| 0.4449        | 1.91  | 1500 | 0.4629          | 0.6368 | 0.6794    | 0.5992 | None    |
-| 0.3638        | 2.55  | 2000 | 0.4948          | 0.6369 | 0.6777    | 0.6007 | None    |
-| 0.3536        | 3.19  | 2500 | 0.5794          | 0.6604 | 0.6468    | 0.6747 | None    |
-| 0.2881        | 3.83  | 3000 | 0.5343          | 0.6320 | 0.6858    | 0.5860 | None    |
-| 0.1775        | 4.46  | 3500 | 0.7580          | 0.6439 | 0.6450    | 0.6428 | None    |
-| 0.1554        | 5.1   | 4000 | 1.1326          | 0.6278 | 0.6593    | 0.5992 | None    |
-| 0.124         | 5.74  | 4500 | 0.9173          | 0.6389 | 0.6516    | 0.6267 | None    |
-| 0.0865        | 6.38  | 5000 | 1.2594          | 0.6342 | 0.6610    | 0.6095 | None    |
-| 0.0962        | 7.02  | 5500 | 1.4553          | 0.6477 | 0.6263    | 0.6707 | None    |
-| 0.0752        | 7.65  | 6000 | 1.3959          | 0.6391 | 0.6580    | 0.6213 | None    |
-| 0.0621        | 8.29  | 6500 | 1.6376          | 0.6439 | 0.6359    | 0.6521 | None    |
-| 0.0664        | 8.93  | 7000 | 1.3241          | 0.6284 | 0.6613    | 0.5987 | None    |
-| 0.0562        | 9.57  | 7500 | 1.7060          | 0.6449 | 0.6465    | 0.6433 | None    |
 ### Framework versions

 ---
+base_model: thejosango/nuha-mlm
 tags:
 - generated_from_trainer
 datasets:
     metrics:
     - name: F1
       type: f1
+      value: 0.5652559928973069
     - name: Precision
       type: precision
+      value: 0.7137518684603886
     - name: Recall
       type: recall
+      value: 0.4679078882900539
 ---
 <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 # nuha-binary
+This model is a fine-tuned version of [thejosango/nuha-mlm](https://huggingface.co/thejosango/nuha-mlm) on the nuha-dataset dataset.
 It achieves the following results on the evaluation set:
+- Loss: 0.5595
+- F1: 0.5653
+- Precision: 0.7138
+- Recall: 0.4679
 - Support: None
 ## Model description
 The following hyperparameters were used during training:
 - learning_rate: 1e-05
 - train_batch_size: 32
+- eval_batch_size: 32
 - seed: 42
 - gradient_accumulation_steps: 2
 - total_train_batch_size: 64
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: constant
+- lr_scheduler_warmup_steps: 1000.0
+- num_epochs: 30
+- label_smoothing_factor: 0.1
 ### Training results
 | Training Loss | Epoch | Step | Validation Loss | F1     | Precision | Recall | Support |
 |:-------------:|:-----:|:----:|:---------------:|:------:|:---------:|:------:|:-------:|
+| 0.8838        | 0.64  | 500  | 0.6201          | 0.4182 | 0.6907    | 0.2999 | None    |
+| 0.6748        | 1.28  | 1000 | 0.5750          | 0.4756 | 0.7174    | 0.3557 | None    |
+| 0.6404        | 1.91  | 1500 | 0.5329          | 0.5705 | 0.6788    | 0.4919 | None    |
+| 0.5836        | 2.55  | 2000 | 0.5316          | 0.5649 | 0.7069    | 0.4704 | None    |
+| 0.5793        | 3.19  | 2500 | 0.5267          | 0.6255 | 0.6614    | 0.5933 | None    |
+| 0.557         | 3.83  | 3000 | 0.5211          | 0.6145 | 0.6669    | 0.5698 | None    |
+| 0.5279        | 4.46  | 3500 | 0.5301          | 0.6516 | 0.6481    | 0.6551 | None    |
+| 0.5121        | 5.1   | 4000 | 0.5220          | 0.6356 | 0.6818    | 0.5953 | None    |
+| 0.5067        | 5.74  | 4500 | 0.5270          | 0.6609 | 0.6481    | 0.6742 | None    |
+| 0.4806        | 6.38  | 5000 | 0.5259          | 0.6309 | 0.6899    | 0.5811 | None    |
+| 0.4858        | 7.02  | 5500 | 0.5303          | 0.6145 | 0.6890    | 0.5546 | None    |
+| 0.4608        | 7.65  | 6000 | 0.5429          | 0.6558 | 0.6402    | 0.6722 | None    |
+| 0.441         | 8.29  | 6500 | 0.5575          | 0.6279 | 0.6776    | 0.5850 | None    |
+| 0.4367        | 8.93  | 7000 | 0.5595          | 0.5653 | 0.7138    | 0.4679 | None    |
 ### Framework versions

config.json CHANGED Viewed

@@ -1,13 +1,13 @@
 {
-  "_name_or_path": "aubmindlab/bert-base-arabertv02-twitter",
   "architectures": [
     "BertForSequenceClassification"
   ],
-  "attention_probs_dropout_prob": 0.0,
-  "classifier_dropout": 0.0,
   "gradient_checkpointing": false,
   "hidden_act": "gelu",
-  "hidden_dropout_prob": 0.0,
   "hidden_size": 768,
   "id2label": {
     "0": "non-hate-speech",
@@ -23,7 +23,7 @@
   "max_position_embeddings": 512,
   "model_type": "bert",
   "num_attention_heads": 12,
-  "num_hidden_layers": 6,
   "pad_token_id": 0,
   "position_embedding_type": "absolute",
   "problem_type": "single_label_classification",

 {
+  "_name_or_path": "thejosango/nuha-mlm",
   "architectures": [
     "BertForSequenceClassification"
   ],
+  "attention_probs_dropout_prob": 0.1,
+  "classifier_dropout": null,
   "gradient_checkpointing": false,
   "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
   "hidden_size": 768,
   "id2label": {
     "0": "non-hate-speech",
   "max_position_embeddings": 512,
   "model_type": "bert",
   "num_attention_heads": 12,
+  "num_hidden_layers": 4,
   "pad_token_id": 0,
   "position_embedding_type": "absolute",
   "problem_type": "single_label_classification",

config.toml CHANGED Viewed

@@ -1,5 +1,5 @@
 [experiment]
-name = "binary-3"
 type = "binary"
@@ -11,27 +11,27 @@ augment_ratio = 0.0
 [model]
-pretrained_model_name_or_path = "aubmindlab/bert-base-arabertv02-twitter"
-revision = "main"
-hidden_dropout_prob = 0.0
-attention_probs_dropout_prob = 0.0
-classifier_dropout = 0.0
-num_hidden_layers = 6
 #num_attention_heads = 12
 #hidden_size = 768
-#intermediate_size= 1024
 [training]
-num_train_epochs = 50
-warmup_steps = 0
 lr_scheduler_type = "constant"
 learning_rate = 1e-5
 per_device_train_batch_size = 32
-per_device_eval_batch_size = 128
 gradient_accumulation_steps = 2
-weight_decay = 0.00
-label_smoothing_factor = 0.0
-weighted_loss = false
-early_stopping_patience = 10
 early_stopping_threshold = 0.005

 [experiment]
+name = "binary-4"
 type = "binary"
 [model]
+pretrained_model_name_or_path = "thejosango/nuha-mlm"
+revision = "2caf9ebc5b275737c95f8bb16953288107a7131c"
+#hidden_dropout_prob = 0
+#attention_probs_dropout_prob = 0
+#classifier_dropout = 0
+#num_hidden_layers = 4
 #num_attention_heads = 12
 #hidden_size = 768
+#intermediate_size= null
 [training]
+num_train_epochs = 30
+warmup_steps = 1e3
 lr_scheduler_type = "constant"
 learning_rate = 1e-5
 per_device_train_batch_size = 32
+per_device_eval_batch_size = 32
 gradient_accumulation_steps = 2
+weight_decay = 0.01
+label_smoothing_factor = 0.1
+weighted_loss = false
+early_stopping_patience = 5
 early_stopping_threshold = 0.005

pytorch_model.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b83c900e0ea49bbcefb28e9618972b484e78980b78de29bca1931b8e084204d3
-size 370706033

 version https://git-lfs.github.com/spec/v1
+oid sha256:40de39f946a025883e44c47ae5e11ea1f08dc35b0befe9f2443dbc785a4045bd
+size 313992076

tokenizer.json CHANGED Viewed

@@ -1,6 +1,11 @@
 {
   "version": "1.0",
-  "truncation": null,
   "padding": null,
   "added_tokens": [
     {

 {
   "version": "1.0",
+  "truncation": {
+    "direction": "Right",
+    "max_length": 512,
+    "strategy": "LongestFirst",
+    "stride": 0
+  },
   "padding": null,
   "added_tokens": [
     {

tokenizer_config.json CHANGED Viewed

@@ -5,17 +5,24 @@
   "do_lower_case": false,
   "mask_token": "[MASK]",
   "max_len": 512,
   "model_max_length": 512,
   "never_split": [
     "[بريد]",
     "[مستخدم]",
     "[رابط]"
   ],
   "pad_token": "[PAD]",
   "sep_token": "[SEP]",
   "strip_accents": null,
   "tokenize_chinese_chars": true,
   "tokenizer_class": "BertTokenizer",
   "unk_token": "[UNK]",
   "use_fast": true
 }

   "do_lower_case": false,
   "mask_token": "[MASK]",
   "max_len": 512,
+  "max_length": 512,
   "model_max_length": 512,
   "never_split": [
     "[بريد]",
     "[مستخدم]",
     "[رابط]"
   ],
+  "pad_to_multiple_of": null,
   "pad_token": "[PAD]",
+  "pad_token_type_id": 0,
+  "padding_side": "right",
   "sep_token": "[SEP]",
+  "stride": 0,
   "strip_accents": null,
   "tokenize_chinese_chars": true,
   "tokenizer_class": "BertTokenizer",
+  "truncation_side": "right",
+  "truncation_strategy": "longest_first",
   "unk_token": "[UNK]",
   "use_fast": true
 }

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:581939067a961bed8370054fe7cd7f1030c3c6f0eeb1c7407bea8c1b1647597b
-size 4027

 version https://git-lfs.github.com/spec/v1
+oid sha256:37aa39e079233d13bb57c12ffb2e9ddc52bb1ab690045ff67bde840cebbe2c7b
+size 4091