thejosango
/

nuha-binary

@@ -23,13 +23,13 @@ model-index:
     metrics:
     - name: F1
       type: f1
-      value: 0.6642664266426642
     - name: Precision
       type: precision
-      value: 0.6142322097378277
     - name: Recall
       type: recall
-      value: 0.7231749142577168
 ---
 <!-- This model card has been generated automatically according to the information the Trainer had access to. You
@@ -39,10 +39,10 @@ should probably proofread and complete it, then remove this comment. -->
 This model is a fine-tuned version of [aubmindlab/bert-base-arabertv02-twitter](https://huggingface.co/aubmindlab/bert-base-arabertv02-twitter) on the nuha-dataset dataset.
 It achieves the following results on the evaluation set:
-- Loss: 0.5473
-- F1: 0.6643
-- Precision: 0.6142
-- Recall: 0.7232
 - Support: None
 ## Model description
@@ -64,8 +64,10 @@ More information needed
 The following hyperparameters were used during training:
 - learning_rate: 1e-05
 - train_batch_size: 32
-- eval_batch_size: 64
 - seed: 42
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: constant
 - num_epochs: 50
@@ -74,18 +76,21 @@ The following hyperparameters were used during training:
 | Training Loss | Epoch | Step | Validation Loss | F1     | Precision | Recall | Support |
 |:-------------:|:-----:|:----:|:---------------:|:------:|:---------:|:------:|:-------:|
-| 0.8335        | 0.32  | 500  | 0.7434          | 0.2637 | 0.5264    | 0.1759 | None    |
-| 0.6566        | 0.64  | 1000 | 0.5636          | 0.3675 | 0.7393    | 0.2445 | None    |
-| 0.6277        | 0.96  | 1500 | 0.7360          | 0.5957 | 0.4424    | 0.9113 | None    |
-| 0.7174        | 1.28  | 2000 | 1.0336          | 0.1403 | 0.9226    | 0.0759 | None    |
-| 0.6156        | 1.59  | 2500 | 0.9586          | 0.0870 | 0.9490    | 0.0456 | None    |
-| 0.6196        | 1.91  | 3000 | 0.4772          | 0.5771 | 0.6969    | 0.4924 | None    |
-| 0.5038        | 2.23  | 3500 | 0.5039          | 0.6608 | 0.6119    | 0.7183 | None    |
-| 0.5072        | 2.55  | 4000 | 0.4758          | 0.6230 | 0.6794    | 0.5752 | None    |
-| 0.4988        | 2.87  | 4500 | 0.4592          | 0.6270 | 0.6999    | 0.5679 | None    |
-| 0.4687        | 3.19  | 5000 | 1.0071          | 0.1982 | 0.908     | 0.1112 | None    |
-| 0.4582        | 3.51  | 5500 | 0.4858          | 0.5598 | 0.7451    | 0.4483 | None    |
-| 0.4321        | 3.83  | 6000 | 0.5473          | 0.6643 | 0.6142    | 0.7232 | None    |
 ### Framework versions

     metrics:
     - name: F1
       type: f1
+      value: 0.6448919449901768
     - name: Precision
       type: precision
+      value: 0.6464795667159035
     - name: Recall
       type: recall
+      value: 0.643312101910828
 ---
 <!-- This model card has been generated automatically according to the information the Trainer had access to. You should probably proofread and complete it, then remove this comment. -->
 This model is a fine-tuned version of [aubmindlab/bert-base-arabertv02-twitter](https://huggingface.co/aubmindlab/bert-base-arabertv02-twitter) on the nuha-dataset dataset.
 It achieves the following results on the evaluation set:
+- Loss: 1.7060
+- F1: 0.6449
+- Precision: 0.6465
+- Recall: 0.6433
 - Support: None
 ## Model description
 The following hyperparameters were used during training:
 - learning_rate: 1e-05
 - train_batch_size: 32
+- eval_batch_size: 128
 - seed: 42
+- gradient_accumulation_steps: 2
+- total_train_batch_size: 64
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: constant
 - num_epochs: 50
 | Training Loss | Epoch | Step | Validation Loss | F1     | Precision | Recall | Support |
 |:-------------:|:-----:|:----:|:---------------:|:------:|:---------:|:------:|:-------:|
+| 0.5406        | 0.64  | 500  | 0.4952          | 0.5584 | 0.6769    | 0.4753 | None    |
+| 0.4445        | 1.28  | 1000 | 0.4863          | 0.5321 | 0.7509    | 0.4121 | None    |
+| 0.4449        | 1.91  | 1500 | 0.4629          | 0.6368 | 0.6794    | 0.5992 | None    |
+| 0.3638        | 2.55  | 2000 | 0.4948          | 0.6369 | 0.6777    | 0.6007 | None    |
+| 0.3536        | 3.19  | 2500 | 0.5794          | 0.6604 | 0.6468    | 0.6747 | None    |
+| 0.2881        | 3.83  | 3000 | 0.5343          | 0.6320 | 0.6858    | 0.5860 | None    |
+| 0.1775        | 4.46  | 3500 | 0.7580          | 0.6439 | 0.6450    | 0.6428 | None    |
+| 0.1554        | 5.1   | 4000 | 1.1326          | 0.6278 | 0.6593    | 0.5992 | None    |
+| 0.124         | 5.74  | 4500 | 0.9173          | 0.6389 | 0.6516    | 0.6267 | None    |
+| 0.0865        | 6.38  | 5000 | 1.2594          | 0.6342 | 0.6610    | 0.6095 | None    |
+| 0.0962        | 7.02  | 5500 | 1.4553          | 0.6477 | 0.6263    | 0.6707 | None    |
+| 0.0752        | 7.65  | 6000 | 1.3959          | 0.6391 | 0.6580    | 0.6213 | None    |
+| 0.0621        | 8.29  | 6500 | 1.6376          | 0.6439 | 0.6359    | 0.6521 | None    |
+| 0.0664        | 8.93  | 7000 | 1.3241          | 0.6284 | 0.6613    | 0.5987 | None    |
+| 0.0562        | 9.57  | 7500 | 1.7060          | 0.6449 | 0.6465    | 0.6433 | None    |
 ### Framework versions

config.json CHANGED Viewed

@@ -23,7 +23,7 @@
   "max_position_embeddings": 512,
   "model_type": "bert",
   "num_attention_heads": 12,
-  "num_hidden_layers": 12,
   "pad_token_id": 0,
   "position_embedding_type": "absolute",
   "problem_type": "single_label_classification",

   "max_position_embeddings": 512,
   "model_type": "bert",
   "num_attention_heads": 12,
+  "num_hidden_layers": 6,
   "pad_token_id": 0,
   "position_embedding_type": "absolute",
   "problem_type": "single_label_classification",

config.toml CHANGED Viewed

@@ -1,5 +1,5 @@
 [experiment]
-name = "bianry-1"
 type = "binary"
@@ -16,7 +16,7 @@ revision = "main"
 hidden_dropout_prob = 0.0
 attention_probs_dropout_prob = 0.0
 classifier_dropout = 0.0
-#num_hidden_layers = 6
 #num_attention_heads = 12
 #hidden_size = 768
 #intermediate_size= 1024
@@ -28,10 +28,10 @@ warmup_steps = 0
 lr_scheduler_type = "constant"
 learning_rate = 1e-5
 per_device_train_batch_size = 32
-per_device_eval_batch_size = 64
-gradient_accumulation_steps = 1
 weight_decay = 0.00
 label_smoothing_factor = 0.0
-weighted_loss = false
-early_stopping_patience = 5
 early_stopping_threshold = 0.005

 [experiment]
+name = "binary-3"
 type = "binary"
 hidden_dropout_prob = 0.0
 attention_probs_dropout_prob = 0.0
 classifier_dropout = 0.0
+num_hidden_layers = 6
 #num_attention_heads = 12
 #hidden_size = 768
 #intermediate_size= 1024
 lr_scheduler_type = "constant"
 learning_rate = 1e-5
 per_device_train_batch_size = 32
+per_device_eval_batch_size = 128
+gradient_accumulation_steps = 2
 weight_decay = 0.00
 label_smoothing_factor = 0.0
+weighted_loss = false
+early_stopping_patience = 10
 early_stopping_threshold = 0.005

pytorch_model.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b25e0860b4dc529f84126a508b1d0f5b3ce6f8e06f8cb41191d12a604e7ab900
-size 540847921

 version https://git-lfs.github.com/spec/v1
+oid sha256:b83c900e0ea49bbcefb28e9618972b484e78980b78de29bca1931b8e084204d3
+size 370706033

tokenizer.json CHANGED Viewed

@@ -1,11 +1,6 @@
 {
   "version": "1.0",
-  "truncation": {
-    "direction": "Right",
-    "max_length": 512,
-    "strategy": "LongestFirst",
-    "stride": 0
-  },
   "padding": null,
   "added_tokens": [
     {

 {
   "version": "1.0",
+  "truncation": null,
   "padding": null,
   "added_tokens": [
     {

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:71323c508e837425b2b66a5f0be035927105efc9797869282f035fea59f175df
 size 4027

 version https://git-lfs.github.com/spec/v1
+oid sha256:581939067a961bed8370054fe7cd7f1030c3c6f0eeb1c7407bea8c1b1647597b
 size 4027