thejosango
/

nuha-ajp-binary

@@ -23,13 +23,13 @@ model-index:
     metrics:
     - name: F1
       type: f1
-      value: 0.6302113631956563
     - name: Precision
       type: precision
-      value: 0.4972460220318237
     - name: Recall
       type: recall
-      value: 0.8602435150873478
 ---
 <!-- This model card has been generated automatically according to the information the Trainer had access to. You
@@ -39,10 +39,10 @@ should probably proofread and complete it, then remove this comment. -->
 This model is a fine-tuned version of [thejosango/nuha-mlm](https://huggingface.co/thejosango/nuha-mlm) on the nuha-dataset dataset.
 It achieves the following results on the evaluation set:
-- Loss: 1.0884
-- F1: 0.6302
-- Precision: 0.4972
-- Recall: 0.8602
 - Support: None
 ## Model description
@@ -62,7 +62,7 @@ More information needed
 ### Training hyperparameters
 The following hyperparameters were used during training:
-- learning_rate: 1e-05
 - train_batch_size: 16
 - eval_batch_size: 32
 - seed: 42
@@ -70,7 +70,7 @@ The following hyperparameters were used during training:
 - total_train_batch_size: 64
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: constant_with_warmup
-- lr_scheduler_warmup_steps: 1000.0
 - num_epochs: 5
 - label_smoothing_factor: 0.1
@@ -78,10 +78,10 @@ The following hyperparameters were used during training:
 | Training Loss | Epoch | Step | Validation Loss | F1     | Precision | Recall | Support |
 |:-------------:|:-----:|:----:|:---------------:|:------:|:---------:|:------:|:-------:|
-| 4.7382        | 1.06  | 500  | 1.6112          | 0.5093 | 0.5664    | 0.4627 | None    |
-| 2.8127        | 2.12  | 1000 | 1.4358          | 0.6255 | 0.4994    | 0.8370 | None    |
-| 2.0837        | 3.18  | 1500 | 1.0886          | 0.6362 | 0.5187    | 0.8227 | None    |
-| 1.6086        | 4.24  | 2000 | 1.0884          | 0.6302 | 0.4972    | 0.8602 | None    |
 ### Framework versions

     metrics:
     - name: F1
       type: f1
+      value: 0.5637359294189231
     - name: Precision
       type: precision
+      value: 0.3955176093916756
     - name: Recall
       type: recall
+      value: 0.9809422975119111
 ---
 <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 This model is a fine-tuned version of [thejosango/nuha-mlm](https://huggingface.co/thejosango/nuha-mlm) on the nuha-dataset dataset.
 It achieves the following results on the evaluation set:
+- Loss: 1.7381
+- F1: 0.5637
+- Precision: 0.3955
+- Recall: 0.9809
 - Support: None
 ## Model description
 ### Training hyperparameters
 The following hyperparameters were used during training:
+- learning_rate: 3e-05
 - train_batch_size: 16
 - eval_batch_size: 32
 - seed: 42
 - total_train_batch_size: 64
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: constant_with_warmup
+- lr_scheduler_warmup_steps: 500.0
 - num_epochs: 5
 - label_smoothing_factor: 0.1
 | Training Loss | Epoch | Step | Validation Loss | F1     | Precision | Recall | Support |
 |:-------------:|:-----:|:----:|:---------------:|:------:|:---------:|:------:|:-------:|
+| 2.5304        | 1.06  | 500  | 1.2026          | 0.6045 | 0.4616    | 0.8756 | None    |
+| 1.7019        | 2.12  | 1000 | 1.5175          | 0.5891 | 0.4260    | 0.9545 | None    |
+| 1.2725        | 3.18  | 1500 | 0.6375          | 0.6583 | 0.6373    | 0.6808 | None    |
+| 1.1038        | 4.24  | 2000 | 1.7381          | 0.5637 | 0.3955    | 0.9809 | None    |
 ### Framework versions

config.json CHANGED Viewed

@@ -3,11 +3,11 @@
   "architectures": [
     "BertForSequenceClassification"
   ],
-  "attention_probs_dropout_prob": 0.2,
-  "classifier_dropout": 0.2,
   "gradient_checkpointing": false,
   "hidden_act": "gelu",
-  "hidden_dropout_prob": 0.2,
   "hidden_size": 768,
   "id2label": {
     "0": "non-hate-speech",

   "architectures": [
     "BertForSequenceClassification"
   ],
+  "attention_probs_dropout_prob": 0.1,
+  "classifier_dropout": 0.1,
   "gradient_checkpointing": false,
   "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
   "hidden_size": 768,
   "id2label": {
     "0": "non-hate-speech",

config.toml CHANGED Viewed

@@ -1,5 +1,5 @@
 [experiment]
-name = "binary-20"
 type = "binary"
@@ -13,9 +13,9 @@ augment_ratio = 0.0
 [model]
 pretrained_model_name_or_path = "thejosango/nuha-mlm"
 revision = "984ac09880b24959f6767fdbea8757d2c312df46"
-hidden_dropout_prob = 0.2
-attention_probs_dropout_prob = 0.2
-classifier_dropout = 0.2
 #num_hidden_layers = 6
 #num_attention_heads = 12
 #hidden_size = 768
@@ -24,13 +24,13 @@ classifier_dropout = 0.2
 [training]
 num_train_epochs = 5
-warmup_steps = 1e3
 lr_scheduler_type = "constant_with_warmup"
-learning_rate = 1e-5
 per_device_train_batch_size = 16
 per_device_eval_batch_size = 32
 gradient_accumulation_steps = 4
-weight_decay = 0.05
 label_smoothing_factor = 0.1
 weighted_loss = false
 resample_data = true

 [experiment]
+name = "binary-21"
 type = "binary"
 [model]
 pretrained_model_name_or_path = "thejosango/nuha-mlm"
 revision = "984ac09880b24959f6767fdbea8757d2c312df46"
+hidden_dropout_prob = 0.1
+attention_probs_dropout_prob = 0.1
+classifier_dropout = 0.1
 #num_hidden_layers = 6
 #num_attention_heads = 12
 #hidden_size = 768
 [training]
 num_train_epochs = 5
+warmup_steps = 5e2
 lr_scheduler_type = "constant_with_warmup"
+learning_rate = 3e-5
 per_device_train_batch_size = 16
 per_device_eval_batch_size = 32
 gradient_accumulation_steps = 4
+weight_decay = 0.001
 label_smoothing_factor = 0.1
 weighted_loss = false
 resample_data = true

pytorch_model.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:59b0e5bbf20a6b167f71da35e9552c419eaea22968c91db1542e7179ca0ce3a0
 size 540847921

 version https://git-lfs.github.com/spec/v1
+oid sha256:2432d4365874f5c0c4f0db7a29c62c98cd861472b8a6c5e899edbc8e7f1d9594
 size 540847921

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a36be302c6e8f2422e4c79fa435fc84ec8b153a6ccfe8f5bb5f0297e17d46015
 size 4091

 version https://git-lfs.github.com/spec/v1
+oid sha256:54d7a70a5e033a47a0efb21cf37879c3a9e52e296da70985eddcb4eaaba41d94
 size 4091