thejosango/nuha-binary

@@ -23,13 +23,13 @@ model-index:
     metrics:
     - name: F1
       type: f1
-      value: 0.6709883502442691
     - name: Precision
       type: precision
-      value: 0.6826610590709233
     - name: Recall
       type: recall
-      value: 0.6597081101053021
 ---
 <!-- This model card has been generated automatically according to the information the Trainer had access to. You
@@ -39,10 +39,10 @@ should probably proofread and complete it, then remove this comment. -->
 This model is a fine-tuned version of [thejosango/nuha-mlm](https://huggingface.co/thejosango/nuha-mlm) on the nuha-dataset dataset.
 It achieves the following results on the evaluation set:
-- Loss: 0.5203
-- F1: 0.6710
-- Precision: 0.6827
-- Recall: 0.6597
 - Support: None
 ## Model description
@@ -63,47 +63,31 @@ More information needed
 The following hyperparameters were used during training:
 - learning_rate: 1e-05
-- train_batch_size: 32
-- eval_batch_size: 32
 - seed: 42
 - gradient_accumulation_steps: 2
-- total_train_batch_size: 64
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
-- lr_scheduler_type: linear
 - lr_scheduler_warmup_steps: 1000.0
-- num_epochs: 20
 - label_smoothing_factor: 0.1
 ### Training results
-| Training Loss | Epoch | Step  | Validation Loss | F1     | Precision | Recall | Support |
-|:-------------:|:-----:|:-----:|:---------------:|:------:|:---------:|:------:|:-------:|
-| 1.2252        | 0.23  | 500   | 0.6446          | 0.5213 | 0.6006    | 0.4606 | None    |
-| 0.8851        | 0.46  | 1000  | 0.5788          | 0.6011 | 0.6123    | 0.5904 | None    |
-| 0.7632        | 0.7   | 1500  | 0.5560          | 0.5957 | 0.6485    | 0.5509 | None    |
-| 0.684         | 0.93  | 2000  | 0.5481          | 0.6076 | 0.6506    | 0.5699 | None    |
-| 0.6334        | 1.16  | 2500  | 0.6265          | 0.6462 | 0.5199    | 0.8535 | None    |
-| 0.6137        | 1.39  | 3000  | 0.5378          | 0.5593 | 0.7224    | 0.4563 | None    |
-| 0.609         | 1.63  | 3500  | 0.5348          | 0.6486 | 0.6499    | 0.6473 | None    |
-| 0.5756        | 1.86  | 4000  | 0.5272          | 0.6182 | 0.6966    | 0.5557 | None    |
-| 0.5667        | 2.09  | 4500  | 0.5257          | 0.6035 | 0.7194    | 0.5197 | None    |
-| 0.5551        | 2.32  | 5000  | 0.5193          | 0.6456 | 0.6837    | 0.6115 | None    |
-| 0.5549        | 2.56  | 5500  | 0.5173          | 0.6236 | 0.7191    | 0.5505 | None    |
-| 0.5507        | 2.79  | 6000  | 0.5279          | 0.6675 | 0.6542    | 0.6813 | None    |
-| 0.5505        | 3.02  | 6500  | 0.5164          | 0.6534 | 0.6916    | 0.6192 | None    |
-| 0.5298        | 3.25  | 7000  | 0.5232          | 0.6687 | 0.6628    | 0.6747 | None    |
-| 0.5313        | 3.49  | 7500  | 0.5128          | 0.6603 | 0.6934    | 0.6303 | None    |
-| 0.5209        | 3.72  | 8000  | 0.5285          | 0.6800 | 0.6513    | 0.7113 | None    |
-| 0.5214        | 3.95  | 8500  | 0.5127          | 0.6443 | 0.7128    | 0.5878 | None    |
-| 0.5033        | 4.18  | 9000  | 0.5179          | 0.6341 | 0.7268    | 0.5623 | None    |
-| 0.5055        | 4.41  | 9500  | 0.5214          | 0.6239 | 0.7347    | 0.5422 | None    |
-| 0.5013        | 4.65  | 10000 | 0.5230          | 0.6794 | 0.6626    | 0.6972 | None    |
-| 0.5107        | 4.88  | 10500 | 0.5127          | 0.6656 | 0.7012    | 0.6335 | None    |
-| 0.4862        | 5.11  | 11000 | 0.5447          | 0.5848 | 0.7670    | 0.4726 | None    |
-| 0.4814        | 5.34  | 11500 | 0.5216          | 0.6217 | 0.7386    | 0.5367 | None    |
-| 0.4877        | 5.58  | 12000 | 0.5176          | 0.6375 | 0.7255    | 0.5684 | None    |
-| 0.4893        | 5.81  | 12500 | 0.5180          | 0.6463 | 0.7129    | 0.5912 | None    |
-| 0.4779        | 6.04  | 13000 | 0.5203          | 0.6710 | 0.6827    | 0.6597 | None    |
 ### Framework versions

     metrics:
     - name: F1
       type: f1
+      value: 0.36679201619901647
     - name: Precision
       type: precision
+      value: 0.8447701532311792
     - name: Recall
       type: recall
+      value: 0.23425087751708848
 ---
 <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 should probably proofread and complete it, then remove this comment. -->
 This model is a fine-tuned version of [thejosango/nuha-mlm](https://huggingface.co/thejosango/nuha-mlm) on the nuha-dataset dataset.
 It achieves the following results on the evaluation set:
+- Loss: 0.4531
+- F1: 0.3668
+- Precision: 0.8448
+- Recall: 0.2343
 - Support: None
 ## Model description
 The following hyperparameters were used during training:
 - learning_rate: 1e-05
+- train_batch_size: 8
+- eval_batch_size: 8
 - seed: 42
 - gradient_accumulation_steps: 2
+- total_train_batch_size: 16
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
+- lr_scheduler_type: constant
 - lr_scheduler_warmup_steps: 1000.0
+- num_epochs: 30
 - label_smoothing_factor: 0.1
 ### Training results
+| Training Loss | Epoch | Step | Validation Loss | F1     | Precision | Recall | Support |
+|:-------------:|:-----:|:----:|:---------------:|:------:|:---------:|:------:|:-------:|
+| 0.8428        | 0.06  | 500  | 0.5318          | 0.4246 | 0.6560    | 0.3139 | None    |
+| 0.6948        | 0.13  | 1000 | 0.4850          | 0.4097 | 0.7445    | 0.2827 | None    |
+| 0.684         | 0.19  | 1500 | 0.4674          | 0.3861 | 0.7563    | 0.2592 | None    |
+| 0.5896        | 0.25  | 2000 | 0.4870          | 0.2807 | 0.8451    | 0.1683 | None    |
+| 0.5585        | 0.31  | 2500 | 0.4591          | 0.5076 | 0.7354    | 0.3876 | None    |
+| 0.5371        | 0.38  | 3000 | 0.4484          | 0.4326 | 0.7993    | 0.2965 | None    |
+| 0.5286        | 0.44  | 3500 | 0.4479          | 0.4129 | 0.8212    | 0.2758 | None    |
+| 0.5071        | 0.5   | 4000 | 0.4433          | 0.4647 | 0.7822    | 0.3305 | None    |
+| 0.5043        | 0.56  | 4500 | 0.4799          | 0.2584 | 0.8539    | 0.1522 | None    |
+| 0.5149        | 0.63  | 5000 | 0.4531          | 0.3668 | 0.8448    | 0.2343 | None    |
 ### Framework versions

config.toml CHANGED Viewed

@@ -1,13 +1,13 @@
 [experiment]
-name = "binary-26"
 type = "binary"
 [dataset]
 path = "thejosango/nuha-dataset"
 dataset_revision = "main"
-augment_ratio = 0.25
-undersampling_strategy = 0
 [model]
@@ -16,15 +16,15 @@ revision = "2caf9ebc5b275737c95f8bb16953288107a7131c"
 [training]
-num_train_epochs = 20
 warmup_steps = 1e3
-lr_scheduler_type = "linear"
 learning_rate = 1e-5
-per_device_train_batch_size = 32
-per_device_eval_batch_size = 32
 gradient_accumulation_steps = 2
 weight_decay = 0.01
 label_smoothing_factor = 0.1
-weighted_loss = false
-early_stopping_patience = 10
 early_stopping_threshold = 0.005

 [experiment]
+name = "binary-27"
 type = "binary"
 [dataset]
 path = "thejosango/nuha-dataset"
 dataset_revision = "main"
+augment_ratio = 0.0
+undersampling_strategy = false
 [model]
 [training]
+num_train_epochs = 30
 warmup_steps = 1e3
+lr_scheduler_type = "constant"
 learning_rate = 1e-5
+per_device_train_batch_size = 8
+per_device_eval_batch_size = 8
 gradient_accumulation_steps = 2
 weight_decay = 0.01
 label_smoothing_factor = 0.1
+weighted_loss = true
+early_stopping_patience = 5
 early_stopping_threshold = 0.005

pytorch_model.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6274d0bde584699734a00b2cd752b54dcaf69e5a4753cab991977f54ca510145
 size 313992076

 version https://git-lfs.github.com/spec/v1
+oid sha256:587e8391854237b42317246fd0b723aee9b71b6d155d6fa2f5f78109242cf9f9
 size 313992076

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0cc61a0a4012cb94dd109030c3f0bc5d60b87d98e45eac45d39704943b4e479e
 size 4091

 version https://git-lfs.github.com/spec/v1
+oid sha256:12ec62d67b80d950a507d49d7653460381b8a047dc923e1e97dc8a60d3a0d858
 size 4091