yazansh committed on
Commit
d7f1c72
·
1 Parent(s): 99b45f1

binary-21

Browse files
Files changed (5) hide show
  1. README.md +13 -13
  2. config.json +3 -3
  3. config.toml +7 -7
  4. pytorch_model.bin +1 -1
  5. training_args.bin +1 -1
README.md CHANGED
@@ -23,13 +23,13 @@ model-index:
23
  metrics:
24
  - name: F1
25
  type: f1
26
- value: 0.6302113631956563
27
  - name: Precision
28
  type: precision
29
- value: 0.4972460220318237
30
  - name: Recall
31
  type: recall
32
- value: 0.8602435150873478
33
  ---
34
 
35
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
@@ -39,10 +39,10 @@ should probably proofread and complete it, then remove this comment. -->
39
 
40
  This model is a fine-tuned version of [thejosango/nuha-mlm](https://huggingface.co/thejosango/nuha-mlm) on the nuha-dataset dataset.
41
  It achieves the following results on the evaluation set:
42
- - Loss: 1.0884
43
- - F1: 0.6302
44
- - Precision: 0.4972
45
- - Recall: 0.8602
46
  - Support: None
47
 
48
  ## Model description
@@ -62,7 +62,7 @@ More information needed
62
  ### Training hyperparameters
63
 
64
  The following hyperparameters were used during training:
65
- - learning_rate: 1e-05
66
  - train_batch_size: 16
67
  - eval_batch_size: 32
68
  - seed: 42
@@ -70,7 +70,7 @@ The following hyperparameters were used during training:
70
  - total_train_batch_size: 64
71
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
72
  - lr_scheduler_type: constant_with_warmup
73
- - lr_scheduler_warmup_steps: 1000.0
74
  - num_epochs: 5
75
  - label_smoothing_factor: 0.1
76
 
@@ -78,10 +78,10 @@ The following hyperparameters were used during training:
78
 
79
  | Training Loss | Epoch | Step | Validation Loss | F1 | Precision | Recall | Support |
80
  |:-------------:|:-----:|:----:|:---------------:|:------:|:---------:|:------:|:-------:|
81
- | 4.7382 | 1.06 | 500 | 1.6112 | 0.5093 | 0.5664 | 0.4627 | None |
82
- | 2.8127 | 2.12 | 1000 | 1.4358 | 0.6255 | 0.4994 | 0.8370 | None |
83
- | 2.0837 | 3.18 | 1500 | 1.0886 | 0.6362 | 0.5187 | 0.8227 | None |
84
- | 1.6086 | 4.24 | 2000 | 1.0884 | 0.6302 | 0.4972 | 0.8602 | None |
85
 
86
 
87
  ### Framework versions
 
23
  metrics:
24
  - name: F1
25
  type: f1
26
+ value: 0.5637359294189231
27
  - name: Precision
28
  type: precision
29
+ value: 0.3955176093916756
30
  - name: Recall
31
  type: recall
32
+ value: 0.9809422975119111
33
  ---
34
 
35
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 
39
 
40
  This model is a fine-tuned version of [thejosango/nuha-mlm](https://huggingface.co/thejosango/nuha-mlm) on the nuha-dataset dataset.
41
  It achieves the following results on the evaluation set:
42
+ - Loss: 1.7381
43
+ - F1: 0.5637
44
+ - Precision: 0.3955
45
+ - Recall: 0.9809
46
  - Support: None
47
 
48
  ## Model description
 
62
  ### Training hyperparameters
63
 
64
  The following hyperparameters were used during training:
65
+ - learning_rate: 3e-05
66
  - train_batch_size: 16
67
  - eval_batch_size: 32
68
  - seed: 42
 
70
  - total_train_batch_size: 64
71
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
72
  - lr_scheduler_type: constant_with_warmup
73
+ - lr_scheduler_warmup_steps: 500.0
74
  - num_epochs: 5
75
  - label_smoothing_factor: 0.1
76
 
 
78
 
79
  | Training Loss | Epoch | Step | Validation Loss | F1 | Precision | Recall | Support |
80
  |:-------------:|:-----:|:----:|:---------------:|:------:|:---------:|:------:|:-------:|
81
+ | 2.5304 | 1.06 | 500 | 1.2026 | 0.6045 | 0.4616 | 0.8756 | None |
82
+ | 1.7019 | 2.12 | 1000 | 1.5175 | 0.5891 | 0.4260 | 0.9545 | None |
83
+ | 1.2725 | 3.18 | 1500 | 0.6375 | 0.6583 | 0.6373 | 0.6808 | None |
84
+ | 1.1038 | 4.24 | 2000 | 1.7381 | 0.5637 | 0.3955 | 0.9809 | None |
85
 
86
 
87
  ### Framework versions
config.json CHANGED
@@ -3,11 +3,11 @@
3
  "architectures": [
4
  "BertForSequenceClassification"
5
  ],
6
- "attention_probs_dropout_prob": 0.2,
7
- "classifier_dropout": 0.2,
8
  "gradient_checkpointing": false,
9
  "hidden_act": "gelu",
10
- "hidden_dropout_prob": 0.2,
11
  "hidden_size": 768,
12
  "id2label": {
13
  "0": "non-hate-speech",
 
3
  "architectures": [
4
  "BertForSequenceClassification"
5
  ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "classifier_dropout": 0.1,
8
  "gradient_checkpointing": false,
9
  "hidden_act": "gelu",
10
+ "hidden_dropout_prob": 0.1,
11
  "hidden_size": 768,
12
  "id2label": {
13
  "0": "non-hate-speech",
config.toml CHANGED
@@ -1,5 +1,5 @@
1
  [experiment]
2
- name = "binary-20"
3
  type = "binary"
4
 
5
 
@@ -13,9 +13,9 @@ augment_ratio = 0.0
13
  [model]
14
  pretrained_model_name_or_path = "thejosango/nuha-mlm"
15
  revision = "984ac09880b24959f6767fdbea8757d2c312df46"
16
- hidden_dropout_prob = 0.2
17
- attention_probs_dropout_prob = 0.2
18
- classifier_dropout = 0.2
19
  #num_hidden_layers = 6
20
  #num_attention_heads = 12
21
  #hidden_size = 768
@@ -24,13 +24,13 @@ classifier_dropout = 0.2
24
 
25
  [training]
26
  num_train_epochs = 5
27
- warmup_steps = 1e3
28
  lr_scheduler_type = "constant_with_warmup"
29
- learning_rate = 1e-5
30
  per_device_train_batch_size = 16
31
  per_device_eval_batch_size = 32
32
  gradient_accumulation_steps = 4
33
- weight_decay = 0.05
34
  label_smoothing_factor = 0.1
35
  weighted_loss = false
36
  resample_data = true
 
1
  [experiment]
2
+ name = "binary-21"
3
  type = "binary"
4
 
5
 
 
13
  [model]
14
  pretrained_model_name_or_path = "thejosango/nuha-mlm"
15
  revision = "984ac09880b24959f6767fdbea8757d2c312df46"
16
+ hidden_dropout_prob = 0.1
17
+ attention_probs_dropout_prob = 0.1
18
+ classifier_dropout = 0.1
19
  #num_hidden_layers = 6
20
  #num_attention_heads = 12
21
  #hidden_size = 768
 
24
 
25
  [training]
26
  num_train_epochs = 5
27
+ warmup_steps = 5e2
28
  lr_scheduler_type = "constant_with_warmup"
29
+ learning_rate = 3e-5
30
  per_device_train_batch_size = 16
31
  per_device_eval_batch_size = 32
32
  gradient_accumulation_steps = 4
33
+ weight_decay = 0.001
34
  label_smoothing_factor = 0.1
35
  weighted_loss = false
36
  resample_data = true
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:59b0e5bbf20a6b167f71da35e9552c419eaea22968c91db1542e7179ca0ce3a0
3
  size 540847921
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2432d4365874f5c0c4f0db7a29c62c98cd861472b8a6c5e899edbc8e7f1d9594
3
  size 540847921
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a36be302c6e8f2422e4c79fa435fc84ec8b153a6ccfe8f5bb5f0297e17d46015
3
  size 4091
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:54d7a70a5e033a47a0efb21cf37879c3a9e52e296da70985eddcb4eaaba41d94
3
  size 4091