yazansh committed on
Commit
aca5e41
·
1 Parent(s): 024ce8b
Files changed (7) hide show
  1. README.md +27 -26
  2. config.json +5 -5
  3. config.toml +15 -15
  4. pytorch_model.bin +2 -2
  5. tokenizer.json +6 -1
  6. tokenizer_config.json +7 -0
  7. training_args.bin +2 -2
README.md CHANGED
@@ -1,5 +1,5 @@
1
  ---
2
- base_model: aubmindlab/bert-base-arabertv02-twitter
3
  tags:
4
  - generated_from_trainer
5
  datasets:
@@ -23,13 +23,13 @@ model-index:
23
  metrics:
24
  - name: F1
25
  type: f1
26
- value: 0.6448919449901768
27
  - name: Precision
28
  type: precision
29
- value: 0.6464795667159035
30
  - name: Recall
31
  type: recall
32
- value: 0.643312101910828
33
  ---
34
 
35
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
@@ -37,12 +37,12 @@ should probably proofread and complete it, then remove this comment. -->
37
 
38
  # nuha-binary
39
 
40
- This model is a fine-tuned version of [aubmindlab/bert-base-arabertv02-twitter](https://huggingface.co/aubmindlab/bert-base-arabertv02-twitter) on the nuha-dataset dataset.
41
  It achieves the following results on the evaluation set:
42
- - Loss: 1.7060
43
- - F1: 0.6449
44
- - Precision: 0.6465
45
- - Recall: 0.6433
46
  - Support: None
47
 
48
  ## Model description
@@ -64,33 +64,34 @@ More information needed
64
  The following hyperparameters were used during training:
65
  - learning_rate: 1e-05
66
  - train_batch_size: 32
67
- - eval_batch_size: 128
68
  - seed: 42
69
  - gradient_accumulation_steps: 2
70
  - total_train_batch_size: 64
71
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
72
  - lr_scheduler_type: constant
73
- - num_epochs: 50
 
 
74
 
75
  ### Training results
76
 
77
  | Training Loss | Epoch | Step | Validation Loss | F1 | Precision | Recall | Support |
78
  |:-------------:|:-----:|:----:|:---------------:|:------:|:---------:|:------:|:-------:|
79
- | 0.5406 | 0.64 | 500 | 0.4952 | 0.5584 | 0.6769 | 0.4753 | None |
80
- | 0.4445 | 1.28 | 1000 | 0.4863 | 0.5321 | 0.7509 | 0.4121 | None |
81
- | 0.4449 | 1.91 | 1500 | 0.4629 | 0.6368 | 0.6794 | 0.5992 | None |
82
- | 0.3638 | 2.55 | 2000 | 0.4948 | 0.6369 | 0.6777 | 0.6007 | None |
83
- | 0.3536 | 3.19 | 2500 | 0.5794 | 0.6604 | 0.6468 | 0.6747 | None |
84
- | 0.2881 | 3.83 | 3000 | 0.5343 | 0.6320 | 0.6858 | 0.5860 | None |
85
- | 0.1775 | 4.46 | 3500 | 0.7580 | 0.6439 | 0.6450 | 0.6428 | None |
86
- | 0.1554 | 5.1 | 4000 | 1.1326 | 0.6278 | 0.6593 | 0.5992 | None |
87
- | 0.124 | 5.74 | 4500 | 0.9173 | 0.6389 | 0.6516 | 0.6267 | None |
88
- | 0.0865 | 6.38 | 5000 | 1.2594 | 0.6342 | 0.6610 | 0.6095 | None |
89
- | 0.0962 | 7.02 | 5500 | 1.4553 | 0.6477 | 0.6263 | 0.6707 | None |
90
- | 0.0752 | 7.65 | 6000 | 1.3959 | 0.6391 | 0.6580 | 0.6213 | None |
91
- | 0.0621 | 8.29 | 6500 | 1.6376 | 0.6439 | 0.6359 | 0.6521 | None |
92
- | 0.0664 | 8.93 | 7000 | 1.3241 | 0.6284 | 0.6613 | 0.5987 | None |
93
- | 0.0562 | 9.57 | 7500 | 1.7060 | 0.6449 | 0.6465 | 0.6433 | None |
94
 
95
 
96
  ### Framework versions
 
1
  ---
2
+ base_model: thejosango/nuha-mlm
3
  tags:
4
  - generated_from_trainer
5
  datasets:
 
23
  metrics:
24
  - name: F1
25
  type: f1
26
+ value: 0.5652559928973069
27
  - name: Precision
28
  type: precision
29
+ value: 0.7137518684603886
30
  - name: Recall
31
  type: recall
32
+ value: 0.4679078882900539
33
  ---
34
 
35
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 
37
 
38
  # nuha-binary
39
 
40
+ This model is a fine-tuned version of [thejosango/nuha-mlm](https://huggingface.co/thejosango/nuha-mlm) on the nuha-dataset dataset.
41
  It achieves the following results on the evaluation set:
42
+ - Loss: 0.5595
43
+ - F1: 0.5653
44
+ - Precision: 0.7138
45
+ - Recall: 0.4679
46
  - Support: None
47
 
48
  ## Model description
 
64
  The following hyperparameters were used during training:
65
  - learning_rate: 1e-05
66
  - train_batch_size: 32
67
+ - eval_batch_size: 32
68
  - seed: 42
69
  - gradient_accumulation_steps: 2
70
  - total_train_batch_size: 64
71
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
72
  - lr_scheduler_type: constant
73
+ - lr_scheduler_warmup_steps: 1000.0
74
+ - num_epochs: 30
75
+ - label_smoothing_factor: 0.1
76
 
77
  ### Training results
78
 
79
  | Training Loss | Epoch | Step | Validation Loss | F1 | Precision | Recall | Support |
80
  |:-------------:|:-----:|:----:|:---------------:|:------:|:---------:|:------:|:-------:|
81
+ | 0.8838 | 0.64 | 500 | 0.6201 | 0.4182 | 0.6907 | 0.2999 | None |
82
+ | 0.6748 | 1.28 | 1000 | 0.5750 | 0.4756 | 0.7174 | 0.3557 | None |
83
+ | 0.6404 | 1.91 | 1500 | 0.5329 | 0.5705 | 0.6788 | 0.4919 | None |
84
+ | 0.5836 | 2.55 | 2000 | 0.5316 | 0.5649 | 0.7069 | 0.4704 | None |
85
+ | 0.5793 | 3.19 | 2500 | 0.5267 | 0.6255 | 0.6614 | 0.5933 | None |
86
+ | 0.557 | 3.83 | 3000 | 0.5211 | 0.6145 | 0.6669 | 0.5698 | None |
87
+ | 0.5279 | 4.46 | 3500 | 0.5301 | 0.6516 | 0.6481 | 0.6551 | None |
88
+ | 0.5121 | 5.1 | 4000 | 0.5220 | 0.6356 | 0.6818 | 0.5953 | None |
89
+ | 0.5067 | 5.74 | 4500 | 0.5270 | 0.6609 | 0.6481 | 0.6742 | None |
90
+ | 0.4806 | 6.38 | 5000 | 0.5259 | 0.6309 | 0.6899 | 0.5811 | None |
91
+ | 0.4858 | 7.02 | 5500 | 0.5303 | 0.6145 | 0.6890 | 0.5546 | None |
92
+ | 0.4608 | 7.65 | 6000 | 0.5429 | 0.6558 | 0.6402 | 0.6722 | None |
93
+ | 0.441 | 8.29 | 6500 | 0.5575 | 0.6279 | 0.6776 | 0.5850 | None |
94
+ | 0.4367 | 8.93 | 7000 | 0.5595 | 0.5653 | 0.7138 | 0.4679 | None |
 
95
 
96
 
97
  ### Framework versions
config.json CHANGED
@@ -1,13 +1,13 @@
1
  {
2
- "_name_or_path": "aubmindlab/bert-base-arabertv02-twitter",
3
  "architectures": [
4
  "BertForSequenceClassification"
5
  ],
6
- "attention_probs_dropout_prob": 0.0,
7
- "classifier_dropout": 0.0,
8
  "gradient_checkpointing": false,
9
  "hidden_act": "gelu",
10
- "hidden_dropout_prob": 0.0,
11
  "hidden_size": 768,
12
  "id2label": {
13
  "0": "non-hate-speech",
@@ -23,7 +23,7 @@
23
  "max_position_embeddings": 512,
24
  "model_type": "bert",
25
  "num_attention_heads": 12,
26
- "num_hidden_layers": 6,
27
  "pad_token_id": 0,
28
  "position_embedding_type": "absolute",
29
  "problem_type": "single_label_classification",
 
1
  {
2
+ "_name_or_path": "thejosango/nuha-mlm",
3
  "architectures": [
4
  "BertForSequenceClassification"
5
  ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "classifier_dropout": null,
8
  "gradient_checkpointing": false,
9
  "hidden_act": "gelu",
10
+ "hidden_dropout_prob": 0.1,
11
  "hidden_size": 768,
12
  "id2label": {
13
  "0": "non-hate-speech",
 
23
  "max_position_embeddings": 512,
24
  "model_type": "bert",
25
  "num_attention_heads": 12,
26
+ "num_hidden_layers": 4,
27
  "pad_token_id": 0,
28
  "position_embedding_type": "absolute",
29
  "problem_type": "single_label_classification",
config.toml CHANGED
@@ -1,5 +1,5 @@
1
  [experiment]
2
- name = "binary-3"
3
  type = "binary"
4
 
5
 
@@ -11,27 +11,27 @@ augment_ratio = 0.0
11
 
12
 
13
  [model]
14
- pretrained_model_name_or_path = "aubmindlab/bert-base-arabertv02-twitter"
15
- revision = "main"
16
- hidden_dropout_prob = 0.0
17
- attention_probs_dropout_prob = 0.0
18
- classifier_dropout = 0.0
19
- num_hidden_layers = 6
20
  #num_attention_heads = 12
21
  #hidden_size = 768
22
- #intermediate_size= 1024
23
 
24
 
25
  [training]
26
- num_train_epochs = 50
27
- warmup_steps = 0
28
  lr_scheduler_type = "constant"
29
  learning_rate = 1e-5
30
  per_device_train_batch_size = 32
31
- per_device_eval_batch_size = 128
32
  gradient_accumulation_steps = 2
33
- weight_decay = 0.00
34
- label_smoothing_factor = 0.0
35
- weighted_loss = false
36
- early_stopping_patience = 10
37
  early_stopping_threshold = 0.005
 
1
  [experiment]
2
+ name = "binary-4"
3
  type = "binary"
4
 
5
 
 
11
 
12
 
13
  [model]
14
+ pretrained_model_name_or_path = "thejosango/nuha-mlm"
15
+ revision = "2caf9ebc5b275737c95f8bb16953288107a7131c"
16
+ #hidden_dropout_prob = 0
17
+ #attention_probs_dropout_prob = 0
18
+ #classifier_dropout = 0
19
+ #num_hidden_layers = 4
20
  #num_attention_heads = 12
21
  #hidden_size = 768
22
+ #intermediate_size= null
23
 
24
 
25
  [training]
26
+ num_train_epochs = 30
27
+ warmup_steps = 1e3
28
  lr_scheduler_type = "constant"
29
  learning_rate = 1e-5
30
  per_device_train_batch_size = 32
31
+ per_device_eval_batch_size = 32
32
  gradient_accumulation_steps = 2
33
+ weight_decay = 0.01
34
+ label_smoothing_factor = 0.1
35
+ weighted_loss = false
36
+ early_stopping_patience = 5
37
  early_stopping_threshold = 0.005
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b83c900e0ea49bbcefb28e9618972b484e78980b78de29bca1931b8e084204d3
3
- size 370706033
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:40de39f946a025883e44c47ae5e11ea1f08dc35b0befe9f2443dbc785a4045bd
3
+ size 313992076
tokenizer.json CHANGED
@@ -1,6 +1,11 @@
1
  {
2
  "version": "1.0",
3
- "truncation": null,
 
 
 
 
 
4
  "padding": null,
5
  "added_tokens": [
6
  {
 
1
  {
2
  "version": "1.0",
3
+ "truncation": {
4
+ "direction": "Right",
5
+ "max_length": 512,
6
+ "strategy": "LongestFirst",
7
+ "stride": 0
8
+ },
9
  "padding": null,
10
  "added_tokens": [
11
  {
tokenizer_config.json CHANGED
@@ -5,17 +5,24 @@
5
  "do_lower_case": false,
6
  "mask_token": "[MASK]",
7
  "max_len": 512,
 
8
  "model_max_length": 512,
9
  "never_split": [
10
  "[بريد]",
11
  "[مستخدم]",
12
  "[رابط]"
13
  ],
 
14
  "pad_token": "[PAD]",
 
 
15
  "sep_token": "[SEP]",
 
16
  "strip_accents": null,
17
  "tokenize_chinese_chars": true,
18
  "tokenizer_class": "BertTokenizer",
 
 
19
  "unk_token": "[UNK]",
20
  "use_fast": true
21
  }
 
5
  "do_lower_case": false,
6
  "mask_token": "[MASK]",
7
  "max_len": 512,
8
+ "max_length": 512,
9
  "model_max_length": 512,
10
  "never_split": [
11
  "[بريد]",
12
  "[مستخدم]",
13
  "[رابط]"
14
  ],
15
+ "pad_to_multiple_of": null,
16
  "pad_token": "[PAD]",
17
+ "pad_token_type_id": 0,
18
+ "padding_side": "right",
19
  "sep_token": "[SEP]",
20
+ "stride": 0,
21
  "strip_accents": null,
22
  "tokenize_chinese_chars": true,
23
  "tokenizer_class": "BertTokenizer",
24
+ "truncation_side": "right",
25
+ "truncation_strategy": "longest_first",
26
  "unk_token": "[UNK]",
27
  "use_fast": true
28
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:581939067a961bed8370054fe7cd7f1030c3c6f0eeb1c7407bea8c1b1647597b
3
- size 4027
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:37aa39e079233d13bb57c12ffb2e9ddc52bb1ab690045ff67bde840cebbe2c7b
3
+ size 4091