yazansh committed on
Commit
024ce8b
·
1 Parent(s): e0def9f
Files changed (6) hide show
  1. README.md +25 -20
  2. config.json +1 -1
  3. config.toml +6 -6
  4. pytorch_model.bin +2 -2
  5. tokenizer.json +1 -6
  6. training_args.bin +1 -1
README.md CHANGED
@@ -23,13 +23,13 @@ model-index:
23
  metrics:
24
  - name: F1
25
  type: f1
26
- value: 0.6642664266426642
27
  - name: Precision
28
  type: precision
29
- value: 0.6142322097378277
30
  - name: Recall
31
  type: recall
32
- value: 0.7231749142577168
33
  ---
34
 
35
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
@@ -39,10 +39,10 @@ should probably proofread and complete it, then remove this comment. -->
39
 
40
  This model is a fine-tuned version of [aubmindlab/bert-base-arabertv02-twitter](https://huggingface.co/aubmindlab/bert-base-arabertv02-twitter) on the nuha-dataset dataset.
41
  It achieves the following results on the evaluation set:
42
- - Loss: 0.5473
43
- - F1: 0.6643
44
- - Precision: 0.6142
45
- - Recall: 0.7232
46
  - Support: None
47
 
48
  ## Model description
@@ -64,8 +64,10 @@ More information needed
64
  The following hyperparameters were used during training:
65
  - learning_rate: 1e-05
66
  - train_batch_size: 32
67
- - eval_batch_size: 64
68
  - seed: 42
 
 
69
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
70
  - lr_scheduler_type: constant
71
  - num_epochs: 50
@@ -74,18 +76,21 @@ The following hyperparameters were used during training:
74
 
75
  | Training Loss | Epoch | Step | Validation Loss | F1 | Precision | Recall | Support |
76
  |:-------------:|:-----:|:----:|:---------------:|:------:|:---------:|:------:|:-------:|
77
- | 0.8335 | 0.32 | 500 | 0.7434 | 0.2637 | 0.5264 | 0.1759 | None |
78
- | 0.6566 | 0.64 | 1000 | 0.5636 | 0.3675 | 0.7393 | 0.2445 | None |
79
- | 0.6277 | 0.96 | 1500 | 0.7360 | 0.5957 | 0.4424 | 0.9113 | None |
80
- | 0.7174 | 1.28 | 2000 | 1.0336 | 0.1403 | 0.9226 | 0.0759 | None |
81
- | 0.6156 | 1.59 | 2500 | 0.9586 | 0.0870 | 0.9490 | 0.0456 | None |
82
- | 0.6196 | 1.91 | 3000 | 0.4772 | 0.5771 | 0.6969 | 0.4924 | None |
83
- | 0.5038 | 2.23 | 3500 | 0.5039 | 0.6608 | 0.6119 | 0.7183 | None |
84
- | 0.5072 | 2.55 | 4000 | 0.4758 | 0.6230 | 0.6794 | 0.5752 | None |
85
- | 0.4988 | 2.87 | 4500 | 0.4592 | 0.6270 | 0.6999 | 0.5679 | None |
86
- | 0.4687 | 3.19 | 5000 | 1.0071 | 0.1982 | 0.908 | 0.1112 | None |
87
- | 0.4582 | 3.51 | 5500 | 0.4858 | 0.5598 | 0.7451 | 0.4483 | None |
88
- | 0.4321 | 3.83 | 6000 | 0.5473 | 0.6643 | 0.6142 | 0.7232 | None |
 
 
 
89
 
90
 
91
  ### Framework versions
 
23
  metrics:
24
  - name: F1
25
  type: f1
26
+ value: 0.6448919449901768
27
  - name: Precision
28
  type: precision
29
+ value: 0.6464795667159035
30
  - name: Recall
31
  type: recall
32
+ value: 0.643312101910828
33
  ---
34
 
35
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 
39
 
40
  This model is a fine-tuned version of [aubmindlab/bert-base-arabertv02-twitter](https://huggingface.co/aubmindlab/bert-base-arabertv02-twitter) on the nuha-dataset dataset.
41
  It achieves the following results on the evaluation set:
42
+ - Loss: 1.7060
43
+ - F1: 0.6449
44
+ - Precision: 0.6465
45
+ - Recall: 0.6433
46
  - Support: None
47
 
48
  ## Model description
 
64
  The following hyperparameters were used during training:
65
  - learning_rate: 1e-05
66
  - train_batch_size: 32
67
+ - eval_batch_size: 128
68
  - seed: 42
69
+ - gradient_accumulation_steps: 2
70
+ - total_train_batch_size: 64
71
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
72
  - lr_scheduler_type: constant
73
  - num_epochs: 50
 
76
 
77
  | Training Loss | Epoch | Step | Validation Loss | F1 | Precision | Recall | Support |
78
  |:-------------:|:-----:|:----:|:---------------:|:------:|:---------:|:------:|:-------:|
79
+ | 0.5406 | 0.64 | 500 | 0.4952 | 0.5584 | 0.6769 | 0.4753 | None |
80
+ | 0.4445 | 1.28 | 1000 | 0.4863 | 0.5321 | 0.7509 | 0.4121 | None |
81
+ | 0.4449 | 1.91 | 1500 | 0.4629 | 0.6368 | 0.6794 | 0.5992 | None |
82
+ | 0.3638 | 2.55 | 2000 | 0.4948 | 0.6369 | 0.6777 | 0.6007 | None |
83
+ | 0.3536 | 3.19 | 2500 | 0.5794 | 0.6604 | 0.6468 | 0.6747 | None |
84
+ | 0.2881 | 3.83 | 3000 | 0.5343 | 0.6320 | 0.6858 | 0.5860 | None |
85
+ | 0.1775 | 4.46 | 3500 | 0.7580 | 0.6439 | 0.6450 | 0.6428 | None |
86
+ | 0.1554 | 5.1 | 4000 | 1.1326 | 0.6278 | 0.6593 | 0.5992 | None |
87
+ | 0.124 | 5.74 | 4500 | 0.9173 | 0.6389 | 0.6516 | 0.6267 | None |
88
+ | 0.0865 | 6.38 | 5000 | 1.2594 | 0.6342 | 0.6610 | 0.6095 | None |
89
+ | 0.0962 | 7.02 | 5500 | 1.4553 | 0.6477 | 0.6263 | 0.6707 | None |
90
+ | 0.0752 | 7.65 | 6000 | 1.3959 | 0.6391 | 0.6580 | 0.6213 | None |
91
+ | 0.0621 | 8.29 | 6500 | 1.6376 | 0.6439 | 0.6359 | 0.6521 | None |
92
+ | 0.0664 | 8.93 | 7000 | 1.3241 | 0.6284 | 0.6613 | 0.5987 | None |
93
+ | 0.0562 | 9.57 | 7500 | 1.7060 | 0.6449 | 0.6465 | 0.6433 | None |
94
 
95
 
96
  ### Framework versions
config.json CHANGED
@@ -23,7 +23,7 @@
23
  "max_position_embeddings": 512,
24
  "model_type": "bert",
25
  "num_attention_heads": 12,
26
- "num_hidden_layers": 12,
27
  "pad_token_id": 0,
28
  "position_embedding_type": "absolute",
29
  "problem_type": "single_label_classification",
 
23
  "max_position_embeddings": 512,
24
  "model_type": "bert",
25
  "num_attention_heads": 12,
26
+ "num_hidden_layers": 6,
27
  "pad_token_id": 0,
28
  "position_embedding_type": "absolute",
29
  "problem_type": "single_label_classification",
config.toml CHANGED
@@ -1,5 +1,5 @@
1
  [experiment]
2
- name = "bianry-1"
3
  type = "binary"
4
 
5
 
@@ -16,7 +16,7 @@ revision = "main"
16
  hidden_dropout_prob = 0.0
17
  attention_probs_dropout_prob = 0.0
18
  classifier_dropout = 0.0
19
- #num_hidden_layers = 6
20
  #num_attention_heads = 12
21
  #hidden_size = 768
22
  #intermediate_size= 1024
@@ -28,10 +28,10 @@ warmup_steps = 0
28
  lr_scheduler_type = "constant"
29
  learning_rate = 1e-5
30
  per_device_train_batch_size = 32
31
- per_device_eval_batch_size = 64
32
- gradient_accumulation_steps = 1
33
  weight_decay = 0.00
34
  label_smoothing_factor = 0.0
35
- weighted_loss = false
36
- early_stopping_patience = 5
37
  early_stopping_threshold = 0.005
 
1
  [experiment]
2
+ name = "binary-3"
3
  type = "binary"
4
 
5
 
 
16
  hidden_dropout_prob = 0.0
17
  attention_probs_dropout_prob = 0.0
18
  classifier_dropout = 0.0
19
+ num_hidden_layers = 6
20
  #num_attention_heads = 12
21
  #hidden_size = 768
22
  #intermediate_size= 1024
 
28
  lr_scheduler_type = "constant"
29
  learning_rate = 1e-5
30
  per_device_train_batch_size = 32
31
+ per_device_eval_batch_size = 128
32
+ gradient_accumulation_steps = 2
33
  weight_decay = 0.00
34
  label_smoothing_factor = 0.0
35
+ weighted_loss = false
36
+ early_stopping_patience = 10
37
  early_stopping_threshold = 0.005
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b25e0860b4dc529f84126a508b1d0f5b3ce6f8e06f8cb41191d12a604e7ab900
3
- size 540847921
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b83c900e0ea49bbcefb28e9618972b484e78980b78de29bca1931b8e084204d3
3
+ size 370706033
tokenizer.json CHANGED
@@ -1,11 +1,6 @@
1
  {
2
  "version": "1.0",
3
- "truncation": {
4
- "direction": "Right",
5
- "max_length": 512,
6
- "strategy": "LongestFirst",
7
- "stride": 0
8
- },
9
  "padding": null,
10
  "added_tokens": [
11
  {
 
1
  {
2
  "version": "1.0",
3
+ "truncation": null,
 
 
 
 
 
4
  "padding": null,
5
  "added_tokens": [
6
  {
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:71323c508e837425b2b66a5f0be035927105efc9797869282f035fea59f175df
3
  size 4027
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:581939067a961bed8370054fe7cd7f1030c3c6f0eeb1c7407bea8c1b1647597b
3
  size 4027