abdeljalilELmajjodi committed on
Commit
284169e
·
verified ·
1 Parent(s): b580382

abdeljalilELmajjodi/test-bert

Browse files
README.md CHANGED
@@ -1,7 +1,7 @@
1
  ---
2
  library_name: transformers
3
  license: mit
4
- base_model: FacebookAI/xlm-roberta-base
5
  tags:
6
  - generated_from_trainer
7
  model-index:
@@ -14,9 +14,7 @@ should probably proofread and complete it, then remove this comment. -->
14
 
15
  # model
16
 
17
- This model is a fine-tuned version of [FacebookAI/xlm-roberta-base](https://huggingface.co/FacebookAI/xlm-roberta-base) on an unknown dataset.
18
- It achieves the following results on the evaluation set:
19
- - Loss: 3.2858
20
 
21
  ## Model description
22
 
@@ -37,43 +35,19 @@ More information needed
37
  The following hyperparameters were used during training:
38
  - learning_rate: 2e-05
39
  - train_batch_size: 4
40
- - eval_batch_size: 16
41
  - seed: 42
42
  - gradient_accumulation_steps: 4
43
  - total_train_batch_size: 16
44
- - optimizer: Use adamw_torch with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments
45
  - lr_scheduler_type: linear
46
  - lr_scheduler_warmup_ratio: 0.03
47
  - num_epochs: 1
48
  - mixed_precision_training: Native AMP
49
 
50
- ### Training results
51
-
52
- | Training Loss | Epoch | Step | Validation Loss |
53
- |:-------------:|:------:|:-----:|:---------------:|
54
- | 3.2251 | 0.0533 | 1000 | 4.1491 |
55
- | 2.7413 | 0.1067 | 2000 | 3.7912 |
56
- | 2.5416 | 0.16 | 3000 | 3.6801 |
57
- | 2.371 | 0.2133 | 4000 | 3.6439 |
58
- | 2.2968 | 0.2667 | 5000 | 3.5301 |
59
- | 2.1989 | 0.32 | 6000 | 3.3905 |
60
- | 2.0841 | 0.3733 | 7000 | 3.5244 |
61
- | 2.0032 | 0.4267 | 8000 | 3.3268 |
62
- | 1.9618 | 0.48 | 9000 | 3.3207 |
63
- | 1.9114 | 0.5333 | 10000 | 3.4544 |
64
- | 1.8472 | 0.5867 | 11000 | 3.2520 |
65
- | 1.8068 | 0.64 | 12000 | 3.3389 |
66
- | 1.7692 | 0.6933 | 13000 | 3.2428 |
67
- | 1.7236 | 0.7467 | 14000 | 3.3926 |
68
- | 1.7219 | 0.8 | 15000 | 3.2721 |
69
- | 1.6838 | 0.8533 | 16000 | 3.2671 |
70
- | 1.6771 | 0.9067 | 17000 | 3.2732 |
71
- | 1.6531 | 0.96 | 18000 | 3.2858 |
72
-
73
-
74
  ### Framework versions
75
 
76
- - Transformers 4.48.1
77
- - Pytorch 2.1.1+cu121
78
- - Datasets 3.2.0
79
- - Tokenizers 0.21.0
 
1
  ---
2
  library_name: transformers
3
  license: mit
4
+ base_model: atlasia/XLM-RoBERTa-Morocco
5
  tags:
6
  - generated_from_trainer
7
  model-index:
 
14
 
15
  # model
16
 
17
+ This model is a fine-tuned version of [atlasia/XLM-RoBERTa-Morocco](https://huggingface.co/atlasia/XLM-RoBERTa-Morocco) on an unknown dataset.
 
 
18
 
19
  ## Model description
20
 
 
35
  The following hyperparameters were used during training:
36
  - learning_rate: 2e-05
37
  - train_batch_size: 4
38
+ - eval_batch_size: 8
39
  - seed: 42
40
  - gradient_accumulation_steps: 4
41
  - total_train_batch_size: 16
42
+ - optimizer: Use OptimizerNames.ADAMW_TORCH with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments
43
  - lr_scheduler_type: linear
44
  - lr_scheduler_warmup_ratio: 0.03
45
  - num_epochs: 1
46
  - mixed_precision_training: Native AMP
47
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
48
  ### Framework versions
49
 
50
+ - Transformers 4.51.3
51
+ - Pytorch 2.6.0+cu124
52
+ - Datasets 3.5.1
53
+ - Tokenizers 0.21.1
config.json CHANGED
@@ -1,5 +1,4 @@
1
  {
2
- "_name_or_path": "FacebookAI/xlm-roberta-base",
3
  "architectures": [
4
  "XLMRobertaForMaskedLM"
5
  ],
@@ -9,20 +8,21 @@
9
  "eos_token_id": 2,
10
  "hidden_act": "gelu",
11
  "hidden_dropout_prob": 0.1,
12
- "hidden_size": 768,
13
  "initializer_range": 0.02,
14
- "intermediate_size": 3072,
15
  "layer_norm_eps": 1e-05,
16
  "max_position_embeddings": 514,
17
  "model_type": "xlm-roberta",
18
- "num_attention_heads": 12,
19
- "num_hidden_layers": 12,
20
  "output_past": true,
21
  "pad_token_id": 1,
22
  "position_embedding_type": "absolute",
23
  "torch_dtype": "float32",
24
- "transformers_version": "4.48.1",
25
  "type_vocab_size": 1,
26
  "use_cache": true,
 
27
  "vocab_size": 250002
28
  }
 
1
  {
 
2
  "architectures": [
3
  "XLMRobertaForMaskedLM"
4
  ],
 
8
  "eos_token_id": 2,
9
  "hidden_act": "gelu",
10
  "hidden_dropout_prob": 0.1,
11
+ "hidden_size": 1024,
12
  "initializer_range": 0.02,
13
+ "intermediate_size": 4096,
14
  "layer_norm_eps": 1e-05,
15
  "max_position_embeddings": 514,
16
  "model_type": "xlm-roberta",
17
+ "num_attention_heads": 16,
18
+ "num_hidden_layers": 24,
19
  "output_past": true,
20
  "pad_token_id": 1,
21
  "position_embedding_type": "absolute",
22
  "torch_dtype": "float32",
23
+ "transformers_version": "4.51.3",
24
  "type_vocab_size": 1,
25
  "use_cache": true,
26
+ "use_flash_attention_2": true,
27
  "vocab_size": 250002
28
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b4ce63c40c9137609281d850473d880d4688929df4da249fd60ad23cbc668a50
3
- size 1113205088
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c90fe26fd285a5881e806e6b4626cf982897354f8453b972e43f5ff44cc37c06
3
+ size 2240618752
sentencepiece.bpe.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cfc8146abe2a0488e9e2a0c56de7952f7c11ab059eca145a0a727afce0db2865
3
+ size 5069051
special_tokens_map.json CHANGED
@@ -1,7 +1,25 @@
1
  {
2
- "bos_token": "<s>",
3
- "cls_token": "<s>",
4
- "eos_token": "</s>",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
  "mask_token": {
6
  "content": "<mask>",
7
  "lstrip": true,
@@ -9,7 +27,25 @@
9
  "rstrip": false,
10
  "single_word": false
11
  },
12
- "pad_token": "<pad>",
13
- "sep_token": "</s>",
14
- "unk_token": "<unk>"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
  }
 
1
  {
2
+ "bos_token": {
3
+ "content": "<s>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "cls_token": {
10
+ "content": "<s>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "eos_token": {
17
+ "content": "</s>",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
  "mask_token": {
24
  "content": "<mask>",
25
  "lstrip": true,
 
27
  "rstrip": false,
28
  "single_word": false
29
  },
30
+ "pad_token": {
31
+ "content": "<pad>",
32
+ "lstrip": false,
33
+ "normalized": false,
34
+ "rstrip": false,
35
+ "single_word": false
36
+ },
37
+ "sep_token": {
38
+ "content": "</s>",
39
+ "lstrip": false,
40
+ "normalized": false,
41
+ "rstrip": false,
42
+ "single_word": false
43
+ },
44
+ "unk_token": {
45
+ "content": "<unk>",
46
+ "lstrip": false,
47
+ "normalized": false,
48
+ "rstrip": false,
49
+ "single_word": false
50
+ }
51
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6094422212ef0ee74526132298f9c3679a759d99ca9ddb95fa929af9ad8589ae
3
  size 5304
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dd330ca9891f488a7c79ac078633a6bea1f643f674bc03901cb4c732575d9663
3
  size 5304