Clemylia committed on
Commit
ea2f1d1
·
verified ·
1 Parent(s): 518f091

Premier modèle d'architecture gemma from scratch

Browse files
Files changed (3) hide show
  1. README.md +54 -0
  2. tokenizer.json +166 -0
  3. tokenizer_config.json +20 -0
README.md ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ library_name: transformers
3
+ tags:
4
+ - generated_from_trainer
5
+ model-index:
6
+ - name: small-ever
7
+ results: []
8
+ ---
9
+
10
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
11
+ should probably proofread and complete it, then remove this comment. -->
12
+
13
+ # small-ever
14
+
15
+ This model is a fine-tuned version of [](https://huggingface.co/) on an unknown dataset.
16
+
17
+ ## Model description
18
+
19
+ More information needed
20
+
21
+ ## Intended uses & limitations
22
+
23
+ More information needed
24
+
25
+ ## Training and evaluation data
26
+
27
+ More information needed
28
+
29
+ ## Training procedure
30
+
31
+ ### Training hyperparameters
32
+
33
+ The following hyperparameters were used during training:
34
+ - learning_rate: 0.0002
35
+ - train_batch_size: 8
36
+ - eval_batch_size: 8
37
+ - seed: 42
38
+ - gradient_accumulation_steps: 4
39
+ - total_train_batch_size: 32
40
+ - optimizer: Use OptimizerNames.ADAMW_TORCH_FUSED with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments
41
+ - lr_scheduler_type: linear
42
+ - num_epochs: 500
43
+ - mixed_precision_training: Native AMP
44
+
45
+ ### Training results
46
+
47
+
48
+
49
+ ### Framework versions
50
+
51
+ - Transformers 5.0.0
52
+ - Pytorch 2.10.0+cpu
53
+ - Datasets 4.0.0
54
+ - Tokenizers 0.22.2
tokenizer.json ADDED
@@ -0,0 +1,166 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "version": "1.0",
3
+ "truncation": {
4
+ "direction": "Right",
5
+ "max_length": 512,
6
+ "strategy": "LongestFirst",
7
+ "stride": 0
8
+ },
9
+ "padding": {
10
+ "strategy": {
11
+ "Fixed": 512
12
+ },
13
+ "direction": "Left",
14
+ "pad_to_multiple_of": null,
15
+ "pad_id": 0,
16
+ "pad_type_id": 0,
17
+ "pad_token": "<pad>"
18
+ },
19
+ "added_tokens": [
20
+ {
21
+ "id": 0,
22
+ "content": "<pad>",
23
+ "single_word": false,
24
+ "lstrip": false,
25
+ "rstrip": false,
26
+ "normalized": false,
27
+ "special": true
28
+ },
29
+ {
30
+ "id": 1,
31
+ "content": "</s>",
32
+ "single_word": false,
33
+ "lstrip": false,
34
+ "rstrip": false,
35
+ "normalized": false,
36
+ "special": true
37
+ },
38
+ {
39
+ "id": 2,
40
+ "content": "<s>",
41
+ "single_word": false,
42
+ "lstrip": false,
43
+ "rstrip": false,
44
+ "normalized": false,
45
+ "special": true
46
+ },
47
+ {
48
+ "id": 3,
49
+ "content": "<unk>",
50
+ "single_word": false,
51
+ "lstrip": false,
52
+ "rstrip": false,
53
+ "normalized": false,
54
+ "special": true
55
+ },
56
+ {
57
+ "id": 4,
58
+ "content": "<mask>",
59
+ "single_word": false,
60
+ "lstrip": false,
61
+ "rstrip": false,
62
+ "normalized": false,
63
+ "special": true
64
+ },
65
+ {
66
+ "id": 5,
67
+ "content": "Question:",
68
+ "single_word": false,
69
+ "lstrip": false,
70
+ "rstrip": false,
71
+ "normalized": false,
72
+ "special": false
73
+ },
74
+ {
75
+ "id": 6,
76
+ "content": "Réponse:",
77
+ "single_word": false,
78
+ "lstrip": false,
79
+ "rstrip": false,
80
+ "normalized": false,
81
+ "special": false
82
+ }
83
+ ],
84
+ "normalizer": {
85
+ "type": "Replace",
86
+ "pattern": {
87
+ "String": " "
88
+ },
89
+ "content": "▁"
90
+ },
91
+ "pre_tokenizer": null,
92
+ "post_processor": {
93
+ "type": "TemplateProcessing",
94
+ "single": [
95
+ {
96
+ "Sequence": {
97
+ "id": "A",
98
+ "type_id": 0
99
+ }
100
+ }
101
+ ],
102
+ "pair": [
103
+ {
104
+ "Sequence": {
105
+ "id": "A",
106
+ "type_id": 0
107
+ }
108
+ },
109
+ {
110
+ "Sequence": {
111
+ "id": "B",
112
+ "type_id": 1
113
+ }
114
+ }
115
+ ],
116
+ "special_tokens": {}
117
+ },
118
+ "decoder": {
119
+ "type": "Sequence",
120
+ "decoders": [
121
+ {
122
+ "type": "Replace",
123
+ "pattern": {
124
+ "String": "▁"
125
+ },
126
+ "content": " "
127
+ },
128
+ {
129
+ "type": "ByteFallback"
130
+ },
131
+ {
132
+ "type": "Fuse"
133
+ }
134
+ ]
135
+ },
136
+ "model": {
137
+ "type": "BPE",
138
+ "dropout": null,
139
+ "unk_token": "<unk>",
140
+ "continuing_subword_prefix": null,
141
+ "end_of_word_suffix": null,
142
+ "fuse_unk": true,
143
+ "byte_fallback": true,
144
+ "ignore_merges": false,
145
+ "vocab": {
146
+ "<pad>": 0,
147
+ "</s>": 1,
148
+ "<s>": 2,
149
+ "<unk>": 3,
150
+ "<mask>": 4,
151
+ "Question:": 5,
152
+ "Réponse:": 6,
153
+ "e": 7,
154
+ "C": 8,
155
+ "r": 9,
156
+ "▁": 10,
157
+ "a": 11,
158
+ "c": 12,
159
+ "i": 13,
160
+ "l": 14,
161
+ "m": 15,
162
+ "t": 16
163
+ },
164
+ "merges": []
165
+ }
166
+ }
tokenizer_config.json ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "backend": "tokenizers",
3
+ "bos_token": "<s>",
4
+ "eos_token": "</s>",
5
+ "extra_special_tokens": [
6
+ "<pad>",
7
+ "</s>",
8
+ "<s>",
9
+ "<mask>",
10
+ "Question:",
11
+ "Réponse:"
12
+ ],
13
+ "is_local": true,
14
+ "mask_token": "<mask>",
15
+ "model_max_length": 1000000000000000019884624838656,
16
+ "pad_token": "<pad>",
17
+ "tokenizer_class": "GemmaTokenizer",
18
+ "unk_token": "<unk>",
19
+ "vocab_file": "tokenizer-gemma/tokenizer.model"
20
+ }