Upload GptOssForCausalLM

#1
by albertvillanova HF Staff - opened
config.json CHANGED
@@ -4,7 +4,9 @@
4
  ],
5
  "attention_bias": true,
6
  "attention_dropout": 0.0,
 
7
  "dtype": "bfloat16",
 
8
  "head_dim": 64,
9
  "hidden_act": "silu",
10
  "hidden_size": 8,
@@ -17,26 +19,26 @@
17
  "max_position_embeddings": 131072,
18
  "model_type": "gpt_oss",
19
  "num_attention_heads": 4,
20
- "num_experts": 4,
21
  "num_experts_per_tok": 2,
22
  "num_hidden_layers": 2,
23
  "num_key_value_heads": 2,
24
- "num_local_experts": 128,
25
  "output_router_logits": false,
 
26
  "rms_norm_eps": 1e-05,
27
- "rope_scaling": {
28
  "beta_fast": 32.0,
29
  "beta_slow": 1.0,
30
  "factor": 32.0,
31
  "original_max_position_embeddings": 4096,
 
32
  "rope_type": "yarn",
33
  "truncate": false
34
  },
35
- "rope_theta": 150000.0,
36
  "router_aux_loss_coef": 0.9,
37
  "sliding_window": 128,
38
  "tie_word_embeddings": false,
39
- "transformers_version": "4.57.3",
40
  "use_cache": true,
41
  "vocab_size": 200019
42
  }
 
4
  ],
5
  "attention_bias": true,
6
  "attention_dropout": 0.0,
7
+ "bos_token_id": null,
8
  "dtype": "bfloat16",
9
+ "eos_token_id": null,
10
  "head_dim": 64,
11
  "hidden_act": "silu",
12
  "hidden_size": 8,
 
19
  "max_position_embeddings": 131072,
20
  "model_type": "gpt_oss",
21
  "num_attention_heads": 4,
 
22
  "num_experts_per_tok": 2,
23
  "num_hidden_layers": 2,
24
  "num_key_value_heads": 2,
25
+ "num_local_experts": 4,
26
  "output_router_logits": false,
27
+ "pad_token_id": null,
28
  "rms_norm_eps": 1e-05,
29
+ "rope_parameters": {
30
  "beta_fast": 32.0,
31
  "beta_slow": 1.0,
32
  "factor": 32.0,
33
  "original_max_position_embeddings": 4096,
34
+ "rope_theta": 150000.0,
35
  "rope_type": "yarn",
36
  "truncate": false
37
  },
 
38
  "router_aux_loss_coef": 0.9,
39
  "sliding_window": 128,
40
  "tie_word_embeddings": false,
41
+ "transformers_version": "5.6.0.dev0",
42
  "use_cache": true,
43
  "vocab_size": 200019
44
  }
generation_config.json CHANGED
@@ -7,5 +7,5 @@
7
  200012
8
  ],
9
  "pad_token_id": 199999,
10
- "transformers_version": "4.57.3"
11
  }
 
7
  200012
8
  ],
9
  "pad_token_id": 199999,
10
+ "transformers_version": "5.6.0.dev0"
11
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:015ad2fae9f683669e23746fec4260638970a906453a9a299f6619393c1d9254
3
- size 6865928
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f3aff233b7c3cf39bdf8ede7a2e35698308b3cb86c8b0598da952f70c41f2601
3
+ size 6444800
tokenizer_config.json CHANGED
@@ -1,183 +1,15 @@
1
  {
2
- "added_tokens_decoder": {
3
- "199998": {
4
- "content": "<|startoftext|>",
5
- "lstrip": false,
6
- "normalized": false,
7
- "rstrip": false,
8
- "single_word": false,
9
- "special": true
10
- },
11
- "199999": {
12
- "content": "<|endoftext|>",
13
- "lstrip": false,
14
- "normalized": false,
15
- "rstrip": false,
16
- "single_word": false,
17
- "special": true
18
- },
19
- "200000": {
20
- "content": "<|reserved_200000|>",
21
- "lstrip": false,
22
- "normalized": false,
23
- "rstrip": false,
24
- "single_word": false,
25
- "special": true
26
- },
27
- "200001": {
28
- "content": "<|reserved_200001|>",
29
- "lstrip": false,
30
- "normalized": false,
31
- "rstrip": false,
32
- "single_word": false,
33
- "special": true
34
- },
35
- "200002": {
36
- "content": "<|return|>",
37
- "lstrip": false,
38
- "normalized": false,
39
- "rstrip": false,
40
- "single_word": false,
41
- "special": true
42
- },
43
- "200003": {
44
- "content": "<|constrain|>",
45
- "lstrip": false,
46
- "normalized": false,
47
- "rstrip": false,
48
- "single_word": false,
49
- "special": true
50
- },
51
- "200004": {
52
- "content": "<|reserved_200004|>",
53
- "lstrip": false,
54
- "normalized": false,
55
- "rstrip": false,
56
- "single_word": false,
57
- "special": true
58
- },
59
- "200005": {
60
- "content": "<|channel|>",
61
- "lstrip": false,
62
- "normalized": false,
63
- "rstrip": false,
64
- "single_word": false,
65
- "special": true
66
- },
67
- "200006": {
68
- "content": "<|start|>",
69
- "lstrip": false,
70
- "normalized": false,
71
- "rstrip": false,
72
- "single_word": false,
73
- "special": true
74
- },
75
- "200007": {
76
- "content": "<|end|>",
77
- "lstrip": false,
78
- "normalized": false,
79
- "rstrip": false,
80
- "single_word": false,
81
- "special": true
82
- },
83
- "200008": {
84
- "content": "<|message|>",
85
- "lstrip": false,
86
- "normalized": false,
87
- "rstrip": false,
88
- "single_word": false,
89
- "special": true
90
- },
91
- "200009": {
92
- "content": "<|reserved_200009|>",
93
- "lstrip": false,
94
- "normalized": false,
95
- "rstrip": false,
96
- "single_word": false,
97
- "special": true
98
- },
99
- "200010": {
100
- "content": "<|reserved_200010|>",
101
- "lstrip": false,
102
- "normalized": false,
103
- "rstrip": false,
104
- "single_word": false,
105
- "special": true
106
- },
107
- "200011": {
108
- "content": "<|reserved_200011|>",
109
- "lstrip": false,
110
- "normalized": false,
111
- "rstrip": false,
112
- "single_word": false,
113
- "special": true
114
- },
115
- "200012": {
116
- "content": "<|call|>",
117
- "lstrip": false,
118
- "normalized": false,
119
- "rstrip": false,
120
- "single_word": false,
121
- "special": true
122
- },
123
- "200013": {
124
- "content": "<|reserved_200013|>",
125
- "lstrip": false,
126
- "normalized": false,
127
- "rstrip": false,
128
- "single_word": false,
129
- "special": true
130
- },
131
- "200014": {
132
- "content": "<|reserved_200014|>",
133
- "lstrip": false,
134
- "normalized": false,
135
- "rstrip": false,
136
- "single_word": false,
137
- "special": true
138
- },
139
- "200015": {
140
- "content": "<|reserved_200015|>",
141
- "lstrip": false,
142
- "normalized": false,
143
- "rstrip": false,
144
- "single_word": false,
145
- "special": true
146
- },
147
- "200016": {
148
- "content": "<|reserved_200016|>",
149
- "lstrip": false,
150
- "normalized": false,
151
- "rstrip": false,
152
- "single_word": false,
153
- "special": true
154
- },
155
- "200017": {
156
- "content": "<|reserved_200017|>",
157
- "lstrip": false,
158
- "normalized": false,
159
- "rstrip": false,
160
- "single_word": false,
161
- "special": true
162
- },
163
- "200018": {
164
- "content": "<|endofprompt|>",
165
- "lstrip": false,
166
- "normalized": false,
167
- "rstrip": false,
168
- "single_word": false,
169
- "special": true
170
- }
171
- },
172
  "bos_token": "<|startoftext|>",
173
  "clean_up_tokenization_spaces": false,
174
  "eos_token": "<|return|>",
175
- "extra_special_tokens": {},
 
176
  "model_input_names": [
177
  "input_ids",
178
  "attention_mask"
179
  ],
180
  "model_max_length": 1000000000000000019884624838656,
181
  "pad_token": "<|endoftext|>",
182
- "tokenizer_class": "PreTrainedTokenizerFast"
183
  }
 
1
  {
2
+ "backend": "tokenizers",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
  "bos_token": "<|startoftext|>",
4
  "clean_up_tokenization_spaces": false,
5
  "eos_token": "<|return|>",
6
+ "is_local": false,
7
+ "local_files_only": false,
8
  "model_input_names": [
9
  "input_ids",
10
  "attention_mask"
11
  ],
12
  "model_max_length": 1000000000000000019884624838656,
13
  "pad_token": "<|endoftext|>",
14
+ "tokenizer_class": "TokenizersBackend"
15
  }