zhoujiaming777 committed on
Commit
9c3e889
·
verified ·
1 Parent(s): f05b0df

Upload 3 files

Browse files
Files changed (3) hide show
  1. config.json +326 -0
  2. generation_config.json +4 -0
  3. pytorch_model.bin +3 -0
config.json ADDED
@@ -0,0 +1,326 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "acoustic_connector_config": {},
3
+ "adapter_inner_dim": 512,
4
+ "architectures": [
5
+ "DestaModel"
6
+ ],
7
+ "conv_kernel_sizes": "5,5,5",
8
+ "llm_config": {
9
+ "_attn_implementation_autoset": false,
10
+ "_name_or_path": "/llm/nankai/jiaming_space/models/LLaDA-8B-Instruct",
11
+ "activation_type": "silu",
12
+ "add_cross_attention": false,
13
+ "alibi": false,
14
+ "alibi_bias_max": 8.0,
15
+ "architectures": [
16
+ "LLaDAModelLM"
17
+ ],
18
+ "attention_dropout": 0.0,
19
+ "attention_layer_norm": false,
20
+ "attention_layer_norm_with_affine": true,
21
+ "auto_map": {
22
+ "AutoConfig": "configuration_llada.LLaDAConfig",
23
+ "AutoModel": "modeling_llada.LLaDAModelLM",
24
+ "AutoModelForCausalLM": "modeling_llada.LLaDAModelLM"
25
+ },
26
+ "bad_words_ids": null,
27
+ "begin_suppress_tokens": null,
28
+ "bias_for_layer_norm": false,
29
+ "block_group_size": 1,
30
+ "block_type": "llama",
31
+ "bos_token_id": null,
32
+ "chunk_size_feed_forward": 0,
33
+ "cross_attention_hidden_size": null,
34
+ "d_model": 4096,
35
+ "decoder_start_token_id": null,
36
+ "diversity_penalty": 0.0,
37
+ "do_sample": false,
38
+ "early_stopping": false,
39
+ "embedding_dropout": 0.0,
40
+ "embedding_size": 126464,
41
+ "encoder_no_repeat_ngram_size": 0,
42
+ "eos_token_id": 126081,
43
+ "exponential_decay_length_penalty": null,
44
+ "finetuning_task": null,
45
+ "flash_attention": false,
46
+ "forced_bos_token_id": null,
47
+ "forced_eos_token_id": null,
48
+ "id2label": {
49
+ "0": "LABEL_0",
50
+ "1": "LABEL_1"
51
+ },
52
+ "include_bias": false,
53
+ "include_qkv_bias": false,
54
+ "init_cutoff_factor": null,
55
+ "init_device": "meta",
56
+ "init_fn": "mitchell",
57
+ "init_std": 0.02,
58
+ "input_emb_norm": false,
59
+ "is_decoder": false,
60
+ "is_encoder_decoder": false,
61
+ "label2id": {
62
+ "LABEL_0": 0,
63
+ "LABEL_1": 1
64
+ },
65
+ "layer_norm_type": "rms",
66
+ "layer_norm_with_affine": true,
67
+ "length_penalty": 1.0,
68
+ "mask_token_id": 126336,
69
+ "max_length": 20,
70
+ "max_sequence_length": 4096,
71
+ "min_length": 0,
72
+ "mlp_hidden_size": 12288,
73
+ "mlp_ratio": 4,
74
+ "model_type": "llada",
75
+ "multi_query_attention": null,
76
+ "n_heads": 32,
77
+ "n_kv_heads": 32,
78
+ "n_layers": 32,
79
+ "no_repeat_ngram_size": 0,
80
+ "num_beam_groups": 1,
81
+ "num_beams": 1,
82
+ "num_return_sequences": 1,
83
+ "output_attentions": false,
84
+ "output_hidden_states": false,
85
+ "output_scores": false,
86
+ "pad_token_id": 126081,
87
+ "precision": "amp_bf16",
88
+ "prefix": null,
89
+ "problem_type": null,
90
+ "pruned_heads": {},
91
+ "remove_invalid_values": false,
92
+ "repetition_penalty": 1.0,
93
+ "residual_dropout": 0.0,
94
+ "return_dict": true,
95
+ "return_dict_in_generate": false,
96
+ "rms_norm_eps": 1e-05,
97
+ "rope": true,
98
+ "rope_full_precision": true,
99
+ "rope_theta": 500000.0,
100
+ "scale_logits": false,
101
+ "sep_token_id": null,
102
+ "suppress_tokens": null,
103
+ "task_specific_params": null,
104
+ "temperature": 1.0,
105
+ "tf_legacy_loss": false,
106
+ "tie_encoder_decoder": false,
107
+ "tie_word_embeddings": true,
108
+ "tokenizer_class": null,
109
+ "top_k": 50,
110
+ "top_p": 1.0,
111
+ "torch_dtype": null,
112
+ "torchscript": false,
113
+ "transformers_version": "4.49.0",
114
+ "typical_p": 1.0,
115
+ "use_bfloat16": false,
116
+ "use_cache": false,
117
+ "vocab_size": 126464,
118
+ "weight_tying": false
119
+ },
120
+ "prompt_size": 64,
121
+ "torch_dtype": "bfloat16",
122
+ "transformers_version": "4.49.0",
123
+ "whisper_config": {
124
+ "_attn_implementation_autoset": false,
125
+ "_name_or_path": "openai/whisper-small",
126
+ "activation_dropout": 0.0,
127
+ "activation_function": "gelu",
128
+ "add_cross_attention": false,
129
+ "apply_spec_augment": false,
130
+ "architectures": [
131
+ "WhisperForConditionalGeneration"
132
+ ],
133
+ "attention_dropout": 0.0,
134
+ "bad_words_ids": null,
135
+ "begin_suppress_tokens": [
136
+ 220,
137
+ 50257
138
+ ],
139
+ "bos_token_id": 50257,
140
+ "chunk_size_feed_forward": 0,
141
+ "classifier_proj_size": 256,
142
+ "cross_attention_hidden_size": null,
143
+ "d_model": 768,
144
+ "decoder_attention_heads": 12,
145
+ "decoder_ffn_dim": 3072,
146
+ "decoder_layerdrop": 0.0,
147
+ "decoder_layers": 12,
148
+ "decoder_start_token_id": 50258,
149
+ "diversity_penalty": 0.0,
150
+ "do_sample": false,
151
+ "dropout": 0.0,
152
+ "early_stopping": false,
153
+ "encoder_attention_heads": 12,
154
+ "encoder_ffn_dim": 3072,
155
+ "encoder_layerdrop": 0.0,
156
+ "encoder_layers": 12,
157
+ "encoder_no_repeat_ngram_size": 0,
158
+ "eos_token_id": 50257,
159
+ "exponential_decay_length_penalty": null,
160
+ "finetuning_task": null,
161
+ "forced_bos_token_id": null,
162
+ "forced_decoder_ids": [
163
+ [
164
+ 1,
165
+ 50259
166
+ ],
167
+ [
168
+ 2,
169
+ 50359
170
+ ],
171
+ [
172
+ 3,
173
+ 50363
174
+ ]
175
+ ],
176
+ "forced_eos_token_id": null,
177
+ "id2label": {
178
+ "0": "LABEL_0",
179
+ "1": "LABEL_1"
180
+ },
181
+ "init_std": 0.02,
182
+ "is_decoder": false,
183
+ "is_encoder_decoder": true,
184
+ "label2id": {
185
+ "LABEL_0": 0,
186
+ "LABEL_1": 1
187
+ },
188
+ "length_penalty": 1.0,
189
+ "mask_feature_length": 10,
190
+ "mask_feature_min_masks": 0,
191
+ "mask_feature_prob": 0.0,
192
+ "mask_time_length": 10,
193
+ "mask_time_min_masks": 2,
194
+ "mask_time_prob": 0.05,
195
+ "max_length": 448,
196
+ "max_source_positions": 1500,
197
+ "max_target_positions": 448,
198
+ "median_filter_width": 7,
199
+ "min_length": 0,
200
+ "model_type": "whisper",
201
+ "no_repeat_ngram_size": 0,
202
+ "num_beam_groups": 1,
203
+ "num_beams": 1,
204
+ "num_hidden_layers": 12,
205
+ "num_mel_bins": 80,
206
+ "num_return_sequences": 1,
207
+ "output_attentions": false,
208
+ "output_hidden_states": false,
209
+ "output_scores": false,
210
+ "pad_token_id": 50257,
211
+ "prefix": null,
212
+ "problem_type": null,
213
+ "pruned_heads": {},
214
+ "remove_invalid_values": false,
215
+ "repetition_penalty": 1.0,
216
+ "return_dict": true,
217
+ "return_dict_in_generate": false,
218
+ "scale_embedding": false,
219
+ "sep_token_id": null,
220
+ "suppress_tokens": [
221
+ 1,
222
+ 2,
223
+ 7,
224
+ 8,
225
+ 9,
226
+ 10,
227
+ 14,
228
+ 25,
229
+ 26,
230
+ 27,
231
+ 28,
232
+ 29,
233
+ 31,
234
+ 58,
235
+ 59,
236
+ 60,
237
+ 61,
238
+ 62,
239
+ 63,
240
+ 90,
241
+ 91,
242
+ 92,
243
+ 93,
244
+ 359,
245
+ 503,
246
+ 522,
247
+ 542,
248
+ 873,
249
+ 893,
250
+ 902,
251
+ 918,
252
+ 922,
253
+ 931,
254
+ 1350,
255
+ 1853,
256
+ 1982,
257
+ 2460,
258
+ 2627,
259
+ 3246,
260
+ 3253,
261
+ 3268,
262
+ 3536,
263
+ 3846,
264
+ 3961,
265
+ 4183,
266
+ 4667,
267
+ 6585,
268
+ 6647,
269
+ 7273,
270
+ 9061,
271
+ 9383,
272
+ 10428,
273
+ 10929,
274
+ 11938,
275
+ 12033,
276
+ 12331,
277
+ 12562,
278
+ 13793,
279
+ 14157,
280
+ 14635,
281
+ 15265,
282
+ 15618,
283
+ 16553,
284
+ 16604,
285
+ 18362,
286
+ 18956,
287
+ 20075,
288
+ 21675,
289
+ 22520,
290
+ 26130,
291
+ 26161,
292
+ 26435,
293
+ 28279,
294
+ 29464,
295
+ 31650,
296
+ 32302,
297
+ 32470,
298
+ 36865,
299
+ 42863,
300
+ 47425,
301
+ 49870,
302
+ 50254,
303
+ 50258,
304
+ 50360,
305
+ 50361,
306
+ 50362
307
+ ],
308
+ "task_specific_params": null,
309
+ "temperature": 1.0,
310
+ "tf_legacy_loss": false,
311
+ "tie_encoder_decoder": false,
312
+ "tie_word_embeddings": true,
313
+ "tokenizer_class": null,
314
+ "top_k": 50,
315
+ "top_p": 1.0,
316
+ "torch_dtype": "float32",
317
+ "torchscript": false,
318
+ "transformers_version": "4.49.0",
319
+ "typical_p": 1.0,
320
+ "use_bfloat16": false,
321
+ "use_cache": true,
322
+ "use_weighted_layer_sum": false,
323
+ "vocab_size": 51865
324
+ },
325
+ "whisper_model_id": "/llm/nankai/jiaming_space/models/whisper-small"
326
+ }
generation_config.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "transformers_version": "4.49.0"
4
+ }
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f4aaeb3673f25d8421075fc7d79f31629049ca300662b8e5e0ab32ff154384c9
3
+ size 146822483