ziom6270 commited on
Commit
13b0752
·
verified ·
1 Parent(s): d7d3997

Upload 33 files

Browse files
BF16/config.json ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "activation_function": "gelu_new",
3
+ "architectures": [
4
+ "XttsGPT"
5
+ ],
6
+ "attn_pdrop": 0.1,
7
+ "audio_config": {
8
+ "mel_channels": 80,
9
+ "output_sample_rate": 24000,
10
+ "sample_rate": 22050
11
+ },
12
+ "auto_map": {
13
+ "AutoConfig": "AstraMindAI/xtts2-gpt--gpt_config.XTTSGPTConfig",
14
+ "AutoModelForCausalLM": "AstraMindAI/xtts2-gpt--xtts2_gpt_modeling.XttsGPT",
15
+ "AutoTokenizer": "AstraMindAI/xtts2-gpt--tokenizer.XTTSTokenizerFast"
16
+ },
17
+ "decoder_input_dim": 1024,
18
+ "enable_redaction": false,
19
+ "gpt_batch_size": 1,
20
+ "gpt_max_audio_tokens": 605,
21
+ "hidden_size": 1024,
22
+ "initializer_range": 0.02,
23
+ "kv_cache": true,
24
+ "layer_norm_epsilon": 1e-05,
25
+ "max_audio_tokens": 605,
26
+ "max_prompt_tokens": 70,
27
+ "max_text_tokens": 402,
28
+ "model_type": "xtts_gpt",
29
+ "n_inner": 4096,
30
+ "num_attention_heads": 16,
31
+ "num_audio_tokens": 1026,
32
+ "num_hidden_layers": 30,
33
+ "number_text_tokens": 6681,
34
+ "reorder_and_upcast_attn": false,
35
+ "scale_attn_by_inverse_layer_idx": false,
36
+ "start_audio_token": 1024,
37
+ "start_text_token": null,
38
+ "stop_audio_token": 1025,
39
+ "stop_text_token": null,
40
+ "transformers_version": "4.46.0",
41
+ "use_masking_gt_prompt_approach": true,
42
+ "use_perceiver_resampler": true,
43
+ "vocab_size": 6681
44
+ }
BF16/gpt2_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e23ffef8c95bd7d046155ff003a6eb30f38be57ccae1197f42301520d45252d4
3
+ size 761266620
BF16/special_tokens_map.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "[START]",
3
+ "eos_token": "[STOP]",
4
+ "pad_token": "[PAD]",
5
+ "unk_token": "[UNK]"
6
+ }
BF16/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
BF16/tokenizer_config.json ADDED
@@ -0,0 +1,192 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[STOP]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "1": {
12
+ "content": "[UNK]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "2": {
20
+ "content": "[SPACE]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "259": {
28
+ "content": "[en]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "260": {
36
+ "content": "[de]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ },
43
+ "261": {
44
+ "content": "[START]",
45
+ "lstrip": false,
46
+ "normalized": false,
47
+ "rstrip": false,
48
+ "single_word": false,
49
+ "special": true
50
+ },
51
+ "262": {
52
+ "content": "[fr]",
53
+ "lstrip": false,
54
+ "normalized": false,
55
+ "rstrip": false,
56
+ "single_word": false,
57
+ "special": true
58
+ },
59
+ "267": {
60
+ "content": "[ru]",
61
+ "lstrip": false,
62
+ "normalized": false,
63
+ "rstrip": false,
64
+ "single_word": false,
65
+ "special": true
66
+ },
67
+ "284": {
68
+ "content": "[es]",
69
+ "lstrip": false,
70
+ "normalized": false,
71
+ "rstrip": false,
72
+ "single_word": false,
73
+ "special": true
74
+ },
75
+ "285": {
76
+ "content": "[it]",
77
+ "lstrip": false,
78
+ "normalized": false,
79
+ "rstrip": false,
80
+ "single_word": false,
81
+ "special": true
82
+ },
83
+ "286": {
84
+ "content": "[pt]",
85
+ "lstrip": false,
86
+ "normalized": false,
87
+ "rstrip": false,
88
+ "single_word": false,
89
+ "special": true
90
+ },
91
+ "293": {
92
+ "content": "[cs]",
93
+ "lstrip": false,
94
+ "normalized": false,
95
+ "rstrip": false,
96
+ "single_word": false,
97
+ "special": true
98
+ },
99
+ "294": {
100
+ "content": "[pl]",
101
+ "lstrip": false,
102
+ "normalized": false,
103
+ "rstrip": false,
104
+ "single_word": false,
105
+ "special": true
106
+ },
107
+ "295": {
108
+ "content": "[tr]",
109
+ "lstrip": false,
110
+ "normalized": false,
111
+ "rstrip": false,
112
+ "single_word": false,
113
+ "special": true
114
+ },
115
+ "297": {
116
+ "content": "[nl]",
117
+ "lstrip": false,
118
+ "normalized": false,
119
+ "rstrip": false,
120
+ "single_word": false,
121
+ "special": true
122
+ },
123
+ "5022": {
124
+ "content": "[ar]",
125
+ "lstrip": false,
126
+ "normalized": false,
127
+ "rstrip": false,
128
+ "single_word": false,
129
+ "special": true
130
+ },
131
+ "5023": {
132
+ "content": "[zh-cn]",
133
+ "lstrip": false,
134
+ "normalized": false,
135
+ "rstrip": false,
136
+ "single_word": false,
137
+ "special": true
138
+ },
139
+ "5412": {
140
+ "content": "[ja]",
141
+ "lstrip": false,
142
+ "normalized": false,
143
+ "rstrip": false,
144
+ "single_word": false,
145
+ "special": true
146
+ },
147
+ "5753": {
148
+ "content": "[hu]",
149
+ "lstrip": false,
150
+ "normalized": false,
151
+ "rstrip": false,
152
+ "single_word": false,
153
+ "special": true
154
+ },
155
+ "6152": {
156
+ "content": "[ko]",
157
+ "lstrip": false,
158
+ "normalized": false,
159
+ "rstrip": false,
160
+ "single_word": false,
161
+ "special": true
162
+ },
163
+ "6680": {
164
+ "content": "[hi]",
165
+ "lstrip": false,
166
+ "normalized": false,
167
+ "rstrip": false,
168
+ "single_word": false,
169
+ "special": true
170
+ },
171
+ "6681": {
172
+ "content": "[PAD]",
173
+ "lstrip": false,
174
+ "normalized": false,
175
+ "rstrip": false,
176
+ "single_word": false,
177
+ "special": true
178
+ }
179
+ },
180
+ "auto_map": {"AutoTokenizer": ["AstraMindAI/xtts2-gpt--tokenizer.XTTSTokenizerFast", null]},
181
+ "bos_token": "[START]",
182
+ "clean_up_tokenization_spaces": true,
183
+ "eos_token": "[STOP]",
184
+ "max_length": null,
185
+ "model_max_length": 1000000000000000019884624838656,
186
+ "pad_to_multiple_of": null,
187
+ "pad_token": "[PAD]",
188
+ "pad_token_type_id": 0,
189
+ "padding_side": "right",
190
+ "tokenizer_class": "XTTSTokenizerFast",
191
+ "unk_token": "[UNK]"
192
+ }
FP16/config.json ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "activation_function": "gelu_new",
3
+ "architectures": [
4
+ "XttsGPT"
5
+ ],
6
+ "attn_pdrop": 0.1,
7
+ "audio_config": {
8
+ "mel_channels": 80,
9
+ "output_sample_rate": 24000,
10
+ "sample_rate": 22050
11
+ },
12
+ "auto_map": {
13
+ "AutoConfig": "AstraMindAI/xtts2-gpt--gpt_config.XTTSGPTConfig",
14
+ "AutoModelForCausalLM": "AstraMindAI/xtts2-gpt--xtts2_gpt_modeling.XttsGPT",
15
+ "AutoTokenizer": "AstraMindAI/xtts2-gpt--tokenizer.XTTSTokenizerFast"
16
+ },
17
+ "decoder_input_dim": 1024,
18
+ "enable_redaction": false,
19
+ "gpt_batch_size": 1,
20
+ "gpt_max_audio_tokens": 605,
21
+ "hidden_size": 1024,
22
+ "initializer_range": 0.02,
23
+ "kv_cache": true,
24
+ "layer_norm_epsilon": 1e-05,
25
+ "max_audio_tokens": 605,
26
+ "max_prompt_tokens": 70,
27
+ "max_text_tokens": 402,
28
+ "model_type": "xtts_gpt",
29
+ "n_inner": 4096,
30
+ "num_attention_heads": 16,
31
+ "num_audio_tokens": 1026,
32
+ "num_hidden_layers": 30,
33
+ "number_text_tokens": 6681,
34
+ "reorder_and_upcast_attn": false,
35
+ "scale_attn_by_inverse_layer_idx": false,
36
+ "start_audio_token": 1024,
37
+ "start_text_token": null,
38
+ "stop_audio_token": 1025,
39
+ "stop_text_token": null,
40
+ "transformers_version": "4.46.0",
41
+ "use_masking_gt_prompt_approach": true,
42
+ "use_perceiver_resampler": true,
43
+ "vocab_size": 6681
44
+ }
FP16/gpt2_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:61f43220971207d4527f390d2d7a4d702099e3df4525c1a3b2b4aaf378736cb1
3
+ size 761266252
FP16/special_tokens_map.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "[START]",
3
+ "eos_token": "[STOP]",
4
+ "pad_token": "[PAD]",
5
+ "unk_token": "[UNK]"
6
+ }
FP16/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
FP16/tokenizer_config.json ADDED
@@ -0,0 +1,192 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[STOP]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "1": {
12
+ "content": "[UNK]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "2": {
20
+ "content": "[SPACE]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "259": {
28
+ "content": "[en]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "260": {
36
+ "content": "[de]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ },
43
+ "261": {
44
+ "content": "[START]",
45
+ "lstrip": false,
46
+ "normalized": false,
47
+ "rstrip": false,
48
+ "single_word": false,
49
+ "special": true
50
+ },
51
+ "262": {
52
+ "content": "[fr]",
53
+ "lstrip": false,
54
+ "normalized": false,
55
+ "rstrip": false,
56
+ "single_word": false,
57
+ "special": true
58
+ },
59
+ "267": {
60
+ "content": "[ru]",
61
+ "lstrip": false,
62
+ "normalized": false,
63
+ "rstrip": false,
64
+ "single_word": false,
65
+ "special": true
66
+ },
67
+ "284": {
68
+ "content": "[es]",
69
+ "lstrip": false,
70
+ "normalized": false,
71
+ "rstrip": false,
72
+ "single_word": false,
73
+ "special": true
74
+ },
75
+ "285": {
76
+ "content": "[it]",
77
+ "lstrip": false,
78
+ "normalized": false,
79
+ "rstrip": false,
80
+ "single_word": false,
81
+ "special": true
82
+ },
83
+ "286": {
84
+ "content": "[pt]",
85
+ "lstrip": false,
86
+ "normalized": false,
87
+ "rstrip": false,
88
+ "single_word": false,
89
+ "special": true
90
+ },
91
+ "293": {
92
+ "content": "[cs]",
93
+ "lstrip": false,
94
+ "normalized": false,
95
+ "rstrip": false,
96
+ "single_word": false,
97
+ "special": true
98
+ },
99
+ "294": {
100
+ "content": "[pl]",
101
+ "lstrip": false,
102
+ "normalized": false,
103
+ "rstrip": false,
104
+ "single_word": false,
105
+ "special": true
106
+ },
107
+ "295": {
108
+ "content": "[tr]",
109
+ "lstrip": false,
110
+ "normalized": false,
111
+ "rstrip": false,
112
+ "single_word": false,
113
+ "special": true
114
+ },
115
+ "297": {
116
+ "content": "[nl]",
117
+ "lstrip": false,
118
+ "normalized": false,
119
+ "rstrip": false,
120
+ "single_word": false,
121
+ "special": true
122
+ },
123
+ "5022": {
124
+ "content": "[ar]",
125
+ "lstrip": false,
126
+ "normalized": false,
127
+ "rstrip": false,
128
+ "single_word": false,
129
+ "special": true
130
+ },
131
+ "5023": {
132
+ "content": "[zh-cn]",
133
+ "lstrip": false,
134
+ "normalized": false,
135
+ "rstrip": false,
136
+ "single_word": false,
137
+ "special": true
138
+ },
139
+ "5412": {
140
+ "content": "[ja]",
141
+ "lstrip": false,
142
+ "normalized": false,
143
+ "rstrip": false,
144
+ "single_word": false,
145
+ "special": true
146
+ },
147
+ "5753": {
148
+ "content": "[hu]",
149
+ "lstrip": false,
150
+ "normalized": false,
151
+ "rstrip": false,
152
+ "single_word": false,
153
+ "special": true
154
+ },
155
+ "6152": {
156
+ "content": "[ko]",
157
+ "lstrip": false,
158
+ "normalized": false,
159
+ "rstrip": false,
160
+ "single_word": false,
161
+ "special": true
162
+ },
163
+ "6680": {
164
+ "content": "[hi]",
165
+ "lstrip": false,
166
+ "normalized": false,
167
+ "rstrip": false,
168
+ "single_word": false,
169
+ "special": true
170
+ },
171
+ "6681": {
172
+ "content": "[PAD]",
173
+ "lstrip": false,
174
+ "normalized": false,
175
+ "rstrip": false,
176
+ "single_word": false,
177
+ "special": true
178
+ }
179
+ },
180
+ "auto_map": {"AutoTokenizer": ["AstraMindAI/xtts2-gpt--tokenizer.XTTSTokenizerFast", null]},
181
+ "bos_token": "[START]",
182
+ "clean_up_tokenization_spaces": true,
183
+ "eos_token": "[STOP]",
184
+ "max_length": null,
185
+ "model_max_length": 1000000000000000019884624838656,
186
+ "pad_to_multiple_of": null,
187
+ "pad_token": "[PAD]",
188
+ "pad_token_type_id": 0,
189
+ "padding_side": "right",
190
+ "tokenizer_class": "XTTSTokenizerFast",
191
+ "unk_token": "[UNK]"
192
+ }
FP32/config.json ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "activation_function": "gelu_new",
3
+ "architectures": [
4
+ "XttsGPT"
5
+ ],
6
+ "attn_pdrop": 0.1,
7
+ "audio_config": {
8
+ "mel_channels": 80,
9
+ "output_sample_rate": 24000,
10
+ "sample_rate": 22050
11
+ },
12
+ "auto_map": {
13
+ "AutoConfig": "AstraMindAI/xtts2-gpt--gpt_config.XTTSGPTConfig",
14
+ "AutoModelForCausalLM": "AstraMindAI/xtts2-gpt--xtts2_gpt_modeling.XttsGPT",
15
+ "AutoTokenizer": "AstraMindAI/xtts2-gpt--tokenizer.XTTSTokenizerFast"
16
+ },
17
+ "decoder_input_dim": 1024,
18
+ "enable_redaction": false,
19
+ "gpt_batch_size": 1,
20
+ "gpt_max_audio_tokens": 605,
21
+ "hidden_size": 1024,
22
+ "initializer_range": 0.02,
23
+ "kv_cache": true,
24
+ "layer_norm_epsilon": 1e-05,
25
+ "max_audio_tokens": 605,
26
+ "max_prompt_tokens": 70,
27
+ "max_text_tokens": 402,
28
+ "model_type": "xtts_gpt",
29
+ "n_inner": 4096,
30
+ "num_attention_heads": 16,
31
+ "num_audio_tokens": 1026,
32
+ "num_hidden_layers": 30,
33
+ "number_text_tokens": 6681,
34
+ "reorder_and_upcast_attn": false,
35
+ "scale_attn_by_inverse_layer_idx": false,
36
+ "start_audio_token": 1024,
37
+ "start_text_token": null,
38
+ "stop_audio_token": 1025,
39
+ "stop_text_token": null,
40
+ "transformers_version": "4.46.0",
41
+ "use_masking_gt_prompt_approach": true,
42
+ "use_perceiver_resampler": true,
43
+ "vocab_size": 6681
44
+ }
FP32/gpt2_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:104d92b2297c243b64d1417bd5cfda015faca0a670e9bc90088eed0e844f8e35
3
+ size 1522497936
FP32/special_tokens_map.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "[START]",
3
+ "eos_token": "[STOP]",
4
+ "pad_token": "[PAD]",
5
+ "unk_token": "[UNK]"
6
+ }
FP32/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
FP32/tokenizer_config.json ADDED
@@ -0,0 +1,192 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[STOP]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "1": {
12
+ "content": "[UNK]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "2": {
20
+ "content": "[SPACE]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "259": {
28
+ "content": "[en]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "260": {
36
+ "content": "[de]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ },
43
+ "261": {
44
+ "content": "[START]",
45
+ "lstrip": false,
46
+ "normalized": false,
47
+ "rstrip": false,
48
+ "single_word": false,
49
+ "special": true
50
+ },
51
+ "262": {
52
+ "content": "[fr]",
53
+ "lstrip": false,
54
+ "normalized": false,
55
+ "rstrip": false,
56
+ "single_word": false,
57
+ "special": true
58
+ },
59
+ "267": {
60
+ "content": "[ru]",
61
+ "lstrip": false,
62
+ "normalized": false,
63
+ "rstrip": false,
64
+ "single_word": false,
65
+ "special": true
66
+ },
67
+ "284": {
68
+ "content": "[es]",
69
+ "lstrip": false,
70
+ "normalized": false,
71
+ "rstrip": false,
72
+ "single_word": false,
73
+ "special": true
74
+ },
75
+ "285": {
76
+ "content": "[it]",
77
+ "lstrip": false,
78
+ "normalized": false,
79
+ "rstrip": false,
80
+ "single_word": false,
81
+ "special": true
82
+ },
83
+ "286": {
84
+ "content": "[pt]",
85
+ "lstrip": false,
86
+ "normalized": false,
87
+ "rstrip": false,
88
+ "single_word": false,
89
+ "special": true
90
+ },
91
+ "293": {
92
+ "content": "[cs]",
93
+ "lstrip": false,
94
+ "normalized": false,
95
+ "rstrip": false,
96
+ "single_word": false,
97
+ "special": true
98
+ },
99
+ "294": {
100
+ "content": "[pl]",
101
+ "lstrip": false,
102
+ "normalized": false,
103
+ "rstrip": false,
104
+ "single_word": false,
105
+ "special": true
106
+ },
107
+ "295": {
108
+ "content": "[tr]",
109
+ "lstrip": false,
110
+ "normalized": false,
111
+ "rstrip": false,
112
+ "single_word": false,
113
+ "special": true
114
+ },
115
+ "297": {
116
+ "content": "[nl]",
117
+ "lstrip": false,
118
+ "normalized": false,
119
+ "rstrip": false,
120
+ "single_word": false,
121
+ "special": true
122
+ },
123
+ "5022": {
124
+ "content": "[ar]",
125
+ "lstrip": false,
126
+ "normalized": false,
127
+ "rstrip": false,
128
+ "single_word": false,
129
+ "special": true
130
+ },
131
+ "5023": {
132
+ "content": "[zh-cn]",
133
+ "lstrip": false,
134
+ "normalized": false,
135
+ "rstrip": false,
136
+ "single_word": false,
137
+ "special": true
138
+ },
139
+ "5412": {
140
+ "content": "[ja]",
141
+ "lstrip": false,
142
+ "normalized": false,
143
+ "rstrip": false,
144
+ "single_word": false,
145
+ "special": true
146
+ },
147
+ "5753": {
148
+ "content": "[hu]",
149
+ "lstrip": false,
150
+ "normalized": false,
151
+ "rstrip": false,
152
+ "single_word": false,
153
+ "special": true
154
+ },
155
+ "6152": {
156
+ "content": "[ko]",
157
+ "lstrip": false,
158
+ "normalized": false,
159
+ "rstrip": false,
160
+ "single_word": false,
161
+ "special": true
162
+ },
163
+ "6680": {
164
+ "content": "[hi]",
165
+ "lstrip": false,
166
+ "normalized": false,
167
+ "rstrip": false,
168
+ "single_word": false,
169
+ "special": true
170
+ },
171
+ "6681": {
172
+ "content": "[PAD]",
173
+ "lstrip": false,
174
+ "normalized": false,
175
+ "rstrip": false,
176
+ "single_word": false,
177
+ "special": true
178
+ }
179
+ },
180
+ "auto_map": {"AutoTokenizer": ["AstraMindAI/xtts2-gpt--tokenizer.XTTSTokenizerFast", null]},
181
+ "bos_token": "[START]",
182
+ "clean_up_tokenization_spaces": true,
183
+ "eos_token": "[STOP]",
184
+ "max_length": null,
185
+ "model_max_length": 1000000000000000019884624838656,
186
+ "pad_to_multiple_of": null,
187
+ "pad_token": "[PAD]",
188
+ "pad_token_type_id": 0,
189
+ "padding_side": "right",
190
+ "tokenizer_class": "XTTSTokenizerFast",
191
+ "unk_token": "[UNK]"
192
+ }
INT2/config.json ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "activation_function": "gelu_new",
3
+ "architectures": [
4
+ "XttsGPT"
5
+ ],
6
+ "attn_pdrop": 0.1,
7
+ "audio_config": {
8
+ "mel_channels": 80,
9
+ "output_sample_rate": 24000,
10
+ "sample_rate": 22050
11
+ },
12
+ "auto_map": {
13
+ "AutoConfig": "AstraMindAI/xtts2-gpt--gpt_config.XTTSGPTConfig",
14
+ "AutoModelForCausalLM": "AstraMindAI/xtts2-gpt--xtts2_gpt_modeling.XttsGPT",
15
+ "AutoTokenizer": "AstraMindAI/xtts2-gpt--tokenizer.XTTSTokenizerFast"
16
+ },
17
+ "decoder_input_dim": 1024,
18
+ "enable_redaction": false,
19
+ "gpt_batch_size": 1,
20
+ "gpt_max_audio_tokens": 605,
21
+ "hidden_size": 1024,
22
+ "initializer_range": 0.02,
23
+ "kv_cache": true,
24
+ "layer_norm_epsilon": 1e-05,
25
+ "max_audio_tokens": 605,
26
+ "max_prompt_tokens": 70,
27
+ "max_text_tokens": 402,
28
+ "model_type": "xtts_gpt",
29
+ "n_inner": 4096,
30
+ "num_attention_heads": 16,
31
+ "num_audio_tokens": 1026,
32
+ "num_hidden_layers": 30,
33
+ "number_text_tokens": 6681,
34
+ "reorder_and_upcast_attn": false,
35
+ "scale_attn_by_inverse_layer_idx": false,
36
+ "start_audio_token": 1024,
37
+ "start_text_token": null,
38
+ "stop_audio_token": 1025,
39
+ "stop_text_token": null,
40
+ "transformers_version": "4.46.0",
41
+ "use_masking_gt_prompt_approach": true,
42
+ "use_perceiver_resampler": true,
43
+ "vocab_size": 6681
44
+ }
INT2/gpt2_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dabde08132c71284df3d672aaeac4d57e898d8766f041369641fadfd8db6dea3
3
+ size 95187217
INT2/int2_metadata.json ADDED
@@ -0,0 +1,2333 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "final_norm.bias": {
3
+ "scale": 0.6184078454971313,
4
+ "shape": [
5
+ 1024
6
+ ]
7
+ },
8
+ "final_norm.weight": {
9
+ "scale": 5.523232936859131,
10
+ "shape": [
11
+ 1024
12
+ ]
13
+ },
14
+ "gpt.h.0.attn.c_attn.bias": {
15
+ "scale": 0.8616526126861572,
16
+ "shape": [
17
+ 3072
18
+ ]
19
+ },
20
+ "gpt.h.0.attn.c_attn.weight": {
21
+ "scale": 0.7289496064186096,
22
+ "shape": [
23
+ 1024,
24
+ 3072
25
+ ]
26
+ },
27
+ "gpt.h.0.attn.c_proj.bias": {
28
+ "scale": 0.6265745759010315,
29
+ "shape": [
30
+ 1024
31
+ ]
32
+ },
33
+ "gpt.h.0.attn.c_proj.weight": {
34
+ "scale": 0.8160552382469177,
35
+ "shape": [
36
+ 1024,
37
+ 1024
38
+ ]
39
+ },
40
+ "gpt.h.0.ln_1.bias": {
41
+ "scale": 0.18356449902057648,
42
+ "shape": [
43
+ 1024
44
+ ]
45
+ },
46
+ "gpt.h.0.ln_1.weight": {
47
+ "scale": 0.24023672938346863,
48
+ "shape": [
49
+ 1024
50
+ ]
51
+ },
52
+ "gpt.h.0.ln_2.bias": {
53
+ "scale": 1.4397389888763428,
54
+ "shape": [
55
+ 1024
56
+ ]
57
+ },
58
+ "gpt.h.0.ln_2.weight": {
59
+ "scale": 0.8021197319030762,
60
+ "shape": [
61
+ 1024
62
+ ]
63
+ },
64
+ "gpt.h.0.mlp.c_fc.bias": {
65
+ "scale": 0.3322474956512451,
66
+ "shape": [
67
+ 4096
68
+ ]
69
+ },
70
+ "gpt.h.0.mlp.c_fc.weight": {
71
+ "scale": 1.1929363012313843,
72
+ "shape": [
73
+ 1024,
74
+ 4096
75
+ ]
76
+ },
77
+ "gpt.h.0.mlp.c_proj.bias": {
78
+ "scale": 2.0688436031341553,
79
+ "shape": [
80
+ 1024
81
+ ]
82
+ },
83
+ "gpt.h.0.mlp.c_proj.weight": {
84
+ "scale": 4.6349406242370605,
85
+ "shape": [
86
+ 4096,
87
+ 1024
88
+ ]
89
+ },
90
+ "gpt.h.1.attn.c_attn.bias": {
91
+ "scale": 0.6426463723182678,
92
+ "shape": [
93
+ 3072
94
+ ]
95
+ },
96
+ "gpt.h.1.attn.c_attn.weight": {
97
+ "scale": 0.43571868538856506,
98
+ "shape": [
99
+ 1024,
100
+ 3072
101
+ ]
102
+ },
103
+ "gpt.h.1.attn.c_proj.bias": {
104
+ "scale": 2.0807175636291504,
105
+ "shape": [
106
+ 1024
107
+ ]
108
+ },
109
+ "gpt.h.1.attn.c_proj.weight": {
110
+ "scale": 3.195742607116699,
111
+ "shape": [
112
+ 1024,
113
+ 1024
114
+ ]
115
+ },
116
+ "gpt.h.1.ln_1.bias": {
117
+ "scale": 1.403233289718628,
118
+ "shape": [
119
+ 1024
120
+ ]
121
+ },
122
+ "gpt.h.1.ln_1.weight": {
123
+ "scale": 0.5358895659446716,
124
+ "shape": [
125
+ 1024
126
+ ]
127
+ },
128
+ "gpt.h.1.ln_2.bias": {
129
+ "scale": 1.513456106185913,
130
+ "shape": [
131
+ 1024
132
+ ]
133
+ },
134
+ "gpt.h.1.ln_2.weight": {
135
+ "scale": 1.418876051902771,
136
+ "shape": [
137
+ 1024
138
+ ]
139
+ },
140
+ "gpt.h.1.mlp.c_fc.bias": {
141
+ "scale": 0.185274139046669,
142
+ "shape": [
143
+ 4096
144
+ ]
145
+ },
146
+ "gpt.h.1.mlp.c_fc.weight": {
147
+ "scale": 0.8499930500984192,
148
+ "shape": [
149
+ 1024,
150
+ 4096
151
+ ]
152
+ },
153
+ "gpt.h.1.mlp.c_proj.bias": {
154
+ "scale": 2.412322998046875,
155
+ "shape": [
156
+ 1024
157
+ ]
158
+ },
159
+ "gpt.h.1.mlp.c_proj.weight": {
160
+ "scale": 5.190143585205078,
161
+ "shape": [
162
+ 4096,
163
+ 1024
164
+ ]
165
+ },
166
+ "gpt.h.10.attn.c_attn.bias": {
167
+ "scale": 0.30900076031684875,
168
+ "shape": [
169
+ 3072
170
+ ]
171
+ },
172
+ "gpt.h.10.attn.c_attn.weight": {
173
+ "scale": 0.8287355303764343,
174
+ "shape": [
175
+ 1024,
176
+ 3072
177
+ ]
178
+ },
179
+ "gpt.h.10.attn.c_proj.bias": {
180
+ "scale": 0.3192511796951294,
181
+ "shape": [
182
+ 1024
183
+ ]
184
+ },
185
+ "gpt.h.10.attn.c_proj.weight": {
186
+ "scale": 0.6080719232559204,
187
+ "shape": [
188
+ 1024,
189
+ 1024
190
+ ]
191
+ },
192
+ "gpt.h.10.ln_1.bias": {
193
+ "scale": 1.0192170143127441,
194
+ "shape": [
195
+ 1024
196
+ ]
197
+ },
198
+ "gpt.h.10.ln_1.weight": {
199
+ "scale": 1.0667375326156616,
200
+ "shape": [
201
+ 1024
202
+ ]
203
+ },
204
+ "gpt.h.10.ln_2.bias": {
205
+ "scale": 0.9129897356033325,
206
+ "shape": [
207
+ 1024
208
+ ]
209
+ },
210
+ "gpt.h.10.ln_2.weight": {
211
+ "scale": 1.7987780570983887,
212
+ "shape": [
213
+ 1024
214
+ ]
215
+ },
216
+ "gpt.h.10.mlp.c_fc.bias": {
217
+ "scale": 0.20667202770709991,
218
+ "shape": [
219
+ 4096
220
+ ]
221
+ },
222
+ "gpt.h.10.mlp.c_fc.weight": {
223
+ "scale": 0.6766823530197144,
224
+ "shape": [
225
+ 1024,
226
+ 4096
227
+ ]
228
+ },
229
+ "gpt.h.10.mlp.c_proj.bias": {
230
+ "scale": 0.7202504873275757,
231
+ "shape": [
232
+ 1024
233
+ ]
234
+ },
235
+ "gpt.h.10.mlp.c_proj.weight": {
236
+ "scale": 2.558100461959839,
237
+ "shape": [
238
+ 4096,
239
+ 1024
240
+ ]
241
+ },
242
+ "gpt.h.11.attn.c_attn.bias": {
243
+ "scale": 0.3394349217414856,
244
+ "shape": [
245
+ 3072
246
+ ]
247
+ },
248
+ "gpt.h.11.attn.c_attn.weight": {
249
+ "scale": 0.7863844037055969,
250
+ "shape": [
251
+ 1024,
252
+ 3072
253
+ ]
254
+ },
255
+ "gpt.h.11.attn.c_proj.bias": {
256
+ "scale": 0.3575643002986908,
257
+ "shape": [
258
+ 1024
259
+ ]
260
+ },
261
+ "gpt.h.11.attn.c_proj.weight": {
262
+ "scale": 0.615801990032196,
263
+ "shape": [
264
+ 1024,
265
+ 1024
266
+ ]
267
+ },
268
+ "gpt.h.11.ln_1.bias": {
269
+ "scale": 1.1525065898895264,
270
+ "shape": [
271
+ 1024
272
+ ]
273
+ },
274
+ "gpt.h.11.ln_1.weight": {
275
+ "scale": 1.129403829574585,
276
+ "shape": [
277
+ 1024
278
+ ]
279
+ },
280
+ "gpt.h.11.ln_2.bias": {
281
+ "scale": 0.6890860199928284,
282
+ "shape": [
283
+ 1024
284
+ ]
285
+ },
286
+ "gpt.h.11.ln_2.weight": {
287
+ "scale": 1.56197988986969,
288
+ "shape": [
289
+ 1024
290
+ ]
291
+ },
292
+ "gpt.h.11.mlp.c_fc.bias": {
293
+ "scale": 0.21424338221549988,
294
+ "shape": [
295
+ 4096
296
+ ]
297
+ },
298
+ "gpt.h.11.mlp.c_fc.weight": {
299
+ "scale": 0.4732816219329834,
300
+ "shape": [
301
+ 1024,
302
+ 4096
303
+ ]
304
+ },
305
+ "gpt.h.11.mlp.c_proj.bias": {
306
+ "scale": 0.7450451850891113,
307
+ "shape": [
308
+ 1024
309
+ ]
310
+ },
311
+ "gpt.h.11.mlp.c_proj.weight": {
312
+ "scale": 3.203523635864258,
313
+ "shape": [
314
+ 4096,
315
+ 1024
316
+ ]
317
+ },
318
+ "gpt.h.12.attn.c_attn.bias": {
319
+ "scale": 0.3494969308376312,
320
+ "shape": [
321
+ 3072
322
+ ]
323
+ },
324
+ "gpt.h.12.attn.c_attn.weight": {
325
+ "scale": 1.0152732133865356,
326
+ "shape": [
327
+ 1024,
328
+ 3072
329
+ ]
330
+ },
331
+ "gpt.h.12.attn.c_proj.bias": {
332
+ "scale": 0.23432914912700653,
333
+ "shape": [
334
+ 1024
335
+ ]
336
+ },
337
+ "gpt.h.12.attn.c_proj.weight": {
338
+ "scale": 0.7732473611831665,
339
+ "shape": [
340
+ 1024,
341
+ 1024
342
+ ]
343
+ },
344
+ "gpt.h.12.ln_1.bias": {
345
+ "scale": 1.3029590845108032,
346
+ "shape": [
347
+ 1024
348
+ ]
349
+ },
350
+ "gpt.h.12.ln_1.weight": {
351
+ "scale": 1.1806756258010864,
352
+ "shape": [
353
+ 1024
354
+ ]
355
+ },
356
+ "gpt.h.12.ln_2.bias": {
357
+ "scale": 0.8905816078186035,
358
+ "shape": [
359
+ 1024
360
+ ]
361
+ },
362
+ "gpt.h.12.ln_2.weight": {
363
+ "scale": 1.6275115013122559,
364
+ "shape": [
365
+ 1024
366
+ ]
367
+ },
368
+ "gpt.h.12.mlp.c_fc.bias": {
369
+ "scale": 0.10368738323450089,
370
+ "shape": [
371
+ 4096
372
+ ]
373
+ },
374
+ "gpt.h.12.mlp.c_fc.weight": {
375
+ "scale": 0.48194029927253723,
376
+ "shape": [
377
+ 1024,
378
+ 4096
379
+ ]
380
+ },
381
+ "gpt.h.12.mlp.c_proj.bias": {
382
+ "scale": 0.8424944281578064,
383
+ "shape": [
384
+ 1024
385
+ ]
386
+ },
387
+ "gpt.h.12.mlp.c_proj.weight": {
388
+ "scale": 1.9203577041625977,
389
+ "shape": [
390
+ 4096,
391
+ 1024
392
+ ]
393
+ },
394
+ "gpt.h.13.attn.c_attn.bias": {
395
+ "scale": 0.33320215344429016,
396
+ "shape": [
397
+ 3072
398
+ ]
399
+ },
400
+ "gpt.h.13.attn.c_attn.weight": {
401
+ "scale": 0.6099980473518372,
402
+ "shape": [
403
+ 1024,
404
+ 3072
405
+ ]
406
+ },
407
+ "gpt.h.13.attn.c_proj.bias": {
408
+ "scale": 0.47228217124938965,
409
+ "shape": [
410
+ 1024
411
+ ]
412
+ },
413
+ "gpt.h.13.attn.c_proj.weight": {
414
+ "scale": 0.8291131258010864,
415
+ "shape": [
416
+ 1024,
417
+ 1024
418
+ ]
419
+ },
420
+ "gpt.h.13.ln_1.bias": {
421
+ "scale": 1.0610405206680298,
422
+ "shape": [
423
+ 1024
424
+ ]
425
+ },
426
+ "gpt.h.13.ln_1.weight": {
427
+ "scale": 1.1475324630737305,
428
+ "shape": [
429
+ 1024
430
+ ]
431
+ },
432
+ "gpt.h.13.ln_2.bias": {
433
+ "scale": 0.9145744442939758,
434
+ "shape": [
435
+ 1024
436
+ ]
437
+ },
438
+ "gpt.h.13.ln_2.weight": {
439
+ "scale": 1.5574055910110474,
440
+ "shape": [
441
+ 1024
442
+ ]
443
+ },
444
+ "gpt.h.13.mlp.c_fc.bias": {
445
+ "scale": 0.13628917932510376,
446
+ "shape": [
447
+ 4096
448
+ ]
449
+ },
450
+ "gpt.h.13.mlp.c_fc.weight": {
451
+ "scale": 0.4501611590385437,
452
+ "shape": [
453
+ 1024,
454
+ 4096
455
+ ]
456
+ },
457
+ "gpt.h.13.mlp.c_proj.bias": {
458
+ "scale": 0.7718632817268372,
459
+ "shape": [
460
+ 1024
461
+ ]
462
+ },
463
+ "gpt.h.13.mlp.c_proj.weight": {
464
+ "scale": 1.7728174924850464,
465
+ "shape": [
466
+ 4096,
467
+ 1024
468
+ ]
469
+ },
470
+ "gpt.h.14.attn.c_attn.bias": {
471
+ "scale": 0.28137704730033875,
472
+ "shape": [
473
+ 3072
474
+ ]
475
+ },
476
+ "gpt.h.14.attn.c_attn.weight": {
477
+ "scale": 0.9850640892982483,
478
+ "shape": [
479
+ 1024,
480
+ 3072
481
+ ]
482
+ },
483
+ "gpt.h.14.attn.c_proj.bias": {
484
+ "scale": 0.1655244529247284,
485
+ "shape": [
486
+ 1024
487
+ ]
488
+ },
489
+ "gpt.h.14.attn.c_proj.weight": {
490
+ "scale": 0.7112484574317932,
491
+ "shape": [
492
+ 1024,
493
+ 1024
494
+ ]
495
+ },
496
+ "gpt.h.14.ln_1.bias": {
497
+ "scale": 1.1183710098266602,
498
+ "shape": [
499
+ 1024
500
+ ]
501
+ },
502
+ "gpt.h.14.ln_1.weight": {
503
+ "scale": 1.135362148284912,
504
+ "shape": [
505
+ 1024
506
+ ]
507
+ },
508
+ "gpt.h.14.ln_2.bias": {
509
+ "scale": 0.9109926819801331,
510
+ "shape": [
511
+ 1024
512
+ ]
513
+ },
514
+ "gpt.h.14.ln_2.weight": {
515
+ "scale": 1.4943935871124268,
516
+ "shape": [
517
+ 1024
518
+ ]
519
+ },
520
+ "gpt.h.14.mlp.c_fc.bias": {
521
+ "scale": 0.1072084978222847,
522
+ "shape": [
523
+ 4096
524
+ ]
525
+ },
526
+ "gpt.h.14.mlp.c_fc.weight": {
527
+ "scale": 0.5771117806434631,
528
+ "shape": [
529
+ 1024,
530
+ 4096
531
+ ]
532
+ },
533
+ "gpt.h.14.mlp.c_proj.bias": {
534
+ "scale": 0.734090268611908,
535
+ "shape": [
536
+ 1024
537
+ ]
538
+ },
539
+ "gpt.h.14.mlp.c_proj.weight": {
540
+ "scale": 2.0087053775787354,
541
+ "shape": [
542
+ 4096,
543
+ 1024
544
+ ]
545
+ },
546
+ "gpt.h.15.attn.c_attn.bias": {
547
+ "scale": 0.28571373224258423,
548
+ "shape": [
549
+ 3072
550
+ ]
551
+ },
552
+ "gpt.h.15.attn.c_attn.weight": {
553
+ "scale": 0.5473542213439941,
554
+ "shape": [
555
+ 1024,
556
+ 3072
557
+ ]
558
+ },
559
+ "gpt.h.15.attn.c_proj.bias": {
560
+ "scale": 0.5944628119468689,
561
+ "shape": [
562
+ 1024
563
+ ]
564
+ },
565
+ "gpt.h.15.attn.c_proj.weight": {
566
+ "scale": 0.8187956213951111,
567
+ "shape": [
568
+ 1024,
569
+ 1024
570
+ ]
571
+ },
572
+ "gpt.h.15.ln_1.bias": {
573
+ "scale": 1.0116039514541626,
574
+ "shape": [
575
+ 1024
576
+ ]
577
+ },
578
+ "gpt.h.15.ln_1.weight": {
579
+ "scale": 1.1605725288391113,
580
+ "shape": [
581
+ 1024
582
+ ]
583
+ },
584
+ "gpt.h.15.ln_2.bias": {
585
+ "scale": 0.7876421809196472,
586
+ "shape": [
587
+ 1024
588
+ ]
589
+ },
590
+ "gpt.h.15.ln_2.weight": {
591
+ "scale": 1.402604579925537,
592
+ "shape": [
593
+ 1024
594
+ ]
595
+ },
596
+ "gpt.h.15.mlp.c_fc.bias": {
597
+ "scale": 0.14053300023078918,
598
+ "shape": [
599
+ 4096
600
+ ]
601
+ },
602
+ "gpt.h.15.mlp.c_fc.weight": {
603
+ "scale": 0.5913069844245911,
604
+ "shape": [
605
+ 1024,
606
+ 4096
607
+ ]
608
+ },
609
+ "gpt.h.15.mlp.c_proj.bias": {
610
+ "scale": 0.68301922082901,
611
+ "shape": [
612
+ 1024
613
+ ]
614
+ },
615
+ "gpt.h.15.mlp.c_proj.weight": {
616
+ "scale": 2.1755576133728027,
617
+ "shape": [
618
+ 4096,
619
+ 1024
620
+ ]
621
+ },
622
+ "gpt.h.16.attn.c_attn.bias": {
623
+ "scale": 0.332042932510376,
624
+ "shape": [
625
+ 3072
626
+ ]
627
+ },
628
+ "gpt.h.16.attn.c_attn.weight": {
629
+ "scale": 0.8905439972877502,
630
+ "shape": [
631
+ 1024,
632
+ 3072
633
+ ]
634
+ },
635
+ "gpt.h.16.attn.c_proj.bias": {
636
+ "scale": 0.6724244952201843,
637
+ "shape": [
638
+ 1024
639
+ ]
640
+ },
641
+ "gpt.h.16.attn.c_proj.weight": {
642
+ "scale": 0.5313841104507446,
643
+ "shape": [
644
+ 1024,
645
+ 1024
646
+ ]
647
+ },
648
+ "gpt.h.16.ln_1.bias": {
649
+ "scale": 1.0996408462524414,
650
+ "shape": [
651
+ 1024
652
+ ]
653
+ },
654
+ "gpt.h.16.ln_1.weight": {
655
+ "scale": 1.1137791872024536,
656
+ "shape": [
657
+ 1024
658
+ ]
659
+ },
660
+ "gpt.h.16.ln_2.bias": {
661
+ "scale": 0.8403527736663818,
662
+ "shape": [
663
+ 1024
664
+ ]
665
+ },
666
+ "gpt.h.16.ln_2.weight": {
667
+ "scale": 1.2900326251983643,
668
+ "shape": [
669
+ 1024
670
+ ]
671
+ },
672
+ "gpt.h.16.mlp.c_fc.bias": {
673
+ "scale": 0.15540914237499237,
674
+ "shape": [
675
+ 4096
676
+ ]
677
+ },
678
+ "gpt.h.16.mlp.c_fc.weight": {
679
+ "scale": 0.5606194734573364,
680
+ "shape": [
681
+ 1024,
682
+ 4096
683
+ ]
684
+ },
685
+ "gpt.h.16.mlp.c_proj.bias": {
686
+ "scale": 0.6842435598373413,
687
+ "shape": [
688
+ 1024
689
+ ]
690
+ },
691
+ "gpt.h.16.mlp.c_proj.weight": {
692
+ "scale": 1.1348406076431274,
693
+ "shape": [
694
+ 4096,
695
+ 1024
696
+ ]
697
+ },
698
+ "gpt.h.17.attn.c_attn.bias": {
699
+ "scale": 0.3092385530471802,
700
+ "shape": [
701
+ 3072
702
+ ]
703
+ },
704
+ "gpt.h.17.attn.c_attn.weight": {
705
+ "scale": 0.9243433475494385,
706
+ "shape": [
707
+ 1024,
708
+ 3072
709
+ ]
710
+ },
711
+ "gpt.h.17.attn.c_proj.bias": {
712
+ "scale": 0.4436689019203186,
713
+ "shape": [
714
+ 1024
715
+ ]
716
+ },
717
+ "gpt.h.17.attn.c_proj.weight": {
718
+ "scale": 0.9725503325462341,
719
+ "shape": [
720
+ 1024,
721
+ 1024
722
+ ]
723
+ },
724
+ "gpt.h.17.ln_1.bias": {
725
+ "scale": 1.1076136827468872,
726
+ "shape": [
727
+ 1024
728
+ ]
729
+ },
730
+ "gpt.h.17.ln_1.weight": {
731
+ "scale": 1.1201558113098145,
732
+ "shape": [
733
+ 1024
734
+ ]
735
+ },
736
+ "gpt.h.17.ln_2.bias": {
737
+ "scale": 0.9228811860084534,
738
+ "shape": [
739
+ 1024
740
+ ]
741
+ },
742
+ "gpt.h.17.ln_2.weight": {
743
+ "scale": 1.3084325790405273,
744
+ "shape": [
745
+ 1024
746
+ ]
747
+ },
748
+ "gpt.h.17.mlp.c_fc.bias": {
749
+ "scale": 0.1736840307712555,
750
+ "shape": [
751
+ 4096
752
+ ]
753
+ },
754
+ "gpt.h.17.mlp.c_fc.weight": {
755
+ "scale": 0.47977033257484436,
756
+ "shape": [
757
+ 1024,
758
+ 4096
759
+ ]
760
+ },
761
+ "gpt.h.17.mlp.c_proj.bias": {
762
+ "scale": 0.6184632182121277,
763
+ "shape": [
764
+ 1024
765
+ ]
766
+ },
767
+ "gpt.h.17.mlp.c_proj.weight": {
768
+ "scale": 1.289231538772583,
769
+ "shape": [
770
+ 4096,
771
+ 1024
772
+ ]
773
+ },
774
+ "gpt.h.18.attn.c_attn.bias": {
775
+ "scale": 0.2630675435066223,
776
+ "shape": [
777
+ 3072
778
+ ]
779
+ },
780
+ "gpt.h.18.attn.c_attn.weight": {
781
+ "scale": 0.8577865958213806,
782
+ "shape": [
783
+ 1024,
784
+ 3072
785
+ ]
786
+ },
787
+ "gpt.h.18.attn.c_proj.bias": {
788
+ "scale": 0.7874951958656311,
789
+ "shape": [
790
+ 1024
791
+ ]
792
+ },
793
+ "gpt.h.18.attn.c_proj.weight": {
794
+ "scale": 0.6830109357833862,
795
+ "shape": [
796
+ 1024,
797
+ 1024
798
+ ]
799
+ },
800
+ "gpt.h.18.ln_1.bias": {
801
+ "scale": 1.0202974081039429,
802
+ "shape": [
803
+ 1024
804
+ ]
805
+ },
806
+ "gpt.h.18.ln_1.weight": {
807
+ "scale": 1.112379550933838,
808
+ "shape": [
809
+ 1024
810
+ ]
811
+ },
812
+ "gpt.h.18.ln_2.bias": {
813
+ "scale": 0.8227484226226807,
814
+ "shape": [
815
+ 1024
816
+ ]
817
+ },
818
+ "gpt.h.18.ln_2.weight": {
819
+ "scale": 1.2532376050949097,
820
+ "shape": [
821
+ 1024
822
+ ]
823
+ },
824
+ "gpt.h.18.mlp.c_fc.bias": {
825
+ "scale": 0.1509416699409485,
826
+ "shape": [
827
+ 4096
828
+ ]
829
+ },
830
+ "gpt.h.18.mlp.c_fc.weight": {
831
+ "scale": 0.47805407643318176,
832
+ "shape": [
833
+ 1024,
834
+ 4096
835
+ ]
836
+ },
837
+ "gpt.h.18.mlp.c_proj.bias": {
838
+ "scale": 0.4773893356323242,
839
+ "shape": [
840
+ 1024
841
+ ]
842
+ },
843
+ "gpt.h.18.mlp.c_proj.weight": {
844
+ "scale": 0.8970383405685425,
845
+ "shape": [
846
+ 4096,
847
+ 1024
848
+ ]
849
+ },
850
+ "gpt.h.19.attn.c_attn.bias": {
851
+ "scale": 0.32839706540107727,
852
+ "shape": [
853
+ 3072
854
+ ]
855
+ },
856
+ "gpt.h.19.attn.c_attn.weight": {
857
+ "scale": 0.8178861737251282,
858
+ "shape": [
859
+ 1024,
860
+ 3072
861
+ ]
862
+ },
863
+ "gpt.h.19.attn.c_proj.bias": {
864
+ "scale": 0.41575977206230164,
865
+ "shape": [
866
+ 1024
867
+ ]
868
+ },
869
+ "gpt.h.19.attn.c_proj.weight": {
870
+ "scale": 0.8766708970069885,
871
+ "shape": [
872
+ 1024,
873
+ 1024
874
+ ]
875
+ },
876
+ "gpt.h.19.ln_1.bias": {
877
+ "scale": 1.034698724746704,
878
+ "shape": [
879
+ 1024
880
+ ]
881
+ },
882
+ "gpt.h.19.ln_1.weight": {
883
+ "scale": 1.092841625213623,
884
+ "shape": [
885
+ 1024
886
+ ]
887
+ },
888
+ "gpt.h.19.ln_2.bias": {
889
+ "scale": 0.7605751156806946,
890
+ "shape": [
891
+ 1024
892
+ ]
893
+ },
894
+ "gpt.h.19.ln_2.weight": {
895
+ "scale": 1.2519071102142334,
896
+ "shape": [
897
+ 1024
898
+ ]
899
+ },
900
+ "gpt.h.19.mlp.c_fc.bias": {
901
+ "scale": 0.16412009298801422,
902
+ "shape": [
903
+ 4096
904
+ ]
905
+ },
906
+ "gpt.h.19.mlp.c_fc.weight": {
907
+ "scale": 0.5440301895141602,
908
+ "shape": [
909
+ 1024,
910
+ 4096
911
+ ]
912
+ },
913
+ "gpt.h.19.mlp.c_proj.bias": {
914
+ "scale": 0.41071999073028564,
915
+ "shape": [
916
+ 1024
917
+ ]
918
+ },
919
+ "gpt.h.19.mlp.c_proj.weight": {
920
+ "scale": 1.0663466453552246,
921
+ "shape": [
922
+ 4096,
923
+ 1024
924
+ ]
925
+ },
926
+ "gpt.h.2.attn.c_attn.bias": {
927
+ "scale": 0.31375154852867126,
928
+ "shape": [
929
+ 3072
930
+ ]
931
+ },
932
+ "gpt.h.2.attn.c_attn.weight": {
933
+ "scale": 0.43119457364082336,
934
+ "shape": [
935
+ 1024,
936
+ 3072
937
+ ]
938
+ },
939
+ "gpt.h.2.attn.c_proj.bias": {
940
+ "scale": 2.6263744831085205,
941
+ "shape": [
942
+ 1024
943
+ ]
944
+ },
945
+ "gpt.h.2.attn.c_proj.weight": {
946
+ "scale": 3.013683319091797,
947
+ "shape": [
948
+ 1024,
949
+ 1024
950
+ ]
951
+ },
952
+ "gpt.h.2.ln_1.bias": {
953
+ "scale": 1.1809124946594238,
954
+ "shape": [
955
+ 1024
956
+ ]
957
+ },
958
+ "gpt.h.2.ln_1.weight": {
959
+ "scale": 0.6806290745735168,
960
+ "shape": [
961
+ 1024
962
+ ]
963
+ },
964
+ "gpt.h.2.ln_2.bias": {
965
+ "scale": 1.0246816873550415,
966
+ "shape": [
967
+ 1024
968
+ ]
969
+ },
970
+ "gpt.h.2.ln_2.weight": {
971
+ "scale": 1.704537034034729,
972
+ "shape": [
973
+ 1024
974
+ ]
975
+ },
976
+ "gpt.h.2.mlp.c_fc.bias": {
977
+ "scale": 0.3829772174358368,
978
+ "shape": [
979
+ 4096
980
+ ]
981
+ },
982
+ "gpt.h.2.mlp.c_fc.weight": {
983
+ "scale": 1.365037441253662,
984
+ "shape": [
985
+ 1024,
986
+ 4096
987
+ ]
988
+ },
989
+ "gpt.h.2.mlp.c_proj.bias": {
990
+ "scale": 1.9403208494186401,
991
+ "shape": [
992
+ 1024
993
+ ]
994
+ },
995
+ "gpt.h.2.mlp.c_proj.weight": {
996
+ "scale": 5.108565807342529,
997
+ "shape": [
998
+ 4096,
999
+ 1024
1000
+ ]
1001
+ },
1002
+ "gpt.h.20.attn.c_attn.bias": {
1003
+ "scale": 0.2516125738620758,
1004
+ "shape": [
1005
+ 3072
1006
+ ]
1007
+ },
1008
+ "gpt.h.20.attn.c_attn.weight": {
1009
+ "scale": 0.870423436164856,
1010
+ "shape": [
1011
+ 1024,
1012
+ 3072
1013
+ ]
1014
+ },
1015
+ "gpt.h.20.attn.c_proj.bias": {
1016
+ "scale": 0.6339138150215149,
1017
+ "shape": [
1018
+ 1024
1019
+ ]
1020
+ },
1021
+ "gpt.h.20.attn.c_proj.weight": {
1022
+ "scale": 0.7554279565811157,
1023
+ "shape": [
1024
+ 1024,
1025
+ 1024
1026
+ ]
1027
+ },
1028
+ "gpt.h.20.ln_1.bias": {
1029
+ "scale": 0.9357683062553406,
1030
+ "shape": [
1031
+ 1024
1032
+ ]
1033
+ },
1034
+ "gpt.h.20.ln_1.weight": {
1035
+ "scale": 1.0969212055206299,
1036
+ "shape": [
1037
+ 1024
1038
+ ]
1039
+ },
1040
+ "gpt.h.20.ln_2.bias": {
1041
+ "scale": 0.7808690071105957,
1042
+ "shape": [
1043
+ 1024
1044
+ ]
1045
+ },
1046
+ "gpt.h.20.ln_2.weight": {
1047
+ "scale": 1.1708141565322876,
1048
+ "shape": [
1049
+ 1024
1050
+ ]
1051
+ },
1052
+ "gpt.h.20.mlp.c_fc.bias": {
1053
+ "scale": 0.10677170008420944,
1054
+ "shape": [
1055
+ 4096
1056
+ ]
1057
+ },
1058
+ "gpt.h.20.mlp.c_fc.weight": {
1059
+ "scale": 0.45480090379714966,
1060
+ "shape": [
1061
+ 1024,
1062
+ 4096
1063
+ ]
1064
+ },
1065
+ "gpt.h.20.mlp.c_proj.bias": {
1066
+ "scale": 0.30032098293304443,
1067
+ "shape": [
1068
+ 1024
1069
+ ]
1070
+ },
1071
+ "gpt.h.20.mlp.c_proj.weight": {
1072
+ "scale": 1.8839131593704224,
1073
+ "shape": [
1074
+ 4096,
1075
+ 1024
1076
+ ]
1077
+ },
1078
+ "gpt.h.21.attn.c_attn.bias": {
1079
+ "scale": 0.3100431561470032,
1080
+ "shape": [
1081
+ 3072
1082
+ ]
1083
+ },
1084
+ "gpt.h.21.attn.c_attn.weight": {
1085
+ "scale": 0.7054853439331055,
1086
+ "shape": [
1087
+ 1024,
1088
+ 3072
1089
+ ]
1090
+ },
1091
+ "gpt.h.21.attn.c_proj.bias": {
1092
+ "scale": 0.5535825490951538,
1093
+ "shape": [
1094
+ 1024
1095
+ ]
1096
+ },
1097
+ "gpt.h.21.attn.c_proj.weight": {
1098
+ "scale": 1.1769016981124878,
1099
+ "shape": [
1100
+ 1024,
1101
+ 1024
1102
+ ]
1103
+ },
1104
+ "gpt.h.21.ln_1.bias": {
1105
+ "scale": 0.8685834407806396,
1106
+ "shape": [
1107
+ 1024
1108
+ ]
1109
+ },
1110
+ "gpt.h.21.ln_1.weight": {
1111
+ "scale": 1.1164261102676392,
1112
+ "shape": [
1113
+ 1024
1114
+ ]
1115
+ },
1116
+ "gpt.h.21.ln_2.bias": {
1117
+ "scale": 0.7616092562675476,
1118
+ "shape": [
1119
+ 1024
1120
+ ]
1121
+ },
1122
+ "gpt.h.21.ln_2.weight": {
1123
+ "scale": 1.1056314706802368,
1124
+ "shape": [
1125
+ 1024
1126
+ ]
1127
+ },
1128
+ "gpt.h.21.mlp.c_fc.bias": {
1129
+ "scale": 0.15077142417430878,
1130
+ "shape": [
1131
+ 4096
1132
+ ]
1133
+ },
1134
+ "gpt.h.21.mlp.c_fc.weight": {
1135
+ "scale": 0.4911968410015106,
1136
+ "shape": [
1137
+ 1024,
1138
+ 4096
1139
+ ]
1140
+ },
1141
+ "gpt.h.21.mlp.c_proj.bias": {
1142
+ "scale": 0.1756763458251953,
1143
+ "shape": [
1144
+ 1024
1145
+ ]
1146
+ },
1147
+ "gpt.h.21.mlp.c_proj.weight": {
1148
+ "scale": 1.4334403276443481,
1149
+ "shape": [
1150
+ 4096,
1151
+ 1024
1152
+ ]
1153
+ },
1154
+ "gpt.h.22.attn.c_attn.bias": {
1155
+ "scale": 0.2804168462753296,
1156
+ "shape": [
1157
+ 3072
1158
+ ]
1159
+ },
1160
+ "gpt.h.22.attn.c_attn.weight": {
1161
+ "scale": 0.4919489324092865,
1162
+ "shape": [
1163
+ 1024,
1164
+ 3072
1165
+ ]
1166
+ },
1167
+ "gpt.h.22.attn.c_proj.bias": {
1168
+ "scale": 0.4737534523010254,
1169
+ "shape": [
1170
+ 1024
1171
+ ]
1172
+ },
1173
+ "gpt.h.22.attn.c_proj.weight": {
1174
+ "scale": 0.6300857067108154,
1175
+ "shape": [
1176
+ 1024,
1177
+ 1024
1178
+ ]
1179
+ },
1180
+ "gpt.h.22.ln_1.bias": {
1181
+ "scale": 0.8146415948867798,
1182
+ "shape": [
1183
+ 1024
1184
+ ]
1185
+ },
1186
+ "gpt.h.22.ln_1.weight": {
1187
+ "scale": 1.0955730676651,
1188
+ "shape": [
1189
+ 1024
1190
+ ]
1191
+ },
1192
+ "gpt.h.22.ln_2.bias": {
1193
+ "scale": 0.8315455317497253,
1194
+ "shape": [
1195
+ 1024
1196
+ ]
1197
+ },
1198
+ "gpt.h.22.ln_2.weight": {
1199
+ "scale": 1.116787314414978,
1200
+ "shape": [
1201
+ 1024
1202
+ ]
1203
+ },
1204
+ "gpt.h.22.mlp.c_fc.bias": {
1205
+ "scale": 0.1203167513012886,
1206
+ "shape": [
1207
+ 4096
1208
+ ]
1209
+ },
1210
+ "gpt.h.22.mlp.c_fc.weight": {
1211
+ "scale": 0.5276402235031128,
1212
+ "shape": [
1213
+ 1024,
1214
+ 4096
1215
+ ]
1216
+ },
1217
+ "gpt.h.22.mlp.c_proj.bias": {
1218
+ "scale": 0.17383702099323273,
1219
+ "shape": [
1220
+ 1024
1221
+ ]
1222
+ },
1223
+ "gpt.h.22.mlp.c_proj.weight": {
1224
+ "scale": 1.2244986295700073,
1225
+ "shape": [
1226
+ 4096,
1227
+ 1024
1228
+ ]
1229
+ },
1230
+ "gpt.h.23.attn.c_attn.bias": {
1231
+ "scale": 0.2580159902572632,
1232
+ "shape": [
1233
+ 3072
1234
+ ]
1235
+ },
1236
+ "gpt.h.23.attn.c_attn.weight": {
1237
+ "scale": 0.6216667890548706,
1238
+ "shape": [
1239
+ 1024,
1240
+ 3072
1241
+ ]
1242
+ },
1243
+ "gpt.h.23.attn.c_proj.bias": {
1244
+ "scale": 0.19547155499458313,
1245
+ "shape": [
1246
+ 1024
1247
+ ]
1248
+ },
1249
+ "gpt.h.23.attn.c_proj.weight": {
1250
+ "scale": 0.7300105690956116,
1251
+ "shape": [
1252
+ 1024,
1253
+ 1024
1254
+ ]
1255
+ },
1256
+ "gpt.h.23.ln_1.bias": {
1257
+ "scale": 0.7185365557670593,
1258
+ "shape": [
1259
+ 1024
1260
+ ]
1261
+ },
1262
+ "gpt.h.23.ln_1.weight": {
1263
+ "scale": 1.0195420980453491,
1264
+ "shape": [
1265
+ 1024
1266
+ ]
1267
+ },
1268
+ "gpt.h.23.ln_2.bias": {
1269
+ "scale": 0.8253968954086304,
1270
+ "shape": [
1271
+ 1024
1272
+ ]
1273
+ },
1274
+ "gpt.h.23.ln_2.weight": {
1275
+ "scale": 1.0707511901855469,
1276
+ "shape": [
1277
+ 1024
1278
+ ]
1279
+ },
1280
+ "gpt.h.23.mlp.c_fc.bias": {
1281
+ "scale": 0.12819896638393402,
1282
+ "shape": [
1283
+ 4096
1284
+ ]
1285
+ },
1286
+ "gpt.h.23.mlp.c_fc.weight": {
1287
+ "scale": 0.4703911542892456,
1288
+ "shape": [
1289
+ 1024,
1290
+ 4096
1291
+ ]
1292
+ },
1293
+ "gpt.h.23.mlp.c_proj.bias": {
1294
+ "scale": 0.21622495353221893,
1295
+ "shape": [
1296
+ 1024
1297
+ ]
1298
+ },
1299
+ "gpt.h.23.mlp.c_proj.weight": {
1300
+ "scale": 0.9520456790924072,
1301
+ "shape": [
1302
+ 4096,
1303
+ 1024
1304
+ ]
1305
+ },
1306
+ "gpt.h.24.attn.c_attn.bias": {
1307
+ "scale": 0.29281240701675415,
1308
+ "shape": [
1309
+ 3072
1310
+ ]
1311
+ },
1312
+ "gpt.h.24.attn.c_attn.weight": {
1313
+ "scale": 0.40381887555122375,
1314
+ "shape": [
1315
+ 1024,
1316
+ 3072
1317
+ ]
1318
+ },
1319
+ "gpt.h.24.attn.c_proj.bias": {
1320
+ "scale": 0.20989258587360382,
1321
+ "shape": [
1322
+ 1024
1323
+ ]
1324
+ },
1325
+ "gpt.h.24.attn.c_proj.weight": {
1326
+ "scale": 0.6699181795120239,
1327
+ "shape": [
1328
+ 1024,
1329
+ 1024
1330
+ ]
1331
+ },
1332
+ "gpt.h.24.ln_1.bias": {
1333
+ "scale": 0.7208132743835449,
1334
+ "shape": [
1335
+ 1024
1336
+ ]
1337
+ },
1338
+ "gpt.h.24.ln_1.weight": {
1339
+ "scale": 1.151794195175171,
1340
+ "shape": [
1341
+ 1024
1342
+ ]
1343
+ },
1344
+ "gpt.h.24.ln_2.bias": {
1345
+ "scale": 0.8072383999824524,
1346
+ "shape": [
1347
+ 1024
1348
+ ]
1349
+ },
1350
+ "gpt.h.24.ln_2.weight": {
1351
+ "scale": 1.1724637746810913,
1352
+ "shape": [
1353
+ 1024
1354
+ ]
1355
+ },
1356
+ "gpt.h.24.mlp.c_fc.bias": {
1357
+ "scale": 0.1703694760799408,
1358
+ "shape": [
1359
+ 4096
1360
+ ]
1361
+ },
1362
+ "gpt.h.24.mlp.c_fc.weight": {
1363
+ "scale": 0.39706242084503174,
1364
+ "shape": [
1365
+ 1024,
1366
+ 4096
1367
+ ]
1368
+ },
1369
+ "gpt.h.24.mlp.c_proj.bias": {
1370
+ "scale": 0.21926502883434296,
1371
+ "shape": [
1372
+ 1024
1373
+ ]
1374
+ },
1375
+ "gpt.h.24.mlp.c_proj.weight": {
1376
+ "scale": 0.5858592987060547,
1377
+ "shape": [
1378
+ 4096,
1379
+ 1024
1380
+ ]
1381
+ },
1382
+ "gpt.h.25.attn.c_attn.bias": {
1383
+ "scale": 0.30726155638694763,
1384
+ "shape": [
1385
+ 3072
1386
+ ]
1387
+ },
1388
+ "gpt.h.25.attn.c_attn.weight": {
1389
+ "scale": 0.411799818277359,
1390
+ "shape": [
1391
+ 1024,
1392
+ 3072
1393
+ ]
1394
+ },
1395
+ "gpt.h.25.attn.c_proj.bias": {
1396
+ "scale": 0.24399296939373016,
1397
+ "shape": [
1398
+ 1024
1399
+ ]
1400
+ },
1401
+ "gpt.h.25.attn.c_proj.weight": {
1402
+ "scale": 0.6184800863265991,
1403
+ "shape": [
1404
+ 1024,
1405
+ 1024
1406
+ ]
1407
+ },
1408
+ "gpt.h.25.ln_1.bias": {
1409
+ "scale": 0.650389552116394,
1410
+ "shape": [
1411
+ 1024
1412
+ ]
1413
+ },
1414
+ "gpt.h.25.ln_1.weight": {
1415
+ "scale": 1.253990650177002,
1416
+ "shape": [
1417
+ 1024
1418
+ ]
1419
+ },
1420
+ "gpt.h.25.ln_2.bias": {
1421
+ "scale": 0.8235530853271484,
1422
+ "shape": [
1423
+ 1024
1424
+ ]
1425
+ },
1426
+ "gpt.h.25.ln_2.weight": {
1427
+ "scale": 1.1858876943588257,
1428
+ "shape": [
1429
+ 1024
1430
+ ]
1431
+ },
1432
+ "gpt.h.25.mlp.c_fc.bias": {
1433
+ "scale": 0.15443487465381622,
1434
+ "shape": [
1435
+ 4096
1436
+ ]
1437
+ },
1438
+ "gpt.h.25.mlp.c_fc.weight": {
1439
+ "scale": 0.4894944131374359,
1440
+ "shape": [
1441
+ 1024,
1442
+ 4096
1443
+ ]
1444
+ },
1445
+ "gpt.h.25.mlp.c_proj.bias": {
1446
+ "scale": 0.2847861051559448,
1447
+ "shape": [
1448
+ 1024
1449
+ ]
1450
+ },
1451
+ "gpt.h.25.mlp.c_proj.weight": {
1452
+ "scale": 0.6402336359024048,
1453
+ "shape": [
1454
+ 4096,
1455
+ 1024
1456
+ ]
1457
+ },
1458
+ "gpt.h.26.attn.c_attn.bias": {
1459
+ "scale": 0.5720527768135071,
1460
+ "shape": [
1461
+ 3072
1462
+ ]
1463
+ },
1464
+ "gpt.h.26.attn.c_attn.weight": {
1465
+ "scale": 0.4341934025287628,
1466
+ "shape": [
1467
+ 1024,
1468
+ 3072
1469
+ ]
1470
+ },
1471
+ "gpt.h.26.attn.c_proj.bias": {
1472
+ "scale": 0.26778459548950195,
1473
+ "shape": [
1474
+ 1024
1475
+ ]
1476
+ },
1477
+ "gpt.h.26.attn.c_proj.weight": {
1478
+ "scale": 0.7562843561172485,
1479
+ "shape": [
1480
+ 1024,
1481
+ 1024
1482
+ ]
1483
+ },
1484
+ "gpt.h.26.ln_1.bias": {
1485
+ "scale": 0.5681367516517639,
1486
+ "shape": [
1487
+ 1024
1488
+ ]
1489
+ },
1490
+ "gpt.h.26.ln_1.weight": {
1491
+ "scale": 1.196866750717163,
1492
+ "shape": [
1493
+ 1024
1494
+ ]
1495
+ },
1496
+ "gpt.h.26.ln_2.bias": {
1497
+ "scale": 0.8213993906974792,
1498
+ "shape": [
1499
+ 1024
1500
+ ]
1501
+ },
1502
+ "gpt.h.26.ln_2.weight": {
1503
+ "scale": 1.2492419481277466,
1504
+ "shape": [
1505
+ 1024
1506
+ ]
1507
+ },
1508
+ "gpt.h.26.mlp.c_fc.bias": {
1509
+ "scale": 0.22124537825584412,
1510
+ "shape": [
1511
+ 4096
1512
+ ]
1513
+ },
1514
+ "gpt.h.26.mlp.c_fc.weight": {
1515
+ "scale": 0.3691442310810089,
1516
+ "shape": [
1517
+ 1024,
1518
+ 4096
1519
+ ]
1520
+ },
1521
+ "gpt.h.26.mlp.c_proj.bias": {
1522
+ "scale": 0.35506486892700195,
1523
+ "shape": [
1524
+ 1024
1525
+ ]
1526
+ },
1527
+ "gpt.h.26.mlp.c_proj.weight": {
1528
+ "scale": 1.0029659271240234,
1529
+ "shape": [
1530
+ 4096,
1531
+ 1024
1532
+ ]
1533
+ },
1534
+ "gpt.h.27.attn.c_attn.bias": {
1535
+ "scale": 0.3983057737350464,
1536
+ "shape": [
1537
+ 3072
1538
+ ]
1539
+ },
1540
+ "gpt.h.27.attn.c_attn.weight": {
1541
+ "scale": 0.5026274919509888,
1542
+ "shape": [
1543
+ 1024,
1544
+ 3072
1545
+ ]
1546
+ },
1547
+ "gpt.h.27.attn.c_proj.bias": {
1548
+ "scale": 0.37028607726097107,
1549
+ "shape": [
1550
+ 1024
1551
+ ]
1552
+ },
1553
+ "gpt.h.27.attn.c_proj.weight": {
1554
+ "scale": 0.6866351962089539,
1555
+ "shape": [
1556
+ 1024,
1557
+ 1024
1558
+ ]
1559
+ },
1560
+ "gpt.h.27.ln_1.bias": {
1561
+ "scale": 0.5788735747337341,
1562
+ "shape": [
1563
+ 1024
1564
+ ]
1565
+ },
1566
+ "gpt.h.27.ln_1.weight": {
1567
+ "scale": 1.2025846242904663,
1568
+ "shape": [
1569
+ 1024
1570
+ ]
1571
+ },
1572
+ "gpt.h.27.ln_2.bias": {
1573
+ "scale": 0.8350062370300293,
1574
+ "shape": [
1575
+ 1024
1576
+ ]
1577
+ },
1578
+ "gpt.h.27.ln_2.weight": {
1579
+ "scale": 1.2651458978652954,
1580
+ "shape": [
1581
+ 1024
1582
+ ]
1583
+ },
1584
+ "gpt.h.27.mlp.c_fc.bias": {
1585
+ "scale": 0.2119043618440628,
1586
+ "shape": [
1587
+ 4096
1588
+ ]
1589
+ },
1590
+ "gpt.h.27.mlp.c_fc.weight": {
1591
+ "scale": 0.37953221797943115,
1592
+ "shape": [
1593
+ 1024,
1594
+ 4096
1595
+ ]
1596
+ },
1597
+ "gpt.h.27.mlp.c_proj.bias": {
1598
+ "scale": 0.3048810064792633,
1599
+ "shape": [
1600
+ 1024
1601
+ ]
1602
+ },
1603
+ "gpt.h.27.mlp.c_proj.weight": {
1604
+ "scale": 1.0571913719177246,
1605
+ "shape": [
1606
+ 4096,
1607
+ 1024
1608
+ ]
1609
+ },
1610
+ "gpt.h.28.attn.c_attn.bias": {
1611
+ "scale": 0.3161656856536865,
1612
+ "shape": [
1613
+ 3072
1614
+ ]
1615
+ },
1616
+ "gpt.h.28.attn.c_attn.weight": {
1617
+ "scale": 0.39138245582580566,
1618
+ "shape": [
1619
+ 1024,
1620
+ 3072
1621
+ ]
1622
+ },
1623
+ "gpt.h.28.attn.c_proj.bias": {
1624
+ "scale": 0.5387474894523621,
1625
+ "shape": [
1626
+ 1024
1627
+ ]
1628
+ },
1629
+ "gpt.h.28.attn.c_proj.weight": {
1630
+ "scale": 1.1945445537567139,
1631
+ "shape": [
1632
+ 1024,
1633
+ 1024
1634
+ ]
1635
+ },
1636
+ "gpt.h.28.ln_1.bias": {
1637
+ "scale": 0.6207168102264404,
1638
+ "shape": [
1639
+ 1024
1640
+ ]
1641
+ },
1642
+ "gpt.h.28.ln_1.weight": {
1643
+ "scale": 1.2662208080291748,
1644
+ "shape": [
1645
+ 1024
1646
+ ]
1647
+ },
1648
+ "gpt.h.28.ln_2.bias": {
1649
+ "scale": 0.7384198307991028,
1650
+ "shape": [
1651
+ 1024
1652
+ ]
1653
+ },
1654
+ "gpt.h.28.ln_2.weight": {
1655
+ "scale": 1.3807166814804077,
1656
+ "shape": [
1657
+ 1024
1658
+ ]
1659
+ },
1660
+ "gpt.h.28.mlp.c_fc.bias": {
1661
+ "scale": 0.25197261571884155,
1662
+ "shape": [
1663
+ 4096
1664
+ ]
1665
+ },
1666
+ "gpt.h.28.mlp.c_fc.weight": {
1667
+ "scale": 1.0825285911560059,
1668
+ "shape": [
1669
+ 1024,
1670
+ 4096
1671
+ ]
1672
+ },
1673
+ "gpt.h.28.mlp.c_proj.bias": {
1674
+ "scale": 0.7067692875862122,
1675
+ "shape": [
1676
+ 1024
1677
+ ]
1678
+ },
1679
+ "gpt.h.28.mlp.c_proj.weight": {
1680
+ "scale": 4.0657830238342285,
1681
+ "shape": [
1682
+ 4096,
1683
+ 1024
1684
+ ]
1685
+ },
1686
+ "gpt.h.29.attn.c_attn.bias": {
1687
+ "scale": 0.4292069375514984,
1688
+ "shape": [
1689
+ 3072
1690
+ ]
1691
+ },
1692
+ "gpt.h.29.attn.c_attn.weight": {
1693
+ "scale": 0.5109118223190308,
1694
+ "shape": [
1695
+ 1024,
1696
+ 3072
1697
+ ]
1698
+ },
1699
+ "gpt.h.29.attn.c_proj.bias": {
1700
+ "scale": 0.23389524221420288,
1701
+ "shape": [
1702
+ 1024
1703
+ ]
1704
+ },
1705
+ "gpt.h.29.attn.c_proj.weight": {
1706
+ "scale": 2.3270254135131836,
1707
+ "shape": [
1708
+ 1024,
1709
+ 1024
1710
+ ]
1711
+ },
1712
+ "gpt.h.29.ln_1.bias": {
1713
+ "scale": 0.5844658017158508,
1714
+ "shape": [
1715
+ 1024
1716
+ ]
1717
+ },
1718
+ "gpt.h.29.ln_1.weight": {
1719
+ "scale": 1.2286149263381958,
1720
+ "shape": [
1721
+ 1024
1722
+ ]
1723
+ },
1724
+ "gpt.h.29.ln_2.bias": {
1725
+ "scale": 0.4225330948829651,
1726
+ "shape": [
1727
+ 1024
1728
+ ]
1729
+ },
1730
+ "gpt.h.29.ln_2.weight": {
1731
+ "scale": 1.4540377855300903,
1732
+ "shape": [
1733
+ 1024
1734
+ ]
1735
+ },
1736
+ "gpt.h.29.mlp.c_fc.bias": {
1737
+ "scale": 0.28030163049697876,
1738
+ "shape": [
1739
+ 4096
1740
+ ]
1741
+ },
1742
+ "gpt.h.29.mlp.c_fc.weight": {
1743
+ "scale": 1.8019921779632568,
1744
+ "shape": [
1745
+ 1024,
1746
+ 4096
1747
+ ]
1748
+ },
1749
+ "gpt.h.29.mlp.c_proj.bias": {
1750
+ "scale": 0.5616198778152466,
1751
+ "shape": [
1752
+ 1024
1753
+ ]
1754
+ },
1755
+ "gpt.h.29.mlp.c_proj.weight": {
1756
+ "scale": 11.012739181518555,
1757
+ "shape": [
1758
+ 4096,
1759
+ 1024
1760
+ ]
1761
+ },
1762
+ "gpt.h.3.attn.c_attn.bias": {
1763
+ "scale": 0.25713393092155457,
1764
+ "shape": [
1765
+ 3072
1766
+ ]
1767
+ },
1768
+ "gpt.h.3.attn.c_attn.weight": {
1769
+ "scale": 0.6324517726898193,
1770
+ "shape": [
1771
+ 1024,
1772
+ 3072
1773
+ ]
1774
+ },
1775
+ "gpt.h.3.attn.c_proj.bias": {
1776
+ "scale": 2.667752265930176,
1777
+ "shape": [
1778
+ 1024
1779
+ ]
1780
+ },
1781
+ "gpt.h.3.attn.c_proj.weight": {
1782
+ "scale": 3.59769344329834,
1783
+ "shape": [
1784
+ 1024,
1785
+ 1024
1786
+ ]
1787
+ },
1788
+ "gpt.h.3.ln_1.bias": {
1789
+ "scale": 1.02127206325531,
1790
+ "shape": [
1791
+ 1024
1792
+ ]
1793
+ },
1794
+ "gpt.h.3.ln_1.weight": {
1795
+ "scale": 0.8167343139648438,
1796
+ "shape": [
1797
+ 1024
1798
+ ]
1799
+ },
1800
+ "gpt.h.3.ln_2.bias": {
1801
+ "scale": 0.8380739092826843,
1802
+ "shape": [
1803
+ 1024
1804
+ ]
1805
+ },
1806
+ "gpt.h.3.ln_2.weight": {
1807
+ "scale": 1.6680330038070679,
1808
+ "shape": [
1809
+ 1024
1810
+ ]
1811
+ },
1812
+ "gpt.h.3.mlp.c_fc.bias": {
1813
+ "scale": 0.3283645212650299,
1814
+ "shape": [
1815
+ 4096
1816
+ ]
1817
+ },
1818
+ "gpt.h.3.mlp.c_fc.weight": {
1819
+ "scale": 1.1588833332061768,
1820
+ "shape": [
1821
+ 1024,
1822
+ 4096
1823
+ ]
1824
+ },
1825
+ "gpt.h.3.mlp.c_proj.bias": {
1826
+ "scale": 1.6539766788482666,
1827
+ "shape": [
1828
+ 1024
1829
+ ]
1830
+ },
1831
+ "gpt.h.3.mlp.c_proj.weight": {
1832
+ "scale": 6.646170139312744,
1833
+ "shape": [
1834
+ 4096,
1835
+ 1024
1836
+ ]
1837
+ },
1838
+ "gpt.h.4.attn.c_attn.bias": {
1839
+ "scale": 0.2780280113220215,
1840
+ "shape": [
1841
+ 3072
1842
+ ]
1843
+ },
1844
+ "gpt.h.4.attn.c_attn.weight": {
1845
+ "scale": 0.4671952426433563,
1846
+ "shape": [
1847
+ 1024,
1848
+ 3072
1849
+ ]
1850
+ },
1851
+ "gpt.h.4.attn.c_proj.bias": {
1852
+ "scale": 2.2085044384002686,
1853
+ "shape": [
1854
+ 1024
1855
+ ]
1856
+ },
1857
+ "gpt.h.4.attn.c_proj.weight": {
1858
+ "scale": 2.1969192028045654,
1859
+ "shape": [
1860
+ 1024,
1861
+ 1024
1862
+ ]
1863
+ },
1864
+ "gpt.h.4.ln_1.bias": {
1865
+ "scale": 0.9655510187149048,
1866
+ "shape": [
1867
+ 1024
1868
+ ]
1869
+ },
1870
+ "gpt.h.4.ln_1.weight": {
1871
+ "scale": 0.9420844316482544,
1872
+ "shape": [
1873
+ 1024
1874
+ ]
1875
+ },
1876
+ "gpt.h.4.ln_2.bias": {
1877
+ "scale": 0.600337028503418,
1878
+ "shape": [
1879
+ 1024
1880
+ ]
1881
+ },
1882
+ "gpt.h.4.ln_2.weight": {
1883
+ "scale": 1.6738598346710205,
1884
+ "shape": [
1885
+ 1024
1886
+ ]
1887
+ },
1888
+ "gpt.h.4.mlp.c_fc.bias": {
1889
+ "scale": 0.11800195276737213,
1890
+ "shape": [
1891
+ 4096
1892
+ ]
1893
+ },
1894
+ "gpt.h.4.mlp.c_fc.weight": {
1895
+ "scale": 0.6646812558174133,
1896
+ "shape": [
1897
+ 1024,
1898
+ 4096
1899
+ ]
1900
+ },
1901
+ "gpt.h.4.mlp.c_proj.bias": {
1902
+ "scale": 0.99233478307724,
1903
+ "shape": [
1904
+ 1024
1905
+ ]
1906
+ },
1907
+ "gpt.h.4.mlp.c_proj.weight": {
1908
+ "scale": 7.154963493347168,
1909
+ "shape": [
1910
+ 4096,
1911
+ 1024
1912
+ ]
1913
+ },
1914
+ "gpt.h.5.attn.c_attn.bias": {
1915
+ "scale": 0.2130964696407318,
1916
+ "shape": [
1917
+ 3072
1918
+ ]
1919
+ },
1920
+ "gpt.h.5.attn.c_attn.weight": {
1921
+ "scale": 0.3897348940372467,
1922
+ "shape": [
1923
+ 1024,
1924
+ 3072
1925
+ ]
1926
+ },
1927
+ "gpt.h.5.attn.c_proj.bias": {
1928
+ "scale": 1.2391482591629028,
1929
+ "shape": [
1930
+ 1024
1931
+ ]
1932
+ },
1933
+ "gpt.h.5.attn.c_proj.weight": {
1934
+ "scale": 0.9562830328941345,
1935
+ "shape": [
1936
+ 1024,
1937
+ 1024
1938
+ ]
1939
+ },
1940
+ "gpt.h.5.ln_1.bias": {
1941
+ "scale": 0.7676820755004883,
1942
+ "shape": [
1943
+ 1024
1944
+ ]
1945
+ },
1946
+ "gpt.h.5.ln_1.weight": {
1947
+ "scale": 1.2011964321136475,
1948
+ "shape": [
1949
+ 1024
1950
+ ]
1951
+ },
1952
+ "gpt.h.5.ln_2.bias": {
1953
+ "scale": 0.4611365795135498,
1954
+ "shape": [
1955
+ 1024
1956
+ ]
1957
+ },
1958
+ "gpt.h.5.ln_2.weight": {
1959
+ "scale": 1.7594345808029175,
1960
+ "shape": [
1961
+ 1024
1962
+ ]
1963
+ },
1964
+ "gpt.h.5.mlp.c_fc.bias": {
1965
+ "scale": 0.1486712545156479,
1966
+ "shape": [
1967
+ 4096
1968
+ ]
1969
+ },
1970
+ "gpt.h.5.mlp.c_fc.weight": {
1971
+ "scale": 0.575945258140564,
1972
+ "shape": [
1973
+ 1024,
1974
+ 4096
1975
+ ]
1976
+ },
1977
+ "gpt.h.5.mlp.c_proj.bias": {
1978
+ "scale": 0.638988196849823,
1979
+ "shape": [
1980
+ 1024
1981
+ ]
1982
+ },
1983
+ "gpt.h.5.mlp.c_proj.weight": {
1984
+ "scale": 6.2721123695373535,
1985
+ "shape": [
1986
+ 4096,
1987
+ 1024
1988
+ ]
1989
+ },
1990
+ "gpt.h.6.attn.c_attn.bias": {
1991
+ "scale": 0.28897368907928467,
1992
+ "shape": [
1993
+ 3072
1994
+ ]
1995
+ },
1996
+ "gpt.h.6.attn.c_attn.weight": {
1997
+ "scale": 0.6516239643096924,
1998
+ "shape": [
1999
+ 1024,
2000
+ 3072
2001
+ ]
2002
+ },
2003
+ "gpt.h.6.attn.c_proj.bias": {
2004
+ "scale": 0.6968675255775452,
2005
+ "shape": [
2006
+ 1024
2007
+ ]
2008
+ },
2009
+ "gpt.h.6.attn.c_proj.weight": {
2010
+ "scale": 0.6980696320533752,
2011
+ "shape": [
2012
+ 1024,
2013
+ 1024
2014
+ ]
2015
+ },
2016
+ "gpt.h.6.ln_1.bias": {
2017
+ "scale": 0.741898238658905,
2018
+ "shape": [
2019
+ 1024
2020
+ ]
2021
+ },
2022
+ "gpt.h.6.ln_1.weight": {
2023
+ "scale": 1.4237617254257202,
2024
+ "shape": [
2025
+ 1024
2026
+ ]
2027
+ },
2028
+ "gpt.h.6.ln_2.bias": {
2029
+ "scale": 0.5870022177696228,
2030
+ "shape": [
2031
+ 1024
2032
+ ]
2033
+ },
2034
+ "gpt.h.6.ln_2.weight": {
2035
+ "scale": 1.7807389497756958,
2036
+ "shape": [
2037
+ 1024
2038
+ ]
2039
+ },
2040
+ "gpt.h.6.mlp.c_fc.bias": {
2041
+ "scale": 0.18498767912387848,
2042
+ "shape": [
2043
+ 4096
2044
+ ]
2045
+ },
2046
+ "gpt.h.6.mlp.c_fc.weight": {
2047
+ "scale": 0.8209737539291382,
2048
+ "shape": [
2049
+ 1024,
2050
+ 4096
2051
+ ]
2052
+ },
2053
+ "gpt.h.6.mlp.c_proj.bias": {
2054
+ "scale": 0.5902945399284363,
2055
+ "shape": [
2056
+ 1024
2057
+ ]
2058
+ },
2059
+ "gpt.h.6.mlp.c_proj.weight": {
2060
+ "scale": 6.16025972366333,
2061
+ "shape": [
2062
+ 4096,
2063
+ 1024
2064
+ ]
2065
+ },
2066
+ "gpt.h.7.attn.c_attn.bias": {
2067
+ "scale": 0.3166765570640564,
2068
+ "shape": [
2069
+ 3072
2070
+ ]
2071
+ },
2072
+ "gpt.h.7.attn.c_attn.weight": {
2073
+ "scale": 0.6890222430229187,
2074
+ "shape": [
2075
+ 1024,
2076
+ 3072
2077
+ ]
2078
+ },
2079
+ "gpt.h.7.attn.c_proj.bias": {
2080
+ "scale": 0.63676917552948,
2081
+ "shape": [
2082
+ 1024
2083
+ ]
2084
+ },
2085
+ "gpt.h.7.attn.c_proj.weight": {
2086
+ "scale": 0.659460186958313,
2087
+ "shape": [
2088
+ 1024,
2089
+ 1024
2090
+ ]
2091
+ },
2092
+ "gpt.h.7.ln_1.bias": {
2093
+ "scale": 1.0708993673324585,
2094
+ "shape": [
2095
+ 1024
2096
+ ]
2097
+ },
2098
+ "gpt.h.7.ln_1.weight": {
2099
+ "scale": 1.055467963218689,
2100
+ "shape": [
2101
+ 1024
2102
+ ]
2103
+ },
2104
+ "gpt.h.7.ln_2.bias": {
2105
+ "scale": 0.9893343448638916,
2106
+ "shape": [
2107
+ 1024
2108
+ ]
2109
+ },
2110
+ "gpt.h.7.ln_2.weight": {
2111
+ "scale": 1.8035027980804443,
2112
+ "shape": [
2113
+ 1024
2114
+ ]
2115
+ },
2116
+ "gpt.h.7.mlp.c_fc.bias": {
2117
+ "scale": 0.2329855114221573,
2118
+ "shape": [
2119
+ 4096
2120
+ ]
2121
+ },
2122
+ "gpt.h.7.mlp.c_fc.weight": {
2123
+ "scale": 0.4914255440235138,
2124
+ "shape": [
2125
+ 1024,
2126
+ 4096
2127
+ ]
2128
+ },
2129
+ "gpt.h.7.mlp.c_proj.bias": {
2130
+ "scale": 0.7499263882637024,
2131
+ "shape": [
2132
+ 1024
2133
+ ]
2134
+ },
2135
+ "gpt.h.7.mlp.c_proj.weight": {
2136
+ "scale": 2.9818549156188965,
2137
+ "shape": [
2138
+ 4096,
2139
+ 1024
2140
+ ]
2141
+ },
2142
+ "gpt.h.8.attn.c_attn.bias": {
2143
+ "scale": 0.2652284502983093,
2144
+ "shape": [
2145
+ 3072
2146
+ ]
2147
+ },
2148
+ "gpt.h.8.attn.c_attn.weight": {
2149
+ "scale": 0.6239627599716187,
2150
+ "shape": [
2151
+ 1024,
2152
+ 3072
2153
+ ]
2154
+ },
2155
+ "gpt.h.8.attn.c_proj.bias": {
2156
+ "scale": 0.6191520094871521,
2157
+ "shape": [
2158
+ 1024
2159
+ ]
2160
+ },
2161
+ "gpt.h.8.attn.c_proj.weight": {
2162
+ "scale": 0.6664621829986572,
2163
+ "shape": [
2164
+ 1024,
2165
+ 1024
2166
+ ]
2167
+ },
2168
+ "gpt.h.8.ln_1.bias": {
2169
+ "scale": 0.8762368559837341,
2170
+ "shape": [
2171
+ 1024
2172
+ ]
2173
+ },
2174
+ "gpt.h.8.ln_1.weight": {
2175
+ "scale": 1.153762936592102,
2176
+ "shape": [
2177
+ 1024
2178
+ ]
2179
+ },
2180
+ "gpt.h.8.ln_2.bias": {
2181
+ "scale": 0.9868759512901306,
2182
+ "shape": [
2183
+ 1024
2184
+ ]
2185
+ },
2186
+ "gpt.h.8.ln_2.weight": {
2187
+ "scale": 1.7482178211212158,
2188
+ "shape": [
2189
+ 1024
2190
+ ]
2191
+ },
2192
+ "gpt.h.8.mlp.c_fc.bias": {
2193
+ "scale": 0.18552374839782715,
2194
+ "shape": [
2195
+ 4096
2196
+ ]
2197
+ },
2198
+ "gpt.h.8.mlp.c_fc.weight": {
2199
+ "scale": 0.47477778792381287,
2200
+ "shape": [
2201
+ 1024,
2202
+ 4096
2203
+ ]
2204
+ },
2205
+ "gpt.h.8.mlp.c_proj.bias": {
2206
+ "scale": 0.6799322962760925,
2207
+ "shape": [
2208
+ 1024
2209
+ ]
2210
+ },
2211
+ "gpt.h.8.mlp.c_proj.weight": {
2212
+ "scale": 3.0612871646881104,
2213
+ "shape": [
2214
+ 4096,
2215
+ 1024
2216
+ ]
2217
+ },
2218
+ "gpt.h.9.attn.c_attn.bias": {
2219
+ "scale": 0.309771865606308,
2220
+ "shape": [
2221
+ 3072
2222
+ ]
2223
+ },
2224
+ "gpt.h.9.attn.c_attn.weight": {
2225
+ "scale": 0.8271536231040955,
2226
+ "shape": [
2227
+ 1024,
2228
+ 3072
2229
+ ]
2230
+ },
2231
+ "gpt.h.9.attn.c_proj.bias": {
2232
+ "scale": 0.49563321471214294,
2233
+ "shape": [
2234
+ 1024
2235
+ ]
2236
+ },
2237
+ "gpt.h.9.attn.c_proj.weight": {
2238
+ "scale": 0.7561616897583008,
2239
+ "shape": [
2240
+ 1024,
2241
+ 1024
2242
+ ]
2243
+ },
2244
+ "gpt.h.9.ln_1.bias": {
2245
+ "scale": 1.0751368999481201,
2246
+ "shape": [
2247
+ 1024
2248
+ ]
2249
+ },
2250
+ "gpt.h.9.ln_1.weight": {
2251
+ "scale": 1.0969300270080566,
2252
+ "shape": [
2253
+ 1024
2254
+ ]
2255
+ },
2256
+ "gpt.h.9.ln_2.bias": {
2257
+ "scale": 0.9641826152801514,
2258
+ "shape": [
2259
+ 1024
2260
+ ]
2261
+ },
2262
+ "gpt.h.9.ln_2.weight": {
2263
+ "scale": 1.803221344947815,
2264
+ "shape": [
2265
+ 1024
2266
+ ]
2267
+ },
2268
+ "gpt.h.9.mlp.c_fc.bias": {
2269
+ "scale": 0.22008062899112701,
2270
+ "shape": [
2271
+ 4096
2272
+ ]
2273
+ },
2274
+ "gpt.h.9.mlp.c_fc.weight": {
2275
+ "scale": 0.5351904630661011,
2276
+ "shape": [
2277
+ 1024,
2278
+ 4096
2279
+ ]
2280
+ },
2281
+ "gpt.h.9.mlp.c_proj.bias": {
2282
+ "scale": 0.7559229731559753,
2283
+ "shape": [
2284
+ 1024
2285
+ ]
2286
+ },
2287
+ "gpt.h.9.mlp.c_proj.weight": {
2288
+ "scale": 3.0584769248962402,
2289
+ "shape": [
2290
+ 4096,
2291
+ 1024
2292
+ ]
2293
+ },
2294
+ "gpt.ln_f.bias": {
2295
+ "scale": 1.4052708148956299,
2296
+ "shape": [
2297
+ 1024
2298
+ ]
2299
+ },
2300
+ "gpt.ln_f.weight": {
2301
+ "scale": 2.9010915756225586,
2302
+ "shape": [
2303
+ 1024
2304
+ ]
2305
+ },
2306
+ "gpt.wpe.emb.weight": {
2307
+ "scale": 0.5411291122436523,
2308
+ "shape": [
2309
+ 608,
2310
+ 1024
2311
+ ]
2312
+ },
2313
+ "gpt.wte.weight": {
2314
+ "scale": 0.5614672303199768,
2315
+ "shape": [
2316
+ 1026,
2317
+ 1024
2318
+ ]
2319
+ },
2320
+ "mel_head.bias": {
2321
+ "scale": 0.19914697110652924,
2322
+ "shape": [
2323
+ 1026
2324
+ ]
2325
+ },
2326
+ "mel_head.weight": {
2327
+ "scale": 0.5308834314346313,
2328
+ "shape": [
2329
+ 1026,
2330
+ 1024
2331
+ ]
2332
+ }
2333
+ }
INT2/special_tokens_map.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "[START]",
3
+ "eos_token": "[STOP]",
4
+ "pad_token": "[PAD]",
5
+ "unk_token": "[UNK]"
6
+ }
INT2/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
INT2/tokenizer_config.json ADDED
@@ -0,0 +1,192 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[STOP]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "1": {
12
+ "content": "[UNK]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "2": {
20
+ "content": "[SPACE]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "259": {
28
+ "content": "[en]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "260": {
36
+ "content": "[de]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ },
43
+ "261": {
44
+ "content": "[START]",
45
+ "lstrip": false,
46
+ "normalized": false,
47
+ "rstrip": false,
48
+ "single_word": false,
49
+ "special": true
50
+ },
51
+ "262": {
52
+ "content": "[fr]",
53
+ "lstrip": false,
54
+ "normalized": false,
55
+ "rstrip": false,
56
+ "single_word": false,
57
+ "special": true
58
+ },
59
+ "267": {
60
+ "content": "[ru]",
61
+ "lstrip": false,
62
+ "normalized": false,
63
+ "rstrip": false,
64
+ "single_word": false,
65
+ "special": true
66
+ },
67
+ "284": {
68
+ "content": "[es]",
69
+ "lstrip": false,
70
+ "normalized": false,
71
+ "rstrip": false,
72
+ "single_word": false,
73
+ "special": true
74
+ },
75
+ "285": {
76
+ "content": "[it]",
77
+ "lstrip": false,
78
+ "normalized": false,
79
+ "rstrip": false,
80
+ "single_word": false,
81
+ "special": true
82
+ },
83
+ "286": {
84
+ "content": "[pt]",
85
+ "lstrip": false,
86
+ "normalized": false,
87
+ "rstrip": false,
88
+ "single_word": false,
89
+ "special": true
90
+ },
91
+ "293": {
92
+ "content": "[cs]",
93
+ "lstrip": false,
94
+ "normalized": false,
95
+ "rstrip": false,
96
+ "single_word": false,
97
+ "special": true
98
+ },
99
+ "294": {
100
+ "content": "[pl]",
101
+ "lstrip": false,
102
+ "normalized": false,
103
+ "rstrip": false,
104
+ "single_word": false,
105
+ "special": true
106
+ },
107
+ "295": {
108
+ "content": "[tr]",
109
+ "lstrip": false,
110
+ "normalized": false,
111
+ "rstrip": false,
112
+ "single_word": false,
113
+ "special": true
114
+ },
115
+ "297": {
116
+ "content": "[nl]",
117
+ "lstrip": false,
118
+ "normalized": false,
119
+ "rstrip": false,
120
+ "single_word": false,
121
+ "special": true
122
+ },
123
+ "5022": {
124
+ "content": "[ar]",
125
+ "lstrip": false,
126
+ "normalized": false,
127
+ "rstrip": false,
128
+ "single_word": false,
129
+ "special": true
130
+ },
131
+ "5023": {
132
+ "content": "[zh-cn]",
133
+ "lstrip": false,
134
+ "normalized": false,
135
+ "rstrip": false,
136
+ "single_word": false,
137
+ "special": true
138
+ },
139
+ "5412": {
140
+ "content": "[ja]",
141
+ "lstrip": false,
142
+ "normalized": false,
143
+ "rstrip": false,
144
+ "single_word": false,
145
+ "special": true
146
+ },
147
+ "5753": {
148
+ "content": "[hu]",
149
+ "lstrip": false,
150
+ "normalized": false,
151
+ "rstrip": false,
152
+ "single_word": false,
153
+ "special": true
154
+ },
155
+ "6152": {
156
+ "content": "[ko]",
157
+ "lstrip": false,
158
+ "normalized": false,
159
+ "rstrip": false,
160
+ "single_word": false,
161
+ "special": true
162
+ },
163
+ "6680": {
164
+ "content": "[hi]",
165
+ "lstrip": false,
166
+ "normalized": false,
167
+ "rstrip": false,
168
+ "single_word": false,
169
+ "special": true
170
+ },
171
+ "6681": {
172
+ "content": "[PAD]",
173
+ "lstrip": false,
174
+ "normalized": false,
175
+ "rstrip": false,
176
+ "single_word": false,
177
+ "special": true
178
+ }
179
+ },
180
+ "auto_map": {"AutoTokenizer": ["AstraMindAI/xtts2-gpt--tokenizer.XTTSTokenizerFast", null]},
181
+ "bos_token": "[START]",
182
+ "clean_up_tokenization_spaces": true,
183
+ "eos_token": "[STOP]",
184
+ "max_length": null,
185
+ "model_max_length": 1000000000000000019884624838656,
186
+ "pad_to_multiple_of": null,
187
+ "pad_token": "[PAD]",
188
+ "pad_token_type_id": 0,
189
+ "padding_side": "right",
190
+ "tokenizer_class": "XTTSTokenizerFast",
191
+ "unk_token": "[UNK]"
192
+ }
INT4/config.json ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "activation_function": "gelu_new",
3
+ "architectures": [
4
+ "XttsGPT"
5
+ ],
6
+ "attn_pdrop": 0.1,
7
+ "audio_config": {
8
+ "mel_channels": 80,
9
+ "output_sample_rate": 24000,
10
+ "sample_rate": 22050
11
+ },
12
+ "auto_map": {
13
+ "AutoConfig": "AstraMindAI/xtts2-gpt--gpt_config.XTTSGPTConfig",
14
+ "AutoModelForCausalLM": "AstraMindAI/xtts2-gpt--xtts2_gpt_modeling.XttsGPT",
15
+ "AutoTokenizer": "AstraMindAI/xtts2-gpt--tokenizer.XTTSTokenizerFast"
16
+ },
17
+ "decoder_input_dim": 1024,
18
+ "enable_redaction": false,
19
+ "gpt_batch_size": 1,
20
+ "gpt_max_audio_tokens": 605,
21
+ "hidden_size": 1024,
22
+ "initializer_range": 0.02,
23
+ "kv_cache": true,
24
+ "layer_norm_epsilon": 1e-05,
25
+ "max_audio_tokens": 605,
26
+ "max_prompt_tokens": 70,
27
+ "max_text_tokens": 402,
28
+ "model_type": "xtts_gpt",
29
+ "n_inner": 4096,
30
+ "num_attention_heads": 16,
31
+ "num_audio_tokens": 1026,
32
+ "num_hidden_layers": 30,
33
+ "number_text_tokens": 6681,
34
+ "reorder_and_upcast_attn": false,
35
+ "scale_attn_by_inverse_layer_idx": false,
36
+ "start_audio_token": 1024,
37
+ "start_text_token": null,
38
+ "stop_audio_token": 1025,
39
+ "stop_text_token": null,
40
+ "transformers_version": "4.46.0",
41
+ "use_masking_gt_prompt_approach": true,
42
+ "use_perceiver_resampler": true,
43
+ "vocab_size": 6681
44
+ }
INT4/gpt2_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:79002172c227f60838f8de5b1d956856f83e595f33bb03613c3c716a5529f220
3
+ size 190341585
INT4/int4_metadata.json ADDED
@@ -0,0 +1,2333 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "final_norm.bias": {
3
+ "scale": 0.08834397792816162,
4
+ "shape": [
5
+ 1024
6
+ ]
7
+ },
8
+ "final_norm.weight": {
9
+ "scale": 0.7890332937240601,
10
+ "shape": [
11
+ 1024
12
+ ]
13
+ },
14
+ "gpt.h.0.attn.c_attn.bias": {
15
+ "scale": 0.12309323251247406,
16
+ "shape": [
17
+ 3072
18
+ ]
19
+ },
20
+ "gpt.h.0.attn.c_attn.weight": {
21
+ "scale": 0.1041356548666954,
22
+ "shape": [
23
+ 1024,
24
+ 3072
25
+ ]
26
+ },
27
+ "gpt.h.0.attn.c_proj.bias": {
28
+ "scale": 0.08951065689325333,
29
+ "shape": [
30
+ 1024
31
+ ]
32
+ },
33
+ "gpt.h.0.attn.c_proj.weight": {
34
+ "scale": 0.1165793165564537,
35
+ "shape": [
36
+ 1024,
37
+ 1024
38
+ ]
39
+ },
40
+ "gpt.h.0.ln_1.bias": {
41
+ "scale": 0.026223499327898026,
42
+ "shape": [
43
+ 1024
44
+ ]
45
+ },
46
+ "gpt.h.0.ln_1.weight": {
47
+ "scale": 0.03431953117251396,
48
+ "shape": [
49
+ 1024
50
+ ]
51
+ },
52
+ "gpt.h.0.ln_2.bias": {
53
+ "scale": 0.20567700266838074,
54
+ "shape": [
55
+ 1024
56
+ ]
57
+ },
58
+ "gpt.h.0.ln_2.weight": {
59
+ "scale": 0.11458853632211685,
60
+ "shape": [
61
+ 1024
62
+ ]
63
+ },
64
+ "gpt.h.0.mlp.c_fc.bias": {
65
+ "scale": 0.047463927417993546,
66
+ "shape": [
67
+ 4096
68
+ ]
69
+ },
70
+ "gpt.h.0.mlp.c_fc.weight": {
71
+ "scale": 0.17041946947574615,
72
+ "shape": [
73
+ 1024,
74
+ 4096
75
+ ]
76
+ },
77
+ "gpt.h.0.mlp.c_proj.bias": {
78
+ "scale": 0.29554909467697144,
79
+ "shape": [
80
+ 1024
81
+ ]
82
+ },
83
+ "gpt.h.0.mlp.c_proj.weight": {
84
+ "scale": 0.6621343493461609,
85
+ "shape": [
86
+ 4096,
87
+ 1024
88
+ ]
89
+ },
90
+ "gpt.h.1.attn.c_attn.bias": {
91
+ "scale": 0.09180662781000137,
92
+ "shape": [
93
+ 3072
94
+ ]
95
+ },
96
+ "gpt.h.1.attn.c_attn.weight": {
97
+ "scale": 0.06224552541971207,
98
+ "shape": [
99
+ 1024,
100
+ 3072
101
+ ]
102
+ },
103
+ "gpt.h.1.attn.c_proj.bias": {
104
+ "scale": 0.2972453534603119,
105
+ "shape": [
106
+ 1024
107
+ ]
108
+ },
109
+ "gpt.h.1.attn.c_proj.weight": {
110
+ "scale": 0.45653465390205383,
111
+ "shape": [
112
+ 1024,
113
+ 1024
114
+ ]
115
+ },
116
+ "gpt.h.1.ln_1.bias": {
117
+ "scale": 0.20046189427375793,
118
+ "shape": [
119
+ 1024
120
+ ]
121
+ },
122
+ "gpt.h.1.ln_1.weight": {
123
+ "scale": 0.07655565440654755,
124
+ "shape": [
125
+ 1024
126
+ ]
127
+ },
128
+ "gpt.h.1.ln_2.bias": {
129
+ "scale": 0.21620801091194153,
130
+ "shape": [
131
+ 1024
132
+ ]
133
+ },
134
+ "gpt.h.1.ln_2.weight": {
135
+ "scale": 0.20269657671451569,
136
+ "shape": [
137
+ 1024
138
+ ]
139
+ },
140
+ "gpt.h.1.mlp.c_fc.bias": {
141
+ "scale": 0.026467734947800636,
142
+ "shape": [
143
+ 4096
144
+ ]
145
+ },
146
+ "gpt.h.1.mlp.c_fc.weight": {
147
+ "scale": 0.12142758071422577,
148
+ "shape": [
149
+ 1024,
150
+ 4096
151
+ ]
152
+ },
153
+ "gpt.h.1.mlp.c_proj.bias": {
154
+ "scale": 0.3446175754070282,
155
+ "shape": [
156
+ 1024
157
+ ]
158
+ },
159
+ "gpt.h.1.mlp.c_proj.weight": {
160
+ "scale": 0.7414490580558777,
161
+ "shape": [
162
+ 4096,
163
+ 1024
164
+ ]
165
+ },
166
+ "gpt.h.10.attn.c_attn.bias": {
167
+ "scale": 0.044142965227365494,
168
+ "shape": [
169
+ 3072
170
+ ]
171
+ },
172
+ "gpt.h.10.attn.c_attn.weight": {
173
+ "scale": 0.11839079111814499,
174
+ "shape": [
175
+ 1024,
176
+ 3072
177
+ ]
178
+ },
179
+ "gpt.h.10.attn.c_proj.bias": {
180
+ "scale": 0.0456073097884655,
181
+ "shape": [
182
+ 1024
183
+ ]
184
+ },
185
+ "gpt.h.10.attn.c_proj.weight": {
186
+ "scale": 0.08686741441488266,
187
+ "shape": [
188
+ 1024,
189
+ 1024
190
+ ]
191
+ },
192
+ "gpt.h.10.ln_1.bias": {
193
+ "scale": 0.14560243487358093,
194
+ "shape": [
195
+ 1024
196
+ ]
197
+ },
198
+ "gpt.h.10.ln_1.weight": {
199
+ "scale": 0.15239107608795166,
200
+ "shape": [
201
+ 1024
202
+ ]
203
+ },
204
+ "gpt.h.10.ln_2.bias": {
205
+ "scale": 0.13042710721492767,
206
+ "shape": [
207
+ 1024
208
+ ]
209
+ },
210
+ "gpt.h.10.ln_2.weight": {
211
+ "scale": 0.25696828961372375,
212
+ "shape": [
213
+ 1024
214
+ ]
215
+ },
216
+ "gpt.h.10.mlp.c_fc.bias": {
217
+ "scale": 0.029524575918912888,
218
+ "shape": [
219
+ 4096
220
+ ]
221
+ },
222
+ "gpt.h.10.mlp.c_fc.weight": {
223
+ "scale": 0.09666890650987625,
224
+ "shape": [
225
+ 1024,
226
+ 4096
227
+ ]
228
+ },
229
+ "gpt.h.10.mlp.c_proj.bias": {
230
+ "scale": 0.1028929278254509,
231
+ "shape": [
232
+ 1024
233
+ ]
234
+ },
235
+ "gpt.h.10.mlp.c_proj.weight": {
236
+ "scale": 0.3654429316520691,
237
+ "shape": [
238
+ 4096,
239
+ 1024
240
+ ]
241
+ },
242
+ "gpt.h.11.attn.c_attn.bias": {
243
+ "scale": 0.048490703105926514,
244
+ "shape": [
245
+ 3072
246
+ ]
247
+ },
248
+ "gpt.h.11.attn.c_attn.weight": {
249
+ "scale": 0.11234062910079956,
250
+ "shape": [
251
+ 1024,
252
+ 3072
253
+ ]
254
+ },
255
+ "gpt.h.11.attn.c_proj.bias": {
256
+ "scale": 0.0510806143283844,
257
+ "shape": [
258
+ 1024
259
+ ]
260
+ },
261
+ "gpt.h.11.attn.c_proj.weight": {
262
+ "scale": 0.08797170966863632,
263
+ "shape": [
264
+ 1024,
265
+ 1024
266
+ ]
267
+ },
268
+ "gpt.h.11.ln_1.bias": {
269
+ "scale": 0.164643794298172,
270
+ "shape": [
271
+ 1024
272
+ ]
273
+ },
274
+ "gpt.h.11.ln_1.weight": {
275
+ "scale": 0.16134341061115265,
276
+ "shape": [
277
+ 1024
278
+ ]
279
+ },
280
+ "gpt.h.11.ln_2.bias": {
281
+ "scale": 0.09844086319208145,
282
+ "shape": [
283
+ 1024
284
+ ]
285
+ },
286
+ "gpt.h.11.ln_2.weight": {
287
+ "scale": 0.22313998639583588,
288
+ "shape": [
289
+ 1024
290
+ ]
291
+ },
292
+ "gpt.h.11.mlp.c_fc.bias": {
293
+ "scale": 0.03060619719326496,
294
+ "shape": [
295
+ 4096
296
+ ]
297
+ },
298
+ "gpt.h.11.mlp.c_fc.weight": {
299
+ "scale": 0.06761165708303452,
300
+ "shape": [
301
+ 1024,
302
+ 4096
303
+ ]
304
+ },
305
+ "gpt.h.11.mlp.c_proj.bias": {
306
+ "scale": 0.10643502324819565,
307
+ "shape": [
308
+ 1024
309
+ ]
310
+ },
311
+ "gpt.h.11.mlp.c_proj.weight": {
312
+ "scale": 0.4576462209224701,
313
+ "shape": [
314
+ 4096,
315
+ 1024
316
+ ]
317
+ },
318
+ "gpt.h.12.attn.c_attn.bias": {
319
+ "scale": 0.04992813244462013,
320
+ "shape": [
321
+ 3072
322
+ ]
323
+ },
324
+ "gpt.h.12.attn.c_attn.weight": {
325
+ "scale": 0.14503903687000275,
326
+ "shape": [
327
+ 1024,
328
+ 3072
329
+ ]
330
+ },
331
+ "gpt.h.12.attn.c_proj.bias": {
332
+ "scale": 0.033475592732429504,
333
+ "shape": [
334
+ 1024
335
+ ]
336
+ },
337
+ "gpt.h.12.attn.c_proj.weight": {
338
+ "scale": 0.11046390980482101,
339
+ "shape": [
340
+ 1024,
341
+ 1024
342
+ ]
343
+ },
344
+ "gpt.h.12.ln_1.bias": {
345
+ "scale": 0.18613700568675995,
346
+ "shape": [
347
+ 1024
348
+ ]
349
+ },
350
+ "gpt.h.12.ln_1.weight": {
351
+ "scale": 0.16866794228553772,
352
+ "shape": [
353
+ 1024
354
+ ]
355
+ },
356
+ "gpt.h.12.ln_2.bias": {
357
+ "scale": 0.12722595036029816,
358
+ "shape": [
359
+ 1024
360
+ ]
361
+ },
362
+ "gpt.h.12.ln_2.weight": {
363
+ "scale": 0.23250164091587067,
364
+ "shape": [
365
+ 1024
366
+ ]
367
+ },
368
+ "gpt.h.12.mlp.c_fc.bias": {
369
+ "scale": 0.014812483452260494,
370
+ "shape": [
371
+ 4096
372
+ ]
373
+ },
374
+ "gpt.h.12.mlp.c_fc.weight": {
375
+ "scale": 0.068848617374897,
376
+ "shape": [
377
+ 1024,
378
+ 4096
379
+ ]
380
+ },
381
+ "gpt.h.12.mlp.c_proj.bias": {
382
+ "scale": 0.12035634368658066,
383
+ "shape": [
384
+ 1024
385
+ ]
386
+ },
387
+ "gpt.h.12.mlp.c_proj.weight": {
388
+ "scale": 0.2743368148803711,
389
+ "shape": [
390
+ 4096,
391
+ 1024
392
+ ]
393
+ },
394
+ "gpt.h.13.attn.c_attn.bias": {
395
+ "scale": 0.04760030657052994,
396
+ "shape": [
397
+ 3072
398
+ ]
399
+ },
400
+ "gpt.h.13.attn.c_attn.weight": {
401
+ "scale": 0.08714257925748825,
402
+ "shape": [
403
+ 1024,
404
+ 3072
405
+ ]
406
+ },
407
+ "gpt.h.13.attn.c_proj.bias": {
408
+ "scale": 0.06746888160705566,
409
+ "shape": [
410
+ 1024
411
+ ]
412
+ },
413
+ "gpt.h.13.attn.c_proj.weight": {
414
+ "scale": 0.11844473332166672,
415
+ "shape": [
416
+ 1024,
417
+ 1024
418
+ ]
419
+ },
420
+ "gpt.h.13.ln_1.bias": {
421
+ "scale": 0.15157721936702728,
422
+ "shape": [
423
+ 1024
424
+ ]
425
+ },
426
+ "gpt.h.13.ln_1.weight": {
427
+ "scale": 0.16393320262432098,
428
+ "shape": [
429
+ 1024
430
+ ]
431
+ },
432
+ "gpt.h.13.ln_2.bias": {
433
+ "scale": 0.1306534856557846,
434
+ "shape": [
435
+ 1024
436
+ ]
437
+ },
438
+ "gpt.h.13.ln_2.weight": {
439
+ "scale": 0.22248651087284088,
440
+ "shape": [
441
+ 1024
442
+ ]
443
+ },
444
+ "gpt.h.13.mlp.c_fc.bias": {
445
+ "scale": 0.019469883292913437,
446
+ "shape": [
447
+ 4096
448
+ ]
449
+ },
450
+ "gpt.h.13.mlp.c_fc.weight": {
451
+ "scale": 0.06430874019861221,
452
+ "shape": [
453
+ 1024,
454
+ 4096
455
+ ]
456
+ },
457
+ "gpt.h.13.mlp.c_proj.bias": {
458
+ "scale": 0.11026618629693985,
459
+ "shape": [
460
+ 1024
461
+ ]
462
+ },
463
+ "gpt.h.13.mlp.c_proj.weight": {
464
+ "scale": 0.2532596290111542,
465
+ "shape": [
466
+ 4096,
467
+ 1024
468
+ ]
469
+ },
470
+ "gpt.h.14.attn.c_attn.bias": {
471
+ "scale": 0.04019672051072121,
472
+ "shape": [
473
+ 3072
474
+ ]
475
+ },
476
+ "gpt.h.14.attn.c_attn.weight": {
477
+ "scale": 0.14072343707084656,
478
+ "shape": [
479
+ 1024,
480
+ 3072
481
+ ]
482
+ },
483
+ "gpt.h.14.attn.c_proj.bias": {
484
+ "scale": 0.02364635095000267,
485
+ "shape": [
486
+ 1024
487
+ ]
488
+ },
489
+ "gpt.h.14.attn.c_proj.weight": {
490
+ "scale": 0.10160692036151886,
491
+ "shape": [
492
+ 1024,
493
+ 1024
494
+ ]
495
+ },
496
+ "gpt.h.14.ln_1.bias": {
497
+ "scale": 0.159767284989357,
498
+ "shape": [
499
+ 1024
500
+ ]
501
+ },
502
+ "gpt.h.14.ln_1.weight": {
503
+ "scale": 0.16219459474086761,
504
+ "shape": [
505
+ 1024
506
+ ]
507
+ },
508
+ "gpt.h.14.ln_2.bias": {
509
+ "scale": 0.13014180958271027,
510
+ "shape": [
511
+ 1024
512
+ ]
513
+ },
514
+ "gpt.h.14.ln_2.weight": {
515
+ "scale": 0.21348479390144348,
516
+ "shape": [
517
+ 1024
518
+ ]
519
+ },
520
+ "gpt.h.14.mlp.c_fc.bias": {
521
+ "scale": 0.01531550008803606,
522
+ "shape": [
523
+ 4096
524
+ ]
525
+ },
526
+ "gpt.h.14.mlp.c_fc.weight": {
527
+ "scale": 0.08244454115629196,
528
+ "shape": [
529
+ 1024,
530
+ 4096
531
+ ]
532
+ },
533
+ "gpt.h.14.mlp.c_proj.bias": {
534
+ "scale": 0.1048700362443924,
535
+ "shape": [
536
+ 1024
537
+ ]
538
+ },
539
+ "gpt.h.14.mlp.c_proj.weight": {
540
+ "scale": 0.28695791959762573,
541
+ "shape": [
542
+ 4096,
543
+ 1024
544
+ ]
545
+ },
546
+ "gpt.h.15.attn.c_attn.bias": {
547
+ "scale": 0.04081624746322632,
548
+ "shape": [
549
+ 3072
550
+ ]
551
+ },
552
+ "gpt.h.15.attn.c_attn.weight": {
553
+ "scale": 0.07819346338510513,
554
+ "shape": [
555
+ 1024,
556
+ 3072
557
+ ]
558
+ },
559
+ "gpt.h.15.attn.c_proj.bias": {
560
+ "scale": 0.08492325991392136,
561
+ "shape": [
562
+ 1024
563
+ ]
564
+ },
565
+ "gpt.h.15.attn.c_proj.weight": {
566
+ "scale": 0.11697079986333847,
567
+ "shape": [
568
+ 1024,
569
+ 1024
570
+ ]
571
+ },
572
+ "gpt.h.15.ln_1.bias": {
573
+ "scale": 0.14451484382152557,
574
+ "shape": [
575
+ 1024
576
+ ]
577
+ },
578
+ "gpt.h.15.ln_1.weight": {
579
+ "scale": 0.16579607129096985,
580
+ "shape": [
581
+ 1024
582
+ ]
583
+ },
584
+ "gpt.h.15.ln_2.bias": {
585
+ "scale": 0.11252031475305557,
586
+ "shape": [
587
+ 1024
588
+ ]
589
+ },
590
+ "gpt.h.15.ln_2.weight": {
591
+ "scale": 0.20037208497524261,
592
+ "shape": [
593
+ 1024
594
+ ]
595
+ },
596
+ "gpt.h.15.mlp.c_fc.bias": {
597
+ "scale": 0.020076142624020576,
598
+ "shape": [
599
+ 4096
600
+ ]
601
+ },
602
+ "gpt.h.15.mlp.c_fc.weight": {
603
+ "scale": 0.0844724252820015,
604
+ "shape": [
605
+ 1024,
606
+ 4096
607
+ ]
608
+ },
609
+ "gpt.h.15.mlp.c_proj.bias": {
610
+ "scale": 0.09757417440414429,
611
+ "shape": [
612
+ 1024
613
+ ]
614
+ },
615
+ "gpt.h.15.mlp.c_proj.weight": {
616
+ "scale": 0.310793936252594,
617
+ "shape": [
618
+ 4096,
619
+ 1024
620
+ ]
621
+ },
622
+ "gpt.h.16.attn.c_attn.bias": {
623
+ "scale": 0.04743470624089241,
624
+ "shape": [
625
+ 3072
626
+ ]
627
+ },
628
+ "gpt.h.16.attn.c_attn.weight": {
629
+ "scale": 0.12722057104110718,
630
+ "shape": [
631
+ 1024,
632
+ 3072
633
+ ]
634
+ },
635
+ "gpt.h.16.attn.c_proj.bias": {
636
+ "scale": 0.09606064110994339,
637
+ "shape": [
638
+ 1024
639
+ ]
640
+ },
641
+ "gpt.h.16.attn.c_proj.weight": {
642
+ "scale": 0.07591201364994049,
643
+ "shape": [
644
+ 1024,
645
+ 1024
646
+ ]
647
+ },
648
+ "gpt.h.16.ln_1.bias": {
649
+ "scale": 0.15709154307842255,
650
+ "shape": [
651
+ 1024
652
+ ]
653
+ },
654
+ "gpt.h.16.ln_1.weight": {
655
+ "scale": 0.15911130607128143,
656
+ "shape": [
657
+ 1024
658
+ ]
659
+ },
660
+ "gpt.h.16.ln_2.bias": {
661
+ "scale": 0.12005039304494858,
662
+ "shape": [
663
+ 1024
664
+ ]
665
+ },
666
+ "gpt.h.16.ln_2.weight": {
667
+ "scale": 0.18429037928581238,
668
+ "shape": [
669
+ 1024
670
+ ]
671
+ },
672
+ "gpt.h.16.mlp.c_fc.bias": {
673
+ "scale": 0.022201305255293846,
674
+ "shape": [
675
+ 4096
676
+ ]
677
+ },
678
+ "gpt.h.16.mlp.c_fc.weight": {
679
+ "scale": 0.08008849620819092,
680
+ "shape": [
681
+ 1024,
682
+ 4096
683
+ ]
684
+ },
685
+ "gpt.h.16.mlp.c_proj.bias": {
686
+ "scale": 0.09774907678365707,
687
+ "shape": [
688
+ 1024
689
+ ]
690
+ },
691
+ "gpt.h.16.mlp.c_proj.weight": {
692
+ "scale": 0.16212008893489838,
693
+ "shape": [
694
+ 4096,
695
+ 1024
696
+ ]
697
+ },
698
+ "gpt.h.17.attn.c_attn.bias": {
699
+ "scale": 0.04417693614959717,
700
+ "shape": [
701
+ 3072
702
+ ]
703
+ },
704
+ "gpt.h.17.attn.c_attn.weight": {
705
+ "scale": 0.1320490539073944,
706
+ "shape": [
707
+ 1024,
708
+ 3072
709
+ ]
710
+ },
711
+ "gpt.h.17.attn.c_proj.bias": {
712
+ "scale": 0.06338126957416534,
713
+ "shape": [
714
+ 1024
715
+ ]
716
+ },
717
+ "gpt.h.17.attn.c_proj.weight": {
718
+ "scale": 0.13893575966358185,
719
+ "shape": [
720
+ 1024,
721
+ 1024
722
+ ]
723
+ },
724
+ "gpt.h.17.ln_1.bias": {
725
+ "scale": 0.15823052823543549,
726
+ "shape": [
727
+ 1024
728
+ ]
729
+ },
730
+ "gpt.h.17.ln_1.weight": {
731
+ "scale": 0.16002225875854492,
732
+ "shape": [
733
+ 1024
734
+ ]
735
+ },
736
+ "gpt.h.17.ln_2.bias": {
737
+ "scale": 0.13184016942977905,
738
+ "shape": [
739
+ 1024
740
+ ]
741
+ },
742
+ "gpt.h.17.ln_2.weight": {
743
+ "scale": 0.1869189441204071,
744
+ "shape": [
745
+ 1024
746
+ ]
747
+ },
748
+ "gpt.h.17.mlp.c_fc.bias": {
749
+ "scale": 0.02481200359761715,
750
+ "shape": [
751
+ 4096
752
+ ]
753
+ },
754
+ "gpt.h.17.mlp.c_fc.weight": {
755
+ "scale": 0.0685386210680008,
756
+ "shape": [
757
+ 1024,
758
+ 4096
759
+ ]
760
+ },
761
+ "gpt.h.17.mlp.c_proj.bias": {
762
+ "scale": 0.08835189044475555,
763
+ "shape": [
764
+ 1024
765
+ ]
766
+ },
767
+ "gpt.h.17.mlp.c_proj.weight": {
768
+ "scale": 0.18417593836784363,
769
+ "shape": [
770
+ 4096,
771
+ 1024
772
+ ]
773
+ },
774
+ "gpt.h.18.attn.c_attn.bias": {
775
+ "scale": 0.037581078708171844,
776
+ "shape": [
777
+ 3072
778
+ ]
779
+ },
780
+ "gpt.h.18.attn.c_attn.weight": {
781
+ "scale": 0.12254094332456589,
782
+ "shape": [
783
+ 1024,
784
+ 3072
785
+ ]
786
+ },
787
+ "gpt.h.18.attn.c_proj.bias": {
788
+ "scale": 0.11249931156635284,
789
+ "shape": [
790
+ 1024
791
+ ]
792
+ },
793
+ "gpt.h.18.attn.c_proj.weight": {
794
+ "scale": 0.09757298976182938,
795
+ "shape": [
796
+ 1024,
797
+ 1024
798
+ ]
799
+ },
800
+ "gpt.h.18.ln_1.bias": {
801
+ "scale": 0.1457567662000656,
802
+ "shape": [
803
+ 1024
804
+ ]
805
+ },
806
+ "gpt.h.18.ln_1.weight": {
807
+ "scale": 0.15891136229038239,
808
+ "shape": [
809
+ 1024
810
+ ]
811
+ },
812
+ "gpt.h.18.ln_2.bias": {
813
+ "scale": 0.11753548681735992,
814
+ "shape": [
815
+ 1024
816
+ ]
817
+ },
818
+ "gpt.h.18.ln_2.weight": {
819
+ "scale": 0.17903394997119904,
820
+ "shape": [
821
+ 1024
822
+ ]
823
+ },
824
+ "gpt.h.18.mlp.c_fc.bias": {
825
+ "scale": 0.021563095971941948,
826
+ "shape": [
827
+ 4096
828
+ ]
829
+ },
830
+ "gpt.h.18.mlp.c_fc.weight": {
831
+ "scale": 0.06829343736171722,
832
+ "shape": [
833
+ 1024,
834
+ 4096
835
+ ]
836
+ },
837
+ "gpt.h.18.mlp.c_proj.bias": {
838
+ "scale": 0.06819847971200943,
839
+ "shape": [
840
+ 1024
841
+ ]
842
+ },
843
+ "gpt.h.18.mlp.c_proj.weight": {
844
+ "scale": 0.12814833223819733,
845
+ "shape": [
846
+ 4096,
847
+ 1024
848
+ ]
849
+ },
850
+ "gpt.h.19.attn.c_attn.bias": {
851
+ "scale": 0.04691386595368385,
852
+ "shape": [
853
+ 3072
854
+ ]
855
+ },
856
+ "gpt.h.19.attn.c_attn.weight": {
857
+ "scale": 0.11684088408946991,
858
+ "shape": [
859
+ 1024,
860
+ 3072
861
+ ]
862
+ },
863
+ "gpt.h.19.attn.c_proj.bias": {
864
+ "scale": 0.05939425155520439,
865
+ "shape": [
866
+ 1024
867
+ ]
868
+ },
869
+ "gpt.h.19.attn.c_proj.weight": {
870
+ "scale": 0.12523870170116425,
871
+ "shape": [
872
+ 1024,
873
+ 1024
874
+ ]
875
+ },
876
+ "gpt.h.19.ln_1.bias": {
877
+ "scale": 0.14781410992145538,
878
+ "shape": [
879
+ 1024
880
+ ]
881
+ },
882
+ "gpt.h.19.ln_1.weight": {
883
+ "scale": 0.15612022578716278,
884
+ "shape": [
885
+ 1024
886
+ ]
887
+ },
888
+ "gpt.h.19.ln_2.bias": {
889
+ "scale": 0.10865359008312225,
890
+ "shape": [
891
+ 1024
892
+ ]
893
+ },
894
+ "gpt.h.19.ln_2.weight": {
895
+ "scale": 0.17884387075901031,
896
+ "shape": [
897
+ 1024
898
+ ]
899
+ },
900
+ "gpt.h.19.mlp.c_fc.bias": {
901
+ "scale": 0.023445727303624153,
902
+ "shape": [
903
+ 4096
904
+ ]
905
+ },
906
+ "gpt.h.19.mlp.c_fc.weight": {
907
+ "scale": 0.07771860063076019,
908
+ "shape": [
909
+ 1024,
910
+ 4096
911
+ ]
912
+ },
913
+ "gpt.h.19.mlp.c_proj.bias": {
914
+ "scale": 0.05867428332567215,
915
+ "shape": [
916
+ 1024
917
+ ]
918
+ },
919
+ "gpt.h.19.mlp.c_proj.weight": {
920
+ "scale": 0.1523352414369583,
921
+ "shape": [
922
+ 4096,
923
+ 1024
924
+ ]
925
+ },
926
+ "gpt.h.2.attn.c_attn.bias": {
927
+ "scale": 0.04482164978981018,
928
+ "shape": [
929
+ 3072
930
+ ]
931
+ },
932
+ "gpt.h.2.attn.c_attn.weight": {
933
+ "scale": 0.06159922480583191,
934
+ "shape": [
935
+ 1024,
936
+ 3072
937
+ ]
938
+ },
939
+ "gpt.h.2.attn.c_proj.bias": {
940
+ "scale": 0.3751963675022125,
941
+ "shape": [
942
+ 1024
943
+ ]
944
+ },
945
+ "gpt.h.2.attn.c_proj.weight": {
946
+ "scale": 0.4305261969566345,
947
+ "shape": [
948
+ 1024,
949
+ 1024
950
+ ]
951
+ },
952
+ "gpt.h.2.ln_1.bias": {
953
+ "scale": 0.16870178282260895,
954
+ "shape": [
955
+ 1024
956
+ ]
957
+ },
958
+ "gpt.h.2.ln_1.weight": {
959
+ "scale": 0.09723272174596786,
960
+ "shape": [
961
+ 1024
962
+ ]
963
+ },
964
+ "gpt.h.2.ln_2.bias": {
965
+ "scale": 0.14638309180736542,
966
+ "shape": [
967
+ 1024
968
+ ]
969
+ },
970
+ "gpt.h.2.ln_2.weight": {
971
+ "scale": 0.24350528419017792,
972
+ "shape": [
973
+ 1024
974
+ ]
975
+ },
976
+ "gpt.h.2.mlp.c_fc.bias": {
977
+ "scale": 0.054711032658815384,
978
+ "shape": [
979
+ 4096
980
+ ]
981
+ },
982
+ "gpt.h.2.mlp.c_fc.weight": {
983
+ "scale": 0.19500534236431122,
984
+ "shape": [
985
+ 1024,
986
+ 4096
987
+ ]
988
+ },
989
+ "gpt.h.2.mlp.c_proj.bias": {
990
+ "scale": 0.2771886885166168,
991
+ "shape": [
992
+ 1024
993
+ ]
994
+ },
995
+ "gpt.h.2.mlp.c_proj.weight": {
996
+ "scale": 0.7297950983047485,
997
+ "shape": [
998
+ 4096,
999
+ 1024
1000
+ ]
1001
+ },
1002
+ "gpt.h.20.attn.c_attn.bias": {
1003
+ "scale": 0.03594465181231499,
1004
+ "shape": [
1005
+ 3072
1006
+ ]
1007
+ },
1008
+ "gpt.h.20.attn.c_attn.weight": {
1009
+ "scale": 0.12434620410203934,
1010
+ "shape": [
1011
+ 1024,
1012
+ 3072
1013
+ ]
1014
+ },
1015
+ "gpt.h.20.attn.c_proj.bias": {
1016
+ "scale": 0.09055911749601364,
1017
+ "shape": [
1018
+ 1024
1019
+ ]
1020
+ },
1021
+ "gpt.h.20.attn.c_proj.weight": {
1022
+ "scale": 0.10791827738285065,
1023
+ "shape": [
1024
+ 1024,
1025
+ 1024
1026
+ ]
1027
+ },
1028
+ "gpt.h.20.ln_1.bias": {
1029
+ "scale": 0.13368119299411774,
1030
+ "shape": [
1031
+ 1024
1032
+ ]
1033
+ },
1034
+ "gpt.h.20.ln_1.weight": {
1035
+ "scale": 0.15670302510261536,
1036
+ "shape": [
1037
+ 1024
1038
+ ]
1039
+ },
1040
+ "gpt.h.20.ln_2.bias": {
1041
+ "scale": 0.11155271530151367,
1042
+ "shape": [
1043
+ 1024
1044
+ ]
1045
+ },
1046
+ "gpt.h.20.ln_2.weight": {
1047
+ "scale": 0.16725917160511017,
1048
+ "shape": [
1049
+ 1024
1050
+ ]
1051
+ },
1052
+ "gpt.h.20.mlp.c_fc.bias": {
1053
+ "scale": 0.015253099612891674,
1054
+ "shape": [
1055
+ 4096
1056
+ ]
1057
+ },
1058
+ "gpt.h.20.mlp.c_fc.weight": {
1059
+ "scale": 0.06497155874967575,
1060
+ "shape": [
1061
+ 1024,
1062
+ 4096
1063
+ ]
1064
+ },
1065
+ "gpt.h.20.mlp.c_proj.bias": {
1066
+ "scale": 0.04290299862623215,
1067
+ "shape": [
1068
+ 1024
1069
+ ]
1070
+ },
1071
+ "gpt.h.20.mlp.c_proj.weight": {
1072
+ "scale": 0.2691304385662079,
1073
+ "shape": [
1074
+ 4096,
1075
+ 1024
1076
+ ]
1077
+ },
1078
+ "gpt.h.21.attn.c_attn.bias": {
1079
+ "scale": 0.04429187998175621,
1080
+ "shape": [
1081
+ 3072
1082
+ ]
1083
+ },
1084
+ "gpt.h.21.attn.c_attn.weight": {
1085
+ "scale": 0.10078362375497818,
1086
+ "shape": [
1087
+ 1024,
1088
+ 3072
1089
+ ]
1090
+ },
1091
+ "gpt.h.21.attn.c_proj.bias": {
1092
+ "scale": 0.07908321917057037,
1093
+ "shape": [
1094
+ 1024
1095
+ ]
1096
+ },
1097
+ "gpt.h.21.attn.c_proj.weight": {
1098
+ "scale": 0.1681288182735443,
1099
+ "shape": [
1100
+ 1024,
1101
+ 1024
1102
+ ]
1103
+ },
1104
+ "gpt.h.21.ln_1.bias": {
1105
+ "scale": 0.12408334761857986,
1106
+ "shape": [
1107
+ 1024
1108
+ ]
1109
+ },
1110
+ "gpt.h.21.ln_1.weight": {
1111
+ "scale": 0.1594894379377365,
1112
+ "shape": [
1113
+ 1024
1114
+ ]
1115
+ },
1116
+ "gpt.h.21.ln_2.bias": {
1117
+ "scale": 0.10880132019519806,
1118
+ "shape": [
1119
+ 1024
1120
+ ]
1121
+ },
1122
+ "gpt.h.21.ln_2.weight": {
1123
+ "scale": 0.1579473465681076,
1124
+ "shape": [
1125
+ 1024
1126
+ ]
1127
+ },
1128
+ "gpt.h.21.mlp.c_fc.bias": {
1129
+ "scale": 0.02153877541422844,
1130
+ "shape": [
1131
+ 4096
1132
+ ]
1133
+ },
1134
+ "gpt.h.21.mlp.c_fc.weight": {
1135
+ "scale": 0.07017097622156143,
1136
+ "shape": [
1137
+ 1024,
1138
+ 4096
1139
+ ]
1140
+ },
1141
+ "gpt.h.21.mlp.c_proj.bias": {
1142
+ "scale": 0.025096621364355087,
1143
+ "shape": [
1144
+ 1024
1145
+ ]
1146
+ },
1147
+ "gpt.h.21.mlp.c_proj.weight": {
1148
+ "scale": 0.20477719604969025,
1149
+ "shape": [
1150
+ 4096,
1151
+ 1024
1152
+ ]
1153
+ },
1154
+ "gpt.h.22.attn.c_attn.bias": {
1155
+ "scale": 0.04005954787135124,
1156
+ "shape": [
1157
+ 3072
1158
+ ]
1159
+ },
1160
+ "gpt.h.22.attn.c_attn.weight": {
1161
+ "scale": 0.07027842104434967,
1162
+ "shape": [
1163
+ 1024,
1164
+ 3072
1165
+ ]
1166
+ },
1167
+ "gpt.h.22.attn.c_proj.bias": {
1168
+ "scale": 0.06767906248569489,
1169
+ "shape": [
1170
+ 1024
1171
+ ]
1172
+ },
1173
+ "gpt.h.22.attn.c_proj.weight": {
1174
+ "scale": 0.09001224488019943,
1175
+ "shape": [
1176
+ 1024,
1177
+ 1024
1178
+ ]
1179
+ },
1180
+ "gpt.h.22.ln_1.bias": {
1181
+ "scale": 0.11637736856937408,
1182
+ "shape": [
1183
+ 1024
1184
+ ]
1185
+ },
1186
+ "gpt.h.22.ln_1.weight": {
1187
+ "scale": 0.15651044249534607,
1188
+ "shape": [
1189
+ 1024
1190
+ ]
1191
+ },
1192
+ "gpt.h.22.ln_2.bias": {
1193
+ "scale": 0.11879222095012665,
1194
+ "shape": [
1195
+ 1024
1196
+ ]
1197
+ },
1198
+ "gpt.h.22.ln_2.weight": {
1199
+ "scale": 0.1595410406589508,
1200
+ "shape": [
1201
+ 1024
1202
+ ]
1203
+ },
1204
+ "gpt.h.22.mlp.c_fc.bias": {
1205
+ "scale": 0.01718810759484768,
1206
+ "shape": [
1207
+ 4096
1208
+ ]
1209
+ },
1210
+ "gpt.h.22.mlp.c_fc.weight": {
1211
+ "scale": 0.07537717372179031,
1212
+ "shape": [
1213
+ 1024,
1214
+ 4096
1215
+ ]
1216
+ },
1217
+ "gpt.h.22.mlp.c_proj.bias": {
1218
+ "scale": 0.024833859875798225,
1219
+ "shape": [
1220
+ 1024
1221
+ ]
1222
+ },
1223
+ "gpt.h.22.mlp.c_proj.weight": {
1224
+ "scale": 0.17492838203907013,
1225
+ "shape": [
1226
+ 4096,
1227
+ 1024
1228
+ ]
1229
+ },
1230
+ "gpt.h.23.attn.c_attn.bias": {
1231
+ "scale": 0.0368594266474247,
1232
+ "shape": [
1233
+ 3072
1234
+ ]
1235
+ },
1236
+ "gpt.h.23.attn.c_attn.weight": {
1237
+ "scale": 0.0888095423579216,
1238
+ "shape": [
1239
+ 1024,
1240
+ 3072
1241
+ ]
1242
+ },
1243
+ "gpt.h.23.attn.c_proj.bias": {
1244
+ "scale": 0.02792450785636902,
1245
+ "shape": [
1246
+ 1024
1247
+ ]
1248
+ },
1249
+ "gpt.h.23.attn.c_proj.weight": {
1250
+ "scale": 0.10428722202777863,
1251
+ "shape": [
1252
+ 1024,
1253
+ 1024
1254
+ ]
1255
+ },
1256
+ "gpt.h.23.ln_1.bias": {
1257
+ "scale": 0.10264807939529419,
1258
+ "shape": [
1259
+ 1024
1260
+ ]
1261
+ },
1262
+ "gpt.h.23.ln_1.weight": {
1263
+ "scale": 0.14564886689186096,
1264
+ "shape": [
1265
+ 1024
1266
+ ]
1267
+ },
1268
+ "gpt.h.23.ln_2.bias": {
1269
+ "scale": 0.11791384220123291,
1270
+ "shape": [
1271
+ 1024
1272
+ ]
1273
+ },
1274
+ "gpt.h.23.ln_2.weight": {
1275
+ "scale": 0.15296445786952972,
1276
+ "shape": [
1277
+ 1024
1278
+ ]
1279
+ },
1280
+ "gpt.h.23.mlp.c_fc.bias": {
1281
+ "scale": 0.018314138054847717,
1282
+ "shape": [
1283
+ 4096
1284
+ ]
1285
+ },
1286
+ "gpt.h.23.mlp.c_fc.weight": {
1287
+ "scale": 0.0671987384557724,
1288
+ "shape": [
1289
+ 1024,
1290
+ 4096
1291
+ ]
1292
+ },
1293
+ "gpt.h.23.mlp.c_proj.bias": {
1294
+ "scale": 0.030889278277754784,
1295
+ "shape": [
1296
+ 1024
1297
+ ]
1298
+ },
1299
+ "gpt.h.23.mlp.c_proj.weight": {
1300
+ "scale": 0.13600651919841766,
1301
+ "shape": [
1302
+ 4096,
1303
+ 1024
1304
+ ]
1305
+ },
1306
+ "gpt.h.24.attn.c_attn.bias": {
1307
+ "scale": 0.04183034226298332,
1308
+ "shape": [
1309
+ 3072
1310
+ ]
1311
+ },
1312
+ "gpt.h.24.attn.c_attn.weight": {
1313
+ "scale": 0.05768841132521629,
1314
+ "shape": [
1315
+ 1024,
1316
+ 3072
1317
+ ]
1318
+ },
1319
+ "gpt.h.24.attn.c_proj.bias": {
1320
+ "scale": 0.02998465485870838,
1321
+ "shape": [
1322
+ 1024
1323
+ ]
1324
+ },
1325
+ "gpt.h.24.attn.c_proj.weight": {
1326
+ "scale": 0.09570259600877762,
1327
+ "shape": [
1328
+ 1024,
1329
+ 1024
1330
+ ]
1331
+ },
1332
+ "gpt.h.24.ln_1.bias": {
1333
+ "scale": 0.1029733270406723,
1334
+ "shape": [
1335
+ 1024
1336
+ ]
1337
+ },
1338
+ "gpt.h.24.ln_1.weight": {
1339
+ "scale": 0.1645420342683792,
1340
+ "shape": [
1341
+ 1024
1342
+ ]
1343
+ },
1344
+ "gpt.h.24.ln_2.bias": {
1345
+ "scale": 0.11531977355480194,
1346
+ "shape": [
1347
+ 1024
1348
+ ]
1349
+ },
1350
+ "gpt.h.24.ln_2.weight": {
1351
+ "scale": 0.16749481856822968,
1352
+ "shape": [
1353
+ 1024
1354
+ ]
1355
+ },
1356
+ "gpt.h.24.mlp.c_fc.bias": {
1357
+ "scale": 0.02433849684894085,
1358
+ "shape": [
1359
+ 4096
1360
+ ]
1361
+ },
1362
+ "gpt.h.24.mlp.c_fc.weight": {
1363
+ "scale": 0.056723203510046005,
1364
+ "shape": [
1365
+ 1024,
1366
+ 4096
1367
+ ]
1368
+ },
1369
+ "gpt.h.24.mlp.c_proj.bias": {
1370
+ "scale": 0.03132357448339462,
1371
+ "shape": [
1372
+ 1024
1373
+ ]
1374
+ },
1375
+ "gpt.h.24.mlp.c_proj.weight": {
1376
+ "scale": 0.08369418233633041,
1377
+ "shape": [
1378
+ 4096,
1379
+ 1024
1380
+ ]
1381
+ },
1382
+ "gpt.h.25.attn.c_attn.bias": {
1383
+ "scale": 0.043894506990909576,
1384
+ "shape": [
1385
+ 3072
1386
+ ]
1387
+ },
1388
+ "gpt.h.25.attn.c_attn.weight": {
1389
+ "scale": 0.05882854387164116,
1390
+ "shape": [
1391
+ 1024,
1392
+ 3072
1393
+ ]
1394
+ },
1395
+ "gpt.h.25.attn.c_proj.bias": {
1396
+ "scale": 0.03485613688826561,
1397
+ "shape": [
1398
+ 1024
1399
+ ]
1400
+ },
1401
+ "gpt.h.25.attn.c_proj.weight": {
1402
+ "scale": 0.08835429698228836,
1403
+ "shape": [
1404
+ 1024,
1405
+ 1024
1406
+ ]
1407
+ },
1408
+ "gpt.h.25.ln_1.bias": {
1409
+ "scale": 0.09291279315948486,
1410
+ "shape": [
1411
+ 1024
1412
+ ]
1413
+ },
1414
+ "gpt.h.25.ln_1.weight": {
1415
+ "scale": 0.17914152145385742,
1416
+ "shape": [
1417
+ 1024
1418
+ ]
1419
+ },
1420
+ "gpt.h.25.ln_2.bias": {
1421
+ "scale": 0.11765044182538986,
1422
+ "shape": [
1423
+ 1024
1424
+ ]
1425
+ },
1426
+ "gpt.h.25.ln_2.weight": {
1427
+ "scale": 0.1694125235080719,
1428
+ "shape": [
1429
+ 1024
1430
+ ]
1431
+ },
1432
+ "gpt.h.25.mlp.c_fc.bias": {
1433
+ "scale": 0.02206212468445301,
1434
+ "shape": [
1435
+ 4096
1436
+ ]
1437
+ },
1438
+ "gpt.h.25.mlp.c_fc.weight": {
1439
+ "scale": 0.06992777436971664,
1440
+ "shape": [
1441
+ 1024,
1442
+ 4096
1443
+ ]
1444
+ },
1445
+ "gpt.h.25.mlp.c_proj.bias": {
1446
+ "scale": 0.04068372771143913,
1447
+ "shape": [
1448
+ 1024
1449
+ ]
1450
+ },
1451
+ "gpt.h.25.mlp.c_proj.weight": {
1452
+ "scale": 0.09146194905042648,
1453
+ "shape": [
1454
+ 4096,
1455
+ 1024
1456
+ ]
1457
+ },
1458
+ "gpt.h.26.attn.c_attn.bias": {
1459
+ "scale": 0.08172182738780975,
1460
+ "shape": [
1461
+ 3072
1462
+ ]
1463
+ },
1464
+ "gpt.h.26.attn.c_attn.weight": {
1465
+ "scale": 0.06202762946486473,
1466
+ "shape": [
1467
+ 1024,
1468
+ 3072
1469
+ ]
1470
+ },
1471
+ "gpt.h.26.attn.c_proj.bias": {
1472
+ "scale": 0.03825494274497032,
1473
+ "shape": [
1474
+ 1024
1475
+ ]
1476
+ },
1477
+ "gpt.h.26.attn.c_proj.weight": {
1478
+ "scale": 0.10804062336683273,
1479
+ "shape": [
1480
+ 1024,
1481
+ 1024
1482
+ ]
1483
+ },
1484
+ "gpt.h.26.ln_1.bias": {
1485
+ "scale": 0.08116239309310913,
1486
+ "shape": [
1487
+ 1024
1488
+ ]
1489
+ },
1490
+ "gpt.h.26.ln_1.weight": {
1491
+ "scale": 0.17098096013069153,
1492
+ "shape": [
1493
+ 1024
1494
+ ]
1495
+ },
1496
+ "gpt.h.26.ln_2.bias": {
1497
+ "scale": 0.11734277009963989,
1498
+ "shape": [
1499
+ 1024
1500
+ ]
1501
+ },
1502
+ "gpt.h.26.ln_2.weight": {
1503
+ "scale": 0.1784631311893463,
1504
+ "shape": [
1505
+ 1024
1506
+ ]
1507
+ },
1508
+ "gpt.h.26.mlp.c_fc.bias": {
1509
+ "scale": 0.031606484204530716,
1510
+ "shape": [
1511
+ 4096
1512
+ ]
1513
+ },
1514
+ "gpt.h.26.mlp.c_fc.weight": {
1515
+ "scale": 0.05273488909006119,
1516
+ "shape": [
1517
+ 1024,
1518
+ 4096
1519
+ ]
1520
+ },
1521
+ "gpt.h.26.mlp.c_proj.bias": {
1522
+ "scale": 0.05072355270385742,
1523
+ "shape": [
1524
+ 1024
1525
+ ]
1526
+ },
1527
+ "gpt.h.26.mlp.c_proj.weight": {
1528
+ "scale": 0.14328084886074066,
1529
+ "shape": [
1530
+ 4096,
1531
+ 1024
1532
+ ]
1533
+ },
1534
+ "gpt.h.27.attn.c_attn.bias": {
1535
+ "scale": 0.05690082535147667,
1536
+ "shape": [
1537
+ 3072
1538
+ ]
1539
+ },
1540
+ "gpt.h.27.attn.c_attn.weight": {
1541
+ "scale": 0.07180392742156982,
1542
+ "shape": [
1543
+ 1024,
1544
+ 3072
1545
+ ]
1546
+ },
1547
+ "gpt.h.27.attn.c_proj.bias": {
1548
+ "scale": 0.05289801210165024,
1549
+ "shape": [
1550
+ 1024
1551
+ ]
1552
+ },
1553
+ "gpt.h.27.attn.c_proj.weight": {
1554
+ "scale": 0.0980907455086708,
1555
+ "shape": [
1556
+ 1024,
1557
+ 1024
1558
+ ]
1559
+ },
1560
+ "gpt.h.27.ln_1.bias": {
1561
+ "scale": 0.08269622176885605,
1562
+ "shape": [
1563
+ 1024
1564
+ ]
1565
+ },
1566
+ "gpt.h.27.ln_1.weight": {
1567
+ "scale": 0.17179779708385468,
1568
+ "shape": [
1569
+ 1024
1570
+ ]
1571
+ },
1572
+ "gpt.h.27.ln_2.bias": {
1573
+ "scale": 0.11928660422563553,
1574
+ "shape": [
1575
+ 1024
1576
+ ]
1577
+ },
1578
+ "gpt.h.27.ln_2.weight": {
1579
+ "scale": 0.18073512613773346,
1580
+ "shape": [
1581
+ 1024
1582
+ ]
1583
+ },
1584
+ "gpt.h.27.mlp.c_fc.bias": {
1585
+ "scale": 0.030272051692008972,
1586
+ "shape": [
1587
+ 4096
1588
+ ]
1589
+ },
1590
+ "gpt.h.27.mlp.c_fc.weight": {
1591
+ "scale": 0.05421888828277588,
1592
+ "shape": [
1593
+ 1024,
1594
+ 4096
1595
+ ]
1596
+ },
1597
+ "gpt.h.27.mlp.c_proj.bias": {
1598
+ "scale": 0.04355442896485329,
1599
+ "shape": [
1600
+ 1024
1601
+ ]
1602
+ },
1603
+ "gpt.h.27.mlp.c_proj.weight": {
1604
+ "scale": 0.15102733671665192,
1605
+ "shape": [
1606
+ 4096,
1607
+ 1024
1608
+ ]
1609
+ },
1610
+ "gpt.h.28.attn.c_attn.bias": {
1611
+ "scale": 0.04516652598977089,
1612
+ "shape": [
1613
+ 3072
1614
+ ]
1615
+ },
1616
+ "gpt.h.28.attn.c_attn.weight": {
1617
+ "scale": 0.05591177940368652,
1618
+ "shape": [
1619
+ 1024,
1620
+ 3072
1621
+ ]
1622
+ },
1623
+ "gpt.h.28.attn.c_proj.bias": {
1624
+ "scale": 0.07696392387151718,
1625
+ "shape": [
1626
+ 1024
1627
+ ]
1628
+ },
1629
+ "gpt.h.28.attn.c_proj.weight": {
1630
+ "scale": 0.1706492155790329,
1631
+ "shape": [
1632
+ 1024,
1633
+ 1024
1634
+ ]
1635
+ },
1636
+ "gpt.h.28.ln_1.bias": {
1637
+ "scale": 0.08867383003234863,
1638
+ "shape": [
1639
+ 1024
1640
+ ]
1641
+ },
1642
+ "gpt.h.28.ln_1.weight": {
1643
+ "scale": 0.18088868260383606,
1644
+ "shape": [
1645
+ 1024
1646
+ ]
1647
+ },
1648
+ "gpt.h.28.ln_2.bias": {
1649
+ "scale": 0.10548854619264603,
1650
+ "shape": [
1651
+ 1024
1652
+ ]
1653
+ },
1654
+ "gpt.h.28.ln_2.weight": {
1655
+ "scale": 0.19724524021148682,
1656
+ "shape": [
1657
+ 1024
1658
+ ]
1659
+ },
1660
+ "gpt.h.28.mlp.c_fc.bias": {
1661
+ "scale": 0.03599608689546585,
1662
+ "shape": [
1663
+ 4096
1664
+ ]
1665
+ },
1666
+ "gpt.h.28.mlp.c_fc.weight": {
1667
+ "scale": 0.15464694797992706,
1668
+ "shape": [
1669
+ 1024,
1670
+ 4096
1671
+ ]
1672
+ },
1673
+ "gpt.h.28.mlp.c_proj.bias": {
1674
+ "scale": 0.10096704214811325,
1675
+ "shape": [
1676
+ 1024
1677
+ ]
1678
+ },
1679
+ "gpt.h.28.mlp.c_proj.weight": {
1680
+ "scale": 0.5808261632919312,
1681
+ "shape": [
1682
+ 4096,
1683
+ 1024
1684
+ ]
1685
+ },
1686
+ "gpt.h.29.attn.c_attn.bias": {
1687
+ "scale": 0.061315275728702545,
1688
+ "shape": [
1689
+ 3072
1690
+ ]
1691
+ },
1692
+ "gpt.h.29.attn.c_attn.weight": {
1693
+ "scale": 0.072987399995327,
1694
+ "shape": [
1695
+ 1024,
1696
+ 3072
1697
+ ]
1698
+ },
1699
+ "gpt.h.29.attn.c_proj.bias": {
1700
+ "scale": 0.0334136076271534,
1701
+ "shape": [
1702
+ 1024
1703
+ ]
1704
+ },
1705
+ "gpt.h.29.attn.c_proj.weight": {
1706
+ "scale": 0.33243221044540405,
1707
+ "shape": [
1708
+ 1024,
1709
+ 1024
1710
+ ]
1711
+ },
1712
+ "gpt.h.29.ln_1.bias": {
1713
+ "scale": 0.0834951177239418,
1714
+ "shape": [
1715
+ 1024
1716
+ ]
1717
+ },
1718
+ "gpt.h.29.ln_1.weight": {
1719
+ "scale": 0.17551641166210175,
1720
+ "shape": [
1721
+ 1024
1722
+ ]
1723
+ },
1724
+ "gpt.h.29.ln_2.bias": {
1725
+ "scale": 0.060361869633197784,
1726
+ "shape": [
1727
+ 1024
1728
+ ]
1729
+ },
1730
+ "gpt.h.29.ln_2.weight": {
1731
+ "scale": 0.20771968364715576,
1732
+ "shape": [
1733
+ 1024
1734
+ ]
1735
+ },
1736
+ "gpt.h.29.mlp.c_fc.bias": {
1737
+ "scale": 0.04004308953881264,
1738
+ "shape": [
1739
+ 4096
1740
+ ]
1741
+ },
1742
+ "gpt.h.29.mlp.c_fc.weight": {
1743
+ "scale": 0.257427453994751,
1744
+ "shape": [
1745
+ 1024,
1746
+ 4096
1747
+ ]
1748
+ },
1749
+ "gpt.h.29.mlp.c_proj.bias": {
1750
+ "scale": 0.08023141324520111,
1751
+ "shape": [
1752
+ 1024
1753
+ ]
1754
+ },
1755
+ "gpt.h.29.mlp.c_proj.weight": {
1756
+ "scale": 1.5732485055923462,
1757
+ "shape": [
1758
+ 4096,
1759
+ 1024
1760
+ ]
1761
+ },
1762
+ "gpt.h.3.attn.c_attn.bias": {
1763
+ "scale": 0.036733418703079224,
1764
+ "shape": [
1765
+ 3072
1766
+ ]
1767
+ },
1768
+ "gpt.h.3.attn.c_attn.weight": {
1769
+ "scale": 0.09035025537014008,
1770
+ "shape": [
1771
+ 1024,
1772
+ 3072
1773
+ ]
1774
+ },
1775
+ "gpt.h.3.attn.c_proj.bias": {
1776
+ "scale": 0.38110747933387756,
1777
+ "shape": [
1778
+ 1024
1779
+ ]
1780
+ },
1781
+ "gpt.h.3.attn.c_proj.weight": {
1782
+ "scale": 0.5139561891555786,
1783
+ "shape": [
1784
+ 1024,
1785
+ 1024
1786
+ ]
1787
+ },
1788
+ "gpt.h.3.ln_1.bias": {
1789
+ "scale": 0.14589600265026093,
1790
+ "shape": [
1791
+ 1024
1792
+ ]
1793
+ },
1794
+ "gpt.h.3.ln_1.weight": {
1795
+ "scale": 0.11667633056640625,
1796
+ "shape": [
1797
+ 1024
1798
+ ]
1799
+ },
1800
+ "gpt.h.3.ln_2.bias": {
1801
+ "scale": 0.11972484737634659,
1802
+ "shape": [
1803
+ 1024
1804
+ ]
1805
+ },
1806
+ "gpt.h.3.ln_2.weight": {
1807
+ "scale": 0.2382904291152954,
1808
+ "shape": [
1809
+ 1024
1810
+ ]
1811
+ },
1812
+ "gpt.h.3.mlp.c_fc.bias": {
1813
+ "scale": 0.04690921679139137,
1814
+ "shape": [
1815
+ 4096
1816
+ ]
1817
+ },
1818
+ "gpt.h.3.mlp.c_fc.weight": {
1819
+ "scale": 0.16555476188659668,
1820
+ "shape": [
1821
+ 1024,
1822
+ 4096
1823
+ ]
1824
+ },
1825
+ "gpt.h.3.mlp.c_proj.bias": {
1826
+ "scale": 0.2362823784351349,
1827
+ "shape": [
1828
+ 1024
1829
+ ]
1830
+ },
1831
+ "gpt.h.3.mlp.c_proj.weight": {
1832
+ "scale": 0.9494528770446777,
1833
+ "shape": [
1834
+ 4096,
1835
+ 1024
1836
+ ]
1837
+ },
1838
+ "gpt.h.4.attn.c_attn.bias": {
1839
+ "scale": 0.03971828892827034,
1840
+ "shape": [
1841
+ 3072
1842
+ ]
1843
+ },
1844
+ "gpt.h.4.attn.c_attn.weight": {
1845
+ "scale": 0.0667421743273735,
1846
+ "shape": [
1847
+ 1024,
1848
+ 3072
1849
+ ]
1850
+ },
1851
+ "gpt.h.4.attn.c_proj.bias": {
1852
+ "scale": 0.3155006468296051,
1853
+ "shape": [
1854
+ 1024
1855
+ ]
1856
+ },
1857
+ "gpt.h.4.attn.c_proj.weight": {
1858
+ "scale": 0.31384560465812683,
1859
+ "shape": [
1860
+ 1024,
1861
+ 1024
1862
+ ]
1863
+ },
1864
+ "gpt.h.4.ln_1.bias": {
1865
+ "scale": 0.13793586194515228,
1866
+ "shape": [
1867
+ 1024
1868
+ ]
1869
+ },
1870
+ "gpt.h.4.ln_1.weight": {
1871
+ "scale": 0.1345834881067276,
1872
+ "shape": [
1873
+ 1024
1874
+ ]
1875
+ },
1876
+ "gpt.h.4.ln_2.bias": {
1877
+ "scale": 0.08576243370771408,
1878
+ "shape": [
1879
+ 1024
1880
+ ]
1881
+ },
1882
+ "gpt.h.4.ln_2.weight": {
1883
+ "scale": 0.23912283778190613,
1884
+ "shape": [
1885
+ 1024
1886
+ ]
1887
+ },
1888
+ "gpt.h.4.mlp.c_fc.bias": {
1889
+ "scale": 0.016857421025633812,
1890
+ "shape": [
1891
+ 4096
1892
+ ]
1893
+ },
1894
+ "gpt.h.4.mlp.c_fc.weight": {
1895
+ "scale": 0.0949544683098793,
1896
+ "shape": [
1897
+ 1024,
1898
+ 4096
1899
+ ]
1900
+ },
1901
+ "gpt.h.4.mlp.c_proj.bias": {
1902
+ "scale": 0.14176210761070251,
1903
+ "shape": [
1904
+ 1024
1905
+ ]
1906
+ },
1907
+ "gpt.h.4.mlp.c_proj.weight": {
1908
+ "scale": 1.0221376419067383,
1909
+ "shape": [
1910
+ 4096,
1911
+ 1024
1912
+ ]
1913
+ },
1914
+ "gpt.h.5.attn.c_attn.bias": {
1915
+ "scale": 0.03044235333800316,
1916
+ "shape": [
1917
+ 3072
1918
+ ]
1919
+ },
1920
+ "gpt.h.5.attn.c_attn.weight": {
1921
+ "scale": 0.0556764118373394,
1922
+ "shape": [
1923
+ 1024,
1924
+ 3072
1925
+ ]
1926
+ },
1927
+ "gpt.h.5.attn.c_proj.bias": {
1928
+ "scale": 0.17702117562294006,
1929
+ "shape": [
1930
+ 1024
1931
+ ]
1932
+ },
1933
+ "gpt.h.5.attn.c_proj.weight": {
1934
+ "scale": 0.13661186397075653,
1935
+ "shape": [
1936
+ 1024,
1937
+ 1024
1938
+ ]
1939
+ },
1940
+ "gpt.h.5.ln_1.bias": {
1941
+ "scale": 0.10966886579990387,
1942
+ "shape": [
1943
+ 1024
1944
+ ]
1945
+ },
1946
+ "gpt.h.5.ln_1.weight": {
1947
+ "scale": 0.17159949243068695,
1948
+ "shape": [
1949
+ 1024
1950
+ ]
1951
+ },
1952
+ "gpt.h.5.ln_2.bias": {
1953
+ "scale": 0.06587665528059006,
1954
+ "shape": [
1955
+ 1024
1956
+ ]
1957
+ },
1958
+ "gpt.h.5.ln_2.weight": {
1959
+ "scale": 0.2513478100299835,
1960
+ "shape": [
1961
+ 1024
1962
+ ]
1963
+ },
1964
+ "gpt.h.5.mlp.c_fc.bias": {
1965
+ "scale": 0.021238749846816063,
1966
+ "shape": [
1967
+ 4096
1968
+ ]
1969
+ },
1970
+ "gpt.h.5.mlp.c_fc.weight": {
1971
+ "scale": 0.08227789402008057,
1972
+ "shape": [
1973
+ 1024,
1974
+ 4096
1975
+ ]
1976
+ },
1977
+ "gpt.h.5.mlp.c_proj.bias": {
1978
+ "scale": 0.09128402918577194,
1979
+ "shape": [
1980
+ 1024
1981
+ ]
1982
+ },
1983
+ "gpt.h.5.mlp.c_proj.weight": {
1984
+ "scale": 0.8960160613059998,
1985
+ "shape": [
1986
+ 4096,
1987
+ 1024
1988
+ ]
1989
+ },
1990
+ "gpt.h.6.attn.c_attn.bias": {
1991
+ "scale": 0.04128195717930794,
1992
+ "shape": [
1993
+ 3072
1994
+ ]
1995
+ },
1996
+ "gpt.h.6.attn.c_attn.weight": {
1997
+ "scale": 0.09308914095163345,
1998
+ "shape": [
1999
+ 1024,
2000
+ 3072
2001
+ ]
2002
+ },
2003
+ "gpt.h.6.attn.c_proj.bias": {
2004
+ "scale": 0.09955250471830368,
2005
+ "shape": [
2006
+ 1024
2007
+ ]
2008
+ },
2009
+ "gpt.h.6.attn.c_proj.weight": {
2010
+ "scale": 0.09972423315048218,
2011
+ "shape": [
2012
+ 1024,
2013
+ 1024
2014
+ ]
2015
+ },
2016
+ "gpt.h.6.ln_1.bias": {
2017
+ "scale": 0.10598546266555786,
2018
+ "shape": [
2019
+ 1024
2020
+ ]
2021
+ },
2022
+ "gpt.h.6.ln_1.weight": {
2023
+ "scale": 0.20339453220367432,
2024
+ "shape": [
2025
+ 1024
2026
+ ]
2027
+ },
2028
+ "gpt.h.6.ln_2.bias": {
2029
+ "scale": 0.083857461810112,
2030
+ "shape": [
2031
+ 1024
2032
+ ]
2033
+ },
2034
+ "gpt.h.6.ln_2.weight": {
2035
+ "scale": 0.25439128279685974,
2036
+ "shape": [
2037
+ 1024
2038
+ ]
2039
+ },
2040
+ "gpt.h.6.mlp.c_fc.bias": {
2041
+ "scale": 0.026426810771226883,
2042
+ "shape": [
2043
+ 4096
2044
+ ]
2045
+ },
2046
+ "gpt.h.6.mlp.c_fc.weight": {
2047
+ "scale": 0.1172819659113884,
2048
+ "shape": [
2049
+ 1024,
2050
+ 4096
2051
+ ]
2052
+ },
2053
+ "gpt.h.6.mlp.c_proj.bias": {
2054
+ "scale": 0.08432779461145401,
2055
+ "shape": [
2056
+ 1024
2057
+ ]
2058
+ },
2059
+ "gpt.h.6.mlp.c_proj.weight": {
2060
+ "scale": 0.8800371289253235,
2061
+ "shape": [
2062
+ 4096,
2063
+ 1024
2064
+ ]
2065
+ },
2066
+ "gpt.h.7.attn.c_attn.bias": {
2067
+ "scale": 0.04523950815200806,
2068
+ "shape": [
2069
+ 3072
2070
+ ]
2071
+ },
2072
+ "gpt.h.7.attn.c_attn.weight": {
2073
+ "scale": 0.09843175113201141,
2074
+ "shape": [
2075
+ 1024,
2076
+ 3072
2077
+ ]
2078
+ },
2079
+ "gpt.h.7.attn.c_proj.bias": {
2080
+ "scale": 0.09096702188253403,
2081
+ "shape": [
2082
+ 1024
2083
+ ]
2084
+ },
2085
+ "gpt.h.7.attn.c_proj.weight": {
2086
+ "scale": 0.09420859813690186,
2087
+ "shape": [
2088
+ 1024,
2089
+ 1024
2090
+ ]
2091
+ },
2092
+ "gpt.h.7.ln_1.bias": {
2093
+ "scale": 0.152985617518425,
2094
+ "shape": [
2095
+ 1024
2096
+ ]
2097
+ },
2098
+ "gpt.h.7.ln_1.weight": {
2099
+ "scale": 0.15078113973140717,
2100
+ "shape": [
2101
+ 1024
2102
+ ]
2103
+ },
2104
+ "gpt.h.7.ln_2.bias": {
2105
+ "scale": 0.1413334757089615,
2106
+ "shape": [
2107
+ 1024
2108
+ ]
2109
+ },
2110
+ "gpt.h.7.ln_2.weight": {
2111
+ "scale": 0.2576432526111603,
2112
+ "shape": [
2113
+ 1024
2114
+ ]
2115
+ },
2116
+ "gpt.h.7.mlp.c_fc.bias": {
2117
+ "scale": 0.033283643424510956,
2118
+ "shape": [
2119
+ 4096
2120
+ ]
2121
+ },
2122
+ "gpt.h.7.mlp.c_fc.weight": {
2123
+ "scale": 0.07020364701747894,
2124
+ "shape": [
2125
+ 1024,
2126
+ 4096
2127
+ ]
2128
+ },
2129
+ "gpt.h.7.mlp.c_proj.bias": {
2130
+ "scale": 0.10713233798742294,
2131
+ "shape": [
2132
+ 1024
2133
+ ]
2134
+ },
2135
+ "gpt.h.7.mlp.c_proj.weight": {
2136
+ "scale": 0.42597928643226624,
2137
+ "shape": [
2138
+ 4096,
2139
+ 1024
2140
+ ]
2141
+ },
2142
+ "gpt.h.8.attn.c_attn.bias": {
2143
+ "scale": 0.03788977861404419,
2144
+ "shape": [
2145
+ 3072
2146
+ ]
2147
+ },
2148
+ "gpt.h.8.attn.c_attn.weight": {
2149
+ "scale": 0.08913753926753998,
2150
+ "shape": [
2151
+ 1024,
2152
+ 3072
2153
+ ]
2154
+ },
2155
+ "gpt.h.8.attn.c_proj.bias": {
2156
+ "scale": 0.08845029026269913,
2157
+ "shape": [
2158
+ 1024
2159
+ ]
2160
+ },
2161
+ "gpt.h.8.attn.c_proj.weight": {
2162
+ "scale": 0.09520888328552246,
2163
+ "shape": [
2164
+ 1024,
2165
+ 1024
2166
+ ]
2167
+ },
2168
+ "gpt.h.8.ln_1.bias": {
2169
+ "scale": 0.12517669796943665,
2170
+ "shape": [
2171
+ 1024
2172
+ ]
2173
+ },
2174
+ "gpt.h.8.ln_1.weight": {
2175
+ "scale": 0.1648232787847519,
2176
+ "shape": [
2177
+ 1024
2178
+ ]
2179
+ },
2180
+ "gpt.h.8.ln_2.bias": {
2181
+ "scale": 0.14098228514194489,
2182
+ "shape": [
2183
+ 1024
2184
+ ]
2185
+ },
2186
+ "gpt.h.8.ln_2.weight": {
2187
+ "scale": 0.24974539875984192,
2188
+ "shape": [
2189
+ 1024
2190
+ ]
2191
+ },
2192
+ "gpt.h.8.mlp.c_fc.bias": {
2193
+ "scale": 0.026503393426537514,
2194
+ "shape": [
2195
+ 4096
2196
+ ]
2197
+ },
2198
+ "gpt.h.8.mlp.c_fc.weight": {
2199
+ "scale": 0.06782539933919907,
2200
+ "shape": [
2201
+ 1024,
2202
+ 4096
2203
+ ]
2204
+ },
2205
+ "gpt.h.8.mlp.c_proj.bias": {
2206
+ "scale": 0.09713318198919296,
2207
+ "shape": [
2208
+ 1024
2209
+ ]
2210
+ },
2211
+ "gpt.h.8.mlp.c_proj.weight": {
2212
+ "scale": 0.43732672929763794,
2213
+ "shape": [
2214
+ 4096,
2215
+ 1024
2216
+ ]
2217
+ },
2218
+ "gpt.h.9.attn.c_attn.bias": {
2219
+ "scale": 0.04425312206149101,
2220
+ "shape": [
2221
+ 3072
2222
+ ]
2223
+ },
2224
+ "gpt.h.9.attn.c_attn.weight": {
2225
+ "scale": 0.1181648001074791,
2226
+ "shape": [
2227
+ 1024,
2228
+ 3072
2229
+ ]
2230
+ },
2231
+ "gpt.h.9.attn.c_proj.bias": {
2232
+ "scale": 0.07080474495887756,
2233
+ "shape": [
2234
+ 1024
2235
+ ]
2236
+ },
2237
+ "gpt.h.9.attn.c_proj.weight": {
2238
+ "scale": 0.10802309960126877,
2239
+ "shape": [
2240
+ 1024,
2241
+ 1024
2242
+ ]
2243
+ },
2244
+ "gpt.h.9.ln_1.bias": {
2245
+ "scale": 0.15359099209308624,
2246
+ "shape": [
2247
+ 1024
2248
+ ]
2249
+ },
2250
+ "gpt.h.9.ln_1.weight": {
2251
+ "scale": 0.15670429170131683,
2252
+ "shape": [
2253
+ 1024
2254
+ ]
2255
+ },
2256
+ "gpt.h.9.ln_2.bias": {
2257
+ "scale": 0.1377403736114502,
2258
+ "shape": [
2259
+ 1024
2260
+ ]
2261
+ },
2262
+ "gpt.h.9.ln_2.weight": {
2263
+ "scale": 0.2576030492782593,
2264
+ "shape": [
2265
+ 1024
2266
+ ]
2267
+ },
2268
+ "gpt.h.9.mlp.c_fc.bias": {
2269
+ "scale": 0.031440090388059616,
2270
+ "shape": [
2271
+ 4096
2272
+ ]
2273
+ },
2274
+ "gpt.h.9.mlp.c_fc.weight": {
2275
+ "scale": 0.07645577937364578,
2276
+ "shape": [
2277
+ 1024,
2278
+ 4096
2279
+ ]
2280
+ },
2281
+ "gpt.h.9.mlp.c_proj.bias": {
2282
+ "scale": 0.10798899829387665,
2283
+ "shape": [
2284
+ 1024
2285
+ ]
2286
+ },
2287
+ "gpt.h.9.mlp.c_proj.weight": {
2288
+ "scale": 0.4369252622127533,
2289
+ "shape": [
2290
+ 4096,
2291
+ 1024
2292
+ ]
2293
+ },
2294
+ "gpt.ln_f.bias": {
2295
+ "scale": 0.20075297355651855,
2296
+ "shape": [
2297
+ 1024
2298
+ ]
2299
+ },
2300
+ "gpt.ln_f.weight": {
2301
+ "scale": 0.41444164514541626,
2302
+ "shape": [
2303
+ 1024
2304
+ ]
2305
+ },
2306
+ "gpt.wpe.emb.weight": {
2307
+ "scale": 0.0773041620850563,
2308
+ "shape": [
2309
+ 608,
2310
+ 1024
2311
+ ]
2312
+ },
2313
+ "gpt.wte.weight": {
2314
+ "scale": 0.08020960539579391,
2315
+ "shape": [
2316
+ 1026,
2317
+ 1024
2318
+ ]
2319
+ },
2320
+ "mel_head.bias": {
2321
+ "scale": 0.028449567034840584,
2322
+ "shape": [
2323
+ 1026
2324
+ ]
2325
+ },
2326
+ "mel_head.weight": {
2327
+ "scale": 0.07584048807621002,
2328
+ "shape": [
2329
+ 1026,
2330
+ 1024
2331
+ ]
2332
+ }
2333
+ }
INT4/special_tokens_map.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "[START]",
3
+ "eos_token": "[STOP]",
4
+ "pad_token": "[PAD]",
5
+ "unk_token": "[UNK]"
6
+ }
INT4/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
INT4/tokenizer_config.json ADDED
@@ -0,0 +1,192 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[STOP]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "1": {
12
+ "content": "[UNK]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "2": {
20
+ "content": "[SPACE]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "259": {
28
+ "content": "[en]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "260": {
36
+ "content": "[de]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ },
43
+ "261": {
44
+ "content": "[START]",
45
+ "lstrip": false,
46
+ "normalized": false,
47
+ "rstrip": false,
48
+ "single_word": false,
49
+ "special": true
50
+ },
51
+ "262": {
52
+ "content": "[fr]",
53
+ "lstrip": false,
54
+ "normalized": false,
55
+ "rstrip": false,
56
+ "single_word": false,
57
+ "special": true
58
+ },
59
+ "267": {
60
+ "content": "[ru]",
61
+ "lstrip": false,
62
+ "normalized": false,
63
+ "rstrip": false,
64
+ "single_word": false,
65
+ "special": true
66
+ },
67
+ "284": {
68
+ "content": "[es]",
69
+ "lstrip": false,
70
+ "normalized": false,
71
+ "rstrip": false,
72
+ "single_word": false,
73
+ "special": true
74
+ },
75
+ "285": {
76
+ "content": "[it]",
77
+ "lstrip": false,
78
+ "normalized": false,
79
+ "rstrip": false,
80
+ "single_word": false,
81
+ "special": true
82
+ },
83
+ "286": {
84
+ "content": "[pt]",
85
+ "lstrip": false,
86
+ "normalized": false,
87
+ "rstrip": false,
88
+ "single_word": false,
89
+ "special": true
90
+ },
91
+ "293": {
92
+ "content": "[cs]",
93
+ "lstrip": false,
94
+ "normalized": false,
95
+ "rstrip": false,
96
+ "single_word": false,
97
+ "special": true
98
+ },
99
+ "294": {
100
+ "content": "[pl]",
101
+ "lstrip": false,
102
+ "normalized": false,
103
+ "rstrip": false,
104
+ "single_word": false,
105
+ "special": true
106
+ },
107
+ "295": {
108
+ "content": "[tr]",
109
+ "lstrip": false,
110
+ "normalized": false,
111
+ "rstrip": false,
112
+ "single_word": false,
113
+ "special": true
114
+ },
115
+ "297": {
116
+ "content": "[nl]",
117
+ "lstrip": false,
118
+ "normalized": false,
119
+ "rstrip": false,
120
+ "single_word": false,
121
+ "special": true
122
+ },
123
+ "5022": {
124
+ "content": "[ar]",
125
+ "lstrip": false,
126
+ "normalized": false,
127
+ "rstrip": false,
128
+ "single_word": false,
129
+ "special": true
130
+ },
131
+ "5023": {
132
+ "content": "[zh-cn]",
133
+ "lstrip": false,
134
+ "normalized": false,
135
+ "rstrip": false,
136
+ "single_word": false,
137
+ "special": true
138
+ },
139
+ "5412": {
140
+ "content": "[ja]",
141
+ "lstrip": false,
142
+ "normalized": false,
143
+ "rstrip": false,
144
+ "single_word": false,
145
+ "special": true
146
+ },
147
+ "5753": {
148
+ "content": "[hu]",
149
+ "lstrip": false,
150
+ "normalized": false,
151
+ "rstrip": false,
152
+ "single_word": false,
153
+ "special": true
154
+ },
155
+ "6152": {
156
+ "content": "[ko]",
157
+ "lstrip": false,
158
+ "normalized": false,
159
+ "rstrip": false,
160
+ "single_word": false,
161
+ "special": true
162
+ },
163
+ "6680": {
164
+ "content": "[hi]",
165
+ "lstrip": false,
166
+ "normalized": false,
167
+ "rstrip": false,
168
+ "single_word": false,
169
+ "special": true
170
+ },
171
+ "6681": {
172
+ "content": "[PAD]",
173
+ "lstrip": false,
174
+ "normalized": false,
175
+ "rstrip": false,
176
+ "single_word": false,
177
+ "special": true
178
+ }
179
+ },
180
+ "auto_map": {"AutoTokenizer": ["AstraMindAI/xtts2-gpt--tokenizer.XTTSTokenizerFast", null]},
181
+ "bos_token": "[START]",
182
+ "clean_up_tokenization_spaces": true,
183
+ "eos_token": "[STOP]",
184
+ "max_length": null,
185
+ "model_max_length": 1000000000000000019884624838656,
186
+ "pad_to_multiple_of": null,
187
+ "pad_token": "[PAD]",
188
+ "pad_token_type_id": 0,
189
+ "padding_side": "right",
190
+ "tokenizer_class": "XTTSTokenizerFast",
191
+ "unk_token": "[UNK]"
192
+ }
INT8/config.json ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "activation_function": "gelu_new",
3
+ "architectures": [
4
+ "XttsGPT"
5
+ ],
6
+ "attn_pdrop": 0.1,
7
+ "audio_config": {
8
+ "mel_channels": 80,
9
+ "output_sample_rate": 24000,
10
+ "sample_rate": 22050
11
+ },
12
+ "auto_map": {
13
+ "AutoConfig": "AstraMindAI/xtts2-gpt--gpt_config.XTTSGPTConfig",
14
+ "AutoModelForCausalLM": "AstraMindAI/xtts2-gpt--xtts2_gpt_modeling.XttsGPT",
15
+ "AutoTokenizer": "AstraMindAI/xtts2-gpt--tokenizer.XTTSTokenizerFast"
16
+ },
17
+ "decoder_input_dim": 1024,
18
+ "enable_redaction": false,
19
+ "gpt_batch_size": 1,
20
+ "gpt_max_audio_tokens": 605,
21
+ "hidden_size": 1024,
22
+ "initializer_range": 0.02,
23
+ "kv_cache": true,
24
+ "layer_norm_epsilon": 1e-05,
25
+ "max_audio_tokens": 605,
26
+ "max_prompt_tokens": 70,
27
+ "max_text_tokens": 402,
28
+ "model_type": "xtts_gpt",
29
+ "n_inner": 4096,
30
+ "num_attention_heads": 16,
31
+ "num_audio_tokens": 1026,
32
+ "num_hidden_layers": 30,
33
+ "number_text_tokens": 6681,
34
+ "reorder_and_upcast_attn": false,
35
+ "scale_attn_by_inverse_layer_idx": false,
36
+ "start_audio_token": 1024,
37
+ "start_text_token": null,
38
+ "stop_audio_token": 1025,
39
+ "stop_text_token": null,
40
+ "transformers_version": "4.46.0",
41
+ "use_masking_gt_prompt_approach": true,
42
+ "use_perceiver_resampler": true,
43
+ "vocab_size": 6681
44
+ }
INT8/gpt2_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a53c20858ef96c20b980b195de7f3611dda7003921b22845d89be2562dffeea2
3
+ size 380650106
INT8/int8_metadata.json ADDED
@@ -0,0 +1,370 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "final_norm.bias": 0.004869353026151657,
3
+ "final_norm.weight": 0.04349002242088318,
4
+ "gpt.h.0.attn.c_attn.bias": 0.006784666329622269,
5
+ "gpt.h.0.attn.c_attn.weight": 0.005739760585129261,
6
+ "gpt.h.0.attn.c_proj.bias": 0.004933658055961132,
7
+ "gpt.h.0.attn.c_proj.weight": 0.006425631698220968,
8
+ "gpt.h.0.ln_1.bias": 0.0014453897019848228,
9
+ "gpt.h.0.ln_1.weight": 0.001891627791337669,
10
+ "gpt.h.0.ln_2.bias": 0.01133652776479721,
11
+ "gpt.h.0.ln_2.weight": 0.006315903272479773,
12
+ "gpt.h.0.mlp.c_fc.bias": 0.0026161218993365765,
13
+ "gpt.h.0.mlp.c_fc.weight": 0.009393199346959591,
14
+ "gpt.h.0.mlp.c_proj.bias": 0.016290107741951942,
15
+ "gpt.h.0.mlp.c_proj.weight": 0.036495596170425415,
16
+ "gpt.h.1.attn.c_attn.bias": 0.005060207564383745,
17
+ "gpt.h.1.attn.c_attn.weight": 0.003430855693295598,
18
+ "gpt.h.1.attn.c_proj.bias": 0.01638360321521759,
19
+ "gpt.h.1.attn.c_proj.weight": 0.025163328275084496,
20
+ "gpt.h.1.ln_1.bias": 0.011049080640077591,
21
+ "gpt.h.1.ln_1.weight": 0.004219602793455124,
22
+ "gpt.h.1.ln_2.bias": 0.011916977353394032,
23
+ "gpt.h.1.ln_2.weight": 0.011172252707183361,
24
+ "gpt.h.1.mlp.c_fc.bias": 0.0014588515041396022,
25
+ "gpt.h.1.mlp.c_fc.weight": 0.006692858878523111,
26
+ "gpt.h.1.mlp.c_proj.bias": 0.018994668498635292,
27
+ "gpt.h.1.mlp.c_proj.weight": 0.04086727276444435,
28
+ "gpt.h.10.attn.c_attn.bias": 0.0024330769665539265,
29
+ "gpt.h.10.attn.c_attn.weight": 0.006525476463139057,
30
+ "gpt.h.10.attn.c_proj.bias": 0.0025137888733297586,
31
+ "gpt.h.10.attn.c_proj.weight": 0.004787968005985022,
32
+ "gpt.h.10.ln_1.bias": 0.008025330491364002,
33
+ "gpt.h.10.ln_1.weight": 0.008399507962167263,
34
+ "gpt.h.10.ln_2.bias": 0.007188895717263222,
35
+ "gpt.h.10.ln_2.weight": 0.01416360680013895,
36
+ "gpt.h.10.mlp.c_fc.bias": 0.0016273388173431158,
37
+ "gpt.h.10.mlp.c_fc.weight": 0.005328207276761532,
38
+ "gpt.h.10.mlp.c_proj.bias": 0.005671263672411442,
39
+ "gpt.h.10.mlp.c_proj.weight": 0.02014252357184887,
40
+ "gpt.h.11.attn.c_attn.bias": 0.002672715811058879,
41
+ "gpt.h.11.attn.c_attn.weight": 0.0061920033767819405,
42
+ "gpt.h.11.attn.c_proj.bias": 0.0028154668398201466,
43
+ "gpt.h.11.attn.c_proj.weight": 0.004848834592849016,
44
+ "gpt.h.11.ln_1.bias": 0.009074854664504528,
45
+ "gpt.h.11.ln_1.weight": 0.008892943151295185,
46
+ "gpt.h.11.ln_2.bias": 0.0054258741438388824,
47
+ "gpt.h.11.ln_2.weight": 0.01229905430227518,
48
+ "gpt.h.11.mlp.c_fc.bias": 0.00168695580214262,
49
+ "gpt.h.11.mlp.c_fc.weight": 0.003726626979187131,
50
+ "gpt.h.11.mlp.c_proj.bias": 0.005866497289389372,
51
+ "gpt.h.11.mlp.c_proj.weight": 0.02522459626197815,
52
+ "gpt.h.12.attn.c_attn.bias": 0.0027519443538039923,
53
+ "gpt.h.12.attn.c_attn.weight": 0.007994277402758598,
54
+ "gpt.h.12.attn.c_proj.bias": 0.001845111371949315,
55
+ "gpt.h.12.attn.c_proj.weight": 0.006088561844080687,
56
+ "gpt.h.12.ln_1.bias": 0.010259520262479782,
57
+ "gpt.h.12.ln_1.weight": 0.00929665844887495,
58
+ "gpt.h.12.ln_2.bias": 0.007012453395873308,
59
+ "gpt.h.12.ln_2.weight": 0.012815050780773163,
60
+ "gpt.h.12.mlp.c_fc.bias": 0.0008164360770024359,
61
+ "gpt.h.12.mlp.c_fc.weight": 0.0037948056124150753,
62
+ "gpt.h.12.mlp.c_proj.bias": 0.006633814424276352,
63
+ "gpt.h.12.mlp.c_proj.weight": 0.01512092724442482,
64
+ "gpt.h.13.attn.c_attn.bias": 0.0026236390694975853,
65
+ "gpt.h.13.attn.c_attn.weight": 0.004803134128451347,
66
+ "gpt.h.13.attn.c_proj.bias": 0.00371875730343163,
67
+ "gpt.h.13.attn.c_proj.weight": 0.006528449710458517,
68
+ "gpt.h.13.ln_1.bias": 0.008354649879038334,
69
+ "gpt.h.13.ln_1.weight": 0.009035688824951649,
70
+ "gpt.h.13.ln_2.bias": 0.00720137357711792,
71
+ "gpt.h.13.ln_2.weight": 0.012263036333024502,
72
+ "gpt.h.13.mlp.c_fc.bias": 0.0010731430957093835,
73
+ "gpt.h.13.mlp.c_fc.weight": 0.0035445760004222393,
74
+ "gpt.h.13.mlp.c_proj.bias": 0.006077663507312536,
75
+ "gpt.h.13.mlp.c_proj.weight": 0.013959192670881748,
76
+ "gpt.h.14.attn.c_attn.bias": 0.0022155672777444124,
77
+ "gpt.h.14.attn.c_attn.weight": 0.007756410166621208,
78
+ "gpt.h.14.attn.c_proj.bias": 0.0013033421710133553,
79
+ "gpt.h.14.attn.c_proj.weight": 0.005600381642580032,
80
+ "gpt.h.14.ln_1.bias": 0.008806071244180202,
81
+ "gpt.h.14.ln_1.weight": 0.008939859457314014,
82
+ "gpt.h.14.ln_2.bias": 0.007173170801252127,
83
+ "gpt.h.14.ln_2.weight": 0.011766878888010979,
84
+ "gpt.h.14.mlp.c_fc.bias": 0.000844161375425756,
85
+ "gpt.h.14.mlp.c_fc.weight": 0.0045441873371601105,
86
+ "gpt.h.14.mlp.c_proj.bias": 0.005780238192528486,
87
+ "gpt.h.14.mlp.c_proj.weight": 0.01581657864153385,
88
+ "gpt.h.15.attn.c_attn.bias": 0.0022497144527733326,
89
+ "gpt.h.15.attn.c_attn.weight": 0.004309875890612602,
90
+ "gpt.h.15.attn.c_proj.bias": 0.004680809564888477,
91
+ "gpt.h.15.attn.c_proj.weight": 0.006447209510952234,
92
+ "gpt.h.15.ln_1.bias": 0.007965384982526302,
93
+ "gpt.h.15.ln_1.weight": 0.00913836620748043,
94
+ "gpt.h.15.ln_2.bias": 0.0062019070610404015,
95
+ "gpt.h.15.ln_2.weight": 0.01104413066059351,
96
+ "gpt.h.15.mlp.c_fc.bias": 0.0011065590661019087,
97
+ "gpt.h.15.mlp.c_fc.weight": 0.0046559604816138744,
98
+ "gpt.h.15.mlp.c_proj.bias": 0.005378104280680418,
99
+ "gpt.h.15.mlp.c_proj.weight": 0.017130374908447266,
100
+ "gpt.h.16.attn.c_attn.bias": 0.002614511176943779,
101
+ "gpt.h.16.attn.c_attn.weight": 0.0070121572352945805,
102
+ "gpt.h.16.attn.c_proj.bias": 0.0052946810610592365,
103
+ "gpt.h.16.attn.c_proj.weight": 0.004184126853942871,
104
+ "gpt.h.16.ln_1.bias": 0.00865858979523182,
105
+ "gpt.h.16.ln_1.weight": 0.008769914507865906,
106
+ "gpt.h.16.ln_2.bias": 0.0066169509664177895,
107
+ "gpt.h.16.ln_2.weight": 0.010157736949622631,
108
+ "gpt.h.16.mlp.c_fc.bias": 0.0012236940674483776,
109
+ "gpt.h.16.mlp.c_fc.weight": 0.004414326511323452,
110
+ "gpt.h.16.mlp.c_proj.bias": 0.005387744400650263,
111
+ "gpt.h.16.mlp.c_proj.weight": 0.008935753256082535,
112
+ "gpt.h.17.attn.c_attn.bias": 0.0024349491577595472,
113
+ "gpt.h.17.attn.c_attn.weight": 0.007278294302523136,
114
+ "gpt.h.17.attn.c_proj.bias": 0.0034934559371322393,
115
+ "gpt.h.17.attn.c_proj.weight": 0.007657876703888178,
116
+ "gpt.h.17.ln_1.bias": 0.008721367456018925,
117
+ "gpt.h.17.ln_1.weight": 0.00882012490183115,
118
+ "gpt.h.17.ln_2.bias": 0.007266780827194452,
119
+ "gpt.h.17.ln_2.weight": 0.010302619077265263,
120
+ "gpt.h.17.mlp.c_fc.bias": 0.0013675907393917441,
121
+ "gpt.h.17.mlp.c_fc.weight": 0.0037777191027998924,
122
+ "gpt.h.17.mlp.c_proj.bias": 0.0048697893507778645,
123
+ "gpt.h.17.mlp.c_proj.weight": 0.01015142910182476,
124
+ "gpt.h.18.attn.c_attn.bias": 0.002071398077532649,
125
+ "gpt.h.18.attn.c_attn.weight": 0.006754225119948387,
126
+ "gpt.h.18.attn.c_proj.bias": 0.006200749427080154,
127
+ "gpt.h.18.attn.c_proj.weight": 0.005378039088100195,
128
+ "gpt.h.18.ln_1.bias": 0.008033838123083115,
129
+ "gpt.h.18.ln_1.weight": 0.00875889416784048,
130
+ "gpt.h.18.ln_2.bias": 0.006478333845734596,
131
+ "gpt.h.18.ln_2.weight": 0.009868012741208076,
132
+ "gpt.h.18.mlp.c_fc.bias": 0.0011885170824825764,
133
+ "gpt.h.18.mlp.c_fc.weight": 0.003764205379411578,
134
+ "gpt.h.18.mlp.c_proj.bias": 0.0037589711137115955,
135
+ "gpt.h.18.mlp.c_proj.weight": 0.00706329382956028,
136
+ "gpt.h.19.attn.c_attn.bias": 0.002585803624242544,
137
+ "gpt.h.19.attn.c_attn.weight": 0.006440048571676016,
138
+ "gpt.h.19.attn.c_proj.bias": 0.003273698966950178,
139
+ "gpt.h.19.attn.c_proj.weight": 0.006902920547872782,
140
+ "gpt.h.19.ln_1.bias": 0.008147234097123146,
141
+ "gpt.h.19.ln_1.weight": 0.008605051785707474,
142
+ "gpt.h.19.ln_2.bias": 0.0059887804090976715,
143
+ "gpt.h.19.ln_2.weight": 0.009857536293566227,
144
+ "gpt.h.19.mlp.c_fc.bias": 0.0012922842288389802,
145
+ "gpt.h.19.mlp.c_fc.weight": 0.004283702466636896,
146
+ "gpt.h.19.mlp.c_proj.bias": 0.0032340157777071,
147
+ "gpt.h.19.mlp.c_proj.weight": 0.008396429941058159,
148
+ "gpt.h.2.attn.c_attn.bias": 0.0024704847019165754,
149
+ "gpt.h.2.attn.c_attn.weight": 0.0033952328376471996,
150
+ "gpt.h.2.attn.c_proj.bias": 0.02068011462688446,
151
+ "gpt.h.2.attn.c_proj.weight": 0.02372979000210762,
152
+ "gpt.h.2.ln_1.bias": 0.009298523887991905,
153
+ "gpt.h.2.ln_1.weight": 0.005359284114092588,
154
+ "gpt.h.2.ln_2.bias": 0.008068359456956387,
155
+ "gpt.h.2.ln_2.weight": 0.013421551324427128,
156
+ "gpt.h.2.mlp.c_fc.bias": 0.0030155687127262354,
157
+ "gpt.h.2.mlp.c_fc.weight": 0.01074832584708929,
158
+ "gpt.h.2.mlp.c_proj.bias": 0.015278116799890995,
159
+ "gpt.h.2.mlp.c_proj.weight": 0.04022492840886116,
160
+ "gpt.h.20.attn.c_attn.bias": 0.001981201348826289,
161
+ "gpt.h.20.attn.c_attn.weight": 0.006853727623820305,
162
+ "gpt.h.20.attn.c_proj.bias": 0.004991447553038597,
163
+ "gpt.h.20.attn.c_proj.weight": 0.005948251578956842,
164
+ "gpt.h.20.ln_1.bias": 0.007368254475295544,
165
+ "gpt.h.20.ln_1.weight": 0.00863717496395111,
166
+ "gpt.h.20.ln_2.bias": 0.006148574873805046,
167
+ "gpt.h.20.ln_2.weight": 0.009219009429216385,
168
+ "gpt.h.20.mlp.c_fc.bias": 0.0008407220593653619,
169
+ "gpt.h.20.mlp.c_fc.weight": 0.003581109456717968,
170
+ "gpt.h.20.mlp.c_proj.bias": 0.0023647320922464132,
171
+ "gpt.h.20.mlp.c_proj.weight": 0.014833961613476276,
172
+ "gpt.h.21.attn.c_attn.bias": 0.0024412847124040127,
173
+ "gpt.h.21.attn.c_attn.weight": 0.005555002484470606,
174
+ "gpt.h.21.attn.c_proj.bias": 0.004358917940407991,
175
+ "gpt.h.21.attn.c_proj.weight": 0.009266942739486694,
176
+ "gpt.h.21.ln_1.bias": 0.006839239504188299,
177
+ "gpt.h.21.ln_1.weight": 0.008790756575763226,
178
+ "gpt.h.21.ln_2.bias": 0.005996923428028822,
179
+ "gpt.h.21.ln_2.weight": 0.008705759420990944,
180
+ "gpt.h.21.mlp.c_fc.bias": 0.0011871765600517392,
181
+ "gpt.h.21.mlp.c_fc.weight": 0.0038676916155964136,
182
+ "gpt.h.21.mlp.c_proj.bias": 0.0013832782860845327,
183
+ "gpt.h.21.mlp.c_proj.weight": 0.011286932043731213,
184
+ "gpt.h.22.attn.c_attn.bias": 0.0022080065682530403,
185
+ "gpt.h.22.attn.c_attn.weight": 0.00387361366301775,
186
+ "gpt.h.22.attn.c_proj.bias": 0.0037303422577679157,
187
+ "gpt.h.22.attn.c_proj.weight": 0.00496130483224988,
188
+ "gpt.h.22.ln_1.bias": 0.006414500530809164,
189
+ "gpt.h.22.ln_1.weight": 0.008626559749245644,
190
+ "gpt.h.22.ln_2.bias": 0.006547602824866772,
191
+ "gpt.h.22.ln_2.weight": 0.008793600834906101,
192
+ "gpt.h.22.mlp.c_fc.bias": 0.0009473760146647692,
193
+ "gpt.h.22.mlp.c_fc.weight": 0.00415464723482728,
194
+ "gpt.h.22.mlp.c_proj.bias": 0.0013687954051420093,
195
+ "gpt.h.22.mlp.c_proj.weight": 0.009641721844673157,
196
+ "gpt.h.23.attn.c_attn.bias": 0.002031621988862753,
197
+ "gpt.h.23.attn.c_attn.weight": 0.004895014222711325,
198
+ "gpt.h.23.attn.c_proj.bias": 0.0015391460619866848,
199
+ "gpt.h.23.attn.c_proj.weight": 0.005748114548623562,
200
+ "gpt.h.23.ln_1.bias": 0.005657768342643976,
201
+ "gpt.h.23.ln_1.weight": 0.00802789069712162,
202
+ "gpt.h.23.ln_2.bias": 0.006499188020825386,
203
+ "gpt.h.23.ln_2.weight": 0.008431111462414265,
204
+ "gpt.h.23.mlp.c_fc.bias": 0.0010094406316056848,
205
+ "gpt.h.23.mlp.c_fc.weight": 0.003703867318108678,
206
+ "gpt.h.23.mlp.c_proj.bias": 0.0017025587148964405,
207
+ "gpt.h.23.mlp.c_proj.weight": 0.0074964226223528385,
208
+ "gpt.h.24.attn.c_attn.bias": 0.002305609406903386,
209
+ "gpt.h.24.attn.c_attn.weight": 0.0031796761322766542,
210
+ "gpt.h.24.attn.c_proj.bias": 0.001652697566896677,
211
+ "gpt.h.24.attn.c_proj.weight": 0.0052749463357031345,
212
+ "gpt.h.24.ln_1.bias": 0.005675694905221462,
213
+ "gpt.h.24.ln_1.weight": 0.009069245308637619,
214
+ "gpt.h.24.ln_2.bias": 0.006356207653880119,
215
+ "gpt.h.24.ln_2.weight": 0.009231998585164547,
216
+ "gpt.h.24.mlp.c_fc.bias": 0.001341491937637329,
217
+ "gpt.h.24.mlp.c_fc.weight": 0.003126475727185607,
218
+ "gpt.h.24.mlp.c_proj.bias": 0.0017264962662011385,
219
+ "gpt.h.24.mlp.c_proj.weight": 0.004613065160810947,
220
+ "gpt.h.25.attn.c_attn.bias": 0.002419382333755493,
221
+ "gpt.h.25.attn.c_attn.weight": 0.003242518287152052,
222
+ "gpt.h.25.attn.c_proj.bias": 0.0019212045008316636,
223
+ "gpt.h.25.attn.c_proj.weight": 0.004869922064244747,
224
+ "gpt.h.25.ln_1.bias": 0.005121177528053522,
225
+ "gpt.h.25.ln_1.weight": 0.009873942472040653,
226
+ "gpt.h.25.ln_2.bias": 0.006484670098870993,
227
+ "gpt.h.25.ln_2.weight": 0.009337698109447956,
228
+ "gpt.h.25.mlp.c_fc.bias": 0.001216022646985948,
229
+ "gpt.h.25.mlp.c_fc.weight": 0.0038542866241186857,
230
+ "gpt.h.25.mlp.c_proj.bias": 0.002242410322651267,
231
+ "gpt.h.25.mlp.c_proj.weight": 0.005041209515184164,
232
+ "gpt.h.26.attn.c_attn.bias": 0.00450435234233737,
233
+ "gpt.h.26.attn.c_attn.weight": 0.0034188455902040005,
234
+ "gpt.h.26.attn.c_proj.bias": 0.002108540153130889,
235
+ "gpt.h.26.attn.c_proj.weight": 0.005954994820058346,
236
+ "gpt.h.26.ln_1.bias": 0.004473517648875713,
237
+ "gpt.h.26.ln_1.weight": 0.009424147196114063,
238
+ "gpt.h.26.ln_2.bias": 0.006467711646109819,
239
+ "gpt.h.26.ln_2.weight": 0.009836550801992416,
240
+ "gpt.h.26.mlp.c_fc.bias": 0.001742089632898569,
241
+ "gpt.h.26.mlp.c_fc.weight": 0.0029066475108265877,
242
+ "gpt.h.26.mlp.c_proj.bias": 0.002795786364004016,
243
+ "gpt.h.26.mlp.c_proj.weight": 0.00789736956357956,
244
+ "gpt.h.27.attn.c_attn.bias": 0.0031362660229206085,
245
+ "gpt.h.27.attn.c_attn.weight": 0.003957696724683046,
246
+ "gpt.h.27.attn.c_proj.bias": 0.0029156384989619255,
247
+ "gpt.h.27.attn.c_proj.weight": 0.005406576208770275,
248
+ "gpt.h.27.ln_1.bias": 0.004558059852570295,
249
+ "gpt.h.27.ln_1.weight": 0.0094691701233387,
250
+ "gpt.h.27.ln_2.bias": 0.006574852392077446,
251
+ "gpt.h.27.ln_2.weight": 0.009961778298020363,
252
+ "gpt.h.27.mlp.c_fc.bias": 0.0016685383161529899,
253
+ "gpt.h.27.mlp.c_fc.weight": 0.0029884425457566977,
254
+ "gpt.h.27.mlp.c_proj.bias": 0.002400637837126851,
255
+ "gpt.h.27.mlp.c_proj.weight": 0.008324341848492622,
256
+ "gpt.h.28.attn.c_attn.bias": 0.0024894936941564083,
257
+ "gpt.h.28.attn.c_attn.weight": 0.003081751521676779,
258
+ "gpt.h.28.attn.c_proj.bias": 0.004242106340825558,
259
+ "gpt.h.28.attn.c_proj.weight": 0.009405862540006638,
260
+ "gpt.h.28.ln_1.bias": 0.004887533839792013,
261
+ "gpt.h.28.ln_1.weight": 0.009970242157578468,
262
+ "gpt.h.28.ln_2.bias": 0.005814329255372286,
263
+ "gpt.h.28.ln_2.weight": 0.010871784761548042,
264
+ "gpt.h.28.mlp.c_fc.bias": 0.0019840362947434187,
265
+ "gpt.h.28.mlp.c_fc.weight": 0.008523846976459026,
266
+ "gpt.h.28.mlp.c_proj.bias": 0.005565112456679344,
267
+ "gpt.h.28.mlp.c_proj.weight": 0.032014038413763046,
268
+ "gpt.h.29.attn.c_attn.bias": 0.00337958219461143,
269
+ "gpt.h.29.attn.c_attn.weight": 0.004022927954792976,
270
+ "gpt.h.29.attn.c_proj.bias": 0.0018416948150843382,
271
+ "gpt.h.29.attn.c_proj.weight": 0.018323034048080444,
272
+ "gpt.h.29.ln_1.bias": 0.004602092783898115,
273
+ "gpt.h.29.ln_1.weight": 0.009674133732914925,
274
+ "gpt.h.29.ln_2.bias": 0.003327032318338752,
275
+ "gpt.h.29.ln_2.weight": 0.01144911628216505,
276
+ "gpt.h.29.mlp.c_fc.bias": 0.002207099460065365,
277
+ "gpt.h.29.mlp.c_fc.weight": 0.014188914559781551,
278
+ "gpt.h.29.mlp.c_proj.bias": 0.00442220363765955,
279
+ "gpt.h.29.mlp.c_proj.weight": 0.0867144837975502,
280
+ "gpt.h.3.attn.c_attn.bias": 0.002024676650762558,
281
+ "gpt.h.3.attn.c_attn.weight": 0.004979935009032488,
282
+ "gpt.h.3.attn.c_proj.bias": 0.021005922928452492,
283
+ "gpt.h.3.attn.c_proj.weight": 0.028328293934464455,
284
+ "gpt.h.3.ln_1.bias": 0.008041512221097946,
285
+ "gpt.h.3.ln_1.weight": 0.006430978886783123,
286
+ "gpt.h.3.ln_2.bias": 0.006599007174372673,
287
+ "gpt.h.3.ln_2.weight": 0.013134118169546127,
288
+ "gpt.h.3.mlp.c_fc.bias": 0.002585547510534525,
289
+ "gpt.h.3.mlp.c_fc.weight": 0.009125065989792347,
290
+ "gpt.h.3.mlp.c_proj.bias": 0.01302343886345625,
291
+ "gpt.h.3.mlp.c_proj.weight": 0.05233204737305641,
292
+ "gpt.h.4.attn.c_attn.bias": 0.002189196879044175,
293
+ "gpt.h.4.attn.c_attn.weight": 0.003678702749311924,
294
+ "gpt.h.4.attn.c_proj.bias": 0.017389798536896706,
295
+ "gpt.h.4.attn.c_proj.weight": 0.01729857549071312,
296
+ "gpt.h.4.ln_1.bias": 0.007602763827890158,
297
+ "gpt.h.4.ln_1.weight": 0.0074179875664412975,
298
+ "gpt.h.4.ln_2.bias": 0.004727063234895468,
299
+ "gpt.h.4.ln_2.weight": 0.013179998844861984,
300
+ "gpt.h.4.mlp.c_fc.bias": 0.0009291492169722915,
301
+ "gpt.h.4.mlp.c_fc.weight": 0.005233710631728172,
302
+ "gpt.h.4.mlp.c_proj.bias": 0.007813659496605396,
303
+ "gpt.h.4.mlp.c_proj.weight": 0.056338295340538025,
304
+ "gpt.h.5.attn.c_attn.bias": 0.0016779249999672174,
305
+ "gpt.h.5.attn.c_attn.weight": 0.0030687786638736725,
306
+ "gpt.h.5.attn.c_proj.bias": 0.009757072664797306,
307
+ "gpt.h.5.attn.c_proj.weight": 0.007529787719249725,
308
+ "gpt.h.5.ln_1.bias": 0.006044740788638592,
309
+ "gpt.h.5.ln_1.weight": 0.009458239190280437,
310
+ "gpt.h.5.ln_2.bias": 0.0036309966817498207,
311
+ "gpt.h.5.ln_2.weight": 0.013853815384209156,
312
+ "gpt.h.5.mlp.c_fc.bias": 0.0011706397635862231,
313
+ "gpt.h.5.mlp.c_fc.weight": 0.004535002168267965,
314
+ "gpt.h.5.mlp.c_proj.bias": 0.00503140315413475,
315
+ "gpt.h.5.mlp.c_proj.weight": 0.04938670992851257,
316
+ "gpt.h.6.attn.c_attn.bias": 0.0022753833327442408,
317
+ "gpt.h.6.attn.c_attn.weight": 0.0051308972761034966,
318
+ "gpt.h.6.attn.c_proj.bias": 0.005487145856022835,
319
+ "gpt.h.6.attn.c_proj.weight": 0.005496611353009939,
320
+ "gpt.h.6.ln_1.bias": 0.0058417185209691525,
321
+ "gpt.h.6.ln_1.weight": 0.011210721917450428,
322
+ "gpt.h.6.ln_2.bias": 0.004622064530849457,
323
+ "gpt.h.6.ln_2.weight": 0.01402156613767147,
324
+ "gpt.h.6.mlp.c_fc.bias": 0.0014565958408638835,
325
+ "gpt.h.6.mlp.c_fc.weight": 0.006464360281825066,
326
+ "gpt.h.6.mlp.c_proj.bias": 0.004647988360375166,
327
+ "gpt.h.6.mlp.c_proj.weight": 0.048505980521440506,
328
+ "gpt.h.7.attn.c_attn.bias": 0.002493516309186816,
329
+ "gpt.h.7.attn.c_attn.weight": 0.005425372160971165,
330
+ "gpt.h.7.attn.c_proj.bias": 0.005013930611312389,
331
+ "gpt.h.7.attn.c_proj.weight": 0.0051925997249782085,
332
+ "gpt.h.7.ln_1.bias": 0.008432278409600258,
333
+ "gpt.h.7.ln_1.weight": 0.0083107715472579,
334
+ "gpt.h.7.ln_2.bias": 0.007790034171193838,
335
+ "gpt.h.7.ln_2.weight": 0.01420080941170454,
336
+ "gpt.h.7.mlp.c_fc.bias": 0.0018345315475016832,
337
+ "gpt.h.7.mlp.c_fc.weight": 0.0038694925606250763,
338
+ "gpt.h.7.mlp.c_proj.bias": 0.005904932040721178,
339
+ "gpt.h.7.mlp.c_proj.weight": 0.023479172959923744,
340
+ "gpt.h.8.attn.c_attn.bias": 0.00208841310814023,
341
+ "gpt.h.8.attn.c_attn.weight": 0.004913092590868473,
342
+ "gpt.h.8.attn.c_proj.bias": 0.00487521244212985,
343
+ "gpt.h.8.attn.c_proj.weight": 0.0052477335557341576,
344
+ "gpt.h.8.ln_1.bias": 0.006899502594023943,
345
+ "gpt.h.8.ln_1.weight": 0.009084747172892094,
346
+ "gpt.h.8.ln_2.bias": 0.007770676631480455,
347
+ "gpt.h.8.ln_2.weight": 0.013765494339168072,
348
+ "gpt.h.8.mlp.c_fc.bias": 0.0014608169440180063,
349
+ "gpt.h.8.mlp.c_fc.weight": 0.0037384077440947294,
350
+ "gpt.h.8.mlp.c_proj.bias": 0.005353797692805529,
351
+ "gpt.h.8.mlp.c_proj.weight": 0.02410462312400341,
352
+ "gpt.h.9.attn.c_attn.bias": 0.0024391484912484884,
353
+ "gpt.h.9.attn.c_attn.weight": 0.0065130204893648624,
354
+ "gpt.h.9.attn.c_proj.bias": 0.003902623662725091,
355
+ "gpt.h.9.attn.c_proj.weight": 0.0059540290385484695,
356
+ "gpt.h.9.ln_1.bias": 0.008465644903481007,
357
+ "gpt.h.9.ln_1.weight": 0.008637243881821632,
358
+ "gpt.h.9.ln_2.bias": 0.007591988891363144,
359
+ "gpt.h.9.ln_2.weight": 0.014198592863976955,
360
+ "gpt.h.9.mlp.c_fc.bias": 0.0017329183174297214,
361
+ "gpt.h.9.mlp.c_fc.weight": 0.004214098211377859,
362
+ "gpt.h.9.mlp.c_proj.bias": 0.005952149163931608,
363
+ "gpt.h.9.mlp.c_proj.weight": 0.024082494899630547,
364
+ "gpt.ln_f.bias": 0.011065124534070492,
365
+ "gpt.ln_f.weight": 0.022843239828944206,
366
+ "gpt.wpe.emb.weight": 0.004260858986526728,
367
+ "gpt.wte.weight": 0.0044210017658770084,
368
+ "mel_head.bias": 0.0015680863289162517,
369
+ "mel_head.weight": 0.004180184565484524
370
+ }
INT8/special_tokens_map.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "[START]",
3
+ "eos_token": "[STOP]",
4
+ "pad_token": "[PAD]",
5
+ "unk_token": "[UNK]"
6
+ }
INT8/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
INT8/tokenizer_config.json ADDED
@@ -0,0 +1,192 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[STOP]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "1": {
12
+ "content": "[UNK]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "2": {
20
+ "content": "[SPACE]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "259": {
28
+ "content": "[en]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "260": {
36
+ "content": "[de]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ },
43
+ "261": {
44
+ "content": "[START]",
45
+ "lstrip": false,
46
+ "normalized": false,
47
+ "rstrip": false,
48
+ "single_word": false,
49
+ "special": true
50
+ },
51
+ "262": {
52
+ "content": "[fr]",
53
+ "lstrip": false,
54
+ "normalized": false,
55
+ "rstrip": false,
56
+ "single_word": false,
57
+ "special": true
58
+ },
59
+ "267": {
60
+ "content": "[ru]",
61
+ "lstrip": false,
62
+ "normalized": false,
63
+ "rstrip": false,
64
+ "single_word": false,
65
+ "special": true
66
+ },
67
+ "284": {
68
+ "content": "[es]",
69
+ "lstrip": false,
70
+ "normalized": false,
71
+ "rstrip": false,
72
+ "single_word": false,
73
+ "special": true
74
+ },
75
+ "285": {
76
+ "content": "[it]",
77
+ "lstrip": false,
78
+ "normalized": false,
79
+ "rstrip": false,
80
+ "single_word": false,
81
+ "special": true
82
+ },
83
+ "286": {
84
+ "content": "[pt]",
85
+ "lstrip": false,
86
+ "normalized": false,
87
+ "rstrip": false,
88
+ "single_word": false,
89
+ "special": true
90
+ },
91
+ "293": {
92
+ "content": "[cs]",
93
+ "lstrip": false,
94
+ "normalized": false,
95
+ "rstrip": false,
96
+ "single_word": false,
97
+ "special": true
98
+ },
99
+ "294": {
100
+ "content": "[pl]",
101
+ "lstrip": false,
102
+ "normalized": false,
103
+ "rstrip": false,
104
+ "single_word": false,
105
+ "special": true
106
+ },
107
+ "295": {
108
+ "content": "[tr]",
109
+ "lstrip": false,
110
+ "normalized": false,
111
+ "rstrip": false,
112
+ "single_word": false,
113
+ "special": true
114
+ },
115
+ "297": {
116
+ "content": "[nl]",
117
+ "lstrip": false,
118
+ "normalized": false,
119
+ "rstrip": false,
120
+ "single_word": false,
121
+ "special": true
122
+ },
123
+ "5022": {
124
+ "content": "[ar]",
125
+ "lstrip": false,
126
+ "normalized": false,
127
+ "rstrip": false,
128
+ "single_word": false,
129
+ "special": true
130
+ },
131
+ "5023": {
132
+ "content": "[zh-cn]",
133
+ "lstrip": false,
134
+ "normalized": false,
135
+ "rstrip": false,
136
+ "single_word": false,
137
+ "special": true
138
+ },
139
+ "5412": {
140
+ "content": "[ja]",
141
+ "lstrip": false,
142
+ "normalized": false,
143
+ "rstrip": false,
144
+ "single_word": false,
145
+ "special": true
146
+ },
147
+ "5753": {
148
+ "content": "[hu]",
149
+ "lstrip": false,
150
+ "normalized": false,
151
+ "rstrip": false,
152
+ "single_word": false,
153
+ "special": true
154
+ },
155
+ "6152": {
156
+ "content": "[ko]",
157
+ "lstrip": false,
158
+ "normalized": false,
159
+ "rstrip": false,
160
+ "single_word": false,
161
+ "special": true
162
+ },
163
+ "6680": {
164
+ "content": "[hi]",
165
+ "lstrip": false,
166
+ "normalized": false,
167
+ "rstrip": false,
168
+ "single_word": false,
169
+ "special": true
170
+ },
171
+ "6681": {
172
+ "content": "[PAD]",
173
+ "lstrip": false,
174
+ "normalized": false,
175
+ "rstrip": false,
176
+ "single_word": false,
177
+ "special": true
178
+ }
179
+ },
180
+ "auto_map": {"AutoTokenizer": ["AstraMindAI/xtts2-gpt--tokenizer.XTTSTokenizerFast", null]},
181
+ "bos_token": "[START]",
182
+ "clean_up_tokenization_spaces": true,
183
+ "eos_token": "[STOP]",
184
+ "max_length": null,
185
+ "model_max_length": 1000000000000000019884624838656,
186
+ "pad_to_multiple_of": null,
187
+ "pad_token": "[PAD]",
188
+ "pad_token_type_id": 0,
189
+ "padding_side": "right",
190
+ "tokenizer_class": "XTTSTokenizerFast",
191
+ "unk_token": "[UNK]"
192
+ }