Sr-Carlos committed on
Commit
2c11923
·
verified ·
1 Parent(s): a9fef16

Trained with Unsloth - config

Browse files
Files changed (1) hide show
  1. config.json +209 -0
config.json ADDED
@@ -0,0 +1,209 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "Gemma3nForConditionalGeneration"
4
+ ],
5
+ "audio_config": {
6
+ "conf_attention_chunk_size": 12,
7
+ "conf_attention_context_left": 13,
8
+ "conf_attention_context_right": 0,
9
+ "conf_attention_logit_cap": 50.0,
10
+ "conf_conv_kernel_size": 5,
11
+ "conf_num_attention_heads": 8,
12
+ "conf_num_hidden_layers": 12,
13
+ "conf_reduction_factor": 4,
14
+ "conf_residual_weight": 0.5,
15
+ "torch_dtype": "bfloat16",
16
+ "gradient_clipping": 10000000000.0,
17
+ "hidden_size": 1536,
18
+ "input_feat_size": 128,
19
+ "model_type": "gemma3n_audio",
20
+ "rms_norm_eps": 1e-06,
21
+ "sscp_conv_channel_size": [
22
+ 128,
23
+ 32
24
+ ],
25
+ "sscp_conv_group_norm_eps": 0.001,
26
+ "sscp_conv_kernel_size": [
27
+ [
28
+ 3,
29
+ 3
30
+ ],
31
+ [
32
+ 3,
33
+ 3
34
+ ]
35
+ ],
36
+ "sscp_conv_stride_size": [
37
+ [
38
+ 2,
39
+ 2
40
+ ],
41
+ [
42
+ 2,
43
+ 2
44
+ ]
45
+ ],
46
+ "vocab_offset": 262272,
47
+ "vocab_size": 128
48
+ },
49
+ "audio_soft_tokens_per_image": 188,
50
+ "audio_token_id": 262273,
51
+ "boa_token_id": 256000,
52
+ "boi_token_id": 255999,
53
+ "bos_token_id": 2,
54
+ "torch_dtype": "bfloat16",
55
+ "eoa_token_id": 262272,
56
+ "eoi_token_id": 262144,
57
+ "eos_token_id": 106,
58
+ "image_token_id": 262145,
59
+ "initializer_range": 0.02,
60
+ "model_type": "gemma3n",
61
+ "pad_token_id": 0,
62
+ "text_config": {
63
+ "activation_sparsity_pattern": [
64
+ 0.95,
65
+ 0.95,
66
+ 0.95,
67
+ 0.95,
68
+ 0.95,
69
+ 0.95,
70
+ 0.95,
71
+ 0.95,
72
+ 0.95,
73
+ 0.95,
74
+ 0.0,
75
+ 0.0,
76
+ 0.0,
77
+ 0.0,
78
+ 0.0,
79
+ 0.0,
80
+ 0.0,
81
+ 0.0,
82
+ 0.0,
83
+ 0.0,
84
+ 0.0,
85
+ 0.0,
86
+ 0.0,
87
+ 0.0,
88
+ 0.0,
89
+ 0.0,
90
+ 0.0,
91
+ 0.0,
92
+ 0.0,
93
+ 0.0
94
+ ],
95
+ "altup_active_idx": 0,
96
+ "altup_coef_clip": 120.0,
97
+ "altup_correct_scale": true,
98
+ "altup_num_inputs": 4,
99
+ "attention_bias": false,
100
+ "attention_dropout": 0.0,
101
+ "torch_dtype": "bfloat16",
102
+ "final_logit_softcapping": 30.0,
103
+ "head_dim": 256,
104
+ "hidden_activation": "gelu_pytorch_tanh",
105
+ "hidden_size": 2048,
106
+ "hidden_size_per_layer_input": 256,
107
+ "initializer_range": 0.02,
108
+ "intermediate_size": [
109
+ 8192,
110
+ 8192,
111
+ 8192,
112
+ 8192,
113
+ 8192,
114
+ 8192,
115
+ 8192,
116
+ 8192,
117
+ 8192,
118
+ 8192,
119
+ 8192,
120
+ 8192,
121
+ 8192,
122
+ 8192,
123
+ 8192,
124
+ 8192,
125
+ 8192,
126
+ 8192,
127
+ 8192,
128
+ 8192,
129
+ 8192,
130
+ 8192,
131
+ 8192,
132
+ 8192,
133
+ 8192,
134
+ 8192,
135
+ 8192,
136
+ 8192,
137
+ 8192,
138
+ 8192
139
+ ],
140
+ "laurel_rank": 64,
141
+ "layer_types": [
142
+ "sliding_attention",
143
+ "sliding_attention",
144
+ "sliding_attention",
145
+ "sliding_attention",
146
+ "full_attention",
147
+ "sliding_attention",
148
+ "sliding_attention",
149
+ "sliding_attention",
150
+ "sliding_attention",
151
+ "full_attention",
152
+ "sliding_attention",
153
+ "sliding_attention",
154
+ "sliding_attention",
155
+ "sliding_attention",
156
+ "full_attention",
157
+ "sliding_attention",
158
+ "sliding_attention",
159
+ "sliding_attention",
160
+ "sliding_attention",
161
+ "full_attention",
162
+ "sliding_attention",
163
+ "sliding_attention",
164
+ "sliding_attention",
165
+ "sliding_attention",
166
+ "full_attention",
167
+ "sliding_attention",
168
+ "sliding_attention",
169
+ "sliding_attention",
170
+ "sliding_attention",
171
+ "full_attention"
172
+ ],
173
+ "max_position_embeddings": 32768,
174
+ "model_type": "gemma3n_text",
175
+ "num_attention_heads": 8,
176
+ "num_hidden_layers": 30,
177
+ "num_key_value_heads": 2,
178
+ "num_kv_shared_layers": 10,
179
+ "rms_norm_eps": 1e-06,
180
+ "rope_local_base_freq": 10000.0,
181
+ "rope_scaling": null,
182
+ "rope_theta": 1000000.0,
183
+ "sliding_window": 512,
184
+ "use_cache": true,
185
+ "vocab_size": 262400,
186
+ "vocab_size_per_layer_input": 262144
187
+ },
188
+ "transformers_version": "4.56.2",
189
+ "unsloth_fixed": true,
190
+ "unsloth_version": "2025.12.1",
191
+ "vision_config": {
192
+ "architecture": "mobilenetv5_300m_enc",
193
+ "do_pooling": false,
194
+ "torch_dtype": "bfloat16",
195
+ "hidden_size": 2048,
196
+ "initializer_range": 0.02,
197
+ "label_names": [
198
+ "LABEL_0",
199
+ "LABEL_1"
200
+ ],
201
+ "model_args": null,
202
+ "model_type": "gemma3n_vision",
203
+ "num_classes": 2,
204
+ "rms_norm_eps": 1e-06,
205
+ "vocab_offset": 262144,
206
+ "vocab_size": 128
207
+ },
208
+ "vision_soft_tokens_per_image": 256
209
+ }