Real Null committed on
Commit
8bf78e1
·
verified ·
1 Parent(s): 634218f

Update config.json

Browse files
Files changed (1) hide show
  1. config.json +5 -229
config.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "architectures": [
3
- "AurenForConditionalGeneration"
4
  ],
5
  "audio_config": {
6
  "conf_attention_chunk_size": 12,
@@ -16,7 +16,7 @@
16
  "gradient_clipping": 10000000000.0,
17
  "hidden_size": 1536,
18
  "input_feat_size": 128,
19
- "model_type": "auren_audio",
20
  "rms_norm_eps": 1e-06,
21
  "sscp_conv_channel_size": [
22
  128,
@@ -57,7 +57,7 @@
57
  "eos_token_id": 106,
58
  "image_token_id": 262145,
59
  "initializer_range": 0.02,
60
- "model_type": "auren",
61
  "pad_token_id": 0,
62
  "text_config": {
63
  "activation_sparsity_pattern": [
@@ -186,7 +186,7 @@
186
  "full_attention"
187
  ],
188
  "max_position_embeddings": 32768,
189
- "model_type": "auren_text",
190
  "num_attention_heads": 8,
191
  "num_hidden_layers": 35,
192
  "num_key_value_heads": 2,
@@ -214,231 +214,7 @@
214
  "LABEL_1"
215
  ],
216
  "model_args": null,
217
- "model_type": "auren_vision",
218
- "num_classes": 2,
219
- "rms_norm_eps": 1e-06,
220
- "vocab_offset": 262144,
221
- "vocab_size": 128
222
- },
223
- "vision_soft_tokens_per_image": 256
224
- }
225
- {
226
- "architectures": [
227
- "AurenForConditionalGeneration"
228
- ],
229
- "audio_config": {
230
- "conf_attention_chunk_size": 12,
231
- "conf_attention_context_left": 13,
232
- "conf_attention_context_right": 0,
233
- "conf_attention_logit_cap": 50.0,
234
- "conf_conv_kernel_size": 5,
235
- "conf_num_attention_heads": 8,
236
- "conf_num_hidden_layers": 12,
237
- "conf_reduction_factor": 4,
238
- "conf_residual_weight": 0.5,
239
- "dtype": "float16",
240
- "gradient_clipping": 10000000000.0,
241
- "hidden_size": 1536,
242
- "input_feat_size": 128,
243
- "model_type": "auren_audio",
244
- "rms_norm_eps": 1e-06,
245
- "sscp_conv_channel_size": [
246
- 128,
247
- 32
248
- ],
249
- "sscp_conv_group_norm_eps": 0.001,
250
- "sscp_conv_kernel_size": [
251
- [
252
- 3,
253
- 3
254
- ],
255
- [
256
- 3,
257
- 3
258
- ]
259
- ],
260
- "sscp_conv_stride_size": [
261
- [
262
- 2,
263
- 2
264
- ],
265
- [
266
- 2,
267
- 2
268
- ]
269
- ],
270
- "vocab_offset": 262272,
271
- "vocab_size": 128
272
- },
273
- "audio_soft_tokens_per_image": 188,
274
- "audio_token_id": 262273,
275
- "boa_token_id": 256000,
276
- "boi_token_id": 255999,
277
- "bos_token_id": 2,
278
- "dtype": "float16",
279
- "eoa_token_id": 262272,
280
- "eoi_token_id": 262144,
281
- "eos_token_id": 106,
282
- "image_token_id": 262145,
283
- "initializer_range": 0.02,
284
- "model_type": "auren",
285
- "pad_token_id": 0,
286
- "text_config": {
287
- "activation_sparsity_pattern": [
288
- 0.95,
289
- 0.95,
290
- 0.95,
291
- 0.95,
292
- 0.95,
293
- 0.95,
294
- 0.95,
295
- 0.95,
296
- 0.95,
297
- 0.95,
298
- 0.0,
299
- 0.0,
300
- 0.0,
301
- 0.0,
302
- 0.0,
303
- 0.0,
304
- 0.0,
305
- 0.0,
306
- 0.0,
307
- 0.0,
308
- 0.0,
309
- 0.0,
310
- 0.0,
311
- 0.0,
312
- 0.0,
313
- 0.0,
314
- 0.0,
315
- 0.0,
316
- 0.0,
317
- 0.0,
318
- 0.0,
319
- 0.0,
320
- 0.0,
321
- 0.0,
322
- 0.0
323
- ],
324
- "altup_active_idx": 0,
325
- "altup_coef_clip": 120.0,
326
- "altup_correct_scale": true,
327
- "altup_num_inputs": 4,
328
- "attention_bias": false,
329
- "attention_dropout": 0.0,
330
- "dtype": "float16",
331
- "final_logit_softcapping": 30.0,
332
- "head_dim": 256,
333
- "hidden_activation": "gelu_pytorch_tanh",
334
- "hidden_size": 2048,
335
- "hidden_size_per_layer_input": 256,
336
- "initializer_range": 0.02,
337
- "intermediate_size": [
338
- 16384,
339
- 16384,
340
- 16384,
341
- 16384,
342
- 16384,
343
- 16384,
344
- 16384,
345
- 16384,
346
- 16384,
347
- 16384,
348
- 16384,
349
- 16384,
350
- 16384,
351
- 16384,
352
- 16384,
353
- 16384,
354
- 16384,
355
- 16384,
356
- 16384,
357
- 16384,
358
- 16384,
359
- 16384,
360
- 16384,
361
- 16384,
362
- 16384,
363
- 16384,
364
- 16384,
365
- 16384,
366
- 16384,
367
- 16384,
368
- 16384,
369
- 16384,
370
- 16384,
371
- 16384,
372
- 16384
373
- ],
374
- "laurel_rank": 64,
375
- "layer_types": [
376
- "sliding_attention",
377
- "sliding_attention",
378
- "sliding_attention",
379
- "sliding_attention",
380
- "full_attention",
381
- "sliding_attention",
382
- "sliding_attention",
383
- "sliding_attention",
384
- "sliding_attention",
385
- "full_attention",
386
- "sliding_attention",
387
- "sliding_attention",
388
- "sliding_attention",
389
- "sliding_attention",
390
- "full_attention",
391
- "sliding_attention",
392
- "sliding_attention",
393
- "sliding_attention",
394
- "sliding_attention",
395
- "full_attention",
396
- "sliding_attention",
397
- "sliding_attention",
398
- "sliding_attention",
399
- "sliding_attention",
400
- "full_attention",
401
- "sliding_attention",
402
- "sliding_attention",
403
- "sliding_attention",
404
- "sliding_attention",
405
- "full_attention",
406
- "sliding_attention",
407
- "sliding_attention",
408
- "sliding_attention",
409
- "sliding_attention",
410
- "full_attention"
411
- ],
412
- "max_position_embeddings": 32768,
413
- "model_type": "auren_text",
414
- "num_attention_heads": 8,
415
- "num_hidden_layers": 35,
416
- "num_key_value_heads": 2,
417
- "num_kv_shared_layers": 15,
418
- "rms_norm_eps": 1e-06,
419
- "rope_local_base_freq": 10000.0,
420
- "rope_scaling": null,
421
- "rope_theta": 1000000.0,
422
- "sliding_window": 512,
423
- "use_cache": true,
424
- "vocab_size": 262400,
425
- "vocab_size_per_layer_input": 262144
426
- },
427
- "transformers_version": "4.56.2",
428
- "unsloth_fixed": true,
429
- "unsloth_version": "2025.9.10",
430
- "vision_config": {
431
- "architecture": "mobilenetv5_300m_enc",
432
- "do_pooling": false,
433
- "dtype": "float16",
434
- "hidden_size": 2048,
435
- "initializer_range": 0.02,
436
- "label_names": [
437
- "LABEL_0",
438
- "LABEL_1"
439
- ],
440
- "model_args": null,
441
- "model_type": "auren_vision",
442
  "num_classes": 2,
443
  "rms_norm_eps": 1e-06,
444
  "vocab_offset": 262144,
 
1
  {
2
  "architectures": [
3
+ "Gemma3nForConditionalGeneration"
4
  ],
5
  "audio_config": {
6
  "conf_attention_chunk_size": 12,
 
16
  "gradient_clipping": 10000000000.0,
17
  "hidden_size": 1536,
18
  "input_feat_size": 128,
19
+ "model_type": "gemma3n_audio",
20
  "rms_norm_eps": 1e-06,
21
  "sscp_conv_channel_size": [
22
  128,
 
57
  "eos_token_id": 106,
58
  "image_token_id": 262145,
59
  "initializer_range": 0.02,
60
+ "model_type": "gemma3n",
61
  "pad_token_id": 0,
62
  "text_config": {
63
  "activation_sparsity_pattern": [
 
186
  "full_attention"
187
  ],
188
  "max_position_embeddings": 32768,
189
+ "model_type": "gemma3n_text",
190
  "num_attention_heads": 8,
191
  "num_hidden_layers": 35,
192
  "num_key_value_heads": 2,
 
214
  "LABEL_1"
215
  ],
216
  "model_args": null,
217
+ "model_type": "gemma3n_vision",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
218
  "num_classes": 2,
219
  "rms_norm_eps": 1e-06,
220
  "vocab_offset": 262144,