AlexTrinityBlock committed
Commit f78a629 · verified · 1 Parent(s): e658899

(Trained with Unsloth)

Files changed (1)
  config.json +20 -6
config.json CHANGED
@@ -22,9 +22,9 @@
   "final_logit_softcapping": null,
   "head_dim": 256,
   "hidden_activation": "gelu_pytorch_tanh",
-  "hidden_size": 2560,
+  "hidden_size": 3840,
   "initializer_range": 0.02,
-  "intermediate_size": 10240,
+  "intermediate_size": 15360,
   "layer_types": [
     "sliding_attention",
     "sliding_attention",
@@ -59,13 +59,27 @@
     "sliding_attention",
     "sliding_attention",
     "sliding_attention",
-    "sliding_attention"
+    "sliding_attention",
+    "sliding_attention",
+    "full_attention",
+    "sliding_attention",
+    "sliding_attention",
+    "sliding_attention",
+    "sliding_attention",
+    "sliding_attention",
+    "full_attention",
+    "sliding_attention",
+    "sliding_attention",
+    "sliding_attention",
+    "sliding_attention",
+    "sliding_attention",
+    "full_attention"
   ],
   "max_position_embeddings": 131072,
   "model_type": "gemma3_text",
-  "num_attention_heads": 8,
-  "num_hidden_layers": 34,
-  "num_key_value_heads": 4,
+  "num_attention_heads": 16,
+  "num_hidden_layers": 48,
+  "num_key_value_heads": 8,
   "query_pre_attn_scalar": 256,
   "rms_norm_eps": 1e-06,
   "rope_local_base_freq": 10000.0,
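Not part of the commit, but as a quick sanity check one could read the updated config.json and print the architecture fields this diff touches. The sketch below assumes a local checkout of the repository with the file at a placeholder path "config.json"; the field names are exactly those shown in the diff above.

import json
from collections import Counter

# Hypothetical local path to this repository's updated config.json
with open("config.json") as f:
    cfg = json.load(f)

# Fields modified by this commit
for key in ("hidden_size", "intermediate_size", "num_attention_heads",
            "num_hidden_layers", "num_key_value_heads"):
    print(f"{key}: {cfg[key]}")

# layer_types mixes sliding- and full-attention layers; in Gemma 3 text configs
# it is expected to hold one entry per hidden layer.
print(Counter(cfg["layer_types"]))
assert len(cfg["layer_types"]) == cfg["num_hidden_layers"]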