DeepBeepMeep committed on
Commit
64daf83
·
verified ·
1 Parent(s): 854779b

Upload config_light.json

Browse files
gemma-3-12b-it-qat-q4_0-unquantized/config_light.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
{
  "architectures": [
    "Gemma3ForCausalLM"
  ],
  "eos_token_id": [
    1,
    106
  ],
  "attention_bias": false,
  "attention_dropout": 0.0,
  "attn_logit_softcapping": null,
  "cache_implementation": "hybrid",
  "final_logit_softcapping": null,
  "head_dim": 256,
  "hidden_activation": "gelu_pytorch_tanh",
  "hidden_size": 3840,
  "initializer_range": 0.02,
  "intermediate_size": 15360,
  "max_position_embeddings": 131072,
  "model_type": "gemma3_text",
  "num_attention_heads": 16,
  "num_hidden_layers": 48,
  "num_key_value_heads": 8,
  "query_pre_attn_scalar": 256,
  "rms_norm_eps": 1e-06,
  "rope_local_base_freq": 10000,
  "rope_scaling": {
    "factor": 8.0,
    "rope_type": "linear"
  },
  "rope_theta": 1000000,
  "sliding_window": 1024,
  "sliding_window_pattern": 6,
  "torch_dtype": "bfloat16",
  "transformers_version": "4.52.0.dev0",
  "use_cache": true,
  "vocab_size": 262208
}