reaperdoesntknow committed on
Commit
7139555
·
verified ·
1 Parent(s): 07d71b9

Upload Gemma3ForCausalLM

Browse files
Files changed (2) hide show
  1. config.json +14 -6
  2. model.safetensors +1 -1
config.json CHANGED
@@ -7,7 +7,7 @@
7
  "attention_dropout": 0.0,
8
  "attn_logit_softcapping": null,
9
  "bos_token_id": 2,
10
- "cache_implementation": "hybrid",
11
  "dtype": "float32",
12
  "eos_token_id": [
13
  1,
@@ -47,21 +47,29 @@
47
  "sliding_attention",
48
  "sliding_attention"
49
  ],
50
- "max_position_embeddings": 32768,
51
  "model_type": "gemma3_text",
52
  "num_attention_heads": 4,
53
  "num_hidden_layers": 26,
54
  "num_key_value_heads": 1,
55
  "pad_token_id": 0,
56
  "query_pre_attn_scalar": 256,
57
- "rms_norm_eps": 1e-06,
58
  "rope_local_base_freq": 10000,
59
- "rope_scaling": null,
60
- "rope_theta": 1000000,
 
 
 
 
 
 
 
 
61
  "sliding_window": 512,
62
  "sliding_window_pattern": 6,
63
  "transformers_version": "4.57.1",
64
  "use_bidirectional_attention": false,
65
- "use_cache": true,
66
  "vocab_size": 262149
67
  }
 
7
  "attention_dropout": 0.0,
8
  "attn_logit_softcapping": null,
9
  "bos_token_id": 2,
10
+ "cache_implementation": "sliding_window",
11
  "dtype": "float32",
12
  "eos_token_id": [
13
  1,
 
47
  "sliding_attention",
48
  "sliding_attention"
49
  ],
50
+ "max_position_embeddings": 262144,
51
  "model_type": "gemma3_text",
52
  "num_attention_heads": 4,
53
  "num_hidden_layers": 26,
54
  "num_key_value_heads": 1,
55
  "pad_token_id": 0,
56
  "query_pre_attn_scalar": 256,
57
+ "rms_norm_eps": 1e-05,
58
  "rope_local_base_freq": 10000,
59
+ "rope_scaling": {
60
+ "beta_fast": 1.0,
61
+ "beta_slow": 1.0,
62
+ "factor": 8.0,
63
+ "mscale": 1.0,
64
+ "mscale_all_dim": 1.0,
65
+ "original_max_position_embeddings": 32768,
66
+ "rope_type": "yarn"
67
+ },
68
+ "rope_theta": 50000.0,
69
  "sliding_window": 512,
70
  "sliding_window_pattern": 6,
71
  "transformers_version": "4.57.1",
72
  "use_bidirectional_attention": false,
73
+ "use_cache": false,
74
  "vocab_size": 262149
75
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7ac75794a439d8d467c9c698046279ac8fcdf05affe544f7e98a3a802d9b140b
3
  size 3999606000
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:54520fb0553b6c841d9cabe4859be156b7d9dbbadde4fc0baaa2fbffb7d17f53
3
  size 3999606000