yunjae-won committed on
Commit
e978eac
·
verified ·
1 Parent(s): 59bdd45

Upload Qwen3ForCausalLM

Browse files
config.json CHANGED
@@ -9,9 +9,9 @@
9
  "eos_token_id": 151645,
10
  "head_dim": 128,
11
  "hidden_act": "silu",
12
- "hidden_size": 2048,
13
  "initializer_range": 0.02,
14
- "intermediate_size": 6144,
15
  "layer_types": [
16
  "full_attention",
17
  "full_attention",
@@ -40,13 +40,21 @@
40
  "full_attention",
41
  "full_attention",
42
  "full_attention",
 
 
 
 
 
 
 
 
43
  "full_attention"
44
  ],
45
  "max_position_embeddings": 40960,
46
- "max_window_layers": 28,
47
  "model_type": "qwen3",
48
- "num_attention_heads": 16,
49
- "num_hidden_layers": 28,
50
  "num_key_value_heads": 8,
51
  "rms_norm_eps": 1e-06,
52
  "rope_scaling": null,
 
9
  "eos_token_id": 151645,
10
  "head_dim": 128,
11
  "hidden_act": "silu",
12
+ "hidden_size": 2560,
13
  "initializer_range": 0.02,
14
+ "intermediate_size": 9728,
15
  "layer_types": [
16
  "full_attention",
17
  "full_attention",
 
40
  "full_attention",
41
  "full_attention",
42
  "full_attention",
43
+ "full_attention",
44
+ "full_attention",
45
+ "full_attention",
46
+ "full_attention",
47
+ "full_attention",
48
+ "full_attention",
49
+ "full_attention",
50
+ "full_attention",
51
  "full_attention"
52
  ],
53
  "max_position_embeddings": 40960,
54
+ "max_window_layers": 36,
55
  "model_type": "qwen3",
56
+ "num_attention_heads": 32,
57
+ "num_hidden_layers": 36,
58
  "num_key_value_heads": 8,
59
  "rms_norm_eps": 1e-06,
60
  "rope_scaling": null,
model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:68b216cf6d7bdf0812f08d33d05695144f576515bbd0c5898b0a8602d137c2c8
3
  size 4967215360
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0aae6c11ec85a8a15d96471cea7eecdcd0cc97d47f18d97f570eb401380d9657
3
  size 4967215360
model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:afedda663d8fac0aa4aad096259b9ce2b06543eedca525fdaee0213148313a13
3
  size 3077766632
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6b11ba55ddd6af9a647bf89e9dbf27c81a421548f21ebc5e9f599c7a1e0207f8
3
  size 3077766632