waddie committed on
Commit
799d827
·
verified ·
1 Parent(s): b2c30f2

Upload Qwen2ForCausalLM

Browse files
Files changed (3) hide show
  1. config.json +34 -56
  2. generation_config.json +9 -6
  3. model.safetensors +2 -2
config.json CHANGED
@@ -1,83 +1,61 @@
1
  {
2
  "architectures": [
3
- "Qwen3_5ForCausalLM"
4
  ],
5
- "attention_bias": false,
6
  "attention_dropout": 0.0,
7
- "attn_output_gate": true,
8
  "bos_token_id": null,
9
  "dtype": "bfloat16",
10
- "eos_token_id": 248046,
11
- "full_attention_interval": 4,
12
- "head_dim": 256,
13
  "hidden_act": "silu",
14
- "hidden_size": 4096,
15
  "initializer_range": 0.02,
16
- "intermediate_size": 12288,
17
  "layer_types": [
18
- "linear_attention",
19
- "linear_attention",
20
- "linear_attention",
21
  "full_attention",
22
- "linear_attention",
23
- "linear_attention",
24
- "linear_attention",
25
  "full_attention",
26
- "linear_attention",
27
- "linear_attention",
28
- "linear_attention",
29
  "full_attention",
30
- "linear_attention",
31
- "linear_attention",
32
- "linear_attention",
33
  "full_attention",
34
- "linear_attention",
35
- "linear_attention",
36
- "linear_attention",
37
  "full_attention",
38
- "linear_attention",
39
- "linear_attention",
40
- "linear_attention",
41
  "full_attention",
42
- "linear_attention",
43
- "linear_attention",
44
- "linear_attention",
45
  "full_attention",
46
- "linear_attention",
47
- "linear_attention",
48
- "linear_attention",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
49
  "full_attention"
50
  ],
51
- "linear_conv_kernel_dim": 4,
52
- "linear_key_head_dim": 128,
53
- "linear_num_key_heads": 16,
54
- "linear_num_value_heads": 32,
55
- "linear_value_head_dim": 128,
56
- "mamba_ssm_dtype": "float32",
57
- "max_position_embeddings": 262144,
58
- "mlp_only_layers": [],
59
- "model_type": "qwen3_5_text",
60
- "mtp_num_hidden_layers": 1,
61
- "mtp_use_dedicated_embeddings": false,
62
- "num_attention_heads": 16,
63
- "num_hidden_layers": 32,
64
  "num_key_value_heads": 4,
65
- "pad_token_id": 248055,
66
- "partial_rotary_factor": 0.25,
67
  "rms_norm_eps": 1e-06,
68
  "rope_parameters": {
69
- "mrope_interleaved": true,
70
- "mrope_section": [
71
- 11,
72
- 11,
73
- 10
74
- ],
75
- "partial_rotary_factor": 0.25,
76
- "rope_theta": 10000000,
77
  "rope_type": "default"
78
  },
 
79
  "tie_word_embeddings": false,
80
  "transformers_version": "5.6.2",
81
  "use_cache": false,
82
- "vocab_size": 248320
 
83
  }
 
1
  {
2
  "architectures": [
3
+ "Qwen2ForCausalLM"
4
  ],
 
5
  "attention_dropout": 0.0,
 
6
  "bos_token_id": null,
7
  "dtype": "bfloat16",
8
+ "eos_token_id": 151645,
 
 
9
  "hidden_act": "silu",
10
+ "hidden_size": 3584,
11
  "initializer_range": 0.02,
12
+ "intermediate_size": 18944,
13
  "layer_types": [
 
 
 
14
  "full_attention",
 
 
 
15
  "full_attention",
 
 
 
16
  "full_attention",
 
 
 
17
  "full_attention",
 
 
 
18
  "full_attention",
 
 
 
19
  "full_attention",
 
 
 
20
  "full_attention",
21
+ "full_attention",
22
+ "full_attention",
23
+ "full_attention",
24
+ "full_attention",
25
+ "full_attention",
26
+ "full_attention",
27
+ "full_attention",
28
+ "full_attention",
29
+ "full_attention",
30
+ "full_attention",
31
+ "full_attention",
32
+ "full_attention",
33
+ "full_attention",
34
+ "full_attention",
35
+ "full_attention",
36
+ "full_attention",
37
+ "full_attention",
38
+ "full_attention",
39
+ "full_attention",
40
+ "full_attention",
41
  "full_attention"
42
  ],
43
+ "max_position_embeddings": 32768,
44
+ "max_window_layers": 28,
45
+ "model_type": "qwen2",
46
+ "num_attention_heads": 28,
47
+ "num_hidden_layers": 28,
 
 
 
 
 
 
 
 
48
  "num_key_value_heads": 4,
49
+ "pad_token_id": 151654,
 
50
  "rms_norm_eps": 1e-06,
51
  "rope_parameters": {
52
+ "rope_theta": 1000000.0,
 
 
 
 
 
 
 
53
  "rope_type": "default"
54
  },
55
+ "sliding_window": null,
56
  "tie_word_embeddings": false,
57
  "transformers_version": "5.6.2",
58
  "use_cache": false,
59
+ "use_sliding_window": false,
60
+ "vocab_size": 152064
61
  }
generation_config.json CHANGED
@@ -1,10 +1,13 @@
1
  {
2
- "_from_model_config": true,
3
  "eos_token_id": [
4
- 248046,
5
- 248044
6
  ],
7
- "pad_token_id": 248055,
8
- "transformers_version": "5.6.2",
9
- "use_cache": true
 
 
 
10
  }
 
1
  {
2
+ "do_sample": true,
3
  "eos_token_id": [
4
+ 151645,
5
+ 151643
6
  ],
7
+ "pad_token_id": 151654,
8
+ "repetition_penalty": 1.05,
9
+ "temperature": 0.7,
10
+ "top_k": 20,
11
+ "top_p": 0.8,
12
+ "transformers_version": "5.6.2"
13
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c53a0ec77d6e0508dabf06b05837a5afb1b936f270755917185a278028ea26d9
3
- size 17907663008
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:27d6e4ffb664d838263ee1593a9b4d7786ab22fa10a8759a28659f12cc1ed2ba
3
+ size 15231272152