File size: 1,745 Bytes
c62cc12
bf92dae
c62cc12
bf92dae
 
 
 
 
 
c62cc12
bf92dae
c62cc12
bf92dae
c62cc12
bf92dae
 
 
c62cc12
bf92dae
 
 
c62cc12
bf92dae
 
 
c62cc12
bf92dae
 
 
c62cc12
bf92dae
 
 
c62cc12
bf92dae
 
 
c62cc12
 
bf92dae
 
 
 
 
 
 
 
 
 
 
 
 
c62cc12
bf92dae
 
c62cc12
 
bf92dae
 
 
 
 
 
 
 
c62cc12
 
bf92dae
c62cc12
 
bf92dae
c62cc12
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
{
  "attention_bias": false,
  "attention_dropout": 0.0,
  "attn_output_gate": true,
  "bos_token_id": null,
  "dtype": "bfloat16",
  "eos_token_id": 248046,
  "full_attention_interval": 4,
  "head_dim": 256,
  "hidden_act": "silu",
  "hidden_size": 1024,
  "initializer_range": 0.02,
  "intermediate_size": 3584,
  "layer_types": [
    "linear_attention",
    "linear_attention",
    "linear_attention",
    "full_attention",
    "linear_attention",
    "linear_attention",
    "linear_attention",
    "full_attention",
    "linear_attention",
    "linear_attention",
    "linear_attention",
    "full_attention",
    "linear_attention",
    "linear_attention",
    "linear_attention",
    "full_attention",
    "linear_attention",
    "linear_attention",
    "linear_attention",
    "full_attention",
    "linear_attention",
    "linear_attention",
    "linear_attention",
    "full_attention"
  ],
  "linear_conv_kernel_dim": 4,
  "linear_key_head_dim": 128,
  "linear_num_key_heads": 16,
  "linear_num_value_heads": 16,
  "linear_value_head_dim": 128,
  "mamba_ssm_dtype": "float32",
  "max_position_embeddings": 262144,
  "mlp_only_layers": [],
  "model_type": "qwen3_5_text",
  "mtp_num_hidden_layers": 1,
  "mtp_use_dedicated_embeddings": false,
  "num_attention_heads": 8,
  "num_hidden_layers": 24,
  "num_key_value_heads": 2,
  "pad_token_id": 248044,
  "partial_rotary_factor": 0.25,
  "rms_norm_eps": 1e-06,
  "rope_parameters": {
    "mrope_interleaved": true,
    "mrope_section": [
      11,
      11,
      10
    ],
    "partial_rotary_factor": 0.25,
    "rope_theta": 10000000,
    "rope_type": "default"
  },
  "tie_word_embeddings": true,
  "transformers_version": "5.8.0.dev0",
  "use_cache": true,
  "vocab_size": 248320
}