File size: 2,014 Bytes
b5cacc7
 
 
 
 
 
 
 
 
 
 
 
0ee5d75
b5cacc7
334dfeb
b5cacc7
0ee5d75
b5cacc7
 
0ee5d75
b5cacc7
0ee5d75
b5cacc7
0ee5d75
b5cacc7
0ee5d75
b5cacc7
0ee5d75
b5cacc7
0ee5d75
b5cacc7
0ee5d75
b5cacc7
0ee5d75
b5cacc7
0ee5d75
b5cacc7
0ee5d75
b5cacc7
0ee5d75
b5cacc7
0ee5d75
b5cacc7
0ee5d75
b5cacc7
0ee5d75
b5cacc7
0ee5d75
b5cacc7
0ee5d75
b5cacc7
0ee5d75
b5cacc7
0ee5d75
b5cacc7
0ee5d75
b5cacc7
0ee5d75
b5cacc7
 
0ee5d75
334dfeb
466c5fb
b5cacc7
0ee5d75
c31ae10
0ee5d75
b5cacc7
 
 
 
 
 
 
0ee5d75
334dfeb
0ee5d75
 
 
 
b5cacc7
0ee5d75
 
b5cacc7
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
{
  "architectures": [
    "Qwen3CanonForCausalLM"
  ],
  "attention_bias": false,
  "attention_dropout": 0.0,
  "auto_map": {
    "AutoModelForCausalLM": "patch.Qwen3CanonForCausalLM"
  },
  "bos_token_id": 50030,
  "dtype": "bfloat16",
  "eos_token_id": 50031,
  "head_dim": 128,
  "hidden_act": "silu",
  "hidden_size": 1152,
  "initializer_range": 0.02,
  "intermediate_size": 3168,
  "layer_types": [
    "full_attention",
    "sliding_attention",
    "full_attention",
    "sliding_attention",
    "full_attention",
    "sliding_attention",
    "full_attention",
    "sliding_attention",
    "full_attention",
    "sliding_attention",
    "full_attention",
    "sliding_attention",
    "full_attention",
    "sliding_attention",
    "full_attention",
    "sliding_attention",
    "full_attention",
    "sliding_attention",
    "full_attention",
    "sliding_attention",
    "full_attention",
    "sliding_attention",
    "full_attention",
    "sliding_attention",
    "full_attention",
    "sliding_attention",
    "full_attention",
    "sliding_attention",
    "full_attention",
    "sliding_attention",
    "full_attention",
    "sliding_attention",
    "full_attention",
    "sliding_attention",
    "full_attention",
    "sliding_attention",
    "full_attention",
    "sliding_attention",
    "full_attention",
    "sliding_attention"
  ],
  "max_position_embeddings": 8192,
  "max_window_layers": 40,
  "mlp_type": "squared_relu",
  "model_name": "checkpoint-7560",
  "model_type": "qwen3",
  "n_layer": 40,
  "num_attention_heads": 16,
  "num_hidden_layers": 40,
  "num_key_value_heads": 4,
  "pad_token_id": 50034,
  "rms_norm_eps": 1e-06,
  "rope_parameters": {
    "rope_theta": 50000,
    "rope_type": "default"
  },
  "sliding_window": 4096,
  "squared_relu_activation": "relu2",
  "squared_relu_intermediate_size": 3168,
  "tie_word_embeddings": true,
  "transformers_version": "5.5.0",
  "unsloth_version": "2026.5.2",
  "use_cache": false,
  "use_sliding_window": true,
  "vocab_size": 50304
}