Tongjilibo committed on
Commit
8620d5a
·
1 Parent(s): 25ff35e

增加qwen3vl

Browse files
OpenGVLab/InternVL2_5-1B/bert4torch_config.json CHANGED
@@ -43,7 +43,6 @@
43
  "vocab_size": 151674,
44
  "segment_vocab_size": 0,
45
  "rope_rank": "updown",
46
- "max_position": 32768,
47
  "generation_config": {
48
  "tokenizer_config": {
49
  "allowed_special": [
 
43
  "vocab_size": 151674,
44
  "segment_vocab_size": 0,
45
  "rope_rank": "updown",
 
46
  "generation_config": {
47
  "tokenizer_config": {
48
  "allowed_special": [
Qwen/Qwen-1_8B-Chat/bert4torch_config.json CHANGED
@@ -28,7 +28,7 @@
28
  "segment_vocab_size": 0,
29
  "skip_init": true,
30
  "rope_rank": "updown",
31
- "max_position": 8192,
32
  "generation_config": {
33
  "tokenizer_config": {
34
  "allowed_special": [
 
28
  "segment_vocab_size": 0,
29
  "skip_init": true,
30
  "rope_rank": "updown",
31
+ "max_position_embeddings": 8192,
32
  "generation_config": {
33
  "tokenizer_config": {
34
  "allowed_special": [
Qwen/Qwen-1_8B/bert4torch_config.json CHANGED
@@ -29,7 +29,7 @@
29
  "segment_vocab_size": 0,
30
  "skip_init": true,
31
  "rope_rank": "updown",
32
- "max_position": 8192,
33
  "generation_config": {
34
  "tokenizer_config": {
35
  "allowed_special": [
 
29
  "segment_vocab_size": 0,
30
  "skip_init": true,
31
  "rope_rank": "updown",
32
+ "max_position_embeddings": 8192,
33
  "generation_config": {
34
  "tokenizer_config": {
35
  "allowed_special": [
Qwen/Qwen-7B-Chat/bert4torch_config.json CHANGED
@@ -28,7 +28,7 @@
28
  "segment_vocab_size": 0,
29
  "skip_init": true,
30
  "rope_rank": "updown",
31
- "max_position": 8192,
32
  "generation_config": {
33
  "tokenizer_config": {
34
  "allowed_special": [
 
28
  "segment_vocab_size": 0,
29
  "skip_init": true,
30
  "rope_rank": "updown",
31
+ "max_position_embeddings": 8192,
32
  "generation_config": {
33
  "tokenizer_config": {
34
  "allowed_special": [
Qwen/Qwen-7B/bert4torch_config.json CHANGED
@@ -29,7 +29,7 @@
29
  "segment_vocab_size": 0,
30
  "skip_init": true,
31
  "rope_rank": "updown",
32
- "max_position": 8192,
33
  "generation_config": {
34
  "tokenizer_config": {
35
  "allowed_special": [
 
29
  "segment_vocab_size": 0,
30
  "skip_init": true,
31
  "rope_rank": "updown",
32
+ "max_position_embeddings": 8192,
33
  "generation_config": {
34
  "tokenizer_config": {
35
  "allowed_special": [
Qwen/Qwen2.5-VL-32B-Instruct/bert4torch_config.json ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_type": "qwen2_5_vl",
3
+ "pos_emb_type": "rotary",
4
+ "use_bias": false,
5
+ "attention_bias": true,
6
+ "layer_norm_mode": "rmsnorm",
7
+ "final_layernorm": true,
8
+ "pre_layernorm": true,
9
+ "mlp_type": "LlamaFeedForward",
10
+ "template": "qwen2_vl",
11
+ "attention_dropout": 0.0,
12
+ "bos_token_id": 151643,
13
+ "eos_token_id": 151645,
14
+ "pad_token_id": 151643,
15
+ "image_token_id": 151655,
16
+ "video_token_id": 151656,
17
+ "hidden_act": "silu",
18
+ "hidden_size": 5120,
19
+ "initializer_range": 0.02,
20
+ "intermediate_size": 27648,
21
+ "max_position_embeddings": 128000,
22
+ "max_window_layers": 64,
23
+ "num_attention_heads": 40,
24
+ "num_hidden_layers": 64,
25
+ "num_key_value_heads": 8,
26
+ "layer_norm_eps": 1e-06,
27
+ "rope_theta": 1000000.0,
28
+ "sliding_window": 32768,
29
+ "tie_word_embeddings": false,
30
+ "torch_dtype": "bfloat16",
31
+ "_attn_implementation": "sdpa",
32
+ "use_sliding_window": false,
33
+ "skip_init": true,
34
+ "segment_vocab_size": 0,
35
+ "rope_rank": "updown",
36
+ "convert_logits_dtype": "float32",
37
+ "generation_config": {
38
+ "tokenizer_config": {
39
+ "skip_special_tokens": true
40
+ },
41
+ "eos_token_id": [
42
+ 151643,
43
+ 151645
44
+ ],
45
+ "max_length": 32768,
46
+ "repetition_penalty": 1.05,
47
+ "temperature": 0.000001
48
+ },
49
+ "vision_start_token_id": 151652,
50
+ "vision_end_token_id": 151653,
51
+ "vision_token_id": 151654,
52
+ "vision_config": {
53
+ "hidden_size": 1280,
54
+ "in_chans": 3,
55
+ "intermediate_size": 3456,
56
+ "model_type": "qwen2_5_vl",
57
+ "out_hidden_size": 5120,
58
+ "spatial_patch_size": 14,
59
+ "tokens_per_second": 2,
60
+ "torch_dtype": "bfloat16",
61
+ "_attn_implementation_internal": null
62
+ },
63
+ "rope_scaling": {
64
+ "mrope_section": [
65
+ 16,
66
+ 24,
67
+ 24
68
+ ],
69
+ "rope_type": "default",
70
+ "type": "default"
71
+ },
72
+ "vocab_size": 152064
73
+ }
Qwen/Qwen2.5-VL-3B-Instruct/bert4torch_config.json ADDED
@@ -0,0 +1,81 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_type": "qwen2_5_vl",
3
+ "pos_emb_type": "rotary",
4
+ "use_bias": false,
5
+ "attention_bias": true,
6
+ "layer_norm_mode": "rmsnorm",
7
+ "final_layernorm": true,
8
+ "pre_layernorm": true,
9
+ "mlp_type": "LlamaFeedForward",
10
+ "template": "qwen2_vl",
11
+ "attention_dropout": 0.0,
12
+ "bos_token_id": 151643,
13
+ "eos_token_id": 151645,
14
+ "image_token_id": 151655,
15
+ "video_token_id": 151656,
16
+ "hidden_act": "silu",
17
+ "hidden_size": 2048,
18
+ "initializer_range": 0.02,
19
+ "intermediate_size": 11008,
20
+ "max_position_embeddings": 128000,
21
+ "max_window_layers": 70,
22
+ "num_attention_heads": 16,
23
+ "num_hidden_layers": 36,
24
+ "num_key_value_heads": 2,
25
+ "layer_norm_eps": 1e-06,
26
+ "rope_theta": 1000000.0,
27
+ "sliding_window": 32768,
28
+ "tie_word_embeddings": true,
29
+ "torch_dtype": "bfloat16",
30
+ "_attn_implementation": "sdpa",
31
+ "use_sliding_window": false,
32
+ "skip_init": true,
33
+ "segment_vocab_size": 0,
34
+ "rope_rank": "updown",
35
+ "convert_logits_dtype": "float32",
36
+ "generation_config": {
37
+ "tokenizer_config": {
38
+ "skip_special_tokens": true
39
+ },
40
+ "eos_token_id": [
41
+ 151643,
42
+ 151645
43
+ ],
44
+ "max_length": 32768,
45
+ "repetition_penalty": 1.05,
46
+ "temperature": 0.000001
47
+ },
48
+ "vision_start_token_id": 151652,
49
+ "vision_end_token_id": 151653,
50
+ "vision_token_id": 151654,
51
+ "vision_config": {
52
+ "depth": 32,
53
+ "hidden_act": "silu",
54
+ "hidden_size": 1280,
55
+ "intermediate_size": 3420,
56
+ "num_heads": 16,
57
+ "in_chans": 3,
58
+ "out_hidden_size": 2048,
59
+ "patch_size": 14,
60
+ "spatial_merge_size": 2,
61
+ "spatial_patch_size": 14,
62
+ "window_size": 112,
63
+ "fullatt_block_indexes": [
64
+ 7,
65
+ 15,
66
+ 23,
67
+ 31
68
+ ],
69
+ "tokens_per_second": 2,
70
+ "_attn_implementation_internal": null
71
+ },
72
+ "rope_scaling": {
73
+ "type": "mrope",
74
+ "mrope_section": [
75
+ 16,
76
+ 24,
77
+ 24
78
+ ]
79
+ },
80
+ "vocab_size": 151936
81
+ }
Qwen/Qwen2.5-VL-7B-Instruct/bert4torch_config.json ADDED
@@ -0,0 +1,81 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_type": "qwen2_5_vl",
3
+ "pos_emb_type": "rotary",
4
+ "use_bias": false,
5
+ "attention_bias": true,
6
+ "layer_norm_mode": "rmsnorm",
7
+ "final_layernorm": true,
8
+ "pre_layernorm": true,
9
+ "mlp_type": "LlamaFeedForward",
10
+ "template": "qwen2_vl",
11
+ "attention_dropout": 0.0,
12
+ "bos_token_id": 151643,
13
+ "eos_token_id": 151645,
14
+ "image_token_id": 151655,
15
+ "video_token_id": 151656,
16
+ "hidden_act": "silu",
17
+ "hidden_size": 3584,
18
+ "initializer_range": 0.02,
19
+ "intermediate_size": 18944,
20
+ "max_position_embeddings": 128000,
21
+ "max_window_layers": 28,
22
+ "num_attention_heads": 28,
23
+ "num_hidden_layers": 28,
24
+ "num_key_value_heads": 4,
25
+ "layer_norm_eps": 1e-06,
26
+ "rope_theta": 1000000.0,
27
+ "sliding_window": 32768,
28
+ "tie_word_embeddings": false,
29
+ "torch_dtype": "bfloat16",
30
+ "_attn_implementation": "sdpa",
31
+ "use_sliding_window": false,
32
+ "skip_init": true,
33
+ "segment_vocab_size": 0,
34
+ "rope_rank": "updown",
35
+ "convert_logits_dtype": "float32",
36
+ "generation_config": {
37
+ "tokenizer_config": {
38
+ "skip_special_tokens": true
39
+ },
40
+ "eos_token_id": [
41
+ 151643,
42
+ 151645
43
+ ],
44
+ "max_length": 32768,
45
+ "repetition_penalty": 1.05,
46
+ "temperature": 0.000001
47
+ },
48
+ "vision_start_token_id": 151652,
49
+ "vision_end_token_id": 151653,
50
+ "vision_token_id": 151654,
51
+ "vision_config": {
52
+ "depth": 32,
53
+ "hidden_act": "silu",
54
+ "hidden_size": 1280,
55
+ "intermediate_size": 3420,
56
+ "num_heads": 16,
57
+ "in_chans": 3,
58
+ "out_hidden_size": 3584,
59
+ "patch_size": 14,
60
+ "spatial_merge_size": 2,
61
+ "spatial_patch_size": 14,
62
+ "window_size": 112,
63
+ "fullatt_block_indexes": [
64
+ 7,
65
+ 15,
66
+ 23,
67
+ 31
68
+ ],
69
+ "tokens_per_second": 2,
70
+ "_attn_implementation_internal": null
71
+ },
72
+ "rope_scaling": {
73
+ "type": "mrope",
74
+ "mrope_section": [
75
+ 16,
76
+ 24,
77
+ 24
78
+ ]
79
+ },
80
+ "vocab_size": 152064
81
+ }
Qwen/Qwen3-VL-2B-Instruct/bert4torch_config.json ADDED
@@ -0,0 +1,88 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_type": "qwen3_vl",
3
+ "template": "qwen3_vl",
4
+ "torch_dtype": "bfloat16",
5
+ "skip_init": true,
6
+ "image_token_id": 151655,
7
+ "video_token_id": 151656,
8
+ "text_config": {
9
+ "pos_emb_type": "rotary",
10
+ "use_bias": false,
11
+ "layer_norm_mode": "rmsnorm",
12
+ "final_layernorm": true,
13
+ "pre_layernorm": true,
14
+ "mlp_type": "LlamaFeedForward",
15
+ "attn_type": "Qwen3Attention",
16
+ "attention_dropout": 0.0,
17
+ "bos_token_id": 151643,
18
+ "eos_token_id": 151645,
19
+ "image_token_id": 151655,
20
+ "video_token_id": 151656,
21
+ "hidden_act": "silu",
22
+ "hidden_size": 2048,
23
+ "initializer_range": 0.02,
24
+ "intermediate_size": 6144,
25
+ "max_position_embeddings": 262144,
26
+ "num_attention_heads": 16,
27
+ "num_hidden_layers": 28,
28
+ "num_key_value_heads": 8,
29
+ "layer_norm_eps": 1e-06,
30
+ "rope_theta": 5000000,
31
+ "tie_word_embeddings": true,
32
+ "torch_dtype": "bfloat16",
33
+ "_attn_implementation": "sdpa",
34
+ "use_sliding_window": false,
35
+ "skip_init": true,
36
+ "segment_vocab_size": 0,
37
+ "rope_rank": "updown",
38
+ "convert_logits_dtype": "float32",
39
+ "rope_scaling": {
40
+ "mrope_interleaved": true,
41
+ "mrope_section": [
42
+ 24,
43
+ 20,
44
+ 20
45
+ ],
46
+ "rope_type": "mrope_interleaved"
47
+ },
48
+ "vocab_size": 151936
49
+ },
50
+ "vision_start_token_id": 151652,
51
+ "vision_end_token_id": 151653,
52
+ "vision_token_id": 151654,
53
+ "vision_config": {
54
+ "deepstack_visual_indexes": [
55
+ 5,
56
+ 11,
57
+ 17
58
+ ],
59
+ "depth": 24,
60
+ "hidden_act": "gelu_pytorch_tanh",
61
+ "hidden_size": 1024,
62
+ "in_channels": 3,
63
+ "initializer_range": 0.02,
64
+ "intermediate_size": 4096,
65
+ "model_type": "qwen3_vl",
66
+ "num_heads": 16,
67
+ "num_position_embeddings": 2304,
68
+ "out_hidden_size": 2048,
69
+ "patch_size": 16,
70
+ "spatial_merge_size": 2,
71
+ "temporal_patch_size": 2,
72
+ "_attn_implementation_internal": null
73
+ },
74
+ "generation_config": {
75
+ "tokenizer_config": {
76
+ "skip_special_tokens": true
77
+ },
78
+ "eos_token_id": [
79
+ 151643,
80
+ 151645
81
+ ],
82
+ "max_length": 32768,
83
+ "top_p": 0.8,
84
+ "top_k": 20,
85
+ "temperature": 0.7,
86
+ "repetition_penalty": 1.0
87
+ }
88
+ }
Qwen/Qwen3-VL-2B-Thinking/bert4torch_config.json ADDED
@@ -0,0 +1,88 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_type": "qwen3_vl",
3
+ "template": "qwen3_vl",
4
+ "torch_dtype": "bfloat16",
5
+ "skip_init": true,
6
+ "image_token_id": 151655,
7
+ "video_token_id": 151656,
8
+ "text_config": {
9
+ "pos_emb_type": "rotary",
10
+ "use_bias": false,
11
+ "layer_norm_mode": "rmsnorm",
12
+ "final_layernorm": true,
13
+ "pre_layernorm": true,
14
+ "mlp_type": "LlamaFeedForward",
15
+ "attn_type": "Qwen3Attention",
16
+ "attention_dropout": 0.0,
17
+ "bos_token_id": 151643,
18
+ "eos_token_id": 151645,
19
+ "image_token_id": 151655,
20
+ "video_token_id": 151656,
21
+ "hidden_act": "silu",
22
+ "hidden_size": 2048,
23
+ "initializer_range": 0.02,
24
+ "intermediate_size": 6144,
25
+ "max_position_embeddings": 262144,
26
+ "num_attention_heads": 16,
27
+ "num_hidden_layers": 28,
28
+ "num_key_value_heads": 8,
29
+ "layer_norm_eps": 1e-06,
30
+ "rope_theta": 5000000,
31
+ "tie_word_embeddings": true,
32
+ "torch_dtype": "bfloat16",
33
+ "_attn_implementation": "sdpa",
34
+ "use_sliding_window": false,
35
+ "skip_init": true,
36
+ "segment_vocab_size": 0,
37
+ "rope_rank": "updown",
38
+ "convert_logits_dtype": "float32",
39
+ "rope_scaling": {
40
+ "mrope_interleaved": true,
41
+ "mrope_section": [
42
+ 24,
43
+ 20,
44
+ 20
45
+ ],
46
+ "rope_type": "mrope_interleaved"
47
+ },
48
+ "vocab_size": 151936
49
+ },
50
+ "vision_start_token_id": 151652,
51
+ "vision_end_token_id": 151653,
52
+ "vision_token_id": 151654,
53
+ "vision_config": {
54
+ "deepstack_visual_indexes": [
55
+ 5,
56
+ 11,
57
+ 17
58
+ ],
59
+ "depth": 24,
60
+ "hidden_act": "gelu_pytorch_tanh",
61
+ "hidden_size": 1024,
62
+ "in_channels": 3,
63
+ "initializer_range": 0.02,
64
+ "intermediate_size": 4096,
65
+ "model_type": "qwen3_vl",
66
+ "num_heads": 16,
67
+ "num_position_embeddings": 2304,
68
+ "out_hidden_size": 2048,
69
+ "patch_size": 16,
70
+ "spatial_merge_size": 2,
71
+ "temporal_patch_size": 2,
72
+ "_attn_implementation_internal": null
73
+ },
74
+ "generation_config": {
75
+ "tokenizer_config": {
76
+ "skip_special_tokens": true
77
+ },
78
+ "eos_token_id": [
79
+ 151643,
80
+ 151645
81
+ ],
82
+ "max_length": 32768,
83
+ "top_p": 0.8,
84
+ "top_k": 20,
85
+ "temperature": 0.7,
86
+ "repetition_penalty": 1.0
87
+ }
88
+ }
Qwen/Qwen3-VL-32B-Instruct/bert4torch_config.json ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_type": "qwen3_vl",
3
+ "template": "qwen3_vl",
4
+ "torch_dtype": "bfloat16",
5
+ "skip_init": true,
6
+ "image_token_id": 151655,
7
+ "video_token_id": 151656,
8
+ "text_config": {
9
+ "pos_emb_type": "rotary",
10
+ "use_bias": false,
11
+ "layer_norm_mode": "rmsnorm",
12
+ "final_layernorm": true,
13
+ "pre_layernorm": true,
14
+ "mlp_type": "LlamaFeedForward",
15
+ "attn_type": "Qwen3Attention",
16
+ "attention_dropout": 0.0,
17
+ "bos_token_id": 151643,
18
+ "eos_token_id": 151645,
19
+ "image_token_id": 151655,
20
+ "video_token_id": 151656,
21
+ "hidden_act": "silu",
22
+ "hidden_size": 5120,
23
+ "initializer_range": 0.02,
24
+ "intermediate_size": 25600,
25
+ "max_position_embeddings": 262144,
26
+ "num_attention_heads": 64,
27
+ "num_hidden_layers": 64,
28
+ "num_key_value_heads": 8,
29
+ "layer_norm_eps": 1e-06,
30
+ "rope_theta": 5000000,
31
+ "tie_word_embeddings": false,
32
+ "torch_dtype": "bfloat16",
33
+ "_attn_implementation": "sdpa",
34
+ "use_sliding_window": false,
35
+ "skip_init": true,
36
+ "segment_vocab_size": 0,
37
+ "rope_rank": "updown",
38
+ "convert_logits_dtype": "float32",
39
+ "rope_scaling": {
40
+ "mrope_interleaved": true,
41
+ "mrope_section": [
42
+ 24,
43
+ 20,
44
+ 20
45
+ ],
46
+ "rope_type": "mrope_interleaved"
47
+ },
48
+ "vocab_size": 151936
49
+ },
50
+ "vision_start_token_id": 151652,
51
+ "vision_end_token_id": 151653,
52
+ "vision_token_id": 151654,
53
+ "vision_config": {
54
+ "deepstack_visual_indexes": [
55
+ 8,
56
+ 16,
57
+ 24
58
+ ],
59
+ "depth": 27,
60
+ "hidden_act": "gelu_pytorch_tanh",
61
+ "hidden_size": 1152,
62
+ "in_channels": 3,
63
+ "initializer_range": 0.02,
64
+ "intermediate_size": 4304,
65
+ "model_type": "qwen3_vl",
66
+ "num_heads": 16,
67
+ "num_position_embeddings": 2304,
68
+ "out_hidden_size": 5120,
69
+ "patch_size": 16,
70
+ "spatial_merge_size": 2,
71
+ "temporal_patch_size": 2
72
+ },
73
+ "generation_config": {
74
+ "tokenizer_config": {
75
+ "skip_special_tokens": true
76
+ },
77
+ "eos_token_id": [
78
+ 151643,
79
+ 151645
80
+ ],
81
+ "max_length": 32768,
82
+ "top_p": 0.8,
83
+ "top_k": 20,
84
+ "temperature": 0.7,
85
+ "repetition_penalty": 1.0
86
+ }
87
+ }
Qwen/Qwen3-VL-32B-Thinking/bert4torch_config.json ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_type": "qwen3_vl",
3
+ "template": "qwen3_vl",
4
+ "torch_dtype": "bfloat16",
5
+ "skip_init": true,
6
+ "image_token_id": 151655,
7
+ "video_token_id": 151656,
8
+ "text_config": {
9
+ "pos_emb_type": "rotary",
10
+ "use_bias": false,
11
+ "layer_norm_mode": "rmsnorm",
12
+ "final_layernorm": true,
13
+ "pre_layernorm": true,
14
+ "mlp_type": "LlamaFeedForward",
15
+ "attn_type": "Qwen3Attention",
16
+ "attention_dropout": 0.0,
17
+ "bos_token_id": 151643,
18
+ "eos_token_id": 151645,
19
+ "image_token_id": 151655,
20
+ "video_token_id": 151656,
21
+ "hidden_act": "silu",
22
+ "hidden_size": 5120,
23
+ "initializer_range": 0.02,
24
+ "intermediate_size": 25600,
25
+ "max_position_embeddings": 262144,
26
+ "num_attention_heads": 64,
27
+ "num_hidden_layers": 64,
28
+ "num_key_value_heads": 8,
29
+ "layer_norm_eps": 1e-06,
30
+ "rope_theta": 5000000,
31
+ "tie_word_embeddings": false,
32
+ "torch_dtype": "bfloat16",
33
+ "_attn_implementation": "sdpa",
34
+ "use_sliding_window": false,
35
+ "skip_init": true,
36
+ "segment_vocab_size": 0,
37
+ "rope_rank": "updown",
38
+ "convert_logits_dtype": "float32",
39
+ "rope_scaling": {
40
+ "mrope_interleaved": true,
41
+ "mrope_section": [
42
+ 24,
43
+ 20,
44
+ 20
45
+ ],
46
+ "rope_type": "mrope_interleaved"
47
+ },
48
+ "vocab_size": 151936
49
+ },
50
+ "vision_start_token_id": 151652,
51
+ "vision_end_token_id": 151653,
52
+ "vision_token_id": 151654,
53
+ "vision_config": {
54
+ "deepstack_visual_indexes": [
55
+ 8,
56
+ 16,
57
+ 24
58
+ ],
59
+ "depth": 27,
60
+ "hidden_act": "gelu_pytorch_tanh",
61
+ "hidden_size": 1152,
62
+ "in_channels": 3,
63
+ "initializer_range": 0.02,
64
+ "intermediate_size": 4304,
65
+ "model_type": "qwen3_vl",
66
+ "num_heads": 16,
67
+ "num_position_embeddings": 2304,
68
+ "out_hidden_size": 5120,
69
+ "patch_size": 16,
70
+ "spatial_merge_size": 2,
71
+ "temporal_patch_size": 2
72
+ },
73
+ "generation_config": {
74
+ "tokenizer_config": {
75
+ "skip_special_tokens": true
76
+ },
77
+ "eos_token_id": [
78
+ 151643,
79
+ 151645
80
+ ],
81
+ "max_length": 32768,
82
+ "top_p": 0.8,
83
+ "top_k": 20,
84
+ "temperature": 0.7,
85
+ "repetition_penalty": 1.0
86
+ }
87
+ }
Qwen/Qwen3-VL-4B-Instruct/bert4torch_config.json ADDED
@@ -0,0 +1,88 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_type": "qwen3_vl",
3
+ "template": "qwen3_vl",
4
+ "torch_dtype": "bfloat16",
5
+ "skip_init": true,
6
+ "image_token_id": 151655,
7
+ "video_token_id": 151656,
8
+ "text_config": {
9
+ "pos_emb_type": "rotary",
10
+ "use_bias": false,
11
+ "layer_norm_mode": "rmsnorm",
12
+ "final_layernorm": true,
13
+ "pre_layernorm": true,
14
+ "mlp_type": "LlamaFeedForward",
15
+ "attn_type": "Qwen3Attention",
16
+ "attention_dropout": 0.0,
17
+ "bos_token_id": 151643,
18
+ "eos_token_id": 151645,
19
+ "image_token_id": 151655,
20
+ "video_token_id": 151656,
21
+ "hidden_act": "silu",
22
+ "hidden_size": 2560,
23
+ "initializer_range": 0.02,
24
+ "intermediate_size": 9728,
25
+ "max_position_embeddings": 262144,
26
+ "num_attention_heads": 32,
27
+ "num_hidden_layers": 36,
28
+ "num_key_value_heads": 8,
29
+ "layer_norm_eps": 1e-06,
30
+ "rope_theta": 5000000,
31
+ "tie_word_embeddings": true,
32
+ "torch_dtype": "bfloat16",
33
+ "_attn_implementation": "sdpa",
34
+ "use_sliding_window": false,
35
+ "skip_init": true,
36
+ "segment_vocab_size": 0,
37
+ "rope_rank": "updown",
38
+ "convert_logits_dtype": "float32",
39
+ "rope_scaling": {
40
+ "mrope_interleaved": true,
41
+ "mrope_section": [
42
+ 24,
43
+ 20,
44
+ 20
45
+ ],
46
+ "rope_type": "mrope_interleaved"
47
+ },
48
+ "vocab_size": 151936
49
+ },
50
+ "vision_start_token_id": 151652,
51
+ "vision_end_token_id": 151653,
52
+ "vision_token_id": 151654,
53
+ "vision_config": {
54
+ "deepstack_visual_indexes": [
55
+ 5,
56
+ 11,
57
+ 17
58
+ ],
59
+ "depth": 24,
60
+ "hidden_act": "gelu_pytorch_tanh",
61
+ "hidden_size": 1024,
62
+ "in_channels": 3,
63
+ "initializer_range": 0.02,
64
+ "intermediate_size": 4096,
65
+ "model_type": "qwen3_vl",
66
+ "num_heads": 16,
67
+ "num_position_embeddings": 2304,
68
+ "out_hidden_size": 2560,
69
+ "patch_size": 16,
70
+ "spatial_merge_size": 2,
71
+ "temporal_patch_size": 2,
72
+ "_attn_implementation_internal": null
73
+ },
74
+ "generation_config": {
75
+ "tokenizer_config": {
76
+ "skip_special_tokens": true
77
+ },
78
+ "eos_token_id": [
79
+ 151643,
80
+ 151645
81
+ ],
82
+ "max_length": 32768,
83
+ "top_p": 0.8,
84
+ "top_k": 20,
85
+ "temperature": 0.7,
86
+ "repetition_penalty": 1.0
87
+ }
88
+ }
Qwen/Qwen3-VL-4B-Thinking/bert4torch_config.json ADDED
@@ -0,0 +1,88 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_type": "qwen3_vl",
3
+ "template": "qwen3_vl",
4
+ "torch_dtype": "bfloat16",
5
+ "skip_init": true,
6
+ "image_token_id": 151655,
7
+ "video_token_id": 151656,
8
+ "text_config": {
9
+ "pos_emb_type": "rotary",
10
+ "use_bias": false,
11
+ "layer_norm_mode": "rmsnorm",
12
+ "final_layernorm": true,
13
+ "pre_layernorm": true,
14
+ "mlp_type": "LlamaFeedForward",
15
+ "attn_type": "Qwen3Attention",
16
+ "attention_dropout": 0.0,
17
+ "bos_token_id": 151643,
18
+ "eos_token_id": 151645,
19
+ "image_token_id": 151655,
20
+ "video_token_id": 151656,
21
+ "hidden_act": "silu",
22
+ "hidden_size": 2560,
23
+ "initializer_range": 0.02,
24
+ "intermediate_size": 9728,
25
+ "max_position_embeddings": 262144,
26
+ "num_attention_heads": 32,
27
+ "num_hidden_layers": 36,
28
+ "num_key_value_heads": 8,
29
+ "layer_norm_eps": 1e-06,
30
+ "rope_theta": 5000000,
31
+ "tie_word_embeddings": true,
32
+ "torch_dtype": "bfloat16",
33
+ "_attn_implementation": "sdpa",
34
+ "use_sliding_window": false,
35
+ "skip_init": true,
36
+ "segment_vocab_size": 0,
37
+ "rope_rank": "updown",
38
+ "convert_logits_dtype": "float32",
39
+ "rope_scaling": {
40
+ "mrope_interleaved": true,
41
+ "mrope_section": [
42
+ 24,
43
+ 20,
44
+ 20
45
+ ],
46
+ "rope_type": "mrope_interleaved"
47
+ },
48
+ "vocab_size": 151936
49
+ },
50
+ "vision_start_token_id": 151652,
51
+ "vision_end_token_id": 151653,
52
+ "vision_token_id": 151654,
53
+ "vision_config": {
54
+ "deepstack_visual_indexes": [
55
+ 5,
56
+ 11,
57
+ 17
58
+ ],
59
+ "depth": 24,
60
+ "hidden_act": "gelu_pytorch_tanh",
61
+ "hidden_size": 1024,
62
+ "in_channels": 3,
63
+ "initializer_range": 0.02,
64
+ "intermediate_size": 4096,
65
+ "model_type": "qwen3_vl",
66
+ "num_heads": 16,
67
+ "num_position_embeddings": 2304,
68
+ "out_hidden_size": 2560,
69
+ "patch_size": 16,
70
+ "spatial_merge_size": 2,
71
+ "temporal_patch_size": 2,
72
+ "_attn_implementation_internal": null
73
+ },
74
+ "generation_config": {
75
+ "tokenizer_config": {
76
+ "skip_special_tokens": true
77
+ },
78
+ "eos_token_id": [
79
+ 151643,
80
+ 151645
81
+ ],
82
+ "max_length": 32768,
83
+ "top_p": 0.8,
84
+ "top_k": 20,
85
+ "temperature": 0.7,
86
+ "repetition_penalty": 1.0
87
+ }
88
+ }
Qwen/Qwen3-VL-8B-Instruct/bert4torch_config.json ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_type": "qwen3_vl",
3
+ "template": "qwen3_vl",
4
+ "torch_dtype": "bfloat16",
5
+ "skip_init": true,
6
+ "image_token_id": 151655,
7
+ "video_token_id": 151656,
8
+ "text_config": {
9
+ "pos_emb_type": "rotary",
10
+ "use_bias": false,
11
+ "layer_norm_mode": "rmsnorm",
12
+ "final_layernorm": true,
13
+ "pre_layernorm": true,
14
+ "mlp_type": "LlamaFeedForward",
15
+ "attn_type": "Qwen3Attention",
16
+ "attention_dropout": 0.0,
17
+ "bos_token_id": 151643,
18
+ "eos_token_id": 151645,
19
+ "image_token_id": 151655,
20
+ "video_token_id": 151656,
21
+ "hidden_act": "silu",
22
+ "hidden_size": 4096,
23
+ "initializer_range": 0.02,
24
+ "intermediate_size": 12288,
25
+ "max_position_embeddings": 262144,
26
+ "num_attention_heads": 32,
27
+ "num_hidden_layers": 36,
28
+ "num_key_value_heads": 8,
29
+ "layer_norm_eps": 1e-06,
30
+ "rope_theta": 5000000,
31
+ "tie_word_embeddings": false,
32
+ "torch_dtype": "bfloat16",
33
+ "_attn_implementation": "sdpa",
34
+ "use_sliding_window": false,
35
+ "skip_init": true,
36
+ "segment_vocab_size": 0,
37
+ "rope_rank": "updown",
38
+ "convert_logits_dtype": "float32",
39
+ "rope_scaling": {
40
+ "mrope_interleaved": true,
41
+ "mrope_section": [
42
+ 24,
43
+ 20,
44
+ 20
45
+ ],
46
+ "rope_type": "mrope_interleaved"
47
+ },
48
+ "vocab_size": 151936
49
+ },
50
+ "vision_start_token_id": 151652,
51
+ "vision_end_token_id": 151653,
52
+ "vision_token_id": 151654,
53
+ "vision_config": {
54
+ "deepstack_visual_indexes": [
55
+ 8,
56
+ 16,
57
+ 24
58
+ ],
59
+ "depth": 27,
60
+ "hidden_act": "gelu_pytorch_tanh",
61
+ "hidden_size": 1152,
62
+ "in_channels": 3,
63
+ "initializer_range": 0.02,
64
+ "intermediate_size": 4304,
65
+ "model_type": "qwen3_vl",
66
+ "num_heads": 16,
67
+ "num_position_embeddings": 2304,
68
+ "out_hidden_size": 4096,
69
+ "patch_size": 16,
70
+ "spatial_merge_size": 2,
71
+ "temporal_patch_size": 2
72
+ },
73
+ "generation_config": {
74
+ "tokenizer_config": {
75
+ "skip_special_tokens": true
76
+ },
77
+ "eos_token_id": [
78
+ 151643,
79
+ 151645
80
+ ],
81
+ "max_length": 32768,
82
+ "top_p": 0.8,
83
+ "top_k": 20,
84
+ "temperature": 0.7,
85
+ "repetition_penalty": 1.0
86
+ }
87
+ }
Qwen/Qwen3-VL-8B-Thinking/bert4torch_config.json ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_type": "qwen3_vl",
3
+ "template": "qwen3_vl",
4
+ "torch_dtype": "bfloat16",
5
+ "skip_init": true,
6
+ "image_token_id": 151655,
7
+ "video_token_id": 151656,
8
+ "text_config": {
9
+ "pos_emb_type": "rotary",
10
+ "use_bias": false,
11
+ "layer_norm_mode": "rmsnorm",
12
+ "final_layernorm": true,
13
+ "pre_layernorm": true,
14
+ "mlp_type": "LlamaFeedForward",
15
+ "attn_type": "Qwen3Attention",
16
+ "attention_dropout": 0.0,
17
+ "bos_token_id": 151643,
18
+ "eos_token_id": 151645,
19
+ "image_token_id": 151655,
20
+ "video_token_id": 151656,
21
+ "hidden_act": "silu",
22
+ "hidden_size": 4096,
23
+ "initializer_range": 0.02,
24
+ "intermediate_size": 12288,
25
+ "max_position_embeddings": 262144,
26
+ "num_attention_heads": 32,
27
+ "num_hidden_layers": 36,
28
+ "num_key_value_heads": 8,
29
+ "layer_norm_eps": 1e-06,
30
+ "rope_theta": 5000000,
31
+ "tie_word_embeddings": false,
32
+ "torch_dtype": "bfloat16",
33
+ "_attn_implementation": "sdpa",
34
+ "use_sliding_window": false,
35
+ "skip_init": true,
36
+ "segment_vocab_size": 0,
37
+ "rope_rank": "updown",
38
+ "convert_logits_dtype": "float32",
39
+ "rope_scaling": {
40
+ "mrope_interleaved": true,
41
+ "mrope_section": [
42
+ 24,
43
+ 20,
44
+ 20
45
+ ],
46
+ "rope_type": "mrope_interleaved"
47
+ },
48
+ "vocab_size": 151936
49
+ },
50
+ "vision_start_token_id": 151652,
51
+ "vision_end_token_id": 151653,
52
+ "vision_token_id": 151654,
53
+ "vision_config": {
54
+ "deepstack_visual_indexes": [
55
+ 8,
56
+ 16,
57
+ 24
58
+ ],
59
+ "depth": 27,
60
+ "hidden_act": "gelu_pytorch_tanh",
61
+ "hidden_size": 1152,
62
+ "in_channels": 3,
63
+ "initializer_range": 0.02,
64
+ "intermediate_size": 4304,
65
+ "model_type": "qwen3_vl",
66
+ "num_heads": 16,
67
+ "num_position_embeddings": 2304,
68
+ "out_hidden_size": 4096,
69
+ "patch_size": 16,
70
+ "spatial_merge_size": 2,
71
+ "temporal_patch_size": 2
72
+ },
73
+ "generation_config": {
74
+ "tokenizer_config": {
75
+ "skip_special_tokens": true
76
+ },
77
+ "eos_token_id": [
78
+ 151643,
79
+ 151645
80
+ ],
81
+ "max_length": 32768,
82
+ "top_p": 0.8,
83
+ "top_k": 20,
84
+ "temperature": 0.7,
85
+ "repetition_penalty": 1.0
86
+ }
87
+ }
meta-llama/Llama-3.2-11B-Vision-Instruct/bert4torch_config.json CHANGED
@@ -35,7 +35,6 @@
35
  "initializer_range": 0.02,
36
  "intermediate_size": 14336,
37
  "max_position_embeddings": 131072,
38
- "max_position": 131072,
39
  "num_attention_heads": 32,
40
  "num_hidden_layers": 40,
41
  "num_key_value_heads": 8,
 
35
  "initializer_range": 0.02,
36
  "intermediate_size": 14336,
37
  "max_position_embeddings": 131072,
 
38
  "num_attention_heads": 32,
39
  "num_hidden_layers": 40,
40
  "num_key_value_heads": 8,
meta-llama/Llama-3.2-11B-Vision/bert4torch_config.json CHANGED
@@ -35,7 +35,6 @@
35
  "initializer_range": 0.02,
36
  "intermediate_size": 14336,
37
  "max_position_embeddings": 131072,
38
- "max_position": 131072,
39
  "num_attention_heads": 32,
40
  "num_hidden_layers": 40,
41
  "num_key_value_heads": 8,
 
35
  "initializer_range": 0.02,
36
  "intermediate_size": 14336,
37
  "max_position_embeddings": 131072,
 
38
  "num_attention_heads": 32,
39
  "num_hidden_layers": 40,
40
  "num_key_value_heads": 8,