File size: 2,385 Bytes
dcdc7fd
 
 
 
ca78849
 
561373c
ca78849
 
 
dcdc7fd
ca78849
 
 
 
dcdc7fd
ca78849
 
 
 
 
 
09a1c78
 
 
ca78849
 
 
 
 
 
dcdc7fd
09a1c78
ca78849
 
 
dcdc7fd
ca78849
561373c
ca78849
dcdc7fd
c08aea4
dcdc7fd
ca78849
dcdc7fd
 
 
 
 
ca78849
dcdc7fd
 
 
 
ca78849
dcdc7fd
 
09a1c78
 
dcdc7fd
 
 
 
 
 
ca78849
dcdc7fd
 
ca78849
 
dcdc7fd
09a1c78
ca78849
 
dcdc7fd
 
159314f
753be73
dcdc7fd
c08aea4
dcdc7fd
ca78849
dcdc7fd
 
 
 
 
 
ca78849
dcdc7fd
 
ca78849
dcdc7fd
 
50fc335
 
2b82795
ca78849
dcdc7fd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
{
  "architectures": [
    "Qwen2VLForConditionalGeneration"
  ],
  "attention_dropout": 0.0,
  "bos_token_id": 151643,
  "dtype": "bfloat16",
  "eos_token_id": 151645,
  "hidden_act": "silu",
  "hidden_size": 1536,
  "image_token_id": 151655,
  "initializer_range": 0.02,
  "intermediate_size": 8960,
  "max_position_embeddings": 32768,
  "max_window_layers": 28,
  "model_type": "qwen2_vl",
  "num_attention_heads": 12,
  "num_hidden_layers": 28,
  "num_key_value_heads": 2,
  "rms_norm_eps": 1e-06,
  "rope_scaling": {
    "mrope_section": [
      16,
      24,
      24
    ],
    "rope_type": "default",
    "type": "default"
  },
  "rope_theta": 1000000.0,
  "sliding_window": 32768,
  "text_config": {
    "_name_or_path": "Qwen/Qwen2-VL-2B-Instruct",
    "architectures": [
      "Qwen2VLForConditionalGeneration"
    ],
    "attention_dropout": 0.0,
    "bos_token_id": 151643,
    "dtype": "bfloat16",
    "eos_token_id": 151645,
    "hidden_act": "silu",
    "hidden_size": 16,
    "initializer_range": 0.02,
    "intermediate_size": 8960,
    "layer_types": [
      "full_attention",
      "full_attention"
    ],
    "max_position_embeddings": 32768,
    "max_window_layers": 28,
    "model_type": "qwen2_vl_text",
    "num_attention_heads": 4,
    "num_hidden_layers": 2,
    "num_key_value_heads": 2,
    "rms_norm_eps": 1e-06,
    "rope_scaling": {
      "mrope_section": [
        1,
        1
      ],
      "rope_type": "default",
      "type": "default"
    },
    "rope_theta": 1000000.0,
    "sliding_window": null,
    "tie_word_embeddings": true,
    "use_cache": true,
    "use_sliding_window": false,
    "vision_token_id": 151654,
    "vocab_size": 151936
  },
  "transformers_version": "4.57.3",
  "use_cache": true,
  "use_sliding_window": false,
  "video_token_id": 151656,
  "vision_config": {
    "depth": 2,
    "embed_dim": 64,
    "hidden_act": "quick_gelu",
    "hidden_size": 16,
    "in_channels": 3,
    "in_chans": 3,
    "initializer_range": 0.02,
    "mlp_ratio": 4,
    "model_type": "qwen2_vl",
    "num_attention_heads": 4,
    "num_heads": 16,
    "num_hidden_layers": 2,
    "num_key_value_heads": 2,
    "patch_size": 14,
    "spatial_merge_size": 2,
    "spatial_patch_size": 14,
    "temporal_patch_size": 2
  },
  "vision_end_token_id": 151653,
  "vision_start_token_id": 151652,
  "vision_token_id": 151654,
  "vocab_size": 151936
}