array committed on
Commit
dccafa1
·
verified ·
1 Parent(s): b01fbda

Upload folder using huggingface_hub

Browse files
chat_template.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ {
2
+ "chat_template": "{% set image_count = namespace(value=0) %}{% set video_count = namespace(value=0) %}{% for message in messages %}{% if loop.first and message['role'] != 'system' %}<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n{% endif %}<|im_start|>{{ message['role'] }}\n{% if message['content'] is string %}{{ message['content'] }}<|im_end|>\n{% else %}{% for content in message['content'] %}{% if content['type'] == 'image' or 'image' in content or 'image_url' in content %}{% set image_count.value = image_count.value + 1 %}{% if add_vision_id %}Picture {{ image_count.value }}: {% endif %}<|vision_start|><|image_pad|><|vision_end|>{% elif content['type'] == 'video' or 'video' in content %}{% set video_count.value = video_count.value + 1 %}{% if add_vision_id %}Video {{ video_count.value }}: {% endif %}<|vision_start|><|video_pad|><|vision_end|>{% elif 'text' in content %}{{ content['text'] }}{% endif %}{% endfor %}<|im_end|>\n{% endif %}{% endfor %}{% if add_generation_prompt %}<|im_start|>assistant\n{% endif %}"
3
+ }
config.json CHANGED
@@ -1,10 +1,11 @@
1
  {
 
2
  "architectures": [
3
  "Qwen2_5_VLForConditionalGeneration"
4
  ],
5
  "attention_dropout": 0.0,
6
  "auto_map": {
7
- "AutoModelForVision2Seq": "mmlatentdiscrete_qwen_vl.Qwen2_5_VLForConditionalGeneration"
8
  },
9
  "bos_token_id": 151643,
10
  "compress_strategy": "average",
@@ -39,108 +40,22 @@
39
  "rope_theta": 1000000.0,
40
  "sliding_window": 32768,
41
  "stage": "stage2",
42
- "text_config": {
43
- "_name_or_path": "/usr/local/google/home/arijitray/Documents/final_models/qwen_vl_mulldiscrete_24000",
44
- "architectures": [
45
- "Qwen2_5_VLForConditionalGeneration"
46
- ],
47
- "attention_dropout": 0.0,
48
- "bos_token_id": 151643,
49
- "compress_strategy": "average",
50
- "dtype": "bfloat16",
51
- "eos_token_id": 151645,
52
- "hidden_act": "silu",
53
- "hidden_size": 3584,
54
- "imagelatent_token_id": 151668,
55
- "initializer_range": 0.02,
56
- "intermediate_size": 18944,
57
- "latent_end_id": 151667,
58
- "latent_size": 20,
59
- "latent_start_id": 151666,
60
- "latent_token_id": 151665,
61
- "layer_types": [
62
- "full_attention",
63
- "full_attention",
64
- "full_attention",
65
- "full_attention",
66
- "full_attention",
67
- "full_attention",
68
- "full_attention",
69
- "full_attention",
70
- "full_attention",
71
- "full_attention",
72
- "full_attention",
73
- "full_attention",
74
- "full_attention",
75
- "full_attention",
76
- "full_attention",
77
- "full_attention",
78
- "full_attention",
79
- "full_attention",
80
- "full_attention",
81
- "full_attention",
82
- "full_attention",
83
- "full_attention",
84
- "full_attention",
85
- "full_attention",
86
- "full_attention",
87
- "full_attention",
88
- "full_attention",
89
- "full_attention"
90
- ],
91
- "max_position_embeddings": 128000,
92
- "max_window_layers": 28,
93
- "model_type": "qwen2_5_vl_text",
94
- "num_attention_heads": 28,
95
- "num_hidden_layers": 28,
96
- "num_key_value_heads": 4,
97
- "rms_norm_eps": 1e-06,
98
- "rope_scaling": {
99
- "mrope_section": [
100
- 16,
101
- 24,
102
- 24
103
- ],
104
- "rope_type": "default",
105
- "type": "default"
106
- },
107
- "rope_theta": 1000000.0,
108
- "sliding_window": null,
109
- "stage": "stage2",
110
- "use_cache": true,
111
- "use_sliding_window": false,
112
- "vision_token_id": 151654,
113
- "vocab_size": 151669
114
- },
115
  "tie_word_embeddings": false,
116
- "transformers_version": "4.57.3",
 
117
  "use_cache": true,
118
  "use_sliding_window": false,
119
  "video_token_id": 151656,
120
  "vision_config": {
121
- "depth": 32,
122
  "dtype": "bfloat16",
123
- "fullatt_block_indexes": [
124
- 7,
125
- 15,
126
- 23,
127
- 31
128
- ],
129
- "hidden_act": "silu",
130
  "hidden_size": 1280,
131
- "in_channels": 3,
132
  "in_chans": 3,
133
  "initializer_range": 0.02,
134
- "intermediate_size": 3420,
135
  "model_type": "qwen2_5_vl",
136
- "num_heads": 16,
137
- "out_hidden_size": 3584,
138
- "patch_size": 14,
139
- "spatial_merge_size": 2,
140
  "spatial_patch_size": 14,
141
- "temporal_patch_size": 2,
142
  "tokens_per_second": 2,
143
- "window_size": 112
144
  },
145
  "vision_end_token_id": 151653,
146
  "vision_start_token_id": 151652,
 
1
  {
2
+ "_name_or_path": "array/Qwen2.5-VL-Mull",
3
  "architectures": [
4
  "Qwen2_5_VLForConditionalGeneration"
5
  ],
6
  "attention_dropout": 0.0,
7
  "auto_map": {
8
+ "AutoModelForVision2Seq": "array/Qwen2.5-VL-Mull--mmlatentdiscrete_qwen_vl.Qwen2_5_VLForConditionalGeneration"
9
  },
10
  "bos_token_id": 151643,
11
  "compress_strategy": "average",
 
40
  "rope_theta": 1000000.0,
41
  "sliding_window": 32768,
42
  "stage": "stage2",
43
+ "text_config": null,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
44
  "tie_word_embeddings": false,
45
+ "torch_dtype": "bfloat16",
46
+ "transformers_version": "4.49.0.dev0",
47
  "use_cache": true,
48
  "use_sliding_window": false,
49
  "video_token_id": 151656,
50
  "vision_config": {
 
51
  "dtype": "bfloat16",
 
 
 
 
 
 
 
52
  "hidden_size": 1280,
 
53
  "in_chans": 3,
54
  "initializer_range": 0.02,
 
55
  "model_type": "qwen2_5_vl",
 
 
 
 
56
  "spatial_patch_size": 14,
 
57
  "tokens_per_second": 2,
58
+ "torch_dtype": "bfloat16"
59
  },
60
  "vision_end_token_id": 151653,
61
  "vision_start_token_id": 151652,
generation_config.json CHANGED
@@ -8,5 +8,5 @@
8
  "pad_token_id": 151643,
9
  "repetition_penalty": 1.05,
10
  "temperature": 1e-06,
11
- "transformers_version": "4.57.3"
12
  }
 
8
  "pad_token_id": 151643,
9
  "repetition_penalty": 1.05,
10
  "temperature": 1e-06,
11
+ "transformers_version": "4.49.0.dev0"
12
  }
model.safetensors.index.json CHANGED
@@ -1,6 +1,5 @@
1
  {
2
  "metadata": {
3
- "total_parameters": 8289335296,
4
  "total_size": 16578670592
5
  },
6
  "weight_map": {
 
1
  {
2
  "metadata": {
 
3
  "total_size": 16578670592
4
  },
5
  "weight_map": {
preprocessor_config.json CHANGED
@@ -8,7 +8,7 @@
8
  0.4578275,
9
  0.40821073
10
  ],
11
- "image_processor_type": "Qwen2VLImageProcessor",
12
  "image_std": [
13
  0.26862954,
14
  0.26130258,
 
8
  0.4578275,
9
  0.40821073
10
  ],
11
+ "image_processor_type": "Qwen2_5_VLImageProcessor",
12
  "image_std": [
13
  0.26862954,
14
  0.26130258,
tokenizer_config.json CHANGED
@@ -227,6 +227,7 @@
227
  "<|video_pad|>"
228
  ],
229
  "bos_token": null,
 
230
  "clean_up_tokenization_spaces": false,
231
  "eos_token": "<|im_end|>",
232
  "errors": "replace",
 
227
  "<|video_pad|>"
228
  ],
229
  "bos_token": null,
230
+ "chat_template": "{% set image_count = namespace(value=0) %}{% set video_count = namespace(value=0) %}{% for message in messages %}{% if loop.first and message['role'] != 'system' %}<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n{% endif %}<|im_start|>{{ message['role'] }}\n{% if message['content'] is string %}{{ message['content'] }}<|im_end|>\n{% else %}{% for content in message['content'] %}{% if content['type'] == 'image' or 'image' in content or 'image_url' in content %}{% set image_count.value = image_count.value + 1 %}{% if add_vision_id %}Picture {{ image_count.value }}: {% endif %}<|vision_start|><|image_pad|><|vision_end|>{% elif content['type'] == 'video' or 'video' in content %}{% set video_count.value = video_count.value + 1 %}{% if add_vision_id %}Video {{ video_count.value }}: {% endif %}<|vision_start|><|video_pad|><|vision_end|>{% elif 'text' in content %}{{ content['text'] }}{% endif %}{% endfor %}<|im_end|>\n{% endif %}{% endfor %}{% if add_generation_prompt %}<|im_start|>assistant\n{% endif %}",
231
  "clean_up_tokenization_spaces": false,
232
  "eos_token": "<|im_end|>",
233
  "errors": "replace",