Image-Text-to-Text
Safetensors
qwen2_5_vl
historical
conversational
s-jse committed on
Commit
c78e49f
·
verified ·
1 Parent(s): ada4172

Add model artifacts

Browse files
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
preprocessor_config.json CHANGED
@@ -3,6 +3,7 @@
3
  "data_format": "channels_first",
4
  "default_to_square": true,
5
  "device": null,
 
6
  "do_center_crop": null,
7
  "do_convert_rgb": true,
8
  "do_normalize": true,
@@ -29,8 +30,8 @@
29
  "rescale_factor": 0.00392156862745098,
30
  "return_tensors": null,
31
  "size": {
32
- "longest_edge": 12845056,
33
- "shortest_edge": 3136
34
  },
35
  "temporal_patch_size": 2
36
  }
 
3
  "data_format": "channels_first",
4
  "default_to_square": true,
5
  "device": null,
6
+ "disable_grouping": null,
7
  "do_center_crop": null,
8
  "do_convert_rgb": true,
9
  "do_normalize": true,
 
30
  "rescale_factor": 0.00392156862745098,
31
  "return_tensors": null,
32
  "size": {
33
+ "longest_edge": 4014080,
34
+ "shortest_edge": 401408
35
  },
36
  "temporal_patch_size": 2
37
  }
tokenizer_config.json CHANGED
@@ -195,15 +195,18 @@
195
  "<|video_pad|>"
196
  ],
197
  "bos_token": null,
198
- "chat_template": "{% set image_count = namespace(value=0) %}{% set video_count = namespace(value=0) %}{% for message in messages %}{% if loop.first and message['role'] != 'system' %}<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n{% endif %}<|im_start|>{{ message['role'] }}\n{% if message['content'] is string %}{{ message['content'] }}<|im_end|>\n{% else %}{% for content in message['content'] %}{% if content['type'] == 'image' or 'image' in content or 'image_url' in content %}{% set image_count.value = image_count.value + 1 %}{% if add_vision_id %}Picture {{ image_count.value }}: {% endif %}<|vision_start|><|image_pad|><|vision_end|>{% elif content['type'] == 'video' or 'video' in content %}{% set video_count.value = video_count.value + 1 %}{% if add_vision_id %}Video {{ video_count.value }}: {% endif %}<|vision_start|><|video_pad|><|vision_end|>{% elif 'text' in content %}{{ content['text'] }}{% endif %}{% endfor %}<|im_end|>\n{% endif %}{% endfor %}{% if add_generation_prompt %}<|im_start|>assistant\n{% endif %}",
199
  "clean_up_tokenization_spaces": false,
200
  "eos_token": "<|im_end|>",
201
  "errors": "replace",
202
  "extra_special_tokens": {},
 
203
  "max_pixels": 4014080,
204
  "min_pixels": 401408,
205
  "model_max_length": 131072,
 
206
  "pad_token": "<|endoftext|>",
 
 
207
  "processor_class": "Qwen2_5_VLProcessor",
208
  "split_special_tokens": false,
209
  "tokenizer_class": "Qwen2Tokenizer",
 
195
  "<|video_pad|>"
196
  ],
197
  "bos_token": null,
 
198
  "clean_up_tokenization_spaces": false,
199
  "eos_token": "<|im_end|>",
200
  "errors": "replace",
201
  "extra_special_tokens": {},
202
+ "max_length": null,
203
  "max_pixels": 4014080,
204
  "min_pixels": 401408,
205
  "model_max_length": 131072,
206
+ "pad_to_multiple_of": null,
207
  "pad_token": "<|endoftext|>",
208
+ "pad_token_type_id": 0,
209
+ "padding_side": "right",
210
  "processor_class": "Qwen2_5_VLProcessor",
211
  "split_special_tokens": false,
212
  "tokenizer_class": "Qwen2Tokenizer",
video_preprocessor_config.json ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "crop_size": null,
3
+ "data_format": "channels_first",
4
+ "default_to_square": true,
5
+ "device": null,
6
+ "do_center_crop": null,
7
+ "do_convert_rgb": true,
8
+ "do_normalize": true,
9
+ "do_pad": null,
10
+ "do_rescale": true,
11
+ "do_resize": true,
12
+ "do_sample_frames": false,
13
+ "fps": null,
14
+ "image_mean": [
15
+ 0.48145466,
16
+ 0.4578275,
17
+ 0.40821073
18
+ ],
19
+ "image_processor_type": "Qwen2VLImageProcessorFast",
20
+ "image_std": [
21
+ 0.26862954,
22
+ 0.26130258,
23
+ 0.27577711
24
+ ],
25
+ "input_data_format": null,
26
+ "max_frames": 768,
27
+ "max_pixels": 4014080,
28
+ "merge_size": 2,
29
+ "min_frames": 4,
30
+ "min_pixels": 401408,
31
+ "num_frames": null,
32
+ "patch_size": 14,
33
+ "processor_class": "Qwen2_5_VLProcessor",
34
+ "resample": 3,
35
+ "rescale_factor": 0.00392156862745098,
36
+ "return_tensors": null,
37
+ "size": {
38
+ "longest_edge": 4014080,
39
+ "shortest_edge": 401408
40
+ },
41
+ "size_divisor": null,
42
+ "temporal_patch_size": 2,
43
+ "video_metadata": null,
44
+ "video_processor_type": "Qwen2VLVideoProcessor"
45
+ }