Upload processor
Browse files
- modeling_yangjian.py +1 -3
- preprocessor_config.json +9 -1
- tokenizer.json +2 -2
- tokenizer_config.json +4 -0
modeling_yangjian.py
CHANGED
|
@@ -146,7 +146,7 @@ class OptimizedCrossAttention(nn.Module):
|
|
| 146 |
"""
|
| 147 |
仿照 Qwen2_5_VLVisionAttention 结构的优化 Cross Attention
|
| 148 |
"""
|
| 149 |
-
def __init__(self, config, is_cross_attention=
|
| 150 |
super().__init__()
|
| 151 |
self.config = config
|
| 152 |
self.dim = config.hidden_size
|
|
@@ -558,12 +558,10 @@ class YangJianVisionTransformerPretrainedModel(Qwen2_5_VisionTransformerPretrain
|
|
| 558 |
else:
|
| 559 |
cu_seqlens_now = cu_window_seqlens
|
| 560 |
|
| 561 |
-
attention_mask = self._prepare_attention_mask(hidden_states, cu_seqlens_now)
|
| 562 |
hidden_states = blk(
|
| 563 |
hidden_states,
|
| 564 |
cu_seqlens=cu_seqlens_now,
|
| 565 |
position_embeddings=position_embeddings,
|
| 566 |
-
attention_mask=attention_mask,
|
| 567 |
**kwargs,
|
| 568 |
)
|
| 569 |
|
|
|
|
| 146 |
"""
|
| 147 |
仿照 Qwen2_5_VLVisionAttention 结构的优化 Cross Attention
|
| 148 |
"""
|
| 149 |
+
def __init__(self, config, is_cross_attention=True):
|
| 150 |
super().__init__()
|
| 151 |
self.config = config
|
| 152 |
self.dim = config.hidden_size
|
|
|
|
| 558 |
else:
|
| 559 |
cu_seqlens_now = cu_window_seqlens
|
| 560 |
|
|
|
|
| 561 |
hidden_states = blk(
|
| 562 |
hidden_states,
|
| 563 |
cu_seqlens=cu_seqlens_now,
|
| 564 |
position_embeddings=position_embeddings,
|
|
|
|
| 565 |
**kwargs,
|
| 566 |
)
|
| 567 |
|
preprocessor_config.json
CHANGED
|
@@ -2,6 +2,12 @@
|
|
| 2 |
"auto_map": {
|
| 3 |
"AutoProcessor": "modeling_yangjian.YangJianProcessor"
|
| 4 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5 |
"do_convert_rgb": true,
|
| 6 |
"do_normalize": true,
|
| 7 |
"do_rescale": true,
|
|
@@ -11,12 +17,13 @@
|
|
| 11 |
0.4578275,
|
| 12 |
0.40821073
|
| 13 |
],
|
| 14 |
-
"image_processor_type": "
|
| 15 |
"image_std": [
|
| 16 |
0.26862954,
|
| 17 |
0.26130258,
|
| 18 |
0.27577711
|
| 19 |
],
|
|
|
|
| 20 |
"max_pixels": 12845056,
|
| 21 |
"merge_size": 2,
|
| 22 |
"min_pixels": 3136,
|
|
@@ -24,6 +31,7 @@
|
|
| 24 |
"processor_class": "YangJianProcessor",
|
| 25 |
"resample": 3,
|
| 26 |
"rescale_factor": 0.00392156862745098,
|
|
|
|
| 27 |
"size": {
|
| 28 |
"longest_edge": 12845056,
|
| 29 |
"shortest_edge": 3136
|
|
|
|
| 2 |
"auto_map": {
|
| 3 |
"AutoProcessor": "modeling_yangjian.YangJianProcessor"
|
| 4 |
},
|
| 5 |
+
"crop_size": null,
|
| 6 |
+
"data_format": "channels_first",
|
| 7 |
+
"default_to_square": true,
|
| 8 |
+
"device": null,
|
| 9 |
+
"disable_grouping": null,
|
| 10 |
+
"do_center_crop": null,
|
| 11 |
"do_convert_rgb": true,
|
| 12 |
"do_normalize": true,
|
| 13 |
"do_rescale": true,
|
|
|
|
| 17 |
0.4578275,
|
| 18 |
0.40821073
|
| 19 |
],
|
| 20 |
+
"image_processor_type": "Qwen2VLImageProcessorFast",
|
| 21 |
"image_std": [
|
| 22 |
0.26862954,
|
| 23 |
0.26130258,
|
| 24 |
0.27577711
|
| 25 |
],
|
| 26 |
+
"input_data_format": null,
|
| 27 |
"max_pixels": 12845056,
|
| 28 |
"merge_size": 2,
|
| 29 |
"min_pixels": 3136,
|
|
|
|
| 31 |
"processor_class": "YangJianProcessor",
|
| 32 |
"resample": 3,
|
| 33 |
"rescale_factor": 0.00392156862745098,
|
| 34 |
+
"return_tensors": null,
|
| 35 |
"size": {
|
| 36 |
"longest_edge": 12845056,
|
| 37 |
"shortest_edge": 3136
|
tokenizer.json
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5eee858c5123a4279c3e1f7b81247343f356ac767940b2692a928ad929543214
|
| 3 |
+
size 11422063
|
tokenizer_config.json
CHANGED
|
@@ -202,8 +202,12 @@
|
|
| 202 |
"eos_token": "<|im_end|>",
|
| 203 |
"errors": "replace",
|
| 204 |
"extra_special_tokens": {},
|
|
|
|
| 205 |
"model_max_length": 131072,
|
|
|
|
| 206 |
"pad_token": "<|endoftext|>",
|
|
|
|
|
|
|
| 207 |
"processor_class": "YangJianProcessor",
|
| 208 |
"split_special_tokens": false,
|
| 209 |
"tokenizer_class": "Qwen2Tokenizer",
|
|
|
|
| 202 |
"eos_token": "<|im_end|>",
|
| 203 |
"errors": "replace",
|
| 204 |
"extra_special_tokens": {},
|
| 205 |
+
"max_length": null,
|
| 206 |
"model_max_length": 131072,
|
| 207 |
+
"pad_to_multiple_of": null,
|
| 208 |
"pad_token": "<|endoftext|>",
|
| 209 |
+
"pad_token_type_id": 0,
|
| 210 |
+
"padding_side": "left",
|
| 211 |
"processor_class": "YangJianProcessor",
|
| 212 |
"split_special_tokens": false,
|
| 213 |
"tokenizer_class": "Qwen2Tokenizer",
|