Upload processor

Files changed (4)

modeling_yangjian.py CHANGED Viewed

@@ -146,7 +146,7 @@ class OptimizedCrossAttention(nn.Module):
     """
     仿照 Qwen2_5_VLVisionAttention 结构的优化 Cross Attention
     """
-    def __init__(self, config, is_cross_attention=False):
         super().__init__()
         self.config = config
         self.dim = config.hidden_size
@@ -558,12 +558,10 @@ class YangJianVisionTransformerPretrainedModel(Qwen2_5_VisionTransformerPretrain
             else:
                 cu_seqlens_now = cu_window_seqlens
-            attention_mask = self._prepare_attention_mask(hidden_states, cu_seqlens_now)
             hidden_states = blk(
                 hidden_states,
                 cu_seqlens=cu_seqlens_now,
                 position_embeddings=position_embeddings,
-                attention_mask=attention_mask,
                 **kwargs,
             )

     """
     仿照 Qwen2_5_VLVisionAttention 结构的优化 Cross Attention
     """
+    def __init__(self, config, is_cross_attention=True):
         super().__init__()
         self.config = config
         self.dim = config.hidden_size
             else:
                 cu_seqlens_now = cu_window_seqlens
             hidden_states = blk(
                 hidden_states,
                 cu_seqlens=cu_seqlens_now,
                 position_embeddings=position_embeddings,
                 **kwargs,
             )

preprocessor_config.json CHANGED Viewed

@@ -2,6 +2,12 @@
   "auto_map": {
     "AutoProcessor": "modeling_yangjian.YangJianProcessor"
   },
   "do_convert_rgb": true,
   "do_normalize": true,
   "do_rescale": true,
@@ -11,12 +17,13 @@
     0.4578275,
     0.40821073
   ],
-  "image_processor_type": "Qwen2VLImageProcessor",
   "image_std": [
     0.26862954,
     0.26130258,
     0.27577711
   ],
   "max_pixels": 12845056,
   "merge_size": 2,
   "min_pixels": 3136,
@@ -24,6 +31,7 @@
   "processor_class": "YangJianProcessor",
   "resample": 3,
   "rescale_factor": 0.00392156862745098,
   "size": {
     "longest_edge": 12845056,
     "shortest_edge": 3136

   "auto_map": {
     "AutoProcessor": "modeling_yangjian.YangJianProcessor"
   },
+  "crop_size": null,
+  "data_format": "channels_first",
+  "default_to_square": true,
+  "device": null,
+  "disable_grouping": null,
+  "do_center_crop": null,
   "do_convert_rgb": true,
   "do_normalize": true,
   "do_rescale": true,
     0.4578275,
     0.40821073
   ],
+  "image_processor_type": "Qwen2VLImageProcessorFast",
   "image_std": [
     0.26862954,
     0.26130258,
     0.27577711
   ],
+  "input_data_format": null,
   "max_pixels": 12845056,
   "merge_size": 2,
   "min_pixels": 3136,
   "processor_class": "YangJianProcessor",
   "resample": 3,
   "rescale_factor": 0.00392156862745098,
+  "return_tensors": null,
   "size": {
     "longest_edge": 12845056,
     "shortest_edge": 3136

tokenizer.json CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ba0c439f7be467bf47d12a7e6f9adc6116201056fc60c67f431c679b7c16afc8
-size 11422064

 version https://git-lfs.github.com/spec/v1
+oid sha256:5eee858c5123a4279c3e1f7b81247343f356ac767940b2692a928ad929543214
+size 11422063

tokenizer_config.json CHANGED Viewed

@@ -202,8 +202,12 @@
   "eos_token": "<|im_end|>",
   "errors": "replace",
   "extra_special_tokens": {},
   "model_max_length": 131072,
   "pad_token": "<|endoftext|>",
   "processor_class": "YangJianProcessor",
   "split_special_tokens": false,
   "tokenizer_class": "Qwen2Tokenizer",

   "eos_token": "<|im_end|>",
   "errors": "replace",
   "extra_special_tokens": {},
+  "max_length": null,
   "model_max_length": 131072,
+  "pad_to_multiple_of": null,
   "pad_token": "<|endoftext|>",
+  "pad_token_type_id": 0,
+  "padding_side": "left",
   "processor_class": "YangJianProcessor",
   "split_special_tokens": false,
   "tokenizer_class": "Qwen2Tokenizer",