Upload processor
Browse files - modeling_yangjian.py +1 -46
modeling_yangjian.py
CHANGED
|
@@ -759,49 +759,4 @@ class YangJianVLForConditionalGeneration(Qwen2_5_VLForConditionalGeneration):
|
|
| 759 |
|
| 760 |
def __init__(self, config):
|
| 761 |
super().__init__(config)
|
| 762 |
-
self.model = YangJianVLModel(config)
|
| 763 |
-
|
| 764 |
-
# def _prepare_generation_config(self, generation_config, use_model_defaults, **kwargs: dict):
|
| 765 |
-
# model_kwargs = super()._prepare_generation_config(generation_config, use_model_defaults, **kwargs)
|
| 766 |
-
# compare_token_size = self.config.vision_config.compare_token_size
|
| 767 |
-
# input_dict = model_kwargs[1]
|
| 768 |
-
# input_ids = model_kwargs[1]["input_ids"]
|
| 769 |
-
# attention_mask = model_kwargs[1]["attention_mask"]
|
| 770 |
-
# if "pixel_values" in input_dict and input_dict["pixel_values"] is not None:
|
| 771 |
-
# image_grid_thw = input_dict["image_grid_thw"]
|
| 772 |
-
|
| 773 |
-
# # 计算每张图片的token数量
|
| 774 |
-
# image_token_counts = (image_grid_thw.prod(-1) // self.config.vision_config.spatial_merge_size**2).tolist()
|
| 775 |
-
|
| 776 |
-
# image_token_positions = (input_ids == self.config.image_token_id).nonzero(as_tuple=True)[1]
|
| 777 |
-
# # 倒序遍历图片,这样插入时不会影响前面图片的位置
|
| 778 |
-
# current_end = len(image_token_positions) # 最后一个图片token的结束位置
|
| 779 |
-
# for i in range(len(image_token_counts) - 1, -1, -1):
|
| 780 |
-
# count = image_token_counts[i]
|
| 781 |
-
# # 计算当前图片的结束位置
|
| 782 |
-
# start = current_end - count # 当前图片的起始位置
|
| 783 |
-
# end_index = image_token_positions[current_end - 1] # 当前图片的最后一个token位置
|
| 784 |
-
|
| 785 |
-
# # 在第i张图片的末尾插入 self.compare_token_size 个图像对比的token
|
| 786 |
-
# # 获取插入位置的token的值
|
| 787 |
-
# prev_token = input_ids[:, end_index]
|
| 788 |
-
# input_ids = torch.cat([
|
| 789 |
-
# input_ids[:, :end_index + 1],
|
| 790 |
-
# prev_token.repeat(input_ids.shape[0], compare_token_size),
|
| 791 |
-
# input_ids[:, end_index + 1:]
|
| 792 |
-
# ], dim=1)
|
| 793 |
-
|
| 794 |
-
# # 同步更新attention_mask和position_ids
|
| 795 |
-
# if attention_mask is not None:
|
| 796 |
-
# prev_mask = attention_mask[:, end_index]
|
| 797 |
-
# attention_mask = torch.cat([
|
| 798 |
-
# attention_mask[:, :end_index + 1],
|
| 799 |
-
# prev_mask.repeat(input_ids.shape[0], compare_token_size),
|
| 800 |
-
# attention_mask[:, end_index + 1:]
|
| 801 |
-
# ], dim=1)
|
| 802 |
-
|
| 803 |
-
# current_end = start # 更新结束位置为当前图片的起始位置
|
| 804 |
-
|
| 805 |
-
# model_kwargs[1]["input_ids"] = input_ids
|
| 806 |
-
# model_kwargs[1]["attention_mask"] = attention_mask
|
| 807 |
-
# return model_kwargs
|
|
|
|
| 759 |
|
| 760 |
def __init__(self, config):
|
| 761 |
super().__init__(config)
|
| 762 |
+
self.model = YangJianVLModel(config)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|