Gausson committed on
Commit
c6c6a1b
·
verified ·
1 Parent(s): e994cb5

Upload 2 files

Browse files
Files changed (2) hide show
  1. functions_2_patch.py +221 -0
  2. monkey_patching_utils.py +154 -0
functions_2_patch.py ADDED
@@ -0,0 +1,221 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import inspect
3
+ import importlib
4
+
5
+ from typing import Callable, Optional, Union, Any, List
6
+ from transformers.modeling_flash_attention_utils import FlashAttentionKwargs
7
+ from transformers.cache_utils import Cache
8
+ from transformers.processing_utils import Unpack
9
+
10
+ from .sep_cache_utils import SepCache
11
+
12
+
13
+
14
def truncate_input_ids_4_autoregression(input_ids, key_states):
    """Trim `input_ids` so its sequence length matches that of `key_states`.

    During autoregressive decoding the token ids handed to a layer may cover a
    longer span than the freshly projected keys; only the trailing
    `key_states.shape[-2]` ids are kept so both line up. When the lengths
    already agree, `input_ids` is returned unchanged.
    """
    kv_len = key_states.shape[-2]
    ids_len = input_ids.shape[-1]
    if ids_len == kv_len:
        return input_ids
    # There must be at least as many ids as keys to truncate from the right.
    assert ids_len >= kv_len
    return input_ids[..., -kv_len:]
21
+
22
def llama_atten_forward(
    self,
    hidden_states: torch.Tensor,
    position_embeddings: tuple[torch.Tensor, torch.Tensor],
    attention_mask: Optional[torch.Tensor],
    past_key_value: Optional[Cache] = None,
    cache_position: Optional[torch.LongTensor] = None,
    **kwargs: Unpack[FlashAttentionKwargs],
) -> tuple[torch.Tensor, Optional[torch.Tensor], Optional[tuple[torch.Tensor]]]:
    """Monkey-patched Llama attention forward that routes the KV cache through `SepCache`.

    Mirrors the stock attention forward, except that `past_key_value` must be a
    `SepCache`, whose `update()` additionally needs `input_ids` (taken from
    `kwargs` directly, or from `kwargs["sepllm_kwargs"]`) and `position_ids`.

    Args:
        hidden_states: input activations of shape (batch, seq_len, hidden).
        position_embeddings: `(cos, sin)` rotary tables for the current positions.
        attention_mask: optional mask; clipped to the usable cache length.
        past_key_value: the `SepCache` holding past KV states (required).
        cache_position: positions of the current tokens within the cache.
        **kwargs: must carry `position_ids` and `input_ids`/`sepllm_kwargs`;
            forwarded to the selected attention backend.

    Returns:
        `(attn_output, attn_weights)` from the selected attention backend.
    """
    input_shape = hidden_states.shape[:-1]

    # Different transformers versions expose the per-head size under
    # different attribute names.
    if hasattr(self, "head_dim"):
        head_dim = self.head_dim
    elif hasattr(self, "head_size"):
        head_dim = self.head_size
    else:
        # FIX: the original left `head_dim` unbound here, surfacing later as a
        # confusing NameError instead of a clear failure.
        raise AttributeError("The attention module exposes neither `head_dim` nor `head_size`.")

    hidden_shape = (*input_shape, -1, head_dim)

    query_states = self.q_proj(hidden_states).view(hidden_shape).transpose(1, 2)
    key_states = self.k_proj(hidden_states).view(hidden_shape).transpose(1, 2)
    value_states = self.v_proj(hidden_states).view(hidden_shape).transpose(1, 2)

    ###########################SepCache########################
    assert isinstance(past_key_value, SepCache), f"`past_key_value` must be of the type: `SepCache`."
    APPLY_PE_SHIFT = past_key_value.APPLY_PE_SHIFT
    APPLY_PES_INSIDE = past_key_value.APPLY_PES_INSIDE
    ###########################################################

    ########################Monkey Patching####################
    # Resolve the rotary/attention helpers from the module that defines this
    # attention class, so the patch tracks the installed transformers version.
    module = importlib.import_module(self.__module__)

    apply_rotary_pos_emb = module.apply_rotary_pos_emb
    rotate_half = module.rotate_half
    eager_attention_forward = module.eager_attention_forward
    ALL_ATTENTION_FUNCTIONS = module.ALL_ATTENTION_FUNCTIONS
    ###########################################################

    # FIX: unpack the rotary tables unconditionally. The original only did so
    # in the `not APPLY_PE_SHIFT` branch, leaving `sin`/`cos` (and the derived
    # `sin_q`/`cos_q`, which were never defined at all) to raise NameError when
    # a PE shift was requested with `APPLY_PES_INSIDE == False`.
    cos, sin = position_embeddings
    if not APPLY_PE_SHIFT:
        query_states, key_states = apply_rotary_pos_emb(query_states, key_states, cos, sin)

    if past_key_value is not None:
        # sin and cos are specific to RoPE models; position_ids needed for the static cache
        if APPLY_PE_SHIFT and (not APPLY_PES_INSIDE):
            ### At least the shifted `sin` and `cos` should be properly provided (not `None`).
            # NOTE(review): the caller supplies a single (cos, sin) pair, so the
            # query-side tables default to the same tensors -- confirm this
            # matches SepCache's expected `cos_q`/`sin_q` semantics.
            cache_kwargs = {"sin": sin, "cos": cos, "cos_q": cos, "sin_q": sin,
                            "cache_position": cache_position, "partial_rotation_size": None}
        else:
            cache_kwargs = {}

        # `input_ids` may arrive directly in `kwargs` or packed inside `sepllm_kwargs`.
        # (The original's `"kwargs" in locals()` scaffolding was dead code:
        # `kwargs` is always bound as a parameter of this function.)
        if "input_ids" in kwargs:
            input_ids = kwargs.get("input_ids", None)
        else:
            sepllm_kwargs = kwargs.get("sepllm_kwargs", None)
            assert sepllm_kwargs is not None, f"`sepllm_kwargs` must be provided when `input_ids` is not given."
            input_ids = sepllm_kwargs.get("input_ids", None)

        assert input_ids is not None, f"`input_ids` must be properly provided directly or through `sepllm_kwargs` when calling `update()` in `SepCache`."

        position_ids = kwargs.get("position_ids")

        bsz, q_len, _ = hidden_states.size()

        input_ids = truncate_input_ids_4_autoregression(input_ids=input_ids, key_states=key_states)

        if APPLY_PE_SHIFT:
            # SepCache applies the (possibly shifted) rotary embeddings itself
            # and therefore also returns rotated query states.
            key_states, value_states, query_states = past_key_value.update(
                key_states=key_states,
                value_states=value_states,
                query_states=query_states,
                input_ids=input_ids,
                layer_idx=self.layer_idx,
                position_ids=position_ids,
                PREFILLING_FLAG=q_len > 1,
                cache_kwargs=cache_kwargs)
        else:
            key_states, value_states = past_key_value.update(
                key_states=key_states,
                value_states=value_states,
                input_ids=input_ids,
                layer_idx=self.layer_idx,
                position_ids=position_ids,
                PREFILLING_FLAG=q_len > 1,
                cache_kwargs=cache_kwargs)

        # SepCache may compress the stored sequence; clip the mask accordingly.
        seq_len = past_key_value.get_usable_length(self.layer_idx)

        if attention_mask is not None:
            attention_mask = attention_mask[..., :seq_len]

    attention_interface: Callable = eager_attention_forward
    if self.config._attn_implementation != "eager":
        attention_interface = ALL_ATTENTION_FUNCTIONS[self.config._attn_implementation]

    attn_output, attn_weights = attention_interface(
        self,
        query_states,
        key_states,
        value_states,
        attention_mask,
        dropout=0.0 if not self.training else self.attention_dropout,
        scaling=self.scaling,
        **kwargs,
    )

    attn_output = attn_output.reshape(*input_shape, -1).contiguous()
    attn_output = self.o_proj(attn_output)
    return attn_output, attn_weights
152
+
153
+
154
+
155
+
156
+
157
+
158
def _validate_model_kwargs(self, model_kwargs: dict[str, Any]):
    """Validates model kwargs for generation. Generate argument typos will also be caught here."""
    # A `Cache` instance in `past_key_values` is only legal when the model
    # opts into the new cache API.
    if isinstance(model_kwargs.get("past_key_values", None), Cache) and not self._supports_cache_class:
        raise ValueError(
            f"{self.__class__.__name__} does not support an instance of `Cache` as `past_key_values`. Please "
            "check the model documentation for supported cache formats."
        )

    # Arguments consumed before any model function runs are not validated.
    if self.config.is_encoder_decoder:
        for key in ["decoder_input_ids"]:
            model_kwargs.pop(key, None)

    accepted_args = set(inspect.signature(self.prepare_inputs_for_generation).parameters)
    # `kwargs`/`model_kwargs` is often used to hand optional forward-pass
    # inputs (like `attention_mask`) through `prepare_inputs_for_generation`;
    # when it accepts a catch-all, `forward`'s own parameters count too.
    if "kwargs" in accepted_args or "model_kwargs" in accepted_args:
        accepted_args |= set(inspect.signature(self.forward).parameters)

    # Encoder-decoder models may also consume encoder/decoder arguments.
    if self.config.is_encoder_decoder:
        base_model = getattr(self, self.base_model_prefix, None)

        # Allow encoder kwargs. `MusicgenForConditionalGeneration` exposes
        # `text_encoder`/`audio_encoder` and has
        # `base_model_prefix = "encoder_decoder"` with no matching attribute,
        # hence the fallback through the base model.
        # TODO: A better way to handle this.
        encoder = getattr(self, "encoder", None)
        if encoder is None and base_model is not None:
            encoder = getattr(base_model, "encoder", None)
        if encoder is not None:
            accepted_args |= set(inspect.signature(encoder.forward).parameters)

        # Allow decoder kwargs, which callers pass with a `decoder_` prefix.
        decoder = getattr(self, "decoder", None)
        if decoder is None and base_model is not None:
            decoder = getattr(base_model, "decoder", None)
        if decoder is not None:
            accepted_args |= {f"decoder_{name}" for name in inspect.signature(decoder.forward).parameters}

    ###############################SepCache###########################
    # Unlike the stock implementation, any kwarg whose key mentions "sep"
    # (e.g. `sepllm_kwargs`) is tolerated so SepCache extras survive
    # `generate()`'s strict validation.
    unused_model_args = [
        key
        for key, value in model_kwargs.items()
        if (value is not None) and (key not in accepted_args) and ("sep" not in str(key).lower())
    ]
    ###################################################################

    if unused_model_args:
        raise ValueError(
            f"The following `model_kwargs` are not used by the model: {unused_model_args} (note: typos in the"
            " generate arguments will also show up in this list)"
        )
220
+
221
+
monkey_patching_utils.py ADDED
@@ -0,0 +1,154 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import inspect
3
+ import importlib
4
+ import transformers
5
+ import types
6
+
7
+ import torch.nn as nn
8
+ from transformers.modeling_utils import PreTrainedModel
9
+ from typing import Callable, Optional, Union, Any, List
10
+
11
+ from .functions_2_patch import _validate_model_kwargs, llama_atten_forward
12
+
13
+
14
def get_full_class_import_path(obj):
    """Get the complete class import path of an object.

    Nested-class qualified names (``Outer.Inner``) are flattened with
    underscores so the result is a dotted module path plus one identifier.
    """
    cls = obj.__class__
    module = cls.__module__
    qualname = cls.__qualname__
    # Flatten nested-class separators; top-level classes pass through as-is.
    suffix = qualname.replace('.', '_') if '.' in qualname else qualname
    return f"{module}.{suffix}"
33
+
34
+
35
def get_importable_class_path(obj):
    """Get the directly importable class path (handling special cases and dynamic classes)."""
    cls = obj.__class__
    module = cls.__module__
    qualname = cls.__qualname__

    # Built-in types are addressable by their bare name.
    if module == 'builtins':
        return qualname

    # Dynamically generated classes carry no usable module.
    if not hasattr(cls, '__module__') or module is None:
        return f"<dynamic class {qualname}>"

    # Top-level classes: the module-qualified name is already importable.
    if '.' not in qualname:
        return f"{module}.{qualname}"

    # Nested class: verify the dotted path actually resolves before trusting it.
    try:
        target = importlib.import_module(module)
        for piece in qualname.split('.'):
            target = getattr(target, piece)
    except (ImportError, AttributeError):
        # Fallback: flatten the nesting with underscores.
        return f"{module}.{qualname.replace('.', '_')}"
    return f"{module}.{qualname}"
69
+
70
+
71
+
72
def monkey_patch_by_class_path(model, new_forward):
    """Perform monkey patching through class path."""
    # Resolve where the instance's class can be imported from.
    class_path = get_importable_class_path(model)

    try:
        module_path, class_name = class_path.rsplit('.', 1)
        target_class = getattr(importlib.import_module(module_path), class_name)

        # Keep a handle on the pristine method the first time we patch.
        if not hasattr(target_class, '_original_forward'):
            target_class._original_forward = target_class.forward

        # Patch the class, then rebind on this instance so it picks up the
        # new method even if `forward` was previously bound per-instance.
        target_class.forward = new_forward
        model.forward = types.MethodType(target_class.forward, model)

        return f"Successful Monkey Patch: {class_path}.forward"

    except (ImportError, AttributeError, ValueError) as e:
        return f"Patch Failed: {str(e)}"
98
+
99
+
100
+
101
+
102
def find_inner_attribute(obj, attr_name_list: List[str], default_type = PreTrainedModel ):
    """Return the first attribute of `obj` named in `attr_name_list`.

    When none of the candidate names exists, fall back to the first attribute
    (in `dir()` order) whose value is an instance of `default_type`.

    Raises:
        AttributeError: when neither lookup strategy finds anything.
    """
    for candidate in attr_name_list:
        if hasattr(obj, candidate):
            return getattr(obj, candidate)

    # Fallback: scan everything on the object for a value of the wanted type.
    for name in dir(obj):
        value = getattr(obj, name)
        if isinstance(value, default_type):
            return value

    raise AttributeError(f"In the {obj} object, there is no attribute whose name matches any name in {attr_name_list} or whose type is {default_type}.")
115
+
116
+
117
+ def find_attribute_name(obj, name_pattern_list: List[str], exclude_pattern_list: List[str], match_type = nn.Module):
118
+ for attr_name in dir(obj):
119
+ attr_value = getattr(obj, attr_name)
120
+ for pattern in name_pattern_list:
121
+ for ex_pattern in exclude_pattern_list:
122
+ if isinstance(attr_value, match_type) and (pattern.lower() in attr_value.__class__.__name__.lower()) and ( ex_pattern.lower() not in attr_value.__class__.__name__.lower() ):
123
+ return attr_value
124
+ elif isinstance(attr_value, match_type) and (pattern.lower() in attr_name.lower()) and (ex_pattern.lower() not in attr_name.lower() ):
125
+ return attr_value
126
+
127
+ raise AttributeError(f"In the {obj} object, there is no attribute whose name matches any pattern in {name_pattern_list} and excludes any pattern in {exclude_pattern_list}, and whose type is {match_type}.")
128
+
129
+
130
+
131
def monkey_patching(model_obj, model_atten_forward , verbose = True):
    """Patch every decoder layer's attention forward with `model_atten_forward`.

    Also replaces `GenerationMixin._validate_model_kwargs` so `generate()`
    tolerates the SepCache-specific kwargs. Returns the `nn.ModuleList` of
    decoder layers that were patched.
    """
    # Relax generate() kwarg validation for SepCache extras.
    transformers.generation.GenerationMixin._validate_model_kwargs = _validate_model_kwargs

    ## get inner model (attribute naming varies across architectures)
    inner_model = find_inner_attribute(model_obj, ["model", "transformer", "gpt_neox"], PreTrainedModel)

    # Locate the stack of decoder layers.
    model_layers = find_inner_attribute(inner_model, ["layers", "h"], nn.ModuleList)

    attn_patterns = ["attention", "self_attn"]
    attn_excludes = ["norm", "layer"]

    for i, decoder_layer in enumerate(model_layers):
        self_attn_module = find_attribute_name(decoder_layer, attn_patterns, attn_excludes, nn.Module)
        result = monkey_patch_by_class_path(self_attn_module, model_atten_forward)
        if verbose:
            decoder_class_name = get_importable_class_path(decoder_layer)
            print(f"For Layer {i}'s `{decoder_class_name}`: {result}")

    return model_layers