transformers-community
/

sep_cache

@@ -1,23 +1,6 @@
-def debug_imports():
-    import sys
-    import os
-    import inspect
-    print("\n===== 导入调试信息 =====")
-    print(f"当前工作目录: {os.getcwd()}")
-    print(f"脚本路径: {os.path.abspath(__file__)}")
-    print(f"脚本所在目录: {os.path.dirname(os.path.abspath(__file__))}")
-    print(f"父目录: {os.path.dirname(os.path.dirname(os.path.abspath(__file__)))}")
-    print(f"Python路径(sys.path):")
-    for p in sys.path:
-        print(f"  - {p}")
-    print("=======================\n")
-# 在脚本开头调用
-debug_imports()
 import torch
@@ -30,28 +13,10 @@ import torch.nn as nn
 from transformers.modeling_utils import  PreTrainedModel
-# try:
-#     from functions_2_patch import _validate_model_kwargs, llama_atten_forward
-#     from monkey_patching_utils import monkey_patching
-#     from sep_cache_utils import SepCache
-# except :
-#     from ..functions_2_patch import _validate_model_kwargs, llama_atten_forward
-#     from ..monkey_patching_utils import monkey_patching
-#     from ..sep_cache_utils import SepCache
 from .functions_2_patch import _validate_model_kwargs, llama_atten_forward
 from .monkey_patching_utils import monkey_patching
 from .sep_cache_utils import SepCache
-# except :
-#     from ..functions_2_patch import _validate_model_kwargs, llama_atten_forward
-#     from ..monkey_patching_utils import monkey_patching
-#     from ..sep_cache_utils import SepCache
 UNSUPPORTED_GENERATION_ARGS = [
     "cache_implementation",  # cache-related arguments, here we always use SepCache
@@ -110,23 +75,6 @@ def generate(model,
              **kwargs
              ):
-    debug_imports()
-    import sys
-    import os
-    sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
-    # from utils.sep_cache_utils import SepCache
-    print(f"__file__: {__file__}")
-    print(f"os.path.abspath(__file__): {os.path.abspath(__file__)}")
-    # from ..functions_2_patch import _validate_model_kwargs, llama_atten_forward
-    # from ..monkey_patching_utils import monkey_patching
-    # from ..sep_cache_utils import SepCache
     """Custom generate function for SepCache.
     A cache as described in the [SepLLM paper - ICML 2025](https://arxiv.org/abs/2412.12094). In the training phase,
@@ -229,7 +177,7 @@ def generate(model,
     """
     # 0. Monkey Patching for the `update` function of `SepCache`
-    # model_layers = monkey_patching(model,  model_atten_forward=llama_atten_forward, verbose=monkey_patch_verbose)
     # 1. General sanity checks
     # 1.a. A few arguments are not allowed, especially arguments that control caches.

+import sys
+import os
+sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 import torch
 from transformers.modeling_utils import  PreTrainedModel
 from .functions_2_patch import _validate_model_kwargs, llama_atten_forward
 from .monkey_patching_utils import monkey_patching
 from .sep_cache_utils import SepCache
 UNSUPPORTED_GENERATION_ARGS = [
     "cache_implementation",  # cache-related arguments, here we always use SepCache
              **kwargs
              ):
     """Custom generate function for SepCache.
     A cache as described in the [SepLLM paper - ICML 2025](https://arxiv.org/abs/2412.12094). In the training phase,
     """
     # 0. Monkey Patching for the `update` function of `SepCache`
+    model_layers = monkey_patching(model,  model_atten_forward=llama_atten_forward, verbose=monkey_patch_verbose)
     # 1. General sanity checks
     # 1.a. A few arguments are not allowed, especially arguments that control caches.