Update modeling_deepseek.py to make the model forward-compatible with transformers v5+

#21
Files changed (1) hide show
  1. modeling_deepseek.py +5 -2
modeling_deepseek.py CHANGED
@@ -53,7 +53,10 @@ from transformers.utils import (
53
  logging,
54
  replace_return_docstrings,
55
  )
56
- from transformers.utils.import_utils import is_torch_fx_available
 
 
 
57
  from .configuration_deepseek import DeepseekV2Config
58
  import torch.distributed as dist
59
  import numpy as np
@@ -65,7 +68,7 @@ if is_flash_attn_2_available():
65
 
66
  # This makes `_prepare_4d_causal_attention_mask` a leaf function in the FX graph.
67
  # It means that the function will not be traced through and simply appear as a node in the graph.
68
- if is_torch_fx_available():
69
  if not is_torch_greater_or_equal_than_1_13:
70
  import torch.fx
71
 
 
53
  logging,
54
  replace_return_docstrings,
55
  )
56
+ try:
57
+ from transformers.utils.import_utils import is_torch_fx_available as is_torch_available
58
+ except ImportError:
59
+ from transformers.utils.import_utils import is_torch_available
60
  from .configuration_deepseek import DeepseekV2Config
61
  import torch.distributed as dist
62
  import numpy as np
 
68
 
69
  # This makes `_prepare_4d_causal_attention_mask` a leaf function in the FX graph.
70
  # It means that the function will not be traced through and simply appear as a node in the graph.
71
+ if is_torch_available():
72
  if not is_torch_greater_or_equal_than_1_13:
73
  import torch.fx
74