Update modeling_deepseek.py
Browse files- modeling_deepseek.py +0 -7
modeling_deepseek.py
CHANGED
|
@@ -48,8 +48,6 @@ from transformers.pytorch_utils import (
|
|
| 48 |
from transformers.utils import (
|
| 49 |
add_start_docstrings,
|
| 50 |
add_start_docstrings_to_model_forward,
|
| 51 |
-
is_flash_attn_2_available,
|
| 52 |
-
is_flash_attn_greater_or_equal_2_10,
|
| 53 |
logging,
|
| 54 |
replace_return_docstrings,
|
| 55 |
)
|
|
@@ -58,11 +56,6 @@ from .configuration_deepseek import DeepseekV2Config
|
|
| 58 |
import torch.distributed as dist
|
| 59 |
import numpy as np
|
| 60 |
|
| 61 |
-
if is_flash_attn_2_available():
|
| 62 |
-
from flash_attn import flash_attn_func, flash_attn_varlen_func
|
| 63 |
-
from flash_attn.bert_padding import index_first_axis, pad_input, unpad_input # noqa
|
| 64 |
-
|
| 65 |
-
|
| 66 |
# This makes `_prepare_4d_causal_attention_mask` a leaf function in the FX graph.
|
| 67 |
# It means that the function will not be traced through and simply appear as a node in the graph.
|
| 68 |
if is_torch_fx_available():
|
|
|
|
| 48 |
from transformers.utils import (
|
| 49 |
add_start_docstrings,
|
| 50 |
add_start_docstrings_to_model_forward,
|
|
|
|
|
|
|
| 51 |
logging,
|
| 52 |
replace_return_docstrings,
|
| 53 |
)
|
|
|
|
| 56 |
import torch.distributed as dist
|
| 57 |
import numpy as np
|
| 58 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 59 |
# This makes `_prepare_4d_causal_attention_mask` a leaf function in the FX graph.
|
| 60 |
# It means that the function will not be traced through and simply appear as a node in the graph.
|
| 61 |
if is_torch_fx_available():
|