Update modeling_orion.py
Browse files — modeling_orion.py (+2 −2)
modeling_orion.py
CHANGED
|
@@ -25,7 +25,7 @@ from transformers.pytorch_utils import ALL_LAYERNORM_LAYERS
|
|
| 25 |
from transformers.utils import (
|
| 26 |
add_start_docstrings,
|
| 27 |
add_start_docstrings_to_model_forward,
|
| 28 |
-
|
| 29 |
logging,
|
| 30 |
replace_return_docstrings,
|
| 31 |
)
|
|
@@ -33,7 +33,7 @@ from .generation_utils import build_chat_input, TextIterStreamer
|
|
| 33 |
from transformers.generation.utils import GenerationConfig
|
| 34 |
from threading import Thread
|
| 35 |
|
| 36 |
-
if
|
| 37 |
from flash_attn import flash_attn_func, flash_attn_varlen_func
|
| 38 |
from flash_attn.bert_padding import index_first_axis, pad_input, unpad_input # noqa
|
| 39 |
|
|
|
|
| 25 |
from transformers.utils import (
|
| 26 |
add_start_docstrings,
|
| 27 |
add_start_docstrings_to_model_forward,
|
| 28 |
+
is_flash_attn_2_available,
|
| 29 |
logging,
|
| 30 |
replace_return_docstrings,
|
| 31 |
)
|
|
|
|
| 33 |
from transformers.generation.utils import GenerationConfig
|
| 34 |
from threading import Thread
|
| 35 |
|
| 36 |
+
if is_flash_attn_2_available():
|
| 37 |
from flash_attn import flash_attn_func, flash_attn_varlen_func
|
| 38 |
from flash_attn.bert_padding import index_first_axis, pad_input, unpad_input # noqa
|
| 39 |
|