Upload modeling_arctic.py with huggingface_hub
Browse files
- modeling_arctic.py +2 -2
modeling_arctic.py
CHANGED
|
@@ -56,7 +56,7 @@ from transformers.utils import (
|
|
| 56 |
)
|
| 57 |
from transformers.utils.import_utils import is_torch_fx_available
|
| 58 |
from .configuration_arctic import ArcticConfig
|
| 59 |
-
from transformers.integrations.deepspeed import is_deepspeed_available
|
| 60 |
from transformers.utils.versions import require_version
|
| 61 |
|
| 62 |
if is_deepspeed_available():
|
|
@@ -354,7 +354,7 @@ class ArcticAttention(nn.Module):
|
|
| 354 |
ds_optimized_quantization_config=quantization_config,
|
| 355 |
ds_optimized_base_weight_sharding=True,
|
| 356 |
dtype=torch.bfloat16)
|
| 357 |
-
self.o_proj = get_arctic_linear(self.hidden_size, self.
|
| 358 |
use_deepspeed_implementation=self.use_deepspeed_implementation,
|
| 359 |
ds_optimized_lora_config=deepspeed_lora_config,
|
| 360 |
ds_optimized_quantization_config=quantization_config,
|
|
|
|
| 56 |
)
|
| 57 |
from transformers.utils.import_utils import is_torch_fx_available
|
| 58 |
from .configuration_arctic import ArcticConfig
|
| 59 |
+
from transformers.integrations.deepspeed import is_deepspeed_available
|
| 60 |
from transformers.utils.versions import require_version
|
| 61 |
|
| 62 |
if is_deepspeed_available():
|
|
|
|
| 354 |
ds_optimized_quantization_config=quantization_config,
|
| 355 |
ds_optimized_base_weight_sharding=True,
|
| 356 |
dtype=torch.bfloat16)
|
| 357 |
+
self.o_proj = get_arctic_linear(self.hidden_size, self.hidden_size, bias=False,
|
| 358 |
use_deepspeed_implementation=self.use_deepspeed_implementation,
|
| 359 |
ds_optimized_lora_config=deepspeed_lora_config,
|
| 360 |
ds_optimized_quantization_config=quantization_config,
|