small fix
Browse files
- README.md +1 -1
- modeling_lsg_mbart.py +12 -4
README.md
CHANGED
|
@@ -9,7 +9,7 @@ pipeline_tag: fill-mask
|
|
| 9 |
---
|
| 10 |
|
| 11 |
# LSG model
|
| 12 |
-
**Transformers >= 4.
|
| 13 |
**This model relies on a custom modeling file, you need to add trust_remote_code=True**\
|
| 14 |
**See [\#13467](https://github.com/huggingface/transformers/pull/13467)**
|
| 15 |
|
|
|
|
| 9 |
---
|
| 10 |
|
| 11 |
# LSG model
|
| 12 |
+
**Transformers >= 4.36.1**\
|
| 13 |
**This model relies on a custom modeling file, you need to add trust_remote_code=True**\
|
| 14 |
**See [\#13467](https://github.com/huggingface/transformers/pull/13467)**
|
| 15 |
|
modeling_lsg_mbart.py
CHANGED
|
@@ -816,17 +816,19 @@ class LSGMBartEncoder(LSGMBartPretrainedModel, MBartEncoder):
|
|
| 816 |
if input_ids is not None and inputs_embeds is not None:
|
| 817 |
raise ValueError("You cannot specify both input_ids and inputs_embeds at the same time")
|
| 818 |
elif input_ids is not None:
|
| 819 |
-
|
|
|
|
| 820 |
input_ids = input_ids.view(-1, input_shape[-1])
|
| 821 |
elif inputs_embeds is not None:
|
| 822 |
-
|
| 823 |
else:
|
| 824 |
raise ValueError("You have to specify either input_ids or inputs_embeds")
|
| 825 |
|
| 826 |
if inputs_embeds is None:
|
| 827 |
inputs_embeds = self.embed_tokens(input_ids) * self.embed_scale
|
| 828 |
|
| 829 |
-
|
|
|
|
| 830 |
hidden_states = inputs_embeds + embed_pos
|
| 831 |
|
| 832 |
# Add global tokens
|
|
@@ -922,6 +924,12 @@ class LSGMBartModel(LSGMBartPretrainedModel, MBartModel):
|
|
| 922 |
self.encoder = LSGMBartEncoder(config, self.shared)
|
| 923 |
self.decoder = MBartDecoder(config, self.shared)
|
| 924 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 925 |
# Initialize weights and apply final processing
|
| 926 |
self.post_init()
|
| 927 |
|
|
@@ -1091,4 +1099,4 @@ try:
|
|
| 1091 |
str_to_class(value.split(".")[-1]).register_for_auto_class(key)
|
| 1092 |
except:
|
| 1093 |
warn("AutoRegister isn't available, you'll have to manually copy modeling.py after .save_pretrained(...).")
|
| 1094 |
-
warn("Update to transformers >= 4.
|
|
|
|
| 816 |
if input_ids is not None and inputs_embeds is not None:
|
| 817 |
raise ValueError("You cannot specify both input_ids and inputs_embeds at the same time")
|
| 818 |
elif input_ids is not None:
|
| 819 |
+
input = input_ids
|
| 820 |
+
input_shape = input.shape
|
| 821 |
input_ids = input_ids.view(-1, input_shape[-1])
|
| 822 |
elif inputs_embeds is not None:
|
| 823 |
+
input = inputs_embeds[:, :, -1]
|
| 824 |
else:
|
| 825 |
raise ValueError("You have to specify either input_ids or inputs_embeds")
|
| 826 |
|
| 827 |
if inputs_embeds is None:
|
| 828 |
inputs_embeds = self.embed_tokens(input_ids) * self.embed_scale
|
| 829 |
|
| 830 |
+
|
| 831 |
+
embed_pos = self.embed_positions(input).to(inputs_embeds.device)
|
| 832 |
hidden_states = inputs_embeds + embed_pos
|
| 833 |
|
| 834 |
# Add global tokens
|
|
|
|
| 924 |
self.encoder = LSGMBartEncoder(config, self.shared)
|
| 925 |
self.decoder = MBartDecoder(config, self.shared)
|
| 926 |
|
| 927 |
+
self._use_flash_attention_2 = config._attn_implementation == "flash_attention_2"
|
| 928 |
+
if self._use_flash_attention_2:
|
| 929 |
+
logger.warning(
|
| 930 |
+
"[WARNING flash-attention]: LSG doesnt support flash-attention currently"
|
| 931 |
+
)
|
| 932 |
+
|
| 933 |
# Initialize weights and apply final processing
|
| 934 |
self.post_init()
|
| 935 |
|
|
|
|
| 1099 |
str_to_class(value.split(".")[-1]).register_for_auto_class(key)
|
| 1100 |
except:
|
| 1101 |
warn("AutoRegister isn't available, you'll have to manually copy modeling.py after .save_pretrained(...).")
|
| 1102 |
+
warn("Update to transformers >= 4.36.1 to fix.")
|