fix

modeling_gptbert.py CHANGED (+2 -2)
@@ -327,7 +327,7 @@ class SelfAttention(nn.Module):

         self.dropout = nn.Dropout(config.hidden_dropout)

-        theta = 160_000 if (layer_idx + 1) % config.
+        theta = 160_000 if (layer_idx + 1) % config.local_global_ratio == 0 else 10_000

         # Initialize rotary embeddings based on whether FlashAttention is available
         if is_flash_attn_2_available():
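For reference, a minimal sketch of the per-layer rotary base the fixed expression produces; local_global_ratio = 4 and num_layers = 12 are hypothetical values chosen for illustration, not taken from this diff or the model config:

local_global_ratio = 4   # hypothetical value, not from this diff
num_layers = 12          # hypothetical value, not from this diff

for layer_idx in range(num_layers):
    # Every local_global_ratio-th layer (1-indexed) gets the larger rotary
    # base; all other layers keep the standard base of 10_000.
    theta = 160_000 if (layer_idx + 1) % local_global_ratio == 0 else 10_000
    kind = "global" if theta == 160_000 else "local"
    print(f"layer {layer_idx:2d}: theta={theta:_} ({kind})")

With a ratio of 4, this marks layers 3, 7, and 11 (0-indexed), i.e. every fourth layer, as the ones using the 160_000 base.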
@@ -515,7 +515,7 @@ class Encoder(nn.Module):
     def __init__(self, config: GptBertConfig):
         super().__init__()
         self.layers = nn.ModuleList([Layer(config, i) for i in range(config.num_layers)])
-        self.
+        self.local_global_ratio = config.local_global_ratio

     def set_window_length(self, config: GptBertConfig):
         for i, layer in enumerate(self.layers):
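Storing the ratio on the Encoder presumably makes the same local/global split available to set_window_length, whose body this diff truncates. A hypothetical sketch of such a use follows; is_global, layer.window_length, and config.window_length are illustrative assumptions, not code from the repository:

def set_window_length(self, config: GptBertConfig):
    for i, layer in enumerate(self.layers):
        # Assumed logic: every local_global_ratio-th layer (1-indexed)
        # attends globally (no sliding window); the rest use a local window.
        # The attribute names below are illustrative, not from the source.
        is_global = (i + 1) % self.local_global_ratio == 0
        layer.window_length = None if is_global else config.window_length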