davda54 committed
Commit 6cbbc37 · verified
1 Parent(s): 37fc2ed
Files changed (1)
  1. modeling_gptbert.py +2 -2
modeling_gptbert.py CHANGED
@@ -327,7 +327,7 @@ class SelfAttention(nn.Module):
 
         self.dropout = nn.Dropout(config.hidden_dropout)
 
-        theta = 160_000 if (layer_idx + 1) % config.short_long_ratio == 0 else 10_000
+        theta = 160_000 if (layer_idx + 1) % config.local_global_ratio == 0 else 10_000
 
         # Initialize rotary embeddings based on whether FlashAttention is available
         if is_flash_attn_2_available():
@@ -515,7 +515,7 @@ class Encoder(nn.Module):
     def __init__(self, config: GptBertConfig):
         super().__init__()
         self.layers = nn.ModuleList([Layer(config, i) for i in range(config.num_layers)])
-        self.short_long_ratio = config.short_long_ratio
+        self.local_global_ratio = config.local_global_ratio
 
     def set_window_length(self, config: GptBertConfig):
         for i, layer in enumerate(self.layers):
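
For context, here is a minimal sketch (not part of the commit) of the layer pattern the renamed local_global_ratio controls, mirroring the theta expression in the first hunk: every local_global_ratio-th layer gets the long-context rotary base (160_000), while all other layers keep the default base (10_000). The helper name rope_theta_per_layer and the example values below are illustrative, not from the repository.

# Minimal sketch, assuming the theta expression shown in the first hunk.
# The function name and example values are hypothetical.
def rope_theta_per_layer(num_layers: int, local_global_ratio: int) -> list[int]:
    # Every local_global_ratio-th layer (counting from 1) uses the
    # long-context rotary base; the remaining layers stay local.
    return [
        160_000 if (layer_idx + 1) % local_global_ratio == 0 else 10_000
        for layer_idx in range(num_layers)
    ]

# Example: 12 layers with a ratio of 4 -> layers 4, 8, and 12 use 160_000.
print(rope_theta_per_layer(12, 4))
# [10000, 10000, 10000, 160000, 10000, 10000, 10000, 160000, 10000, 10000, 10000, 160000]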