Upload GPTRefactForCausalLM

Files changed (3)

configuration_gpt_refact.py CHANGED

@@ -32,7 +32,7 @@ class GPTRefactConfig(PretrainedConfig):
         use_cache=True,
         bos_token_id=-1,
         eos_token_id=0,
-        max_position_embeddings: int = 2048,
         multi_query: bool = True,
         attention_softmax_in_fp32=False,
         scale_attention_softmax_in_fp32=False,

         use_cache=True,
         bos_token_id=-1,
         eos_token_id=0,
+        max_position_embeddings: int = 4096,
         multi_query: bool = True,
         attention_softmax_in_fp32=False,
         scale_attention_softmax_in_fp32=False,

modeling_gpt_refact.py CHANGED

@@ -346,9 +346,10 @@ class GPTRefactModel(GPTRefactPreTrainedModel):
         self.h = nn.ModuleList([GPTRefactBlock(config, layer_idx=i) for i in range(config.num_hidden_layers)])
-        max_positions = config.max_position_embeddings
         self.register_buffer(
-            "bias", torch.tril(torch.ones((max_positions, max_positions), dtype=torch.bool)), persistent=False
         )
         self.gradient_checkpointing = False

         self.h = nn.ModuleList([GPTRefactBlock(config, layer_idx=i) for i in range(config.num_hidden_layers)])
+        self.max_positions = config.max_position_embeddings
         self.register_buffer(
+            "bias", torch.tril(torch.ones((self.max_positions, self.max_positions), dtype=torch.bool)),
+            persistent=False
         )
         self.gradient_checkpointing = False

pytorch_model.bin CHANGED

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c2369c7e2228204ac8e0bc39c048d1e6349ce5f1bab8005a60bde0f0aa26ca73
 size 6343461637

 version https://git-lfs.github.com/spec/v1
+oid sha256:81388e4a168bb437a7a09af6c8b6c2943990276ee62c2f449cd2bdff257e8860
 size 6343461637