Update modeling_ostlm.py
modeling_ostlm.py CHANGED (+11 -14)
@@ -51,7 +51,7 @@ class OSTLMConfig(PretrainedConfig):
 class OSTLMModel(PreTrainedModel, GenerationMixin):
     config_class = OSTLMConfig
 
-    #
+    # Fixes the AttributeError
     _tied_weights_keys = ["lm_head.weight"]
 
     def __init__(self, config):
@@ -69,15 +69,14 @@ class OSTLMModel(PreTrainedModel, GenerationMixin):
             batch_first=True
         )
 
-        self.lm_head = nn.Linear(config.d_model, config.vocab_size, bias=False)
+        # Changed to bias=True to match your existing weights
+        self.lm_head = nn.Linear(config.d_model, config.vocab_size, bias=True)
 
-        # Weight tying
+        # Weight tying
         self.lm_head.weight = self.embedding.weight
 
-        # Weight initialization and synchronization
         self.post_init()
 
-        # Required for weight tying with AutoModel
     def get_output_embeddings(self):
         return self.lm_head
 
@@ -90,7 +89,9 @@ class OSTLMModel(PreTrainedModel, GenerationMixin):
             super().__init__()
             self.outer = outer
         def forward(self, input_ids=None, **kwargs):
-
+            if input_ids is None:
+                # Guard for the case where generate passes other inputs
+                input_ids = kwargs.get("decoder_input_ids")
             seq_len = input_ids.size(1)
             out = self.outer.embedding(input_ids) + self.outer.pos_emb[:, :seq_len, :]
             return BaseModelOutput(last_hidden_state=out)
@@ -104,7 +105,7 @@ class OSTLMModel(PreTrainedModel, GenerationMixin):
         labels=None,
         **kwargs
     ):
-        #
+        # Encoder handling
         if encoder_outputs is not None:
             if isinstance(encoder_outputs, (tuple, list)):
                 src_emb = encoder_outputs[0]
@@ -115,11 +116,10 @@ class OSTLMModel(PreTrainedModel, GenerationMixin):
         else:
             src_emb = self.embedding(input_ids) + self.pos_emb[:, :input_ids.size(1), :]
 
-        #
+        # Decoder handling - make sure there are no duplicate parameters from generate
         if decoder_input_ids is None:
             decoder_input_ids = kwargs.get("input_ids")
             if decoder_input_ids is None:
-                # Default initialization for generation
                 decoder_input_ids = torch.full(
                     (src_emb.size(0), 1),
                     self.config.decoder_start_token_id,
@@ -127,15 +127,11 @@ class OSTLMModel(PreTrainedModel, GenerationMixin):
                 ).to(self.device)
 
         tgt_emb = self.embedding(decoder_input_ids) + self.pos_emb[:, :decoder_input_ids.size(1), :]
-
-        # Create the causal mask
         tgt_mask = self.transformer.generate_square_subsequent_mask(decoder_input_ids.size(1)).to(self.device)
 
-        # 3. Run through the transformer
         out = self.transformer(src_emb, tgt_emb, tgt_mask=tgt_mask)
         logits = self.lm_head(out)
 
-        # 4. Compute the loss if labels are provided
         loss = None
         if labels is not None:
             loss_fct = nn.CrossEntropyLoss(ignore_index=self.config.pad_token_id)
@@ -148,11 +144,12 @@ class OSTLMModel(PreTrainedModel, GenerationMixin):
         )
 
     def prepare_inputs_for_generation(self, input_ids, encoder_outputs=None, **kwargs):
+        # Fixes the unused model_kwargs issue
         return {
             "decoder_input_ids": input_ids,
             "encoder_outputs": encoder_outputs,
         }
 
-# Registration
+# Final registration
 AutoConfig.register("ostlm", OSTLMConfig)
 AutoModelForSeq2SeqLM.register(OSTLMConfig, OSTLMModel)
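
Two of the changes above work together: `_tied_weights_keys` plus `get_output_embeddings()` are what let the Transformers tying machinery (`tie_weights()`, reached via `post_init()`, and `save_pretrained`) treat `lm_head.weight` as a shared tensor rather than a missing checkpoint key. Below is a minimal sketch of the invariant this commit establishes; it assumes `modeling_ostlm.py` is importable, and the `OSTLMConfig` arguments are illustrative guesses, not the real signature:

# Minimal sketch: check the weight tying wired up in __init__.
# Assumes modeling_ostlm.py is importable; the config arguments are
# illustrative guesses, not the real OSTLMConfig signature.
from modeling_ostlm import OSTLMConfig, OSTLMModel

config = OSTLMConfig(vocab_size=32000, d_model=512)
model = OSTLMModel(config)

# The head and the input embedding share one Parameter object,
# and get_output_embeddings() exposes the tied head to AutoModel.
assert model.lm_head.weight is model.embedding.weight
assert model.get_output_embeddings() is model.lm_head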
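
With the `AutoConfig` / `AutoModelForSeq2SeqLM` registration in place, the model loads through the Auto classes like any other seq2seq checkpoint, and `generate()` drives the `prepare_inputs_for_generation` hook above, feeding `encoder_outputs` plus the growing `decoder_input_ids` back into `forward` on every step. A minimal usage sketch; the repo id and tokenizer are hypothetical, and it assumes this file ships with the checkpoint as custom code:

# Usage sketch. "user/ostlm-demo" is a hypothetical repo id; assumes the
# checkpoint bundles modeling_ostlm.py as trust-remote-code custom code.
import torch
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

tokenizer = AutoTokenizer.from_pretrained("user/ostlm-demo", trust_remote_code=True)
model = AutoModelForSeq2SeqLM.from_pretrained("user/ostlm-demo", trust_remote_code=True)

inputs = tokenizer("example input", return_tensors="pt")
with torch.no_grad():
    output_ids = model.generate(**inputs, max_new_tokens=20)
print(tokenizer.decode(output_ids[0], skip_special_tokens=True))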