Elvis-t9
/

CGE-test

@@ -33,89 +33,12 @@ class CGETransformer(Transformer):
     config_keys: list[str] = ["max_seq_length", "do_lower_case"]
     save_in_root: bool = True
-    # def __init__(
-    #     self,
-    #     model_name_or_path: str,
-    #     max_seq_length: int | None = None,
-    #     model_args: dict[str, Any] | None = None,
-    #     tokenizer_args: dict[str, Any] | None = None,
-    #     config_args: dict[str, Any] | None = None,
-    #     cache_dir: str | None = None,
-    #     do_lower_case: bool = False,
-    #     tokenizer_name_or_path: str | None = None,
-    #     backend: str = "torch",
-    #     **kwargs
-    # ) -> None:
-    #     super().__init__(model_name_or_path, **kwargs)
-    #     self.do_lower_case = do_lower_case
-    #     self.backend = backend
-    #     if model_args is None:
-    #         model_args = {}
-    #     if tokenizer_args is None:
-    #         tokenizer_args = {}
-    #     if config_args is None:
-    #         config_args = {}
-    #     config, is_peft_model = self._load_config(model_name_or_path, cache_dir, backend, config_args)
-    #     self._load_model(model_name_or_path, config, cache_dir, backend, is_peft_model, **model_args)
-    #     # Get the signature of the auto_model's forward method to pass only the expected arguments from `features`,
-    #     # plus some common values like "input_ids", "attention_mask", etc.
-    #     model_forward_params = list(inspect.signature(self.auto_model.forward).parameters)
-    #     self.model_forward_params = set(model_forward_params) | {
-    #         "input_ids",
-    #         "attention_mask",
-    #         "token_type_ids",
-    #         "inputs_embeds",
-    #     }
-    #     if max_seq_length is not None and "model_max_length" not in tokenizer_args:
-    #         tokenizer_args["model_max_length"] = max_seq_length
-    #     self.tokenizer = AutoTokenizer.from_pretrained(
-    #         tokenizer_name_or_path if tokenizer_name_or_path is not None else model_name_or_path,
-    #         cache_dir=cache_dir,
-    #         **tokenizer_args,
-    #     )
-    #     # No max_seq_length set. Try to infer from model
-    #     if max_seq_length is None:
-    #         if (
-    #             hasattr(self.auto_model, "config")
-    #             and hasattr(self.auto_model.config, "max_position_embeddings")
-    #             and hasattr(self.tokenizer, "model_max_length")
-    #         ):
-    #             max_seq_length = min(self.auto_model.config.max_position_embeddings, self.tokenizer.model_max_length)
-    #     self.max_seq_length = max_seq_length
-    #     if tokenizer_name_or_path is not None:
-    #         self.auto_model.config.tokenizer_class = self.tokenizer.__class__.__name__
     def forward(self, features: dict[str, torch.Tensor], **kwargs) -> dict[str, torch.Tensor]:
-        """
-        Forward pass through the transformer model.
-        This method processes the input features through the underlying transformers model
-        and returns the token embeddings along with any other relevant outputs.
-        Notes:
-            - Only passes arguments that are expected by the underlying transformer model
-        Args:
-            features (dict[str, torch.Tensor]): Input features dictionary containing at least
-                'input_ids' and 'attention_mask'. May also contain other tensors required by
-                the underlying transformer model.
-            **kwargs: Additional keyword arguments to pass to the underlying transformer model.
-        Returns:
-            dict[str, torch.Tensor]: Updated features dictionary containing the input features, plus:
-                - 'token_embeddings': Token-level embeddings from the transformer model
-                - 'attention_mask': Possibly modified attention mask if using PeftModel with prompt learning
-                - 'all_layer_embeddings': If the model outputs hidden states, contains embeddings from all layers
-        """
         trans_features = {key: value for key, value in features.items() if key in self.model_forward_params}
         outputs = self.auto_model(**trans_features, **kwargs, return_dict=True)
         sentence_embedding = outputs["sentence_embedding"]
         features["sentence_embedding"] = sentence_embedding

     # NOTE(review): presumably the attribute names that the base class persists with
     # this module's configuration -- confirm against the Transformer base class.
     config_keys: list[str] = ["max_seq_length", "do_lower_case"]
     # NOTE(review): presumably tells the base class to save this module in the model's
     # root directory rather than in a subfolder -- confirm.
     save_in_root: bool = True
     def forward(self, features: dict[str, torch.Tensor], **kwargs) -> dict[str, torch.Tensor]:
         """Run the wrapped model and store its sentence embedding in ``features``.

         Args:
             features: Input mapping. Only entries whose key is in
                 ``self.model_forward_params`` are forwarded to the model;
                 the mapping itself is modified in place.
             **kwargs: Extra keyword arguments passed straight through to
                 ``self.auto_model``.

         NOTE(review): no ``return`` statement is visible in this excerpt even
         though the signature declares a dict return type. The method may
         continue beyond the lines shown here -- confirm that ``features`` is
         returned to the caller.
         """
         # Keep only the entries named in self.model_forward_params.
         trans_features = {key: value for key, value in features.items() if key in self.model_forward_params}
         # return_dict=True is always passed; the result is indexed by name below.
         outputs = self.auto_model(**trans_features, **kwargs, return_dict=True)
         # Copy the model's "sentence_embedding" output into the caller's dict
         # under the same key.
         sentence_embedding = outputs["sentence_embedding"]
         features["sentence_embedding"] = sentence_embedding