feat: set adapter based on prompt

Browse files

Signed-off-by: Mohammad Kalim Akram <kalim.akram@jina.ai>

Files changed (2) hide show

modeling_lora.py +34 -15
modeling_xlm_roberta.py +3 -9

modeling_lora.py CHANGED Viewed

@@ -14,9 +14,6 @@ from transformers import PretrainedConfig
 from .modeling_xlm_roberta import XLMRobertaFlashConfig, XLMRobertaModel, XLMRobertaPreTrainedModel
-LORA_NO_UPDATE = '__lora_no_update__'
 def initialized_weights(
     shape: Tuple[int], num_adaptations: int, init: str = "kaiming"
 ) -> torch.Tensor:
@@ -247,6 +244,13 @@ class XLMRobertaLoRA(XLMRobertaPreTrainedModel):
         self._task_idx = None
         # By default, disable LoRA until it's specified which adapter/task to use
         self.current_task = None
     @property
     def main_params_trainable(self):
@@ -332,9 +336,18 @@ class XLMRobertaLoRA(XLMRobertaPreTrainedModel):
                 partial(LoRAParametrization.select_task_for_layer, task_idx=task_idx)
             )
-    def forward(self, *args, task: Union[str, None] = LORA_NO_UPDATE, **kwargs):
-        if task != LORA_NO_UPDATE:
-            self.current_task = task
         return self.roberta(*args, **kwargs)
@@ -355,7 +368,7 @@ class XLMRobertaLoRA(XLMRobertaPreTrainedModel):
     def encode(
         self,
         *args,
-        task: Union[str, None] = LORA_NO_UPDATE,
         **kwargs,
     ) -> Union[List[torch.Tensor], np.ndarray, torch.Tensor]:
         """
@@ -364,18 +377,24 @@ class XLMRobertaLoRA(XLMRobertaPreTrainedModel):
         task(`str`, *optional*, defaults to `LORA_NO_UPDATE`):
             Specifies the task for which the encoding is intended. This parameter controls the
             use of specialized LoRA adapters that are tuned for specific tasks. If `task` is set
-            to `LORA_NO_UPDATE`, there will be no update to the current task, retaining the
-            existing adapter configuration. If `task` is explicitly set to `None`, all LoRA
-            adapters are disabled, and the model reverts to its original, general-purpose weights.
-            If `task` is set to a specific LoRA adaptation, that adaptation is activated.
         """
-        if task != LORA_NO_UPDATE:
-            if not task:
                 warnings.warn(
                     f"Task-specific embeddings are disabled. To enable, specify the `task` "
                     f"argument with one of the supported tasks: {', '.join(self.config.lora_adaptations)}",
                     category=UserWarning,
                 )
-            self.current_task = task
-        return self.roberta.encode(*args, **kwargs)

 from .modeling_xlm_roberta import XLMRobertaFlashConfig, XLMRobertaModel, XLMRobertaPreTrainedModel
 def initialized_weights(
     shape: Tuple[int], num_adaptations: int, init: str = "kaiming"
 ) -> torch.Tensor:
         self._task_idx = None
         # By default, disable LoRA until it's specified which adapter/task to use
         self.current_task = None
+        self.prompts = {
+            'query': 'Represent the query for retrieving supporting documents: ',
+            'document': 'Represent the document for retrieval: ',
+            'sts': 'Represent the text for Semantic Textual Similarity: ',
+            'clustering': 'Cluster the text: ',
+            'classification': 'Classify the text: ',
+            }
     @property
     def main_params_trainable(self):
                 partial(LoRAParametrization.select_task_for_layer, task_idx=task_idx)
             )
+    def forward(self, *args, task_type: Union[str, None] = None, **kwargs):
+        if task_type:
+            self.current_task = task_type
+        else:
+            input_ids = kwargs["input_ids"]
+            input_text = self.roberta.tokenizer.decode(input_ids[0], skip_special_tokens=True)
+            for task_name, prompt in self.prompts.items():
+                if input_text.startswith(prompt):
+                    self.current_task = task_name
+                    break
+            else:
+                self.current_task = None  # No task-specific adapter is found, just use the general-purpose weights
         return self.roberta(*args, **kwargs)
     def encode(
         self,
         *args,
+        task_type: Union[str, None] = None,
         **kwargs,
     ) -> Union[List[torch.Tensor], np.ndarray, torch.Tensor]:
         """
         task_type(`str`, *optional*, defaults to `None`):
             Specifies the task for which the encoding is intended. This parameter controls the
             use of specialized LoRA adapters that are tuned for specific tasks. If `task_type` is
+            set to a specific LoRA adaptation, that adaptation is activated. If it is `None` (or
+            empty), the task is inferred from the prompt prefix of the first input sentence; when
+            no known prompt matches, all LoRA adapters are disabled, and the model reverts to its
+            original, general-purpose weights.
         """
+        if task_type:
+            self.current_task = task_type
+        else:  # infer the task from the input text
+            input_text = args[0][0] if isinstance(args[0], list) else args[0]  # take only the first sentence
+            for task_name, prompt in self.prompts.items():
+                if input_text.startswith(prompt):
+                    self.current_task = task_name
+                    break
+            else:
                 warnings.warn(
                     f"Task-specific embeddings are disabled. To enable, specify the `task` "
                     f"argument with one of the supported tasks: {', '.join(self.config.lora_adaptations)}",
                     category=UserWarning,
                 )
+                self.current_task = None  # No task-specific adapter is found, just use the general-purpose weights
+        return self.roberta.encode(*args, **kwargs)

modeling_xlm_roberta.py CHANGED Viewed

@@ -21,7 +21,7 @@ import torch.nn.functional as F
 import torch.utils.checkpoint
 from torch.nn import BCEWithLogitsLoss, CrossEntropyLoss, MSELoss
 from einops import rearrange
-from transformers import PretrainedConfig
 from transformers.modeling_utils import PreTrainedModel
 from transformers.modeling_outputs import MaskedLMOutput,SequenceClassifierOutput
 from transformers.models.xlm_roberta.modeling_xlm_roberta import XLMRobertaLMHead
@@ -440,7 +440,7 @@ class XLMRobertaModel(XLMRobertaPreTrainedModel):
         self.pooler = XLMRobertaPooler(config) if add_pooling_layer else None
         self.apply(partial(_init_weights, initializer_range=config.initializer_range))
     @torch.inference_mode()
     def encode(
@@ -492,12 +492,6 @@ class XLMRobertaModel(XLMRobertaPreTrainedModel):
             If convert_to_tensor, a stacked tensor is returned.
             If convert_to_numpy, a numpy matrix is returned.
         """
-        from transformers import AutoTokenizer
-        self.tokenizer = AutoTokenizer.from_pretrained(
-            self.name_or_path, trust_remote_code=True
-        )
         is_training = self.training
         self.eval()
@@ -1278,4 +1272,4 @@ class XLMRobertaForSequenceClassification(XLMRobertaPreTrainedModel):
             logits=logits,
             hidden_states=outputs.hidden_states,
             attentions=outputs.attentions,
-        )

 import torch.utils.checkpoint
 from torch.nn import BCEWithLogitsLoss, CrossEntropyLoss, MSELoss
 from einops import rearrange
+from transformers import PretrainedConfig, AutoTokenizer
 from transformers.modeling_utils import PreTrainedModel
 from transformers.modeling_outputs import MaskedLMOutput,SequenceClassifierOutput
 from transformers.models.xlm_roberta.modeling_xlm_roberta import XLMRobertaLMHead
         self.pooler = XLMRobertaPooler(config) if add_pooling_layer else None
         self.apply(partial(_init_weights, initializer_range=config.initializer_range))
+        self.tokenizer = AutoTokenizer.from_pretrained(self.name_or_path, trust_remote_code=True)
     @torch.inference_mode()
     def encode(
             If convert_to_tensor, a stacked tensor is returned.
             If convert_to_numpy, a numpy matrix is returned.
         """
         is_training = self.training
         self.eval()
             logits=logits,
             hidden_states=outputs.hidden_states,
             attentions=outputs.attentions,
+        )