b2u committed on
Commit
4b117ce
·
1 Parent(s): 51dd92c

adding prompt

Browse files
Files changed (1) hide show
  1. model.py +40 -17
model.py CHANGED
@@ -132,42 +132,65 @@ class T5Model(LabelStudioMLBase):
132
  choices = root.findall('.//Choice')
133
  return [choice.get('value') for choice in choices]
134
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
135
  def predict(self, tasks: List[Dict], context: Optional[Dict] = None, **kwargs) -> ModelResponse:
136
  """Generate predictions using T5 model"""
137
  logger.info("Received prediction request")
138
  logger.info(f"Tasks: {json.dumps(tasks, indent=2)}")
139
- logger.info(f"Context: {json.dumps(context, indent=2) if context else None}")
140
- logger.info(f"Additional kwargs: {kwargs}")
141
 
142
  predictions = []
143
- # Get valid choices during initialization instead of from kwargs
144
- valid_choices = []
145
  try:
146
- import xml.etree.ElementTree as ET
147
- root = ET.fromstring(self.label_config)
148
- choices = root.findall('.//Choice')
149
- valid_choices = [choice.get('value') for choice in choices]
150
  logger.info(f"Valid choices: {valid_choices}")
151
  except Exception as e:
152
  logger.error(f"Error parsing choices: {str(e)}")
153
- valid_choices = ["no_category"] # fallback
 
154
 
155
  try:
156
  for task in tasks:
157
- logger.info(f"Processing task: {json.dumps(task, indent=2)}")
158
-
159
  input_text = task['data'].get(self.to_name)
160
- logger.info(f"Input text: {input_text}")
161
- logger.info(f"Using to_name: {self.to_name}")
162
-
163
  if not input_text:
164
  logger.warning(f"No input text found using {self.to_name}")
165
- logger.warning(f"Available fields in task data: {list(task['data'].keys())}")
166
  continue
167
 
168
- # Generate prediction
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
169
  inputs = self.tokenizer(
170
- input_text,
171
  return_tensors="pt",
172
  max_length=self.max_length,
173
  truncation=True,
 
132
  choices = root.findall('.//Choice')
133
  return [choice.get('value') for choice in choices]
134
 
135
def get_categories_with_hints(self, label_config):
    """Parse the Label Studio label config XML and collect every <Choice>.

    Returns a list of dicts, one per Choice element, each holding the
    element's 'value' and 'hint' attributes (either may be None when the
    attribute is absent on the XML element).
    """
    import xml.etree.ElementTree as ET

    config_root = ET.fromstring(label_config)
    # Find all <Choice> nodes anywhere under the config root and keep
    # their value/hint attribute pairs.
    return [
        {'value': node.get('value'), 'hint': node.get('hint')}
        for node in config_root.findall('.//Choice')
    ]
148
+
149
  def predict(self, tasks: List[Dict], context: Optional[Dict] = None, **kwargs) -> ModelResponse:
150
  """Generate predictions using T5 model"""
151
  logger.info("Received prediction request")
152
  logger.info(f"Tasks: {json.dumps(tasks, indent=2)}")
 
 
153
 
154
  predictions = []
155
+ # Get categories with their descriptions
 
156
  try:
157
+ categories = self.get_categories_with_hints(self.label_config)
158
+ valid_choices = [cat['value'] for cat in categories]
159
+ category_descriptions = [f"{cat['value']}: {cat['hint']}" for cat in categories]
 
160
  logger.info(f"Valid choices: {valid_choices}")
161
  except Exception as e:
162
  logger.error(f"Error parsing choices: {str(e)}")
163
+ valid_choices = ["no_category"]
164
+ category_descriptions = ["no_category: Default category when no others apply"]
165
 
166
  try:
167
  for task in tasks:
 
 
168
  input_text = task['data'].get(self.to_name)
 
 
 
169
  if not input_text:
170
  logger.warning(f"No input text found using {self.to_name}")
 
171
  continue
172
 
173
+ # Format prompt with input text and category descriptions
174
+ prompt = f"""Classify the following text into exactly one category.
175
+
176
+ Available categories with descriptions:
177
+ {chr(10).join(f"- {desc}" for desc in category_descriptions)}
178
+
179
+ Text to classify: {input_text}
180
+
181
+ Instructions:
182
+ 1. Consider the text carefully
183
+ 2. Choose the most appropriate category from the list
184
+ 3. Return ONLY the category value (e.g. 'business_and_career', 'date', etc.)
185
+ 4. Do not add any explanations or additional text
186
+
187
+ Category:"""
188
+
189
+ logger.info(f"Generated prompt: {prompt}")
190
+
191
+ # Generate prediction with prompt
192
  inputs = self.tokenizer(
193
+ prompt,
194
  return_tensors="pt",
195
  max_length=self.max_length,
196
  truncation=True,