Spaces:

CreatorIQ-org
/

rlhf_docker

Sleeping

App Files Community

b2u committed on Dec 3, 2024

Commit

c4e7614

1 Parent(s): 535fc0a

debugging predictions

Browse files

Files changed (1) hide show

model.py +24 -15

model.py CHANGED Viewed

@@ -105,15 +105,6 @@ class BertClassifier(LabelStudioMLBase):
         return self
     def predict(self, tasks, **kwargs):
-        """
-        Tasks is a list of tasks with the following fields:
-        {
-            "id": 123,
-            "data": {
-                "text": "Example text"
-            }
-        }
-        """
         logger.info("=== PREDICT METHOD CALLED ===")
         logger.info(f"Received tasks: {json.dumps(tasks, indent=2)}")
         logger.info(f"Number of tasks: {len(tasks)}")
@@ -122,19 +113,37 @@ class BertClassifier(LabelStudioMLBase):
         for task_index, task in enumerate(tasks, 1):
             try:
-                # Log the specific task being processed
                 logger.info(f"Processing task {task_index} - Text: {task['data'].get('text', '')[:20]}...")
-                # Log model state
                 model_path = os.path.join(self.model_dir, 'model_state.pt')
                 if os.path.exists(model_path):
                     logger.info("✓ Using trained model")
                 else:
                     logger.info("✗ No trained model found, using initial state")
-                # Get model prediction
-                predicted_label, confidence = self._get_prediction(task['data']['text'])
-                logger.info(f"Predicted category: {predicted_label} with confidence: {confidence:.4f}")
                 # Format the prediction for Label Studio
                 prediction = {
@@ -145,7 +154,7 @@ class BertClassifier(LabelStudioMLBase):
                         'value': {
                             'choices': [predicted_label]
                         },
-                        'score': confidence
                     }],
                     'model_version': self.model_version,
                     'task': task['id']

         return self
     def predict(self, tasks, **kwargs):
         logger.info("=== PREDICT METHOD CALLED ===")
         logger.info(f"Received tasks: {json.dumps(tasks, indent=2)}")
         logger.info(f"Number of tasks: {len(tasks)}")
         for task_index, task in enumerate(tasks, 1):
             try:
                 logger.info(f"Processing task {task_index} - Text: {task['data'].get('text', '')[:20]}...")
                 model_path = os.path.join(self.model_dir, 'model_state.pt')
                 if os.path.exists(model_path):
                     logger.info("✓ Using trained model")
                 else:
                     logger.info("✗ No trained model found, using initial state")
+                # Prepare the text for the model
+                inputs = self.tokenizer(
+                    task['data']['text'],
+                    truncation=True,
+                    padding=True,
+                    return_tensors="pt"
+                ).to(self.device)
+                # Set model to evaluation mode
+                self._model.eval()
+                # Get prediction
+                with torch.no_grad():
+                    outputs = self._model(**inputs)
+                    logits = outputs.logits
+                    probabilities = torch.softmax(logits, dim=1)
+                    confidence, predicted_idx = torch.max(probabilities, dim=1)
+                    # Get predicted label
+                    predicted_label = self.categories[predicted_idx.item()]
+                    confidence_score = confidence.item()
+                logger.info(f"Predicted category: {predicted_label} with confidence: {confidence_score:.4f}")
                 # Format the prediction for Label Studio
                 prediction = {
                         'value': {
                             'choices': [predicted_label]
                         },
+                        'score': confidence_score
                     }],
                     'model_version': self.model_version,
                     'task': task['id']