stuckdavis committed on
Commit
d67623d
·
verified ·
1 Parent(s): 1ef7ea2

Update handler.py

Browse files
Files changed (1) hide show
  1. handler.py +38 -40
handler.py CHANGED
@@ -1,47 +1,45 @@
1
- from typing import Dict, List, Any
2
- from transformers import AutoModelForSequenceClassification, AutoTokenizer
3
  import torch
4
 
5
class EndpointHandler():
    """Inference Endpoints handler for a Longformer-style regression model.

    Loads a single-label (num_labels=1) sequence-classification model in
    regression mode and scores one longform text per request.
    """

    def __init__(self, path=""):
        # Load the model and tokenizer from the given path (default: cwd).
        self.model = AutoModelForSequenceClassification.from_pretrained(
            path if path else ".",
            num_labels=1,  # single output head -> regression task
            problem_type="regression"
        )
        self.tokenizer = AutoTokenizer.from_pretrained(
            path if path else ".",
            use_fast=False  # Use the slow tokenizer
        )
        # Inference only: disable dropout etc.
        self.model.eval()

    def __call__(self, data: Dict[str, Any]) -> Dict[str, Any]:
        """
        data args:
            inputs (:obj: `str`): The longform text input to analyze
        Return:
            A :obj:`dict`: containing the regression prediction
        """
        # Read the input text WITHOUT mutating the caller's payload.
        # (Previously this used data.pop(), which destructively removed the
        # "inputs" key from the request dict owned by the caller.)
        inputs = data.get("inputs", data)

        # Tokenize the input (max_length=4096 — Longformer's max length).
        tokenized = self.tokenizer(
            inputs,
            padding=True,
            truncation=True,
            max_length=4096,
            return_tensors="pt"
        )

        # Forward pass without gradient tracking.
        with torch.no_grad():
            outputs = self.model(**tokenized)
            prediction = outputs.logits.item()  # Single regression value

        return {
            "prediction": prediction,
            "confidence": 1.0,  # Not applicable for regression
            "raw_scores": [prediction]  # Just the regression score
        }
 
 
 
 
 
 
 
1
+ from transformers import AutoTokenizer, AutoModelForSequenceClassification
 
2
  import torch
3
 
4
class EndpointHandler:
    """Inference Endpoints handler: scores texts with a regression model.

    Accepts {"inputs": "text"} or {"inputs": ["a", "b", ...]} and returns
    one {"score": float} dict per input, with scores clipped to [0, 1].
    """

    def __init__(self, path=""):
        # Load model and tokenizer from the repo path
        self.tokenizer = AutoTokenizer.from_pretrained(path)
        self.model = AutoModelForSequenceClassification.from_pretrained(path)
        self.model.eval()  # inference mode: disable dropout etc.
        # Run on GPU when one is available.
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.model.to(self.device)

    def __call__(self, data):
        """
        This method is called when the endpoint receives a request.
        Expected input: { "inputs": "some string" } or { "inputs": ["a", "b", ...] }

        Returns a list of {"score": float} dicts (one per input text),
        or {"error": ...} when no input was provided.
        """
        inputs = data.get("inputs", None)

        if inputs is None:
            return {"error": "No input provided"}

        # Normalize a single string to a one-element batch.
        if isinstance(inputs, str):
            inputs = [inputs]

        results = []
        for text in inputs:
            # Texts are scored one at a time, so no padding is required.
            # (Previously every input was padded to max_length=4096, which
            # wasted compute; with an attention mask the padded positions
            # should not affect the logits for the real tokens.)
            encoded = self.tokenizer(
                text,
                return_tensors="pt",
                truncation=True,
                max_length=4096,  # Longformer's maximum sequence length
            )
            encoded = {k: v.to(self.device) for k, v in encoded.items()}

            # Forward pass without gradient tracking.
            with torch.no_grad():
                outputs = self.model(**encoded)

            raw_score = outputs.logits.squeeze().item()
            # Clamp to [0, 1]; a regression head is otherwise unbounded.
            clipped_score = min(max(raw_score, 0.0), 1.0)

            results.append({"score": round(clipped_score, 4)})

        return results