SCANSKY
/

distilbertTourism-multilingual-rclassifier

Safetensors

distilbert

Model card Files Files and versions

xet

Community

SCANSKY committed on Apr 10, 2025

Commit

513c09e

verified ·

1 Parent(s): e3b4f0e

Update handler.py

Browse files

Files changed (1) hide show

handler.py +29 -38

handler.py CHANGED Viewed

@@ -2,8 +2,6 @@ from transformers import DistilBertTokenizer, DistilBertForSequenceClassificatio
 import torch
 import os
 # Initialize model and tokenizer
 model_name = "SCANSKY/distilbertTourism-multilingual-rclassifier"
 model = None
@@ -22,7 +20,7 @@ load_model_components()
 def predict_relevance(text):
     """Predict whether a text is relevant or not"""
     if not text.strip():
-        return {"error": "Please provide some text to classify."}
     inputs = tokenizer(
         text,
@@ -46,7 +44,7 @@ def predict_relevance(text):
     return {
         "prediction": predicted_class,  # 1 for relevant, 0 for not relevant
-        "confidence": float(confidence),
         "text": text
     }
@@ -59,43 +57,36 @@ class EndpointHandler:
     def preprocess(self, data):
         # Extract the input text from the request
         text = data.get("inputs", "")
-        return text
-    def inference(self, text):
-        if isinstance(text, list):
-            # Handle batch prediction if multiple texts are provided
-            results = []
-            for t in text:
-                if isinstance(t, dict):
-                    # Handle case where inputs come as list of dicts
-                    t = t.get("inputs", "")
-                result = predict_relevance(t)
-                results.append(result)
-            return results
-        else:
-            # Single prediction
-            return predict_relevance(text)
-    def postprocess(self, output):
-        if isinstance(output, list):
-            # Process batch results
-            return [{
-                "prediction": "Relevant" if item["prediction"] == 1 else "Not Relevant",
-                "confidence": item["confidence"],
-                "text": item["text"]
-            } for item in output]
-        else:
-            # Process single result
             if "error" in output:
-                return {"error": output["error"]}
-            return {
-                "prediction": "Relevant" if output["prediction"] == 1 else "Not Relevant",
-                "confidence": output["confidence"],
-                "text": output["text"]
-            }
     def __call__(self, data):
         # Main method to handle the request
-        text = self.preprocess(data)
-        output = self.inference(text)
-        return self.postprocess(output)

 import torch
 import os
 # Initialize model and tokenizer
 model_name = "SCANSKY/distilbertTourism-multilingual-rclassifier"
 model = None
 def predict_relevance(text):
     """Predict whether a text is relevant or not"""
     if not text.strip():
+        return {"error": "Empty text provided."}
     inputs = tokenizer(
         text,
     return {
         "prediction": predicted_class,  # 1 for relevant, 0 for not relevant
+        "confidence": float(confidence) * 100,  # Convert to percentage
         "text": text
     }
     def preprocess(self, data):
         # Extract the input text from the request
         text = data.get("inputs", "")
+        # Split by newlines and remove empty lines
+        lines = [line.strip() for line in text.split('\n') if line.strip()]
+        return lines
+    def inference(self, lines):
+        results = []
+        for line in lines:
+            result = predict_relevance(line)
+            results.append(result)
+        return results
+    def postprocess(self, outputs):
+        processed_results = []
+        for output in outputs:
             if "error" in output:
+                processed_results.append({
+                    "text": output.get("text", ""),
+                    "error": output["error"],
+                    "confidence": 0
+                })
+            else:
+                processed_results.append({
+                    "text": output["text"],
+                    "confidence": output["confidence"],
+                    "relevance": "Relevant" if output["prediction"] == 1 else "Not Relevant"
+                })
+        return processed_results
     def __call__(self, data):
         # Main method to handle the request
+        lines = self.preprocess(data)
+        outputs = self.inference(lines)
+        return self.postprocess(outputs)