dejanseo
/

universal-query-classifier-base

Model card Files Files and versions

dejanseo committed on Jun 27, 2025

Commit

6f1c63c

·

verified ·

1 Parent(s): 6e375a5

Create handler.py

Files changed (1) hide show

handler.py +61 -0

handler.py ADDED Viewed

	@@ -0,0 +1,61 @@

+from typing import Dict, Any, List
+import torch
+from transformers import AutoTokenizer, AutoModel
+import os
+import json
class EndpointHandler:
    """Score candidate labels against a query with an encoder and a linear head.

    The handler loads a tokenizer and encoder from ``path``, plus a one-output
    linear scoring head stored next to them in ``classifier_head.json``. Each
    call formats one text per candidate, encodes it, applies the head to the
    first-position hidden state and returns sigmoid scores sorted descending.
    """

    # Marker tokens wrapped around each segment of the formatted model input.
    SPECIAL_TOKENS = ("[QUERY]", "[LABEL_NAME]", "[LABEL_DESCRIPTION]")
    # Every input is padded/truncated to exactly this many tokens.
    MAX_LENGTH = 64
    # Upper bound on candidates per forward pass, to keep memory bounded.
    BATCH_SIZE = 32

    def __init__(self, path: str = ""):
        """Load tokenizer, encoder and scoring head from ``path``.

        Args:
            path: Directory (or model id) passed to ``from_pretrained``; it
                must also contain ``classifier_head.json`` with the keys
                ``scorer_weight`` and ``scorer_bias``.

        Raises:
            OSError: If ``classifier_head.json`` cannot be opened.
            KeyError: If the head file lacks a required key.
            RuntimeError: If the stored head does not match the encoder's
                hidden size.
        """
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.tokenizer = AutoTokenizer.from_pretrained(path)
        # NOTE(review): if the saved tokenizer does not already contain these
        # tokens, this grows the vocabulary beyond the checkpoint's embedding
        # matrix -- confirm the tokenizer was saved with them included.
        self.tokenizer.add_special_tokens({
            "additional_special_tokens": list(self.SPECIAL_TOKENS)
        })
        self.model = AutoModel.from_pretrained(path).to(self.device)

        head_path = os.path.join(path, "classifier_head.json")
        with open(head_path, "r", encoding="utf-8") as f:
            head = json.load(f)
        self.classifier = self._build_classifier(
            head, self.model.config.hidden_size
        ).to(self.device)
        self.model.eval()

    @staticmethod
    def _build_classifier(head: Dict[str, Any], hidden_size: int) -> torch.nn.Linear:
        """Build the ``hidden_size -> 1`` scoring layer from stored parameters.

        The stored values are reshaped to the layer's own parameter shapes, so
        a flat weight list or a scalar bias is accepted, while a wrong number
        of elements raises ``RuntimeError`` instead of silently installing a
        mis-shaped parameter.
        """
        classifier = torch.nn.Linear(hidden_size, 1)
        weight = torch.as_tensor(head["scorer_weight"], dtype=classifier.weight.dtype)
        bias = torch.as_tensor(head["scorer_bias"], dtype=classifier.bias.dtype)
        with torch.no_grad():
            classifier.weight.copy_(weight.reshape(classifier.weight.shape))
            classifier.bias.copy_(bias.reshape(classifier.bias.shape))
        return classifier.eval()

    @staticmethod
    def _format_input(query: str, entry: Dict[str, Any]) -> str:
        """Render one query/candidate pair as the marker-delimited model input."""
        return (
            f"[QUERY] {query} [LABEL_NAME] {entry['label']} "
            f"[LABEL_DESCRIPTION] {entry['description']}"
        )

    def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
        """Score every candidate label for the query and rank them.

        Expected input format (either at the top level of ``data`` or nested
        under an ``"inputs"`` key):
        {
            "query": "how to sharpen kitchen knives",
            "candidates": [
                {"label": "Tool-Specific", "description": "..."},
                {"label": "Local Intent", "description": "..."}
            ]
        }

        Returns:
            One dict per candidate with ``label``, ``description`` and
            ``score`` (sigmoid output rounded to 4 decimals), sorted by score
            in descending order. An empty or missing-valued candidate list
            yields ``[]``.

        Raises:
            KeyError: If ``query``/``candidates`` or a candidate's
                ``label``/``description`` is missing.
        """
        # Top-level keys win so existing callers behave exactly as before; the
        # nested form is only used when the payload is wrapped in "inputs".
        nested = data.get("inputs")
        payload = nested if isinstance(nested, dict) and "query" not in data else data

        query = payload["query"]
        candidates = payload["candidates"]
        if not candidates:
            return []

        texts = [self._format_input(query, entry) for entry in candidates]

        scores: List[float] = []
        with torch.no_grad():
            # All inputs are padded to MAX_LENGTH, so candidates can share a
            # forward pass instead of running the encoder once per candidate.
            for start in range(0, len(texts), self.BATCH_SIZE):
                tokens = self.tokenizer(
                    texts[start:start + self.BATCH_SIZE],
                    return_tensors="pt",
                    padding="max_length",
                    truncation=True,
                    max_length=self.MAX_LENGTH,
                ).to(self.device)
                out = self.model(**tokens)
                cls = out.last_hidden_state[:, 0, :]
                logits = self.classifier(cls).reshape(-1)
                scores.extend(torch.sigmoid(logits).tolist())

        results = [
            {
                "label": entry["label"],
                "description": entry["description"],
                "score": round(score, 4),
            }
            for entry, score in zip(candidates, scores)
        ]
        return sorted(results, key=lambda x: x["score"], reverse=True)