"""Custom handler for HuggingFace Inference Endpoints.

Accepts a context string and a list of candidate sentences, tokenizes them
in batches, scores each sentence, and returns the scores sorted from the
highest to the lowest.

Expected input JSON:
    {
        "inputs": {
            "context": "The Crash at Crush was a publicity stunt in Texas in 1896.",
            "sentences": [
                "An estimated 40,000 people attended the event.",
                "The event was held on September 15.",
                "Two people were killed by flying debris."
            ]
        }
    }

Response JSON (sorted by score, highest first):
    [
        {"sentence": "Two people were killed by flying debris.", "score": 1.789},
        {"sentence": "An estimated 40,000 people attended the event.", "score": 1.234},
        {"sentence": "The event was held on September 15.", "score": 0.456}
    ]

On invalid input the response is a single object: {"error": "<message>"}.
"""

from typing import Any, Dict, List, Union

import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer

MAX_LENGTH = 384
BATCH_SIZE = 32


class EndpointHandler:
    """Custom handler for sentence interestingness scoring."""

    # Becomes True (on the instance) once the NLTK "punkt_tab" download has
    # reported success, so later string-mode requests skip the download call.
    _punkt_ready = False

    def __init__(self, path: str = ""):
        """Load the model and tokenizer from the given path.

        Args:
            path: Path to the model directory (provided by the Inference
                Endpoint).
        """
        self.tokenizer = AutoTokenizer.from_pretrained(path)
        self.model = AutoModelForSequenceClassification.from_pretrained(path)
        self.model.eval()
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.model.to(self.device)

    def __call__(self, data: Dict[str, Any]) -> Union[List[Dict[str, Any]], Dict[str, str]]:
        """Score a list of sentences given a context.

        Args:
            data: Request payload. It is read but not modified. Expected shape:
                {
                    "inputs": {
                        "context": str,
                        "sentences": list[str]
                    }
                }
                OR (flat form):
                {
                    "inputs": str  # used as the context and split into
                                   # sentences with NLTK's sentence tokenizer
                }
                If there is no "inputs" key, the payload itself is used as
                the inputs object.

        Returns:
            List of dicts with "sentence" and "score" keys (score rounded to
            4 decimals), sorted by score descending. If the input is invalid,
            a dict with a single "error" key describing the problem.
        """
        # Fall back to the payload itself to be resilient to wrapper layers.
        # `get` rather than `pop`, so the caller's dict is left untouched.
        inputs = data.get("inputs", data)

        if isinstance(inputs, str):
            # Simple mode: the string is the context and the sentence source.
            context = inputs
            try:
                sentences = self._split_sentences(inputs)
            except (ImportError, LookupError):
                # nltk is not installed, or its tokenizer data is unavailable.
                return {"error": "Structured input required: provide 'context' and 'sentences' fields."}
        elif isinstance(inputs, dict):
            context = inputs.get("context", "")
            sentences = inputs.get("sentences", [])
        else:
            return {"error": "Unexpected input type: {}".format(type(inputs).__name__)}

        if not context:
            return {"error": "No context provided."}
        if not sentences:
            return {"error": "No sentences provided."}

        # Reject malformed payloads here with a readable message instead of
        # letting them fail inside the tokenizer.
        if not isinstance(context, str):
            return {"error": "'context' must be a string."}
        if not isinstance(sentences, (list, tuple)) or not all(
            isinstance(sentence, str) for sentence in sentences
        ):
            return {"error": "'sentences' must be a list of strings."}
        sentences = list(sentences)

        all_scores = self._score_sentences(context, sentences)

        # Pair every sentence with its rounded score, highest score first.
        # The sort is stable, so equal scores keep their input order.
        results = [
            {"sentence": sent, "score": round(score, 4)}
            for sent, score in zip(sentences, all_scores)
        ]
        results.sort(key=lambda item: item["score"], reverse=True)
        return results

    def _split_sentences(self, text: str) -> List[str]:
        """Split text into sentences with NLTK, fetching its data when needed.

        Raises:
            ImportError: If nltk is not installed.
            LookupError: If the tokenizer data is not available.
        """
        # Imported lazily: nltk is only needed for string-mode requests.
        import nltk

        if not self._punkt_ready:
            # Remember a successful download so it is not repeated on every
            # request; a falsy result means it is tried again next time.
            self._punkt_ready = bool(nltk.download("punkt_tab", quiet=True))
        return nltk.sent_tokenize(text)

    def _score_sentences(self, context: str, sentences: List[str]) -> List[float]:
        """Return one model score per sentence, in input order."""
        all_scores = []  # type: List[float]
        for batch_start in range(0, len(sentences), BATCH_SIZE):
            batch_sentences = sentences[batch_start:batch_start + BATCH_SIZE]

            # Tokenize the batch: each item is a (context, sentence) pair.
            encoded = self.tokenizer(
                [context] * len(batch_sentences),
                batch_sentences,
                return_tensors="pt",
                truncation=True,
                padding=True,
                max_length=MAX_LENGTH,
            )
            encoded = {k: v.to(self.device) for k, v in encoded.items()}

            with torch.no_grad():
                outputs = self.model(**encoded)

            # Drop a trailing size-1 dimension from the logits.
            scores = outputs.logits.squeeze(-1)
            # Defensive: if the logits were 1-D with a single element, the
            # squeeze leaves a 0-dim tensor; restore a length-1 batch axis so
            # tolist() yields a list.
            if scores.dim() == 0:
                scores = scores.unsqueeze(0)

            all_scores.extend(scores.cpu().tolist())
        return all_scores