"""
Custom handler for HuggingFace Inference Endpoints.
Accepts a context string and a list of candidate sentences,
tokenizes them in batches, scores each sentence, and returns
the scores.
Expected input JSON:
{
"inputs": {
"context": "The Crash at Crush was a publicity stunt in Texas in 1896.",
"sentences": [
"An estimated 40,000 people attended the event.",
"The event was held on September 15.",
"Two people were killed by flying debris."
]
}
}
Response JSON (sorted by score, highest first):
[
    {"sentence": "Two people were killed by flying debris.", "score": 1.789},
    {"sentence": "An estimated 40,000 people attended the event.", "score": 1.234},
    {"sentence": "The event was held on September 15.", "score": 0.456}
]
"""
from typing import Any, Dict, List, Union
import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer
# Tokenizer truncation limit for each (context, sentence) pair.
MAX_LENGTH = 384
# Number of sentences scored per forward pass.
BATCH_SIZE = 32


class EndpointHandler:
    """Custom handler for sentence interestingness scoring.

    Scores each candidate sentence against a shared context using a
    sequence-classification model and returns the sentences ranked by
    score, highest first.
    """

    def __init__(self, path: str = ""):
        """Load the model and tokenizer from the given path.

        Args:
            path: Path to the model directory (provided by the Inference
                Endpoint).
        """
        self.tokenizer = AutoTokenizer.from_pretrained(path)
        self.model = AutoModelForSequenceClassification.from_pretrained(path)
        self.model.eval()
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.model.to(self.device)

    def __call__(self, data: Dict[str, Any]) -> Union[List[Dict[str, Any]], Dict[str, str]]:
        """Score a list of sentences given a context.

        Args:
            data: Request payload. Expected shape:
                {
                    "inputs": {
                        "context": str,
                        "sentences": list[str]
                    }
                }
                OR (flat form):
                {
                    "inputs": str  # treated as context, split into sentences
                }

        Returns:
            List of dicts with "sentence" and "score" keys, sorted by
            score descending — or a single {"error": ...} dict on bad input.
        """
        # Use pop like HF's example handlers to be resilient to wrapper layers.
        inputs = data.pop("inputs", data)
        # Discard "parameters" (HF Endpoints sometimes pass them separately).
        # The pop matters when `inputs` aliases `data` (flat form): it keeps
        # the key from being mistaken for part of the payload. The value
        # itself is intentionally unused.
        data.pop("parameters", {})

        # Support both structured and simple string input.
        if isinstance(inputs, str):
            # Simple mode: treat input as context, split into sentences.
            try:
                import nltk

                nltk.download("punkt_tab", quiet=True)
                context = inputs
                sentences = nltk.sent_tokenize(inputs)
            except (ImportError, LookupError):
                # nltk is not installed, or the punkt data could not be
                # fetched (e.g. no network on the endpoint) — sent_tokenize
                # raises LookupError in that case, not ImportError.
                return {"error": "Structured input required: provide 'context' and 'sentences' fields."}
        elif isinstance(inputs, dict):
            context = inputs.get("context", "")
            sentences = inputs.get("sentences", [])
        else:
            return {"error": "Unexpected input type: {}".format(type(inputs).__name__)}

        if not context:
            return {"error": "No context provided."}
        if not sentences:
            return {"error": "No sentences provided."}

        # Score sentences in batches.
        all_scores: List[float] = []
        for batch_start in range(0, len(sentences), BATCH_SIZE):
            batch_sentences = sentences[batch_start : batch_start + BATCH_SIZE]
            # Tokenize the batch: each item is a (context, sentence) pair.
            encoded = self.tokenizer(
                [context] * len(batch_sentences),
                batch_sentences,
                return_tensors="pt",
                truncation=True,
                padding=True,
                max_length=MAX_LENGTH,
            )
            encoded = {k: v.to(self.device) for k, v in encoded.items()}
            with torch.no_grad():
                outputs = self.model(**encoded)
            scores = outputs.logits.squeeze(-1)  # (batch_size,)
            # A single-item batch squeezes down to a 0-d tensor; restore
            # the batch dimension so .tolist() yields a list.
            if scores.dim() == 0:
                scores = scores.unsqueeze(0)
            all_scores.extend(scores.cpu().tolist())

        # Build results sorted by score (highest first).
        results = [
            {"sentence": sent, "score": round(score, 4)}
            for sent, score in zip(sentences, all_scores)
        ]
        results.sort(key=lambda x: x["score"], reverse=True)
        return results