| | """ |
| | Custom handler for HuggingFace Inference Endpoints. |
| | |
| | Accepts a context string and a list of candidate sentences, |
| | tokenizes them in batches, scores each sentence, and returns |
| | the scores. |
| | |
| | Expected input JSON: |
| | { |
| | "inputs": { |
| | "context": "The Crash at Crush was a publicity stunt in Texas in 1896.", |
| | "sentences": [ |
| | "An estimated 40,000 people attended the event.", |
| | "The event was held on September 15.", |
| | "Two people were killed by flying debris." |
| | ] |
| | } |
| | } |
| | |
| | Response JSON: |
| | [ |
| | {"sentence": "An estimated 40,000 people attended the event.", "score": 1.234}, |
| | {"sentence": "The event was held on September 15.", "score": 0.456}, |
| | {"sentence": "Two people were killed by flying debris.", "score": 1.789} |
| | ] |
| | """ |
| |
|
| | from typing import Any, Dict, List, Union |
| | import torch |
| | from transformers import AutoModelForSequenceClassification, AutoTokenizer |
| |
|
| | MAX_LENGTH = 384 |
| | BATCH_SIZE = 32 |
| |
|
| |
|
class EndpointHandler:
    """Custom handler for sentence interestingness scoring.

    Loads a sequence-classification model and tokenizer once at startup,
    then scores candidate sentences against a shared context string in
    batches of BATCH_SIZE.
    """

    def __init__(self, path: str = ""):
        """Load the model and tokenizer from the given path.

        Args:
            path: Path to the model directory (provided by the Inference
                Endpoint).
        """
        self.tokenizer = AutoTokenizer.from_pretrained(path)
        self.model = AutoModelForSequenceClassification.from_pretrained(path)
        self.model.eval()

        # Prefer GPU when the endpoint hardware provides one.
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.model.to(self.device)

    def __call__(self, data: Dict[str, Any]) -> Union[List[Dict[str, Any]], Dict[str, str]]:
        """Score a list of sentences given a context.

        Args:
            data: Request payload. Expected shape:
                {
                    "inputs": {
                        "context": str,
                        "sentences": list[str]
                    }
                }
                OR (flat form):
                {
                    "inputs": str  # treated as context, split into sentences
                }

        Returns:
            List of dicts with "sentence" and "score" keys, sorted by
            score descending, or a {"error": ...} dict on invalid input.
        """
        # Use .get, not .pop: the request dict belongs to the caller and
        # should not be mutated as a side effect of scoring.
        inputs = data.get("inputs", data)

        if isinstance(inputs, str):
            # Flat form: the whole string is the context; split it into
            # candidate sentences with NLTK when available.
            try:
                import nltk

                nltk.download("punkt_tab", quiet=True)
                context = inputs
                sentences = nltk.sent_tokenize(inputs)
            except (ImportError, LookupError):
                # nltk is not installed, or the punkt_tab model could not
                # be fetched (download failures surface as LookupError
                # from sent_tokenize, not ImportError).
                return {"error": "Structured input required: provide 'context' and 'sentences' fields."}
        elif isinstance(inputs, dict):
            context = inputs.get("context", "")
            sentences = inputs.get("sentences", [])
        else:
            return {"error": "Unexpected input type: {}".format(type(inputs).__name__)}

        if not context:
            return {"error": "No context provided."}
        if not sentences:
            return {"error": "No sentences provided."}

        # Score in fixed-size batches to bound peak memory.
        all_scores: List[float] = []
        for batch_start in range(0, len(sentences), BATCH_SIZE):
            batch_sentences = sentences[batch_start : batch_start + BATCH_SIZE]
            all_scores.extend(self._score_batch(context, batch_sentences))

        results = [
            {"sentence": sent, "score": round(score, 4)}
            for sent, score in zip(sentences, all_scores)
        ]
        results.sort(key=lambda x: x["score"], reverse=True)

        return results

    def _score_batch(self, context: str, batch_sentences: List[str]) -> List[float]:
        """Tokenize one batch of (context, sentence) pairs and return raw scores.

        Args:
            context: Shared context string, paired with every sentence.
            batch_sentences: Sentences for this batch (at most BATCH_SIZE).

        Returns:
            One float score per input sentence, in input order.
        """
        encoded = self.tokenizer(
            [context] * len(batch_sentences),
            batch_sentences,
            return_tensors="pt",
            truncation=True,
            padding=True,
            max_length=MAX_LENGTH,
        )
        encoded = {k: v.to(self.device) for k, v in encoded.items()}

        with torch.no_grad():
            outputs = self.model(**encoded)
            scores = outputs.logits.squeeze(-1)

        # A single-pair batch can squeeze down to a 0-dim tensor; restore
        # the batch dimension so .tolist() always yields a list.
        if scores.dim() == 0:
            scores = scores.unsqueeze(0)

        return scores.cpu().tolist()
| |
|