dejanseo committed
Commit 5003034 · verified · 1 Parent(s): eddb8c1

Create handler.py

Files changed (1)
  1. handler.py +82 -0
handler.py ADDED
@@ -0,0 +1,82 @@
+import torch
+from typing import Dict, List, Any
+from transformers import AutoModelForSequenceClassification, AutoTokenizer
+
+
+class EndpointHandler:
+    def __init__(self, path: str = ""):
+        """
+        Initializes the model and tokenizer.
+
+        Args:
+            path (str): Path to the directory containing the model files.
+        """
+        # Load model and tokenizer from the path provided by Inference Endpoints
+        self.tokenizer = AutoTokenizer.from_pretrained(path)
+        self.model = AutoModelForSequenceClassification.from_pretrained(path)
+
+        # Determine device - Inference Endpoints allocates a GPU if available
+        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+        self.model.to(self.device)
+        self.model.eval()  # Set model to evaluation mode
+        print("Model and tokenizer loaded successfully.")
+
+    def __call__(self, data: Dict[str, Any]) -> List[List[Dict[str, Any]]]:
+        """
+        Processes inference requests.
+
+        Args:
+            data (Dict[str, Any]): A dictionary containing the input data.
+                Expected key: "inputs" (string or list of strings).
+                Optional key: "parameters" (dictionary).
+
+        Returns:
+            List[List[Dict[str, Any]]]: One list of predictions per input
+                string, each holding the score for every label, e.g.
+                [[{"label": "AI", "score": 0.98}, ...]].
+        """
+        # Get inputs from the payload
+        inputs = data.pop("inputs", None)
+        parameters = data.pop("parameters", {})  # Optional, currently unused
+
+        if inputs is None:
+            raise ValueError("Missing 'inputs' key in request data")
+
+        # Preprocessing: tokenize the input text. The tokenizer accepts both a
+        # single string and a list of strings; padding and truncation are good
+        # defaults for batch processing.
+        tokenized_inputs = self.tokenizer(
+            inputs,
+            return_tensors="pt",
+            padding=True,
+            truncation=True,
+            max_length=self.tokenizer.model_max_length,  # Model's max sequence length
+        ).to(self.device)
+
+        # Inference: run prediction with gradient calculations disabled
+        with torch.no_grad():
+            outputs = self.model(**tokenized_inputs)
+
+        # Postprocessing: convert logits to probabilities and map each class
+        # index to its label string
+        logits = outputs.logits
+        probabilities = torch.softmax(logits, dim=-1)
+
+        results = []
+        for i in range(probabilities.shape[0]):  # Iterate over the batch
+            scores = probabilities[i].tolist()  # Probabilities for the i-th input
+            predictions = [
+                {"label": self.model.config.id2label[j], "score": score}
+                for j, score in enumerate(scores)
+            ]
+            # Sort predictions by score descending if needed:
+            # predictions.sort(key=lambda x: x["score"], reverse=True)
+            results.append(predictions)  # All label scores for this input
+
+        # A single-string input still yields a list of lists
+        # ([[{"label": ..., "score": ...}]]), so the output shape is consistent
+        # with batched inputs. To return only the top prediction per input:
+        #     return [max(preds, key=lambda p: p["score"]) for preds in results]
+
+        # Return all labels and scores per input
+        return results
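For reference, a minimal sketch of how this handler could be exercised locally before deploying: it is not part of the commit, and the "./model" path and sample texts are placeholder assumptions - point the handler at a directory containing the expected model files.

# smoke_test.py - hypothetical local check for handler.py (not in this commit).
# Assumes the model and tokenizer files sit in a local "./model" directory.
from handler import EndpointHandler

handler = EndpointHandler(path="./model")

# Keep a separate reference to the texts: the handler pops "inputs" from the
# payload dict, so the dict is mutated by the call.
texts = ["This paragraph was written by a person.", "This one may not have been."]
predictions = handler({"inputs": texts})  # -> one list of label scores per text

for text, preds in zip(texts, predictions):
    top = max(preds, key=lambda p: p["score"])
    print(f"{text!r} -> {top['label']} ({top['score']:.3f})")

A deployed endpoint accepts the same shape as a JSON body ({"inputs": [...]}) and returns the matching list-of-lists structure produced by __call__.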