ace-1 committed on
Commit
cef3e59
·
verified ·
1 Parent(s): e890fdc

Upload handler.py

Browse files
Files changed (1) hide show
  1. handler.py +60 -16
handler.py CHANGED
@@ -1,18 +1,62 @@
1
- from typing import Dict, List, Any
2
- from transformers import pipeline
 
 
 
 
 
 
3
 
4
class EndpointHandler:
    """Minimal Inference Endpoints handler around a transformers
    text-classification pipeline.

    Instantiated once at startup with the model directory path; the
    runtime then invokes the instance per request.
    """

    def __init__(self, path=""):
        # Build the classifier once at startup. An empty path falls back
        # to the pipeline's default model (model=None).
        self.classifier = pipeline("text-classification", model=path or None)

    def preprocess(self, inputs: Dict[str, Any]) -> List[str]:
        # The Inference API wraps the payload as {"inputs": ...};
        # normalise a bare value to a one-element list.
        payload = inputs["inputs"]
        return payload if isinstance(payload, list) else [payload]

    def __call__(self, inputs: Dict[str, Any]) -> List[Dict[str, Any]]:
        # Normalise the request, then classify all texts in one batch call.
        return self.classifier(self.preprocess(inputs))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Dict, Any, List, Union
2
+
3
+ from transformers import (
4
+ AutoTokenizer,
5
+ AutoModelForSequenceClassification,
6
+ TextClassificationPipeline,
7
+ )
8
+
9
 
10
class EndpointHandler:
    """Custom handler for Hugging Face Inference Endpoints.

    Loads a fine-tuned text-classification model and exposes a callable
    that the endpoint runtime will invoke. The runtime instantiates this
    class once at startup, passing the model directory path.
    """

    def __init__(self, path: str = "", **kwargs):
        # `path` is the directory where the model artefacts are stored.
        # Fall back to the current directory if not provided (local testing).
        model_dir = path or "."

        # Load tokenizer & model once at startup.
        self.tokenizer = AutoTokenizer.from_pretrained(model_dir, use_fast=True)
        self.model = AutoModelForSequenceClassification.from_pretrained(model_dir)

        # Multi-label models need independent per-label sigmoid scores;
        # single-label models use a softmax over the label set.
        problem_type = getattr(self.model.config, "problem_type", None)
        apply_fn = (
            "sigmoid" if problem_type == "multi_label_classification" else "softmax"
        )

        # Build a text-classification pipeline.
        # NOTE(review): `return_all_scores` is deprecated in recent
        # transformers releases in favour of `top_k`; kept here to preserve
        # the current output format — migrate deliberately.
        self.pipeline = TextClassificationPipeline(
            model=self.model,
            tokenizer=self.tokenizer,
            device=-1,  # CPU; the runtime sets CUDA if available automatically
            return_all_scores=False,
            function_to_apply=apply_fn,
        )

    def __call__(self, data: Dict[str, Any]) -> Union[Dict[str, Any], List[Dict[str, Any]]]:
        """Run inference on the incoming request.

        Expected input format from the Inference Endpoint runtime:
            {
                "inputs": "some text" | ["text 1", "text 2", ...],
                "parameters": { ... }  # optional pipeline kwargs (e.g., top_k)
            }

        Returns a single result dict when the caller sent a single string,
        or a list of result dicts (one per text, in order) for list input.
        """
        # Extract the text(s); a bare payload without "inputs" is passed
        # through to the pipeline unchanged.
        raw_inputs = data.get("inputs", data)

        # Remember whether the caller sent a single string so the response
        # shape mirrors the request shape. (Fix: previously a one-element
        # *list* input collapsed to a bare dict.)
        single_input = isinstance(raw_inputs, str)
        if single_input:
            raw_inputs = [raw_inputs]

        # Optional pipeline parameters; `or {}` also guards against an
        # explicit `"parameters": null` in the payload (fix: `.get` alone
        # would return None and `**None` raises TypeError).
        parameters = data.get("parameters") or {}

        # Execute the pipeline on the whole batch.
        outputs = self.pipeline(raw_inputs, **parameters)

        # Unwrap only when the request was a single string.
        if single_input and isinstance(outputs, list) and len(outputs) == 1:
            return outputs[0]
        return outputs