Add flexibility to select the CPU or CUDA ONNX Runtime execution provider
Browse files- handler.py +13 -2
handler.py
CHANGED
|
@@ -11,15 +11,26 @@ if torch.backends.cudnn.is_available():
|
|
| 11 |
|
| 12 |
class EndpointHandler():
|
| 13 |
def __init__(self, path=""):
|
|
|
|
|
|
|
| 14 |
# load the optimized model
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 15 |
model = ORTModelForSequenceClassification.from_pretrained(
|
| 16 |
path,
|
| 17 |
export=False,
|
| 18 |
-
provider=
|
| 19 |
)
|
| 20 |
tokenizer = AutoTokenizer.from_pretrained(path)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 21 |
# create inference pipeline
|
| 22 |
-
self.pipeline = pipeline("text-classification", model=model, tokenizer=tokenizer, device=
|
| 23 |
|
| 24 |
|
| 25 |
def __call__(self, data: Any) -> List[List[Dict[str, float]]]:
|
|
|
|
| 11 |
|
| 12 |
class EndpointHandler():
|
| 13 |
def __init__(self, path=""):
    """Load the optimized ONNX sequence-classification model and build a pipeline.

    Selects the CUDA execution provider and GPU device 0 when a CUDA
    device is visible to torch; otherwise falls back to the CPU
    execution provider and CPU inference (device -1).

    Args:
        path: Local directory (or model id) holding the exported ONNX
            model and its tokenizer files.
    """
    on_cuda = torch.cuda.is_available()

    # load the optimized model with the provider matching the hardware
    provider = "CUDAExecutionProvider" if on_cuda else "CPUExecutionProvider"
    model = ORTModelForSequenceClassification.from_pretrained(
        path,
        export=False,
        provider=provider,
    )
    tokenizer = AutoTokenizer.from_pretrained(path)

    # create inference pipeline; transformers uses -1 for CPU, 0 for first GPU
    device = 0 if on_cuda else -1
    self.pipeline = pipeline(
        "text-classification",
        model=model,
        tokenizer=tokenizer,
        device=device,
    )
|
| 34 |
|
| 35 |
|
| 36 |
def __call__(self, data: Any) -> List[List[Dict[str, float]]]:
|