Upload handler.py
handler.py (+1 / -12)
@@ -2,10 +2,6 @@ import torch
 from typing import Dict, List, Any
 from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
 
-# check for GPU
-device = 0 if torch.cuda.is_available() else -1
-
-
 class EndpointHandler:
     def __init__(self, path=""):
         # load the model
@@ -16,12 +12,5 @@ class EndpointHandler:
 
     def __call__(self, data: Any) -> List[List[Dict[str, float]]]:
         inputs = data.pop("inputs", data)
-
-
-        # pass inputs with all kwargs in data
-        if parameters is not None:
-            prediction = self.pipeline(inputs, **parameters)
-        else:
-            prediction = self.pipeline(inputs)
-        # postprocess the prediction
+        prediction = self.pipeline(inputs)
         return prediction
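For reference, a minimal sketch of exercising the updated handler locally. The checkpoint path and the payload below are illustrative assumptions, not values from this commit; only the EndpointHandler signature and the {"inputs": ...} payload shape come from the diff itself.

# Hypothetical local smoke test for the updated EndpointHandler.
# "my-model" and the input string are placeholders, not part of this commit.
from handler import EndpointHandler

# __init__ loads the model from the given path
handler = EndpointHandler(path="my-model")

# __call__ pops "inputs" from the payload dict and runs it through the pipeline
prediction = handler({"inputs": "Hello, world"})
print(prediction)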