alfinder
/

multi-model-handler

Model card Files Files and versions

xet

Community

abashar committed on Oct 6, 2024

Commit

3315acd

verified ·

1 Parent(s): 7a0f5b2

Update handler.py

Browse files

Files changed (1) hide show

handler.py +16 -14

handler.py CHANGED Viewed

@@ -2,37 +2,39 @@ import torch
 from typing import  Dict, List, Any
 from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
-# check for GPU
-device = 0 if torch.cuda.is_available() else -1
 # multi-model list
-multi_model_list = [
-    {"model_id": "gemma-2B-2nd_filtered_3_full", "model_path": "omarabb315/gemma-2B-2nd_filtered_3_full", "task": "text-generation"},
 #    {"model_path": "omarabb315/gemma-2B-2nd_filtered_3_16bit", "task": "text-generation"},
 #    {"model_path": "omarabb315/Gemma-2-9B-filtered_3_4bits", "task": "text-generation"},
 ]
 class EndpointHandler():
     def __init__(self, path=""):
-        self.multi_model={}
         # load all the models onto device
-        for model in multi_model_list:
-            self.multi_model[model["model_id"]] = pipeline(model["task"], model=model["model_path"], device=device)
     def __call__(self, data: Any) -> List[List[Dict[str, float]]]:
         # deserialize incoming request
         inputs = data.pop("inputs", data)
         parameters = data.pop("parameters", None)
-        model_id = data.pop("model_id", None)
         # check if model_id is in the list of models
-        if model_id is None or model_id not in self.multi_model:
-            raise ValueError(f"model_id: {model_id} is not valid. Available models are: {list(self.multi_model.keys())}")
         # pass inputs with all kwargs in data
         if parameters is not None:
-            prediction = self.multi_model[model_id](inputs, **parameters)
         else:
-            prediction = self.multi_model[model_id](inputs)
-        # postprocess the prediction
         return prediction

 from typing import  Dict, List, Any
 from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
 # multi-model list
+# multi_model_list = [
+#    {"model_id": "gemma-2B-2nd_filtered_3_full", "model_path": "omarabb315/gemma-2B-2nd_filtered_3_full", "task": "text-generation"},
 #    {"model_path": "omarabb315/gemma-2B-2nd_filtered_3_16bit", "task": "text-generation"},
 #    {"model_path": "omarabb315/Gemma-2-9B-filtered_3_4bits", "task": "text-generation"},
 ]
 class EndpointHandler():
     def __init__(self, path=""):
+        # self.multi_model={}
         # load all the models onto device
+        # for model in multi_model_list:
+        #     self.multi_model[model["model_id"]] = pipeline(model["task"], model=model["model_path"], trust_remote_code=True)
+        model_id = "omarabb315/gemma-2B-2nd_filtered_3_full"
+        task_id = "text-generation"
+        self.pipeline = pipeline(task_id, model=model_id, trust_remote_code=True)
     def __call__(self, data: Any) -> List[List[Dict[str, float]]]:
         # deserialize incoming request
         inputs = data.pop("inputs", data)
         parameters = data.pop("parameters", None)
+        #model_id = data.pop("model_id", None)
         # check if model_id is in the list of models
+        # if model_id is None or model_id not in self.multi_model:
+        #     raise ValueError(f"model_id: {model_id} is not valid. Available models are: {list(self.multi_model.keys())}")
         # pass inputs with all kwargs in data
         if parameters is not None:
+            prediction = self.pipeline(inputs, **parameters)
         else:
+            prediction = self.pipeline(inputs)
         return prediction