cephcyn committed (verified)
Commit 02cfa39 · 1 Parent(s): af8544b

Upload 2 files

Files changed (2)
  1. handler.py +35 -0
  2. requirements.txt +1 -0
handler.py ADDED
@@ -0,0 +1,35 @@
+ from typing import Dict, List, Any
+ from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
+
+ # HF_TOKEN must be set when the endpoint is created so the gated model can be downloaded
+ model_name = "meta-llama/Meta-Llama-3.1-8B-Instruct"
+
+ class EndpointHandler:
+     def __init__(self, path=""):
+         # load the tokenizer and model
+         tokenizer = AutoTokenizer.from_pretrained(model_name)
+         model = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto")
+         # create the inference pipeline
+         self.pipeline = pipeline("text-generation", model=model, tokenizer=tokenizer)
+
+     def __call__(self, data: Dict[str, Any]) -> List[Dict[str, str]]:
+         """
+         input args:
+             data: a dict with elements...
+                 inputs: List[str], inputs to batch-process
+                 parameters: Any, parameters to pass to the pipeline
+         outputs:
+             list of {'generated_text': str} type outputs
+         """
+
+         inputs = data.pop("inputs", data)
+         parameters = data.pop("parameters", None)
+
+         # pass inputs with all kwargs in data
+         if parameters is not None:
+             predictions = self.pipeline(inputs, **parameters)
+         else:
+             predictions = self.pipeline(inputs)
+
+         # postprocess: unwrap the per-input lists the pipeline returns for batched inputs
+         return [p[0] if isinstance(p, list) else p for p in predictions]
requirements.txt ADDED
@@ -0,0 +1 @@
+ transformers
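
For context, a minimal local smoke test of this handler could look like the sketch below. The payload keys ("inputs", "parameters") follow the docstring in handler.py; the prompt text and generation parameters (max_new_tokens, do_sample) are illustrative assumptions rather than part of this commit, and running it requires an HF_TOKEN with access to the gated Llama weights.

# hypothetical local test, assuming handler.py is on the import path
from handler import EndpointHandler

handler = EndpointHandler()
payload = {
    "inputs": ["Summarize the Llama 3.1 release in one sentence."],
    "parameters": {"max_new_tokens": 64, "do_sample": False},
}
outputs = handler(payload)
print(outputs)  # expected shape: [{'generated_text': '...'}]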