zohaibterminator commited on
Commit
0fbd23f
·
verified ·
1 Parent(s): 4a52a67

Create handler.py

Browse files
Files changed (1) hide show
  1. handler.py +51 -0
handler.py ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Dict, List, Any
2
+ from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
3
+ import os
4
+ import torch
5
+ from subprocess import run
6
# Select the inference device and dtype once at import time.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# bfloat16 requires CUDA compute capability >= 8 (Ampere or newer).
# Guard with is_available() first: querying the device capability on a
# CUDA-less host raises RuntimeError (the original crashed on CPU-only
# machines, and its `== 8` test excluded capability-9 GPUs from bf16).
dtype = (
    torch.bfloat16
    if torch.cuda.is_available() and torch.cuda.get_device_capability()[0] >= 8
    else torch.float16
)
# Reinstall torch/vision/audio wheels built against CUDA 12.1 so the
# endpoint runtime matches the host GPU driver. Passing an argv list with
# shell=False avoids shell-string interpretation (the original used a
# single shell=True command string); check=True aborts startup on failure.
run(
    [
        "pip3", "install",
        "torch", "torchvision", "torchaudio",
        "--index-url", "https://download.pytorch.org/whl/cu121",
    ],
    check=True,
)
10
+
11
class EndpointHandler():
    """Custom Hugging Face Inference Endpoints handler.

    Loads a causal-LM text-generation pipeline once at startup and serves
    generation requests through ``__call__``.
    """

    def __init__(self, path=""):
        # Preload everything needed at inference time.
        # Read token for gated/private repos; configured as an endpoint secret.
        self.HF_READ_TOKEN = os.getenv("HF_READ_TOKEN")

        print("loading model")

        tokenizer = AutoTokenizer.from_pretrained(path, token=self.HF_READ_TOKEN)

        model = AutoModelForCausalLM.from_pretrained(
            pretrained_model_name_or_path=path,
            token=self.HF_READ_TOKEN,
            torch_dtype=dtype,  # module-level dtype chosen from GPU capability
        ).to(device)

        self.pipeline = pipeline("text-generation", model=model, tokenizer=tokenizer)
        # Prompt template; the request text is substituted via str.format().
        self.alpaca_prompt = """REDACTED"""
        print("model loaded")

    def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
        """Run text generation for one request.

        data args:
            input (:obj:`str`): the text to format into the prompt template.
        Return:
            ``{"prediction": ...}`` on success, or
            ``[{"Error": "no input received."}]`` when no input was supplied.
            NOTE(review): the two branches return different container types
            (dict vs list); preserved for backward compatibility with
            existing callers.
        """
        # Bug fix: the original compared against the undefined name `Null`
        # (NameError on every request) and indexed data["input"] directly
        # (KeyError when the key is absent). Use .get() and compare to None.
        request = data.get("input")
        if request is None:
            return [{"Error": "no input received."}]
        prompt = self.alpaca_prompt.format(request)
        prediction = self.pipeline(prompt)
        return {"prediction": prediction}