aidando73
/

llama-3.3-70b-instruct-code-agent-fine-tune-v1

text-generation-inference

Model card Files Files and versions

aidando73 committed on Jan 22, 2025

Commit

f028aae

·

1 Parent(s): 3ee8139

.

Files changed (2) hide show

handler.py +30 -0
test-handler.py +12 -0

handler.py ADDED Viewed

	@@ -0,0 +1,30 @@

+from typing import Dict, List, Any
+from unsloth import FastLanguageModel
+class EndpointHandler():
+    def __init__(self, path=""):
+        # Preload all the elements you are going to need at inference.
+        # pseudo:
+        # self.model= load_model(path)
+        model, tokenizer = FastLanguageModel.from_pretrained(
+            model_name = "aidando73/llama-3.3-70b-instruct-code-agent-fine-tune-v1",
+            max_seq_length = 2048,
+            dtype = "float16",
+            load_in_4bit = True,
+        )
+        FastLanguageModel.for_inference(model)
+        self.model = model
+        self.tokenizer = tokenizer
+    def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
+        """
+       data args:
+            inputs (:obj: `str` | `PIL.Image` | `np.array`)
+            kwargs
+      Return:
+            A :obj:`list` | `dict`: will be serialized and returned
+        """
+        input_ids = self.tokenizer.encode(data["inputs"], return_tensors = "pt").to("cuda")
+        output = self.model.generate(input_ids, max_new_tokens = 128, pad_token_id = self.tokenizer.eos_token_id)
+        return [{"output": self.tokenizer.decode(output[0], skip_special_tokens = True)}]

test-handler.py ADDED Viewed

	@@ -0,0 +1,12 @@

+from handler import EndpointHandler
+# init handler
+my_handler = EndpointHandler(path=".")
+# prepare sample payload
+input = {"inputs": "Hello World"}
+# test the handler
+output = my_handler(input)
+print("output", output)