Syko
/

SykoNaught-v1

Text Generation

Model card | Files and versions

Syko committed on Dec 31, 2024

Commit

1ce4cfe

·

verified ·

1 Parent(s): b56fdd4

Upload handler.py

Files changed (1) hide show

handler.py +21 -23

handler.py CHANGED Viewed

@@ -1,29 +1,27 @@
-from transformers import AutoTokenizer, AutoModelForCausalLM
 import torch
-class EndpointHandler:
-    def __init__(self, path):
-        # Load tokenizer and model
-        self.tokenizer = AutoTokenizer.from_pretrained(path)
-        self.model = AutoModelForCausalLM.from_pretrained(path)
-    def __call__(self, inputs):
-        # Parse input
-        input_text = inputs.get("inputs", "")
-        parameters = inputs.get("parameters", {})
-        max_new_tokens = parameters.get("max_new_tokens", 50)
-        temperature = parameters.get("temperature", 0.7)
-        # Tokenize input
-        input_ids = self.tokenizer(input_text, return_tensors="pt").input_ids
-        # Generate output
-        output = self.model.generate(
-            input_ids,
-            max_new_tokens=max_new_tokens,
-            temperature=temperature,
-        )
-        # Decode output
-        output_text = self.tokenizer.decode(output[0], skip_special_tokens=True)
-        return {"generated_text": output_text}

 import torch
+from typing import Dict, List, Any
+from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
+# check for GPU
+device = 0 if torch.cuda.is_available() else -1
+class EndpointHandler:
+    def __init__(self, path=""):
+        # load the model
+        tokenizer = AutoTokenizer.from_pretrained(path)
+        model = AutoModelForCausalLM.from_pretrained(path, low_cpu_mem_usage=True)
+        # create inference pipeline
+        self.pipeline = pipeline("text-generation", model=model, tokenizer=tokenizer, device=device)
+    def __call__(self, data: Any) -> List[List[Dict[str, float]]]:
+        inputs = data.pop("inputs", data)
+        parameters = data.pop("parameters", None)
+        # pass inputs with all kwargs in data
+        if parameters is not None:
+            prediction = self.pipeline(inputs, **parameters)
+        else:
+            prediction = self.pipeline(inputs)
+        # postprocess the prediction
+        return prediction