handraise-dev
/

gguf-inference

Text Generation

Model card | Files and versions

syberWolf committed on Jul 4, 2024

Commit

96a0103

·

1 Parent(s): 6d33a5e

simplify handler

Files changed (1) hide show

handler.py +3 -22

handler.py CHANGED Viewed

@@ -1,18 +1,9 @@
 from typing import Dict, List, Any
-from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
-device = "cuda"
 class EndpointHandler:
-    def __init__(self, path=""):
-        # load the model
-        tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2-1.5B-Instruct")
-        model = AutoModelForCausalLM.from_pretrained(
-            "Qwen/Qwen2-1.5B-Instruct",
-            torch_dtype="auto",
-            device_map="auto"
-        )
-        # create inference pipeline
-        self.pipeline = pipeline("text-generation", model=model, tokenizer=tokenizer)
     def __call__(self, data: Any) -> List[List[Dict[str, float]]]:
         inputs = data.pop("inputs", data)
@@ -26,13 +17,3 @@ class EndpointHandler:
         # postprocess the prediction
         return prediction
-# Example usage
-if __name__ == "__main__":
-    handler = EndpointHandler()
-    data = {
-        "inputs": "Hello, how can I",
-        "parameters": {"max_length": 50, "num_return_sequences": 1}
-    }
-    result = handler(data)
-    print(result)

 from typing import Dict, List, Any
+from transformers import pipeline
 class EndpointHandler:
+    def __init__(self, model_name="Qwen/Qwen2-1.5B-Instruct"):
+        self.pipeline = pipeline("text-generation", model=model_name)  # Note: Model name provided as argument for flexibility
     def __call__(self, data: Any) -> List[List[Dict[str, float]]]:
         inputs = data.pop("inputs", data)
         # postprocess the prediction
         return prediction