Deepthoughtworks
/

gpt-neo-2.7B__low-cpu

@@ -1,6 +1,9 @@
 from typing import Dict, List, Any
 from transformers import AutoModel, AutoTokenizer, pipeline
 class EndpointHandler:
     def __init__(self, path=""):
@@ -10,7 +13,7 @@ class EndpointHandler:
         # create inference pipeline
         # Do I have to check device?
         self.pipeline = pipeline(
-            "text-generation", model=model, tokenizer=tokenizer)
     # (Might have to adjust typing)
     def __call__(self, data: Any) -> List[List[Dict[str, float]]]:

+import torch
 from typing import Dict, List, Any
 from transformers import AutoModel, AutoTokenizer, pipeline
+# Select the pipeline device: GPU 0 when CUDA is available, otherwise CPU (-1)
+device = 0 if torch.cuda.is_available() else -1
 class EndpointHandler:
     def __init__(self, path=""):
         # create inference pipeline
         # device is chosen at module level: GPU 0 if CUDA is available, else CPU (-1)
         self.pipeline = pipeline(
+            "text-generation", model=model, tokenizer=tokenizer, device=device)
     # TODO: confirm the return annotation matches the text-generation pipeline output
     def __call__(self, data: Any) -> List[List[Dict[str, float]]]: