rlanner-echocap committed on
Commit
e6bbd2f
·
1 Parent(s): 742a64f

Update handler.py

Browse files

Removing bfloat16 until we have access to a suitable GPU

Files changed (1) hide show
  1. handler.py +3 -2
handler.py CHANGED
@@ -4,14 +4,15 @@ from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
4
  # testing changes
5
 
6
  # get dtype
7
- dtype = torch.bfloat16 if torch.cuda.get_device_capability()[0] == 8 else torch.float16
8
 
9
 
10
  class EndpointHandler:
11
  def __init__(self, path=""):
12
  # load the model
13
  tokenizer = AutoTokenizer.from_pretrained(path)
14
- model = AutoModelForCausalLM.from_pretrained(path, device_map="auto", torch_dtype=dtype, trust_remote_code=True)
 
15
  # create inference pipeline
16
  self.pipeline = pipeline("text-generation", model=model, tokenizer=tokenizer, device='cuda:0')
17
 
 
4
  # testing changes
5
 
6
  # get dtype
7
+ # dtype = torch.bfloat16 if torch.cuda.get_device_capability()[0] == 8 else torch.float16
8
 
9
 
10
  class EndpointHandler:
11
  def __init__(self, path=""):
12
  # load the model
13
  tokenizer = AutoTokenizer.from_pretrained(path)
14
+ # model = AutoModelForCausalLM.from_pretrained(path, device_map="auto", torch_dtype=dtype, trust_remote_code=True)
15
+ model = AutoModelForCausalLM.from_pretrained(path, device_map="auto", trust_remote_code=True)
16
  # create inference pipeline
17
  self.pipeline = pipeline("text-generation", model=model, tokenizer=tokenizer, device='cuda:0')
18