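GOT_MPS: device/autocast diff for GOTQwenModel and GOTQwenForCausalLM (removed lines use device="mps" with torch.autocast("mps", dtype=torch.float16); added lines use device="cuda" with torch.autocast("cuda", dtype=torch.bfloat16))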

@@ -164,7 +164,7 @@ class GOTQwenModel(Qwen2Model):
         use_im_start_end=False,
         vision_select_layer=-1,
         dtype=torch.float16,
-        device="mps"
     ):
@@ -453,7 +453,7 @@ class GOTQwenForCausalLM(Qwen2ForCausalLM):
         tokenizer,
         freeze_lm_model=False,
         pretrained_stage1_model=None,
-        device="mps"
     ):
         config = self.get_model().config
@@ -566,7 +566,7 @@ class GOTQwenForCausalLM(Qwen2ForCausalLM):
         streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
         if stream_flag:
-            with torch.autocast("mps", dtype=torch.float16):
                 output_ids = self.generate(
                     input_ids,
                     images=[image_tensor_1.unsqueeze(0).half()],
@@ -578,7 +578,7 @@ class GOTQwenForCausalLM(Qwen2ForCausalLM):
                     stopping_criteria=[stopping_criteria]
                     )
         else:
-            with torch.autocast("mps", dtype=torch.float16):
                 output_ids = self.generate(
                     input_ids,
                     images=[image_tensor_1.unsqueeze(0).half()],
@@ -820,7 +820,7 @@ class GOTQwenForCausalLM(Qwen2ForCausalLM):
         streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
         if stream_flag:
-            with torch.autocast("mps", dtype=torch.float16):
                 output_ids = self.generate(
                     input_ids,
                     images=[image_list.half()],
@@ -832,7 +832,7 @@ class GOTQwenForCausalLM(Qwen2ForCausalLM):
                     stopping_criteria=[stopping_criteria]
                     )
         else:
-            with torch.autocast("mps", dtype=torch.float16):
                 output_ids = self.generate(
                     input_ids,
                     images=[image_list.half()],

         use_im_start_end=False,
         vision_select_layer=-1,
         dtype=torch.float16,
+        device="cuda"
     ):
         tokenizer,
         freeze_lm_model=False,
         pretrained_stage1_model=None,
+        device="cuda"
     ):
         config = self.get_model().config
         streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
         if stream_flag:
+            with torch.autocast("cuda", dtype=torch.bfloat16):
                 output_ids = self.generate(
                     input_ids,
                     images=[image_tensor_1.unsqueeze(0).half()],
                     stopping_criteria=[stopping_criteria]
                     )
         else:
+            with torch.autocast("cuda", dtype=torch.bfloat16):
                 output_ids = self.generate(
                     input_ids,
                     images=[image_tensor_1.unsqueeze(0).half()],
         streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
         if stream_flag:
+            with torch.autocast("cuda", dtype=torch.bfloat16):
                 output_ids = self.generate(
                     input_ids,
                     images=[image_list.half()],
                     stopping_criteria=[stopping_criteria]
                     )
         else:
+            with torch.autocast("cuda", dtype=torch.bfloat16):
                 output_ids = self.generate(
                     input_ids,
                     images=[image_list.half()],