srimanth-d
/

GOT_CPU

@@ -18,6 +18,7 @@ DEFAULT_IMAGE_TOKEN = "<image>"
 DEFAULT_IMAGE_PATCH_TOKEN = '<imgpad>'
 DEFAULT_IM_START_TOKEN = '<img>'
 DEFAULT_IM_END_TOKEN = '</img>'
 from enum import auto, Enum
 class SeparatorStyle(Enum):
@@ -164,7 +165,7 @@ class GOTQwenModel(Qwen2Model):
         use_im_start_end=False,
         vision_select_layer=-1,
         dtype=torch.float16,
-        device="cpu"
     ):
@@ -453,7 +454,7 @@ class GOTQwenForCausalLM(Qwen2ForCausalLM):
         tokenizer,
         freeze_lm_model=False,
         pretrained_stage1_model=None,
-        device="cpu"
     ):
         config = self.get_model().config
@@ -558,7 +559,7 @@ class GOTQwenForCausalLM(Qwen2ForCausalLM):
         image_tensor_1 = image_processor_high(image)
-        input_ids = torch.as_tensor(inputs.input_ids).cpu()
         stop_str = conv.sep if conv.sep_style != SeparatorStyle.TWO else conv.sep2
         keywords = [stop_str]
@@ -566,10 +567,10 @@ class GOTQwenForCausalLM(Qwen2ForCausalLM):
         streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
         if stream_flag:
-            with torch.autocast("cpu", dtype=torch.bfloat16):
                 output_ids = self.generate(
                     input_ids,
-                    images=[image_tensor_1.unsqueeze(0).half().cpu()],
                     do_sample=False,
                     num_beams = 1,
                     no_repeat_ngram_size = 20,
@@ -578,10 +579,10 @@ class GOTQwenForCausalLM(Qwen2ForCausalLM):
                     stopping_criteria=[stopping_criteria]
                     )
         else:
-            with torch.autocast("cpu", dtype=torch.bfloat16):
                 output_ids = self.generate(
                     input_ids,
-                    images=[image_tensor_1.unsqueeze(0).half().cpu()],
                     do_sample=False,
                     num_beams = 1,
                     no_repeat_ngram_size = 20,
@@ -812,7 +813,7 @@ class GOTQwenForCausalLM(Qwen2ForCausalLM):
         inputs = tokenizer([prompt])
-        input_ids = torch.as_tensor(inputs.input_ids).cpu()
         stop_str = conv.sep if conv.sep_style != SeparatorStyle.TWO else conv.sep2
         keywords = [stop_str]
@@ -820,10 +821,10 @@ class GOTQwenForCausalLM(Qwen2ForCausalLM):
         streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
         if stream_flag:
-            with torch.autocast("cpu", dtype=torch.bfloat16):
                 output_ids = self.generate(
                     input_ids,
-                    images=[image_list.half().cpu()],
                     do_sample=False,
                     num_beams = 1,
                     # no_repeat_ngram_size = 20,
@@ -832,10 +833,10 @@ class GOTQwenForCausalLM(Qwen2ForCausalLM):
                     stopping_criteria=[stopping_criteria]
                     )
         else:
-            with torch.autocast("cpu", dtype=torch.bfloat16):
                 output_ids = self.generate(
                     input_ids,
-                    images=[image_list.half().cpu()],
                     do_sample=False,
                     num_beams = 1,
                     # no_repeat_ngram_size = 20,

 DEFAULT_IMAGE_PATCH_TOKEN = '<imgpad>'
 DEFAULT_IM_START_TOKEN = '<img>'
 DEFAULT_IM_END_TOKEN = '</img>'
+device = torch.device("cuda" if torch.cuda.is_available() else "mps" if torch.backends.mps.is_available() else "cpu")
 from enum import auto, Enum
 class SeparatorStyle(Enum):
         use_im_start_end=False,
         vision_select_layer=-1,
         dtype=torch.float16,
+        device=device
     ):
         tokenizer,
         freeze_lm_model=False,
         pretrained_stage1_model=None,
+        device=device
     ):
         config = self.get_model().config
         image_tensor_1 = image_processor_high(image)
+        input_ids = torch.as_tensor(inputs.input_ids).to(device)
         stop_str = conv.sep if conv.sep_style != SeparatorStyle.TWO else conv.sep2
         keywords = [stop_str]
         streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
         if stream_flag:
+            with torch.autocast(device, dtype=torch.bfloat16):
                 output_ids = self.generate(
                     input_ids,
+                    images=[image_tensor_1.unsqueeze(0).half().to(device)],
                     do_sample=False,
                     num_beams = 1,
                     no_repeat_ngram_size = 20,
                     stopping_criteria=[stopping_criteria]
                     )
         else:
+            with torch.autocast(device, dtype=torch.bfloat16):
                 output_ids = self.generate(
                     input_ids,
+                    images=[image_tensor_1.unsqueeze(0).half().to(device)],
                     do_sample=False,
                     num_beams = 1,
                     no_repeat_ngram_size = 20,
         inputs = tokenizer([prompt])
+        input_ids = torch.as_tensor(inputs.input_ids).to(device)
         stop_str = conv.sep if conv.sep_style != SeparatorStyle.TWO else conv.sep2
         keywords = [stop_str]
         streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
         if stream_flag:
+            with torch.autocast(device, dtype=torch.bfloat16):
                 output_ids = self.generate(
                     input_ids,
+                    images=[image_list.half().to(device)],
                     do_sample=False,
                     num_beams = 1,
                     # no_repeat_ngram_size = 20,
                     stopping_criteria=[stopping_criteria]
                     )
         else:
+            with torch.autocast(device, dtype=torch.bfloat16):
                 output_ids = self.generate(
                     input_ids,
+                    images=[image_list.half().to(device)],
                     do_sample=False,
                     num_beams = 1,
                     # no_repeat_ngram_size = 20,