srimanth-d
/

GOT_CPU

@@ -18,7 +18,7 @@ DEFAULT_IMAGE_TOKEN = "<image>"
 DEFAULT_IMAGE_PATCH_TOKEN = '<imgpad>'
 DEFAULT_IM_START_TOKEN = '<img>'
 DEFAULT_IM_END_TOKEN = '</img>'
-device = "cpu"
 print("Using device ",device)
 from enum import auto, Enum
@@ -568,29 +568,57 @@ class GOTQwenForCausalLM(Qwen2ForCausalLM):
         streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
         if stream_flag:
-            with torch.autocast("cpu", dtype=torch.bfloat16):
                 output_ids = self.generate(
-                    input_ids,
-                    images=[image_tensor_1.unsqueeze(0).half().cpu()],
-                    do_sample=False,
-                    num_beams = 1,
-                    no_repeat_ngram_size = 20,
-                    streamer=streamer,
-                    max_new_tokens=4096,
-                    stopping_criteria=[stopping_criteria]
                     )
         else:
-            with torch.autocast("cpu", dtype=torch.bfloat16):
                 output_ids = self.generate(
-                    input_ids,
-                    images=[image_tensor_1.unsqueeze(0).half().cpu()],
-                    do_sample=False,
-                    num_beams = 1,
-                    no_repeat_ngram_size = 20,
-                    # streamer=streamer,
-                    max_new_tokens=4096,
-                    stopping_criteria=[stopping_criteria]
                     )
         outputs = tokenizer.decode(output_ids[0, input_ids.shape[1]:]).strip()
@@ -822,29 +850,57 @@ class GOTQwenForCausalLM(Qwen2ForCausalLM):
         streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
         if stream_flag:
-            with torch.autocast("cpu", dtype=torch.bfloat16):
-                output_ids = self.generate(
-                    input_ids,
-                    images=[image_list.half().cpu()],
-                    do_sample=False,
-                    num_beams = 1,
-                    # no_repeat_ngram_size = 20,
-                    streamer=streamer,
-                    max_new_tokens=4096,
-                    stopping_criteria=[stopping_criteria]
                     )
         else:
-            with torch.autocast("cpu", dtype=torch.bfloat16):
                 output_ids = self.generate(
-                    input_ids,
-                    images=[image_list.half().cpu()],
-                    do_sample=False,
-                    num_beams = 1,
-                    # no_repeat_ngram_size = 20,
-                    # streamer=streamer,
-                    max_new_tokens=4096,
-                    stopping_criteria=[stopping_criteria]
                     )
         outputs = tokenizer.decode(output_ids[0, input_ids.shape[1]:]).strip()

 DEFAULT_IMAGE_PATCH_TOKEN = '<imgpad>'
 DEFAULT_IM_START_TOKEN = '<img>'
 DEFAULT_IM_END_TOKEN = '</img>'
+device = "cuda" if torch.cuda.is_available() else "mps" if torch.backends.mps.is_available() else "cpu"
 print("Using device ",device)
 from enum import auto, Enum
         streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
         if stream_flag:
+            if device == "cuda":
+                with torch.autocast("cuda", dtype=torch.bfloat16):
+                    output_ids = self.generate(
+                        input_ids,
+                        images=[image_tensor_1.unsqueeze(0).half().cuda()],
+                        do_sample=False,
+                        num_beams = 1,
+                        no_repeat_ngram_size = 20,
+                        streamer=streamer,
+                        max_new_tokens=4096,
+                        stopping_criteria=[stopping_criteria]
+                    )
+            elif device == "mps" or device == "cpu":
                 output_ids = self.generate(
+                        input_ids,
+                        images=[image_tensor_1.unsqueeze(0).half().to(device)],
+                        do_sample=False,
+                        num_beams = 1,
+                        no_repeat_ngram_size = 20,
+                        streamer=streamer,
+                        max_new_tokens=4096,
+                        stopping_criteria=[stopping_criteria]
                     )
+            else:
+                print("Device unknown!")
         else:
+            if device == "cuda":
+                with torch.autocast("cuda", dtype=torch.bfloat16):
+                    output_ids = self.generate(
+                        input_ids,
+                        images=[image_tensor_1.unsqueeze(0).half().cuda()],
+                        do_sample=False,
+                        num_beams = 1,
+                        no_repeat_ngram_size = 20,
+                        # streamer=streamer,
+                        max_new_tokens=4096,
+                        stopping_criteria=[stopping_criteria]
+                    )
+            elif device == "mps" or device == "cpu":
                 output_ids = self.generate(
+                        input_ids,
+                        images=[image_tensor_1.unsqueeze(0).half().to(device)],
+                        do_sample=False,
+                        num_beams = 1,
+                        no_repeat_ngram_size = 20,
+                        # streamer=streamer,
+                        max_new_tokens=4096,
+                        stopping_criteria=[stopping_criteria]
                     )
+            else:
+                print("Device unknown!")
         outputs = tokenizer.decode(output_ids[0, input_ids.shape[1]:]).strip()
         streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
         if stream_flag:
+            if device == "cuda":
+                with torch.autocast("cuda", dtype=torch.bfloat16):
+                    output_ids = self.generate(
+                        input_ids,
+                        images=[image_list.half().cuda()],
+                        do_sample=False,
+                        num_beams = 1,
+                        # no_repeat_ngram_size = 20,
+                        streamer=streamer,
+                        max_new_tokens=4096,
+                        stopping_criteria=[stopping_criteria]
                     )
+            elif device == "mps" or device == "cpu":
+                    output_ids = self.generate(
+                        input_ids,
+                        images=[image_list.half().to(device)],
+                        do_sample=False,
+                        num_beams = 1,
+                        # no_repeat_ngram_size = 20,
+                        streamer=streamer,
+                        max_new_tokens=4096,
+                        stopping_criteria=[stopping_criteria]
+                    )
+            else:
+                print("Device unknown!")
         else:
+            if device == "cuda":
+                with torch.autocast("cuda", dtype=torch.bfloat16):
+                    output_ids = self.generate(
+                        input_ids,
+                        images=[image_list.half().cuda()],
+                        do_sample=False,
+                        num_beams = 1,
+                        # no_repeat_ngram_size = 20,
+                        # streamer=streamer,
+                        max_new_tokens=4096,
+                        stopping_criteria=[stopping_criteria]
+                    )
+            elif device == "mps" or device == "cpu":
                 output_ids = self.generate(
+                        input_ids,
+                        images=[image_list.half().to(device)],
+                        do_sample=False,
+                        num_beams = 1,
+                        # no_repeat_ngram_size = 20,
+                        # streamer=streamer,
+                        max_new_tokens=4096,
+                        stopping_criteria=[stopping_criteria]
                     )
+            else:
+                print("Device unknown!")
         outputs = tokenizer.decode(output_ids[0, input_ids.shape[1]:]).strip()