Spaces:

m7mdal7aj
/

KB-VQA

Sleeping

App Files Files Community

m7mdal7aj committed on May 15, 2024

Commit

609d6f1

verified ·

1 Parent(s): e2de402

Update my_model/captioner/image_captioning.py

Browse files

Files changed (1) hide show

my_model/captioner/image_captioning.py +11 -8

my_model/captioner/image_captioning.py CHANGED Viewed

@@ -41,7 +41,7 @@ class ImageCaptioningModel:
                                                                    torch_dtype=self.torch_dtype,
                                                                    device_map=self.device_map
                                                                    )
             self.model = InstructBlipForConditionalGeneration.from_pretrained(self.model_path,
                                                                               load_in_8bit=self.load_in_8bit,
                                                                               load_in_4bit=self.load_in_4bit,
@@ -50,7 +50,9 @@ class ImageCaptioningModel:
                                                                               device_map=self.device_map
                                                                               )
     def resize_image(self, image, max_image_size=None):
         if max_image_size is None:
             max_image_size = int(os.getenv("MAX_IMAGE_SIZE", "1024"))
@@ -66,7 +68,8 @@ class ImageCaptioningModel:
     def generate_caption(self, image_path):
         if isinstance(image_path, str) or isinstance(image_path, io.IOBase):
         # If it's a file path or file-like object, open it as a PIL Image
             image = Image.open(image_path)
@@ -78,7 +81,8 @@ class ImageCaptioningModel:
         inputs = self.processor(image, self.prompt, return_tensors="pt").to("cuda", self.torch_dtype)
         outputs = self.model.generate(**inputs, min_length=self.min_length, max_new_tokens=self.max_new_tokens)
         caption = self.processor.decode(outputs[0], skip_special_tokens=self.skip_secial_tokens).strip()
         return caption
     def generate_captions_for_multiple_images(self, image_paths):
@@ -88,12 +92,11 @@ class ImageCaptioningModel:
 def get_caption(img):
     captioner = ImageCaptioningModel()
     captioner.load_model()
     caption = captioner.generate_caption(img)
-    return caption
-if __name__ == "__main__":
-    pass

                                                                    torch_dtype=self.torch_dtype,
                                                                    device_map=self.device_map
                                                                    )
+            free_gpu_resources()
             self.model = InstructBlipForConditionalGeneration.from_pretrained(self.model_path,
                                                                               load_in_8bit=self.load_in_8bit,
                                                                               load_in_4bit=self.load_in_4bit,
                                                                               device_map=self.device_map
                                                                               )
+            free_gpu_resources()
     def resize_image(self, image, max_image_size=None):
         if max_image_size is None:
             max_image_size = int(os.getenv("MAX_IMAGE_SIZE", "1024"))
     def generate_caption(self, image_path):
+        free_gpu_resources()
+        free_gpu_resources()
         if isinstance(image_path, str) or isinstance(image_path, io.IOBase):
         # If it's a file path or file-like object, open it as a PIL Image
             image = Image.open(image_path)
         inputs = self.processor(image, self.prompt, return_tensors="pt").to("cuda", self.torch_dtype)
         outputs = self.model.generate(**inputs, min_length=self.min_length, max_new_tokens=self.max_new_tokens)
         caption = self.processor.decode(outputs[0], skip_special_tokens=self.skip_secial_tokens).strip()
+        free_gpu_resources()
+        free_gpu_resources()
         return caption
     def generate_captions_for_multiple_images(self, image_paths):
 def get_caption(img):
     captioner = ImageCaptioningModel()
+    free_gpu_resources()
     captioner.load_model()
+    free_gpu_resources()
     caption = captioner.generate_caption(img)
+    free_gpu_resources()
+    return caption