Final_Assignment_Template_V2

Sleeping

CindyDelage committed on May 2, 2025

Commit

653e507

verified ·

1 Parent(s): bfde496

Update tools.py

Files changed (1) hide show

tools.py CHANGED Viewed

@@ -73,16 +73,15 @@ class translate_everything(Tool):
         translated_sentence = " ".join(right_sentence[::-1])
         return f"The translated sentence is : {translated_sentence}"
-class multimodal_interpreter(Tool):
-    name = "multimodal_tool"
-    description = "Allows you to answer any question which relies on image or video input."
     inputs = {
-        'image': {"type": "image", "description": "The image or video of interest"},
-        'prompt': {"type": "string", "description": "Any specific question you have on the image. For example: Describe this image."}
     }
     output_type = "string"
-    def forward(self, prompt, image):
         device = "cuda" if torch.cuda.is_available() else "cpu"
         model = Qwen2VLForConditionalGeneration.from_pretrained(
         "Qwen/Qwen2-VL-7B-Instruct", torch_dtype="auto", device_map="auto"
@@ -93,8 +92,8 @@ class multimodal_interpreter(Tool):
             {
                 "role": "user",
                 "content": [
-                    {"type": "image", "image": image},
-                    {"type": "text", "text": prompt},
                 ],
             }
         ]

         translated_sentence = " ".join(right_sentence[::-1])
         return f"The translated sentence is : {translated_sentence}"
+class image_interpreter(Tool):
+    name = "image_tool"
+    description = "Allows you to convert an image to text (the function will describe the image)".
     inputs = {
+        'image': {"type": "image", "description": "The image or video of interest, png format or jpeg"},
     }
     output_type = "string"
+    def forward(self, image):
         device = "cuda" if torch.cuda.is_available() else "cpu"
         model = Qwen2VLForConditionalGeneration.from_pretrained(
         "Qwen/Qwen2-VL-7B-Instruct", torch_dtype="auto", device_map="auto"
             {
                 "role": "user",
                 "content": [
+                    {"type": "image", "image": Image.open(image).convert("RGB")},
+                    {"type": "text", "text": "describe this image, with as much details as you can"},
                 ],
             }
         ]