CindyDelage committed on
Commit
653e507
·
verified ·
1 Parent(s): bfde496

Update tools.py

Browse files
Files changed (1) hide show
  1. tools.py +7 -8
tools.py CHANGED
@@ -73,16 +73,15 @@ class translate_everything(Tool):
73
  translated_sentence = " ".join(right_sentence[::-1])
74
  return f"The translated sentence is : {translated_sentence}"
75
 
76
- class multimodal_interpreter(Tool):
77
- name = "multimodal_tool"
78
- description = "Allows you to answer any question which relies on image or video input."
79
  inputs = {
80
- 'image': {"type": "image", "description": "The image or video of interest"},
81
- 'prompt': {"type": "string", "description": "Any specific question you have on the image. For example: Describe this image."}
82
  }
83
  output_type = "string"
84
 
85
- def forward(self, prompt, image):
86
  device = "cuda" if torch.cuda.is_available() else "cpu"
87
  model = Qwen2VLForConditionalGeneration.from_pretrained(
88
  "Qwen/Qwen2-VL-7B-Instruct", torch_dtype="auto", device_map="auto"
@@ -93,8 +92,8 @@ class multimodal_interpreter(Tool):
93
  {
94
  "role": "user",
95
  "content": [
96
- {"type": "image", "image": image},
97
- {"type": "text", "text": prompt},
98
  ],
99
  }
100
  ]
 
73
  translated_sentence = " ".join(right_sentence[::-1])
74
  return f"The translated sentence is : {translated_sentence}"
75
 
76
+ class image_interpreter(Tool):
77
+ name = "image_tool"
78
+ description = "Allows you to convert an image to text (the function will describe the image)."
79
  inputs = {
80
+ 'image': {"type": "image", "description": "The image or video of interest, png format or jpeg"},
 
81
  }
82
  output_type = "string"
83
 
84
+ def forward(self, image):
85
  device = "cuda" if torch.cuda.is_available() else "cpu"
86
  model = Qwen2VLForConditionalGeneration.from_pretrained(
87
  "Qwen/Qwen2-VL-7B-Instruct", torch_dtype="auto", device_map="auto"
 
92
  {
93
  "role": "user",
94
  "content": [
95
+ {"type": "image", "image": Image.open(image).convert("RGB")},
96
+ {"type": "text", "text": "describe this image, with as much details as you can"},
97
  ],
98
  }
99
  ]