kenqia committed on
Commit
eba8bcf
·
verified ·
1 Parent(s): 75de192

Update tools.py

Browse files
Files changed (1) hide show
  1. tools.py +110 -1
tools.py CHANGED
@@ -9,7 +9,9 @@ import requests
9
  import pandas as pd
10
  from langchain_core.tools import tool
11
  from youtube_transcript_api import YouTubeTranscriptApi
12
-
 
 
13
 
14
  DEFAULT_API_URL = os.getenv(
15
  "AGENT_COURSE_API_URL",
@@ -132,6 +134,113 @@ def read_attached_text_file(task_id: str = "", file_path: str = "", max_chars: i
132
  return f"Failed to read file {path}: {e}"
133
 
134
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
135
  @tool
136
  def answer_python_question(task_id: str = "", file_path: str = "") -> str:
137
  """
 
9
  import pandas as pd
10
  from langchain_core.tools import tool
11
  from youtube_transcript_api import YouTubeTranscriptApi
12
+ import base64
13
+ import mimetypes
14
+ from openai import OpenAI
15
 
16
  DEFAULT_API_URL = os.getenv(
17
  "AGENT_COURSE_API_URL",
 
134
  return f"Failed to read file {path}: {e}"
135
 
136
 
137
+ IMAGE_SUFFIXES = {".png", ".jpg", ".jpeg", ".webp", ".bmp", ".gif"}
138
+
139
+
140
# Canonical MIME types for the image suffixes this module explicitly supports.
# Looked up before `mimetypes` so the result does not depend on the platform's
# MIME database (which can differ between OSes and Python versions).
_IMAGE_MIME_BY_SUFFIX = {
    ".jpg": "image/jpeg",
    ".jpeg": "image/jpeg",
    ".png": "image/png",
    ".webp": "image/webp",
    ".bmp": "image/bmp",
    ".gif": "image/gif",
}


def _image_to_data_url(path: Path) -> str:
    """
    Convert a local image file to a base64 data URL for Qwen-VL / OpenAI-compatible API.

    The MIME type is chosen in this order:
      1. the explicit table above, keyed by the lower-cased file suffix;
      2. ``mimetypes.guess_type`` if it reports an ``image/*`` type;
      3. ``image/jpeg`` as a last-resort default.

    Args:
        path: Location of the image file. A plain string is accepted as well.

    Returns:
        A string of the form ``data:<mime>;base64,<payload>``.

    Raises:
        OSError: If the file cannot be read.
    """
    path = Path(path)

    mime_type = _IMAGE_MIME_BY_SUFFIX.get(path.suffix.lower())
    if mime_type is None:
        guessed, _ = mimetypes.guess_type(str(path))
        if guessed and guessed.startswith("image/"):
            mime_type = guessed
        else:
            # Unknown or non-image type: keep the original default.
            mime_type = "image/jpeg"

    # Base64 output is pure ASCII, so the codec choice cannot change the result.
    encoded = base64.b64encode(path.read_bytes()).decode("ascii")
    return f"data:{mime_type};base64,{encoded}"
163
+
164
+
165
@tool
def answer_image_question(task_id: str = "", file_path: str = "", question: str = "") -> str:
    """
    Analyze an attached image and answer the user's question.
    Use this tool when the question mentions an attached image, picture, screenshot,
    chess position, visual content, chart image, diagram, object counting, OCR from image,
    or asks what is shown in an image.

    Provide task_id when available. Also include the original question.
    """
    # NOTE(review): the docstring above is intentionally left untouched; it is
    # presumably what the `tool` decorator exposes to the agent as the tool
    # description, so rewording it would change runtime behaviour.
    #
    # Every failure is reported as a returned string rather than a raised
    # exception, so the calling agent always receives a readable tool result.
    path = _resolve_file(task_id=task_id, file_path=file_path)
    if path is None:
        return "No image file could be resolved from the given task_id or file_path."

    suffix = path.suffix.lower()
    if suffix not in IMAGE_SUFFIXES:
        return (
            "Resolved file is not a supported image. "
            f"file_path={path}, suffix={suffix}. "
            f"Supported suffixes: {sorted(IMAGE_SUFFIXES)}"
        )

    # Without a question the model has nothing to answer; fail fast instead of
    # spending an API call on an empty prompt.
    question = (question or "").strip()
    if not question:
        return "No question was provided. Pass the original question about the image."

    api_key = os.getenv("DASHSCOPE_API_KEY")
    if not api_key:
        return "DASHSCOPE_API_KEY is not set."

    # Built from explicit lines so that source-code indentation can never leak
    # into the text that is sent to the model.
    prompt = "\n".join(
        (
            "You are a precise visual question-answering tool for an evaluation benchmark.",
            "",
            "Task:",
            "Answer the user's question using the image.",
            "",
            "Rules:",
            "- Use the image content as the primary evidence.",
            "- If the question asks for a number, return only the number unless explanation is required.",
            "- If the question asks for a word, name, color, object, move, or label, return only that final answer.",
            "- For chess/checker/board-game images, carefully identify the board and pieces before answering.",
            "- For OCR-like questions, read visible text carefully.",
            "- Do not add markdown.",
            "- Do not mention that you are an AI model.",
            "",
            "Question:",
            question,
        )
    )

    try:
        image_url = _image_to_data_url(path)

        client = OpenAI(
            api_key=api_key,
            base_url="https://dashscope.aliyuncs.com/compatible-mode/v1",
        )

        response = client.chat.completions.create(
            # The model name can be overridden through the environment.
            model=os.getenv("DASHSCOPE_VL_MODEL", "qwen-vl-plus-latest"),
            messages=[
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": prompt},
                        {
                            "type": "image_url",
                            "image_url": {"url": image_url},
                        },
                    ],
                }
            ],
            temperature=0,
            max_tokens=256,
        )

        answer = response.choices[0].message.content
        return answer.strip() if answer else ""

    except Exception as e:
        # Deliberately broad: this is the tool boundary, and file-read, network
        # and API errors should all surface to the agent as text.
        return f"Failed to analyze image {path}: {e}"
243
+
244
  @tool
245
  def answer_python_question(task_id: str = "", file_path: str = "") -> str:
246
  """