Final_Assignment

Sleeping

App Files Files Community

kenqia committed 21 days ago

Commit

eba8bcf

verified ·

1 Parent(s): 75de192

Update tools.py

Browse files

Files changed (1) hide show

tools.py +110 -1

tools.py CHANGED Viewed

@@ -9,7 +9,9 @@ import requests
 import pandas as pd
 from langchain_core.tools import tool
 from youtube_transcript_api import YouTubeTranscriptApi
 DEFAULT_API_URL = os.getenv(
     "AGENT_COURSE_API_URL",
@@ -132,6 +134,113 @@ def read_attached_text_file(task_id: str = "", file_path: str = "", max_chars: i
         return f"Failed to read file {path}: {e}"
 @tool
 def answer_python_question(task_id: str = "", file_path: str = "") -> str:
     """

 import pandas as pd
 from langchain_core.tools import tool
 from youtube_transcript_api import YouTubeTranscriptApi
+import base64
+import mimetypes
+from openai import OpenAI
 DEFAULT_API_URL = os.getenv(
     "AGENT_COURSE_API_URL",
         return f"Failed to read file {path}: {e}"
+# Lower-case file suffixes (with leading dot) that answer_image_question accepts.
+IMAGE_SUFFIXES = {".png", ".jpg", ".jpeg", ".webp", ".bmp", ".gif"}
+def _image_to_data_url(path: Path) -> str:
+    """
+    Encode a local image file as a base64 data URL (data:<mime>;base64,<payload>)
+    for the image_url part of a Qwen-VL / OpenAI-compatible chat request.
+    The MIME type comes from mimetypes when it yields an image/* type; otherwise
+    it is derived from the file suffix, defaulting to image/jpeg.
+    """
+    # Consulted only when mimetypes cannot produce an image/* type for the name.
+    suffix_to_mime = {
+        ".jpg": "image/jpeg",
+        ".jpeg": "image/jpeg",
+        ".png": "image/png",
+        ".webp": "image/webp",
+        ".bmp": "image/bmp",
+        ".gif": "image/gif",
+    }
+    guessed = mimetypes.guess_type(str(path))[0]
+    if guessed and guessed.startswith("image/"):
+        media_type = guessed
+    else:
+        # Unrecognised suffixes fall back to image/jpeg.
+        media_type = suffix_to_mime.get(path.suffix.lower(), "image/jpeg")
+    payload = base64.b64encode(path.read_bytes()).decode("utf-8")
+    return f"data:{media_type};base64,{payload}"
+# NOTE: the docstring is kept verbatim on purpose; langchain's @tool exposes it
+# to the agent as the tool description, so it is runtime text, not just a comment.
+@tool
+def answer_image_question(task_id: str = "", file_path: str = "", question: str = "") -> str:
+    """
+    Analyze an attached image and answer the user's question.
+    Use this tool when the question mentions an attached image, picture, screenshot,
+    chess position, visual content, chart image, diagram, object counting, OCR from image,
+    or asks what is shown in an image.
+    Provide task_id when available. Also include the original question.
+    """
+    # Every failure path below returns an explanatory string instead of raising.
+    image_path = _resolve_file(task_id=task_id, file_path=file_path)
+    if image_path is None:
+        return "No image file could be resolved from the given task_id or file_path."
+    extension = image_path.suffix.lower()
+    if extension not in IMAGE_SUFFIXES:
+        # Echo what was resolved so the caller can see why it was rejected.
+        return (
+            f"Resolved file is not a supported image. "
+            f"file_path={image_path}, suffix={extension}. "
+            f"Supported suffixes: {sorted(IMAGE_SUFFIXES)}"
+        )
+    dashscope_key = os.getenv("DASHSCOPE_API_KEY")
+    if not dashscope_key:
+        return "DASHSCOPE_API_KEY is not set."
+    try:
+        # The image is sent inline as a base64 data URL rather than by remote URL.
+        data_url = _image_to_data_url(image_path)
+        vl_client = OpenAI(
+            api_key=dashscope_key,
+            base_url="https://dashscope.aliyuncs.com/compatible-mode/v1",
+        )
+        instructions = f"""
+        You are a precise visual question-answering tool for an evaluation benchmark.
+        Task:
+        Answer the user's question using the image.
+        Rules:
+        - Use the image content as the primary evidence.
+        - If the question asks for a number, return only the number unless explanation is required.
+        - If the question asks for a word, name, color, object, move, or label, return only that final answer.
+        - For chess/checker/board-game images, carefully identify the board and pieces before answering.
+        - For OCR-like questions, read visible text carefully.
+        - Do not add markdown.
+        - Do not mention that you are an AI model.
+        Question:
+        {question}
+        """.strip()
+        # One user turn carrying the text instructions followed by the image.
+        user_parts = [
+            {"type": "text", "text": instructions},
+            {"type": "image_url", "image_url": {"url": data_url}},
+        ]
+        chat_messages = [{"role": "user", "content": user_parts}]
+        # The model name can be overridden through DASHSCOPE_VL_MODEL.
+        model_name = os.getenv("DASHSCOPE_VL_MODEL", "qwen-vl-plus-latest")
+        completion = vl_client.chat.completions.create(
+            model=model_name,
+            messages=chat_messages,
+            temperature=0,
+            max_tokens=256,
+        )
+        reply = completion.choices[0].message.content
+        if not reply:
+            # Missing or empty content is normalised to an empty string.
+            return ""
+        return reply.strip()
+    except Exception as exc:
+        # File read, client setup and API errors all surface as a text result.
+        return f"Failed to analyze image {image_path}: {exc}"
 @tool
 def answer_python_question(task_id: str = "", file_path: str = "") -> str:
     """