Final_Assignment_Template_sm

Sleeping

App Files Files Community

shubham7080 committed on May 29, 2025

Commit

96dcdde

verified ·

1 Parent(s): b8050be

Create tools/describe_image_tool.py

Browse files

Files changed (1) hide show

tools/describe_image_tool.py +110 -0

tools/describe_image_tool.py ADDED Viewed

	@@ -0,0 +1,110 @@

+import base64
+import os
+from openai import OpenAI
+from smolagents import Tool
+# Shared OpenAI client, created once when this module is imported and used by
+# DescribeImageTool.forward for the chat-completions request.
+# NOTE(review): no credentials are passed explicitly; presumably the SDK reads
+# them from the environment (e.g. OPENAI_API_KEY) — confirm for deployment.
+client = OpenAI()
+class DescribeImageTool(Tool):
+    """
+    Tool to analyze and describe any image using GPT-4 Vision API.
+    Args:
+        image_path (str): Path to the image file.
+        description_type (str): Type of description to generate. Options:
+            - "general": General description of the image
+            - "detailed": Detailed analysis of the image
+            - "chess": Analysis of a chess position
+            - "text": Extract and describe text from the image
+            - "custom": Custom description based on user prompt
+    Returns:
+        str: Description of the image based on the requested type.
+    """
+    name = "describe_image"
+    description = "Analyzes and describes images using GPT-4 Vision API"
+    inputs = {
+        "image_path": {"type": "string", "description": "Path to the image file"},
+        "description_type": {
+            "type": "string",
+            "description": "Type of description to generate (general, detailed, chess, text, custom)",
+            "nullable": True,
+        },
+        "custom_prompt": {
+            "type": "string",
+            "description": "Custom prompt for description (only used when description_type is 'custom')",
+            "nullable": True,
+        },
+    }
+    output_type = "string"
+    def encode_image(self, image_path: str) -> str:
+        """Encode image to base64 string."""
+        with open(image_path, "rb") as image_file:
+            return base64.b64encode(image_file.read()).decode("utf-8")
+    def get_prompt(self, description_type: str, custom_prompt: str = None) -> str:
+        """Get appropriate prompt based on description type."""
+        prompts = {
+            "general": "Provide a general description of this image. Focus on the main subjects, colors, and overall scene.",
+            "detailed": """Analyze this image in detail. Include:
+            1. Main subjects and their relationships
+            2. Colors, lighting, and composition
+            3. Any text or symbols present
+            4. Context or possible meaning
+            5. Notable details or interesting elements""",
+            "chess": """Analyze this chess position and provide a detailed description including:
+            1. List of pieces on the board for both white and black
+            2. Whose turn it is to move
+            3. Basic evaluation of the position
+            4. Any immediate tactical opportunities or threats
+            5. Suggested next moves with brief explanations""",
+            "text": "Extract and describe any text present in this image. If there are multiple pieces of text, organize them clearly.",
+        }
+        return (
+            custom_prompt
+            if description_type == "custom"
+            else prompts.get(description_type, prompts["general"])
+        )
+    def forward(
+        self,
+        image_path: str,
+        description_type: str = "general",
+        custom_prompt: str = None,
+    ) -> str:
+        try:
+            if not os.path.exists(image_path):
+                return f"Error: Image file not found at {image_path}"
+            # Encode the image
+            base64_image = self.encode_image(image_path)
+            # Get appropriate prompt
+            prompt = self.get_prompt(description_type, custom_prompt)
+            # Make the API call
+            response = client.chat.completions.create(
+                model="gpt-4.1",
+                messages=[
+                    {
+                        "role": "user",
+                        "content": [
+                            {"type": "text", "text": prompt},
+                            {
+                                "type": "image_url",
+                                "image_url": {
+                                    "url": f"data:image/jpeg;base64,{base64_image}"
+                                },
+                            },
+                        ],
+                    }
+                ],
+                max_tokens=1000,
+            )
+            return response.choices[0].message.content
+        except Exception as e:
+            return f"Error analyzing image: {str(e)}"