DrekFretson committed on
Commit
703270b
·
verified ·
1 Parent(s): 23f52f0

Delete describe_image_tool.py

Browse files
Files changed (1) hide show
  1. describe_image_tool.py +0 -110
describe_image_tool.py DELETED
@@ -1,110 +0,0 @@
1
- import base64
2
- import os
3
-
4
- from openai import OpenAI
5
- from smolagents import Tool
6
-
7
- client = OpenAI()
8
-
9
-
10
class DescribeImageTool(Tool):
    """
    Describe a local image by sending it to an OpenAI chat model.

    The image is read from disk, base64-encoded, and submitted together with
    a text prompt through the module-level ``client`` (model ``gpt-4.1``).
    Failures are reported as strings rather than raised, so the calling
    agent always receives a string result.

    Args:
        image_path (str): Path to the image file.
        description_type (str): Type of description to generate. Options:
            - "general": General description of the image
            - "detailed": Detailed analysis of the image
            - "chess": Analysis of a chess position
            - "text": Extract and describe text from the image
            - "custom": Custom description based on user prompt
            Unknown or missing values fall back to "general".
        custom_prompt (str): Prompt used when description_type is "custom".
            If it is missing or blank, the "general" prompt is used instead.

    Returns:
        str: Description of the image based on the requested type, or a
        message starting with "Error" when the request could not be served.
    """

    name = "describe_image"
    description = "Analyzes and describes images using GPT-4 Vision API"
    inputs = {
        "image_path": {"type": "string", "description": "Path to the image file"},
        "description_type": {
            "type": "string",
            "description": "Type of description to generate (general, detailed, chess, text, custom)",
            "nullable": True,
        },
        "custom_prompt": {
            "type": "string",
            "description": "Custom prompt for description (only used when description_type is 'custom')",
            "nullable": True,
        },
    }
    output_type = "string"

    def encode_image(self, image_path: str) -> str:
        """Read the file at ``image_path`` and return its bytes as base64 text."""
        with open(image_path, "rb") as image_file:
            return base64.b64encode(image_file.read()).decode("utf-8")

    def _guess_image_mime_type(self, image_path: str) -> str:
        """Return the image MIME type implied by the file extension.

        Falls back to "image/jpeg" (the value previously hard-coded in the
        data URL) when the extension is unknown or is not an image type.
        """
        # Local import keeps the module's import block untouched.
        import mimetypes

        guessed, _ = mimetypes.guess_type(image_path)
        if guessed and guessed.startswith("image/"):
            return guessed
        return "image/jpeg"

    def get_prompt(self, description_type: str, custom_prompt: str = None) -> str:
        """Return the text prompt matching ``description_type``.

        A non-blank ``custom_prompt`` is returned as-is for the "custom"
        type. Every other case -- unknown type, ``None`` type, or "custom"
        without a usable prompt -- resolves to the "general" prompt, so the
        result is always a string.
        """
        prompts = {
            "general": "Provide a general description of this image. Focus on the main subjects, colors, and overall scene.",
            "detailed": (
                "Analyze this image in detail. Include:\n"
                "1. Main subjects and their relationships\n"
                "2. Colors, lighting, and composition\n"
                "3. Any text or symbols present\n"
                "4. Context or possible meaning\n"
                "5. Notable details or interesting elements"
            ),
            "chess": (
                "Analyze this chess position and provide a detailed description including:\n"
                "1. List of pieces on the board for both white and black\n"
                "2. Whose turn it is to move\n"
                "3. Basic evaluation of the position\n"
                "4. Any immediate tactical opportunities or threats\n"
                "5. Suggested next moves with brief explanations"
            ),
            "text": "Extract and describe any text present in this image. If there are multiple pieces of text, organize them clearly.",
        }
        if description_type == "custom" and custom_prompt and custom_prompt.strip():
            return custom_prompt
        # "custom" is deliberately absent from the table, so a custom request
        # without a prompt lands on the same default as an unknown type.
        return prompts.get(description_type, prompts["general"])

    def forward(
        self,
        image_path: str,
        description_type: str = "general",
        custom_prompt: str = None,
    ) -> str:
        """Describe the image at ``image_path`` and return the model's answer.

        Returns an "Error: ..." string when the path is not an existing file
        and an "Error analyzing image: ..." string when reading the file or
        calling the API raises.
        """
        # isfile (not exists) so a directory path gets the clear
        # "not found" message instead of failing later inside open().
        if not os.path.isfile(image_path):
            return f"Error: Image file not found at {image_path}"

        try:
            base64_image = self.encode_image(image_path)
            mime_type = self._guess_image_mime_type(image_path)
            prompt = self.get_prompt(description_type, custom_prompt)

            response = client.chat.completions.create(
                model="gpt-4.1",
                messages=[
                    {
                        "role": "user",
                        "content": [
                            {"type": "text", "text": prompt},
                            {
                                "type": "image_url",
                                "image_url": {
                                    "url": f"data:{mime_type};base64,{base64_image}"
                                },
                            },
                        ],
                    }
                ],
                max_tokens=1000,
            )
            return response.choices[0].message.content
        except Exception as e:
            # Tool boundary: surface the failure as text so the agent can
            # react to it instead of crashing the run.
            return f"Error analyzing image: {str(e)}"