giulia-fontanella committed on
Commit
162912a
·
verified ·
1 Parent(s): f3e6ee0

Update tools.py

Browse files
Files changed (1) hide show
  1. tools.py +12 -6
tools.py CHANGED
@@ -46,9 +46,12 @@ def read_python(file_path: str) -> str:
46
  except Exception as e:
47
  return f"Error reading Python file: {str(e)}"
48
 
49
- def make_text_from_image_tool(vision_llm):
 
 
 
50
  @tool
51
- def extract_text_from_image(img_path: str) -> str:
52
  """
53
  Extract text from an image file using a multimodal model.
54
 
@@ -88,7 +91,7 @@ def make_text_from_image_tool(vision_llm):
88
  ]
89
 
90
  # Call the vision-capable model
91
- response = vision_llm.invoke(message)
92
 
93
  # Append extracted text
94
  all_text += response.content + "\n\n"
@@ -100,9 +103,12 @@ def make_text_from_image_tool(vision_llm):
100
  return ""
101
 
102
 
103
- def make_describe_image_tool(vision_llm):
 
 
 
104
  @tool
105
- def describe_image(img_path: str, query: str) -> str:
106
  """
107
  Generate a detailed description of an image using a multimodal model.
108
  This function reads a image from an url, encodes it, and sends it to a
@@ -142,7 +148,7 @@ def make_describe_image_tool(vision_llm):
142
  ]
143
  )
144
  ]
145
- response = vision_llm.invoke(message)
146
  return response.content.strip()
147
 
148
  except Exception as e:
 
46
  except Exception as e:
47
  return f"Error reading Python file: {str(e)}"
48
 
49
+ class ExtractTextFromImage:
50
+ def __init__(self, vision_llm):
51
+ self.vision_llm = vision_llm
52
+
53
  @tool
54
+ def __call__(self, img_path: str) -> str:
55
  """
56
  Extract text from an image file using a multimodal model.
57
 
 
91
  ]
92
 
93
  # Call the vision-capable model
94
+ response = self.vision_llm.invoke(message)
95
 
96
  # Append extracted text
97
  all_text += response.content + "\n\n"
 
103
  return ""
104
 
105
 
106
+ class DescribeImage:
107
+ def __init__(self, vision_llm):
108
+ self.vision_llm = vision_llm
109
+
110
  @tool
111
+ def __call__(self, img_path: str) -> str:
112
  """
113
  Generate a detailed description of an image using a multimodal model.
114
  This function reads a image from an url, encodes it, and sends it to a
 
148
  ]
149
  )
150
  ]
151
+ response = self.vision_llm.invoke(message)
152
  return response.content.strip()
153
 
154
  except Exception as e: