Agent_Course_Final_Assignment

Sleeping

giulia-fontanella committed on Jun 5, 2025

Commit

162912a

verified ·

1 Parent(s): f3e6ee0

Update tools.py

Files changed (1) hide show

tools.py CHANGED Viewed

@@ -46,9 +46,12 @@ def read_python(file_path: str) -> str:
     except Exception as e:
         return f"Error reading Python file: {str(e)}"
-def make_text_from_image_tool(vision_llm):
     @tool
-    def extract_text_from_image(img_path: str) -> str:
         """
         Extract text from an image file using a multimodal model.
@@ -88,7 +91,7 @@ def make_text_from_image_tool(vision_llm):
             ]
             # Call the vision-capable model
-            response = vision_llm.invoke(message)
             # Append extracted text
             all_text += response.content + "\n\n"
@@ -100,9 +103,12 @@ def make_text_from_image_tool(vision_llm):
             return ""
-def make_describe_image_tool(vision_llm):
     @tool
-    def describe_image(img_path: str, query: str) -> str:
         """
         Generate a detailed description of an image using a multimodal model.
         This function reads an image from a URL, encodes it, and sends it to a
@@ -142,7 +148,7 @@ def make_describe_image_tool(vision_llm):
                     ]
                 )
             ]
-            response = vision_llm.invoke(message)
             return response.content.strip()
         except Exception as e:

     except Exception as e:
         return f"Error reading Python file: {str(e)}"
+class ExtractTextFromImage:
+    def __init__(self, vision_llm):
+        self.vision_llm = vision_llm
     @tool
+    def __call__(self, img_path: str) -> str:
         """
         Extract text from an image file using a multimodal model.
             ]
             # Call the vision-capable model
+            response = self.vision_llm.invoke(message)
             # Append extracted text
             all_text += response.content + "\n\n"
             return ""
+class DescribeImage:
+    def __init__(self, vision_llm):
+        self.vision_llm = vision_llm
     @tool
+    def __call__(self, img_path: str) -> str:
         """
         Generate a detailed description of an image using a multimodal model.
         This function reads an image from a URL, encodes it, and sends it to a
                     ]
                 )
             ]
+            response = self.vision_llm.invoke(message)
             return response.content.strip()
         except Exception as e: