Agent_Course_Final_Assignment

Sleeping

giulia-fontanella committed on Jun 4, 2025

Commit

d1ebe54

verified ·

1 Parent(s): e205ec9

Update tools.py

Files changed (1) hide show

tools.py CHANGED Viewed

@@ -5,6 +5,12 @@ from langchain_core.messages import AnyMessage, HumanMessage, AIMessage
 def extract_text(img_path: str) -> str:
     """
     Extract text from an image file using a multimodal model.
     """
     all_text = ""
     try:
@@ -51,16 +57,15 @@ def extract_text(img_path: str) -> str:
 def describe_image(img_path: str, query: str) -> str:
     """
     Generate a detailed description of an image using a multimodal model.
-    This function reads a local image file, encodes it, and sends it to a
     vision-capable language model to obtain a comprehensive, natural language
     description of the image's content, including its objects, actions, and context,
     following a specific query.
     Args:
-        img_path: A string path to a local image file (e.g., PNG, JPEG).
         query: Information to extract from the image
     Returns:
         A single string containing a detailed, human-readable description of the image.
     """

 def extract_text(img_path: str) -> str:
     """
     Extract text from an image file using a multimodal model.
+    Args:
+        img_path: A URL pointing to an image (e.g., PNG, JPEG).
+    Returns:
+        A single string containing the concatenated text extracted from the image.
     """
     all_text = ""
     try:
 def describe_image(img_path: str, query: str) -> str:
     """
     Generate a detailed description of an image using a multimodal model.
+    This function reads an image from a URL, encodes it, and sends it to a
     vision-capable language model to obtain a comprehensive, natural language
     description of the image's content, including its objects, actions, and context,
     following a specific query.
     Args:
+        img_path: A URL pointing to an image (e.g., PNG, JPEG).
         query: Information to extract from the image
     Returns:
         A single string containing a detailed, human-readable description of the image.
     """