Update tools.py
Browse files
tools.py
CHANGED
|
@@ -5,6 +5,12 @@ from langchain_core.messages import AnyMessage, HumanMessage, AIMessage
|
|
| 5 |
def extract_text(img_path: str) -> str:
|
| 6 |
"""
|
| 7 |
Extract text from an image file using a multimodal model.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8 |
"""
|
| 9 |
all_text = ""
|
| 10 |
try:
|
|
@@ -51,16 +57,15 @@ def extract_text(img_path: str) -> str:
|
|
| 51 |
def describe_image(img_path: str, query: str) -> str:
|
| 52 |
"""
|
| 53 |
Generate a detailed description of an image using a multimodal model.
|
| 54 |
-
|
| 55 |
-
This function reads a local image file, encodes it, and sends it to a
|
| 56 |
vision-capable language model to obtain a comprehensive, natural language
|
| 57 |
description of the image's content, including its objects, actions, and context,
|
| 58 |
following a specific query.
|
| 59 |
-
|
| 60 |
Args:
|
| 61 |
-
img_path: A
|
| 62 |
query: Information to extract from the image
|
| 63 |
-
|
| 64 |
Returns:
|
| 65 |
A single string containing a detailed, human-readable description of the image.
|
| 66 |
"""
|
|
|
|
| 5 |
def extract_text(img_path: str) -> str:
|
| 6 |
"""
|
| 7 |
Extract text from an image file using a multimodal model.
|
| 8 |
+
|
| 9 |
+
Args:
|
| 10 |
+
img_path: A URL pointing to an image (e.g., PNG, JPEG).
|
| 11 |
+
|
| 12 |
+
Returns:
|
| 13 |
+
A single string containing the concatenated text extracted from each image.
|
| 14 |
"""
|
| 15 |
all_text = ""
|
| 16 |
try:
|
|
|
|
| 57 |
def describe_image(img_path: str, query: str) -> str:
|
| 58 |
"""
|
| 59 |
Generate a detailed description of an image using a multimodal model.
|
| 60 |
+
This function reads an image from a URL, encodes it, and sends it to a
|
|
|
|
| 61 |
vision-capable language model to obtain a comprehensive, natural language
|
| 62 |
description of the image's content, including its objects, actions, and context,
|
| 63 |
following a specific query.
|
| 64 |
+
|
| 65 |
Args:
|
| 66 |
+
img_path: A URL pointing to an image (e.g., PNG, JPEG).
|
| 67 |
query: Information to extract from the image
|
| 68 |
+
|
| 69 |
Returns:
|
| 70 |
A single string containing a detailed, human-readable description of the image.
|
| 71 |
"""
|