giulia-fontanella committed on
Commit
d1ebe54
·
verified ·
1 Parent(s): e205ec9

Update tools.py

Browse files
Files changed (1) hide show
  1. tools.py +10 -5
tools.py CHANGED
@@ -5,6 +5,12 @@ from langchain_core.messages import AnyMessage, HumanMessage, AIMessage
5
  def extract_text(img_path: str) -> str:
6
  """
7
  Extract text from an image file using a multimodal model.
 
 
 
 
 
 
8
  """
9
  all_text = ""
10
  try:
@@ -51,16 +57,15 @@ def extract_text(img_path: str) -> str:
51
  def describe_image(img_path: str, query: str) -> str:
52
  """
53
  Generate a detailed description of an image using a multimodal model.
54
-
55
- This function reads a local image file, encodes it, and sends it to a
56
  vision-capable language model to obtain a comprehensive, natural language
57
  description of the image's content, including its objects, actions, and context,
58
  following a specific query.
59
-
60
  Args:
61
- img_path: A string path to a local image file (e.g., PNG, JPEG).
62
  query: Information to extract from the image
63
-
64
  Returns:
65
  A single string containing a detailed, human-readable description of the image.
66
  """
 
5
  def extract_text(img_path: str) -> str:
6
  """
7
  Extract text from an image file using a multimodal model.
8
+
9
+ Args:
10
+ img_path: A URL pointing to an image (e.g., PNG, JPEG).
11
+
12
+ Returns:
13
+ A single string containing the concatenated text extracted from each image.
14
  """
15
  all_text = ""
16
  try:
 
57
  def describe_image(img_path: str, query: str) -> str:
58
  """
59
  Generate a detailed description of an image using a multimodal model.
60
+ This function reads an image from a URL, encodes it, and sends it to a
 
61
  vision-capable language model to obtain a comprehensive, natural language
62
  description of the image's content, including its objects, actions, and context,
63
  following a specific query.
64
+
65
  Args:
66
+ img_path: A URL pointing to an image (e.g., PNG, JPEG).
67
  query: Information to extract from the image
68
+
69
  Returns:
70
  A single string containing a detailed, human-readable description of the image.
71
  """