import base64
import mimetypes

from langchain.tools import tool
from langchain_community.document_loaders import ArxivLoader
from langchain_community.document_loaders import WikipediaLoader
from langchain_community.tools.tavily_search import TavilySearchResults
from langchain_core.messages import AnyMessage, HumanMessage, AIMessage
|
|
|
|
@tool
def extract_text(img_path: str) -> str:
    """
    Extract text from an image file using a multimodal model.

    The image is read from the local filesystem, base64-encoded, and sent to
    the vision model as a data URL together with an instruction to return
    only the text found in the image.

    Args:
        img_path: Path to a local image file (e.g., PNG, JPEG). The file is
            opened directly from disk, so a remote URL will not work.

    Returns:
        The text extracted from the image with surrounding whitespace
        removed, or an empty string if reading the file or calling the
        model fails (the error message is printed).
    """
    try:
        with open(img_path, "rb") as image_file:
            image_base64 = base64.b64encode(image_file.read()).decode("utf-8")

        # Label the payload with the MIME type implied by the file extension
        # instead of always claiming PNG; fall back to PNG when the extension
        # is unknown or does not map to an image type.
        mime_type, _ = mimetypes.guess_type(img_path)
        if not mime_type or not mime_type.startswith("image/"):
            mime_type = "image/png"

        message = [
            HumanMessage(
                content=[
                    {
                        "type": "text",
                        "text": (
                            "Extract all the text from this image. "
                            "Return only the extracted text, no explanations."
                        ),
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:{mime_type};base64,{image_base64}"
                        },
                    },
                ]
            )
        ]

        # NOTE(review): vision_llm is not defined in this block; presumably a
        # module-level vision-capable chat model -- confirm it is in scope.
        response = vision_llm.invoke(message)
        return response.content.strip()
    except Exception as e:
        # Tool boundary: report the failure and return an empty result rather
        # than propagating the exception to the caller.
        error_msg = f"Error extracting text: {str(e)}"
        print(error_msg)
        return ""
|
|
|
|
@tool
def describe_image(img_path: str, query: str) -> str:
    """
    Generate a detailed description of an image using a multimodal model.

    This function reads an image from a local file, base64-encodes it, and
    sends it to a vision-capable language model to obtain a comprehensive,
    natural language description of the image's content, including its
    objects, actions, and context, following a specific query.

    Args:
        img_path: Path to a local image file (e.g., PNG, JPEG). The file is
            opened directly from disk, so a remote URL will not work.
        query: Information to extract from the image.

    Returns:
        The model's description with surrounding whitespace removed, or an
        empty string if reading the file or calling the model fails (the
        error message is printed).
    """
    try:
        with open(img_path, "rb") as image_file:
            image_base64 = base64.b64encode(image_file.read()).decode("utf-8")

        # Label the payload with the MIME type implied by the file extension
        # instead of always claiming PNG; fall back to PNG when the extension
        # is unknown or does not map to an image type.
        mime_type, _ = mimetypes.guess_type(img_path)
        if not mime_type or not mime_type.startswith("image/"):
            mime_type = "image/png"

        prompt = (
            "Describe this image in rich detail. Include objects, people, "
            "setting, background elements, and any inferred actions or "
            "context. Avoid technical jargon. In particular, extract the "
            f"following information: {query}"
        )
        message = [
            HumanMessage(
                content=[
                    {"type": "text", "text": prompt},
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:{mime_type};base64,{image_base64}"
                        },
                    },
                ]
            )
        ]

        # NOTE(review): vision_llm is not defined in this block; presumably a
        # module-level vision-capable chat model -- confirm it is in scope.
        response = vision_llm.invoke(message)
        return response.content.strip()
    except Exception as e:
        # Tool boundary: report the failure and return an empty result rather
        # than propagating the exception to the caller.
        error_msg = f"Error describing image: {str(e)}"
        print(error_msg)
        return ""
|
|
|
|
@tool
def wiki_search(query: str) -> dict:
    """Search Wikipedia for a query and return maximum 2 results.

    Args:
        query: The search query.

    Returns:
        A dict with a single key, "wiki_results", whose value is one string
        containing every loaded document (source, page and full content),
        separated by "---" divider lines.
    """
    search_docs = WikipediaLoader(query=query, load_max_docs=2).load()
    # Each document is wrapped in a <Document> tag carrying its source so the
    # consumer can tell where each passage came from.
    formatted_search_docs = "\n\n---\n\n".join(
        f'<Document source="{doc.metadata["source"]}" page="{doc.metadata.get("page", "")}"/>\n{doc.page_content}\n</Document>'
        for doc in search_docs
    )
    return {"wiki_results": formatted_search_docs}
|
|
|
|
@tool
def web_search(query: str) -> dict:
    """Search Tavily for a query and return maximum 3 results.

    Args:
        query: The search query.

    Returns:
        A dict with a single key, "web_results", whose value is one string
        containing every search hit (URL and content), separated by "---"
        divider lines. If the search does not return a list of hits, the raw
        response is returned as a string under the same key.
    """
    # invoke() takes the tool input as its first positional argument; passing
    # only query=... as a keyword leaves that argument missing.
    search_docs = TavilySearchResults(max_results=3).invoke({"query": query})

    # NOTE(review): on failure the Tavily tool appears to return an error
    # string instead of a list of hits -- pass it through rather than
    # iterating over its characters. Confirm against the installed version.
    if not isinstance(search_docs, list):
        return {"web_results": str(search_docs)}

    # Tavily hits are plain dicts ("url", "content"), not Document objects,
    # so they are read with dict access rather than .metadata/.page_content.
    formatted_search_docs = "\n\n---\n\n".join(
        f'<Document source="{doc.get("url", "")}" page=""/>\n{doc.get("content", "")}\n</Document>'
        for doc in search_docs
        if isinstance(doc, dict)
    )
    return {"web_results": formatted_search_docs}
|
|
|
|
@tool
def arxiv_search(query: str) -> dict:
    """Search Arxiv for a query and return maximum 3 results.

    Args:
        query: The search query.

    Returns:
        A dict with a single key, "arvix_results", whose value is one string
        containing every loaded paper (source label plus the first 1000
        characters of its content), separated by "---" divider lines.
    """
    search_docs = ArxivLoader(query=query, load_max_docs=3).load()

    formatted = []
    for doc in search_docs:
        meta = doc.metadata
        # NOTE(review): ArxivLoader's default metadata does not appear to
        # include a "source" key (it carries Title/Authors/Published/Summary),
        # so a hard ["source"] lookup raises KeyError. Fall back to the entry
        # id or title when "source" is absent -- confirm key names against
        # the installed loader version.
        source = (
            meta.get("source")
            or meta.get("entry_id")
            or meta.get("Title", "")
        )
        formatted.append(
            f'<Document source="{source}" page="{meta.get("page", "")}"/>\n{doc.page_content[:1000]}\n</Document>'
        )
    formatted_search_docs = "\n\n---\n\n".join(formatted)

    # NOTE: the key is misspelled ("arvix") but is kept unchanged because
    # callers may already depend on it.
    return {"arvix_results": formatted_search_docs}
|
|