|
|
import base64
import io

import pandas as pd
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.tools import ArxivQueryRun, DuckDuckGoSearchRun, WikipediaQueryRun
from langchain_community.utilities import WikipediaAPIWrapper
from langchain_core.tools import tool
from langchain_experimental.utilities import PythonREPL
from langchain_tavily.tavily_search import TavilySearch
from youtube_transcript_api import TranscriptsDisabled, YouTubeTranscriptApi
|
|
|
|
|
@tool
def python_repl_tool(command: str) -> str:
    """A tool to execute Python commands. If you want to see the output of a value, you should print it out with `print(...)`.

    Args:
        command (str): A valid Python command to execute.

    Returns:
        str: The output of the command."""
    print('Python shell tool called')
    # PythonREPL.run is an instance method; the original called it on the
    # class, which bound `command` as `self` and failed. Instantiate first.
    result = PythonREPL().run(command)
    return str(result)
|
|
|
|
|
@tool
def read_excel_csv(input_str: str, file_type: str = 'csv') -> str:
    """
    Extracts information from a base64-encoded file or a path to a csv or excel file.

    Args:
        input_str (str): String containing a base64-encoded file or its path.
        file_type (str): Type of the file encoded in base64 ('csv' or 'excel').

    Returns:
        str: Content of input file.

    Raises:
        ValueError: If `file_type` is not 'csv', 'excel', or 'xlsx'.
    """
    print(f'Read excel/csv tool called {file_type} ({input_str[:20]})')
    try:
        # validate=True makes the decode fail fast on strings that are not
        # strict base64 (e.g. file paths containing '.', '/'), so real paths
        # reliably fall through to the `except` branch instead of being
        # decoded into garbage bytes. Whitespace is stripped first because
        # long base64 payloads are often line-wrapped.
        byte_path = io.BytesIO(base64.b64decode(''.join(input_str.split()), validate=True))
    except Exception:
        # Not valid base64 -- treat the input as a filesystem path.
        byte_path = input_str

    if file_type == 'csv':
        df = pd.read_csv(byte_path)
    elif file_type in ['xlsx', 'excel']:
        df = pd.read_excel(byte_path)
    else:
        raise ValueError("Unsupported file_type. Use 'csv' or 'excel'.")

    result = f"{file_type.upper()} file loaded with {len(df)} rows and {len(df.columns)} columns.\n"
    result += f"Columns: {', '.join(df.columns)}\n\n"
    result += "Summary statistics:\n"
    result += str(df.describe())
    return result
|
|
|
|
|
@tool
def wikipedia_query_tool(query: str) -> str:
    """A tool to query Wikipedia. It returns a summary of the page, not the full content. To get the full content, you can use another tool.

    Args:
        query (str): A search query for Wikipedia.

    Returns:
        str: A summary of the related Wikipedia page."""
    print('Wikipedia query tool called:', query)
    # Up to two matching pages are fetched and summarized by the wrapper.
    api_wrapper = WikipediaAPIWrapper(top_k_results=2)
    summary = WikipediaQueryRun(api_wrapper=api_wrapper).run(query)
    print(f"Wikipedia query {query} result (limited to 10 chars): {summary[:10]}")
    return summary.strip()
|
|
|
|
|
@tool
def arxiv_query_tool(query: str) -> str:
    """A tool to query arXiv.org

    Useful for when you need to answer physics, mathematics, computer science, quantitative biology, quantitative finance, statistics, electrical engineering and systems science, and economics
    questions from scientific articles on arxiv.

    Args:
        query (str): A search query for ArXiv.

    Returns:
        str: The text content of the ArXiv page.
    """
    print('ArXiv query tool called', query)
    # Delegate the search and formatting entirely to the LangChain wrapper.
    papers = ArxivQueryRun().run(query)
    print(f"ArXiv query {query} result (limited to 50 chars): {papers[:50]}")
    return papers.strip()
|
|
|
|
|
@tool
def webpage_reader_tool(page_url: str) -> str:
    """A tool to read the full content of a webpage.

    Args:
        page_url (str): A valid URL of the webpage to read.

    Returns:
        str: The text content of the webpage.

    Raises:
        ValueError: If the loader does not yield exactly one document.
    """
    print('Web page reader tool called', page_url)
    loader = WebBaseLoader(web_paths=[page_url])
    docs = list(loader.lazy_load())

    # One web path should yield one document; validate explicitly instead of
    # using `assert`, which is stripped under `python -O`.
    if len(docs) != 1:
        raise ValueError(f"Expected exactly one document for {page_url}, got {len(docs)}")
    doc = docs[0]

    # LangChain Documents are objects, not dicts: the title lives in
    # `doc.metadata` (the original `doc.get(...)` raised AttributeError).
    title = doc.metadata.get("title", "")
    return f'<Document source="{page_url}" title="{title}"/>\n{doc.page_content.strip()}\n</Document>'
|
|
|
|
|
@tool
def web_search_tool(query: str) -> str:
    """Search internet for a query and return maximum 3 results.

    Args:
        query: The search query.

    Returns:
        str: The formatted search results.
    """
    print('Web search tool called', query)

    try:
        # Tavily returns a dict whose 'results' key holds per-page dicts.
        search_docs = TavilySearch(max_results=3).invoke(query)
        formatted_search_docs = "\n\n---\n\n".join(
            [
                f'<Document source="{doc.get("url", "")}" title="{doc.get("title", "")}"/>\n{doc.get("content", "")}\n</Document>'
                for doc in search_docs['results']
            ]
        )
    except Exception as e:
        print(f'\tError {e}, passing to DuckDuckgo')
        # DuckDuckGoSearchRun returns a plain string, not a Tavily-style
        # {'results': [...]} payload; indexing it with 'results' raised
        # TypeError and made the fallback always fail. Return the text as-is.
        formatted_search_docs = DuckDuckGoSearchRun().invoke(query)
    return formatted_search_docs
|
|
|
|
|
@tool
def transcribe_youtube_video_tool(video_id: str) -> str:
    """A tool to transcribe the audio of a YouTube video.

    Args:
        video_id (str): A valid YouTube video ID or URL.

    Returns:
        str: The transcribed text of the video.
    """
    print(f"Transcribing YouTube video with ID: {video_id}")
    # Accept full URLs as well as bare IDs.
    if 'youtu.be/' in video_id:
        # Short-link form: https://youtu.be/<id>?t=...  (the original URL
        # check missed these because 'youtube' is not a substring).
        video_id = video_id.split('youtu.be/')[-1].split('?')[0].split('&')[0]
    elif 'youtube' in video_id or 'watch' in video_id:
        # Standard form: https://www.youtube.com/watch?v=<id>&...
        video_id = video_id.split('v=')[-1].split('&')[0]

    transcript_api = YouTubeTranscriptApi()
    try:
        transcript = transcript_api.fetch(video_id)
        transcript_text = ' '.join([entry.text for entry in transcript])
        print(f"\t {transcript_text}")
        return transcript_text.strip()
    except TranscriptsDisabled as e:
        # The original caught `transcript_api._errors.TranscriptsDisabled`,
        # but API instances have no `_errors` attribute, so the handler
        # itself raised AttributeError. The exception class is exported by
        # the package root (see deps_update).
        return f"Transcription is disabled for this video: {e}"
|
|
|