CUMANI Paolo
[CHG] Working agent implementation
17e605d
import base64
import io
import pandas as pd
from youtube_transcript_api import YouTubeTranscriptApi
from langchain_core.tools import tool
from langchain_experimental.utilities import PythonREPL
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.utilities import WikipediaAPIWrapper
from langchain_community.tools import DuckDuckGoSearchRun, WikipediaQueryRun, ArxivQueryRun
from langchain_tavily.tavily_search import TavilySearch
@tool
def python_repl_tool(command: str) -> str:
    """A tool to execute Python commands. If you want to see the output of a value, you should print it out with `print(...)`.

    Args:
        command (str): A valid Python command to execute.

    Returns:
        str: The output of the command.
    """
    print('Python shell tool called')
    # SECURITY NOTE: this runs arbitrary, model-generated Python code on the
    # host. Only expose this tool in a sandboxed / trusted environment.
    # `run` is an instance method, so the REPL must be instantiated first;
    # calling it on the class binds `command` to `self` and raises TypeError.
    repl = PythonREPL()
    result = repl.run(command)
    return str(result)
@tool
def read_excel_csv(input_str: str, file_type: str = 'csv') -> str:
    """
    Summarizes a csv or excel file given as a base64-encoded string or as a file path.

    Args:
        input_str (str): String containing a base64-encoded file or its path.
        file_type (str): Type of the file ('csv', or 'excel'/'xlsx').

    Returns:
        str: Number of rows and columns, the column names and the summary
            statistics (``DataFrame.describe()``) of the loaded file.

    Raises:
        ValueError: If ``file_type`` is not one of 'csv', 'xlsx' or 'excel'.
    """
    import os

    print(f'Read excel/csv tool called {file_type} ({input_str[:20]})')

    if file_type not in ('csv', 'xlsx', 'excel'):
        raise ValueError("Unsupported file_type. Use 'csv' or 'excel'.")

    if os.path.exists(input_str):
        # An existing path always wins: some paths (e.g. "dir/data") are also
        # decodable as base64 and would otherwise be turned into garbage bytes.
        source = input_str
    else:
        try:
            # Strip whitespace so line-wrapped base64 is accepted, then decode
            # strictly so that non-base64 input is rejected instead of having
            # its invalid characters silently dropped.
            compact = "".join(input_str.split())
            source = io.BytesIO(base64.b64decode(compact, validate=True))
        except ValueError:
            # binascii.Error is a ValueError subclass. Not valid base64:
            # treat the input as a path and let pandas report a missing file.
            source = input_str

    # Load into a DataFrame based on file type
    if file_type == 'csv':
        df = pd.read_csv(source)
    else:
        df = pd.read_excel(source)

    # Column labels are not necessarily strings (e.g. integer headers).
    column_names = ', '.join(map(str, df.columns))
    return (
        f"{file_type.upper()} file loaded with {len(df)} rows and {len(df.columns)} columns.\n"
        f"Columns: {column_names}\n\n"
        "Summary statistics:\n"
        f"{df.describe()}"
    )
@tool
def wikipedia_query_tool(query: str) -> str:
    """A tool to query Wikipedia. It returns a summary of the page, not the full content. To get the full content, you can use another tool.
    Args:
        query (str): A search query for Wikipedia.
    Returns:
        str: A summary of the related Wikipedia page."""
    # The docstring above doubles as the tool description, so its wording is
    # kept as-is.
    print('Wikipedia query tool called:', query)
    # Limit the lookup to the two best matching pages.
    api_wrapper = WikipediaAPIWrapper(top_k_results=2)
    summary = WikipediaQueryRun(api_wrapper=api_wrapper).run(query)
    print(f"Wikipedia query {query} result (limited to 10 chars): {summary[:10]}")
    return summary.strip()
@tool
def arxiv_query_tool(query: str) -> str:
    """A tool to query arXiv.org
    Useful for when you need to answer physics, mathematics, computer science, quantitative biology, quantitative finance, statistics, electrical engineering and systems science, and economics
    questions from scientific articles on arxiv.
    Args:
        query (str): A search query for ArXiv.
    Returns:
        str: The text content of the ArXiv page.
    """
    # The docstring above doubles as the tool description, so its wording is
    # kept as-is.
    print('ArXiv query tool called', query)
    answer = ArxivQueryRun().run(query)
    # Log only a short preview of the result.
    preview = answer[:50]
    print(f"ArXiv query {query} result (limited to 50 chars): {preview}")
    return answer.strip()
@tool
def webpage_reader_tool(page_url: str) -> str:
    """A tool to read the full content of a webpage.

    Args:
        page_url (str): A valid URL of the webpage to read.

    Returns:
        str: The text content of the webpage.

    Raises:
        ValueError: If the loader does not return exactly one document.
    """
    print('Web page reader tool called', page_url)
    loader = WebBaseLoader(web_paths=[page_url])
    docs = list(loader.lazy_load())
    # Explicit check instead of `assert`, which is stripped under `python -O`.
    if len(docs) != 1:
        raise ValueError(
            f"Expected exactly one document for {page_url}, got {len(docs)}"
        )
    doc = docs[0]
    # A Document has no `.get`; the page title lives in its metadata dict.
    title = doc.metadata.get("title", "")
    return f'<Document source="{page_url}" title="{title}"/>\n{doc.page_content.strip()}\n</Document>'
@tool
def web_search_tool(query: str) -> str:
    """Search internet for a query and return maximum 3 results.

    Tavily is tried first; if it fails for any reason the query is passed to
    DuckDuckGo instead.

    Args:
        query: The search query.

    Returns:
        str: The formatted search results.
    """
    print('Web search tool called', query)

    def _format_results(results) -> str:
        # One pseudo-XML <Document> entry per result dict, separated by rules.
        return "\n\n---\n\n".join(
            f'<Document source="{doc.get("url", "")}" title="{doc.get("title", "")}"/>\n{doc.get("content", "")}\n</Document>'
            for doc in results
        )

    try:
        search_docs = TavilySearch(max_results=3).invoke(query)
        return _format_results(search_docs['results'])
    except Exception as e:
        # Deliberate catch-all: any Tavily failure (missing API key, network
        # error, unexpected payload) triggers the DuckDuckGo fallback.
        print(f'\tError {e}, passing to DuckDuckgo')

    fallback = DuckDuckGoSearchRun().invoke(query)
    if isinstance(fallback, dict) and 'results' in fallback:
        return _format_results(fallback['results'])
    # DuckDuckGoSearchRun returns the snippets as one plain string, so it
    # cannot be indexed like the Tavily payload; wrap it as a single document.
    return f'<Document source="DuckDuckGo search" title=""/>\n{str(fallback).strip()}\n</Document>'
def _extract_youtube_video_id(video_ref: str) -> str:
    """Return the bare video ID from a YouTube URL, or the input if it already is an ID."""
    from urllib.parse import parse_qs, urlparse

    ref = video_ref.strip()
    # Anything without URL punctuation is taken to be a bare ID already.
    if not any(ch in ref for ch in "/?=&"):
        return ref

    parsed = urlparse(ref if "://" in ref else f"https://{ref}")
    # Standard form: https://www.youtube.com/watch?v=<id>&...
    ids = parse_qs(parsed.query).get("v")
    if ids:
        return ids[0]

    segments = [part for part in parsed.path.split("/") if part]
    host = (parsed.hostname or "").lower()
    # Short links: https://youtu.be/<id>
    if host.endswith("youtu.be") and segments:
        return segments[0]
    # Path-based forms: /shorts/<id>, /embed/<id>, /live/<id>, /v/<id>
    if len(segments) >= 2 and segments[0] in {"shorts", "embed", "live", "v"}:
        return segments[1]

    # Unknown shape: fall back to the simple 'v=' split.
    return ref.split('v=')[-1].split('&')[0]


@tool
def transcribe_youtube_video_tool(video_id: str) -> str:
    """A tool to transcribe the audio of a YouTube video.

    Args:
        video_id (str): A valid YouTube video ID or URL.

    Returns:
        str: The transcribed text of the video, or an explanatory message
            when transcription is disabled for the video.
    """
    # The exception class is exported by the package; it is not reachable as
    # an attribute of a YouTubeTranscriptApi instance.
    from youtube_transcript_api import TranscriptsDisabled

    print(f"Transcribing YouTube video with ID: {video_id}")
    video_id = _extract_youtube_video_id(video_id)

    transcript_api = YouTubeTranscriptApi()
    try:
        transcript = transcript_api.fetch(video_id)
    except TranscriptsDisabled as e:
        return f"Transcription is disabled for this video: {e}"

    transcript_text = ' '.join(entry.text for entry in transcript)
    print(f"\t {transcript_text}")
    return transcript_text.strip()