File size: 5,876 Bytes
17e605d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
import base64
import io
import pandas as pd
from youtube_transcript_api import YouTubeTranscriptApi
from langchain_core.tools import tool
from langchain_experimental.utilities import PythonREPL
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.utilities import WikipediaAPIWrapper
from langchain_community.tools import DuckDuckGoSearchRun, WikipediaQueryRun, ArxivQueryRun
from langchain_tavily.tavily_search import TavilySearch

@tool
def python_repl_tool(command: str) -> str:
    """A tool to execute Python commands. If you want to see the output of a value, you should print it out with `print(...)`.

    Args:
        command (str): A valid Python command to execute.
    Returns:
        str: The output of the command (stdout captured by the REPL)."""
    print('Python shell tool called')
    # BUG FIX: PythonREPL.run is an instance method — calling it on the
    # class passed `command` as `self` and crashed. Instantiate first.
    repl = PythonREPL()
    result = repl.run(command)
    return str(result)

@tool
def read_excel_csv(input_str: str, file_type: str = 'csv') -> str:
    """
    Extracts information from a base64-encoded file or a path to a csv or excel file.

    Args:
        input_str (str): String containing a base64-encoded file or its path.
        file_type (str): Type of the file encoded in base64 ('csv' or 'excel').

    Returns:
        str: Row/column counts, column names and summary statistics of the file.

    Raises:
        ValueError: If `file_type` is not 'csv', 'xlsx' or 'excel'.
    """
    print(f'Read excel/csv tool called {file_type} ({input_str[:20]})')
    try:
        # validate=True makes the decoder reject any non-base64 character,
        # so real filesystem paths (which contain '.', '\\', etc.) reliably
        # fall through to the path branch instead of decoding into garbage.
        byte_path = io.BytesIO(base64.b64decode(input_str, validate=True))
    except (ValueError, TypeError):
        # Not valid base64 — treat the input as a file path (EAFP).
        byte_path = input_str

    # Load into a DataFrame based on file type (normalized, so 'CSV'
    # or ' excel ' from a sloppy caller still works).
    kind = file_type.strip().lower()
    if kind == 'csv':
        df = pd.read_csv(byte_path)
    elif kind in ('xlsx', 'excel'):
        df = pd.read_excel(byte_path)
    else:
        raise ValueError("Unsupported file_type. Use 'csv' or 'excel'.")

    result = f"{file_type.upper()} file loaded with {len(df)} rows and {len(df.columns)} columns.\n"
    result += f"Columns: {', '.join(df.columns)}\n\n"

    # Add summary statistics so the agent can reason about the data
    # without loading the full content into its context.
    result += "Summary statistics:\n"
    result += str(df.describe())
    return result

@tool
def wikipedia_query_tool(query: str) -> str:
    """A tool to query Wikipedia. It returns a summary of the page, not the full content. To get the full content, you can use another tool.
    Args:
        query (str): A search query for Wikipedia.
    Returns:
        str: A summary of the related Wikipedia page."""
    print('Wikipedia query tool called:', query)
    # Limit the wrapper to the two most relevant pages to keep the
    # summary short enough for the agent's context window.
    api_wrapper = WikipediaAPIWrapper(top_k_results=2)
    runner = WikipediaQueryRun(api_wrapper=api_wrapper)
    summary = runner.run(query)
    print(f"Wikipedia query {query} result (limited to 10 chars): {summary[:10]}")
    return summary.strip()

@tool
def arxiv_query_tool(query: str) -> str:
    """A tool to query arXiv.org
    Useful for when you need to answer physics, mathematics, computer science, quantitative biology, quantitative finance, statistics, electrical engineering and systems science, and economics 
    questions from scientific articles on arxiv.
    Args:
        query (str): A search query for ArXiv.
    Returns:
        str: The text content of the ArXiv page.
    """
    print('ArXiv query tool called', query)
    # Default ArxivQueryRun settings: returns the abstracts of the
    # top matching papers as a single text blob.
    runner = ArxivQueryRun()
    papers = runner.run(query)
    print(f"ArXiv query {query} result (limited to 50 chars): {papers[:50]}")
    return papers.strip()

@tool
def webpage_reader_tool(page_url: str) -> str:
    """A tool to read the full content of a webpage.
    Args:
        page_url (str): A valid URL of the webpage to read.
    Returns:
        str: The text content of the webpage, wrapped in a <Document> tag.
    """
    print('Web page reader tool called', page_url)
    loader = WebBaseLoader(web_paths=[page_url])
    docs = list(loader.lazy_load())

    # Graceful handling instead of the old `assert len(docs) == 1`:
    # asserts are stripped under -O and crash the agent on odd loaders.
    if not docs:
        return f'<Document source="{page_url}"/>\nNo content could be loaded.\n</Document>'
    doc = docs[0]

    # BUG FIX: langchain Document has no .get(); the page title lives
    # in the metadata dict.
    title = doc.metadata.get("title", "")
    return f'<Document source="{page_url}" title="{title}"/>\n{doc.page_content.strip()}\n</Document>'

@tool
def web_search_tool(query: str) -> str:
    """Search internet for a query and return maximum 3 results.
    Args:
        query: The search query.
    Returns:
        str: The formatted search results.
    """

    print('Web search tool called', query)

    try:
        # TavilySearch returns {'results': [{'url', 'title', 'content', ...}]}.
        search_docs = TavilySearch(max_results=3).invoke(query)
        formatted_search_docs = "\n\n---\n\n".join(
            [
                f'<Document source="{doc.get("url", "")}" title="{doc.get("title", "")}"/>\n{doc.get("content", "")}\n</Document>'
                for doc in search_docs['results']
            ]
            )
    except Exception as e:
        print(f'\tError {e}, passing to DuckDuckgo')
        # BUG FIX: DuckDuckGoSearchRun returns a single plain string, not a
        # Tavily-style dict — the old dict-iteration always raised TypeError
        # here, defeating the fallback. Wrap the string result directly.
        fallback = DuckDuckGoSearchRun().invoke(query)
        formatted_search_docs = f'<Document source="duckduckgo"/>\n{fallback}\n</Document>'
    return formatted_search_docs

@tool
def transcribe_youtube_video_tool(video_id: str) -> str:
    """A tool to transcribe the audio of a YouTube video.
    Args:
        video_id (str): A valid YouTube video ID or URL (watch or youtu.be form).
    Returns:
        str: The transcribed text of the video, or an error message when
        transcription is disabled for the video.
    """
    # Function-scoped import keeps the module-level import block unchanged.
    from youtube_transcript_api import TranscriptsDisabled

    print(f"Transcribing YouTube video with ID: {video_id}")
    # Accept both URL styles as well as a bare video ID.
    if 'youtu.be/' in video_id:
        # Short-link form: https://youtu.be/<id>?t=...
        video_id = video_id.split('youtu.be/')[-1].split('?')[0].split('&')[0]
    elif 'youtube' in video_id or 'watch' in video_id:
        # Standard form: https://www.youtube.com/watch?v=<id>&...
        video_id = video_id.split('v=')[-1].split('&')[0]

    transcript_api = YouTubeTranscriptApi()
    try:
        transcript = transcript_api.fetch(video_id)
        transcript_text = ' '.join([entry.text for entry in transcript])
        print(f"\t {transcript_text}")
        return transcript_text.strip()
    # BUG FIX: TranscriptsDisabled lives in the youtube_transcript_api
    # package, not on the API instance — the old
    # `transcript_api._errors.TranscriptsDisabled` raised AttributeError
    # whenever the handler was actually needed.
    except TranscriptsDisabled as e:
        return f"Transcription is disabled for this video: {e}"