File size: 8,370 Bytes
3945599
 
 
 
 
ccce173
 
 
 
 
 
 
 
 
 
 
3945599
 
 
ccce173
3945599
 
 
 
 
ccce173
 
 
 
 
 
 
 
 
3945599
 
ccce173
 
3945599
 
 
 
 
 
ccce173
3945599
 
 
ccce173
 
 
 
 
 
 
 
 
 
3945599
 
 
 
 
 
 
 
 
 
 
ccce173
 
 
 
 
 
 
 
 
 
 
3945599
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ccce173
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3945599
ccce173
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
from typing import List
from langchain_core.tools import tool
from langchain_community.document_loaders import WikipediaLoader, YoutubeLoader
from langchain_community.tools import DuckDuckGoSearchResults
from langchain_community.utilities import DuckDuckGoSearchAPIWrapper
from langchain_ollama import ChatOllama
from langchain_sandbox import PyodideSandbox
import base64
from langchain_core.messages import HumanMessage, SystemMessage
import torch
from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
from docling.document_converter import DocumentConverter
from langchain_tavily import TavilySearch

# Shared docling converter, created once at import time; wikipedia_search and
# read_file_content both call its convert() method and export the result as markdown.
doc_converter = DocumentConverter()

@tool
def wikipedia_search(query: str) -> str:
    """
    Search Wikipedia for a given query and return max 1 result.

    Args:
        query: The search query.
    """
    marker = "From Wikipedia"
    search_docs = WikipediaLoader(query=query, load_max_docs=1).load()
    if not search_docs:
        # An empty string gives the caller nothing to act on; say so explicitly.
        return f"No Wikipedia article found for '{query}'."

    formatted_docs = []
    for search_doc in search_docs:
        # Convert the page referenced by metadata["source"] (presumably the
        # article URL - confirm against WikipediaLoader) into markdown.
        markdown = doc_converter.convert(search_doc.metadata["source"]).document.export_to_markdown()
        # Skip everything before the "From Wikipedia" marker when it is present;
        # find() returns -1 otherwise, in which case the whole page is kept.
        start_index = max(markdown.find(marker), 0)
        # The opening tag is a real opening tag (not self-closing) so that it
        # pairs with the closing </Document>, matching web_search's format.
        formatted_docs.append(
            f'<Document title="{search_doc.metadata["title"]}">\n{markdown[start_index:]}\n</Document>'
        )
    return "\n\n---\n\n".join(formatted_docs)

@tool
def youtube_transcript(url: str) -> str:
    """
    Returns the transcript of a YouTube video given its URL.
    This is a text-based tool and should not be used for visual information of the video.
    Args:
        url: The YouTube video URL.
    """
    max_tries = 3
    for _ in range(max_tries):
        try:
            transcripts = YoutubeLoader.from_youtube_url(url, add_video_info=False).load()
            # Only the first loaded document is returned; an empty result raises
            # IndexError here and is treated like any other failed attempt.
            return f"Video Transcript: {transcripts[0].page_content}"
        except Exception as e:
            # Best-effort retry: report the failure and try again.
            print(f"Attempt failed: {e}")
    # If all attempts fail, return an error message
    return "No transcript available. This video might not have a transcript or the URL is invalid."
    
    
@tool
def web_search(query: str) -> str:
    """
    Perform a web search for the given query and return the results.
    Use this when you need to find current or factual information.
    Args:
        query: The search query.
    """
    tavily_search = TavilySearch(max_results=3)
    search_docs = tavily_search.invoke(query)

    # NOTE(review): a failed Tavily call presumably returns a payload without a
    # "results" list (e.g. an error dict) - confirm against langchain_tavily.
    # Guard the lookup so the tool answers with text instead of a KeyError/TypeError.
    results = search_docs.get("results") if isinstance(search_docs, dict) else None
    if not results:
        return f"Web search for '{query}' returned no results."

    # One <Document> element per hit, separated by a horizontal rule.
    formatted_search_docs = "\n\n---\n\n".join(
        f'<Document href="{doc["url"]}">\n{doc["content"]}\n</Document>'
        for doc in results
    )
    return f"Web search results for '{query}':\n\n{formatted_search_docs}"

@tool
def add_numbers(numbers: List[float]) -> float:
    """
    Sum all numbers of a list. E.g [1, 2, 3] -> 6
    Args:
        numbers: The list of numbers to add up.
    """
    # Explicit start value 0 is what sum() uses by default; an empty list yields 0.
    total = sum(numbers, 0)
    return total

@tool
def multiply_numbers(numbers: List[float]) -> float:
    """
    Compute the product of a list of numbers. E.g [3, 2, 3] -> 18
    Args:
        numbers: The list of numbers to multiply with each other.
    """
    # Start from the multiplicative identity so an empty list yields 1.
    product = 1
    for factor in numbers:
        product = product * factor
    return product


# Ollama-served chat model ("gemma3:27b") that image_question_answering invokes
# with a text question plus a base64-encoded image.
vision_llm = ChatOllama(model="gemma3:27b")

# might be better to use supervisor method..
@tool
def image_question_answering(img_path: str, question: str) -> str:
    """
    Given an image path and a question, return the answer to the question based on the image. Just pass the initial question from the human as a query.
    Args:
        img_path: The path to the image.
        question: The question to ask about the image.
    """
    system_prompt = """
    You are a helpful assistant that can answer questions about images.
    You need to think step by step carefully, provide your thinking process and finish your answer with the following template: FINAL ANSWER: [YOUR FINAL ANSWER]
    """

    try:
        # Read image and encode as base64
        with open(img_path, "rb") as image_file:
            image_bytes = image_file.read()

        image_base64 = base64.b64encode(image_bytes).decode("utf-8")

        # The caller's question is forwarded unchanged. (A hard-coded chess
        # prompt used to overwrite it here, so every image got the same question.)
        message = [
            SystemMessage(content=system_prompt),
            HumanMessage(
                content=[
                    {
                        "type": "text",
                        "text": question,
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            # Image is sent inline as a data URL.
                            "url": f"data:image/png;base64,{image_base64}"
                        },
                    },
                ]
            )
        ]

        # Call the vision-capable model
        response = vision_llm.invoke(message)

        return response.content

    except Exception as e:
        # Any failure (unreadable file, model error) is reported back as text
        # rather than raised, so the calling agent can react to it.
        error_msg = f"Error image questioning: {str(e)}"
        print(error_msg)
        return error_msg
    
# Speech-recognition setup used by speech_to_text. The model is loaded once at
# import time from the local checkpoint directory.
#
# Device selection: Apple's MPS backend is still preferred (it used to be
# hard-coded), but fall back to CUDA and then CPU so the module can be imported
# on machines that have no MPS device.
if torch.backends.mps.is_available():
    device = "mps"
elif torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
checkpoint = "./whisper-large-v3"
model = AutoModelForSpeechSeq2Seq.from_pretrained(
    checkpoint, torch_dtype=torch.float32, low_cpu_mem_usage=True, use_safetensors=True
)
model.to(device)
# The processor supplies both the tokenizer and the feature extractor for the pipeline.
processor = AutoProcessor.from_pretrained(checkpoint)
pipe = pipeline(
    "automatic-speech-recognition",
    model=model,
    tokenizer=processor.tokenizer,
    feature_extractor=processor.feature_extractor,
    torch_dtype=torch.float32,
    device=device,
)

@tool
def speech_to_text(audio_path: str) -> str:
    """
    Convert speech to text using a given audio file. Not for youtube links.
    Args:
        audio_path: The path to the audio file.
    """
    try:
        try:
            return pipe(audio_path)["text"].strip()
        except Exception:
            # Second attempt with timestamps enabled - presumably needed for
            # long recordings, where the pipeline asks for return_timestamps=True
            # (confirm against the transformers ASR pipeline docs).
            return pipe(audio_path, return_timestamps=True)["text"].strip()
    except Exception as e:
        # Reached when the fallback fails as well. Previously this handler was a
        # second `except Exception` on the same `try`, which is unreachable, so
        # errors from the fallback escaped the tool.
        return f"Error processing audio file: {str(e)}"

@tool
def read_file_content(path: str) -> str:
    """
    Read the content of a file (pdf, docs, xlsx, etc.) but also from a URL (like arxiv or websites) and returns it as markdown.
    Args:
        path: The path to the file, or the URL to read.
    """
    try:
        # Convert the source with the shared converter and export it as markdown.
        markdown = doc_converter.convert(path).document.export_to_markdown()
    except Exception as e:
        # Conversion problems are returned as text instead of being raised.
        return f"Error reading file: {str(e)}"
    return f"File Content:\n\n{markdown}"

# Sandbox instance that run_python_code uses to execute code.
sandbox = PyodideSandbox(
    # Network access is enabled so that Pyodide can install Python
    # packages that the executed code might require.
    allow_net=True,
)

@tool
async def run_python_code(input_type: str, input: str) -> str:
    """
    Run Python code in a sandboxed environment. You can provide either a code snippet or a file path.
    1. If input_type is "code", input should be a string containing the Python code to run.
    2. If input_type is "file", input should be a string containing the path to the file.
    Args:
        input_type: The type of input, code or file.
        input: The Python code to run or the path to the file.
    """
    try:
        if input_type == "code":
            code = input
        elif input_type == "file":
            # Python source is UTF-8 by default; read it explicitly as UTF-8
            # instead of relying on the platform's locale encoding.
            with open(input, "r", encoding="utf-8") as file:
                code = file.read()
        else:
            return "Invalid input type. Please provide 'code' or 'file' as input_type."
        result = await sandbox.execute(code)
        return f"Result execution: result: {result.result}, stdout: {result.stdout}, stderr: {result.stderr}, status: {result.status}"
    except Exception as e:
        # File-access and sandbox failures are reported as text to the caller.
        return f"Error executing Python code: {str(e)}"
    
@tool
def reverse_string(input: str) -> str:
    """
    Return the given string with its characters in reverse order.
    Args:
        input: The string to reverse.
    """
    # Walk the characters back to front and glue them together again.
    backwards = reversed(input)
    return "".join(backwards)



# All tools defined in this module, collected in one list.
TOOLS = [
    wikipedia_search,
    web_search,
    youtube_transcript,
    add_numbers,
    multiply_numbers,
    image_question_answering,
    speech_to_text,
    read_file_content,
    run_python_code,
    reverse_string,
]