# jensenwiedler
# basic agent with 30 score
# ccce173
from typing import List
from langchain_core.tools import tool
from langchain_community.document_loaders import WikipediaLoader, YoutubeLoader
from langchain_community.tools import DuckDuckGoSearchResults
from langchain_community.utilities import DuckDuckGoSearchAPIWrapper
from langchain_ollama import ChatOllama
from langchain_sandbox import PyodideSandbox
import base64
from langchain_core.messages import HumanMessage, SystemMessage
import torch
from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
from docling.document_converter import DocumentConverter
from langchain_tavily import TavilySearch
# Shared docling converter, created once at import time and reused by the
# tools in this module that turn web pages and files into markdown.
doc_converter = DocumentConverter()
@tool
def wikipedia_search(query: str) -> str:
    """
    Search Wikipedia for a given query and return max 1 result.

    The matching page is fetched again from its source URL and converted to
    markdown; everything before the "From Wikipedia" marker is dropped.

    Args:
        query: The search query.

    Returns:
        Each page wrapped in a <Document title="..."> ... </Document> block,
        joined by "---" separators; an empty string when the loader returns
        no documents.
    """
    search_docs = WikipediaLoader(query=query, load_max_docs=1).load()

    formatted_docs = []
    for search_doc in search_docs:
        # Only the URL and title are taken from the loader result; the body
        # text comes from docling's markdown conversion of that URL.
        markdown = doc_converter.convert(search_doc.metadata["source"]).document.export_to_markdown()
        # Drop whatever precedes the marker (presumably page navigation);
        # find() returns -1 when the marker is absent, so clamp to 0 to keep
        # the whole document in that case.
        start_index = max(markdown.find("From Wikipedia"), 0)
        # The opening tag must not be self-closing: it is paired with the
        # closing </Document> tag below.
        formatted_docs.append(
            f'<Document title="{search_doc.metadata["title"]}">\n{markdown[start_index:]}\n</Document>'
        )
    return "\n\n---\n\n".join(formatted_docs)
@tool
def youtube_transcript(url: str) -> str:
    """
    Returns the transcript of a YouTube video given its URL.
    This is a text-based tool and should not be used for visual information of the video.

    Args:
        url: The YouTube video URL.

    Returns:
        The transcript prefixed with "Video Transcript: ", or a fallback
        message when every attempt fails.
    """
    max_tries = 3
    for _ in range(max_tries):
        try:
            transcripts = YoutubeLoader.from_youtube_url(url, add_video_info=False).load()
            return f"Video Transcript: {transcripts[0].page_content}"
        except Exception as e:
            # Best-effort: any failure (including an empty result, which
            # raises IndexError on the line above) is reported and retried.
            print(f"Attempt failed: {e}")
    # If all attempts fail, return an error message
    return "No transcript available. This video might not have a transcript or the URL is invalid."
@tool
def web_search(query: str) -> str:
    """
    Run a web search for the given query and return the results.
    Use this when you need to find current or factual information.

    Args:
        query: The search query.
    """
    separator = "\n\n---\n\n"
    # At most three hits are requested from Tavily per call.
    response = TavilySearch(max_results=3).invoke(query)
    # One <Document> block per hit, carrying its URL and content snippet.
    rendered_hits = separator.join(
        f'<Document href="{hit["url"]}">\n{hit["content"]}\n</Document>'
        for hit in response["results"]
    )
    return f"Web search results for '{query}':\n\n{rendered_hits}"
@tool
def add_numbers(numbers: List[float]) -> float:
    """
    Return the sum of all values in a list. E.g [1, 2, 3] -> 6

    Args:
        numbers: A list of numbers to add.
    """
    # Built-in sum() is kept on purpose: a hand-written accumulation loop can
    # round float inputs differently.
    total = sum(numbers)
    return total
@tool
def multiply_numbers(numbers: List[float]) -> float:
    """
    Return the product of all values in a list. E.g [3, 2, 3] -> 18

    Args:
        numbers: A list of numbers to multiply.
    """
    # Start from the multiplicative identity so an empty list yields 1.
    product = 1
    for factor in numbers:
        product = product * factor
    return product
# Chat model served through Ollama, created once at import time; the
# image_question_answering tool sends it the image together with the question.
vision_llm = ChatOllama(model="gemma3:27b")
# NOTE: might be better to use a supervisor method instead.
@tool
def image_question_answering(img_path: str, question: str) -> str:
    """
    Given an image path and a question, return the answer to the question based on the image. Just pass the initial question from the human as a query.

    Args:
        img_path: The path to the image.
        question: The question to ask about the image.

    Returns:
        The vision model's reply, or an "Error image questioning: ..."
        message when the image cannot be read or the model call fails.
    """
    system_prompt = (
        "\n"
        "You are a helpful assistant that can answer questions about images.\n"
        "You need to think step by step carefully, provide your thinking process and finish your answer with the following template: FINAL ANSWER: [YOUR FINAL ANSWER]\n"
    )
    try:
        # Read image and encode as base64
        with open(img_path, "rb") as image_file:
            image_base64 = base64.b64encode(image_file.read()).decode("utf-8")

        # The caller's question is forwarded unchanged; it must not be
        # replaced by a fixed prompt, or every image gets the same question.
        message = [
            SystemMessage(content=system_prompt),
            HumanMessage(
                content=[
                    {
                        "type": "text",
                        "text": question,
                    },
                    {
                        "type": "image_url",
                        # The data URL always declares PNG, whatever the
                        # actual file type is.
                        "image_url": {
                            "url": f"data:image/png;base64,{image_base64}"
                        },
                    },
                ]
            ),
        ]
        # Call the vision-capable model
        response = vision_llm.invoke(message)
        return response.content
    except Exception as e:
        # Tool boundary: report the failure as text instead of raising.
        error_msg = f"Error image questioning: {str(e)}"
        print(error_msg)
        return error_msg
# Speech-recognition setup, executed once at import time.
# Prefer Apple's MPS backend (the original target), then CUDA, then CPU, so
# that moving the model to the device does not fail on machines without MPS.
if torch.backends.mps.is_available():
    device = "mps"
elif torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
# Local directory holding the Whisper checkpoint.
checkpoint = "./whisper-large-v3"
model = AutoModelForSpeechSeq2Seq.from_pretrained(
    checkpoint, torch_dtype=torch.float32, low_cpu_mem_usage=True, use_safetensors=True
)
model.to(device)
# The processor bundles the tokenizer and the feature extractor that the
# pipeline below needs.
processor = AutoProcessor.from_pretrained(checkpoint)
# Shared ASR pipeline used by the speech_to_text tool.
pipe = pipeline(
    "automatic-speech-recognition",
    model=model,
    tokenizer=processor.tokenizer,
    feature_extractor=processor.feature_extractor,
    torch_dtype=torch.float32,
    device=device,
)
@tool
def speech_to_text(audio_path: str) -> str:
    """
    Convert speech to text using a given audio file. Not for youtube links.

    Args:
        audio_path: The path to the audio file.

    Returns:
        The transcription with surrounding whitespace removed, or an
        "Error processing audio file: ..." message when both transcription
        attempts fail.
    """
    try:
        result = pipe(audio_path)
        return result["text"].strip()
    except Exception:
        # First attempt failed: retry with timestamps enabled (presumably
        # required for long-form audio - TODO confirm). The retry needs its
        # own handler; a second `except` on the outer `try` would never run
        # and a failure here would propagate to the caller.
        try:
            result = pipe(audio_path, return_timestamps=True)
            return result["text"].strip()
        except Exception as e:
            return f"Error processing audio file: {str(e)}"
@tool
def read_file_content(path: str) -> str:
    """
    Read the content of a file (pdf, docs, xlsx, etc.) but also from a URL (like arxiv or websites) and returns it as markdown.

    Args:
        path: The path to the file, or a URL.

    Returns:
        The markdown prefixed with "File Content:", or an
        "Error reading file: ..." message when the conversion fails.
    """
    try:
        markdown = doc_converter.convert(path).document.export_to_markdown()
    except Exception as e:
        # Tool boundary: report the failure as text instead of raising.
        return f"Error reading file: {str(e)}"
    return f"File Content:\n\n{markdown}"
# Shared Pyodide sandbox, created once at import time; the run_python_code
# tool executes code through it.
sandbox = PyodideSandbox(
    # Allow Pyodide to install python packages that
    # might be required.
    allow_net=True,
)
@tool
async def run_python_code(input_type: str, input: str) -> str:
    """
    Execute Python in the sandboxed environment, from either an inline snippet or a file.
    1. If input_type is "code", input should be a string containing the Python code to run.
    2. If input_type is "file", input should be a string containing the path to the file.

    Args:
        input_type: The type of input, code or file.
        input: The Python code to run or the path to the file.
    """
    try:
        if input_type == "file":
            # Load the source text from disk before handing it to the sandbox.
            with open(input, "r") as source_file:
                source = source_file.read()
        elif input_type == "code":
            source = input
        else:
            return "Invalid input type. Please provide 'code' or 'file' as input_type."

        outcome = await sandbox.execute(source)
        return f"Result execution: result: {outcome.result}, stdout: {outcome.stdout}, stderr: {outcome.stderr}, status: {outcome.status}"
    except Exception as e:
        # Tool boundary: report any failure as text instead of raising.
        return f"Error executing Python code: {str(e)}"
@tool
def reverse_string(input: str) -> str:
    """
    Return the given string with its characters in reverse order.

    Args:
        input: The string to reverse.
    """
    return "".join(reversed(input))
# Every tool defined in this module, in the order it is offered to the agent.
TOOLS = [
    wikipedia_search,
    web_search,
    youtube_transcript,
    add_numbers,
    multiply_numbers,
    image_question_answering,
    speech_to_text,
    read_file_content,
    run_python_code,
    reverse_string,
]