# Final_Assignment_Template

# Runtime error

# File size: 8,370 Bytes

from typing import List
from langchain_core.tools import tool
from langchain_community.document_loaders import WikipediaLoader, YoutubeLoader
from langchain_community.tools import DuckDuckGoSearchResults
from langchain_community.utilities import DuckDuckGoSearchAPIWrapper
from langchain_ollama import ChatOllama
from langchain_sandbox import PyodideSandbox
import base64
from langchain_core.messages import HumanMessage, SystemMessage
import torch
from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
from docling.document_converter import DocumentConverter
from langchain_tavily import TavilySearch

# Module-wide docling converter, reused by wikipedia_search and read_file_content.
doc_converter = DocumentConverter()

@tool
def wikipedia_search(query: str) -> str:
    """
    Search Wikipedia for a given query and return max 1 result.

    Each hit returned by WikipediaLoader is converted to markdown by passing
    its "source" metadata entry to the docling converter. Everything that
    precedes the "From Wikipedia" marker is dropped when the marker is present.

    Args:
        query: The search query.

    Returns:
        The results as markdown, each wrapped in a
        <Document title="..."> ... </Document> element and separated by
        "---" lines. An empty string when the loader returns no documents.
    """
    search_docs = WikipediaLoader(query=query, load_max_docs=1).load()

    formatted_docs = []
    for search_doc in search_docs:
        markdown = doc_converter.convert(search_doc.metadata["source"]).document.export_to_markdown()
        # str.find returns -1 when the marker is missing; clamp to 0 so the
        # whole text is kept in that case.
        start_index = max(markdown.find("From Wikipedia"), 0)
        # The opening tag must not be self-closing, otherwise the trailing
        # </Document> has nothing to close.
        formatted_docs.append(
            f'<Document title="{search_doc.metadata["title"]}">\n{markdown[start_index:]}\n</Document>'
        )
    return "\n\n---\n\n".join(formatted_docs)

@tool
def youtube_transcript(url: str) -> str:
    """Returns the transcript of a YouTube video given its URL.
    This is a text-based tool and should not be used for visual information of the video.

    Loading is attempted up to three times; each failure is printed before
    the next attempt.

    Args:
        url: The YouTube video URL.

    Returns:
        The transcript prefixed with "Video Transcript: ", or a fixed
        "No transcript available..." message when every attempt failed.
    """
    max_tries = 3
    for _ in range(max_tries):
        try:
            transcripts = YoutubeLoader.from_youtube_url(url, add_video_info=False).load()
            return f"Video Transcript: {transcripts[0].page_content}"
        except Exception as e:
            # Best effort: report the failure and fall through to the next attempt.
            print(f"Attempt failed: {e}")
    # If all attempts fail, return an error message
    return "No transcript available. This video might not have a transcript or the URL is invalid."
    
    
@tool
def web_search(query: str) -> str:
    """
    Perform a web search for the given query and return the results.
    Use this when you need to find current or factual information.

    Args:
        query: The search query.

    Returns:
        A header line naming the query followed by up to three results, each
        wrapped in a <Document href="..."> element and separated by "---" lines.
    """
    response = TavilySearch(max_results=3).invoke(query)

    # Wrap every hit in a <Document> element that carries its URL.
    blocks = []
    for hit in response["results"]:
        blocks.append(f'<Document href="{hit["url"]}">\n{hit["content"]}\n</Document>')
    body = "\n\n---\n\n".join(blocks)

    return f"Web search results for '{query}':\n\n{body}"

@tool
def add_numbers(numbers: List[float]) -> float:
    """
    Sum up all the numbers of a list. E.g [1, 2, 3] -> 6

    Args:
        numbers: The list of numbers to add together.

    Returns:
        The total of all the numbers; 0 for an empty list.
    """
    total = sum(numbers)
    return total

@tool
def multiply_numbers(numbers: List[float]) -> float:
    """
    Multiply a list of numbers together. E.g [3, 2, 3] -> 18

    Args:
        numbers: A list of numbers to multiply.

    Returns:
        The product of all the numbers; 1 for an empty list.
    """
    # Imported locally because math is not among this module's top-level imports.
    import math

    return math.prod(numbers)


# Chat model accessed through ChatOllama; image_question_answering sends it the image prompts.
vision_llm = ChatOllama(model="gemma3:27b")

# might be better to use supervisor method..
@tool
def image_question_answering(img_path: str, question: str) -> str:
    """
    Given an image path and a question, return the answer to the question based on the image. Just pass the initial question from the human as a query.

    Args:
        img_path: The path to the image.
        question: The question to ask about the image.

    Returns:
        The content of the vision model's response, or a message starting
        with "Error image questioning:" when reading the image or calling
        the model fails.
    """
    system_prompt = """
    You are a helpful assistant that can answer questions about images.
    You need to think step by step carefully, provide your thinking process and finish your answer with the following template: FINAL ANSWER: [YOUR FINAL ANSWER]
    """

    try:
        # Read image and encode as base64
        with open(img_path, "rb") as image_file:
            image_bytes = image_file.read()

        image_base64 = base64.b64encode(image_bytes).decode("utf-8")

        # The caller's question is forwarded unchanged; it must not be
        # replaced by a fixed prompt, or every image gets the same question.
        message = [
            SystemMessage(content=system_prompt),
            HumanMessage(
                content=[
                    {
                        "type": "text",
                        "text": question,
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:image/png;base64,{image_base64}"
                        },
                    },
                ]
            )
        ]

        # Call the vision-capable model
        response = vision_llm.invoke(message)

        return response.content

    except Exception as e:
        # Failures are reported back as text rather than raised.
        error_msg = f"Error image questioning: {str(e)}"
        print(error_msg)
        return error_msg
    
# Choose the device for the speech model. Apple's MPS backend is kept as the
# first choice; CUDA and then the CPU are used when it is unavailable, so the
# module can still be imported on machines without MPS.
if torch.backends.mps.is_available():
    device = "mps"
elif torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Local directory holding the speech-to-text checkpoint.
checkpoint = "./whisper-large-v3"
model = AutoModelForSpeechSeq2Seq.from_pretrained(
    checkpoint, torch_dtype=torch.float32, low_cpu_mem_usage=True, use_safetensors=True
)
model.to(device)
processor = AutoProcessor.from_pretrained(checkpoint)

# Speech-recognition pipeline used by speech_to_text.
pipe = pipeline(
    "automatic-speech-recognition",
    model=model,
    tokenizer=processor.tokenizer,
    feature_extractor=processor.feature_extractor,
    torch_dtype=torch.float32,
    device=device,
)

@tool
def speech_to_text(audio_path: str) -> str:
    """
    Convert speech to text using a given audio file. Not for youtube links.

    The file is first transcribed with the pipeline's default settings. If
    that fails, it is transcribed again with return_timestamps=True.

    Args:
        audio_path: The path to the audio file.

    Returns:
        The stripped transcription text, or a message starting with
        "Error processing audio file:" when both attempts fail.
    """
    try:
        try:
            result = pipe(audio_path)
            return result["text"].strip()
        except Exception:
            # NOTE(review): presumably needed for long recordings, which the
            # pipeline rejects unless timestamps are requested - confirm.
            result = pipe(audio_path, return_timestamps=True)
            return result["text"].strip()
    except Exception as e:
        # Reached only when the fallback attempt failed as well.
        return f"Error processing audio file: {str(e)}"

@tool
def read_file_content(path: str) -> str:
    """
    Read the content of a file (pdf, docs, xlsx, etc.) but also from a URL (like arxiv or websites) and returns it as markdown.

    Args:
        path: The path to the file, or the URL to read.

    Returns:
        The markdown export prefixed with "File Content:", or a message
        starting with "Error reading file:" when the conversion fails.
    """
    try:
        doc = doc_converter.convert(path).document
        markdown = doc.export_to_markdown()
        return f"File Content:\n\n{markdown}"
    except Exception as e:
        # Conversion problems are reported back as text rather than raised.
        return f"Error reading file: {str(e)}"

# Sandbox in which run_python_code executes code. Network access is allowed so
# that Pyodide can install python packages that might be required.
sandbox = PyodideSandbox(allow_net=True)

@tool
async def run_python_code(input_type: str, input: str) -> str:
    """
    Run Python code in a sandboxed environment. You can provide either a code snippet or a file path.
    1. If input_type is "code", input should be a string containing the Python code to run.
    2. If input_type is "file", input should be a string containing the path to the file.

    Args:
        input_type: The type of input, code or file.
        input: The Python code to run or the path to the file.

    Returns:
        A summary of the execution (result, stdout, stderr, status), a notice
        about an invalid input_type, or a message starting with
        "Error executing Python code:" when something raised.
    """
    # Reject unknown input types before doing any work.
    if input_type not in ("code", "file"):
        return "Invalid input type. Please provide 'code' or 'file' as input_type."

    try:
        if input_type == "file":
            with open(input, "r") as source_file:
                source = source_file.read()
        else:
            source = input

        outcome = await sandbox.execute(source)
        return f"Result execution: result: {outcome.result}, stdout: {outcome.stdout}, stderr: {outcome.stderr}, status: {outcome.status}"
    except Exception as e:
        return f"Error executing Python code: {str(e)}"
    
@tool
def reverse_string(input: str) -> str:
    """
    Return the given string with its characters in reverse order.

    Args:
        input: The string to reverse.
    """
    flipped = "".join(reversed(input))
    return flipped



# Collection of the tools defined in this module.
TOOLS = [
    wikipedia_search,
    web_search,
    youtube_transcript,
    add_numbers,
    multiply_numbers,
    image_question_answering,
    speech_to_text,
    read_file_content,
    run_python_code,
    reverse_string,
]