# NOTE: the "Spaces: Runtime error" banner lines previously here were artifacts
# of the Hugging Face Spaces page this file was copied from, not program source.
import base64
import math
from typing import List

import torch
from docling.document_converter import DocumentConverter
from langchain_community.document_loaders import WikipediaLoader, YoutubeLoader
from langchain_community.tools import DuckDuckGoSearchResults
from langchain_community.utilities import DuckDuckGoSearchAPIWrapper
from langchain_core.messages import HumanMessage, SystemMessage
from langchain_core.tools import tool
from langchain_ollama import ChatOllama
from langchain_sandbox import PyodideSandbox
from langchain_tavily import TavilySearch
from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
# Shared Docling converter, reused by wikipedia_search and read_file_content.
doc_converter = DocumentConverter()
def wikipedia_search(query: str) -> str:
    """
    Search Wikipedia for a given query and return max 1 result.

    Args:
        query: The search query.

    Returns:
        The matched article(s) as markdown, each wrapped in a <Document> tag.
    """
    search_docs = WikipediaLoader(query=query, load_max_docs=1).load()
    # Re-render each source page through Docling to get clean markdown.
    docling_docs = [
        doc_converter.convert(doc.metadata["source"]).document.export_to_markdown()
        for doc in search_docs
    ]
    # Drop boilerplate before the "From Wikipedia" marker; find() returns -1
    # when the marker is absent, and max(..., 0) then keeps the whole text
    # (same behavior as the original if/else loop, in one pass).
    start_indexes = [max(d.find("From Wikipedia"), 0) for d in docling_docs]
    formatted_docs = "\n\n---\n\n".join(
        [
            f'<Document title="{search_doc.metadata["title"]}"/>\n{docling_doc[start_index:]}\n</Document>'
            for search_doc, docling_doc, start_index in zip(search_docs, docling_docs, start_indexes)
        ])
    return formatted_docs
def youtube_transcript(url: str) -> str:
    """Returns the transcript of a YouTube video given its URL.

    This is a text-based tool and should not be used for visual information of the video.

    Args:
        url: The YouTube video URL.
    """
    # Fix: the original docstring opened with four quotes (""""Returns...),
    # leaking a stray quote character into the docstring text.
    max_tries = 3
    for _ in range(max_tries):
        try:
            transcripts = YoutubeLoader.from_youtube_url(url, add_video_info=False).load()
            return f"Video Transcript: {transcripts[0].page_content}"
        except Exception as e:
            # Transcript fetches can fail transiently; retry up to max_tries.
            print(f"Attempt failed: {e}")
            continue
    # If all attempts fail, return an error message
    return "No transcript available. This video might not have a transcript or the URL is invalid."
def web_search(query: str) -> str:
    """
    Perform a web search for the given query and return the results.
    Use this when you need to find current or factual information.

    Args:
        query: The search query.
    """
    # Run the query through Tavily, capped at three hits.
    searcher = TavilySearch(max_results=3)
    response = searcher.invoke(query)
    # Wrap each hit in a <Document> tag annotated with its URL.
    rendered = []
    for hit in response["results"]:
        rendered.append(f'<Document href="{hit["url"]}">\n{hit["content"]}\n</Document>')
    formatted_search_docs = "\n\n---\n\n".join(rendered)
    return f"Web search results for '{query}':\n\n{formatted_search_docs}"
def add_numbers(numbers: List[float]) -> float:
    """
    Add a list of numbers together. E.g [1, 2, 3] -> 6

    Args:
        numbers: A list of numbers to add.
    """
    # Explicit accumulation; an empty list yields 0, like sum().
    total = 0
    for value in numbers:
        total += value
    return total
def multiply_numbers(numbers: List[float]) -> float:
    """
    Multiply a list of numbers together. E.g [3, 2, 3] -> 18

    Args:
        numbers: A list of numbers to multiply.

    Returns:
        The product of the numbers; 1 for an empty list (multiplicative
        identity, matching the original accumulator's starting value).
    """
    # math.prod iterates at C speed and starts from 1, exactly like the
    # original hand-rolled loop.
    return math.prod(numbers)
# Vision-capable local model used by image_question_answering.
vision_llm = ChatOllama(model="gemma3:27b")
# NOTE: might be better to use a supervisor method instead of a single vision tool.
def image_question_answering(img_path: str, question: str) -> str:
    """
    Given an image path and a question, return the answer to the question based on the image. Just pass the initial question from the human as a query.

    Args:
        img_path: The path to the image.
        question: The question to ask about the image.

    Returns:
        The model's answer, or an error message if reading/inference fails.
    """
    system_prompt = """
You are a helpful assistant that can answer questions about images.
You need to think step by step carefully, provide your thinking process and finish your answer with the following template: FINAL ANSWER: [YOUR FINAL ANSWER]
"""
    try:
        # Read image and encode as base64
        with open(img_path, "rb") as image_file:
            image_bytes = image_file.read()
        image_base64 = base64.b64encode(image_bytes).decode("utf-8")
        # BUG FIX: the original unconditionally overwrote `question` with a
        # hard-coded chess prompt here, so every call asked about a chess
        # position regardless of the caller's question. The override is removed.
        # Prepare the prompt including the base64 image data
        message = [
            SystemMessage(content=system_prompt),
            HumanMessage(
                content=[
                    {
                        "type": "text",
                        "text": question,
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:image/png;base64,{image_base64}"
                        },
                    },
                ]
            )
        ]
        # Call the vision-capable model
        response = vision_llm.invoke(message)
        return response.content
    except Exception as e:
        error_msg = f"Error image questioning: {str(e)}"
        print(error_msg)
        return error_msg
# Whisper ASR setup used by speech_to_text.
# Prefer the Apple-Silicon GPU when present, but fall back to CPU so the
# module still imports on machines without MPS (the original hard-coded "mps").
device = "mps" if torch.backends.mps.is_available() else "cpu"
# Local checkpoint directory for whisper-large-v3.
checkpoint = "./whisper-large-v3"
model = AutoModelForSpeechSeq2Seq.from_pretrained(
    checkpoint, torch_dtype=torch.float32, low_cpu_mem_usage=True, use_safetensors=True
)
model.to(device)
processor = AutoProcessor.from_pretrained(checkpoint)
pipe = pipeline(
    "automatic-speech-recognition",
    model=model,
    tokenizer=processor.tokenizer,
    feature_extractor=processor.feature_extractor,
    torch_dtype=torch.float32,
    device=device,
)
def speech_to_text(audio_path: str) -> str:
    """
    Convert speech to text using a given audio file. Not for youtube links.

    Args:
        audio_path: The path to the audio file.

    Returns:
        The transcribed text, or an error message if transcription fails.
    """
    # BUG FIX: the original attached two `except Exception` clauses to one
    # `try`; the second was unreachable, so an error raised by the fallback
    # call below propagated instead of returning the error message.
    try:
        result = pipe(audio_path)
        return result["text"].strip()
    except Exception:
        # Retry with timestamps (presumably needed for long-form audio —
        # confirm against the transformers ASR pipeline docs); if this also
        # fails, report the error instead of crashing.
        try:
            result = pipe(audio_path, return_timestamps=True)
            return result["text"].strip()
        except Exception as e:
            return f"Error processing audio file: {str(e)}"
def read_file_content(path: str) -> str:
    """
    Read the content of a file (pdf, docs, xlsx, etc.) but also from a URL (like arxiv or websites) and returns it as markdown.

    Args:
        path: The path to the file, or a URL.

    Returns:
        The document rendered as markdown, or an error message on failure.
    """
    # Fix: the docstring previously documented a `file_path` argument that
    # does not exist — the parameter is `path`. This matters because the
    # docstring is the tool schema shown to the agent.
    try:
        doc = doc_converter.convert(path).document
        markdown = doc.export_to_markdown()
        return f"File Content:\n\n{markdown}"
    except Exception as e:
        return f"Error reading file: {str(e)}"
# Sandboxed Pyodide interpreter used by run_python_code.
sandbox = PyodideSandbox(
    # Allow Pyodide to install python packages that
    # might be required.
    allow_net=True,
)
async def run_python_code(input_type: str, input: str) -> str:
    """
    Run Python code in a sandboxed environment. You can provide either a code snippet or a file path.
    1. If input_type is "code", input should be a string containing the Python code to run.
    2. If input_type is "file", input should be a string containing the path to the file.

    Args:
        input_type: The type of input, code or file.
        input: The Python code to run or the path to the file.

    Returns:
        A summary of the execution (result, stdout, stderr, status) or an
        error message.
    """
    # NOTE: the parameter name `input` shadows the builtin, but it is part of
    # the tool's public schema, so it is kept for backward compatibility.
    try:
        if input_type == "code":
            code = input
        elif input_type == "file":
            # Read source as UTF-8 explicitly so behavior does not depend on
            # the platform's default encoding.
            with open(input, "r", encoding="utf-8") as file:
                code = file.read()
        else:
            return "Invalid input type. Please provide 'code' or 'file' as input_type."
        result = await sandbox.execute(code)
        return f"Result execution: result: {result.result}, stdout: {result.stdout}, stderr: {result.stderr}, status: {result.status}"
    except Exception as e:
        return f"Error executing Python code: {str(e)}"
def reverse_string(input: str) -> str:
    """
    Reverse a given string.

    Args:
        input: The string to reverse.
    """
    # Join the characters back-to-front; equivalent to input[::-1].
    return "".join(reversed(input))
# Registry of every tool exposed to the agent, in the order they appear above.
TOOLS = [
    wikipedia_search,
    web_search,
    youtube_transcript,
    add_numbers,
    multiply_numbers,
    image_question_answering,
    speech_to_text,
    read_file_content,
    run_python_code,
    reverse_string,
]