Spaces:

bstraehle
/

gaia

Running

App Files Files Community

bstraehle committed 25 days ago

Commit

c0b37e6

verified ·

1 Parent(s): 77d81e0

Create tools/ai_tools.py

Browse files

Files changed (1) hide show

agents/tools/ai_tools.py +386 -0

agents/tools/ai_tools.py ADDED Viewed

	@@ -0,0 +1,386 @@

+import os
+from crewai.tools import tool
+from crewai_tools import StagehandTool
+from google import genai
+from google.genai import types
+from mcp.mcp_client import call_mcp_tool, MCP_SSE_URL, MCP_TOOL
+from utils import (
+    read_docx_text,
+    read_pptx_text,
+    is_ext
+)
+# LLMs
+# One model identifier per tool so each tool's model can be changed on its own.
+# The *_MODEL names below are passed as `model=` to the Gemini client by the
+# matching tool; IMAGE_ANALYSIS_MODEL is also used by the chess tool for the
+# image-to-FEN step.
+WEB_SEARCH_MODEL        = "gemini-2.5-flash"
+IMAGE_ANALYSIS_MODEL    = "gemini-2.5-flash"
+AUDIO_ANALYSIS_MODEL    = "gemini-2.5-flash"
+VIDEO_ANALYSIS_MODEL    = "gemini-2.5-flash"
+YOUTUBE_ANALYSIS_MODEL  = "gemini-2.5-flash"
+DOCUMENT_ANALYSIS_MODEL = "gemini-2.5-flash"
+ARITHMETIC_MODEL        = "gemini-2.5-flash"
+CODE_GENERATION_MODEL   = "gemini-2.5-flash"
+CODE_EXECUTION_MODEL    = "gemini-2.5-flash"
+# Passed as `model_name` to StagehandTool by the web browser tool.
+# NOTE(review): confirm this identifier is accepted by the configured provider.
+WEB_BROWSER_MODEL       = "claude-sonnet-4-5-latest"
+# Prompt sent together with the uploaded board image by the chess tool; it asks
+# for the piece-placement field of a FEN string only.
+IMG_TO_FEN_PROMPT       = """Analyze this chess board image and convert it to FEN (Forsyth-Edwards Notation).
+Rules:
+- Start from rank 8 (top) to rank 1 (bottom)
+- For each rank, go from file a (left) to file h (right)
+- Use uppercase for white pieces: K=King, Q=Queen, R=Rook, B=Bishop, N=Knight, P=Pawn
+- Use lowercase for black pieces: k, q, r, b, n, p
+- Use numbers (1-8) for consecutive empty squares
+- Separate ranks with '/'
+- Only provide the piece placement portion of FEN (the first field)
+- Do not include active color, castling rights, en passant, or move counters
+Example: "rnbqkbnr/pppppppp/8/8/8/8/PPPPPPPP/RNBQKBNR" for the starting position.
+Return ONLY the FEN string, nothing else."""
+class AITools():
+    @tool("Web Search Tool")
+    def web_search_tool(question: str) -> str:
+        """Given a question only, search the web to answer the question.
+           Args:
+               question (str): Question to answer
+           Returns:
+               str: Answer to the question
+           Raises:
+               RuntimeError: If processing fails"""
+        try:
+            client = genai.Client(api_key=os.environ["GEMINI_API_KEY"])
+            response = client.models.generate_content(
+                model=WEB_SEARCH_MODEL,
+                contents=question,
+                config=types.GenerateContentConfig(
+                    tools=[types.Tool(google_search=types.GoogleSearch())]
+                )
+            )
+            return response.text.strip()
+        except Exception as e:
+            raise RuntimeError(f"Processing failed: {str(e)}")
+    @tool("Web Browser Tool")
+    def web_browser_tool(question: str, url: str) -> str:
+        """Given a question and URL, load the URL and act, extract, or observe to answer the question.
+           Args:
+               question (str): Question about a URL
+               url (str): The URL
+           Returns:
+               str: Answer to the question
+           Raises:
+               RuntimeError: If processing fails"""
+        try:
+            stagehand_tool = StagehandTool(
+                api_key=os.environ["BROWSERBASE_API_KEY"],
+                project_id=os.environ["BROWSERBASE_PROJECT_ID"],
+                model_api_key=os.environ["MODEL_API_KEY"],
+                model_name=WEB_BROWSER_MODEL,
+                dom_settle_timeout_ms=5000,
+                headless=True,
+                self_heal=True,
+                wait_for_captcha_solves=True,
+                verbose=3
+            )
+            return stagehand_tool.run(
+                instruction=question,
+                url=url
+            )
+        except Exception as e:
+            raise RuntimeError(f"Processing failed: {str(e)}")
+        finally:
+            stagehand_tool.close()
+    @tool("Chess Analysis Tool")
+    def chess_analysis_tool(question: str, file_path: str) -> str:
+        """Given a chess question and image file, analyze the image to answer the question.
+           Args:
+               question (str): Chess question about an image file
+               file_path (str): The image file path
+           Returns:
+               str: Answer to the chess question about the image file
+           Raises:
+               RuntimeError: If processing fails"""
+        try:
+            import asyncio
+            from mcp.client.sse import sse_client
+            from mcp.client.session import ClientSession, DEFAULT_CLIENT_INFO
+            import anyio
+            # Process image to FEN
+            client = genai.Client(api_key=os.environ["GEMINI_API_KEY"])
+            file = client.files.upload(file=file_path)
+            fen_prompt = IMG_TO_FEN_PROMPT
+            response = client.models.generate_content(
+                model=IMAGE_ANALYSIS_MODEL,
+                contents=[file, fen_prompt]
+            )
+            fen = response.text.strip()
+            # Call MCP server
+            mcp_url = os.getenv("MCP_SSE_URL", MCP_SSE_URL)
+            return call_mcp_tool(
+                mcp_url=mcp_url,
+                tool_name=MCP_TOOL,
+                arguments={"question": question, "fen": fen}
+            )
+        except Exception as e:
+            raise RuntimeError(f"Processing failed: {str(e)}")
+    @tool("Image Analysis Tool")
+    def image_analysis_tool(question: str, file_path: str) -> str:
+        """Given a question and image file, analyze the image to answer the question.
+           Args:
+               question (str): Question about an image file
+               file_path (str): The image file path
+           Returns:
+               str: Answer to the question about the image file
+           Raises:
+               RuntimeError: If processing fails"""
+        try:
+            client = genai.Client(api_key=os.environ["GEMINI_API_KEY"])
+            file = client.files.upload(file=file_path)
+            response = client.models.generate_content(
+                model=IMAGE_ANALYSIS_MODEL,
+                contents=[file, question]
+            )
+            return response.text.strip()
+        except Exception as e:
+            raise RuntimeError(f"Processing failed: {str(e)}")
+    @tool("Audio Analysis Tool")
+    def audio_analysis_tool(question: str, file_path: str) -> str:
+        """Given a question and audio file, analyze the audio to answer the question.
+           Args:
+               question (str): Question about an audio file
+               file_path (str): The audio file path
+           Returns:
+               str: Answer to the question about the audio file
+           Raises:
+               RuntimeError: If processing fails"""
+        try:
+            client = genai.Client(api_key=os.environ["GEMINI_API_KEY"])
+            file = client.files.upload(file=file_path)
+            response = client.models.generate_content(
+                model=AUDIO_ANALYSIS_MODEL,
+                contents=[file, question]
+            )
+            return response.text.strip()
+        except Exception as e:
+            raise RuntimeError(f"Processing failed: {str(e)}")
+    @tool("Video Analysis Tool")
+    def video_analysis_tool(question: str, file_path: str) -> str:
+        """Given a question and video file, analyze the video to answer the question.
+           Args:
+               question (str): Question about a video file
+               file_path (str): The video file path
+           Returns:
+               str: Answer to the question about the video file
+           Raises:
+               RuntimeError: If processing fails"""
+        try:
+            client = genai.Client(api_key=os.environ["GEMINI_API_KEY"])
+            file = client.files.upload(file=file_path)
+            response = client.models.generate_content(
+                model=VIDEO_ANALYSIS_MODEL,
+                contents=[file, question]
+            )
+            return response.text.strip()
+        except Exception as e:
+            raise RuntimeError(f"Processing failed: {str(e)}")
+    @tool("YouTube Analysis Tool")
+    def youtube_analysis_tool(question: str, url: str) -> str:
+        """Given a question and YouTube URL, analyze the video to answer the question.
+           Args:
+               question (str): Question about a YouTube video
+               url (str): The YouTube URL
+           Returns:
+               str: Answer to the question about the YouTube video
+           Raises:
+               RuntimeError: If processing fails"""
+        try:
+            client = genai.Client(api_key=os.environ["GEMINI_API_KEY"])
+            return client.models.generate_content(
+                model=YOUTUBE_ANALYSIS_MODEL,
+                contents=types.Content(
+                    parts=[types.Part(file_data=types.FileData(file_uri=url)),
+                           types.Part(text=question)]
+                )
+            )
+        except Exception as e:
+            raise RuntimeError(f"Processing failed: {str(e)}")
+    @tool("Document Analysis Tool")
+    def document_analysis_tool(question: str, file_path: str) -> str:
+        """Given a question and document file, analyze the document to answer the question.
+           Args:
+               question (str): Question about a document file
+               file_path (str): The document file path
+           Returns:
+               str: Answer to the question about the document file
+           Raises:
+               RuntimeError: If processing fails"""
+        try:
+            client = genai.Client(api_key=os.environ["GEMINI_API_KEY"])
+            contents = []
+            if is_ext(file_path, ".docx"):
+                text_data = read_docx_text(file_path)
+                contents = [f"{question}\n{text_data}"]
+                print(f"=> Text data:\n{text_data}")
+            elif is_ext(file_path, ".pptx"):
+                text_data = read_pptx_text(file_path)
+                contents = [f"{question}\n{text_data}"]
+                print(f"=> Text data:\n{text_data}")
+            else:
+                file = client.files.upload(file=file_path)
+                contents = [file, question]
+            response = client.models.generate_content(
+                model=DOCUMENT_ANALYSIS_MODEL,
+                contents=contents
+            )
+            return response.text.strip()
+        except Exception as e:
+            raise RuntimeError(f"Processing failed: {str(e)}")
+    @tool("Arithmetic Tool")
+    def arithmetic_tool(question: str, a: float, b: float) -> float:
+        """Given a question and two numbers, perform the calculation to answer the question.
+           Args:
+               question (str): Question to answer
+               a (float): First number
+               b (float): Second number
+           Returns:
+               float: Result number
+           Raises:
+               RuntimeError: If processing fails"""
+        try:
+            client = genai.Client(api_key=os.environ["GEMINI_API_KEY"])
+            response = client.models.generate_content(
+                model=ARITHMETIC_MODEL,
+                contents=question,
+                config=types.GenerateContentConfig(
+                    tools=[add, subtract, multiply, divide, modulus]
+                )
+            )
+            return response.text.strip()
+        except Exception as e:
+            raise RuntimeError(f"Processing failed: {str(e)}")
+    @tool("Code Generation Tool")
+    def code_generation_tool(question: str, json_data: str) -> str:
+        """Given a question and JSON data, generate and execute code to answer the question.
+           Args:
+               question (str): Question to answer
+                file_path (str): The JSON data
+           Returns:
+               str: Answer to the question
+           Raises:
+               RuntimeError: If processing fails"""
+        try:
+            client = genai.Client(api_key=os.environ["GEMINI_API_KEY"])
+            response = client.models.generate_content(
+                model=CODE_GENERATION_MODEL,
+                contents=[f"{question}\n{json_data}"],
+                config=types.GenerateContentConfig(
+                    tools=[types.Tool(code_execution=types.ToolCodeExecution)]
+                ),
+            )
+            for part in response.candidates[0].content.parts:
+                if part.code_execution_result is not None:
+                    return part.code_execution_result.output
+        except Exception as e:
+            raise RuntimeError(f"Processing failed: {str(e)}")
+    @tool("Code Execution Tool")
+    def code_execution_tool(question: str, file_path: str) -> str:
+        """Given a question and Python file, execute the file to answer the question.
+           Args:
+               question (str): Question to answer
+               file_path (str): The Python file path
+           Returns:
+               str: Answer to the question
+           Raises:
+               RuntimeError: If processing fails"""
+        try:
+            client = genai.Client(api_key=os.environ["GEMINI_API_KEY"])
+            file = client.files.upload(file=file_path)
+            response = client.models.generate_content(
+                model=CODE_EXECUTION_MODEL,
+                contents=[file, question],
+                config=types.GenerateContentConfig(
+                    tools=[types.Tool(code_execution=types.ToolCodeExecution)]
+                ),
+            )
+            for part in response.candidates[0].content.parts:
+                if part.code_execution_result is not None:
+                    return part.code_execution_result.output
+        except Exception as e:
+            raise RuntimeError(f"Processing failed: {str(e)}")