# References:
# https://docs.crewai.com/introduction
# https://ai.google.dev/gemini-api/docs
import base64
import os
import time

import chess
from agents.models.llms import (
    LLM_WEB_SEARCH,
    LLM_WEB_BROWSER,
    LLM_IMAGE_ANALYSIS,
    LLM_AUDIO_ANALYSIS,
    LLM_VIDEO_ANALYSIS,
    LLM_YOUTUBE_ANALYSIS,
    LLM_DOCUMENT_ANALYSIS,
    LLM_CODE_GENERATION,
    LLM_CODE_EXECUTION,
    LLM_IMAGE_TO_FEN,
    LLM_ALGEBRAIC_NOTATION,
    LLM_FINAL_ANSWER,
    THINKING_LEVEL_WEB_SEARCH,
    THINKING_LEVEL_MEDIA_ANALYSIS,
    THINKING_LEVEL_YOUTUBE_ANALYSIS,
    THINKING_LEVEL_DOCUMENT_ANALYSIS,
    THINKING_LEVEL_CODE_GENERATION,
    THINKING_LEVEL_CODE_EXECUTION,
    THINKING_LEVEL_IMAGE_TO_FEN,
    THINKING_LEVEL_ALGEBRAIC_NOTATION,
    THINKING_LEVEL_FINAL_ANSWER,
)
from agents.models.prompts import (
    PROMPT_IMG_TO_FEN,
    PROMPT_ALGEBRAIC_NOTATION,
    PROMPT_FINAL_ANSWER,
)
from crewai.tools import tool
from crewai_tools import StagehandTool
from google import genai
from google.genai import types
from utils.utils import (
    read_docx_text,
    read_pptx_text,
    is_ext,
)

# Upper bound (seconds) on waiting for an uploaded media file to finish
# server-side processing before giving up.
_FILE_PROCESSING_TIMEOUT_S = 600


class AITools:
    """CrewAI tools backed by the Gemini API (search, media, code, chess)."""

    @staticmethod
    def _get_client():
        """Build a Gemini client from the GEMINI_API_KEY environment variable."""
        return genai.Client(api_key=os.environ["GEMINI_API_KEY"])

    @staticmethod
    def _media_analysis_tool(tool_name: str, model: str, question: str, file_path: str) -> str:
        """Upload a media file, wait until it is processed, then ask `model` the question.

        Args:
            tool_name (str): Caller's tool name, used only in log messages
            model (str): Gemini model identifier
            question (str): Question about the media file
            file_path (str): Path of the media file to upload

        Returns:
            str: Answer to the question about the media file

        Raises:
            RuntimeError: If upload, server-side processing, or generation fails
        """
        print(f"🛠️ AITools: {tool_name}: question={question}, file_path={file_path}")
        try:
            client = AITools._get_client()
            file = client.files.upload(file=file_path)
            # Poll until the uploaded file is processed. Bail out on FAILED or
            # after a bounded wait instead of spinning forever on a stuck file.
            deadline = time.monotonic() + _FILE_PROCESSING_TIMEOUT_S
            while True:
                media_file = client.files.get(name=file.name)
                if media_file.state == "ACTIVE":
                    break
                elif media_file.state == "FAILED":
                    raise RuntimeError("Media file processing failed")
                if time.monotonic() > deadline:
                    raise RuntimeError("Media file processing timed out")
                time.sleep(1)
            response = client.models.generate_content(
                model=model,
                contents=[file, question],
                config=types.GenerateContentConfig(
                    thinking_config=types.ThinkingConfig(
                        thinking_level=THINKING_LEVEL_MEDIA_ANALYSIS
                    )
                )
            )
            result = response.text
            print(f"🛠️ AITools: {tool_name}: result={result}")
            return result
        except Exception as e:
            print(f"⚠️ AITools: {tool_name}: exception={str(e)}")
            raise RuntimeError(f"Processing failed: {str(e)}")

    @staticmethod
    def _extract_execution_result(response):
        """Return the stdout of the first code-execution part in `response`, or None."""
        for part in response.candidates[0].content.parts:
            if part.code_execution_result is not None:
                return part.code_execution_result.output
        return None

    @tool("Web Search Tool")
    def web_search_tool(question: str) -> str:
        """Given a question only, search the web to answer the question.

        Args:
            question (str): Question to answer

        Returns:
            str: Answer to the question

        Raises:
            RuntimeError: If processing fails
        """
        print(f"🛠️ AITools: web_search_tool: question={question}")
        try:
            client = AITools._get_client()
            response = client.models.generate_content(
                model=LLM_WEB_SEARCH,
                contents=question,
                config=types.GenerateContentConfig(
                    # Grounding via Google Search so answers reflect live data.
                    tools=[types.Tool(google_search=types.GoogleSearch())],
                    thinking_config=types.ThinkingConfig(
                        thinking_level=THINKING_LEVEL_WEB_SEARCH
                    )
                )
            )
            result = response.text
            print(f"🛠️ AITools: web_search_tool: result={result}")
            return result
        except Exception as e:
            print(f"⚠️ AITools: web_search_tool: exception={str(e)}")
            raise RuntimeError(f"Processing failed: {str(e)}")

    @tool("Web Browser Tool")
    def web_browser_tool(question: str, url: str) -> str:
        """Given a question and URL, load the URL and act, extract, or observe to answer the question.

        Args:
            question (str): Question about a URL
            url (str): The target URL (must be http/https). "http://"/"https://" will be auto-added if missing.

        Returns:
            str: Answer to the question

        Raises:
            RuntimeError: If processing fails
        """
        print(f"🛠️ AITools: web_browser_tool: question={question}, url={url}")
        try:
            url_str = url.strip()
            # Stagehand requires an absolute http(s) URL; default to https.
            if not url_str.lower().startswith(("http://", "https://")):
                url_str = f"https://{url_str}"
            with StagehandTool(
                api_key=os.environ["BROWSERBASE_API_KEY"],
                project_id=os.environ["BROWSERBASE_PROJECT_ID"],
                model_api_key=os.environ["BROWSERBASE_MODEL_API_KEY"],
                model_name=LLM_WEB_BROWSER,
                dom_settle_timeout_ms=5000,
                headless=True,
                self_heal=True,
                wait_for_captcha_solves=True,
                verbose=3
            ) as stagehand_tool:
                result = stagehand_tool.run(
                    instruction=question,
                    url=url_str,
                    command_type="act"  # TODO: act, extract, observe
                )
                print(f"🛠️ AITools: web_browser_tool: result={result}")
                return result
        except Exception as e:
            print(f"⚠️ AITools: web_browser_tool: exception={str(e)}")
            raise RuntimeError(f"Processing failed: {str(e)}")

    @tool("Image Analysis Tool")
    def image_analysis_tool(question: str, file_path: str) -> str:
        """Given a question and image file, analyze the image to answer the question.

        Args:
            question (str): Question about an image file
            file_path (str): The image file path

        Returns:
            str: Answer to the question about the image file

        Raises:
            RuntimeError: If processing fails
        """
        return AITools._media_analysis_tool("image_analysis_tool", LLM_IMAGE_ANALYSIS, question, file_path)

    @tool("Audio Analysis Tool")
    def audio_analysis_tool(question: str, file_path: str) -> str:
        """Given a question and audio file, analyze the audio to answer the question.

        Args:
            question (str): Question about an audio file
            file_path (str): The audio file path

        Returns:
            str: Answer to the question about the audio file

        Raises:
            RuntimeError: If processing fails
        """
        return AITools._media_analysis_tool("audio_analysis_tool", LLM_AUDIO_ANALYSIS, question, file_path)

    @tool("Video Analysis Tool")
    def video_analysis_tool(question: str, file_path: str) -> str:
        """Given a question and video file, analyze the video to answer the question.

        Args:
            question (str): Question about a video file
            file_path (str): The video file path

        Returns:
            str: Answer to the question about the video file

        Raises:
            RuntimeError: If processing fails
        """
        return AITools._media_analysis_tool("video_analysis_tool", LLM_VIDEO_ANALYSIS, question, file_path)

    @tool("YouTube Analysis Tool")
    def youtube_analysis_tool(question: str, url: str) -> str:
        """Given a question and YouTube URL, analyze the video to answer the question.

        Args:
            question (str): Question about a YouTube video
            url (str): The YouTube URL

        Returns:
            str: Answer to the question about the YouTube video

        Raises:
            RuntimeError: If processing fails
        """
        print(f"🛠️ AITools: youtube_analysis_tool: question={question}, url={url}")
        try:
            client = AITools._get_client()
            response = client.models.generate_content(
                model=LLM_YOUTUBE_ANALYSIS,
                contents=types.Content(
                    parts=[types.Part(file_data=types.FileData(file_uri=url)),
                           types.Part(text=question)]
                ),
                config=types.GenerateContentConfig(
                    thinking_config=types.ThinkingConfig(
                        thinking_level=THINKING_LEVEL_YOUTUBE_ANALYSIS
                    )
                )
            )
            # BUGFIX: previously returned the raw response object; return the
            # generated text like every other tool in this module.
            result = response.text
            print(f"🛠️ AITools: youtube_analysis_tool: result={result}")
            return result
        except Exception as e:
            print(f"⚠️ AITools: youtube_analysis_tool: exception={str(e)}")
            raise RuntimeError(f"Processing failed: {str(e)}")

    @tool("Document Analysis Tool")
    def document_analysis_tool(question: str, file_path: str) -> str:
        """Given a question and document file, analyze the document to answer the question.

        Args:
            question (str): Question about a document file
            file_path (str): The document file path

        Returns:
            str: Answer to the question about the document file

        Raises:
            RuntimeError: If processing fails
        """
        print(f"🛠️ AITools: document_analysis_tool: question={question}, file_path={file_path}")
        try:
            client = AITools._get_client()
            contents = []
            # .docx/.pptx are not directly supported by the Files API, so their
            # text is extracted locally and inlined into the prompt instead.
            if is_ext(file_path, ".docx"):
                text_data = read_docx_text(file_path)
                contents = [f"{question}\n{text_data}"]
                print(f"🛠️ Text data:\n{text_data}")
            elif is_ext(file_path, ".pptx"):
                text_data = read_pptx_text(file_path)
                contents = [f"{question}\n{text_data}"]
                print(f"🛠️ Text data:\n{text_data}")
            else:
                file = client.files.upload(file=file_path)
                contents = [file, question]
            response = client.models.generate_content(
                model=LLM_DOCUMENT_ANALYSIS,
                contents=contents,
                config=types.GenerateContentConfig(
                    thinking_config=types.ThinkingConfig(
                        thinking_level=THINKING_LEVEL_DOCUMENT_ANALYSIS
                    )
                )
            )
            result = response.text
            print(f"🛠️ AITools: document_analysis_tool: result={result}")
            return result
        except Exception as e:
            print(f"⚠️ AITools: document_analysis_tool: exception={str(e)}")
            raise RuntimeError(f"Processing failed: {str(e)}")

    @tool("Code Generation and Execution Tool")
    def code_generation_and_execution_tool(question: str, json_data: str) -> str:
        """Given a question and JSON data, generate and execute code to answer the question.

        Args:
            question (str): Question to answer
            json_data (str): The JSON data

        Returns:
            str: Answer to the question

        Raises:
            RuntimeError: If processing fails
        """
        print(f"🛠️ AITools: code_generation_and_execution_tool: question={question}, json_data={json_data}")
        try:
            client = AITools._get_client()
            response = client.models.generate_content(
                model=LLM_CODE_GENERATION,
                contents=[f"{question}\n{json_data}"],
                config=types.GenerateContentConfig(
                    # Instantiate ToolCodeExecution (was passed as a class).
                    tools=[types.Tool(code_execution=types.ToolCodeExecution())],
                    thinking_config=types.ThinkingConfig(
                        thinking_level=THINKING_LEVEL_CODE_GENERATION
                    )
                ),
            )
            result = AITools._extract_execution_result(response)
            print(f"🛠️ AITools: code_generation_and_execution_tool: result={result}")
            return result
        except Exception as e:
            print(f"⚠️ AITools: code_generation_and_execution_tool: exception={str(e)}")
            raise RuntimeError(f"Processing failed: {str(e)}")

    @tool("Code Execution Tool")
    def code_execution_tool(question: str, file_path: str) -> str:
        """Given a question and Python file, execute the file to answer the question.

        Args:
            question (str): Question to answer
            file_path (str): The Python file path

        Returns:
            str: Answer to the question

        Raises:
            RuntimeError: If processing fails
        """
        print(f"🛠️ AITools: code_execution_tool: question={question}, file_path={file_path}")
        try:
            client = AITools._get_client()
            file = client.files.upload(file=file_path)
            response = client.models.generate_content(
                model=LLM_CODE_EXECUTION,
                contents=[file, question],
                config=types.GenerateContentConfig(
                    # Instantiate ToolCodeExecution (was passed as a class).
                    tools=[types.Tool(code_execution=types.ToolCodeExecution())],
                    thinking_config=types.ThinkingConfig(
                        thinking_level=THINKING_LEVEL_CODE_EXECUTION
                    )
                ),
            )
            result = AITools._extract_execution_result(response)
            print(f"🛠️ AITools: code_execution_tool: result={result}")
            return result
        except Exception as e:
            print(f"⚠️ AITools: code_execution_tool: exception={str(e)}")
            raise RuntimeError(f"Processing failed: {str(e)}")

    @tool("Image to FEN Tool")
    def img_to_fen_tool(question: str, file_path: str, active_color: str) -> str:
        """Given a chess question, image file, and active color, return the FEN.

        Args:
            question (str): The chess question
            file_path (str): The image file path
            active_color (str): The active color

        Returns:
            str: FEN of the chess position

        Raises:
            RuntimeError: If processing fails
        """
        print(f"🛠️ AITools: img_to_fen_tool: question={question}, file_path={file_path}, active_color={active_color}")
        try:
            client = AITools._get_client()
            with open(file_path, "rb") as f:
                img_bytes = f.read()
            prompt = PROMPT_IMG_TO_FEN.format(question=question, active_color=active_color)
            content = types.Content(
                parts=[
                    types.Part(text=prompt),
                    types.Part(
                        inline_data=types.Blob(
                            mime_type="image/png",
                            # Pass raw bytes directly (the previous
                            # encode-then-decode base64 round trip was a no-op).
                            data=img_bytes,
                        )
                    )
                ]
            )
            response = client.models.generate_content(
                model=LLM_IMAGE_TO_FEN,
                contents=[content],
                config=types.GenerateContentConfig(
                    thinking_config=types.ThinkingConfig(
                        thinking_level=THINKING_LEVEL_IMAGE_TO_FEN
                    )
                )
            )
            result = None
            for part in response.parts:
                if part.text is not None:
                    result = part.text
                    break
            # FEN validation: raises if the model returned an invalid position.
            chess.Board(result)
            print(f"🛠️ AITools: img_to_fen_tool: result={result}")
            return result
        except Exception as e:
            print(f"⚠️ AITools: img_to_fen_tool: exception={str(e)}")
            raise RuntimeError(f"Processing failed: {str(e)}")

    @tool("Algebraic Notation Tool")
    def algebraic_notation_tool(question: str, file_path: str, position_evaluation: str) -> str:
        """Given a chess question, image file, and position evaluation in UCI notation, answer the question in algebraic notation.

        Args:
            question (str): The chess question
            file_path (str): The image file path
            position_evaluation (str): The position evaluation in UCI notation

        Returns:
            str: Answer to the question in algebraic notation

        Raises:
            RuntimeError: If processing fails
        """
        print(f"🛠️ AITools: algebraic_notation_tool: question={question}, file_path={file_path}, position_evaluation={position_evaluation}")
        try:
            client = AITools._get_client()
            with open(file_path, "rb") as f:
                img_bytes = f.read()
            prompt = PROMPT_ALGEBRAIC_NOTATION.format(question=question, position_evaluation=position_evaluation)
            content = types.Content(
                parts=[
                    types.Part(text=prompt),
                    types.Part(
                        inline_data=types.Blob(
                            mime_type="image/png",
                            # Pass raw bytes directly (the previous
                            # encode-then-decode base64 round trip was a no-op).
                            data=img_bytes,
                        )
                    )
                ]
            )
            response = client.models.generate_content(
                model=LLM_ALGEBRAIC_NOTATION,
                contents=[content],
                config=types.GenerateContentConfig(
                    thinking_config=types.ThinkingConfig(
                        thinking_level=THINKING_LEVEL_ALGEBRAIC_NOTATION
                    )
                )
            )
            result = None
            for part in response.parts:
                if part.text is not None:
                    result = part.text
                    break
            print(f"🛠️ AITools: algebraic_notation_tool: result={result}")
            return result
        except Exception as e:
            print(f"⚠️ AITools: algebraic_notation_tool: exception={str(e)}")
            raise RuntimeError(f"Processing failed: {str(e)}")

    # NOTE: intentionally not decorated with @tool — called directly, not by an agent.
    def final_answer_tool(question: str, answer: str) -> str:
        """Given a question and initial answer, get the final answer.

        Args:
            question (str): The question
            answer (str): The initial answer

        Returns:
            str: Final answer

        Raises:
            RuntimeError: If processing fails
        """
        print(f"🛠️ AITools: final_answer_tool: question={question}, answer={answer}")
        try:
            client = AITools._get_client()
            prompt = PROMPT_FINAL_ANSWER.format(question=question, answer=answer)
            response = client.models.generate_content(
                model=LLM_FINAL_ANSWER,
                contents=[prompt],
                config=types.GenerateContentConfig(
                    thinking_config=types.ThinkingConfig(
                        thinking_level=THINKING_LEVEL_FINAL_ANSWER
                    )
                )
            )
            result = response.text.strip()
            print(f"🛠️ AITools: final_answer_tool: result={result}")
            return result
        except Exception as e:
            print(f"⚠️ AITools: final_answer_tool: exception={str(e)}")
            raise RuntimeError(f"Processing failed: {str(e)}")