""" Google Slides Agent – specialized agent with only Slides MCP tools. Uses MCPServerStdio (local subprocess) with create_static_tool_filter so the agent sees *only* presentation tools. Zero network overhead. """ import asyncio import logging from openai import AsyncOpenAI from agents import Agent, Runner, OpenAIChatCompletionsModel from agents.model_settings import ModelSettings try: from .google_mcp_config import ( LONGCAT_API_KEY, LONGCAT_BASE_URL, MODEL_NAME, SLIDES_TOOLS, create_google_mcp_server, USER_GOOGLE_EMAIL, ) except ImportError: from google_mcp_config import ( LONGCAT_API_KEY, LONGCAT_BASE_URL, MODEL_NAME, SLIDES_TOOLS, create_google_mcp_server, USER_GOOGLE_EMAIL, ) logger = logging.getLogger(__name__) SYSTEM_PROMPT = """\ You are a specialized Google Slides assistant. You can create, read, and update presentations using the available tools. Capabilities: - **Create** new presentations with a title - **Read** full presentation metadata and content (slides, layouts, masters) - **Batch update** — apply multiple changes in one atomic request (insert slides, add shapes, set text, change layouts, etc.) - **Get page details** — read a specific slide or layout by page ID - **Thumbnails** — generate a PNG thumbnail for any slide - **Comments** — read, create, reply to, and resolve presentation comments Rules: 1. The user's Google email is provided in the query — use it for every tool call in the `user_google_email` parameter. NEVER ask the user for their email; it is always supplied. 2. When modifying a presentation, first call `get_presentation` to understand the current slide IDs and object structure. 3. For `batch_update_presentation`, compile all desired changes into a single request array for atomicity. 4. Provide user-friendly summaries after changes (e.g. "Added 3 slides", "Updated title on slide 2"). 5. Use page object IDs (not indices) when referring to specific slides in API calls. """ class GoogleSlidesAgent: """Thin wrapper around the OpenAI Agent SDK wired to Google Slides tools.""" def __init__(self, model: str = MODEL_NAME): self.model = model self._client = AsyncOpenAI( api_key=LONGCAT_API_KEY, base_url=LONGCAT_BASE_URL, timeout=30.0, ) # ── factory helpers ────────────────────────────────────────────────── def _create_mcp_server(self): """Spawn a local MCP subprocess with only Slides tools loaded.""" return create_google_mcp_server(service="slides", tool_names=SLIDES_TOOLS) def _create_agent(self, mcp_server) -> Agent: return Agent( name="Google Slides Agent", instructions=SYSTEM_PROMPT, mcp_servers=[mcp_server], model=OpenAIChatCompletionsModel( model=self.model, openai_client=self._client, ), model_settings=ModelSettings(tool_choice="auto"), ) # ── public API ─────────────────────────────────────────────────────── async def run(self, query: str) -> str: """Spawn MCP connection, run a single query, then clean up.""" mcp_server = self._create_mcp_server() async with mcp_server: agent = self._create_agent(mcp_server) logger.info("Google Slides MCP connected – agent ready") result = await Runner.run(agent, input=query) return result.final_output # ─── CLI entry point ────────────────────────────────────────────────────────── async def main(): agent = GoogleSlidesAgent() resp = await agent.run( "Create a new presentation titled 'Q3 Review'. My email is user@example.com" ) print("Agent Response:\n", resp) if __name__ == "__main__": from dotenv import load_dotenv, find_dotenv load_dotenv(find_dotenv()) asyncio.run(main())