File size: 4,277 Bytes
a66d4bd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
"""
Google Slides Agent – specialized agent with only Slides MCP tools.

Uses MCPServerStdio (local subprocess) with create_static_tool_filter so the
agent sees *only* presentation tools.  Zero network overhead.
"""

import asyncio
import logging
from openai import AsyncOpenAI
from agents import Agent, Runner, OpenAIChatCompletionsModel
from agents.model_settings import ModelSettings

try:
    from .google_mcp_config import (
        LONGCAT_API_KEY, LONGCAT_BASE_URL, MODEL_NAME,
        SLIDES_TOOLS, create_google_mcp_server, USER_GOOGLE_EMAIL,
    )
except ImportError:
    from google_mcp_config import (
        LONGCAT_API_KEY, LONGCAT_BASE_URL, MODEL_NAME,
        SLIDES_TOOLS, create_google_mcp_server, USER_GOOGLE_EMAIL,
    )

logger = logging.getLogger(__name__)

# Instructions handed to the Agent as its system prompt.  The text is sent to
# the model verbatim, so it must stay clean UTF-8: the em dashes below were
# previously stored as the mojibake sequence produced by decoding their UTF-8
# bytes with a single-byte codepage.
SYSTEM_PROMPT = """\
You are a specialized Google Slides assistant. You can create, read,
and update presentations using the available tools.

Capabilities:
- **Create** new presentations with a title
- **Read** full presentation metadata and content (slides, layouts, masters)
- **Batch update** — apply multiple changes in one atomic request
  (insert slides, add shapes, set text, change layouts, etc.)
- **Get page details** — read a specific slide or layout by page ID
- **Thumbnails** — generate a PNG thumbnail for any slide
- **Comments** — read, create, reply to, and resolve presentation comments

Rules:
1. The user's Google email is provided in the query — use it for every
   tool call in the `user_google_email` parameter. NEVER ask the user
   for their email; it is always supplied.
2. When modifying a presentation, first call `get_presentation` to
   understand the current slide IDs and object structure.
3. For `batch_update_presentation`, compile all desired changes into a
   single request array for atomicity.
4. Provide user-friendly summaries after changes (e.g. "Added 3 slides",
   "Updated title on slide 2").
5. Use page object IDs (not indices) when referring to specific slides
   in API calls.
"""


class GoogleSlidesAgent:
    """Pair an AsyncOpenAI client with a Slides MCP server behind one `run` call.

    The instance keeps only the model name and the client; the MCP server and
    the Agent object are built afresh on every call to `run`.
    """

    def __init__(self, model: str = MODEL_NAME):
        """Remember *model* and build the client from the config constants."""
        self.model = model
        client_options = {
            "api_key": LONGCAT_API_KEY,
            "base_url": LONGCAT_BASE_URL,
            "timeout": 30.0,
        }
        self._client = AsyncOpenAI(**client_options)

    # ── construction helpers ─────────────────────────────────────────────
    def _create_mcp_server(self):
        """Ask the config helper for a "slides" server, passing SLIDES_TOOLS."""
        slides_server = create_google_mcp_server(
            service="slides",
            tool_names=SLIDES_TOOLS,
        )
        return slides_server

    def _create_agent(self, mcp_server) -> Agent:
        """Assemble an Agent bound to *mcp_server* and this instance's model."""
        chat_model = OpenAIChatCompletionsModel(
            model=self.model,
            openai_client=self._client,
        )
        settings = ModelSettings(tool_choice="auto")
        return Agent(
            name="Google Slides Agent",
            instructions=SYSTEM_PROMPT,
            mcp_servers=[mcp_server],
            model=chat_model,
            model_settings=settings,
        )

    # ── entry point for callers ──────────────────────────────────────────
    async def run(self, query: str) -> str:
        """Answer one *query* and return the run's ``final_output``.

        A new MCP server is created and used as an async context manager for
        the duration of the run; the Agent is built inside that context.
        """
        server = self._create_mcp_server()
        async with server:
            slides_agent = self._create_agent(server)
            logger.info("Google Slides MCP connected – agent ready")
            outcome = await Runner.run(slides_agent, input=query)
            return outcome.final_output


# ─── CLI entry point ──────────────────────────────────────────────────────────
async def main():
    """Send one sample query through a new GoogleSlidesAgent and print the reply."""
    demo_query = (
        "Create a new presentation titled 'Q3 Review'. My email is user@example.com"
    )
    slides_agent = GoogleSlidesAgent()
    reply = await slides_agent.run(demo_query)
    print("Agent Response:\n", reply)


if __name__ == "__main__":
    # Script mode only: let python-dotenv locate and load a .env file, then
    # drive the async demo to completion.
    import dotenv

    dotenv.load_dotenv(dotenv.find_dotenv())
    asyncio.run(main())