Spaces:
Running
Running
File size: 4,277 Bytes
a66d4bd |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 |
"""
Google Slides Agent — specialized agent with only Slides MCP tools.
Uses MCPServerStdio (local subprocess) with create_static_tool_filter so the
agent sees *only* presentation tools. Zero network overhead.
"""
import asyncio
import logging
from openai import AsyncOpenAI
from agents import Agent, Runner, OpenAIChatCompletionsModel
from agents.model_settings import ModelSettings
try:
from .google_mcp_config import (
LONGCAT_API_KEY, LONGCAT_BASE_URL, MODEL_NAME,
SLIDES_TOOLS, create_google_mcp_server, USER_GOOGLE_EMAIL,
)
except ImportError:
from google_mcp_config import (
LONGCAT_API_KEY, LONGCAT_BASE_URL, MODEL_NAME,
SLIDES_TOOLS, create_google_mcp_server, USER_GOOGLE_EMAIL,
)
logger = logging.getLogger(__name__)
# Instructions given to the agent (passed as ``instructions=`` when the Agent
# is built). The backslash after the opening quotes suppresses the leading
# newline. The dashes below are real em dashes; they had been mis-encoded as
# "β", which would have been sent to the model verbatim.
SYSTEM_PROMPT = """\
You are a specialized Google Slides assistant. You can create, read,
and update presentations using the available tools.
Capabilities:
- **Create** new presentations with a title
- **Read** full presentation metadata and content (slides, layouts, masters)
- **Batch update** — apply multiple changes in one atomic request
(insert slides, add shapes, set text, change layouts, etc.)
- **Get page details** — read a specific slide or layout by page ID
- **Thumbnails** — generate a PNG thumbnail for any slide
- **Comments** — read, create, reply to, and resolve presentation comments
Rules:
1. The user's Google email is provided in the query — use it for every
tool call in the `user_google_email` parameter. NEVER ask the user
for their email; it is always supplied.
2. When modifying a presentation, first call `get_presentation` to
understand the current slide IDs and object structure.
3. For `batch_update_presentation`, compile all desired changes into a
single request array for atomicity.
4. Provide user-friendly summaries after changes (e.g. "Added 3 slides",
"Updated title on slide 2").
5. Use page object IDs (not indices) when referring to specific slides
in API calls.
"""
class GoogleSlidesAgent:
    """Thin wrapper around the OpenAI Agent SDK wired to Google Slides tools.

    Every call to :meth:`run` builds a new MCP server object through
    ``create_google_mcp_server``, enters it as an async context manager,
    runs one query through a freshly constructed ``Agent`` and then leaves
    the context again, so no MCP connection is kept between calls.

    Args:
        model: Model name handed to ``OpenAIChatCompletionsModel``.
        timeout: Timeout in seconds for the underlying ``AsyncOpenAI``
            client. Keyword-only; defaults to the previously hard-coded
            30 seconds so existing callers behave exactly as before.
    """

    def __init__(self, model: str = MODEL_NAME, *, timeout: float = 30.0):
        self.model = model
        # One client per wrapper instance; reused for every Agent built later.
        self._client = AsyncOpenAI(
            api_key=LONGCAT_API_KEY,
            base_url=LONGCAT_BASE_URL,
            timeout=timeout,
        )

    # ── factory helpers ──────────────────────────────────────────────────

    def _create_mcp_server(self):
        """Return a new MCP server for the Slides service.

        Delegates to ``create_google_mcp_server`` with ``service="slides"``
        and the ``SLIDES_TOOLS`` name list; per the module docstring this is
        expected to be a local subprocess exposing only presentation tools.
        """
        return create_google_mcp_server(service="slides", tool_names=SLIDES_TOOLS)

    def _create_agent(self, mcp_server) -> Agent:
        """Build the Slides ``Agent`` bound to *mcp_server*.

        Args:
            mcp_server: The MCP server object the agent should draw its
                tools from.

        Returns:
            An ``Agent`` using ``SYSTEM_PROMPT`` as instructions, this
            wrapper's model/client pair, and ``tool_choice="auto"``.
        """
        return Agent(
            name="Google Slides Agent",
            instructions=SYSTEM_PROMPT,
            mcp_servers=[mcp_server],
            model=OpenAIChatCompletionsModel(
                model=self.model,
                openai_client=self._client,
            ),
            model_settings=ModelSettings(tool_choice="auto"),
        )

    # ── public API ───────────────────────────────────────────────────────

    async def run(self, query: str) -> str:
        """Spawn MCP connection, run a single query, then clean up.

        Args:
            query: The user request forwarded to the agent as its input.
                The system prompt expects it to contain the user's Google
                email.

        Returns:
            The ``final_output`` of the completed run.
        """
        mcp_server = self._create_mcp_server()
        # The context manager scopes the server's lifetime to this one query.
        async with mcp_server:
            agent = self._create_agent(mcp_server)
            logger.info("Google Slides MCP connected — agent ready")
            result = await Runner.run(agent, input=query)
            return result.final_output
# ─── CLI entry point ──────────────────────────────────────────────────────────
async def main():
    """Demo driver: send one hard-coded request to the agent and print the reply."""
    slides_agent = GoogleSlidesAgent()
    demo_query = (
        "Create a new presentation titled 'Q3 Review'. My email is user@example.com"
    )
    reply = await slides_agent.run(demo_query)
    print("Agent Response:\n", reply)
if __name__ == "__main__":
    # Script mode only: load the .env file located by find_dotenv() into the
    # process environment, then drive the async demo to completion.
    # NOTE(review): the config constants were already imported at module load,
    # before this load_dotenv() call — confirm google_mcp_config does not
    # rely on .env values being present at its own import time.
    from dotenv import find_dotenv, load_dotenv

    env_file = find_dotenv()
    load_dotenv(env_file)
    asyncio.run(main())
|