File size: 4,261 Bytes
a66d4bd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
"""
Google Docs Agent – specialized agent with only Docs MCP tools.

Uses MCPServerStdio (local subprocess) with create_static_tool_filter so the
agent sees *only* document-related tools.  Zero network overhead.
"""

import asyncio
import logging
from openai import AsyncOpenAI
from agents import Agent, Runner, OpenAIChatCompletionsModel
from agents.model_settings import ModelSettings

try:
    from .google_mcp_config import (
        LONGCAT_API_KEY, LONGCAT_BASE_URL, MODEL_NAME,
        DOCS_TOOLS, create_google_mcp_server, USER_GOOGLE_EMAIL,
    )
except ImportError:
    from google_mcp_config import (
        LONGCAT_API_KEY, LONGCAT_BASE_URL, MODEL_NAME,
        DOCS_TOOLS, create_google_mcp_server, USER_GOOGLE_EMAIL,
    )

logger = logging.getLogger(__name__)

SYSTEM_PROMPT = """\
You are a specialized Google Docs assistant. You can create, read, modify,
and manage Google Documents using the available tools.

Capabilities:
- **Search & list** documents by title or folder
- **Read** full document content or inspect its structural elements
- **Create** new documents with initial content
- **Edit** text β€” insert, modify, find-and-replace, batch updates
- **Insert elements** β€” images, tables (with data), page breaks
- **Headers & footers** β€” create and update
- **Paragraph styling** β€” alignment, spacing, indentation, named styles
- **Export** documents to PDF
- **Comments** β€” read, create, reply to, and resolve

Rules:
1. The user's Google email is provided in the query β€” use it for every
   tool call in the `user_google_email` parameter. NEVER ask the user
   for their email; it is always supplied.
2. When inserting content, be mindful of the document index positions.
   Use `inspect_doc_structure` first when modifying existing docs to
   understand the current element layout.
3. For batch updates, send all changes in a single `batch_update_doc`
   call when possible for atomic application.
4. After modifications, briefly confirm what was changed.
"""


class GoogleDocsAgent:
    """Thin wrapper around the OpenAI Agent SDK wired to Google Docs tools."""

    def __init__(self, model: str = MODEL_NAME):
        self.model = model
        self._client = AsyncOpenAI(
            api_key=LONGCAT_API_KEY,
            base_url=LONGCAT_BASE_URL,
            timeout=30.0,
        )

    # ── factory helpers ──────────────────────────────────────────────────
    def _create_mcp_server(self):
        """Spawn a local MCP subprocess with only Docs tools loaded."""
        return create_google_mcp_server(service="docs", tool_names=DOCS_TOOLS)

    def _create_agent(self, mcp_server) -> Agent:
        return Agent(
            name="Google Docs Agent",
            instructions=SYSTEM_PROMPT,
            mcp_servers=[mcp_server],
            model=OpenAIChatCompletionsModel(
                model=self.model,
                openai_client=self._client,
            ),
            model_settings=ModelSettings(tool_choice="auto"),
        )

    # ── public API ───────────────────────────────────────────────────────
    async def run(self, query: str) -> str:
        """Spawn MCP connection, run a single query, then clean up."""
        mcp_server = self._create_mcp_server()
        async with mcp_server:
            agent = self._create_agent(mcp_server)
            logger.info("Google Docs MCP connected – agent ready")
            result = await Runner.run(agent, input=query)
            return result.final_output


# ─── CLI entry point ──────────────────────────────────────────────────────────
async def main():
    agent = GoogleDocsAgent()
    resp = await agent.run(
        "Search for documents with 'meeting notes' in the title. "
        "My email is user@example.com"
    )
    print("Agent Response:\n", resp)


if __name__ == "__main__":
    from dotenv import load_dotenv, find_dotenv
    load_dotenv(find_dotenv())
    asyncio.run(main())