import type { FastMCP } from 'fastmcp'; import { UserError } from 'fastmcp'; import { z } from 'zod'; import { getDocsClient, getDriveClient } from '../../clients.js'; import { DocumentIdParameter, NotImplementedError } from '../../types.js'; import * as GDocsHelpers from '../../googleDocsApiHelpers.js'; import { docsJsonToMarkdown } from '../../markdown-transformer/index.js'; export function register(server: FastMCP) { server.addTool({ name: 'readDocument', description: "Reads the content of a Google Document. Returns plain text by default. Use format='markdown' to get formatted content suitable for editing and re-uploading with replaceDocumentWithMarkdown, or format='json' for the raw document structure.", parameters: DocumentIdParameter.extend({ format: z .enum(['text', 'json', 'markdown']) .optional() .default('text') .describe( "Output format: 'text' (plain text), 'json' (raw API structure, complex), 'markdown' (experimental conversion)." ), maxLength: z .number() .optional() .describe( 'Maximum character limit for text output. If not specified, returns full document content. Use this to limit very large documents.' ), tabId: z .string() .optional() .describe( 'The ID of the specific tab to read. If not specified, reads the first tab (or legacy document.body for documents without tabs).' ), }), execute: async (args, { log }) => { const docs = await getDocsClient(); log.info( `Reading Google Doc: ${args.documentId}, Format: ${args.format}${args.tabId ? `, Tab: ${args.tabId}` : ''}` ); try { // Determine if we need tabs content const needsTabsContent = !!args.tabId; const fields = args.format === 'json' || args.format === 'markdown' ? '*' // Get everything for structure analysis : 'body(content(paragraph(elements(textRun(content)))))'; // Just text content const res = await docs.documents.get({ documentId: args.documentId, includeTabsContent: needsTabsContent, fields: needsTabsContent ? 'title,documentId,tabs(tabProperties,childTabs,documentTab(body,documentStyle,namedStyles,lists,inlineObjects,positionedObjects))' : fields, }); log.info(`Fetched doc: ${args.documentId}${args.tabId ? ` (tab: ${args.tabId})` : ''}`); // If tabId is specified, find the specific tab let contentSource: any; if (args.tabId) { const targetTab = GDocsHelpers.findTabById(res.data, args.tabId); if (!targetTab) { throw new UserError(`Tab with ID "${args.tabId}" not found in document.`); } if (!targetTab.documentTab) { throw new UserError( `Tab "${args.tabId}" does not have content (may not be a document tab).` ); } contentSource = { body: targetTab.documentTab.body }; log.info(`Using content from tab: ${targetTab.tabProperties?.title || 'Untitled'}`); } else { // Use the document body (backward compatible) contentSource = res.data; } if (args.format === 'json') { const jsonContent = JSON.stringify(contentSource, null, 2); // Apply length limit to JSON if specified if (args.maxLength && jsonContent.length > args.maxLength) { return ( jsonContent.substring(0, args.maxLength) + `\n... [JSON truncated: ${jsonContent.length} total chars]` ); } return jsonContent; } if (args.format === 'markdown') { const markdownContent = docsJsonToMarkdown(contentSource); const totalLength = markdownContent.length; log.info(`Generated markdown: ${totalLength} characters`); // Apply length limit to markdown if specified if (args.maxLength && totalLength > args.maxLength) { const truncatedContent = markdownContent.substring(0, args.maxLength); return `${truncatedContent}\n\n... [Markdown truncated to ${args.maxLength} chars of ${totalLength} total. Use maxLength parameter to adjust limit or remove it to get full content.]`; } return markdownContent; } // Default: Text format - extract all text content let textContent = ''; let elementCount = 0; // Process all content elements from contentSource contentSource.body?.content?.forEach((element: any) => { elementCount++; // Handle paragraphs if (element.paragraph?.elements) { element.paragraph.elements.forEach((pe: any) => { if (pe.textRun?.content) { textContent += pe.textRun.content; } }); } // Handle tables if (element.table?.tableRows) { element.table.tableRows.forEach((row: any) => { row.tableCells?.forEach((cell: any) => { cell.content?.forEach((cellElement: any) => { cellElement.paragraph?.elements?.forEach((pe: any) => { if (pe.textRun?.content) { textContent += pe.textRun.content; } }); }); }); }); } }); if (!textContent.trim()) return 'Document found, but appears empty.'; const totalLength = textContent.length; log.info(`Document contains ${totalLength} characters across ${elementCount} elements`); log.info(`maxLength parameter: ${args.maxLength || 'not specified'}`); // Apply length limit only if specified if (args.maxLength && totalLength > args.maxLength) { const truncatedContent = textContent.substring(0, args.maxLength); log.info(`Truncating content from ${totalLength} to ${args.maxLength} characters`); return `Content (truncated to ${args.maxLength} chars of ${totalLength} total):\n---\n${truncatedContent}\n\n... [Document continues for ${totalLength - args.maxLength} more characters. Use maxLength parameter to adjust limit or remove it to get full content.]`; } // Return full content const fullResponse = `Content (${totalLength} characters):\n---\n${textContent}`; const responseLength = fullResponse.length; log.info( `Returning full content: ${responseLength} characters in response (${totalLength} content + ${responseLength - totalLength} metadata)` ); return fullResponse; } catch (error: any) { log.error( `Error reading doc ${args.documentId}: ${error.message || 'Unknown error'} (code: ${error.code || 'N/A'})` ); // Handle errors thrown by helpers or API directly if (error instanceof UserError) throw error; if (error instanceof NotImplementedError) throw error; // Generic fallback for API errors not caught by helpers if (error.code === 404) throw new UserError(`Doc not found (ID: ${args.documentId}).`); if (error.code === 403) { // The Docs API may be blocked by Workspace admin policy even when the Drive API is // accessible. Fall back to drive.files.export() for plain-text format, which uses // the Drive API and respects supportsAllDrives for Shared Drive documents. if (!args.format || args.format === 'text') { try { log.info( `Docs API returned 403, falling back to Drive export for ${args.documentId}` ); const drive = await getDriveClient(); const exportRes = await drive.files.export( { fileId: args.documentId, mimeType: 'text/plain' }, { responseType: 'text' } ); const textContent = (exportRes as any).data as string; if (!textContent?.trim()) return 'Document found, but appears empty.'; if (args.maxLength && textContent.length > args.maxLength) { return `Content (truncated to ${args.maxLength} chars of ${textContent.length} total):\n---\n${textContent.substring(0, args.maxLength)}\n\n... [Document continues. Use maxLength parameter to adjust limit or remove it to get full content.]`; } return `Content (${textContent.length} characters):\n---\n${textContent}`; } catch (exportError: any) { log.error(`Drive export fallback also failed: ${exportError.message}`); } } throw new UserError( `Permission denied for doc (ID: ${args.documentId}). The Google Docs API may be restricted by your Workspace admin.` ); } // Extract detailed error information from Google API response const errorDetails = error.response?.data?.error?.message || error.message || 'Unknown error'; const errorCode = error.response?.data?.error?.code || error.code; throw new UserError( `Failed to read doc: ${errorDetails}${errorCode ? ` (Code: ${errorCode})` : ''}` ); } }, }); }