Spaces:
Sleeping
Sleeping
File size: 9,383 Bytes
7dc28be | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 | import type { FastMCP } from 'fastmcp';
import { UserError } from 'fastmcp';
import { z } from 'zod';
import { getDocsClient, getDriveClient } from '../../clients.js';
import { DocumentIdParameter, NotImplementedError } from '../../types.js';
import * as GDocsHelpers from '../../googleDocsApiHelpers.js';
import { docsJsonToMarkdown } from '../../markdown-transformer/index.js';
/** Field mask fragment selecting only the text runs of a paragraph. */
const PARAGRAPH_TEXT_FIELDS = 'paragraph(elements(textRun(content)))';

/**
 * Field mask used for plain-text reads of `document.body`.
 *
 * Table cells are requested alongside top-level paragraphs; without the
 * `table(...)` part the response carries no cell text and the table branch of
 * the text extraction has nothing to read.
 */
const BODY_TEXT_FIELDS = `body(content(${PARAGRAPH_TEXT_FIELDS},table(tableRows(tableCells(content(${PARAGRAPH_TEXT_FIELDS}))))))`;

/** Field mask used whenever a specific tab is requested. */
const TAB_CONTENT_FIELDS =
  'title,documentId,tabs(tabProperties,childTabs,documentTab(body,documentStyle,namedStyles,lists,inlineObjects,positionedObjects))';

/** Minimal shape of a structural element needed for text extraction. */
interface TextElementLike {
  paragraph?: {
    elements?: Array<{ textRun?: { content?: string | null } | null }> | null;
  } | null;
  table?: {
    tableRows?: Array<{
      tableCells?: Array<{ content?: TextElementLike[] | null }> | null;
    }> | null;
  } | null;
}

/** Loosely-typed view of the properties this module reads from a thrown error. */
interface ApiErrorLike {
  message?: string;
  code?: number | string;
  response?: {
    status?: number;
    data?: { error?: { message?: string; code?: number | string } };
  };
}

/**
 * Concatenates the text-run content of the given structural elements, in order.
 * Paragraphs contribute their text runs; tables contribute the text of every
 * cell, recursing into cell content so nested tables are included.
 *
 * @param elements - Structural elements to read; `null`/`undefined` yields `''`.
 * @returns The concatenated text.
 */
function collectText(elements: readonly TextElementLike[] | null | undefined): string {
  let text = '';
  for (const element of elements ?? []) {
    for (const run of element.paragraph?.elements ?? []) {
      text += run.textRun?.content ?? '';
    }
    for (const row of element.table?.tableRows ?? []) {
      for (const cell of row.tableCells ?? []) {
        text += collectText(cell.content);
      }
    }
  }
  return text;
}

/**
 * Normalises the optional `maxLength` argument.
 *
 * @param maxLength - Caller-supplied limit, if any.
 * @returns The limit when it is a positive number, otherwise `undefined`
 *   (meaning "do not truncate"). Zero and negative values are ignored.
 */
function effectiveLimit(maxLength: number | undefined): number | undefined {
  return maxLength !== undefined && maxLength > 0 ? maxLength : undefined;
}

/** Narrows an arbitrary thrown value to the error properties read below. */
function toApiError(error: unknown): ApiErrorLike {
  return typeof error === 'object' && error !== null ? (error as ApiErrorLike) : {};
}

/**
 * Extracts an HTTP status from an error, accepting numeric or numeric-string
 * values found on `code`, `response.status` or `response.data.error.code`.
 *
 * @returns The first value in the 100-599 range, or `undefined` if none.
 */
function httpStatusOf(error: ApiErrorLike): number | undefined {
  const candidates = [error.code, error.response?.status, error.response?.data?.error?.code];
  for (const candidate of candidates) {
    const status = Number(candidate);
    if (Number.isInteger(status) && status >= 100 && status <= 599) return status;
  }
  return undefined;
}

/**
 * Registers the `readDocument` tool on the given server.
 *
 * The tool fetches a Google Doc (optionally a single tab) and returns it as
 * plain text, raw JSON, or markdown, optionally truncated to `maxLength`
 * characters.
 *
 * @param server - The FastMCP server to add the tool to.
 * @throws UserError (from the tool's `execute`) when the tab or document is not
 *   found, access is denied, or the API call fails for another reason.
 */
export function register(server: FastMCP) {
  server.addTool({
    name: 'readDocument',
    description:
      "Reads the content of a Google Document. Returns plain text by default. Use format='markdown' to get formatted content suitable for editing and re-uploading with replaceDocumentWithMarkdown, or format='json' for the raw document structure.",
    parameters: DocumentIdParameter.extend({
      format: z
        .enum(['text', 'json', 'markdown'])
        .optional()
        .default('text')
        .describe(
          "Output format: 'text' (plain text), 'json' (raw API structure, complex), 'markdown' (experimental conversion)."
        ),
      maxLength: z
        .number()
        .optional()
        .describe(
          'Maximum character limit for text output. If not specified, returns full document content. Use this to limit very large documents.'
        ),
      tabId: z
        .string()
        .optional()
        .describe(
          'The ID of the specific tab to read. If not specified, reads the first tab (or legacy document.body for documents without tabs).'
        ),
    }),
    execute: async (args, { log }) => {
      const docs = await getDocsClient();
      log.info(
        `Reading Google Doc: ${args.documentId}, Format: ${args.format}${args.tabId ? `, Tab: ${args.tabId}` : ''}`
      );

      // Non-positive limits are treated the same as "not specified".
      const limit = effectiveLimit(args.maxLength);

      try {
        // Tabs content is only requested when a specific tab was asked for.
        const needsTabsContent = !!args.tabId;
        const wantsStructure = args.format === 'json' || args.format === 'markdown';
        const fields = needsTabsContent
          ? TAB_CONTENT_FIELDS
          : wantsStructure
            ? '*' // Get everything for structure analysis
            : BODY_TEXT_FIELDS; // Just text content (paragraphs and table cells)

        const res = await docs.documents.get({
          documentId: args.documentId,
          includeTabsContent: needsTabsContent,
          fields,
        });
        log.info(`Fetched doc: ${args.documentId}${args.tabId ? ` (tab: ${args.tabId})` : ''}`);

        // If tabId is specified, read from that tab; otherwise use the document itself.
        // eslint-disable-next-line @typescript-eslint/no-explicit-any -- shape depends on API response / helper types
        let contentSource: any;
        if (args.tabId) {
          const targetTab = GDocsHelpers.findTabById(res.data, args.tabId);
          if (!targetTab) {
            throw new UserError(`Tab with ID "${args.tabId}" not found in document.`);
          }
          if (!targetTab.documentTab) {
            throw new UserError(
              `Tab "${args.tabId}" does not have content (may not be a document tab).`
            );
          }
          // NOTE(review): only `body` is forwarded although lists/inlineObjects/etc. are
          // fetched for the tab; confirm whether docsJsonToMarkdown needs those as well.
          contentSource = { body: targetTab.documentTab.body };
          log.info(`Using content from tab: ${targetTab.tabProperties?.title || 'Untitled'}`);
        } else {
          // Use the document body (backward compatible)
          contentSource = res.data;
        }

        if (args.format === 'json') {
          const jsonContent = JSON.stringify(contentSource, null, 2);
          // Apply length limit to JSON if specified
          if (limit !== undefined && jsonContent.length > limit) {
            return (
              jsonContent.substring(0, limit) +
              `\n... [JSON truncated: ${jsonContent.length} total chars]`
            );
          }
          return jsonContent;
        }

        if (args.format === 'markdown') {
          const markdownContent = docsJsonToMarkdown(contentSource);
          const totalLength = markdownContent.length;
          log.info(`Generated markdown: ${totalLength} characters`);
          // Apply length limit to markdown if specified
          if (limit !== undefined && totalLength > limit) {
            const truncatedContent = markdownContent.substring(0, limit);
            return `${truncatedContent}\n\n... [Markdown truncated to ${limit} chars of ${totalLength} total. Use maxLength parameter to adjust limit or remove it to get full content.]`;
          }
          return markdownContent;
        }

        // Default: text format - concatenate paragraph and table text.
        const topLevelElements: TextElementLike[] = contentSource.body?.content ?? [];
        const elementCount = topLevelElements.length;
        const textContent = collectText(topLevelElements);

        if (!textContent.trim()) return 'Document found, but appears empty.';

        const totalLength = textContent.length;
        log.info(`Document contains ${totalLength} characters across ${elementCount} elements`);
        log.info(`maxLength parameter: ${limit ?? 'not specified'}`);

        // Apply length limit only if specified
        if (limit !== undefined && totalLength > limit) {
          const truncatedContent = textContent.substring(0, limit);
          log.info(`Truncating content from ${totalLength} to ${limit} characters`);
          return `Content (truncated to ${limit} chars of ${totalLength} total):\n---\n${truncatedContent}\n\n... [Document continues for ${totalLength - limit} more characters. Use maxLength parameter to adjust limit or remove it to get full content.]`;
        }

        // Return full content
        const fullResponse = `Content (${totalLength} characters):\n---\n${textContent}`;
        const responseLength = fullResponse.length;
        log.info(
          `Returning full content: ${responseLength} characters in response (${totalLength} content + ${responseLength - totalLength} metadata)`
        );
        return fullResponse;
      } catch (error: unknown) {
        const apiError = toApiError(error);
        log.error(
          `Error reading doc ${args.documentId}: ${apiError.message || 'Unknown error'} (code: ${apiError.code || 'N/A'})`
        );

        // Errors raised deliberately above (or by helpers) pass through unchanged.
        if (error instanceof UserError) throw error;
        if (error instanceof NotImplementedError) throw error;

        // The status may arrive as a number or a numeric string, and on different
        // properties, so it is normalised before comparing.
        const status = httpStatusOf(apiError);

        if (status === 404) throw new UserError(`Doc not found (ID: ${args.documentId}).`);

        if (status === 403) {
          // The Docs API may be blocked by Workspace admin policy even when the Drive API is
          // accessible. Fall back to drive.files.export() for plain-text format, which uses
          // the Drive API and respects supportsAllDrives for Shared Drive documents.
          if (!args.format || args.format === 'text') {
            try {
              log.info(
                `Docs API returned 403, falling back to Drive export for ${args.documentId}`
              );
              const drive = await getDriveClient();
              // NOTE(review): the export request carries no tab information, so when
              // args.tabId is set this presumably returns whole-document text - confirm.
              const exportRes = await drive.files.export(
                { fileId: args.documentId, mimeType: 'text/plain' },
                { responseType: 'text' }
              );
              // eslint-disable-next-line @typescript-eslint/no-explicit-any -- response type depends on responseType option
              const textContent = (exportRes as any).data as string;
              if (!textContent?.trim()) return 'Document found, but appears empty.';
              if (limit !== undefined && textContent.length > limit) {
                return `Content (truncated to ${limit} chars of ${textContent.length} total):\n---\n${textContent.substring(0, limit)}\n\n... [Document continues. Use maxLength parameter to adjust limit or remove it to get full content.]`;
              }
              return `Content (${textContent.length} characters):\n---\n${textContent}`;
            } catch (exportError: unknown) {
              // Best effort: a failed fallback is logged and reported as the original 403.
              log.error(`Drive export fallback also failed: ${toApiError(exportError).message}`);
            }
          }
          throw new UserError(
            `Permission denied for doc (ID: ${args.documentId}). The Google Docs API may be restricted by your Workspace admin.`
          );
        }

        // Extract detailed error information from Google API response
        const errorDetails =
          apiError.response?.data?.error?.message || apiError.message || 'Unknown error';
        const errorCode = apiError.response?.data?.error?.code || apiError.code;
        throw new UserError(
          `Failed to read doc: ${errorDetails}${errorCode ? ` (Code: ${errorCode})` : ''}`
        );
      }
    },
  });
}
|