File size: 7,069 Bytes

e1cc3bc

/**
 * PDF MCP Server - Didactic Example
 *
 * Demonstrates:
 * - Chunked data through size-limited tool responses
 * - Model context updates (current page text + selection)
 * - Display modes: fullscreen with scrolling vs inline with resize
 * - External link opening (openLink)
 */
import {
  registerAppResource,
  registerAppTool,
  RESOURCE_MIME_TYPE,
} from "@modelcontextprotocol/ext-apps/server";
import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
import type {
  CallToolResult,
  ReadResourceResult,
} from "@modelcontextprotocol/sdk/types.js";
import fs from "node:fs/promises";
import path from "node:path";
import { z } from "zod";

import {
  buildPdfIndex,
  findEntryByUrl,
  createEntry,
  isArxivUrl,
  isFileUrl,
  toFileUrl,
  normalizeArxivUrl,
} from "./src/pdf-indexer.js";
import { loadPdfBytesChunk, populatePdfMetadata } from "./src/pdf-loader.js";
import {
  ReadPdfBytesInputSchema,
  PdfBytesChunkSchema,
  type PdfIndex,
} from "./src/types.js";
import { startServer } from "./server-utils.js";

// Directory named "dist" next to this module; the viewer HTML (mcp-app.html) is read from here.
const DIST_DIR = path.join(import.meta.dirname, "dist");
// URI under which the viewer UI resource is registered and that display_pdf points to via _meta.ui.
const RESOURCE_URI = "ui://pdf-viewer/mcp-app.html";
// Used when display_pdf is called without a url and when no PDFs are given on the command line.
const DEFAULT_PDF = "https://arxiv.org/pdf/1706.03762"; // Attention Is All You Need

// Module-level index of known PDFs. Stays null until main() assigns the result of
// buildPdfIndex(); every tool handler throws "Not initialized" while it is null.
let pdfIndex: PdfIndex | null = null;

/** Entry type stored in the PDF index, derived from the index type itself. */
type IndexedPdfEntry = PdfIndex["entries"][number];

/**
 * Returns the module-level PDF index.
 *
 * @throws Error "Not initialized" when the index has not been assigned yet.
 */
function requirePdfIndex(): PdfIndex {
  if (!pdfIndex) throw new Error("Not initialized");
  return pdfIndex;
}

/**
 * Finds the index entry for a URL, adding arxiv URLs to the index on demand.
 *
 * Arxiv URLs are normalized before lookup. Unknown file URLs are always
 * rejected; unknown non-arxiv URLs are rejected with the caller-supplied
 * message so each tool keeps its own wording.
 *
 * @param index - Index to search and, for new arxiv URLs, to extend.
 * @param rawUrl - URL as received from the tool caller.
 * @param unsupportedUrlMessage - Builds the error message for an unknown,
 *   non-file, non-arxiv URL (receives the normalized URL).
 * @returns The existing or newly created entry.
 * @throws Error when the URL is unknown and cannot be loaded dynamically.
 */
async function resolvePdfEntry(
  index: PdfIndex,
  rawUrl: string,
  unsupportedUrlMessage: (url: string) => string,
): Promise<IndexedPdfEntry> {
  const url = isArxivUrl(rawUrl) ? normalizeArxivUrl(rawUrl) : rawUrl;

  const existing = findEntryByUrl(index, url);
  if (existing) return existing;

  if (isFileUrl(url)) {
    throw new Error("File URLs must be in the initial list");
  }
  if (!isArxivUrl(url)) {
    throw new Error(unsupportedUrlMessage(url));
  }

  const created = createEntry(url);
  await populatePdfMetadata(created);

  // Another request for the same URL may have registered an entry while the
  // metadata was loading; reuse it rather than pushing a duplicate.
  const addedMeanwhile = findEntryByUrl(index, url);
  if (addedMeanwhile) return addedMeanwhile;

  index.entries.push(created);
  return created;
}

/**
 * Creates an MCP server exposing the PDF tools and the viewer UI resource.
 *
 * Registers:
 * - `list_pdfs`: returns the current index entries as text and structured content.
 * - `read_pdf_bytes` (visibility: app): returns one chunk of a PDF's bytes.
 * - `display_pdf`: returns viewer metadata and links the viewer UI resource.
 * - the UI resource at `RESOURCE_URI`, served from `dist/mcp-app.html`.
 *
 * All tool handlers throw "Not initialized" until the module-level index is set.
 *
 * @returns The configured, not-yet-connected server.
 */
export function createServer(): McpServer {
  const server = new McpServer({ name: "PDF Server", version: "1.0.0" });

  // Tool: list_pdfs
  server.tool(
    "list_pdfs",
    "List indexed PDFs",
    {},
    async (): Promise<CallToolResult> => {
      const index = requirePdfIndex();
      return {
        content: [
          { type: "text", text: JSON.stringify(index.entries, null, 2) },
        ],
        structuredContent: { entries: index.entries },
      };
    },
  );

  // Tool: read_pdf_bytes (app-only) - Chunked binary loading
  registerAppTool(
    server,
    "read_pdf_bytes",
    {
      title: "Read PDF Bytes",
      description: "Load binary data in chunks",
      inputSchema: ReadPdfBytesInputSchema.shape,
      outputSchema: PdfBytesChunkSchema,
      _meta: { ui: { visibility: ["app"] } },
    },
    async (args: unknown): Promise<CallToolResult> => {
      const index = requirePdfIndex();
      const { url, offset, byteCount } = ReadPdfBytesInputSchema.parse(args);

      // Dynamically add arxiv URLs (handles server restart between display_pdf and read_pdf_bytes)
      const entry = await resolvePdfEntry(
        index,
        url,
        (unknownUrl) => `PDF not found: ${unknownUrl}`,
      );

      const chunk = await loadPdfBytesChunk(entry, offset, byteCount);
      return {
        content: [
          {
            type: "text",
            text: `${chunk.byteCount} bytes at ${chunk.offset}/${chunk.totalBytes}`,
          },
        ],
        structuredContent: chunk,
      };
    },
  );

  // Tool: display_pdf - Interactive viewer with UI
  registerAppTool(
    server,
    "display_pdf",
    {
      title: "Display PDF",
      description: `Display an interactive PDF viewer in the chat.

Use this tool when the user asks to view, display, read, or open a PDF. Accepts:
- URLs from list_pdfs (preloaded PDFs)
- Any arxiv.org URL (loaded dynamically)

The viewer supports zoom, navigation, text selection, and fullscreen mode.`,
      inputSchema: {
        url: z
          .string()
          .default(DEFAULT_PDF)
          .describe("PDF URL (arxiv.org for dynamic loading)"),
        page: z.number().min(1).default(1).describe("Initial page"),
      },
      outputSchema: z.object({
        url: z.string(),
        title: z.string().optional(),
        pageCount: z.number(),
        initialPage: z.number(),
      }),
      _meta: { ui: { resourceUri: RESOURCE_URI } },
    },
    async ({ url, page }): Promise<CallToolResult> => {
      const index = requirePdfIndex();

      const entry = await resolvePdfEntry(
        index,
        url,
        () => "Only arxiv.org URLs can be loaded dynamically",
      );

      const { title, pageCount } = entry.metadata;
      const result = {
        url: entry.url,
        title,
        pageCount,
        // Clamp to the last page, but never below 1: pages are 1-based per the
        // input schema, and a pageCount of 0 would otherwise yield page 0.
        initialPage: Math.max(1, Math.min(page, pageCount)),
      };

      return {
        content: [
          {
            type: "text",
            text: `Displaying interactive PDF viewer${title ? ` for "${title}"` : ""} (${entry.url}, ${pageCount} pages)`,
          },
        ],
        structuredContent: result,
      };
    },
  );

  // Resource: UI HTML
  registerAppResource(
    server,
    RESOURCE_URI,
    RESOURCE_URI,
    { mimeType: RESOURCE_MIME_TYPE },
    async (): Promise<ReadResourceResult> => {
      // Read on every request rather than caching the file contents.
      const html = await fs.readFile(
        path.join(DIST_DIR, "mcp-app.html"),
        "utf-8",
      );
      return {
        contents: [
          { uri: RESOURCE_URI, mimeType: RESOURCE_MIME_TYPE, text: html },
        ],
      };
    },
  );

  return server;
}

// CLI
/**
 * Reads the command line (everything after the script path).
 *
 * `--stdio` switches on stdio mode; any other argument starting with `-` is
 * ignored. Remaining arguments are treated as PDF locations: arguments without
 * an `http://`, `https://` or `file://` prefix are converted with `toFileUrl`,
 * and arxiv URLs are normalized. When no location is given, `DEFAULT_PDF` is used.
 *
 * @returns The PDF URLs to index and whether stdio mode was requested.
 */
function parseArgs(): { urls: string[]; stdio: boolean } {
  const knownPrefixes = ["http://", "https://", "file://"];
  const cliArgs = process.argv.slice(2);

  const stdio = cliArgs.includes("--stdio");

  // "--stdio" itself starts with "-", so one filter drops it and every other flag.
  const urls = cliArgs
    .filter((token) => !token.startsWith("-"))
    .map((token) => {
      const hasKnownPrefix = knownPrefixes.some((prefix) =>
        token.startsWith(prefix),
      );
      if (!hasKnownPrefix) {
        // Convert local paths to file:// URLs
        return toFileUrl(token);
      }
      return isArxivUrl(token) ? normalizeArxivUrl(token) : token;
    });

  return { urls: urls.length === 0 ? [DEFAULT_PDF] : urls, stdio };
}

/**
 * Entry point: builds the PDF index from the command line, then serves it.
 *
 * With `--stdio` the server is connected to a stdio transport; otherwise it is
 * started through `startServer` on the port given by the `PORT` environment
 * variable (default 3001). Progress messages are written with `console.error`.
 *
 * @throws Error when `PORT` is set to something that is not a port number in 0-65535.
 */
async function main(): Promise<void> {
  const { urls, stdio } = parseArgs();

  console.error(`[pdf-server] Initializing with ${urls.length} PDF(s)...`);
  pdfIndex = await buildPdfIndex(urls);
  console.error(`[pdf-server] Ready`);

  if (stdio) {
    await createServer().connect(new StdioServerTransport());
    return;
  }

  // `||` rather than `??` so that an empty PORT also falls back to the default
  // instead of parsing to NaN.
  const rawPort = process.env.PORT || "3001";
  const port = parseInt(rawPort, 10);
  if (Number.isNaN(port) || port < 0 || port > 65535) {
    throw new Error(`Invalid PORT: ${JSON.stringify(rawPort)}`);
  }
  await startServer(createServer, { port, name: "PDF Server" });
}

// Run the entry point; on any startup failure, log it and exit with status 1.
main().catch((startupError) => {
  console.error(startupError);
  process.exit(1);
});