from typing import Any, Dict

from mcp.server.fastmcp import Context, FastMCP

from app.core.logger import logger
from app.services.ocr_service import OCRService
from app.utils.validators import validate_document_url
from app.core.exceptions import InvalidDocumentURLError, OCRProcessingError


def _structured_failure(message: str) -> Dict[str, Any]:
    """Return a fresh, empty structured result carrying *message* as the error."""
    return {"error": message, "page_count": 0, "pages": [], "markdown": ""}


def _ocr_service_for(ctx: Context) -> OCRService:
    """Build an OCRService around the ``mistral`` client held in the lifespan context."""
    return OCRService(client=ctx.request_context.lifespan_context.mistral)


def register_markdown_tools(mcp: FastMCP) -> None:
    """Register the markdown-extraction tools on *mcp*.

    Two tools are attached via ``@mcp.tool()``:

    * ``pdf_to_markdown`` -- returns the document as one Markdown string.
    * ``pdf_to_structured_markdown`` -- returns a per-page dict.

    Neither tool raises on a rejected URL or an ``OCRProcessingError``; the
    failure is logged and reported in the return value instead. Any other
    exception propagates to the caller.
    """

    # NOTE: the tool docstrings below are kept word-for-word; the server
    # presumably surfaces them to clients as tool descriptions.

    @mcp.tool()
    async def pdf_to_markdown(document_url: str, ctx: Context) -> str:
        """Convert a PDF or document from a URL to Markdown using Mistral OCR.

        Args:
            document_url: Publicly accessible URL of the PDF / document.

        Returns:
            Markdown string (all pages concatenated).
        """
        # Reject bad URLs before any OCR client is touched.
        try:
            checked_url = validate_document_url(document_url)
        except InvalidDocumentURLError as url_err:
            logger.warning("Invalid URL rejected: {}", url_err)
            return f"Error: {url_err}"

        ocr = _ocr_service_for(ctx)
        try:
            markdown = await ocr.document_to_markdown(checked_url)
        except OCRProcessingError as ocr_err:
            logger.error("OCR failed for {}: {}", checked_url, ocr_err)
            return f"Error: {ocr_err}"
        return markdown

    @mcp.tool()
    async def pdf_to_structured_markdown(
        document_url: str, ctx: Context
    ) -> Dict[str, Any]:
        """Convert a document to per-page structured markdown.

        Returns:
            Dict with keys: page_count (int), pages (list of {index, markdown}),
            markdown (str, merged).
        """
        # Reject bad URLs before any OCR client is touched.
        try:
            checked_url = validate_document_url(document_url)
        except InvalidDocumentURLError as url_err:
            logger.warning("Invalid URL rejected: {}", url_err)
            return _structured_failure(str(url_err))

        ocr = _ocr_service_for(ctx)
        try:
            structured = await ocr.document_to_structured(checked_url)
        except OCRProcessingError as ocr_err:
            logger.error("Structured OCR failed for {}: {}", checked_url, ocr_err)
            return _structured_failure(str(ocr_err))
        return structured