#!/usr/bin/env python3
"""
OpenProblems Spatial Transcriptomics MCP Server

A Model Context Protocol server that provides AI agents with standardized access
to Nextflow pipelines, Viash components, and spatial transcriptomics workflows
within the OpenProblems project.
"""

import asyncio
import json
import logging
import re
import subprocess
import sys
from pathlib import Path
from typing import Any, Dict, List, Optional, Union

from .documentation_generator_simple import DocumentationGenerator

from mcp.server import Server
from mcp.server.models import InitializationOptions
from mcp.types import (
    GetPromptResult,
    Prompt,
    PromptArgument,
    PromptMessage,
    Resource,
    TextContent,
    Tool,
)
import mcp.server.stdio

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Initialize the MCP server
server = Server("OpenProblems-SpatialAI-MCP")

# Server configuration
SERVER_VERSION = "0.1.0"
SERVER_NAME = "OpenProblems Spatial Transcriptomics MCP"

# Initialize documentation generator
doc_generator = DocumentationGenerator()

# Resource URI -> key under which the documentation cache stores its content.
_DOC_CACHE_KEYS = {
    "documentation://nextflow": "nextflow",
    "documentation://viash": "viash",
    "documentation://docker": "docker",
    "templates://spatial-workflows": "spatial_templates",
}

# Built-in Nextflow documentation served when nothing has been cached yet.
_NEXTFLOW_FALLBACK_DOCS = {
    "overview": "Nextflow is a workflow framework for bioinformatics pipelines",
    "status": "Real documentation not yet cached - run 'python -m mcp_server.documentation_scraper' to download",
    "best_practices": {
        "dsl_version": "Use DSL2 for all new workflows",
        "resource_management": "Specify memory and CPU requirements for each process",
        "error_handling": "Implement retry strategies and error handling",
        "containerization": "Use Docker/Singularity containers for reproducibility",
    },
    "common_patterns": {
        "input_channels": "Use Channel.fromPath() for file inputs",
        "output_publishing": "Use publishDir directive for results",
        "conditional_execution": "Use when clause for conditional processes",
    },
    "troubleshooting": {
        "oom_errors": "Increase memory allocation or implement dynamic resource allocation",
        "missing_files": "Check file paths and ensure proper input staging",
        "container_issues": "Verify container availability and permissions",
    },
}

# Built-in Viash documentation served when nothing has been cached yet.
_VIASH_FALLBACK_DOCS = {
    "overview": "Viash is a meta-framework for building reusable workflow modules",
    "status": "Real documentation not yet cached - run 'python -m mcp_server.documentation_scraper' to download",
    "component_structure": {
        "config_file": "YAML configuration defining component metadata",
        "script": "Core functionality implementation",
        "platforms": "Target platforms (docker, native, nextflow)",
    },
    "best_practices": {
        "modularity": "Keep components focused on single tasks",
        "documentation": "Provide clear descriptions and examples",
        "testing": "Include unit tests for all components",
        "versioning": "Use semantic versioning for component releases",
    },
    "common_commands": {
        "build": "viash build config.vsh.yaml",
        "run": "viash run config.vsh.yaml",
        "test": "viash test config.vsh.yaml",
        "ns_build": "viash ns build",
    },
}


def _json_response(payload: Any) -> List[TextContent]:
    """Wrap *payload* as a single pretty-printed JSON text content item."""
    return [TextContent(type="text", text=json.dumps(payload, indent=2))]


async def _run_and_report(
    cmd: List[str],
    *,
    timeout: int,
    timeout_error: str,
    context: Optional[Dict[str, Any]] = None,
) -> List[TextContent]:
    """Run *cmd* without blocking the event loop and report the outcome as JSON.

    Args:
        cmd: Argument vector to execute (never passed through a shell).
        timeout: Seconds after which the child process is killed.
        timeout_error: Message placed in the ``error`` field on timeout.
        context: Extra key/value pairs emitted before ``command`` in the report
            (for example the component name or the image tag).

    Returns:
        One JSON text item whose ``status`` is ``completed``, ``failed``,
        ``timeout`` or ``error``.
    """
    report: Dict[str, Any] = dict(context or {})
    report["command"] = " ".join(cmd)
    try:
        # subprocess.run blocks until the child exits (up to an hour for a
        # workflow); running it in a worker thread keeps the stdio transport
        # responsive meanwhile. asyncio.to_thread needs Python 3.9+.
        result = await asyncio.to_thread(
            subprocess.run, cmd, capture_output=True, text=True, timeout=timeout
        )
    except subprocess.TimeoutExpired:
        report["status"] = "timeout"
        report["error"] = timeout_error
    except Exception as e:
        # Tool-call boundary: report the failure (e.g. missing executable)
        # to the client instead of raising.
        report["status"] = "error"
        report["error"] = str(e)
    else:
        report["exit_code"] = result.returncode
        report["stdout"] = result.stdout
        report["stderr"] = result.stderr
        report["status"] = "completed" if result.returncode == 0 else "failed"
    return _json_response(report)


@server.list_resources()
async def handle_list_resources() -> List[Resource]:
    """List available resources for spatial transcriptomics workflows."""
    return [
        Resource(
            uri="server://status",
            name="Server Status",
            description="Current status and configuration of the MCP server",
            mimeType="application/json",
        ),
        Resource(
            uri="documentation://nextflow",
            name="Nextflow Documentation",
            description="Comprehensive documentation for Nextflow workflows and best practices",
            mimeType="application/json",
        ),
        Resource(
            uri="documentation://viash",
            name="Viash Documentation",
            description="Documentation for Viash components and configuration",
            mimeType="application/json",
        ),
        Resource(
            uri="documentation://docker",
            name="Docker Documentation",
            description="Docker best practices and optimization guidelines",
            mimeType="application/json",
        ),
        Resource(
            uri="templates://spatial-workflows",
            name="Spatial Transcriptomics Pipeline Templates",
            description="Curated Nextflow pipeline templates for spatial transcriptomics analysis",
            mimeType="application/json",
        ),
    ]


@server.read_resource()
async def handle_read_resource(uri: str) -> str:
    """Read and return resource content based on URI.

    Args:
        uri: Resource URI as advertised by ``handle_list_resources``.

    Returns:
        The resource content as a string: cached documentation when present,
        otherwise built-in or generated fallback content.

    Raises:
        ValueError: If *uri* is not one of the advertised resources.
    """
    # The SDK may hand over a URL object rather than a plain str, in which
    # case comparing it to string literals never matches. Normalising is a
    # no-op for str input.
    uri = str(uri)
    logger.info(f"Reading resource: {uri}")

    if uri == "server://status":
        status = {
            "server_name": SERVER_NAME,
            "version": SERVER_VERSION,
            "status": "running",
            "capabilities": {
                "nextflow_execution": True,
                "viash_components": True,
                "docker_builds": True,
                "automated_testing": True,
                "log_analysis": True,
            },
            "supported_formats": ["h5ad", "json", "yaml", "nf", "vsh.yaml"],
            "documentation_available": True,
        }
        return json.dumps(status, indent=2)

    cache_key = _DOC_CACHE_KEYS.get(uri)
    if cache_key is None:
        raise ValueError(f"Unknown resource URI: {uri}")

    # Try to load cached documentation first
    cached_docs = await doc_generator.load_cached_documentation()
    if cache_key in cached_docs:
        return cached_docs[cache_key]

    # Nothing cached: fall back to built-in or generated content.
    if cache_key == "nextflow":
        return json.dumps(_NEXTFLOW_FALLBACK_DOCS, indent=2)
    if cache_key == "viash":
        return json.dumps(_VIASH_FALLBACK_DOCS, indent=2)
    if cache_key == "docker":
        return await doc_generator._generate_docker_docs()
    return await doc_generator._generate_spatial_templates()


@server.list_tools()
async def handle_list_tools() -> List[Tool]:
    """List available tools for spatial transcriptomics workflows."""
    return [
        Tool(
            name="echo_test",
            description="Simple echo test to verify MCP communication",
            inputSchema={
                "type": "object",
                "properties": {
                    "message": {
                        "type": "string",
                        "description": "Message to echo back"
                    }
                },
                "required": ["message"]
            }
        ),
        Tool(
            name="list_available_tools",
            description="List all available MCP tools and their descriptions",
            inputSchema={
                "type": "object",
                "properties": {},
            }
        ),
        Tool(
            name="run_nextflow_workflow",
            description="Execute a Nextflow pipeline from OpenProblems repositories",
            inputSchema={
                "type": "object",
                "properties": {
                    "workflow_name": {
                        "type": "string",
                        "description": "Name of the Nextflow workflow (e.g., main.nf)"
                    },
                    "github_repo_url": {
                        "type": "string",
                        "description": "GitHub URL of the repository containing the workflow"
                    },
                    "profile": {
                        "type": "string",
                        "description": "Nextflow profile to use (e.g., docker, test)",
                        "default": "docker"
                    },
                    "params": {
                        "type": "object",
                        "description": "Key-value pairs for pipeline parameters",
                        "default": {}
                    },
                    "config_file": {
                        "type": "string",
                        "description": "Path to custom Nextflow configuration file"
                    }
                },
                "required": ["workflow_name", "github_repo_url"]
            }
        ),
        Tool(
            name="run_viash_component",
            description="Execute a Viash component with specified parameters",
            inputSchema={
                "type": "object",
                "properties": {
                    "component_name": {
                        "type": "string",
                        "description": "Name of the Viash component"
                    },
                    "component_config_path": {
                        "type": "string",
                        "description": "Path to the Viash config file (.vsh.yaml)"
                    },
                    "engine": {
                        "type": "string",
                        "description": "Execution engine (native, docker)",
                        "default": "docker"
                    },
                    "args": {
                        "type": "object",
                        "description": "Component-specific arguments",
                        "default": {}
                    }
                },
                "required": ["component_name", "component_config_path"]
            }
        ),
        Tool(
            name="build_docker_image",
            description="Build a Docker image from a Dockerfile",
            inputSchema={
                "type": "object",
                "properties": {
                    "dockerfile_path": {
                        "type": "string",
                        "description": "Path to the Dockerfile"
                    },
                    "image_tag": {
                        "type": "string",
                        "description": "Tag for the Docker image"
                    },
                    "context_path": {
                        "type": "string",
                        "description": "Build context directory",
                        "default": "."
                    }
                },
                "required": ["dockerfile_path", "image_tag"]
            }
        ),
        Tool(
            name="analyze_nextflow_log",
            description="Analyze Nextflow execution logs for errors and troubleshooting",
            inputSchema={
                "type": "object",
                "properties": {
                    "log_file_path": {
                        "type": "string",
                        "description": "Path to the .nextflow.log file"
                    }
                },
                "required": ["log_file_path"]
            }
        ),
        Tool(
            name="read_file",
            description="Read contents of a file for analysis or editing",
            inputSchema={
                "type": "object",
                "properties": {
                    "file_path": {
                        "type": "string",
                        "description": "Path to the file to read"
                    }
                },
                "required": ["file_path"]
            }
        ),
        Tool(
            name="write_file",
            description="Write or create a file with specified content",
            inputSchema={
                "type": "object",
                "properties": {
                    "file_path": {
                        "type": "string",
                        "description": "Path to the file to write"
                    },
                    "content": {
                        "type": "string",
                        "description": "Content to write to the file"
                    }
                },
                "required": ["file_path", "content"]
            }
        ),
        Tool(
            name="list_directory",
            description="List contents of a directory",
            inputSchema={
                "type": "object",
                "properties": {
                    "directory_path": {
                        "type": "string",
                        "description": "Path to the directory to list"
                    },
                    "include_hidden": {
                        "type": "boolean",
                        "description": "Include hidden files and directories",
                        "default": False
                    }
                },
                "required": ["directory_path"]
            }
        ),
        Tool(
            name="validate_nextflow_config",
            description="Validate Nextflow configuration and pipeline syntax",
            inputSchema={
                "type": "object",
                "properties": {
                    "config_path": {
                        "type": "string",
                        "description": "Path to nextflow.config file"
                    },
                    "pipeline_path": {
                        "type": "string",
                        "description": "Path to main.nf or pipeline file"
                    }
                },
                "required": ["pipeline_path"]
            }
        ),
        Tool(
            name="check_environment",
            description="Check if required tools and dependencies are installed",
            inputSchema={
                "type": "object",
                "properties": {
                    "tools": {
                        "type": "array",
                        "items": {"type": "string"},
                        "description": "List of tools to check (nextflow, viash, docker, java, etc.)",
                        "default": ["nextflow", "viash", "docker", "java"]
                    }
                },
                "required": []
            }
        ),
    ]


@server.call_tool()
async def handle_call_tool(name: str, arguments: Dict[str, Any]) -> List[TextContent]:
    """Handle tool execution requests.

    Args:
        name: Tool name as advertised by ``handle_list_tools``.
        arguments: Tool arguments; a missing/None mapping is treated as empty.

    Returns:
        The text content produced by the selected tool.

    Raises:
        ValueError: If *name* is not a known tool.
    """
    arguments = arguments or {}
    logger.info(f"Executing tool: {name} with arguments: {arguments}")

    if name == "echo_test":
        message = arguments.get("message", "")
        return [TextContent(type="text", text=f"Echo: {message}")]

    if name == "list_available_tools":
        tool_list = [
            {
                "name": tool.name,
                "description": tool.description,
                "required_params": tool.inputSchema.get("required", []),
            }
            for tool in await handle_list_tools()
        ]
        return _json_response(tool_list)

    # Built per call so the handler functions (defined further down in the
    # module) are resolved only when a tool is actually invoked.
    handlers = {
        "run_nextflow_workflow": _execute_nextflow_workflow,
        "run_viash_component": _execute_viash_component,
        "build_docker_image": _build_docker_image,
        "analyze_nextflow_log": _analyze_nextflow_log,
        "read_file": _read_file,
        "write_file": _write_file,
        "list_directory": _list_directory,
        "validate_nextflow_config": _validate_nextflow_config,
        "check_environment": _check_environment,
    }
    handler = handlers.get(name)
    if handler is None:
        raise ValueError(f"Unknown tool: {name}")
    return await handler(arguments)


async def _execute_nextflow_workflow(arguments: Dict[str, Any]) -> List[TextContent]:
    """Execute a Nextflow workflow.

    Args:
        arguments: Requires ``workflow_name`` and ``github_repo_url``; optional
            ``profile`` (default ``"docker"``), ``params`` and ``config_file``.

    Returns:
        JSON report with the command, exit code, captured output and status.

    Raises:
        KeyError: If a required argument is missing.
    """
    workflow_name = arguments["workflow_name"]
    github_repo_url = arguments["github_repo_url"]
    profile = arguments.get("profile", "docker")
    # An explicit null for "params" is treated like an empty mapping.
    params = arguments.get("params") or {}
    config_file = arguments.get("config_file")

    # Build the command
    # NOTE(review): the workflow name is appended to the repository URL as a
    # path segment - confirm this is the form `nextflow run` expects.
    cmd = ["nextflow", "run", f"{github_repo_url}/{workflow_name}"]

    if profile:
        cmd.extend(["-profile", profile])

    if config_file:
        cmd.extend(["-c", config_file])

    # Add parameters
    for key, value in params.items():
        cmd.append(f"--{key}")
        cmd.append(str(value))

    logger.info(f"Executing command: {' '.join(cmd)}")
    return await _run_and_report(
        cmd,
        timeout=3600,  # 1 hour timeout
        timeout_error="Workflow execution timed out after 1 hour",
    )


async def _execute_viash_component(arguments: Dict[str, Any]) -> List[TextContent]:
    """Execute a Viash component.

    Args:
        arguments: Requires ``component_name`` and ``component_config_path``;
            optional ``engine`` (default ``"docker"``) and ``args``.

    Returns:
        JSON report with the component, command, exit code, output and status.

    Raises:
        KeyError: If a required argument is missing.
    """
    component_name = arguments["component_name"]
    component_config_path = arguments["component_config_path"]
    engine = arguments.get("engine", "docker")
    # An explicit null for "args" is treated like an empty mapping.
    args = arguments.get("args") or {}

    # Build the command
    cmd = ["viash", "run", component_config_path, "-p", engine]

    # Add component arguments (everything after "--" goes to the component)
    if args:
        cmd.append("--")
        for key, value in args.items():
            cmd.append(f"--{key}")
            cmd.append(str(value))

    logger.info(f"Executing Viash component: {' '.join(cmd)}")
    return await _run_and_report(
        cmd,
        timeout=1800,  # 30 minutes timeout
        timeout_error="Component execution timed out after 30 minutes",
        context={"component": component_name},
    )


async def _build_docker_image(arguments: Dict[str, Any]) -> List[TextContent]:
    """Build a Docker image.

    Args:
        arguments: Requires ``dockerfile_path`` and ``image_tag``; optional
            ``context_path`` (default ``"."``).

    Returns:
        JSON report with the image tag, command, exit code, output and status.

    Raises:
        KeyError: If a required argument is missing.
    """
    dockerfile_path = arguments["dockerfile_path"]
    image_tag = arguments["image_tag"]
    context_path = arguments.get("context_path", ".")

    cmd = ["docker", "build", "-t", image_tag, "-f", dockerfile_path, context_path]

    logger.info(f"Building Docker image: {' '.join(cmd)}")
    return await _run_and_report(
        cmd,
        timeout=1800,  # 30 minutes timeout
        timeout_error="Docker build timed out after 30 minutes",
        context={"image_tag": image_tag},
    )


async def _analyze_nextflow_log(arguments: Dict[str, Any]) -> List[TextContent]:
    """Analyze Nextflow execution logs for errors and troubleshooting.

    Args:
        arguments: Requires ``log_file_path``.

    Returns:
        JSON analysis listing matched error patterns with suggestions and an
        ``execution_status``, or an error payload if the log cannot be read.

    Raises:
        KeyError: If ``log_file_path`` is missing.
    """
    log_file_path = arguments["log_file_path"]

    try:
        log_path = Path(log_file_path)
        if not log_path.exists():
            return _json_response({
                "status": "error",
                "error": f"Log file not found: {log_file_path}"
            })

        # Read the log; undecodable bytes (e.g. raw tool output) are replaced
        # rather than aborting the analysis.
        with open(log_path, "r", encoding="utf-8", errors="replace") as f:
            log_content = f.read()

        analysis = {
            "log_file": str(log_path),
            "file_size": log_path.stat().st_size,
            "issues_found": [],
            "suggestions": [],
        }

        # Common error patterns and their solutions
        error_patterns = {
            "exit status 137": {
                "issue": "Out of memory (OOM) error",
                "suggestion": "Increase memory allocation for the process or implement dynamic resource allocation"
            },
            "exit status 1": {
                "issue": "General execution error",
                "suggestion": "Check process logs for specific error details"
            },
            "command not found": {
                "issue": "Missing command or tool",
                "suggestion": "Ensure required tools are installed in the container or environment"
            },
            "No such file or directory": {
                "issue": "Missing input file",
                "suggestion": "Verify input file paths and ensure proper file staging"
            },
            "Permission denied": {
                "issue": "File permission error",
                "suggestion": "Check file permissions and container user settings"
            },
        }

        # Case-insensitive search. The trailing (?!\d) stops "exit status 1"
        # from also matching longer codes such as "exit status 137" or "127".
        for pattern, info in error_patterns.items():
            if re.search(re.escape(pattern) + r"(?!\d)", log_content, re.IGNORECASE):
                analysis["issues_found"].append({
                    "pattern": pattern,
                    "issue": info["issue"],
                    "suggestion": info["suggestion"]
                })

        # Extract execution statistics if available
        if "Execution completed" in log_content:
            analysis["execution_status"] = "completed"
        elif "Execution cancelled" in log_content:
            analysis["execution_status"] = "cancelled"
        elif "Execution failed" in log_content:
            analysis["execution_status"] = "failed"
        else:
            analysis["execution_status"] = "unknown"

        return _json_response(analysis)

    except Exception as e:
        return _json_response({
            "status": "error",
            "error": f"Failed to analyze log file: {str(e)}"
        })


async def _read_file(arguments: Dict[str, Any]) -> List[TextContent]:
    """Read contents of a file for analysis or editing.

    Args:
        arguments: Requires ``file_path``.

    Returns:
        The file's text (decoded as UTF-8), or a JSON error payload if the
        file cannot be read.

    Raises:
        KeyError: If ``file_path`` is missing.
    """
    file_path = arguments["file_path"]

    try:
        # Explicit encoding so results do not depend on the host locale.
        with open(file_path, "r", encoding="utf-8") as f:
            file_content = f.read()
        return [TextContent(type="text", text=file_content)]
    except Exception as e:
        return _json_response({
            "status": "error",
            "error": f"Failed to read file: {str(e)}"
        })
async def _write_file(arguments: Dict[str, Any]) -> List[TextContent]:
    """Write or create a file with specified content.

    Args:
        arguments: Requires ``file_path`` and ``content``.

    Returns:
        A success message, or a JSON error payload if the write fails.

    Raises:
        KeyError: If a required argument is missing.
    """
    file_path = arguments["file_path"]
    content = arguments["content"]

    try:
        # Explicit encoding so the bytes written do not depend on the locale.
        with open(file_path, "w", encoding="utf-8") as f:
            f.write(content)
        return [TextContent(type="text", text="File written successfully")]
    except Exception as e:
        return [TextContent(
            type="text",
            text=json.dumps({
                "status": "error",
                "error": f"Failed to write file: {str(e)}"
            }, indent=2)
        )]


async def _list_directory(arguments: Dict[str, Any]) -> List[TextContent]:
    """List contents of a directory.

    Args:
        arguments: Requires ``directory_path``; optional ``include_hidden``
            (default ``False``) to include dot-prefixed entries.

    Returns:
        JSON array of ``{"name", "is_directory", "size"}`` objects sorted by
        name, or a JSON error payload if the directory cannot be listed.
        ``size`` is ``null`` for entries that cannot be stat'ed.

    Raises:
        KeyError: If ``directory_path`` is missing.
    """
    directory_path = arguments["directory_path"]
    include_hidden = arguments.get("include_hidden", False)

    try:
        entries = []
        # Sorted so the output is deterministic across filesystems.
        for entry in sorted(Path(directory_path).iterdir(), key=lambda p: p.name):
            if not include_hidden and entry.name.startswith('.'):
                continue
            try:
                size = entry.stat().st_size
            except OSError:
                # e.g. a dangling symlink: keep the entry, omit its size
                # instead of failing the whole listing.
                size = None
            entries.append({
                "name": entry.name,
                "is_directory": entry.is_dir(),
                "size": size
            })
        return [TextContent(
            type="text",
            text=json.dumps(entries, indent=2)
        )]
    except Exception as e:
        return [TextContent(
            type="text",
            text=json.dumps({
                "status": "error",
                "error": f"Failed to list directory: {str(e)}"
            }, indent=2)
        )]


async def _validate_nextflow_config(arguments: Dict[str, Any]) -> List[TextContent]:
    """Validate Nextflow configuration and pipeline syntax.

    Performs lightweight textual checks on the pipeline (and optional config)
    and, when the ``nextflow`` executable can be launched, runs
    ``nextflow config`` on the pipeline path.

    Args:
        arguments: Requires ``pipeline_path``; optional ``config_path``.

    Returns:
        JSON with ``issues``, ``warnings`` and ``status`` (``valid`` or
        ``invalid``), or a JSON error payload on unexpected failure.

    Raises:
        KeyError: If ``pipeline_path`` is missing.
    """
    import re  # local import: only this function needs it

    pipeline_path = arguments["pipeline_path"]
    config_path = arguments.get("config_path")

    validation_results = {
        "pipeline_path": pipeline_path,
        "config_path": config_path,
        "issues": [],
        "warnings": [],
        "status": "valid"
    }

    try:
        # Check if pipeline file exists
        pipeline_file = Path(pipeline_path)
        if not pipeline_file.exists():
            validation_results["issues"].append(f"Pipeline file not found: {pipeline_path}")
            validation_results["status"] = "invalid"
            return [TextContent(type="text", text=json.dumps(validation_results, indent=2))]

        # Read and check pipeline content
        with open(pipeline_file, "r", encoding="utf-8") as f:
            pipeline_content = f.read()

        # Basic Nextflow syntax checks. Whitespace around "=" is optional, so
        # both "nextflow.enable.dsl=2" and "nextflow.enable.dsl = 2" count.
        dsl2_enabled = (
            re.search(r"nextflow\.enable\.dsl\s*=\s*2", pipeline_content) is not None
            or 'nextflow { dsl = 2 }' in pipeline_content
        )
        if not dsl2_enabled:
            validation_results["warnings"].append("DSL2 not explicitly enabled - recommend adding 'nextflow.enable.dsl=2'")

        if 'process ' not in pipeline_content and 'workflow ' not in pipeline_content:
            validation_results["issues"].append("No process or workflow blocks found in pipeline")
            validation_results["status"] = "invalid"

        # Check for common issues
        if 'publishDir' in pipeline_content and 'output:' not in pipeline_content:
            validation_results["warnings"].append("publishDir found but no output block - this may cause issues")

        # Check config file if provided
        if config_path:
            config_file = Path(config_path)
            if not config_file.exists():
                validation_results["warnings"].append(f"Config file not found: {config_path}")
            else:
                with open(config_file, "r", encoding="utf-8") as f:
                    config_content = f.read()

                # Basic config validation
                # NOTE(review): this positive finding is reported in the
                # "warnings" list - confirm that is intended.
                if 'process ' in config_content:
                    validation_results["warnings"].append("Config looks good - process configuration found")

        # Try to run nextflow validation if available
        try:
            # Worker thread keeps the event loop free while nextflow runs.
            result = await asyncio.to_thread(
                subprocess.run,
                ["nextflow", "config", pipeline_path],
                capture_output=True,
                text=True,
                timeout=30
            )
            if result.returncode != 0:
                validation_results["issues"].append(f"Nextflow config validation failed: {result.stderr}")
                validation_results["status"] = "invalid"
        except (subprocess.TimeoutExpired, OSError):
            # OSError covers a missing or non-executable nextflow binary.
            validation_results["warnings"].append("Nextflow not available - performed basic syntax check only")

        return [TextContent(type="text", text=json.dumps(validation_results, indent=2))]

    except Exception as e:
        return [TextContent(
            type="text",
            text=json.dumps({
                "status": "error",
                "error": f"Failed to validate Nextflow configuration: {str(e)}"
            }, indent=2)
        )]


async def _check_environment(arguments: Dict[str, Any]) -> List[TextContent]:
    """Check if required tools and dependencies are installed.

    Each tool is probed by running its version command; a zero exit code
    marks it available.

    Args:
        arguments: Optional ``tools`` list of executable names (default
            ``["nextflow", "viash", "docker", "java"]``).

    Returns:
        JSON with per-tool ``available``/``version``/``path``, an
        ``overall_status`` (``ready`` or ``incomplete``) and install
        ``recommendations`` for missing known tools; or a JSON error payload
        on unexpected failure.
    """
    import shutil  # local import: only this function needs it

    tools = arguments.get("tools", ["nextflow", "viash", "docker", "java"])

    # Tools whose version flag differs from the conventional "--version".
    version_flags = {"nextflow": "-version", "java": "-version"}
    install_hints = {
        "nextflow": "Install Nextflow: curl -s https://get.nextflow.io | bash",
        "viash": "Install Viash: curl -fsSL get.viash.io | bash",
        "docker": "Install Docker: https://docs.docker.com/get-docker/",
        "java": "Install Java: sudo apt install openjdk-17-jre-headless",
    }

    environment_status = {
        "overall_status": "ready",
        "tools": {},
        "recommendations": []
    }

    try:
        for tool in tools:
            tool_status = {"available": False, "version": None, "path": None}

            try:
                result = await asyncio.to_thread(
                    subprocess.run,
                    [tool, version_flags.get(tool, "--version")],
                    capture_output=True,
                    text=True,
                    timeout=10
                )
            except (subprocess.TimeoutExpired, OSError):
                # Missing, non-executable or hanging tool: leave it marked
                # unavailable and keep checking the remaining tools.
                pass
            else:
                if result.returncode == 0:
                    if tool == "java":
                        # Java outputs version to stderr
                        version = result.stderr.strip() or result.stdout.strip()
                    else:
                        version = result.stdout.strip()
                    tool_status["available"] = True
                    tool_status["version"] = version
                    # Portable PATH lookup; avoids spawning `which`.
                    tool_status["path"] = shutil.which(tool)

            environment_status["tools"][tool] = tool_status

            # Add recommendations for missing tools
            if not tool_status["available"]:
                environment_status["overall_status"] = "incomplete"
                hint = install_hints.get(tool)
                if hint is not None:
                    environment_status["recommendations"].append(hint)

        return [TextContent(type="text", text=json.dumps(environment_status, indent=2))]

    except Exception as e:
        return [TextContent(
            type="text",
            text=json.dumps({
                "status": "error",
                "error": f"Failed to check environment: {str(e)}"
            }, indent=2)
        )]


async def main():
    """Main entry point for the MCP server.

    Serves the module-level ``server`` over the stdio transport until the
    streams close.
    """
    logger.info(f"Starting {SERVER_NAME} v{SERVER_VERSION}")

    async with mcp.server.stdio.stdio_server() as (read_stream, write_stream):
        await server.run(
            read_stream,
            write_stream,
            InitializationOptions(
                server_name=SERVER_NAME,
                server_version=SERVER_VERSION,
                # NOTE(review): "prompts" and "logging" are advertised here,
                # but this file registers only resource and tool handlers -
                # confirm these capabilities are meant to be declared.
                capabilities={
                    "resources": {},
                    "tools": {},
                    "prompts": {},
                    "logging": {}
                },
            ),
        )


if __name__ == "__main__":
    asyncio.run(main())