Upload deepdiver_v2/src/tools/mcp_server_standard.py with huggingface_hub

Browse files

Files changed (1) hide show

deepdiver_v2/src/tools/mcp_server_standard.py +1751 -0

deepdiver_v2/src/tools/mcp_server_standard.py ADDED Viewed

	@@ -0,0 +1,1751 @@

+# Copyright (c) 2025 Huawei Technologies Co., Ltd. All rights reserved.
+#!/usr/bin/env python3
+"""
+Demo-Ready MCP Server - New Standard Implementation
+Combines robust session management with comprehensive tool definitions.
+Features: workspace isolation, tool call tracking, rate limiting, security, and full tool suite.
+"""
+import argparse
+import asyncio
+import json
+import logging
+import time
+import uuid
+import yaml
+from collections import defaultdict, deque
+from dataclasses import dataclass, field
+from datetime import datetime, timedelta
+from pathlib import Path
+from threading import Thread, Event
+from typing import Any, Dict, List, Optional
+# Third-party imports
+from starlette.applications import Starlette
+from starlette.middleware.base import BaseHTTPMiddleware
+from starlette.requests import Request
+from starlette.responses import JSONResponse, StreamingResponse
+import uvicorn
+# Add project root to Python path for imports
+import sys
+sys.path.insert(0, str(Path(__file__).parent.parent.parent))
+from src.utils.status_codes import JsonRpcErr
+from http import HTTPStatus
+# Handle both relative and absolute imports
+try:
+    from .mcp_tools import MCPTools, get_tool_schemas
+    from .mcp_tools_async import AsyncMCPTools
+except ImportError:
+    # Fallback for direct script execution
+    from src.tools.mcp_tools import MCPTools, get_tool_schemas
+    try:
+        from src.tools.mcp_tools_async import AsyncMCPTools
+    except ImportError:
+        AsyncMCPTools = None
+# Workspace knowledge manager disabled
+WORKSPACE_KNOWLEDGE_AVAILABLE = False
+# Configure structured logging
+logging.basicConfig(
+    level=logging.INFO,
+    format='%(asctime)s - %(name)s - %(levelname)s - %(funcName)s:%(lineno)d - %(message)s',
+    handlers=[
+        logging.StreamHandler(sys.stdout),
+        logging.FileHandler('mcp_server.log')
+    ]
+)
+logger = logging.getLogger(__name__)
+# ================ CONFIGURATION ================
+@dataclass
+class ServerConfig:
+    """Server configuration with only actually implemented options"""
+    # Server Core Settings
+    host: str = "127.0.0.1"
+    port: int = 6274
+    debug_mode: bool = False
+    # Session Management
+    session_ttl_seconds: int = 3600  # 1 hour default
+    max_sessions: int = 1000
+    cleanup_interval_seconds: int = 300  # 5 minutes
+    enable_session_keepalive: bool = True
+    keepalive_touch_interval: int = 300
+    # Request Handling
+    request_timeout_seconds: int = 120
+    max_request_size_mb: int = 10
+    # Client Rate Limiting (per IP)
+    rate_limit_requests_per_minute: int = 300
+    # Workspace Management
+    base_workspace_dir: str = "workspaces"
+    # Tool Call Tracking & Logging
+    enable_tool_tracking: bool = True
+    max_tracked_calls_per_session: int = 1000
+    track_detailed_errors: bool = True
+    # Per-tool Rate Limiting Configuration
+    tool_rate_limits: Dict[str, Dict[str, int]] = field(default_factory=dict)
+    @classmethod
+    def from_yaml(cls, config_path: str) -> 'ServerConfig':
+        """Load configuration from YAML file"""
+        try:
+            with open(config_path, 'r') as f:
+                config_data = yaml.safe_load(f)
+            # Extract configuration sections with defaults
+            server_config = config_data.get('server', {})
+            tracking_config = config_data.get('tracking', {})
+            tool_rate_limits = config_data.get('tool_rate_limits', {})
+            return cls(
+                # Server Core Settings
+                host=server_config.get('host', "127.0.0.1"),
+                port=server_config.get('port', 6274),
+                debug_mode=server_config.get('debug_mode', False),
+                # Session Management
+                session_ttl_seconds=server_config.get('session_ttl_seconds', 3600),
+                max_sessions=server_config.get('max_sessions', 1000),
+                cleanup_interval_seconds=server_config.get('cleanup_interval_seconds', 300),
+                enable_session_keepalive=server_config.get('enable_session_keepalive', True),
+                keepalive_touch_interval=server_config.get('keepalive_touch_interval', 300),
+                # Request Handling
+                request_timeout_seconds=server_config.get('request_timeout_seconds', 120),
+                max_request_size_mb=server_config.get('max_request_size_mb', 10),
+                # Client Rate Limiting
+                rate_limit_requests_per_minute=server_config.get('rate_limit_requests_per_minute', 300),
+                # Workspace Management
+                base_workspace_dir=server_config.get('base_workspace_dir', "workspaces"),
+                # Tool Call Tracking & Logging
+                enable_tool_tracking=tracking_config.get('enable_tool_tracking', True),
+                max_tracked_calls_per_session=tracking_config.get('max_tracked_calls_per_session', 1000),
+                track_detailed_errors=tracking_config.get('track_detailed_errors', True),
+                # Per-tool Rate Limiting
+                tool_rate_limits=tool_rate_limits
+            )
+        except Exception as e:
+            logger.error(f"Failed to load configuration from {config_path}: {e}")
+            logger.info("Using default configuration")
+            return cls()
+# Global configuration instance - will be set during startup
+config: Optional[ServerConfig] = None
+# ================ GLOBAL PER-TOOL RATE LIMITING ================
+@dataclass
+class ToolRateLimit:
+    """Rate limit configuration for a specific tool"""
+    requests_per_minute: float
+    requests_per_hour: float
+    burst_limit: int
+class GlobalToolRateLimiter:
+    """
+    Global rate limiter that controls QPS to external APIs per tool.
+    This is shared across all sessions and clients to manage upstream service load.
+    """
+    def __init__(self, tool_rate_limits: Dict[str, Dict[str, int]]):
+        self.tool_limits: Dict[str, ToolRateLimit] = {}
+        self.tool_requests: Dict[str, deque] = defaultdict(deque)
+        self.lock = asyncio.Lock()
+        # Initialize rate limits for each tool
+        for tool_name, limits_config in tool_rate_limits.items():
+            self.tool_limits[tool_name] = ToolRateLimit(
+                requests_per_minute=limits_config.get('requests_per_minute', float('inf')),
+                requests_per_hour=limits_config.get('requests_per_hour', float('inf')),
+                burst_limit=limits_config.get('burst_limit', 10)
+            )
+            self.tool_requests[tool_name] = deque()
+        logger.info(f"Initialized global tool rate limiter for {len(self.tool_limits)} tools")
+    async def is_allowed(self, tool_name: str) -> tuple[bool, Optional[str]]:
+        """
+        Check if a request to the specified tool is allowed based on global rate limits.
+        Returns:
+            tuple[bool, Optional[str]]: (allowed, reason_if_denied)
+        """
+        if tool_name not in self.tool_limits:
+            # Tool not configured for rate limiting - allow
+            return True, None
+        async with self.lock:
+            now = time.time()
+            limits = self.tool_limits[tool_name]
+            requests = self.tool_requests[tool_name]
+            # Clean old requests outside the time windows
+            self._cleanup_old_requests(requests, now)
+            # Check various time window limits
+            recent_requests = list(requests)
+            # Check burst limit (rapid requests in last second) - only if specified
+            if limits.burst_limit != float('inf'):
+                burst_count = sum(1 for req_time in recent_requests if now - req_time < 1.0)
+                if burst_count >= limits.burst_limit:
+                    return False, f"Tool '{tool_name}' burst limit exceeded ({limits.burst_limit} requests/burst)"
+            # Check per-minute limit - only if specified
+            if limits.requests_per_minute != float('inf'):
+                minute_count = sum(1 for req_time in recent_requests if now - req_time < 60.0)
+                if minute_count >= limits.requests_per_minute:
+                    return False, f"Tool '{tool_name}' per-minute limit exceeded ({limits.requests_per_minute} requests/minute)"
+            # Check per-hour limit - only if specified
+            if limits.requests_per_hour != float('inf'):
+                hour_count = sum(1 for req_time in recent_requests if now - req_time < 3600.0)
+                if hour_count >= limits.requests_per_hour:
+                    return False, f"Tool '{tool_name}' per-hour limit exceeded ({limits.requests_per_hour} requests/hour)"
+            return True, None
+    async def record_request(self, tool_name: str):
+        """Record a successful request for rate limiting tracking"""
+        if tool_name not in self.tool_limits:
+            return
+        async with self.lock:
+            now = time.time()
+            self.tool_requests[tool_name].append(now)
+            # Keep deque size manageable (only keep last hour of requests)
+            self._cleanup_old_requests(self.tool_requests[tool_name], now)
+    @staticmethod
+    def _cleanup_old_requests(requests: deque, now: float):
+        """Remove requests older than 1 hour to keep memory usage bounded"""
+        while requests and now - requests[0] > 3600.0:  # 1 hour
+            requests.popleft()
+    async def get_tool_stats(self, tool_name: str) -> Dict[str, Any]:
+        """Get current usage statistics for a tool"""
+        if tool_name not in self.tool_limits:
+            return {"error": f"Tool '{tool_name}' not configured for rate limiting"}
+        async with self.lock:
+            now = time.time()
+            requests = self.tool_requests[tool_name]
+            limits = self.tool_limits[tool_name]
+            # Clean old requests first
+            self._cleanup_old_requests(requests, now)
+            recent_requests = list(requests)
+            return {
+                "tool_name": tool_name,
+                "current_usage": {
+                    "last_second": sum(1 for req_time in recent_requests if now - req_time < 1.0),
+                    "last_minute": sum(1 for req_time in recent_requests if now - req_time < 60.0),
+                    "last_hour": sum(1 for req_time in recent_requests if now - req_time < 3600.0)
+                },
+                "limits": {
+                    "requests_per_minute": limits.requests_per_minute if limits.requests_per_minute != float('inf') else None,
+                    "requests_per_hour": limits.requests_per_hour if limits.requests_per_hour != float('inf') else None,
+                    "burst_limit": limits.burst_limit if limits.burst_limit != float('inf') else None
+                },
+                "utilization": {
+                    "minute_utilization": sum(1 for req_time in recent_requests if now - req_time < 60.0) / limits.requests_per_minute if limits.requests_per_minute != float('inf') else 0,
+                    "hour_utilization": sum(1 for req_time in recent_requests if now - req_time < 3600.0) / limits.requests_per_hour if limits.requests_per_hour != float('inf') else 0
+                }
+            }
+    def get_all_stats(self) -> Dict[str, Any]:
+        """Get usage statistics for all tools"""
+        return {
+            tool_name: self.get_tool_stats(tool_name)
+            for tool_name in self.tool_limits.keys()
+        }
+# Global tool rate limiter instance - will be initialized during startup
+global_tool_rate_limiter: Optional[GlobalToolRateLimiter] = None
+# ================ TOOL DEFINITIONS ================
+# Tool execution function mapping - maps tool names to their implementation functions
+def get_tool_function(tool_name: str):
+    """Get the actual function for a tool"""
+    tool_map = {
+        "batch_web_search": lambda tools, **kwargs: tools.batch_web_search(**kwargs),
+        "url_crawler": lambda tools, **kwargs: tools.url_crawler(**kwargs),
+        "download_files": lambda tools, **kwargs: tools.download_files(**kwargs),
+        "list_workspace": lambda tools, **kwargs: tools.list_workspace(**kwargs),
+        "str_replace_based_edit_tool": lambda tools, **kwargs: tools.str_replace_based_edit_tool(**kwargs),
+        "file_stats": lambda tools, **kwargs: tools.file_stats(**kwargs),
+        "file_read": lambda tools, **kwargs: tools.file_read(**kwargs),
+        "file_read_lines": lambda tools, **kwargs: tools.file_read_lines(**kwargs),
+        "content_preview": lambda tools, **kwargs: tools.content_preview(**kwargs),
+        "file_write": lambda tools, **kwargs: tools.file_write(**kwargs),
+        "file_grep_search": lambda tools, **kwargs: tools.file_grep_search(**kwargs),
+        "file_grep_with_context": lambda tools, **kwargs: tools.file_grep_with_context(**kwargs),
+        "file_find_by_name": lambda tools, **kwargs: tools.file_find_by_name(**kwargs),
+        "bash": lambda tools, **kwargs: tools.bash(**kwargs),
+        "task_done": lambda tools, **kwargs: tools.task_done(**kwargs),
+        "think": lambda tools, **kwargs: tools.think(**kwargs),
+        "reflect": lambda tools, **kwargs: tools.reflect(**kwargs),
+        "document_qa": lambda tools, **kwargs: tools.document_qa(**kwargs),
+        "extract_markdown_toc": lambda tools, **kwargs: tools.extract_markdown_toc(**kwargs),
+        "extract_markdown_section": lambda tools, **kwargs: tools.extract_markdown_section(**kwargs),
+        "document_extract": lambda tools, **kwargs: tools.document_extract(**kwargs),
+        "search_result_classifier": lambda tools, **kwargs: tools.search_result_classifier(**kwargs),
+        "info_seeker_subjective_task_done": None,
+        "writer_subjective_task_done": None,
+        "section_writer": lambda tools, **kwargs: tools.section_writer(**kwargs),
+        "concat_section_files": lambda tools, **kwargs: tools.concat_section_files(**kwargs),
+        # Internal tools - available to server but NOT exposed to agents via tool schemas
+        "internal_file_read_unlimited": lambda tools, **kwargs: tools.internal_file_read_unlimited(**kwargs),
+    }
+    return tool_map.get(tool_name)
+# ================ TOOL CALL TRACKING ================
+@dataclass
+class ToolCallLog:
+    """Individual tool call log entry"""
+    call_id: str
+    timestamp: datetime
+    tool_name: str
+    input_args: Dict[str, Any]
+    output_result: Dict[str, Any]
+    success: bool
+    duration_ms: float
+    error_details: Optional[str] = None
+    session_id: str = ""
+    agent_info: Optional[Dict[str, Any]] = None
+    def to_dict(self) -> Dict[str, Any]:
+        """Convert to dictionary for JSON serialization"""
+        return {
+            "call_id": self.call_id,
+            "timestamp": self.timestamp.isoformat(),
+            "tool_name": self.tool_name,
+            "input_args": self.input_args,
+            "output_result": self.output_result,
+            "success": self.success,
+            "duration_ms": self.duration_ms,
+            "error_details": self.error_details,
+            "session_id": self.session_id,
+            "agent_info": self.agent_info
+        }
+class ToolCallTracker:
+    """Tracks and saves tool calls to workspace-specific files"""
+    def __init__(self, workspace_path: Path, session_id: str):
+        self.workspace_path = workspace_path
+        self.session_id = session_id
+        self.logs_dir = workspace_path / "tool_call_logs"
+        self.logs_dir.mkdir(exist_ok=True)
+        # Create daily log file
+        today = datetime.now().strftime("%Y-%m-%d")
+        self.current_log_file = self.logs_dir / f"tool_calls_{today}.jsonl"
+        self.summary_file = self.logs_dir / "session_summary.json"
+        # Track call counts
+        self.call_count = 0
+        self.tool_usage_stats = defaultdict(int)
+        # Initialize session summary
+        self._initialize_session_summary()
+    def _initialize_session_summary(self):
+        """Initialize or update session summary file"""
+        summary = {
+            "session_id": self.session_id,
+            "session_start": datetime.now().isoformat(),
+            "last_updated": datetime.now().isoformat(),
+            "total_tool_calls": 0,
+            "tool_usage_stats": {},
+            "agent_activity": {},
+            "workspace_path": str(self.workspace_path)
+        }
+        # Load existing summary if it exists
+        if self.summary_file.exists():
+            try:
+                with open(self.summary_file, 'r') as f:
+                    existing_summary = json.load(f)
+                    summary.update(existing_summary)
+                    # Don't overwrite session_start if it already exists
+                    if "session_start" in existing_summary:
+                        summary["session_start"] = existing_summary["session_start"]
+            except Exception as e:
+                logger.warning(f"Could not load existing session summary: {e}")
+        self._save_summary(summary)
+    def _save_summary(self, summary: Dict[str, Any]):
+        """Save session summary to file"""
+        try:
+            with open(self.summary_file, 'w') as f:
+                json.dump(summary, f, indent=2, ensure_ascii=False)
+        except Exception as e:
+            logger.error(f"Failed to save session summary: {e}")
+    def log_tool_call(self,
+                     tool_name: str,
+                     input_args: Dict[str, Any],
+                     output_result: Dict[str, Any],
+                     success: bool,
+                     duration_ms: float,
+                     error_details: Optional[str] = None,
+                     agent_info: Optional[Dict[str, Any]] = None) -> str:
+        """Log a tool call and return the call ID"""
+        if not config.enable_tool_tracking:
+            return ""
+        # Respect max call limit per session
+        if self.call_count >= config.max_tracked_calls_per_session:
+            logger.warning(f"Max tracked calls reached for session {self.session_id}")
+            return ""
+        call_id = str(uuid.uuid4())
+        timestamp = datetime.now()
+        # Create log entry
+        log_entry = ToolCallLog(
+            call_id=call_id,
+            timestamp=timestamp,
+            tool_name=tool_name,
+            input_args=self._sanitize_args(input_args),
+            output_result=self._sanitize_result(output_result),
+            success=success,
+            duration_ms=duration_ms,
+            error_details=error_details if config.track_detailed_errors else None,
+            session_id=self.session_id,
+            agent_info=agent_info
+        )
+        # Save to JSONL file (one JSON object per line)
+        try:
+            with open(self.current_log_file, 'a', encoding="utf-8") as f:
+                f.write(json.dumps(log_entry.to_dict(), ensure_ascii=False) + '\n')
+        except Exception as e:
+            logger.error(f"Failed to save tool call log: {e}")
+        # Update session summary
+        self._update_session_summary(log_entry)
+        self.call_count += 1
+        self.tool_usage_stats[tool_name] += 1
+        return call_id
+    @staticmethod
+    def _sanitize_args(args: Dict[str, Any]) -> Dict[str, Any]:
+        """Sanitize arguments for logging (remove sensitive data)"""
+        sanitized = {}
+        for key, value in args.items():
+            if isinstance(value, str) and len(value) > 1000:
+                sanitized[key] = value[:1000] + "... [truncated]"
+            elif key.lower() in ['password', 'token', 'secret', 'key']:
+                sanitized[key] = "[REDACTED]"
+            else:
+                sanitized[key] = value
+        return sanitized
+    def _sanitize_result(self, result: Dict[str, Any]) -> Dict[str, Any]:
+        """Sanitize result for logging (remove large content)"""
+        if not isinstance(result, dict):
+            return result
+        sanitized = {}
+        for key, value in result.items():
+            if isinstance(value, str) and len(value) > 2000:
+                sanitized[key] = value[:2000] + "... [truncated]"
+            elif isinstance(value, dict):
+                sanitized[key] = self._sanitize_result(value)
+            else:
+                sanitized[key] = value
+        return sanitized
+    def _update_session_summary(self, log_entry: ToolCallLog):
+        """Update session summary with new tool call"""
+        try:
+            summary = {
+                "session_id": self.session_id,
+                "last_updated": datetime.now().isoformat(),
+                "total_tool_calls": self.call_count + 1,
+                "tool_usage_stats": dict(self.tool_usage_stats),
+                "workspace_path": str(self.workspace_path)
+            }
+            # Load existing summary
+            if self.summary_file.exists():
+                with open(self.summary_file, 'r') as f:
+                    existing_summary = json.load(f)
+                    summary.update(existing_summary)
+            # Update with new data
+            summary["last_updated"] = datetime.now().isoformat()
+            summary["total_tool_calls"] = self.call_count + 1
+            summary["tool_usage_stats"] = dict(self.tool_usage_stats)
+            summary["tool_usage_stats"][log_entry.tool_name] = self.tool_usage_stats[log_entry.tool_name] + 1
+            # Track agent activity
+            if log_entry.agent_info:
+                agent_type = log_entry.agent_info.get('type', 'unknown')
+                if 'agent_activity' not in summary:
+                    summary['agent_activity'] = {}
+                if agent_type not in summary['agent_activity']:
+                    summary['agent_activity'][agent_type] = {
+                        'tool_calls': 0,
+                        'last_active': log_entry.timestamp.isoformat()
+                    }
+                summary['agent_activity'][agent_type]['tool_calls'] += 1
+                summary['agent_activity'][agent_type]['last_active'] = log_entry.timestamp.isoformat()
+            self._save_summary(summary)
+        except Exception as e:
+            logger.error(f"Failed to update session summary: {e}")
+# ================ SESSION KEEP-ALIVE FOR LONG OPERATIONS ================
+class KeepAliveSessionWrapper:
+    """Wrapper that keeps a session alive during long-running operations"""
+    def __init__(self, session: 'Session', touch_interval: int = 300):  # Touch every 5 minutes
+        self.session = session
+        self.touch_interval = touch_interval
+        self.keep_alive_thread = None
+        self.stop_event = Event()
+        self.active = False
+    def start_keep_alive(self):
+        """Start the keep-alive mechanism"""
+        if self.active:
+            return
+        self.active = True
+        self.stop_event.clear()
+        def keep_alive_worker():
+            while not self.stop_event.wait(self.touch_interval):
+                try:
+                    self.session.touch()
+                    logger.debug("Keep-alive: Touched session {%s}", self.session.id)
+                except Exception as e:
+                    logger.error(f"Keep-alive error for session {self.session.id}: {e}")
+                    break
+        self.keep_alive_thread = Thread(target=keep_alive_worker, daemon=True)
+        self.keep_alive_thread.start()
+        logger.info(f"Started keep-alive for session {self.session.id}")
+    def stop_keep_alive(self):
+        """Stop the keep-alive mechanism"""
+        if not self.active:
+            return
+        self.active = False
+        self.stop_event.set()
+        if self.keep_alive_thread and self.keep_alive_thread.is_alive():
+            self.keep_alive_thread.join(timeout=1.0)
+        # Final touch
+        try:
+            self.session.touch()
+        except Exception as e:
+            logger.error(f"Final keep-alive touch error for session {self.session.id}: {e}")
+        logger.info(f"Stopped keep-alive for session {self.session.id}")
+    def __enter__(self):
+        self.start_keep_alive()
+        return self
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        self.stop_keep_alive()
+# ================ SESSION MANAGEMENT ================
+@dataclass
+class Session:
+    """Thread-safe session data structure with workspace management"""
+    id: str
+    created_at: datetime
+    last_accessed: datetime
+    initialized: bool = False
+    request_count: int = 0
+    metadata: Dict[str, Any] = field(default_factory=dict)
+    workspace_path: Optional[Path] = None
+    mcp_tools: Optional[MCPTools] = None
+    tool_tracker: Optional[ToolCallTracker] = None
+    def is_expired(self, ttl_seconds: int) -> bool:
+        """Check if session has expired"""
+        return datetime.now() - self.last_accessed > timedelta(seconds=ttl_seconds)
+    def touch(self):
+        """Update last accessed time"""
+        self.last_accessed = datetime.now()
+        self.request_count += 1
+    def get_mcp_tools(self, prefer_async: bool = True) -> MCPTools:
+        """Get or create MCP tools instance for this session"""
+        if self.mcp_tools is None:
+            # Use async tools if available and preferred
+            if prefer_async and AsyncMCPTools is not None:
+                self.mcp_tools = AsyncMCPTools(workspace_path=str(self.workspace_path) if self.workspace_path else None)
+            else:
+                self.mcp_tools = MCPTools(workspace_path=str(self.workspace_path) if self.workspace_path else None)
+        return self.mcp_tools
+    def get_tool_tracker(self) -> Optional[ToolCallTracker]:
+        """Get or create tool call tracker for this session"""
+        if config.enable_tool_tracking and self.workspace_path:
+            if self.tool_tracker is None:
+                self.tool_tracker = ToolCallTracker(self.workspace_path, self.id)
+            return self.tool_tracker
+        return None
+class AsyncRLock:
+    """异步可重入锁，模拟 threading.RLock 的异步版本"""
+    def __init__(self):
+        self._lock = asyncio.Lock()
+        self._owner: Optional[asyncio.Task] = None  # 记录持有锁的协程任务
+        self._count = 0  # 重入次数
+    async def acquire(self):
+        current_task = asyncio.current_task()
+        # 如果当前协程已持有锁，直接增加重入次数
+        if self._owner == current_task:
+            self._count += 1
+            return
+        # 否则等待获取锁
+        await self._lock.acquire()
+        self._owner = current_task
+        self._count = 1
+    async def release(self):
+        if self._owner != asyncio.current_task():
+            raise RuntimeError("不能释放非当前协程持有的锁")
+        self._count -= 1
+        if self._count == 0:  # 重入次数归零时，真正释放锁
+            self._owner = None
+            self._lock.release()
+    # 支持 async with 语法
+    async def __aenter__(self):
+        await self.acquire()
+        return self
+    async def __aexit__(self, exc_type, exc, tb):
+        await self.release()
+class ThreadSafeSessionManager:
+    """Thread-safe session manager with workspace management"""
+    def __init__(self, ttl_seconds: int = 3600, max_sessions: int = 1000, base_workspace_dir: str = "workspaces"):
+        self.ttl_seconds = ttl_seconds
+        self.max_sessions = max_sessions
+        self.base_workspace_dir = Path(base_workspace_dir)
+        self.base_workspace_dir.mkdir(exist_ok=True)
+        # Thread-safe session storage
+        self.sessions: Dict[str, Session] = {}
+        self.lock = AsyncRLock()
+        # Start cleanup thread
+        self._start_cleanup_thread()
+    async def create_session(self) -> str:
+        """Create a new session and return session ID"""
+        session_id = str(uuid.uuid4())
+        async with self.lock:
+            # Check session limits
+            if len(self.sessions) >= self.max_sessions:
+                await self._cleanup_oldest_sessions()
+            # Create workspace directory
+            workspace_path = self.base_workspace_dir / session_id
+            workspace_path.mkdir(exist_ok=True, parents=True)
+            # Create session
+            session = Session(
+                id=session_id,
+                created_at=datetime.now(),
+                last_accessed=datetime.now(),
+                workspace_path=workspace_path
+            )
+            self.sessions[session_id] = session
+            logger.info(f"Created session {session_id} with workspace {workspace_path}")
+            return session_id
+    async def get_session(self, session_id: str) -> Optional[Session]:
+        """Get session by ID if it exists and is not expired"""
+        async with self.lock:
+            session = self.sessions.get(session_id)
+            if session and not session.is_expired(self.ttl_seconds):
+                session.touch()
+                return session
+            elif session:
+                # Remove expired session
+                del self.sessions[session_id]
+                logger.info(f"Removed expired session {session_id}")
+            return None
+    async def get_or_create_session(self, session_id: Optional[str] = None) -> Session:
+        """Get existing session or create new one"""
+        if session_id:
+            session = await self.get_session(session_id)
+            if session:
+                return session
+        # Create new session
+        new_session_id = await self.create_session()
+        return self.sessions[new_session_id]
+    async def _cleanup_expired_sessions(self):
+        """Remove expired sessions"""
+        async with self.lock:
+            expired_sessions = []
+            for session_id, session in self.sessions.items():
+                if session.is_expired(self.ttl_seconds):
+                    expired_sessions.append(session_id)
+            for session_id in expired_sessions:
+                del self.sessions[session_id]
+                logger.info(f"Cleaned up expired session {session_id}")
+    async def _cleanup_oldest_sessions(self):
+        """Remove oldest sessions when limit is reached"""
+        async with self.lock:
+            if len(self.sessions) < self.max_sessions:
+                return
+            # Sort by last accessed time and remove oldest
+            sorted_sessions = sorted(
+                self.sessions.items(),
+                key=lambda x: x[1].last_accessed
+            )
+            sessions_to_remove = len(self.sessions) - self.max_sessions + 10  # Remove extra
+            for i in range(sessions_to_remove):
+                if i < len(sorted_sessions):
+                    session_id = sorted_sessions[i][0]
+                    del self.sessions[session_id]
+                    logger.info(f"Removed old session {session_id} due to session limit")
+    def _start_cleanup_thread(self):
+        """Start background cleanup thread"""
+        def cleanup_worker():
+            while True:
+                try:
+                    time.sleep(config.cleanup_interval_seconds)
+                    # Run async method in sync context
+                    loop = asyncio.new_event_loop()
+                    loop.run_until_complete(self._cleanup_expired_sessions())
+                    loop.close()
+                except Exception as e:
+                    logger.error(f"Error in cleanup thread: {e}")
+        import threading
+        cleanup_thread = threading.Thread(target=cleanup_worker, daemon=True)
+        cleanup_thread.start()
+        logger.info("Started session cleanup thread")
+    async def get_stats(self) -> Dict[str, Any]:
+        """Get session manager statistics"""
+        async with self.lock:
+            return {
+                "total_sessions": len(self.sessions),
+                "max_sessions": self.max_sessions,
+                "ttl_seconds": self.ttl_seconds,
+                "session_ids": list(self.sessions.keys())
+            }
+# ================ MIDDLEWARE AND SECURITY ================
+class RateLimiter:
+    """Simple rate limiter with time-window tracking"""
+    def __init__(self, requests_per_minute: int = 60):
+        self.requests_per_minute = requests_per_minute
+        self.requests: Dict[str, List[float]] = defaultdict(list)
+        self.lock = asyncio.Lock()
+    async def is_allowed(self, client_id: str) -> bool:
+        """Check if request is allowed for client"""
+        async with self.lock:
+            now = time.time()
+            minute_ago = now - 60
+            # Clean old requests
+            self.requests[client_id] = [
+                req_time for req_time in self.requests[client_id]
+                if req_time > minute_ago
+            ]
+            # Check rate limit
+            if len(self.requests[client_id]) >= self.requests_per_minute:
+                return False
+            # Add current request
+            self.requests[client_id].append(now)
+            return True
+class RequestValidator:
+    """Validates incoming MCP requests"""
+    @staticmethod
+    def validate_mcp_request(data: Dict[str, Any]) -> tuple[bool, Optional[str]]:
+        """Validate basic MCP request structure"""
+        if not isinstance(data, dict):
+            return False, "Request must be a JSON object"
+        if "method" not in data:
+            return False, "Missing 'method' field"
+        if "id" not in data:
+            return False, "Missing 'id' field"
+        return True, None
+    @staticmethod
+    def validate_tool_call(params: Dict[str, Any]) -> tuple[bool, Optional[str]]:
+        """Validate tool call parameters"""
+        if not isinstance(params, dict):
+            return False, "Tool parameters must be a JSON object"
+        if "name" not in params:
+            return False, "Missing tool 'name'"
+        if "arguments" not in params:
+            return False, "Missing tool 'arguments'"
+        tool_name = params["name"]
+        # Get detailed schemas
+        detailed_schemas = get_tool_schemas()
+        if tool_name not in detailed_schemas:
+            return False, f"Unknown tool: {tool_name}. Available tools: {sorted(list(detailed_schemas.keys()))}"
+        return True, None
+class SecurityMiddleware(BaseHTTPMiddleware):
+    """Security middleware for basic protection"""
+    async def dispatch(self, request: Request, call_next):
+        # Check content length
+        content_length = request.headers.get("content-length")
+        if content_length and int(content_length) > config.max_request_size_mb * 1024 * 1024:
+            return JSONResponse(
+                status_code=HTTPStatus.REQUEST_ENTITY_TOO_LARGE,
+                content={"error": "Request too large"}
+            )
+        # Add security headers
+        response = await call_next(request)
+        response.headers["X-Content-Type-Options"] = "nosniff"
+        response.headers["X-Frame-Options"] = "DENY"
+        response.headers["X-XSS-Protection"] = "1; mode=block"
+        return response
+class RateLimitMiddleware(BaseHTTPMiddleware):
+    """Rate limiting middleware"""
+    def __init__(self, app, input_rate_limiter: RateLimiter):
+        super().__init__(app)
+        self.rate_limiter = input_rate_limiter
+    async def dispatch(self, request: Request, call_next):
+        # Get client identifier (IP address)
+        client_ip = request.client.host if request.client else "unknown"
+        if not await self.rate_limiter.is_allowed(client_ip):
+            return JSONResponse(
+                status_code=HTTPStatus.TOO_MANY_REQUESTS,
+                content={"error": "Rate limit exceeded"}
+            )
+        return await call_next(request)
+# Global session manager
+session_manager = None
+rate_limiter = None
+@dataclass
+class RateLimitViolation:
+    """Represents a rate limit violation with standardized error information"""
+    tool_name: str
+    limit_type: str  # "burst", "second", "minute", "hour"
+    current_usage: int
+    limit_value: float
+    retry_after_seconds: float
+    def to_user_friendly_message(self) -> str:
+        """Generate user-friendly error message"""
+        if self.limit_type == "burst":
+            return f"Service temporarily unavailable: Too many rapid requests to {self.tool_name}. Please wait {self.retry_after_seconds:.0f} seconds before trying again."
+        elif self.limit_type == "second":
+            return f"Service temporarily unavailable: {self.tool_name} request rate exceeded ({self.limit_value}/second). Please wait {self.retry_after_seconds:.0f} seconds before trying again."
+        elif self.limit_type == "minute":
+            return f"Service temporarily unavailable: {self.tool_name} quota exceeded ({self.limit_value}/minute). Please try again in {self.retry_after_seconds:.0f} seconds."
+        elif self.limit_type == "hour":
+            return f"Service temporarily unavailable: {self.tool_name} hourly quota exceeded ({self.limit_value}/hour). Please try again in {self.retry_after_seconds:.0f} minutes."
+        else:
+            return f"Service temporarily unavailable: {self.tool_name} rate limit exceeded. Please try again later."
+    def to_technical_message(self) -> str:
+        """Generate technical error message for debugging"""
+        return f"Tool '{self.tool_name}' {self.limit_type} limit exceeded ({self.current_usage}/{self.limit_value} {self.limit_type})"
+def _parse_rate_limit_denial(tool_name: str, denial_reason: str) -> RateLimitViolation:
+    """Parse rate limit denial reason into structured violation information"""
+    import re
+    # Default values
+    limit_type = "unknown"
+    current_usage = 0
+    limit_value = 0.0
+    retry_after_seconds = 60.0  # Default retry after 1 minute
+    # Parse different types of rate limit violations
+    if "burst limit exceeded" in denial_reason:
+        limit_type = "burst"
+        retry_after_seconds = 1.0  # Burst limits reset quickly
+        match = re.search(r'\((\d+) requests/burst\)', denial_reason)
+        if match:
+            limit_value = float(match.group(1))
+            current_usage = int(limit_value)  # Approximation
+    elif "per-second limit exceeded" in denial_reason:
+        limit_type = "second"
+        retry_after_seconds = 1.0  # Wait 1 second
+        match = re.search(r'\(([0-9.]+) requests/second\)', denial_reason)
+        if match:
+            limit_value = float(match.group(1))
+            current_usage = int(limit_value)  # Approximation
+    elif "per-minute limit exceeded" in denial_reason:
+        limit_type = "minute"
+        retry_after_seconds = 10.0  # Wait 10 seconds for minute limits
+        match = re.search(r'\(([0-9.]+) requests/minute\)', denial_reason)
+        if match:
+            limit_value = float(match.group(1))
+            current_usage = int(limit_value)  # Approximation
+    elif "per-hour limit exceeded" in denial_reason:
+        limit_type = "hour"
+        retry_after_seconds = 300.0  # Wait 5 minutes for hour limits
+        match = re.search(r'\(([0-9.]+) requests/hour\)', denial_reason)
+        if match:
+            limit_value = float(match.group(1))
+            current_usage = int(limit_value)  # Approximation
+    return RateLimitViolation(
+        tool_name=tool_name,
+        limit_type=limit_type,
+        current_usage=current_usage,
+        limit_value=limit_value,
+        retry_after_seconds=retry_after_seconds
+    )
+async def _call_session_tool_async(session: Session, tool_name: str, tool_args: Dict[str, Any],
+                                   client_ip: str = "unknown") -> Dict[str, Any]:
+    """Execute a tool within a session context with full tracking, workspace management, and global rate limiting"""
+    start_time = time.time()
+    success = False
+    error_details = None
+    result_data = None
+    # Touch session at start of tool execution to prevent expiry during long operations
+    session.touch()
+    try:
+        # CHECK GLOBAL TOOL RATE LIMITS FIRST
+        if global_tool_rate_limiter:
+            allowed, deny_reason = await global_tool_rate_limiter.is_allowed(tool_name)
+            if not allowed:
+                # Parse the denial reason to create structured rate limit violation
+                rate_limit_violation = _parse_rate_limit_denial(tool_name, deny_reason)
+                # Create user-friendly error message
+                user_message = rate_limit_violation.to_user_friendly_message()
+                technical_message = rate_limit_violation.to_technical_message()
+                logger.warning(f"Session {session.id}: {technical_message}")
+                result_data = {
+                    "success": False,
+                    "error": user_message,
+                    "error_code": "RATE_LIMIT_EXCEEDED",
+                    "error_type": "rate_limit",
+                    "tool_name": tool_name,
+                    "limit_type": rate_limit_violation.limit_type,
+                    "retry_after_seconds": rate_limit_violation.retry_after_seconds,
+                    "data": None,
+                    "rate_limited": True,  # Keep for backward compatibility
+                    "technical_details": technical_message  # For debugging
+                }
+                # Still log this for tracking purposes
+                duration_ms = (time.time() - start_time) * 1000
+                tracker = session.get_tool_tracker()
+                if tracker:
+                    try:
+                        agent_info = {
+                            "client_ip": client_ip,
+                            "type": "unknown",
+                            "session_request_count": session.request_count
+                        }
+                        tracker.log_tool_call(
+                            tool_name=tool_name,
+                            input_args=tool_args,
+                            output_result=result_data,
+                            success=False,
+                            duration_ms=duration_ms,
+                            error_details=user_message,
+                            agent_info=agent_info
+                        )
+                    except Exception as e:
+                        logger.error(f"Failed to log rate-limited tool call: {e}")
+                return result_data
+        # Get MCP tools instance for this session (handles workspace isolation)
+        mcp_tools = session.get_mcp_tools(prefer_async=True)
+        # Get tool method directly from the mcp_tools instance
+        if not hasattr(mcp_tools, tool_name):
+            raise ValueError(f"Tool '{tool_name}' not implemented")
+        tool_method = getattr(mcp_tools, tool_name)
+        # Add session context to tool arguments for workspace-aware tools
+        if hasattr(mcp_tools, 'set_session_context'):
+            mcp_tools.set_session_context(session.id, str(session.workspace_path))
+        # Execute tool with keep-alive for potentially long operations
+        logger.info(f"Session {session.id}: Executing tool '{tool_name}' with args: {list(tool_args.keys())}")
+        # Use keep-alive wrapper for tools that might take a long time
+        long_running_tools = {'batch_web_search', 'url_crawler', 'document_qa', 'document_extract', 'bash'}
+        # Check if the tool method is async
+        import inspect
+        is_async_tool = inspect.iscoroutinefunction(tool_method)
+        # Execute tool based on whether it's async or sync
+        if is_async_tool:
+            # Tool is async - execute directly
+            logger.debug("Executing async tool '{%s}'", tool_name)
+            if config.enable_session_keepalive and tool_name in long_running_tools:
+                # For long-running async tools, use keep-alive
+                with KeepAliveSessionWrapper(session, touch_interval=config.keepalive_touch_interval):
+                    result = await tool_method(**tool_args)
+            else:
+                # For regular async tools, execute directly
+                result = await tool_method(**tool_args)
+        else:
+            # Tool is sync - execute in thread pool
+            logger.debug("Executing sync tool '{%s}' in thread pool", tool_name)
+            # Define the synchronous tool execution function
+            def execute_tool_sync():
+                """Synchronous tool execution to be run in thread pool"""
+                return tool_method(**tool_args)
+            # Execute tool asynchronously in thread pool for true non-blocking execution
+            import asyncio
+            import concurrent.futures
+            # Create a thread pool executor for CPU-bound/blocking operations
+            loop = asyncio.get_event_loop()
+            if config.enable_session_keepalive and tool_name in long_running_tools:
+                # For long-running tools, use keep-alive with async execution
+                with KeepAliveSessionWrapper(session, touch_interval=config.keepalive_touch_interval):
+                    # Run in thread pool to avoid blocking the event loop
+                    with concurrent.futures.ThreadPoolExecutor(max_workers=4) as executor:
+                        result = await loop.run_in_executor(executor, execute_tool_sync)
+            else:
+                # For regular tools, use async execution without keep-alive
+                with concurrent.futures.ThreadPoolExecutor(max_workers=4) as executor:
+                    result = await loop.run_in_executor(executor, execute_tool_sync)
+        # Touch session after tool execution to update activity
+        session.touch()
+        # Handle different result formats
+        if hasattr(result, 'to_dict'):
+            result_data = result.to_dict()
+        elif isinstance(result, dict):
+            result_data = result
+        else:
+            result_data = {"result": result}
+        success = result_data.get('success', True)
+        if success:
+            logger.info(f"Session {session.id}: Tool '{tool_name}' completed successfully")
+            # RECORD SUCCESSFUL REQUEST FOR RATE LIMITING
+            if global_tool_rate_limiter:
+                await global_tool_rate_limiter.record_request(tool_name)
+        else:
+            error_details = result_data.get('error', 'Unknown error')
+            logger.warning(f"Session {session.id}: Tool '{tool_name}' failed: {error_details}")
+    except Exception as e:
+        success = False
+        error_details = str(e)
+        result_data = {
+            "success": False,
+            "error": error_details,
+            "data": None
+        }
+        logger.error(f"Session {session.id}: Tool '{tool_name}' exception: {e}")
+    # Calculate execution time
+    duration_ms = (time.time() - start_time) * 1000
+    # Log tool call if tracking is enabled
+    tracker = session.get_tool_tracker()
+    if tracker:
+        try:
+            agent_info = {
+                "client_ip": client_ip,
+                "type": "unknown",  # Could be enhanced to detect agent type
+                "session_request_count": session.request_count
+            }
+            tracker.log_tool_call(
+                tool_name=tool_name,
+                input_args=tool_args,
+                output_result=result_data,
+                success=success,
+                duration_ms=duration_ms,
+                error_details=error_details,
+                agent_info=agent_info
+            )
+        except Exception as e:
+            logger.error(f"Failed to log tool call: {e}")
+    return result_data
+def create_sse_response(response_data: dict, session_id: str = None) -> StreamingResponse:
+    """Create Server-Sent Events response with proper formatting"""
+    def generate_sse():
+        try:
+            # Add session info to response if available
+            if session_id:
+                response_data["session_id"] = session_id
+            json_data = json.dumps(response_data, ensure_ascii=False)
+            yield f"event: message\n"
+            yield f"data: {json_data}\n"
+            yield f"\n"
+        except Exception as e:
+            error_data = {
+                "jsonrpc": "2.0",
+                "error": {"code": JsonRpcErr.INTERNAL_ERROR, "message": f"Internal error: {str(e)}"},
+                "id": response_data.get("id")
+            }
+            json_data = json.dumps(error_data, ensure_ascii=False)
+            yield f"event: error\n"
+            yield f"data: {json_data}\n"
+            yield f"\n"
+    return StreamingResponse(
+        generate_sse(),
+        media_type="text/event-stream",
+        headers={
+            "Cache-Control": "no-cache",
+            "Connection": "keep-alive",
+            "Access-Control-Allow-Origin": "*",
+        }
+    )
+def create_error_response(request_id: Any, code: int, message: str, session_id: str = None) -> StreamingResponse:
+    """Create error response in SSE format"""
+    error_data = {
+        "jsonrpc": "2.0",
+        "error": {"code": code, "message": message},
+        "id": request_id
+    }
+    return create_sse_response(error_data, session_id)
+def create_rate_limit_response(
+    request_id: Any,
+    tool_name: str,
+    error_message: str,
+    retry_after_seconds: float,
+    limit_type: str,
+    technical_details: str = "",
+    session_id: str = None
+) -> JSONResponse:
+    """
+    Create HTTP 429 Rate Limit Exceeded response with proper headers and error format.
+    Returns proper HTTP status code instead of SSE for rate limiting errors.
+    """
+    # Calculate retry-after header value
+    retry_after_header = int(max(1.0, retry_after_seconds))
+    # Create standardized error response
+    error_data = {
+        "error": {
+            "type": "rate_limit_exceeded",
+            "code": "RATE_LIMIT_EXCEEDED",
+            "message": error_message,
+            "details": {
+                "tool_name": tool_name,
+                "limit_type": limit_type,
+                "retry_after_seconds": retry_after_seconds,
+                "technical_details": technical_details
+            }
+        },
+        "request_id": request_id,
+        "timestamp": datetime.now().isoformat(),
+        "session_id": session_id
+    }
+    # Set appropriate headers
+    headers = {
+        "Retry-After": str(retry_after_header),  # HTTP standard header
+        "X-RateLimit-Limit-Type": limit_type,
+        "X-RateLimit-Tool": tool_name,
+        "X-RateLimit-Retry-After": str(retry_after_seconds),
+        "Content-Type": "application/json"
+    }
+    return JSONResponse(
+        status_code=HTTPStatus.TOO_MANY_REQUESTS,  # Too Many Requests
+        content=error_data,
+        headers=headers
+    )
+async def handle_mcp_request(request: Request) -> StreamingResponse:
+    """Main MCP request handler with session management and tool execution"""
+    try:
+        # Check content length before reading body
+        content_length = request.headers.get("content-length")
+        if content_length:
+            content_size_mb = int(content_length) / (1024 * 1024)
+            if content_size_mb > config.max_request_size_mb:
+                logger.warning(f"Request too large: {content_size_mb:.2f}MB > {config.max_request_size_mb}MB")
+                return create_error_response(None, JsonRpcErr.PARSE_ERROR, f"Request too large: {content_size_mb:.2f}MB")
+        # Parse request with timeout protection
+        try:
+            body = await asyncio.wait_for(request.body(), timeout=config.request_timeout_seconds)
+        except asyncio.TimeoutError:
+            logger.error("Timeout while reading request body")
+            return create_error_response(None, JsonRpcErr.REQUEST_TIMEOUT, "Request body read timeout")
+        if not body:
+            return create_error_response(None, JsonRpcErr.PARSE_ERROR, "Empty request body")
+        try:
+            data = json.loads(body.decode('utf-8'))
+        except json.JSONDecodeError as e:
+            return create_error_response(None, JsonRpcErr.PARSE_ERROR, f"Invalid JSON: {str(e)}")
+        # Validate MCP request structure
+        is_valid, error_msg = RequestValidator.validate_mcp_request(data)
+        if not is_valid:
+            return create_error_response(data.get("id"), JsonRpcErr.INVALID_REQUEST, error_msg)
+        request_id = data["id"]
+        method = data["method"]
+        params = data.get("params", {})
+        # Get or create session
+        session_id = request.headers.get("X-Session-ID")
+        client_ip = request.client.host if request.client else "unknown"
+        session = await session_manager.get_or_create_session(session_id)
+        logger.info(f"Processing {method} request for session {session.id} from {client_ip}")
+        # Handle different MCP methods
+        if method == "initialize":
+            # MCP initialization
+            response_data = {
+                "jsonrpc": "2.0",
+                "result": {
+                    "protocolVersion": "2025-06-18",
+                    "capabilities": {
+                        "tools": {"supportsProgress": True},
+                        "resources": {},
+                        "prompts": {}
+                    },
+                    "serverInfo": {
+                        "name": "DeepDiver-Demo-MCP",
+                        "version": "1.0.0"
+                    }
+                },
+                "id": request_id
+            }
+        elif method == "tools/list":
+            # List available tools using detailed schemas from get_tool_schemas()
+            tools_list = []
+            detailed_schemas = get_tool_schemas()
+            # Build tools list from schemas
+            for _, detailed_schema in detailed_schemas.items():
+                tools_list.append({
+                    "name": detailed_schema["name"],
+                    "description": detailed_schema["description"],
+                    "inputSchema": detailed_schema["inputSchema"]
+                })
+            logger.info(f"Serving {len(tools_list)} tools with detailed schemas to client")
+            response_data = {
+                "jsonrpc": "2.0",
+                "result": {"tools": tools_list},
+                "id": request_id
+            }
+        elif method == "tools/call":
+            # Execute tool call
+            is_valid, error_msg = RequestValidator.validate_tool_call(params)
+            if not is_valid:
+                return create_error_response(request_id, JsonRpcErr.INVALID_PARAMS, error_msg, session.id)
+            tool_name = params["name"]
+            tool_arguments = params["arguments"]
+            # Execute tool in session context asynchronously
+            result = await _call_session_tool_async(session, tool_name, tool_arguments, client_ip)
+            # CHECK FOR RATE LIMITING AND RETURN PROPER HTTP STATUS
+            if result.get("rate_limited", False):
+                return create_rate_limit_response(
+                    request_id=request_id,
+                    tool_name=tool_name,
+                    error_message=result.get("error", "Rate limit exceeded"),
+                    retry_after_seconds=result.get("retry_after_seconds", 60),
+                    limit_type=result.get("limit_type", "unknown"),
+                    technical_details=result.get("technical_details", ""),
+                    session_id=session.id
+                )
+            # Format normal response
+            response_data = {
+                "jsonrpc": "2.0",
+                "result": {
+                    "content": [
+                        {
+                            "type": "text",
+                            "text": json.dumps(result, indent=2, ensure_ascii=False)
+                        }
+                    ]
+                },
+                "id": request_id
+            }
+        else:
+            return create_error_response(request_id, JsonRpcErr.METHOD_NOT_FOUND, f"Method not found: {method}", session.id)
+        return create_sse_response(response_data, session.id)
+    except asyncio.TimeoutError:
+        logger.warning("Request timeout - client may have disconnected")
+        return create_error_response(None, JsonRpcErr.REQUEST_TIMEOUT, "Request timeout")
+    except Exception as e:
+        # Handle client disconnects gracefully
+        if "ClientDisconnect" in str(e) or "ConnectionClosedError" in str(e):
+            logger.warning(f"Client disconnected during request processing: {e}")
+            return create_error_response(None, JsonRpcErr.REQUEST_TIMEOUT, "Client disconnected")
+        logger.error(f"Unexpected error in MCP request handler: {e}")
+        import traceback
+        logger.error(traceback.format_exc())
+        return create_error_response(None, JsonRpcErr.INTERNAL_ERROR, f"Internal server error: {str(e)}")
+async def handle_health_check(request: Request) -> JSONResponse:
+    """Health check endpoint"""
+    try:
+        stats = await session_manager.get_stats() if session_manager else {}
+        # Get rate limiting summary
+        rate_limit_summary = {}
+        if global_tool_rate_limiter:
+            all_stats = global_tool_rate_limiter.get_all_stats()
+            rate_limit_summary = {
+                "enabled": True,
+                "tools_with_limits": len(all_stats),
+                "total_configured_tools": list(all_stats.keys())
+            }
+        else:
+            rate_limit_summary = {"enabled": False}
+        health_data = {
+            "status": "healthy",
+            "timestamp": datetime.now().isoformat(),
+            "version": "1.0.0",
+            "session_stats": stats,
+            "features": {
+                "workspace_isolation": True,
+                "tool_call_tracking": config.enable_tool_tracking if config else False,
+                "client_rate_limiting": True,
+                "global_tool_rate_limiting": rate_limit_summary["enabled"],
+                "security_middleware": True,
+                "standardized_rate_limit_responses": True
+            },
+            "rate_limiting": rate_limit_summary,
+            "error_formats": {
+                "rate_limit_exceeded": {
+                    "http_status": HTTPStatus.TOO_MANY_REQUESTS,
+                    "headers": ["Retry-After", "X-RateLimit-*"],
+                    "error_code": "RATE_LIMIT_EXCEEDED",
+                    "response_format": "application/json"
+                }
+            }
+        }
+        return JSONResponse(content=health_data)
+    except Exception as e:
+        return JSONResponse(
+            status_code=HTTPStatus.INTERNAL_SERVER_ERROR,
+            content={"status": "unhealthy", "error": str(e)}
+        )
+async def handle_tracking_info(request: Request) -> JSONResponse:
+    """Get tool call tracking information for a session"""
+    try:
+        session_id = request.query_params.get("session_id")
+        if not session_id:
+            return JSONResponse(
+                status_code=HTTPStatus.BAD_REQUEST,
+                content={"error": "session_id parameter required"}
+            )
+        session = await session_manager.get_session(session_id)
+        if not session:
+            return JSONResponse(
+                status_code=HTTPStatus.NOT_FOUND,
+                content={"error": f"Session {session_id} not found"}
+            )
+        tracker = session.get_tool_tracker()
+        if not tracker:
+            return JSONResponse(
+                content={
+                    "session_id": session_id,
+                    "tracking_enabled": False,
+                    "message": "Tool call tracking not enabled or no workspace"
+                }
+            )
+        # Read session summary
+        summary_data = {}
+        if tracker.summary_file.exists():
+            try:
+                with open(tracker.summary_file, 'r') as f:
+                    summary_data = json.load(f)
+            except Exception as e:
+                logger.error(f"Failed to read session summary: {e}")
+        return JSONResponse(content={
+            "session_id": session_id,
+            "tracking_enabled": True,
+            "summary": summary_data,
+            "logs_directory": str(tracker.logs_dir),
+            "current_log_file": str(tracker.current_log_file)
+        })
+    except Exception as e:
+        return JSONResponse(
+            status_code=HTTPStatus.INTERNAL_SERVER_ERROR,
+            content={"error": str(e)}
+        )
+async def handle_rate_limit_stats(request: Request) -> JSONResponse:
+    """Get global tool rate limiting statistics"""
+    try:
+        if not global_tool_rate_limiter:
+            return JSONResponse(
+                status_code=HTTPStatus.NOT_FOUND,
+                content={"error": "Global tool rate limiter not initialized"}
+            )
+        # Check if specific tool requested
+        tool_name = request.query_params.get("tool")
+        if tool_name:
+            # Get stats for specific tool
+            stats = await global_tool_rate_limiter.get_tool_stats(tool_name)
+            return JSONResponse(content=stats)
+        else:
+            # Get stats for all tools
+            all_stats = global_tool_rate_limiter.get_all_stats()
+            return JSONResponse(content={
+                "timestamp": datetime.now().isoformat(),
+                "global_tool_rate_limiting": True,
+                "tools": all_stats,
+                "summary": {
+                    "total_tools_with_limits": len(all_stats),
+                    "tools_configured": list(all_stats.keys())
+                }
+            })
+    except Exception as e:
+        logger.error(f"Failed to get rate limit stats: {e}")
+        return JSONResponse(
+            status_code=HTTPStatus.INTERNAL_SERVER_ERROR,
+            content={"error": str(e)}
+        )
+def create_app() -> Starlette:
+    """Create and configure the Starlette application"""
+    global session_manager, rate_limiter, global_tool_rate_limiter
+    if not config:
+        raise RuntimeError("Server configuration not initialized")
+    # Initialize global components
+    session_manager = ThreadSafeSessionManager(
+        ttl_seconds=config.session_ttl_seconds,
+        max_sessions=config.max_sessions,
+        base_workspace_dir=config.base_workspace_dir
+    )
+    rate_limiter = RateLimiter(config.rate_limit_requests_per_minute)
+    # Initialize global tool rate limiter
+    if config.tool_rate_limits:
+        global_tool_rate_limiter = GlobalToolRateLimiter(config.tool_rate_limits)
+        logger.info(f"Initialized global tool rate limiter with {len(config.tool_rate_limits)} tool limits")
+    else:
+        logger.info("No tool rate limits configured - tools will run without global rate limiting")
+    # Create app
+    app = Starlette(debug=config.debug_mode)
+    app.add_middleware(SecurityMiddleware)
+    app.add_middleware(RateLimitMiddleware, input_rate_limiter=rate_limiter)
+    # Add routes
+    app.add_route("/mcp", handle_mcp_request, methods=["POST"])
+    app.add_route("/health", handle_health_check, methods=["GET"])
+    app.add_route("/tracking", handle_tracking_info, methods=["GET"])
+    app.add_route("/rate-limits", handle_rate_limit_stats, methods=["GET"])
+    return app
+def parse_arguments():
+    """Parse command line arguments"""
+    parser = argparse.ArgumentParser(
+        description="Demo-Ready MCP Server with Per-Tool Rate Limiting",
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        epilog="""
+Examples:
+  python src/tools/mcp_server_standard.py --config src/tools/server_config.yaml
+  python src/tools/mcp_server_standard.py --host 127.0.0.1 --port 8080
+  python src/tools/mcp_server_standard.py --config custom_config.yaml --debug
+        """
+    )
+    parser.add_argument(
+        '--config', '-c',
+        type=str,
+        help='Path to YAML configuration file'
+    )
+    parser.add_argument(
+        '--host',
+        type=str,
+        help='Server host (overrides config file)'
+    )
+    parser.add_argument(
+        '--port', '-p',
+        type=int,
+        help='Server port (overrides config file)'
+    )
+    parser.add_argument(
+        '--debug',
+        action='store_true',
+        help='Enable debug mode (overrides config file)'
+    )
+    parser.add_argument(
+        '--workspace-dir',
+        type=str,
+        help='Base workspace directory (overrides config file)'
+    )
+    return parser.parse_args()
+def print_startup_info():
+    """Print server startup information"""
+    logger.info("🚀 DeepDiver Demo MCP Server")
+    logger.info("=" * 50)
+    logger.info(f"📊 Features:")
+    logger.info(f"  • Session Management: ✅ (TTL: {config.session_ttl_seconds}s)")
+    logger.info(f"  • Workspace Isolation: ✅ (Base: {config.base_workspace_dir})")
+    logger.info(f"  • Tool Call Tracking: {'✅' if config.enable_tool_tracking else '❌'}")
+    logger.info(f"  • Client Rate Limiting: ✅ ({config.rate_limit_requests_per_minute}/min)")
+    logger.info(f"  • Global Tool Rate Limiting: {'✅' if config.tool_rate_limits else '❌'}")
+    logger.info(f"  • Security Middleware: ✅")
+    # Tool rate limiting information
+    if config.tool_rate_limits:
+        logger.info(f"🚦 Tool Rate Limits: {len(config.tool_rate_limits)} tools configured")
+        for tool_name, limits in list(config.tool_rate_limits.items())[:3]:
+            burst = limits.get('burst_limit', '∞')
+            rpm = limits.get('requests_per_minute', '∞')
+            logger.info(f"  • {tool_name}: {rpm}/min, burst: {burst}")
+        if len(config.tool_rate_limits) > 3:
+            logger.info(f"  • ... and {len(config.tool_rate_limits) - 3} more tools")
+    # Tool information from schemas
+    tool_schemas = get_tool_schemas()
+    available_tools = list(tool_schemas.keys())
+    logger.info(f"🔧 Tools Available: {len(available_tools)}")
+    logger.info(f"  • All tools defined in schemas: {len(available_tools)} tools")
+    logger.info(f"  • Sample tools: {', '.join(sorted(available_tools)[:5])}...")
+    logger.info("=" * 50)
+def main():
+    """Main function to run the production MCP server"""
+    global config
+    # Parse command line arguments
+    args = parse_arguments()
+    config = ServerConfig.from_yaml("./src/tools/server_config.yaml")
+    # Apply CLI overrides
+    if args.host:
+        config.host = args.host
+        logger.info(f"🔧 Override: Host = {config.host}")
+    if args.port:
+        config.port = args.port
+        logger.info(f"🔧 Override: Port = {config.port}")
+    if args.debug:
+        config.debug_mode = True
+        logger.info(f"🔧 Override: Debug mode enabled")
+    if args.workspace_dir:
+        config.base_workspace_dir = args.workspace_dir
+        logger.info(f"🔧 Override: Workspace directory = {config.base_workspace_dir}")
+    print_startup_info()
+    try:
+        import os
+        # Calculate optimal worker count for high-concurrency FIRST
+        # Use CPU core count indirectly via uvicorn's defaults; no local variable needed
+        # Override for high-concurrency scenarios
+        if os.getenv('FORCE_HIGH_CONCURRENCY', '').lower() == 'true':
+            pass  # Configuration handled elsewhere if needed
+        app = create_app()
+        logger.info(f"🌐 Starting server at http://{config.host}:{config.port}")
+        logger.info(f"📡 MCP endpoint: http://{config.host}:{config.port}/mcp")
+        logger.info(f"🏥 Health check: http://{config.host}:{config.port}/health")
+        logger.info(f"📊 Tracking info: http://{config.host}:{config.port}/tracking?session_id=<id>")
+        logger.info(f"🚦 Rate limit stats: http://{config.host}:{config.port}/rate-limits")
+        uvicorn.run(
+            app,  # Use app instance directly for single worker with async optimizations
+            host=config.host,
+            port=config.port,
+            log_level="info",
+            timeout_keep_alive=config.request_timeout_seconds,
+            workers=1,  # Single worker with async optimizations
+            backlog=1024,  # Larger backlog for high-concurrency
+            access_log=False,  # Disable access logs for better performance
+            limit_concurrency=None,  # No artificial concurrency limit
+        )
+    except KeyboardInterrupt:
+        print("\n⏹️  Server stopped by user")
+    except Exception as e:
+        print(f"❌ Server startup failed: {e}")
+        import traceback
+        traceback.print_exc()
+        raise
+if __name__ == "__main__":
+    main()