# Mirrored via huggingface_hub upload (commit 8c6097b, verified) from user
# wangrongsheng's repository page; the original web-page residue is kept here
# as a comment so this module remains valid Python.
# Copyright (c) 2025 Huawei Technologies Co., Ltd. All rights reserved.
#!/usr/bin/env python3
"""
Demo-Ready MCP Server - New Standard Implementation
Combines robust session management with comprehensive tool definitions.
Features: workspace isolation, tool call tracking, rate limiting, security, and full tool suite.
"""
import argparse
import asyncio
import json
import logging
import time
import uuid
import yaml
from collections import defaultdict, deque
from dataclasses import dataclass, field
from datetime import datetime, timedelta
from pathlib import Path
from threading import Thread, Event
from typing import Any, Dict, List, Optional
# Third-party imports
from starlette.applications import Starlette
from starlette.middleware.base import BaseHTTPMiddleware
from starlette.requests import Request
from starlette.responses import JSONResponse, StreamingResponse
import uvicorn
# Add project root to Python path for imports
import sys
sys.path.insert(0, str(Path(__file__).parent.parent.parent))
from src.utils.status_codes import JsonRpcErr
from http import HTTPStatus
# Handle both relative and absolute imports
try:
from .mcp_tools import MCPTools, get_tool_schemas
from .mcp_tools_async import AsyncMCPTools
except ImportError:
# Fallback for direct script execution
from src.tools.mcp_tools import MCPTools, get_tool_schemas
try:
from src.tools.mcp_tools_async import AsyncMCPTools
except ImportError:
AsyncMCPTools = None
# Workspace knowledge manager disabled
# NOTE(review): hard-coded off; nothing in this chunk reads the flag, so it is
# presumably consumed further down the file — confirm before removing.
WORKSPACE_KNOWLEDGE_AVAILABLE = False

# Configure structured logging: emit to stdout AND to a local file, including
# function name and line number in every record for easier server-side tracing.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(funcName)s:%(lineno)d - %(message)s',
    handlers=[
        logging.StreamHandler(sys.stdout),
        logging.FileHandler('mcp_server.log')
    ]
)
logger = logging.getLogger(__name__)
# ================ CONFIGURATION ================
@dataclass
class ServerConfig:
    """Server configuration with only actually implemented options."""
    # --- Server core settings ---
    host: str = "127.0.0.1"
    port: int = 6274
    debug_mode: bool = False
    # --- Session management ---
    session_ttl_seconds: int = 3600           # 1 hour default
    max_sessions: int = 1000
    cleanup_interval_seconds: int = 300       # 5 minutes
    enable_session_keepalive: bool = True
    keepalive_touch_interval: int = 300
    # --- Request handling ---
    request_timeout_seconds: int = 120
    max_request_size_mb: int = 10
    # --- Client rate limiting (per IP) ---
    rate_limit_requests_per_minute: int = 300
    # --- Workspace management ---
    base_workspace_dir: str = "workspaces"
    # --- Tool call tracking & logging ---
    enable_tool_tracking: bool = True
    max_tracked_calls_per_session: int = 1000
    track_detailed_errors: bool = True
    # --- Per-tool rate limiting configuration (tool name -> limit settings) ---
    tool_rate_limits: Dict[str, Dict[str, int]] = field(default_factory=dict)

    @classmethod
    def from_yaml(cls, config_path: str) -> 'ServerConfig':
        """Load configuration from a YAML file.

        Any failure (missing file, unreadable content, malformed YAML) falls
        back to the built-in defaults and is logged, so the server can always
        start with a usable configuration.

        Args:
            config_path: Path to the YAML configuration file.

        Returns:
            A populated ServerConfig (defaults for anything not specified).
        """
        try:
            with open(config_path, 'r', encoding='utf-8') as f:
                # BUG FIX: an empty YAML file parses to None; treat it as
                # "no overrides" instead of raising AttributeError below.
                config_data = yaml.safe_load(f) or {}
            # Extract configuration sections with defaults
            server_config = config_data.get('server', {})
            tracking_config = config_data.get('tracking', {})
            tool_rate_limits = config_data.get('tool_rate_limits', {})
            return cls(
                # Server Core Settings
                host=server_config.get('host', "127.0.0.1"),
                port=server_config.get('port', 6274),
                debug_mode=server_config.get('debug_mode', False),
                # Session Management
                session_ttl_seconds=server_config.get('session_ttl_seconds', 3600),
                max_sessions=server_config.get('max_sessions', 1000),
                cleanup_interval_seconds=server_config.get('cleanup_interval_seconds', 300),
                enable_session_keepalive=server_config.get('enable_session_keepalive', True),
                keepalive_touch_interval=server_config.get('keepalive_touch_interval', 300),
                # Request Handling
                request_timeout_seconds=server_config.get('request_timeout_seconds', 120),
                max_request_size_mb=server_config.get('max_request_size_mb', 10),
                # Client Rate Limiting
                rate_limit_requests_per_minute=server_config.get('rate_limit_requests_per_minute', 300),
                # Workspace Management
                base_workspace_dir=server_config.get('base_workspace_dir', "workspaces"),
                # Tool Call Tracking & Logging
                enable_tool_tracking=tracking_config.get('enable_tool_tracking', True),
                max_tracked_calls_per_session=tracking_config.get('max_tracked_calls_per_session', 1000),
                track_detailed_errors=tracking_config.get('track_detailed_errors', True),
                # Per-tool Rate Limiting
                tool_rate_limits=tool_rate_limits
            )
        except Exception as e:
            # Deliberately broad: any configuration problem degrades to defaults.
            logger.error(f"Failed to load configuration from {config_path}: {e}")
            logger.info("Using default configuration")
            return cls()
# Global configuration instance — assigned during startup before any requests
# are served; classes below (e.g. ToolCallTracker, middleware) read it directly.
config: Optional[ServerConfig] = None
# ================ GLOBAL PER-TOOL RATE LIMITING ================
@dataclass
class ToolRateLimit:
    """Rate limit configuration for a specific tool."""
    # float('inf') for any field means "unlimited" (see GlobalToolRateLimiter).
    requests_per_minute: float
    requests_per_hour: float
    burst_limit: int  # max requests within a rolling 1-second window
class GlobalToolRateLimiter:
    """
    Global rate limiter that controls QPS to external APIs per tool.

    Shared across all sessions and clients to manage upstream service load.
    Each configured tool keeps a deque of wall-clock timestamps; limits are
    enforced over rolling 1-second (burst), 1-minute, and 1-hour windows.
    A limit left unset in the config defaults to float('inf') ("unlimited").
    """

    def __init__(self, tool_rate_limits: Dict[str, Dict[str, int]]):
        self.tool_limits: Dict[str, ToolRateLimit] = {}
        self.tool_requests: Dict[str, deque] = defaultdict(deque)
        self.lock = asyncio.Lock()
        # Initialize rate limits for each configured tool
        for tool_name, limits_config in tool_rate_limits.items():
            self.tool_limits[tool_name] = ToolRateLimit(
                requests_per_minute=limits_config.get('requests_per_minute', float('inf')),
                requests_per_hour=limits_config.get('requests_per_hour', float('inf')),
                burst_limit=limits_config.get('burst_limit', 10)
            )
            self.tool_requests[tool_name] = deque()
        logger.info(f"Initialized global tool rate limiter for {len(self.tool_limits)} tools")

    async def is_allowed(self, tool_name: str) -> tuple[bool, Optional[str]]:
        """
        Check if a request to the specified tool is allowed based on global rate limits.

        Returns:
            tuple[bool, Optional[str]]: (allowed, reason_if_denied)
        """
        if tool_name not in self.tool_limits:
            # Tool not configured for rate limiting - allow
            return True, None
        async with self.lock:
            now = time.time()
            limits = self.tool_limits[tool_name]
            requests = self.tool_requests[tool_name]
            # Clean old requests outside the time windows
            self._cleanup_old_requests(requests, now)
            recent_requests = list(requests)
            # Burst limit: rapid requests within the last second
            if limits.burst_limit != float('inf'):
                burst_count = sum(1 for req_time in recent_requests if now - req_time < 1.0)
                if burst_count >= limits.burst_limit:
                    return False, f"Tool '{tool_name}' burst limit exceeded ({limits.burst_limit} requests/burst)"
            # Per-minute limit
            if limits.requests_per_minute != float('inf'):
                minute_count = sum(1 for req_time in recent_requests if now - req_time < 60.0)
                if minute_count >= limits.requests_per_minute:
                    return False, f"Tool '{tool_name}' per-minute limit exceeded ({limits.requests_per_minute} requests/minute)"
            # Per-hour limit
            if limits.requests_per_hour != float('inf'):
                hour_count = sum(1 for req_time in recent_requests if now - req_time < 3600.0)
                if hour_count >= limits.requests_per_hour:
                    return False, f"Tool '{tool_name}' per-hour limit exceeded ({limits.requests_per_hour} requests/hour)"
            return True, None

    async def record_request(self, tool_name: str):
        """Record a successful request for rate limiting tracking."""
        if tool_name not in self.tool_limits:
            return
        async with self.lock:
            now = time.time()
            self.tool_requests[tool_name].append(now)
            # Keep deque size manageable (only keep last hour of requests)
            self._cleanup_old_requests(self.tool_requests[tool_name], now)

    @staticmethod
    def _cleanup_old_requests(requests: deque, now: float):
        """Remove requests older than 1 hour to keep memory usage bounded."""
        while requests and now - requests[0] > 3600.0:  # 1 hour window
            requests.popleft()

    async def get_tool_stats(self, tool_name: str) -> Dict[str, Any]:
        """Get current usage statistics and limit utilization for a tool."""
        if tool_name not in self.tool_limits:
            return {"error": f"Tool '{tool_name}' not configured for rate limiting"}
        async with self.lock:
            now = time.time()
            requests = self.tool_requests[tool_name]
            limits = self.tool_limits[tool_name]
            # Clean old requests first so counts reflect the live windows
            self._cleanup_old_requests(requests, now)
            recent_requests = list(requests)
            return {
                "tool_name": tool_name,
                "current_usage": {
                    "last_second": sum(1 for req_time in recent_requests if now - req_time < 1.0),
                    "last_minute": sum(1 for req_time in recent_requests if now - req_time < 60.0),
                    "last_hour": sum(1 for req_time in recent_requests if now - req_time < 3600.0)
                },
                # None in place of float('inf') so the payload is JSON-friendly
                "limits": {
                    "requests_per_minute": limits.requests_per_minute if limits.requests_per_minute != float('inf') else None,
                    "requests_per_hour": limits.requests_per_hour if limits.requests_per_hour != float('inf') else None,
                    "burst_limit": limits.burst_limit if limits.burst_limit != float('inf') else None
                },
                "utilization": {
                    "minute_utilization": sum(1 for req_time in recent_requests if now - req_time < 60.0) / limits.requests_per_minute if limits.requests_per_minute != float('inf') else 0,
                    "hour_utilization": sum(1 for req_time in recent_requests if now - req_time < 3600.0) / limits.requests_per_hour if limits.requests_per_hour != float('inf') else 0
                }
            }

    async def get_all_stats(self) -> Dict[str, Any]:
        """Get usage statistics for all configured tools.

        BUG FIX: this was previously a *sync* method calling the async
        get_tool_stats() without awaiting, so it returned a dict of coroutine
        objects (and leaked un-awaited coroutines). It is now a coroutine
        itself — callers must `await` it.
        """
        return {
            tool_name: await self.get_tool_stats(tool_name)
            for tool_name in list(self.tool_limits.keys())
        }
# Global tool rate limiter instance — assigned during startup; None until then.
# Callers truth-check it before use (see _call_session_tool_async).
global_tool_rate_limiter: Optional[GlobalToolRateLimiter] = None
# ================ TOOL DEFINITIONS ================
# Tool execution function mapping - maps tool names to their implementation functions
# Tool names whose implementation is exactly the MCPTools method of the same
# name, invoked as tools.<name>(**kwargs). Two declared tools —
# "info_seeker_subjective_task_done" and "writer_subjective_task_done" — have
# no server-side implementation and intentionally resolve to None below.
_DIRECT_TOOL_NAMES = frozenset({
    "batch_web_search",
    "url_crawler",
    "download_files",
    "list_workspace",
    "str_replace_based_edit_tool",
    "file_stats",
    "file_read",
    "file_read_lines",
    "content_preview",
    "file_write",
    "file_grep_search",
    "file_grep_with_context",
    "file_find_by_name",
    "bash",
    "task_done",
    "think",
    "reflect",
    "document_qa",
    "extract_markdown_toc",
    "extract_markdown_section",
    "document_extract",
    "search_result_classifier",
    "section_writer",
    "concat_section_files",
    # Internal tools - available to server but NOT exposed to agents via tool schemas
    "internal_file_read_unlimited",
})


def get_tool_function(tool_name: str):
    """Return the callable implementing *tool_name*, or None.

    The returned callable has signature ``(tools, **kwargs)`` and dispatches
    to the method of the same name on the given tools instance. None is
    returned both for unknown tool names and for declared-but-unimplemented
    ("subjective task done") tools — the same behavior as the previous
    hand-written lambda table, without 25 lines of identical boilerplate.
    """
    if tool_name not in _DIRECT_TOOL_NAMES:
        return None

    def _invoke(tools, **kwargs):
        # Late-bound attribute lookup on the per-session tools instance.
        return getattr(tools, tool_name)(**kwargs)

    return _invoke
# ================ TOOL CALL TRACKING ================
@dataclass
class ToolCallLog:
    """A single tool invocation record, serializable to one JSONL line."""
    call_id: str
    timestamp: datetime
    tool_name: str
    input_args: Dict[str, Any]
    output_result: Dict[str, Any]
    success: bool
    duration_ms: float
    error_details: Optional[str] = None
    session_id: str = ""
    agent_info: Optional[Dict[str, Any]] = None

    def to_dict(self) -> Dict[str, Any]:
        """Return a JSON-serializable dict (timestamp rendered as ISO-8601)."""
        serialized: Dict[str, Any] = {
            "call_id": self.call_id,
            "timestamp": self.timestamp.isoformat(),
        }
        serialized.update(
            tool_name=self.tool_name,
            input_args=self.input_args,
            output_result=self.output_result,
            success=self.success,
            duration_ms=self.duration_ms,
            error_details=self.error_details,
            session_id=self.session_id,
            agent_info=self.agent_info,
        )
        return serialized
class ToolCallTracker:
    """Tracks tool calls for one session and persists them to the workspace.

    Calls are appended to a daily JSONL file under <workspace>/tool_call_logs/
    and aggregated into session_summary.json. Relies on the module-level
    `config` (ServerConfig) being set before any call is logged.
    """

    def __init__(self, workspace_path: Path, session_id: str):
        self.workspace_path = workspace_path
        self.session_id = session_id
        self.logs_dir = workspace_path / "tool_call_logs"
        # parents=True so a missing workspace directory cannot break tracking.
        self.logs_dir.mkdir(exist_ok=True, parents=True)
        # One log file per calendar day
        today = datetime.now().strftime("%Y-%m-%d")
        self.current_log_file = self.logs_dir / f"tool_calls_{today}.jsonl"
        self.summary_file = self.logs_dir / "session_summary.json"
        # In-memory counters for this tracker instance
        self.call_count = 0
        self.tool_usage_stats = defaultdict(int)
        self._initialize_session_summary()

    def _initialize_session_summary(self):
        """Create the summary file, merging with any pre-existing summary."""
        now_iso = datetime.now().isoformat()
        summary = {
            "session_id": self.session_id,
            "session_start": now_iso,
            "last_updated": now_iso,
            "total_tool_calls": 0,
            "tool_usage_stats": {},
            "agent_activity": {},
            "workspace_path": str(self.workspace_path)
        }
        if self.summary_file.exists():
            try:
                with open(self.summary_file, 'r', encoding="utf-8") as f:
                    existing_summary = json.load(f)
                # Resume counters and original session_start from disk ...
                summary.update(existing_summary)
                # ... but the "last updated" stamp must reflect *now* —
                # BUG FIX: previously the stale on-disk value clobbered it.
                summary["last_updated"] = now_iso
            except Exception as e:
                logger.warning(f"Could not load existing session summary: {e}")
        self._save_summary(summary)

    def _save_summary(self, summary: Dict[str, Any]):
        """Write the summary JSON (best-effort; failures are only logged)."""
        try:
            with open(self.summary_file, 'w', encoding="utf-8") as f:
                json.dump(summary, f, indent=2, ensure_ascii=False)
        except Exception as e:
            logger.error(f"Failed to save session summary: {e}")

    def log_tool_call(self,
                      tool_name: str,
                      input_args: Dict[str, Any],
                      output_result: Dict[str, Any],
                      success: bool,
                      duration_ms: float,
                      error_details: Optional[str] = None,
                      agent_info: Optional[Dict[str, Any]] = None) -> str:
        """Log one tool call; return its generated call ID ('' if not logged).

        Honors config.enable_tool_tracking and the per-session cap
        config.max_tracked_calls_per_session.
        """
        if not config.enable_tool_tracking:
            return ""
        if self.call_count >= config.max_tracked_calls_per_session:
            logger.warning(f"Max tracked calls reached for session {self.session_id}")
            return ""
        call_id = str(uuid.uuid4())
        log_entry = ToolCallLog(
            call_id=call_id,
            timestamp=datetime.now(),
            tool_name=tool_name,
            input_args=self._sanitize_args(input_args),
            output_result=self._sanitize_result(output_result),
            success=success,
            duration_ms=duration_ms,
            error_details=error_details if config.track_detailed_errors else None,
            session_id=self.session_id,
            agent_info=agent_info
        )
        # Append one JSON object per line (JSONL); failures are best-effort.
        try:
            with open(self.current_log_file, 'a', encoding="utf-8") as f:
                f.write(json.dumps(log_entry.to_dict(), ensure_ascii=False) + '\n')
        except Exception as e:
            logger.error(f"Failed to save tool call log: {e}")
        # The summary update runs *before* the counters are bumped — it adds
        # the in-flight call itself via explicit +1s (see below).
        self._update_session_summary(log_entry)
        self.call_count += 1
        self.tool_usage_stats[tool_name] += 1
        return call_id

    @staticmethod
    def _sanitize_args(args: Dict[str, Any]) -> Dict[str, Any]:
        """Sanitize arguments for logging.

        Sensitive keys are redacted; oversized string values are truncated.
        BUG FIX: redaction is now checked *before* truncation — previously a
        long value under a sensitive key (e.g. 'password') hit the truncation
        branch first and leaked its first 1000 characters into the log.
        """
        sanitized = {}
        for key, value in args.items():
            if key.lower() in ['password', 'token', 'secret', 'key']:
                sanitized[key] = "[REDACTED]"
            elif isinstance(value, str) and len(value) > 1000:
                sanitized[key] = value[:1000] + "... [truncated]"
            else:
                sanitized[key] = value
        return sanitized

    def _sanitize_result(self, result: Dict[str, Any]) -> Dict[str, Any]:
        """Sanitize a result for logging: truncate large strings, recurse into
        nested dicts; non-dict inputs pass through unchanged."""
        if not isinstance(result, dict):
            return result
        sanitized = {}
        for key, value in result.items():
            if isinstance(value, str) and len(value) > 2000:
                sanitized[key] = value[:2000] + "... [truncated]"
            elif isinstance(value, dict):
                sanitized[key] = self._sanitize_result(value)
            else:
                sanitized[key] = value
        return sanitized

    def _update_session_summary(self, log_entry: "ToolCallLog"):
        """Refresh the on-disk summary after a logged call (best-effort)."""
        try:
            summary: Dict[str, Any] = {
                "session_id": self.session_id,
                "workspace_path": str(self.workspace_path)
            }
            # Start from what is on disk so fields we do not own survive.
            if self.summary_file.exists():
                with open(self.summary_file, 'r', encoding="utf-8") as f:
                    summary.update(json.load(f))
            # Overwrite the fields this tracker owns. Counters have not been
            # incremented for the current call yet, hence the explicit +1s.
            summary["last_updated"] = datetime.now().isoformat()
            summary["total_tool_calls"] = self.call_count + 1
            stats = dict(self.tool_usage_stats)
            stats[log_entry.tool_name] = self.tool_usage_stats[log_entry.tool_name] + 1
            summary["tool_usage_stats"] = stats
            # Per-agent activity bookkeeping (keyed by agent 'type')
            if log_entry.agent_info:
                agent_type = log_entry.agent_info.get('type', 'unknown')
                activity = summary.setdefault('agent_activity', {})
                entry = activity.setdefault(agent_type, {
                    'tool_calls': 0,
                    'last_active': log_entry.timestamp.isoformat()
                })
                entry['tool_calls'] += 1
                entry['last_active'] = log_entry.timestamp.isoformat()
            self._save_summary(summary)
        except Exception as e:
            logger.error(f"Failed to update session summary: {e}")
# ================ SESSION KEEP-ALIVE FOR LONG OPERATIONS ================
class KeepAliveSessionWrapper:
    """Context manager that periodically touches a session so it does not
    expire while a long-running operation is in progress."""

    def __init__(self, session: 'Session', touch_interval: int = 300):  # Touch every 5 minutes
        self.session = session
        self.touch_interval = touch_interval
        self.keep_alive_thread = None
        self.stop_event = Event()
        self.active = False

    def _keep_alive_loop(self):
        """Background loop: touch the session until stop_event fires.

        Event.wait returns False on timeout (keep touching) and True once
        stop_event has been set (exit the loop)."""
        while not self.stop_event.wait(self.touch_interval):
            try:
                self.session.touch()
                logger.debug("Keep-alive: Touched session {%s}", self.session.id)
            except Exception as e:
                logger.error(f"Keep-alive error for session {self.session.id}: {e}")
                break

    def start_keep_alive(self):
        """Start the background keep-alive thread (no-op if already active)."""
        if self.active:
            return
        self.active = True
        self.stop_event.clear()
        worker = Thread(target=self._keep_alive_loop, daemon=True)
        self.keep_alive_thread = worker
        worker.start()
        logger.info(f"Started keep-alive for session {self.session.id}")

    def stop_keep_alive(self):
        """Signal the worker to stop, wait briefly, then touch one last time."""
        if not self.active:
            return
        self.active = False
        self.stop_event.set()
        thread = self.keep_alive_thread
        if thread is not None and thread.is_alive():
            thread.join(timeout=1.0)
        # Final touch so last_accessed reflects the end of the long operation.
        try:
            self.session.touch()
        except Exception as e:
            logger.error(f"Final keep-alive touch error for session {self.session.id}: {e}")
        logger.info(f"Stopped keep-alive for session {self.session.id}")

    def __enter__(self):
        self.start_keep_alive()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.stop_keep_alive()
# ================ SESSION MANAGEMENT ================
@dataclass
class Session:
    """Thread-safe session data structure with workspace management."""
    id: str
    created_at: datetime
    last_accessed: datetime
    initialized: bool = False
    request_count: int = 0
    metadata: Dict[str, Any] = field(default_factory=dict)
    workspace_path: Optional[Path] = None
    # Forward-referenced annotations: the concrete classes live elsewhere in
    # this module and are resolved lazily.
    mcp_tools: Optional["MCPTools"] = None
    tool_tracker: Optional["ToolCallTracker"] = None

    def is_expired(self, ttl_seconds: int) -> bool:
        """Return True if the session has been idle longer than *ttl_seconds*."""
        idle = datetime.now() - self.last_accessed
        return idle > timedelta(seconds=ttl_seconds)

    def touch(self):
        """Record activity: bump the request counter, refresh last_accessed."""
        self.request_count += 1
        self.last_accessed = datetime.now()

    def get_mcp_tools(self, prefer_async: bool = True) -> "MCPTools":
        """Lazily create (then reuse) the MCP tools instance for this session."""
        if self.mcp_tools is not None:
            return self.mcp_tools
        workspace = str(self.workspace_path) if self.workspace_path else None
        use_async = prefer_async and AsyncMCPTools is not None
        tools_cls = AsyncMCPTools if use_async else MCPTools
        self.mcp_tools = tools_cls(workspace_path=workspace)
        return self.mcp_tools

    def get_tool_tracker(self) -> Optional["ToolCallTracker"]:
        """Lazily create the call tracker when tracking is enabled and a
        workspace exists; otherwise return None."""
        if not (config.enable_tool_tracking and self.workspace_path):
            return None
        if self.tool_tracker is None:
            self.tool_tracker = ToolCallTracker(self.workspace_path, self.id)
        return self.tool_tracker
class AsyncRLock:
    """Async re-entrant lock, mirroring threading.RLock for coroutines.

    A single asyncio.Lock provides mutual exclusion; the task that holds it
    may re-acquire any number of times, and the underlying lock is released
    only when the re-entry count drops back to zero.
    """

    def __init__(self):
        self._lock = asyncio.Lock()
        self._owner: Optional[asyncio.Task] = None  # task currently holding the lock
        self._count = 0  # re-entry depth

    async def acquire(self):
        """Acquire the lock, waiting if another task holds it; re-entrant."""
        me = asyncio.current_task()
        if self._owner is me:
            # Re-entrant acquisition by the current owner: just bump the depth.
            self._count += 1
            return
        await self._lock.acquire()
        self._owner = me
        self._count = 1

    async def release(self):
        """Release one level of the lock; only the owning task may call this."""
        if asyncio.current_task() is not self._owner:
            raise RuntimeError("不能释放非当前协程持有的锁")
        self._count -= 1
        if self._count == 0:
            # Depth back to zero: actually free the underlying lock.
            self._owner = None
            self._lock.release()

    # `async with` support
    async def __aenter__(self):
        await self.acquire()
        return self

    async def __aexit__(self, exc_type, exc, tb):
        await self.release()
class ThreadSafeSessionManager:
    """Thread-safe session manager with workspace management.

    Sessions live in an in-memory dict guarded by a re-entrant AsyncRLock;
    each session gets its own workspace directory under base_workspace_dir.
    Expired sessions are purged on access and by a background cleanup thread.
    """
    def __init__(self, ttl_seconds: int = 3600, max_sessions: int = 1000, base_workspace_dir: str = "workspaces"):
        self.ttl_seconds = ttl_seconds
        self.max_sessions = max_sessions
        self.base_workspace_dir = Path(base_workspace_dir)
        self.base_workspace_dir.mkdir(exist_ok=True)
        # Thread-safe session storage
        self.sessions: Dict[str, Session] = {}
        self.lock = AsyncRLock()
        # Start cleanup thread
        self._start_cleanup_thread()

    async def create_session(self) -> str:
        """Create a new session and return session ID"""
        session_id = str(uuid.uuid4())
        async with self.lock:
            # Check session limits; evict oldest sessions when at capacity
            # (safe because AsyncRLock is re-entrant for the same task).
            if len(self.sessions) >= self.max_sessions:
                await self._cleanup_oldest_sessions()
            # Create workspace directory
            workspace_path = self.base_workspace_dir / session_id
            workspace_path.mkdir(exist_ok=True, parents=True)
            # Create session
            session = Session(
                id=session_id,
                created_at=datetime.now(),
                last_accessed=datetime.now(),
                workspace_path=workspace_path
            )
            self.sessions[session_id] = session
            logger.info(f"Created session {session_id} with workspace {workspace_path}")
            return session_id

    async def get_session(self, session_id: str) -> Optional[Session]:
        """Get session by ID if it exists and is not expired.

        Touches the session on a hit; evicts it immediately if it expired.
        """
        async with self.lock:
            session = self.sessions.get(session_id)
            if session and not session.is_expired(self.ttl_seconds):
                session.touch()
                return session
            elif session:
                # Remove expired session
                del self.sessions[session_id]
                logger.info(f"Removed expired session {session_id}")
            return None

    async def get_or_create_session(self, session_id: Optional[str] = None) -> Session:
        """Get existing session or create new one"""
        if session_id:
            session = await self.get_session(session_id)
            if session:
                return session
        # Create new session
        new_session_id = await self.create_session()
        # NOTE(review): this dict read happens outside the lock; a concurrent
        # cleanup could in principle evict the brand-new session before this
        # lookup — confirm whether that window matters in practice.
        return self.sessions[new_session_id]

    async def _cleanup_expired_sessions(self):
        """Remove expired sessions"""
        async with self.lock:
            # Two passes: collect first, then delete (cannot mutate the dict
            # while iterating it).
            expired_sessions = []
            for session_id, session in self.sessions.items():
                if session.is_expired(self.ttl_seconds):
                    expired_sessions.append(session_id)
            for session_id in expired_sessions:
                del self.sessions[session_id]
                logger.info(f"Cleaned up expired session {session_id}")

    async def _cleanup_oldest_sessions(self):
        """Remove oldest sessions when limit is reached.

        Called from create_session with self.lock already held — re-acquiring
        here is fine because AsyncRLock is re-entrant.
        """
        async with self.lock:
            if len(self.sessions) < self.max_sessions:
                return
            # Sort by last accessed time and remove oldest
            sorted_sessions = sorted(
                self.sessions.items(),
                key=lambda x: x[1].last_accessed
            )
            sessions_to_remove = len(self.sessions) - self.max_sessions + 10  # Remove extra
            for i in range(sessions_to_remove):
                if i < len(sorted_sessions):
                    session_id = sorted_sessions[i][0]
                    del self.sessions[session_id]
                    logger.info(f"Removed old session {session_id} due to session limit")

    def _start_cleanup_thread(self):
        """Start background cleanup thread.

        Assumes the module-level `config` has been assigned before this
        manager is constructed (the interval is read from it each cycle).
        """
        def cleanup_worker():
            while True:
                try:
                    time.sleep(config.cleanup_interval_seconds)
                    # Run async method in sync context
                    # NOTE(review): this creates a fresh event loop inside the
                    # cleanup thread while request handlers run on the server's
                    # main loop. AsyncRLock wraps asyncio.Lock, which is not
                    # thread-safe and not designed to be awaited from two
                    # different loops — confirm cleanup cannot race an
                    # in-flight request that holds the lock.
                    loop = asyncio.new_event_loop()
                    loop.run_until_complete(self._cleanup_expired_sessions())
                    loop.close()
                except Exception as e:
                    logger.error(f"Error in cleanup thread: {e}")
        import threading
        cleanup_thread = threading.Thread(target=cleanup_worker, daemon=True)
        cleanup_thread.start()
        logger.info("Started session cleanup thread")

    async def get_stats(self) -> Dict[str, Any]:
        """Get session manager statistics"""
        async with self.lock:
            return {
                "total_sessions": len(self.sessions),
                "max_sessions": self.max_sessions,
                "ttl_seconds": self.ttl_seconds,
                "session_ids": list(self.sessions.keys())
            }
# ================ MIDDLEWARE AND SECURITY ================
class RateLimiter:
    """Simple per-client rate limiter over a rolling 60-second window."""

    def __init__(self, requests_per_minute: int = 60):
        self.requests_per_minute = requests_per_minute
        # client_id -> timestamps (seconds) of requests in the last minute
        self.requests: Dict[str, List[float]] = defaultdict(list)
        self.lock = asyncio.Lock()

    async def is_allowed(self, client_id: str) -> bool:
        """Return True (and record the request) if *client_id* is under its
        per-minute quota; return False without recording otherwise."""
        async with self.lock:
            now = time.time()
            minute_ago = now - 60
            # Prune timestamps that fell out of the rolling window.
            recent = [t for t in self.requests[client_id] if t > minute_ago]
            if len(recent) >= self.requests_per_minute:
                # Over quota: keep the pruned history, do not record this hit.
                self.requests[client_id] = recent
                return False
            recent.append(now)
            self.requests[client_id] = recent
            return True
class RequestValidator:
    """Validates incoming MCP requests."""

    @staticmethod
    def validate_mcp_request(data: Dict[str, Any]) -> tuple[bool, Optional[str]]:
        """Check the basic JSON-RPC envelope: an object carrying 'method' and 'id'."""
        if not isinstance(data, dict):
            return False, "Request must be a JSON object"
        for required in ("method", "id"):
            if required not in data:
                return False, f"Missing '{required}' field"
        return True, None

    @staticmethod
    def validate_tool_call(params: Dict[str, Any]) -> tuple[bool, Optional[str]]:
        """Check tool-call params: an object with 'name'/'arguments' and a
        tool name present in the registered tool schemas."""
        if not isinstance(params, dict):
            return False, "Tool parameters must be a JSON object"
        if "name" not in params:
            return False, "Missing tool 'name'"
        if "arguments" not in params:
            return False, "Missing tool 'arguments'"
        tool_name = params["name"]
        known_tools = get_tool_schemas()
        if tool_name in known_tools:
            return True, None
        return False, f"Unknown tool: {tool_name}. Available tools: {sorted(list(known_tools.keys()))}"
class SecurityMiddleware(BaseHTTPMiddleware):
    """Security middleware for basic protection: request-size rejection plus
    standard browser-hardening response headers."""
    async def dispatch(self, request: Request, call_next):
        # Reject oversized payloads up-front based on the declared
        # Content-Length header. Requests without the header pass through, so
        # chunked bodies are not size-checked here.
        # NOTE(review): a malformed Content-Length would raise ValueError on
        # int() — confirm upstream validation or tolerate it explicitly.
        content_length = request.headers.get("content-length")
        if content_length and int(content_length) > config.max_request_size_mb * 1024 * 1024:
            return JSONResponse(
                status_code=HTTPStatus.REQUEST_ENTITY_TOO_LARGE,
                content={"error": "Request too large"}
            )
        # Add security headers to every response
        response = await call_next(request)
        response.headers["X-Content-Type-Options"] = "nosniff"
        response.headers["X-Frame-Options"] = "DENY"
        response.headers["X-XSS-Protection"] = "1; mode=block"
        return response
class RateLimitMiddleware(BaseHTTPMiddleware):
    """Rate limiting middleware: rejects requests from clients (keyed by IP)
    that exceed the injected RateLimiter's per-minute quota."""
    def __init__(self, app, input_rate_limiter: RateLimiter):
        super().__init__(app)
        self.rate_limiter = input_rate_limiter
    async def dispatch(self, request: Request, call_next):
        # Get client identifier (IP address); "unknown" when the ASGI server
        # provides no client info — note all such requests then share a bucket.
        client_ip = request.client.host if request.client else "unknown"
        if not await self.rate_limiter.is_allowed(client_ip):
            return JSONResponse(
                status_code=HTTPStatus.TOO_MANY_REQUESTS,
                content={"error": "Rate limit exceeded"}
            )
        return await call_next(request)
# Global session manager and client (per-IP) rate limiter — both are None
# until assigned during server startup.
session_manager = None
rate_limiter = None
@dataclass
class RateLimitViolation:
    """Represents a rate limit violation with standardized error information."""
    tool_name: str
    limit_type: str  # "burst", "second", "minute", "hour"
    current_usage: int
    limit_value: float
    retry_after_seconds: float

    def to_user_friendly_message(self) -> str:
        """Generate a user-friendly error message for the violated limit."""
        if self.limit_type == "burst":
            return f"Service temporarily unavailable: Too many rapid requests to {self.tool_name}. Please wait {self.retry_after_seconds:.0f} seconds before trying again."
        elif self.limit_type == "second":
            return f"Service temporarily unavailable: {self.tool_name} request rate exceeded ({self.limit_value}/second). Please wait {self.retry_after_seconds:.0f} seconds before trying again."
        elif self.limit_type == "minute":
            return f"Service temporarily unavailable: {self.tool_name} quota exceeded ({self.limit_value}/minute). Please try again in {self.retry_after_seconds:.0f} seconds."
        elif self.limit_type == "hour":
            # BUG FIX: retry_after_seconds was previously printed as-is while
            # labeled "minutes" (e.g. "300 minutes" for a 300s wait); convert
            # seconds -> minutes for the hourly message.
            return f"Service temporarily unavailable: {self.tool_name} hourly quota exceeded ({self.limit_value}/hour). Please try again in {self.retry_after_seconds / 60:.0f} minutes."
        else:
            return f"Service temporarily unavailable: {self.tool_name} rate limit exceeded. Please try again later."

    def to_technical_message(self) -> str:
        """Generate technical error message for debugging."""
        return f"Tool '{self.tool_name}' {self.limit_type} limit exceeded ({self.current_usage}/{self.limit_value} {self.limit_type})"
def _parse_rate_limit_denial(tool_name: str, denial_reason: str) -> RateLimitViolation:
    """Parse a rate-limit denial string into a structured violation.

    Recognizes the burst / per-second / per-minute / per-hour messages; an
    unrecognized string yields limit_type "unknown" with a 60-second retry
    hint. current_usage is approximated by the parsed limit value, since the
    denial message does not carry the exact usage count.
    """
    import re
    # (marker substring, limit type, retry hint seconds, value-extraction regex)
    patterns = (
        ("burst limit exceeded", "burst", 1.0, r'\((\d+) requests/burst\)'),
        ("per-second limit exceeded", "second", 1.0, r'\(([0-9.]+) requests/second\)'),
        ("per-minute limit exceeded", "minute", 10.0, r'\(([0-9.]+) requests/minute\)'),
        ("per-hour limit exceeded", "hour", 300.0, r'\(([0-9.]+) requests/hour\)'),
    )
    # Defaults for an unrecognized denial string
    limit_type = "unknown"
    current_usage = 0
    limit_value = 0.0
    retry_after_seconds = 60.0
    for marker, kind, retry_hint, value_pattern in patterns:
        if marker in denial_reason:
            limit_type = kind
            retry_after_seconds = retry_hint
            match = re.search(value_pattern, denial_reason)
            if match:
                limit_value = float(match.group(1))
                current_usage = int(limit_value)  # Approximation
            break
    return RateLimitViolation(
        tool_name=tool_name,
        limit_type=limit_type,
        current_usage=current_usage,
        limit_value=limit_value,
        retry_after_seconds=retry_after_seconds
    )
async def _call_session_tool_async(session: Session, tool_name: str, tool_args: Dict[str, Any],
                                   client_ip: str = "unknown") -> Dict[str, Any]:
    """Execute a tool within a session context with full tracking, workspace management, and global rate limiting.

    Flow: (1) refresh the session TTL, (2) enforce the global per-tool rate
    limit, (3) resolve the tool method on the session's MCP tools instance,
    (4) run it — directly when async, in a thread pool when sync, with an
    optional keep-alive wrapper for long-running tools — and (5) normalize
    the result and log the call to the session's tracker.

    Args:
        session: Active session providing workspace isolation, the MCP tools
            instance, and the optional tool-call tracker.
        tool_name: Attribute name of the tool method to invoke.
        tool_args: Keyword arguments forwarded verbatim to the tool method.
        client_ip: Originating client address, used only for tracking logs.

    Returns:
        A dict describing the outcome. On success it is the tool's own result
        (coerced to a dict); on failure it contains at least "success": False
        and an "error" message. Rate-limited calls additionally carry
        "rate_limited": True plus retry metadata. This function never raises:
        all exceptions are converted into the failure-dict shape.
    """
    start_time = time.time()
    success = False
    error_details = None
    result_data = None
    # Touch session at start of tool execution to prevent expiry during long operations
    session.touch()
    try:
        # CHECK GLOBAL TOOL RATE LIMITS FIRST
        if global_tool_rate_limiter:
            allowed, deny_reason = await global_tool_rate_limiter.is_allowed(tool_name)
            if not allowed:
                # Parse the denial reason to create structured rate limit violation
                rate_limit_violation = _parse_rate_limit_denial(tool_name, deny_reason)
                # Create user-friendly error message
                user_message = rate_limit_violation.to_user_friendly_message()
                technical_message = rate_limit_violation.to_technical_message()
                logger.warning(f"Session {session.id}: {technical_message}")
                result_data = {
                    "success": False,
                    "error": user_message,
                    "error_code": "RATE_LIMIT_EXCEEDED",
                    "error_type": "rate_limit",
                    "tool_name": tool_name,
                    "limit_type": rate_limit_violation.limit_type,
                    "retry_after_seconds": rate_limit_violation.retry_after_seconds,
                    "data": None,
                    "rate_limited": True,  # Keep for backward compatibility
                    "technical_details": technical_message  # For debugging
                }
                # Still log this for tracking purposes
                duration_ms = (time.time() - start_time) * 1000
                tracker = session.get_tool_tracker()
                if tracker:
                    try:
                        agent_info = {
                            "client_ip": client_ip,
                            "type": "unknown",
                            "session_request_count": session.request_count
                        }
                        tracker.log_tool_call(
                            tool_name=tool_name,
                            input_args=tool_args,
                            output_result=result_data,
                            success=False,
                            duration_ms=duration_ms,
                            error_details=user_message,
                            agent_info=agent_info
                        )
                    except Exception as e:
                        # Tracking is best-effort: a logging failure must not
                        # mask the rate-limit result returned to the caller.
                        logger.error(f"Failed to log rate-limited tool call: {e}")
                # Early return: rate-limited calls skip tool resolution/execution.
                return result_data
        # Get MCP tools instance for this session (handles workspace isolation)
        mcp_tools = session.get_mcp_tools(prefer_async=True)
        # Get tool method directly from the mcp_tools instance
        if not hasattr(mcp_tools, tool_name):
            raise ValueError(f"Tool '{tool_name}' not implemented")
        tool_method = getattr(mcp_tools, tool_name)
        # Add session context to tool arguments for workspace-aware tools
        if hasattr(mcp_tools, 'set_session_context'):
            mcp_tools.set_session_context(session.id, str(session.workspace_path))
        # Execute tool with keep-alive for potentially long operations
        logger.info(f"Session {session.id}: Executing tool '{tool_name}' with args: {list(tool_args.keys())}")
        # Use keep-alive wrapper for tools that might take a long time
        long_running_tools = {'batch_web_search', 'url_crawler', 'document_qa', 'document_extract', 'bash'}
        # Check if the tool method is async
        import inspect
        is_async_tool = inspect.iscoroutinefunction(tool_method)
        # Execute tool based on whether it's async or sync
        if is_async_tool:
            # Tool is async - execute directly
            logger.debug("Executing async tool '{%s}'", tool_name)
            if config.enable_session_keepalive and tool_name in long_running_tools:
                # For long-running async tools, use keep-alive
                with KeepAliveSessionWrapper(session, touch_interval=config.keepalive_touch_interval):
                    result = await tool_method(**tool_args)
            else:
                # For regular async tools, execute directly
                result = await tool_method(**tool_args)
        else:
            # Tool is sync - execute in thread pool
            logger.debug("Executing sync tool '{%s}' in thread pool", tool_name)
            # Define the synchronous tool execution function
            def execute_tool_sync():
                """Synchronous tool execution to be run in thread pool"""
                return tool_method(**tool_args)
            # Execute tool asynchronously in thread pool for true non-blocking execution
            import asyncio
            import concurrent.futures
            # Create a thread pool executor for CPU-bound/blocking operations
            loop = asyncio.get_event_loop()
            if config.enable_session_keepalive and tool_name in long_running_tools:
                # For long-running tools, use keep-alive with async execution
                with KeepAliveSessionWrapper(session, touch_interval=config.keepalive_touch_interval):
                    # Run in thread pool to avoid blocking the event loop
                    with concurrent.futures.ThreadPoolExecutor(max_workers=4) as executor:
                        result = await loop.run_in_executor(executor, execute_tool_sync)
            else:
                # For regular tools, use async execution without keep-alive
                with concurrent.futures.ThreadPoolExecutor(max_workers=4) as executor:
                    result = await loop.run_in_executor(executor, execute_tool_sync)
        # Touch session after tool execution to update activity
        session.touch()
        # Handle different result formats: project result objects expose
        # to_dict(); plain dicts pass through; anything else gets wrapped.
        if hasattr(result, 'to_dict'):
            result_data = result.to_dict()
        elif isinstance(result, dict):
            result_data = result
        else:
            result_data = {"result": result}
        # Results without an explicit "success" key are treated as successful.
        success = result_data.get('success', True)
        if success:
            logger.info(f"Session {session.id}: Tool '{tool_name}' completed successfully")
            # RECORD SUCCESSFUL REQUEST FOR RATE LIMITING
            if global_tool_rate_limiter:
                await global_tool_rate_limiter.record_request(tool_name)
        else:
            error_details = result_data.get('error', 'Unknown error')
            logger.warning(f"Session {session.id}: Tool '{tool_name}' failed: {error_details}")
    except Exception as e:
        # Convert any exception into the standard failure dict so callers
        # always receive a result dict rather than a raised exception.
        success = False
        error_details = str(e)
        result_data = {
            "success": False,
            "error": error_details,
            "data": None
        }
        logger.error(f"Session {session.id}: Tool '{tool_name}' exception: {e}")
    # Calculate execution time
    duration_ms = (time.time() - start_time) * 1000
    # Log tool call if tracking is enabled
    tracker = session.get_tool_tracker()
    if tracker:
        try:
            agent_info = {
                "client_ip": client_ip,
                "type": "unknown",  # Could be enhanced to detect agent type
                "session_request_count": session.request_count
            }
            tracker.log_tool_call(
                tool_name=tool_name,
                input_args=tool_args,
                output_result=result_data,
                success=success,
                duration_ms=duration_ms,
                error_details=error_details,
                agent_info=agent_info
            )
        except Exception as e:
            logger.error(f"Failed to log tool call: {e}")
    return result_data
def create_sse_response(response_data: dict, session_id: str = None) -> StreamingResponse:
    """Wrap a JSON-RPC response dict in a Server-Sent-Events stream.

    Emits a single "message" event carrying the serialized payload; if
    serialization fails, a JSON-RPC internal-error envelope is sent as an
    "error" event instead.
    """
    def generate_sse():
        try:
            # Attach the session id so clients can correlate follow-up requests.
            if session_id:
                response_data["session_id"] = session_id
            event_name = "message"
            payload = json.dumps(response_data, ensure_ascii=False)
        except Exception as e:
            fallback = {
                "jsonrpc": "2.0",
                "error": {"code": JsonRpcErr.INTERNAL_ERROR, "message": f"Internal error: {str(e)}"},
                "id": response_data.get("id")
            }
            event_name = "error"
            payload = json.dumps(fallback, ensure_ascii=False)
        # SSE framing: event line, data line, then a blank line terminator.
        yield f"event: {event_name}\n"
        yield f"data: {payload}\n"
        yield "\n"
    return StreamingResponse(
        generate_sse(),
        media_type="text/event-stream",
        headers={
            "Cache-Control": "no-cache",
            "Connection": "keep-alive",
            "Access-Control-Allow-Origin": "*",
        }
    )
def create_error_response(request_id: Any, code: int, message: str, session_id: str = None) -> StreamingResponse:
    """Build a JSON-RPC error envelope and deliver it via the SSE channel."""
    error_obj = {"code": code, "message": message}
    envelope = {
        "jsonrpc": "2.0",
        "error": error_obj,
        "id": request_id,
    }
    return create_sse_response(envelope, session_id)
def create_rate_limit_response(
    request_id: Any,
    tool_name: str,
    error_message: str,
    retry_after_seconds: float,
    limit_type: str,
    technical_details: str = "",
    session_id: str = None
) -> JSONResponse:
    """Build an HTTP 429 Too Many Requests response for a rate-limited call.

    Unlike normal MCP responses (delivered as SSE), rate-limit failures are
    returned as plain JSON with a standard Retry-After header so generic
    HTTP clients can back off correctly.
    """
    # Retry-After must be a whole number of seconds, never below 1.
    retry_after_header = int(max(1.0, retry_after_seconds))
    body = {
        "error": {
            "type": "rate_limit_exceeded",
            "code": "RATE_LIMIT_EXCEEDED",
            "message": error_message,
            "details": {
                "tool_name": tool_name,
                "limit_type": limit_type,
                "retry_after_seconds": retry_after_seconds,
                "technical_details": technical_details,
            },
        },
        "request_id": request_id,
        "timestamp": datetime.now().isoformat(),
        "session_id": session_id,
    }
    response_headers = {
        "Retry-After": str(retry_after_header),  # HTTP standard header
        "X-RateLimit-Limit-Type": limit_type,
        "X-RateLimit-Tool": tool_name,
        "X-RateLimit-Retry-After": str(retry_after_seconds),
        "Content-Type": "application/json",
    }
    return JSONResponse(
        status_code=HTTPStatus.TOO_MANY_REQUESTS,
        content=body,
        headers=response_headers,
    )
async def handle_mcp_request(request: Request) -> StreamingResponse:
    """Main MCP request handler with session management and tool execution.

    Accepts a JSON-RPC 2.0 request over HTTP POST and dispatches the
    "initialize", "tools/list", and "tools/call" methods. Successful
    responses are streamed back as Server-Sent Events; rate-limited tool
    calls instead return an HTTP 429 JSON response. The session is resolved
    (or created) from the optional X-Session-ID header.

    Args:
        request: Incoming Starlette request carrying the JSON-RPC body.

    Returns:
        A StreamingResponse (SSE) for normal and error paths, or a
        JSONResponse (429) when the tool call was rate limited.
    """
    try:
        # Check content length before reading body
        content_length = request.headers.get("content-length")
        if content_length:
            content_size_mb = int(content_length) / (1024 * 1024)
            if content_size_mb > config.max_request_size_mb:
                logger.warning(f"Request too large: {content_size_mb:.2f}MB > {config.max_request_size_mb}MB")
                return create_error_response(None, JsonRpcErr.PARSE_ERROR, f"Request too large: {content_size_mb:.2f}MB")
        # Parse request with timeout protection
        try:
            body = await asyncio.wait_for(request.body(), timeout=config.request_timeout_seconds)
        except asyncio.TimeoutError:
            logger.error("Timeout while reading request body")
            return create_error_response(None, JsonRpcErr.REQUEST_TIMEOUT, "Request body read timeout")
        if not body:
            return create_error_response(None, JsonRpcErr.PARSE_ERROR, "Empty request body")
        try:
            data = json.loads(body.decode('utf-8'))
        except json.JSONDecodeError as e:
            return create_error_response(None, JsonRpcErr.PARSE_ERROR, f"Invalid JSON: {str(e)}")
        # Validate MCP request structure
        is_valid, error_msg = RequestValidator.validate_mcp_request(data)
        if not is_valid:
            return create_error_response(data.get("id"), JsonRpcErr.INVALID_REQUEST, error_msg)
        # Safe to index directly: the validator accepted the request shape above.
        request_id = data["id"]
        method = data["method"]
        params = data.get("params", {})
        # Get or create session
        session_id = request.headers.get("X-Session-ID")
        client_ip = request.client.host if request.client else "unknown"
        session = await session_manager.get_or_create_session(session_id)
        logger.info(f"Processing {method} request for session {session.id} from {client_ip}")
        # Handle different MCP methods
        if method == "initialize":
            # MCP initialization: advertise protocol version and capabilities.
            response_data = {
                "jsonrpc": "2.0",
                "result": {
                    "protocolVersion": "2025-06-18",
                    "capabilities": {
                        "tools": {"supportsProgress": True},
                        "resources": {},
                        "prompts": {}
                    },
                    "serverInfo": {
                        "name": "DeepDiver-Demo-MCP",
                        "version": "1.0.0"
                    }
                },
                "id": request_id
            }
        elif method == "tools/list":
            # List available tools using detailed schemas from get_tool_schemas()
            tools_list = []
            detailed_schemas = get_tool_schemas()
            # Build tools list from schemas
            for _, detailed_schema in detailed_schemas.items():
                tools_list.append({
                    "name": detailed_schema["name"],
                    "description": detailed_schema["description"],
                    "inputSchema": detailed_schema["inputSchema"]
                })
            logger.info(f"Serving {len(tools_list)} tools with detailed schemas to client")
            response_data = {
                "jsonrpc": "2.0",
                "result": {"tools": tools_list},
                "id": request_id
            }
        elif method == "tools/call":
            # Execute tool call
            is_valid, error_msg = RequestValidator.validate_tool_call(params)
            if not is_valid:
                return create_error_response(request_id, JsonRpcErr.INVALID_PARAMS, error_msg, session.id)
            tool_name = params["name"]
            tool_arguments = params["arguments"]
            # Execute tool in session context asynchronously
            result = await _call_session_tool_async(session, tool_name, tool_arguments, client_ip)
            # CHECK FOR RATE LIMITING AND RETURN PROPER HTTP STATUS
            if result.get("rate_limited", False):
                return create_rate_limit_response(
                    request_id=request_id,
                    tool_name=tool_name,
                    error_message=result.get("error", "Rate limit exceeded"),
                    retry_after_seconds=result.get("retry_after_seconds", 60),
                    limit_type=result.get("limit_type", "unknown"),
                    technical_details=result.get("technical_details", ""),
                    session_id=session.id
                )
            # Format normal response: the tool result is serialized as a
            # single text content item per the MCP tools/call result shape.
            response_data = {
                "jsonrpc": "2.0",
                "result": {
                    "content": [
                        {
                            "type": "text",
                            "text": json.dumps(result, indent=2, ensure_ascii=False)
                        }
                    ]
                },
                "id": request_id
            }
        else:
            return create_error_response(request_id, JsonRpcErr.METHOD_NOT_FOUND, f"Method not found: {method}", session.id)
        return create_sse_response(response_data, session.id)
    except asyncio.TimeoutError:
        logger.warning("Request timeout - client may have disconnected")
        return create_error_response(None, JsonRpcErr.REQUEST_TIMEOUT, "Request timeout")
    except Exception as e:
        # Handle client disconnects gracefully: detection is by exception
        # class name substring since the concrete types vary by transport.
        if "ClientDisconnect" in str(e) or "ConnectionClosedError" in str(e):
            logger.warning(f"Client disconnected during request processing: {e}")
            return create_error_response(None, JsonRpcErr.REQUEST_TIMEOUT, "Client disconnected")
        logger.error(f"Unexpected error in MCP request handler: {e}")
        import traceback
        logger.error(traceback.format_exc())
        return create_error_response(None, JsonRpcErr.INTERNAL_ERROR, f"Internal server error: {str(e)}")
async def handle_health_check(request: Request) -> JSONResponse:
    """Report server health, session stats, and rate-limiting configuration."""
    try:
        session_stats = await session_manager.get_stats() if session_manager else {}
        # Summarize the global per-tool rate limiter, if one is active.
        if global_tool_rate_limiter:
            limiter_stats = global_tool_rate_limiter.get_all_stats()
            rate_limit_summary = {
                "enabled": True,
                "tools_with_limits": len(limiter_stats),
                "total_configured_tools": list(limiter_stats.keys()),
            }
        else:
            rate_limit_summary = {"enabled": False}
        payload = {
            "status": "healthy",
            "timestamp": datetime.now().isoformat(),
            "version": "1.0.0",
            "session_stats": session_stats,
            "features": {
                "workspace_isolation": True,
                "tool_call_tracking": config.enable_tool_tracking if config else False,
                "client_rate_limiting": True,
                "global_tool_rate_limiting": rate_limit_summary["enabled"],
                "security_middleware": True,
                "standardized_rate_limit_responses": True,
            },
            "rate_limiting": rate_limit_summary,
            "error_formats": {
                "rate_limit_exceeded": {
                    "http_status": HTTPStatus.TOO_MANY_REQUESTS,
                    "headers": ["Retry-After", "X-RateLimit-*"],
                    "error_code": "RATE_LIMIT_EXCEEDED",
                    "response_format": "application/json",
                }
            },
        }
        return JSONResponse(content=payload)
    except Exception as e:
        return JSONResponse(
            status_code=HTTPStatus.INTERNAL_SERVER_ERROR,
            content={"status": "unhealthy", "error": str(e)},
        )
async def handle_tracking_info(request: Request) -> JSONResponse:
    """Expose tool-call tracking details for the session named in the query string."""
    try:
        session_id = request.query_params.get("session_id")
        # Guard clauses: missing parameter, unknown session, tracking disabled.
        if not session_id:
            return JSONResponse(
                status_code=HTTPStatus.BAD_REQUEST,
                content={"error": "session_id parameter required"},
            )
        session = await session_manager.get_session(session_id)
        if not session:
            return JSONResponse(
                status_code=HTTPStatus.NOT_FOUND,
                content={"error": f"Session {session_id} not found"},
            )
        tracker = session.get_tool_tracker()
        if not tracker:
            return JSONResponse(content={
                "session_id": session_id,
                "tracking_enabled": False,
                "message": "Tool call tracking not enabled or no workspace",
            })
        # Best-effort read of the persisted per-session summary file; an
        # unreadable summary degrades to an empty dict rather than an error.
        summary_data = {}
        if tracker.summary_file.exists():
            try:
                with open(tracker.summary_file, 'r') as fh:
                    summary_data = json.load(fh)
            except Exception as read_err:
                logger.error(f"Failed to read session summary: {read_err}")
        return JSONResponse(content={
            "session_id": session_id,
            "tracking_enabled": True,
            "summary": summary_data,
            "logs_directory": str(tracker.logs_dir),
            "current_log_file": str(tracker.current_log_file),
        })
    except Exception as e:
        return JSONResponse(
            status_code=HTTPStatus.INTERNAL_SERVER_ERROR,
            content={"error": str(e)},
        )
async def handle_rate_limit_stats(request: Request) -> JSONResponse:
    """Report global per-tool rate-limit statistics (one tool or all of them)."""
    try:
        limiter = global_tool_rate_limiter
        if not limiter:
            return JSONResponse(
                status_code=HTTPStatus.NOT_FOUND,
                content={"error": "Global tool rate limiter not initialized"},
            )
        requested_tool = request.query_params.get("tool")
        if requested_tool:
            # Single-tool view.
            single_stats = await limiter.get_tool_stats(requested_tool)
            return JSONResponse(content=single_stats)
        # Aggregate view across every configured tool.
        all_stats = limiter.get_all_stats()
        return JSONResponse(content={
            "timestamp": datetime.now().isoformat(),
            "global_tool_rate_limiting": True,
            "tools": all_stats,
            "summary": {
                "total_tools_with_limits": len(all_stats),
                "tools_configured": list(all_stats.keys()),
            },
        })
    except Exception as e:
        logger.error(f"Failed to get rate limit stats: {e}")
        return JSONResponse(
            status_code=HTTPStatus.INTERNAL_SERVER_ERROR,
            content={"error": str(e)},
        )
def create_app() -> Starlette:
    """Instantiate the Starlette app and wire up the global server components."""
    global session_manager, rate_limiter, global_tool_rate_limiter
    if not config:
        raise RuntimeError("Server configuration not initialized")
    # Shared, process-wide components.
    session_manager = ThreadSafeSessionManager(
        ttl_seconds=config.session_ttl_seconds,
        max_sessions=config.max_sessions,
        base_workspace_dir=config.base_workspace_dir,
    )
    rate_limiter = RateLimiter(config.rate_limit_requests_per_minute)
    # Per-tool global rate limiting is optional and config-driven.
    if config.tool_rate_limits:
        global_tool_rate_limiter = GlobalToolRateLimiter(config.tool_rate_limits)
        logger.info(f"Initialized global tool rate limiter with {len(config.tool_rate_limits)} tool limits")
    else:
        logger.info("No tool rate limits configured - tools will run without global rate limiting")
    app = Starlette(debug=config.debug_mode)
    # Registration order preserved: security first, then client rate limiting.
    app.add_middleware(SecurityMiddleware)
    app.add_middleware(RateLimitMiddleware, input_rate_limiter=rate_limiter)
    # Route table, registered in one pass.
    route_table = (
        ("/mcp", handle_mcp_request, ["POST"]),
        ("/health", handle_health_check, ["GET"]),
        ("/tracking", handle_tracking_info, ["GET"]),
        ("/rate-limits", handle_rate_limit_stats, ["GET"]),
    )
    for path, endpoint, methods in route_table:
        app.add_route(path, endpoint, methods=methods)
    return app
def parse_arguments():
    """Parse command-line options for the MCP server.

    Returns:
        argparse.Namespace with config, host, port, debug, and
        workspace_dir attributes (None / False when not supplied).
    """
    parser = argparse.ArgumentParser(
        description="Demo-Ready MCP Server with Per-Tool Rate Limiting",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
python src/tools/mcp_server_standard.py --config src/tools/server_config.yaml
python src/tools/mcp_server_standard.py --host 127.0.0.1 --port 8080
python src/tools/mcp_server_standard.py --config custom_config.yaml --debug
"""
    )
    # (flags, add_argument keyword arguments) pairs, registered in one pass.
    option_table = [
        (('--config', '-c'), dict(type=str, help='Path to YAML configuration file')),
        (('--host',), dict(type=str, help='Server host (overrides config file)')),
        (('--port', '-p'), dict(type=int, help='Server port (overrides config file)')),
        (('--debug',), dict(action='store_true', help='Enable debug mode (overrides config file)')),
        (('--workspace-dir',), dict(type=str, help='Base workspace directory (overrides config file)')),
    ]
    for flags, options in option_table:
        parser.add_argument(*flags, **options)
    return parser.parse_args()
def print_startup_info():
    """Log a startup banner summarizing enabled features, rate limits, and tools."""
    divider = "=" * 50
    logger.info("🚀 DeepDiver Demo MCP Server")
    logger.info(divider)
    logger.info("📊 Features:")
    logger.info(f" • Session Management: ✅ (TTL: {config.session_ttl_seconds}s)")
    logger.info(f" • Workspace Isolation: ✅ (Base: {config.base_workspace_dir})")
    tracking_mark = '✅' if config.enable_tool_tracking else '❌'
    logger.info(f" • Tool Call Tracking: {tracking_mark}")
    logger.info(f" • Client Rate Limiting: ✅ ({config.rate_limit_requests_per_minute}/min)")
    limits_mark = '✅' if config.tool_rate_limits else '❌'
    logger.info(f" • Global Tool Rate Limiting: {limits_mark}")
    logger.info(" • Security Middleware: ✅")
    # Preview at most three per-tool limits, then note how many were omitted.
    if config.tool_rate_limits:
        logger.info(f"🚦 Tool Rate Limits: {len(config.tool_rate_limits)} tools configured")
        for tool_name, limits in list(config.tool_rate_limits.items())[:3]:
            burst = limits.get('burst_limit', '∞')
            rpm = limits.get('requests_per_minute', '∞')
            logger.info(f" • {tool_name}: {rpm}/min, burst: {burst}")
        hidden = len(config.tool_rate_limits) - 3
        if hidden > 0:
            logger.info(f" • ... and {hidden} more tools")
    # The tool inventory comes straight from the declared schemas.
    available_tools = list(get_tool_schemas().keys())
    logger.info(f"🔧 Tools Available: {len(available_tools)}")
    logger.info(f" • All tools defined in schemas: {len(available_tools)} tools")
    sample = ', '.join(sorted(available_tools)[:5])
    logger.info(f" • Sample tools: {sample}...")
    logger.info(divider)
def main():
    """Entry point: load configuration, apply CLI overrides, and run the server.

    Reads the YAML config — honoring --config when supplied (previously the
    flag was parsed but silently ignored) — applies any host/port/debug/
    workspace overrides from the command line, then starts a single-worker
    uvicorn server hosting the Starlette app.

    Raises:
        Exception: re-raised after logging if server startup fails.
    """
    global config
    # Parse command line arguments
    args = parse_arguments()
    # BUGFIX: honor --config when given instead of always using the default path.
    config_path = args.config or "./src/tools/server_config.yaml"
    config = ServerConfig.from_yaml(config_path)
    # Apply CLI overrides
    if args.host:
        config.host = args.host
        logger.info(f"🔧 Override: Host = {config.host}")
    # Explicit None check so a legitimate port value of 0 is not ignored.
    if args.port is not None:
        config.port = args.port
        logger.info(f"🔧 Override: Port = {config.port}")
    if args.debug:
        config.debug_mode = True
        logger.info(f"🔧 Override: Debug mode enabled")
    if args.workspace_dir:
        config.base_workspace_dir = args.workspace_dir
        logger.info(f"🔧 Override: Workspace directory = {config.base_workspace_dir}")
    print_startup_info()
    try:
        app = create_app()
        logger.info(f"🌐 Starting server at http://{config.host}:{config.port}")
        logger.info(f"📡 MCP endpoint: http://{config.host}:{config.port}/mcp")
        logger.info(f"🏥 Health check: http://{config.host}:{config.port}/health")
        logger.info(f"📊 Tracking info: http://{config.host}:{config.port}/tracking?session_id=<id>")
        logger.info(f"🚦 Rate limit stats: http://{config.host}:{config.port}/rate-limits")
        uvicorn.run(
            app,  # Use app instance directly for single worker with async optimizations
            host=config.host,
            port=config.port,
            log_level="info",
            timeout_keep_alive=config.request_timeout_seconds,
            workers=1,  # Single worker with async optimizations
            backlog=1024,  # Larger backlog for high-concurrency
            access_log=False,  # Disable access logs for better performance
            limit_concurrency=None,  # No artificial concurrency limit
        )
    except KeyboardInterrupt:
        print("\n⏹️ Server stopped by user")
    except Exception as e:
        print(f"❌ Server startup failed: {e}")
        import traceback
        traceback.print_exc()
        raise
if __name__ == "__main__":
    # Script entry point when executed directly (not imported as a module).
    main()