Spaces:
Sleeping
Sleeping
| # server_docs.py | |
| from __future__ import annotations | |
| from pathlib import Path | |
| from typing import List, Dict, Any | |
| from mcp.server.fastmcp import FastMCP | |
| import sys | |
| import os | |
| sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..')) | |
| from document_intelligence import DocumentIntelligence | |
# Import PDF processing library.
# PyPDF2 is an optional dependency: when it is missing the server still runs,
# but _iter_docs() skips .pdf files and _read_pdf_file() returns a message.
try:
    import PyPDF2
    PDF_SUPPORT = True
except ImportError:
    PDF_SUPPORT = False
    print("Warning: PyPDF2 not installed. PDF support disabled.")
# Name your server – this is what clients see
mcp = FastMCP("DocsNavigator")
# Documentation root: the "docs" directory three levels above this file.
DOCS_ROOT = Path(__file__).parent.parent.parent / "docs"
# Shared analysis engine used by intelligent_summarize / extract_qa_pairs.
doc_intel = DocumentIntelligence(DOCS_ROOT)
def _iter_docs() -> list[Path]:
    """Collect every readable documentation file under DOCS_ROOT."""
    allowed = {".md", ".txt", ".rst"}
    if PDF_SUPPORT:
        allowed.add(".pdf")
    found: list[Path] = []
    for candidate in DOCS_ROOT.rglob("*"):
        if candidate.is_file() and candidate.suffix.lower() in allowed:
            found.append(candidate)
    return found
def _read_file(path: Path) -> str:
    """Return the text of *path*, dispatching PDFs to the PDF extractor."""
    if path.suffix.lower() != ".pdf":
        # Plain-text formats: decode as UTF-8, dropping undecodable bytes.
        return path.read_text(encoding="utf-8", errors="ignore")
    return _read_pdf_file(path)
def _read_pdf_file(path: Path) -> str:
    """Extract text from PDF file.

    Returns a human-readable message (never raises) when PyPDF2 is absent,
    when a page cannot be read, or when no text could be extracted.
    """
    if not PDF_SUPPORT:
        return f"PDF support not available. Install PyPDF2 to read {path.name}"
    try:
        pieces: list[str] = []
        with open(path, 'rb') as file:
            reader = PyPDF2.PdfReader(file)
            for index, page in enumerate(reader.pages):
                try:
                    extracted = page.extract_text()
                    if extracted:
                        pieces.append(f"\n--- Page {index + 1} ---\n{extracted}\n")
                except Exception as e:
                    # Per-page failures are recorded inline instead of aborting.
                    pieces.append(f"\n--- Page {index + 1} (Error reading: {str(e)}) ---\n")
        text = "".join(pieces)
        return text if text.strip() else f"No text could be extracted from {path.name}"
    except Exception as e:
        return f"Error reading PDF {path.name}: {str(e)}"
| def _extract_hierarchical_sections(content: str) -> List[Dict[str, str]]: | |
| """Extract sections including their subsections for better content access.""" | |
| lines = content.split('\n') | |
| headers = [] | |
| # Identify all headers | |
| for i, line in enumerate(lines): | |
| stripped = line.strip() | |
| if stripped.startswith('#'): | |
| level = len(stripped) - len(stripped.lstrip('#')) | |
| title = stripped.lstrip('#').strip() | |
| headers.append({ | |
| 'title': stripped, | |
| 'clean_title': title, | |
| 'level': level, | |
| 'line_index': i | |
| }) | |
| if not headers: | |
| return [{'title': 'Document Content', 'content': content.strip()}] | |
| hierarchical_sections = [] | |
| # Extract content for each header including subsections | |
| for i, header in enumerate(headers): | |
| start_line = header['line_index'] | |
| # Find content that belongs to this section (including subsections) | |
| end_line = len(lines) | |
| for j in range(i + 1, len(headers)): | |
| next_header = headers[j] | |
| # Only stop at headers of the same or higher level (lower number) | |
| if next_header['level'] <= header['level']: | |
| end_line = next_header['line_index'] | |
| break | |
| # Extract all content for this section (header + content + subsections) | |
| section_lines = lines[start_line:end_line] | |
| section_content = '\n'.join(section_lines).strip() | |
| # Remove the header line itself from content for cleaner output | |
| if section_content.startswith('#'): | |
| content_lines = section_content.split('\n')[1:] | |
| clean_content = '\n'.join(content_lines).strip() | |
| else: | |
| clean_content = section_content | |
| hierarchical_sections.append({ | |
| 'title': header['title'], | |
| 'content': clean_content, | |
| 'level': header['level'], | |
| 'includes_subsections': any(h['level'] > header['level'] for h in headers[i+1:] if h['line_index'] < end_line) | |
| }) | |
| return hierarchical_sections | |
| def _extract_sections(content: str) -> List[Dict[str, str]]: | |
| """Extract sections from markdown content based on headers with proper hierarchy.""" | |
| lines = content.split('\n') | |
| headers = [] | |
| # First pass: identify all headers with their positions | |
| for i, line in enumerate(lines): | |
| stripped = line.strip() | |
| if stripped.startswith('#'): | |
| level = len(stripped) - len(stripped.lstrip('#')) | |
| title = stripped.lstrip('#').strip() | |
| headers.append({ | |
| 'title': stripped, | |
| 'clean_title': title, | |
| 'level': level, | |
| 'line_index': i | |
| }) | |
| if not headers: | |
| return [{'title': 'Document Content', 'content': content.strip()}] | |
| sections = [] | |
| # Second pass: extract content for each header | |
| for i, header in enumerate(headers): | |
| start_line = header['line_index'] + 1 | |
| # Find the end of this section (next header of same or higher level) | |
| end_line = len(lines) | |
| for j in range(i + 1, len(headers)): | |
| next_header = headers[j] | |
| if next_header['level'] <= header['level']: | |
| end_line = next_header['line_index'] | |
| break | |
| # Extract content for this section | |
| section_lines = lines[start_line:end_line] | |
| section_content = '\n'.join(section_lines).strip() | |
| sections.append({ | |
| 'title': header['title'], | |
| 'content': section_content, | |
| 'level': header['level'] | |
| }) | |
| return sections | |
| def _extract_headers(content: str) -> List[Dict[str, Any]]: | |
| """Extract header hierarchy from markdown content.""" | |
| headers = [] | |
| lines = content.split('\n') | |
| for line_num, line in enumerate(lines, 1): | |
| stripped = line.strip() | |
| if stripped.startswith('#'): | |
| level = len(stripped) - len(stripped.lstrip('#')) | |
| title = stripped.lstrip('#').strip() | |
| headers.append({ | |
| 'level': level, | |
| 'title': title, | |
| 'line': line_num | |
| }) | |
| return headers | |
| def _create_outline(headers: List[Dict[str, Any]]) -> List[str]: | |
| """Create a hierarchical outline from headers.""" | |
| outline = [] | |
| for header in headers: | |
| indent = " " * (header['level'] - 1) | |
| outline.append(f"{indent}- {header['title']}") | |
| return outline | |
| def _count_code_blocks(content: str) -> int: | |
| """Count code blocks in markdown content.""" | |
| return content.count('```') | |
| def _extract_links(content: str) -> List[str]: | |
| """Extract links from markdown content.""" | |
| import re | |
| # Match markdown links [text](url) and bare URLs | |
| link_pattern = r'\[([^\]]+)\]\(([^)]+)\)|https?://[^\s\])]+' | |
| matches = re.findall(link_pattern, content) | |
| links = [] | |
| for match in matches: | |
| if isinstance(match, tuple) and match[1]: | |
| links.append(match[1]) # URL from [text](url) | |
| elif isinstance(match, str): | |
| links.append(match) # Bare URL | |
| return links | |
| def _generate_overview_summary(content: str, sections: List[Dict[str, str]]) -> str: | |
| """Generate a concise overview summary.""" | |
| if not sections: | |
| # If no sections, summarize the whole content | |
| words = content.split()[:100] # First 100 words | |
| return ' '.join(words) + "..." if len(content.split()) > 100 else ' '.join(words) | |
| summary_parts = [] | |
| # Process all meaningful sections (skip empty ones) | |
| for section in sections: | |
| title = section['title'].lstrip('#').strip() | |
| section_content = section['content'].strip() | |
| # Skip empty sections | |
| if not section_content: | |
| continue | |
| # For overview, take first 50 words of each section | |
| content_words = section_content.split()[:50] | |
| section_summary = ' '.join(content_words) | |
| if len(section['content'].split()) > 50: | |
| section_summary += "..." | |
| summary_parts.append(f"**{title}**: {section_summary}") | |
| # Limit to 5 sections for overview to avoid too much text | |
| if len(summary_parts) >= 5: | |
| break | |
| # If we still have no content, fall back to first 100 words | |
| if not summary_parts: | |
| words = content.split()[:100] | |
| return ' '.join(words) + "..." if len(content.split()) > 100 else ' '.join(words) | |
| return '\n\n'.join(summary_parts) | |
| def _extract_key_points(content: str, sections: List[Dict[str, str]]) -> str: | |
| """Extract key points from content.""" | |
| key_points = [] | |
| # Look for bullet points and numbered lists in sections | |
| for section in sections: | |
| section_content = section['content'] | |
| lines = section_content.split('\n') | |
| for line in lines: | |
| stripped = line.strip() | |
| if (stripped.startswith('- ') or | |
| stripped.startswith('* ') or | |
| stripped.startswith('+ ') or | |
| (stripped and len(stripped) > 0 and stripped[0].isdigit() and '. ' in stripped)): | |
| # Clean up the bullet point | |
| clean_point = stripped.lstrip('- *+0123456789. ').strip() | |
| if clean_point: | |
| key_points.append(f"• {clean_point}") | |
| if key_points: | |
| return '\n'.join(key_points[:15]) # Top 15 points | |
| # Fallback: extract sentences that contain key indicators from all content | |
| sentences = content.replace('\n', ' ').split('.') | |
| important_sentences = [] | |
| keywords = ['important', 'note', 'warning', 'key', 'must', 'should', 'required', 'avoid', 'best', 'practice'] | |
| for sentence in sentences: | |
| sentence = sentence.strip() | |
| if sentence and any(keyword in sentence.lower() for keyword in keywords): | |
| important_sentences.append(f"• {sentence}.") | |
| return '\n'.join(important_sentences[:8]) if important_sentences else "No specific key points identified." | |
| def _generate_detailed_summary(content: str, sections: List[Dict[str, str]]) -> str: | |
| """Generate a detailed summary with all sections.""" | |
| if not sections: | |
| return content[:1500] + "..." if len(content) > 1500 else content | |
| detailed_parts = [] | |
| for section in sections: | |
| title = section['title'].lstrip('#').strip() | |
| section_content = section['content'].strip() | |
| # Skip empty sections | |
| if not section_content: | |
| continue | |
| # For detailed summary, include more content | |
| content_preview = section_content[:400] | |
| if len(section_content) > 400: | |
| content_preview += "..." | |
| detailed_parts.append(f"## {title}\n{content_preview}") | |
| # If no sections with content, return truncated full content | |
| if not detailed_parts: | |
| return content[:1500] + "..." if len(content) > 1500 else content | |
| return '\n\n'.join(detailed_parts) | |
| def _extract_technical_details(content: str, sections: List[Dict[str, str]]) -> str: | |
| """Extract technical details like code, configurations, and specifications.""" | |
| technical_parts = [] | |
| # Extract code blocks | |
| import re | |
| code_blocks = re.findall(r'```[\s\S]*?```', content) | |
| if code_blocks: | |
| technical_parts.append("**Code Examples:**") | |
| for i, block in enumerate(code_blocks[:3], 1): | |
| technical_parts.append(f"Block {i}: {block[:100]}..." if len(block) > 100 else block) | |
| # Extract technical terms (words in backticks) | |
| tech_terms = re.findall(r'`([^`]+)`', content) | |
| if tech_terms: | |
| unique_terms = list(set(tech_terms))[:10] | |
| technical_parts.append(f"**Technical Terms:** {', '.join(unique_terms)}") | |
| # Look for configuration or specification patterns | |
| config_lines = [] | |
| lines = content.split('\n') | |
| for line in lines: | |
| if ('config' in line.lower() or | |
| 'setting' in line.lower() or | |
| '=' in line or | |
| ':' in line and not line.strip().startswith('#')): | |
| config_lines.append(line.strip()) | |
| if config_lines: | |
| technical_parts.append("**Configurations/Settings:**") | |
| technical_parts.extend(config_lines[:5]) | |
| return '\n\n'.join(technical_parts) if technical_parts else "No specific technical details identified." | |
| def _generate_brief_summary(content: str) -> str: | |
| """Generate a very brief summary (1-2 sentences).""" | |
| words = content.split() | |
| if len(words) <= 30: | |
| return content | |
| # Take first sentence or first 30 words | |
| sentences = content.split('.') | |
| first_sentence = sentences[0].strip() + '.' if sentences else '' | |
| if len(first_sentence.split()) <= 30: | |
| return first_sentence | |
| else: | |
| return ' '.join(words[:30]) + "..." | |
def list_docs_resource() -> list[str]:
    """
    Resource that returns a simple list of available doc paths.
    """
    paths = []
    for doc in _iter_docs():
        paths.append(str(doc.relative_to(DOCS_ROOT)))
    return paths
def read_doc(relative_path: str) -> str:
    """
    Read a specific doc by relative path (e.g. 'getting-started.md').

    Returns the file text, or an error message string when the path is
    missing or escapes the docs root.

    Bug fix: the containment check previously compared a resolve()d candidate
    against the *unresolved* DOCS_ROOT; when DOCS_ROOT contains a symlink the
    two spellings never match and every valid file is refused. Both sides are
    now resolved, and the access check runs before the existence probe so
    callers cannot test for files outside the docs root.
    """
    root = DOCS_ROOT.resolve()
    path = (root / relative_path).resolve()
    if root not in path.parents:
        return "Access denied: path escapes docs root."
    if not path.exists() or not path.is_file():
        return f"Document not found: {relative_path}"
    return _read_file(path)
def list_docs() -> List[str]:
    """
    List available documentation files relative to the docs/ folder.
    """
    relative = []
    for entry in _iter_docs():
        relative.append(str(entry.relative_to(DOCS_ROOT)))
    return relative
def search_docs(query: str, max_results: int = 10) -> List[Dict[str, str]]:
    """
    Improved full-text search over docs with better matching.

    Args:
        query: Search query string.
        max_results: Max number of matches to return.
    Returns:
        List of {path, snippet, score, match_type} matches, best first.

    Bug fix: scores are serialized as strings in the result payload, and the
    final ordering previously sorted on that *string* ("100" < "80.0"
    lexicographically), so exact-phrase hits ranked below weaker word
    matches. The sort now compares the numeric value.
    """
    import re
    query_lower = query.lower()
    query_words = query_lower.split()
    results: list[dict[str, str]] = []
    for path in _iter_docs():
        text = _read_file(path)
        text_lower = text.lower()
        matches = []
        # Exact phrase match gets the highest score.
        if query_lower in text_lower:
            idx = text_lower.find(query_lower)
            start = max(0, idx - 80)
            end = min(len(text), idx + 80)
            matches.append({
                "score": 100,
                "snippet": text[start:end].replace("\n", " "),
                "match_type": "exact_phrase"
            })
        # Otherwise look for sentences containing most of the query words.
        sentences = re.split(r'[.!?]+|\n\n+', text)
        for sentence in sentences:
            sentence_lower = sentence.lower()
            word_matches = sum(1 for word in query_words if word in sentence_lower)
            if word_matches >= max(1, len(query_words) * 0.6):  # at least 60% of the words
                score = (word_matches / len(query_words)) * 80
                if len(sentence.strip()) > 20:  # prefer longer, more informative sentences
                    snippet = sentence.strip()[:160] + "..." if len(sentence.strip()) > 160 else sentence.strip()
                    matches.append({
                        "score": score,
                        "snippet": snippet,
                        "match_type": f"words_{word_matches}/{len(query_words)}"
                    })
        # Keep only the single best match per document.
        if matches:
            best_match = max(matches, key=lambda x: x["score"])
            results.append({
                "path": str(path.relative_to(DOCS_ROOT)),
                "snippet": best_match["snippet"],
                "score": str(best_match["score"]),
                "match_type": best_match["match_type"]
            })
    # Sort numerically — the payload stores scores as strings.
    results.sort(key=lambda x: float(x["score"]), reverse=True)
    return results[:max_results]
def extract_section(relative_path: str, section_title: str, include_subsections: bool = True) -> Dict[str, Any]:
    """
    Extract a specific section from a document.

    Args:
        relative_path: Path to the document relative to docs/ folder
        section_title: Title of the section to extract (case-insensitive, partial matches allowed)
        include_subsections: Whether to include subsections in the extracted content
    Returns:
        Dictionary with section content and metadata. On failure, a dict with
        an "error" key (and, for a missing section, the available titles).
    """
    path = (DOCS_ROOT / relative_path).resolve()
    if not path.exists() or not path.is_file():
        return {"error": f"Document not found: {relative_path}"}
    # NOTE(review): compares the resolved path against the *unresolved*
    # DOCS_ROOT — may mismatch if DOCS_ROOT contains a symlink; verify.
    if DOCS_ROOT not in path.parents and DOCS_ROOT != path.parent:
        return {"error": "Access denied: path escapes docs root."}
    content = _read_file(path)
    # Use hierarchical extraction if including subsections, otherwise flat extraction
    if include_subsections:
        sections = _extract_hierarchical_sections(content)
    else:
        sections = _extract_sections(content)
    # Find matching section (case-insensitive, substring match in either direction)
    section_title_lower = section_title.lower()
    matching_sections = []
    for section in sections:
        section_title_clean = section['title'].lstrip('#').strip().lower()
        if section_title_lower in section_title_clean or section_title_clean in section_title_lower:
            matching_sections.append(section)
    if not matching_sections:
        # List available (non-empty) sections for user reference
        available_sections = [s['title'].lstrip('#').strip() for s in sections if s['content'].strip()]
        return {
            "error": f"Section '{section_title}' not found",
            "available_sections": available_sections[:10],  # Limit to first 10 for readability
            "total_sections": str(len(available_sections))
        }
    if len(matching_sections) == 1:
        section = matching_sections[0]
        result = {
            "document": relative_path,
            "section_title": section['title'].lstrip('#').strip(),
            "content": section['content'].strip(),
            "word_count": str(len(section['content'].split())),
            "match_type": "single",
            "extraction_mode": "hierarchical" if include_subsections else "flat"
        }
        # Add metadata about subsections if available (hierarchical mode only;
        # the header-less fallback section carries neither key)
        if 'includes_subsections' in section:
            result["includes_subsections"] = section['includes_subsections']
        if 'level' in section:
            result["header_level"] = section['level']
        return result
    else:
        # Multiple matches - return all of them with per-section metadata
        results = []
        for section in matching_sections:
            section_info = {
                "section_title": section['title'].lstrip('#').strip(),
                "content": section['content'].strip(),
                "word_count": str(len(section['content'].split()))
            }
            if 'level' in section:
                section_info["header_level"] = section['level']
            if 'includes_subsections' in section:
                section_info["includes_subsections"] = section['includes_subsections']
            results.append(section_info)
        return {
            "document": relative_path,
            "match_type": "multiple",
            "matching_sections": results,
            "total_matches": str(len(results)),
            "extraction_mode": "hierarchical" if include_subsections else "flat"
        }
def summarize_document(relative_path: str, summary_type: str = "overview") -> Dict[str, str]:
    """
    Generate a smart summary of a specific document.

    Args:
        relative_path: Path to the document relative to docs/ folder
        summary_type: Type of summary - 'overview', 'key_points', 'detailed', or 'technical'
    Returns:
        Dictionary with document info and structured summary
    """
    path = (DOCS_ROOT / relative_path).resolve()
    if not path.exists() or not path.is_file():
        return {"error": f"Document not found: {relative_path}"}
    if DOCS_ROOT not in path.parents and DOCS_ROOT != path.parent:
        return {"error": "Access denied: path escapes docs root."}

    content = _read_file(path)
    sections = _extract_sections(content)

    # Dispatch on the requested flavor; anything unrecognized means overview.
    producers = {
        "key_points": _extract_key_points,
        "detailed": _generate_detailed_summary,
        "technical": _extract_technical_details,
    }
    producer = producers.get(summary_type, _generate_overview_summary)
    summary = producer(content, sections)

    return {
        "document": relative_path,
        "word_count": str(len(content.split())),
        "sections": str(len(sections)),
        "summary_type": summary_type,
        "summary": summary,
    }
def analyze_document_structure(relative_path: str) -> Dict[str, Any]:
    """
    Analyze the structure and metadata of a document.

    Args:
        relative_path: Path to the document relative to docs/ folder
    Returns:
        Dictionary with structural analysis, or {"error": ...} on failure.

    Fixes: unlike the sibling read_doc/summarize_document tools, this one
    never verified that the resolved path stays inside DOCS_ROOT, so a
    "../.." style relative_path could read arbitrary files — the same
    containment check is applied now. The "has_code" heuristic also tested
    for a single space (true for virtually any text); it now checks for a
    4-space indent, the markdown indented-code prefix.
    """
    path = (DOCS_ROOT / relative_path).resolve()
    if not path.exists() or not path.is_file():
        return {"error": f"Document not found: {relative_path}"}
    if DOCS_ROOT not in path.parents and DOCS_ROOT != path.parent:
        return {"error": "Access denied: path escapes docs root."}
    content = _read_file(path)
    # Extract headers and create outline
    headers = _extract_headers(content)
    sections = _extract_sections(content)
    # Basic statistics
    lines = content.split('\n')
    words = content.split()
    # Find code blocks and links
    code_blocks = _count_code_blocks(content)
    links = _extract_links(content)
    return {
        "document": relative_path,
        "statistics": {
            "lines": len(lines),
            "words": len(words),
            "characters": len(content),
            "sections": str(len(sections)),
            "code_blocks": code_blocks,
            "links": len(links)
        },
        "structure": {
            "headers": headers,
            "outline": _create_outline(headers)
        },
        "content_analysis": {
            "has_tables": "| " in content,
            "has_images": "![" in content,
            # Fenced blocks or 4-space indented code
            "has_code": "```" in content or "    " in content,
            "external_links": [link for link in links if link.startswith(('http', 'https'))]
        }
    }
def generate_doc_overview() -> Dict[str, Any]:
    """
    Generate a comprehensive overview of the entire documentation set.

    Returns:
        Dictionary with overall documentation analysis
    """
    docs = _iter_docs()
    by_type: Dict[str, int] = {}
    totals = {"words": 0, "lines": 0, "characters": 0}
    structure = {"sections": 0, "code_blocks": 0}
    summaries = []

    for doc in docs:
        text = _read_file(doc)
        # Count documents per file extension.
        suffix = doc.suffix.lower()
        by_type[suffix] = by_type.get(suffix, 0) + 1

        # Aggregate size statistics.
        word_total = len(text.split())
        totals["words"] += word_total
        totals["lines"] += len(text.split('\n'))
        totals["characters"] += len(text)

        # Aggregate structural statistics.
        section_total = len(_extract_sections(text))
        structure["sections"] += section_total
        structure["code_blocks"] += _count_code_blocks(text)

        # Per-document one-liner.
        summaries.append({
            "path": str(doc.relative_to(DOCS_ROOT)),
            "words": word_total,
            "sections": section_total,
            "brief_summary": _generate_brief_summary(text),
        })

    return {
        "total_documents": str(len(docs)),
        "documents_by_type": by_type,
        "total_content": totals,
        "structure_analysis": structure,
        "document_summaries": summaries,
    }
def semantic_search(query: str, max_results: int = 5) -> List[Dict[str, Any]]:
    """
    Perform semantic search across documents using keyword matching and relevance scoring.

    Args:
        query: Search query
        max_results: Maximum number of results to return
    Returns:
        List of documents with relevance scores and context
    """
    terms = set(query.lower().split())
    hits = []
    for doc in _iter_docs():
        text = _read_file(doc)
        lowered = text.lower()
        score = 0
        snippets = []
        for term in terms:
            # Longer words carry more weight per occurrence.
            score += lowered.count(term) * len(term)

            # Collect every position of the term, then snippet the first two.
            positions = []
            cursor = 0
            while (found := lowered.find(term, cursor)) != -1:
                positions.append(found)
                cursor = found + 1
            for found in positions[:2]:
                left = max(0, found - 60)
                right = min(len(text), found + 60)
                snippets.append(text[left:right].replace('\n', ' '))
        if score > 0:
            hits.append({
                # Normalize by document length so long docs don't dominate.
                'path': str(doc.relative_to(DOCS_ROOT)),
                'relevance_score': score / len(text.split()),
                'context_snippets': snippets[:3],
                'word_count': len(text.split()),
            })
    hits.sort(key=lambda item: item['relevance_score'], reverse=True)
    return hits[:max_results]
def compare_documents(doc1_path: str, doc2_path: str) -> Dict[str, Any]:
    """
    Compare two documents and identify similarities and differences.

    Args:
        doc1_path: Path to first document
        doc2_path: Path to second document
    Returns:
        Comparison analysis, or {"error": ...} when a path is invalid.

    Fix: both paths are now checked for containment in DOCS_ROOT (consistent
    with read_doc and summarize_document); previously "../" style arguments
    could compare files outside the documentation tree.
    """
    path1 = (DOCS_ROOT / doc1_path).resolve()
    path2 = (DOCS_ROOT / doc2_path).resolve()
    for candidate in (path1, path2):
        if DOCS_ROOT not in candidate.parents and DOCS_ROOT != candidate.parent:
            return {"error": "Access denied: path escapes docs root."}
    if not path1.exists() or not path2.exists():
        return {"error": "One or both documents not found"}
    content1 = _read_file(path1)
    content2 = _read_file(path2)
    # Basic statistics comparison
    stats1 = {
        "words": len(content1.split()),
        "lines": len(content1.split('\n')),
        "characters": len(content1)
    }
    stats2 = {
        "words": len(content2.split()),
        "lines": len(content2.split('\n')),
        "characters": len(content2)
    }
    # Find common and unique words (lowercased, trailing punctuation stripped)
    words1 = set(word.lower().strip('.,!?;:') for word in content1.split())
    words2 = set(word.lower().strip('.,!?;:') for word in content2.split())
    common_words = words1.intersection(words2)
    unique_to_doc1 = words1 - words2
    unique_to_doc2 = words2 - words1
    # Extract headers for structure comparison
    headers1 = [h['title'] for h in _extract_headers(content1)]
    headers2 = [h['title'] for h in _extract_headers(content2)]
    return {
        "document1": doc1_path,
        "document2": doc2_path,
        "statistics": {
            "doc1": stats1,
            "doc2": stats2,
            "size_ratio": stats1["words"] / stats2["words"] if stats2["words"] > 0 else float('inf')
        },
        "content_similarity": {
            "common_words_count": len(common_words),
            "unique_to_doc1_count": len(unique_to_doc1),
            "unique_to_doc2_count": len(unique_to_doc2),
            # Jaccard similarity over the two vocabularies
            "similarity_ratio": len(common_words) / len(words1.union(words2)) if len(words1.union(words2)) > 0 else 0
        },
        "structure_comparison": {
            "doc1_headers": headers1,
            "doc2_headers": headers2,
            "common_headers": list(set(headers1).intersection(set(headers2))),
            "unique_headers_doc1": list(set(headers1) - set(headers2)),
            "unique_headers_doc2": list(set(headers2) - set(headers1))
        },
        "sample_unique_words": {
            "doc1": list(unique_to_doc1)[:10],
            "doc2": list(unique_to_doc2)[:10]
        }
    }
def extract_definitions(relative_path: str) -> Dict[str, Any]:
    """
    Extract definitions, terms, and explanations from a document.

    Args:
        relative_path: Path to the document
    Returns:
        Extracted definitions, glossary terms, and technical terms, plus a
        definition-density ratio; or {"error": ...} if the path is missing.
    """
    path = (DOCS_ROOT / relative_path).resolve()
    # NOTE(review): no DOCS_ROOT containment check here, unlike read_doc —
    # verify whether path escape should be rejected as in the sibling tools.
    if not path.exists():
        return {"error": f"Document not found: {relative_path}"}
    content = _read_file(path)
    definitions = []
    # Look for definition patterns
    import re
    # Pattern 1: "Term: Definition" or "Term - Definition"
    definition_patterns = [
        r'^([A-Z][^:\-\n]+):\s*(.+)$',  # Term: Definition
        r'^([A-Z][^:\-\n]+)\s*-\s*(.+)$',  # Term - Definition
        r'\*\*([^*]+)\*\*:\s*([^\n]+)',  # **Term**: Definition
        r'`([^`]+)`:\s*([^\n]+)'  # `Term`: Definition
    ]
    for pattern in definition_patterns:
        matches = re.findall(pattern, content, re.MULTILINE)
        for match in matches:
            term, definition = match
            definitions.append({
                "term": term.strip(),
                "definition": definition.strip(),
                "type": "explicit"
            })
    # Look for glossary sections (titles mentioning glossary/definition/terms)
    sections = _extract_sections(content)
    glossary_terms = []
    for section in sections:
        if any(keyword in section['title'].lower() for keyword in ['glossary', 'definition', 'terminology', 'terms']):
            lines = section['content'].split('\n')
            for line in lines:
                # Split on ':' when present, else '-'; only clean two-part lines count
                if ':' in line or '-' in line:
                    parts = line.split(':') if ':' in line else line.split('-')
                    if len(parts) == 2:
                        glossary_terms.append({
                            "term": parts[0].strip(),
                            "definition": parts[1].strip(),
                            "type": "glossary"
                        })
    # Extract technical terms (words in backticks); order is unspecified (set)
    tech_terms = re.findall(r'`([^`]+)`', content)
    tech_terms_unique = list(set(tech_terms))
    return {
        "document": relative_path,
        "definitions": definitions,
        "glossary_terms": glossary_terms,
        "technical_terms": tech_terms_unique,
        "total_definitions": str(len(definitions) + len(glossary_terms)),
        "definition_density": (len(definitions) + len(glossary_terms)) / len(content.split()) if content.split() else 0
    }
def generate_table_of_contents(relative_path: str = None) -> Dict[str, Any]:
    """
    Generate a table of contents for a specific document or all documents.

    Args:
        relative_path: Path to specific document, or None for all documents
    Returns:
        Table of contents structure
    """
    def _toc_entry(text: str) -> Dict[str, Any]:
        # Outline plus header statistics for one document's text.
        hdrs = _extract_headers(text)
        return {
            "outline": _create_outline(hdrs),
            "header_count": len(hdrs),
            "max_depth": max([h['level'] for h in hdrs]) if hdrs else 0,
        }

    if relative_path:
        # Single document TOC
        target = (DOCS_ROOT / relative_path).resolve()
        if not target.exists():
            return {"error": f"Document not found: {relative_path}"}
        entry = _toc_entry(_read_file(target))
        return {
            "document": relative_path,
            "table_of_contents": entry["outline"],
            "header_count": entry["header_count"],
            "max_depth": entry["max_depth"],
        }

    # All documents TOC
    per_doc = {
        str(doc.relative_to(DOCS_ROOT)): _toc_entry(_read_file(doc))
        for doc in _iter_docs()
    }
    return {
        "type": "complete_documentation_toc",
        "documents": per_doc,
        "total_documents": str(len(per_doc)),
    }
def intelligent_summarize(relative_path: str, summary_type: str = "medium", focus_keywords: str = None) -> Dict[str, Any]:
    """
    Generate an intelligent summary using advanced text analysis.

    Args:
        relative_path: Path to the document
        summary_type: "short", "medium", or "long"
        focus_keywords: Optional comma-separated keywords to focus on
    Returns:
        Intelligent summary with analysis, or an error payload when the
        document is missing or DocumentIntelligence raises.
    """
    path = (DOCS_ROOT / relative_path).resolve()
    if not path.exists():
        return {"error": f"Document not found: {relative_path}"}
    try:
        content = _read_file(path)
        # Use document intelligence for smart summary
        # NOTE(review): the exact semantics of "short"/"medium"/"long" are
        # defined by DocumentIntelligence.generate_smart_summary — not
        # visible here; confirm against document_intelligence.py.
        summary_result = doc_intel.generate_smart_summary(content, summary_type)
        # Add key concepts
        key_concepts = doc_intel.extract_key_concepts(content)
        # Add readability analysis
        readability = doc_intel.analyze_readability(content)
        # If focus keywords provided, highlight relevant sections
        focused_content = None
        if focus_keywords:
            keywords = [k.strip() for k in focus_keywords.split(',')]
            # Find sections that contain the keywords (case-insensitive)
            sections = _extract_sections(content)
            relevant_sections = []
            for section in sections:
                if section['content'].strip() and any(keyword.lower() in section['content'].lower() for keyword in keywords):
                    relevant_sections.append(section['title'].lstrip('#').strip())
            focused_content = relevant_sections
        return {
            "document": relative_path,
            "summary": summary_result,
            "key_concepts": key_concepts[:10],
            "readability": readability,
            "focused_sections": focused_content,
            "analysis_method": "advanced_intelligence"
        }
    except Exception as e:
        # Broad catch is deliberate: the tool returns an error payload to the
        # client instead of letting the exception propagate into the server.
        return {
            "error": f"Failed to analyze document: {str(e)}",
            "document": relative_path,
            "fallback_available": True
        }
def extract_qa_pairs(relative_path: str | None = None) -> Dict[str, Any]:
    """
    Extract question-answer pairs from documents for FAQ generation.
    Args:
        relative_path: Specific document path (relative to the docs root),
            or None to scan all documents
    Returns:
        Extracted Q&A pairs; pair counts are returned as strings for
        consistency with the other aggregate tools in this module.
    """
    if relative_path:
        path = (DOCS_ROOT / relative_path).resolve()
        # Reject paths that resolve outside the docs root (e.g. "../..")
        # so untrusted MCP clients cannot read arbitrary files.
        if not path.is_relative_to(DOCS_ROOT.resolve()) or not path.exists():
            return {"error": f"Document not found: {relative_path}"}
        content = _read_file(path)
        qa_pairs = doc_intel.extract_questions_and_answers(content)
        return {
            "document": relative_path,
            "qa_pairs": qa_pairs,
            "total_pairs": str(len(qa_pairs))
        }
    else:
        # Extract from all documents, keeping only those that yielded pairs.
        all_qa_pairs = {}
        total_pairs = 0
        for path in _iter_docs():
            content = _read_file(path)
            qa_pairs = doc_intel.extract_questions_and_answers(content)
            if qa_pairs:
                rel_path = str(path.relative_to(DOCS_ROOT))
                all_qa_pairs[rel_path] = qa_pairs
                total_pairs += len(qa_pairs)
        return {
            "type": "complete_documentation_qa",
            "qa_by_document": all_qa_pairs,
            "total_pairs": str(total_pairs)
        }
def find_related_documents(query: str, max_results: int = 3) -> Dict[str, Any]:
    """
    Find documents most related to a query using advanced similarity scoring.

    Note: the original annotation promised List[Dict[str, Any]] but the
    function has always returned a dict; the annotation now matches.
    Args:
        query: Search query or topic
        max_results: Maximum number of related documents to return
    Returns:
        Dict with the query, the related documents (scored by doc_intel),
        the number of documents analyzed, and the scoring method name.
    """
    all_docs = list(_iter_docs())
    related = doc_intel.find_related_content(query, all_docs, max_results)
    return {
        "query": query,
        "related_documents": related,
        "total_analyzed": len(all_docs),
        "method": "tf-idf_similarity"
    }
def analyze_document_gaps() -> Dict[str, Any]:
    """
    Analyze the documentation set to identify potential gaps or areas needing improvement.
    Returns:
        Analysis of documentation completeness and suggestions
    """
    docs = list(_iter_docs())
    report: Dict[str, Any] = {
        "total_documents": len(docs),
        "coverage_analysis": {},
        "recommendations": [],
        "content_quality": {},
        "structure_issues": []
    }
    # Section names we expect well-rounded docs to contain.
    expected_sections = ['introduction', 'overview', 'getting started', 'configuration', 'examples', 'troubleshooting']
    coverage = dict.fromkeys(expected_sections, 0)
    word_total = 0
    brief_docs: List[str] = []
    lengthy_docs: List[str] = []
    hard_to_read: List[str] = []
    docs_missing_sections: List[Dict[str, Any]] = []
    for doc_path in docs:
        text = _read_file(doc_path)
        rel = str(doc_path.relative_to(DOCS_ROOT))
        # Bucket the document by size (in whitespace-separated words).
        words = len(text.split())
        word_total += words
        if words < 100:
            brief_docs.append(rel)
        elif words > 3000:
            lengthy_docs.append(rel)
        # Flag documents with a very low Flesch reading-ease score.
        if doc_intel.analyze_readability(text).get('flesch_score', 50) < 30:
            hard_to_read.append(rel)
        # Record which expected sections show up in this doc's headers.
        header_titles = [h['title'].lower() for h in _extract_headers(text)]
        present = []
        for name in expected_sections:
            if any(name in title for title in header_titles):
                coverage[name] += 1
                present.append(name)
        absent = [name for name in expected_sections if name not in present]
        if absent:
            docs_missing_sections.append({"document": rel, "missing": absent})
    # Turn the raw tallies into human-readable recommendations.
    if brief_docs:
        report["recommendations"].append(f"Consider expanding these short documents: {', '.join(brief_docs[:3])}")
    if hard_to_read:
        report["recommendations"].append(f"Improve readability of: {', '.join(hard_to_read[:3])}")
    # Recommend the least-covered section type(s) across the whole set.
    floor = min(coverage.values())
    rarest = [name for name, hits in coverage.items() if hits <= floor]
    if rarest:
        report["recommendations"].append(f"Consider adding {', '.join(rarest)} sections to more documents")
    report["coverage_analysis"] = {
        "average_words_per_doc": word_total / len(docs) if docs else 0,
        "short_documents": brief_docs,
        "long_documents": lengthy_docs,
        "section_coverage": coverage
    }
    report["content_quality"] = {
        "low_readability": hard_to_read,
        "missing_common_sections": docs_missing_sections
    }
    return report
def generate_documentation_index() -> Dict[str, Any]:
    """
    Generate a comprehensive searchable index of all documentation content.

    Builds four views over the docs tree:
      - concepts: concept -> occurrences (document, frequency, type), kept
        only if the concept recurs (multiple docs, or frequency > 2)
      - topics: concept -> documents, for concepts appearing in >= 2 docs
      - cross_references: document -> related documents via
        doc_intel.find_related_content
      - metadata: per-document word count, section count, readability
        score, and main topics
    Returns:
        Searchable index with topics, concepts, and cross-references,
        plus summary statistics.
    """
    index: Dict[str, Any] = {
        "concepts": {},  # concept -> [documents]
        "topics": {},  # topic -> documents
        "cross_references": {},  # document -> related documents
        "metadata": {}
    }
    all_docs = list(_iter_docs())
    # Build concept index
    all_concepts: Dict[str, List[Dict[str, Any]]] = {}
    for path in all_docs:
        content = _read_file(path)
        rel_path = str(path.relative_to(DOCS_ROOT))
        # Extract concepts from this document. min_frequency=1 keeps even
        # rare terms here; the global filter below prunes one-offs.
        concepts = doc_intel.extract_key_concepts(content, min_frequency=1)
        # Add to global concept index
        for concept_info in concepts:
            concept = concept_info['concept']
            if concept not in all_concepts:
                all_concepts[concept] = []
            all_concepts[concept].append({
                "document": rel_path,
                "frequency": concept_info['frequency'],
                "type": concept_info['type']
            })
        # Find cross-references (documents with similar concepts) by
        # querying with this document's top five concepts.
        # NOTE(review): this runs one similarity search per document over
        # all docs, so the whole build is likely quadratic in doc count —
        # confirm against doc_intel before using on large doc sets.
        related_docs = doc_intel.find_related_content(
            ' '.join([c['concept'] for c in concepts[:5]]),
            all_docs,
            max_results=3
        )
        # Exclude the document itself from its own cross-reference list.
        index["cross_references"][rel_path] = [doc['path'] for doc in related_docs if doc['path'] != rel_path]
        # Document metadata
        headers = _extract_headers(content)
        readability = doc_intel.analyze_readability(content)
        index["metadata"][rel_path] = {
            "word_count": len(content.split()),
            "sections": len(headers),
            "readability_score": readability.get('flesch_score', 0),
            "main_topics": [c['concept'] for c in concepts[:5]]
        }
    # Filter concepts that appear in multiple documents (more valuable for index)
    index["concepts"] = {
        concept: docs for concept, docs in all_concepts.items()
        if len(docs) > 1 or any(d['frequency'] > 2 for d in docs)
    }
    # Create topic clusters
    topic_clusters = {}
    for concept, docs in index["concepts"].items():
        if len(docs) >= 2:  # Concept appears in multiple docs
            topic_clusters[concept] = [doc['document'] for doc in docs]
    index["topics"] = topic_clusters
    return {
        "index": index,
        "statistics": {
            "total_concepts": len(index["concepts"]),
            "total_topics": len(index["topics"]),
            "total_documents": len(all_docs),
            "avg_cross_references": sum(len(refs) for refs in index["cross_references"].values()) / len(index["cross_references"]) if index["cross_references"] else 0
        }
    }
if __name__ == "__main__":
    # stdio transport keeps it compatible with the official client pattern;
    # the FastMCP server reads requests from stdin and writes to stdout.
    mcp.run(transport="stdio")