# Spaces: brickfrog/ankigen (Running)
# File size: 12,426 Bytes

"""Context7 integration for library documentation"""

import asyncio
import subprocess
import json
from typing import Optional, Dict, Any
from tenacity import (
    retry,
    stop_after_attempt,
    wait_exponential,
    retry_if_exception_type,
)
from ankigen_core.logging import logger
from ankigen_core.exceptions import (
    ValidationError,
)

# Upper bound on the length of caller-supplied strings (e.g. library IDs);
# longer values are rejected before anything is sent to Context7.
MAX_STRING_LENGTH = 200  # Prevent excessively long inputs
# Maximum time, in seconds, to wait for one Context7 subprocess exchange
# before the subprocess is killed.
SUBPROCESS_TIMEOUT = 60.0  # 60 second timeout for Context7 calls


class Context7Client:
    """Context7 MCP client for fetching library documentation.

    Every tool call spawns a fresh ``npx @upstash/context7-mcp`` subprocess,
    writes an ``initialize`` request followed by a ``tools/call`` request to
    its stdin (one JSON document per line) and reads the JSON-RPC responses
    from its stdout. The client keeps no state between calls.
    """

    # JSON-RPC ids used for the two requests sent to the server. The tool
    # response is located by matching its id rather than by line position.
    _INIT_REQUEST_ID = 0
    _TOOL_REQUEST_ID = 1

    # Line prefixes recognised in the text returned by "resolve-library-id".
    _TITLE_PREFIX = "- Title:"
    _ID_PREFIX = "- Context7-compatible library ID:"
    _SNIPPETS_PREFIX = "- Code Snippets:"
    _TRUST_PREFIX = "- Trust Score:"

    def __init__(self):
        pass  # No state needed - each call creates fresh subprocess

    @staticmethod
    async def _terminate_process(process: "asyncio.subprocess.Process") -> None:
        """Kill ``process`` if it is still running and wait briefly for exit.

        Cleanup is best effort: any failure is logged and swallowed so that
        the error which triggered the cleanup is the one that propagates.
        """
        if process.returncode is not None:
            return  # Already exited, nothing to clean up
        try:
            process.kill()
            # Wait for the process to actually terminate
            await asyncio.wait_for(process.wait(), timeout=5.0)
        except Exception as cleanup_error:
            logger.error(f"Error during process cleanup: {cleanup_error}")

    @retry(
        stop=stop_after_attempt(3),
        wait=wait_exponential(multiplier=1, min=2, max=10),
        retry=retry_if_exception_type((TimeoutError, ConnectionError)),
        reraise=True,
    )
    async def _run_context7_server(self, input_data: bytes) -> bytes:
        """Run one Context7 subprocess exchange and return its raw stdout.

        The retry decorator lives here (not on ``call_context7_tool``) because
        the public method converts every exception into an error dict, which
        would hide the exceptions the retry policy is waiting for.

        Args:
            input_data: Newline-delimited JSON-RPC requests to write to stdin.

        Returns:
            Everything the subprocess wrote to stdout.

        Raises:
            TimeoutError: The exchange exceeded ``SUBPROCESS_TIMEOUT`` seconds
                on every attempt.
        """
        process = await asyncio.create_subprocess_exec(
            "npx",
            "@upstash/context7-mcp",
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
        )

        try:
            stdout, stderr = await asyncio.wait_for(
                process.communicate(input=input_data),
                timeout=SUBPROCESS_TIMEOUT,
            )
        except asyncio.TimeoutError as timeout_error:
            await self._terminate_process(process)
            raise TimeoutError(
                f"Context7 subprocess timed out after {SUBPROCESS_TIMEOUT}s"
            ) from timeout_error
        except (Exception, asyncio.CancelledError):
            # CancelledError is listed explicitly: on Python 3.8+ it is not an
            # Exception subclass, and a cancelled call must not leak the child.
            await self._terminate_process(process)
            raise

        if process.returncode:
            logger.debug(
                f"Context7 subprocess exited with code {process.returncode}: "
                f"{stderr.decode(errors='replace').strip()}"
            )
        return stdout

    @staticmethod
    def _extract_text(result: Any) -> Dict[str, Any]:
        """Turn the ``result`` member of a tool response into a result dict."""
        content = result.get("content") if isinstance(result, dict) else None
        first_item = content[0] if content else None
        if not isinstance(first_item, dict):
            return {"error": "No content in response", "success": False}

        if result.get("isError"):
            # MCP reports tool-level failures as a normal result flagged with
            # isError; do not hand the error message back as documentation.
            return {
                "error": first_item.get("text") or "Context7 tool reported an error",
                "success": False,
            }

        if "text" in first_item:
            return {"text": first_item["text"], "success": True}
        if first_item.get("type") == "text":
            # Text item without a payload: success with empty text.
            return {"text": "", "success": True}
        return {"error": "No content in response", "success": False}

    @classmethod
    def _parse_tool_response(cls, stdout: bytes) -> Dict[str, Any]:
        """Find the ``tools/call`` response in ``stdout`` and convert it.

        Blank lines, non-JSON lines and messages with a different id (such as
        the ``initialize`` response) are skipped.
        """
        for raw_line in stdout.decode(errors="replace").split("\n"):
            line = raw_line.strip()
            if not line:
                continue
            try:
                message = json.loads(line)
            except json.JSONDecodeError:
                continue  # Not a JSON-RPC message
            if (
                not isinstance(message, dict)
                or message.get("id") != cls._TOOL_REQUEST_ID
            ):
                continue

            if "result" in message:
                return cls._extract_text(message["result"])
            if "error" in message:
                return {"error": message["error"], "success": False}
            break  # Matching id but neither result nor error

        return {"error": "Invalid response format", "success": False}

    async def call_context7_tool(
        self, tool_name: str, args: Dict[str, Any]
    ) -> Optional[Dict[str, Any]]:
        """Call a Context7 tool via direct JSONRPC with retry logic.

        Timeouts and connection errors are retried (up to three attempts with
        exponential backoff), so a call may take several multiples of
        ``SUBPROCESS_TIMEOUT`` before it gives up.

        Args:
            tool_name: Name of the Context7 tool to invoke.
            args: Arguments passed to the tool; must be JSON serialisable.

        Returns:
            ``{"text": ..., "success": True}`` on success, otherwise
            ``{"error": ..., "success": False}``. This method does not raise;
            every failure is logged and reported through the returned dict.
        """
        try:
            # Send initialization first
            init_request = {
                "jsonrpc": "2.0",
                "id": self._INIT_REQUEST_ID,
                "method": "initialize",
                "params": {
                    "protocolVersion": "2025-06-18",
                    "capabilities": {},
                    "clientInfo": {"name": "ankigen", "version": "1.0.0"},
                },
            }
            # Build the JSONRPC request
            request = {
                "jsonrpc": "2.0",
                "id": self._TOOL_REQUEST_ID,
                "method": "tools/call",
                "params": {"name": tool_name, "arguments": args},
            }

            # One JSON document per line, with a trailing newline
            input_data = "\n".join([json.dumps(init_request), json.dumps(request), ""])

            stdout = await self._run_context7_server(input_data.encode())
            return self._parse_tool_response(stdout)

        except Exception as e:
            logger.error(f"Error calling Context7 tool {tool_name}: {e}")
            return {"error": str(e), "success": False}

    def _parse_library_response(self, text: str) -> list[Dict[str, Any]]:
        """Parse Context7 response text into list of library dicts.

        A ``- Title:`` line starts a new entry; the following ID, snippet
        count and trust score lines are attached to it. Entries without an ID
        are dropped.

        Args:
            text: Raw text response from Context7

        Returns:
            List of library dicts with keys: title, id, snippets, trust
            (``snippets`` and ``trust`` are present only when they parsed).
        """
        libraries: list[Dict[str, Any]] = []
        current_lib: Dict[str, Any] = {}

        for raw_line in text.split("\n"):
            line = raw_line.strip()

            if line.startswith(self._TITLE_PREFIX):
                # A new title closes the previous entry
                if current_lib.get("id"):
                    libraries.append(current_lib)
                title = line.removeprefix(self._TITLE_PREFIX).strip().lower()
                current_lib = {"title": title}

            elif line.startswith(self._ID_PREFIX):
                current_lib["id"] = line.removeprefix(self._ID_PREFIX).strip()

            elif line.startswith(self._SNIPPETS_PREFIX):
                try:
                    current_lib["snippets"] = int(
                        line.removeprefix(self._SNIPPETS_PREFIX).strip()
                    )
                except ValueError:
                    pass  # Unparsable count: scoring falls back to 0

            elif line.startswith(self._TRUST_PREFIX):
                try:
                    current_lib["trust"] = float(
                        line.removeprefix(self._TRUST_PREFIX).strip()
                    )
                except ValueError:
                    pass  # Unparsable score: scoring falls back to 0

        if current_lib.get("id"):
            libraries.append(current_lib)

        return libraries

    def _score_library(self, lib: Dict[str, Any], search_term: str) -> float:
        """Score a library based on how well it matches the search term.

        Args:
            lib: Library dict with title, id, snippets, trust
            search_term: Lowercase search term

        Returns:
            Score (higher is better match)
        """
        score = 0.0
        lib_title = lib.get("title", "")
        lib_id = lib["id"].lower()

        # Name match tiers, strongest first; only the first hit counts
        if lib_title == search_term:
            score += 10000
        elif lib_id == f"/{search_term}-dev/{search_term}":
            score += 5000
        elif f"/{search_term}/" in lib_id or lib_id.endswith(f"/{search_term}"):
            score += 2000
        elif search_term in lib_title:
            score += 200 if lib_title.startswith(search_term) else 50

        # Bonus for code snippets (indicates main library)
        score += lib.get("snippets", 0) / 10

        # Bonus for trust score (high trust = official/authoritative)
        score += lib.get("trust", 0) * 100

        return score

    def _select_best_library(
        self, libraries: list[Dict[str, Any]], search_term: str
    ) -> Optional[Dict[str, Any]]:
        """Select the best matching library from a list.

        Args:
            libraries: List of library dicts
            search_term: Lowercase search term

        Returns:
            The highest-scoring library dict (the earliest one on ties), or
            None if no library scored above the initial threshold of -1,
            e.g. when ``libraries`` is empty.
        """
        best_lib = None
        best_score = -1.0

        for lib in libraries:
            score = self._score_library(lib, search_term)

            # Only log candidates that mention the search term
            if search_term in lib.get("title", "") or search_term in lib["id"].lower():
                logger.debug(
                    f"Scoring {lib['id']}: title='{lib.get('title', '')}', "
                    f"snippets={lib.get('snippets', 0)}, trust={lib.get('trust', 0)}, score={score:.2f}"
                )

            if score > best_score:
                best_score = score
                best_lib = lib

        if best_lib:
            logger.info(
                f"Selected library: {best_lib['id']} (title: {best_lib.get('title', 'unknown')}, "
                f"snippets: {best_lib.get('snippets', 0)}, trust: {best_lib.get('trust', 0)}, "
                f"score: {best_score:.2f})"
            )

        return best_lib

    async def resolve_library_id(self, library_name: str) -> Optional[str]:
        """Resolve a library name to a Context7-compatible ID.

        Args:
            library_name: Human-readable library name, e.g. ``"react"``.

        Returns:
            The ID of the best matching library, or None when the name is
            empty or too long, the lookup fails, or nothing usable comes back.
        """
        # Reject unusable names before spawning a subprocess
        if (
            not isinstance(library_name, str)
            or not library_name.strip()
            or len(library_name) > MAX_STRING_LENGTH
        ):
            logger.warning("Invalid library name; skipping Context7 lookup")
            return None

        logger.info(f"Resolving library ID for: {library_name}")

        result = await self.call_context7_tool(
            "resolve-library-id", {"libraryName": library_name}
        )

        if not (result and result.get("success") and result.get("text")):
            logger.warning(f"Could not resolve library ID for '{library_name}'")
            return None

        libraries = self._parse_library_response(result["text"])
        if not libraries:
            logger.warning(f"Could not resolve library ID for '{library_name}'")
            return None

        best_lib = self._select_best_library(libraries, library_name.lower())
        if best_lib:
            logger.info(f"Resolved '{library_name}' to ID: {best_lib['id']}")
            return best_lib["id"]

        logger.warning(f"Could not resolve library ID for '{library_name}'")
        return None

    async def get_library_docs(
        self, library_id: str, topic: Optional[str] = None, tokens: int = 5000
    ) -> Optional[str]:
        """Get documentation for a library.

        Args:
            library_id: Context7-compatible library ID; must start with ``/``
                and be at most ``MAX_STRING_LENGTH`` characters long.
            topic: Optional topic sent along with the request.
            tokens: Value sent as the ``tokens`` argument of the tool call.

        Returns:
            The documentation text, or None if the call failed or returned
            no text.

        Raises:
            ValidationError: If ``library_id`` is empty, does not start with
                ``/`` or is too long.
        """
        # Security: Validate library_id (should start with /)
        if (
            not library_id
            or not library_id.startswith("/")
            or len(library_id) > MAX_STRING_LENGTH
        ):
            logger.error(f"Invalid library ID format (security): '{library_id}'")
            raise ValidationError("Invalid library ID format")

        logger.info(
            f"Fetching docs for: {library_id}" + (f" (topic: {topic})" if topic else "")
        )

        args: Dict[str, Any] = {
            "context7CompatibleLibraryID": library_id,
            "tokens": tokens,
        }
        if topic:
            args["topic"] = topic

        result = await self.call_context7_tool("get-library-docs", args)

        if result and result.get("success") and result.get("text"):
            docs = result["text"]
            logger.info(f"Retrieved {len(docs)} characters of documentation")
            return docs

        logger.warning(f"Could not fetch docs for '{library_id}'")
        return None

    async def fetch_library_documentation(
        self, library_name: str, topic: Optional[str] = None, tokens: int = 5000
    ) -> Optional[str]:
        """Convenience method to resolve and fetch docs in one call.

        Returns None when the library name cannot be resolved; otherwise
        returns whatever ``get_library_docs`` returns for the resolved ID.
        """
        library_id = await self.resolve_library_id(library_name)
        if not library_id:
            return None

        return await self.get_library_docs(library_id, topic, tokens)


async def test_context7() -> None:
    """Manually exercise the Context7 client end to end.

    Resolves the ID for "react", then fetches its "hooks" documentation,
    printing a success or failure line for each step. Stops after the first
    step that fails.
    """
    ctx = Context7Client()

    print("Testing Context7 integration...")

    # Step 1: resolve the library name to an ID
    resolved_id = await ctx.resolve_library_id("react")
    if not resolved_id:
        print("✗ Failed to resolve library ID")
        return
    print(f"✓ Resolved 'react' to ID: {resolved_id}")

    # Step 2: fetch documentation for the resolved ID
    doc_text = await ctx.get_library_docs(resolved_id, topic="hooks", tokens=2000)
    if not doc_text:
        print("✗ Failed to fetch documentation")
        return
    print(f"✓ Fetched {len(doc_text)} characters of documentation")
    print(f"Preview: {doc_text[:300]}...")


# When executed as a script, run the manual integration check defined above
# inside a fresh asyncio event loop.
if __name__ == "__main__":
    asyncio.run(test_context7())