File size: 12,426 Bytes
0333a17
 
 
 
 
 
93bd7fb
 
 
 
 
 
0333a17
93bd7fb
 
 
 
 
 
0333a17
 
 
 
 
 
93bd7fb
 
 
 
 
 
 
 
0333a17
 
 
93bd7fb
0333a17
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
93bd7fb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0333a17
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2ec553e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0333a17
 
 
 
 
 
 
 
2ec553e
 
 
 
 
 
 
 
 
 
 
 
 
0333a17
 
 
 
 
 
 
 
93bd7fb
 
 
 
 
 
 
 
 
0333a17
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
93bd7fb
0333a17
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
"""Context7 integration for library documentation"""

import asyncio
import subprocess
import json
from typing import Optional, Dict, Any
from tenacity import (
    retry,
    stop_after_attempt,
    wait_exponential,
    retry_if_exception_type,
)
from ankigen_core.logging import logger
from ankigen_core.exceptions import (
    ValidationError,
)

# Upper bound on the length of a library ID accepted by get_library_docs.
MAX_STRING_LENGTH = 200  # Prevent excessively long inputs
# Seconds to wait for the Context7 subprocess to finish before it is killed.
SUBPROCESS_TIMEOUT = 60.0  # 60 second timeout for Context7 calls


class Context7Client:
    """Context7 MCP client for fetching library documentation"""

    def __init__(self):
        pass  # No state needed - each call creates fresh subprocess

    async def call_context7_tool(
        self, tool_name: str, args: Dict[str, Any]
    ) -> Optional[Dict[str, Any]]:
        """Call a Context7 tool via direct JSONRPC with retry logic.

        The subprocess round-trip is delegated to ``_run_context7_request``,
        which is retried (up to 3 attempts, exponential back-off) on
        ``TimeoutError`` / ``ConnectionError``. This method itself never
        raises an ``Exception``: any failure that survives the retries is
        logged and reported through the returned dict.

        Args:
            tool_name: Name of the Context7 tool to invoke.
            args: Arguments passed to the tool as the JSONRPC ``arguments``.

        Returns:
            ``{"text": ..., "success": True}`` when the tool returned content,
            otherwise ``{"error": ..., "success": False}``.
        """
        try:
            stdout = await self._run_context7_request(tool_name, args)
            return self._parse_tool_response(stdout, request_id=1)
        except Exception as e:
            logger.error(f"Error calling Context7 tool {tool_name}: {e}")
            return {"error": str(e), "success": False}

    # The retry decorator lives on the helper rather than on the public method:
    # the public method converts every exception into an error dict, so a
    # decorator placed there would never see an exception to retry on.
    @retry(
        stop=stop_after_attempt(3),
        wait=wait_exponential(multiplier=1, min=2, max=10),
        retry=retry_if_exception_type((TimeoutError, ConnectionError)),
        reraise=True,
    )
    async def _run_context7_request(
        self, tool_name: str, args: Dict[str, Any]
    ) -> bytes:
        """Spawn the Context7 server, send initialize + tools/call, return raw stdout.

        Raises:
            TimeoutError: If the subprocess does not finish within
                ``SUBPROCESS_TIMEOUT`` seconds (the process is killed first).
        """
        # Initialization request is sent first (id 0), then the tool call (id 1)
        init_request = {
            "jsonrpc": "2.0",
            "id": 0,
            "method": "initialize",
            "params": {
                "protocolVersion": "2025-06-18",
                "capabilities": {},
                "clientInfo": {"name": "ankigen", "version": "1.0.0"},
            },
        }
        request = {
            "jsonrpc": "2.0",
            "id": 1,
            "method": "tools/call",
            "params": {"name": tool_name, "arguments": args},
        }

        # One JSON document per line, with a trailing newline
        input_data = "\n".join([json.dumps(init_request), json.dumps(request), ""])

        process = await asyncio.create_subprocess_exec(
            "npx",
            "@upstash/context7-mcp",
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
        )

        try:
            stdout, stderr = await asyncio.wait_for(
                process.communicate(input=input_data.encode()),
                timeout=SUBPROCESS_TIMEOUT,
            )
        except asyncio.TimeoutError as exc:
            # Raise the builtin TimeoutError explicitly so the retry predicate
            # matches on every Python version.
            raise TimeoutError(
                f"Context7 subprocess timed out after {SUBPROCESS_TIMEOUT}s"
            ) from exc
        finally:
            # Covers timeout, unexpected errors and task cancellation alike;
            # a no-op when the process has already exited.
            await self._terminate_process(process)

        if process.returncode and stderr:
            logger.debug(
                f"Context7 subprocess exited with code {process.returncode}: "
                f"{stderr.decode(errors='replace').strip()}"
            )

        return stdout

    async def _terminate_process(self, process: "asyncio.subprocess.Process") -> None:
        """Kill *process* if it is still running; cleanup failures are only logged."""
        if process.returncode is not None:
            return
        try:
            process.kill()
            # Wait for the process to actually terminate
            await asyncio.wait_for(process.wait(), timeout=5.0)
        except Exception as cleanup_error:
            logger.error(f"Error during process cleanup: {cleanup_error}")

    @staticmethod
    def _parse_tool_response(stdout: bytes, request_id: int = 1) -> Dict[str, Any]:
        """Extract the tool result for *request_id* from newline-delimited JSONRPC output.

        Lines that are not JSON objects, or that answer a different request
        (such as the initialize response), are skipped.
        """
        for raw_line in stdout.decode(errors="replace").split("\n"):
            candidate = raw_line.strip()
            if not candidate:
                continue
            try:
                message = json.loads(candidate)
            except json.JSONDecodeError:
                continue  # Not a JSONRPC message
            if not isinstance(message, dict) or message.get("id") != request_id:
                continue

            if "result" in message:
                result = message["result"]
                content = result.get("content") if isinstance(result, dict) else None
                first_item = content[0] if content else None
                if isinstance(first_item, dict) and (
                    "text" in first_item or first_item.get("type") == "text"
                ):
                    text = first_item.get("text", "")
                    if result.get("isError"):
                        # The tool itself reported a failure; its text is an
                        # error message, not documentation.
                        return {"error": text, "success": False}
                    return {"text": text, "success": True}
                return {"error": "No content in response", "success": False}

            if "error" in message:
                return {"error": message["error"], "success": False}

        return {"error": "Invalid response format", "success": False}

    def _parse_library_response(self, text: str) -> list[Dict[str, Any]]:
        """Parse Context7 response text into list of library dicts.

        Args:
            text: Raw text response from Context7

        Returns:
            List of library dicts with keys: title, id, snippets, trust
        """
        libraries = []
        lines = text.split("\n")
        current_lib: Dict[str, Any] = {}

        for line in lines:
            line = line.strip()

            if line.startswith("- Title:"):
                if current_lib and current_lib.get("id"):
                    libraries.append(current_lib)
                current_lib = {"title": line.replace("- Title:", "").strip().lower()}

            elif line.startswith("- Context7-compatible library ID:"):
                lib_id = line.replace("- Context7-compatible library ID:", "").strip()
                if current_lib is not None:
                    current_lib["id"] = lib_id

            elif line.startswith("- Code Snippets:"):
                snippets_str = line.replace("- Code Snippets:", "").strip()
                try:
                    if current_lib is not None:
                        current_lib["snippets"] = int(snippets_str)
                except ValueError:
                    pass

            elif line.startswith("- Trust Score:"):
                score_str = line.replace("- Trust Score:", "").strip()
                try:
                    if current_lib is not None:
                        current_lib["trust"] = float(score_str)
                except ValueError:
                    pass

        if current_lib and current_lib.get("id"):
            libraries.append(current_lib)

        return libraries

    def _score_library(self, lib: Dict[str, Any], search_term: str) -> float:
        """Score a library based on how well it matches the search term.

        Args:
            lib: Library dict with title, id, snippets, trust
            search_term: Lowercase search term

        Returns:
            Score (higher is better match)
        """
        score = 0.0
        lib_title = lib.get("title", "")
        lib_id = lib["id"].lower()

        # Exact title match gets highest priority
        if lib_title == search_term:
            score += 10000
        elif lib_id == f"/{search_term}-dev/{search_term}":
            score += 5000
        elif f"/{search_term}/" in lib_id or lib_id.endswith(f"/{search_term}"):
            score += 2000
        elif search_term in lib_title:
            if lib_title == search_term:
                score += 1000
            elif lib_title.startswith(search_term):
                score += 200
            else:
                score += 50

        # Bonus for code snippets (indicates main library)
        snippets = lib.get("snippets", 0)
        score += snippets / 10

        # Bonus for trust score (high trust = official/authoritative)
        trust = lib.get("trust", 0)
        score += trust * 100

        return score

    def _select_best_library(
        self, libraries: list[Dict[str, Any]], search_term: str
    ) -> Optional[Dict[str, Any]]:
        """Select the best matching library from a list.

        Args:
            libraries: List of library dicts
            search_term: Lowercase search term

        Returns:
            Best matching library dict, or None if no match
        """
        best_lib = None
        best_score = -1.0

        for lib in libraries:
            score = self._score_library(lib, search_term)

            if search_term in lib.get("title", "") or search_term in lib["id"].lower():
                logger.debug(
                    f"Scoring {lib['id']}: title='{lib.get('title', '')}', "
                    f"snippets={lib.get('snippets', 0)}, trust={lib.get('trust', 0)}, score={score:.2f}"
                )

            if score > best_score:
                best_score = score
                best_lib = lib

        if best_lib:
            logger.info(
                f"Selected library: {best_lib['id']} (title: {best_lib.get('title', 'unknown')}, "
                f"snippets: {best_lib.get('snippets', 0)}, trust: {best_lib.get('trust', 0)}, "
                f"score: {best_score:.2f})"
            )

        return best_lib

    async def resolve_library_id(self, library_name: str) -> Optional[str]:
        """Look up the Context7-compatible ID for a library name.

        Calls the ``resolve-library-id`` tool, parses the returned listing and
        returns the ID of the best-scoring entry, or None (with a warning
        logged) when the call fails or nothing usable comes back.
        """
        logger.info(f"Resolving library ID for: {library_name}")

        response = await self.call_context7_tool(
            "resolve-library-id", {"libraryName": library_name}
        )

        candidates: list[Dict[str, Any]] = []
        if response and response.get("success") and response.get("text"):
            candidates = self._parse_library_response(response["text"])

        chosen = None
        if candidates:
            chosen = self._select_best_library(candidates, library_name.lower())

        if not chosen:
            logger.warning(f"Could not resolve library ID for '{library_name}'")
            return None

        logger.info(f"Resolved '{library_name}' to ID: {chosen['id']}")
        return chosen["id"]

    async def get_library_docs(
        self, library_id: str, topic: Optional[str] = None, tokens: int = 5000
    ) -> Optional[str]:
        """Fetch documentation text for a Context7 library ID.

        Args:
            library_id: Context7 library ID; must be non-empty, start with
                ``/`` and be at most ``MAX_STRING_LENGTH`` characters.
            topic: Optional topic forwarded to the tool when truthy.
            tokens: Value forwarded to the tool as ``tokens``.

        Returns:
            The documentation text, or None when the tool call did not
            succeed or returned no text.

        Raises:
            ValidationError: If ``library_id`` fails the format check.
        """
        # Security: only well-formed, bounded IDs are passed on
        id_is_acceptable = (
            library_id
            and library_id.startswith("/")
            and len(library_id) <= MAX_STRING_LENGTH
        )
        if not id_is_acceptable:
            logger.error(f"Invalid library ID format (security): '{library_id}'")
            raise ValidationError("Invalid library ID format")

        topic_suffix = f" (topic: {topic})" if topic else ""
        logger.info(f"Fetching docs for: {library_id}{topic_suffix}")

        tool_args = {"context7CompatibleLibraryID": library_id, "tokens": tokens}
        if topic:
            tool_args["topic"] = topic

        response = await self.call_context7_tool("get-library-docs", tool_args)

        documentation = None
        if response and response.get("success"):
            documentation = response.get("text")

        if not documentation:
            logger.warning(f"Could not fetch docs for '{library_id}'")
            return None

        logger.info(f"Retrieved {len(documentation)} characters of documentation")
        return documentation

    async def fetch_library_documentation(
        self, library_name: str, topic: Optional[str] = None, tokens: int = 5000
    ) -> Optional[str]:
        """Resolve a library name and fetch its docs in a single call.

        Returns None when the name cannot be resolved; otherwise returns
        whatever ``get_library_docs`` returns for the resolved ID.
        """
        resolved_id = await self.resolve_library_id(library_name)
        if resolved_id:
            return await self.get_library_docs(resolved_id, topic, tokens)
        return None


async def test_context7() -> None:
    """Manual smoke test: resolve 'react', then fetch its 'hooks' docs and print the outcome."""
    client = Context7Client()

    print("Testing Context7 integration...")

    # Step 1: resolve a library name to an ID
    library_id = await client.resolve_library_id("react")
    if not library_id:
        print("✗ Failed to resolve library ID")
        return
    print(f"✓ Resolved 'react' to ID: {library_id}")

    # Step 2: fetch documentation for the resolved ID
    docs = await client.get_library_docs(library_id, topic="hooks", tokens=2000)
    if not docs:
        print("✗ Failed to fetch documentation")
        return
    print(f"✓ Fetched {len(docs)} characters of documentation")
    print(f"Preview: {docs[:300]}...")


# Run the manual smoke test when this module is executed as a script.
if __name__ == "__main__":
    asyncio.run(test_context7())