SPerva committed on
Commit
57b8ecd
·
verified ·
1 Parent(s): dee7da9

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +522 -0
app.py ADDED
@@ -0,0 +1,522 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ GitHub MCP Server
4
+ Provides GitHub API access via Model Context Protocol using Gradio
5
+ """
6
+
7
+ import base64
8
+ import json
9
+ import logging
10
+ import os
11
+ from typing import List, Dict, Any
12
+
13
+ import aiohttp
14
+ import gradio as gr
15
+
16
# Runtime configuration, read once from the environment at import time.
LOG_LEVEL = os.getenv("LOG_LEVEL", "INFO")
# Optional personal access token; without it requests go out unauthenticated.
GITHUB_TOKEN = os.getenv("GITHUB_TOKEN", "")
# Base URL override, e.g. for GitHub Enterprise installs.
GITHUB_API_BASE = os.getenv("GITHUB_API_BASE_URL", "https://api.github.com")

logging.basicConfig(
    level=getattr(logging, LOG_LEVEL.upper()),
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
)
logger = logging.getLogger(__name__)

# API Constants
RESULTS_PER_PAGE = 100  # page size used for list endpoints
SEARCH_RESULTS_LIMIT = 50  # cap on code-search results per query
29
+
30
+
31
+ # ============================================================================
32
+ # Helper Functions
33
+ # ============================================================================
34
+
35
def create_headers() -> Dict[str, str]:
    """Build the standard header set for GitHub API calls.

    A token-based Authorization header is included only when a
    GITHUB_TOKEN is configured in the environment.

    Returns:
        Dictionary of HTTP headers for GitHub API requests
    """
    auth = {"Authorization": f"token {GITHUB_TOKEN}"} if GITHUB_TOKEN else {}
    return {
        "Accept": "application/vnd.github.v3+json",
        "User-Agent": "GitHub-MCP-Server/1.0",
        **auth,
    }
52
+
53
+
54
async def check_doc_folder(
    session: aiohttp.ClientSession,
    org: str,
    repo: str
) -> bool:
    """Probe whether a repository exposes a /doc folder.

    Args:
        session: shared aiohttp ClientSession (connection reuse)
        org: organization name
        repo: repository name

    Returns:
        True when GET .../contents/doc answers 200; False on any other
        status or on a request error.
    """
    target = f"{GITHUB_API_BASE}/repos/{org}/{repo}/contents/doc"
    try:
        async with session.get(target, headers=create_headers()) as resp:
            found = resp.status == 200
    except Exception as e:
        logger.debug(f"Error checking /doc folder for {org}/{repo}: {e}")
        found = False
    return found
79
+
80
+
81
def determine_content_type(filename: str) -> str:
    """
    Determine content type from filename

    Args:
        filename: Name of the file

    Returns:
        Content type: 'markdown', 'mermaid', 'svg', 'openapi', 'postman', or 'unknown'
    """
    lower_name = filename.lower()

    if lower_name.endswith(('.mmd', '.mermaid')):
        return 'mermaid'
    elif lower_name.endswith('.md'):
        return 'markdown'
    elif lower_name.endswith('.svg'):
        return 'svg'
    elif lower_name.endswith(('.yml', '.yaml')):
        return 'openapi'
    elif lower_name.endswith('.json'):
        # Postman exports are conventionally named "<name>.postman_collection.json",
        # so look for 'postman' anywhere in the name (a startswith check would
        # miss them); any other JSON file is assumed to be an OpenAPI spec.
        return 'postman' if 'postman' in lower_name else 'openapi'
    else:
        return 'unknown'
106
+
107
+
108
+ # ============================================================================
109
+ # Business Logic Functions (testable)
110
+ # ============================================================================
111
+
112
async def get_org_repos(org: str) -> List[Dict[str, Any]]:
    """
    List an organization's repositories, flagging those with a /doc folder.

    Strategy 1 issues a single GitHub Code Search request scoped to the
    org's /doc paths. If that request raises, or answers with a non-200
    status, execution falls through to Strategy 2, which pages through all
    org repositories and probes each one's /doc folder individually.

    Args:
        org: GitHub organization name

    Returns:
        List of repo dicts with keys: id, name, description, url, hasDocFolder.
        Strategy 1 returns only repos that have docs (hasDocFolder always True);
        Strategy 2 returns every repo with hasDocFolder set per probe.

    Raises:
        Exception: if the fallback repo-listing request fails
    """
    async with aiohttp.ClientSession() as session:
        headers = create_headers()

        # Strategy 1: Use GitHub Search API (efficient - one request)
        search_url = f"{GITHUB_API_BASE}/search/code"
        params = {
            "q": f"org:{org} path:/doc",
            "per_page": RESULTS_PER_PAGE
        }

        try:
            async with session.get(search_url, headers=headers, params=params) as response:
                if response.status == 200:
                    data = await response.json()

                    # Extract unique repositories from search results
                    # (one search hit per file; dedupe by repo name).
                    repos_with_docs = {}
                    for item in data.get("items", []):
                        repo_info = item.get("repository", {})
                        repo_name = repo_info.get("name")

                        if repo_name and repo_name not in repos_with_docs:
                            repos_with_docs[repo_name] = {
                                "id": str(repo_info.get("id", "")),
                                "name": repo_name,
                                "description": repo_info.get("description") or "",
                                "url": repo_info.get("html_url", ""),
                                "hasDocFolder": True
                            }

                    logger.info(f"Found {len(repos_with_docs)} repos with /doc via search")
                    return list(repos_with_docs.values())
                # NOTE: a non-200 search response is not raised here; it
                # deliberately falls through to the fallback below.

        except Exception as e:
            logger.warning(f"Search API failed: {e}, falling back to list all repos")

        # Strategy 2: Fallback - List all repos and check each one
        repos_url = f"{GITHUB_API_BASE}/orgs/{org}/repos"
        all_repos = []
        page = 1

        logger.info(f"Fetching repos for organization: {org}")

        while True:
            async with session.get(
                repos_url,
                headers=headers,
                params={"per_page": RESULTS_PER_PAGE, "page": page, "sort": "updated"}
            ) as response:
                if response.status != 200:
                    error_text = await response.text()
                    raise Exception(f"GitHub API error {response.status}: {error_text}")

                repos = await response.json()
                if not repos:
                    break

                all_repos.extend(repos)
                logger.info(f"Fetched page {page} ({len(repos)} repos)")
                page += 1

                # Stop if we got less than full page (last page)
                if len(repos) < RESULTS_PER_PAGE:
                    break

        logger.info(f"Total repos fetched: {len(all_repos)}")

        # Check each repo for /doc folder (sequential probe, one request per repo)
        result = []
        for idx, repo in enumerate(all_repos, 1):
            logger.info(f"Checking {idx}/{len(all_repos)}: {repo['name']}")
            has_doc = await check_doc_folder(session, org, repo["name"])

            result.append({
                "id": str(repo["id"]),
                "name": repo["name"],
                "description": repo.get("description") or "",
                "url": repo["html_url"],
                "hasDocFolder": has_doc
            })

        repos_with_docs_count = sum(1 for r in result if r["hasDocFolder"])
        logger.info(f"Found {repos_with_docs_count} repos with /doc folder")

        return result
198
+
199
+
200
async def get_repo_docs(org: str, repo: str) -> List[Dict[str, Any]]:
    """List supported documentation files in a repository's /doc folder.

    Only regular files with a supported extension are returned; supported
    types are Markdown, Mermaid, SVG, OpenAPI (YAML/JSON) and Postman.

    Args:
        org: GitHub organization name
        repo: Repository name

    Returns:
        List of dicts with keys: id (blob sha), name, path, type
        (from determine_content_type), url, download_url. Empty list when
        the repository has no /doc folder (HTTP 404).

    Raises:
        Exception: on any non-200/non-404 GitHub API response
    """
    # endswith accepts a tuple, so one call covers every supported extension.
    supported = ('.md', '.mmd', '.mermaid', '.svg', '.yml', '.yaml', '.json')

    async with aiohttp.ClientSession() as session:
        url = f"{GITHUB_API_BASE}/repos/{org}/{repo}/contents/doc"
        logger.info(f"Fetching docs from: {org}/{repo}/doc")

        async with session.get(url, headers=create_headers()) as response:
            if response.status == 404:
                logger.warning(f"No /doc folder found in {org}/{repo}")
                return []
            if response.status != 200:
                error_text = await response.text()
                raise Exception(f"GitHub API error {response.status}: {error_text}")
            contents = await response.json()

    # Directories are ignored entirely; only files count toward "skipped".
    files = [entry for entry in contents if entry["type"] == "file"]
    docs = [
        {
            "id": entry["sha"],
            "name": entry["name"],
            "path": entry["path"],
            "type": determine_content_type(entry["name"]),
            "url": entry["html_url"],
            "download_url": entry.get("download_url", ""),
        }
        for entry in files
        if entry["name"].lower().endswith(supported)
    ]
    skipped = len(files) - len(docs)

    logger.info(f"Found {len(docs)} documentation files ({skipped} skipped)")
    return docs
280
+
281
+
282
async def get_file_content(org: str, repo: str, path: str) -> Dict[str, Any]:
    """Fetch one file from GitHub and decode its base64 payload.

    GitHub's contents API returns file bodies base64-encoded with embedded
    newlines; those are stripped before decoding. If decoding fails the raw
    encoded payload is returned instead (best effort).

    Args:
        org: GitHub organization name
        repo: Repository name
        path: File path within the repository (e.g. "doc/README.md")

    Returns:
        Dict with keys: name, path, content (decoded UTF-8 text when
        possible), encoding (as reported by the API, default "base64")

    Raises:
        Exception: if the file is missing (404) or the API answers non-200
    """
    async with aiohttp.ClientSession() as session:
        url = f"{GITHUB_API_BASE}/repos/{org}/{repo}/contents/{path}"
        logger.info(f"Fetching content: {org}/{repo}/{path}")

        async with session.get(url, headers=create_headers()) as response:
            if response.status == 404:
                raise Exception(f"File not found: {path}")
            if response.status != 200:
                error_text = await response.text()
                raise Exception(f"GitHub API error {response.status}: {error_text}")
            data = await response.json()

    raw = data.get("content") or ""
    content = ""
    if raw:
        try:
            content = base64.b64decode(raw.replace('\n', '')).decode('utf-8')
            logger.info(f"Decoded content ({len(content)} characters)")
        except Exception as e:
            logger.warning(f"Failed to decode content: {e}")
            content = raw  # fall back to the undecoded payload

    return {
        "name": data["name"],
        "path": data["path"],
        "content": content,
        "encoding": data.get("encoding", "base64")
    }
339
+
340
+
341
async def search_documentation(org: str, query: str) -> List[Dict[str, Any]]:
    """Search /doc folders across an organization via GitHub code search.

    Args:
        org: GitHub organization name
        query: free-text search string

    Returns:
        List of dicts with keys: name, path, repository (repo name), url

    Raises:
        Exception: on rate limiting (HTTP 403) or any other non-200 response
    """
    params = {
        "q": f"org:{org} path:/doc {query}",
        "per_page": SEARCH_RESULTS_LIMIT
    }

    async with aiohttp.ClientSession() as session:
        logger.info(f"Searching for: '{query}' in {org}")

        async with session.get(
            f"{GITHUB_API_BASE}/search/code",
            headers=create_headers(),
            params=params,
        ) as response:
            if response.status == 403:
                raise Exception("Search API rate limit exceeded. Try again later.")
            if response.status != 200:
                error_text = await response.text()
                raise Exception(f"GitHub API error {response.status}: {error_text}")
            data = await response.json()

    results = [
        {
            "name": hit["name"],
            "path": hit["path"],
            "repository": hit.get("repository", {}).get("name", ""),
            "url": hit["html_url"],
        }
        for hit in data.get("items", [])
    ]

    logger.info(f"Found {len(results)} matching files")
    return results
374
+
375
+
376
+ # ============================================================================
377
+ # Gradio MCP Tool Functions
378
+ # ============================================================================
379
+
380
async def get_org_repos_tool(org: str) -> str:
    """
    Fetch all repositories from a GitHub organization with /doc folder detection.

    Delegates to get_org_repos, which prefers the GitHub Search API and
    falls back to per-repo probing when search is unavailable. Errors are
    reported as a JSON object rather than raised.

    Args:
        org (str): GitHub organization name (e.g., "microsoft", "anthropics")

    Returns:
        str: JSON string containing list of repositories with their metadata
    """
    try:
        repos = await get_org_repos(org)
    except Exception as e:
        return json.dumps({"error": str(e)}, indent=2)
    return json.dumps(repos, indent=2)
399
+
400
+
401
async def get_repo_docs_tool(org: str, repo: str) -> str:
    """
    Get all documentation files from a repository's /doc folder.

    Delegates to get_repo_docs, which keeps only supported file types
    (Markdown, Mermaid, SVG, OpenAPI, Postman). Errors are reported as a
    JSON object rather than raised.

    Args:
        org (str): GitHub organization name
        repo (str): Repository name

    Returns:
        str: JSON string containing list of documentation files with metadata
    """
    try:
        docs = await get_repo_docs(org, repo)
    except Exception as e:
        return json.dumps({"error": str(e)}, indent=2)
    return json.dumps(docs, indent=2)
419
+
420
+
421
async def get_file_content_tool(org: str, repo: str, path: str) -> str:
    """
    Fetch and decode content of a specific file from GitHub.

    Delegates to get_file_content, which base64-decodes the payload the
    GitHub API returns. Errors are reported as a JSON object rather than
    raised.

    Args:
        org (str): GitHub organization name
        repo (str): Repository name
        path (str): File path within repository (e.g., "doc/README.md")

    Returns:
        str: JSON string containing file metadata and decoded content
    """
    try:
        payload = await get_file_content(org, repo, path)
    except Exception as e:
        return json.dumps({"error": str(e)}, indent=2)
    return json.dumps(payload, indent=2)
440
+
441
+
442
async def search_documentation_tool(org: str, query: str) -> str:
    """
    Search for documentation files across all repositories in an organization.

    Delegates to search_documentation, which uses the GitHub Code Search
    API over /doc folders. Errors are reported as a JSON object rather
    than raised.

    Args:
        org (str): GitHub organization name
        query (str): Search query string (e.g., "authentication", "API", "tutorial")

    Returns:
        str: JSON string containing list of matching files with their locations
    """
    try:
        hits = await search_documentation(org, query)
    except Exception as e:
        return json.dumps({"error": str(e)}, indent=2)
    return json.dumps(hits, indent=2)
460
+
461
+
462
+ # ============================================================================
463
+ # Gradio Interface
464
+ # ============================================================================
465
+
466
+ # Create individual interfaces for each tool
467
# Create individual interfaces for each tool.
# Each gr.Interface wraps one async tool function; Gradio exposes these as
# MCP tools when demo.launch(mcp_server=True) is called below.
get_repos_interface = gr.Interface(
    fn=get_org_repos_tool,
    inputs=[gr.Textbox(label="Organization", placeholder="e.g., anthropics")],
    outputs=[gr.Textbox(label="Repositories (JSON)", lines=20)],
    title="Get Organization Repos",
    description="Fetch all repositories from a GitHub organization with /doc folder detection",
)

get_docs_interface = gr.Interface(
    fn=get_repo_docs_tool,
    inputs=[
        gr.Textbox(label="Organization", placeholder="e.g., anthropics"),
        gr.Textbox(label="Repository", placeholder="e.g., anthropic-sdk-python"),
    ],
    outputs=[gr.Textbox(label="Documentation Files (JSON)", lines=20)],
    title="Get Repository Docs",
    description="Get all documentation files from a repository's /doc folder",
)

get_content_interface = gr.Interface(
    fn=get_file_content_tool,
    inputs=[
        gr.Textbox(label="Organization", placeholder="e.g., anthropics"),
        gr.Textbox(label="Repository", placeholder="e.g., anthropic-sdk-python"),
        gr.Textbox(label="File Path", placeholder="e.g., doc/README.md"),
    ],
    outputs=[gr.Textbox(label="File Content (JSON)", lines=20)],
    title="Get File Content",
    description="Fetch and decode content of a specific file from GitHub",
)

search_docs_interface = gr.Interface(
    fn=search_documentation_tool,
    inputs=[
        gr.Textbox(label="Organization", placeholder="e.g., anthropics"),
        gr.Textbox(label="Search Query", placeholder="e.g., streaming"),
    ],
    outputs=[gr.Textbox(label="Search Results (JSON)", lines=20)],
    title="Search Documentation",
    description="Search for documentation files across all repositories in an organization",
)

# Combine into tabbed interface — one tab per tool, in the order listed.
demo = gr.TabbedInterface(
    [get_repos_interface, get_docs_interface, get_content_interface, search_docs_interface],
    ["Get Repos", "Get Docs", "Get Content", "Search"],
    title="GitHub MCP Server",
)
515
+
516
+
517
+ # ============================================================================
518
+ # Main Entry Point
519
+ # ============================================================================
520
+
521
+ if __name__ == "__main__":
522
+ demo.launch(mcp_server=True, server_name="0.0.0.0", server_port=7860)