AdityaDevx committed on
Commit
2d48d1e
·
verified ·
1 Parent(s): 1c5f3c1

Upload 2 files

Browse files
Files changed (2) hide show
  1. requirements.txt +16 -0
  2. server.py +630 -0
requirements.txt ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ gradio[oauth,mcp]==5.45.0
2
+ fastapi==0.115.2
3
+ uvicorn==0.24.0
4
+ mcp==1.10.1
5
+ smolagents[mcp]>=0.1.0
6
+ requests>=2.28.0
7
+ python-dotenv>=1.0.0
8
+ pydantic>=2.0
9
+ datasets>=2.0.0
10
+ langchain>=0.1.0
11
+ langchain-core>=0.1.0
12
+ langchain-community>=0.0.20
13
+ langchain-text-splitters>=0.0.1
14
+ rank-bm25>=0.2.2
15
+ markdownify>=0.11.6
16
+ beautifulsoup4>=4.12.0
server.py ADDED
@@ -0,0 +1,630 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import os
3
+ import base64
4
+ from typing import Dict, List, Any
5
+ import requests
6
+ import gradio as gr
7
+ from dotenv import load_dotenv
8
+ from datasets import load_dataset
9
+ from langchain_core.documents import Document
10
+ from langchain_text_splitters import RecursiveCharacterTextSplitter
11
+ from langchain_community.retrievers import BM25Retriever
12
+
13
+ # Load environment variables
14
+ load_dotenv()
15
+
16
+ class GitHubMCPServer:
17
+ """GitHub MCP Server for repository scanning, file access, and CVE retrieval"""
18
+
19
+ def __init__(self):
20
+ self.github_token = os.getenv("GITHUB_TOKEN")
21
+ if not self.github_token:
22
+ raise ValueError("GITHUB_TOKEN environment variable is required")
23
+
24
+ self.headers = {
25
+ "Authorization": f"token {self.github_token}",
26
+ "Accept": "application/vnd.github.v3+json"
27
+ }
28
+
29
+ # Initialize CVE retriever
30
+ self.cve_retriever = None
31
+ self._initialize_cve_retriever()
32
+
33
def _initialize_cve_retriever(self):
    """Build the BM25 retriever over the CVE records of 'CIRCL/vulnerability'.

    On success ``self.cve_retriever`` holds a BM25Retriever returning 3
    results per query; on any failure it is left as None and diagnostics are
    printed instead of raising.
    """

    def is_cve(row):
        # Keep only CVE entries (the dataset also carries GHSA identifiers).
        return str(row["id"]).startswith("CVE-")

    try:
        print("πŸ”„ Loading CVE dataset from Hugging Face...")

        # Login using `huggingface-cli login` to access this dataset
        knowledge_base = load_dataset("CIRCL/vulnerability", split="train")
        record_count = len(knowledge_base)
        print(f"πŸ“Š Loaded {record_count} vulnerability records from Hugging Face")

        # Debug aid: show the columns and the first two records.
        print("πŸ” Dataset structure analysis:")
        print(f"Dataset columns: {knowledge_base.column_names}")
        for index in range(min(2, record_count)):
            print(f"Record {index}: {dict(knowledge_base[index])}")

        print("πŸ” Filtering for CVE entries only...")
        cve_dataset = knowledge_base.filter(is_cve)
        print(f"πŸ“Š Filtered to {len(cve_dataset)} CVE records (excluded GHSA entries)")

        # One Document per record that has both an id and a description.
        id_desc_pairs = (
            (row.get('id', ''), row.get('description', '')) for row in cve_dataset
        )
        source_docs = [
            Document(
                page_content=f"CVE ID: {cve_id}\nDescription: {description}",
                metadata={'cve_id': str(cve_id), 'description': str(description)},
            )
            for cve_id, description in id_desc_pairs
            if cve_id and description
        ]
        print(f"πŸ“ Created {len(source_docs)} CVE document objects")

        if not source_docs:
            print("❌ No valid CVE documents found in dataset")
            self.cve_retriever = None
            return

        # Chunk the documents; fall back to a minimally configured splitter
        # if the fully configured one cannot be constructed.
        print("πŸ”„ Initializing text splitter...")
        try:
            text_splitter = RecursiveCharacterTextSplitter(
                chunk_size=500,  # Characters per chunk
                chunk_overlap=50,  # Overlap between chunks to maintain context
                add_start_index=True,
                strip_whitespace=True,
                separators=["\n\n", "\n", ".", " ", ""],  # Priority order for splitting
            )
            print("βœ… Text splitter initialized successfully")
        except Exception as splitter_error:
            print(f"❌ Text splitter initialization failed: {splitter_error}")
            text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
            print("βœ… Using simple fallback text splitter")

        print("πŸ”„ Processing documents with text splitter...")
        try:
            docs_processed = text_splitter.split_documents(source_docs)
            print(f"πŸ“š Knowledge base prepared with {len(docs_processed)} document chunks")
        except Exception as processing_error:
            print(f"❌ Document processing failed: {processing_error}")
            # Index the unsplit documents rather than giving up.
            docs_processed = source_docs
            print(f"βœ… Using original documents without splitting: {len(docs_processed)} documents")

        print("πŸ”„ Initializing BM25 retriever...")
        try:
            self.cve_retriever = BM25Retriever.from_documents(docs_processed, k=3)
            print(f"βœ… CVE Retriever initialized with {len(docs_processed)} document chunks")
        except Exception as retriever_error:
            print(f"❌ BM25 retriever initialization failed: {retriever_error}")
            self.cve_retriever = None

    except Exception as e:
        for message in (
            f"❌ Error initializing CVE retriever: {str(e)}",
            "πŸ’‘ Make sure you have access to the Hugging Face dataset 'CIRCL/vulnerability'",
            "πŸ’‘ You may need to login with: huggingface-cli login",
            "πŸ’‘ Dataset columns should be: id, title, description, cpes",
        ):
            print(message)
        self.cve_retriever = None
129
+
130
def get_repository_info(self, owner: str, repo: str) -> dict:
    """Get basic repository information from the GitHub REST API.

    Args:
        owner: Account or organisation that owns the repository.
        repo: Repository name.

    Returns:
        A dict with ``"success": True`` and the repository metadata, or
        ``"success": False`` with an ``"error"`` message (plus
        ``"status_code"`` when GitHub answered with a non-200 status).
        This method never raises; failures are reported in the dict.
    """
    try:
        url = f"https://api.github.com/repos/{owner}/{repo}"
        # Bounded wait so a stalled connection cannot hang the tool call.
        response = requests.get(url, headers=self.headers, timeout=15)

        if response.status_code != 200:
            return {
                "success": False,
                "error": f"Repository not found or inaccessible (HTTP {response.status_code})",
                "status_code": response.status_code
            }

        data = response.json()
        return {
            "success": True,
            "repository_name": data["name"],
            "full_name": data["full_name"],
            # GitHub sends these keys with an explicit null when unset, so a
            # .get() default would never apply; use `or` to fall back.
            "description": data.get("description") or "No description available",
            "primary_language": data.get("language") or "Unknown",
            "size_kb": data["size"],
            "stars": data["stargazers_count"],
            "forks": data["forks_count"],
            "default_branch": data["default_branch"],
            "created_date": data["created_at"][:10],
            "last_updated": data["updated_at"][:10],
            "is_private": data["private"],
            "clone_url": data["clone_url"]
        }

    except Exception as e:
        # Tool boundary: report the failure to the caller instead of raising.
        return {
            "success": False,
            "error": f"Failed to fetch repository information: {str(e)}"
        }
165
+
166
def get_file_content(self, owner: str, repo: str, path: str) -> str:
    """Get the text content of one file in a GitHub repository.

    Args:
        owner: Account or organisation that owns the repository.
        repo: Repository name.
        path: Path of the file inside the repository.

    Returns:
        The decoded UTF-8 file content, or a string starting with
        ``"ERROR:"`` describing why the content could not be returned.
        This method never raises.
    """
    try:
        url = f"https://api.github.com/repos/{owner}/{repo}/contents/{path}"
        # Bounded wait so a stalled connection cannot hang the tool call.
        response = requests.get(url, headers=self.headers, timeout=15)

        if response.status_code != 200:
            return f"ERROR: File '{path}' not found or inaccessible (HTTP {response.status_code})"

        data = response.json()
        # A directory path yields a JSON array, so check for a dict before
        # reading the file fields. A non-base64 payload means the content was
        # not inlined and cannot be decoded here.
        is_inline_file = (
            isinstance(data, dict)
            and data.get("type") == "file"
            and "content" in data
            and data.get("encoding", "base64") == "base64"
        )
        if not is_inline_file:
            return f"ERROR: Path '{path}' is not a file or content is not available"

        try:
            return base64.b64decode(data["content"]).decode('utf-8')
        except UnicodeDecodeError:
            return f"ERROR: File '{path}' contains binary data that cannot be decoded as text"

    except Exception as e:
        # Tool boundary: report the failure to the caller instead of raising.
        return f"ERROR: Failed to fetch file content for '{path}': {str(e)}"
188
+
189
def scan_repository(self, owner: str, repo: str, extensions: str = ".py,.js,.ts,.php,.java") -> list:
    """List up to 50 repository file paths whose names end with the given extensions.

    Args:
        owner: Account or organisation that owns the repository.
        repo: Repository name.
        extensions: Comma-separated suffixes; blanks and surrounding
            whitespace are ignored.

    Returns:
        A list of file paths, or a one-element list holding an
        ``"ERROR: ..."`` message when scanning fails.
    """
    collected = []
    try:
        wanted = list(filter(None, map(str.strip, extensions.split(","))))
        self._scan_directory_sync(owner, repo, "", wanted, collected)

        # Only the first 50 matches are returned.
        paths = []
        for entry in collected[:50]:
            paths.append(entry.get('path', ''))
        return paths
    except Exception as error:
        return ["ERROR: Failed to scan repository: " + str(error)]
202
+
203
def _scan_directory_sync(self, owner: str, repo: str, path: str, extensions: List[str], all_files: List[Dict]):
    """Recursively collect matching files below ``path`` into ``all_files``.

    Args:
        owner: Account or organisation that owns the repository.
        repo: Repository name.
        path: Directory path inside the repository ("" for the root).
        extensions: File-name suffixes to keep.
        all_files: Output list; a dict with name/path/type/size/sha is
            appended for every matching file.

    Scanning is best-effort: a directory that cannot be listed is reported
    on stdout and skipped, and no exception is propagated to the caller.
    """
    max_files = 100  # stop descending into new directories past this many hits
    url = f"https://api.github.com/repos/{owner}/{repo}/contents/{path}"

    try:
        # Bounded wait so one stalled request cannot hang the whole scan.
        response = requests.get(url, headers=self.headers, timeout=15)
        if response.status_code != 200:
            print(f"Skipping '{path or '/'}': GitHub API returned HTTP {response.status_code}")
            return
        entries = response.json()
    except Exception as error:
        # Keep the scan best-effort, but leave a trace of what was skipped.
        print(f"Skipping '{path or '/'}': {error}")
        return

    # A directory listing is a JSON array; anything else has nothing to walk.
    if not isinstance(entries, list):
        return

    suffixes = tuple(extensions)
    for item in entries:
        if not isinstance(item, dict):
            continue
        kind = item.get("type")
        if kind == "file":
            name = item.get("name", "")
            if name.endswith(suffixes):
                all_files.append({
                    "name": name,
                    "path": item.get("path", ""),
                    "type": kind,
                    "size": item.get("size", 0),
                    "sha": item.get("sha")
                })
        elif kind == "dir" and len(all_files) < max_files:
            self._scan_directory_sync(owner, repo, item.get("path", ""), extensions, all_files)
225
+
226
def search_cve_database(self, query: str) -> str:
    """Search the CVE retriever and return a Markdown-formatted report.

    Args:
        query: Free-text vulnerability query.

    Returns:
        A report listing each retrieved document (CVE ID and a description
        cut to 200 characters) followed by a short summary; or a message
        when the retriever is unavailable, nothing matches, or an error
        occurs. This method never raises.
    """
    if not self.cve_retriever:
        return "❌ CVE retriever not properly initialized. Please check Hugging Face dataset access."

    try:
        hits = self.cve_retriever.invoke(query)
        if not hits:
            return f"No relevant CVE information found for query: '{query}'"

        pieces = [f"πŸ” **CVE Knowledge Base Results for: '{query}'**\n\n"]

        for position, hit in enumerate(hits, 1):
            meta = hit.metadata
            pieces.append(f"**Result {position}:**\n")
            pieces.append(f"- **CVE ID**: {meta.get('cve_id', 'Unknown')}\n")

            # Prefer the metadata description; otherwise recover it from the
            # "Description:" line of the page content.
            text = meta.get('description', '')
            if not text:
                text = 'No description available'
                for line in hit.page_content.split('\n'):
                    if line.startswith('Description:'):
                        text = line.replace('Description: ', '').strip()
                        break

            ellipsis = '...' if len(text) > 200 else ''
            pieces.append(f"- **Description**: {text[:200]}{ellipsis}\n")
            pieces.append("---\n")

        # Summary section: IDs of the retrieved documents and the hit count.
        found_ids = []
        for hit in hits:
            if hit.metadata.get('cve_id'):
                found_ids.append(hit.metadata.get('cve_id'))
        more = '...' if len(found_ids) > 3 else ''

        pieces.append(f"\n**πŸ“Š Analysis Summary:**\n")
        pieces.append(f"- **CVE Examples**: {', '.join(found_ids[:3])}{more}\n")
        pieces.append(f"- **Total Matches**: {len(hits)}\n")

        return "".join(pieces)

    except Exception as e:
        return f"❌ Error retrieving CVE information: {str(e)}"
267
+
268
def simple_cve_search(self, query: str, k: int = 3) -> str:
    """Return the top ``k`` CVE matches as a compact numbered list.

    Args:
        query: Free-text vulnerability query.
        k: Number of results to request. It is coerced to ``int`` because UI
            sliders and JSON clients may deliver it as a float.

    Returns:
        A numbered list of CVE IDs with descriptions cut to 150 characters;
        or a message when the retriever is unavailable, nothing matches, or
        an error occurs. This method never raises.
    """
    if not self.cve_retriever:
        return "❌ CVE retriever not properly initialized. Please check Hugging Face dataset access."

    try:
        requested_k = int(k)

        # The retriever is shared, so its k is overridden only for this call.
        # The finally clause restores it even when invoke() raises.
        original_k = self.cve_retriever.k
        self.cve_retriever.k = requested_k
        try:
            docs = self.cve_retriever.invoke(query)
        finally:
            self.cve_retriever.k = original_k

        if not docs:
            return f"No relevant CVE information found for query: '{query}'"

        result = f"Top {len(docs)} CVE matches for '{query}':\n\n"

        for i, doc in enumerate(docs, 1):
            metadata = doc.metadata
            cve_id = metadata.get('cve_id', 'Unknown')

            # Prefer the metadata description; otherwise recover it from the
            # "Description:" line of the page content.
            description = metadata.get('description', '')
            if not description:
                content_lines = doc.page_content.split('\n')
                desc_line = next((line for line in content_lines if line.startswith('Description:')), '')
                description = desc_line.replace('Description: ', '').strip() if desc_line else 'No description available'

            result += f"{i}. {cve_id}\n"
            result += f" {description[:150]}{'...' if len(description) > 150 else ''}\n\n"

        return result.strip()

    except Exception as e:
        return f"❌ Error retrieving CVE information: {str(e)}"
308
+
309
def get_nvd_cve_details(self, cve_id: str) -> str:
    """
    Fetches detailed CVE information from NVD (National Vulnerability Database).

    Args:
        cve_id: The CVE identifier (e.g., 'CVE-2019-16515'). Surrounding
            whitespace and letter case are normalised before validation.

    Returns:
        Formatted string containing detailed CVE information from NVD, or a
        message describing why the details could not be retrieved. This
        method never raises.
    """
    import re

    # Validate before any network work. Doing this outside the try block
    # also guarantees the URL variables below are bound in every handler.
    normalized = cve_id.strip().upper() if isinstance(cve_id, str) else cve_id
    if not isinstance(normalized, str) or not re.fullmatch(r"CVE-\d{4}-\d{4,}", normalized):
        return f"❌ Invalid CVE ID format: '{normalized}'\nCVE ID must look like 'CVE-YYYY-NNNN' (e.g., CVE-2019-16515)"
    cve_id = normalized

    nvd_api_url = "https://services.nvd.nist.gov/rest/json/cves/2.0"
    nvd_web_url = f"https://nvd.nist.gov/vuln/detail/{cve_id}"

    try:
        params = {"cveId": cve_id}
        headers = {
            "User-Agent": "VulnerabilityScanner/1.0 (GitHub Security Analysis Tool)"
        }

        print(f"πŸ” Fetching NVD details for {cve_id}...")
        response = requests.get(nvd_api_url, params=params, headers=headers, timeout=15)

        if response.status_code == 404:
            return f"⚠️ CVE not found: {cve_id}\n\nπŸ”— **NVD URL**: {nvd_web_url}\n\nThe CVE may not exist or may not yet be published in NVD."

        if response.status_code == 403:
            return f"❌ Access denied to NVD API (HTTP 403)\n\nThis might be due to rate limiting. Please try again in a few moments.\n\nπŸ”— **NVD URL**: {nvd_web_url}"

        if response.status_code != 200:
            return f"❌ NVD API request failed with status {response.status_code}\n\nπŸ”— **NVD URL**: {nvd_web_url}\n\nYou can view the CVE details directly on the NVD website."

        data = response.json()

        # Treat an empty result set and a missing/empty list the same way.
        vulnerabilities = data.get('vulnerabilities') or []
        if data.get('resultsPerPage', 0) == 0 or not vulnerabilities:
            return f"⚠️ CVE not found in NVD database: {cve_id}\n\nπŸ”— **NVD URL**: {nvd_web_url}\n\nNote: The CVE may not yet be published in NVD or the ID might be incorrect."

        vuln = vulnerabilities[0]['cve']

        parts = [
            f"πŸ“‹ **NVD CVE Details: {cve_id}**\n\n",
            f"πŸ”— **NVD URL**: {nvd_web_url}\n\n",
            # Status and dates (timestamps are cut to their YYYY-MM-DD part)
            f"**Status**: {vuln.get('vulnStatus', 'N/A')}\n",
            f"**Published**: {vuln.get('published', 'N/A')[:10]}\n",
            f"**Last Modified**: {vuln.get('lastModified', 'N/A')[:10]}\n\n",
        ]

        # Description: first English entry only
        for desc in vuln.get('descriptions', []):
            if desc.get('lang') == 'en':
                parts.append(f"**πŸ“ Description**:\n{desc.get('value', 'N/A')}\n\n")
                break

        metrics = vuln.get('metrics', {})

        # CVSS v3.x (v3.1 preferred over v3.0)
        if 'cvssMetricV31' in metrics or 'cvssMetricV30' in metrics:
            cvss_key = 'cvssMetricV31' if 'cvssMetricV31' in metrics else 'cvssMetricV30'
            cvss_v3 = metrics[cvss_key][0]['cvssData']

            parts.append(f"**🎯 CVSS v3 Score**:\n")
            parts.append(f"- **Base Score**: {cvss_v3.get('baseScore', 'N/A')} ({cvss_v3.get('baseSeverity', 'N/A')})\n")
            for label, key in (
                ("Vector String", "vectorString"),
                ("Attack Vector", "attackVector"),
                ("Attack Complexity", "attackComplexity"),
                ("Privileges Required", "privilegesRequired"),
                ("User Interaction", "userInteraction"),
                ("Scope", "scope"),
                ("Confidentiality Impact", "confidentialityImpact"),
                ("Integrity Impact", "integrityImpact"),
                ("Availability Impact", "availabilityImpact"),
            ):
                parts.append(f"- **{label}**: {cvss_v3.get(key, 'N/A')}\n")
            parts.append("\n")

        # CVSS v2 (if available); its severity sits beside cvssData, not in it
        if 'cvssMetricV2' in metrics:
            v2_entry = metrics['cvssMetricV2'][0]
            cvss_v2 = v2_entry['cvssData']
            parts.append(f"**CVSS v2 Score**:\n")
            parts.append(f"- **Base Score**: {cvss_v2.get('baseScore', 'N/A')} ({v2_entry.get('baseSeverity', 'N/A')})\n")
            parts.append(f"- **Vector String**: {cvss_v2.get('vectorString', 'N/A')}\n\n")

        # CWE (Common Weakness Enumeration): de-duplicated with dict.fromkeys
        # so the output order is stable between calls.
        cwe_list = [
            desc.get('value', 'N/A')
            for weakness in vuln.get('weaknesses', [])
            for desc in weakness.get('description', [])
            if desc.get('lang') == 'en'
        ]
        if cwe_list:
            parts.append(f"**πŸ” CWE (Common Weakness Enumeration)**:\n")
            parts.append(f"- {', '.join(dict.fromkeys(cwe_list))}\n\n")

        # References
        references = vuln.get('references', [])
        if references:
            parts.append(f"**πŸ”— References** (showing first 5):\n")
            for i, ref in enumerate(references[:5], 1):
                parts.append(f"{i}. [{ref.get('source', 'Source')}]({ref.get('url', '#')})\n")
            if len(references) > 5:
                parts.append(f"\n... and {len(references) - 5} more references\n")
            parts.append("\n")

        parts.append(f"---\n")
        parts.append(f"πŸ’‘ **Tip**: Use this CVE information to cross-reference vulnerabilities found in code analysis.\n")

        return "".join(parts)

    except requests.exceptions.Timeout:
        return f"⏱️ Request to NVD API timed out for {cve_id}\n\nPlease try again or visit: {nvd_web_url}"

    except requests.exceptions.RequestException as e:
        return f"❌ Network error while fetching CVE details: {str(e)}\n\nπŸ”— **NVD URL**: {nvd_web_url}"

    except Exception as e:
        return f"❌ Unexpected error fetching NVD details for {cve_id}: {str(e)}\n\nπŸ”— **NVD URL**: {nvd_web_url}"
434
+
435
def search_and_fetch_cve_details(self, query: str, max_nvd_fetches: int = 5) -> str:
    """
    Smart combined function: Searches CVE database and automatically fetches NVD details.

    This function:
    1. Searches the CVE knowledge base (RAG) for relevant vulnerabilities
    2. Parses the distinct CVE IDs out of the search report
    3. Fetches detailed NVD information for the first ``max_nvd_fetches`` IDs
    4. Returns combined results with both RAG data and NVD details

    Args:
        query: Vulnerability search query (e.g., "SQL injection", "XSS")
        max_nvd_fetches: Maximum number of CVEs to fetch NVD details for
            (default: 5). Coerced to a non-negative ``int`` because UI
            sliders and JSON clients may deliver it as a float.

    Returns:
        Formatted string with RAG results + detailed NVD information, the
        bare search message when the search fails or finds nothing, or an
        error message. This method never raises.
    """
    import re
    import time

    try:
        # A float would break the slice below and a negative value would
        # silently drop the last ID, so normalise the limit first.
        fetch_limit = max(0, int(max_nvd_fetches))

        # Step 1: Search CVE database using RAG
        print(f"πŸ” Step 1: Searching CVE knowledge base for '{query}'...")
        rag_results = self.search_cve_database(query)

        if "❌" in rag_results or "No relevant CVE information found" in rag_results:
            return rag_results

        # Step 2: Parse CVE IDs from RAG results
        print(f"πŸ“‹ Step 2: Parsing CVE IDs from results...")
        cve_pattern = r'CVE-\d{4}-\d{4,7}'
        # The search report repeats each ID (result entry plus summary line),
        # so de-duplicate in first-seen order before counting or fetching.
        distinct_cve_ids = list(dict.fromkeys(re.findall(cve_pattern, rag_results)))
        unique_cve_ids = distinct_cve_ids[:fetch_limit]

        if not unique_cve_ids:
            return rag_results + "\n\n⚠️ No CVE IDs found in results to fetch NVD details."

        print(f"βœ… Found {len(unique_cve_ids)} unique CVE IDs: {', '.join(unique_cve_ids)}")

        # Step 3: Build combined result
        separator = "=" * 80
        sections = [
            "πŸ”¬ **COMPREHENSIVE CVE ANALYSIS**\n",
            separator + "\n\n",
            # Include RAG results first
            "## πŸ“š PART 1: CVE Knowledge Base Search Results\n\n",
            rag_results,
            "\n\n" + separator + "\n\n",
            # Step 4: Fetch NVD details for each CVE
            f"## 🌐 PART 2: Detailed NVD Information (Top {len(unique_cve_ids)} CVEs)\n\n",
            f"Fetching official NVD details for: {', '.join(unique_cve_ids)}\n\n",
            "-" * 80 + "\n\n",
        ]

        for idx, cve_id in enumerate(unique_cve_ids, 1):
            print(f"🌐 Step 3.{idx}: Fetching NVD details for {cve_id}...")

            sections.append(self.get_nvd_cve_details(cve_id))
            sections.append("\n" + separator + "\n\n")

            # Rate limiting: Add delay between requests (NVD recommends max 5 requests per 30 seconds)
            if idx < len(unique_cve_ids):
                time.sleep(6)  # Wait 6 seconds between requests

        # Step 5: Add summary
        sections.extend([
            "## πŸ“Š SUMMARY\n\n",
            f"βœ… **Total CVEs Analyzed**: {len(unique_cve_ids)}\n",
            f"βœ… **Search Query**: {query}\n",
            f"βœ… **RAG Results**: {len(distinct_cve_ids)} CVE references found\n",
            f"βœ… **NVD Details Fetched**: {len(unique_cve_ids)} CVEs\n\n",
            "πŸ’‘ **Next Steps**: Use this information to:\n",
            "- Cross-reference vulnerabilities in your code\n",
            "- Understand CVSS severity scores\n",
            "- Review CWE classifications\n",
            "- Check official NVD references for remediation guidance\n",
        ])

        print(f"βœ… Combined analysis complete!")
        return "".join(sections)

    except Exception as e:
        return f"❌ Error in combined CVE analysis: {str(e)}\n\nPlease try using search_cve_database and get_nvd_cve_details separately."
520
+
521
# Single shared server instance; constructing it reads GITHUB_TOKEN and loads
# the CVE retriever.
github_server = GitHubMCPServer()

# Each entry pairs a tab label with the Gradio interface shown under it, so a
# tool and its tab name cannot drift apart.
_TABS = [
    (
        "Repository Info",
        gr.Interface(
            fn=github_server.get_repository_info,
            inputs=[
                gr.Textbox(label="Repository Owner", placeholder="octocat"),
                gr.Textbox(label="Repository Name", placeholder="Hello-World"),
            ],
            outputs=gr.Textbox(label="Repository Information", lines=15),
            title="Get Repository Information",
            description="Get basic information about a GitHub repository",
            api_name="get_repository_info",
        ),
    ),
    (
        "File Content",
        gr.Interface(
            fn=github_server.get_file_content,
            inputs=[
                gr.Textbox(label="Repository Owner", placeholder="octocat"),
                gr.Textbox(label="Repository Name", placeholder="Hello-World"),
                gr.Textbox(label="File Path", placeholder="README.md"),
            ],
            outputs=gr.Textbox(label="File Content", lines=20),
            title="Get File Content",
            description="Get the content of a specific file from a GitHub repository",
            api_name="get_file_content",
        ),
    ),
    (
        "Repository Scanner",
        gr.Interface(
            fn=github_server.scan_repository,
            inputs=[
                gr.Textbox(label="Repository Owner", placeholder="octocat"),
                gr.Textbox(label="Repository Name", placeholder="Hello-World"),
                gr.Textbox(label="File Extensions", value=".py,.js,.ts,.php,.java", placeholder=".py,.js,.ts,.php,.java"),
            ],
            outputs=gr.Textbox(label="Scan Results", lines=20),
            title="Scan Repository for Code Files",
            description="Scan a GitHub repository for code files with specified extensions",
            api_name="scan_repository",
        ),
    ),
    (
        "CVE Database",
        gr.Interface(
            fn=github_server.search_cve_database,
            inputs=[
                gr.Textbox(label="Vulnerability Query", placeholder="SQL injection, XSS, command injection, etc."),
            ],
            outputs=gr.Textbox(label="CVE Search Results", lines=25),
            title="Search CVE Database",
            description="Search the CVE knowledge base for vulnerability patterns and CWE information",
            api_name="search_cve_database",
        ),
    ),
    (
        "NVD CVE Details",
        gr.Interface(
            fn=github_server.get_nvd_cve_details,
            inputs=[
                gr.Textbox(label="CVE ID", placeholder="CVE-2019-16515", value="CVE-2019-16515"),
            ],
            outputs=gr.Textbox(label="NVD CVE Details", lines=30),
            title="Get NVD CVE Details",
            description="Fetch detailed CVE information from National Vulnerability Database (NVD)",
            api_name="get_nvd_cve_details",
        ),
    ),
    (
        "πŸ”¬ Smart CVE Analysis",
        gr.Interface(
            fn=github_server.search_and_fetch_cve_details,
            inputs=[
                gr.Textbox(label="Vulnerability Query", placeholder="SQL injection, XSS, command injection, etc.", value="SQL injection"),
                gr.Slider(minimum=1, maximum=10, value=5, step=1, label="Max NVD Fetches", info="Number of CVEs to fetch NVD details for"),
            ],
            outputs=gr.Textbox(label="Comprehensive CVE Analysis", lines=40),
            title="πŸ”¬ Smart CVE Analysis (RAG + NVD)",
            description="Automatically searches CVE database AND fetches detailed NVD information for top CVEs",
            api_name="search_and_fetch_cve_details",
        ),
    ),
    (
        "πŸ” Simple CVE Search",
        gr.Interface(
            fn=github_server.simple_cve_search,
            inputs=[
                gr.Textbox(label="Vulnerability Query", placeholder="SQL injection, XSS, command injection, etc."),
                gr.Slider(minimum=1, maximum=10, value=3, step=1, label="Number of Results", info="Number of CVE matches to return"),
            ],
            outputs=gr.Textbox(label="Simple CVE Search Results", lines=15),
            title="πŸ” Simple CVE Search",
            description="Simple CVE search returning only CVE IDs and descriptions (for multi-agent workflow)",
            api_name="simple_cve_search",
        ),
    ),
]

# Tabbed UI over all tools; launched with the MCP server enabled in __main__.
demo = gr.TabbedInterface(
    [interface for _, interface in _TABS],
    [label for label, _ in _TABS],
    title="πŸ™ GitHub MCP Server with CVE Knowledge Base & NVD Integration"
)
616
+
617
if __name__ == "__main__":
    # Startup banner: one line per message, printed in order before launch.
    startup_banner = (
        "πŸš€ Starting GitHub MCP Server with CVE Knowledge Base & NVD Integration...",
        "πŸ“‘ Server will provide GitHub repository access, CVE search, and NVD details via MCP",
        "πŸ› οΈ Available tools:",
        " - get_repository_info: Get repository metadata",
        " - get_file_content: Retrieve file contents",
        " - scan_repository: Scan for code files",
        " - search_cve_database: Search CVE knowledge base",
        " - get_nvd_cve_details: Fetch detailed CVE info from NVD",
        " - πŸ†• search_and_fetch_cve_details: Smart combined RAG + NVD analysis",
        " - simple_cve_search: Simple CVE search for multi-agent workflow",
    )
    for banner_line in startup_banner:
        print(banner_line)

    # Serve the Gradio app with its MCP endpoint enabled.
    demo.launch(mcp_server=True)
630
+