Baction committed on
Commit
687ebb8
·
verified ·
1 Parent(s): eda1676

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +402 -63
app.py CHANGED
@@ -5,18 +5,17 @@ from typing import Dict, List, Any
5
  import requests
6
  import gradio as gr
7
  from dotenv import load_dotenv
8
- import pandas as pd
9
- from langchain.docstore.document import Document
10
  from langchain.text_splitter import RecursiveCharacterTextSplitter
11
  from langchain_community.retrievers import BM25Retriever
12
- from datasets import load_dataset
13
 
14
  # Load environment variables
15
  load_dotenv()
16
 
17
  class GitHubMCPServer:
18
  """GitHub MCP Server for repository scanning, file access, and CVE retrieval"""
19
-
20
  def __init__(self):
21
  self.github_token = os.getenv("GITHUB_TOKEN")
22
  if not self.github_token:
@@ -32,54 +31,100 @@ class GitHubMCPServer:
32
  self._initialize_cve_retriever()
33
 
34
  def _initialize_cve_retriever(self):
35
- """Initialize the CVE retriever with the dataset"""
36
  try:
37
- csv_path = load_dataset("Baction/cve")
38
-
39
- if not os.path.exists(csv_path):
40
- print(f"⚠ CVE dataset not found at {csv_path}")
41
- return
42
 
43
- # Load CVE dataset
44
- df = pd.read_csv(csv_path)
45
- df = df.dropna(subset=['cwe_code', 'summary'])
46
-
47
- # Create documents from CVE data
48
- documents = []
49
- for idx, row in df.iterrows():
50
- content = f"""
51
- CWE Code: {row['cwe_code']}
52
- CWE Name: {row.get('cwe_name', 'Unknown')}
53
- CVSS Score: {row.get('cvss', 'N/A')}
54
- Summary: {row['summary']}
55
- CVE ID: {idx}
56
- """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
57
  metadata = {
58
- 'cwe_code': str(row['cwe_code']),
59
- 'cwe_name': row.get('cwe_name', 'Unknown'),
60
- 'cvss': row.get('cvss', 0),
61
- 'cve_id': str(idx)
62
  }
63
- documents.append(Document(page_content=content.strip(), metadata=metadata))
 
64
 
65
- # Split documents for better retrieval
66
- text_splitter = RecursiveCharacterTextSplitter(
67
- chunk_size=300,
68
- chunk_overlap=50,
69
- add_start_index=True,
70
- strip_whitespace=True,
71
- separators=["\n\n", "\n", ".", " "]
72
- )
73
 
74
- processed_docs = text_splitter.split_documents(documents)
 
 
 
75
 
76
- # Initialize BM25 retriever
77
- self.cve_retriever = BM25Retriever.from_documents(processed_docs, k=5)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
78
 
79
- print(f"βœ… CVE Retriever initialized with {len(processed_docs)} document chunks")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
80
 
81
  except Exception as e:
82
  print(f"❌ Error initializing CVE retriever: {str(e)}")
 
 
 
83
  self.cve_retriever = None
84
 
85
  def get_repository_info(self, owner: str, repo: str) -> dict:
@@ -181,7 +226,7 @@ CVE ID: {idx}
181
  def search_cve_database(self, query: str) -> str:
182
  """Search CVE database for relevant vulnerability information"""
183
  if not self.cve_retriever:
184
- return "❌ CVE retriever not properly initialized. Please check the dataset path."
185
 
186
  try:
187
  # Retrieve relevant documents
@@ -191,35 +236,287 @@ CVE ID: {idx}
191
  return f"No relevant CVE information found for query: '{query}'"
192
 
193
  # Format the retrieved CVE information
194
- result = f"πŸ” *CVE Knowledge Base Results for: '{query}'*\n\n"
195
 
196
  for i, doc in enumerate(docs, 1):
197
  metadata = doc.metadata
198
- result += f"*Result {i}:*\n"
199
- result += f"- *CWE Code*: {metadata.get('cwe_code', 'Unknown')}\n"
200
- result += f"- *CWE Name*: {metadata.get('cwe_name', 'Unknown')}\n"
201
- result += f"- *CVSS Score*: {metadata.get('cvss', 'N/A')}\n"
202
-
203
- # Extract summary from content
204
- content_lines = doc.page_content.split('\n')
205
- summary_line = next((line for line in content_lines if line.startswith('Summary:')), '')
206
- summary = summary_line.replace('Summary: ', '').strip() if summary_line else 'No summary available'
207
 
208
- result += f"- *Description*: {summary}\n"
209
  result += "---\n"
210
 
211
  # Add summary of common patterns
212
- cwe_codes = [doc.metadata.get('cwe_code') for doc in docs if doc.metadata.get('cwe_code')]
213
- unique_cwes = list(set(cwe_codes))
214
 
215
- result += f"\n*πŸ“Š Common Weakness Patterns Found:*\n"
216
- result += f"- *CWE Codes*: {', '.join(unique_cwes[:5])}\n"
217
- result += f"- *Total Matches*: {len(docs)}\n"
218
 
219
  return result
220
 
221
  except Exception as e:
222
  return f"❌ Error retrieving CVE information: {str(e)}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
223
 
224
  # Initialize the GitHub MCP server
225
  github_server = GitHubMCPServer()
@@ -271,20 +568,62 @@ demo = gr.TabbedInterface(
271
  title="Search CVE Database",
272
  description="Search the CVE knowledge base for vulnerability patterns and CWE information",
273
  api_name="search_cve_database"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
274
  )
275
  ],
276
  [
277
  "Repository Info",
278
  "File Content",
279
  "Repository Scanner",
280
- "CVE Database"
 
 
 
281
  ],
282
- title="πŸ™ GitHub MCP Server with CVE Knowledge Base"
283
  )
284
 
285
  if __name__ == "__main__":
286
- print("πŸš€ Starting GitHub MCP Server with CVE Knowledge Base...")
287
- print("πŸ“‘ Server will provide GitHub repository access and CVE search via MCP")
288
- print("πŸ›  Available tools: repository info, file content, repository scanner, CVE database search")
 
 
 
 
 
 
 
289
 
290
  demo.launch(mcp_server=True)
 
5
  import requests
6
  import gradio as gr
7
  from dotenv import load_dotenv
8
+ from datasets import load_dataset
9
+ from langchain_core.documents import Document
10
  from langchain.text_splitter import RecursiveCharacterTextSplitter
11
  from langchain_community.retrievers import BM25Retriever
 
12
 
13
  # Load environment variables
14
  load_dotenv()
15
 
16
  class GitHubMCPServer:
17
  """GitHub MCP Server for repository scanning, file access, and CVE retrieval"""
18
+
19
  def __init__(self):
20
  self.github_token = os.getenv("GITHUB_TOKEN")
21
  if not self.github_token:
 
31
  self._initialize_cve_retriever()
32
 
33
def _initialize_cve_retriever(self):
    """Build ``self.cve_retriever`` from the ``CIRCL/vulnerability`` dataset.

    Steps: load the ``train`` split, keep rows whose ``id`` starts with
    ``CVE-``, wrap every row that has both an id and a description in a
    ``Document``, split the documents into chunks, and index the chunks with
    a BM25 retriever (``k=3``). Progress is reported with ``print``.

    If no usable document is found, or the load/index step fails,
    ``self.cve_retriever`` is set to ``None``.
    """
    try:
        print("πŸ”„ Loading CVE dataset from Hugging Face...")

        # Login using `huggingface-cli login` to access this dataset
        dataset = load_dataset("CIRCL/vulnerability", split="train")

        print(f"πŸ“Š Loaded {len(dataset)} vulnerability records from Hugging Face")

        # Dump the schema and the first two rows to make dataset changes visible.
        print("πŸ” Dataset structure analysis:")
        print(f"Dataset columns: {dataset.column_names}")
        sample_count = min(2, len(dataset))
        for position in range(sample_count):
            print(f"Record {position}: {dict(dataset[position])}")

        # Only CVE identifiers are indexed; other advisories (GHSA) are dropped.
        print("πŸ” Filtering for CVE entries only...")

        def is_cve(row):
            return str(row["id"]).startswith("CVE-")

        cve_rows = dataset.filter(is_cve)

        print(f"πŸ“Š Filtered to {len(cve_rows)} CVE records (excluded GHSA entries)")

        # One Document per row that carries both an id and a description.
        documents = []
        for row in cve_rows:
            identifier = row.get('id', '')
            text = row.get('description', '')
            if identifier and text:
                documents.append(
                    Document(
                        page_content=f"CVE ID: {identifier}\nDescription: {text}",
                        metadata={
                            'cve_id': str(identifier),
                            'description': str(text),
                        },
                    )
                )

        print(f"πŸ“ Created {len(documents)} CVE document objects")

        if not documents:
            print("❌ No valid CVE documents found in dataset")
            self.cve_retriever = None
            return

        # Chunking: 500 characters per chunk with a 50 character overlap.
        print("πŸ”„ Initializing text splitter...")
        try:
            splitter = RecursiveCharacterTextSplitter(
                chunk_size=500,
                chunk_overlap=50,
                add_start_index=True,
                strip_whitespace=True,
                separators=["\n\n", "\n", ".", " ", ""],
            )
            print("βœ… Text splitter initialized successfully")
        except Exception as splitter_error:
            print(f"❌ Text splitter initialization failed: {splitter_error}")
            # Retry with the splitter's default separators.
            splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
            print("βœ… Using simple fallback text splitter")

        print("πŸ”„ Processing documents with text splitter...")
        try:
            chunks = splitter.split_documents(documents)
            print(f"πŸ“š Knowledge base prepared with {len(chunks)} document chunks")
        except Exception as processing_error:
            print(f"❌ Document processing failed: {processing_error}")
            # Index the unsplit documents rather than giving up.
            chunks = documents
            print(f"βœ… Using original documents without splitting: {len(chunks)} documents")

        print("πŸ”„ Initializing BM25 retriever...")
        try:
            self.cve_retriever = BM25Retriever.from_documents(chunks, k=3)
            print(f"βœ… CVE Retriever initialized with {len(chunks)} document chunks")
        except Exception as retriever_error:
            print(f"❌ BM25 retriever initialization failed: {retriever_error}")
            self.cve_retriever = None

    except Exception as e:
        print(f"❌ Error initializing CVE retriever: {str(e)}")
        print("πŸ’‘ Make sure you have access to the Hugging Face dataset 'CIRCL/vulnerability'")
        print("πŸ’‘ You may need to login with: huggingface-cli login")
        print("πŸ’‘ Dataset columns should be: id, title, description, cpes")
        self.cve_retriever = None
129
 
130
  def get_repository_info(self, owner: str, repo: str) -> dict:
 
226
  def search_cve_database(self, query: str) -> str:
227
  """Search CVE database for relevant vulnerability information"""
228
  if not self.cve_retriever:
229
+ return "❌ CVE retriever not properly initialized. Please check Hugging Face dataset access."
230
 
231
  try:
232
  # Retrieve relevant documents
 
236
  return f"No relevant CVE information found for query: '{query}'"
237
 
238
  # Format the retrieved CVE information
239
+ result = f"πŸ” **CVE Knowledge Base Results for: '{query}'**\n\n"
240
 
241
  for i, doc in enumerate(docs, 1):
242
  metadata = doc.metadata
243
+ result += f"**Result {i}:**\n"
244
+ result += f"- **CVE ID**: {metadata.get('cve_id', 'Unknown')}\n"
245
+
246
+ # Extract description from content or metadata
247
+ description = metadata.get('description', '')
248
+ if not description:
249
+ content_lines = doc.page_content.split('\n')
250
+ desc_line = next((line for line in content_lines if line.startswith('Description:')), '')
251
+ description = desc_line.replace('Description: ', '').strip() if desc_line else 'No description available'
252
 
253
+ result += f"- **Description**: {description[:200]}{'...' if len(description) > 200 else ''}\n"
254
  result += "---\n"
255
 
256
  # Add summary of common patterns
257
+ cve_ids = [doc.metadata.get('cve_id') for doc in docs if doc.metadata.get('cve_id')]
 
258
 
259
+ result += f"\n**πŸ“Š Analysis Summary:**\n"
260
+ result += f"- **CVE Examples**: {', '.join(cve_ids[:3])}{'...' if len(cve_ids) > 3 else ''}\n"
261
+ result += f"- **Total Matches**: {len(docs)}\n"
262
 
263
  return result
264
 
265
  except Exception as e:
266
  return f"❌ Error retrieving CVE information: {str(e)}"
267
+
268
def simple_cve_search(self, query: str, k: int = 3) -> str:
    """Return the top ``k`` CVE matches as a compact numbered list.

    Intended for multi-agent workflows that only need CVE IDs and a short
    description.

    Args:
        query: Free-text vulnerability query handed to the retriever.
        k: Number of matches to request. The value is coerced to ``int``
            (UI sliders may deliver floats) and raised to at least 1.

    Returns:
        A numbered plain-text list (descriptions truncated to 150
        characters), a "no results" message, or an error message starting
        with "❌". Errors are reported in the return value, not raised.
    """
    retriever = self.cve_retriever
    if not retriever:
        return "❌ CVE retriever not properly initialized. Please check Hugging Face dataset access."

    try:
        limit = max(1, int(k))

        # ``self.cve_retriever`` is a single shared object, so the temporary
        # ``k`` must be rolled back even when ``invoke`` raises; otherwise
        # every later search on this server silently uses the wrong ``k``.
        previous_k = retriever.k
        retriever.k = limit
        try:
            docs = retriever.invoke(query)
        finally:
            retriever.k = previous_k

        if not docs:
            return f"No relevant CVE information found for query: '{query}'"

        entries = []
        for position, doc in enumerate(docs, 1):
            metadata = doc.metadata
            cve_id = metadata.get('cve_id', 'Unknown')

            # Prefer the description stored in metadata; fall back to the
            # "Description:" line of the chunk text when it is missing.
            description = metadata.get('description', '')
            if not description:
                desc_line = next(
                    (line for line in doc.page_content.split('\n') if line.startswith('Description:')),
                    '',
                )
                description = desc_line.replace('Description: ', '').strip() if desc_line else 'No description available'

            ellipsis = '...' if len(description) > 150 else ''
            entries.append(f"{position}. {cve_id}\n {description[:150]}{ellipsis}\n\n")

        header = f"Top {len(docs)} CVE matches for '{query}':\n\n"
        return (header + "".join(entries)).strip()

    except Exception as e:
        return f"❌ Error retrieving CVE information: {str(e)}"
308
+
309
def get_nvd_cve_details(self, cve_id: str) -> str:
    """
    Fetch detailed information for one CVE from the NVD REST API (v2.0).

    Args:
        cve_id: The CVE identifier (e.g., 'CVE-2019-16515'). Surrounding
            whitespace is ignored and the value is upper-cased. Non-string
            values are rejected as an invalid ID.

    Returns:
        A Markdown-formatted string with status, dates, description, CVSS
        metrics, CWE identifiers and up to five references. Invalid IDs,
        HTTP errors, timeouts and malformed payloads are reported as a
        message string instead of being raised.
    """
    # Normalise and validate BEFORE entering the try block. The exception
    # handlers below interpolate ``cve_id`` and ``nvd_web_url``; if either
    # were still unassigned when an error occurred (e.g. ``cve_id`` is None),
    # the handler itself would crash.
    cve_id = cve_id.strip().upper() if isinstance(cve_id, str) else str(cve_id)
    if not cve_id.startswith('CVE-'):
        return f"❌ Invalid CVE ID format: '{cve_id}'\nCVE ID must start with 'CVE-' (e.g., CVE-2019-16515)"

    nvd_api_url = "https://services.nvd.nist.gov/rest/json/cves/2.0"
    nvd_web_url = f"https://nvd.nist.gov/vuln/detail/{cve_id}"
    headers = {
        "User-Agent": "VulnerabilityScanner/1.0 (GitHub Security Analysis Tool)"
    }

    try:
        print(f"πŸ” Fetching NVD details for {cve_id}...")
        response = requests.get(nvd_api_url, params={"cveId": cve_id}, headers=headers, timeout=15)

        if response.status_code == 404:
            return f"⚠️ CVE not found: {cve_id}\n\nπŸ”— **NVD URL**: {nvd_web_url}\n\nThe CVE may not exist or may not yet be published in NVD."

        if response.status_code == 403:
            return f"❌ Access denied to NVD API (HTTP 403)\n\nThis might be due to rate limiting. Please try again in a few moments.\n\nπŸ”— **NVD URL**: {nvd_web_url}"

        if response.status_code != 200:
            return f"❌ NVD API request failed with status {response.status_code}\n\nπŸ”— **NVD URL**: {nvd_web_url}\n\nYou can view the CVE details directly on the NVD website."

        data = response.json()

        # A 200 response with zero results means the ID is unknown to NVD.
        vulnerabilities = data.get('vulnerabilities') or []
        if data.get('resultsPerPage', 0) == 0 or not vulnerabilities:
            return f"⚠️ CVE not found in NVD database: {cve_id}\n\nπŸ”— **NVD URL**: {nvd_web_url}\n\nNote: The CVE may not yet be published in NVD or the ID might be incorrect."

        vuln = vulnerabilities[0]['cve']

        # Header, status and dates (dates are trimmed to YYYY-MM-DD).
        parts = [
            f"πŸ“‹ **NVD CVE Details: {cve_id}**\n\n",
            f"πŸ”— **NVD URL**: {nvd_web_url}\n\n",
            f"**Status**: {vuln.get('vulnStatus', 'N/A')}\n",
            f"**Published**: {(vuln.get('published') or 'N/A')[:10]}\n",
            f"**Last Modified**: {(vuln.get('lastModified') or 'N/A')[:10]}\n\n",
        ]

        # Description: first English entry only.
        english = next(
            (desc for desc in vuln.get('descriptions', []) if desc.get('lang') == 'en'),
            None,
        )
        if english is not None:
            parts.append(f"**πŸ“ Description**:\n{english.get('value', 'N/A')}\n\n")

        metrics = vuln.get('metrics', {})

        # CVSS v3.x: v3.1 is preferred, v3.0 is the fallback. ``or`` also
        # skips a key that is present but holds an empty list.
        v3_entries = metrics.get('cvssMetricV31') or metrics.get('cvssMetricV30')
        if v3_entries:
            cvss_v3 = v3_entries[0]['cvssData']
            v3_fields = (
                ("Vector String", "vectorString"),
                ("Attack Vector", "attackVector"),
                ("Attack Complexity", "attackComplexity"),
                ("Privileges Required", "privilegesRequired"),
                ("User Interaction", "userInteraction"),
                ("Scope", "scope"),
                ("Confidentiality Impact", "confidentialityImpact"),
                ("Integrity Impact", "integrityImpact"),
                ("Availability Impact", "availabilityImpact"),
            )
            parts.append("**🎯 CVSS v3 Score**:\n")
            parts.append(f"- **Base Score**: {cvss_v3.get('baseScore', 'N/A')} ({cvss_v3.get('baseSeverity', 'N/A')})\n")
            parts.extend(f"- **{label}**: {cvss_v3.get(key, 'N/A')}\n" for label, key in v3_fields)
            parts.append("\n")

        # CVSS v2 (if available). The severity is read from the metric
        # entry itself, not from its ``cvssData`` object.
        v2_entries = metrics.get('cvssMetricV2')
        if v2_entries:
            v2_metric = v2_entries[0]
            cvss_v2 = v2_metric['cvssData']
            parts.append("**CVSS v2 Score**:\n")
            parts.append(f"- **Base Score**: {cvss_v2.get('baseScore', 'N/A')} ({v2_metric.get('baseSeverity', 'N/A')})\n")
            parts.append(f"- **Vector String**: {cvss_v2.get('vectorString', 'N/A')}\n\n")

        # CWE identifiers, de-duplicated in first-seen order so that the
        # output is deterministic across runs.
        cwe_values = [
            desc.get('value', 'N/A')
            for weakness in vuln.get('weaknesses', [])
            for desc in weakness.get('description', [])
            if desc.get('lang') == 'en'
        ]
        if cwe_values:
            parts.append("**πŸ” CWE (Common Weakness Enumeration)**:\n")
            parts.append(f"- {', '.join(dict.fromkeys(cwe_values))}\n\n")

        # References: at most five are listed, the rest are only counted.
        references = vuln.get('references', [])
        if references:
            parts.append("**πŸ”— References** (showing first 5):\n")
            parts.extend(
                f"{number}. [{ref.get('source', 'Source')}]({ref.get('url', '#')})\n"
                for number, ref in enumerate(references[:5], 1)
            )
            if len(references) > 5:
                parts.append(f"\n... and {len(references) - 5} more references\n")
            parts.append("\n")

        parts.append("---\n")
        parts.append("πŸ’‘ **Tip**: Use this CVE information to cross-reference vulnerabilities found in code analysis.\n")

        return "".join(parts)

    except requests.exceptions.Timeout:
        return f"⏱️ Request to NVD API timed out for {cve_id}\n\nPlease try again or visit: {nvd_web_url}"

    except requests.exceptions.RequestException as e:
        return f"❌ Network error while fetching CVE details: {str(e)}\n\nπŸ”— **NVD URL**: {nvd_web_url}"

    except Exception as e:
        return f"❌ Unexpected error fetching NVD details for {cve_id}: {str(e)}\n\nπŸ”— **NVD URL**: {nvd_web_url}"
434
+
435
def search_and_fetch_cve_details(self, query: str, max_nvd_fetches: int = 5) -> str:
    """
    Search the CVE knowledge base and fetch NVD details for the top hits.

    This function:
    1. Searches the CVE knowledge base via ``search_cve_database``
    2. Extracts CVE IDs from the returned text (first-seen order, no duplicates)
    3. Calls ``get_nvd_cve_details`` for at most ``max_nvd_fetches`` of them,
       pausing 6 seconds between calls
    4. Returns the search output followed by the NVD details and a summary

    Args:
        query: Vulnerability search query (e.g., "SQL injection", "XSS")
        max_nvd_fetches: Maximum number of CVEs to fetch NVD details for
            (default: 5). Coerced to ``int`` because UI sliders may deliver
            floats; negative values are treated as 0.

    Returns:
        Formatted string with the search results plus NVD information. If
        the search step reports an error or no results, that message is
        returned unchanged. Unexpected failures are returned as a message
        starting with "❌".
    """
    import re
    import time

    seconds_between_requests = 6

    try:
        # A float limit would make the slice below raise TypeError.
        fetch_limit = max(0, int(max_nvd_fetches))

        # Step 1: Search CVE database using RAG
        print(f"πŸ” Step 1: Searching CVE knowledge base for '{query}'...")
        rag_results = self.search_cve_database(query)

        # Failure messages of the search step begin with these markers.
        # Testing the prefix avoids misreading a successful result whose
        # description merely contains one of them.
        if rag_results.startswith(("❌", "No relevant CVE information found")):
            return rag_results

        # Step 2: Parse CVE IDs from RAG results
        print(f"πŸ“‹ Step 2: Parsing CVE IDs from results...")
        found_ids = list(dict.fromkeys(re.findall(r'CVE-\d{4}-\d{4,7}', rag_results)))
        unique_cve_ids = found_ids[:fetch_limit]

        if not unique_cve_ids:
            return rag_results + "\n\n⚠️ No CVE IDs found in results to fetch NVD details."

        id_list = ', '.join(unique_cve_ids)
        print(f"βœ… Found {len(unique_cve_ids)} unique CVE IDs: {id_list}")

        rule = "=" * 80

        # Step 3: Build combined result, RAG results first
        sections = [
            "πŸ”¬ **COMPREHENSIVE CVE ANALYSIS**\n",
            rule + "\n\n",
            "## πŸ“š PART 1: CVE Knowledge Base Search Results\n\n",
            rag_results,
            "\n\n" + rule + "\n\n",
            f"## 🌐 PART 2: Detailed NVD Information (Top {len(unique_cve_ids)} CVEs)\n\n",
            f"Fetching official NVD details for: {id_list}\n\n",
            "-" * 80 + "\n\n",
        ]

        # Step 4: Fetch NVD details for each CVE
        for idx, cve_id in enumerate(unique_cve_ids, 1):
            print(f"🌐 Step 3.{idx}: Fetching NVD details for {cve_id}...")

            sections.append(self.get_nvd_cve_details(cve_id))
            sections.append("\n" + rule + "\n\n")

            # Rate limiting: pause between requests, but not after the last.
            if idx < len(unique_cve_ids):
                time.sleep(seconds_between_requests)

        # Step 5: Add summary. The RAG count is the number of distinct CVE
        # IDs in the search output, not the number of textual mentions.
        sections.extend([
            "## πŸ“Š SUMMARY\n\n",
            f"βœ… **Total CVEs Analyzed**: {len(unique_cve_ids)}\n",
            f"βœ… **Search Query**: {query}\n",
            f"βœ… **RAG Results**: {len(found_ids)} CVE references found\n",
            f"βœ… **NVD Details Fetched**: {len(unique_cve_ids)} CVEs\n\n",
            "πŸ’‘ **Next Steps**: Use this information to:\n",
            "- Cross-reference vulnerabilities in your code\n",
            "- Understand CVSS severity scores\n",
            "- Review CWE classifications\n",
            "- Check official NVD references for remediation guidance\n",
        ])

        print(f"βœ… Combined analysis complete!")
        return "".join(sections)

    except Exception as e:
        return f"❌ Error in combined CVE analysis: {str(e)}\n\nPlease try using search_cve_database and get_nvd_cve_details separately."
520
 
521
  # Initialize the GitHub MCP server
522
  github_server = GitHubMCPServer()
 
568
  title="Search CVE Database",
569
  description="Search the CVE knowledge base for vulnerability patterns and CWE information",
570
  api_name="search_cve_database"
571
+ ),
572
+ gr.Interface(
573
+ fn=github_server.get_nvd_cve_details,
574
+ inputs=[
575
+ gr.Textbox(label="CVE ID", placeholder="CVE-2019-16515", value="CVE-2019-16515")
576
+ ],
577
+ outputs=gr.Textbox(label="NVD CVE Details", lines=30),
578
+ title="Get NVD CVE Details",
579
+ description="Fetch detailed CVE information from National Vulnerability Database (NVD)",
580
+ api_name="get_nvd_cve_details"
581
+ ),
582
+ gr.Interface(
583
+ fn=github_server.search_and_fetch_cve_details,
584
+ inputs=[
585
+ gr.Textbox(label="Vulnerability Query", placeholder="SQL injection, XSS, command injection, etc.", value="SQL injection"),
586
+ gr.Slider(minimum=1, maximum=10, value=5, step=1, label="Max NVD Fetches", info="Number of CVEs to fetch NVD details for")
587
+ ],
588
+ outputs=gr.Textbox(label="Comprehensive CVE Analysis", lines=40),
589
+ title="πŸ”¬ Smart CVE Analysis (RAG + NVD)",
590
+ description="Automatically searches CVE database AND fetches detailed NVD information for top CVEs",
591
+ api_name="search_and_fetch_cve_details"
592
+ ),
593
+ gr.Interface(
594
+ fn=github_server.simple_cve_search,
595
+ inputs=[
596
+ gr.Textbox(label="Vulnerability Query", placeholder="SQL injection, XSS, command injection, etc."),
597
+ gr.Slider(minimum=1, maximum=10, value=3, step=1, label="Number of Results", info="Number of CVE matches to return")
598
+ ],
599
+ outputs=gr.Textbox(label="Simple CVE Search Results", lines=15),
600
+ title="πŸ” Simple CVE Search",
601
+ description="Simple CVE search returning only CVE IDs and descriptions (for multi-agent workflow)",
602
+ api_name="simple_cve_search"
603
  )
604
  ],
605
  [
606
  "Repository Info",
607
  "File Content",
608
  "Repository Scanner",
609
+ "CVE Database",
610
+ "NVD CVE Details",
611
+ "πŸ”¬ Smart CVE Analysis",
612
+ "πŸ” Simple CVE Search"
613
  ],
614
+ title="πŸ™ GitHub MCP Server with CVE Knowledge Base & NVD Integration"
615
  )
616
 
617
  if __name__ == "__main__":
618
+ print("πŸš€ Starting GitHub MCP Server with CVE Knowledge Base & NVD Integration...")
619
+ print("πŸ“‘ Server will provide GitHub repository access, CVE search, and NVD details via MCP")
620
+ print("πŸ› οΈ Available tools:")
621
+ print(" - get_repository_info: Get repository metadata")
622
+ print(" - get_file_content: Retrieve file contents")
623
+ print(" - scan_repository: Scan for code files")
624
+ print(" - search_cve_database: Search CVE knowledge base")
625
+ print(" - get_nvd_cve_details: Fetch detailed CVE info from NVD")
626
+ print(" - πŸ†• search_and_fetch_cve_details: Smart combined RAG + NVD analysis")
627
+ print(" - simple_cve_search: Simple CVE search for multi-agent workflow")
628
 
629
  demo.launch(mcp_server=True)