HimanshuGoyal2004 committed on
Commit
1168745
·
1 Parent(s): 244630d
Files changed (1) hide show
  1. app.py +26 -7
app.py CHANGED
@@ -41,20 +41,39 @@ class GitHubMCPServer:
41
 
42
  print(f"📊 Loaded {len(dataset)} CVE records from Hugging Face")
43
 
 
 
 
 
 
44
  # Create documents from CVE data
45
  documents = []
46
  for idx, record in enumerate(dataset):
47
- # Extract relevant fields from the dataset
48
- cve_id = record.get('cve_id', f'CVE-{idx}')
49
- cwe_code = record.get('cwe_code', 'Unknown')
50
- cwe_name = record.get('cwe_name', 'Unknown')
51
- cvss_score = record.get('cvss_score', record.get('cvss', 'N/A'))
52
- summary = record.get('summary', record.get('description', 'No summary available'))
 
 
 
 
 
 
 
 
53
 
54
- # Skip records without essential information
55
  if not summary or summary == 'No summary available':
56
  continue
57
 
 
 
 
 
 
 
58
  # Create document content
59
  content = f"""
60
  CVE ID: {cve_id}
 
41
 
42
  print(f"📊 Loaded {len(dataset)} CVE records from Hugging Face")
43
 
44
+ # Debug: Print first few records to understand dataset structure
45
+ print("🔍 Dataset structure analysis:")
46
+ for i in range(min(3, len(dataset))):
47
+ print(f"Record {i}: {dict(dataset[i])}")
48
+
49
  # Create documents from CVE data
50
  documents = []
51
  for idx, record in enumerate(dataset):
52
+ # Extract relevant fields from the dataset - check multiple possible field names
53
+ cve_id = (record.get('cve_id') or
54
+ record.get('CVE_ID') or
55
+ record.get('id') or
56
+ record.get('cve') or
57
+ f'CVE-UNKNOWN-{idx}')
58
+
59
+ cwe_code = record.get('cwe_code', record.get('CWE', 'Unknown'))
60
+ cwe_name = record.get('cwe_name', record.get('cwe_description', 'Unknown'))
61
+ cvss_score = record.get('cvss_score', record.get('cvss', record.get('CVSS', 'N/A')))
62
+ summary = (record.get('summary') or
63
+ record.get('description') or
64
+ record.get('Description') or
65
+ 'No summary available')
66
 
67
+ # Skip records without essential information or invalid CVE IDs
68
  if not summary or summary == 'No summary available':
69
  continue
70
 
71
+ # Validate CVE ID format (should be CVE-YYYY-NNNNN)
72
+ import re
73
+ if not re.match(r'^CVE-\d{4}-\d+$', str(cve_id)):
74
+ print(f"⚠️ Skipping invalid CVE ID: {cve_id}")
75
+ continue
76
+
77
  # Create document content
78
  content = f"""
79
  CVE ID: {cve_id}