Commit
·
1168745
1
Parent(s):
244630d
fix
Browse files
app.py
CHANGED
|
@@ -41,20 +41,39 @@ class GitHubMCPServer:
|
|
| 41 |
|
| 42 |
print(f"📊 Loaded {len(dataset)} CVE records from Hugging Face")
|
| 43 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 44 |
# Create documents from CVE data
|
| 45 |
documents = []
|
| 46 |
for idx, record in enumerate(dataset):
|
| 47 |
-
# Extract relevant fields from the dataset
|
| 48 |
-
cve_id = record.get('cve_id'
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 53 |
|
| 54 |
-
# Skip records without essential information
|
| 55 |
if not summary or summary == 'No summary available':
|
| 56 |
continue
|
| 57 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 58 |
# Create document content
|
| 59 |
content = f"""
|
| 60 |
CVE ID: {cve_id}
|
|
|
|
| 41 |
|
| 42 |
print(f"📊 Loaded {len(dataset)} CVE records from Hugging Face")
|
| 43 |
|
| 44 |
+
# Debug: Print first few records to understand dataset structure
|
| 45 |
+
print("🔍 Dataset structure analysis:")
|
| 46 |
+
for i in range(min(3, len(dataset))):
|
| 47 |
+
print(f"Record {i}: {dict(dataset[i])}")
|
| 48 |
+
|
| 49 |
# Create documents from CVE data
|
| 50 |
documents = []
|
| 51 |
for idx, record in enumerate(dataset):
|
| 52 |
+
# Extract relevant fields from the dataset - check multiple possible field names
|
| 53 |
+
cve_id = (record.get('cve_id') or
|
| 54 |
+
record.get('CVE_ID') or
|
| 55 |
+
record.get('id') or
|
| 56 |
+
record.get('cve') or
|
| 57 |
+
f'CVE-UNKNOWN-{idx}')
|
| 58 |
+
|
| 59 |
+
cwe_code = record.get('cwe_code', record.get('CWE', 'Unknown'))
|
| 60 |
+
cwe_name = record.get('cwe_name', record.get('cwe_description', 'Unknown'))
|
| 61 |
+
cvss_score = record.get('cvss_score', record.get('cvss', record.get('CVSS', 'N/A')))
|
| 62 |
+
summary = (record.get('summary') or
|
| 63 |
+
record.get('description') or
|
| 64 |
+
record.get('Description') or
|
| 65 |
+
'No summary available')
|
| 66 |
|
| 67 |
+
# Skip records without essential information or invalid CVE IDs
|
| 68 |
if not summary or summary == 'No summary available':
|
| 69 |
continue
|
| 70 |
|
| 71 |
+
# Validate CVE ID format (should be CVE-YYYY-NNNNN)
|
| 72 |
+
import re
|
| 73 |
+
if not re.match(r'^CVE-\d{4}-\d+$', str(cve_id)):
|
| 74 |
+
print(f"⚠️ Skipping invalid CVE ID: {cve_id}")
|
| 75 |
+
continue
|
| 76 |
+
|
| 77 |
# Create document content
|
| 78 |
content = f"""
|
| 79 |
CVE ID: {cve_id}
|