|
|
|
|
|
import os |
|
|
import gradio as gr |
|
|
import shap |
|
|
from transformers import pipeline |
|
|
import torch |
|
|
import xgboost as xgb |
|
|
from sklearn.ensemble import RandomForestClassifier, VotingClassifier |
|
|
from sklearn.svm import SVC |
|
|
from sklearn.linear_model import LogisticRegression |
|
|
from sklearn.feature_extraction.text import TfidfVectorizer |
|
|
from sklearn.preprocessing import LabelEncoder |
|
|
from sklearn.pipeline import FeatureUnion |
|
|
from sklearn.base import BaseEstimator, TransformerMixin |
|
|
from sentence_transformers import SentenceTransformer |
|
|
import pandas as pd |
|
|
import numpy as np |
|
|
import matplotlib.pyplot as plt |
|
|
import requests |
|
|
from bs4 import BeautifulSoup |
|
|
import json |
|
|
import joblib |
|
|
import re |
|
|
import time |
|
|
import warnings |
|
|
from urllib.parse import urlparse |
|
|
from requests.adapters import HTTPAdapter |
|
|
from urllib3.util.retry import Retry |
|
|
import socket |
|
|
import threading |
|
|
from concurrent.futures import ThreadPoolExecutor, as_completed |
|
|
import ssl |
|
|
from datetime import datetime |
|
|
|
|
|
# Suppress every warning for the rest of the process.
warnings.filterwarnings(action='ignore')

# Select matplotlib's Agg backend.
plt.switch_backend('Agg')
|
|
|
|
|
def load_from_drive(path):
    """Return the object stored at ``path`` via ``joblib.load``, or ``None``.

    ``None`` is returned when ``path`` does not exist, and also when
    ``joblib.load`` raises; in the second case the error is printed first.
    """
    if not os.path.exists(path):
        return None

    try:
        loaded = joblib.load(path)
    except Exception as e:
        print(f"Error loading {path}: {e}")
        return None
    return loaded
|
|
|
|
|
|
|
|
class SentenceTransformerFeatures(BaseEstimator, TransformerMixin):
    """Scikit-learn style transformer that encodes inputs with a SentenceTransformer."""

    def __init__(self, model_name='all-MiniLM-L6-v2'):
        # The encoder is created later: in fit(), or on the first transform().
        self.model = None
        self.model_name = model_name

    def fit(self, X, y=None):
        """Create the encoder named by ``model_name``; ``X`` and ``y`` are not used."""
        self.model = SentenceTransformer(self.model_name)
        return self

    def transform(self, X):
        """Return ``model.encode(X)``, creating the encoder first if it is unset."""
        encoder = self.model
        if encoder is None:
            encoder = SentenceTransformer(self.model_name)
            self.model = encoder
        return encoder.encode(X, show_progress_bar=False)
|
|
|
|
|
class AdvancedFeatureEngine:
    """Owns the TF-IDF vectorizer and, optionally, a sentence-embedding transformer."""

    def __init__(self, use_embeddings=True):
        tfidf_settings = dict(
            max_features=2000,
            ngram_range=(1, 3),
            stop_words='english',
            min_df=2,
            max_df=0.8,
            analyzer='word',
            sublinear_tf=True,
        )
        self.use_embeddings = use_embeddings
        self.vectorizer = TfidfVectorizer(**tfidf_settings)
        self.sentence_transformer = None
        self.feature_union = None

    def build_feature_pipeline(self):
        """Build, store and return the feature extractor.

        With embeddings enabled this is a ``FeatureUnion`` of the TF-IDF
        vectorizer and a new ``SentenceTransformerFeatures``; otherwise it
        is the TF-IDF vectorizer itself.
        """
        if not self.use_embeddings:
            self.feature_union = self.vectorizer
            return self.feature_union

        self.sentence_transformer = SentenceTransformerFeatures()
        steps = [
            ('tfidf', self.vectorizer),
            ('embeddings', self.sentence_transformer),
        ]
        self.feature_union = FeatureUnion(steps)
        return self.feature_union
|
|
|
|
|
|
|
|
class CalibratedVulnerabilityClassifier:
    """Vulnerability-type classifier: saved models plus regex heuristics.

    On construction the class tries to load a TF-IDF vectorizer, a label
    encoder, several individual models, an ensemble, a calibrated ensemble
    and per-class thresholds from ``model_path_prefix``.  Predictions come
    from the calibrated ensemble when both it and the vectorizer were
    loaded; when fewer than three model predictions exist they are topped
    up with keyword/regex heuristics.
    """

    # Per-class thresholds used when no usable thresholds file is found.
    # The key order doubles as the default class list for the label encoder.
    DEFAULT_THRESHOLDS = {
        'SQL Injection': 0.65, 'XSS': 0.68, 'CSRF': 0.55,
        'Information Disclosure': 0.58, 'Authentication Bypass': 0.62,
        'Secure Config': 0.52, 'File Inclusion': 0.60, 'Command Injection': 0.70,
        'XXE': 0.65, 'SSRF': 0.63, 'IDOR': 0.58, 'Buffer Overflow': 0.72,
    }

    # Heuristic rules, applied in this order.  Each entry is
    # (type, regex patterns, count_matches, base, step, cap, risk_level).
    # Counted rules score min(base + matches * step, cap); the others score
    # ``base`` when any pattern matches.  For every counted rule a single
    # match already exceeds the cut-off the risk level used to be derived
    # from, so the risk level is a constant per rule.
    _KEYWORD_RULES = (
        ('SQL Injection', (
            r'\b(select|insert|update|delete|union|drop|alter|create)\b.*\b(from|into|table|database)\b',
            r'.*\b(sql|query).*(injection|bypass|escape)\b',
            r'.*(union.*select|1=1|or\s+1=1|--|;)\b',
            r'.*(exec\s*\(|sp_|xp_)\b',
        ), True, 0.85, 0.08, 0.95, 'Critical'),
        ('XSS', (
            r'.*(script|alert|document\.cookie|onclick|onload|onerror)\b',
            r'.*(<script|</script|javascript:).*',
            r'.*(xss|cross.site).*(script|injection)\b',
            r'.*(eval\(|setTimeout|setInterval).*',
        ), True, 0.82, 0.06, 0.94, 'Critical'),
        ('Command Injection', (
            r'.*(exec|system|popen|shell_exec|passthru|subprocess)\b',
            r'.*(command.*injection|cmd.*injection)\b',
            r'.*(\||&|;|`|\$\().*(command|exec)\b',
            r'.*(os\.system|subprocess\.call)\b',
        ), True, 0.80, 0.07, 0.93, 'Critical'),
        ('File Inclusion', (
            r'.*(include|require|file_get_contents|fopen).*[\$|%]',
            r'.*(file.*inclusion|path.*traversal|directory.*traversal)\b',
            r'.*(\.\./|\.\.\\).*',
            r'.*(\.\.%2f|\.\.%5c)\b',
        ), True, 0.75, 0.06, 0.90, 'High'),
        ('XXE', (
            r'.*(xxe|xml.*external.*entity)\b',
            r'.*(<!DOCTYPE|<!ENTITY|SYSTEM|PUBLIC).*',
            r'.*(loadXML|DOMDocument|SimpleXML).*',
        ), False, 0.78, None, None, 'High'),
        ('SSRF', (
            r'.*(ssrf|server.*side.*request.*forgery)\b',
            r'.*(curl|file_get_contents|requests\.get).*(http|https|ftp|file)',
            r'.*(url_fopen|allow_url_include)\b',
        ), False, 0.76, None, None, 'High'),
        ('Authentication Bypass', (
            r'.*(auth|login|password).*(bypass|weak|broken)\b',
            r'.*(session.*fixation|credential.*stuffing)\b',
            r'.*(default.*password|admin.*admin|root.*root)\b',
            r'.*(jwt.*secret|token.*hardcoded)\b',
        ), False, 0.81, None, None, 'High'),
        ('Information Disclosure', (
            r'.*(password|credential|key|token|secret).*(exposed|leak|disclosure|visible)\b',
            r'.*(error.*message|stack.*trace|debug.*info).*exposed',
            r'.*(config|configuration).*(file|data).*exposed',
            r'.*(\.env|\.pem|\.key|\.cert)\b',
        ), True, 0.72, 0.05, 0.87, 'High'),
        # These entries were previously literal substring tests, so the "."
        # wildcards ('cross.site', 'anti.forgery') could never match a
        # hyphen or space.  They are now evaluated as regexes.
        ('CSRF', (
            r'csrf',
            r'cross.site request forgery',
            r'anti.forgery',
            r'request forgery',
        ), False, 0.68, None, None, 'Medium'),
        ('IDOR', (
            r'idor',
            r'insecure direct object reference',
            r'direct object reference',
        ), False, 0.65, None, None, 'Medium'),
        ('Buffer Overflow', (
            r'.*(buffer.*overflow|stack.*overflow)\b',
            r'.*(strcpy|strcat|gets|sprintf).*',
            r'.*(memcpy|memmove|memset).*',
        ), False, 0.79, None, None, 'High'),
    )

    # Keyword -> base importance for explain_prediction().  Keys that were
    # duplicated in the original literal keep the value that actually took
    # effect (the last one).  All keys are lower-case literal substrings
    # because they are counted in lower-cased text.
    _EXPLANATION_KEYWORDS = {
        'sql': 0.85, 'injection': 0.85, 'select': 0.80, 'union': 0.85,
        'database': 0.75, 'query': 0.70, 'insert': 0.75, 'update': 0.75,
        'delete': 0.75, 'drop': 0.80, 'alter': 0.75, '1=1': 0.90, 'exec': 0.85,

        'xss': 0.88, 'script': 0.85, 'cross-site': 0.80, 'javascript': 0.75,
        'alert': 0.80, 'document.cookie': 0.85, 'onclick': 0.70, 'onload': 0.70,
        'onerror': 0.70, '<script>': 0.90, '</script>': 0.90, 'eval': 0.82,

        'command': 0.80, 'system': 0.80,
        'shell': 0.75, 'popen': 0.80, 'passthru': 0.80, 'subprocess': 0.78,

        'file': 0.75, 'include': 0.80, 'require': 0.75, 'path': 0.70,
        'traversal': 0.85, 'directory': 0.65, '../': 0.88,

        'xxe': 0.82, 'xml': 0.75, 'entity': 0.78, 'doctype': 0.80,

        'ssrf': 0.80, 'server-side': 0.75, 'request forgery': 0.75, 'curl': 0.70,

        'authentication': 0.80, 'bypass': 0.85, 'login': 0.75, 'password': 0.80,
        'session': 0.70, 'credential': 0.85, 'admin': 0.65, 'jwt': 0.72,

        'information': 0.65, 'disclosure': 0.75, 'exposed': 0.70, 'leak': 0.75,
        'key': 0.80, 'token': 0.75,
        'config': 0.65, 'debug': 0.70, 'error': 0.60,

        'buffer': 0.78, 'overflow': 0.82, 'stack': 0.75, 'strcpy': 0.80,
    }

    # Terms whose presence anywhere in the text adds a flat importance boost.
    _CONTEXT_TERMS = ('vulnerability', 'security', 'attack', 'exploit', 'injection')

    def __init__(self, use_embeddings=True, model_path_prefix="models/"):
        """Create the classifier and immediately try to load saved models.

        Args:
            use_embeddings: forwarded to ``AdvancedFeatureEngine``.
            model_path_prefix: directory that holds the ``*.joblib`` files.
        """
        self.feature_engine = AdvancedFeatureEngine(use_embeddings)
        self.label_encoder = LabelEncoder()
        self.models = {}
        self.explainer = None
        self.training_complete = False
        self.calibration_thresholds = {}
        self.model_path_prefix = model_path_prefix
        self.xgb_model = None
        self.rf_model = None
        self.svm_model = None
        self.lr_model = None
        self.ensemble = None
        self.ensemble_calibrated = None
        self.load_models()

    def _load_component(self, filename, default_factory=None):
        """Load ``filename`` from the model directory.

        Falls back to ``default_factory()`` only when nothing could be
        loaded.  The check is ``is None`` rather than truthiness because
        scikit-learn ensembles define ``__len__``, which makes ``bool()``
        on them unreliable.
        """
        loaded = load_from_drive(os.path.join(self.model_path_prefix, filename))
        if loaded is None and default_factory is not None:
            return default_factory()
        return loaded

    def load_models(self):
        """Load saved components, substituting defaults where files are missing.

        Any exception is printed and leaves ``training_complete`` False.
        """
        try:
            # Stays None when the file is missing; get_meaningful_predictions
            # treats a None vectorizer as "model path unavailable".
            self.feature_engine.vectorizer = self._load_component("tfidf_vectorizer.joblib")

            self.label_encoder = self._load_component("label_encoder.joblib", LabelEncoder)

            self.xgb_model = self._load_component(
                "xgb_model.joblib",
                lambda: xgb.XGBClassifier(
                    n_estimators=300, max_depth=10, learning_rate=0.1,
                    subsample=0.8, random_state=42))

            self.rf_model = self._load_component(
                "rf_model.joblib",
                lambda: RandomForestClassifier(
                    n_estimators=300, max_depth=20, min_samples_split=5,
                    random_state=42))

            self.lr_model = self._load_component(
                "lr_model.joblib",
                lambda: LogisticRegression(
                    C=1.0, max_iter=2000, solver='liblinear', random_state=42))

            self.svm_model = self._load_component(
                "svm_model.joblib",
                lambda: SVC(
                    probability=True, kernel='rbf', C=1.0, gamma='scale',
                    random_state=42))

            self.ensemble = self._load_component("ensemble_model.joblib")
            self.ensemble_calibrated = self._load_component("calibrated_ensemble.joblib")

            loaded_thresholds = self._load_component("calibration_thresholds.joblib")
            if isinstance(loaded_thresholds, dict) and loaded_thresholds:
                self.calibration_thresholds = loaded_thresholds
            else:
                self.calibration_thresholds = dict(self.DEFAULT_THRESHOLDS)

            # An encoder that is unfitted, or knows fewer than 8 classes, is
            # refitted on the default class list and counts as "not trained".
            known_classes = getattr(self.label_encoder, 'classes_', None)
            if known_classes is None or len(known_classes) < 8:
                self.label_encoder.fit(list(self.DEFAULT_THRESHOLDS))
                self.training_complete = False
            else:
                self.training_complete = True

        except Exception as e:
            print(f"Model loading error: {e}")
            self.training_complete = False

    @staticmethod
    def _risk_level(confidence):
        """Map a model confidence to 'Critical' / 'High' / 'Medium' / 'Low'."""
        if confidence > 0.8:
            return 'Critical'
        if confidence > 0.65:
            return 'High'
        if confidence > 0.45:
            return 'Medium'
        return 'Low'

    def get_meaningful_predictions(self, text):
        """Return up to 8 prediction dicts for ``text``, highest confidence first.

        Each dict has the keys ``type``, ``confidence``, ``threshold``,
        ``above_threshold`` and ``risk_level``.  ``None`` is treated as an
        empty string.
        """
        text = text or ""
        if isinstance(self.calibration_thresholds, dict):
            thresholds = self.calibration_thresholds
        else:
            thresholds = self.DEFAULT_THRESHOLDS

        preds = []
        vectorizer = self.feature_engine.vectorizer
        model = self.ensemble_calibrated

        if model is not None and vectorizer is not None:
            try:
                proba = model.predict_proba(vectorizer.transform([text]))[0]
                class_names = self.label_encoder.classes_
                for i in np.argsort(proba)[::-1][:8]:
                    if i >= len(class_names):
                        continue
                    # Convert numpy scalars to builtins so the result can be
                    # serialized (numpy.bool_ is rejected by json.dumps).
                    cl = str(class_names[i])
                    confidence = float(proba[i])
                    threshold = thresholds.get(cl, 0.5)
                    preds.append({
                        'type': cl,
                        'confidence': confidence,
                        'threshold': threshold,
                        'above_threshold': bool(confidence > threshold),
                        'risk_level': self._risk_level(confidence),
                    })
            except Exception as e:
                print(f"Model prediction error: {e}")

        # Too few model predictions: add heuristic ones for types not yet present.
        if len(preds) < 3:
            seen_types = {p['type'] for p in preds}
            preds.extend(
                p for p in self._keyword_based_analysis(text, thresholds)
                if p['type'] not in seen_types
            )

        return sorted(preds, key=lambda x: x['confidence'], reverse=True)[:8]

    def _keyword_prediction(self, vuln_type, confidence, risk, thresholds):
        """Build one heuristic prediction dict for ``vuln_type``."""
        threshold = thresholds.get(vuln_type, self.DEFAULT_THRESHOLDS[vuln_type])
        return {
            'type': vuln_type,
            'confidence': confidence,
            'threshold': threshold,
            # Computed rather than hard-coded, so loaded thresholds are honoured.
            'above_threshold': confidence > threshold,
            'risk_level': risk,
        }

    def _keyword_based_analysis(self, text, thresholds):
        """Return heuristic predictions for ``text`` from the regex rule table.

        Args:
            text: text to scan; ``None`` is treated as an empty string.
            thresholds: mapping of vulnerability type to threshold; missing
                types fall back to ``DEFAULT_THRESHOLDS``.
        """
        text = text or ""
        text_lower = text.lower()
        preds = []

        for vuln_type, patterns, counted, base, step, cap, risk in self._KEYWORD_RULES:
            if counted:
                hits = sum(
                    len(re.findall(pattern, text_lower, re.IGNORECASE))
                    for pattern in patterns
                )
                if hits == 0:
                    continue
                confidence = min(base + hits * step, cap)
            else:
                if not any(re.search(pattern, text_lower, re.IGNORECASE)
                           for pattern in patterns):
                    continue
                confidence = base
            preds.append(self._keyword_prediction(vuln_type, confidence, risk, thresholds))

        # Generic configuration hint: only for texts longer than 50 characters.
        config_words = ('config', 'setting', 'header', 'permission')
        if len(text) > 50 and any(word in text_lower for word in config_words):
            preds.append(self._keyword_prediction('Secure Config', 0.55, 'Low', thresholds))

        return preds

    def explain_prediction(self, text, top_k=10):
        """Return the ``top_k`` keywords found in ``text``, ranked by importance.

        Importance is the keyword's base weight, plus 0.1 per occurrence
        (capped at 0.3), plus 0.1 when any context term occurs in the text;
        the total is capped at 1.0.

        Returns:
            ``{'features': [...]}`` where each entry has the keys
            ``feature``, ``importance``, ``in_text`` and ``count``.
        """
        text_lower = (text or "").lower()

        # The context boost does not depend on the keyword, so compute it once.
        has_context = any(ctx in text_lower for ctx in self._CONTEXT_TERMS)
        context_boost = 0.1 if has_context else 0

        features = []
        for word, base_importance in self._EXPLANATION_KEYWORDS.items():
            count = text_lower.count(word)
            if count == 0:
                continue
            frequency_boost = min(count * 0.1, 0.3)
            adjusted_importance = base_importance + frequency_boost + context_boost
            features.append({
                'feature': word,
                'importance': float(min(adjusted_importance, 1.0)),
                'in_text': True,
                'count': count,
            })

        features.sort(key=lambda x: x['importance'], reverse=True)
        return {'features': features[:top_k]}
|
|
|
|
|
|
|
|
# Module-level classifier instance; constructing it runs load_models().
classifier = CalibratedVulnerabilityClassifier(
    use_embeddings=True,
    model_path_prefix="models/",
)
|
|
|
|
|
|
|
|
class PortScanner:
    """TCP connect scanner over a fixed table of well-known ports."""

    def __init__(self):
        # Port number -> service name; quick_scan probes exactly these ports.
        self.common_ports = {
            21: 'FTP', 22: 'SSH', 23: 'Telnet', 25: 'SMTP', 53: 'DNS',
            80: 'HTTP', 110: 'POP3', 443: 'HTTPS', 993: 'IMAPS',
            995: 'POP3S', 1433: 'MSSQL', 3306: 'MySQL', 3389: 'RDP',
            5432: 'PostgreSQL', 5900: 'VNC', 27017: 'MongoDB',
            8080: 'HTTP-Alt', 8443: 'HTTPS-Alt', 9200: 'Elasticsearch',
            11211: 'Memcached', 6379: 'Redis', 5984: 'CouchDB',
        }

        # Port number -> note attached to the result when that port is open.
        self.vulnerable_ports = {
            21: 'FTP - Anonymous access possible',
            23: 'Telnet - Unencrypted communication',
            80: 'HTTP - Potential web vulnerabilities',
            443: 'HTTPS - SSL/TLS configuration issues',
            3389: 'RDP - Remote Desktop vulnerabilities',
            5900: 'VNC - Unencrypted remote access',
            8080: 'HTTP-Alt - Alternative web service',
            9200: 'Elasticsearch - Database exposure risk',
            11211: 'Memcached - Unauthenticated access',
            6379: 'Redis - Unauthenticated access',
        }

    def scan_port(self, host, port, timeout=2):
        """Try a TCP connection to ``host:port``.

        Returns:
            A dict with ``port``, ``status`` ('open'), ``service`` and
            ``vulnerability_note`` when the connection succeeds, otherwise
            ``None`` (closed, filtered, unresolvable or invalid target).
        """
        try:
            with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
                sock.settimeout(timeout)
                if sock.connect_ex((host, port)) != 0:
                    return None
        except (OSError, OverflowError, ValueError):
            # Best-effort probe: resolution failures, timeouts and
            # out-of-range ports all mean "not open".  Unlike a bare
            # ``except``, KeyboardInterrupt/SystemExit still propagate.
            return None

        return {
            'port': port,
            'status': 'open',
            'service': self.common_ports.get(port, 'Unknown'),
            'vulnerability_note': self.vulnerable_ports.get(port, ''),
        }

    def quick_scan(self, host, max_workers=20):
        """Scan every port in ``common_ports`` concurrently.

        Returns:
            The open-port dicts from ``scan_port``, sorted by port number.
        """
        open_ports = []

        with ThreadPoolExecutor(max_workers=max_workers) as executor:
            futures = [
                executor.submit(self.scan_port, host, port)
                for port in self.common_ports
            ]
            for future in as_completed(futures):
                result = future.result()
                if result:
                    open_ports.append(result)

        return sorted(open_ports, key=lambda x: x['port'])
|
|
|
|
|
|
|
|
class EnhancedPassiveAnalyzer: |
|
|
"""Enhanced website analyzer with port scanning""" |
|
|
def __init__(self, classifier):
    """Store the classifier and prepare a port scanner and an HTTP session.

    The session sends a desktop-browser User-Agent and retries up to three
    times on 429/500/502/503/504 for both http:// and https:// URLs.
    urllib3 warnings are disabled process-wide.
    """
    self.classifier = classifier
    self.port_scanner = PortScanner()

    browser_headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
    }
    session = requests.Session()
    session.headers.update(browser_headers)
    self.session = session

    adapter = HTTPAdapter(
        max_retries=Retry(
            total=3,
            backoff_factor=1,
            status_forcelist=[429, 500, 502, 503, 504],
        )
    )
    for scheme in ("http://", "https://"):
        session.mount(scheme, adapter)

    requests.packages.urllib3.disable_warnings()
|
|
|
|
|
def analyze_website(self, url, quick_mode=False, enable_port_scan=False):
    """Fetch ``url`` and run the passive checks on it.

    Args:
        url: target URL; ``https://`` is prepended when no http(s) scheme
            is present.
        quick_mode: use a shorter timeout and skip content analysis, the
            extra passive probes and the port scan.
        enable_port_scan: run the port scan (only when not in quick mode).

    Returns:
        The analysis dict.  On failure it carries an ``'error'`` key and
        whatever sections had been filled in before the failure.
    """
    analysis = {
        'url': url,
        'timestamp': pd.Timestamp.now().isoformat(),
        'quick_mode': quick_mode,
        'network_info': {},
        'content_analysis': {},
        'security_headers': {},
        'technologies': [],
        'vulnerability_predictions': [],
        'risk_assessment': {},
        'enhanced_features': [],
        'port_scan': {},
        'ssl_info': {}
    }

    try:
        if not url.startswith(('http://', 'https://')):
            url = 'https://' + url

        parsed_url = urlparse(url)
        # Use the bare host name: ``netloc`` may also contain "user:pass@"
        # and ":port", which would break DNS lookup, the TLS probe and the
        # port scan.
        domain = parsed_url.hostname
        if not parsed_url.netloc or not domain:
            analysis['error'] = "Invalid URL format"
            return analysis

        response = self.session.get(
            url,
            timeout=10 if quick_mode else 15,
            verify=False,
            allow_redirects=True
        )

        analysis['network_info'] = self.get_network_info(domain)
        analysis['ssl_info'] = self.get_ssl_info(domain)

        if enable_port_scan and not quick_mode:
            try:
                analysis['port_scan'] = self.port_scanner.quick_scan(domain)
            except Exception as e:
                analysis['port_scan'] = {'error': f'Port scan failed: {str(e)}'}

        analysis['security_headers'] = self.analyze_security_headers(response)
        analysis['technologies'] = self.detect_technologies(response)

        if not quick_mode:
            analysis['content_analysis'] = self.analyze_content(response)
            analysis['enhanced_features'] = self.extract_enhanced_passive_features(url, response)

        analysis['vulnerability_predictions'] = self.predict_vulnerabilities(analysis, quick_mode)
        analysis['risk_assessment'] = self.assess_risk(analysis)

    except requests.exceptions.Timeout:
        analysis['error'] = "Request timeout - site may be unavailable"
    except requests.exceptions.SSLError:
        # Listed before ConnectionError because SSLError is a subclass of it.
        analysis['error'] = "SSL certificate verification failed"
    except requests.exceptions.ConnectionError:
        analysis['error'] = "Connection error - site may be unreachable"
    except Exception as e:
        analysis['error'] = f"Analysis error: {str(e)}"

    return analysis
|
|
|
|
|
def get_network_info(self, domain):
    """Resolve ``domain`` to an IPv4 address.

    Returns:
        A dict with ``domain``, ``ip_address`` and ``resolved``.  When the
        lookup fails, ``ip_address`` is ``'Unresolvable'`` and ``resolved``
        is False.
    """
    try:
        ip = socket.gethostbyname(domain)
    except (OSError, ValueError):
        # OSError covers resolver failures (socket.gaierror/herror);
        # ValueError covers host names that cannot be encoded (UnicodeError)
        # or contain an embedded NUL.
        return {
            'domain': domain,
            'ip_address': 'Unresolvable',
            'resolved': False
        }
    return {
        'domain': domain,
        'ip_address': ip,
        'resolved': True
    }
|
|
|
|
|
def get_ssl_info(self, domain):
    """Open a verified TLS connection to ``domain:443`` and summarize its certificate.

    Returns:
        On success a dict with ``has_ssl`` (True), ``issuer``, ``subject``,
        ``expires_in_days`` and ``valid``.  On any failure (connection,
        handshake, verification, parsing) a dict with ``has_ssl`` False,
        ``valid`` False and the error text under ``error``.
    """
    try:
        context = ssl.create_default_context()
        with socket.create_connection((domain, 443), timeout=5) as sock:
            with context.wrap_socket(sock, server_hostname=domain) as ssock:
                cert = ssock.getpeercert()

        # ``notAfter`` is expressed in GMT.  Compare it in epoch seconds
        # rather than against the naive local clock, which skews the result
        # by the local UTC offset.
        seconds_left = ssl.cert_time_to_seconds(cert['notAfter']) - time.time()
        days_until_expiry = int(seconds_left // 86400)

        return {
            'has_ssl': True,
            'issuer': dict(x[0] for x in cert['issuer']) if isinstance(cert['issuer'], tuple) else str(cert['issuer']),
            'subject': dict(x[0] for x in cert['subject']) if isinstance(cert['subject'], tuple) else str(cert['subject']),
            'expires_in_days': days_until_expiry,
            'valid': days_until_expiry > 0
        }
    except Exception as e:
        return {
            'has_ssl': False,
            'valid': False,
            'error': str(e)
        }
|
|
|
|
|
def analyze_security_headers(self, response):
    """Report, for a fixed list of security headers, whether the response has each one.

    Returns:
        A dict keyed by header name; each value holds ``value`` (the
        header value or ``'MISSING'``), ``status`` (``'PRESENT'`` or
        ``'MISSING'``), ``purpose`` and ``required``.
    """
    # (header name, purpose, required)
    catalogue = (
        ('X-Frame-Options', 'Clickjacking protection', True),
        ('X-Content-Type-Options', 'MIME sniffing protection', True),
        ('Strict-Transport-Security', 'HTTPS enforcement', True),
        ('Content-Security-Policy', 'XSS protection', True),
        ('X-XSS-Protection', 'XSS protection', False),
        ('Referrer-Policy', 'Referrer info control', False),
        ('Permissions-Policy', 'Browser features control', False),
    )

    received = response.headers
    report = {}
    for name, purpose, required in catalogue:
        observed = received.get(name, 'MISSING')
        report[name] = {
            'value': observed,
            'status': 'MISSING' if observed == 'MISSING' else 'PRESENT',
            'purpose': purpose,
            'required': required,
        }
    return report
|
|
|
|
|
def detect_technologies(self, response):
    """Guess the technologies behind a response from its Server header and body.

    Returns:
        Technology names in detection order: the web server (if
        recognized) first, then frameworks/libraries in table order.  The
        order is deterministic, unlike the ``list(set(...))`` it replaces,
        and the names cannot repeat.
    """
    technologies = []
    server = response.headers.get('Server', '').lower()
    content = response.text.lower()

    # First matching server signature wins.
    server_signatures = (
        ('apache', 'Apache Web Server'),
        ('nginx', 'Nginx Web Server'),
        ('iis', 'Microsoft IIS'),
        ('cloudflare', 'Cloudflare'),
    )
    for marker, label in server_signatures:
        if marker in server:
            technologies.append(label)
            break

    # Substring markers looked for in the lower-cased response body.
    tech_patterns = {
        'WordPress': ['wp-content', 'wp-includes', 'wordpress'],
        'React': ['react', 'next.js', 'gatsby'],
        'Angular': ['angular', 'ng-'],
        'Vue.js': ['vue', 'vue.js'],
        'Django': ['django', 'csrfmiddleware'],
        'Laravel': ['laravel'],
        'PHP': ['.php', 'php/'],
        'jQuery': ['jquery'],
        'Bootstrap': ['bootstrap'],
        'Google Analytics': ['ga.js', 'google-analytics'],
        'Font Awesome': ['font-awesome']
    }

    technologies.extend(
        tech for tech, patterns in tech_patterns.items()
        if any(pattern in content for pattern in patterns)
    )

    return technologies
|
|
|
|
|
def analyze_content(self, response):
    """Parse the response body and collect simple content-based indicators.

    Returns:
        A dict with ``text_sample`` (first 800 characters of the page
        text), ``security_indicators``, and the counts of ``form``,
        ``script``, ``input`` and ``a`` elements.  If parsing fails, the
        counts are 0, the indicators are empty and ``text_sample`` holds
        the error text.
    """
    try:
        soup = BeautifulSoup(response.content, 'html.parser')
        text_content = soup.get_text()[:2000]
        lowered = text_content.lower()

        # HTML comments are not part of get_text(), so they must be searched
        # for in the raw markup.  Each comment is checked on its own so that
        # a keyword between two comments is not counted.
        comment_bodies = re.findall(r'<!--(.*?)-->', response.text, re.DOTALL)
        comments_with_info = any(
            re.search(r'password|key|token|admin', body, re.IGNORECASE)
            for body in comment_bodies
        )

        security_indicators = {
            # "[A-Za-z]" replaces "[A-Z|a-z]", which also accepted a literal "|".
            'exposed_emails': len(re.findall(r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b', text_content)),
            'php_errors': 'php' in lowered and any(err in lowered for err in ['error', 'warning', 'notice']),
            'database_errors': any(db in lowered for db in ['mysql', 'postgresql', 'oracle', 'sql server', 'database error']),
            'debug_info': any(term in lowered for term in ['debug', 'test', 'development', 'staging']),
            'exposed_paths': len(re.findall(r'/[\w/.-]+', text_content)) > 50,
            'comments_with_info': comments_with_info
        }

        return {
            'text_sample': text_content[:800],
            'security_indicators': security_indicators,
            'forms_count': len(soup.find_all('form')),
            'scripts_count': len(soup.find_all('script')),
            'inputs_count': len(soup.find_all('input')),
            'links_count': len(soup.find_all('a'))
        }
    except Exception as e:
        return {
            'text_sample': f'Content analysis failed: {str(e)}',
            'security_indicators': {},
            'forms_count': 0,
            'scripts_count': 0,
            'inputs_count': 0,
            'links_count': 0
        }
|
|
|
|
|
def extract_enhanced_passive_features(self, url, response):
    """Run the robots.txt, sitemap.xml and sensitive-file probes for ``url``.

    ``response`` is accepted for interface compatibility and is not used.

    Returns:
        The feature strings from all probes, in probe order.  A probe that
        raises contributes a ``"Feature extraction error: ..."`` entry
        instead of aborting the remaining probes.
    """
    features = []

    probes = (
        self.check_robots_txt,
        self.check_sitemap,
        self.check_sensitive_files,
    )
    for probe in probes:
        try:
            features.extend(probe(url))
        except Exception as e:
            features.append(f"Feature extraction error: {str(e)}")

    return features
|
|
|
|
|
def check_robots_txt(self, url):
    """Fetch the site's robots.txt and flag sensitive-looking paths in it.

    The file is requested from the site root (scheme + host), regardless
    of any path or query string in ``url``.

    Returns:
        A list that may contain ``"robots.txt present"`` and
        ``"sensitive paths exposed in robots.txt"``; empty when the file
        is absent or the request fails.
    """
    features = []

    parts = urlparse(url)
    if parts.scheme and parts.netloc:
        robots_url = f"{parts.scheme}://{parts.netloc}/robots.txt"
    else:
        # No scheme/host to anchor on: fall back to appending to the given URL.
        robots_url = f"{url.rstrip('/')}/robots.txt"

    try:
        response = self.session.get(robots_url, timeout=3, verify=False)
    except requests.RequestException:
        # Best-effort probe: an unreachable robots.txt is not a finding.
        return features

    if response.status_code == 200:
        features.append("robots.txt present")
        content = response.text.lower()
        sensitive_paths = ['admin', 'login', 'config', 'backup', 'database', 'sql']
        if any(path in content for path in sensitive_paths):
            features.append("sensitive paths exposed in robots.txt")

    return features
|
|
|
|
|
def check_sitemap(self, url):
    """Check whether the site serves ``/sitemap.xml``.

    The file is requested from the site root (scheme + host), regardless
    of any path or query string in ``url``.

    Returns:
        ``["sitemap.xml present"]`` on HTTP 200, otherwise an empty list
        (also when the request fails).
    """
    features = []

    parts = urlparse(url)
    if parts.scheme and parts.netloc:
        sitemap_url = f"{parts.scheme}://{parts.netloc}/sitemap.xml"
    else:
        # No scheme/host to anchor on: fall back to appending to the given URL.
        sitemap_url = f"{url.rstrip('/')}/sitemap.xml"

    try:
        response = self.session.get(sitemap_url, timeout=3, verify=False)
    except requests.RequestException:
        # Best-effort probe: a failed request is simply "not found".
        return features

    if response.status_code == 200:
        features.append("sitemap.xml present")

    return features
|
|
|
|
|
def check_sensitive_files(self, url, max_files=3):
    """Probe for a few well-known sensitive files below ``url``.

    Args:
        url: base URL; its query string and fragment are ignored when
            building the probe URLs.
        max_files: how many entries of the candidate list to probe, taken
            from the front.  The default of 3 keeps the original request
            budget; ``None`` probes every candidate.

    Returns:
        One ``"sensitive file accessible: <name>"`` entry per candidate
        that answered with HTTP 200.
    """
    features = []
    sensitive_files = [
        '.env', 'config.php', 'backup.sql', 'wp-config.php',
        'web.config', '.git/config', 'phpinfo.php'
    ]

    parts = urlparse(url)
    if parts.scheme and parts.netloc:
        # Drop query/fragment so they do not end up in the middle of the probe URL.
        base = f"{parts.scheme}://{parts.netloc}{parts.path}".rstrip('/')
    else:
        base = url.rstrip('/')

    for filename in sensitive_files[:max_files]:
        try:
            response = self.session.get(f"{base}/{filename}", timeout=2, verify=False)
        except requests.RequestException:
            # Best-effort probe: skip candidates that cannot be fetched.
            continue
        if response.status_code == 200:
            features.append(f"sensitive file accessible: {filename}")

    return features
|
|
|
|
|
def predict_vulnerabilities(self, analysis, quick_mode=False):
    """Turn ``analysis`` into feature text and ask the classifier for predictions.

    Returns an empty list when the feature text is empty or the classifier
    raises (the error is printed).  ``quick_mode`` is accepted but not used.
    """
    feature_text = self.create_feature_text(analysis)
    if not feature_text:
        return []

    try:
        predictions = self.classifier.get_meaningful_predictions(feature_text)
    except Exception as e:
        print(f"Prediction error: {e}")
        predictions = []
    return predictions
|
|
|
|
|
def create_feature_text(self, analysis):
    """Flatten the findings in ``analysis`` into one space-joined string.

    Fragments are added in this order: page text sample and content
    indicators, detected technologies, missing required security headers,
    enhanced passive features, open ports that carry a vulnerability note,
    and an SSL remark when ``ssl_info['valid']`` is not truthy.
    """
    fragments = []

    if 'content_analysis' in analysis:
        page = analysis['content_analysis']
        fragments.append(page.get('text_sample', ''))

        flags = page.get('security_indicators', {})
        if flags.get('php_errors'):
            fragments.append("php error messages exposed")
        if flags.get('database_errors'):
            fragments.append("database errors visible")
        if flags.get('exposed_emails', 0) > 0:
            fragments.append(f"{flags['exposed_emails']} emails exposed")
        if flags.get('comments_with_info'):
            fragments.append("sensitive information in comments")

    # Always added, even when no technology was detected (empty string).
    fragments.append(" ".join(analysis.get('technologies', [])))

    absent_required = []
    for header_name, details in analysis.get('security_headers', {}).items():
        if details.get('status') == 'MISSING' and details.get('required', False):
            absent_required.append(header_name)
    if absent_required:
        fragments.append(f"missing security headers: {', '.join(absent_required)}")

    fragments.extend(analysis.get('enhanced_features', []))

    # 'port_scan' may be a dict (empty or an error report); only a non-empty
    # list of port results is considered.
    scan = analysis.get('port_scan', [])
    if scan and isinstance(scan, list):
        flagged = [entry['port'] for entry in scan if entry.get('vulnerability_note')]
        if flagged:
            fragments.append(f"vulnerable ports open: {flagged}")

    if not analysis.get('ssl_info', {}).get('valid', False):
        fragments.append("ssl certificate issues")

    return " ".join(fragments)
|
|
|
|
|
def assess_risk(self, analysis):
    """Score an analysis result and bucket it into a risk level.

    Points are added per finding:

    * 12 per required security header reported as ``MISSING``
    * 25 for exposed PHP errors, 30 for database errors, 5 per exposed
      e-mail address, 20 for sensitive HTML comments
    * 40 / 25 / 15 per predicted vulnerability with risk level
      ``Critical`` / ``High`` / ``Medium`` (other levels add nothing)
    * 10 per open port carrying a ``vulnerability_note``
    * 20 when ``ssl_info`` is absent or not marked valid

    Args:
        analysis: Dict of analysis results; every key is optional.

    Returns:
        dict: ``level`` (``CRITICAL``/``HIGH``/``MEDIUM``/``LOW``/``MINIMAL``),
        ``score`` (capped at 100), ``color`` (hex string for the level) and
        ``factors`` (human-readable reasons, in the order listed above).
    """
    risk_score = 0
    factors = []

    # Missing required security headers.
    missing_headers = sum(
        1
        for info in (analysis.get('security_headers') or {}).values()
        if info.get('status') == 'MISSING' and info.get('required', False)
    )
    if missing_headers > 0:
        risk_score += missing_headers * 12
        factors.append(f"Missing {missing_headers} critical security headers")

    # Information leaks found in the page content.
    content = analysis.get('content_analysis') or {}
    indicators = content.get('security_indicators') or {}

    if indicators.get('php_errors'):
        risk_score += 25
        factors.append("PHP errors exposed to users")
    if indicators.get('database_errors'):
        risk_score += 30
        factors.append("Database errors visible")
    exposed_emails = indicators.get('exposed_emails') or 0
    if exposed_emails > 0:
        risk_score += exposed_emails * 5
        factors.append(f"{exposed_emails} email addresses exposed")
    if indicators.get('comments_with_info'):
        risk_score += 20
        factors.append("Sensitive information in HTML comments")

    # Predicted vulnerabilities, counted per risk level in a single pass.
    # Predictions without a recognised 'risk_level' are ignored rather than
    # raising KeyError.
    level_counts = {'Critical': 0, 'High': 0, 'Medium': 0}
    for prediction in analysis.get('vulnerability_predictions') or []:
        prediction_level = prediction.get('risk_level')
        if prediction_level in level_counts:
            level_counts[prediction_level] += 1

    for level_name, weight, label in (
        ('Critical', 40, 'critical-risk'),
        ('High', 25, 'high-risk'),
        ('Medium', 15, 'medium-risk'),
    ):
        count = level_counts[level_name]
        if count > 0:
            risk_score += count * weight
            factors.append(f"{count} {label} vulnerabilities predicted")

    # Open ports flagged with a vulnerability note.
    open_ports = analysis.get('port_scan', [])
    if open_ports and isinstance(open_ports, list):
        vulnerable_ports = [p for p in open_ports if p.get('vulnerability_note')]
        if vulnerable_ports:
            risk_score += len(vulnerable_ports) * 10
            factors.append(f"{len(vulnerable_ports)} potentially vulnerable ports open")

    # A missing ssl_info entry is treated the same as an invalid certificate.
    ssl_info = analysis.get('ssl_info') or {}
    if not ssl_info.get('valid', False):
        risk_score += 20
        factors.append("SSL certificate issues detected")

    # The level is chosen from the uncapped score; only the reported score is
    # clamped to 100.
    level, color = "MINIMAL", "#16a34a"
    for minimum, candidate_level, candidate_color in (
        (85, "CRITICAL", "#dc2626"),
        (65, "HIGH", "#ea580c"),
        (45, "MEDIUM", "#d97706"),
        (20, "LOW", "#2563eb"),
    ):
        if risk_score >= minimum:
            level, color = candidate_level, candidate_color
            break

    return {
        'level': level,
        'score': min(risk_score, 100),
        'color': color,
        'factors': factors,
    }
|
|
|
|
|
|
|
|
# Module-level analyzer instance built around the module-level ``classifier``;
# the dashboard's analysis callback calls ``analyzer.analyze_website``.
analyzer = EnhancedPassiveAnalyzer(classifier)
|
|
|
|
|
|
|
|
def create_confidence_chart(result):
    """Render the per-vulnerability confidence chart and save it as a PNG.

    Draws one horizontal bar per prediction (at most the first eight entries
    of ``result['vulnerability_predictions']``), coloured by risk level, with
    a dashed marker at each prediction's threshold. When there are no
    predictions a placeholder message is drawn instead.

    Args:
        result: Analysis result dict. Each prediction must provide ``type``,
            ``confidence`` and ``risk_level``; ``threshold`` defaults to 0.3
            when absent.

    Returns:
        str: Path of the written image, always ``"conf_chart.png"`` (relative
        to the current working directory, overwritten on every call).
    """
    chart_path = "conf_chart.png"
    vulns = (result.get('vulnerability_predictions') or [])[:8]

    plt.style.use('default')
    fig, ax = plt.subplots(figsize=(14, 8))

    try:
        if not vulns:
            # Nothing to plot: show a friendly placeholder on a blank canvas.
            ax.text(0.5, 0.5, 'No vulnerabilities detected\nAll systems secure!',
                    ha='center', va='center', transform=ax.transAxes, fontsize=18,
                    bbox=dict(boxstyle="round,pad=0.5", facecolor="#d1fae5",
                              edgecolor="#10b981", alpha=0.8))
            ax.set_xlim(0, 1)
            ax.set_ylim(0, 1)
            ax.axis('off')
        else:
            risk_colors = {
                'Critical': '#991b1b',
                'High': '#dc2626',
                'Medium': '#ea580c',
                'Low': '#2563eb',
            }

            vuln_types = [v['type'] for v in vulns]
            confidences = [v['confidence'] for v in vulns]
            thresholds = [v.get('threshold', 0.3) for v in vulns]
            # Unknown risk levels fall back to neutral grey.
            colors = [risk_colors.get(v['risk_level'], '#6b7280') for v in vulns]

            y_pos = np.arange(len(vulns))
            bar_height = 0.6

            bars = ax.barh(y_pos, confidences, color=colors, alpha=0.85,
                           height=bar_height, label='Confidence')

            # Threshold markers. vlines works in data coordinates, so each
            # marker spans exactly its own bar (axvline's ymin/ymax are
            # fractions of the axes height and would not line up with bars).
            ax.vlines(thresholds, y_pos - bar_height / 2, y_pos + bar_height / 2,
                      colors='#6b7280', linestyles='--', alpha=0.8, linewidth=2)
            for row, threshold in enumerate(thresholds):
                ax.text(threshold + 0.01, row, f'Threshold: {threshold:.0%}',
                        va='center', fontsize=9, color='#6b7280', fontweight='bold')

            ax.set_yticks(y_pos)
            ax.set_yticklabels(vuln_types, fontsize=12, fontweight='bold')
            ax.set_xlabel('Confidence Score', fontsize=14, fontweight='bold', color='#374151')
            ax.set_title('Vulnerability Confidence Analysis', fontsize=16, fontweight='bold',
                         color='#1f2937', pad=20)
            # Leave room to the right of a 100% bar for its value label.
            ax.set_xlim(0, 1.1)

            for spine in ['top', 'right']:
                ax.spines[spine].set_visible(False)
            ax.spines['left'].set_color('#d1d5db')
            ax.spines['bottom'].set_color('#d1d5db')

            ax.grid(axis='x', alpha=0.3, linestyle='--', color='#9ca3af')
            ax.set_axisbelow(True)

            for bar, confidence, vuln in zip(bars, confidences, vulns):
                width = bar.get_width()
                y_center = bar.get_y() + bar.get_height() / 2

                # Confidence value just past the end of the bar.
                ax.text(width + 0.02, y_center, f'{confidence:.1%}',
                        ha='left', va='center', fontweight='bold',
                        fontsize=11, color='#1f2937')

                # Risk level inside the bar, only when the bar is wide enough.
                if width > 0.15:
                    ax.text(width / 2, y_center, vuln['risk_level'],
                            ha='center', va='center',
                            fontweight='bold', fontsize=10, color='white')

            legend_elements = [
                plt.Rectangle((0, 0), 1, 1, facecolor=color, alpha=0.85, label=level)
                for level, color in risk_colors.items()
            ]
            ax.legend(handles=legend_elements, loc='lower right', framealpha=0.9)

        fig.tight_layout()
        fig.savefig(chart_path, bbox_inches='tight', dpi=150, facecolor='white',
                    edgecolor='none', transparent=False, pad_inches=0.1)
    finally:
        # Always release the figure, even if drawing or saving failed.
        plt.close(fig)

    return chart_path
|
|
|
|
|
# Static "Developed By" footer appended to every report.
_RESULTS_FOOTER_HTML = """
<div style="background:#1f2937;color:white;padding:30px 20px 20px 20px;border-radius:12px;margin:20px 0 0 0;font-size:1.1rem;box-shadow:0 4px 12px rgba(0,0,0,0.15);line-height:1.6;">
<h3 style="font-size:1.8rem;color:#fbbf24;text-align:center;margin-bottom:20px;border-bottom:2px solid #374151;padding-bottom:10px;">Developed By</h3>

<div style="display:grid;grid-template-columns:repeat(auto-fit, minmax(300px, 1fr));gap:20px;margin-bottom:20px;">
<div style="background:#374151;padding:15px;border-radius:8px;">
<strong style="color:#e5e7eb;">👨💻 Developer:</strong>
<div style="color:#f9fafb;margin-top:5px;">Chenna Kesava Reddy Yenugu</div>
</div>

<div style="background:#374151;padding:15px;border-radius:8px;">
<strong style="color:#e5e7eb;">🎯 Model Accuracy:</strong>
<div style="background:#2563eb;color:#fff;border-radius:8px;padding:6px 12px;margin-top:5px;display:inline-block;font-weight:600;">
Industry ML/AI Practices Applied, Human Calibrated
</div>
</div>
</div>

<div style="display:grid;grid-template-columns:repeat(auto-fit, minmax(250px, 1fr));gap:15px;">
<div style="background:#374151;padding:12px;border-radius:8px;">
<strong style="color:#e5e7eb;">🌐 Portfolio:</strong>
<div style="margin-top:5px;">
<a href='https://mycareer.ccbp.tech/' target='_blank' style='color:#60a5fa;font-weight:600;text-decoration:none;'>https://mycareer.ccbp.tech/</a>
</div>
</div>

<div style="background:#374151;padding:12px;border-radius:8px;">
<strong style="color:#e5e7eb;">💼 LinkedIn:</strong>
<div style="margin-top:5px;">
<a href='https://www.linkedin.com/in/ychennakesavareddy' target='_blank' style='color:#34d399;font-weight:600;text-decoration:none;'>ychennakesavareddy</a>
</div>
</div>

<div style="background:#374151;padding:12px;border-radius:8px;">
<strong style="color:#e5e7eb;">🤗 HuggingFace:</strong>
<div style="margin-top:5px;">
<a href='https://huggingface.co/yenugu' target='_blank' style='color:#a3e635;font-weight:600;text-decoration:none;'>yenugu</a>
</div>
</div>

<div style="background:#374151;padding:12px;border-radius:8px;">
<strong style="color:#e5e7eb;">🐙 GitHub:</strong>
<div style="margin-top:5px;">
<a href='https://github.com/ychennakesavareddy' target='_blank' style='color:#60a5fa;font-weight:600;text-decoration:none;'>ychennakesavareddy</a>
</div>
</div>
</div>

<div style="display:grid;grid-template-columns:repeat(auto-fit, minmax(250px, 1fr));gap:15px;margin-top:15px;">
<div style="background:#374151;padding:12px;border-radius:8px;">
<strong style="color:#e5e7eb;">🐦 Twitter:</strong>
<div style="margin-top:5px;">
<a href='https://twitter.com/chenna332003' target='_blank' style='color:#38bdf8;font-weight:600;text-decoration:none;'>@chenna332003</a>
</div>
</div>

<div style="background:#374151;padding:12px;border-radius:8px;grid-column:span 2;">
<strong style="color:#e5e7eb;">📧 Email:</strong>
<div style="margin-top:5px;color:#f5d558;">
<a href='mailto:chenna.yenugu.tech@zohomail.in' style='color:#f5d558;text-decoration:none;font-weight:600;'>chenna.yenugu.tech@zohomail.in</a> |
<a href='mailto:c.yenugu.tech@gmail.com' style='color:#f5d558;text-decoration:none;font-weight:600;'>c.yenugu.tech@gmail.com</a>
</div>
</div>
</div>
</div>
"""


def format_results(result):
    """Render an analysis result as the dashboard's HTML report.

    Sections are emitted in this order: request banner, risk assessment,
    confidence chart (or an error box if chart generation fails), predicted
    vulnerabilities (first eight), port scan results (first ten), detected
    technologies, missing required security headers, explainability insights,
    a "copy JSON" button and the static footer.

    Every value taken from ``result`` is HTML-escaped before interpolation,
    because it may originate from user input or from the scanned site. The
    one exception is ``result['explainability_data']``, which is inserted
    verbatim as HTML.

    Args:
        result: Analysis result dict. All keys are optional; each prediction
            in ``vulnerability_predictions`` must provide ``confidence``,
            ``type`` and ``risk_level``.

    Returns:
        tuple: ``(html, risk_score, json_report, chart_path)`` where
        ``risk_score`` is ``result['risk_assessment']['score']`` (0 when
        absent), ``json_report`` is ``json.dumps(result, indent=2)`` and
        ``chart_path`` is the chart image path, or ``None`` when the chart
        could not be generated.

    Raises:
        TypeError: Propagated from ``json.dumps`` when ``result`` holds values
            it cannot serialise.
    """
    from html import escape  # stdlib; imported locally to keep this block self-contained

    def esc(value):
        """Return ``value`` as text that is safe in HTML bodies and attributes."""
        return escape(str(value), quote=True)

    risk = result.get('risk_assessment', {})
    risk_color = esc(risk.get('color', 'gray'))
    risk_level = esc(risk.get('level', 'UNKNOWN'))
    score = risk.get('score', 0)
    score_text = esc(score)
    mode = "QUICK" if result.get('quick_mode') else "FULL"

    sections = []

    # --- Banner summarising the request ---
    sections.append(f"""
<div style="padding:20px;border-radius:12px;background: linear-gradient(135deg, #2d3748 0%, #1e293b 100%); color:white; margin-bottom: 20px; box-shadow:0 4px 12px rgba(0,0,0,0.15);">
<h2 style="color: black;background: #fff;padding:15px 20px;border-radius:10px;font-size:2.6rem;margin-bottom:1rem;font-weight:800;text-shadow:0 2px 10px rgba(0,0,0,0.1);text-align:center;">🔒 Security AI Vulnerability Analyzer</h2>
<div style="color:black;background:#f8fafc;font-size:1.3rem;border-radius:12px;padding:6px 15px;font-weight:900;margin-bottom:12px;letter-spacing:0.8px;border:2px solid #e2e8f0;text-align:center;">Ethical ML for Security Research</div>
<div style="display: grid; grid-template-columns: 1fr 1fr; gap: 15px; margin-top:15px;color:white;">
<div><b>Input:</b> {esc(result.get('input_data', 'N/A'))}</div>
<div><b>Type:</b> {esc(str(result.get('input_type', 'unknown')).upper())}</div>
<div><b>Mode:</b> {mode}</div>
<div><b>Timestamp:</b> {esc(result.get('timestamp', 'N/A'))}</div>
</div>
</div>
""")

    # --- Risk assessment ---
    factor_rows = ''.join(
        '<div style="margin:8px 0;padding:8px 12px;background:rgba(255,255,255,0.1);'
        'border-radius:8px;border-left:4px solid white;">'
        f'• {esc(factor)}</div>'
        for factor in risk.get('factors', [])
    )
    sections.append(f"""
<div style="padding:20px; margin:20px 0; border-radius:12px; background:{risk_color}; color:white; box-shadow:0 4px 12px rgba(0,0,0,0.1);">
<h3 style="margin:0 0 15px 0; font-size:1.6rem;text-align:center;">Risk Assessment: {risk_level}</h3>
<div style="text-align:center;margin-bottom:15px;">
<strong style="font-size:1.2rem;">Risk Score: {score_text}/100</strong>
</div>
<div style="width:100%;background:rgba(255,255,255,0.2);border-radius:12px;margin:12px 0;overflow:hidden;">
<div style="width:{score_text}%;height:25px;background:white;border-radius:12px;transition:width 0.5s ease;display:flex;align-items:center;justify-content:center;">
<span style="color:{risk_color};font-weight:bold;font-size:0.9rem;">{score_text}%</span>
</div>
</div>
<div style="margin-top:15px;">
<b style="font-size:1.1rem;">Key Factors:</b>
{factor_rows}
</div>
</div>
""")

    # --- Confidence chart (best effort: a failure becomes an error box) ---
    try:
        chart_path = create_confidence_chart(result)
    except Exception as e:
        chart_path = None
        sections.append(f"""
<div style="background:#fef2f2;padding:20px; border-radius:12px;margin:20px 0; border:2px solid #fecaca;">
<h3 style="color:#dc2626; margin:0 0 15px 0;text-align:center;">❌ Chart Generation Error</h3>
<p style="color:#7f1d1d;text-align:center;">Unable to generate confidence chart: {esc(e)}</p>
</div>
""")
    else:
        sections.append(f"""
<div style="background:white;padding:20px; border-radius:12px;margin:20px 0; border:1px solid #e2e8f0;box-shadow:0 4px 12px rgba(0,0,0,0.05);">
<h3 style="color:#1a202c; margin:0 0 15px 0;text-align:center;">📊 Vulnerability Confidence Analysis</h3>
<img src="file/{esc(chart_path)}" style="width:100%; border-radius:10px; box-shadow:0 4px 12px rgba(0,0,0,0.1);">
</div>
""")

    # --- Predicted vulnerabilities ---
    vulns = result.get('vulnerability_predictions', [])
    if vulns:
        sections.append("""<div style="background:white;padding:20px;border-radius:12px;margin:20px 0; border:1px solid #e2e8f0;box-shadow:0 4px 12px rgba(0,0,0,0.05);">
<h3 style="color:#2d3748; margin:0 0 15px 0;text-align:center;">🚨 Predicted Vulnerabilities</h3>""")
        for vuln in vulns[:8]:
            confidence = vuln['confidence']
            if confidence > 0.8:
                color, emoji = "#991b1b", "💀"
            elif confidence > 0.65:
                color, emoji = "#dc2626", "🔴"
            elif confidence > 0.45:
                color, emoji = "#ea580c", "🟠"
            else:
                color, emoji = "#2563eb", "🔵"

            threshold = vuln.get('threshold', 0.3)
            # Fall back to comparing against the threshold when the
            # prediction does not carry an explicit flag.
            above = vuln.get('above_threshold', confidence >= threshold)
            status_color = '#22c55e' if above else '#ef4444'
            status_text = "✅ ABOVE THRESHOLD" if above else "⚠️ BELOW THRESHOLD"

            sections.append(f"""
<div style="padding:15px;margin:10px 0; border-left:6px solid {color};background:#f7fafc;border-radius:10px;border:1px solid #e2e8f0;transition:transform 0.2s;">
<div style="display:flex;justify-content:space-between;align-items:center;flex-wrap:wrap;gap:10px;">
<b style="color:#2d3748;font-size:1.2rem;flex:1;">{emoji} {esc(vuln['type'])}</b>
<span style="color:{color};font-weight:600;font-size:1.1rem;">{confidence:.1%}</span>
<span style="background:{color};color:white;padding:6px 15px;border-radius:20px;font-size:0.9rem;font-weight:600;">{esc(vuln['risk_level'])} Risk</span>
</div>
<div style="margin-top:8px;color:#4a5568;font-size:0.95rem;display:flex;justify-content:space-between;flex-wrap:wrap;gap:10px;">
<span>Threshold: {threshold:.1%}</span>
<span style="color:{status_color};font-weight:600;">
{status_text}
</span>
</div>
</div>
""")
        sections.append("</div>")
    else:
        sections.append("""<div style="background:#f0fdf4;padding:20px;border-radius:12px;margin:20px 0; border:2px solid #bbf7d0;">
<h3 style="color:#166534; margin:0 0 12px 0;text-align:center;">✅ No Significant Vulnerabilities Detected</h3>
<p style="color:#3f6212;text-align:center;font-size:1.1rem;">All predictions are below calibrated confidence thresholds or no vulnerabilities were identified in the input.</p>
</div>""")

    # --- Port scan results ---
    open_ports = result.get('port_scan')
    if open_ports and isinstance(open_ports, list):
        sections.append("""<div style="background:white;padding:20px;border-radius:12px;margin:20px 0; border:1px solid #e2e8f0;">
<h3 style="color:#2d3748; margin:0 0 15px 0;text-align:center;">🌐 Port Scan Results</h3>
<div style="display: grid; grid-template-columns: repeat(auto-fit, minmax(220px, 1fr)); gap: 12px;">""")
        for port in open_ports[:10]:
            vuln_note = port.get('vulnerability_note', '')
            border_color = '#ef4444' if vuln_note else '#22c55e'
            bg_color = '#fef2f2' if vuln_note else '#f0fdf4'
            note_html = (
                '<div style="color:#dc2626;font-size:0.85rem;margin-top:6px;padding:4px 8px;'
                f'background:#fecaca;border-radius:6px;">⚠️ {esc(vuln_note)}</div>'
                if vuln_note else ''
            )
            sections.append(f"""
<div style="padding:12px; border:2px solid {border_color}; border-radius:10px; background:{bg_color};">
<div style="font-weight:bold;color:#1e293b;font-size:1.1rem;">Port {esc(port.get('port', 'unknown'))}</div>
<div style="color:#475569;margin:4px 0;">Service: {esc(port.get('service', 'unknown'))}</div>
<div style="color:#16a34a;font-size:0.9rem;font-weight:600;">Status: OPEN</div>
{note_html}
</div>
""")
        sections.append("</div></div>")

    # --- Detected technologies ---
    technologies = result.get('technologies')
    if technologies:
        tech_badges = ''.join(
            '<span style="background:#e0e7ff;color:#3730a3;padding:8px 16px;border-radius:20px;'
            f'font-weight:600;border:1px solid #c7d2fe;">{esc(tech)}</span>'
            for tech in technologies
        )
        sections.append(f"""<div style="background:white;padding:18px;border-radius:12px;margin:18px 0; border:1px solid #e2e8f0;">
<h3 style="color:#3730a3;font-size:1.3rem;margin:0 0 12px 0;text-align:center;">🛠️ Technologies Detected</h3>
<div style="margin-top:12px; display: flex; flex-wrap:wrap; gap:10px;justify-content:center;">
{tech_badges}
</div></div>""")

    # --- Missing required security headers ---
    missing = [
        header
        for header, info in (result.get('security_headers') or {}).items()
        if info.get('status') == 'MISSING' and info.get('required', False)
    ]
    if missing:
        sections.append(f"""<div style="background:#fef2f2;padding:18px;border-radius:12px;margin:18px 0; border:2px solid #fecaca;">
<h3 style="color:#dc2626;margin:0 0 12px 0;text-align:center;">⚠️ Missing Critical Security Headers</h3>
<div style="margin-top:10px;color:#7f1d1d;text-align:center;">
{esc(', '.join(str(header) for header in missing))}
</div>
</div>""")

    # --- Explainability insights (already HTML, inserted verbatim) ---
    explainability_html = result.get('explainability_data')
    if explainability_html:
        sections.append(f"""
<details style='margin:20px 0;border:1px solid #e2e8f0;border-radius:12px;overflow:hidden;'>
<summary style='cursor:pointer;padding:16px;background:#f8fafc;font-weight:bold;color:#000000;font-size:1.1rem;list-style:none;'>
🔍 Show Explainability Insights
</summary>
<div style='margin-top:0;color:#000000;padding:20px;background:#ffffff;border-top:1px solid #e2e8f0;'>
{explainability_html}
</div>
</details>
""")

    # --- Export button ---
    # The JSON is embedded in a double-quoted attribute, so it must be
    # entity-escaped; the browser decodes the entities before running the JS.
    embedded_json = esc(json.dumps(result))
    sections.append(f"""
<div style="background:#fff;padding:20px; border-radius:12px; margin:20px 0; border:1px solid #e2e8f0;box-shadow:0 4px 12px rgba(0,0,0,0.05);text-align:center;">
<h3 style="color:#1e293b;font-size:1.3rem;margin:0 0 15px 0;">📤 Export Results</h3>
<button onclick="navigator.clipboard.writeText(JSON.stringify({embedded_json},null,2));alert('JSON report copied to clipboard!');"
style="padding:12px 24px; background: #2563eb; color: white; border: none; border-radius: 8px; font-size:16px;cursor:pointer;font-weight:600;transition:background 0.3s;">
📋 Copy JSON Report
</button>
</div>
""")

    sections.append(_RESULTS_FOOTER_HTML)

    return "".join(sections), score, json.dumps(result, indent=2), chart_path
|
|
|
|
|
def format_error(error_msg):
    """Build the dashboard outputs shown when an analysis fails.

    Args:
        error_msg: Message to display. It is HTML-escaped before being
            embedded, since it may contain exception text or user input.

    Returns:
        tuple: ``(html, 0, "{}", None)`` — the error panel markup, a zero risk
        score, an empty JSON report and no chart path.
    """
    from html import escape  # stdlib; imported locally to keep this block self-contained

    safe_message = escape(str(error_msg), quote=True)
    error_panel = f"""<div style="padding:25px;border-radius:12px;background:#fef2f2;border-left:6px solid #dc2626;border:2px solid #fecaca;text-align:center;">
<h2 style="color:#dc2626;margin:0 0 15px 0;font-size:1.8rem;">❌ Analysis Error</h2>
<b style="color:#7f1d1d;font-size:1.2rem;">{safe_message}</b>
<p style="color:#991b1b;margin:15px 0 0 0;font-size:1.1rem;">Please check your input and try again.</p></div>"""
    return error_panel, 0, "{}", None
|
|
|
|
|
def generate_explainability(input_data, advanced_explainability):
    """Render the classifier's top influencing features as an HTML card grid.

    Args:
        input_data: Text passed to ``classifier.explain_prediction`` (called
            with ``top_k=10``).
        advanced_explainability: Accepted for interface compatibility; not
            used by this function.

    Returns:
        str: An HTML fragment with one card per feature (name, importance,
        count and whether it occurs in the text), or a short "no significant
        features" paragraph when the explanation is empty.
    """
    from html import escape  # stdlib; imported locally to keep this block self-contained

    explanation = classifier.explain_prediction(input_data, top_k=10)
    if not (explanation and explanation.get('features')):
        return "<p style='color:#000000;padding:20px;background:#f8fafc;border-radius:10px;text-align:center;font-size:1.1rem;'>No significant features identified in the input.</p>"

    cards = []
    for feature in explanation['features']:
        importance = feature['importance']
        # One threshold ladder drives both the accent colour and the icon.
        if importance > 0.7:
            color, icon = "#16a34a", "📈"
        elif importance > 0.5:
            color, icon = "#ca8a04", "📊"
        else:
            color, icon = "#dc2626", "📉"

        feature_name = escape(str(feature['feature']).title(), quote=True)
        count = escape(str(feature.get('count', 1)), quote=True)
        in_text_mark = '✅' if feature.get('in_text') else '❌'

        cards.append(f"""<div style="background:white;padding:15px;border-radius:10px;border-left:5px solid {color};border:1px solid #e2e8f0;box-shadow:0 2px 8px rgba(0,0,0,0.1);">
<div style="font-weight:bold;color:#000000;margin-bottom:6px;font-size:1.1rem;">{icon} {feature_name}</div>
<div style="color:#475569;font-size:1rem;">Importance: <b style="color:{color};font-size:1.1rem;">{importance:.3f}</b></div>
<div style="color:#64748b;font-size:0.9rem;">Count: {count} • In text: {in_text_mark}</div>
</div>""")

    header = "<h4 style='color:#000000;margin-bottom:15px;font-size:1.3rem;text-align:center;'>Top Influencing Features</h4><div style='display:grid;grid-template-columns:repeat(auto-fit, minmax(280px, 1fr));gap:15px;'>"
    return header + "".join(cards) + "</div>"
|
|
|
|
|
# --- Static content used by create_enhanced_dashboard ----------------------

_LEGAL_NOTICE_MD = """
## ⚠️ Legal & Ethical Notice

**Security AI is for authorized security research only.**
By using this tool, you agree to:
- Only scan sites you own or have explicit permission to test
- Comply with all applicable laws and regulations
- Not use for malicious purposes or unauthorized testing
- Accept full responsibility for your actions

**Analysis is PASSIVE only.** No active exploitation or intrusive scanning is performed.
Port scanning is limited to common ports and should only be used on authorized systems.
"""

_DASHBOARD_CSS = """
.gradio-container {
background: linear-gradient(135deg, #e0eaff 0%, #f8fafc 100%);
font-family: 'Inter', sans-serif;
}
.container {
max-width: 1400px;
margin: 0 auto;
}
.footer {
background: #1f2937;
color: white;
padding: 30px 20px;
border-radius: 12px;
margin-top: 30px;
}
"""

_DASHBOARD_HEADER_HTML = """
<h2 style="color: black; background: #fff; padding:20px 25px; border-radius:12px; font-size:2.8rem; margin-bottom:0.5rem; font-weight:800; box-shadow:0 4px 12px rgba(0,0,0,0.1); text-align:center;">
🔒 Security AI Vulnerability Analyzer
</h2>
<div style="color:black; background:#f8fafc; font-size:1.4rem; border-radius:12px; padding:8px 20px; font-weight:900; display:inline-block; margin-bottom:2rem; letter-spacing:0.8px; border:2px solid #e2e8f0; margin:0 auto; display:table;">
Ethical ML for Security Research
</div>
"""

_RESULTS_PLACEHOLDER_HTML = "<div style='padding:60px;text-align:center;color:#64748b; background:white; border-radius:12px; border:2px dashed #cbd5e1;'><h3 style='color:#374151;'>Configure Your Security Analysis</h3><p style='font-size:1.1rem;'>Agree to the terms, enter data, and click 'Analyze Security'</p></div>"

# Each row: input text, analysis type, quick mode, port scan.
_DASHBOARD_EXAMPLES = [
    ["https://example.com", "url", False, False],
    ["SQL injection vulnerability in login form with unsanitized user input", "text", False, False],
    ["XSS cross-site scripting in comment section allowing script execution", "text", False, False],
    ["Missing security headers and exposed configuration files with passwords", "text", False, False],
    ["Authentication bypass vulnerability in admin panel access control", "text", False, False],
    ["Command injection through user input in system() function call", "text", False, False],
]

_HOW_IT_WORKS_HTML = """
<div style="color: black; background: #f8fafc; padding: 25px; border-radius: 12px; border: 1px solid #e2e8f0; margin-top: 25px;">
<h3 style="color: black; margin-bottom: 20px;text-align:center;font-size:1.8rem;">🔧 How It Works</h3>
<div style="display: grid; grid-template-columns: repeat(auto-fit, minmax(220px, 1fr)); gap: 20px; color: black;">
<div style="background: white; padding: 20px; border-radius: 10px; border-left: 5px solid #3b82f6;box-shadow:0 2px 8px rgba(0,0,0,0.1);">
<b style="color: black;font-size:1.2rem;">Passive Analysis</b><br>
<span style="color: #4b5563;">Non-intrusive security assessment without active exploitation</span>
</div>
<div style="background: white; padding: 20px; border-radius: 10px; border-left: 5px solid #10b981;box-shadow:0 2px 8px rgba(0,0,0,0.1);">
<b style="color: black;font-size:1.2rem;">ML Ensemble</b><br>
<span style="color: #4b5563;">Multiple machine learning models combined for accuracy</span>
</div>
<div style="background: white; padding: 20px; border-radius: 10px; border-left: 5px solid #f59e0b;box-shadow:0 2px 8px rgba(0,0,0,0.1);">
<b style="color: black;font-size:1.2rem;">Calibrated Confidence</b><br>
<span style="color: #4b5563;">Industry-standard confidence thresholds for reliable results</span>
</div>
<div style="background: white; padding: 20px; border-radius: 10px; border-left: 5px solid #ef4444;box-shadow:0 2px 8px rgba(0,0,0,0.1);">
<b style="color: black;font-size:1.2rem;">Explainable AI</b><br>
<span style="color: #4b5563;">Understand why vulnerabilities are predicted with feature importance</span>
</div>
<div style="background: white; padding: 20px; border-radius: 10px; border-left: 5px solid #8b5cf6;box-shadow:0 2px 8px rgba(0,0,0,0.1);">
<b style="color: black;font-size:1.2rem;">Risk Assessment</b><br>
<span style="color: #4b5563;">Comprehensive risk scoring based on multiple factors</span>
</div>
</div>
<div style="margin-top: 25px; color: black;text-align:center;">
<b style="font-size:1.3rem;">Supported Vulnerability Types:</b>
<div style="display:flex;flex-wrap:wrap;gap:10px;justify-content:center;margin-top:15px;">
<span style="background: #fee2e2; color: #dc2626; padding: 8px 16px; border-radius: 20px; font-size: 1rem;font-weight:600;">SQL Injection</span>
<span style="background: #fef3c7; color: #d97706; padding: 8px 16px; border-radius: 20px; font-size: 1rem;font-weight:600;">XSS</span>
<span style="background: #dbeafe; color: #2563eb; padding: 8px 16px; border-radius: 20px; font-size: 1rem;font-weight:600;">CSRF</span>
<span style="background: #f3e8ff; color: #7c3aed; padding: 8px 16px; border-radius: 20px; font-size: 1rem;font-weight:600;">Info Disclosure</span>
<span style="background: #dcfce7; color: #16a34a; padding: 8px 16px; border-radius: 20px; font-size: 1rem;font-weight:600;">Auth Bypass</span>
<span style="background: #fce7f3; color: #db2777; padding: 8px 16px; border-radius: 20px; font-size: 1rem;font-weight:600;">Secure Config</span>
</div>
</div>
</div>
"""


def _assess_text_risk(predictions):
    """Score text-mode predictions and bucket the total into a risk level.

    Each prediction adds 35 (``Critical``), 25 (``High``), 15 (``Medium``) or
    8 (anything else). The level comes from the uncapped total; the reported
    score is capped at 100.
    """
    weights = {'Critical': 35, 'High': 25, 'Medium': 15}
    total = sum(weights.get(pred.get('risk_level'), 8) for pred in predictions)

    if total >= 80:
        level, color = 'CRITICAL', '#dc2626'
    elif total >= 60:
        level, color = 'HIGH', '#ea580c'
    elif total >= 30:
        level, color = 'MEDIUM', '#d97706'
    else:
        level, color = 'LOW', '#2563eb'

    factors = []
    if predictions:
        factors.append(f"Found {len(predictions)} potential vulnerabilities")

    return {
        'level': level,
        'score': min(total, 100),
        'color': color,
        'factors': factors,
    }


def create_enhanced_dashboard():
    """Build the Gradio dashboard and wire up its callbacks.

    The UI has a consent checkbox that enables the analyse button, the input
    controls (text, analysis type, quick mode, port scan, explainability), and
    four outputs: the HTML report, the numeric risk score, the JSON report and
    the confidence chart image.

    Returns:
        The configured ``gr.Blocks`` interface (not yet launched).
    """
    with gr.Blocks(
        theme=gr.themes.Soft(primary_hue="blue", secondary_hue="orange"),
        title="Security AI Vulnerability Analyzer",
        css=_DASHBOARD_CSS,
    ) as interface:
        gr.Markdown(_DASHBOARD_HEADER_HTML)

        with gr.Accordion("⚠️ Legal & Ethical Notice", open=False):
            gr.Markdown(_LEGAL_NOTICE_MD)

        # NOTE(review): the exact nesting of the controls inside Row/Column
        # containers is reconstructed; confirm the layout renders as intended.
        with gr.Row():
            with gr.Column(scale=1):
                consent_checkbox = gr.Checkbox(
                    label="I agree to the terms above and will use this tool ethically",
                    value=False,
                    info="Required to enable analysis",
                )
                input_data = gr.Textbox(
                    label="Website URL or Security Text",
                    placeholder="https://example.com or paste security-related text...",
                    lines=3,
                    max_lines=5,
                )
                with gr.Row():
                    analysis_type = gr.Radio(
                        choices=["url", "text"],
                        label="Analysis Type",
                        value="url",
                        info="URL: Passive website analysis | Text: Predict vulnerabilities",
                    )
                    quick_mode = gr.Checkbox(
                        label="Quick Mode",
                        value=False,
                        info="Faster, less detailed analysis",
                    )
                enable_port_scan = gr.Checkbox(
                    label="Enable Port Scanning",
                    value=False,
                    info="Scan common ports (URL analysis only)",
                    interactive=True,
                )
                enable_explainability = gr.Checkbox(
                    label="Enable Explainability",
                    value=True,
                    info="Show why vulnerabilities are predicted",
                )
                # Disabled until the consent checkbox is ticked.
                analyze_btn = gr.Button(
                    "🔍 Analyze Security",
                    variant="primary",
                    size="lg",
                    interactive=False,
                )

            with gr.Column(scale=2):
                output_html = gr.HTML(
                    label="Security Analysis Results",
                    value=_RESULTS_PLACEHOLDER_HTML,
                )
                risk_score_output = gr.Number(
                    label="Risk Score",
                    precision=1,
                    value=0.0,
                    info="Overall security risk score (0-100)",
                )

        with gr.Row():
            json_output = gr.JSON(
                label="Technical Report",
                height=400,
                value={},
                show_label=True,
            )
            chart_output = gr.Image(
                label="Confidence Chart",
                height=300,
                show_label=True,
                show_download_button=True,
            )

        gr.Examples(
            examples=_DASHBOARD_EXAMPLES,
            inputs=[input_data, analysis_type, quick_mode, enable_port_scan],
            label="Try These Security Examples:",
        )

        gr.Markdown(_HOW_IT_WORKS_HTML)

        def toggle_analyze(consent):
            """Enable the analyse button only while consent is given."""
            return gr.Button(interactive=consent)

        consent_checkbox.change(
            fn=toggle_analyze,
            inputs=[consent_checkbox],
            outputs=[analyze_btn],
        )

        def analyze_input(input_data, analysis_type, quick_mode, enable_port_scan,
                          enable_explainability, progress=gr.Progress()):
            """Run the selected analysis and return the four dashboard outputs.

            Any exception raised during analysis or formatting is converted
            into the error panel produced by ``format_error``.
            """
            if not input_data or not input_data.strip():
                return format_error("Input URL or security text is required.")

            try:
                if analysis_type == "url":
                    progress(0.2, "Connecting to target website...")
                    result = analyzer.analyze_website(
                        input_data,
                        quick_mode,
                        enable_port_scan=enable_port_scan,
                    )
                    result['input_type'] = 'url'
                else:
                    progress(0.4, "Analyzing security text...")
                    predictions = classifier.get_meaningful_predictions(input_data)
                    # NOTE(review): text-mode scoring is applied only in this
                    # branch; URL results keep the analyzer's own assessment.
                    result = {
                        'input_type': 'text',
                        'input_data': input_data,
                        'quick_mode': quick_mode,
                        'vulnerability_predictions': predictions,
                        'risk_assessment': _assess_text_risk(predictions),
                    }

                result['input_data'] = input_data
                result['timestamp'] = pd.Timestamp.now().isoformat()

                # Explainability is skipped in quick mode.
                if enable_explainability and not quick_mode:
                    progress(0.8, "Generating explainability insights...")
                    result['explainability_data'] = generate_explainability(input_data, False)

                progress(0.95, "Finalizing security report...")
                return format_results(result)

            except Exception as e:
                return format_error(f"Security analysis failed: {str(e)}")

        analyze_btn.click(
            fn=analyze_input,
            inputs=[input_data, analysis_type, quick_mode, enable_port_scan, enable_explainability],
            outputs=[output_html, risk_score_output, json_output, chart_output],
        )

    return interface
|
|
|
|
|
|
|
|
# Build the dashboard once at import time; the launch code below serves it.
interface = create_enhanced_dashboard()

if __name__ == "__main__":
    # Options shared by every launch attempt.
    launch_options = dict(
        server_name="0.0.0.0",
        show_error=True,
        share=False,
        inbrowser=True,
    )

    # Try a fixed range of ports first, moving on when one is taken.
    for port in range(7860, 7870):
        try:
            print(f"Trying to launch on port {port}...")
            interface.launch(server_port=port, **launch_options)
            break
        except OSError as e:
            if "Address already in use" in str(e) or "port" in str(e).lower():
                print(f"Port {port} is busy, trying next port...")
                continue
            # Unrelated OS error: re-raise with the original traceback.
            raise
    else:
        print("Could not find an available port. Please check if any Gradio apps are running.")

        # Last resort: launch without an explicit server_port.
        # NOTE(review): placed in the for/else branch so it only runs when
        # every port in the range failed — confirm against the original layout.
        interface.launch(**launch_options)
|
|
|