Spaces:

yenugu
/

Security_AI_Vulnerability_Analyzer

Sleeping

App Files Files Community

Security_AI_Vulnerability_Analyzer / app.py

yenugu

Upload app.py with huggingface_hub

c143925 verified 4 months ago

raw

history blame contribute delete

77.1 kB


	import os
	import gradio as gr
	import shap
	from transformers import pipeline
	import torch
	import xgboost as xgb
	from sklearn.ensemble import RandomForestClassifier, VotingClassifier
	from sklearn.svm import SVC
	from sklearn.linear_model import LogisticRegression
	from sklearn.feature_extraction.text import TfidfVectorizer
	from sklearn.preprocessing import LabelEncoder
	from sklearn.pipeline import FeatureUnion
	from sklearn.base import BaseEstimator, TransformerMixin
	from sentence_transformers import SentenceTransformer
	import pandas as pd
	import numpy as np
	import matplotlib.pyplot as plt
	import requests
	from bs4 import BeautifulSoup
	import json
	import joblib
	import re
	import time
	import warnings
	from urllib.parse import urlparse
	from requests.adapters import HTTPAdapter
	from urllib3.util.retry import Retry
	import socket
	import threading
	from concurrent.futures import ThreadPoolExecutor, as_completed
	import ssl
	from datetime import datetime

	# Suppress every warning category for the whole process.
	warnings.filterwarnings('ignore')
	# Switch matplotlib to the 'Agg' backend, which renders without a display.
	plt.switch_backend('Agg')

	def load_from_drive(path):
	    """Return the object stored at ``path`` via joblib, or ``None``.

	    ``None`` is returned both when the file does not exist and when
	    deserialization fails; in the latter case the error is printed
	    instead of being raised.

	    NOTE: joblib files are pickle-based, so only load artifacts from a
	    trusted location.
	    """
	    if not os.path.exists(path):
	        return None
	    try:
	        loaded = joblib.load(path)
	    except Exception as e:
	        print(f"Error loading {path}: {e}")
	        return None
	    return loaded

	# =========== Enhanced Feature Engineering ================
	class SentenceTransformerFeatures(BaseEstimator, TransformerMixin):
	    """Scikit-learn style transformer that turns text into sentence embeddings."""

	    def __init__(self, model_name='all-MiniLM-L6-v2'):
	        self.model_name = model_name
	        # The encoder is created in fit() or on the first transform() call.
	        self.model = None

	    def fit(self, X, y=None):
	        """Create the encoder named by ``model_name``; ``X`` and ``y`` are not used."""
	        self.model = SentenceTransformer(self.model_name)
	        return self

	    def transform(self, X):
	        """Encode ``X`` and return the result, creating the encoder first if needed."""
	        encoder = self.model
	        if encoder is None:
	            encoder = SentenceTransformer(self.model_name)
	            self.model = encoder
	        return encoder.encode(X, show_progress_bar=False)

	class AdvancedFeatureEngine:
	    """Holds the TF-IDF vectorizer and, optionally, a combined TF-IDF + embedding union."""

	    def __init__(self, use_embeddings=True):
	        self.use_embeddings = use_embeddings
	        tfidf_options = dict(
	            max_features=2000, ngram_range=(1, 3), stop_words='english',
	            min_df=2, max_df=0.8, analyzer='word', sublinear_tf=True,
	        )
	        self.vectorizer = TfidfVectorizer(**tfidf_options)
	        self.sentence_transformer = None
	        self.feature_union = None

	    def build_feature_pipeline(self):
	        """Create, store and return the feature extractor.

	        Without embeddings the extractor is the TF-IDF vectorizer itself;
	        with embeddings it is a FeatureUnion of TF-IDF and sentence embeddings.
	        """
	        if not self.use_embeddings:
	            self.feature_union = self.vectorizer
	            return self.feature_union

	        self.sentence_transformer = SentenceTransformerFeatures()
	        steps = [
	            ('tfidf', self.vectorizer),
	            ('embeddings', self.sentence_transformer)
	        ]
	        self.feature_union = FeatureUnion(steps)
	        return self.feature_union

	# =========== Enhanced Model Class ========================
	class CalibratedVulnerabilityClassifier:
	    """Vulnerability classifier that prefers saved models and falls back to keyword rules.

	    On construction the instance tries to load its artifacts (vectorizer,
	    label encoder, individual models, ensembles, thresholds) from
	    ``model_path_prefix``. Predictions come from the calibrated ensemble when
	    both it and the vectorizer were loaded; when the model is unavailable or
	    yields fewer than three results, regex/keyword rules fill in.
	    """

	    # Labels used to fit a fresh label encoder when no usable one is loaded.
	    DEFAULT_CLASSES = (
	        'SQL Injection', 'XSS', 'CSRF', 'Information Disclosure',
	        'Authentication Bypass', 'Secure Config', 'File Inclusion',
	        'Command Injection', 'XXE', 'SSRF', 'IDOR', 'Buffer Overflow',
	    )

	    # Per-class decision thresholds used when none are loaded from disk.
	    DEFAULT_THRESHOLDS = {
	        'SQL Injection': 0.65, 'XSS': 0.68, 'CSRF': 0.55,
	        'Information Disclosure': 0.58, 'Authentication Bypass': 0.62,
	        'Secure Config': 0.52, 'File Inclusion': 0.60, 'Command Injection': 0.70,
	        'XXE': 0.65, 'SSRF': 0.63, 'IDOR': 0.58, 'Buffer Overflow': 0.72,
	    }

	    # Keyword rules, evaluated in this order. Each entry is
	    #   (label, fallback threshold, regex patterns,
	    #    base, step, cap, risk cut-off, risk above cut-off, risk otherwise)
	    # and produces confidence = min(base + hits * step, cap). Rules with
	    # step 0.0 and cap == base therefore yield a fixed confidence.
	    # Alternations use a plain "|"; an escaped "\|" would match a literal pipe.
	    # NOTE(review): the single "." wildcards between words (e.g. "union.select")
	    # may have been ".*" upstream - confirm before widening them.
	    _KEYWORD_RULES = (
	        ('SQL Injection', 0.65, (
	            r'\b(select|insert|update|delete|union|drop|alter|create)\b.*\b(from|into|table|database)\b',
	            r'\b(sql|query).(injection|bypass|escape)\b',
	            r'(union.select|1=1|or\s+1=1|--|;)\b',
	            r'(exec\s\(|sp_|xp_)\b',
	        ), 0.85, 0.08, 0.95, 0.8, 'Critical', 'High'),
	        ('XSS', 0.68, (
	            r'.*(script|alert|document\.cookie|onclick|onload|onerror)\b',
	            r'(<script|</script|javascript:)',
	            r'(xss|cross.site).(script|injection)\b',
	            r'(eval\(|setTimeout|setInterval)',
	        ), 0.82, 0.06, 0.94, 0.8, 'Critical', 'High'),
	        ('Command Injection', 0.70, (
	            r'.*(exec|system|popen|shell_exec|passthru|subprocess)\b',
	            r'(command.injection|cmd.*injection)\b',
	            r'(\||&|;|`|\$\().(command|exec)\b',
	            r'.*(os\.system|subprocess\.call)\b',
	        ), 0.80, 0.07, 0.93, 0.8, 'Critical', 'High'),
	        ('File Inclusion', 0.60, (
	            r'(include|require|file_get_contents|fopen).[\$|%]',
	            r'(file.inclusion|path.traversal|directory.traversal)\b',
	            r'(\.\./|\.\.\\)',
	            r'.*(\.\.%2f|\.\.%5c)\b',
	        ), 0.75, 0.06, 0.90, 0.7, 'High', 'Medium'),
	        ('XXE', 0.65, (
	            r'(xxe|xml.external.*entity)\b',
	            r'(<!DOCTYPE|<!ENTITY|SYSTEM|PUBLIC)',
	            r'(loadXML|DOMDocument|SimpleXML)',
	        ), 0.78, 0.0, 0.78, 0.0, 'High', 'High'),
	        ('SSRF', 0.63, (
	            r'(ssrf|server.side.request.forgery)\b',
	            r'(curl|file_get_contents|requests\.get).(http|https|ftp|file)',
	            r'.*(url_fopen|allow_url_include)\b',
	        ), 0.76, 0.0, 0.76, 0.0, 'High', 'High'),
	        ('Authentication Bypass', 0.62, (
	            r'(auth|login|password).(bypass|weak|broken)\b',
	            r'(session.fixation|credential.*stuffing)\b',
	            r'(default.password|admin.admin|root.root)\b',
	            r'(jwt.secret|token.*hardcoded)\b',
	        ), 0.81, 0.0, 0.81, 0.0, 'High', 'High'),
	        ('Information Disclosure', 0.58, (
	            r'(password|credential|key|token|secret).(exposed|leak|disclosure|visible)\b',
	            r'(error.message|stack.trace|debug.info).*exposed',
	            r'(config|configuration).(file|data).*exposed',
	            r'.*(\.env|\.pem|\.key|\.cert)\b',
	        ), 0.72, 0.05, 0.87, 0.7, 'High', 'Medium'),
	        # These were plain substring checks; as regexes the "." separators
	        # now match "-" or " " (e.g. "anti-forgery") as evidently intended.
	        ('CSRF', 0.55, (
	            r'csrf',
	            r'cross.site request forgery',
	            r'anti.forgery',
	            r'request forgery',
	        ), 0.68, 0.0, 0.68, 0.0, 'Medium', 'Medium'),
	        ('IDOR', 0.58, (
	            r'idor',
	            r'insecure direct object reference',
	            r'direct object reference',
	        ), 0.65, 0.0, 0.65, 0.0, 'Medium', 'Medium'),
	        ('Buffer Overflow', 0.72, (
	            r'(buffer.overflow|stack.*overflow)\b',
	            r'(strcpy|strcat|gets|sprintf)',
	            r'(memcpy|memmove|memset)',
	        ), 0.79, 0.0, 0.79, 0.0, 'High', 'High'),
	    )

	    # Words that trigger the generic low-confidence "Secure Config" finding.
	    _CONFIG_WORDS = ('config', 'setting', 'header', 'permission')

	    # Keyword -> base importance used by explain_prediction(). Keys are
	    # compared against lower-cased text, so they must be lower-case. Each
	    # keyword appears once; where the former literal repeated a key, the
	    # value that actually took effect (the last one) is kept.
	    # NOTE(review): 'cross.site', 'server.side' and 'request.forgery' only
	    # match a literal "." in the text - confirm the intended separator.
	    _EXPLAIN_KEYWORDS = {
	        # SQL Injection
	        'sql': 0.85, 'injection': 0.85, 'select': 0.80, 'union': 0.85,
	        'database': 0.75, 'query': 0.70, 'insert': 0.75, 'update': 0.75,
	        'delete': 0.75, 'drop': 0.80, 'alter': 0.75, '1=1': 0.90, 'exec': 0.85,

	        # XSS
	        'xss': 0.88, 'script': 0.85, 'cross.site': 0.80, 'javascript': 0.75,
	        'alert': 0.80, 'document.cookie': 0.85, 'onclick': 0.70, 'onload': 0.70,
	        'onerror': 0.70, '<script>': 0.90, '</script>': 0.90, 'eval': 0.82,

	        # Command Injection
	        'command': 0.80, 'system': 0.80,
	        'shell': 0.75, 'popen': 0.80, 'passthru': 0.80, 'subprocess': 0.78,

	        # File Inclusion
	        'file': 0.75, 'include': 0.80, 'require': 0.75, 'path': 0.70,
	        'traversal': 0.85, 'directory': 0.65, '../': 0.88,

	        # XXE
	        'xxe': 0.82, 'xml': 0.75, 'entity': 0.78, 'doctype': 0.80,

	        # SSRF
	        'ssrf': 0.80, 'server.side': 0.75, 'request.forgery': 0.75, 'curl': 0.70,

	        # Authentication
	        'authentication': 0.80, 'bypass': 0.85, 'login': 0.75, 'password': 0.80,
	        'session': 0.70, 'credential': 0.85, 'admin': 0.65, 'jwt': 0.72,

	        # Information Disclosure
	        'information': 0.65, 'disclosure': 0.75, 'exposed': 0.70, 'leak': 0.75,
	        'key': 0.80, 'token': 0.75,
	        'config': 0.65, 'debug': 0.70, 'error': 0.60,

	        # Buffer Overflow
	        'buffer': 0.78, 'overflow': 0.82, 'stack': 0.75, 'strcpy': 0.80,
	    }

	    # Presence of any of these words adds a flat boost to every matched keyword.
	    _CONTEXT_WORDS = ('vulnerability', 'security', 'attack', 'exploit', 'injection')

	    def __init__(self, use_embeddings=True, model_path_prefix="models/"):
	        """Set defaults, then immediately try to load saved artifacts.

	        Args:
	            use_embeddings: forwarded to ``AdvancedFeatureEngine``.
	            model_path_prefix: directory the ``*.joblib`` artifacts are read from.
	        """
	        self.feature_engine = AdvancedFeatureEngine(use_embeddings)
	        self.label_encoder = LabelEncoder()
	        self.models = {}
	        self.explainer = None
	        self.training_complete = False
	        self.calibration_thresholds = {}
	        self.model_path_prefix = model_path_prefix
	        self.xgb_model = None
	        self.rf_model = None
	        self.svm_model = None
	        self.lr_model = None
	        self.ensemble = None
	        self.ensemble_calibrated = None
	        self.load_models()

	    def load_models(self):
	        """Load saved artifacts, substituting untrained defaults where files are missing.

	        Any exception is printed and leaves ``training_complete`` False;
	        nothing is raised to the caller.
	        """
	        def artifact(filename):
	            return load_from_drive(os.path.join(self.model_path_prefix, filename))

	        def artifact_or(filename, factory):
	            # Compare against None explicitly: some estimators define
	            # __len__, so plain truthiness is not a reliable "was loaded" test.
	            loaded = artifact(filename)
	            return factory() if loaded is None else loaded

	        try:
	            # Stays None when the file is missing, which routes predictions
	            # to the keyword fallback in get_meaningful_predictions().
	            self.feature_engine.vectorizer = artifact("tfidf_vectorizer.joblib")

	            self.label_encoder = artifact_or("label_encoder.joblib", LabelEncoder)

	            self.xgb_model = artifact_or("xgb_model.joblib", lambda: xgb.XGBClassifier(
	                n_estimators=300, max_depth=10, learning_rate=0.1, subsample=0.8, random_state=42))

	            self.rf_model = artifact_or("rf_model.joblib", lambda: RandomForestClassifier(
	                n_estimators=300, max_depth=20, min_samples_split=5, random_state=42))

	            self.lr_model = artifact_or("lr_model.joblib", lambda: LogisticRegression(
	                C=1.0, max_iter=2000, solver='liblinear', random_state=42))

	            self.svm_model = artifact_or("svm_model.joblib", lambda: SVC(
	                probability=True, kernel='rbf', C=1.0, gamma='scale', random_state=42))

	            self.ensemble = artifact("ensemble_model.joblib")
	            self.ensemble_calibrated = artifact("calibrated_ensemble.joblib")

	            # A missing or empty thresholds file falls back to the defaults.
	            self.calibration_thresholds = (
	                artifact("calibration_thresholds.joblib") or dict(self.DEFAULT_THRESHOLDS))

	            # An encoder with fewer than 8 known classes is treated as unusable
	            # and refitted on the default label set.
	            known_classes = getattr(self.label_encoder, 'classes_', ())
	            if len(known_classes) < 8:
	                self.label_encoder.fit(list(self.DEFAULT_CLASSES))
	                self.training_complete = False
	            else:
	                self.training_complete = True

	        except Exception as e:
	            print(f"Model loading error: {e}")
	            self.training_complete = False

	    @staticmethod
	    def _risk_from_confidence(confidence):
	        """Map a model probability to a risk label."""
	        if confidence > 0.8:
	            return 'Critical'
	        if confidence > 0.65:
	            return 'High'
	        if confidence > 0.45:
	            return 'Medium'
	        return 'Low'

	    def get_meaningful_predictions(self, text):
	        """Return up to eight predictions for ``text``, highest confidence first.

	        Each prediction is a dict with keys ``type``, ``confidence``,
	        ``threshold``, ``above_threshold`` and ``risk_level``. The calibrated
	        ensemble is used when available; keyword rules are merged in when it
	        is unavailable, fails, or yields fewer than three predictions.
	        """
	        if isinstance(self.calibration_thresholds, dict):
	            thresholds = self.calibration_thresholds
	        else:
	            thresholds = self.DEFAULT_THRESHOLDS

	        preds = []

	        if self.ensemble_calibrated is not None and self.feature_engine.vectorizer is not None:
	            try:
	                X = self.feature_engine.vectorizer.transform([text])
	                proba = self.ensemble_calibrated.predict_proba(X)[0]
	                known_classes = self.label_encoder.classes_

	                for i in np.argsort(proba)[::-1][:8]:  # Top 8 predictions
	                    if i >= len(known_classes):
	                        continue
	                    label = str(known_classes[i])
	                    # Plain Python float/bool keep the result JSON-serializable.
	                    confidence = float(proba[i])
	                    threshold = thresholds.get(label, 0.5)
	                    preds.append({
	                        'type': label,
	                        'confidence': confidence,
	                        'threshold': threshold,
	                        'above_threshold': confidence > threshold,
	                        'risk_level': self._risk_from_confidence(confidence)
	                    })
	            except Exception as e:
	                print(f"Model prediction error: {e}")
	                # Fall through to keyword analysis

	        if len(preds) < 3:
	            seen_types = {p['type'] for p in preds}
	            preds.extend(
	                pred for pred in self._keyword_based_analysis(text, thresholds)
	                if pred['type'] not in seen_types
	            )

	        return sorted(preds, key=lambda x: x['confidence'], reverse=True)[:8]

	    def _keyword_based_analysis(self, text, thresholds):
	        """Return keyword-rule predictions for ``text``.

	        Args:
	            text: text to inspect; matching is case-insensitive.
	            thresholds: mapping of label -> threshold; each rule has its own
	                fallback value when the label is absent.

	        Returns:
	            A list of prediction dicts in rule order (same keys as
	            ``get_meaningful_predictions``).
	        """
	        preds = []
	        text_lower = text.lower()

	        for (label, fallback, patterns, base, step, cap,
	             risk_cut, risk_above, risk_otherwise) in self._KEYWORD_RULES:
	            hits = sum(
	                len(re.findall(pattern, text_lower, re.IGNORECASE))
	                for pattern in patterns
	            )
	            if not hits:
	                continue
	            confidence = min(base + hits * step, cap)
	            threshold = thresholds.get(label, fallback)
	            preds.append({
	                'type': label, 'confidence': confidence,
	                'threshold': threshold,
	                # Computed rather than hard-coded so custom thresholds are honoured.
	                'above_threshold': confidence > threshold,
	                'risk_level': risk_above if confidence > risk_cut else risk_otherwise
	            })

	        # Secure Config (general security issues): only for non-trivial text.
	        if len(text) > 50 and any(word in text_lower for word in self._CONFIG_WORDS):
	            confidence = 0.55
	            threshold = thresholds.get('Secure Config', 0.52)
	            preds.append({
	                'type': 'Secure Config', 'confidence': confidence,
	                'threshold': threshold,
	                'above_threshold': confidence > threshold, 'risk_level': 'Low'
	            })

	        return preds

	    def explain_prediction(self, text, top_k=10):
	        """Return the ``top_k`` weighted keywords found in ``text``.

	        Importance is the keyword's base weight plus 0.1 per occurrence
	        (at most 0.3) plus 0.1 when the text contains any context word,
	        capped at 1.0.

	        Returns:
	            ``{'features': [...]}`` where each item has ``feature``,
	            ``importance``, ``in_text`` and ``count``.
	        """
	        text_lower = text.lower()
	        # Independent of the keyword, so computed once.
	        context_boost = 0.1 if any(ctx in text_lower for ctx in self._CONTEXT_WORDS) else 0

	        features = []
	        for word, base_importance in self._EXPLAIN_KEYWORDS.items():
	            count = text_lower.count(word)
	            if count == 0:
	                continue
	            frequency_boost = min(count * 0.1, 0.3)
	            adjusted_importance = base_importance + frequency_boost + context_boost
	            features.append({
	                'feature': word,
	                'importance': float(min(adjusted_importance, 1.0)),
	                'in_text': True,
	                'count': count
	            })

	        features.sort(key=lambda x: x['importance'], reverse=True)
	        return {'features': features[:top_k]}

	# Initialize the module-level classifier. Its constructor calls load_models(),
	# so saved artifacts under "models/" are read when this module is imported.
	classifier = CalibratedVulnerabilityClassifier(use_embeddings=True, model_path_prefix="models/")

	# =========== Enhanced Port Scanner ================
	class PortScanner:
	    """TCP connect scanner over a fixed table of well-known ports."""

	    def __init__(self):
	        # Port number -> service name reported for an open port.
	        self.common_ports = {
	            21: 'FTP', 22: 'SSH', 23: 'Telnet', 25: 'SMTP', 53: 'DNS',
	            80: 'HTTP', 110: 'POP3', 443: 'HTTPS', 993: 'IMAPS',
	            995: 'POP3S', 1433: 'MSSQL', 3306: 'MySQL', 3389: 'RDP',
	            5432: 'PostgreSQL', 5900: 'VNC', 27017: 'MongoDB',
	            8080: 'HTTP-Alt', 8443: 'HTTPS-Alt', 9200: 'Elasticsearch',
	            11211: 'Memcached', 6379: 'Redis', 5984: 'CouchDB'
	        }

	        # Port number -> note attached to an open port; ports not listed get ''.
	        self.vulnerable_ports = {
	            21: 'FTP - Anonymous access possible',
	            23: 'Telnet - Unencrypted communication',
	            80: 'HTTP - Potential web vulnerabilities',
	            443: 'HTTPS - SSL/TLS configuration issues',
	            3389: 'RDP - Remote Desktop vulnerabilities',
	            5900: 'VNC - Unencrypted remote access',
	            8080: 'HTTP-Alt - Alternative web service',
	            9200: 'Elasticsearch - Database exposure risk',
	            11211: 'Memcached - Unauthenticated access',
	            6379: 'Redis - Unauthenticated access'
	        }

	    def scan_port(self, host, port, timeout=2):
	        """Try a TCP connection to ``host:port``.

	        Returns:
	            A dict with ``port``, ``status`` ('open'), ``service`` and
	            ``vulnerability_note`` when the connection succeeds, otherwise
	            ``None`` (closed, filtered, unresolvable host, invalid port).
	        """
	        try:
	            with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
	                sock.settimeout(timeout)
	                if sock.connect_ex((host, port)) != 0:
	                    return None
	        except (OSError, OverflowError, ValueError):
	            # OSError covers socket/DNS failures; OverflowError and ValueError
	            # cover out-of-range ports and unencodable host names. Unlike a
	            # bare except, KeyboardInterrupt/SystemExit still propagate.
	            return None

	        return {
	            'port': port,
	            'status': 'open',
	            'service': self.common_ports.get(port, 'Unknown'),
	            'vulnerability_note': self.vulnerable_ports.get(port, '')
	        }

	    def quick_scan(self, host, max_workers=20):
	        """Scan every port in ``common_ports`` concurrently.

	        Returns:
	            The open-port dicts from ``scan_port``, sorted by port number.
	        """
	        with ThreadPoolExecutor(max_workers=max_workers) as executor:
	            futures = [
	                executor.submit(self.scan_port, host, port)
	                for port in self.common_ports
	            ]
	            results = (future.result() for future in as_completed(futures))
	            open_ports = [result for result in results if result]

	        return sorted(open_ports, key=lambda x: x['port'])

	# =========== Enhanced Passive Website Analyzer ========
	class EnhancedPassiveAnalyzer:
	    """Fetches a website and derives security observations, predictions and a risk score."""

	    def __init__(self, classifier):
	        """Create the HTTP session (browser User-Agent, retrying adapter) and port scanner.

	        Args:
	            classifier: object exposing ``get_meaningful_predictions(text)``.
	        """
	        self.classifier = classifier
	        self.port_scanner = PortScanner()
	        self.session = requests.Session()
	        self.session.headers.update({
	            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
	        })

	        retry_strategy = Retry(
	            total=3,
	            backoff_factor=1,
	            status_forcelist=[429, 500, 502, 503, 504],
	        )
	        adapter = HTTPAdapter(max_retries=retry_strategy)
	        self.session.mount("http://", adapter)
	        self.session.mount("https://", adapter)
	        # Requests are sent with verify=False, so silence urllib3's warnings about it.
	        requests.packages.urllib3.disable_warnings()

	    def analyze_website(self, url, quick_mode=False, enable_port_scan=False):
	        """Run the analysis for ``url`` and return the result dict.

	        Args:
	            url: target URL; ``https://`` is prepended when no scheme is given.
	            quick_mode: skip content analysis, extra file probes and port scan.
	            enable_port_scan: scan the common ports (ignored in quick mode).

	        Returns:
	            The analysis dict. On failure it carries an ``error`` key and
	            whatever sections were filled before the failure.
	        """
	        analysis = {
	            'url': url,
	            'timestamp': pd.Timestamp.now().isoformat(),
	            'quick_mode': quick_mode,
	            'network_info': {},
	            'content_analysis': {},
	            'security_headers': {},
	            'technologies': [],
	            'vulnerability_predictions': [],
	            'risk_assessment': {},
	            'enhanced_features': [],
	            'port_scan': {},
	            'ssl_info': {}
	        }

	        try:
	            if not url.startswith(('http://', 'https://')):
	                url = 'https://' + url

	            parsed_url = urlparse(url)
	            # hostname (not netloc) is used for DNS/TLS/port scan because
	            # netloc may also contain a port or user info.
	            domain = parsed_url.hostname
	            if not parsed_url.netloc or not domain:
	                analysis['error'] = "Invalid URL format"
	                return analysis

	            # NOTE: certificate verification is disabled for the fetch;
	            # the certificate is inspected separately by get_ssl_info().
	            response = self.session.get(
	                url,
	                timeout=10 if quick_mode else 15,
	                verify=False,
	                allow_redirects=True
	            )

	            analysis['network_info'] = self.get_network_info(domain)
	            analysis['ssl_info'] = self.get_ssl_info(domain)

	            if enable_port_scan and not quick_mode:
	                try:
	                    analysis['port_scan'] = self.port_scanner.quick_scan(domain)
	                except Exception as e:
	                    analysis['port_scan'] = {'error': f'Port scan failed: {str(e)}'}

	            analysis['security_headers'] = self.analyze_security_headers(response)
	            analysis['technologies'] = self.detect_technologies(response)

	            if not quick_mode:
	                analysis['content_analysis'] = self.analyze_content(response)
	                analysis['enhanced_features'] = self.extract_enhanced_passive_features(url, response)

	            analysis['vulnerability_predictions'] = self.predict_vulnerabilities(analysis, quick_mode)
	            analysis['risk_assessment'] = self.assess_risk(analysis)

	        except requests.exceptions.Timeout:
	            analysis['error'] = "Request timeout - site may be unavailable"
	        except requests.exceptions.SSLError:
	            analysis['error'] = "SSL certificate verification failed"
	        except requests.exceptions.ConnectionError:
	            analysis['error'] = "Connection error - site may be unreachable"
	        except Exception as e:
	            analysis['error'] = f"Analysis error: {str(e)}"

	        return analysis

	    def get_network_info(self, domain):
	        """Resolve ``domain`` and return ``domain``, ``ip_address`` and ``resolved``."""
	        try:
	            ip = socket.gethostbyname(domain)
	        except (OSError, UnicodeError):
	            # DNS failures are OSError subclasses; UnicodeError covers
	            # names that cannot be encoded for lookup.
	            return {
	                'domain': domain,
	                'ip_address': 'Unresolvable',
	                'resolved': False
	            }
	        return {
	            'domain': domain,
	            'ip_address': ip,
	            'resolved': True
	        }

	    @staticmethod
	    def _flatten_cert_name(name):
	        """Turn a certificate name (tuple of RDN tuples) into a dict; otherwise ``str``."""
	        if isinstance(name, tuple):
	            return dict(x[0] for x in name)
	        return str(name)

	    def get_ssl_info(self, domain):
	        """Connect to ``domain:443`` and summarize its certificate.

	        Returns:
	            ``has_ssl``, ``issuer``, ``subject``, ``expires_in_days`` and
	            ``valid`` on success; ``has_ssl``/``valid`` False plus ``error``
	            when the connection or handshake fails.
	        """
	        try:
	            context = ssl.create_default_context()
	            with socket.create_connection((domain, 443), timeout=5) as sock:
	                with context.wrap_socket(sock, server_hostname=domain) as ssock:
	                    cert = ssock.getpeercert()

	            # notAfter is expressed in GMT; convert it to epoch seconds and
	            # compare with the current epoch time so the local timezone
	            # cannot skew the result. Floor division mirrors timedelta.days.
	            expiry_ts = ssl.cert_time_to_seconds(cert['notAfter'])
	            days_until_expiry = int((expiry_ts - time.time()) // 86400)

	            return {
	                'has_ssl': True,
	                'issuer': self._flatten_cert_name(cert['issuer']),
	                'subject': self._flatten_cert_name(cert['subject']),
	                'expires_in_days': days_until_expiry,
	                'valid': days_until_expiry > 0
	            }
	        except Exception as e:
	            return {
	                'has_ssl': False,
	                'valid': False,
	                'error': str(e)
	            }

	    def analyze_security_headers(self, response):
	        """Report presence, value, purpose and required-flag of known security headers."""
	        headers = response.headers

	        important_headers = {
	            'X-Frame-Options': {'purpose': 'Clickjacking protection', 'required': True},
	            'X-Content-Type-Options': {'purpose': 'MIME sniffing protection', 'required': True},
	            'Strict-Transport-Security': {'purpose': 'HTTPS enforcement', 'required': True},
	            'Content-Security-Policy': {'purpose': 'XSS protection', 'required': True},
	            'X-XSS-Protection': {'purpose': 'XSS protection', 'required': False},
	            'Referrer-Policy': {'purpose': 'Referrer info control', 'required': False},
	            'Permissions-Policy': {'purpose': 'Browser features control', 'required': False}
	        }

	        security_headers = {}
	        for header, info in important_headers.items():
	            value = headers.get(header, 'MISSING')
	            security_headers[header] = {
	                'value': value,
	                'status': 'PRESENT' if value != 'MISSING' else 'MISSING',
	                'purpose': info['purpose'],
	                'required': info['required']
	            }

	        return security_headers

	    def detect_technologies(self, response):
	        """Guess server and front-end/back-end technologies from headers and body text.

	        Returns:
	            Technology names without duplicates, in detection order
	            (server first, then frameworks in table order).
	        """
	        technologies = []
	        server = response.headers.get('Server', '').lower()
	        content = response.text.lower()

	        if 'apache' in server:
	            technologies.append('Apache Web Server')
	        elif 'nginx' in server:
	            technologies.append('Nginx Web Server')
	        elif 'iis' in server:
	            technologies.append('Microsoft IIS')
	        elif 'cloudflare' in server:
	            technologies.append('Cloudflare')

	        tech_patterns = {
	            'WordPress': ['wp-content', 'wp-includes', 'wordpress'],
	            'React': ['react', 'next.js', 'gatsby'],
	            'Angular': ['angular', 'ng-'],
	            'Vue.js': ['vue', 'vue.js'],
	            'Django': ['django', 'csrfmiddleware'],
	            'Laravel': ['laravel'],
	            'PHP': ['.php', 'php/'],
	            'jQuery': ['jquery'],
	            'Bootstrap': ['bootstrap'],
	            'Google Analytics': ['ga.js', 'google-analytics'],
	            'Font Awesome': ['font-awesome']
	        }

	        technologies.extend(
	            tech for tech, patterns in tech_patterns.items()
	            if any(pattern in content for pattern in patterns)
	        )

	        # dict.fromkeys de-duplicates while keeping a deterministic order.
	        return list(dict.fromkeys(technologies))

	    def analyze_content(self, response):
	        """Parse the response body and collect text-based security indicators.

	        Returns:
	            ``text_sample``, ``security_indicators`` and element counts; on
	            any parsing error the sample carries the error message and the
	            counts are zero.
	        """
	        try:
	            soup = BeautifulSoup(response.content, 'html.parser')
	            text_content = soup.get_text()[:2000]
	            text_lower = text_content.lower()

	            # HTML comments are not part of get_text(), so they are pulled
	            # from the raw markup and checked one comment at a time.
	            html_comments = re.findall(r'<!--(.*?)-->', response.text, re.DOTALL)
	            sensitive_comment = any(
	                re.search(r'password|key|token|admin', comment, re.IGNORECASE)
	                for comment in html_comments
	            )

	            security_indicators = {
	                'exposed_emails': len(re.findall(r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b', text_content)),
	                'php_errors': 'php' in text_lower and any(err in text_lower for err in ['error', 'warning', 'notice']),
	                'database_errors': any(db in text_lower for db in ['mysql', 'postgresql', 'oracle', 'sql server', 'database error']),
	                'debug_info': any(term in text_lower for term in ['debug', 'test', 'development', 'staging']),
	                'exposed_paths': len(re.findall(r'/[\w/.-]+', text_content)) > 50,
	                'comments_with_info': sensitive_comment
	            }

	            return {
	                'text_sample': text_content[:800],
	                'security_indicators': security_indicators,
	                'forms_count': len(soup.find_all('form')),
	                'scripts_count': len(soup.find_all('script')),
	                'inputs_count': len(soup.find_all('input')),
	                'links_count': len(soup.find_all('a'))
	            }
	        except Exception as e:
	            return {
	                'text_sample': f'Content analysis failed: {str(e)}',
	                'security_indicators': {},
	                'forms_count': 0,
	                'scripts_count': 0,
	                'inputs_count': 0,
	                'links_count': 0
	            }

	    @staticmethod
	    def _site_root(url):
	        """Return ``scheme://netloc`` for ``url`` so probes target the site root.

	        Falls back to the URL without trailing slashes when it has no
	        scheme or netloc.
	        """
	        parts = urlparse(url)
	        if parts.scheme and parts.netloc:
	            return f"{parts.scheme}://{parts.netloc}"
	        return url.rstrip('/')

	    def extract_enhanced_passive_features(self, url, response):
	        """Collect feature strings from robots.txt, sitemap.xml and sensitive-file probes.

	        ``response`` is accepted for interface compatibility and not used.
	        """
	        features = []

	        try:
	            features.extend(self.check_robots_txt(url))
	            features.extend(self.check_sitemap(url))
	            features.extend(self.check_sensitive_files(url))
	        except Exception as e:
	            features.append(f"Feature extraction error: {str(e)}")

	        return features

	    def check_robots_txt(self, url):
	        """Report whether robots.txt exists and mentions sensitive-looking paths."""
	        features = []
	        try:
	            robots_url = f"{self._site_root(url)}/robots.txt"
	            response = self.session.get(robots_url, timeout=3, verify=False)
	        except requests.RequestException:
	            # Best-effort probe: an unreachable file simply yields no features.
	            return features

	        if response.status_code == 200:
	            features.append("robots.txt present")
	            content = response.text.lower()
	            sensitive_paths = ['admin', 'login', 'config', 'backup', 'database', 'sql']
	            if any(path in content for path in sensitive_paths):
	                features.append("sensitive paths exposed in robots.txt")
	        return features

	    def check_sitemap(self, url):
	        """Report whether sitemap.xml is served with status 200."""
	        features = []
	        try:
	            sitemap_url = f"{self._site_root(url)}/sitemap.xml"
	            response = self.session.get(sitemap_url, timeout=3, verify=False)
	        except requests.RequestException:
	            return features

	        if response.status_code == 200:
	            features.append("sitemap.xml present")
	        return features

	    def check_sensitive_files(self, url):
	        """Probe a few well-known sensitive file names and report those answering 200."""
	        features = []
	        sensitive_files = [
	            '.env', 'config.php', 'backup.sql', 'wp-config.php',
	            'web.config', '.git/config', 'phpinfo.php'
	        ]
	        root = self._site_root(url)

	        for name in sensitive_files[:3]:  # Check first 3 to avoid too many requests
	            try:
	                response = self.session.get(f"{root}/{name}", timeout=2, verify=False)
	            except requests.RequestException:
	                continue
	            if response.status_code == 200:
	                features.append(f"sensitive file accessible: {name}")

	        return features

	    def predict_vulnerabilities(self, analysis, quick_mode=False):
	        """Ask the classifier for predictions on the text derived from ``analysis``.

	        Returns an empty list when there is no text or the classifier fails.
	        ``quick_mode`` is accepted but not used.
	        """
	        feature_text = self.create_feature_text(analysis)
	        if not feature_text:
	            return []
	        try:
	            return self.classifier.get_meaningful_predictions(feature_text)
	        except Exception as e:
	            print(f"Prediction error: {e}")
	            return []

	    def create_feature_text(self, analysis):
	        """Flatten the analysis sections into one space-joined string for the classifier."""
	        text_parts = []

	        if 'content_analysis' in analysis:
	            content = analysis['content_analysis']
	            text_parts.append(content.get('text_sample', ''))

	            indicators = content.get('security_indicators', {})
	            if indicators.get('php_errors'):
	                text_parts.append("php error messages exposed")
	            if indicators.get('database_errors'):
	                text_parts.append("database errors visible")
	            if indicators.get('exposed_emails', 0) > 0:
	                text_parts.append(f"{indicators['exposed_emails']} emails exposed")
	            if indicators.get('comments_with_info'):
	                text_parts.append("sensitive information in comments")

	        text_parts.append(" ".join(analysis.get('technologies', [])))

	        missing_headers = [
	            h for h, info in analysis.get('security_headers', {}).items()
	            if info.get('status') == 'MISSING' and info.get('required', False)
	        ]
	        if missing_headers:
	            text_parts.append(f"missing security headers: {', '.join(missing_headers)}")

	        text_parts.extend(analysis.get('enhanced_features', []))

	        # port_scan is a list on success; the initial {} and error dicts are skipped.
	        open_ports = analysis.get('port_scan', [])
	        if open_ports and isinstance(open_ports, list):
	            vulnerable_ports = [p for p in open_ports if p.get('vulnerability_note')]
	            if vulnerable_ports:
	                text_parts.append(f"vulnerable ports open: {[p['port'] for p in vulnerable_ports]}")

	        if not analysis.get('ssl_info', {}).get('valid', False):
	            text_parts.append("ssl certificate issues")

	        return " ".join(text_parts)

	    def assess_risk(self, analysis):
	        """Score the analysis and map the score to a level and colour.

	        Returns:
	            ``level``, ``score`` (capped at 100), ``color`` and ``factors``.
	            The level is chosen from the uncapped score.
	        """
	        risk_score = 0
	        factors = []

	        missing_headers = sum(
	            1 for h, info in analysis.get('security_headers', {}).items()
	            if info.get('status') == 'MISSING' and info.get('required', False)
	        )
	        if missing_headers > 0:
	            risk_score += missing_headers * 12
	            factors.append(f"Missing {missing_headers} critical security headers")

	        indicators = analysis.get('content_analysis', {}).get('security_indicators', {})
	        if indicators.get('php_errors'):
	            risk_score += 25
	            factors.append("PHP errors exposed to users")
	        if indicators.get('database_errors'):
	            risk_score += 30
	            factors.append("Database errors visible")
	        if indicators.get('exposed_emails', 0) > 0:
	            risk_score += indicators['exposed_emails'] * 5
	            factors.append(f"{indicators['exposed_emails']} email addresses exposed")
	        if indicators.get('comments_with_info'):
	            risk_score += 20
	            factors.append("Sensitive information in HTML comments")

	        vuln_predictions = analysis.get('vulnerability_predictions', [])
	        # (risk level, points per prediction, label used in the factor text)
	        for level_name, weight, label in (
	            ('Critical', 40, 'critical-risk'),
	            ('High', 25, 'high-risk'),
	            ('Medium', 15, 'medium-risk'),
	        ):
	            count = sum(1 for v in vuln_predictions if v.get('risk_level') == level_name)
	            if count > 0:
	                risk_score += count * weight
	                factors.append(f"{count} {label} vulnerabilities predicted")

	        open_ports = analysis.get('port_scan', [])
	        if open_ports and isinstance(open_ports, list):
	            vulnerable_ports = [p for p in open_ports if p.get('vulnerability_note')]
	            if vulnerable_ports:
	                risk_score += len(vulnerable_ports) * 10
	                factors.append(f"{len(vulnerable_ports)} potentially vulnerable ports open")

	        if not analysis.get('ssl_info', {}).get('valid', False):
	            risk_score += 20
	            factors.append("SSL certificate issues detected")

	        if risk_score >= 85:
	            level, color = "CRITICAL", "#dc2626"
	        elif risk_score >= 65:
	            level, color = "HIGH", "#ea580c"
	        elif risk_score >= 45:
	            level, color = "MEDIUM", "#d97706"
	        elif risk_score >= 20:
	            level, color = "LOW", "#2563eb"
	        else:
	            level, color = "MINIMAL", "#16a34a"

	        return {
	            'level': level,
	            'score': min(risk_score, 100),
	            'color': color,
	            'factors': factors
	        }

	# Module-level analyzer instance, built once at import time around the
	# `classifier` object (which must already be defined above this line).
	# The dashboard's URL branch calls `analyzer.analyze_website(...)` on it.
	analyzer = EnhancedPassiveAnalyzer(classifier)

	# =============== Enhanced UI Visualization ============
	def create_confidence_chart(result, chart_path="conf_chart.png"):
	    """Render the vulnerability-confidence bar chart and save it as a PNG.

	    Args:
	        result: Analysis result dict. Its 'vulnerability_predictions' list
	            supplies 'type', 'confidence' and 'risk_level' for each entry,
	            plus an optional 'threshold'. Only the first eight predictions
	            are plotted. A missing/empty list yields a placeholder image.
	        chart_path: File the PNG is written to. Defaults to
	            "conf_chart.png" so existing callers keep their behaviour.

	    Returns:
	        The path the chart was saved to.
	    """
	    max_bars = 8
	    bar_height = 0.6
	    # Same fallback format_results applies when a prediction has no threshold.
	    fallback_threshold = 0.3
	    neutral_color = '#6b7280'
	    risk_colors = {
	        'Critical': '#991b1b',
	        'High': '#dc2626',
	        'Medium': '#ea580c',
	        'Low': '#2563eb'
	    }

	    vulns = (result.get('vulnerability_predictions') or [])[:max_bars]

	    plt.style.use('default')
	    fig, ax = plt.subplots(figsize=(14, 8))
	    try:
	        if not vulns:
	            # Nothing to plot: show a centred "all clear" message instead.
	            ax.text(0.5, 0.5, 'No vulnerabilities detected\nAll systems secure!',
	                    ha='center', va='center', transform=ax.transAxes, fontsize=18,
	                    bbox=dict(boxstyle="round,pad=0.5", facecolor="#d1fae5", edgecolor="#10b981", alpha=0.8))
	            ax.set_xlim(0, 1)
	            ax.set_ylim(0, 1)
	            ax.axis('off')
	        else:
	            vuln_types = [v['type'] for v in vulns]
	            confidences = [v['confidence'] for v in vulns]
	            thresholds = [v.get('threshold', fallback_threshold) for v in vulns]
	            colors = [risk_colors.get(v['risk_level'], neutral_color) for v in vulns]

	            y_pos = np.arange(len(vulns))
	            bars = ax.barh(y_pos, confidences, color=colors, alpha=0.85,
	                           height=bar_height, label='Confidence')

	            # Threshold markers. vlines() takes its y-extent in DATA
	            # coordinates, so each dashed tick spans exactly its own bar.
	            # (axvline's ymin/ymax are axes fractions and cannot be derived
	            # from the bar index without knowing the final y-limits.)
	            ax.vlines(thresholds, y_pos - bar_height / 2, y_pos + bar_height / 2,
	                      colors=neutral_color, linestyles='--', alpha=0.8, linewidth=2)
	            for row, threshold in enumerate(thresholds):
	                ax.text(threshold + 0.01, row, f'Threshold: {threshold:.0%}',
	                        va='center', fontsize=9, color=neutral_color, fontweight='bold')

	            # Axis labels, title and limits.
	            ax.set_yticks(y_pos)
	            ax.set_yticklabels(vuln_types, fontsize=12, fontweight='bold')
	            ax.set_xlabel('Confidence Score', fontsize=14, fontweight='bold', color='#374151')
	            ax.set_title('Vulnerability Confidence Analysis', fontsize=16, fontweight='bold',
	                         color='#1f2937', pad=20)
	            ax.set_xlim(0, 1.1)  # Extra space for the value labels

	            # Lighten the frame and add a vertical grid behind the bars.
	            for spine in ['top', 'right']:
	                ax.spines[spine].set_visible(False)
	            ax.spines['left'].set_color('#d1d5db')
	            ax.spines['bottom'].set_color('#d1d5db')
	            ax.grid(axis='x', alpha=0.3, linestyle='--', color='#9ca3af')
	            ax.set_axisbelow(True)

	            # Value label right of each bar; risk level inside wide bars.
	            for bar, confidence, vuln in zip(bars, confidences, vulns):
	                width = bar.get_width()
	                mid_y = bar.get_y() + bar.get_height() / 2
	                ax.text(width + 0.02, mid_y, f'{confidence:.1%}', ha='left', va='center',
	                        fontweight='bold', fontsize=11, color='#1f2937')
	                if width > 0.15:
	                    ax.text(width / 2, mid_y, vuln['risk_level'], ha='center', va='center',
	                            fontweight='bold', fontsize=10, color='white')

	            # Legend explaining the bar colours.
	            legend_elements = [
	                plt.Rectangle((0, 0), 1, 1, facecolor=risk_colors[level], alpha=0.85, label=level)
	                for level in ('Critical', 'High', 'Medium', 'Low')
	            ]
	            ax.legend(handles=legend_elements, loc='lower right', framealpha=0.9)

	        fig.tight_layout()
	        fig.savefig(chart_path, bbox_inches='tight', dpi=150, facecolor='white',
	                    edgecolor='none', transparent=False, pad_inches=0.1)
	    finally:
	        # Close this specific figure even if drawing or saving failed, so
	        # repeated analyses do not accumulate open figures.
	        plt.close(fig)

	    return chart_path

	def _html_text(value):
	    """Return *value* as a string that is safe inside HTML text or a quoted attribute."""
	    from html import escape
	    return escape(str(value), quote=True)


	def _render_summary_card(result):
	    """Build the header card echoing the input, its type, the mode and the timestamp."""
	    mode = "QUICK" if result.get('quick_mode') else "FULL"
	    input_type = str(result.get('input_type') or 'unknown').upper()
	    return f"""
	<div style="padding:20px;border-radius:12px;background: linear-gradient(135deg, #2d3748 0%, #1e293b 100%); color:white; margin-bottom: 20px; box-shadow:0 4px 12px rgba(0,0,0,0.15);">
	<h2 style="color: black;background: #fff;padding:15px 20px;border-radius:10px;font-size:2.6rem;margin-bottom:1rem;font-weight:800;text-shadow:0 2px 10px rgba(0,0,0,0.1);text-align:center;">🔒 Security AI Vulnerability Analyzer</h2>
	<div style="color:black;background:#f8fafc;font-size:1.3rem;border-radius:12px;padding:6px 15px;font-weight:900;margin-bottom:12px;letter-spacing:0.8px;border:2px solid #e2e8f0;text-align:center;">Ethical ML for Security Research</div>
	<div style="display: grid; grid-template-columns: 1fr 1fr; gap: 15px; margin-top:15px;color:white;">
	<div><b>Input:</b> {_html_text(result.get('input_data', 'N/A'))}</div>
	<div><b>Type:</b> {_html_text(input_type)}</div>
	<div><b>Mode:</b> {mode}</div>
	<div><b>Timestamp:</b> {_html_text(result.get('timestamp', 'N/A'))}</div>
	</div>
	</div>
	"""


	def _render_risk_card(risk):
	    """Build the coloured risk card: level, score bar and contributing factors."""
	    risk_color = risk.get('color', 'gray')
	    score = risk.get('score', 0)
	    factor_rows = ''.join(
	        f'<div style="margin:8px 0;padding:8px 12px;background:rgba(255,255,255,0.1);border-radius:8px;border-left:4px solid white;">• {_html_text(factor)}</div>'
	        for factor in risk.get('factors', [])
	    )
	    return f"""
	<div style="padding:20px; margin:20px 0; border-radius:12px; background:{risk_color}; color:white; box-shadow:0 4px 12px rgba(0,0,0,0.1);">
	<h3 style="margin:0 0 15px 0; font-size:1.6rem;text-align:center;">Risk Assessment: {_html_text(risk.get('level', 'UNKNOWN'))}</h3>
	<div style="text-align:center;margin-bottom:15px;">
	<strong style="font-size:1.2rem;">Risk Score: {score}/100</strong>
	</div>
	<div style="width:100%;background:rgba(255,255,255,0.2);border-radius:12px;margin:12px 0;overflow:hidden;">
	<div style="width:{score}%;height:25px;background:white;border-radius:12px;transition:width 0.5s ease;display:flex;align-items:center;justify-content:center;">
	<span style="color:{risk_color};font-weight:bold;font-size:0.9rem;">{score}%</span>
	</div>
	</div>
	<div style="margin-top:15px;">
	<b style="font-size:1.1rem;">Key Factors:</b>
	{factor_rows}
	</div>
	</div>
	"""


	def _render_chart_section(result):
	    """Generate the confidence chart; return (section_html, chart_path or None)."""
	    try:
	        chart_path = create_confidence_chart(result)
	    except Exception as e:
	        # Best effort: a chart failure must not lose the rest of the report.
	        return f"""
	<div style="background:#fef2f2;padding:20px; border-radius:12px;margin:20px 0; border:2px solid #fecaca;">
	<h3 style="color:#dc2626; margin:0 0 15px 0;text-align:center;">❌ Chart Generation Error</h3>
	<p style="color:#7f1d1d;text-align:center;">Unable to generate confidence chart: {_html_text(e)}</p>
	</div>
	""", None
	    return f"""
	<div style="background:white;padding:20px; border-radius:12px;margin:20px 0; border:1px solid #e2e8f0;box-shadow:0 4px 12px rgba(0,0,0,0.05);">
	<h3 style="color:#1a202c; margin:0 0 15px 0;text-align:center;">📊 Vulnerability Confidence Analysis</h3>
	<img src="file/{_html_text(chart_path)}" style="width:100%; border-radius:10px; box-shadow:0 4px 12px rgba(0,0,0,0.1);">
	</div>
	""", chart_path


	def _render_vulnerabilities(vulns):
	    """Build the list of predicted vulnerabilities (first eight) or the all-clear card."""
	    if not vulns:
	        return """<div style="background:#f0fdf4;padding:20px;border-radius:12px;margin:20px 0; border:2px solid #bbf7d0;">
	<h3 style="color:#166534; margin:0 0 12px 0;text-align:center;">✅ No Significant Vulnerabilities Detected</h3>
	<p style="color:#3f6212;text-align:center;font-size:1.1rem;">All predictions are below calibrated confidence thresholds or no vulnerabilities were identified in the input.</p>
	</div>"""

	    parts = ["""<div style="background:white;padding:20px;border-radius:12px;margin:20px 0; border:1px solid #e2e8f0;box-shadow:0 4px 12px rgba(0,0,0,0.05);">
	<h3 style="color:#2d3748; margin:0 0 15px 0;text-align:center;">🚨 Predicted Vulnerabilities</h3>"""]
	    for vuln in vulns[:8]:
	        confidence = vuln['confidence']
	        if confidence > 0.8:
	            color, emoji = "#991b1b", "💀"
	        elif confidence > 0.65:
	            color, emoji = "#dc2626", "🔴"
	        elif confidence > 0.45:
	            color, emoji = "#ea580c", "🟠"
	        else:
	            color, emoji = "#2563eb", "🔵"

	        if vuln['above_threshold']:
	            status_color, status_text = '#22c55e', "✅ ABOVE THRESHOLD"
	        else:
	            status_color, status_text = '#ef4444', "⚠️ BELOW THRESHOLD"

	        # `border` shorthand comes first so it cannot reset the coloured
	        # left accent declared after it.
	        parts.append(f"""
	<div style="padding:15px;margin:10px 0; border:1px solid #e2e8f0;border-left:6px solid {color};background:#f7fafc;border-radius:10px;transition:transform 0.2s;">
	<div style="display:flex;justify-content:space-between;align-items:center;flex-wrap:wrap;gap:10px;">
	<b style="color:#2d3748;font-size:1.2rem;flex:1;">{emoji} {_html_text(vuln['type'])}</b>
	<span style="color:{color};font-weight:600;font-size:1.1rem;">{confidence:.1%}</span>
	<span style="background:{color};color:white;padding:6px 15px;border-radius:20px;font-size:0.9rem;font-weight:600;">{_html_text(vuln['risk_level'])} Risk</span>
	</div>
	<div style="margin-top:8px;color:#4a5568;font-size:0.95rem;display:flex;justify-content:space-between;flex-wrap:wrap;gap:10px;">
	<span>Threshold: {vuln.get('threshold',0.3):.1%}</span>
	<span style="color:{status_color};font-weight:600;">
	{status_text}
	</span>
	</div>
	</div>
	""")
	    parts.append("</div>")
	    return ''.join(parts)


	def _render_port_scan(port_scan):
	    """Build the open-ports grid (first ten); empty string unless given a non-empty list."""
	    if not port_scan or not isinstance(port_scan, list):
	        return ""

	    parts = ["""<div style="background:white;padding:20px;border-radius:12px;margin:20px 0; border:1px solid #e2e8f0;">
	<h3 style="color:#2d3748; margin:0 0 15px 0;text-align:center;">🌐 Port Scan Results</h3>
	<div style="display: grid; grid-template-columns: repeat(auto-fit, minmax(220px, 1fr)); gap: 12px;">"""]
	    for port in port_scan[:10]:  # Show first 10 open ports
	        vuln_note = port.get('vulnerability_note', '')
	        border_color = '#ef4444' if vuln_note else '#22c55e'
	        bg_color = '#fef2f2' if vuln_note else '#f0fdf4'
	        note_html = (
	            f'<div style="color:#dc2626;font-size:0.85rem;margin-top:6px;padding:4px 8px;background:#fecaca;border-radius:6px;">⚠️ {_html_text(vuln_note)}</div>'
	            if vuln_note else ''
	        )
	        parts.append(f"""
	<div style="padding:12px; border:2px solid {border_color}; border-radius:10px; background:{bg_color};">
	<div style="font-weight:bold;color:#1e293b;font-size:1.1rem;">Port {_html_text(port['port'])}</div>
	<div style="color:#475569;margin:4px 0;">Service: {_html_text(port['service'])}</div>
	<div style="color:#16a34a;font-size:0.9rem;font-weight:600;">Status: OPEN</div>
	{note_html}
	</div>
	""")
	    parts.append("</div></div>")
	    return ''.join(parts)


	def _render_technologies(technologies):
	    """Build the detected-technologies chip list; empty string when none were found."""
	    if not technologies:
	        return ""
	    chips = ''.join(
	        f'<span style="background:#e0e7ff;color:#3730a3;padding:8px 16px;border-radius:20px;font-weight:600;border:1px solid #c7d2fe;">{_html_text(tech)}</span>'
	        for tech in technologies
	    )
	    return f"""<div style="background:white;padding:18px;border-radius:12px;margin:18px 0; border:1px solid #e2e8f0;">
	<h3 style="color:#3730a3;font-size:1.3rem;margin:0 0 12px 0;text-align:center;">🛠️ Technologies Detected</h3>
	<div style="margin-top:12px; display: flex; flex-wrap:wrap; gap:10px;justify-content:center;">
	{chips}
	</div></div>"""


	def _render_missing_headers(security_headers):
	    """Build the warning card naming required headers whose status is 'MISSING'."""
	    missing = [
	        name for name, info in (security_headers or {}).items()
	        if info.get('status') == 'MISSING' and info.get('required', False)
	    ]
	    if not missing:
	        return ""
	    return f"""<div style="background:#fef2f2;padding:18px;border-radius:12px;margin:18px 0; border:2px solid #fecaca;">
	<h3 style="color:#dc2626;margin:0 0 12px 0;text-align:center;">⚠️ Missing Critical Security Headers</h3>
	<div style="margin-top:10px;color:#7f1d1d;text-align:center;">
	{', '.join(_html_text(name) for name in missing)}
	</div>
	</div>"""


	def _render_explainability(explainability_html):
	    """Wrap the explainability markup in a collapsible section."""
	    if not explainability_html:
	        return ""
	    # The payload is inserted verbatim: it is expected to already be HTML
	    # markup, so it is intentionally NOT escaped here.
	    return f"""
	<details style='margin:20px 0;border:1px solid #e2e8f0;border-radius:12px;overflow:hidden;'>
	<summary style='cursor:pointer;padding:16px;background:#f8fafc;font-weight:bold;color:#000000;font-size:1.1rem;list-style:none;'>
	🔍 Show Explainability Insights
	</summary>
	<div style='margin-top:0;color:#000000;padding:20px;background:#ffffff;border-top:1px solid #e2e8f0;'>
	{explainability_html}
	</div>
	</details>
	"""


	def _render_export_card(result):
	    """Build the 'copy JSON report' button card."""
	    # The JSON is embedded inside a double-quoted onclick attribute, so it
	    # must be HTML-escaped; the browser decodes the entities back into
	    # valid JavaScript before running the handler.
	    embedded_json = _html_text(json.dumps(result))
	    return f"""
	<div style="background:#fff;padding:20px; border-radius:12px; margin:20px 0; border:1px solid #e2e8f0;box-shadow:0 4px 12px rgba(0,0,0,0.05);text-align:center;">
	<h3 style="color:#1e293b;font-size:1.3rem;margin:0 0 15px 0;">📤 Export Results</h3>
	<button onclick="navigator.clipboard.writeText(JSON.stringify({embedded_json},null,2));alert('JSON report copied to clipboard!');"
	style="padding:12px 24px; background: #2563eb; color: white; border: none; border-radius: 8px; font-size:16px;cursor:pointer;font-weight:600;transition:background 0.3s;">
	📋 Copy JSON Report
	</button>
	</div>
	"""


	# Static "Developed By" footer appended to every report.
	_DEVELOPER_FOOTER_HTML = """
	<div style="background:#1f2937;color:white;padding:30px 20px 20px 20px;border-radius:12px;margin:20px 0 0 0;font-size:1.1rem;box-shadow:0 4px 12px rgba(0,0,0,0.15);line-height:1.6;">
	<h3 style="font-size:1.8rem;color:#fbbf24;text-align:center;margin-bottom:20px;border-bottom:2px solid #374151;padding-bottom:10px;">Developed By</h3>

	<div style="display:grid;grid-template-columns:repeat(auto-fit, minmax(300px, 1fr));gap:20px;margin-bottom:20px;">
	<div style="background:#374151;padding:15px;border-radius:8px;">
	<strong style="color:#e5e7eb;">👨‍💻 Developer:</strong>
	<div style="color:#f9fafb;margin-top:5px;">Chenna Kesava Reddy Yenugu</div>
	</div>

	<div style="background:#374151;padding:15px;border-radius:8px;">
	<strong style="color:#e5e7eb;">🎯 Model Accuracy:</strong>
	<div style="background:#2563eb;color:#fff;border-radius:8px;padding:6px 12px;margin-top:5px;display:inline-block;font-weight:600;">
	Industry ML/AI Practices Applied, Human Calibrated
	</div>
	</div>
	</div>

	<div style="display:grid;grid-template-columns:repeat(auto-fit, minmax(250px, 1fr));gap:15px;">
	<div style="background:#374151;padding:12px;border-radius:8px;">
	<strong style="color:#e5e7eb;">🌐 Portfolio:</strong>
	<div style="margin-top:5px;">
	<a href='https://mycareer.ccbp.tech/' target='_blank' style='color:#60a5fa;font-weight:600;text-decoration:none;'>https://mycareer.ccbp.tech/</a>
	</div>
	</div>

	<div style="background:#374151;padding:12px;border-radius:8px;">
	<strong style="color:#e5e7eb;">💼 LinkedIn:</strong>
	<div style="margin-top:5px;">
	<a href='https://www.linkedin.com/in/ychennakesavareddy' target='_blank' style='color:#34d399;font-weight:600;text-decoration:none;'>ychennakesavareddy</a>
	</div>
	</div>

	<div style="background:#374151;padding:12px;border-radius:8px;">
	<strong style="color:#e5e7eb;">🤗 HuggingFace:</strong>
	<div style="margin-top:5px;">
	<a href='https://huggingface.co/yenugu' target='_blank' style='color:#a3e635;font-weight:600;text-decoration:none;'>yenugu</a>
	</div>
	</div>

	<div style="background:#374151;padding:12px;border-radius:8px;">
	<strong style="color:#e5e7eb;">🐙 GitHub:</strong>
	<div style="margin-top:5px;">
	<a href='https://github.com/ychennakesavareddy' target='_blank' style='color:#60a5fa;font-weight:600;text-decoration:none;'>ychennakesavareddy</a>
	</div>
	</div>
	</div>

	<div style="display:grid;grid-template-columns:repeat(auto-fit, minmax(250px, 1fr));gap:15px;margin-top:15px;">
	<div style="background:#374151;padding:12px;border-radius:8px;">
	<strong style="color:#e5e7eb;">🐦 Twitter:</strong>
	<div style="margin-top:5px;">
	<a href='https://twitter.com/chenna332003' target='_blank' style='color:#38bdf8;font-weight:600;text-decoration:none;'>@chenna332003</a>
	</div>
	</div>

	<div style="background:#374151;padding:12px;border-radius:8px;grid-column:span 2;">
	<strong style="color:#e5e7eb;">📧 Email:</strong>
	<div style="margin-top:5px;color:#f5d558;">
	<a href='mailto:chenna.yenugu.tech@zohomail.in' style='color:#f5d558;text-decoration:none;font-weight:600;'>chenna.yenugu.tech@zohomail.in</a> |
	<a href='mailto:c.yenugu.tech@gmail.com' style='color:#f5d558;text-decoration:none;font-weight:600;'>c.yenugu.tech@gmail.com</a>
	</div>
	</div>
	</div>
	</div>
	"""


	def format_results(result):
	    """Render an analysis result as the HTML report shown in the dashboard.

	    Values that can originate outside this program (the submitted input,
	    risk factors, detected technologies, header names, port notes and
	    exception text) are HTML-escaped before being interpolated. The
	    'explainability_data' entry is inserted as-is because it is expected to
	    already be markup.

	    Args:
	        result: Analysis result dict. Keys read here: 'input_data',
	            'input_type', 'quick_mode', 'timestamp', 'risk_assessment',
	            'vulnerability_predictions', 'port_scan', 'technologies',
	            'security_headers' and 'explainability_data'. All are optional.

	    Returns:
	        A 4-tuple ``(html, risk_score, json_report, chart_path)`` where
	        ``json_report`` is ``result`` serialised with two-space indentation
	        and ``chart_path`` is ``None`` when the chart could not be created.
	    """
	    risk = result.get('risk_assessment') or {}
	    chart_html, chart_path = _render_chart_section(result)

	    sections = [
	        _render_summary_card(result),
	        _render_risk_card(risk),
	        chart_html,
	        _render_vulnerabilities(result.get('vulnerability_predictions', [])),
	        _render_port_scan(result.get('port_scan')),
	        _render_technologies(result.get('technologies')),
	        _render_missing_headers(result.get('security_headers')),
	        _render_explainability(result.get('explainability_data')),
	        _render_export_card(result),
	        _DEVELOPER_FOOTER_HTML,
	    ]
	    html_output = ''.join(sections)

	    return html_output, risk.get('score', 0), json.dumps(result, indent=2), chart_path

	def format_error(error_msg):
	    """Render an error card in the same 4-tuple shape as a successful report.

	    Args:
	        error_msg: Message to display. It is HTML-escaped first because it
	            may embed exception text or echoed user input.

	    Returns:
	        ``(html, 0, "{}", None)`` - the error card, a zero risk score, an
	        empty JSON report and no chart.
	    """
	    from html import escape

	    safe_msg = escape(str(error_msg), quote=True)
	    # `border` shorthand is declared before `border-left`; in the reverse
	    # order the shorthand would reset the red left accent.
	    error_html = f"""<div style="padding:25px;border-radius:12px;background:#fef2f2;border:2px solid #fecaca;border-left:6px solid #dc2626;text-align:center;">
	<h2 style="color:#dc2626;margin:0 0 15px 0;font-size:1.8rem;">❌ Analysis Error</h2>
	<b style="color:#7f1d1d;font-size:1.2rem;">{safe_msg}</b>
	<p style="color:#991b1b;margin:15px 0 0 0;font-size:1.1rem;">Please check your input and try again.</p></div>"""
	    return error_html, 0, "{}", None

	def generate_explainability(input_data, advanced_explainability):
	    """Build the HTML "top influencing features" panel for an input.

	    Args:
	        input_data: Text handed to ``classifier.explain_prediction``.
	        advanced_explainability: Accepted for interface compatibility; this
	            function does not read it.

	    Returns:
	        An HTML fragment: a grid with one card per feature (name,
	        importance, count, whether it occurs in the text), or a short
	        notice when the explanation has no features.
	    """
	    from html import escape

	    explanation = classifier.explain_prediction(input_data, top_k=10)
	    if not (explanation and explanation.get('features')):
	        return "<p style='color:#000000;padding:20px;background:#f8fafc;border-radius:10px;text-align:center;font-size:1.1rem;'>No significant features identified in the input.</p>"

	    cards = []
	    for feature in explanation['features']:
	        importance = feature['importance']
	        # One tier decision drives both the accent colour and the icon.
	        if importance > 0.7:
	            color, icon = "#16a34a", "📈"
	        elif importance > 0.5:
	            color, icon = "#ca8a04", "📊"
	        else:
	            color, icon = "#dc2626", "📉"

	        # Escaped defensively in case feature names derive from the input.
	        feature_name = escape(str(feature['feature']).title(), quote=True)
	        feature_count = escape(str(feature.get('count', 1)), quote=True)
	        in_text_mark = '✅' if feature.get('in_text') else '❌'

	        # `border` shorthand precedes `border-left` so the tier-coloured
	        # accent is not reset by the shorthand.
	        cards.append(f"""<div style="background:white;padding:15px;border-radius:10px;border:1px solid #e2e8f0;border-left:5px solid {color};box-shadow:0 2px 8px rgba(0,0,0,0.1);">
	<div style="font-weight:bold;color:#000000;margin-bottom:6px;font-size:1.1rem;">{icon} {feature_name}</div>
	<div style="color:#475569;font-size:1rem;">Importance: <b style="color:{color};font-size:1.1rem;">{importance:.3f}</b></div>
	<div style="color:#64748b;font-size:0.9rem;">Count: {feature_count} • In text: {in_text_mark}</div>
	</div>""")

	    header = "<h4 style='color:#000000;margin-bottom:15px;font-size:1.3rem;text-align:center;'>Top Influencing Features</h4><div style='display:grid;grid-template-columns:repeat(auto-fit, minmax(280px, 1fr));gap:15px;'>"
	    return header + ''.join(cards) + "</div>"

	def _assess_text_risk(predictions):
	    """Score text-mode predictions and map the total onto a risk level and colour."""
	    weights = {'Critical': 35, 'High': 25, 'Medium': 15}
	    predictions = predictions or []
	    # Any risk level other than Critical/High/Medium contributes 8 points.
	    total = sum(weights.get(pred['risk_level'], 8) for pred in predictions)

	    if total >= 80:
	        level, color = 'CRITICAL', '#dc2626'
	    elif total >= 60:
	        level, color = 'HIGH', '#ea580c'
	    elif total >= 30:
	        level, color = 'MEDIUM', '#d97706'
	    else:
	        level, color = 'LOW', '#2563eb'

	    factors = []
	    if predictions:
	        factors.append(f"Found {len(predictions)} potential vulnerabilities")

	    return {
	        'level': level,
	        'score': min(total, 100),
	        'color': color,
	        'factors': factors
	    }


	def create_enhanced_dashboard():
	    """Create the enhanced Gradio dashboard.

	    Builds the layout (consent checkbox, input box, analysis options,
	    result panes, examples and a "How It Works" section) and wires two
	    events: the consent checkbox enables the Analyze button, and the button
	    runs the analysis and fills the HTML, score, JSON and chart outputs.

	    Returns:
	        The ``gr.Blocks`` interface, not yet launched.
	    """
	    # NOTE(review): the nesting of rows/columns below was reconstructed from
	    # a copy of this file whose indentation was lost - confirm the layout
	    # against the running app.
	    legal_notice = """
	## ⚠️ Legal & Ethical Notice

	Security AI is for authorized security research only.
	By using this tool, you agree to:
	- Only scan sites you own or have explicit permission to test
	- Comply with all applicable laws and regulations
	- Not use for malicious purposes or unauthorized testing
	- Accept full responsibility for your actions

	Analysis is PASSIVE only. No active exploitation or intrusive scanning is performed.
	Port scanning is limited to common ports and should only be used on authorized systems.
	"""

	    with gr.Blocks(
	        theme=gr.themes.Soft(primary_hue="blue", secondary_hue="orange"),
	        title="Security AI Vulnerability Analyzer",
	        css="""
	.gradio-container {
	background: linear-gradient(135deg, #e0eaff 0%, #f8fafc 100%);
	font-family: 'Inter', sans-serif;
	}
	.container {
	max-width: 1400px;
	margin: 0 auto;
	}
	.footer {
	background: #1f2937;
	color: white;
	padding: 30px 20px;
	border-radius: 12px;
	margin-top: 30px;
	}
	"""
	    ) as interface:
	        gr.Markdown("""
	<h2 style="color: black; background: #fff; padding:20px 25px; border-radius:12px; font-size:2.8rem; margin-bottom:0.5rem; font-weight:800; box-shadow:0 4px 12px rgba(0,0,0,0.1); text-align:center;">
	🔒 Security AI Vulnerability Analyzer
	</h2>
	<div style="color:black; background:#f8fafc; font-size:1.4rem; border-radius:12px; padding:8px 20px; font-weight:900; display:inline-block; margin-bottom:2rem; letter-spacing:0.8px; border:2px solid #e2e8f0; margin:0 auto; display:table;">
	Ethical ML for Security Research
	</div>
	""")

	        with gr.Accordion("⚠️ Legal & Ethical Notice", open=False):
	            gr.Markdown(legal_notice)

	        with gr.Row():
	            # Left column: consent, input and analysis options.
	            with gr.Column(scale=1):
	                consent_checkbox = gr.Checkbox(
	                    label="I agree to the terms above and will use this tool ethically",
	                    value=False,
	                    info="Required to enable analysis"
	                )
	                input_data = gr.Textbox(
	                    label="Website URL or Security Text",
	                    placeholder="https://example.com or paste security-related text...",
	                    lines=3,
	                    max_lines=5
	                )
	                with gr.Row():
	                    analysis_type = gr.Radio(
	                        choices=["url", "text"],
	                        label="Analysis Type",
	                        value="url",
	                        info="URL: Passive website analysis | Text: Predict vulnerabilities"
	                    )
	                    quick_mode = gr.Checkbox(
	                        label="Quick Mode",
	                        value=False,
	                        info="Faster, less detailed analysis"
	                    )
	                enable_port_scan = gr.Checkbox(
	                    label="Enable Port Scanning",
	                    value=False,
	                    info="Scan common ports (URL analysis only)",
	                    interactive=True
	                )
	                enable_explainability = gr.Checkbox(
	                    label="Enable Explainability",
	                    value=True,
	                    info="Show why vulnerabilities are predicted"
	                )
	                # Starts disabled; enabled by the consent checkbox below.
	                analyze_btn = gr.Button(
	                    "🔍 Analyze Security",
	                    variant="primary",
	                    size="lg",
	                    interactive=False
	                )

	            # Right column: rendered report and overall score.
	            with gr.Column(scale=2):
	                output_html = gr.HTML(
	                    label="Security Analysis Results",
	                    value="<div style='padding:60px;text-align:center;color:#64748b; background:white; border-radius:12px; border:2px dashed #cbd5e1;'><h3 style='color:#374151;'>Configure Your Security Analysis</h3><p style='font-size:1.1rem;'>Agree to the terms, enter data, and click 'Analyze Security'</p></div>"
	                )
	                risk_score = gr.Number(
	                    label="Risk Score",
	                    precision=1,
	                    value=0.0,
	                    info="Overall security risk score (0-100)"
	                )

	        with gr.Row():
	            json_output = gr.JSON(
	                label="Technical Report",
	                height=400,
	                value={},
	                show_label=True
	            )
	            chart_output = gr.Image(
	                label="Confidence Chart",
	                height=300,
	                show_label=True,
	                show_download_button=True
	            )

	        # Enhanced examples
	        gr.Examples(
	            examples=[
	                ["https://example.com", "url", False, False],
	                ["SQL injection vulnerability in login form with unsanitized user input", "text", False, False],
	                ["XSS cross-site scripting in comment section allowing script execution", "text", False, False],
	                ["Missing security headers and exposed configuration files with passwords", "text", False, False],
	                ["Authentication bypass vulnerability in admin panel access control", "text", False, False],
	                ["Command injection through user input in system() function call", "text", False, False]
	            ],
	            inputs=[input_data, analysis_type, quick_mode, enable_port_scan],
	            label="Try These Security Examples:"
	        )

	        # How it works section with black text
	        gr.Markdown("""
	<div style="color: black; background: #f8fafc; padding: 25px; border-radius: 12px; border: 1px solid #e2e8f0; margin-top: 25px;">
	<h3 style="color: black; margin-bottom: 20px;text-align:center;font-size:1.8rem;">🔧 How It Works</h3>
	<div style="display: grid; grid-template-columns: repeat(auto-fit, minmax(220px, 1fr)); gap: 20px; color: black;">
	<div style="background: white; padding: 20px; border-radius: 10px; border-left: 5px solid #3b82f6;box-shadow:0 2px 8px rgba(0,0,0,0.1);">
	<b style="color: black;font-size:1.2rem;">Passive Analysis</b><br>
	<span style="color: #4b5563;">Non-intrusive security assessment without active exploitation</span>
	</div>
	<div style="background: white; padding: 20px; border-radius: 10px; border-left: 5px solid #10b981;box-shadow:0 2px 8px rgba(0,0,0,0.1);">
	<b style="color: black;font-size:1.2rem;">ML Ensemble</b><br>
	<span style="color: #4b5563;">Multiple machine learning models combined for accuracy</span>
	</div>
	<div style="background: white; padding: 20px; border-radius: 10px; border-left: 5px solid #f59e0b;box-shadow:0 2px 8px rgba(0,0,0,0.1);">
	<b style="color: black;font-size:1.2rem;">Calibrated Confidence</b><br>
	<span style="color: #4b5563;">Industry-standard confidence thresholds for reliable results</span>
	</div>
	<div style="background: white; padding: 20px; border-radius: 10px; border-left: 5px solid #ef4444;box-shadow:0 2px 8px rgba(0,0,0,0.1);">
	<b style="color: black;font-size:1.2rem;">Explainable AI</b><br>
	<span style="color: #4b5563;">Understand why vulnerabilities are predicted with feature importance</span>
	</div>
	<div style="background: white; padding: 20px; border-radius: 10px; border-left: 5px solid #8b5cf6;box-shadow:0 2px 8px rgba(0,0,0,0.1);">
	<b style="color: black;font-size:1.2rem;">Risk Assessment</b><br>
	<span style="color: #4b5563;">Comprehensive risk scoring based on multiple factors</span>
	</div>
	</div>
	<div style="margin-top: 25px; color: black;text-align:center;">
	<b style="font-size:1.3rem;">Supported Vulnerability Types:</b>
	<div style="display:flex;flex-wrap:wrap;gap:10px;justify-content:center;margin-top:15px;">
	<span style="background: #fee2e2; color: #dc2626; padding: 8px 16px; border-radius: 20px; font-size: 1rem;font-weight:600;">SQL Injection</span>
	<span style="background: #fef3c7; color: #d97706; padding: 8px 16px; border-radius: 20px; font-size: 1rem;font-weight:600;">XSS</span>
	<span style="background: #dbeafe; color: #2563eb; padding: 8px 16px; border-radius: 20px; font-size: 1rem;font-weight:600;">CSRF</span>
	<span style="background: #f3e8ff; color: #7c3aed; padding: 8px 16px; border-radius: 20px; font-size: 1rem;font-weight:600;">Info Disclosure</span>
	<span style="background: #dcfce7; color: #16a34a; padding: 8px 16px; border-radius: 20px; font-size: 1rem;font-weight:600;">Auth Bypass</span>
	<span style="background: #fce7f3; color: #db2777; padding: 8px 16px; border-radius: 20px; font-size: 1rem;font-weight:600;">Secure Config</span>
	</div>
	</div>
	</div>
	""")

	        def toggle_analyze(consent):
	            """Enable the Analyze button only while the consent box is ticked."""
	            return gr.Button(interactive=consent)

	        consent_checkbox.change(
	            fn=toggle_analyze,
	            inputs=[consent_checkbox],
	            outputs=[analyze_btn]
	        )

	        def analyze_input(input_data, analysis_type, quick_mode, enable_port_scan, enable_explainability, progress=gr.Progress()):
	            """Run a URL or text analysis and return the four dashboard outputs.

	            Returns the tuple produced by ``format_results`` on success, or
	            by ``format_error`` when the input is empty or any step raises.
	            """
	            # Guard against a missing value as well as whitespace-only text.
	            if not input_data or not input_data.strip():
	                return format_error("Input URL or security text is required.")

	            try:
	                if analysis_type == "url":
	                    progress(0.2, "Connecting to target website...")
	                    result = analyzer.analyze_website(
	                        input_data,
	                        quick_mode,
	                        enable_port_scan=enable_port_scan
	                    )
	                    result['input_type'] = 'url'
	                else:
	                    progress(0.4, "Analyzing security text...")
	                    predictions = classifier.get_meaningful_predictions(input_data)
	                    result = {
	                        'input_type': 'text',
	                        'input_data': input_data,
	                        'quick_mode': quick_mode,
	                        'vulnerability_predictions': predictions,
	                        'risk_assessment': _assess_text_risk(predictions)
	                    }

	                result['input_data'] = input_data
	                result['timestamp'] = pd.Timestamp.now().isoformat()

	                # Explainability is skipped in quick mode.
	                if enable_explainability and not quick_mode:
	                    progress(0.8, "Generating explainability insights...")
	                    result['explainability_data'] = generate_explainability(input_data, False)

	                progress(0.95, "Finalizing security report...")
	                return format_results(result)

	            except Exception as e:
	                # UI boundary: show any failure as an error card instead of
	                # letting it propagate to Gradio.
	                return format_error(f"Security analysis failed: {str(e)}")

	        analyze_btn.click(
	            fn=analyze_input,
	            inputs=[input_data, analysis_type, quick_mode, enable_port_scan, enable_explainability],
	            outputs=[output_html, risk_score, json_output, chart_output]
	        )

	    return interface

	# Build the dashboard at import time so `interface` exists as a module attribute.
	interface = create_enhanced_dashboard()

	if __name__ == "__main__":
	    # Candidate ports, probed in ascending order until one can be bound.
	    ports_to_try = list(range(7860, 7870))

	    # Options shared by every launch attempt.
	    launch_options = dict(
	        server_name="0.0.0.0",
	        show_error=True,
	        share=False,
	        inbrowser=True,  # Automatically open browser
	    )

	    for port in ports_to_try:
	        try:
	            print(f"Trying to launch on port {port}...")
	            interface.launch(server_port=port, **launch_options)
	            break
	        except OSError as e:
	            message = str(e)
	            port_unavailable = "Address already in use" in message or "port" in message.lower()
	            if not port_unavailable:
	                # Unrelated OS error: do not mask it as a busy port.
	                raise e
	            print(f"Port {port} is busy, trying next port...")
	    else:
	        # Every candidate port was busy: let Gradio choose a port itself.
	        print("Could not find an available port. Please check if any Gradio apps are running.")
	        interface.launch(**launch_options)