Spaces:

quantum-drive
/

malware-phishing-detection

Sleeping

App Files Files Community

quantum-drive committed on May 30, 2025

Commit

cf27b43

verified ·

1 Parent(s): 4365d3a

Update app.py

Browse files

Files changed (1) hide show

app.py +478 -393

app.py CHANGED Viewed

@@ -9,454 +9,539 @@ import whois
 import dns.resolver
 from urllib.parse import urlparse
 from datetime import datetime
 # -------------------------------
 # Load Trained Models
 # -------------------------------
-phishing_model = joblib.load("phishing_stack.pkl")
-malware_model = joblib.load("new_malware_stack.pkl")
 # -------------------------------
-# Enhanced Feature Extraction
 # -------------------------------
-def extract_phishing_features(url):
-    parsed = urlparse(url)
-    hostname = parsed.hostname if parsed.hostname else ""
-    tld = hostname.split('.')[-1] if '.' in hostname else ""
-    return {
-        "url_length": len(url),
-        "hostname_length": len(hostname),
-        "num_dots": url.count('.'),
-        "num_hyphens": url.count('-'),
-        "num_digits": sum(char.isdigit() for char in url),
-        "num_special_chars": len(re.findall(r"[^\w]", url)) - url.count('/'),
-        "has_ip_address": 1 if re.match(r"\d+\.\d+\.\d+\.\d+", hostname) else 0,
-        "has_https": 1 if parsed.scheme == "https" else 0,
-        "has_suspicious_words": 1 if any(word in url.lower() for word in
-                                        ["login", "secure", "update", "verify", "account", "banking", "paypal"]) else 0,
-        "is_shortened": 1 if any(short in url for short in
-                                 ["bit.ly", "tinyurl", "goo.gl", "t.co", "ow.ly", "is.gd"]) else 0,
-        "tld": tld
-    }
-def extract_malware_features(url):
-    parsed = urlparse(url)
-    hostname = parsed.hostname or ""
-    scheme = parsed.scheme
-    # Basic URL features
-    url_length = len(url)
-    hostname_length = len(hostname)
-    num_dots = url.count('.')
-    num_hyphens = url.count('-')
-    num_digits = len(re.findall(r'\d', url))
-    special_chars = set(string.punctuation) - {'/'}
-    num_specials = sum(1 for c in url if c in special_chars)
-    has_suspicious_keyword = any(k in url.lower() for k in
-                                 ['login', 'secure', 'verify', 'update', 'download', 'install', 'free'])
-    has_ip = bool(re.match(r'https?://(\d{1,3}\.){3}\d{1,3}', url))
-    is_https = scheme == 'https'
-    is_shortened = any(s in url for s in
-                      ['bit.ly', 'tinyurl.com', 'goo.gl', 't.co', 'ow.ly', 'shorte.st'])
-    tld = hostname.split('.')[-1] if '.' in hostname else ''
-    # Network features
-    try:
-        ip_address = socket.gethostbyname(hostname)
-    except:
-        ip_address = None
-    # WHOIS features
-    try:
-        w = whois.whois(url)
-        domain_age = (datetime.now() - w.creation_date[0]).days if w.creation_date else -1
-        domain_expiry = (w.expiration_date[0] - datetime.now()).days if w.expiration_date else -1
-    except:
-        domain_age = domain_expiry = -1
-    # DNS features
-    try:
-        answers = dns.resolver.resolve(hostname, 'A')
-        ttl = answers.rrset.ttl
-    except:
-        ttl = -1
-    # SSL features
-    ssl_issuer = "Unknown"
-    ssl_valid = False
-    if is_https and hostname:
-        try:
-            ctx = ssl.create_default_context()
-            with ctx.wrap_socket(socket.socket(), server_hostname=hostname) as s:
-                s.settimeout(3)
-                s.connect((hostname, 443))
-                cert = s.getpeercert()
-                issuer = dict(x[0] for x in cert['issuer'])['organizationName']
-                ssl_issuer = issuer if issuer else "Unknown"
-                ssl_valid = datetime.strptime(cert['notAfter'], '%b %d %H:%M:%S %Y %Z') > datetime.now()
-        except:
-            pass
-    return {
-        "url_length": url_length,
-        "hostname_length": hostname_length,
-        "num_dots": num_dots,
-        "num_hyphens": num_hyphens,
-        "num_digits": num_digits,
-        "num_special_chars": num_specials,
-        "has_suspicious_keyword": int(has_suspicious_keyword),
-        "has_ip_address": int(has_ip),
-        "is_https": int(is_https),
-        "is_shortened": int(is_shortened),
-        "tld": tld,
-        "domain_age_days": domain_age,
-        "domain_expiry_days": domain_expiry,
-        "dns_ttl": ttl,
-        "ssl_issuer": ssl_issuer,
-        "ssl_valid": int(ssl_valid)
-    }
 # -------------------------------
-# Prepare Model Inputs
 # -------------------------------
-def prepare_phishing_input(url):
-    features = extract_phishing_features(url)
-    df = pd.DataFrame([features])
-    df = pd.get_dummies(df, columns=["tld"], prefix="tld")
-    df = df.reindex(columns=phishing_model.feature_names_in_, fill_value=0)
-    return df
-def prepare_malware_input(url):
-    features = extract_malware_features(url)
-    df = pd.DataFrame([features])
-    df = pd.get_dummies(df, columns=["tld", "ssl_issuer"], prefix=["tld", "ssl_issuer"])
-    df = df.reindex(columns=malware_model.feature_names_in_, fill_value=0)
-    return df
 # -------------------------------
-# PREDICTION NORMALIZATION
 # -------------------------------
-def normalize_prediction(prediction):
-    """Normalize different prediction formats to standard format"""
-    pred_str = str(prediction).lower().strip()
-    # Handle different formats that might come from models
-    if pred_str in ['phishing', '1', 'malicious', 'threat', 'bad']:
-        return 'threat'
-    elif pred_str in ['benign', '0', 'safe', 'good', 'legitimate']:
-        return 'benign'
-    else:
-        return 'unknown'
 # -------------------------------
-# IMPROVED TRUTH TABLE DECISION LOGIC
 # -------------------------------
-# ENHANCED ANALYZE_URL FUNCTION - Replace completely
-def analyze_url(url):
     try:
-        # Get model predictions
-        phishing_pred_raw = phishing_model.predict(prepare_phishing_input(url))[0]
-        malware_pred_raw = malware_model.predict(prepare_malware_input(url))[0]
-        # Normalize predictions
-        phishing_pred = normalize_prediction(phishing_pred_raw)
-        malware_pred = normalize_prediction(malware_pred_raw)
-        # Get URL features for analysis
         parsed = urlparse(url)
         hostname = parsed.hostname or ""
-        path = parsed.path.lower()
-        # COMPREHENSIVE THREAT DETECTION SYSTEM
-        def comprehensive_threat_analysis(url, hostname, path):
-            threat_score = 0
-            threat_patterns = []
-            threat_level = "benign"
-            # 1. MALWARE/SUSPICIOUS CONTENT INDICATORS
-            malware_keywords = [
-                'download', 'install', 'exe', 'zip', 'rar', 'crack', 'keygen', 'serial',
-                'patch', 'activator', 'loader', 'hack', 'cheat', 'bot', 'tool',
-                'generator', 'free', 'premium', 'pro', 'full', 'latest'
-            ]
-            malware_extensions = [
-                '.exe', '.bat', '.cmd', '.scr', '.pif', '.jar', '.zip',
-                '.rar', '.7z', '.apk', '.deb', '.rpm', '.dmg', '.pkg'
-            ]
-            # Check for malware indicators in URL
-            for keyword in malware_keywords:
-                if keyword in url.lower():
-                    threat_score += 3
-                    threat_patterns.append(f"Malware keyword: {keyword}")
-            for ext in malware_extensions:
-                if ext in url.lower():
-                    threat_score += 4
-                    threat_patterns.append(f"Executable extension: {ext}")
-            # 2. ADULT/INAPPROPRIATE CONTENT DETECTION
-            adult_keywords = [
-                'porn', 'xxx', 'sex', 'adult', 'nude', 'nsfw', 'erotic', 'cam',
-                'escort', 'dating', 'hookup', 'tube', 'celebflix', 'xtube'
-            ]
-            adult_domains = [
-                'pornhub', 'xvideos', 'redtube', 'youporn', 'tube8', 'xtube',
-                'celebflix', 'zodcaps', 'adultfriendfinder', 'ashley'
-            ]
-            for keyword in adult_keywords:
-                if keyword in url.lower():
-                    threat_score += 5
-                    threat_patterns.append(f"Adult content indicator: {keyword}")
-            for domain in adult_domains:
-                if domain in hostname.lower():
-                    threat_score += 6
-                    threat_patterns.append(f"Adult content domain: {domain}")
-            # 3. SUSPICIOUS DOMAIN PATTERNS
-            suspicious_tlds = [
-                '.tk', '.ml', '.ga', '.cf', '.pw', '.cc', '.ws', '.info',
-                '.biz', '.click', '.download', '.stream', '.cam', '.me'
-            ]
-            for tld in suspicious_tlds:
-                if hostname.endswith(tld):
-                    threat_score += 3
-                    threat_patterns.append(f"Suspicious TLD: {tld}")
-            # 4. LONG/COMPLEX URLS (often malicious)
-            if len(url) > 150:
-                threat_score += 4
-                threat_patterns.append("Extremely long URL")
-            elif len(url) > 100:
-                threat_score += 2
-                threat_patterns.append("Long URL")
-            # 5. EXCESSIVE PATH DEPTH
-            path_segments = [seg for seg in path.split('/') if seg]
-            if len(path_segments) > 5:
-                threat_score += 3
-                threat_patterns.append("Deep path structure")
-            # 6. SUSPICIOUS CHARACTERS IN URL
-            suspicious_chars = ['%', '&', '=', '?', '#']
-            char_count = sum(url.count(char) for char in suspicious_chars)
-            if char_count > 10:
-                threat_score += 2
-                threat_patterns.append("Many special characters")
-            # 7. IP ADDRESS INSTEAD OF DOMAIN
-            if re.match(r'\d+\.\d+\.\d+\.\d+', hostname):
-                threat_score += 8
-                threat_patterns.append("IP address used instead of domain")
-            # 8. SUBDOMAIN ANALYSIS
-            if hostname.count('.') > 4:
-                threat_score += 4
-                threat_patterns.append("Excessive subdomains")
-            # 9. KNOWN BAD PATTERNS
-            bad_patterns = [
-                'vanguard.com/totalrewards',  # Your specific example
-                'celebflix', 'xtube', 'zodcaps', 'torrent', 'pirate'
-            ]
-            for pattern in bad_patterns:
-                if pattern in url.lower():
-                    threat_score += 7
-                    threat_patterns.append(f"Known suspicious pattern: {pattern}")
-            # 10. CRYPTOCURRENCY/FINANCIAL SCAM INDICATORS
-            crypto_keywords = ['bitcoin', 'crypto', 'mining', 'wallet', 'investment', 'earn']
-            for keyword in crypto_keywords:
-                if keyword in url.lower():
-                    threat_score += 3
-                    threat_patterns.append(f"Crypto-related: {keyword}")
-            return threat_score, threat_patterns
-        # Get comprehensive threat analysis
-        threat_score, threat_patterns = comprehensive_threat_analysis(url, hostname, path)
-        # Check for known legitimate domains
-        def is_verified_legitimate(hostname):
-            verified_domains = [
-                'google.com', 'youtube.com', 'facebook.com', 'amazon.com', 'microsoft.com',
-                'apple.com', 'netflix.com', 'instagram.com', 'twitter.com', 'linkedin.com',
-                'github.com', 'stackoverflow.com', 'wikipedia.org', 'reddit.com', 'bbc.com',
-                'cnn.com', 'nytimes.com', 'forbes.com', 'techcrunch.com'
-            ]
-            for domain in verified_domains:
-                if hostname == domain or hostname.endswith('.' + domain):
-                    return True
-            return False
-        is_legitimate = is_verified_legitimate(hostname)
-        # ENHANCED DECISION LOGIC WITH THREAT SCORING
-        if threat_score >= 15:
-            final_result = "Malicious"
-            reason = f"🚨 HIGH THREAT: Multiple malicious indicators (Score: {threat_score})"
-        elif threat_score >= 10:
-            final_result = "Suspicious"
-            reason = f"⚠️ MEDIUM THREAT: Suspicious patterns detected (Score: {threat_score})"
-        elif threat_score >= 6:
-            final_result = "Suspicious"
-            reason = f"⚠️ LOW-MEDIUM THREAT: Some concerning patterns (Score: {threat_score})"
-        elif malware_pred == "threat" or phishing_pred == "threat":
-            # Model detected threat even with low pattern score
-            if malware_pred == "threat" and phishing_pred == "threat":
-                final_result = "Malicious"
-                reason = "🚨 Both AI models detected threats"
-            elif malware_pred == "threat":
-                final_result = "Malicious" if threat_score >= 3 else "Suspicious"
-                reason = f"🦠 Malware model + patterns (Score: {threat_score})"
-            else:  # phishing_pred == "threat"
-                final_result = "Phishing" if threat_score >= 3 else "Suspicious"
-                reason = f"🎣 Phishing model + patterns (Score: {threat_score})"
-        elif is_legitimate and threat_score <= 3:
-            final_result = "Benign"
-            reason = "✅ Verified legitimate domain"
-        elif threat_score <= 2:
-            final_result = "Benign"
-            reason = "✅ Low threat score, appears safe"
         else:
-            final_result = "Suspicious"
-            reason = f"⚠️ Pattern analysis suggests caution (Score: {threat_score})"
         return {
-            "url": url,
-            "final_result": final_result,
-            "decision_reason": reason,
-            "phishing_model_prediction": str(phishing_pred_raw),
-            "malware_model_prediction": str(malware_pred_raw),
-            "normalized_phishing": phishing_pred,
-            "normalized_malware": malware_pred,
-            "threat_score": threat_score,
-            "threat_patterns": threat_patterns[:4],  # Top 4 patterns
-            "analysis_type": "Enhanced Pattern + AI Model Analysis"
         }
     except Exception as e:
-        return {"error": str(e)}
 # -------------------------------
-# GRADIO INTERFACE
 # -------------------------------
-# UPDATED INTERFACE FUNCTION
-def interface_fn(url):
-    if not url.strip():
-        return "❌ Please enter a valid URL"
-    # Add protocol if missing
     if not url.startswith(('http://', 'https://')):
         url = 'https://' + url
-    result = analyze_url(url)
-    if "error" in result:
-        return f"❌ Error analyzing URL: {result['error']}"
-    # Format output for better readability
-    output = f"""
-🔍 Analysis Report for: {result['url']}
-⚠️ Final Verdict: {result['final_result']}
-📌 Decision Reason: {result['decision_reason']}
-🔒 Phishing Model: {result['phishing_model_prediction']} (normalized: {result['normalized_phishing']})
-🛡️ Malware Model: {result['malware_model_prediction']} (normalized: {result['normalized_malware']})
-🎯 Threat Score: {result['threat_score']}/30
-📊 Analysis Type: {result['analysis_type']}
 """
-    # Add threat patterns if any
-    if result['threat_patterns']:
-        output += f"🚩 Detected Threats: {', '.join(result['threat_patterns'])}\n"
-    output += "=" * 60
-    # Add appropriate emoji and color coding based on threat score
-    if result['final_result'] == "Benign":
-        output = "✅ SAFE " + output
-    elif result['final_result'] == "Malicious":
-        output = "🚨 DANGEROUS " + output
-    elif result['final_result'] == "Phishing":
-        output = "🎣 PHISHING " + output
-    else:  # Suspicious
-        if result['threat_score'] >= 10:
-            output = "⚠️ HIGH RISK " + output
-        elif result['threat_score'] >= 6:
-            output = "⚠️ MEDIUM RISK " + output
-        else:
-            output = "⚠️ LOW RISK " + output
-    return output
 # -------------------------------
-# GRADIO APP
 # -------------------------------
-demo = gr.Interface(
-    fn=interface_fn,
-    inputs=gr.Text(
-        label="Enter URL to Analyze",
-        placeholder="https://example.com or just example.com",
-        lines=1
-    ),
-    outputs=gr.Textbox(
-        label="🛡️ Threat Analysis Report",
-        lines=10,
-        max_lines=15
-    ),
-    title="🛡️ AI-Powered URL Threat Analyzer",
-    description="""
-    **Advanced URL Security Scanner**
-    This tool uses dual AI models to detect:
-    • 🎣 Phishing attacks
-    • 🦠 Malware threats
-    • 🔒 Overall URL safety
-    Enter any URL to get a comprehensive security analysis.
-    """,
-    examples=[
-        ["https://www.google.com"],
-        ["https://www.paypal.com/signin"],
-        ["https://www.bbc.com/news"],
-        ["bit.ly/suspicious-link"],
-        ["http://malware-site.ru/download.exe"]
-    ],
-    theme=gr.themes.Soft(),
-    css="""
     .gradio-container {
-        max-width: 800px;
         margin: auto;
     }
-    """,
-)
 if __name__ == "__main__":
     demo.launch(
         share=True,
         server_name="0.0.0.0",
         server_port=7860,
         show_error=True,
-        # Add these parameters to ensure API works
-        show_api=True
-)

 import dns.resolver
 from urllib.parse import urlparse
 from datetime import datetime
+import requests
+from collections import Counter
 # -------------------------------
 # Load Trained Models
 # -------------------------------
+try:
+    phishing_model = joblib.load("phishing_stack.pkl")
+    malware_model = joblib.load("new_malware_stack.pkl")
+except FileNotFoundError as e:
+    print(f"Model file not found: {e}")
+    phishing_model, malware_model = None, None
 # -------------------------------
+# TRUSTED DOMAINS DATABASE
 # -------------------------------
+TRUSTED_DOMAINS = {
+    # Major Tech Companies
+    'google.com', 'youtube.com', 'gmail.com', 'gstatic.com', 'googleapis.com',
+    'facebook.com', 'instagram.com', 'whatsapp.com', 'messenger.com',
+    'microsoft.com', 'outlook.com', 'office.com', 'xbox.com', 'bing.com',
+    'apple.com', 'icloud.com', 'itunes.com', 'app-store.com',
+    'amazon.com', 'aws.amazon.com', 'amazonwebservices.com',
+    # Social Media & Communication
+    'twitter.com', 'x.com', 'linkedin.com', 'reddit.com', 'pinterest.com',
+    'snapchat.com', 'tiktok.com', 'discord.com', 'telegram.org',
+    'zoom.us', 'skype.com', 'teams.microsoft.com',
+    # News & Media
+    'bbc.com', 'cnn.com', 'nytimes.com', 'reuters.com', 'bloomberg.com',
+    'forbes.com', 'techcrunch.com', 'theverge.com', 'arstechnica.com',
+    # Banking & Finance
+    'paypal.com', 'stripe.com', 'visa.com', 'mastercard.com',
+    'chase.com', 'bankofamerica.com', 'wells.com', 'citibank.com',
+    # E-commerce
+    'ebay.com', 'etsy.com', 'shopify.com', 'walmart.com', 'target.com',
+    # Development & Tech
+    'github.com', 'stackoverflow.com', 'mozilla.org', 'w3.org',
+    'cloudflare.com', 'docker.com', 'npmjs.com',
+    # Educational
+    'wikipedia.org', 'wikimedia.org', 'mit.edu', 'stanford.edu',
+    'coursera.org', 'udemy.com', 'khanacademy.org',
+    # Government
+    'gov', 'edu', 'mil', 'org'  # Top-level domains
+}
+# Suspicious TLDs that are often misused
+SUSPICIOUS_TLDS = {
+    '.tk', '.ml', '.ga', '.cf', '.pw', '.cc', '.ws', '.info',
+    '.biz', '.click', '.download', '.stream', '.cam', '.me',
+    '.top', '.work', '.date', '.review', '.country', '.racing'
+}
+# Known malicious patterns
+MALICIOUS_PATTERNS = {
+    'phishing_keywords': [
+        'verify-account', 'account-suspended', 'urgent-action',
+        'click-here-now', 'limited-time', 'act-now', 'winner',
+        'congratulations', 'claim-prize', 'free-money', 'inheritance'
+    ],
+    'malware_keywords': [
+        'crack', 'keygen', 'serial', 'patch', 'activator', 'loader',
+        'hack-tool', 'cheat-engine', 'bot-download', 'virus-scan',
+        'antivirus-update', 'flash-player-update'
+    ],
+    'adult_keywords': [
+        'xxx', 'porn', 'adult', 'sex', 'nude', 'erotic', 'cam',
+        'escort', 'dating-hookup', 'live-cam', 'webcam'
+    ]
+}
 # -------------------------------
+# ENHANCED WHITELISTING SYSTEM
 # -------------------------------
+def is_trusted_domain(hostname):
+    """Enhanced domain trust verification"""
+    if not hostname:
+        return False
+    hostname = hostname.lower().strip()
+    # Direct match
+    if hostname in TRUSTED_DOMAINS:
+        return True
+    # Subdomain check for trusted domains
+    for trusted in TRUSTED_DOMAINS:
+        if hostname.endswith('.' + trusted):
+            return True
+    # Government and educational domains
+    if hostname.endswith(('.gov', '.edu', '.mil')):
+        return True
+    # Major organization domains
+    if hostname.endswith('.org') and any(org in hostname for org in
+                                       ['wikipedia', 'mozilla', 'apache', 'gnu']):
+        return True
+    return False
 # -------------------------------
+# INTELLIGENT FEATURE EXTRACTION
 # -------------------------------
+def extract_enhanced_features(url):
+    """Extract comprehensive features with intelligence"""
+    parsed = urlparse(url)
+    hostname = parsed.hostname or ""
+    path = parsed.path or ""
+    query = parsed.query or ""
+    # Basic metrics
+    url_length = len(url)
+    hostname_length = len(hostname)
+    path_length = len(path)
+    # Character analysis
+    num_dots = hostname.count('.')
+    num_hyphens = hostname.count('-')
+    num_underscores = hostname.count('_')
+    num_digits = sum(c.isdigit() for c in hostname)
+    # Suspicious patterns
+    has_ip = bool(re.match(r'^(\d{1,3}\.){3}\d{1,3}$', hostname))
+    is_https = parsed.scheme == 'https'
+    # URL shortening services
+    shorteners = ['bit.ly', 'tinyurl', 'goo.gl', 't.co', 'ow.ly', 'is.gd', 'short.link']
+    is_shortened = any(short in hostname for short in shorteners)
+    # Suspicious TLD
+    tld = hostname.split('.')[-1] if '.' in hostname else ""
+    has_suspicious_tld = '.' + tld in SUSPICIOUS_TLDS
+    # Keyword analysis
+    url_lower = url.lower()
+    phishing_score = sum(1 for keyword in MALICIOUS_PATTERNS['phishing_keywords']
+                        if keyword in url_lower)
+    malware_score = sum(1 for keyword in MALICIOUS_PATTERNS['malware_keywords']
+                       if keyword in url_lower)
+    adult_score = sum(1 for keyword in MALICIOUS_PATTERNS['adult_keywords']
+                     if keyword in url_lower)
+    # Path analysis
+    suspicious_extensions = ['.exe', '.bat', '.scr', '.zip', '.rar', '.apk']
+    has_suspicious_extension = any(ext in path.lower() for ext in suspicious_extensions)
+    # Query parameter analysis
+    num_params = len(query.split('&')) if query else 0
+    has_encoded_chars = '%' in url
+    return {
+        'url_length': url_length,
+        'hostname_length': hostname_length,
+        'path_length': path_length,
+        'num_dots': num_dots,
+        'num_hyphens': num_hyphens,
+        'num_underscores': num_underscores,
+        'num_digits': num_digits,
+        'has_ip': int(has_ip),
+        'is_https': int(is_https),
+        'is_shortened': int(is_shortened),
+        'has_suspicious_tld': int(has_suspicious_tld),
+        'phishing_score': phishing_score,
+        'malware_score': malware_score,
+        'adult_score': adult_score,
+        'has_suspicious_extension': int(has_suspicious_extension),
+        'num_params': num_params,
+        'has_encoded_chars': int(has_encoded_chars),
+        'is_trusted': int(is_trusted_domain(hostname)),
+        'tld': tld
+    }
 # -------------------------------
+# SMART PREDICTION SYSTEM
 # -------------------------------
+def smart_url_analysis(url):
+    """Intelligent URL analysis with multiple validation layers"""
     try:
         parsed = urlparse(url)
         hostname = parsed.hostname or ""
+        # Layer 1: Trusted Domain Check (Highest Priority)
+        if is_trusted_domain(hostname):
+            return {
+                'final_verdict': 'Safe',
+                'confidence': 95,
+                'reason': 'Verified trusted domain',
+                'threat_level': 'None',
+                'details': f'✅ {hostname} is a verified legitimate domain'
+            }
+        # Layer 2: Extract features
+        features = extract_enhanced_features(url)
+        # Layer 3: Rule-based pre-screening
+        immediate_threats = []
+        threat_score = 0
+        # IP address instead of domain
+        if features['has_ip']:
+            threat_score += 25
+            immediate_threats.append('Using IP address instead of domain name')
+        # Suspicious TLD
+        if features['has_suspicious_tld']:
+            threat_score += 15
+            immediate_threats.append(f'Suspicious top-level domain (.{features["tld"]})')
+        # Malicious keywords
+        if features['phishing_score'] > 2:
+            threat_score += 20
+            immediate_threats.append('Multiple phishing-related keywords detected')
+        elif features['phishing_score'] > 0:
+            threat_score += 10
+            immediate_threats.append('Phishing-related keywords found')
+        if features['malware_score'] > 1:
+            threat_score += 25
+            immediate_threats.append('Malware-related keywords detected')
+        if features['adult_score'] > 1:
+            threat_score += 15
+            immediate_threats.append('Adult content indicators')
+        # Suspicious file extensions
+        if features['has_suspicious_extension']:
+            threat_score += 20
+            immediate_threats.append('Suspicious file extension detected')
+        # URL characteristics
+        if features['url_length'] > 200:
+            threat_score += 10
+            immediate_threats.append('Extremely long URL')
+        elif features['url_length'] > 100:
+            threat_score += 5
+            immediate_threats.append('Long URL')
+        # Too many subdomains
+        if features['num_dots'] > 5:
+            threat_score += 15
+            immediate_threats.append('Excessive subdomains')
+        # Layer 4: ML Model predictions (if available)
+        ml_phishing_threat = False
+        ml_malware_threat = False
+        if phishing_model and malware_model:
+            try:
+                # Prepare data for models
+                feature_df = pd.DataFrame([features])
+                # Get model predictions
+                phishing_pred = phishing_model.predict(feature_df)[0]
+                malware_pred = malware_model.predict(feature_df)[0]
+                # Interpret predictions (adjust based on your model's output format)
+                ml_phishing_threat = str(phishing_pred).lower() in ['1', 'phishing', 'malicious']
+                ml_malware_threat = str(malware_pred).lower() in ['1', 'malware', 'malicious']
+                # Only add ML threat score if rule-based score is already high
+                if ml_phishing_threat and threat_score > 10:
+                    threat_score += 15
+                    immediate_threats.append('ML model detected phishing patterns')
+                if ml_malware_threat and threat_score > 10:
+                    threat_score += 15
+                    immediate_threats.append('ML model detected malware patterns')
+            except Exception as e:
+                print(f"Model prediction error: {e}")
+        # Layer 5: Final decision making
+        if threat_score >= 50:
+            verdict = 'Malicious'
+            confidence = min(95, 60 + threat_score)
+            threat_level = 'High'
+            reason = 'Multiple high-risk indicators detected'
+        elif threat_score >= 30:
+            verdict = 'Suspicious'
+            confidence = min(85, 50 + threat_score)
+            threat_level = 'Medium'
+            reason = 'Several concerning patterns identified'
+        elif threat_score >= 15:
+            verdict = 'Potentially Risky'
+            confidence = min(75, 40 + threat_score)
+            threat_level = 'Low-Medium'
+            reason = 'Some suspicious indicators present'
+        elif threat_score >= 5:
+            verdict = 'Caution Advised'
+            confidence = 60
+            threat_level = 'Low'
+            reason = 'Minor risk indicators detected'
         else:
+            verdict = 'Likely Safe'
+            confidence = max(70, 90 - threat_score)
+            threat_level = 'Minimal'
+            reason = 'No significant threats detected'
         return {
+            'final_verdict': verdict,
+            'confidence': confidence,
+            'reason': reason,
+            'threat_level': threat_level,
+            'threat_score': threat_score,
+            'details': immediate_threats[:5],  # Top 5 threats
+            'ml_predictions': {
+                'phishing': ml_phishing_threat,
+                'malware': ml_malware_threat
+            } if phishing_model and malware_model else None
         }
     except Exception as e:
+        return {
+            'final_verdict': 'Analysis Error',
+            'confidence': 0,
+            'reason': f'Error during analysis: {str(e)}',
+            'threat_level': 'Unknown',
+            'details': []
+        }
 # -------------------------------
+# ENHANCED UI INTERFACE
 # -------------------------------
+def analyze_url_interface(url):
+    """Enhanced interface function with better formatting"""
+    if not url or not url.strip():
+        return """
+🚫 **Error: No URL Provided**
+Please enter a valid URL to analyze.
+**Example formats:**
+• https://example.com
+• http://suspicious-site.com
+• just-domain.com (we'll add https://)
+"""
+    # Clean and prepare URL
+    url = url.strip()
     if not url.startswith(('http://', 'https://')):
         url = 'https://' + url
+    # Perform analysis
+    result = smart_url_analysis(url)
+    # Create emoji indicators
+    verdict_emoji = {
+        'Safe': '✅',
+        'Likely Safe': '✅',
+        'Caution Advised': '⚠️',
+        'Potentially Risky': '⚠️',
+        'Suspicious': '🔴',
+        'Malicious': '🚨',
+        'Analysis Error': '❌'
+    }
+    confidence_bar = "█" * (result['confidence'] // 10) + "░" * (10 - result['confidence'] // 10)
+    # Format main report
+    report = f"""
+{verdict_emoji.get(result['final_verdict'], '❓')} **SECURITY ANALYSIS REPORT**
+═══════════════════════════════════════════════
+🌐 **URL:** {url}
+🛡️ **Security Verdict:** {result['final_verdict']}
+📊 **Confidence Level:** {result['confidence']}% {confidence_bar}
+⚡ **Threat Level:** {result['threat_level']}
+💡 **Primary Reason:** {result['reason']}
 """
+    # Add threat score if applicable
+    if 'threat_score' in result:
+        threat_bar = "🔥" * min(10, result['threat_score'] // 5) + "░" * max(0, 10 - result['threat_score'] // 5)
+        report += f"🎯 **Threat Score:** {result['threat_score']}/100 {threat_bar}\n"
+    # Add detailed findings
+    if result['details']:
+        report += f"\n🔍 **Detailed Findings:**\n"
+        for i, detail in enumerate(result['details'], 1):
+            report += f"   {i}. {detail}\n"
+    # Add ML predictions if available
+    if result.get('ml_predictions'):
+        report += f"\n🤖 **AI Model Analysis:**\n"
+        report += f"   • Phishing Detection: {'⚠️ Detected' if result['ml_predictions']['phishing'] else '✅ Clear'}\n"
+        report += f"   • Malware Detection: {'⚠️ Detected' if result['ml_predictions']['malware'] else '✅ Clear'}\n"
+    # Add recommendations
+    report += f"\n💡 **Recommendations:**\n"
+    if result['final_verdict'] in ['Safe', 'Likely Safe']:
+        report += "   ✅ This URL appears safe to visit\n   ✅ Standard security practices still recommended\n"
+    elif result['final_verdict'] in ['Caution Advised', 'Potentially Risky']:
+        report += "   ⚠️ Exercise caution when visiting\n   ⚠️ Verify the site's legitimacy before entering personal data\n"
+    elif result['final_verdict'] in ['Suspicious', 'Malicious']:
+        report += "   🚨 **DO NOT VISIT** this URL\n   🚨 Consider it a security threat\n   🚨 Report if received via email/message\n"
+    else:
+        report += "   ❓ Unable to determine safety - proceed with extreme caution\n"
+    report += "\n" + "═" * 50
+    report += f"\n⏰ Analysis completed at {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}"
+    return report
 # -------------------------------
+# GRADIO APPLICATION
 # -------------------------------
+def create_interface():
+    """Create enhanced Gradio interface"""
+    # Custom CSS for better appearance
+    custom_css = """
     .gradio-container {
+        max-width: 900px !important;
         margin: auto;
+        font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
     }
+    .input-container {
+        background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
+        padding: 20px;
+        border-radius: 15px;
+        margin-bottom: 20px;
+    }
+    .output-container {
+        background: #f8f9fa;
+        border: 2px solid #e9ecef;
+        border-radius: 10px;
+        padding: 15px;
+    }
+    .title {
+        text-align: center;
+        color: #2c3e50;
+        font-weight: bold;
+        margin-bottom: 10px;
+    }
+    .description {
+        text-align: center;
+        color: #34495e;
+        font-size: 16px;
+        margin-bottom: 20px;
+    }
+    """
+    # Example URLs for testing
+    example_urls = [
+        ["https://www.google.com"],
+        ["https://github.com"],
+        ["https://www.paypal.com"],
+        ["http://suspicious-site.tk"],
+        ["bit.ly/malicious-link"],
+        ["http://192.168.1.1/download.exe"]
+    ]
+    # Create interface
+    demo = gr.Interface(
+        fn=analyze_url_interface,
+        inputs=gr.Textbox(
+            label="🔍 Enter URL to Analyze",
+            placeholder="Enter URL here (e.g., https://example.com or just example.com)",
+            lines=1,
+            max_lines=1,
+            elem_classes="input-container"
+        ),
+        outputs=gr.Textbox(
+            label="🛡️ Security Analysis Report",
+            lines=20,
+            max_lines=25,
+            elem_classes="output-container",
+            show_copy_button=True
+        ),
+        title="🛡️ Advanced URL Security Analyzer",
+        description="""
+        **Professional-Grade URL Threat Detection System**
+        🎯 **Features:**
+        • Dual AI model analysis for phishing and malware detection
+        • Real-time threat pattern recognition
+        • Trusted domain verification system
+        • Comprehensive risk scoring algorithm
+        🔒 **Protection Against:**
+        • Phishing websites • Malware distribution sites • Suspicious short links • Adult content • Financial scams
+        Simply paste any URL below to get instant security analysis!
+        """,
+        examples=example_urls,
+        theme=gr.themes.Soft(
+            primary_hue="blue",
+            secondary_hue="gray",
+            neutral_hue="slate"
+        ),
+        css=custom_css,
+        analytics_enabled=False,
+        allow_flagging="never"
+    )
+    return demo
+# -------------------------------
+# MAIN APPLICATION
+# -------------------------------
 if __name__ == "__main__":
+    print("🚀 Starting Enhanced URL Security Analyzer...")
+    print("🔧 Loading models and initializing system...")
+    # Verify models are loaded
+    if not phishing_model or not malware_model:
+        print("⚠️ Warning: ML models not found. Running with rule-based analysis only.")
+    else:
+        print("✅ ML models loaded successfully!")
+    # Create and launch interface
+    demo = create_interface()
+    print("🌐 Launching web interface...")
     demo.launch(
         share=True,
         server_name="0.0.0.0",
         server_port=7860,
         show_error=True,
+        show_api=True,
+        quiet=False
+    )