import pickle
import socket
from datetime import datetime
from urllib.parse import urlparse

import pandas as pd
import requests
import streamlit as st
import whois
from bs4 import BeautifulSoup
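# Third-party dependencies assumed for this Space (the usual PyPI package
# names, not confirmed by the Space itself): streamlit, requests, pandas,
# beautifulsoup4, and python-whois (which provides the `whois` module).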
def extract_features(url):
    """Extract the eight binary features the model was trained on (1 = suspicious)."""
    # having_IP_Address: 1 if the host part of the URL is a raw IPv4 address
    try:
        socket.inet_aton(urlparse(url).netloc)
        having_IP_Address = 1
    except OSError:
        having_IP_Address = 0

    # URL_Length: URLs of 54 or more characters are flagged as suspicious
    URL_Length = 1 if len(url) >= 54 else 0

    # Fetch the page once; the anchor, redirect, form, and pop-up checks reuse it
    response = None
    soup = None
    try:
        response = requests.get(url, timeout=5)
        soup = BeautifulSoup(response.content, "html.parser")
    except requests.RequestException:
        pass

    # URL_of_Anchor: flag pages where most anchors point away from the site
    if soup is None:
        URL_of_Anchor = 1
    else:
        anchors = soup.find_all("a", href=True)
        if len(anchors) == 0:
            URL_of_Anchor = 1
        else:
            unsafe = [a for a in anchors if not a["href"].startswith(url)]
            URL_of_Anchor = 1 if len(unsafe) / len(anchors) > 0.5 else 0

    # age_of_domain: domains older than ~6 months are treated as established
    try:
        domain_info = whois.whois(urlparse(url).netloc)
        creation_date = domain_info.creation_date
        if isinstance(creation_date, list):
            creation_date = creation_date[0]
        age_of_domain = 1 if (datetime.now() - creation_date).days > 180 else 0
    except Exception:
        # WHOIS lookup failed or returned no creation date
        age_of_domain = 0

    # SSLfinal_State: 1 if the URL uses HTTPS
    SSLfinal_State = 1 if url.startswith("https") else 0

    # Request_URL: flag if the request was redirected to a different URL
    if response is None:
        Request_URL = 1
    else:
        Request_URL = 0 if response.url == url else 1

    # SFH (Server Form Handler): flag blank or non-HTTP form actions
    if soup is None:
        SFH = 1
    else:
        forms = soup.find_all("form", action=True)
        if len(forms) == 0:
            SFH = 1
        else:
            SFH = 0
            for form in forms:
                if form["action"] == "about:blank" or not form["action"].startswith("http"):
                    SFH = 1
                    break

    # popUpWidnow: flag pages that call window.open (the misspelling matches
    # the training data's column name)
    if response is not None and "window.open" in response.text:
        popUpWidnow = 1
    else:
        popUpWidnow = 0

    return [SFH, popUpWidnow, SSLfinal_State, Request_URL, URL_of_Anchor,
            URL_Length, age_of_domain, having_IP_Address]
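# Illustrative example (actual values depend on network access, WHOIS data,
# and the live page). For a long raw-IP URL such as
# "http://203.0.113.7/secure/login/update/account/verify/session" whose page
# fails to load, extract_features would return [1, 0, 0, 1, 1, 1, 0, 1]:
# every flag raised except popUpWidnow, SSLfinal_State, and age_of_domain.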
def predict_url(url, model):
    """Classify a single URL with the trained model and map the label to text."""
    features = extract_features(url)
    X_columns = ['SFH', 'popUpWidnow', 'SSLfinal_State', 'Request_URL',
                 'URL_of_Anchor', 'URL_Length', 'age_of_domain',
                 'having_IP_Address']
    features_df = pd.DataFrame([features], columns=X_columns)
    prediction = model.predict(features_df)
    if prediction[0] == 1:
        return "Phishing"
    elif prediction[0] == 0:
        return "Legitimate"
    else:
        return "Unknown"
# Streamlit app configuration
st.set_page_config(page_title='Phishing URL Detection', layout='centered')

# App header styling
st.markdown("""
    <style>
    body { background-color: #f0f2f6; }
    .main { background-color: white; padding: 2rem; border-radius: 12px; box-shadow: 0 0 10px rgba(0,0,0,0.1); }
    </style>
""", unsafe_allow_html=True)
st.title('🔍 Phishing URL Detection App')
st.write('Enter a URL to check if it is Phishing or Legitimate.')
# Load the trained model once and cache it across Streamlit reruns
@st.cache_resource
def load_model():
    with open('phishing_model.pkl', 'rb') as f:
        return pickle.load(f)

model = load_model()
# Input URL
url_input = st.text_input('Enter URL:', '')

if st.button('Check URL'):
    if url_input:
        try:
            # Make prediction
            result = predict_url(url_input, model)
            if result == 'Phishing':
                st.error('🚨 This URL is likely a **Phishing Site**. Be careful!')
            elif result == 'Legitimate':
                st.success('✅ This URL is likely **Legitimate**.')
            else:
                st.warning('⚠️ Unable to determine. Try again later.')
        except Exception as e:
            st.error(f'Error: {e}')
    else:
        st.warning('Please enter a valid URL.')
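# ---------------------------------------------------------------------------
# Reference sketch (an assumption, not this Space's confirmed setup): one way
# a compatible 'phishing_model.pkl' could be trained. It assumes a labelled
# CSV with the eight feature columns used above plus a 'Result' target
# (1 = phishing, 0 = legitimate, matching the labels predict_url expects);
# the feature names mirror the UCI "Website Phishing" data, including its
# misspelled 'popUpWidnow' column. The file name 'phishing_data.csv' and the
# RandomForestClassifier are illustrative choices. Run separately, not inside
# the app:
#
#     import pickle
#     import pandas as pd
#     from sklearn.ensemble import RandomForestClassifier
#
#     df = pd.read_csv('phishing_data.csv')   # hypothetical training file
#     X = df[['SFH', 'popUpWidnow', 'SSLfinal_State', 'Request_URL',
#             'URL_of_Anchor', 'URL_Length', 'age_of_domain',
#             'having_IP_Address']]
#     y = df['Result']                        # 1 = phishing, 0 = legitimate
#
#     model = RandomForestClassifier(n_estimators=100, random_state=42)
#     model.fit(X, y)
#
#     with open('phishing_model.pkl', 'wb') as f:   # same file the app loads
#         pickle.dump(model, f)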