Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,6 +1,92 @@
|
|
| 1 |
import streamlit as st
|
| 2 |
import requests
|
| 3 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4 |
|
| 5 |
# Streamlit app configuration
|
| 6 |
st.set_page_config(page_title='Phishing URL Detection', layout='centered')
|
|
@@ -17,22 +103,26 @@ st.title('🔍 Phishing URL Detection App')
|
|
| 17 |
st.write('Enter a URL to check if it is Phishing or Legitimate.')
|
| 18 |
|
| 19 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 20 |
# Input URL
|
| 21 |
url_input = st.text_input('Enter URL:', '')
|
| 22 |
|
| 23 |
-
# Hugging Face model endpoint
|
| 24 |
-
API_URL = 'https://huggingface.co/ayeshaishaq004/website-url-classifier/resolve/main/phishing_model.pkl'
|
| 25 |
-
|
| 26 |
if st.button('Check URL'):
|
| 27 |
if url_input:
|
| 28 |
try:
|
| 29 |
-
#
|
| 30 |
-
|
| 31 |
-
prediction = response.json().get('prediction', 'Error: Could not get prediction')
|
| 32 |
|
| 33 |
-
if
|
| 34 |
st.error('🚨 This URL is likely a **Phishing Site**. Be careful!')
|
| 35 |
-
elif
|
| 36 |
st.success('✅ This URL is likely **Legitimate**.')
|
| 37 |
else:
|
| 38 |
st.warning('⚠️ Unable to determine. Try again later.')
|
|
|
|
| 1 |
import streamlit as st
|
| 2 |
import requests
|
| 3 |
+
import pandas as pd
|
| 4 |
+
import socket
|
| 5 |
+
import whois
|
| 6 |
+
from urllib.parse import urlparse
|
| 7 |
+
from bs4 import BeautifulSoup
|
| 8 |
+
from datetime import datetime
|
| 9 |
+
import pickle
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
def extract_features(url):
    """Extract 8 binary phishing-indicator features from *url*.

    Returns a list in the fixed order expected by the trained model:
    [SFH, popUpWidnow, SSLfinal_State, Request_URL, URL_of_Anchor,
     URL_Length, age_of_domain, having_IP_Address]
    where 1 generally marks the "suspicious" outcome and the
    unreachable/unknown fallbacks match the original behavior.

    NOTE: performs live network I/O (one HTTP GET and one WHOIS lookup).
    """
    netloc = urlparse(url).netloc

    # having_IP_Address: 1 when the host part is a raw IPv4 address.
    # inet_aton raises OSError for anything that is not a dotted quad.
    try:
        socket.inet_aton(netloc)
        having_IP_Address = 1
    except OSError:
        having_IP_Address = 0

    # URL_Length: long URLs (>= 54 chars) are treated as suspicious.
    URL_Length = 1 if len(url) >= 54 else 0

    # Fetch the page once and reuse it below.  The original code fetched
    # the same URL twice (anchors and redirect check); one request is
    # enough and halves the latency.  On any failure we fall back to the
    # same "suspicious/unknown" values the original produced.
    try:
        response = requests.get(url, timeout=5)
    except Exception:
        response = None

    # URL_of_Anchor: suspicious when the page is unreachable, has no
    # anchors, or when more than half of its anchors point off-site.
    soup = None
    try:
        if response is None:
            raise ValueError("page not fetched")
        soup = BeautifulSoup(response.content, "html.parser")
        anchors = soup.find_all("a", href=True)
        if not anchors:
            URL_of_Anchor = 1
        else:
            unsafe = [a for a in anchors if not a['href'].startswith(url)]
            URL_of_Anchor = 1 if len(unsafe) / len(anchors) > 0.5 else 0
    except Exception:
        URL_of_Anchor = 1

    # age_of_domain: 1 when the WHOIS creation date is older than ~6
    # months.  WHOIS may return a list of dates or None; any failure
    # (lookup error, missing date) falls back to 0 as before.
    try:
        domain_info = whois.whois(netloc)
        creation_date = domain_info.creation_date
        if isinstance(creation_date, list):
            creation_date = creation_date[0]
        age_of_domain = 1 if (datetime.now() - creation_date).days > 180 else 0
    except Exception:
        age_of_domain = 0

    # SSLfinal_State: crude scheme check (https prefix), no cert check.
    SSLfinal_State = 1 if url.startswith("https") else 0

    # Request_URL: 1 when the request failed or landed on a different
    # final URL (i.e. the site redirected us somewhere else).
    if response is not None and response.url == url:
        Request_URL = 0
    else:
        Request_URL = 1

    # SFH (server form handler): suspicious when the page could not be
    # parsed, has no forms, or a form posts to "about:blank" / a
    # non-http action.
    try:
        if soup is None:
            raise ValueError("page not parsed")
        forms = soup.find_all("form", action=True)
        if not forms:
            SFH = 1
        else:
            SFH = 0
            for form in forms:
                action = form['action']
                if action == "about:blank" or not action.startswith("http"):
                    SFH = 1
                    break
    except Exception:
        SFH = 1

    # popUpWidnow [sic — name kept for model-column compatibility]:
    # 1 when the page script opens pop-up windows.
    popUpWidnow = 1 if response is not None and "window.open" in response.text else 0

    return [SFH, popUpWidnow, SSLfinal_State, Request_URL, URL_of_Anchor,
            URL_Length, age_of_domain, having_IP_Address]
|
| 77 |
+
|
| 78 |
+
|
| 79 |
+
def predict_url(url, model, X):
    """Classify *url* with the trained model.

    Extracts the feature vector, wraps it in a one-row DataFrame whose
    columns match the training columns (``X.columns``), and maps the
    model's numeric label to a verdict string.

    Returns 'Phishing' for label 1, 'Legitimate' for label 0, and
    'Unknown' for any other label.
    """
    feature_row = pd.DataFrame([extract_features(url)], columns=X.columns)
    label = model.predict(feature_row)[0]
    if label == 1:
        return "Phishing"
    if label == 0:
        return "Legitimate"
    return "Unknown"
|
| 89 |
+
|
| 90 |
|
| 91 |
# Streamlit app configuration: browser-tab title and centered page layout.
# Must run before any other st.* call in the script.
st.set_page_config(page_title='Phishing URL Detection', layout='centered')
|
|
|
|
| 103 |
st.write('Enter a URL to check if it is Phishing or Legitimate.')
|
| 104 |
|
| 105 |
|
| 106 |
+
def _load_pickle(path):
    # Deserialize one artifact shipped alongside the app.
    # NOTE(review): pickle.load executes arbitrary code from the file —
    # only load artifacts produced by this project.
    with open(path, 'rb') as fh:
        return pickle.load(fh)


# Load the trained classifier and the feature-column order it expects.
model = _load_pickle('phishing_model.pkl')
X_columns = _load_pickle('X_columns.pkl')
|
| 113 |
+
|
| 114 |
# Input URL
|
| 115 |
url_input = st.text_input('Enter URL:', '')
|
| 116 |
|
|
|
|
|
|
|
|
|
|
| 117 |
if st.button('Check URL'):
|
| 118 |
if url_input:
|
| 119 |
try:
|
| 120 |
+
# Make prediction
|
| 121 |
+
result = predict_url(url_input, model, X_columns)
|
|
|
|
| 122 |
|
| 123 |
+
if result == 'Phishing':
|
| 124 |
st.error('🚨 This URL is likely a **Phishing Site**. Be careful!')
|
| 125 |
+
elif result == 'Legitimate':
|
| 126 |
st.success('✅ This URL is likely **Legitimate**.')
|
| 127 |
else:
|
| 128 |
st.warning('⚠️ Unable to determine. Try again later.')
|