import pickle
import socket
from datetime import datetime
from urllib.parse import urlparse

import pandas as pd
import requests
import streamlit as st
import whois
from bs4 import BeautifulSoup
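# Third-party dependencies assumed for this Space (the usual PyPI package
# names, not confirmed by the Space itself): streamlit, requests, pandas,
# beautifulsoup4, and python-whois (which provides the `whois` module).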
def extract_features(url):
    """Extract the eight binary features the model was trained on (1 = suspicious)."""
    # having_IP_Address: 1 if the host part of the URL is a raw IPv4 address
    try:
        socket.inet_aton(urlparse(url).netloc)
        having_IP_Address = 1
    except OSError:
        having_IP_Address = 0

    # URL_Length: URLs of 54 or more characters are flagged as suspicious
    URL_Length = 1 if len(url) >= 54 else 0

    # Fetch the page once; the anchor, redirect, form, and pop-up checks reuse it
    response = None
    soup = None
    try:
        response = requests.get(url, timeout=5)
        soup = BeautifulSoup(response.content, "html.parser")
    except requests.RequestException:
        pass

    # URL_of_Anchor: flag pages where most anchors point away from the site
    if soup is None:
        URL_of_Anchor = 1
    else:
        anchors = soup.find_all("a", href=True)
        if len(anchors) == 0:
            URL_of_Anchor = 1
        else:
            unsafe = [a for a in anchors if not a["href"].startswith(url)]
            URL_of_Anchor = 1 if len(unsafe) / len(anchors) > 0.5 else 0

    # age_of_domain: domains older than ~6 months are treated as established
    try:
        domain_info = whois.whois(urlparse(url).netloc)
        creation_date = domain_info.creation_date
        if isinstance(creation_date, list):
            creation_date = creation_date[0]
        age_of_domain = 1 if (datetime.now() - creation_date).days > 180 else 0
    except Exception:
        # WHOIS lookup failed or returned no creation date
        age_of_domain = 0

    # SSLfinal_State: 1 if the URL uses HTTPS
    SSLfinal_State = 1 if url.startswith("https") else 0

    # Request_URL: flag if the request was redirected to a different URL
    if response is None:
        Request_URL = 1
    else:
        Request_URL = 0 if response.url == url else 1

    # SFH (Server Form Handler): flag blank or non-HTTP form actions
    if soup is None:
        SFH = 1
    else:
        forms = soup.find_all("form", action=True)
        if len(forms) == 0:
            SFH = 1
        else:
            SFH = 0
            for form in forms:
                if form["action"] == "about:blank" or not form["action"].startswith("http"):
                    SFH = 1
                    break

    # popUpWidnow: flag pages that call window.open (the misspelling matches
    # the training data's column name)
    if response is not None and "window.open" in response.text:
        popUpWidnow = 1
    else:
        popUpWidnow = 0

    return [SFH, popUpWidnow, SSLfinal_State, Request_URL, URL_of_Anchor,
            URL_Length, age_of_domain, having_IP_Address]
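# Illustrative example (actual values depend on network access, WHOIS data,
# and the live page). For a long raw-IP URL such as
# "http://203.0.113.7/secure/login/update/account/verify/session" whose page
# fails to load, extract_features would return [1, 0, 0, 1, 1, 1, 0, 1]:
# every flag raised except popUpWidnow, SSLfinal_State, and age_of_domain.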
def predict_url(url, model):
    """Classify a single URL with the trained model and map the label to text."""
    features = extract_features(url)
    X_columns = ['SFH', 'popUpWidnow', 'SSLfinal_State', 'Request_URL',
                 'URL_of_Anchor', 'URL_Length', 'age_of_domain',
                 'having_IP_Address']
    features_df = pd.DataFrame([features], columns=X_columns)
    prediction = model.predict(features_df)
    if prediction[0] == 1:
        return "Phishing"
    elif prediction[0] == 0:
        return "Legitimate"
    else:
        return "Unknown"
# Streamlit app configuration
st.set_page_config(page_title='Phishing URL Detection', layout='centered')

# App header styling
st.markdown("""
    <style>
    body { background-color: #f0f2f6; }
    .main { background-color: white; padding: 2rem; border-radius: 12px; box-shadow: 0 0 10px rgba(0,0,0,0.1); }
    </style>
""", unsafe_allow_html=True)
st.title('🔍 Phishing URL Detection App')
st.write('Enter a URL to check if it is Phishing or Legitimate.')
# Load the trained model once and cache it across Streamlit reruns
@st.cache_resource
def load_model():
    with open('phishing_model.pkl', 'rb') as f:
        return pickle.load(f)

model = load_model()
# Input URL
url_input = st.text_input('Enter URL:', '')

if st.button('Check URL'):
    if url_input:
        try:
            # Make prediction
            result = predict_url(url_input, model)
            if result == 'Phishing':
                st.error('🚨 This URL is likely a **Phishing Site**. Be careful!')
            elif result == 'Legitimate':
                st.success('✅ This URL is likely **Legitimate**.')
            else:
                st.warning('⚠️ Unable to determine. Try again later.')
        except Exception as e:
            st.error(f'Error: {e}')
    else:
        st.warning('Please enter a valid URL.')
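# ---------------------------------------------------------------------------
# Reference sketch (an assumption, not this Space's confirmed setup): one way
# a compatible 'phishing_model.pkl' could be trained. It assumes a labelled
# CSV with the eight feature columns used above plus a 'Result' target
# (1 = phishing, 0 = legitimate, matching the labels predict_url expects);
# the feature names mirror the UCI "Website Phishing" data, including its
# misspelled 'popUpWidnow' column. The file name 'phishing_data.csv' and the
# RandomForestClassifier are illustrative choices. Run separately, not inside
# the app:
#
#     import pickle
#     import pandas as pd
#     from sklearn.ensemble import RandomForestClassifier
#
#     df = pd.read_csv('phishing_data.csv')   # hypothetical training file
#     X = df[['SFH', 'popUpWidnow', 'SSLfinal_State', 'Request_URL',
#             'URL_of_Anchor', 'URL_Length', 'age_of_domain',
#             'having_IP_Address']]
#     y = df['Result']                        # 1 = phishing, 0 = legitimate
#
#     model = RandomForestClassifier(n_estimators=100, random_state=42)
#     model.fit(X, y)
#
#     with open('phishing_model.pkl', 'wb') as f:   # same file the app loads
#         pickle.dump(model, f)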