Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,6 +1,92 @@
|
|
| 1 |
import streamlit as st
|
| 2 |
import requests
|
| 3 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4 |
|
| 5 |
# Streamlit app configuration
|
| 6 |
st.set_page_config(page_title='Phishing URL Detection', layout='centered')
|
|
@@ -17,22 +103,26 @@ st.title('🔍 Phishing URL Detection App')
|
|
| 17 |
st.write('Enter a URL to check if it is Phishing or Legitimate.')
|
| 18 |
|
| 19 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 20 |
# Input URL
|
| 21 |
url_input = st.text_input('Enter URL:', '')
|
| 22 |
|
| 23 |
-
# Hugging Face model endpoint
|
| 24 |
-
API_URL = 'https://huggingface.co/ayeshaishaq004/website-url-classifier/resolve/main/phishing_model.pkl'
|
| 25 |
-
|
| 26 |
if st.button('Check URL'):
|
| 27 |
if url_input:
|
| 28 |
try:
|
| 29 |
-
#
|
| 30 |
-
|
| 31 |
-
prediction = response.json().get('prediction', 'Error: Could not get prediction')
|
| 32 |
|
| 33 |
-
if
|
| 34 |
st.error('🚨 This URL is likely a **Phishing Site**. Be careful!')
|
| 35 |
-
elif
|
| 36 |
st.success('✅ This URL is likely **Legitimate**.')
|
| 37 |
else:
|
| 38 |
st.warning('⚠️ Unable to determine. Try again later.')
|
|
|
|
| 1 |
import streamlit as st
|
| 2 |
import requests
|
| 3 |
+
import pandas as pd
|
| 4 |
+
import socket
|
| 5 |
+
import whois
|
| 6 |
+
from urllib.parse import urlparse
|
| 7 |
+
from bs4 import BeautifulSoup
|
| 8 |
+
from datetime import datetime
|
| 9 |
+
import pickle
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
def extract_features(url):
    """Extract 8 binary phishing-indicator features from *url*.

    Returns a list in the fixed order expected by the trained model:
    [SFH, popUpWidnow, SSLfinal_State, Request_URL, URL_of_Anchor,
     URL_Length, age_of_domain, having_IP_Address]
    where 1 generally marks the "suspicious" outcome and the
    unreachable/unknown fallbacks match the original behavior.

    NOTE: performs live network I/O (one HTTP GET and one WHOIS lookup).
    """
    netloc = urlparse(url).netloc

    # having_IP_Address: 1 when the host part is a raw IPv4 address.
    # inet_aton raises OSError for anything that is not a dotted quad.
    try:
        socket.inet_aton(netloc)
        having_IP_Address = 1
    except OSError:
        having_IP_Address = 0

    # URL_Length: long URLs (>= 54 chars) are treated as suspicious.
    URL_Length = 1 if len(url) >= 54 else 0

    # Fetch the page once and reuse it below.  The original code fetched
    # the same URL twice (anchors and redirect check); one request is
    # enough and halves the latency.  On any failure we fall back to the
    # same "suspicious/unknown" values the original produced.
    try:
        response = requests.get(url, timeout=5)
    except Exception:
        response = None

    # URL_of_Anchor: suspicious when the page is unreachable, has no
    # anchors, or when more than half of its anchors point off-site.
    soup = None
    try:
        if response is None:
            raise ValueError("page not fetched")
        soup = BeautifulSoup(response.content, "html.parser")
        anchors = soup.find_all("a", href=True)
        if not anchors:
            URL_of_Anchor = 1
        else:
            unsafe = [a for a in anchors if not a['href'].startswith(url)]
            URL_of_Anchor = 1 if len(unsafe) / len(anchors) > 0.5 else 0
    except Exception:
        URL_of_Anchor = 1

    # age_of_domain: 1 when the WHOIS creation date is older than ~6
    # months.  WHOIS may return a list of dates or None; any failure
    # (lookup error, missing date) falls back to 0 as before.
    try:
        domain_info = whois.whois(netloc)
        creation_date = domain_info.creation_date
        if isinstance(creation_date, list):
            creation_date = creation_date[0]
        age_of_domain = 1 if (datetime.now() - creation_date).days > 180 else 0
    except Exception:
        age_of_domain = 0

    # SSLfinal_State: crude scheme check (https prefix), no cert check.
    SSLfinal_State = 1 if url.startswith("https") else 0

    # Request_URL: 1 when the request failed or landed on a different
    # final URL (i.e. the site redirected us somewhere else).
    if response is not None and response.url == url:
        Request_URL = 0
    else:
        Request_URL = 1

    # SFH (server form handler): suspicious when the page could not be
    # parsed, has no forms, or a form posts to "about:blank" / a
    # non-http action.
    try:
        if soup is None:
            raise ValueError("page not parsed")
        forms = soup.find_all("form", action=True)
        if not forms:
            SFH = 1
        else:
            SFH = 0
            for form in forms:
                action = form['action']
                if action == "about:blank" or not action.startswith("http"):
                    SFH = 1
                    break
    except Exception:
        SFH = 1

    # popUpWidnow [sic — name kept for model-column compatibility]:
    # 1 when the page script opens pop-up windows.
    popUpWidnow = 1 if response is not None and "window.open" in response.text else 0

    return [SFH, popUpWidnow, SSLfinal_State, Request_URL, URL_of_Anchor,
            URL_Length, age_of_domain, having_IP_Address]
|
| 77 |
+
|
| 78 |
+
|
| 79 |
+
def predict_url(url, model, X):
    """Classify *url* with the trained model.

    Extracts the feature vector, wraps it in a one-row DataFrame whose
    columns match the training columns (``X.columns``), and maps the
    model's numeric label to a verdict string.

    Returns 'Phishing' for label 1, 'Legitimate' for label 0, and
    'Unknown' for any other label.
    """
    feature_row = pd.DataFrame([extract_features(url)], columns=X.columns)
    label = model.predict(feature_row)[0]
    if label == 1:
        return "Phishing"
    if label == 0:
        return "Legitimate"
    return "Unknown"
|
| 89 |
+
|
| 90 |
|
| 91 |
# Streamlit app configuration: browser-tab title and centered page layout.
# Must run before any other st.* call in the script.
st.set_page_config(page_title='Phishing URL Detection', layout='centered')
|
|
|
|
| 103 |
st.write('Enter a URL to check if it is Phishing or Legitimate.')
|
| 104 |
|
| 105 |
|
| 106 |
+
def _load_pickle(path):
    # Deserialize one artifact shipped alongside the app.
    # NOTE(review): pickle.load executes arbitrary code from the file —
    # only load artifacts produced by this project.
    with open(path, 'rb') as fh:
        return pickle.load(fh)


# Load the trained classifier and the feature-column order it expects.
model = _load_pickle('phishing_model.pkl')
X_columns = _load_pickle('X_columns.pkl')
|
| 113 |
+
|
| 114 |
# Input URL
|
| 115 |
url_input = st.text_input('Enter URL:', '')
|
| 116 |
|
|
|
|
|
|
|
|
|
|
| 117 |
if st.button('Check URL'):
|
| 118 |
if url_input:
|
| 119 |
try:
|
| 120 |
+
# Make prediction
|
| 121 |
+
result = predict_url(url_input, model, X_columns)
|
|
|
|
| 122 |
|
| 123 |
+
if result == 'Phishing':
|
| 124 |
st.error('🚨 This URL is likely a **Phishing Site**. Be careful!')
|
| 125 |
+
elif result == 'Legitimate':
|
| 126 |
st.success('✅ This URL is likely **Legitimate**.')
|
| 127 |
else:
|
| 128 |
st.warning('⚠️ Unable to determine. Try again later.')
|