Spaces:
Sleeping
Sleeping
Create app.py
Browse files
app.py
ADDED
|
@@ -0,0 +1,140 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import streamlit as st
|
| 2 |
+
import pandas as pd
|
| 3 |
+
import numpy as np
|
| 4 |
+
import re
|
| 5 |
+
from sklearn.feature_extraction.text import TfidfVectorizer
|
| 6 |
+
from sklearn.ensemble import RandomForestClassifier
|
| 7 |
+
from sklearn.naive_bayes import MultinomialNB
|
| 8 |
+
from sklearn.pipeline import make_pipeline
|
| 9 |
+
|
| 10 |
+
# --- PAGE CONFIGURATION ---
# Page-level settings are gathered in one mapping and applied in a single call.
_PAGE_SETTINGS = {
    "page_title": "Human Firewall AI",
    "page_icon": "🛡️",
    "layout": "centered",
}
st.set_page_config(**_PAGE_SETTINGS)

# Page header: title, project subtitle, and a short description of the app.
st.title("🛡️ AI Phishing & Scam Detector")
st.markdown("### The Human Firewall Project | Edy Qineos Academy")
st.info("Aplikasi ini menggunakan AI untuk mendeteksi anomali pada URL (Link) dan Pola Bahasa Penipuan (NLP) pada pesan teks.")
|
| 20 |
+
|
| 21 |
+
# --- BAGIAN 1: OTAK AI (TRAINING OTOMATIS SAAT STARTUP) ---
|
| 22 |
+
# Kita gunakan @st.cache_resource agar training hanya terjadi 1x saat aplikasi nyala
|
| 23 |
+
# Jadi tidak berat loadingnya.
|
| 24 |
+
|
| 25 |
+
# --- FEATURE EXTRACTION (shared by training and live prediction) ---
def extract_features_realtime(url: str) -> list:
    """Turn a URL string into the lexical feature vector used by the URL model.

    This is the single extractor for both training (inside ``train_models``)
    and live prediction, so the two feature layouts cannot drift apart. It is
    defined before ``train_models`` because that function is invoked at import
    time, right after its definition.

    Args:
        url: The URL (or bare host name) to describe.

    Returns:
        A list of five ints, in this order: total length, number of ``.``,
        number of ``-``, 1 if the string contains any digit else 0, and
        1 if the URL starts with the plain ``http://`` scheme else 0.
    """
    return [
        len(url),                                   # URL length
        url.count('.'),                             # number of dots
        url.count('-'),                             # number of hyphens
        1 if any(c.isdigit() for c in url) else 0,  # contains a digit?
        # Insecure scheme: test the prefix, case-insensitively. A substring
        # test ("http" in url and "https" not in url) misses "HTTP://..." and
        # "http://evil.example/?next=https://bank.example", and fires on any
        # URL that merely contains "http" somewhere in its path.
        1 if url.lower().startswith("http://") else 0,
    ]


@st.cache_resource
def train_models():
    """Train the two demo classifiers on small built-in simulated datasets.

    Wrapped in ``st.cache_resource`` so the training runs once and the fitted
    models are reused on later script reruns instead of being retrained.

    Returns:
        A ``(model_url, model_sms)`` tuple:

        * ``model_url`` - ``RandomForestClassifier`` fitted on the vectors
          produced by ``extract_features_realtime``.
        * ``model_sms`` - ``TfidfVectorizer`` + ``MultinomialNB`` pipeline
          fitted on raw message text.

        In both models label 0 means safe/normal and 1 means phishing/scam.
    """
    # --- A. URL DATASET (SIMULATED) ---
    data_url = {
        'url': [
            # Legitimate
            'google.com', 'facebook.com', 'klikbca.com', 'ugm.ac.id', 'microsoft.com',
            # Phishing
            'secure-login-bca.com', 'g00gle-security.xyz', 'free-iphone.net', 'klikbca-verify.info',
            'paypal-limited.com', 'dana-kaget.biz', 'brimo-undian.apk', 'netflix-payment.com'
        ],
        'label': [0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1]  # 0 = safe, 1 = phishing
    }

    # Use the same extractor as live prediction so both sides always agree.
    X_url = [extract_features_realtime(u) for u in data_url['url']]
    y_url = data_url['label']

    # Train the URL model (Random Forest); fixed seed keeps results repeatable.
    model_url = RandomForestClassifier(n_estimators=50, random_state=42)
    model_url.fit(X_url, y_url)

    # --- B. SMS DATASET (SIMULATED) ---
    data_sms = [
        ("Bro, nanti futsal jam berapa?", 0),
        ("Selamat! Anda menang undian Rp 100jt. Klik bit.ly/klaim", 1),
        ("Paket Anda tertahan. Bayar ongkir segera.", 1),
        ("Rapat besok diundur ke jam 10 pagi ya.", 0),
        ("Mama minta pulsa ke nomor ini dulu.", 1),
        ("Kode OTP Anda 5820. JANGAN BERIKAN KE SIAPAPUN.", 0),
        ("Butuh pinjaman cepat cair bunga rendah? Hubungi kami.", 1),
        ("Terima kasih sudah belanja di Tokopedia.", 0)
    ]
    df_sms = pd.DataFrame(data_sms, columns=['text', 'label'])

    # Train the SMS model (TF-IDF features fed into Naive Bayes).
    model_sms = make_pipeline(TfidfVectorizer(), MultinomialNB())
    model_sms.fit(df_sms['text'], df_sms['label'])

    return model_url, model_sms


# Load the models (called immediately; served from the cache on reruns).
model_url, model_sms = train_models()
|
| 85 |
+
|
| 86 |
+
# --- UI DISPLAY (TABS) ---
tab1, tab2 = st.tabs(["🔗 Cek Link (URL)", "📩 Cek Pesan (SMS/WA)"])

# Tab 1: classify a URL with the lexical-feature Random Forest model.
with tab1:
    st.header("URL Phishing Hunter")
    st.write("Menganalisa struktur tulisan link untuk mendeteksi penipuan.")

    url_input = st.text_input("Masukkan Link / URL mencurigakan:", placeholder="Contoh: www.klikbca-promo-undian.com")

    if st.button("🔍 Scan URL"):
        # Pasted links often carry stray leading/trailing whitespace, which
        # would otherwise inflate the length feature.
        url_clean = url_input.strip()
        if not url_clean:
            # Give feedback instead of silently ignoring the click.
            st.warning("⚠️ Masukkan URL terlebih dahulu sebelum melakukan scan.")
        else:
            # Prediction
            url_features = extract_features_realtime(url_clean)
            features = [url_features]
            pred = model_url.predict(features)[0]
            prob = model_url.predict_proba(features)[0]
            confidence = max(prob) * 100

            # Report the very values the model scored rather than recomputing
            # them. Layout: length, dots, hyphens, has-digit, insecure-HTTP.
            length, dots, dashes, _has_digit, insecure_http = url_features

            st.divider()
            if pred == 1:  # PHISHING
                st.error("🚨 HASIL: BERBAHAYA (PHISHING)")
                st.write(f"AI yakin **{confidence:.1f}%** link ini jahat.")
                st.markdown("**Alasan Deteksi:**")
                st.markdown(f"- Panjang Karakter: `{length}` (Terlalu panjang/pendek)")
                st.markdown(f"- Jumlah Simbol Titik/Strip: `{dots + dashes}` (Indikasi obfuscation)")
                if insecure_http:
                    st.markdown("- Protokol: `HTTP` (Tidak Aman)")
            else:  # SAFE
                st.success("✅ HASIL: AMAN (LEGITIMATE)")
                st.write(f"AI yakin **{confidence:.1f}%** link ini aman.")
|
| 115 |
+
|
| 116 |
+
# Tab 2: classify a pasted message with the TF-IDF + Naive Bayes pipeline.
with tab2:
    st.header("Scam Message Detector")
    st.write("Menggunakan NLP untuk membaca niat jahat dalam teks pesan.")

    sms_input = st.text_area("Copy-Paste isi pesan SMS/WhatsApp di sini:", placeholder="Contoh: Selamat Anda menang undian...")

    if st.button("🧠 Analisa Bahasa"):
        # A whitespace-only message is truthy but contains no words to
        # analyse, so treat it the same as an empty box.
        sms_clean = sms_input.strip()
        if not sms_clean:
            # Give feedback instead of silently ignoring the click.
            st.warning("⚠️ Masukkan isi pesan terlebih dahulu sebelum melakukan analisa.")
        else:
            # Prediction
            pred = model_sms.predict([sms_clean])[0]
            prob = model_sms.predict_proba([sms_clean])[0]
            confidence = max(prob) * 100

            st.divider()
            if pred == 1:  # SCAM
                st.error("🚨 HASIL: PENIPUAN (SCAM)")
                st.write(f"AI yakin **{confidence:.1f}%** ini pesan penipuan.")
                st.warning("⚠️ **Peringatan:** Pesan ini mengandung pola 'Urgensi' atau 'Iming-iming Hadiah'. Jangan klik link apapun!")
            else:  # NORMAL
                st.success("✅ HASIL: PESAN NORMAL")
                st.write(f"AI yakin **{confidence:.1f}%** ini percakapan biasa.")
|
| 137 |
+
|
| 138 |
+
# --- FOOTER ---
# Closing separator followed by the educational-use disclaimer.
_footer_note = "Disclaimer: Model ini dilatih dengan data simulasi untuk tujuan edukasi demonstrasi AI Security."
st.divider()
st.caption(_footer_note)
|