siedysioes committed on
Commit
a571e45
·
verified ·
1 Parent(s): ef3cc17

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +140 -0
app.py ADDED
@@ -0,0 +1,140 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import numpy as np
4
+ import re
5
+ from sklearn.feature_extraction.text import TfidfVectorizer
6
+ from sklearn.ensemble import RandomForestClassifier
7
+ from sklearn.naive_bayes import MultinomialNB
8
+ from sklearn.pipeline import make_pipeline
9
+
10
# --- PAGE CONFIGURATION ---
# Settings handed to st.set_page_config: browser-tab title, icon and layout.
_PAGE_SETTINGS = {
    "page_title": "Human Firewall AI",
    "page_icon": "🛡️",
    "layout": "centered",
}
st.set_page_config(**_PAGE_SETTINGS)

# Page heading: main title, project subtitle and a short description banner.
st.title("🛡️ AI Phishing & Scam Detector")
st.markdown("### The Human Firewall Project | Edy Qineos Academy")
st.info(
    "Aplikasi ini menggunakan AI untuk mendeteksi anomali pada URL (Link) dan Pola Bahasa Penipuan (NLP) pada pesan teks."
)
20
+
21
# --- PART 1: THE AI "BRAIN" (TRAINED AUTOMATICALLY AT STARTUP) ---
# @st.cache_resource is used so that training happens only once when the
# application starts, which keeps loading light.

@st.cache_resource
def train_models():
    """Train the two demo classifiers on small built-in simulated datasets.

    Returns:
        A ``(model_url, model_sms)`` tuple. ``model_url`` is a
        ``RandomForestClassifier`` fitted on five lexical URL features
        (length, dot count, hyphen count, has-digit flag, plain-HTTP flag).
        ``model_sms`` is a ``TfidfVectorizer`` + ``MultinomialNB`` pipeline
        fitted on raw message text. In both, label 0 is safe/normal and
        label 1 is phishing/scam.
    """
    # --- A. URL DATASET (SIMULATED) ---
    data_url = {
        'url': [
            'google.com', 'facebook.com', 'klikbca.com', 'ugm.ac.id', 'microsoft.com',  # Legit
            'secure-login-bca.com', 'g00gle-security.xyz', 'free-iphone.net', 'klikbca-verify.info',  # Phishing
            'paypal-limited.com', 'dana-kaget.biz', 'brimo-undian.apk', 'netflix-payment.com',
        ],
        'label': [0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1],  # 0=Safe, 1=Phishing
    }

    # Lexical URL feature extraction
    def extract_features(url):
        # Only a leading "http://" scheme marks a link as insecure. The old
        # substring test ('"http" in url and "https" not in url') missed
        # plain-HTTP links whose host/path contains "https" and flagged
        # scheme-less links that merely contain "http". None of the training
        # URLs above carries a scheme, so the fitted model is unaffected.
        uses_plain_http = url.strip().lower().startswith("http://")
        return [
            len(url),                                   # URL length
            url.count('.'),                             # number of dots
            url.count('-'),                             # number of hyphens
            1 if any(c.isdigit() for c in url) else 0,  # contains a digit?
            1 if uses_plain_http else 0,                # insecure HTTP scheme
        ]

    X_url = [extract_features(u) for u in data_url['url']]
    y_url = data_url['label']

    # Train the URL model (Random Forest); fixed seed keeps it reproducible.
    model_url = RandomForestClassifier(n_estimators=50, random_state=42)
    model_url.fit(X_url, y_url)

    # --- B. SMS DATASET (SIMULATED) ---
    data_sms = [
        ("Bro, nanti futsal jam berapa?", 0),
        ("Selamat! Anda menang undian Rp 100jt. Klik bit.ly/klaim", 1),
        ("Paket Anda tertahan. Bayar ongkir segera.", 1),
        ("Rapat besok diundur ke jam 10 pagi ya.", 0),
        ("Mama minta pulsa ke nomor ini dulu.", 1),
        ("Kode OTP Anda 5820. JANGAN BERIKAN KE SIAPAPUN.", 0),
        ("Butuh pinjaman cepat cair bunga rendah? Hubungi kami.", 1),
        ("Terima kasih sudah belanja di Tokopedia.", 0),
    ]
    df_sms = pd.DataFrame(data_sms, columns=['text', 'label'])

    # Train the SMS model (TF-IDF features fed into Naive Bayes)
    model_sms = make_pipeline(TfidfVectorizer(), MultinomialNB())
    model_sms.fit(df_sms['text'], df_sms['label'])

    return model_url, model_sms


# Load the models (called immediately at script start)
model_url, model_sms = train_models()
75
+
76
# --- FEATURE EXTRACTION (must keep the same layout used during training) ---
def extract_features_realtime(url):
    """Return the lexical feature vector for *url* fed to the URL classifier.

    Args:
        url: Link text to analyse, with or without a scheme.

    Returns:
        A list of five ints: total length, number of dots, number of
        hyphens, 1 if any digit is present else 0, and 1 if the link starts
        with the plain ``http://`` scheme else 0.
    """
    # Only a leading "http://" scheme marks the link as insecure. The previous
    # substring test ('"http" in url and "https" not in url') missed links such
    # as "http://https-login.example", flagged scheme-less text that merely
    # contains "http", and was case-sensitive.
    uses_plain_http = url.strip().lower().startswith("http://")
    return [
        len(url),
        url.count('.'),
        url.count('-'),
        1 if any(ch.isdigit() for ch in url) else 0,
        1 if uses_plain_http else 0,
    ]
85
+
86
# --- UI DISPLAY (TABS) ---
tab1, tab2 = st.tabs(["🔗 Cek Link (URL)", "📩 Cek Pesan (SMS/WA)"])

# Tab 1: classify a single URL with the lexical-feature model.
with tab1:
    st.header("URL Phishing Hunter")
    st.write("Menganalisa struktur tulisan link untuk mendeteksi penipuan.")

    url_input = st.text_input(
        "Masukkan Link / URL mencurigakan:",
        placeholder="Contoh: www.klikbca-promo-undian.com",
    )

    if st.button("🔍 Scan URL"):
        # Whitespace picked up by copy-paste would skew the length feature,
        # and a blank submission should be reported rather than classified.
        url_clean = url_input.strip()
        if not url_clean:
            st.warning("Masukkan link / URL terlebih dahulu.")
        else:
            # Prediction
            feature_row = extract_features_realtime(url_clean)
            # Layout: length, dots, hyphens, has-digit flag, plain-HTTP flag.
            length, dots, dashes, _has_digit, plain_http = feature_row
            features = [feature_row]
            pred = model_url.predict(features)[0]
            prob = model_url.predict_proba(features)[0]
            confidence = max(prob) * 100  # probability of the winning class, in %

            st.divider()
            if pred == 1:  # PHISHING
                st.error("🚨 HASIL: BERBAHAYA (PHISHING)")
                st.write(f"AI yakin **{confidence:.1f}%** link ini jahat.")
                st.markdown("**Alasan Deteksi:**")
                st.markdown(f"- Panjang Karakter: `{length}` (Terlalu panjang/pendek)")
                st.markdown(f"- Jumlah Simbol Titik/Strip: `{dots + dashes}` (Indikasi obfuscation)")
                # Report the protocol from the same flag the model was given so
                # the explanation cannot disagree with the features.
                if plain_http:
                    st.markdown("- Protokol: `HTTP` (Tidak Aman)")
            else:  # SAFE
                st.success("✅ HASIL: AMAN (LEGITIMATE)")
                st.write(f"AI yakin **{confidence:.1f}%** link ini aman.")
115
+
116
# Tab 2: classify a pasted SMS/WhatsApp message with the text model.
with tab2:
    st.header("Scam Message Detector")
    st.write("Menggunakan NLP untuk membaca niat jahat dalam teks pesan.")

    sms_input = st.text_area(
        "Copy-Paste isi pesan SMS/WhatsApp di sini:",
        placeholder="Contoh: Selamat Anda menang undian...",
    )

    if st.button("🧠 Analisa Bahasa"):
        # A blank or whitespace-only message carries no words to score, so
        # report it instead of showing a verdict.
        message = sms_input.strip()
        if not message:
            st.warning("Masukkan isi pesan terlebih dahulu.")
        else:
            # Prediction
            pred = model_sms.predict([message])[0]
            prob = model_sms.predict_proba([message])[0]
            confidence = max(prob) * 100  # probability of the winning class, in %

            st.divider()
            if pred == 1:  # SCAM
                st.error("🚨 HASIL: PENIPUAN (SCAM)")
                st.write(f"AI yakin **{confidence:.1f}%** ini pesan penipuan.")
                st.warning("⚠️ **Peringatan:** Pesan ini mengandung pola 'Urgensi' atau 'Iming-iming Hadiah'. Jangan klik link apapun!")
            else:  # NORMAL
                st.success("✅ HASIL: PESAN NORMAL")
                st.write(f"AI yakin **{confidence:.1f}%** ini percakapan biasa.")
137
+
138
# --- FOOTER ---
# Closing separator followed by the educational-use disclaimer.
_DISCLAIMER_TEXT = (
    "Disclaimer: Model ini dilatih dengan data simulasi untuk tujuan edukasi demonstrasi AI Security."
)
st.divider()
st.caption(_DISCLAIMER_TEXT)