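# NOTE: the lines that stood here were page chrome from the repository web
# viewer, captured together with the source (uploader "alzami", commit message
# "Update src/streamlit_app.py", commit ebfa9cb "verified", file size 26.9 kB,
# and the viewer's Raw / History / Blame / Contribute / Delete links).
# They are not part of the program and are kept only as this comment.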
"""
Backdoor Android Malware Detection
Streamlit deployment application.
Peneliti Utama : RAMA ARIA MEGANTARA
Co-Peneliti : DEWI PERGIWATI
Institusi : UNIVERSITAS DIAN NUSWANTORO
Hibah : PENELITIAN DASAR PERGURUAN TINGGI 2025/2026 Semester Gasal
Dataset : CCCS-CIC-AndMal-2020 (University of New Brunswick)
"""
import os
import json
import joblib
import pathlib
import numpy as np
import pandas as pd
import streamlit as st
# =============================================================================
# PAGE CONFIG -- must be first Streamlit call
# =============================================================================
# Set the browser-tab title, use the wide layout and start with the sidebar
# open. This call has to stay ahead of every other Streamlit call in the file.
st.set_page_config(
    initial_sidebar_state="expanded",
    layout="wide",
    page_title="Backdoor Malware Detection",
)
# =============================================================================
# CONSTANTS
# Paths are anchored to this script's directory so they resolve correctly
# regardless of the working directory set by the deployment environment.
# =============================================================================
# Directory that contains this script; every local asset path hangs off it.
BASE_DIR = pathlib.Path(__file__).parent

# Model artefacts and the tables stored alongside them.
MODEL_DIR = BASE_DIR.joinpath("model")
MODEL_PATH = MODEL_DIR.joinpath("best_model.joblib")
CONFIG_PATH = MODEL_DIR.joinpath("model_config.json")
PERM_PATH = MODEL_DIR.joinpath("top500_permissions.json")
SIG_PATH = MODEL_DIR.joinpath("table_backdoor_signature.csv")
NAMED_SIG_PATH = MODEL_DIR.joinpath("table_permission_signature_named.csv")

# Image files shown on the SHAP page.
FIGURES_DIR = BASE_DIR.joinpath("figures")
FIG_BAR_PATH = FIGURES_DIR.joinpath("fig_shap_bar_top50.png")
FIG_BEE_PATH = FIGURES_DIR.joinpath("fig_shap_beeswarm_top20.png")
FIG_GROUP_PATH = FIGURES_DIR.joinpath("fig_shap_group_bar.png")

# External links rendered on the "about" page.
UDINUS_LOGO_URL = "https://dinus.ac.id/wp-content/uploads/2022/12/Logo-Udinus-Official-02-300x300-1.png"
JOURNAL_URL = "https://journal.universitasbumigora.ac.id/matrik/article/view/6198"
DATASET_URL = "https://www.unb.ca/cic/datasets/andmal2020.html"

# Uploads with more rows than this are truncated to the first MAX_ROWS rows.
MAX_ROWS = 100
# =============================================================================
# GLOBAL CSS
# =============================================================================
# Inject the app-wide stylesheet as a raw <style> element.
# unsafe_allow_html=True is required so the markup is emitted as HTML instead
# of being escaped. The sheet imports the IBM Plex fonts from Google Fonts,
# restyles the sidebar, headings, metric widgets, <hr> and download buttons,
# and defines the custom classes (.info-box, .stat-card*, .feat-mono,
# .about-table, .badge-*) that the HTML fragments rendered by the pages rely
# on. NOTE(review): .badge-backdoor / .badge-benign are defined here but not
# referenced anywhere in the visible part of this file.
st.markdown("""
<style>
/* Typography: IBM Plex Sans for body, IBM Plex Mono for code/data */
@import url('https://fonts.googleapis.com/css2?family=IBM+Plex+Mono:wght@400;600&family=IBM+Plex+Sans:wght@400;500;600;700&display=swap');
html, body, [class*="css"] {
font-family: 'IBM Plex Sans', sans-serif;
}
/* Sidebar */
section[data-testid="stSidebar"] {
background-color: #0f1923;
}
section[data-testid="stSidebar"] * {
color: #c8d6e5 !important;
}
section[data-testid="stSidebar"] .stRadio label {
font-size: 0.95rem;
padding: 4px 0;
}
/* Main background */
.main .block-container {
padding-top: 2rem;
max-width: 1100px;
}
/* Page title */
h1 {
font-family: 'IBM Plex Sans', sans-serif;
font-weight: 700;
font-size: 1.75rem;
color: #0f1923;
border-bottom: 3px solid #1a6fa8;
padding-bottom: 0.5rem;
margin-bottom: 1.25rem;
}
h2 {
font-family: 'IBM Plex Sans', sans-serif;
font-weight: 600;
font-size: 1.2rem;
color: #1a3a5c;
margin-top: 1.5rem;
margin-bottom: 0.5rem;
}
h3 {
font-family: 'IBM Plex Sans', sans-serif;
font-weight: 600;
font-size: 1.05rem;
color: #234d72;
margin-top: 1rem;
}
/* Metric box: remove default card style, use flat box */
[data-testid="stMetric"] {
background-color: #f0f4f8;
border: 1px solid #c8d6e5;
border-radius: 4px;
padding: 0.75rem 1rem;
}
[data-testid="stMetricLabel"] {
font-size: 0.8rem;
color: #4a6278;
text-transform: uppercase;
letter-spacing: 0.05em;
}
[data-testid="stMetricValue"] {
font-family: 'IBM Plex Mono', monospace;
font-size: 1.5rem;
color: #0f1923;
}
/* Info box */
.info-box {
background-color: #eef4fb;
border-left: 4px solid #1a6fa8;
padding: 0.75rem 1rem;
margin-bottom: 1rem;
font-size: 0.9rem;
color: #1a3a5c;
}
/* Result badge */
.badge-backdoor {
display: inline-block;
background-color: #c0392b;
color: #ffffff;
font-family: 'IBM Plex Mono', monospace;
font-size: 0.8rem;
font-weight: 600;
padding: 2px 8px;
border-radius: 2px;
letter-spacing: 0.05em;
}
.badge-benign {
display: inline-block;
background-color: #1e7e34;
color: #ffffff;
font-family: 'IBM Plex Mono', monospace;
font-size: 0.8rem;
font-weight: 600;
padding: 2px 8px;
border-radius: 2px;
letter-spacing: 0.05em;
}
/* Summary stat card */
.stat-card {
background-color: #f8fafc;
border: 1px solid #d0dde8;
border-top: 3px solid #1a6fa8;
border-radius: 3px;
padding: 1rem;
text-align: center;
}
.stat-card .stat-label {
font-size: 0.75rem;
text-transform: uppercase;
letter-spacing: 0.07em;
color: #4a6278;
}
.stat-card .stat-value {
font-family: 'IBM Plex Mono', monospace;
font-size: 2rem;
font-weight: 600;
color: #0f1923;
line-height: 1.2;
}
.stat-card-backdoor .stat-value {
color: #c0392b;
}
.stat-card-benign .stat-value {
color: #1e7e34;
}
/* Divider */
hr {
border: none;
border-top: 1px solid #d0dde8;
margin: 1.5rem 0;
}
/* Download button */
.stDownloadButton button {
background-color: #1a6fa8;
color: #ffffff;
border: none;
border-radius: 3px;
font-family: 'IBM Plex Sans', sans-serif;
font-weight: 500;
font-size: 0.9rem;
padding: 0.5rem 1.25rem;
}
.stDownloadButton button:hover {
background-color: #155d8e;
}
/* Monospace for feature names */
.feat-mono {
font-family: 'IBM Plex Mono', monospace;
font-size: 0.85rem;
}
/* About page info table */
.about-table {
width: 100%;
border-collapse: collapse;
font-size: 0.92rem;
}
.about-table td {
padding: 0.5rem 0.75rem;
border-bottom: 1px solid #e5ecf3;
vertical-align: top;
}
.about-table td:first-child {
font-weight: 600;
color: #1a3a5c;
width: 220px;
white-space: nowrap;
}
.about-table .perf-metric {
font-family: 'IBM Plex Mono', monospace;
font-size: 0.88rem;
}
</style>
""", unsafe_allow_html=True)
# =============================================================================
# CACHED RESOURCES
# =============================================================================
@st.cache_resource
def load_model():
    """Deserialize and return the object stored at ``MODEL_PATH``.

    The function is wrapped in ``st.cache_resource``; the file is read with
    ``joblib.load``. Errors raised by ``joblib.load`` (for example when the
    file is missing) propagate to the caller.
    """
    classifier = joblib.load(MODEL_PATH)
    return classifier
@st.cache_data
def load_config():
    """Return the parsed JSON document stored at ``CONFIG_PATH``.

    The file is read as UTF-8 text. The prediction page reads the
    ``"top500_original_indices"`` key from the returned object.
    """
    raw_text = CONFIG_PATH.read_text(encoding="utf-8")
    return json.loads(raw_text)
@st.cache_data
def load_sample_bytes():
    """Build the downloadable CSV template in memory.

    The template has 9503 columns labelled "0" through "9502" and a single
    data row filled with zeros.

    Returns:
        The CSV (header plus one row, no index column) encoded as UTF-8 bytes.
    """
    n_features = 9503
    header = list(map(str, range(n_features)))
    template = pd.DataFrame([[0] * n_features], columns=header)
    csv_text = template.to_csv(index=False)
    return csv_text.encode("utf-8")
@st.cache_data
def load_signature_table():
    """Read the CSV at ``SIG_PATH`` into a DataFrame.

    The first CSV column is used as the DataFrame index.
    """
    signature_df = pd.read_csv(SIG_PATH, index_col=0)
    return signature_df
@st.cache_data
def load_named_signature_table():
    """Read the CSV at ``NAMED_SIG_PATH`` into a DataFrame.

    The first CSV column is used as the DataFrame index.
    """
    named_signature_df = pd.read_csv(NAMED_SIG_PATH, index_col=0)
    return named_signature_df
# =============================================================================
# SIDEBAR NAVIGATION
# =============================================================================
# Sidebar layout, top to bottom: project banner, divider, "Navigasi" caption,
# page selector, divider, static summary of the model and input limits.
_sidebar = st.sidebar

_SIDEBAR_BANNER_HTML = """
<div style="margin-bottom: 1.5rem;">
<div style="font-size: 0.7rem; text-transform: uppercase; letter-spacing: 0.1em;
color: #7fa3c0; margin-bottom: 0.25rem;">Penelitian Hibah DIKTI</div>
<div style="font-size: 0.95rem; font-weight: 600; line-height: 1.4;
color: #e8f0f8;">Backdoor Android<br>Malware Detection</div>
<div style="font-size: 0.75rem; color: #5a8aab; margin-top: 0.4rem;">
Universitas Dian Nuswantoro
</div>
</div>
"""
_SIDEBAR_NAV_CAPTION_HTML = "<div style='font-size:0.78rem; color:#7fa3c0; text-transform:uppercase; letter-spacing:0.08em; margin-bottom:0.4rem;'>Navigasi</div>"
_SIDEBAR_SUMMARY_HTML = """
<div style="font-size: 0.72rem; color: #4a6a82; line-height: 1.6;">
<b style="color:#7fa3c0;">Model</b><br>
Random Forest + SMOTE<br><br>
<b style="color:#7fa3c0;">Dataset</b><br>
CCCS-CIC-AndMal-2020<br><br>
<b style="color:#7fa3c0;">Fitur</b><br>
9503 static features<br>
Top-500 selected<br><br>
<b style="color:#7fa3c0;">Max input</b><br>
100 baris per upload
</div>
"""

_sidebar.markdown(_SIDEBAR_BANNER_HTML, unsafe_allow_html=True)
_sidebar.divider()
_sidebar.markdown(_SIDEBAR_NAV_CAPTION_HTML, unsafe_allow_html=True)

# The selected label is compared against these same strings by the page
# dispatch further down, so the option texts must not change.
_nav_options = ["Prediksi", "SHAP Signature", "Tentang Penelitian"]
page = _sidebar.radio(
    label="Pilih halaman",
    options=_nav_options,
    label_visibility="collapsed",
)

_sidebar.divider()
_sidebar.markdown(_SIDEBAR_SUMMARY_HTML, unsafe_allow_html=True)
# =============================================================================
# PAGE 1: PREDIKSI
# =============================================================================
if page == "Prediksi":
    # Prediction page. Flow: offer a CSV template, accept an upload, validate
    # it, select the model's input columns, run the classifier and show an
    # aggregate summary plus a per-sample table that can be downloaded.
    st.title("Prediksi Backdoor Android Malware")
    st.markdown("""
<div class="info-box">
Upload file CSV yang berisi static features dari satu atau lebih Android APK.
Model akan memprediksi apakah setiap APK termasuk kategori
<strong>Backdoor</strong> atau <strong>Non-Backdoor</strong>,
beserta nilai probabilitas dari Random Forest classifier.
</div>
""", unsafe_allow_html=True)

    # -- Template download
    st.subheader("Template Input CSV")
    col_dl, col_desc = st.columns([2, 3])
    with col_dl:
        st.download_button(
            label="Download sample_input.csv",
            data=load_sample_bytes(),
            file_name="sample_input.csv",
            mime="text/csv",
        )
    with col_desc:
        st.markdown("""
<div style="font-size:0.87rem; color:#2c4a63; padding-top:0.5rem;">
Template berisi <span class="feat-mono">9503 kolom</span>
(kolom <span class="feat-mono">0</span> hingga
<span class="feat-mono">9502</span>), satu baris bernilai nol.
Isi nilai kolom sesuai fitur APK Anda (0 atau 1),
tambahkan baris baru untuk setiap APK tambahan, lalu upload di bawah.
</div>
""", unsafe_allow_html=True)
    st.divider()

    # -- Upload
    st.subheader("Upload File CSV")
    uploaded_file = st.file_uploader(
        label="Pilih file CSV (maks. {} baris)".format(MAX_ROWS),
        type=["csv"],
        help="File harus memiliki 9503 kolom bernama 0 sampai 9502.",
    )

    if uploaded_file is not None:
        # -- Read
        # Parsing with dtype=float32 makes pandas reject non-numeric cells;
        # any parse failure is reported to the user and the run is halted.
        try:
            df_input = pd.read_csv(uploaded_file, dtype=np.float32)
        except Exception as e:
            st.error("Gagal membaca file CSV: {}".format(e))
            st.stop()

        # -- Validate columns
        # Extra columns are tolerated; only the absence of an expected column
        # name is an error.
        expected_cols = [str(i) for i in range(9503)]
        df_input.columns = [str(c) for c in df_input.columns]
        missing_cols = [c for c in expected_cols if c not in df_input.columns]
        if missing_cols:
            st.error(
                "File CSV tidak valid. Dibutuhkan 9503 kolom bernama 0 sampai 9502. "
                "Jumlah kolom yang hilang: {}. "
                "Gunakan template sample_input.csv di atas sebagai panduan.".format(len(missing_cols))
            )
            st.stop()

        # -- Empty input
        # A header-only CSV passes the column check but has no rows to score.
        # Stop here with a clear message instead of handing an empty array to
        # the classifier (scikit-learn estimators raise ValueError on it).
        if len(df_input) == 0:
            st.warning(
                "File CSV tidak memiliki baris data. "
                "Tambahkan minimal satu baris fitur APK lalu upload kembali."
            )
            st.stop()

        # -- Row limit
        total_rows = len(df_input)
        if total_rows > MAX_ROWS:
            st.warning(
                "File memiliki {} baris. Hanya {} baris pertama yang diproses "
                "(batas maksimal per upload).".format(total_rows, MAX_ROWS)
            )
            df_input = df_input.head(MAX_ROWS)
        n_samples = len(df_input)

        # -- Extract top-500 features
        # The config lists which of the original column indices the model
        # consumes; the columns are selected in that listed order.
        config = load_config()
        top500_idx = config["top500_original_indices"]
        col_names = [str(i) for i in top500_idx]
        X = df_input[col_names].values.astype(np.float32)

        # -- Predict
        with st.spinner("Memproses {} sampel...".format(n_samples)):
            model = load_model()
            y_pred = model.predict(X)
            # Column 1 of predict_proba is used as the Backdoor probability.
            y_proba = model.predict_proba(X)[:, 1]

        # -- Aggregate summary
        n_backdoor = int((y_pred == 1).sum())
        n_non_backdoor = int((y_pred == 0).sum())
        st.divider()
        st.subheader("Ringkasan Hasil")
        col1, col2, col3 = st.columns(3)
        with col1:
            st.markdown("""
<div class="stat-card">
<div class="stat-label">Total Sampel</div>
<div class="stat-value">{}</div>
</div>
""".format(n_samples), unsafe_allow_html=True)
        with col2:
            st.markdown("""
<div class="stat-card stat-card-backdoor">
<div class="stat-label">Backdoor</div>
<div class="stat-value">{}</div>
</div>
""".format(n_backdoor), unsafe_allow_html=True)
        with col3:
            st.markdown("""
<div class="stat-card stat-card-benign">
<div class="stat-label">Non-Backdoor</div>
<div class="stat-value">{}</div>
</div>
""".format(n_non_backdoor), unsafe_allow_html=True)

        # -- Per-row results
        st.divider()
        st.subheader("Hasil per Sampel")
        results_df = pd.DataFrame({
            "Sample" : range(1, n_samples + 1),
            "Prediction": ["Backdoor" if p == 1 else "Non-Backdoor" for p in y_pred],
            "Probability (Backdoor)": [round(float(p), 4) for p in y_proba],
        })

        def _style_row(row):
            """Return one CSS string per cell: red tint for Backdoor rows, green otherwise."""
            if row["Prediction"] == "Backdoor":
                return ["background-color: #fce8e6; color: #7b1e1e"] * len(row)
            return ["background-color: #e8f5e9; color: #1b4d2a"] * len(row)

        styled = results_df.style.apply(_style_row, axis=1).format(
            {"Probability (Backdoor)": "{:.4f}"}
        )
        # Table height grows with the row count and is capped at 400 px.
        st.dataframe(styled, use_container_width=True, height=min(400, 40 + n_samples * 38))

        # -- Download per-row
        csv_out = results_df.to_csv(index=False).encode("utf-8")
        st.download_button(
            label="Download hasil prediksi (.csv)",
            data=csv_out,
            file_name="hasil_prediksi.csv",
            mime="text/csv",
        )
        st.markdown("""
<div style="font-size:0.8rem; color:#4a6278; margin-top:0.5rem;">
Kolom <span class="feat-mono">Probability (Backdoor)</span> adalah
probabilitas kelas positif (Backdoor) dari Random Forest classifier.
Nilai mendekati 1.0 menunjukkan keyakinan tinggi bahwa APK adalah Backdoor.
</div>
""", unsafe_allow_html=True)
# =============================================================================
# PAGE 2: SHAP SIGNATURE
# =============================================================================
elif page == "SHAP Signature":
st.title("SHAP-Based Backdoor Permission Signature")
st.markdown("""
<div class="info-box">
Halaman ini menampilkan hasil analisis SHAP (<i>SHapley Additive exPlanations</i>)
terhadap model terbaik: <strong>Random Forest + SMOTE (RF-C2)</strong>.
SHAP digunakan untuk mengidentifikasi permission dan fitur yang paling berkontribusi
terhadap deteksi Backdoor malware, serta mengekstrak
<em>behavioral permission signature</em> yang membedakan Backdoor dari kategori malware lain.
</div>
""", unsafe_allow_html=True)
# -- Figure 1: Bar Top-50
st.subheader("SHAP Bar Plot: Top-50 Features")
st.markdown("""
<div style="font-size:0.87rem; color:#2c4a63; margin-bottom:0.5rem;">
Mean absolute SHAP value untuk 50 fitur dengan kontribusi terbesar
terhadap prediksi model pada test set.
Semakin besar nilainya, semakin kuat pengaruh fitur tersebut terhadap keputusan classifier.
</div>
""", unsafe_allow_html=True)
if os.path.exists(FIG_BAR_PATH):
st.image(FIG_BAR_PATH, use_container_width=True)
else:
st.warning("File gambar tidak ditemukan: {}".format(FIG_BAR_PATH))
st.divider()
# -- Figure 2: Beeswarm Top-20
st.subheader("SHAP Beeswarm Plot: Top-20 Features")
st.markdown("""
<div style="font-size:0.87rem; color:#2c4a63; margin-bottom:0.5rem;">
Distribusi SHAP value untuk 20 fitur teratas pada seluruh sampel test set.
Warna menunjukkan nilai fitur (merah = tinggi, biru = rendah).
Plot ini memperlihatkan arah dan besaran pengaruh setiap fitur.
</div>
""", unsafe_allow_html=True)
if os.path.exists(FIG_BEE_PATH):
st.image(FIG_BEE_PATH, use_container_width=True)
else:
st.warning("File gambar tidak ditemukan: {}".format(FIG_BEE_PATH))
st.divider()
# -- Figure 3: Group bar
st.subheader("Group-Level SHAP Contribution")
st.markdown("""
<div style="font-size:0.87rem; color:#2c4a63; margin-bottom:0.5rem;">
Perbandingan mean absolute SHAP value antara kelompok fitur
<strong>Permissions</strong> (indeks 0-3267) dan
<strong>Non-Permissions</strong> (indeks 3268-9501),
yang mencakup Services, Intent Actions, dan Intent Categories.
</div>
""", unsafe_allow_html=True)
if os.path.exists(FIG_GROUP_PATH):
st.image(FIG_GROUP_PATH, use_container_width=True)
else:
st.warning("File gambar tidak ditemukan: {}".format(FIG_GROUP_PATH))
st.divider()
# -- Signature Table
st.subheader("Backdoor Behavioral Signature (Top-50)")
st.markdown("""
<div style="font-size:0.87rem; color:#2c4a63; margin-bottom:0.5rem;">
Fitur-fitur dengan mean SHAP positif pada sampel True Positive
(APK yang secara benar terdeteksi sebagai Backdoor).
Fitur-fitur ini membentuk <em>behavioral signature</em> Backdoor malware.
Kolom <span class="feat-mono">feature_col_idx</span> adalah indeks fitur
dalam ruang fitur asli (0-9502).
</div>
""", unsafe_allow_html=True)
df_sig = load_signature_table()
st.dataframe(df_sig, use_container_width=True, height=400)
st.divider()
# -- Named Permission Signature
st.subheader("Named Permission Signature")
st.markdown("""
<div style="font-size:0.87rem; color:#2c4a63; margin-bottom:0.5rem;">
Permission-permission yang termasuk dalam Backdoor behavioral signature,
beserta nama resminya dari Android permission list
dan nilai mean SHAP pada True Positive samples.
</div>
""", unsafe_allow_html=True)
df_named = load_named_signature_table()
st.dataframe(df_named, use_container_width=True, height=350)
# =============================================================================
# PAGE 3: TENTANG PENELITIAN
# =============================================================================
elif page == "Tentang Penelitian":
st.title("Tentang Penelitian")
# -- Header: logo + info
col_logo, col_header = st.columns([1, 4])
with col_logo:
st.image(UDINUS_LOGO_URL, width=120)
with col_header:
st.markdown("""
<div style="padding-top: 0.5rem;">
<div style="font-size: 1.05rem; font-weight: 700; color: #0f1923; line-height: 1.4;">
Universitas Dian Nuswantoro
</div>
<div style="font-size: 0.9rem; color: #2c4a63; margin-top: 0.2rem;">
Semarang, Jawa Tengah, Indonesia
</div>
<div style="margin-top: 0.5rem; display: inline-block;
background-color: #eef4fb; border: 1px solid #b0cce4;
padding: 3px 10px; border-radius: 2px;
font-size: 0.78rem; color: #1a4f78; font-weight: 600;
letter-spacing: 0.04em;">
PENELITIAN DASAR PERGURUAN TINGGI 2025/2026 Semester Gasal
</div>
</div>
""", unsafe_allow_html=True)
st.divider()
# -- Informasi penelitian
st.subheader("Informasi Penelitian")
st.markdown("""
<table class="about-table">
<tr>
<td>Judul</td>
<td>Backdoor Android Malware Detection under Extreme Class Imbalance
using Ensemble Learning and SHAP-Based Permission Signature Analysis</td>
</tr>
<tr>
<td>Peneliti Utama</td>
<td>Rama Aria Megantara</td>
</tr>
<tr>
<td>Co-Peneliti</td>
<td>Dewi Pergiwati</td>
</tr>
<tr>
<td>Institusi</td>
<td>Universitas Dian Nuswantoro</td>
</tr>
<tr>
<td>Jenis Hibah</td>
<td>Penelitian Dasar Perguruan Tinggi (DIKTI)</td>
</tr>
<tr>
<td>Periode</td>
<td>2025/2026 Semester Gasal</td>
</tr>
<tr>
<td>Publikasi</td>
<td>
<a href="{journal}" target="_blank" style="color:#1a6fa8;">
Matrik: Jurnal Manajemen, Teknik Informatika, dan Rekayasa Komputer
</a>
</td>
</tr>
<tr>
<td>Dataset</td>
<td>
<a href="{dataset}" target="_blank" style="color:#1a6fa8;">
CCCS-CIC-AndMal-2020 (University of New Brunswick)
</a>
</td>
</tr>
</table>
""".format(journal=JOURNAL_URL, dataset=DATASET_URL), unsafe_allow_html=True)
st.divider()
# -- Abstract
st.subheader("Abstract")
st.markdown("""
<div style="font-size:0.92rem; color:#1a2b3c; line-height:1.75;
text-align:justify; max-width:860px;">
Backdoor malware pada platform Android merupakan ancaman serius karena memungkinkan
akses tidak sah secara tersembunyi ke dalam sistem perangkat korban.
Deteksi Backdoor menghadapi tantangan berupa <em>extreme class imbalance</em>
yang parah pada dataset dunia nyata.
Penelitian ini mengusulkan pipeline deteksi berbasis <em>static features</em>
menggunakan dataset <strong>CCCS-CIC-AndMal-2020</strong>,
dengan rasio ketidakseimbangan kelas 1:221.5 antara kelas Backdoor dan kelas lainnya.
Pipeline dua tahap <em>feature selection</em> diterapkan:
<em>Variance Threshold</em> mereduksi 9.503 fitur menjadi 1.433 fitur,
diikuti seleksi berbasis <em>Random Forest feature importance</em>
untuk memilih 500 fitur terbaik.
Lima classifier dievaluasi: Decision Tree, Logistic Regression, Random Forest,
XGBoost, dan LightGBM, masing-masing di bawah tiga kondisi penanganan imbalance:
tanpa penanganan (C1), SMOTE (C2), dan SMOTE dengan <em>cost-sensitive learning</em> (C3),
dengan 10 <em>random seeds</em> untuk validasi statistik via uji Wilcoxon Signed-Rank.
Model terbaik berdasarkan <em>composite ranking</em> adalah
<strong>Random Forest dengan kondisi SMOTE (RF-C2)</strong>,
yang mencapai F1-Score=0.9046, AUC-ROC=0.9917, dan G-Mean=0.9426.
Analisis SHAP (<em>SHapley Additive exPlanations</em>) digunakan untuk mengekstraksi
<em>behavioral permission signature</em> Backdoor malware,
memberikan interpretabilitas terhadap keputusan model.
</div>
""", unsafe_allow_html=True)
st.divider()
# -- Model performance
st.subheader("Performa Model Terbaik: Random Forest + SMOTE (RF-C2)")
st.markdown("""
<div style="font-size:0.83rem; color:#4a6278; margin-bottom:0.75rem;">
Rata-rata dan standar deviasi dari 10 random seeds pada test set.
Evaluasi menggunakan metrik khusus untuk kelas minority (Backdoor).
</div>
""", unsafe_allow_html=True)
perf_rows = [
("F1-Score (Backdoor class)", "0.9046", "0.0020"),
("AUC-ROC", "0.9917", "0.0010"),
("G-Mean", "0.9426", "0.0030"),
("Precision", "0.9133", "0.0035"),
("Recall", "0.8961", "0.0061"),
("MCC", "0.8952", "0.0021"),
("Training Time", "2.18 s", "0.04 s"),
("Inference Time per Sample", "0.0174 ms", "0.0004 ms"),
]
rows_html = ""
for metric, mean_val, std_val in perf_rows:
rows_html += """
<tr>
<td>{metric}</td>
<td class="perf-metric">{mean} +/- {std}</td>
</tr>
""".format(metric=metric, mean=mean_val, std=std_val)
st.markdown("""
<table class="about-table">
<tr>
<td style="font-weight:700; font-size:0.75rem; text-transform:uppercase;
letter-spacing:0.06em; color:#4a6278; border-bottom:2px solid #c0d4e5;">
Metric
</td>
<td style="font-weight:700; font-size:0.75rem; text-transform:uppercase;
letter-spacing:0.06em; color:#4a6278; border-bottom:2px solid #c0d4e5;">
Mean +/- Std
</td>
</tr>
{}
</table>
""".format(rows_html), unsafe_allow_html=True)
st.divider()
# -- Methodology overview
st.subheader("Ringkasan Metodologi")
st.markdown("""
<div style="font-size:0.88rem; color:#1a2b3c; line-height:1.7;">
<table class="about-table">
<tr><td>Dataset</td><td>CCCS-CIC-AndMal-2020 &mdash; 342.169 sampel, 18 kategori</td></tr>
<tr><td>Task</td><td>Binary classification: Backdoor (1) vs. semua kategori lain (0)</td></tr>
<tr><td>Fitur</td><td>Static features &mdash; 9.503 kolom (permissions, services, intents, categories)</td></tr>
<tr><td>Feature Selection</td><td>Variance Threshold + Random Forest Importance (top-500)</td></tr>
<tr><td>Imbalance Handling</td><td>C1: None | C2: SMOTE | C3: SMOTE + cost-sensitive</td></tr>
<tr><td>Classifiers</td><td>Decision Tree, Logistic Regression, Random Forest, XGBoost, LightGBM</td></tr>
<tr><td>Validasi</td><td>10 random seeds, Wilcoxon Signed-Rank test, effect size r</td></tr>
<tr><td>Explainability</td><td>SHAP TreeExplainer &mdash; behavioral permission signature</td></tr>
</table>
</div>
""", unsafe_allow_html=True)
st.divider()
st.markdown("""
<div style="font-size:0.78rem; color:#7a95aa; text-align:center;">
Universitas Dian Nuswantoro &nbsp;|&nbsp;
Penelitian Dasar Perguruan Tinggi 2025/2026 &nbsp;|&nbsp;
Rama Aria Megantara &amp; Dewi Pergiwati
</div>
""", unsafe_allow_html=True)