"""
Backdoor Android Malware Detection
Streamlit deployment application.

Principal investigator : RAMA ARIA MEGANTARA
Co-investigator        : DEWI PERGIWATI
Institution            : UNIVERSITAS DIAN NUSWANTORO
Grant                  : PENELITIAN DASAR PERGURUAN TINGGI 2025/2026 Semester Gasal
Dataset                : CCCS-CIC-AndMal-2020 (University of New Brunswick)
"""
|
|
import json
import os
import pathlib
from typing import Any, Dict

import joblib
import numpy as np
import pandas as pd
import streamlit as st
|
|
| |
| |
| |
|
|
# Page-level settings (browser tab title, wide layout, sidebar open on load).
# This call is kept ahead of every other st.* command in the script.
st.set_page_config(
    page_title="Backdoor Malware Detection",
    layout="wide",
    initial_sidebar_state="expanded",
)
|
|
| |
| |
| |
| |
| |
|
|
# All bundled assets are located relative to the directory holding this script.
BASE_DIR = pathlib.Path(__file__).parent
MODEL_DIR = BASE_DIR / "model"
FIGURES_DIR = BASE_DIR / "figures"

# Serialized classifier, its configuration, and the signature tables.
MODEL_PATH = MODEL_DIR / "best_model.joblib"
CONFIG_PATH = MODEL_DIR / "model_config.json"
PERM_PATH = MODEL_DIR / "top500_permissions.json"
SIG_PATH = MODEL_DIR / "table_backdoor_signature.csv"
NAMED_SIG_PATH = MODEL_DIR / "table_permission_signature_named.csv"

# Pre-rendered SHAP figures displayed on the "SHAP Signature" page.
FIG_BAR_PATH = FIGURES_DIR / "fig_shap_bar_top50.png"
FIG_BEE_PATH = FIGURES_DIR / "fig_shap_beeswarm_top20.png"
FIG_GROUP_PATH = FIGURES_DIR / "fig_shap_group_bar.png"

# External links used on the "Tentang Penelitian" page.
UDINUS_LOGO_URL = "https://dinus.ac.id/wp-content/uploads/2022/12/Logo-Udinus-Official-02-300x300-1.png"
JOURNAL_URL = "https://journal.universitasbumigora.ac.id/matrik/article/view/6198"
DATASET_URL = "https://www.unb.ca/cic/datasets/andmal2020.html"

# Maximum number of CSV rows scored per upload; extra rows are dropped.
MAX_ROWS = 100
|
|
| |
| |
| |
|
|
# Global stylesheet injected once per run. The class names defined here
# (info-box, stat-card, feat-mono, about-table, ...) are referenced by the
# HTML snippets rendered on the individual pages.
st.markdown("""
<style>
    /* Typography: IBM Plex Sans for body, IBM Plex Mono for code/data */
    @import url('https://fonts.googleapis.com/css2?family=IBM+Plex+Mono:wght@400;600&family=IBM+Plex+Sans:wght@400;500;600;700&display=swap');

    html, body, [class*="css"] {
        font-family: 'IBM Plex Sans', sans-serif;
    }

    /* Sidebar */
    section[data-testid="stSidebar"] {
        background-color: #0f1923;
    }
    section[data-testid="stSidebar"] * {
        color: #c8d6e5 !important;
    }
    section[data-testid="stSidebar"] .stRadio label {
        font-size: 0.95rem;
        padding: 4px 0;
    }

    /* Main background */
    .main .block-container {
        padding-top: 2rem;
        max-width: 1100px;
    }

    /* Page title */
    h1 {
        font-family: 'IBM Plex Sans', sans-serif;
        font-weight: 700;
        font-size: 1.75rem;
        color: #0f1923;
        border-bottom: 3px solid #1a6fa8;
        padding-bottom: 0.5rem;
        margin-bottom: 1.25rem;
    }

    h2 {
        font-family: 'IBM Plex Sans', sans-serif;
        font-weight: 600;
        font-size: 1.2rem;
        color: #1a3a5c;
        margin-top: 1.5rem;
        margin-bottom: 0.5rem;
    }

    h3 {
        font-family: 'IBM Plex Sans', sans-serif;
        font-weight: 600;
        font-size: 1.05rem;
        color: #234d72;
        margin-top: 1rem;
    }

    /* Metric box: remove default card style, use flat box */
    [data-testid="stMetric"] {
        background-color: #f0f4f8;
        border: 1px solid #c8d6e5;
        border-radius: 4px;
        padding: 0.75rem 1rem;
    }
    [data-testid="stMetricLabel"] {
        font-size: 0.8rem;
        color: #4a6278;
        text-transform: uppercase;
        letter-spacing: 0.05em;
    }
    [data-testid="stMetricValue"] {
        font-family: 'IBM Plex Mono', monospace;
        font-size: 1.5rem;
        color: #0f1923;
    }

    /* Info box */
    .info-box {
        background-color: #eef4fb;
        border-left: 4px solid #1a6fa8;
        padding: 0.75rem 1rem;
        margin-bottom: 1rem;
        font-size: 0.9rem;
        color: #1a3a5c;
    }

    /* Result badge */
    .badge-backdoor {
        display: inline-block;
        background-color: #c0392b;
        color: #ffffff;
        font-family: 'IBM Plex Mono', monospace;
        font-size: 0.8rem;
        font-weight: 600;
        padding: 2px 8px;
        border-radius: 2px;
        letter-spacing: 0.05em;
    }
    .badge-benign {
        display: inline-block;
        background-color: #1e7e34;
        color: #ffffff;
        font-family: 'IBM Plex Mono', monospace;
        font-size: 0.8rem;
        font-weight: 600;
        padding: 2px 8px;
        border-radius: 2px;
        letter-spacing: 0.05em;
    }

    /* Summary stat card */
    .stat-card {
        background-color: #f8fafc;
        border: 1px solid #d0dde8;
        border-top: 3px solid #1a6fa8;
        border-radius: 3px;
        padding: 1rem;
        text-align: center;
    }
    .stat-card .stat-label {
        font-size: 0.75rem;
        text-transform: uppercase;
        letter-spacing: 0.07em;
        color: #4a6278;
    }
    .stat-card .stat-value {
        font-family: 'IBM Plex Mono', monospace;
        font-size: 2rem;
        font-weight: 600;
        color: #0f1923;
        line-height: 1.2;
    }
    .stat-card-backdoor .stat-value {
        color: #c0392b;
    }
    .stat-card-benign .stat-value {
        color: #1e7e34;
    }

    /* Divider */
    hr {
        border: none;
        border-top: 1px solid #d0dde8;
        margin: 1.5rem 0;
    }

    /* Download button */
    .stDownloadButton button {
        background-color: #1a6fa8;
        color: #ffffff;
        border: none;
        border-radius: 3px;
        font-family: 'IBM Plex Sans', sans-serif;
        font-weight: 500;
        font-size: 0.9rem;
        padding: 0.5rem 1.25rem;
    }
    .stDownloadButton button:hover {
        background-color: #155d8e;
    }

    /* Monospace for feature names */
    .feat-mono {
        font-family: 'IBM Plex Mono', monospace;
        font-size: 0.85rem;
    }

    /* About page info table */
    .about-table {
        width: 100%;
        border-collapse: collapse;
        font-size: 0.92rem;
    }
    .about-table td {
        padding: 0.5rem 0.75rem;
        border-bottom: 1px solid #e5ecf3;
        vertical-align: top;
    }
    .about-table td:first-child {
        font-weight: 600;
        color: #1a3a5c;
        width: 220px;
        white-space: nowrap;
    }
    .about-table .perf-metric {
        font-family: 'IBM Plex Mono', monospace;
        font-size: 0.88rem;
    }
</style>
""", unsafe_allow_html=True)
|
|
| |
| |
| |
|
|
@st.cache_resource
def load_model():
    """Load the serialized classifier stored at ``MODEL_PATH``.

    Wrapped in ``st.cache_resource`` so the estimator object is deserialized
    once and reused instead of being reloaded on every script rerun.

    Returns:
        Whatever object ``joblib.load`` yields for the artifact; the
        prediction page calls ``predict`` and ``predict_proba`` on it.
    """
    # NOTE(review): joblib.load unpickles arbitrary objects, so MODEL_PATH
    # must only ever point at a trusted, repository-bundled artifact.
    return joblib.load(MODEL_PATH)
|
|
|
|
@st.cache_data
def load_config() -> Dict[str, Any]:
    """Read and parse the model configuration JSON at ``CONFIG_PATH``.

    Returns:
        The decoded JSON document. The prediction page reads its
        ``"top500_original_indices"`` key, so a JSON object is expected.
    """
    with open(CONFIG_PATH, "r", encoding="utf-8") as handle:
        return json.load(handle)
|
|
|
|
@st.cache_data
def load_sample_bytes(n_features: int = 9503) -> bytes:
    """Build the downloadable CSV template entirely in memory.

    Args:
        n_features: Number of feature columns to emit. Defaults to 9503,
            which yields columns named "0" through "9502".

    Returns:
        UTF-8 encoded CSV bytes: a header row with the column names and a
        single data row of zeros, written without an index column.
    """
    header = [str(i) for i in range(n_features)]
    template = pd.DataFrame([[0] * n_features], columns=header)
    return template.to_csv(index=False).encode("utf-8")
|
|
|
|
@st.cache_data
def load_signature_table() -> pd.DataFrame:
    """Read the backdoor signature CSV at ``SIG_PATH``.

    Returns:
        The table as a DataFrame, using the file's first column as index.
    """
    return pd.read_csv(SIG_PATH, index_col=0)
|
|
|
|
@st.cache_data
def load_named_signature_table() -> pd.DataFrame:
    """Read the named permission signature CSV at ``NAMED_SIG_PATH``.

    Returns:
        The table as a DataFrame, using the file's first column as index.
    """
    return pd.read_csv(NAMED_SIG_PATH, index_col=0)
|
|
|
|
| |
| |
| |
|
|
# Sidebar header: project / institution banner.
st.sidebar.markdown("""
<div style="margin-bottom: 1.5rem;">
    <div style="font-size: 0.7rem; text-transform: uppercase; letter-spacing: 0.1em;
    color: #7fa3c0; margin-bottom: 0.25rem;">Penelitian Hibah DIKTI</div>
    <div style="font-size: 0.95rem; font-weight: 600; line-height: 1.4;
    color: #e8f0f8;">Backdoor Android<br>Malware Detection</div>
    <div style="font-size: 0.75rem; color: #5a8aab; margin-top: 0.4rem;">
        Universitas Dian Nuswantoro
    </div>
</div>
""", unsafe_allow_html=True)

st.sidebar.divider()
st.sidebar.markdown(
    "<div style='font-size:0.78rem; color:#7fa3c0; text-transform:uppercase; "
    "letter-spacing:0.08em; margin-bottom:0.4rem;'>Navigasi</div>",
    unsafe_allow_html=True,
)

# `page` selects which page body the rest of the script renders; the option
# strings must match the values compared against further down the file.
page = st.sidebar.radio(
    label="Pilih halaman",
    options=["Prediksi", "SHAP Signature", "Tentang Penelitian"],
    label_visibility="collapsed",
)

st.sidebar.divider()
# The row limit is formatted from MAX_ROWS so this summary cannot drift from
# the limit actually enforced on uploads.
st.sidebar.markdown("""
<div style="font-size: 0.72rem; color: #4a6a82; line-height: 1.6;">
    <b style="color:#7fa3c0;">Model</b><br>
    Random Forest + SMOTE<br><br>
    <b style="color:#7fa3c0;">Dataset</b><br>
    CCCS-CIC-AndMal-2020<br><br>
    <b style="color:#7fa3c0;">Fitur</b><br>
    9503 static features<br>
    Top-500 selected<br><br>
    <b style="color:#7fa3c0;">Max input</b><br>
    {max_rows} baris per upload
</div>
""".format(max_rows=MAX_ROWS), unsafe_allow_html=True)
|
|
|
|
| |
| |
| |
|
|
# Width of the original static-feature space: CSV columns "0" .. "9502".
_N_FEATURES = 9503


def _style_row(row):
    """Return one CSS string per cell: red tint for Backdoor rows, green otherwise."""
    if row["Prediction"] == "Backdoor":
        css = "background-color: #fce8e6; color: #7b1e1e"
    else:
        css = "background-color: #e8f5e9; color: #1b4d2a"
    return [css] * len(row)


def _show_figure(fig_path):
    """Display the image at ``fig_path``, or a warning when the file is absent."""
    if fig_path.exists():
        # Passed as str as a precaution: whether every Streamlit release
        # accepts pathlib.Path here was not verified -- TODO confirm.
        st.image(str(fig_path), use_container_width=True)
    else:
        st.warning("File gambar tidak ditemukan: {}".format(fig_path))


def _show_table(loader, table_path, height):
    """Display the DataFrame returned by ``loader``; warn if its file is missing.

    Mirrors the missing-file handling used for figures so an absent CSV
    degrades to a warning instead of an unhandled exception.
    """
    try:
        table = loader()
    except FileNotFoundError:
        st.warning("File tabel tidak ditemukan: {}".format(table_path))
        return
    st.dataframe(table, use_container_width=True, height=height)


def _render_prediction_page():
    """Render the upload form, score the uploaded samples and show the results."""
    st.title("Prediksi Backdoor Android Malware")

    st.markdown("""
    <div class="info-box">
        Upload file CSV yang berisi static features dari satu atau lebih Android APK.
        Model akan memprediksi apakah setiap APK termasuk kategori
        <strong>Backdoor</strong> atau <strong>Non-Backdoor</strong>,
        beserta nilai probabilitas dari Random Forest classifier.
    </div>
    """, unsafe_allow_html=True)

    # --- Template download -------------------------------------------------
    st.subheader("Template Input CSV")

    col_dl, col_desc = st.columns([2, 3])
    with col_dl:
        st.download_button(
            label="Download sample_input.csv",
            data=load_sample_bytes(),
            file_name="sample_input.csv",
            mime="text/csv",
        )
    with col_desc:
        st.markdown("""
        <div style="font-size:0.87rem; color:#2c4a63; padding-top:0.5rem;">
            Template berisi <span class="feat-mono">9503 kolom</span>
            (kolom <span class="feat-mono">0</span> hingga
            <span class="feat-mono">9502</span>), satu baris bernilai nol.
            Isi nilai kolom sesuai fitur APK Anda (0 atau 1),
            tambahkan baris baru untuk setiap APK tambahan, lalu upload di bawah.
        </div>
        """, unsafe_allow_html=True)

    st.divider()

    # --- Upload ------------------------------------------------------------
    st.subheader("Upload File CSV")
    uploaded_file = st.file_uploader(
        label="Pilih file CSV (maks. {} baris)".format(MAX_ROWS),
        type=["csv"],
        help="File harus memiliki {} kolom bernama 0 sampai {}.".format(
            _N_FEATURES, _N_FEATURES - 1
        ),
    )
    if uploaded_file is None:
        return

    # Any parsing problem (malformed CSV, non-numeric cell, bad encoding) is
    # reported to the user instead of surfacing as a traceback.
    try:
        df_input = pd.read_csv(uploaded_file, dtype=np.float32)
    except Exception as e:
        st.error("Gagal membaca file CSV: {}".format(e))
        st.stop()

    # --- Column validation -------------------------------------------------
    df_input.columns = [str(c) for c in df_input.columns]
    present_cols = set(df_input.columns)
    missing_cols = [
        name
        for name in (str(i) for i in range(_N_FEATURES))
        if name not in present_cols
    ]
    if missing_cols:
        st.error(
            "File CSV tidak valid. Dibutuhkan {n} kolom bernama 0 sampai {last}. "
            "Jumlah kolom yang hilang: {missing}. "
            "Gunakan template sample_input.csv di atas sebagai panduan.".format(
                n=_N_FEATURES, last=_N_FEATURES - 1, missing=len(missing_cols)
            )
        )
        st.stop()

    # --- Row-count handling ------------------------------------------------
    total_rows = len(df_input)
    if total_rows == 0:
        # A header-only file would otherwise reach the model with zero samples.
        st.error(
            "File CSV tidak berisi baris data. "
            "Tambahkan minimal satu baris fitur APK."
        )
        st.stop()
    if total_rows > MAX_ROWS:
        st.warning(
            "File memiliki {} baris. Hanya {} baris pertama yang diproses "
            "(batas maksimal per upload).".format(total_rows, MAX_ROWS)
        )
        df_input = df_input.head(MAX_ROWS)

    n_samples = len(df_input)

    # --- Feature selection -------------------------------------------------
    # Only the columns listed in the config are fed to the model; all other
    # uploaded columns are ignored.
    config = load_config()
    selected_cols = [str(i) for i in config["top500_original_indices"]]
    X = df_input[selected_cols].values.astype(np.float32)

    if not np.isfinite(X).all():
        # Blank cells are parsed as NaN; reject them explicitly rather than
        # letting the classifier fail or silently score incomplete rows.
        st.error(
            "File CSV mengandung nilai kosong atau tidak valid (NaN/inf) pada "
            "fitur yang digunakan model. Isi setiap sel dengan 0 atau 1."
        )
        st.stop()

    # --- Inference ---------------------------------------------------------
    with st.spinner("Memproses {} sampel...".format(n_samples)):
        model = load_model()
        y_pred = model.predict(X)
        # Column 1 is taken as the Backdoor probability.
        # NOTE(review): assumes the model's class order is [0, 1] -- confirm.
        y_proba = model.predict_proba(X)[:, 1]

    n_backdoor = int((y_pred == 1).sum())
    n_non_backdoor = int((y_pred == 0).sum())

    # --- Summary cards -----------------------------------------------------
    st.divider()
    st.subheader("Ringkasan Hasil")

    col1, col2, col3 = st.columns(3)
    with col1:
        st.markdown("""
        <div class="stat-card">
            <div class="stat-label">Total Sampel</div>
            <div class="stat-value">{}</div>
        </div>
        """.format(n_samples), unsafe_allow_html=True)
    with col2:
        st.markdown("""
        <div class="stat-card stat-card-backdoor">
            <div class="stat-label">Backdoor</div>
            <div class="stat-value">{}</div>
        </div>
        """.format(n_backdoor), unsafe_allow_html=True)
    with col3:
        st.markdown("""
        <div class="stat-card stat-card-benign">
            <div class="stat-label">Non-Backdoor</div>
            <div class="stat-value">{}</div>
        </div>
        """.format(n_non_backdoor), unsafe_allow_html=True)

    # --- Per-sample table --------------------------------------------------
    st.divider()
    st.subheader("Hasil per Sampel")

    results_df = pd.DataFrame({
        "Sample": range(1, n_samples + 1),
        "Prediction": ["Backdoor" if p == 1 else "Non-Backdoor" for p in y_pred],
        "Probability (Backdoor)": [round(float(p), 4) for p in y_proba],
    })

    styled = results_df.style.apply(_style_row, axis=1).format(
        {"Probability (Backdoor)": "{:.4f}"}
    )
    st.dataframe(
        styled,
        use_container_width=True,
        height=min(400, 40 + n_samples * 38),
    )

    # --- Download ----------------------------------------------------------
    csv_out = results_df.to_csv(index=False).encode("utf-8")
    st.download_button(
        label="Download hasil prediksi (.csv)",
        data=csv_out,
        file_name="hasil_prediksi.csv",
        mime="text/csv",
    )

    st.markdown("""
    <div style="font-size:0.8rem; color:#4a6278; margin-top:0.5rem;">
        Kolom <span class="feat-mono">Probability (Backdoor)</span> adalah
        probabilitas kelas positif (Backdoor) dari Random Forest classifier.
        Nilai mendekati 1.0 menunjukkan keyakinan tinggi bahwa APK adalah Backdoor.
    </div>
    """, unsafe_allow_html=True)


def _render_shap_page():
    """Render the pre-computed SHAP figures and the two signature tables."""
    st.title("SHAP-Based Backdoor Permission Signature")

    st.markdown("""
    <div class="info-box">
        Halaman ini menampilkan hasil analisis SHAP (<i>SHapley Additive exPlanations</i>)
        terhadap model terbaik: <strong>Random Forest + SMOTE (RF-C2)</strong>.
        SHAP digunakan untuk mengidentifikasi permission dan fitur yang paling berkontribusi
        terhadap deteksi Backdoor malware, serta mengekstrak
        <em>behavioral permission signature</em> yang membedakan Backdoor dari kategori malware lain.
    </div>
    """, unsafe_allow_html=True)

    # --- Bar plot ----------------------------------------------------------
    st.subheader("SHAP Bar Plot: Top-50 Features")
    st.markdown("""
    <div style="font-size:0.87rem; color:#2c4a63; margin-bottom:0.5rem;">
        Mean absolute SHAP value untuk 50 fitur dengan kontribusi terbesar
        terhadap prediksi model pada test set.
        Semakin besar nilainya, semakin kuat pengaruh fitur tersebut terhadap keputusan classifier.
    </div>
    """, unsafe_allow_html=True)
    _show_figure(FIG_BAR_PATH)

    st.divider()

    # --- Beeswarm plot -----------------------------------------------------
    st.subheader("SHAP Beeswarm Plot: Top-20 Features")
    st.markdown("""
    <div style="font-size:0.87rem; color:#2c4a63; margin-bottom:0.5rem;">
        Distribusi SHAP value untuk 20 fitur teratas pada seluruh sampel test set.
        Warna menunjukkan nilai fitur (merah = tinggi, biru = rendah).
        Plot ini memperlihatkan arah dan besaran pengaruh setiap fitur.
    </div>
    """, unsafe_allow_html=True)
    _show_figure(FIG_BEE_PATH)

    st.divider()

    # --- Group-level plot --------------------------------------------------
    st.subheader("Group-Level SHAP Contribution")
    st.markdown("""
    <div style="font-size:0.87rem; color:#2c4a63; margin-bottom:0.5rem;">
        Perbandingan mean absolute SHAP value antara kelompok fitur
        <strong>Permissions</strong> (indeks 0-3267) dan
        <strong>Non-Permissions</strong> (indeks 3268-9501),
        yang mencakup Services, Intent Actions, dan Intent Categories.
    </div>
    """, unsafe_allow_html=True)
    _show_figure(FIG_GROUP_PATH)

    st.divider()

    # --- Signature table ---------------------------------------------------
    st.subheader("Backdoor Behavioral Signature (Top-50)")
    st.markdown("""
    <div style="font-size:0.87rem; color:#2c4a63; margin-bottom:0.5rem;">
        Fitur-fitur dengan mean SHAP positif pada sampel True Positive
        (APK yang secara benar terdeteksi sebagai Backdoor).
        Fitur-fitur ini membentuk <em>behavioral signature</em> Backdoor malware.
        Kolom <span class="feat-mono">feature_col_idx</span> adalah indeks fitur
        dalam ruang fitur asli (0-9502).
    </div>
    """, unsafe_allow_html=True)
    _show_table(load_signature_table, SIG_PATH, height=400)

    st.divider()

    # --- Named permission table --------------------------------------------
    st.subheader("Named Permission Signature")
    st.markdown("""
    <div style="font-size:0.87rem; color:#2c4a63; margin-bottom:0.5rem;">
        Permission-permission yang termasuk dalam Backdoor behavioral signature,
        beserta nama resminya dari Android permission list
        dan nilai mean SHAP pada True Positive samples.
    </div>
    """, unsafe_allow_html=True)
    _show_table(load_named_signature_table, NAMED_SIG_PATH, height=350)


def _render_about_page():
    """Render the static "about the research" page."""
    st.title("Tentang Penelitian")

    # --- Institution header ------------------------------------------------
    col_logo, col_header = st.columns([1, 4])
    with col_logo:
        st.image(UDINUS_LOGO_URL, width=120)
    with col_header:
        st.markdown("""
        <div style="padding-top: 0.5rem;">
            <div style="font-size: 1.05rem; font-weight: 700; color: #0f1923; line-height: 1.4;">
                Universitas Dian Nuswantoro
            </div>
            <div style="font-size: 0.9rem; color: #2c4a63; margin-top: 0.2rem;">
                Semarang, Jawa Tengah, Indonesia
            </div>
            <div style="margin-top: 0.5rem; display: inline-block;
            background-color: #eef4fb; border: 1px solid #b0cce4;
            padding: 3px 10px; border-radius: 2px;
            font-size: 0.78rem; color: #1a4f78; font-weight: 600;
            letter-spacing: 0.04em;">
                PENELITIAN DASAR PERGURUAN TINGGI 2025/2026 Semester Gasal
            </div>
        </div>
        """, unsafe_allow_html=True)

    st.divider()

    # --- Research information ----------------------------------------------
    st.subheader("Informasi Penelitian")
    st.markdown("""
    <table class="about-table">
        <tr>
            <td>Judul</td>
            <td>Backdoor Android Malware Detection under Extreme Class Imbalance
            using Ensemble Learning and SHAP-Based Permission Signature Analysis</td>
        </tr>
        <tr>
            <td>Peneliti Utama</td>
            <td>Rama Aria Megantara</td>
        </tr>
        <tr>
            <td>Co-Peneliti</td>
            <td>Dewi Pergiwati</td>
        </tr>
        <tr>
            <td>Institusi</td>
            <td>Universitas Dian Nuswantoro</td>
        </tr>
        <tr>
            <td>Jenis Hibah</td>
            <td>Penelitian Dasar Perguruan Tinggi (DIKTI)</td>
        </tr>
        <tr>
            <td>Periode</td>
            <td>2025/2026 Semester Gasal</td>
        </tr>
        <tr>
            <td>Publikasi</td>
            <td>
                <a href="{journal}" target="_blank" style="color:#1a6fa8;">
                    Matrik: Jurnal Manajemen, Teknik Informatika, dan Rekayasa Komputer
                </a>
            </td>
        </tr>
        <tr>
            <td>Dataset</td>
            <td>
                <a href="{dataset}" target="_blank" style="color:#1a6fa8;">
                    CCCS-CIC-AndMal-2020 (University of New Brunswick)
                </a>
            </td>
        </tr>
    </table>
    """.format(journal=JOURNAL_URL, dataset=DATASET_URL), unsafe_allow_html=True)

    st.divider()

    # --- Abstract ----------------------------------------------------------
    st.subheader("Abstract")
    st.markdown("""
    <div style="font-size:0.92rem; color:#1a2b3c; line-height:1.75;
    text-align:justify; max-width:860px;">
        Backdoor malware pada platform Android merupakan ancaman serius karena memungkinkan
        akses tidak sah secara tersembunyi ke dalam sistem perangkat korban.
        Deteksi Backdoor menghadapi tantangan berupa <em>extreme class imbalance</em>
        yang parah pada dataset dunia nyata.
        Penelitian ini mengusulkan pipeline deteksi berbasis <em>static features</em>
        menggunakan dataset <strong>CCCS-CIC-AndMal-2020</strong>,
        dengan rasio ketidakseimbangan kelas 1:221.5 antara kelas Backdoor dan kelas lainnya.
        Pipeline dua tahap <em>feature selection</em> diterapkan:
        <em>Variance Threshold</em> mereduksi 9.503 fitur menjadi 1.433 fitur,
        diikuti seleksi berbasis <em>Random Forest feature importance</em>
        untuk memilih 500 fitur terbaik.
        Lima classifier dievaluasi: Decision Tree, Logistic Regression, Random Forest,
        XGBoost, dan LightGBM, masing-masing di bawah tiga kondisi penanganan imbalance:
        tanpa penanganan (C1), SMOTE (C2), dan SMOTE dengan <em>cost-sensitive learning</em> (C3),
        dengan 10 <em>random seeds</em> untuk validasi statistik via uji Wilcoxon Signed-Rank.
        Model terbaik berdasarkan <em>composite ranking</em> adalah
        <strong>Random Forest dengan kondisi SMOTE (RF-C2)</strong>,
        yang mencapai F1-Score=0.9046, AUC-ROC=0.9917, dan G-Mean=0.9426.
        Analisis SHAP (<em>SHapley Additive exPlanations</em>) digunakan untuk mengekstraksi
        <em>behavioral permission signature</em> Backdoor malware,
        memberikan interpretabilitas terhadap keputusan model.
    </div>
    """, unsafe_allow_html=True)

    st.divider()

    # --- Performance table -------------------------------------------------
    st.subheader("Performa Model Terbaik: Random Forest + SMOTE (RF-C2)")
    st.markdown("""
    <div style="font-size:0.83rem; color:#4a6278; margin-bottom:0.75rem;">
        Rata-rata dan standar deviasi dari 10 random seeds pada test set.
        Evaluasi menggunakan metrik khusus untuk kelas minority (Backdoor).
    </div>
    """, unsafe_allow_html=True)

    perf_rows = [
        ("F1-Score (Backdoor class)", "0.9046", "0.0020"),
        ("AUC-ROC", "0.9917", "0.0010"),
        ("G-Mean", "0.9426", "0.0030"),
        ("Precision", "0.9133", "0.0035"),
        ("Recall", "0.8961", "0.0061"),
        ("MCC", "0.8952", "0.0021"),
        ("Training Time", "2.18 s", "0.04 s"),
        ("Inference Time per Sample", "0.0174 ms", "0.0004 ms"),
    ]

    # One <tr> per line and no blank lines in between, so the whole table
    # stays a single contiguous chunk of HTML inside the Markdown body.
    rows_html = "\n".join(
        '<tr><td>{metric}</td>'
        '<td class="perf-metric">{mean} +/- {std}</td></tr>'.format(
            metric=metric, mean=mean_val, std=std_val
        )
        for metric, mean_val, std_val in perf_rows
    )

    st.markdown("""
    <table class="about-table">
        <tr>
            <td style="font-weight:700; font-size:0.75rem; text-transform:uppercase;
            letter-spacing:0.06em; color:#4a6278; border-bottom:2px solid #c0d4e5;">
                Metric
            </td>
            <td style="font-weight:700; font-size:0.75rem; text-transform:uppercase;
            letter-spacing:0.06em; color:#4a6278; border-bottom:2px solid #c0d4e5;">
                Mean +/- Std
            </td>
        </tr>
        {}
    </table>
    """.format(rows_html), unsafe_allow_html=True)

    st.divider()

    # --- Methodology summary -----------------------------------------------
    st.subheader("Ringkasan Metodologi")
    st.markdown("""
    <div style="font-size:0.88rem; color:#1a2b3c; line-height:1.7;">
        <table class="about-table">
            <tr><td>Dataset</td><td>CCCS-CIC-AndMal-2020 — 342.169 sampel, 18 kategori</td></tr>
            <tr><td>Task</td><td>Binary classification: Backdoor (1) vs. semua kategori lain (0)</td></tr>
            <tr><td>Fitur</td><td>Static features — 9.503 kolom (permissions, services, intents, categories)</td></tr>
            <tr><td>Feature Selection</td><td>Variance Threshold + Random Forest Importance (top-500)</td></tr>
            <tr><td>Imbalance Handling</td><td>C1: None | C2: SMOTE | C3: SMOTE + cost-sensitive</td></tr>
            <tr><td>Classifiers</td><td>Decision Tree, Logistic Regression, Random Forest, XGBoost, LightGBM</td></tr>
            <tr><td>Validasi</td><td>10 random seeds, Wilcoxon Signed-Rank test, effect size r</td></tr>
            <tr><td>Explainability</td><td>SHAP TreeExplainer — behavioral permission signature</td></tr>
        </table>
    </div>
    """, unsafe_allow_html=True)

    st.divider()

    st.markdown("""
    <div style="font-size:0.78rem; color:#7a95aa; text-align:center;">
        Universitas Dian Nuswantoro |
        Penelitian Dasar Perguruan Tinggi 2025/2026 |
        Rama Aria Megantara & Dewi Pergiwati
    </div>
    """, unsafe_allow_html=True)


# Dispatch on the sidebar selection; an unrecognized value renders nothing.
if page == "Prediksi":
    _render_prediction_page()
elif page == "SHAP Signature":
    _render_shap_page()
elif page == "Tentang Penelitian":
    _render_about_page()