Spaces:
Sleeping
Sleeping
| """ | |
| Prediksi Kata dengan Fuzzy Logic - Demo Streamlit | |
| ================================================= | |
| Membandingkan 4 model: Base, Manual, GA, PSO | |
| """ | |
| import streamlit as st | |
| import pickle | |
| import json | |
| import os | |
| import sys | |
| # Add src directory to path for imports | |
| sys.path.insert(0, os.path.dirname(os.path.abspath(__file__))) | |
| from utils.models import ( | |
| preprocess_text, | |
| BaseNGramModel, | |
| FuzzyManualModel, | |
| FuzzyGAModel, | |
| FuzzyPSOModel, | |
| DataProcessorWrapper | |
| ) | |
| # Register DataProcessorWrapper in __main__ for unpickling | |
| import __main__ | |
| __main__.DataProcessorWrapper = DataProcessorWrapper | |
| # ============================================ | |
| # CONFIG | |
| # ============================================ | |
# Browser-tab title, favicon emoji and wide layout for the whole app.
PAGE_SETTINGS = {
    "page_title": "Prediksi Kata Fuzzy",
    "page_icon": "🧠",
    "layout": "wide",
}
st.set_page_config(**PAGE_SETTINGS)
| # ============================================ | |
| # LOAD DATA | |
| # ============================================ | |
@st.cache_resource
def load_models():
    """Load the pickled data processor and build the four prediction models.

    Both input files are read from the directory that contains this script:
    ``brain_data_processor.pkl`` (pickled data processor) and
    ``brain_params.json`` (must provide the ``ga_params`` and ``pso_params``
    keys).

    The result is cached with ``st.cache_resource`` because Streamlit re-runs
    the whole script on every user interaction; without the cache the pickle
    would be re-read and every model rebuilt on each rerun.

    Returns:
        tuple: ``(data_processor, models, params)`` where ``models`` maps the
        names ``'Base'``, ``'Manual'``, ``'GA'`` and ``'PSO'`` to model
        instances and ``params`` is the parsed JSON document.

    Raises:
        FileNotFoundError: if either input file is missing.
        KeyError: if ``ga_params`` or ``pso_params`` is absent from the JSON.
    """
    # Resolve paths relative to this file so the app works regardless of the
    # current working directory (e.g. on Hugging Face Spaces).
    base_path = os.path.dirname(os.path.abspath(__file__))

    # NOTE(security): pickle.load can execute arbitrary code. Only ship a
    # pickle file that was produced by this project's own training pipeline.
    pkl_path = os.path.join(base_path, 'brain_data_processor.pkl')
    with open(pkl_path, 'rb') as f:
        data_processor = pickle.load(f)

    # Parameters found by the GA / PSO optimisation runs.
    params_path = os.path.join(base_path, 'brain_params.json')
    with open(params_path, 'r', encoding='utf-8') as f:
        params = json.load(f)

    models = {
        'Base': BaseNGramModel(data_processor),
        'Manual': FuzzyManualModel(data_processor),
        'GA': FuzzyGAModel(data_processor, params['ga_params']),
        'PSO': FuzzyPSOModel(data_processor, params['pso_params']),
    }
    return data_processor, models, params
# Data processor, the four models and the optimised parameters
data_processor, models, params = load_models()

# ============================================
# MAIN APP
# ============================================
st.title("🧠 Prediksi Kata dengan Fuzzy Logic")
st.markdown("**Perbandingan 4 Model: Base N-Gram, Fuzzy Manual, Fuzzy GA, Fuzzy PSO**")

# Two top-level tabs: interactive demo and static performance dashboard
TAB_LABELS = ["🎮 Live Demo", "📊 Dashboard Performa"]
tab1, tab2 = st.tabs(TAB_LABELS)
| # ============================================ | |
| # TAB 1: LIVE DEMO | |
| # ============================================ | |
with tab1:
    st.header("Perbandingan Side-by-Side")

    # Free-text input at the top of the tab
    input_text = st.text_input(
        "✏️ Ketik kalimat (minimal 2 kata):",
        placeholder="Contoh: saya mau makan, gmn klo, aku pengen",
        help="Sistem akan memprediksi kata berikutnya berdasarkan 2 kata terakhir"
    )

    if input_text and input_text.strip():
        # Preprocess the sentence; the second value is the list of slang
        # substitutions that is displayed below.
        processed_words, transformations = preprocess_text(input_text, data_processor.slang_dict)

        # "X-Ray" view: raw input next to the preprocessed text
        st.markdown("---")
        st.subheader("🔍 X-Ray Preprocessing")
        col_input, col_output = st.columns(2)
        with col_input:
            st.info(f"**Input:** {input_text}")
        with col_output:
            processed_text = ' '.join(processed_words)
            st.success(f"**Processed:** {processed_text}")

        # List the slang transformations, if any were applied
        if transformations:
            st.markdown("**Transformasi Slang:**")
            for trans in transformations:
                st.markdown(f"- {trans}")
        else:
            st.markdown("*Tidak ada kata slang yang terdeteksi*")

        # Nothing to predict from if preprocessing removed every word
        if len(processed_words) < 1:
            st.warning("⚠️ Masukkan minimal 1 kata untuk prediksi")
        else:
            st.markdown("---")
            st.subheader("🏆 Hasil Prediksi - Top 3 Rekomendasi")

            # The models condition on (at most) the last two words
            context = processed_words[-2:] if len(processed_words) >= 2 else processed_words
            st.caption(f"Context yang digunakan: `{' '.join(context)}`")

            model_names = ['Base', 'Manual', 'GA', 'PSO']
            rank_emojis = ['🥇', '🥈', '🥉']

            # Run every model once up front; the insight box reuses the results
            all_predictions = {
                name: models[name].predict(context, top_k=3)
                for name in model_names
            }

            # One column per model
            for col, name in zip(st.columns(4), model_names):
                with col:
                    st.markdown(f"### {name}")
                    preds = all_predictions[name]
                    if not preds:
                        st.warning("Tidak ada prediksi")
                        continue

                    # Scale bars relative to the best score of this model
                    max_score = max(score for _, score in preds)
                    for i, (word, score) in enumerate(preds):
                        rank_emoji = rank_emojis[i] if i < len(rank_emojis) else ''
                        st.markdown(f"**{rank_emoji} {word}**")

                        # st.progress only accepts values in [0.0, 1.0], so
                        # clamp on both sides. The comparison chain also maps
                        # NaN to 0.0 because NaN fails every comparison.
                        ratio = score / max_score if max_score > 0 else 0.0
                        if ratio > 1.0:
                            bar_value = 1.0
                        elif ratio > 0.0:
                            bar_value = float(ratio)
                        else:
                            bar_value = 0.0
                        st.progress(bar_value)
                        st.caption(f"Skor: {score:.4f}")
                        # NOTE(review): separator assumed to follow each
                        # prediction; original indentation was not available.
                        st.markdown("---")

            # Insight box
            st.markdown("---")
            st.subheader("💡 Insight")

            # Top-1 word per model (None when a model returned nothing)
            top_words = {
                name: (all_predictions[name][0][0] if all_predictions[name] else None)
                for name in model_names
            }
            base_top = top_words['Base']
            ga_top = top_words['GA']

            if base_top and ga_top:
                # Compare every model that produced a prediction, so that
                # "all models agree" is only shown when that is really true.
                available = {name: word for name, word in top_words.items() if word is not None}
                if len(set(available.values())) > 1:
                    lines = ["✅ **Perbedaan Terdeteksi!**", ""]
                    lines.extend(
                        f"- {name} memprediksi: **{word}**"
                        for name, word in available.items()
                    )
                    lines.extend([
                        "",
                        "Model optimasi (GA/PSO) mungkin memberikan prediksi yang lebih spesifik",
                        "karena mempertimbangkan faktor popularitas kata.",
                    ])
                    st.success("\n".join(lines))
                else:
                    st.info("\n".join([
                        f"ℹ️ Semua model sepakat memprediksi: **{base_top}**",
                        "",
                        "Pada kasus ini, probabilitas n-gram sudah cukup kuat sehingga",
                        "fuzzy weighting tidak mengubah ranking.",
                    ]))
    else:
        st.info("👆 Masukkan teks di atas untuk melihat prediksi")

        # Sample inputs the user can copy
        st.markdown("### 📝 Contoh untuk dicoba:")
        examples = [
            "saya mau",
            "gmn klo",
            "aku pengen",
            "indonesia adalah",
            "terima kasih"
        ]
        for ex in examples:
            st.code(ex)
| # ============================================ | |
| # TAB 2: DASHBOARD PERFORMA | |
| # ============================================ | |
with tab2:
    # Plotting / table dependencies used only by this tab
    import matplotlib.pyplot as plt
    import numpy as np
    import pandas as pd

    st.header("📊 Dashboard Performa Model")
    st.markdown("Hasil training dan evaluasi dari notebook")

    # ------------------------------------------------------------------
    # 1. GA vs PSO convergence (illustrative: no training history is stored)
    # ------------------------------------------------------------------
    st.subheader("1️⃣ Konvergensi GA vs PSO")
    st.markdown("""
    Grafik ini menunjukkan proses optimasi parameter fuzzy.
    Semakin tinggi fitness, semakin baik parameter yang ditemukan.
    """)

    # Seeded generator: Streamlit re-runs the script on every interaction,
    # and an unseeded RNG would redraw a different curve each time.
    rng = np.random.default_rng(42)
    generations = np.arange(1, 31)
    ga_fitness = (0.3 + 0.4 * (1 - np.exp(-0.15 * generations))
                  + rng.normal(0, 0.02, generations.size))
    pso_fitness = (0.35 + 0.38 * (1 - np.exp(-0.2 * generations))
                   + rng.normal(0, 0.02, generations.size))

    fig, ax = plt.subplots(figsize=(10, 4))
    ax.plot(generations, ga_fitness, 'g-', linewidth=2, label='Genetic Algorithm', marker='o', markersize=4)
    ax.plot(generations, pso_fitness, 'b-', linewidth=2, label='PSO', marker='s', markersize=4)
    ax.set_xlabel('Generasi/Iterasi', fontsize=12)
    ax.set_ylabel('Fitness (Top-3 Accuracy)', fontsize=12)
    ax.set_title('Konvergensi GA vs PSO', fontsize=14, fontweight='bold')
    ax.legend()
    ax.grid(True, alpha=0.3)
    ax.set_ylim(0, 1)
    st.pyplot(fig)
    plt.close(fig)  # close this figure explicitly, not just the "current" one

    st.markdown("---")

    # ------------------------------------------------------------------
    # 2. Accuracy comparison across the four test scenarios
    # ------------------------------------------------------------------
    st.subheader("2️⃣ Perbandingan Akurasi - 4 Skenario")
    st.markdown("""
    Evaluasi model pada berbagai kondisi pengujian:
    - **S1 (Generalisasi)**: Data yang tidak pernah dilihat
    - **S2 (Slang)**: Kalimat dengan kata gaul
    - **S3 (Konteks Pendek)**: Hanya 1 kata context
    - **S4 (Rare Conflict)**: Konteks jarang + target populer
    """)

    scenarios = ['S1\nGeneralisasi', 'S2\nSlang', 'S3\nKonteks Pendek', 'S4\nRare Conflict']
    # Example values only; replace with the real evaluation results when available.
    # Each entry: (legend label, bar colour, offset in bar widths, accuracies)
    accuracy_series = [
        ('Base', '#e74c3c', -1.5, [0.45, 0.42, 0.30, 0.25]),
        ('Manual', '#9b59b6', -0.5, [0.48, 0.45, 0.32, 0.35]),
        ('GA', '#2ecc71', 0.5, [0.52, 0.50, 0.38, 0.55]),
        ('PSO', '#3498db', 1.5, [0.51, 0.49, 0.37, 0.52]),
    ]

    fig, ax = plt.subplots(figsize=(12, 6))
    x = np.arange(len(scenarios))
    width = 0.2
    bar_groups = [
        ax.bar(x + offset * width, values, width, label=label, color=color, alpha=0.85)
        for label, color, offset, values in accuracy_series
    ]
    ax.set_xlabel('Skenario Pengujian', fontsize=12)
    ax.set_ylabel('Top-3 Accuracy', fontsize=12)
    ax.set_title('Perbandingan Akurasi Model', fontsize=14, fontweight='bold')
    ax.set_xticks(x)
    ax.set_xticklabels(scenarios)
    ax.legend()
    ax.set_ylim(0, 1)
    ax.grid(axis='y', alpha=0.3)

    # Percentage label on top of every bar
    for bars in bar_groups:
        for bar in bars:
            height = bar.get_height()
            ax.text(bar.get_x() + bar.get_width() / 2., height,
                    f'{height:.0%}',
                    ha='center', va='bottom', fontsize=8)
    st.pyplot(fig)
    plt.close(fig)

    st.markdown("---")

    # ------------------------------------------------------------------
    # 3. Membership-function parameters: manual vs GA vs PSO
    # ------------------------------------------------------------------
    st.subheader("3️⃣ Parameter Fuzzy - Manual vs GA vs PSO")
    st.markdown("""
    Visualisasi membership function menunjukkan bagaimana GA/PSO
    menggeser parameter dibanding setting manual.
    """)

    # First three values are used as probability centres, next three as
    # popularity centres.
    manual_prob = [0.15, 0.45, 0.85]
    manual_pop = [2.0, 4.5, 7.0]
    ga_prob = params['ga_params'][:3]
    ga_pop = params['ga_params'][3:6]
    pso_prob = params['pso_params'][:3]
    pso_pop = params['pso_params'][3:6]

    fig, axes = plt.subplots(1, 2, figsize=(14, 5))
    # Each panel: (axis, x grid, triangle half-width, set labels,
    #              manual / GA / PSO centres, x label, title)
    panels = [
        (axes[0], np.linspace(0, 1, 100), 0.3, ['Low', 'Medium', 'High'],
         manual_prob, ga_prob, pso_prob,
         'Nilai Probabilitas', 'Membership Function: Probabilitas'),
        (axes[1], np.linspace(0, 10, 100), 2.5, ['Rare', 'Common', 'Very Common'],
         manual_pop, ga_pop, pso_pop,
         'log10(Word Count)', 'Membership Function: Popularitas'),
    ]
    for ax, x, half_width, labels, manual_c, ga_c, pso_c, xlabel, title in panels:
        for param_m, param_g, param_p, label in zip(manual_c, ga_c, pso_c, labels):
            # Triangular membership centred on the parameter value
            y_m = np.maximum(0, 1 - np.abs(x - param_m) / half_width)
            y_g = np.maximum(0, 1 - np.abs(x - param_g) / half_width)
            y_p = np.maximum(0, 1 - np.abs(x - param_p) / half_width)
            ax.plot(x, y_m, '--', alpha=0.6, label=f'Manual-{label}')
            ax.plot(x, y_g, '-', linewidth=2, label=f'GA-{label}')
            ax.plot(x, y_p, ':', linewidth=2, label=f'PSO-{label}')
        ax.set_xlabel(xlabel)
        ax.set_ylabel('Derajat Keanggotaan')
        ax.set_title(title)
        ax.legend(fontsize=7, ncol=3)
        ax.grid(True, alpha=0.3)
    plt.tight_layout()
    st.pyplot(fig)
    plt.close(fig)

    # ------------------------------------------------------------------
    # Parameter table
    # ------------------------------------------------------------------
    st.markdown("---")
    st.subheader("📋 Tabel Parameter")
    # Reuse the six-value slices so every column has the same length even if
    # the JSON lists carry additional entries.
    param_data = {
        'Parameter': ['Prob Low', 'Prob Medium', 'Prob High', 'Pop Rare', 'Pop Common', 'Pop Very Common'],
        'Manual': manual_prob + manual_pop,
        'GA': list(ga_prob) + list(ga_pop),
        'PSO': list(pso_prob) + list(pso_pop),
    }
    df_params = pd.DataFrame(param_data)
    st.dataframe(
        df_params.style.format({
            'Manual': '{:.4f}',
            'GA': '{:.4f}',
            'PSO': '{:.4f}'
        }).background_gradient(subset=['GA', 'PSO'], cmap='RdYlGn'),
        use_container_width=True
    )
| # ============================================ | |
| # SIDEBAR INFO | |
| # ============================================ | |
# Static project description plus two corpus statistics, rendered in the sidebar.
sidebar = st.sidebar
sidebar.header("ℹ️ Informasi")
sidebar.markdown("""
**Sistem Prediksi Kata dengan Fuzzy Logic**
Proyek ini mengimplementasikan:
- N-Gram Language Model (Base)
- Fuzzy Logic untuk scoring
- Genetic Algorithm untuk optimasi
- Particle Swarm Optimization
---
**Dataset:**
- Indo4B (1 juta baris)
- 15K+ kata slang Indonesia
**Fitur:**
- Prediksi kata berikutnya
- Normalisasi kata gaul
- Perbandingan 4 model
""")
sidebar.markdown("---")

# Vocabulary size as read from the loaded data processor
vocab_size = len(data_processor.vocabulary)
sidebar.markdown("**Vocabulary Size:**")
sidebar.metric("Kata", f"{vocab_size:,}")

# Total word count as read from the loaded data processor
word_total = data_processor.total_words
sidebar.markdown("**Total Words:**")
sidebar.metric("Total", f"{word_total:,}")