Spaces:

elzaff
/

keyboard-recommendation

Sleeping

keyboard-recommendation / src /streamlit_app.py

Fazle Mawla Wahyuhanda

update final

9f3624c about 2 months ago

14.8 kB

	"""
	Prediksi Kata dengan Fuzzy Logic - Demo Streamlit
	=================================================
	Membandingkan 4 model: Base, Manual, GA, PSO
	"""

	import streamlit as st
	import pickle
	import json
	import os
	import sys

	# Put this script's own directory first on sys.path so the `utils.models`
	# import below is looked up there before any other location.
	sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))

	from utils.models import (
	preprocess_text,
	BaseNGramModel,
	FuzzyManualModel,
	FuzzyGAModel,
	FuzzyPSOModel,
	DataProcessorWrapper
	)

	# Expose DataProcessorWrapper as an attribute of the __main__ module so that
	# pickle can find the class there while unpickling the data processor.
	# NOTE(review): presumably the pickle file was written from a script or
	# notebook in which the class was defined in __main__ - confirm against the
	# code that produced brain_data_processor.pkl.
	import __main__
	__main__.DataProcessorWrapper = DataProcessorWrapper

	# ============================================
	# CONFIG
	# ============================================
	# Page-wide settings: browser-tab title, tab icon and wide layout.
	page_settings = dict(
	    page_title="Prediksi Kata Fuzzy",
	    page_icon="🧠",
	    layout="wide",
	)
	st.set_page_config(**page_settings)

	# ============================================
	# LOAD DATA
	# ============================================
	@st.cache_resource
	def load_models():
	    """Load the pickled data processor and build the four prediction models.

	    Both artifacts are read from the directory that contains this script:
	    ``brain_data_processor.pkl`` and ``brain_params.json``. The function is
	    wrapped in ``st.cache_resource``.

	    Returns:
	        tuple: ``(data_processor, models, params)`` where ``models`` maps the
	        names ``'Base'``, ``'Manual'``, ``'GA'`` and ``'PSO'`` to model
	        instances and ``params`` is the parsed JSON document.

	    Raises:
	        FileNotFoundError: If either artifact file is missing.
	        KeyError: If ``ga_params`` or ``pso_params`` is absent from the JSON.
	    """
	    # Paths are resolved relative to this file, not the working directory.
	    base_path = os.path.dirname(os.path.abspath(__file__))

	    # SECURITY: pickle.load can execute arbitrary code embedded in the file.
	    # Only deploy a brain_data_processor.pkl produced by a trusted pipeline.
	    pkl_path = os.path.join(base_path, 'brain_data_processor.pkl')
	    with open(pkl_path, 'rb') as f:
	        data_processor = pickle.load(f)

	    # Pin the encoding so parsing does not depend on the platform's default
	    # locale encoding.
	    params_path = os.path.join(base_path, 'brain_params.json')
	    with open(params_path, 'r', encoding='utf-8') as f:
	        params = json.load(f)

	    # One model per strategy; the GA and PSO variants additionally receive
	    # their parameter vectors from the JSON document.
	    models = {
	        'Base': BaseNGramModel(data_processor),
	        'Manual': FuzzyManualModel(data_processor),
	        'GA': FuzzyGAModel(data_processor, params['ga_params']),
	        'PSO': FuzzyPSOModel(data_processor, params['pso_params']),
	    }

	    return data_processor, models, params

	# Load the data processor, the model registry and the tuned parameters.
	(data_processor, models, params) = load_models()

	# ============================================
	# MAIN APP
	# ============================================
	st.title("🧠 Prediksi Kata dengan Fuzzy Logic")
	st.markdown("Perbandingan 4 Model: Base N-Gram, Fuzzy Manual, Fuzzy GA, Fuzzy PSO")

	# Two top-level tabs: the interactive demo and the performance dashboard.
	tab_titles = ["🎮 Live Demo", "📊 Dashboard Performa"]
	tab1, tab2 = st.tabs(tab_titles)

	# ============================================
	# TAB 1: LIVE DEMO
	# ============================================
	# Live demo tab: preprocess the typed sentence, show each model's top-3
	# next-word predictions side by side, then summarise whether they agree.
	# NOTE(review): the nesting below was reconstructed from a source whose
	# indentation had been flattened - confirm the placement of the
	# per-prediction separator and of the example list against the deployed file.
	with tab1:
	    st.header("Perbandingan Side-by-Side")

	    # The label states the same minimum (one word) that the validation
	    # further down actually enforces.
	    input_text = st.text_input(
	        "✏️ Ketik kalimat (minimal 1 kata):",
	        placeholder="Contoh: saya mau makan, gmn klo, aku pengen",
	        help="Sistem akan memprediksi kata berikutnya berdasarkan 2 kata terakhir"
	    )

	    if input_text and input_text.strip():
	        # preprocess_text returns the processed words plus a list of
	        # transformation descriptions, which are rendered below.
	        processed_words, transformations = preprocess_text(input_text, data_processor.slang_dict)

	        st.markdown("---")
	        st.subheader("🔍 X-Ray Preprocessing")

	        # Raw input on the left, processed text on the right.
	        col_input, col_output = st.columns(2)
	        with col_input:
	            st.info(f"Input: {input_text}")
	        with col_output:
	            processed_text = ' '.join(processed_words)
	            st.success(f"Processed: {processed_text}")

	        if transformations:
	            st.markdown("Transformasi Slang:")
	            for trans in transformations:
	                st.markdown(f"- {trans}")
	        else:
	            st.markdown("Tidak ada kata slang yang terdeteksi")

	        if not processed_words:
	            st.warning("⚠️ Masukkan minimal 1 kata untuk prediksi")
	        else:
	            st.markdown("---")
	            st.subheader("🏆 Hasil Prediksi - Top 3 Rekomendasi")

	            # Use at most the last two words; a one-word input is passed
	            # through unchanged.
	            context = processed_words[-2:]
	            st.caption(f"Context yang digunakan: `{' '.join(context)}`")

	            model_names = ['Base', 'Manual', 'GA', 'PSO']
	            columns = st.columns(len(model_names))

	            # Query every model once so the results can be reused by the
	            # insight box below.
	            all_predictions = {
	                name: models[name].predict(context, top_k=3)
	                for name in model_names
	            }

	            rank_emojis = ['🥇', '🥈', '🥉']
	            for col, name in zip(columns, model_names):
	                with col:
	                    st.markdown(f"### {name}")
	                    preds = all_predictions[name]

	                    if not preds:
	                        st.warning("Tidak ada prediksi")
	                        continue

	                    # Scores are scaled relative to the best score of this
	                    # model so the bars are comparable within a column.
	                    max_score = max(score for _, score in preds)

	                    for i, (word, score) in enumerate(preds):
	                        rank_emoji = rank_emojis[i] if i < len(rank_emojis) else ''
	                        st.markdown(f"{rank_emoji} {word}")

	                        # st.progress only accepts a float within [0.0, 1.0];
	                        # clamp both ends and convert to a built-in float so
	                        # a negative or non-builtin numeric score cannot
	                        # raise.
	                        if max_score > 0:
	                            normalized_score = max(0.0, min(float(score) / float(max_score), 1.0))
	                        else:
	                            normalized_score = 0.0

	                        st.progress(normalized_score)
	                        st.caption(f"Skor: {score:.4f}")

	                        st.markdown("---")

	            st.markdown("---")
	            st.subheader("💡 Insight")

	            # Top-ranked word per model; None when a model returned nothing.
	            top_words = {
	                name: (preds[0][0] if preds else None)
	                for name, preds in all_predictions.items()
	            }
	            base_top = top_words['Base']
	            ga_top = top_words['GA']
	            # Placeholder for models without a prediction, so the message
	            # never prints "None".
	            manual_top = top_words['Manual'] or '-'
	            pso_top = top_words['PSO'] or '-'

	            if base_top and ga_top:
	                # Agreement is only reported when every model, including
	                # Manual and PSO, ranks the same word first.
	                if len(set(top_words.values())) > 1:
	                    st.success(f"""
	✅ Perbedaan Terdeteksi!
	- Base memprediksi: {base_top}
	- Manual memprediksi: {manual_top}
	- GA memprediksi: {ga_top}
	- PSO memprediksi: {pso_top}

	Model optimasi (GA/PSO) mungkin memberikan prediksi yang lebih spesifik
	karena mempertimbangkan faktor popularitas kata.
	""")
	                else:
	                    st.info(f"""
	ℹ️ Semua model sepakat memprediksi: {base_top}

	Pada kasus ini, probabilitas n-gram sudah cukup kuat sehingga
	fuzzy weighting tidak mengubah ranking.
	""")
	    else:
	        st.info("👆 Masukkan teks di atas untuk melihat prediksi")

	        # Sample inputs shown while the text box is empty.
	        st.markdown("### 📝 Contoh untuk dicoba:")
	        examples = [
	            "saya mau",
	            "gmn klo",
	            "aku pengen",
	            "indonesia adalah",
	            "terima kasih"
	        ]
	        for ex in examples:
	            st.code(ex)

	# ============================================
	# TAB 2: DASHBOARD PERFORMA
	# ============================================
	import matplotlib.pyplot as plt
	import numpy as np
	import pandas as pd


	def _triangular(x, center, half_width):
	    """Return a triangular curve that is 1 at ``center`` and 0 beyond ``center ± half_width``."""
	    return np.maximum(0, 1 - np.abs(x - center) / half_width)


	def _plot_membership(ax, x, half_width, manual, ga, pso, labels, xlabel, title):
	    """Draw the Manual/GA/PSO triangular curves for each label on ``ax``."""
	    for center_m, center_g, center_p, label in zip(manual, ga, pso, labels):
	        ax.plot(x, _triangular(x, center_m, half_width), '--', alpha=0.6, label=f'Manual-{label}')
	        ax.plot(x, _triangular(x, center_g, half_width), '-', linewidth=2, label=f'GA-{label}')
	        ax.plot(x, _triangular(x, center_p, half_width), ':', linewidth=2, label=f'PSO-{label}')

	    ax.set_xlabel(xlabel)
	    ax.set_ylabel('Derajat Keanggotaan')
	    ax.set_title(title)
	    ax.legend(fontsize=7, ncol=3)
	    ax.grid(True, alpha=0.3)


	# Dashboard tab: convergence illustration, accuracy comparison, membership
	# function plots and a parameter table.
	with tab2:
	    st.header("📊 Dashboard Performa Model")
	    st.markdown("Hasil training dan evaluasi dari notebook")

	    st.subheader("1️⃣ Konvergensi GA vs PSO")
	    st.markdown("""
	Grafik ini menunjukkan proses optimasi parameter fuzzy.
	Semakin tinggi fitness, semakin baik parameter yang ditemukan.
	""")

	    # No optimisation history is stored, so these curves are synthetic.
	    # NOTE(review): the page text presents the chart as training output;
	    # consider labelling it as an illustration.
	    fig, ax = plt.subplots(figsize=(10, 4))
	    generations = np.arange(1, 31)

	    # Streamlit re-executes the script on every interaction; a fixed seed
	    # keeps the illustrative curves identical across reruns.
	    rng = np.random.default_rng(42)
	    ga_fitness = 0.3 + 0.4 * (1 - np.exp(-0.15 * generations)) + rng.normal(0, 0.02, generations.size)
	    pso_fitness = 0.35 + 0.38 * (1 - np.exp(-0.2 * generations)) + rng.normal(0, 0.02, generations.size)

	    ax.plot(generations, ga_fitness, 'g-', linewidth=2, label='Genetic Algorithm', marker='o', markersize=4)
	    ax.plot(generations, pso_fitness, 'b-', linewidth=2, label='PSO', marker='s', markersize=4)
	    ax.set_xlabel('Generasi/Iterasi', fontsize=12)
	    ax.set_ylabel('Fitness (Top-3 Accuracy)', fontsize=12)
	    ax.set_title('Konvergensi GA vs PSO', fontsize=14, fontweight='bold')
	    ax.legend()
	    ax.grid(True, alpha=0.3)
	    ax.set_ylim(0, 1)

	    st.pyplot(fig)
	    plt.close(fig)

	    st.markdown("---")

	    st.subheader("2️⃣ Perbandingan Akurasi - 4 Skenario")
	    st.markdown("""
	Evaluasi model pada berbagai kondisi pengujian:
	- S1 (Generalisasi): Data yang tidak pernah dilihat
	- S2 (Slang): Kalimat dengan kata gaul
	- S3 (Konteks Pendek): Hanya 1 kata context
	- S4 (Rare Conflict): Konteks jarang + target populer
	""")

	    scenarios = ['S1\nGeneralisasi', 'S2\nSlang', 'S3\nKonteks Pendek', 'S4\nRare Conflict']

	    # Example values only - replace with measured results when available.
	    accuracy_series = [
	        ('Base', [0.45, 0.42, 0.30, 0.25], '#e74c3c'),
	        ('Manual', [0.48, 0.45, 0.32, 0.35], '#9b59b6'),
	        ('GA', [0.52, 0.50, 0.38, 0.55], '#2ecc71'),
	        ('PSO', [0.51, 0.49, 0.37, 0.52], '#3498db'),
	    ]

	    fig, ax = plt.subplots(figsize=(12, 6))
	    bar_positions = np.arange(len(scenarios))
	    width = 0.2

	    # Four bars per scenario, centred on the tick: offsets -1.5, -0.5,
	    # +0.5 and +1.5 bar widths.
	    bar_groups = []
	    for idx, (label, values, color) in enumerate(accuracy_series):
	        offset = (idx - 1.5) * width
	        bar_groups.append(
	            ax.bar(bar_positions + offset, values, width, label=label, color=color, alpha=0.85)
	        )

	    ax.set_xlabel('Skenario Pengujian', fontsize=12)
	    ax.set_ylabel('Top-3 Accuracy', fontsize=12)
	    ax.set_title('Perbandingan Akurasi Model', fontsize=14, fontweight='bold')
	    ax.set_xticks(bar_positions)
	    ax.set_xticklabels(scenarios)
	    ax.legend()
	    ax.set_ylim(0, 1)
	    ax.grid(axis='y', alpha=0.3)

	    # Percentage label on top of every bar.
	    for bars in bar_groups:
	        for bar in bars:
	            height = bar.get_height()
	            ax.text(bar.get_x() + bar.get_width() / 2., height,
	                    f'{height:.0%}',
	                    ha='center', va='bottom', fontsize=8)

	    st.pyplot(fig)
	    plt.close(fig)

	    st.markdown("---")

	    st.subheader("3️⃣ Parameter Fuzzy - Manual vs GA vs PSO")
	    st.markdown("""
	Visualisasi membership function menunjukkan bagaimana GA/PSO
	menggeser parameter dibanding setting manual.
	""")

	    # The first three values are used as probability parameters and the next
	    # three as popularity parameters; anything beyond index 5 is ignored
	    # here and in the table below.
	    manual_prob = [0.15, 0.45, 0.85]
	    manual_pop = [2.0, 4.5, 7.0]
	    ga_params = list(params['ga_params'])
	    pso_params = list(params['pso_params'])
	    ga_prob, ga_pop = ga_params[:3], ga_params[3:6]
	    pso_prob, pso_pop = pso_params[:3], pso_params[3:6]

	    # NOTE(review): the curves use fixed half-widths (0.3 and 2.5) with each
	    # parameter as the peak; whether the models use the same shape cannot be
	    # seen from this file - confirm against utils.models.
	    fig, axes = plt.subplots(1, 2, figsize=(14, 5))

	    _plot_membership(
	        axes[0], np.linspace(0, 1, 100), 0.3,
	        manual_prob, ga_prob, pso_prob,
	        ['Low', 'Medium', 'High'],
	        'Nilai Probabilitas', 'Membership Function: Probabilitas',
	    )
	    _plot_membership(
	        axes[1], np.linspace(0, 10, 100), 2.5,
	        manual_pop, ga_pop, pso_pop,
	        ['Rare', 'Common', 'Very Common'],
	        'log10(Word Count)', 'Membership Function: Popularitas',
	    )

	    plt.tight_layout()
	    st.pyplot(fig)
	    plt.close(fig)

	    st.markdown("---")
	    st.subheader("📋 Tabel Parameter")

	    # Built from the same six values that were plotted, so every column has
	    # the length of the 'Parameter' column even if the stored vectors are
	    # longer.
	    param_data = {
	        'Parameter': ['Prob Low', 'Prob Medium', 'Prob High', 'Pop Rare', 'Pop Common', 'Pop Very Common'],
	        'Manual': manual_prob + manual_pop,
	        'GA': ga_prob + ga_pop,
	        'PSO': pso_prob + pso_pop,
	    }
	    df_params = pd.DataFrame(param_data)

	    # Four-decimal formatting, with a colour gradient on the tuned columns.
	    st.dataframe(
	        df_params.style.format({
	            'Manual': '{:.4f}',
	            'GA': '{:.4f}',
	            'PSO': '{:.4f}'
	        }).background_gradient(subset=['GA', 'PSO'], cmap='RdYlGn'),
	        use_container_width=True
	    )

	# ============================================
	# SIDEBAR INFO
	# ============================================
	# Sidebar: static project description followed by two statistics read from
	# the loaded data processor.
	with st.sidebar:
	    st.header("ℹ️ Informasi")
	    st.markdown("""
	Sistem Prediksi Kata dengan Fuzzy Logic

	Proyek ini mengimplementasikan:
	- N-Gram Language Model (Base)
	- Fuzzy Logic untuk scoring
	- Genetic Algorithm untuk optimasi
	- Particle Swarm Optimization

	---

	Dataset:
	- Indo4B (1 juta baris)
	- 15K+ kata slang Indonesia

	Fitur:
	- Prediksi kata berikutnya
	- Normalisasi kata gaul
	- Perbandingan 4 model
	""")

	    st.markdown("---")

	    # Both numbers are rendered with thousands separators.
	    st.markdown("Vocabulary Size:")
	    vocab_size = len(data_processor.vocabulary)
	    st.metric("Kata", format(vocab_size, ","))

	    st.markdown("Total Words:")
	    total_words = data_processor.total_words
	    st.metric("Total", format(total_words, ","))