import gradio as gr
from sentence_transformers import SentenceTransformer
from sklearn.cluster import KMeans
import pandas as pd

# Load the SBERT model once at import time so every request reuses it.
model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")


def cluster_questions(questions_text: str, num_clusters: int) -> pd.DataFrame:
    """Group questions into clusters using SBERT embeddings and K-Means.

    Args:
        questions_text: Questions separated by newlines. Blank lines and
            surrounding whitespace are ignored.
        num_clusters: Number of clusters to build. Coerced to ``int``
            because UI sliders may deliver the value as a float.

    Returns:
        A DataFrame with a ``Question`` column (the cleaned input lines, in
        input order) and a ``Cluster`` column (the K-Means label per line).

    Raises:
        gr.Error: If there are fewer non-empty questions than clusters.
    """
    # scikit-learn validates n_clusters as an integer; normalize it up front.
    num_clusters = int(num_clusters)

    # One question per line; drop empty lines.
    questions = [q.strip() for q in questions_text.split("\n") if q.strip()]

    if len(questions) < num_clusters:
        # The output component is a Dataframe, so a bare string return value
        # would not be displayed as a message. gr.Error surfaces the text to
        # the user instead.
        raise gr.Error("Jumlah pertanyaan harus >= jumlah cluster")

    # Sentence embeddings.
    embeddings = model.encode(questions)

    # Clustering; fixed seed keeps results reproducible between runs.
    kmeans = KMeans(n_clusters=num_clusters, random_state=42)
    labels = kmeans.fit_predict(embeddings)

    # Result table.
    return pd.DataFrame({"Question": questions, "Cluster": labels})


# Gradio UI
with gr.Blocks() as demo:
    gr.Markdown("# 🧠 Question Clustering Demo (SBERT)")

    with gr.Row():
        # Left column: inputs and the trigger button.
        with gr.Column():
            questions_input = gr.Textbox(
                lines=10,
                placeholder="Tulis pertanyaan per baris...",
                label="Daftar Pertanyaan",
            )
            num_clusters = gr.Slider(2, 10, step=1, value=3, label="Jumlah Cluster")
            btn = gr.Button("Proses Clustering")
        # Right column: read-only result table.
        with gr.Column():
            output = gr.Dataframe(headers=["Question", "Cluster"], interactive=False)

    btn.click(cluster_questions, inputs=[questions_input, num_clusters], outputs=output)

demo.launch()