Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| from sentence_transformers import SentenceTransformer | |
| from sklearn.cluster import KMeans | |
| import pandas as pd | |
# Identifier of the pretrained SBERT checkpoint used to embed the questions.
MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"

# Instantiated once at module import so every clustering request reuses it.
model = SentenceTransformer(MODEL_NAME)
def cluster_questions(questions_text, num_clusters):
    """Cluster newline-separated questions with SBERT embeddings and K-Means.

    Args:
        questions_text: Raw text holding one question per line. Blank lines
            and surrounding whitespace are dropped. ``None`` is treated as
            empty text.
        num_clusters: Requested number of clusters. Coerced to ``int``
            because scikit-learn rejects a non-integral ``n_clusters`` and
            the UI may deliver the slider value as a float.

    Returns:
        A ``pandas.DataFrame`` with a ``"Question"`` column (the cleaned
        questions, in input order) and a ``"Cluster"`` column (the K-Means
        label assigned to each question).

    Raises:
        gr.Error: If there are fewer non-blank questions than clusters.
    """
    num_clusters = int(num_clusters)

    # Split the input into one question per line, skipping blank lines.
    questions = [
        line.strip()
        for line in (questions_text or "").split("\n")
        if line.strip()
    ]

    if len(questions) < num_clusters:
        # The result feeds a gr.Dataframe, which interprets a returned string
        # as a CSV file path. Raising gr.Error shows the message to the user
        # instead of failing on a non-existent file.
        raise gr.Error("Jumlah pertanyaan harus >= jumlah cluster")

    # Embed every question with the shared sentence-transformer model.
    embeddings = model.encode(questions)

    # Fixed random_state keeps the cluster assignment reproducible.
    kmeans = KMeans(n_clusters=num_clusters, random_state=42)
    labels = kmeans.fit_predict(embeddings)

    # Pair each question with its cluster label for display.
    return pd.DataFrame({"Question": questions, "Cluster": labels})
# Gradio UI: inputs and the trigger button on the left, result table on the right.
with gr.Blocks() as demo:
    gr.Markdown("# 🧠 Question Clustering Demo (SBERT)")

    with gr.Row():
        with gr.Column():
            questions_input = gr.Textbox(
                label="Daftar Pertanyaan",
                placeholder="Tulis pertanyaan per baris...",
                lines=10,
            )
            num_clusters = gr.Slider(
                minimum=2,
                maximum=10,
                value=3,
                step=1,
                label="Jumlah Cluster",
            )
            btn = gr.Button("Proses Clustering")

        with gr.Column():
            output = gr.Dataframe(
                headers=["Question", "Cluster"],
                interactive=False,
            )

    # Run the clustering when the button is pressed and show the table.
    btn.click(
        fn=cluster_questions,
        inputs=[questions_input, num_clusters],
        outputs=output,
    )

demo.launch()