coozyme's picture
first
e5ca644
import gradio as gr
from sentence_transformers import SentenceTransformer
from sklearn.cluster import KMeans
import pandas as pd
# Sentence-BERT checkpoint used to embed the questions. The encoder is
# instantiated once at module import so every clustering request reuses it.
_SBERT_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
model = SentenceTransformer(_SBERT_MODEL_NAME)
def cluster_questions(questions_text, num_clusters):
    """Cluster newline-separated questions by sentence-embedding similarity.

    Each non-blank line of ``questions_text`` is treated as one question,
    encoded with the module-level ``model`` and grouped with K-Means
    (``random_state=42`` so repeated runs on the same input agree).

    Args:
        questions_text: Raw text with one question per line. Blank lines and
            surrounding whitespace are ignored; ``None`` is treated as empty.
        num_clusters: Number of clusters to form. Coerced to ``int``.

    Returns:
        A ``pandas.DataFrame`` with a ``"Question"`` column (the cleaned
        questions, in input order) and a ``"Cluster"`` column (the K-Means
        label assigned to each question).

    Raises:
        gr.Error: If fewer questions than clusters were supplied.
    """
    # The value may arrive as a float (e.g. 3.0) from the slider or an API
    # client; KMeans expects an integer n_clusters.
    num_clusters = int(num_clusters)

    # Split into one question per line, dropping blank lines.
    stripped_lines = (line.strip() for line in (questions_text or "").split("\n"))
    questions = [line for line in stripped_lines if line]

    if len(questions) < num_clusters:
        # Raise instead of returning the message: the handler's output is a
        # Dataframe component, which cannot display a bare string as an error.
        raise gr.Error("Jumlah pertanyaan harus >= jumlah cluster")

    # Embed the questions.
    embeddings = model.encode(questions)

    # Cluster the embeddings.
    kmeans = KMeans(n_clusters=num_clusters, random_state=42)
    labels = kmeans.fit_predict(embeddings)

    # Build the result table.
    return pd.DataFrame({"Question": questions, "Cluster": labels})
# Gradio UI: inputs in the left column, clustering result in the right column.
with gr.Blocks() as demo:
    gr.Markdown(value="# 🧠 Question Clustering Demo (SBERT)")

    with gr.Row():
        # Left column: the question list, the cluster count and the run button.
        with gr.Column():
            questions_input = gr.Textbox(
                label="Daftar Pertanyaan",
                placeholder="Tulis pertanyaan per baris...",
                lines=10,
            )
            num_clusters = gr.Slider(
                minimum=2,
                maximum=10,
                value=3,
                step=1,
                label="Jumlah Cluster",
            )
            btn = gr.Button(value="Proses Clustering")

        # Right column: read-only table of questions and their cluster labels.
        with gr.Column():
            output = gr.Dataframe(
                headers=["Question", "Cluster"],
                interactive=False,
            )

    # Run the clustering handler when the button is clicked.
    btn.click(
        fn=cluster_questions,
        inputs=[questions_input, num_clusters],
        outputs=output,
    )

demo.launch()