Spaces:

coozyme
/

semantic-cluster-sbert

Sleeping

coozyme committed on Aug 11, 2025

Commit

e5ca644

1 Parent(s): a93885b

first

Files changed (2) hide show

app.py ADDED Viewed

+import gradio as gr
+from sentence_transformers import SentenceTransformer
+from sklearn.cluster import KMeans
+import pandas as pd
+# Load the SBERT sentence-embedding model once at module import so that
+# cluster_questions() reuses the same instance on every call.
+model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
+def cluster_questions(questions_text, num_clusters):
+    # Pisahkan pertanyaan per baris
+    questions = [q.strip() for q in questions_text.split("\n") if q.strip()]
+    if len(questions) < num_clusters:
+        return "Jumlah pertanyaan harus >= jumlah cluster"
+    # Embedding
+    embeddings = model.encode(questions)
+    # Clustering
+    kmeans = KMeans(n_clusters=num_clusters, random_state=42)
+    labels = kmeans.fit_predict(embeddings)
+    # Buat dataframe hasil
+    df = pd.DataFrame({"Question": questions, "Cluster": labels})
+    return df
+# UI Gradio
+with gr.Blocks() as demo:
+    gr.Markdown("# 🧠 Question Clustering Demo (SBERT)")
+    with gr.Row():
+        with gr.Column():
+            questions_input = gr.Textbox(
+                lines=10,
+                placeholder="Tulis pertanyaan per baris...",
+                label="Daftar Pertanyaan"
+            )
+            num_clusters = gr.Slider(2, 10, step=1, value=3, label="Jumlah Cluster")
+            btn = gr.Button("Proses Clustering")
+        with gr.Column():
+            output = gr.Dataframe(headers=["Question", "Cluster"], interactive=False)
+    btn.click(cluster_questions, inputs=[questions_input, num_clusters], outputs=output)
+demo.launch()

requirements.txt ADDED Viewed

+gradio
+sentence-transformers
+scikit-learn