coozyme committed on
Commit
e5ca644
·
1 Parent(s): a93885b
Files changed (2) hide show
  1. app.py +43 -0
  2. requirements.txt +3 -0
app.py ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from sentence_transformers import SentenceTransformer
3
+ from sklearn.cluster import KMeans
4
+ import pandas as pd
5
+
6
# Load the SBERT sentence-embedding model once at import time so that every
# clustering request reuses the same instance.
MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
model = SentenceTransformer(MODEL_NAME)
8
+
9
def cluster_questions(questions_text, num_clusters):
    """Group newline-separated questions into K-Means clusters.

    Every non-blank line of ``questions_text`` is treated as one question.
    The questions are embedded with the module-level SBERT ``model`` and the
    embeddings are clustered with K-Means (fixed ``random_state`` so repeated
    runs on the same input give the same labels).

    Args:
        questions_text: Raw textbox content, one question per line. Blank
            lines and surrounding whitespace are ignored; ``None`` is
            treated as empty input.
        num_clusters: Requested number of clusters. Coerced to ``int``
            before use.

    Returns:
        A ``pandas.DataFrame`` with a ``"Question"`` column (the cleaned
        questions, in input order) and a ``"Cluster"`` column (the label
        assigned to each question).

    Raises:
        gr.Error: If there are fewer questions than requested clusters.
    """
    # NOTE(review): the slider value may arrive as a float depending on the
    # Gradio version; KMeans only accepts an integer n_clusters, so coerce.
    num_clusters = int(num_clusters)

    # Split the input into one question per line, dropping blank lines.
    questions = [
        line.strip()
        for line in (questions_text or "").split("\n")
        if line.strip()
    ]
    if len(questions) < num_clusters:
        # The output component is a Dataframe, so a plain string is not a
        # valid return value; gr.Error surfaces the message in the UI instead.
        raise gr.Error("Jumlah pertanyaan harus >= jumlah cluster")

    # Embed every question into a dense vector.
    embeddings = model.encode(questions)

    # Cluster the embeddings.
    kmeans = KMeans(n_clusters=num_clusters, random_state=42)
    labels = kmeans.fit_predict(embeddings)

    # Build the result table: one row per question with its cluster label.
    return pd.DataFrame({"Question": questions, "Cluster": labels})
25
+
26
# Gradio UI: inputs in the first column, clustering result in the second.
with gr.Blocks() as demo:
    gr.Markdown("# 🧠 Question Clustering Demo (SBERT)")

    with gr.Row():
        # Input column: question list, cluster count and the trigger button.
        with gr.Column():
            questions_input = gr.Textbox(
                label="Daftar Pertanyaan",
                placeholder="Tulis pertanyaan per baris...",
                lines=10,
            )
            num_clusters = gr.Slider(
                minimum=2,
                maximum=10,
                value=3,
                step=1,
                label="Jumlah Cluster",
            )
            btn = gr.Button("Proses Clustering")

        # Output column: read-only table of questions and their clusters.
        with gr.Column():
            output = gr.Dataframe(
                headers=["Question", "Cluster"],
                interactive=False,
            )

    # Run the clustering when the button is pressed.
    btn.click(
        fn=cluster_questions,
        inputs=[questions_input, num_clusters],
        outputs=output,
    )

demo.launch()
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ gradio
2
+ sentence-transformers
3
+ scikit-learn