Spaces:

coozyme
/

semantic-cluster-sbert

Sleeping

semantic-cluster-sbert / app.py

first

e5ca644 6 months ago

1.45 kB

	import gradio as gr
	from sentence_transformers import SentenceTransformer
	from sklearn.cluster import KMeans
	import pandas as pd

	# Load the SBERT sentence-embedding model once at import time; the single
	# module-level instance is reused by cluster_questions() on every call.
	model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")

	def cluster_questions(questions_text, num_clusters):
	    """Group newline-separated questions into clusters by semantic similarity.

	    Each non-blank line of ``questions_text`` is treated as one question,
	    embedded with the module-level SBERT ``model`` and clustered with
	    K-Means. ``random_state`` is fixed so the initialisation is repeatable.

	    Args:
	        questions_text: Questions, one per line; blank lines are ignored.
	        num_clusters: Number of clusters to form. Coerced to ``int``
	            because the UI slider value may arrive as a float.

	    Returns:
	        A ``pandas.DataFrame`` with a ``Question`` column and the
	        ``Cluster`` label assigned to each question.

	    Raises:
	        gr.Error: If there are fewer questions than requested clusters
	            (this includes empty input).
	    """
	    # One question per line; drop surrounding whitespace and empty lines.
	    lines = (questions_text or "").split("\n")
	    questions = [line.strip() for line in lines if line.strip()]

	    num_clusters = int(num_clusters)
	    if len(questions) < num_clusters:
	        # The output component is a gr.Dataframe, and Gradio documents a
	        # plain ``str`` value for it as a path to a CSV file, so returning
	        # the message would not display it. gr.Error shows it to the user.
	        raise gr.Error("Jumlah pertanyaan harus >= jumlah cluster")

	    # Embed every question into a dense vector.
	    embeddings = model.encode(questions)

	    # Cluster the embeddings.
	    kmeans = KMeans(n_clusters=num_clusters, random_state=42)
	    labels = kmeans.fit_predict(embeddings)

	    # Pair each question with its cluster label for display.
	    return pd.DataFrame({"Question": questions, "Cluster": labels})

	# Gradio UI: inputs in the first column, clustering results in the second.
	with gr.Blocks() as demo:
	    gr.Markdown("# 🧠 Question Clustering Demo (SBERT)")
	    with gr.Row():
	        # First column: question list, cluster count and the trigger button.
	        with gr.Column():
	            questions_input = gr.Textbox(
	                lines=10,
	                placeholder="Tulis pertanyaan per baris...",
	                label="Daftar Pertanyaan",
	            )
	            num_clusters = gr.Slider(2, 10, step=1, value=3, label="Jumlah Cluster")
	            btn = gr.Button("Proses Clustering")
	        # Second column: read-only table showing each question's cluster.
	        with gr.Column():
	            output = gr.Dataframe(headers=["Question", "Cluster"], interactive=False)

	    # Register the click handler while the Blocks context is still open.
	    btn.click(cluster_questions, inputs=[questions_input, num_clusters], outputs=output)

	demo.launch()