Spaces:

KGNINJA
/

ProblemDiscoveryMap

Sleeping

Update app.py

456461d verified 14 days ago

1.75 kB

	import gradio as gr
	from sentence_transformers import SentenceTransformer
	from sklearn.cluster import KMeans
	import pandas as pd
	import plotly.express as px
	import umap

	# Sentence-embedding model, loaded once at import time and reused by
	# analyze_problems() for every request.
	model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")

	# Sample input pre-filled in the UI textbox: one problem statement per line.
	# Leading/trailing whitespace and blank lines are stripped by analyze_problems().
	default_problems = """
	I manually rename files every week
	I convert PDFs to Excel
	I copy data between spreadsheets
	I send weekly reports manually
	I merge CSV files daily
	I manually download invoices
	I extract tables from PDFs
	I clean messy Excel sheets
	I manually schedule social posts
	I track expenses in spreadsheets
	"""

	# Below this many points the UMAP projection is skipped (see _project_to_2d).
	_MIN_UMAP_POINTS = 4


	def _project_to_2d(embeddings):
	    """Reduce embedding vectors to an ``(n, 2)`` array of plot coordinates.

	    UMAP is used when there are enough points for it. It builds a neighbour
	    graph and is unreliable on a handful of points (it can fail outright on
	    fewer than four samples), so tiny inputs use a plain PCA projection
	    instead, zero-padded to two columns when fewer components exist.
	    """
	    n_points = len(embeddings)

	    if n_points >= _MIN_UMAP_POINTS:
	        reducer = umap.UMAP(
	            # The default of 15 neighbours exceeds small datasets; cap it at
	            # n - 1, which is what UMAP would truncate to (with a warning).
	            n_neighbors=min(15, n_points - 1),
	            # Seeded like KMeans so the same input always yields the same map.
	            random_state=0,
	        )
	        return reducer.fit_transform(embeddings)

	    # Local import: numpy is not among the file's visible top-level imports.
	    import numpy as np

	    points = np.asarray(embeddings, dtype=float)
	    centered = points - points.mean(axis=0)
	    left, singular_values, _ = np.linalg.svd(centered, full_matrices=False)
	    projected = left * singular_values  # principal-component scores

	    coords = np.zeros((n_points, 2))
	    kept = min(2, projected.shape[1])
	    coords[:, :kept] = projected[:, :kept]
	    return coords


	def _format_cluster_summary(cluster_summary):
	    """Render a ``{cluster: [problem, ...]}`` mapping as a bulleted text list."""
	    parts = []
	    for cluster_id, items in cluster_summary.items():
	        parts.append(f"\nCluster {cluster_id}\n")
	        parts.extend(f"- {item}\n" for item in items)
	    return "".join(parts)


	def analyze_problems(text):
	    """Cluster free-text problem statements and plot them on a 2-D map.

	    Args:
	        text: Problem statements, one per line. Blank lines and surrounding
	            whitespace are ignored; ``None`` is treated as empty input.

	    Returns:
	        A ``(summary, figure)`` tuple. ``summary`` lists the problems grouped
	        by KMeans cluster and ``figure`` is the Plotly scatter plot of the
	        2-D projection. When there is no usable input, ``summary`` is an
	        explanatory message and ``figure`` is ``None``.
	    """
	    stripped = (line.strip() for line in (text or "").split("\n"))
	    problems = [line for line in stripped if line]

	    # Guard: KMeans rejects n_clusters=0, so bail out before touching the model.
	    if not problems:
	        return "No problems provided. Enter one problem per line.", None

	    embeddings = model.encode(problems)

	    # At most 5 clusters, and never more clusters than there are problems.
	    k = min(5, len(problems))
	    kmeans = KMeans(n_clusters=k, random_state=0).fit(embeddings)

	    coords = _project_to_2d(embeddings)

	    df = pd.DataFrame({
	        "problem": problems,
	        "cluster": kmeans.labels_,
	        "x": coords[:, 0],
	        "y": coords[:, 1],
	    })

	    fig = px.scatter(
	        df,
	        x="x",
	        y="y",
	        # Cast to str so Plotly treats cluster ids as discrete categories.
	        color=df["cluster"].astype(str),
	        text="problem",
	        title="Problem Market Map",
	    )

	    cluster_summary = df.groupby("cluster")["problem"].apply(list).to_dict()

	    return _format_cluster_summary(cluster_summary), fig


	# UI components, created in the same order as they appear on the page.
	# Input: one problem statement per line, pre-filled with the sample list.
	problem_input = gr.Textbox(label="Problem Signals", lines=15, value=default_problems)
	# Outputs: the textual cluster listing and the 2-D scatter plot.
	cluster_output = gr.Textbox(label="Problem Clusters")
	map_output = gr.Plot(label="Problem Market Map")

	# Wire the analysis function to the components as a single-function Gradio app.
	demo = gr.Interface(
	    fn=analyze_problems,
	    inputs=problem_input,
	    outputs=[cluster_output, map_output],
	    title="Problem Discovery Engine Demo",
	)

	# Start the web server (runs at module execution, as in the original script).
	demo.launch()