"""Gradio demo that clusters free-text "problem signals" and maps them in 2-D.

Each non-empty input line is embedded with a SentenceTransformer model,
grouped with KMeans, projected to two dimensions with UMAP, and presented
both as a plain-text cluster summary and as a Plotly scatter plot.
"""

import logging
from collections import defaultdict
from typing import Iterable, List, Sequence, Tuple

import gradio as gr
import pandas as pd
import plotly.express as px
import umap
from sentence_transformers import SentenceTransformer
from sklearn.cluster import KMeans

logger = logging.getLogger(__name__)

# Upper bound on the number of clusters; inputs with fewer problems than this
# use one cluster per problem instead.
MAX_CLUSTERS = 5

# Shared seed so the clustering and the 2-D layout are repeatable for a given
# input.
RANDOM_SEED = 0

# Loaded once at import time so every request reuses the same model instance.
model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")

default_problems = """
I manually rename files every week
I convert PDFs to Excel
I copy data between spreadsheets
I send weekly reports manually
I merge CSV files daily
I manually download invoices
I extract tables from PDFs
I clean messy Excel sheets
I manually schedule social posts
I track expenses in spreadsheets
"""


def _parse_problems(text: str) -> List[str]:
    """Return the stripped, non-empty lines of *text* in their original order."""
    stripped = (line.strip() for line in (text or "").splitlines())
    return [line for line in stripped if line]


def _project_to_2d(embeddings) -> Tuple[Sequence[float], Sequence[float]]:
    """Return ``(xs, ys)`` plot coordinates for *embeddings*.

    UMAP is used whenever it succeeds. If it rejects the input, the points
    are laid out on a horizontal line so the caller still gets a plot.
    """
    try:
        coords = umap.UMAP(random_state=RANDOM_SEED).fit_transform(embeddings)
    except (TypeError, ValueError):
        # NOTE(review): UMAP's default initialisation is known to raise on
        # very small inputs; the exact size at which this happens appears to
        # depend on the installed umap-learn/scipy versions - confirm.
        logger.warning(
            "UMAP could not embed %d point(s); using a linear fallback layout.",
            len(embeddings),
            exc_info=True,
        )
        n_points = len(embeddings)
        return list(range(n_points)), [0.0] * n_points
    return coords[:, 0], coords[:, 1]


def _format_summary(problems: Iterable[str], labels: Iterable[int]) -> str:
    """Render the problems grouped by cluster id as a bulleted text block.

    Clusters appear in ascending id order; problems keep their input order
    within each cluster.
    """
    grouped = defaultdict(list)
    for problem, label in zip(problems, labels):
        grouped[label].append(problem)

    parts = []
    for label in sorted(grouped):
        parts.append(f"\nCluster {label}\n")
        parts.extend(f"- {problem}\n" for problem in grouped[label])
    return "".join(parts)


def analyze_problems(text: str):
    """Cluster the problems listed in *text* and build their 2-D map.

    Args:
        text: Free text with one problem statement per line. Blank lines and
            surrounding whitespace are ignored.

    Returns:
        A ``(summary, figure)`` pair. ``summary`` lists the problems grouped
        by cluster; ``figure`` is a Plotly scatter plot of the problems
        coloured by cluster. When *text* contains no problems, ``summary`` is
        an explanatory message and ``figure`` is ``None``.
    """
    problems = _parse_problems(text)
    if not problems:
        # Without this guard KMeans would be asked for zero clusters and raise.
        return "No problems provided. Enter one problem per line.", None

    embeddings = model.encode(problems)

    n_clusters = min(MAX_CLUSTERS, len(problems))
    # n_init is pinned because the library default differs between
    # scikit-learn releases; an explicit value keeps results comparable.
    kmeans = KMeans(n_clusters=n_clusters, n_init=10, random_state=RANDOM_SEED)
    labels = [int(label) for label in kmeans.fit_predict(embeddings)]

    xs, ys = _project_to_2d(embeddings)

    df = pd.DataFrame(
        {
            "problem": problems,
            # Stored as strings so Plotly treats clusters as discrete
            # categories rather than a continuous colour scale.
            "cluster": [str(label) for label in labels],
            "x": xs,
            "y": ys,
        }
    )

    fig = px.scatter(
        df,
        x="x",
        y="y",
        color="cluster",
        text="problem",
        title="Problem Market Map",
    )
    # Keep the text labels from being drawn on top of their markers.
    fig.update_traces(textposition="top center")

    return _format_summary(problems, labels), fig


demo = gr.Interface(
    fn=analyze_problems,
    inputs=gr.Textbox(value=default_problems, lines=15, label="Problem Signals"),
    outputs=[
        gr.Textbox(label="Problem Clusters"),
        gr.Plot(label="Problem Market Map"),
    ],
    title="Problem Discovery Engine Demo",
)

# Only start the web server when run as a script, so importing this module
# (e.g. to reuse `demo` or `analyze_problems`) does not block on a server.
if __name__ == "__main__":
    demo.launch()