# Page header captured together with the file (not part of the program):
#   Simrandhiman's picture
#   Update app.py
#   3ff843f verified
import os
import shutil

import gradio as gr
import numpy as np
import pandas as pd

from data_prep import load_data, basic_clean, feature_engineer, prepare_features
from embed import build_text_for_embedding, embed_texts
from clustering import reduce_and_cluster
def run_pipeline(uploaded_csv, k=6, use_hdbscan=False):
    """Cluster the customers in an uploaded CSV and summarise the result.

    The upload is stored as ``data/uploaded.csv``, passed through the
    cleaning / feature-engineering helpers, embedded as text, clustered, and
    the resulting labels are written to a new ``cluster`` column.

    Args:
        uploaded_csv: The value handed over by the upload widget. Accepted
            forms are a filesystem path (``str`` or ``os.PathLike``), an
            object exposing a callable ``save(path)``, or a file-like object
            whose ``name`` attribute is the path of the uploaded file.
            ``None`` means nothing was uploaded.
        k: Cluster count; converted with ``int()`` before being forwarded to
            ``reduce_and_cluster``.
        use_hdbscan: Forwarded unchanged to ``reduce_and_cluster``.

    Returns:
        A ``(summary_text, sample_df)`` pair: a one-line summary with the
        number of distinct labels, and the first three rows of every cluster.
        When no file was uploaded the pair is ``(prompt message, None)``.

    Raises:
        TypeError: If ``uploaded_csv`` is none of the supported forms.
    """
    # Validate before touching the filesystem so an empty submit is harmless.
    if uploaded_csv is None:
        return "Please upload a customer CSV file first.", None

    os.makedirs("data", exist_ok=True)
    csv_path = "data/uploaded.csv"

    # The upload widget does not hand over an object with ``save()``; depending
    # on the Gradio version it is a path string or a temp-file wrapper whose
    # ``name`` is the path. Objects that do offer ``save()`` keep working.
    if isinstance(uploaded_csv, (str, os.PathLike)):
        source_path = os.fspath(uploaded_csv)
    elif callable(getattr(uploaded_csv, "save", None)):
        uploaded_csv.save(csv_path)
        source_path = None
    else:
        source_path = getattr(uploaded_csv, "name", None)
        if not isinstance(source_path, (str, os.PathLike)):
            raise TypeError(
                "uploaded_csv must be a file path, an object with save(), "
                f"or a file-like object with a path in .name; got {type(uploaded_csv).__name__}"
            )

    # Skip the copy when the caller already points at the destination file
    # (shutil.copyfile refuses to copy a file onto itself).
    if source_path is not None and os.path.abspath(source_path) != os.path.abspath(csv_path):
        shutil.copyfile(source_path, csv_path)

    # Load and preprocess.
    df = load_data(csv_path)
    df = basic_clean(df)
    df = feature_engineer(df)
    # NOTE(review): the prepared features are not consumed below (clustering
    # runs on the text embeddings only). The call is kept in case the helper
    # validates or mutates ``df`` -- confirm and drop it if it is pure.
    prepare_features(df)

    # Text embedding.
    texts = build_text_for_embedding(df)
    embs = embed_texts(texts)

    # Clustering; the second return value (artifacts) is not used here.
    labels, _artifacts = reduce_and_cluster(
        embs,
        k=int(k),
        use_hdbscan=use_hdbscan,
    )
    df["cluster"] = labels

    # NOTE(review): if the HDBSCAN path labels noise points as -1, that label
    # is counted as a cluster here -- confirm against reduce_and_cluster.
    summary_text = f"Clusters created: {len(set(labels))}"

    # Up to three example customers per cluster.
    sample_df = df.groupby("cluster").head(3)
    return summary_text, sample_df
def main():
    """Build the Gradio Blocks interface for the segmentation demo and launch it.

    The UI consists of a CSV upload, a slider for K, a checkbox to switch to
    HDBSCAN, a summary textbox, a table of sample rows, and a button that
    runs ``run_pipeline`` with the three inputs and fills the two outputs.
    """
    with gr.Blocks() as demo:
        # The heading previously contained a mis-encoded em dash.
        gr.Markdown("# Customer Segmentation — Hugging Face Space")

        # NOTE(review): the original indentation was lost; the three input
        # widgets are assumed to share this row -- confirm the intended layout.
        with gr.Row():
            csv_in = gr.File(label="Upload Customer CSV (required)")
            k = gr.Slider(2, 20, value=6, step=1, label="K (for KMeans)")
            use_hdbscan = gr.Checkbox(label="Use HDBSCAN instead of KMeans")

        out_text = gr.Textbox(label="Output Summary")
        out_table = gr.Dataframe(label="Sample Clustered Rows")

        run_btn = gr.Button("Run Segmentation")
        run_btn.click(
            fn=run_pipeline,
            inputs=[csv_in, k, use_hdbscan],
            outputs=[out_text, out_table],
        )

    demo.launch()
# Start the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()