Spaces:
Runtime error
Runtime error
File size: 1,774 Bytes
afca748 3ff843f afca748 3ff843f afca748 3ff843f afca748 3ff843f afca748 3ff843f afca748 3ff843f afca748 3ff843f afca748 3ff843f afca748 3ff843f afca748 3ff843f afca748 3ff843f afca748 3ff843f afca748 3ff843f afca748 3ff843f afca748 3ff843f afca748 3ff843f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 |
import gradio as gr
import pandas as pd
import numpy as np
import os
from data_prep import load_data, basic_clean, feature_engineer, prepare_features
from embed import build_text_for_embedding, embed_texts
from clustering import reduce_and_cluster
def _resolve_upload_path(uploaded_csv):
    """Return the on-disk path of a Gradio file upload.

    Depending on the Gradio version and the ``type`` of the ``gr.File``
    component, the callback receives either a filepath string or a
    tempfile-like object whose ``.name`` attribute is the path.

    Raises:
        TypeError: if no filesystem path can be derived from the value.
    """
    if isinstance(uploaded_csv, (str, os.PathLike)):
        return os.fspath(uploaded_csv)
    path = getattr(uploaded_csv, "name", None)
    if not isinstance(path, str):
        raise TypeError(
            f"Unsupported upload value of type {type(uploaded_csv).__name__!r}; "
            "expected a file path or an object with a 'name' attribute."
        )
    return path


def run_pipeline(uploaded_csv, k=6, use_hdbscan=False):
    """Run the segmentation pipeline on an uploaded customer CSV.

    The upload is copied to ``data/uploaded.csv``, loaded and preprocessed,
    turned into text embeddings, and clustered. The cluster labels are
    written to a new ``cluster`` column.

    Args:
        uploaded_csv: Value delivered by the ``gr.File`` input: a filepath
            string, a file-like object with a ``.name`` path, or ``None``
            when nothing was uploaded.
        k: Cluster count forwarded (as ``int``) to ``reduce_and_cluster``.
        use_hdbscan: Forwarded to ``reduce_and_cluster`` unchanged.

    Returns:
        A ``(summary_text, sample_df)`` tuple: a one-line summary string and
        a DataFrame holding the first three rows of every cluster. When no
        file was uploaded, a prompt message and an empty DataFrame.

    Raises:
        TypeError: if ``uploaded_csv`` is neither a path nor an object with
            a ``.name`` path.
    """
    import shutil

    # Nothing uploaded yet: report it instead of crashing inside the callback.
    if uploaded_csv is None:
        return "Please upload a customer CSV file first.", pd.DataFrame()

    # Gradio uploads have no ``.save`` method; copy from the temp path instead.
    source_path = _resolve_upload_path(uploaded_csv)

    # create data folder
    os.makedirs("data", exist_ok=True)

    # save uploaded CSV
    csv_path = "data/uploaded.csv"
    shutil.copyfile(source_path, csv_path)

    # load & preprocess
    df = load_data(csv_path)
    df = basic_clean(df)
    df = feature_engineer(df)
    # The result is not consumed below (clustering runs on the embeddings);
    # the call is kept in case it validates or mutates ``df``.
    prepare_features(df)

    # text embedding
    texts = build_text_for_embedding(df)
    embs = embed_texts(texts)

    # clustering (the second return value is not used here)
    labels, _artifacts = reduce_and_cluster(
        embs,
        k=int(k),
        use_hdbscan=use_hdbscan,
    )
    df["cluster"] = labels

    # summary
    # NOTE(review): if the HDBSCAN path emits a noise label (conventionally
    # -1), it is counted as a cluster here -- confirm against clustering.py.
    summary_text = f"Clusters created: {len(set(labels))}"

    # sample customers: first three rows of each cluster
    sample_df = df.groupby("cluster").head(3)
    return summary_text, sample_df
def main():
    """Assemble the segmentation demo UI with Gradio Blocks and launch it.

    The page has a title, a row of inputs (CSV upload, K slider, HDBSCAN
    checkbox), a summary textbox, a sample-rows table, and a button that
    calls ``run_pipeline`` with the three inputs.
    """
    # NOTE(review): the row grouping and the position of ``launch()`` were
    # reconstructed from an unindented copy of this file -- confirm layout.
    with gr.Blocks() as demo:
        gr.Markdown("# Customer Segmentation — Hugging Face Space")

        with gr.Row():
            upload_input = gr.File(label="Upload Customer CSV (required)")
            k_slider = gr.Slider(
                minimum=2,
                maximum=20,
                value=6,
                step=1,
                label="K (for KMeans)",
            )
            hdbscan_toggle = gr.Checkbox(label="Use HDBSCAN instead of KMeans")

        summary_box = gr.Textbox(label="Output Summary")
        sample_table = gr.Dataframe(label="Sample Clustered Rows")

        segment_button = gr.Button("Run Segmentation")
        pipeline_inputs = [upload_input, k_slider, hdbscan_toggle]
        pipeline_outputs = [summary_box, sample_table]
        segment_button.click(
            fn=run_pipeline,
            inputs=pipeline_inputs,
            outputs=pipeline_outputs,
        )

    demo.launch()
# Start the Gradio app only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
|