Simrandhiman committed on
Commit
afca748
·
verified ·
1 Parent(s): 8775e4c

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +67 -0
app.py ADDED
@@ -0,0 +1,67 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # app.py
2
+ # Simple Gradio interface: upload CSV -> run pipeline -> show clusters + sample customers
3
+
4
+
5
+ import gradio as gr
6
+ import pandas as pd
7
+ import numpy as np
8
+ import os
9
+ import subprocess
10
+
11
+
12
+ from data_prep import load_data, basic_clean, feature_engineer, prepare_features
13
+ from embed import build_text_for_embedding, embed_texts
14
+ from clustering import reduce_and_cluster
15
+
16
+
17
+
18
+
19
def run_pipeline(uploaded_csv, k=6, use_hdbscan=False):
    """Cluster the customers of an uploaded CSV and return a short report.

    Args:
        uploaded_csv: Value delivered by the ``gr.File`` input. Accepted as
            either a filesystem path or an object exposing that path as
            ``.name``. ``None`` means nothing was uploaded.
        k: Cluster count forwarded to ``reduce_and_cluster`` (cast to int).
        use_hdbscan: Forwarded unchanged to ``reduce_and_cluster``.

    Returns:
        A ``(message, table)`` tuple. ``message`` is a status string and
        ``table`` is a DataFrame holding the first three rows of every
        cluster. When no file was uploaded the table is empty.
    """
    import shutil  # function-scope import keeps this block self-contained

    if uploaded_csv is None:
        # Clicking "Run" without a file must not crash the handler.
        return "Please upload a CSV file before running the pipeline.", pd.DataFrame()

    # gr.File does not hand over an object with .save(); it provides the path
    # of a temporary file, either directly or through a ``.name`` attribute.
    src_path = getattr(uploaded_csv, 'name', uploaded_csv)

    # save upload
    csv_path = 'data/uploaded.csv'
    os.makedirs('data', exist_ok=True)
    shutil.copyfile(src_path, csv_path)

    df = load_data(csv_path)
    df = basic_clean(df)
    df = feature_engineer(df)
    # NOTE(review): the return value is not consumed by the clustering below;
    # the call is kept in case prepare_features validates or mutates df.
    prepare_features(df)

    texts = build_text_for_embedding(df)
    embs = embed_texts(texts)

    labels, _artifacts = reduce_and_cluster(embs, k=int(k), use_hdbscan=use_hdbscan)
    df['cluster'] = labels

    # return simple summary: up to three example rows per cluster
    sample = df.groupby('cluster').head(3).reset_index(drop=True)
    # NOTE(review): if reduce_and_cluster marks noise with a dedicated label
    # (HDBSCAN conventionally uses -1), it is counted as a cluster here.
    return f"Clusters created: {len(set(labels))}", sample
44
+
45
+
46
+
47
+
48
def main():
    """Build the Gradio Blocks UI, wire it to ``run_pipeline`` and launch it.

    The layout is a title, one row of inputs (CSV upload, K slider, HDBSCAN
    checkbox), a status textbox, a results table and a button that triggers
    the pipeline.
    """
    with gr.Blocks() as demo:
        gr.Markdown('# Customer Segmentation — Hugging Face Space')
        with gr.Row():
            csv_in = gr.File(label='Upload customers CSV')
            # value=6 mirrors run_pipeline's default k; without an explicit
            # value the slider would start at its minimum.
            k = gr.Slider(minimum=2, maximum=20, step=1, value=6, label='K (for KMeans)')
            use_hdbscan = gr.Checkbox(label='Use HDBSCAN (instead of KMeans)')
        out_text = gr.Textbox(label='Status')
        out_table = gr.Dataframe(label='Sample customers per cluster')

        run_btn = gr.Button('Run pipeline')
        # Inputs are passed positionally as (uploaded_csv, k, use_hdbscan);
        # the two return values fill the textbox and the table in order.
        run_btn.click(
            fn=run_pipeline,
            inputs=[csv_in, k, use_hdbscan],
            outputs=[out_text, out_table],
        )

    demo.launch()
64
+
65
+
66
# Start the app only when this file is executed directly, not when imported.
if __name__ == "__main__":
    main()