Simrandhiman committed on
Commit
3ff843f
·
verified ·
1 Parent(s): 9836d6f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +45 -38
app.py CHANGED
@@ -1,67 +1,74 @@
1
- # app.py
2
- # Simple Gradio interface: upload CSV -> run pipeline -> show clusters + sample customers
3
-
4
-
5
  import gradio as gr
6
  import pandas as pd
7
  import numpy as np
8
  import os
9
- import subprocess
10
-
11
 
12
  from data_prep import load_data, basic_clean, feature_engineer, prepare_features
13
  from embed import build_text_for_embedding, embed_texts
14
  from clustering import reduce_and_cluster
15
 
16
 
17
-
18
-
19
  def run_pipeline(uploaded_csv, k=6, use_hdbscan=False):
20
- # save upload
21
- csv_path = 'data/uploaded.csv'
22
- os.makedirs('data', exist_ok=True)
23
- uploaded_csv.save(csv_path)
24
 
 
 
 
 
 
 
25
 
26
- df = load_data(csv_path)
27
- df = basic_clean(df)
28
- df = feature_engineer(df)
29
- features = prepare_features(df)
 
30
 
 
 
 
31
 
32
- texts = build_text_for_embedding(df)
33
- embs = embed_texts(texts)
 
 
 
 
 
 
34
 
 
 
35
 
36
- labels, arts = reduce_and_cluster(embs, k=int(k), use_hdbscan=use_hdbscan)
37
- df['cluster'] = labels
38
 
 
39
 
40
- # return simple summary
41
- summary = df.groupby('cluster').agg({'customer_id':'count'}).to_dict()
42
- sample = df.groupby('cluster').head(3).to_dict(orient='records')
43
- return f"Clusters created: {len(set(labels))}", pd.DataFrame(sample)
44
 
 
 
45
 
 
46
 
 
 
 
47
 
48
- def main():
49
- with gr.Blocks() as demo:
50
- gr.Markdown('# Customer Segmentation — Hugging Face Space')
51
- with gr.Row():
52
- csv_in = gr.File(label='Upload customers CSV')
53
- k = gr.Slider(minimum=2, maximum=20, step=1, label='K (for KMeans)')
54
- use_hdbscan = gr.Checkbox(label='Use HDBSCAN (instead of KMeans)')
55
- out_text = gr.Textbox()
56
- out_table = gr.Dataframe()
57
 
 
 
58
 
59
- run_btn = gr.Button('Run pipeline')
60
- run_btn.click(fn=run_pipeline, inputs=[csv_in, k, use_hdbscan], outputs=[out_text, out_table])
61
 
 
 
 
 
 
62
 
63
- demo.launch()
64
 
65
 
66
- if __name__ == '__main__':
67
- main()
 
 
 
 
 
1
  import gradio as gr
2
  import pandas as pd
3
  import numpy as np
4
  import os
 
 
5
 
6
  from data_prep import load_data, basic_clean, feature_engineer, prepare_features
7
  from embed import build_text_for_embedding, embed_texts
8
  from clustering import reduce_and_cluster
9
 
10
 
 
 
def run_pipeline(uploaded_csv, k=6, use_hdbscan=False):
    """Cluster the customers in an uploaded CSV and return a short report.

    Args:
        uploaded_csv: Value delivered by the ``gr.File`` input. Accepted forms
            are a filesystem path (``str`` / ``os.PathLike``) or an object
            that exposes the path as ``.name``. ``None`` means nothing was
            uploaded.
        k: Cluster count forwarded to ``reduce_and_cluster`` (cast to ``int``
            because sliders may deliver a float).
        use_hdbscan: Forwarded unchanged to ``reduce_and_cluster``.

    Returns:
        A ``(summary_text, sample_df)`` tuple: a one-line summary with the
        number of distinct labels, and a DataFrame holding the first three
        rows of every cluster. When no file was uploaded, a prompt message
        and an empty DataFrame are returned instead.
    """
    # Local import keeps this function self-contained.
    import shutil

    if uploaded_csv is None:
        return "Please upload a CSV file first.", pd.DataFrame()

    # The upload arrives as a path or a temp-file wrapper; neither offers a
    # ``.save()`` method, so resolve the on-disk location and copy it.
    if isinstance(uploaded_csv, (str, os.PathLike)):
        source_path = os.fspath(uploaded_csv)
    else:
        source_path = uploaded_csv.name

    os.makedirs("data", exist_ok=True)
    csv_path = "data/uploaded.csv"
    # copyfile refuses to copy a file onto itself, so skip that case.
    if os.path.abspath(source_path) != os.path.abspath(csv_path):
        shutil.copyfile(source_path, csv_path)

    # Load and preprocess.
    df = load_data(csv_path)
    df = basic_clean(df)
    df = feature_engineer(df)
    # NOTE(review): the result is not used below; the call is kept because it
    # is not visible here whether prepare_features has side effects - confirm.
    prepare_features(df)

    # Text embedding.
    texts = build_text_for_embedding(df)
    embs = embed_texts(texts)

    # Clustering; the second return value is not needed here.
    labels, _ = reduce_and_cluster(
        embs,
        k=int(k),
        use_hdbscan=use_hdbscan,
    )
    df["cluster"] = labels

    summary_text = f"Clusters created: {len(set(labels))}"

    # First three rows of each cluster serve as the sample.
    sample_df = df.groupby("cluster").head(3)

    return summary_text, sample_df
46
 
 
 
 
 
47
 
def main():
    """Build the customer-segmentation Gradio UI and launch it.

    The interface has a CSV upload, a K slider (2-20, default 6), an HDBSCAN
    checkbox, a summary textbox and a sample-rows table. Clicking the button
    calls ``run_pipeline`` with the three inputs and writes its two return
    values to the textbox and the table.
    """
    with gr.Blocks() as demo:
        gr.Markdown("# Customer Segmentation — Hugging Face Space")

        with gr.Row():
            csv_in = gr.File(
                label="Upload Customer CSV (required)",
                # Restrict the picker to what the pipeline can read.
                file_types=[".csv"],
            )
            k = gr.Slider(2, 20, value=6, step=1, label="K (for KMeans)")

        use_hdbscan = gr.Checkbox(label="Use HDBSCAN instead of KMeans")

        out_text = gr.Textbox(label="Output Summary")
        out_table = gr.Dataframe(label="Sample Clustered Rows")

        run_btn = gr.Button("Run Segmentation")
        run_btn.click(
            fn=run_pipeline,
            inputs=[csv_in, k, use_hdbscan],
            outputs=[out_text, out_table],
        )

    demo.launch()


if __name__ == "__main__":
    main()