skyvera committed on
Commit
dc9f3db
·
verified ·
1 Parent(s): 45f02e0

Upload 3 files

Browse files
Files changed (2) hide show
  1. app.py +73 -4
  2. requirements.txt +2 -1
app.py CHANGED
@@ -1,4 +1,5 @@
1
  import pandas as pd
 
2
  from sklearn.cluster import KMeans
3
  from sklearn.preprocessing import StandardScaler, OneHotEncoder
4
  from sklearn.compose import ColumnTransformer
@@ -9,7 +10,7 @@ import gradio as gr
9
  # Configure logging
10
  logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(levelname)s - %(message)s')
11
 
12
- # Expanded sample data
13
  data = pd.DataFrame({
14
  'User ID': [1, 2, 3, 4, 5],
15
  'Session Duration': [300, 450, 200, 600, 350],
@@ -28,13 +29,58 @@ data = pd.DataFrame({
28
 
29
  logging.info("Sample data prepared.")
30
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
31
  # Updated preprocessing
32
  preprocessor = ColumnTransformer(
33
  transformers=[
34
  ('num', StandardScaler(), ['Session Duration', 'Pages Visited', 'Ads Clicked', 'Engagement Score',
35
  'Time Spent per Page', 'Click Through Rate', 'Conversion Rate',
36
  'Frequency of Visits', 'Bounce Rate']),
37
- ('cat', OneHotEncoder(), ['User Interests', 'Device Type', 'Time of Day'])
38
  ])
39
 
40
  logging.info("Preprocessor setup complete.")
@@ -127,6 +173,29 @@ def ad_performance_analytics():
127
  return report
128
 
129
  with gr.Blocks() as demo:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
130
  with gr.Tab("Cluster Prediction"):
131
  with gr.Row():
132
  gr.Markdown("**This form allows you to input user session data to predict which cluster the user belongs to and provides actionable insights based on their behavior.**")
@@ -143,7 +212,7 @@ with gr.Blocks() as demo:
143
  frequency_of_visits = gr.Number(label="Frequency of Visits", value=10) # Set initial value
144
  bounce_rate = gr.Slider(0, 1, step=0.01, label="Bounce Rate", value=0.2) # Set initial value
145
  predict_button = gr.Button("Predict")
146
- output_textbox = gr.Textbox(label="Prediction Output")
147
  predict_button.click(
148
  predict_cluster,
149
  inputs=[
@@ -165,7 +234,7 @@ with gr.Blocks() as demo:
165
  Understanding these metrics can help optimize ad strategies and improve overall campaign performance.
166
  """)
167
  analytics_button = gr.Button("Analyze Ad Performance")
168
- analytics_output = gr.Textbox(label="Analytics Output")
169
  analytics_button.click(
170
  ad_performance_analytics,
171
  outputs=analytics_output
 
1
  import pandas as pd
2
+ import numpy as np
3
  from sklearn.cluster import KMeans
4
  from sklearn.preprocessing import StandardScaler, OneHotEncoder
5
  from sklearn.compose import ColumnTransformer
 
10
  # Configure logging
11
  logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(levelname)s - %(message)s')
12
 
13
+ # Initial hardcoded sample data
14
  data = pd.DataFrame({
15
  'User ID': [1, 2, 3, 4, 5],
16
  'Session Duration': [300, 450, 200, 600, 350],
 
29
 
30
  logging.info("Sample data prepared.")
31
 
32
# Expected schema of an uploaded dataset: column name -> Python type that the
# column's values should be compatible with (including 'User ID').
expected_columns = {
    'User ID': int,
    'Session Duration': int,
    'Pages Visited': int,
    'Ads Clicked': int,
    'User Interests': str,
    'Engagement Score': float,
    'Device Type': str,
    'Time of Day': str,
    'Time Spent per Page': int,
    'Click Through Rate': float,
    'Conversion Rate': float,
    'Frequency of Visits': int,
    'Bounce Rate': float
}


def validate_data(user_data):
    """Check that ``user_data`` has every expected column with a usable dtype.

    Type checks are done by dtype *family* rather than exact dtype equality:

    * ``int`` columns accept any integer dtype (int32, int64, nullable Int64).
      An exact comparison with ``np.dtype(int)`` is platform-dependent and
      would reject perfectly valid integer columns.
    * ``float`` columns accept any float dtype and also integer dtypes,
      because a CSV column of whole numbers is parsed as integers.
    * ``str`` columns accept ``object`` columns as well as pandas' dedicated
      string dtypes.

    Args:
        user_data: The ``pandas.DataFrame`` to validate.

    Returns:
        A ``(is_valid, message)`` tuple. ``message`` is ``"Data is valid."``
        on success, otherwise a description of the first problem found.
    """
    missing = [col for col in expected_columns if col not in user_data.columns]
    if missing:
        # Name the missing columns in the log to make the failure actionable.
        logging.error("Missing columns in the uploaded data: %s", missing)
        return False, "Missing columns in the uploaded data."

    for col, dtype in expected_columns.items():
        actual = user_data[col].dtype
        if dtype is str:
            type_ok = actual == object or pd.api.types.is_string_dtype(actual)
        elif dtype is int:
            type_ok = pd.api.types.is_integer_dtype(actual)
        elif dtype is float:
            type_ok = (pd.api.types.is_float_dtype(actual)
                       or pd.api.types.is_integer_dtype(actual))
        else:
            # Any other expected type keeps the strict exact-dtype comparison.
            type_ok = actual == np.dtype(dtype)

        if not type_ok:
            message = f"Incorrect data type for column {col}. Expected {dtype}, got {actual}."
            logging.error(message)
            return False, message

    logging.info("Data is valid.")
    return True, "Data is valid."
62
+
63
def load_user_data(file):
    """Load an uploaded CSV, validate it, and retrain the clustering pipeline.

    The module-level ``data`` is replaced only after the pipeline has been
    refit successfully, so a failed retrain does not leave ``data`` pointing
    at a dataset the model was never trained on.

    Args:
        file: Path or file-like object accepted by ``pandas.read_csv``.
            ``None`` is treated as "no file uploaded" (presumably what the
            UI passes when the upload is cleared; confirm against the Gradio
            version in use).

    Returns:
        A human-readable status message: a success notice, the validation
        error, or the text of any exception raised while loading/retraining.
    """
    global data

    if file is None:
        logging.warning("No file provided for upload.")
        return "No file uploaded. Please select a CSV file."

    try:
        user_data = pd.read_csv(file)
        is_valid, message = validate_data(user_data)
        if not is_valid:
            return message
        # Retrain the pipeline with new data before swapping the global.
        pipeline.fit(user_data)
        data = user_data
        return "Data uploaded, validated, and model retrained successfully. You can now make predictions by selecting the 'Cluster Prediction' tab above"
    except Exception as e:
        # Top-level UI boundary: report the error to the user, but keep the
        # traceback in the log instead of silently discarding it.
        logging.exception("Failed to load uploaded data.")
        return str(e)
76
+
77
  # Updated preprocessing
78
  preprocessor = ColumnTransformer(
79
  transformers=[
80
  ('num', StandardScaler(), ['Session Duration', 'Pages Visited', 'Ads Clicked', 'Engagement Score',
81
  'Time Spent per Page', 'Click Through Rate', 'Conversion Rate',
82
  'Frequency of Visits', 'Bounce Rate']),
83
+ ('cat', OneHotEncoder(handle_unknown='ignore'), ['User Interests', 'Device Type', 'Time of Day'])
84
  ])
85
 
86
  logging.info("Preprocessor setup complete.")
 
173
  return report
174
 
175
  with gr.Blocks() as demo:
176
+ with gr.Tab("Upload Data"):
177
+ gr.Markdown("""
178
+ **Upload your data file in CSV format. Ensure it contains the following columns with appropriate data types:**
179
+ - User ID (int)
180
+ - Session Duration (int)
181
+ - Pages Visited (int)
182
+ - Ads Clicked (int)
183
+ - User Interests (str)
184
+ - Engagement Score (float)
185
+ - Device Type (str)
186
+ - Time of Day (str)
187
+ - Time Spent per Page (int)
188
+ - Click Through Rate (float)
189
+ - Conversion Rate (float)
190
+ - Frequency of Visits (int)
191
+ - Bounce Rate (float)
192
+
193
+ **Note:** You can upload your own data for analysis, or continue using the existing sample data for predictions by selecting the **'Cluster Prediction'** tab above.
194
+ """)
195
+ file_input = gr.File(label="Upload your CSV data file")
196
+ upload_message = gr.Textbox()
197
+ file_input.change(load_user_data, inputs=file_input, outputs=upload_message)
198
+
199
  with gr.Tab("Cluster Prediction"):
200
  with gr.Row():
201
  gr.Markdown("**This form allows you to input user session data to predict which cluster the user belongs to and provides actionable insights based on their behavior.**")
 
212
  frequency_of_visits = gr.Number(label="Frequency of Visits", value=10) # Set initial value
213
  bounce_rate = gr.Slider(0, 1, step=0.01, label="Bounce Rate", value=0.2) # Set initial value
214
  predict_button = gr.Button("Predict")
215
+ output_textbox = gr.Textbox(label="Prediction Output", lines=4)
216
  predict_button.click(
217
  predict_cluster,
218
  inputs=[
 
234
  Understanding these metrics can help optimize ad strategies and improve overall campaign performance.
235
  """)
236
  analytics_button = gr.Button("Analyze Ad Performance")
237
+ analytics_output = gr.Textbox(label="Analytics Output", lines=3)
238
  analytics_button.click(
239
  ad_performance_analytics,
240
  outputs=analytics_output
requirements.txt CHANGED
@@ -1,3 +1,4 @@
1
  pandas
2
  scikit-learn
3
- gradio
 
 
1
  pandas
2
  scikit-learn
3
+ gradio
4
+ numpy