Spaces:

skyvera
/

AIAdServerOptimizer

Sleeping

App Files Files Community

skyvera committed on May 28, 2024

Commit

dc9f3db

verified ·

1 Parent(s): 45f02e0

Upload 3 files

Browse files

Files changed (2) hide show

app.py +73 -4
requirements.txt +2 -1

app.py CHANGED Viewed

@@ -1,4 +1,5 @@
 import pandas as pd
 from sklearn.cluster import KMeans
 from sklearn.preprocessing import StandardScaler, OneHotEncoder
 from sklearn.compose import ColumnTransformer
@@ -9,7 +10,7 @@ import gradio as gr
 # Configure logging
 logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(levelname)s - %(message)s')
-# Expanded sample data
 data = pd.DataFrame({
     'User ID': [1, 2, 3, 4, 5],
     'Session Duration': [300, 450, 200, 600, 350],
@@ -28,13 +29,58 @@ data = pd.DataFrame({
 logging.info("Sample data prepared.")
 # Updated preprocessing
 preprocessor = ColumnTransformer(
     transformers=[
         ('num', StandardScaler(), ['Session Duration', 'Pages Visited', 'Ads Clicked', 'Engagement Score',
                                    'Time Spent per Page', 'Click Through Rate', 'Conversion Rate',
                                    'Frequency of Visits', 'Bounce Rate']),
-        ('cat', OneHotEncoder(), ['User Interests', 'Device Type', 'Time of Day'])
     ])
 logging.info("Preprocessor setup complete.")
@@ -127,6 +173,29 @@ def ad_performance_analytics():
     return report
 with gr.Blocks() as demo:
     with gr.Tab("Cluster Prediction"):
         with gr.Row():
             gr.Markdown("**This form allows you to input user session data to predict which cluster the user belongs to and provides actionable insights based on their behavior.**")
@@ -143,7 +212,7 @@ with gr.Blocks() as demo:
             frequency_of_visits = gr.Number(label="Frequency of Visits", value=10)  # Set initial value
             bounce_rate = gr.Slider(0, 1, step=0.01, label="Bounce Rate", value=0.2)  # Set initial value
         predict_button = gr.Button("Predict")
-        output_textbox = gr.Textbox(label="Prediction Output")
         predict_button.click(
             predict_cluster,
             inputs=[
@@ -165,7 +234,7 @@ with gr.Blocks() as demo:
         Understanding these metrics can help optimize ad strategies and improve overall campaign performance.
         """)
         analytics_button = gr.Button("Analyze Ad Performance")
-        analytics_output = gr.Textbox(label="Analytics Output")
         analytics_button.click(
             ad_performance_analytics,
             outputs=analytics_output

 import pandas as pd
+import numpy as np
 from sklearn.cluster import KMeans
 from sklearn.preprocessing import StandardScaler, OneHotEncoder
 from sklearn.compose import ColumnTransformer
 # Configure logging
 logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(levelname)s - %(message)s')
+# Initial hardcoded sample data
 data = pd.DataFrame({
     'User ID': [1, 2, 3, 4, 5],
     'Session Duration': [300, 450, 200, 600, 350],
 logging.info("Sample data prepared.")
+# Schema every uploaded CSV must satisfy (including 'User ID'): column name -> Python type that validate_data() checks the column's dtype against
+expected_columns = {
+    'User ID': int,
+    'Session Duration': int,
+    'Pages Visited': int,
+    'Ads Clicked': int,
+    'User Interests': str,  # str columns are accepted when the pandas dtype is object
+    'Engagement Score': float,
+    'Device Type': str,
+    'Time of Day': str,
+    'Time Spent per Page': int,
+    'Click Through Rate': float,
+    'Conversion Rate': float,
+    'Frequency of Visits': int,
+    'Bounce Rate': float
+}
+def validate_data(user_data):
+    if not all(col in user_data.columns for col in expected_columns):
+        logging.error("Missing columns in the uploaded data.")
+        return False, "Missing columns in the uploaded data."
+    for col, dtype in expected_columns.items():
+        # Check if the expected type is string and the actual type is object
+        if dtype == str and user_data[col].dtype == object:
+            continue
+        if user_data[col].dtype != np.dtype(dtype):
+            logging.error(f"Incorrect data type for column {col}. Expected {dtype}, got {user_data[col].dtype}.")
+            return False, f"Incorrect data type for column {col}. Expected {dtype}, got {user_data[col].dtype}."
+    logging.info("Data is valid.")
+    return True, "Data is valid."
+def load_user_data(file):
+    try:
+        user_data = pd.read_csv(file)
+        is_valid, message = validate_data(user_data)
+        if not is_valid:
+            return message
+        global data
+        data = user_data
+        # Retrain the pipeline with new data
+        pipeline.fit(data)
+        return "Data uploaded, validated, and model retrained successfully. You can now make predictions by selecting the 'Cluster Prediction' tab above"
+    except Exception as e:
+        return str(e)
 # Updated preprocessing
 preprocessor = ColumnTransformer(
     transformers=[
         ('num', StandardScaler(), ['Session Duration', 'Pages Visited', 'Ads Clicked', 'Engagement Score',
                                    'Time Spent per Page', 'Click Through Rate', 'Conversion Rate',
                                    'Frequency of Visits', 'Bounce Rate']),
+        ('cat', OneHotEncoder(handle_unknown='ignore'), ['User Interests', 'Device Type', 'Time of Day'])
     ])
 logging.info("Preprocessor setup complete.")
     return report
 with gr.Blocks() as demo:
+    with gr.Tab("Upload Data"):
+        gr.Markdown("""
+        **Upload your data file in CSV format. Ensure it contains the following columns with appropriate data types:**
+        - User ID (int)
+        - Session Duration (int)
+        - Pages Visited (int)
+        - Ads Clicked (int)
+        - User Interests (str)
+        - Engagement Score (float)
+        - Device Type (str)
+        - Time of Day (str)
+        - Time Spent per Page (int)
+        - Click Through Rate (float)
+        - Conversion Rate (float)
+        - Frequency of Visits (int)
+        - Bounce Rate (float)
+        **Note:** You can upload your own data for analysis, or continue using the existing sample data for predictions by selecting the **'Cluster Prediction'** tab above.
+        """)
+        file_input = gr.File(label="Upload your CSV data file")
+        upload_message = gr.Textbox()
+        file_input.change(load_user_data, inputs=file_input, outputs=upload_message)
     with gr.Tab("Cluster Prediction"):
         with gr.Row():
             gr.Markdown("**This form allows you to input user session data to predict which cluster the user belongs to and provides actionable insights based on their behavior.**")
             frequency_of_visits = gr.Number(label="Frequency of Visits", value=10)  # Set initial value
             bounce_rate = gr.Slider(0, 1, step=0.01, label="Bounce Rate", value=0.2)  # Set initial value
         predict_button = gr.Button("Predict")
+        output_textbox = gr.Textbox(label="Prediction Output", lines=4)
         predict_button.click(
             predict_cluster,
             inputs=[
         Understanding these metrics can help optimize ad strategies and improve overall campaign performance.
         """)
         analytics_button = gr.Button("Analyze Ad Performance")
+        analytics_output = gr.Textbox(label="Analytics Output", lines=3)
         analytics_button.click(
             ad_performance_analytics,
             outputs=analytics_output

requirements.txt CHANGED Viewed

@@ -1,3 +1,4 @@
 pandas
 scikit-learn
-gradio

 pandas
 scikit-learn
+gradio
+numpy