Spaces:
Sleeping
Sleeping
| import pandas as pd | |
| import numpy as np | |
| from sklearn.cluster import KMeans | |
| from sklearn.preprocessing import StandardScaler, OneHotEncoder | |
| from sklearn.compose import ColumnTransformer | |
| from sklearn.pipeline import Pipeline | |
| import logging | |
| import gradio as gr | |
# Logging is configured once for the whole app: DEBUG level, timestamped records.
logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(levelname)s - %(message)s')

# One row per column: (column name, Python type uploads are checked against,
# five hardcoded sample values). Both the sample DataFrame and the expected
# schema are derived from this table, so the two cannot drift apart.
_SAMPLE_COLUMNS = (
    ('User ID', int, [1, 2, 3, 4, 5]),
    ('Session Duration', int, [300, 450, 200, 600, 350]),
    ('Pages Visited', int, [5, 8, 3, 12, 7]),
    ('Ads Clicked', int, [2, 1, 0, 3, 2]),
    ('User Interests', str, ['technology', 'sports', 'technology', 'arts', 'sports']),
    ('Engagement Score', float, [0.8, 0.5, 0.3, 0.9, 0.7]),
    ('Device Type', str, ['mobile', 'desktop', 'mobile', 'tablet', 'desktop']),
    ('Time of Day', str, ['morning', 'afternoon', 'evening', 'morning', 'afternoon']),
    ('Time Spent per Page', int, [30, 25, 45, 20, 50]),
    ('Click Through Rate', float, [0.1, 0.2, 0.05, 0.3, 0.15]),
    ('Conversion Rate', float, [0.05, 0.1, 0, 0.2, 0.1]),
    ('Frequency of Visits', int, [10, 20, 5, 15, 10]),
    ('Bounce Rate', float, [0.2, 0.1, 0.5, 0.05, 0.3]),
)

# Initial hardcoded sample data (replaced when a valid CSV is uploaded).
data = pd.DataFrame({name: values for name, _, values in _SAMPLE_COLUMNS})
logging.info("Sample data prepared.")

# Expected columns (including 'User ID') mapped to their Python types.
expected_columns = {name: kind for name, kind, _ in _SAMPLE_COLUMNS}
def validate_data(user_data, expected=None):
    """Check that a DataFrame has the expected columns with compatible dtypes.

    Args:
        user_data: DataFrame to validate (for example the result of
            ``pd.read_csv`` on an uploaded file).
        expected: Optional mapping of column name -> Python type (``int``,
            ``float`` or ``str``). When omitted, the module-level
            ``expected_columns`` mapping is used.

    Returns:
        A ``(is_valid, message)`` tuple: a bool plus a human-readable status.
    """
    schema = expected_columns if expected is None else expected
    types = pd.api.types

    missing = [col for col in schema if col not in user_data.columns]
    if missing:
        logging.error("Missing columns in the uploaded data: %s", missing)
        return False, "Missing columns in the uploaded data."

    for col, dtype in schema.items():
        actual = user_data[col].dtype
        if dtype is str:
            # Text columns are `object` in older pandas and a dedicated string
            # dtype in newer versions; both are acceptable.
            compatible = types.is_object_dtype(actual) or types.is_string_dtype(actual)
        elif dtype is int:
            # Any integer width is fine; comparing against np.dtype(int) would
            # make the result depend on the platform's default integer size.
            compatible = types.is_integer_dtype(actual)
        elif dtype is float:
            # A float column whose values all happen to be whole numbers is
            # parsed as integer by read_csv; that is still valid numeric data.
            compatible = types.is_float_dtype(actual) or types.is_integer_dtype(actual)
        else:
            compatible = actual == np.dtype(dtype)

        if not compatible:
            message = f"Incorrect data type for column {col}. Expected {dtype}, got {actual}."
            logging.error(message)
            return False, message

    logging.info("Data is valid.")
    return True, "Data is valid."
def load_user_data(file):
    """Load an uploaded CSV, validate it, and retrain the clustering pipeline.

    The new data and the retrained model are only committed to the module-level
    ``data`` and ``pipeline`` once fitting has succeeded, so a bad upload never
    leaves the app with data and model out of sync.

    Args:
        file: Value delivered by the file input; handed to ``pd.read_csv``.
            ``None`` (no file selected) yields an explanatory message.

    Returns:
        A status string for the UI: the success text, the validation failure
        message, or the text of the error that occurred.
    """
    global data, pipeline

    if file is None:
        return "No file uploaded."

    # Local import: scikit-learn is already a dependency of this module, and
    # keeping the import here leaves the file's import block untouched.
    from sklearn.base import clone

    try:
        user_data = pd.read_csv(file)
        is_valid, message = validate_data(user_data)
        if not is_valid:
            return message
        # Fit an unfitted copy so a failure part-way through fitting cannot
        # leave the live pipeline half-updated.
        candidate = clone(pipeline)
        candidate.fit(user_data)
    except Exception as e:
        # UI boundary: report the problem to the user, but keep the traceback in the log.
        logging.exception("Failed to load user data.")
        return str(e)

    data = user_data
    pipeline = candidate
    return "Data uploaded, validated, and model retrained successfully. You can now make predictions by selecting the 'Cluster Prediction' tab above"
# Preprocessing: standardize the numeric features and one-hot encode the
# categorical ones. 'User ID' is deliberately not listed, so it is dropped
# (ColumnTransformer discards unlisted columns by default).
preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), ['Session Duration', 'Pages Visited', 'Ads Clicked', 'Engagement Score',
                                   'Time Spent per Page', 'Click Through Rate', 'Conversion Rate',
                                   'Frequency of Visits', 'Bounce Rate']),
        # handle_unknown='ignore': categories unseen during fit encode as all zeros instead of raising.
        ('cat', OneHotEncoder(handle_unknown='ignore'), ['User Interests', 'Device Type', 'Time of Day']),
    ])
logging.info("Preprocessor setup complete.")

# Clustering. n_init is pinned explicitly: its default differs between
# scikit-learn releases (and triggers a FutureWarning in some), so leaving it
# implicit makes the resulting clusters depend on the installed version.
kmeans = KMeans(n_clusters=3, n_init=10, random_state=42)
logging.info("KMeans clustering configured.")

# Define the pipeline: preprocessing followed by clustering.
pipeline = Pipeline([
    ('preprocessor', preprocessor),
    ('cluster', kmeans)
])
logging.info("Pipeline created.")

# Fit the pipeline to the sample data so predictions work before any upload.
pipeline.fit(data)
def generate_insights(cluster_characteristics):
    """Translate a cluster's feature values into a string of recommendations.

    Args:
        cluster_characteristics: Mapping that provides the numeric entries
            'Engagement Score', 'Conversion Rate', 'Click Through Rate',
            'Bounce Rate', 'Frequency of Visits' and 'Time Spent per Page'.

    Returns:
        The recommendations whose rule matched, in rule order, separated by
        single spaces; an empty string when no rule matches.
    """
    stats = cluster_characteristics

    # Ordered (condition, recommendation) pairs. The thresholds are example
    # values, not calibrated ones. Conditions are wrapped in lambdas so they
    # are evaluated lazily, one after another, in the order listed.
    rules = (
        (lambda: stats['Engagement Score'] > 0.7 and stats['Conversion Rate'] < 0.1,
         "High engagement but low conversion: Consider optimizing the checkout process or providing targeted offers."),
        (lambda: stats['Click Through Rate'] > 0.2,
         "High click-through rate: Users are interacting well with ads. Increase ad relevance to boost conversions."),
        (lambda: stats['Bounce Rate'] > 0.3,
         "High bounce rate: Review landing page design and content relevance to improve user retention."),
        (lambda: stats['Frequency of Visits'] > 15,
         "Frequent visits: Users are returning often, consider loyalty programs or personalized content to maintain engagement."),
        (lambda: stats['Time Spent per Page'] < 20,
         "Low time spent per page: Content may not be engaging or relevant enough. Consider content optimization."),
        (lambda: stats['Conversion Rate'] > 0.15,
         "High conversion rate: Effective ad targeting. Explore scaling up ad spend on similar user segments."),
    )
    return " ".join(advice for applies, advice in rules if applies())
def predict_cluster(session_duration, pages_visited, ads_clicked, engagement_score, user_interests, device_type, time_of_day, time_spent_per_page, click_through_rate, conversion_rate, frequency_of_visits, bounce_rate):
    """Assign one user session to a cluster and describe that cluster.

    Each argument is the session's value for the feature of the same name
    (nine numeric features plus the categorical user interests, device type
    and time of day).

    Returns:
        A three-line string containing the predicted cluster index, the
        cluster centroid mapped back to original feature values, and the
        recommendations produced by ``generate_insights``. If any input is
        missing, a message naming the missing fields is returned instead.
    """
    logging.info("Starting cluster prediction.")

    num_features = ['Session Duration', 'Pages Visited', 'Ads Clicked', 'Engagement Score', 'Time Spent per Page', 'Click Through Rate', 'Conversion Rate', 'Frequency of Visits', 'Bounce Rate']
    cat_features = ['User Interests', 'Device Type', 'Time of Day']

    session = {
        'Session Duration': session_duration,
        'Pages Visited': pages_visited,
        'Ads Clicked': ads_clicked,
        'Engagement Score': engagement_score,
        'User Interests': user_interests,
        'Device Type': device_type,
        'Time of Day': time_of_day,
        'Time Spent per Page': time_spent_per_page,
        'Click Through Rate': click_through_rate,
        'Conversion Rate': conversion_rate,
        'Frequency of Visits': frequency_of_visits,
        'Bounce Rate': bounce_rate,
    }

    # A blank field would otherwise only surface as an opaque estimator error.
    missing = [name for name, value in session.items() if pd.isna(value)]
    if missing:
        logging.warning("Cluster prediction aborted; missing inputs: %s", missing)
        return f"Please provide a value for: {', '.join(missing)}"

    input_df = pd.DataFrame({name: [value] for name, value in session.items()})
    logging.debug(f"Input DataFrame: {input_df}")

    cluster = int(pipeline.predict(input_df)[0])
    logging.info(f"Predicted cluster: {cluster}")

    centroid = pipeline.named_steps['cluster'].cluster_centers_[cluster]
    transformers = pipeline.named_steps['preprocessor'].named_transformers_

    # The preprocessor emits the scaled numeric columns first, followed by the
    # one-hot columns, so the centroid splits at the number of numeric features.
    split = len(num_features)
    original_num_values = transformers['num'].inverse_transform([centroid[:split]])[0]
    original_cat_values = transformers['cat'].inverse_transform([centroid[split:]])[0]

    # Convert to built-in types so the text output shows plain numbers and
    # strings rather than NumPy scalar representations.
    cluster_characteristics = {name: float(value) for name, value in zip(num_features, original_num_values)}
    cluster_characteristics.update(
        (name, None if value is None else str(value))
        for name, value in zip(cat_features, original_cat_values)
    )

    # Insights are computed on the unrounded values; rounding is display-only.
    insights = generate_insights(cluster_characteristics)
    displayed = {
        name: round(value, 4) if isinstance(value, float) else value
        for name, value in cluster_characteristics.items()
    }

    logging.info("Cluster prediction completed.")
    return f"Predicted Cluster: {cluster}\nCharacteristics: {displayed}\nActionable Insights: {insights}"
def ad_performance_analytics():
    """Summarize the current dataset's average ad metrics.

    Reads the module-level ``data`` DataFrame and averages its
    'Click Through Rate', 'Conversion Rate' and 'Bounce Rate' columns.

    Returns:
        A three-line report with each average formatted as a percentage
        with two decimal places.
    """
    logging.info("Calculating ad performance analytics.")

    avg_ctr, avg_conversion_rate, avg_bounce_rate = (
        data[column].mean()
        for column in ('Click Through Rate', 'Conversion Rate', 'Bounce Rate')
    )
    logging.debug(f"Average CTR: {avg_ctr}, Average Conversion Rate: {avg_conversion_rate}, Average Bounce Rate: {avg_bounce_rate}")

    # Assemble the analytics report, one metric per line.
    report_lines = (
        f"Average Click Through Rate: {avg_ctr:.2%}",
        f"Average Conversion Rate: {avg_conversion_rate:.2%}",
        f"Average Bounce Rate: {avg_bounce_rate:.2%}",
    )
    report = "\n".join(report_lines)

    logging.info("Ad performance analytics calculation completed.")
    return report
| # Gradio UI: one Blocks app with three tabs ("Upload Data", "Cluster Prediction", "Ad Performance Analytics"). | |
| # NOTE(review): leading indentation was lost in this copy of the file, so the nesting of the `with` | |
| # blocks below cannot be read off the text -- restore it from the original before running. | |
| with gr.Blocks() as demo: | |
| # Tab 1: upload a CSV; load_user_data validates it and refits the pipeline on it. | |
| with gr.Tab("Upload Data"): | |
| gr.Markdown(""" | |
| **Upload your data file in CSV format. Ensure it contains the following columns with appropriate data types:** | |
| - User ID (int) | |
| - Session Duration (int) | |
| - Pages Visited (int) | |
| - Ads Clicked (int) | |
| - User Interests (str) | |
| - Engagement Score (float) | |
| - Device Type (str) | |
| - Time of Day (str) | |
| - Time Spent per Page (int) | |
| - Click Through Rate (float) | |
| - Conversion Rate (float) | |
| - Frequency of Visits (int) | |
| - Bounce Rate (float) | |
| **Note:** You can upload your own data for analysis, or continue using the existing sample data for predictions by selecting the **'Cluster Prediction'** tab above. | |
| """) | |
| # Whenever the file input changes, the status string returned by load_user_data is shown in the textbox. | |
| file_input = gr.File(label="Upload your CSV data file") | |
| upload_message = gr.Textbox() | |
| file_input.change(load_user_data, inputs=file_input, outputs=upload_message) | |
| # Tab 2: enter one session's feature values and get the predicted cluster plus insights. | |
| with gr.Tab("Cluster Prediction"): | |
| # NOTE(review): unclear from this copy whether the Row wraps only the Markdown below or the input widgets too -- confirm. | |
| with gr.Row(): | |
| gr.Markdown("**This form allows you to input user session data to predict which cluster the user belongs to and provides actionable insights based on their behavior.**") | |
| # Input widgets. The initial values equal the first row of the sample data, except Engagement Score (0.5 here, 0.8 in the sample). | |
| session_duration = gr.Number(label="Session Duration", value=300) # Set initial value | |
| pages_visited = gr.Number(label="Pages Visited", value=5) # Set initial value | |
| ads_clicked = gr.Number(label="Ads Clicked", value=2) # Set initial value | |
| engagement_score = gr.Slider(0, 1, label="Engagement Score", value=0.5) # Set initial value | |
| user_interests = gr.Dropdown(['technology', 'sports', 'arts'], label="User Interests", value='technology') # Set initial value | |
| device_type = gr.Radio(['mobile', 'desktop', 'tablet'], label="Device Type", value='mobile') # Set initial value | |
| time_of_day = gr.Radio(['morning', 'afternoon', 'evening'], label="Time of Day", value='morning') # Set initial value | |
| time_spent_per_page = gr.Number(label="Time Spent per Page", value=30) # Set initial value | |
| click_through_rate = gr.Slider(0, 1, step=0.01, label="Click Through Rate", value=0.1) # Set initial value | |
| conversion_rate = gr.Slider(0, 1, step=0.01, label="Conversion Rate", value=0.05) # Set initial value | |
| frequency_of_visits = gr.Number(label="Frequency of Visits", value=10) # Set initial value | |
| bounce_rate = gr.Slider(0, 1, step=0.01, label="Bounce Rate", value=0.2) # Set initial value | |
| predict_button = gr.Button("Predict") | |
| output_textbox = gr.Textbox(label="Prediction Output", lines=4) | |
| # The inputs list is in the same order as predict_cluster's parameters; keep the two in sync. | |
| predict_button.click( | |
| predict_cluster, | |
| inputs=[ | |
| session_duration, pages_visited, ads_clicked, engagement_score, user_interests, device_type, | |
| time_of_day, time_spent_per_page, click_through_rate, conversion_rate, frequency_of_visits, bounce_rate | |
| ], | |
| outputs=output_textbox | |
| ) | |
| logging.info("Gradio predict button configured.") | |
| # Tab 3: averages of CTR, conversion rate and bounce rate over the current dataset. | |
| with gr.Tab("Ad Performance Analytics"): | |
| gr.Markdown(""" | |
| **This form provides a summary of key performance metrics for ads.** | |
| - **Average Click-Through Rate (CTR):** Measures the percentage of ad views that result in clicks. Higher values indicate more effective ad engagement. | |
| - **Average Conversion Rate:** Indicates the percentage of clicks that convert into actions, such as purchases or sign-ups. This metric helps assess the effectiveness of ad targeting and the overall conversion potential. | |
| - **Average Bounce Rate:** Reflects the percentage of single-page visits. Lower bounce rates suggest that the landing pages are relevant to the visitors' interests. | |
| Understanding these metrics can help optimize ad strategies and improve overall campaign performance. | |
| """) | |
| # ad_performance_analytics takes no inputs, so only an output component is wired to the button. | |
| analytics_button = gr.Button("Analyze Ad Performance") | |
| analytics_output = gr.Textbox(label="Analytics Output", lines=3) | |
| analytics_button.click( | |
| ad_performance_analytics, | |
| outputs=analytics_output | |
| ) | |
| logging.info("Gradio analytics button configured.") | |
| # Start the app. NOTE(review): launch() presumably blocks when this file is run as a script, in which | |
| # case the log line below is only emitted after the server stops -- confirm against the Gradio docs. | |
| demo.launch() | |
| logging.info("Gradio interface launched.") | |