Spaces:

skyvera
/

AIAdServerOptimizer

Sleeping

App Files Files Community

AIAdServerOptimizer / app.py

skyvera

Upload 3 files

dc9f3db verified over 1 year ago

raw

history blame contribute delete

12.3 kB

	import pandas as pd
	import numpy as np
	from sklearn.cluster import KMeans
	from sklearn.preprocessing import StandardScaler, OneHotEncoder
	from sklearn.compose import ColumnTransformer
	from sklearn.pipeline import Pipeline
	import logging
	import gradio as gr

	# Logging setup: timestamped records, DEBUG verbosity and above.
	logging.basicConfig(format='%(asctime)s - %(levelname)s - %(message)s', level=logging.DEBUG)

	# Built-in sample data, written one row per user (the column order is the
	# order of _SAMPLE_COLUMNS). This frame is what the module-level name `data`
	# holds until something rebinds it.
	_SAMPLE_COLUMNS = [
	    'User ID', 'Session Duration', 'Pages Visited', 'Ads Clicked',
	    'User Interests', 'Engagement Score', 'Device Type', 'Time of Day',
	    'Time Spent per Page', 'Click Through Rate', 'Conversion Rate',
	    'Frequency of Visits', 'Bounce Rate',
	]
	_SAMPLE_ROWS = [
	    (1, 300, 5, 2, 'technology', 0.8, 'mobile', 'morning', 30, 0.1, 0.05, 10, 0.2),
	    (2, 450, 8, 1, 'sports', 0.5, 'desktop', 'afternoon', 25, 0.2, 0.1, 20, 0.1),
	    (3, 200, 3, 0, 'technology', 0.3, 'mobile', 'evening', 45, 0.05, 0, 5, 0.5),
	    (4, 600, 12, 3, 'arts', 0.9, 'tablet', 'morning', 20, 0.3, 0.2, 15, 0.05),
	    (5, 350, 7, 2, 'sports', 0.7, 'desktop', 'afternoon', 50, 0.15, 0.1, 10, 0.3),
	]
	data = pd.DataFrame(_SAMPLE_ROWS, columns=_SAMPLE_COLUMNS)

	logging.info("Sample data prepared.")

	# Schema for uploaded data: every required column name (including 'User ID')
	# paired with the Python type its values are expected to have. The pairs are
	# listed in the order validate_data walks them.
	expected_columns = dict([
	    ('User ID', int),
	    ('Session Duration', int),
	    ('Pages Visited', int),
	    ('Ads Clicked', int),
	    ('User Interests', str),
	    ('Engagement Score', float),
	    ('Device Type', str),
	    ('Time of Day', str),
	    ('Time Spent per Page', int),
	    ('Click Through Rate', float),
	    ('Conversion Rate', float),
	    ('Frequency of Visits', int),
	    ('Bounce Rate', float),
	])

	def validate_data(user_data, schema=None):
	    """Check that a DataFrame has the required columns with compatible dtypes.

	    Args:
	        user_data: DataFrame to validate.
	        schema: Optional mapping of column name -> expected Python type
	            (``int``, ``float``, ``str`` or anything ``np.dtype`` accepts).
	            Defaults to the module-level ``expected_columns``.

	    Returns:
	        A ``(is_valid, message)`` tuple. ``message`` is "Data is valid." on
	        success, otherwise a description of the first problem found.
	    """
	    schema = expected_columns if schema is None else schema
	    types = pd.api.types

	    # Name the missing columns so the uploader knows what to add.
	    missing = [col for col in schema if col not in user_data.columns]
	    if missing:
	        message = "Missing columns in the uploaded data: " + ", ".join(missing)
	        logging.error(message)
	        return False, message

	    for col, dtype in schema.items():
	        actual = user_data[col].dtype
	        # Compare by dtype *family* rather than exact equality: the width of
	        # the default integer is platform dependent, read_csv parses a float
	        # column holding only whole numbers as integer, and text columns may
	        # be stored as object or as a dedicated pandas string dtype.
	        if dtype is str:
	            matches = types.is_object_dtype(actual) or types.is_string_dtype(actual)
	        elif dtype is int:
	            matches = types.is_integer_dtype(actual)
	        elif dtype is float:
	            matches = types.is_float_dtype(actual) or types.is_integer_dtype(actual)
	        else:
	            matches = actual == np.dtype(dtype)
	        if not matches:
	            message = f"Incorrect data type for column {col}. Expected {dtype}, got {actual}."
	            logging.error(message)
	            return False, message

	    logging.info("Data is valid.")
	    return True, "Data is valid."

	def load_user_data(file):
	    """Load an uploaded CSV, validate it, and retrain the clustering model.

	    The new data and the retrained model are installed together, and only
	    after training succeeded, so a bad upload leaves the previous data and
	    model in place.

	    Args:
	        file: Value handed over by the upload widget; it is passed straight
	            to ``pd.read_csv``. ``None`` (no file selected) is reported
	            without attempting to read anything.

	    Returns:
	        A status message: the success text, the validation message, or the
	        text of the exception that stopped the load.
	    """
	    import copy  # local import keeps this block self-contained

	    global data, pipeline

	    if file is None:
	        message = "No file uploaded. Please select a CSV file."
	        logging.warning(message)
	        return message

	    try:
	        user_data = pd.read_csv(file)
	        is_valid, message = validate_data(user_data)
	        if not is_valid:
	            return message
	        # Train a copy: fit() refits the pipeline's steps one after another,
	        # so a failure part-way (for example in the clustering step) would
	        # otherwise leave the shared pipeline half-updated.
	        candidate = copy.deepcopy(pipeline)
	        candidate.fit(user_data)
	    except Exception as e:
	        logging.exception("Failed to load uploaded data.")
	        return str(e)

	    # Everything succeeded: swap data and model in together.
	    data = user_data
	    pipeline = candidate
	    return "Data uploaded, validated, and model retrained successfully. You can now make predictions by selecting the 'Cluster Prediction' tab above"

	# Columns handled by each preprocessing branch.
	_NUMERIC_FEATURES = [
	    'Session Duration', 'Pages Visited', 'Ads Clicked', 'Engagement Score',
	    'Time Spent per Page', 'Click Through Rate', 'Conversion Rate',
	    'Frequency of Visits', 'Bounce Rate',
	]
	_CATEGORICAL_FEATURES = ['User Interests', 'Device Type', 'Time of Day']

	# Preprocessing: scale the numeric columns, one-hot encode the categorical
	# ones (categories not seen during fitting are ignored at predict time).
	preprocessor = ColumnTransformer(transformers=[
	    ('num', StandardScaler(), _NUMERIC_FEATURES),
	    ('cat', OneHotEncoder(handle_unknown='ignore'), _CATEGORICAL_FEATURES),
	])
	logging.info("Preprocessor setup complete.")

	# Clustering step: three clusters, fixed seed.
	kmeans = KMeans(random_state=42, n_clusters=3)
	logging.info("KMeans clustering configured.")

	# Chain preprocessing and clustering into a single estimator.
	pipeline = Pipeline(steps=[('preprocessor', preprocessor), ('cluster', kmeans)])
	logging.info("Pipeline created.")

	# Train on whatever `data` currently holds.
	pipeline.fit(data)

	def generate_insights(cluster_characteristics):
	    """Turn a cluster's feature values into a line of advisory text.

	    Args:
	        cluster_characteristics: Mapping that must provide numeric values for
	            'Engagement Score', 'Conversion Rate', 'Click Through Rate',
	            'Bounce Rate', 'Frequency of Visits' and 'Time Spent per Page'.

	    Returns:
	        The messages of every rule that fires, in rule order, joined by a
	        single space; an empty string when none fires.

	    Raises:
	        KeyError: if a key a rule needs to read is absent.
	    """
	    # Example rules based on hypothetical thresholds: (predicate, message).
	    rules = (
	        (lambda c: c['Engagement Score'] > 0.7 and c['Conversion Rate'] < 0.1,
	         "High engagement but low conversion: Consider optimizing the checkout process or providing targeted offers."),
	        (lambda c: c['Click Through Rate'] > 0.2,
	         "High click-through rate: Users are interacting well with ads. Increase ad relevance to boost conversions."),
	        (lambda c: c['Bounce Rate'] > 0.3,
	         "High bounce rate: Review landing page design and content relevance to improve user retention."),
	        (lambda c: c['Frequency of Visits'] > 15,
	         "Frequent visits: Users are returning often, consider loyalty programs or personalized content to maintain engagement."),
	        (lambda c: c['Time Spent per Page'] < 20,
	         "Low time spent per page: Content may not be engaging or relevant enough. Consider content optimization."),
	        (lambda c: c['Conversion Rate'] > 0.15,
	         "High conversion rate: Effective ad targeting. Explore scaling up ad spend on similar user segments."),
	    )
	    return " ".join(
	        message for applies, message in rules if applies(cluster_characteristics)
	    )

	def predict_cluster(session_duration, pages_visited, ads_clicked, engagement_score, user_interests, device_type, time_of_day, time_spent_per_page, click_through_rate, conversion_rate, frequency_of_visits, bounce_rate):
	    """Assign one user session to a cluster and describe that cluster.

	    The twelve arguments are the session's feature values, one per model
	    input column. The predicted cluster's centroid is mapped back to the
	    original feature space and fed to ``generate_insights``.

	    Returns:
	        A three-line string with the predicted cluster, the decoded centroid
	        characteristics and the insights text; or, when any argument is
	        ``None``, a message naming the missing inputs.
	    """
	    logging.info("Starting cluster prediction.")
	    inputs = {
	        'Session Duration': session_duration,
	        'Pages Visited': pages_visited,
	        'Ads Clicked': ads_clicked,
	        'Engagement Score': engagement_score,
	        'User Interests': user_interests,
	        'Device Type': device_type,
	        'Time of Day': time_of_day,
	        'Time Spent per Page': time_spent_per_page,
	        'Click Through Rate': click_through_rate,
	        'Conversion Rate': conversion_rate,
	        'Frequency of Visits': frequency_of_visits,
	        'Bounce Rate': bounce_rate,
	    }

	    # Report blank form fields up front instead of failing inside the model.
	    missing = [name for name, value in inputs.items() if value is None]
	    if missing:
	        message = "Missing input value(s): " + ", ".join(missing)
	        logging.error(message)
	        return message

	    input_df = pd.DataFrame({name: [value] for name, value in inputs.items()})
	    logging.debug(f"Input DataFrame: {input_df}")
	    cluster = pipeline.predict(input_df)[0]
	    logging.info(f"Predicted cluster: {cluster}")
	    centroid = pipeline.named_steps['cluster'].cluster_centers_[cluster]

	    # Decode features for insights. The centroid is split at the number of
	    # numeric features: scaled numeric values first, one-hot columns after.
	    num_features = ['Session Duration', 'Pages Visited', 'Ads Clicked', 'Engagement Score', 'Time Spent per Page', 'Click Through Rate', 'Conversion Rate', 'Frequency of Visits', 'Bounce Rate']
	    cat_features = ['User Interests', 'Device Type', 'Time of Day']
	    split = len(num_features)
	    transformers = pipeline.named_steps['preprocessor'].named_transformers_
	    original_num_values = transformers['num'].inverse_transform([centroid[:split]])[0]
	    original_cat_values = transformers['cat'].inverse_transform([centroid[split:]])[0]

	    # Plain, lightly rounded Python floats: keeps the printed dictionary
	    # readable and stops float residue from the scale/unscale round trip
	    # from tipping a value across an insight threshold.
	    cluster_characteristics = {
	        name: round(float(value), 6)
	        for name, value in zip(num_features, original_num_values)
	    }
	    cluster_characteristics.update(zip(cat_features, original_cat_values))

	    # Generate actionable insights
	    insights = generate_insights(cluster_characteristics)

	    logging.info("Cluster prediction completed.")
	    return f"Predicted Cluster: {cluster}\nCharacteristics: {cluster_characteristics}\nActionable Insights: {insights}"

	def ad_performance_analytics(dataset=None):
	    """Summarise average ad metrics as a short text report.

	    Args:
	        dataset: Optional DataFrame with 'Click Through Rate',
	            'Conversion Rate' and 'Bounce Rate' columns. Defaults to the
	            module-level ``data``.

	    Returns:
	        Three lines giving the mean of each metric as a percentage with two
	        decimals, or a short notice when the dataset has no rows.
	    """
	    logging.info("Calculating ad performance analytics.")
	    frame = data if dataset is None else dataset

	    # The mean of an empty column is NaN, which would be reported as "nan%".
	    if frame.empty:
	        message = "No data available for analytics."
	        logging.warning(message)
	        return message

	    avg_ctr = frame['Click Through Rate'].mean()
	    avg_conversion_rate = frame['Conversion Rate'].mean()
	    avg_bounce_rate = frame['Bounce Rate'].mean()
	    logging.debug(f"Average CTR: {avg_ctr}, Average Conversion Rate: {avg_conversion_rate}, Average Bounce Rate: {avg_bounce_rate}")

	    # Prepare the analytics report
	    report = "\n".join([
	        f"Average Click Through Rate: {avg_ctr:.2%}",
	        f"Average Conversion Rate: {avg_conversion_rate:.2%}",
	        f"Average Bounce Rate: {avg_bounce_rate:.2%}",
	    ])

	    logging.info("Ad performance analytics calculation completed.")
	    return report

	# Gradio user interface: three tabs wired to the functions defined above.
	with gr.Blocks() as demo:
	    # Tab 1: upload a CSV; every change of the file widget calls load_user_data.
	    with gr.Tab("Upload Data"):
	        gr.Markdown("""
	        Upload your data file in CSV format. Ensure it contains the following columns with appropriate data types:
	        - User ID (int)
	        - Session Duration (int)
	        - Pages Visited (int)
	        - Ads Clicked (int)
	        - User Interests (str)
	        - Engagement Score (float)
	        - Device Type (str)
	        - Time of Day (str)
	        - Time Spent per Page (int)
	        - Click Through Rate (float)
	        - Conversion Rate (float)
	        - Frequency of Visits (int)
	        - Bounce Rate (float)

	        Note: You can upload your own data for analysis, or continue using the existing sample data for predictions by selecting the 'Cluster Prediction' tab above.
	        """)
	        file_input = gr.File(label="Upload your CSV data file")
	        upload_message = gr.Textbox()
	        file_input.change(load_user_data, inputs=file_input, outputs=upload_message)

	    # Tab 2: form for a single session; the button calls predict_cluster.
	    with gr.Tab("Cluster Prediction"):
	        # NOTE(review): the original nesting was lost; this assumes the Row
	        # wraps only the introductory Markdown - confirm against the
	        # intended layout.
	        with gr.Row():
	            gr.Markdown("This form allows you to input user session data to predict which cluster the user belongs to and provides actionable insights based on their behavior.")
	        # Every input below starts with an initial value.
	        session_duration = gr.Number(label="Session Duration", value=300)
	        pages_visited = gr.Number(label="Pages Visited", value=5)
	        ads_clicked = gr.Number(label="Ads Clicked", value=2)
	        engagement_score = gr.Slider(0, 1, label="Engagement Score", value=0.5)
	        user_interests = gr.Dropdown(['technology', 'sports', 'arts'], label="User Interests", value='technology')
	        device_type = gr.Radio(['mobile', 'desktop', 'tablet'], label="Device Type", value='mobile')
	        time_of_day = gr.Radio(['morning', 'afternoon', 'evening'], label="Time of Day", value='morning')
	        time_spent_per_page = gr.Number(label="Time Spent per Page", value=30)
	        click_through_rate = gr.Slider(0, 1, step=0.01, label="Click Through Rate", value=0.1)
	        conversion_rate = gr.Slider(0, 1, step=0.01, label="Conversion Rate", value=0.05)
	        frequency_of_visits = gr.Number(label="Frequency of Visits", value=10)
	        bounce_rate = gr.Slider(0, 1, step=0.01, label="Bounce Rate", value=0.2)
	        predict_button = gr.Button("Predict")
	        output_textbox = gr.Textbox(label="Prediction Output", lines=4)
	        # The input order must match predict_cluster's parameter order.
	        predict_button.click(
	            predict_cluster,
	            inputs=[
	                session_duration, pages_visited, ads_clicked, engagement_score, user_interests, device_type,
	                time_of_day, time_spent_per_page, click_through_rate, conversion_rate, frequency_of_visits, bounce_rate
	            ],
	            outputs=output_textbox
	        )
	        logging.info("Gradio predict button configured.")

	    # Tab 3: one button that calls ad_performance_analytics.
	    with gr.Tab("Ad Performance Analytics"):
	        gr.Markdown("""
	        This form provides a summary of key performance metrics for ads.

	        - Average Click-Through Rate (CTR): Measures the percentage of ad views that result in clicks. Higher values indicate more effective ad engagement.
	        - Average Conversion Rate: Indicates the percentage of clicks that convert into actions, such as purchases or sign-ups. This metric helps assess the effectiveness of ad targeting and the overall conversion potential.
	        - Average Bounce Rate: Reflects the percentage of single-page visits. Lower bounce rates suggest that the landing pages are relevant to the visitors' interests.

	        Understanding these metrics can help optimize ad strategies and improve overall campaign performance.
	        """)
	        analytics_button = gr.Button("Analyze Ad Performance")
	        analytics_output = gr.Textbox(label="Analytics Output", lines=3)
	        analytics_button.click(
	            ad_performance_analytics,
	            outputs=analytics_output
	        )
	        logging.info("Gradio analytics button configured.")

	demo.launch()
	# NOTE(review): presumably launch() blocks when run as a script, in which
	# case this line is only logged after the server stops - confirm.
	logging.info("Gradio interface launched.")