Spaces:

ESCP
/

music-streaming-analytics

Sleeping

App Files Files Community

music-streaming-analytics / app.py

axelmc9

Update app.py

de358d3 verified 30 days ago

raw

history blame contribute delete

10.3 kB

	import gradio as gr
	import pandas as pd
	import numpy as np
	import matplotlib.pyplot as plt

	# -----------------------------
	# Synthetic demo dataset
	# -----------------------------
	# Seed NumPy's global RNG so the synthetic data is identical on every start.
	# The random draws below must stay in this exact order to reproduce it.
	np.random.seed(42)

	n_tracks = 60
	genres = ["Pop", "Hip-Hop", "Rock", "Electronic", "R&B", "Indie"]
	tracks = ["Track " + str(number) for number in range(1, n_tracks + 1)]

	# Raw per-track metrics: one row per track, every numeric column drawn at
	# random and rounded to two decimals (streams are whole numbers).
	df = pd.DataFrame(dict(
	    track=tracks,
	    genre=np.random.choice(genres, n_tracks),
	    sentiment_score=np.random.normal(0.2, 0.45, n_tracks).round(2),
	    streams=np.random.randint(5000, 250000, n_tracks),
	    skip_rate=np.random.uniform(0.05, 0.45, n_tracks).round(2),
	    completion_rate=np.random.uniform(0.45, 0.95, n_tracks).round(2),
	    save_rate=np.random.uniform(0.02, 0.30, n_tracks).round(2),
	    subscription_price=np.random.uniform(5.99, 14.99, n_tracks).round(2),
	))

	# Derived columns, computed in dependency order (each lambda sees the
	# columns created by the ones before it).
	df = df.assign(
	    # Scores in (-0.1, 0.1] count as Neutral; below is Negative, above Positive.
	    sentiment_label=lambda frame: pd.cut(
	        frame["sentiment_score"],
	        bins=[-10, -0.1, 0.1, 10],
	        labels=["Negative", "Neutral", "Positive"],
	    ),
	    # Scale current streams up for sentiment and saves, down for skips;
	    # never forecast fewer than 1000 streams.
	    forecast_streams=lambda frame: (
	        frame["streams"]
	        * (
	            1
	            + 0.35 * frame["sentiment_score"]
	            + 0.25 * frame["save_rate"]
	            - 0.20 * frame["skip_rate"]
	        )
	    ).clip(lower=1000).astype(int),
	    # Weighted blend: 40% sentiment, 30% forecast relative to the best
	    # track, 20% completion rate, 10% save rate.
	    playlist_score=lambda frame: (
	        0.4 * ((frame["sentiment_score"] + 1) / 2)
	        + 0.3 * (frame["forecast_streams"] / frame["forecast_streams"].max())
	        + 0.2 * frame["completion_rate"]
	        + 0.1 * frame["save_rate"]
	    ),
	    # Scores up to 0.45 are demoted, up to 0.65 kept, anything higher promoted.
	    playlist_action=lambda frame: pd.cut(
	        frame["playlist_score"],
	        bins=[-1, 0.45, 0.65, 2],
	        labels=["Demote", "Keep", "Promote"],
	    ),
	)

	# Multiplier applied to the price effect in the pricing model, per listener
	# segment: values above 1 react more strongly to price than "Standard".
	segment_price_sensitivity = {
	    "Student": 1.35,
	    "Casual Listener": 1.15,
	    "Standard": 1.00,
	    "Family": 0.85,
	    "Premium Heavy User": 0.65,
	}

	# -----------------------------
	# Helper functions
	# -----------------------------
	def dataset_preview():
	    """Return the 15 tracks with the highest forecast for the overview table.

	    Reads the module-level ``df`` and keeps only the columns displayed in the
	    preview, ordered by ``forecast_streams`` from highest to lowest.
	    """
	    shown_columns = [
	        "track", "genre", "sentiment_score", "sentiment_label",
	        "streams", "forecast_streams", "playlist_action",
	    ]
	    ranked = df[shown_columns].sort_values("forecast_streams", ascending=False)
	    return ranked.head(15)

	def sentiment_summary():
	    """Build the plain-text sentiment report from the module-level ``df``.

	    Returns:
	        A multi-line string with the mean sentiment score (3 decimals), the
	        percentage of tracks labelled Positive and Negative (1 decimal), the
	        genre with the highest mean sentiment, and a fixed interpretation.
	    """
	    labels = df["sentiment_label"]

	    avg_sent = round(df["sentiment_score"].mean(), 3)
	    # The mean of a boolean mask is the fraction of rows carrying that label.
	    pos_pct = round(labels.eq("Positive").mean() * 100, 1)
	    neg_pct = round(labels.eq("Negative").mean() * 100, 1)

	    # Rank genres by mean sentiment, best first, and take the leader.
	    genre_means = df.groupby("genre")["sentiment_score"].mean()
	    top_genre = genre_means.sort_values(ascending=False).index[0]

	    return (
	        f"Average sentiment score: {avg_sent}\n"
	        f"Positive reviews share: {pos_pct}%\n"
	        f"Negative reviews share: {neg_pct}%\n"
	        f"Best-performing genre by sentiment: {top_genre}\n\n"
	        f"Interpretation: genres and tracks with stronger sentiment should be prioritized "
	        f"for playlist exposure because they are more likely to support future engagement."
	    )

	def sentiment_chart():
	    """Draw a bar chart of how many tracks carry each sentiment label.

	    Reads the module-level ``df``.

	    Returns:
	        A Matplotlib ``Figure`` with one bar per label in the fixed order
	        Positive, Neutral, Negative; a label missing from the counts is
	        drawn as 0.
	    """
	    # Build the figure without pyplot. This handler runs inside a long-lived
	    # server: figures created through plt.subplots() stay registered in
	    # pyplot's global state until explicitly closed (one leaked per click),
	    # and plt.tight_layout() targets the global "current figure", which is
	    # not safe when several handlers run at once.
	    from matplotlib.figure import Figure

	    counts = (
	        df["sentiment_label"]
	        .value_counts()
	        .reindex(["Positive", "Neutral", "Negative"])
	        .fillna(0)
	    )

	    fig = Figure(figsize=(7, 4))
	    ax = fig.subplots()
	    counts.plot(kind="bar", ax=ax)
	    ax.set_title("Review Sentiment Distribution")
	    ax.set_xlabel("Sentiment")
	    ax.set_ylabel("Number of Tracks")
	    fig.tight_layout()
	    return fig

	def genre_sentiment_chart():
	    """Draw a bar chart of the mean sentiment score of each genre.

	    Reads the module-level ``df``.

	    Returns:
	        A Matplotlib ``Figure`` with one bar per genre, ordered from the
	        highest mean sentiment to the lowest.
	    """
	    # Build the figure without pyplot. This handler runs inside a long-lived
	    # server: figures created through plt.subplots() stay registered in
	    # pyplot's global state until explicitly closed (one leaked per click),
	    # and plt.tight_layout() targets the global "current figure", which is
	    # not safe when several handlers run at once.
	    from matplotlib.figure import Figure

	    genre_scores = (
	        df.groupby("genre")["sentiment_score"]
	        .mean()
	        .sort_values(ascending=False)
	    )

	    fig = Figure(figsize=(8, 4))
	    ax = fig.subplots()
	    genre_scores.plot(kind="bar", ax=ax)
	    ax.set_title("Average Sentiment by Genre")
	    ax.set_xlabel("Genre")
	    ax.set_ylabel("Average Sentiment Score")
	    fig.tight_layout()
	    return fig

	def top_playlist_recommendations():
	    """Return the ten best playlist candidates plus a short explanation.

	    Reads the module-level ``df``. Rows are ordered by ``playlist_action``
	    descending and, within the same action, by ``forecast_streams``
	    descending.

	    Returns:
	        A ``(table, explanation)`` tuple: the top 10 rows restricted to the
	        displayed columns, and a fixed text describing the ranking logic.
	    """
	    shown_columns = [
	        "track", "genre", "sentiment_score", "forecast_streams",
	        "completion_rate", "save_rate", "playlist_action",
	    ]
	    ranked = df[shown_columns].sort_values(
	        by=["playlist_action", "forecast_streams"],
	        ascending=[False, False],
	    )
	    top_df = ranked.head(10)

	    explanation = (
	        "Top tracks are ranked using sentiment, forecasted streams, completion rate, "
	        "and save rate. Tracks marked 'Promote' are the strongest candidates for editorial playlists."
	    )
	    return top_df, explanation

	def forecast_summary():
	    """Build the plain-text forecast report from the module-level ``df``.

	    Returns:
	        A multi-line string with the current and forecast average streams
	        (truncated to whole numbers), the growth between those two averages
	        in percent (1 decimal), the track with the highest forecast, and a
	        fixed interpretation.
	    """
	    current_avg = int(df["streams"].mean())
	    future_avg = int(df["forecast_streams"].mean())
	    # Growth is measured between the truncated averages shown in the report.
	    growth = round((future_avg - current_avg) / current_avg * 100, 1)

	    best_first = df.sort_values("forecast_streams", ascending=False)
	    top_track = best_first["track"].iloc[0]

	    return (
	        f"Current average streams: {current_avg}\n"
	        f"Forecast average streams: {future_avg}\n"
	        f"Expected growth: {growth}%\n"
	        f"Top forecasted track: {top_track}\n\n"
	        f"Interpretation: use forecast signals together with sentiment to decide which tracks "
	        f"deserve promotion in major playlists."
	    )

	def forecast_chart():
	    """Plot current versus forecast streams for the ten top-forecast tracks.

	    Reads the module-level ``df``.

	    Returns:
	        A Matplotlib ``Figure`` showing current streams as bars and forecast
	        streams as a marked line, one x position per track.
	    """
	    # Build the figure without pyplot. This handler runs inside a long-lived
	    # server: figures created through plt.subplots() stay registered in
	    # pyplot's global state until explicitly closed (one leaked per click),
	    # and plt.xticks()/plt.tight_layout() target the global "current"
	    # axes/figure, which is not safe when several handlers run at once.
	    from matplotlib.figure import Figure

	    top10 = df.sort_values("forecast_streams", ascending=False).head(10)

	    fig = Figure(figsize=(9, 4))
	    ax = fig.subplots()
	    ax.bar(top10["track"], top10["streams"], label="Current Streams")
	    ax.plot(top10["track"], top10["forecast_streams"], marker="o", label="Forecast Streams")
	    ax.set_title("Current vs Forecasted Streams for Top Tracks")
	    ax.set_xlabel("Track")
	    ax.set_ylabel("Streams")
	    ax.legend()
	    # Slant the track names so they do not overlap; set on this axes' own
	    # tick labels instead of going through pyplot.
	    for tick_label in ax.get_xticklabels():
	        tick_label.set_rotation(45)
	        tick_label.set_horizontalalignment("right")
	    fig.tight_layout()
	    return fig

	def pricing_simulator(segment, price):
	    """Estimate conversion, churn and revenue for one segment at one price.

	    The model starts from a 42% conversion rate and an 18% churn rate at a
	    price of 9.99 over 10000 users. Each unit of price above (below) 9.99
	    lowers (raises) conversion and raises (lowers) churn, scaled by the
	    segment's sensitivity.

	    Args:
	        segment: Key of ``segment_price_sensitivity``.
	        price: Monthly price to test.

	    Returns:
	        A multi-line report string with the estimates and a recommendation.

	    Raises:
	        KeyError: If ``segment`` is not a key of ``segment_price_sensitivity``.
	    """
	    sensitivity = segment_price_sensitivity[segment]

	    reference_users = 10000
	    price_gap = price - 9.99

	    # Clamp both rates so extreme prices cannot produce implausible values.
	    conversion = min(0.85, max(0.05, 0.42 - sensitivity * price_gap * 0.035))
	    churn = min(0.60, max(0.03, 0.18 + sensitivity * price_gap * 0.025))

	    paying_users = int(reference_users * conversion * (1 - churn))
	    monthly_revenue = round(paying_users * price, 2)

	    # Low conversion takes priority over high churn when both apply.
	    if conversion >= 0.20 and churn <= 0.28:
	        recommendation = "This pricing level looks commercially reasonable for the selected segment."
	    elif conversion < 0.20:
	        recommendation = "Price is likely too high for this segment. Consider a discount or bundle."
	    else:
	        recommendation = "Retention risk is elevated. A softer offer may improve subscriber stability."

	    return (
	        f"Segment: {segment}\n"
	        f"Tested monthly price: €{price:.2f}\n"
	        f"Estimated conversion rate: {conversion:.2%}\n"
	        f"Estimated churn rate: {churn:.2%}\n"
	        f"Estimated retained paying users: {paying_users}\n"
	        f"Estimated monthly revenue: €{monthly_revenue:,.2f}\n\n"
	        f"Recommendation: {recommendation}"
	    )

	def pricing_curve(segment):
	    """Plot estimated monthly revenue across the price range for a segment.

	    Uses a 42% conversion rate and 18% churn rate at a price of 9.99 over
	    10000 users, shifted by the segment's sensitivity times the distance
	    from 9.99, with both rates clamped.

	    Args:
	        segment: Key of ``segment_price_sensitivity``.

	    Returns:
	        A Matplotlib ``Figure`` with revenue on the y axis against monthly
	        price on the x axis, one marker per tested price.

	    Raises:
	        KeyError: If ``segment`` is not a key of ``segment_price_sensitivity``.
	    """
	    # Build the figure without pyplot. This handler runs inside a long-lived
	    # server: figures created through plt.subplots() stay registered in
	    # pyplot's global state until explicitly closed (one leaked per click),
	    # and plt.tight_layout() targets the global "current figure", which is
	    # not safe when several handlers run at once.
	    from matplotlib.figure import Figure

	    sensitivity = segment_price_sensitivity[segment]

	    # 21 evenly spaced prices: 5.99, 6.49, ..., 15.99 (the price slider's
	    # range and step). linspace fixes the point count, whereas a float-step
	    # arange can gain or lose the last point through rounding.
	    prices = np.linspace(5.99, 15.99, 21)

	    # Vectorised over all prices; the arithmetic order matches the
	    # single-price formula so the curve agrees with the text report.
	    price_effect = sensitivity * (prices - 9.99)
	    conversion = np.clip(0.42 - price_effect * 0.035, 0.05, 0.85)
	    churn = np.clip(0.18 + price_effect * 0.025, 0.03, 0.60)
	    revenues = 10000 * conversion * (1 - churn) * prices

	    fig = Figure(figsize=(8, 4))
	    ax = fig.subplots()
	    ax.plot(prices, revenues, marker="o")
	    ax.set_title(f"Estimated Revenue Curve - {segment}")
	    ax.set_xlabel("Monthly Price (€)")
	    ax.set_ylabel("Estimated Monthly Revenue (€)")
	    fig.tight_layout()
	    return fig

	# -----------------------------
	# Gradio UI
	# -----------------------------
	# Assemble the Gradio interface: a title, a description, and one tab per
	# analysis. Each tab wires its button(s) to the helper functions defined
	# above and shows their return values in the listed output components.
	# NOTE(review): indentation was lost in this copy of the file, so which
	# components sit inside each gr.Row() / gr.Tab(), and whether demo.launch()
	# runs after the Blocks context closes, must be confirmed against the
	# original source.
	with gr.Blocks() as demo:
	gr.Markdown("# Music Streaming Analytics App")
	gr.Markdown(
	"Interactive demo for optimizing playlist curation and subscription pricing "
	"using listener review sentiment and stream forecasting."
	)

	# Tab 1: table produced by dataset_preview().
	with gr.Tab("Dataset Overview"):
	gr.Markdown("Preview of the enriched dataset used in the project.")
	preview_btn = gr.Button("Load Sample Dataset")
	preview_table = gr.Dataframe()
	preview_btn.click(fn=dataset_preview, outputs=preview_table)

	# Tab 2: text summary and label-distribution chart on one button, genre
	# chart on the other.
	with gr.Tab("Sentiment Insights"):
	gr.Markdown("Qualitative analysis based on music review sentiment.")
	with gr.Row():
	sentiment_btn = gr.Button("Generate Sentiment Summary")
	genre_btn = gr.Button("Show Genre Sentiment")
	sentiment_text = gr.Textbox(lines=8, label="Sentiment Summary")
	sentiment_plot = gr.Plot(label="Sentiment Distribution")
	genre_plot = gr.Plot(label="Genre Sentiment")

	# Two handlers are registered on sentiment_btn: one fills the text box,
	# the other draws the distribution chart.
	sentiment_btn.click(fn=sentiment_summary, outputs=sentiment_text)
	sentiment_btn.click(fn=sentiment_chart, outputs=sentiment_plot)
	genre_btn.click(fn=genre_sentiment_chart, outputs=genre_plot)

	# Tab 3: top_playlist_recommendations() returns (table, text), mapped in
	# that order onto the two outputs.
	with gr.Tab("Playlist Curation"):
	gr.Markdown("Quantitative recommendation engine for playlist promotion decisions.")
	playlist_btn = gr.Button("Show Top Playlist Recommendations")
	playlist_table = gr.Dataframe()
	playlist_text = gr.Textbox(lines=4, label="Recommendation Logic")
	playlist_btn.click(fn=top_playlist_recommendations, outputs=[playlist_table, playlist_text])

	# Tab 4: one button triggers both the forecast text and the forecast chart.
	with gr.Tab("Stream Forecast"):
	gr.Markdown("Forecasting future streams using sentiment and engagement signals.")
	forecast_btn = gr.Button("Run Forecast Analysis")
	forecast_text = gr.Textbox(lines=8, label="Forecast Summary")
	forecast_plot = gr.Plot(label="Current vs Forecasted Streams")
	forecast_btn.click(fn=forecast_summary, outputs=forecast_text)
	forecast_btn.click(fn=forecast_chart, outputs=forecast_plot)

	# Tab 5: segment dropdown and price slider feed pricing_simulator(); the
	# revenue curve depends on the segment only.
	with gr.Tab("Pricing Strategy"):
	gr.Markdown("Simulate pricing decisions for different listener segments.")
	with gr.Row():
	segment_input = gr.Dropdown(
	choices=list(segment_price_sensitivity.keys()),
	value="Standard",
	label="Listener Segment"
	)
	# Slider covers 5.99 to 15.99 in steps of 0.5, starting at 9.99.
	price_input = gr.Slider(
	minimum=5.99,
	maximum=15.99,
	value=9.99,
	step=0.5,
	label="Monthly Price (€)"
	)
	price_btn = gr.Button("Evaluate Pricing Scenario")
	pricing_text = gr.Textbox(lines=10, label="Pricing Recommendation")
	pricing_plot = gr.Plot(label="Revenue Curve")
	price_btn.click(fn=pricing_simulator, inputs=[segment_input, price_input], outputs=pricing_text)
	price_btn.click(fn=pricing_curve, inputs=segment_input, outputs=pricing_plot)

	# Start the app.
	demo.launch()