# axelmc9's picture
# Update app.py
# de358d3 verified
import gradio as gr
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# -----------------------------
# Synthetic demo dataset
# -----------------------------
# Fixed seed so every launch of the demo shows the same synthetic tracks.
np.random.seed(42)

n_tracks = 60
genres = ["Pop", "Hip-Hop", "Rock", "Electronic", "R&B", "Indie"]
tracks = [f"Track {i}" for i in range(1, n_tracks + 1)]

# NOTE: the order of the random draws below determines the generated values;
# keep the column order stable if the demo data must stay reproducible.
df = pd.DataFrame({
    "track": tracks,
    "genre": np.random.choice(genres, n_tracks),
    # The normal draw is unbounded, but the playlist score below rescales
    # sentiment with (score + 1) / 2, which only maps onto [0, 1] when the
    # score lies in [-1, 1]. Clip so the 0.4 sentiment weight is never exceeded.
    "sentiment_score": np.round(
        np.random.normal(0.2, 0.45, n_tracks), 2
    ).clip(-1.0, 1.0),
    "streams": np.random.randint(5000, 250000, n_tracks),
    "skip_rate": np.round(np.random.uniform(0.05, 0.45, n_tracks), 2),
    "completion_rate": np.round(np.random.uniform(0.45, 0.95, n_tracks), 2),
    "save_rate": np.round(np.random.uniform(0.02, 0.30, n_tracks), 2),
    "subscription_price": np.round(np.random.uniform(5.99, 14.99, n_tracks), 2),
})

# Bucket the continuous score: (-10, -0.1] Negative, (-0.1, 0.1] Neutral,
# (0.1, 10] Positive.
df["sentiment_label"] = pd.cut(
    df["sentiment_score"],
    bins=[-10, -0.1, 0.1, 10],
    labels=["Negative", "Neutral", "Positive"],
)

# Forecast = current streams scaled up by sentiment and save rate and down by
# skip rate, floored at 1000 streams.
df["forecast_streams"] = (
    df["streams"]
    * (
        1
        + 0.35 * df["sentiment_score"]
        + 0.25 * df["save_rate"]
        - 0.20 * df["skip_rate"]
    )
).clip(lower=1000).astype(int)

# Weighted blend (weights sum to 1.0) of rescaled sentiment, forecast relative
# to the best track, completion rate and save rate.
df["playlist_score"] = (
    0.4 * ((df["sentiment_score"] + 1) / 2)
    + 0.3 * (df["forecast_streams"] / df["forecast_streams"].max())
    + 0.2 * df["completion_rate"]
    + 0.1 * df["save_rate"]
)

# (-1, 0.45] Demote, (0.45, 0.65] Keep, (0.65, 2] Promote.
df["playlist_action"] = pd.cut(
    df["playlist_score"],
    bins=[-1, 0.45, 0.65, 2],
    labels=["Demote", "Keep", "Promote"],
)

# Multiplier applied to the price effect in the pricing functions: values
# above 1 make conversion and churn react more strongly to price changes.
segment_price_sensitivity = {
    "Student": 1.35,
    "Casual Listener": 1.15,
    "Standard": 1.00,
    "Family": 0.85,
    "Premium Heavy User": 0.65,
}
# -----------------------------
# Helper functions
# -----------------------------
def dataset_preview():
    """Return the 15 tracks with the highest forecast streams.

    Returns:
        A DataFrame slice of the module-level ``df`` limited to the
        display columns, ordered by ``forecast_streams`` descending.
    """
    display_columns = [
        "track",
        "genre",
        "sentiment_score",
        "sentiment_label",
        "streams",
        "forecast_streams",
        "playlist_action",
    ]
    ranked = df.sort_values(by="forecast_streams", ascending=False)
    return ranked[display_columns].head(15)
def sentiment_summary():
    """Describe overall review sentiment as a short multi-line report.

    Returns:
        A string with the mean sentiment score, the percentage of tracks
        labelled Positive and Negative, the genre with the highest mean
        sentiment, and a fixed interpretation sentence.
    """
    labels = df["sentiment_label"]

    mean_score = round(df["sentiment_score"].mean(), 3)
    positive_share = round(labels.eq("Positive").mean() * 100, 1)
    negative_share = round(labels.eq("Negative").mean() * 100, 1)

    # Genres ordered from highest to lowest mean sentiment; take the leader.
    genre_means = df.groupby("genre")["sentiment_score"].mean()
    best_genre = genre_means.sort_values(ascending=False).index[0]

    report_lines = [
        f"Average sentiment score: {mean_score}",
        f"Positive reviews share: {positive_share}%",
        f"Negative reviews share: {negative_share}%",
        f"Best-performing genre by sentiment: {best_genre}",
        "",
        "Interpretation: genres and tracks with stronger sentiment should be prioritized "
        "for playlist exposure because they are more likely to support future engagement.",
    ]
    return "\n".join(report_lines)
def sentiment_chart():
    """Build a bar chart of the number of tracks per sentiment label.

    Returns:
        The Matplotlib figure, with bars ordered Positive, Neutral, Negative.
    """
    counts = (
        df["sentiment_label"]
        .value_counts()
        .reindex(["Positive", "Neutral", "Negative"])
        .fillna(0)  # a label with no tracks still gets a zero-height bar
    )

    fig, ax = plt.subplots(figsize=(7, 4))
    counts.plot(kind="bar", ax=ax)
    ax.set_title("Review Sentiment Distribution")
    ax.set_xlabel("Sentiment")
    ax.set_ylabel("Number of Tracks")

    # Lay out this figure explicitly rather than pyplot's "current" figure,
    # which another callback may have replaced in the meantime.
    fig.tight_layout()
    # Drop the figure from pyplot's global registry so repeated button clicks
    # do not pile up open figures; the Figure object itself stays renderable.
    plt.close(fig)
    return fig
def genre_sentiment_chart():
    """Build a bar chart of mean sentiment score per genre.

    Returns:
        The Matplotlib figure, with genres ordered from highest to lowest
        mean sentiment.
    """
    genre_scores = (
        df.groupby("genre")["sentiment_score"]
        .mean()
        .sort_values(ascending=False)
    )

    fig, ax = plt.subplots(figsize=(8, 4))
    genre_scores.plot(kind="bar", ax=ax)
    ax.set_title("Average Sentiment by Genre")
    ax.set_xlabel("Genre")
    ax.set_ylabel("Average Sentiment Score")

    # Lay out this figure explicitly rather than pyplot's "current" figure,
    # which another callback may have replaced in the meantime.
    fig.tight_layout()
    # Drop the figure from pyplot's global registry so repeated button clicks
    # do not pile up open figures; the Figure object itself stays renderable.
    plt.close(fig)
    return fig
def top_playlist_recommendations():
    """Return the ten strongest playlist candidates plus an explanation.

    Rows are ordered by ``playlist_action`` descending, then by
    ``forecast_streams`` descending, and the first ten are kept.

    Returns:
        A ``(table, explanation)`` tuple: the DataFrame slice and a fixed
        string describing the ranking logic.
    """
    display_columns = [
        "track",
        "genre",
        "sentiment_score",
        "forecast_streams",
        "completion_rate",
        "save_rate",
        "playlist_action",
    ]
    ranked = df.sort_values(
        by=["playlist_action", "forecast_streams"],
        ascending=False,
    )
    shortlist = ranked[display_columns].head(10)

    rationale = (
        "Top tracks are ranked using sentiment, forecasted streams, completion rate, "
        "and save rate. Tracks marked 'Promote' are the strongest candidates for editorial playlists."
    )
    return shortlist, rationale
def forecast_summary():
    """Summarise current versus forecast streams as a multi-line report.

    Returns:
        A string with the truncated mean of current and forecast streams,
        the percentage change between those two means, the track with the
        highest forecast, and a fixed interpretation sentence.
    """
    mean_now = int(df["streams"].mean())
    mean_forecast = int(df["forecast_streams"].mean())
    # Growth is computed from the truncated integer means shown in the report.
    pct_growth = round(((mean_forecast - mean_now) / mean_now) * 100, 1)

    by_forecast = df.sort_values(by="forecast_streams", ascending=False)
    leader = by_forecast["track"].iloc[0]

    report_lines = [
        f"Current average streams: {mean_now}",
        f"Forecast average streams: {mean_forecast}",
        f"Expected growth: {pct_growth}%",
        f"Top forecasted track: {leader}",
        "",
        "Interpretation: use forecast signals together with sentiment to decide which tracks "
        "deserve promotion in major playlists.",
    ]
    return "\n".join(report_lines)
def forecast_chart():
    """Plot current versus forecast streams for the ten top-forecast tracks.

    Current streams are drawn as bars and forecast streams as a marked line
    over the same track axis.

    Returns:
        The Matplotlib figure.
    """
    top10 = df.sort_values("forecast_streams", ascending=False).head(10)

    fig, ax = plt.subplots(figsize=(9, 4))
    ax.bar(top10["track"], top10["streams"], label="Current Streams")
    ax.plot(
        top10["track"],
        top10["forecast_streams"],
        marker="o",
        label="Forecast Streams",
    )
    ax.set_title("Current vs Forecasted Streams for Top Tracks")
    ax.set_xlabel("Track")
    ax.set_ylabel("Streams")
    ax.legend()

    # Rotate the labels on this axes object rather than through plt.xticks,
    # which targets pyplot's implicit "current" axes.
    plt.setp(ax.get_xticklabels(), rotation=45, ha="right")
    fig.tight_layout()
    # Drop the figure from pyplot's global registry so repeated button clicks
    # do not pile up open figures; the Figure object itself stays renderable.
    plt.close(fig)
    return fig
def pricing_simulator(segment, price):
    """Estimate conversion, churn and revenue for one segment at one price.

    Args:
        segment: Key into ``segment_price_sensitivity``.
        price: Monthly price to evaluate.

    Returns:
        A multi-line string with the estimated rates, retained paying
        users, monthly revenue and a recommendation.

    Raises:
        KeyError: If ``segment`` is not in ``segment_price_sensitivity``.
    """
    sensitivity = segment_price_sensitivity[segment]

    reference_conversion = 0.42
    reference_churn = 0.18
    audience_size = 10000
    # Distance from the 9.99 reference price drives both rates.
    price_gap = price - 9.99

    # Conversion falls and churn rises as price moves above the reference;
    # both are clamped to fixed bounds.
    conversion = max(
        0.05, min(0.85, reference_conversion - sensitivity * price_gap * 0.035)
    )
    churn = max(
        0.03, min(0.60, reference_churn + sensitivity * price_gap * 0.025)
    )

    paying_users = int(audience_size * conversion * (1 - churn))
    monthly_revenue = round(paying_users * price, 2)

    # Low conversion takes precedence over elevated churn.
    recommendation = "This pricing level looks commercially reasonable for the selected segment."
    if conversion < 0.20:
        recommendation = "Price is likely too high for this segment. Consider a discount or bundle."
    elif churn > 0.28:
        recommendation = "Retention risk is elevated. A softer offer may improve subscriber stability."

    report_lines = [
        f"Segment: {segment}",
        f"Tested monthly price: €{price:.2f}",
        f"Estimated conversion rate: {conversion:.2%}",
        f"Estimated churn rate: {churn:.2%}",
        f"Estimated retained paying users: {paying_users}",
        f"Estimated monthly revenue: €{monthly_revenue:,.2f}",
        "",
        f"Recommendation: {recommendation}",
    ]
    return "\n".join(report_lines)
def pricing_curve(segment):
    """Plot estimated monthly revenue against price for one segment.

    Uses the same conversion/churn formulas and clamps as
    ``pricing_simulator`` but keeps paying users fractional, evaluated on a
    0.50 price grid from 5.99 to 15.99.

    Args:
        segment: Key into ``segment_price_sensitivity``.

    Returns:
        The Matplotlib figure.

    Raises:
        KeyError: If ``segment`` is not in ``segment_price_sensitivity``.
    """
    sensitivity = segment_price_sensitivity[segment]

    # linspace pins both endpoints and the point count; arange with a float
    # step can gain or lose the last point to rounding.
    prices = np.linspace(5.99, 15.99, 21)
    price_gap = prices - 9.99

    conversion = np.clip(0.42 - sensitivity * price_gap * 0.035, 0.05, 0.85)
    churn = np.clip(0.18 + sensitivity * price_gap * 0.025, 0.03, 0.60)
    revenues = 10000 * conversion * (1 - churn) * prices

    fig, ax = plt.subplots(figsize=(8, 4))
    ax.plot(prices, revenues, marker="o")
    ax.set_title(f"Estimated Revenue Curve - {segment}")
    ax.set_xlabel("Monthly Price (€)")
    ax.set_ylabel("Estimated Monthly Revenue (€)")

    # Lay out this figure explicitly rather than pyplot's "current" figure,
    # which another callback may have replaced in the meantime.
    fig.tight_layout()
    # Drop the figure from pyplot's global registry so repeated button clicks
    # do not pile up open figures; the Figure object itself stays renderable.
    plt.close(fig)
    return fig
# -----------------------------
# Gradio UI
# -----------------------------
# NOTE(review): this section reached review with its indentation flattened.
# The Tab/Row nesting below is reconstructed on the assumption that each Row
# holds the pair of controls declared directly after it -- confirm against the
# deployed layout.
with gr.Blocks() as demo:
    gr.Markdown("# Music Streaming Analytics App")
    gr.Markdown(
        "Interactive demo for optimizing playlist curation and subscription pricing "
        "using listener review sentiment and stream forecasting."
    )

    with gr.Tab("Dataset Overview"):
        gr.Markdown("Preview of the enriched dataset used in the project.")
        preview_btn = gr.Button("Load Sample Dataset")
        preview_table = gr.Dataframe()
        preview_btn.click(fn=dataset_preview, outputs=preview_table)

    with gr.Tab("Sentiment Insights"):
        gr.Markdown("Qualitative analysis based on music review sentiment.")
        with gr.Row():
            sentiment_btn = gr.Button("Generate Sentiment Summary")
            genre_btn = gr.Button("Show Genre Sentiment")
        sentiment_text = gr.Textbox(lines=8, label="Sentiment Summary")
        sentiment_plot = gr.Plot(label="Sentiment Distribution")
        genre_plot = gr.Plot(label="Genre Sentiment")
        # One click refreshes both the text summary and the distribution chart.
        sentiment_btn.click(fn=sentiment_summary, outputs=sentiment_text)
        sentiment_btn.click(fn=sentiment_chart, outputs=sentiment_plot)
        genre_btn.click(fn=genre_sentiment_chart, outputs=genre_plot)

    with gr.Tab("Playlist Curation"):
        gr.Markdown("Quantitative recommendation engine for playlist promotion decisions.")
        playlist_btn = gr.Button("Show Top Playlist Recommendations")
        playlist_table = gr.Dataframe()
        playlist_text = gr.Textbox(lines=4, label="Recommendation Logic")
        playlist_btn.click(
            fn=top_playlist_recommendations,
            outputs=[playlist_table, playlist_text],
        )

    with gr.Tab("Stream Forecast"):
        gr.Markdown("Forecasting future streams using sentiment and engagement signals.")
        forecast_btn = gr.Button("Run Forecast Analysis")
        forecast_text = gr.Textbox(lines=8, label="Forecast Summary")
        forecast_plot = gr.Plot(label="Current vs Forecasted Streams")
        # One click refreshes both the text summary and the chart.
        forecast_btn.click(fn=forecast_summary, outputs=forecast_text)
        forecast_btn.click(fn=forecast_chart, outputs=forecast_plot)

    with gr.Tab("Pricing Strategy"):
        gr.Markdown("Simulate pricing decisions for different listener segments.")
        with gr.Row():
            segment_input = gr.Dropdown(
                choices=list(segment_price_sensitivity.keys()),
                value="Standard",
                label="Listener Segment",
            )
            price_input = gr.Slider(
                minimum=5.99,
                maximum=15.99,
                value=9.99,
                step=0.5,
                label="Monthly Price (€)",
            )
        price_btn = gr.Button("Evaluate Pricing Scenario")
        pricing_text = gr.Textbox(lines=10, label="Pricing Recommendation")
        pricing_plot = gr.Plot(label="Revenue Curve")
        # The text result uses segment and price; the curve depends on the
        # segment only.
        price_btn.click(
            fn=pricing_simulator,
            inputs=[segment_input, price_input],
            outputs=pricing_text,
        )
        price_btn.click(fn=pricing_curve, inputs=segment_input, outputs=pricing_plot)

# Start the server only when the file is run as a script, so importing this
# module (for tests or tooling) does not block on a running web server.
if __name__ == "__main__":
    demo.launch()