# Hugging Face Space: ESCP / music-streaming-analytics
# Status: Sleeping
# File size: 10,346 bytes

import gradio as gr
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# -----------------------------
# Synthetic demo dataset
# -----------------------------
# Seed NumPy's global random state so every run draws the same synthetic values.
np.random.seed(42)

n_tracks = 60
genres = ["Pop", "Hip-Hop", "Rock", "Electronic", "R&B", "Indie"]
# Track names are "Track 1" .. "Track 60".
tracks = [f"Track {i}" for i in range(1, n_tracks + 1)]

# One row per track. The random columns are drawn from the shared global state
# in the order listed below, so reordering them changes the generated values
# even though the seed is fixed.
df = pd.DataFrame({
    "track": tracks,
    # Genre picked at random (with replacement) from `genres`.
    "genre": np.random.choice(genres, n_tracks),
    # Normal(mean=0.2, std=0.45), rounded to 2 decimals; not clipped to any range.
    "sentiment_score": np.round(np.random.normal(0.2, 0.45, n_tracks), 2),
    # Integer stream counts from 5000 (inclusive) up to 250000 (exclusive).
    "streams": np.random.randint(5000, 250000, n_tracks),
    # The three rates below are uniform fractions, rounded to 2 decimals.
    "skip_rate": np.round(np.random.uniform(0.05, 0.45, n_tracks), 2),
    "completion_rate": np.round(np.random.uniform(0.45, 0.95, n_tracks), 2),
    "save_rate": np.round(np.random.uniform(0.02, 0.30, n_tracks), 2),
    # Uniform price between 5.99 and 14.99, rounded to 2 decimals.
    "subscription_price": np.round(np.random.uniform(5.99, 14.99, n_tracks), 2)
})

# Bucket the sentiment score into three labels. pd.cut intervals are
# right-inclusive by default, so: (-10, -0.1] -> Negative,
# (-0.1, 0.1] -> Neutral, (0.1, 10] -> Positive.
df["sentiment_label"] = pd.cut(
    df["sentiment_score"],
    bins=[-10, -0.1, 0.1, 10],
    labels=["Negative", "Neutral", "Positive"]
)

# Forecast = current streams scaled by a multiplier that rises with sentiment
# and save rate and falls with skip rate. The result is floored at 1000 and
# then truncated to an integer.
df["forecast_streams"] = (
    df["streams"] * (1 + 0.35 * df["sentiment_score"] + 0.25 * df["save_rate"] - 0.20 * df["skip_rate"])
).clip(lower=1000).astype(int)

# Weighted blend of four signals (weights 0.4 / 0.3 / 0.2 / 0.1 sum to 1.0):
#   - sentiment shifted by +1 and halved (maps [-1, 1] onto [0, 1];
#     NOTE(review): sentiment_score itself is not clipped to [-1, 1]),
#   - forecast streams relative to the largest forecast in the dataset,
#   - completion rate,
#   - save rate.
df["playlist_score"] = (
    0.4 * ((df["sentiment_score"] + 1) / 2)
    + 0.3 * (df["forecast_streams"] / df["forecast_streams"].max())
    + 0.2 * df["completion_rate"]
    + 0.1 * df["save_rate"]
)

# Turn the score into an action using right-inclusive intervals:
# (-1, 0.45] -> Demote, (0.45, 0.65] -> Keep, (0.65, 2] -> Promote.
df["playlist_action"] = pd.cut(
    df["playlist_score"],
    bins=[-1, 0.45, 0.65, 2],
    labels=["Demote", "Keep", "Promote"]
)

# Price-sensitivity multiplier per listener segment. The pricing functions
# multiply it with the price's distance from 9.99, so a value above 1.0
# amplifies the effect of a price change on conversion and churn, and a
# value below 1.0 dampens it. The keys also populate the segment dropdown.
segment_price_sensitivity = {
    "Student": 1.35,
    "Casual Listener": 1.15,
    "Standard": 1.00,
    "Family": 0.85,
    "Premium Heavy User": 0.65
}

# -----------------------------
# Helper functions
# -----------------------------
def dataset_preview():
    """Return the 15 tracks with the highest forecast streams.

    Returns:
        A DataFrame restricted to the display columns (track, genre,
        sentiment score/label, current and forecast streams, playlist
        action), sorted by ``forecast_streams`` in descending order.
    """
    display_columns = [
        "track", "genre", "sentiment_score", "sentiment_label",
        "streams", "forecast_streams", "playlist_action",
    ]
    ranked = df[display_columns].sort_values("forecast_streams", ascending=False)
    return ranked.head(15)

def sentiment_summary():
    """Build a plain-text summary of the dataset's sentiment columns.

    Returns:
        A multi-line string with the mean sentiment score (3 decimals), the
        percentage of tracks labelled Positive and Negative (1 decimal), the
        genre with the highest mean sentiment, and a fixed interpretation
        sentence.
    """
    scores = df["sentiment_score"]
    labels = df["sentiment_label"]

    avg_sent = round(scores.mean(), 3)
    # The mean of a boolean mask is the fraction of rows carrying that label.
    pos_pct = round(labels.eq("Positive").mean() * 100, 1)
    neg_pct = round(labels.eq("Negative").mean() * 100, 1)

    genre_means = df.groupby("genre")["sentiment_score"].mean()
    top_genre = genre_means.sort_values(ascending=False).index[0]

    summary_lines = [
        f"Average sentiment score: {avg_sent}",
        f"Positive reviews share: {pos_pct}%",
        f"Negative reviews share: {neg_pct}%",
        f"Best-performing genre by sentiment: {top_genre}",
        "",
        (
            "Interpretation: genres and tracks with stronger sentiment should be prioritized "
            "for playlist exposure because they are more likely to support future engagement."
        ),
    ]
    return "\n".join(summary_lines)

def sentiment_chart():
    """Return a bar chart of how many tracks fall under each sentiment label.

    The bars are always shown in the order Positive, Neutral, Negative; a
    label with no tracks is plotted as 0.

    Returns:
        A matplotlib ``Figure`` holding the chart.
    """
    counts = (
        df["sentiment_label"]
        .value_counts()
        .reindex(["Positive", "Neutral", "Negative"])
        .fillna(0)
    )

    # Build the Figure directly instead of through plt.subplots(): pyplot
    # keeps every figure it creates registered until it is explicitly closed,
    # and this callback runs on each button click of a long-running app, so
    # pyplot-managed figures would accumulate in memory.
    fig = plt.Figure(figsize=(7, 4))
    ax = fig.subplots()
    counts.plot(kind="bar", ax=ax)
    ax.set_title("Review Sentiment Distribution")
    ax.set_xlabel("Sentiment")
    ax.set_ylabel("Number of Tracks")
    # Lay out this specific figure rather than pyplot's implicit "current" one.
    fig.tight_layout()
    return fig

def genre_sentiment_chart():
    """Return a bar chart of the mean sentiment score per genre.

    Genres are ordered from the highest to the lowest mean sentiment.

    Returns:
        A matplotlib ``Figure`` holding the chart.
    """
    genre_scores = (
        df.groupby("genre")["sentiment_score"]
        .mean()
        .sort_values(ascending=False)
    )

    # Build the Figure directly instead of through plt.subplots(): pyplot
    # keeps every figure it creates registered until it is explicitly closed,
    # and this callback runs on each button click of a long-running app, so
    # pyplot-managed figures would accumulate in memory.
    fig = plt.Figure(figsize=(8, 4))
    ax = fig.subplots()
    genre_scores.plot(kind="bar", ax=ax)
    ax.set_title("Average Sentiment by Genre")
    ax.set_xlabel("Genre")
    ax.set_ylabel("Average Sentiment Score")
    # Lay out this specific figure rather than pyplot's implicit "current" one.
    fig.tight_layout()
    return fig

def top_playlist_recommendations():
    """Return the top 10 playlist candidates plus a short explanation.

    Rows are ordered by ``playlist_action`` descending and then by
    ``forecast_streams`` descending; the first 10 rows are kept.

    Returns:
        A ``(table, explanation)`` tuple: the 10-row DataFrame restricted to
        the display columns, and a fixed explanatory string.
    """
    display_columns = [
        "track", "genre", "sentiment_score", "forecast_streams",
        "completion_rate", "save_rate", "playlist_action",
    ]
    ranked = df[display_columns].sort_values(
        ["playlist_action", "forecast_streams"],
        ascending=[False, False],
    )
    top_df = ranked.head(10)

    explanation = (
        "Top tracks are ranked using sentiment, forecasted streams, completion rate, "
        "and save rate. Tracks marked 'Promote' are the strongest candidates for editorial playlists."
    )
    return top_df, explanation

def forecast_summary():
    """Build a plain-text summary comparing current and forecast streams.

    Returns:
        A multi-line string with the truncated mean of current streams, the
        truncated mean of forecast streams, the percentage change between
        those two means (1 decimal), the track with the highest forecast,
        and a fixed interpretation sentence.
    """
    current_avg = int(df["streams"].mean())
    future_avg = int(df["forecast_streams"].mean())

    # Growth is computed from the truncated integer averages shown in the text.
    relative_change = (future_avg - current_avg) / current_avg
    growth = round(relative_change * 100, 1)

    ranked = df.sort_values("forecast_streams", ascending=False)
    top_track = ranked["track"].iloc[0]

    summary_lines = [
        f"Current average streams: {current_avg}",
        f"Forecast average streams: {future_avg}",
        f"Expected growth: {growth}%",
        f"Top forecasted track: {top_track}",
        "",
        (
            "Interpretation: use forecast signals together with sentiment to decide which tracks "
            "deserve promotion in major playlists."
        ),
    ]
    return "\n".join(summary_lines)

def forecast_chart():
    """Return a chart of current vs forecast streams for the top 10 tracks.

    The 10 tracks with the highest ``forecast_streams`` are shown: current
    streams as bars and forecast streams as a line with circular markers.

    Returns:
        A matplotlib ``Figure`` holding the chart.
    """
    top10 = df.sort_values("forecast_streams", ascending=False).head(10)

    # Build the Figure directly instead of through plt.subplots(): pyplot
    # keeps every figure it creates registered until it is explicitly closed,
    # and this callback runs on each button click of a long-running app, so
    # pyplot-managed figures would accumulate in memory.
    fig = plt.Figure(figsize=(9, 4))
    ax = fig.subplots()
    ax.bar(top10["track"], top10["streams"], label="Current Streams")
    ax.plot(top10["track"], top10["forecast_streams"], marker="o", label="Forecast Streams")
    ax.set_title("Current vs Forecasted Streams for Top Tracks")
    ax.set_xlabel("Track")
    ax.set_ylabel("Streams")
    ax.legend()

    # Tilt the track names so they do not overlap. This is done on this axes'
    # own tick labels instead of plt.xticks(), which acts on pyplot's implicit
    # "current axes" global.
    for tick_label in ax.get_xticklabels():
        tick_label.set_rotation(45)
        tick_label.set_horizontalalignment("right")

    fig.tight_layout()
    return fig

def pricing_simulator(segment, price):
    """Estimate conversion, churn and revenue for one segment at one price.

    Conversion falls and churn rises linearly with the price's distance from
    9.99, scaled by the segment's sensitivity; both are clamped to fixed
    bounds. Revenue is the retained paying users (out of a base of 10000)
    times the price.

    Args:
        segment: Key of ``segment_price_sensitivity``.
        price: Monthly price to test.

    Returns:
        A multi-line report string ending with a recommendation.

    Raises:
        KeyError: If ``segment`` is not a known segment.
    """
    base_users = 10000
    base_churn = 0.18
    base_conversion = 0.42

    sensitivity = segment_price_sensitivity[segment]

    raw_conversion = base_conversion - sensitivity * (price - 9.99) * 0.035
    raw_churn = base_churn + sensitivity * (price - 9.99) * 0.025
    # Clamp conversion to [0.05, 0.85] and churn to [0.03, 0.60].
    conversion = max(0.05, min(0.85, raw_conversion))
    churn = max(0.03, min(0.60, raw_churn))

    paying_users = int(base_users * conversion * (1 - churn))
    monthly_revenue = round(paying_users * price, 2)

    # Low conversion takes precedence over elevated churn.
    recommendation = "This pricing level looks commercially reasonable for the selected segment."
    if conversion < 0.20:
        recommendation = "Price is likely too high for this segment. Consider a discount or bundle."
    elif churn > 0.28:
        recommendation = "Retention risk is elevated. A softer offer may improve subscriber stability."

    report_lines = [
        f"Segment: {segment}",
        f"Tested monthly price: €{price:.2f}",
        f"Estimated conversion rate: {conversion:.2%}",
        f"Estimated churn rate: {churn:.2%}",
        f"Estimated retained paying users: {paying_users}",
        f"Estimated monthly revenue: €{monthly_revenue:,.2f}",
        "",
        f"Recommendation: {recommendation}",
    ]
    return "\n".join(report_lines)

def pricing_curve(segment):
    """Return a chart of estimated monthly revenue across the price range.

    Revenue is evaluated at 21 prices from 5.99 to 15.99 in steps of 0.50
    (the same grid as the price slider), using the same conversion and churn
    formulas and clamps as ``pricing_simulator`` with a base of 10000 users.

    Args:
        segment: Key of ``segment_price_sensitivity``.

    Returns:
        A matplotlib ``Figure`` holding the revenue curve.

    Raises:
        KeyError: If ``segment`` is not a known segment.
    """
    sensitivity = segment_price_sensitivity[segment]

    # linspace fixes the number of points explicitly. np.arange with a
    # fractional step derives its length from (stop - start) / step, which is
    # sensitive to floating-point rounding and can gain or lose the last point.
    prices = np.linspace(5.99, 15.99, 21)

    # Vectorised form of the per-price formulas; np.clip applies the same
    # [0.05, 0.85] and [0.03, 0.60] bounds used for a single price.
    conversion = np.clip(0.42 - sensitivity * (prices - 9.99) * 0.035, 0.05, 0.85)
    churn = np.clip(0.18 + sensitivity * (prices - 9.99) * 0.025, 0.03, 0.60)
    revenues = 10000 * conversion * (1 - churn) * prices

    # Build the Figure directly instead of through plt.subplots(): pyplot
    # keeps every figure it creates registered until it is explicitly closed,
    # and this callback runs on each button click of a long-running app, so
    # pyplot-managed figures would accumulate in memory.
    fig = plt.Figure(figsize=(8, 4))
    ax = fig.subplots()
    ax.plot(prices, revenues, marker="o")
    ax.set_title(f"Estimated Revenue Curve - {segment}")
    ax.set_xlabel("Monthly Price (€)")
    ax.set_ylabel("Estimated Monthly Revenue (€)")
    fig.tight_layout()
    return fig

# -----------------------------
# Gradio UI
# -----------------------------
# Build the five-tab interface. Components are rendered in the order they are
# created inside each `with` context, so statement order here defines the layout.
with gr.Blocks() as demo:
    gr.Markdown("# Music Streaming Analytics App")
    gr.Markdown(
        "Interactive demo for optimizing playlist curation and subscription pricing "
        "using listener review sentiment and stream forecasting."
    )

    # Tab 1: table of the highest-forecast tracks, loaded on demand.
    with gr.Tab("Dataset Overview"):
        gr.Markdown("Preview of the enriched dataset used in the project.")
        preview_btn = gr.Button("Load Sample Dataset")
        preview_table = gr.Dataframe()
        preview_btn.click(fn=dataset_preview, outputs=preview_table)

    # Tab 2: sentiment text summary plus two charts.
    with gr.Tab("Sentiment Insights"):
        gr.Markdown("Qualitative analysis based on music review sentiment.")
        with gr.Row():
            sentiment_btn = gr.Button("Generate Sentiment Summary")
            genre_btn = gr.Button("Show Genre Sentiment")
        sentiment_text = gr.Textbox(lines=8, label="Sentiment Summary")
        sentiment_plot = gr.Plot(label="Sentiment Distribution")
        genre_plot = gr.Plot(label="Genre Sentiment")

        # The summary button has two click listeners: one fills the text box,
        # the other fills the distribution plot.
        sentiment_btn.click(fn=sentiment_summary, outputs=sentiment_text)
        sentiment_btn.click(fn=sentiment_chart, outputs=sentiment_plot)
        genre_btn.click(fn=genre_sentiment_chart, outputs=genre_plot)

    # Tab 3: top playlist candidates. The handler returns (table, text), which
    # map positionally onto the two outputs.
    with gr.Tab("Playlist Curation"):
        gr.Markdown("Quantitative recommendation engine for playlist promotion decisions.")
        playlist_btn = gr.Button("Show Top Playlist Recommendations")
        playlist_table = gr.Dataframe()
        playlist_text = gr.Textbox(lines=4, label="Recommendation Logic")
        playlist_btn.click(fn=top_playlist_recommendations, outputs=[playlist_table, playlist_text])

    # Tab 4: forecast text summary and chart, both triggered by one button.
    with gr.Tab("Stream Forecast"):
        gr.Markdown("Forecasting future streams using sentiment and engagement signals.")
        forecast_btn = gr.Button("Run Forecast Analysis")
        forecast_text = gr.Textbox(lines=8, label="Forecast Summary")
        forecast_plot = gr.Plot(label="Current vs Forecasted Streams")
        forecast_btn.click(fn=forecast_summary, outputs=forecast_text)
        forecast_btn.click(fn=forecast_chart, outputs=forecast_plot)

    # Tab 5: pricing simulator driven by a segment dropdown and a price slider.
    with gr.Tab("Pricing Strategy"):
        gr.Markdown("Simulate pricing decisions for different listener segments.")
        with gr.Row():
            # Dropdown choices are the keys of segment_price_sensitivity.
            segment_input = gr.Dropdown(
                choices=list(segment_price_sensitivity.keys()),
                value="Standard",
                label="Listener Segment"
            )
            # Slider covers 5.99 to 15.99 in 0.50 steps, starting at 9.99.
            price_input = gr.Slider(
                minimum=5.99,
                maximum=15.99,
                value=9.99,
                step=0.5,
                label="Monthly Price (€)"
            )
        price_btn = gr.Button("Evaluate Pricing Scenario")
        pricing_text = gr.Textbox(lines=10, label="Pricing Recommendation")
        pricing_plot = gr.Plot(label="Revenue Curve")
        # The text report uses both inputs; the revenue curve depends only on
        # the segment because it sweeps the price range itself.
        price_btn.click(fn=pricing_simulator, inputs=[segment_input, price_input], outputs=pricing_text)
        price_btn.click(fn=pricing_curve, inputs=segment_input, outputs=pricing_plot)

# Start the app. This runs at module level, so it executes whenever the file
# is run or imported.
demo.launch()