"""Gradio demo: playlist curation and subscription pricing analytics.

The app builds a small synthetic track dataset at import time, derives
sentiment labels, a stream forecast and a playlist score from it, and exposes
the results through a tabbed Gradio interface.
"""

import gradio as gr
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# -----------------------------
# Synthetic demo dataset
# -----------------------------
# Fixed seed so the demo shows the same numbers on every start. The order of
# the np.random calls below determines the generated values; do not reorder.
np.random.seed(42)

n_tracks = 60
genres = ["Pop", "Hip-Hop", "Rock", "Electronic", "R&B", "Indie"]
tracks = [f"Track {i}" for i in range(1, n_tracks + 1)]

df = pd.DataFrame({
    "track": tracks,
    "genre": np.random.choice(genres, n_tracks),
    "sentiment_score": np.round(np.random.normal(0.2, 0.45, n_tracks), 2),
    "streams": np.random.randint(5000, 250000, n_tracks),
    "skip_rate": np.round(np.random.uniform(0.05, 0.45, n_tracks), 2),
    "completion_rate": np.round(np.random.uniform(0.45, 0.95, n_tracks), 2),
    "save_rate": np.round(np.random.uniform(0.02, 0.30, n_tracks), 2),
    "subscription_price": np.round(np.random.uniform(5.99, 14.99, n_tracks), 2),
})

# Scores in (-0.1, 0.1] count as Neutral; the outer edges (-10 / 10) are just
# wide enough to catch every generated score.
df["sentiment_label"] = pd.cut(
    df["sentiment_score"],
    bins=[-10, -0.1, 0.1, 10],
    labels=["Negative", "Neutral", "Positive"],
)

# Forecast = current streams scaled by a linear mix of sentiment, save rate
# and skip rate, floored at 1000 streams.
df["forecast_streams"] = (
    df["streams"]
    * (
        1
        + 0.35 * df["sentiment_score"]
        + 0.25 * df["save_rate"]
        - 0.20 * df["skip_rate"]
    )
).clip(lower=1000).astype(int)

# Weighted blend: 40% shifted/halved sentiment, 30% forecast relative to the
# best track, 20% completion rate, 10% save rate.
df["playlist_score"] = (
    0.4 * ((df["sentiment_score"] + 1) / 2)
    + 0.3 * (df["forecast_streams"] / df["forecast_streams"].max())
    + 0.2 * df["completion_rate"]
    + 0.1 * df["save_rate"]
)

# pd.cut yields an ordered categorical (Demote < Keep < Promote), which
# top_playlist_recommendations relies on when sorting.
df["playlist_action"] = pd.cut(
    df["playlist_score"],
    bins=[-1, 0.45, 0.65, 2],
    labels=["Demote", "Keep", "Promote"],
)

# Multiplier applied to the price gap in the pricing model; higher values mean
# the segment reacts more strongly to price changes.
segment_price_sensitivity = {
    "Student": 1.35,
    "Casual Listener": 1.15,
    "Standard": 1.00,
    "Family": 0.85,
    "Premium Heavy User": 0.65,
}

# -----------------------------
# Pricing model constants
# -----------------------------
# Shared by pricing_simulator, pricing_curve and the price slider so that the
# text result, the revenue curve and the UI range cannot drift apart.
REFERENCE_PRICE = 9.99
BASE_CONVERSION = 0.42
BASE_CHURN = 0.18
BASE_USERS = 10000
PRICE_MIN = 5.99
PRICE_MAX = 15.99
PRICE_STEP = 0.5
# Number of grid points from PRICE_MIN to PRICE_MAX inclusive.
PRICE_POINTS = int(round((PRICE_MAX - PRICE_MIN) / PRICE_STEP)) + 1


# -----------------------------
# Helper functions
# -----------------------------
def _finish_figure(fig):
    """Apply a tight layout to *fig* and release it from pyplot's registry.

    pyplot keeps a reference to every figure it creates until the figure is
    closed, so a callback that only returns the figure leaks one figure per
    click. Closing does not invalidate the Figure object; it can still be
    rendered by the caller.
    """
    fig.tight_layout()
    plt.close(fig)
    return fig


def _conversion_and_churn(sensitivity, price):
    """Return ``(conversion, churn)`` for a segment sensitivity and price.

    Both rates move linearly with the sensitivity-scaled distance of *price*
    from REFERENCE_PRICE and are clamped to [0.05, 0.85] and [0.03, 0.60]
    respectively.
    """
    price_gap = sensitivity * (price - REFERENCE_PRICE)
    conversion = max(0.05, min(0.85, BASE_CONVERSION - price_gap * 0.035))
    churn = max(0.03, min(0.60, BASE_CHURN + price_gap * 0.025))
    return conversion, churn


def dataset_preview() -> pd.DataFrame:
    """Return the 15 tracks with the highest forecast, with key columns only."""
    columns = [
        "track", "genre", "sentiment_score", "sentiment_label",
        "streams", "forecast_streams", "playlist_action",
    ]
    return df[columns].sort_values("forecast_streams", ascending=False).head(15)


def sentiment_summary() -> str:
    """Return a text summary of sentiment across the dataset."""
    avg_sent = round(df["sentiment_score"].mean(), 3)
    pos_pct = round(df["sentiment_label"].eq("Positive").mean() * 100, 1)
    neg_pct = round(df["sentiment_label"].eq("Negative").mean() * 100, 1)
    # Genre whose tracks have the highest mean sentiment score.
    top_genre = df.groupby("genre")["sentiment_score"].mean().idxmax()

    return (
        f"Average sentiment score: {avg_sent}\n"
        f"Positive reviews share: {pos_pct}%\n"
        f"Negative reviews share: {neg_pct}%\n"
        f"Best-performing genre by sentiment: {top_genre}\n\n"
        f"Interpretation: genres and tracks with stronger sentiment should be prioritized "
        f"for playlist exposure because they are more likely to support future engagement."
    )


def sentiment_chart() -> plt.Figure:
    """Return a bar chart of track counts per sentiment label."""
    # reindex fixes the bar order; fillna covers a label with no tracks.
    counts = (
        df["sentiment_label"]
        .value_counts()
        .reindex(["Positive", "Neutral", "Negative"])
        .fillna(0)
    )

    fig, ax = plt.subplots(figsize=(7, 4))
    counts.plot(kind="bar", ax=ax)
    ax.set_title("Review Sentiment Distribution")
    ax.set_xlabel("Sentiment")
    ax.set_ylabel("Number of Tracks")
    return _finish_figure(fig)


def genre_sentiment_chart() -> plt.Figure:
    """Return a bar chart of mean sentiment per genre, highest first."""
    genre_scores = (
        df.groupby("genre")["sentiment_score"].mean().sort_values(ascending=False)
    )

    fig, ax = plt.subplots(figsize=(8, 4))
    genre_scores.plot(kind="bar", ax=ax)
    ax.set_title("Average Sentiment by Genre")
    ax.set_xlabel("Genre")
    ax.set_ylabel("Average Sentiment Score")
    return _finish_figure(fig)


def top_playlist_recommendations() -> tuple:
    """Return ``(table, explanation)`` for the ten strongest playlist candidates.

    Rows are ordered by playlist action (Promote first, via the ordered
    categorical) and then by forecast streams, both descending.
    """
    columns = [
        "track", "genre", "sentiment_score", "forecast_streams",
        "completion_rate", "save_rate", "playlist_action",
    ]
    top_df = (
        df[columns]
        .sort_values(["playlist_action", "forecast_streams"], ascending=[False, False])
        .head(10)
    )

    explanation = (
        "Top tracks are ranked using sentiment, forecasted streams, completion rate, "
        "and save rate. Tracks marked 'Promote' are the strongest candidates for editorial playlists."
    )
    return top_df, explanation


def forecast_summary() -> str:
    """Return a text summary comparing current and forecast average streams."""
    current_avg = int(df["streams"].mean())
    future_avg = int(df["forecast_streams"].mean())
    growth = round(((future_avg - current_avg) / current_avg) * 100, 1)
    top_track = df.loc[df["forecast_streams"].idxmax(), "track"]

    return (
        f"Current average streams: {current_avg}\n"
        f"Forecast average streams: {future_avg}\n"
        f"Expected growth: {growth}%\n"
        f"Top forecasted track: {top_track}\n\n"
        f"Interpretation: use forecast signals together with sentiment to decide which tracks "
        f"deserve promotion in major playlists."
    )


def forecast_chart() -> plt.Figure:
    """Return a chart of current (bars) vs forecast (line) streams for the top 10 tracks."""
    top10 = df.sort_values("forecast_streams", ascending=False).head(10)

    fig, ax = plt.subplots(figsize=(9, 4))
    ax.bar(top10["track"], top10["streams"], label="Current Streams")
    ax.plot(top10["track"], top10["forecast_streams"], marker="o", label="Forecast Streams")
    ax.set_title("Current vs Forecasted Streams for Top Tracks")
    ax.set_xlabel("Track")
    ax.set_ylabel("Streams")
    ax.legend()
    # Rotate the labels on this axes explicitly instead of going through
    # pyplot's "current figure" state.
    plt.setp(ax.get_xticklabels(), rotation=45, ha="right")
    return _finish_figure(fig)


def pricing_simulator(segment: str, price: float) -> str:
    """Return a text report for charging *price* per month to *segment*.

    Args:
        segment: A key of ``segment_price_sensitivity``.
        price: Monthly price to evaluate.

    Raises:
        KeyError: If *segment* is not a known segment.
    """
    sensitivity = segment_price_sensitivity[segment]
    conversion, churn = _conversion_and_churn(sensitivity, price)

    paying_users = int(BASE_USERS * conversion * (1 - churn))
    monthly_revenue = round(paying_users * price, 2)

    if conversion < 0.20:
        recommendation = "Price is likely too high for this segment. Consider a discount or bundle."
    elif churn > 0.28:
        recommendation = "Retention risk is elevated. A softer offer may improve subscriber stability."
    else:
        recommendation = "This pricing level looks commercially reasonable for the selected segment."

    return (
        f"Segment: {segment}\n"
        f"Tested monthly price: €{price:.2f}\n"
        f"Estimated conversion rate: {conversion:.2%}\n"
        f"Estimated churn rate: {churn:.2%}\n"
        f"Estimated retained paying users: {paying_users}\n"
        f"Estimated monthly revenue: €{monthly_revenue:,.2f}\n\n"
        f"Recommendation: {recommendation}"
    )


def pricing_curve(segment: str) -> plt.Figure:
    """Return a revenue-vs-price line chart for *segment*.

    The price grid covers the same range and step as the price slider.

    Raises:
        KeyError: If *segment* is not a known segment.
    """
    sensitivity = segment_price_sensitivity[segment]
    # linspace rather than a float-step arange: the number of points is fixed
    # and does not depend on floating-point rounding of the end point.
    prices = np.linspace(PRICE_MIN, PRICE_MAX, PRICE_POINTS)

    revenues = []
    for price in prices:
        conversion, churn = _conversion_and_churn(sensitivity, price)
        paying_users = BASE_USERS * conversion * (1 - churn)
        revenues.append(paying_users * price)

    fig, ax = plt.subplots(figsize=(8, 4))
    ax.plot(prices, revenues, marker="o")
    ax.set_title(f"Estimated Revenue Curve - {segment}")
    ax.set_xlabel("Monthly Price (€)")
    ax.set_ylabel("Estimated Monthly Revenue (€)")
    return _finish_figure(fig)


# -----------------------------
# Gradio UI
# -----------------------------
with gr.Blocks() as demo:
    gr.Markdown("# Music Streaming Analytics App")
    gr.Markdown(
        "Interactive demo for optimizing playlist curation and subscription pricing "
        "using listener review sentiment and stream forecasting."
    )

    with gr.Tab("Dataset Overview"):
        gr.Markdown("Preview of the enriched dataset used in the project.")
        preview_btn = gr.Button("Load Sample Dataset")
        preview_table = gr.Dataframe()
        preview_btn.click(fn=dataset_preview, outputs=preview_table)

    with gr.Tab("Sentiment Insights"):
        gr.Markdown("Qualitative analysis based on music review sentiment.")
        with gr.Row():
            sentiment_btn = gr.Button("Generate Sentiment Summary")
            genre_btn = gr.Button("Show Genre Sentiment")
        sentiment_text = gr.Textbox(lines=8, label="Sentiment Summary")
        sentiment_plot = gr.Plot(label="Sentiment Distribution")
        genre_plot = gr.Plot(label="Genre Sentiment")

        # One button drives both the text summary and the distribution chart.
        sentiment_btn.click(fn=sentiment_summary, outputs=sentiment_text)
        sentiment_btn.click(fn=sentiment_chart, outputs=sentiment_plot)
        genre_btn.click(fn=genre_sentiment_chart, outputs=genre_plot)

    with gr.Tab("Playlist Curation"):
        gr.Markdown("Quantitative recommendation engine for playlist promotion decisions.")
        playlist_btn = gr.Button("Show Top Playlist Recommendations")
        playlist_table = gr.Dataframe()
        playlist_text = gr.Textbox(lines=4, label="Recommendation Logic")
        playlist_btn.click(
            fn=top_playlist_recommendations,
            outputs=[playlist_table, playlist_text],
        )

    with gr.Tab("Stream Forecast"):
        gr.Markdown("Forecasting future streams using sentiment and engagement signals.")
        forecast_btn = gr.Button("Run Forecast Analysis")
        forecast_text = gr.Textbox(lines=8, label="Forecast Summary")
        forecast_plot = gr.Plot(label="Current vs Forecasted Streams")
        forecast_btn.click(fn=forecast_summary, outputs=forecast_text)
        forecast_btn.click(fn=forecast_chart, outputs=forecast_plot)

    with gr.Tab("Pricing Strategy"):
        gr.Markdown("Simulate pricing decisions for different listener segments.")
        with gr.Row():
            segment_input = gr.Dropdown(
                choices=list(segment_price_sensitivity.keys()),
                value="Standard",
                label="Listener Segment",
            )
            price_input = gr.Slider(
                minimum=PRICE_MIN,
                maximum=PRICE_MAX,
                value=REFERENCE_PRICE,
                step=PRICE_STEP,
                label="Monthly Price (€)",
            )
        price_btn = gr.Button("Evaluate Pricing Scenario")
        pricing_text = gr.Textbox(lines=10, label="Pricing Recommendation")
        pricing_plot = gr.Plot(label="Revenue Curve")

        price_btn.click(
            fn=pricing_simulator,
            inputs=[segment_input, price_input],
            outputs=pricing_text,
        )
        price_btn.click(fn=pricing_curve, inputs=segment_input, outputs=pricing_plot)

# Launch only when run as a script so importing the module (tests, reload
# tooling) does not start a server.
if __name__ == "__main__":
    demo.launch()