File size: 7,103 Bytes
ebea030
 
 
 
3e30f6d
 
 
 
 
ebea030
 
3e30f6d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ebea030
 
3e30f6d
 
 
 
 
 
 
 
 
ebea030
 
3e30f6d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ebea030
3e30f6d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ebea030
3e30f6d
 
 
 
 
 
 
 
 
 
 
 
ebea030
 
3e30f6d
 
 
 
 
 
 
 
ebea030
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
# AI-Assisted Code — Academic Integrity Notice
# Generated with The App Builder. ESCP coursework.
# Student must be able to explain all code when asked.

"""Gradio Space that runs the fixed notebook workflow on bundled CSV files."""

from pathlib import Path
import shutil
import warnings
import gradio as gr
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import statsmodels.api as sm
from itertools import product
from zipfile import ZipFile

# Bundled input datasets; the relative names are handed to pandas as-is.
DATA_REVIEWS = "synthetic_book_reviews.csv"
DATA_SALES = "synthetic_sales_data.csv"

# Output tree: charts go under artifacts/figures, CSV exports under
# artifacts/tables, and the download bundle sits directly in artifacts/.
ART_DIR = Path("artifacts")
FIG_DIR = ART_DIR.joinpath("figures")
TAB_DIR = ART_DIR.joinpath("tables")


def ensure_dirs():
    """Make sure the figure and table output folders exist."""
    for folder in (FIG_DIR, TAB_DIR):
        # parents=True also creates the shared parent folder; exist_ok=True
        # makes repeated runs a no-op instead of an error.
        folder.mkdir(parents=True, exist_ok=True)


def load_data():
    """Load and validate the two fixed datasets bundled with the Space.

    Returns:
        A ``(reviews, sales)`` tuple of DataFrames read from ``DATA_REVIEWS``
        and ``DATA_SALES``.

    Raises:
        ValueError: If either file lacks a column the pipeline reads. The
            message lists the missing column names.
    """
    reviews = pd.read_csv(DATA_REVIEWS)
    sales = pd.read_csv(DATA_SALES)

    # "sentiment_label" is grouped on by both the sentiment chart and the
    # pricing table, so it is validated here: a file without it now fails
    # with a clear message instead of a KeyError deep inside a groupby.
    required_reviews = {"title", "review_text", "rating", "popularity_score", "sentiment_label"}
    required_sales = {"title", "month", "units_sold"}

    missing_reviews = required_reviews - set(reviews.columns)
    if missing_reviews:
        raise ValueError(f"Missing review columns: {missing_reviews}")

    missing_sales = required_sales - set(sales.columns)
    if missing_sales:
        raise ValueError(f"Missing sales columns: {missing_sales}")

    return reviews, sales


def build_sample_titles(reviews):
    """Return up to five distinct titles per popularity score.

    Scores are visited in ascending order and, within a score, titles keep
    their first-appearance order. Rows with a missing score or a missing
    title are ignored. A title listed under several scores is returned once
    per score.
    """
    scores = reviews["popularity_score"]
    ordered_scores = sorted(scores.dropna().unique())
    return [
        name
        for level in ordered_scores
        for name in reviews.loc[scores == level, "title"].dropna().unique().tolist()[:5]
    ]


def save_sales_trend_chart(sampled_sales, sampled_books, sampled_titles):
    """Plot units sold over time for each sampled title and save the figure.

    Args:
        sampled_sales: Sales rows with "title", "month" and "units_sold".
        sampled_books: Rows carrying "title" and "popularity_score"; the
            first row found for a title decides its line colour.
        sampled_titles: Titles to draw. Repeated titles are drawn once.

    Returns:
        Path of the saved PNG, as a string.

    Popularity scores 1-5 map to fixed colours; any other score, or a title
    with no row in ``sampled_books``, is drawn in gray.
    """
    popularity_colors = {1: "darkred", 2: "orangered", 3: "gold", 4: "mediumseagreen", 5: "royalblue"}

    # Build the title -> score lookup once instead of re-filtering the frame
    # for every title. drop_duplicates keeps the first row per title, which
    # is the row the colour was previously taken from.
    score_by_title = (
        sampled_books.drop_duplicates("title")
        .set_index("title")["popularity_score"]
        .to_dict()
    )

    fig, ax = plt.subplots(figsize=(14, 6))
    try:
        # dict.fromkeys de-duplicates while keeping order, so a title listed
        # twice does not produce two identical lines and legend entries.
        for title in dict.fromkeys(sampled_titles):
            # Sort chronologically so the line never doubles back when the
            # source rows are not already in month order.
            subset = sampled_sales[sampled_sales["title"] == title].sort_values("month", kind="stable")
            ax.plot(subset["month"], subset["units_sold"], label=title,
                    color=popularity_colors.get(score_by_title.get(title), "gray"))
        ax.set_title("Sales Trends Over Time")
        ax.set_xlabel("Month")
        ax.set_ylabel("Units Sold")
        ax.tick_params(axis="x", rotation=45)
        ax.grid(True, alpha=0.3)
        # Legend sits outside the axes on the right so it never covers lines.
        ax.legend(loc="center left", bbox_to_anchor=(1, 0.5), fontsize="small")
        fig.tight_layout()
        out = FIG_DIR / "sales_trends_sampled_titles.png"
        # bbox_inches="tight" keeps the outside legend inside the saved image.
        fig.savefig(out, dpi=150, bbox_inches="tight")
    finally:
        # Always release the figure; pyplot keeps figures alive until they
        # are closed, and this runs on every button click.
        plt.close(fig)
    return str(out)


def save_sentiment_chart(sampled_reviews):
    """Draw stacked sentiment counts per sampled title and save chart + table.

    Each bar is labelled "<rating>★ | <title>" and stacks the number of
    negative, neutral and positive reviews. The same counts are written to
    "sentiment_counts_sampled.csv" in the tables folder. The caller's frame
    is not modified.

    Returns:
        Path of the saved PNG, as a string.
    """
    frame = sampled_reviews.copy()
    rating_text = frame["rating"].astype(str)
    frame["grouped_title"] = rating_text + "★ | " + frame["title"]

    tally = frame.groupby(["grouped_title", "sentiment_label"]).size()
    tally = tally.unstack(fill_value=0)
    # Force all three label columns, in a fixed order, even when one label
    # never occurs in the sample.
    tally = tally.reindex(columns=["negative", "neutral", "positive"], fill_value=0)
    tally.reset_index().to_csv(TAB_DIR / "sentiment_counts_sampled.csv", index=False)

    palette = {"negative": "royalblue", "neutral": "lightgray", "positive": "crimson"}
    figure, axes = plt.subplots(figsize=(12, 12))
    tally.plot.barh(stacked=True, ax=axes, color=palette)
    axes.set_title("Sentiment Distribution in Reviews")
    axes.set_xlabel("Number of Reviews")
    axes.set_ylabel("Book Title")
    axes.grid(axis="x", linestyle="--", alpha=0.4)
    figure.tight_layout()

    target = FIG_DIR / "sentiment_distribution_sampled_titles.png"
    figure.savefig(target, dpi=150, bbox_inches="tight")
    plt.close(figure)
    return str(target)


def pricing_action(row):
    """Turn one title's sales and sentiment figures into a pricing call.

    Rules, checked in this order:
      * avg_units_sold >= 120 and positive_ratio >= 0.6 -> "increase price"
      * avg_units_sold <= 60 and negative_ratio >= 0.4 -> "decrease price"
      * anything else -> "keep price"

    ``row`` must provide "avg_units_sold"; a missing ratio counts as 0.
    """
    units = row["avg_units_sold"]
    if units >= 120:
        # Strong seller: raise the price only when reviews are mostly positive.
        if row.get("positive_ratio", 0) >= 0.6:
            return "increase price"
    elif units <= 60 and row.get("negative_ratio", 0) >= 0.4:
        # Weak seller with a large share of negative reviews.
        return "decrease price"
    return "keep price"


def save_decision_table(reviews, sales):
    """Build the per-title pricing decision table, save it, and return it.

    For every title in ``sales`` the table holds the mean units sold, the
    share of positive and of negative reviews, and the action chosen by
    ``pricing_action``. Titles without any review get ratios of 0. The table
    is sorted by title and written to "pricing_decisions.csv" in the tables
    folder.
    """
    mean_units = sales.groupby("title", as_index=False)["units_sold"].mean()
    mean_units = mean_units.rename(columns={"units_sold": "avg_units_sold"})

    # One row per title, one column per sentiment label, values = review counts.
    label_counts = reviews.groupby(["title", "sentiment_label"]).size().unstack(fill_value=0)
    # The total must be taken before the ratio columns are added to the frame.
    label_counts["total"] = label_counts.sum(axis=1)
    for label, ratio_column in (("positive", "positive_ratio"), ("negative", "negative_ratio")):
        # .get falls back to 0 when a label never occurs in the data.
        label_counts[ratio_column] = label_counts.get(label, 0) / label_counts["total"]

    # Left join keeps every title that has sales; fillna(0) zeroes the
    # sentiment figures of titles that have no reviews.
    merged = mean_units.merge(label_counts, on="title", how="left")
    merged = merged.fillna(0)
    merged["pricing_action"] = merged.apply(pricing_action, axis=1)

    report = merged[["title", "avg_units_sold", "positive_ratio", "negative_ratio", "pricing_action"]]
    report = report.sort_values("title").reset_index(drop=True)
    report.to_csv(TAB_DIR / "pricing_decisions.csv", index=False)
    return report


def save_dashboard_export(sales):
    """Write total units sold per month to "df_dashboard.csv" in the tables folder."""
    monthly = sales.groupby("month", as_index=False).agg(total_units_sold=("units_sold", "sum"))
    monthly = monthly.sort_values("month")
    target = TAB_DIR / "df_dashboard.csv"
    monthly.to_csv(target, index=False)


def bundle_exports():
    """Pack every generated figure and table into one zip and return its path.

    The archive is written to "exports.zip" inside the artifacts folder and
    is overwritten on each call.
    """
    archive = ART_DIR / "exports.zip"
    with ZipFile(archive, "w") as bundle:
        members = [item for folder in (FIG_DIR, TAB_DIR) for item in folder.glob("*")]
        for item in members:
            # Names relative to ART_DIR keep the figures/ and tables/ layout
            # inside the archive.
            bundle.write(item, arcname=item.relative_to(ART_DIR))
    return str(archive)


def run_analysis():
    """Run the whole pipeline and return the four values shown in the UI.

    Returns:
        ``(sales_chart_path, sentiment_chart_path, decision_table, zip_path)``
        where the paths are strings and the table is a DataFrame.
    """
    ensure_dirs()
    reviews, sales = load_data()
    sales["month"] = pd.to_datetime(sales["month"])

    sampled_titles = build_sample_titles(reviews)
    review_in_sample = reviews["title"].isin(sampled_titles)
    sale_in_sample = sales["title"].isin(sampled_titles)

    # Each chart gets its own copy of the filtered rows.
    trend_png = save_sales_trend_chart(
        sales[sale_in_sample].copy(),
        reviews[review_in_sample].copy(),
        sampled_titles,
    )
    sentiment_png = save_sentiment_chart(reviews[review_in_sample].copy())

    # The decision table and dashboard export use the full data, not the sample.
    pricing_table = save_decision_table(reviews, sales)
    save_dashboard_export(sales)

    # Zip last so the archive contains everything written above.
    archive_path = bundle_exports()
    return trend_png, sentiment_png, pricing_table, archive_path


# Gradio UI: a single button that runs the pipeline and four output widgets.
# Components render in the order they are created inside the Blocks context.
with gr.Blocks() as demo:
    gr.Markdown("# Book Analytics Dashboard")
    gr.Markdown("Runs the fixed notebook workflow on the bundled review and sales datasets.")
    run_btn = gr.Button("Run analysis")
    # Output widgets, declared in the same order run_analysis() returns its values.
    sales_chart = gr.Image(label="Sales trends")
    sentiment_chart = gr.Image(label="Sentiment distribution")
    decision_table = gr.Dataframe(label="Pricing decisions")
    exports = gr.File(label="Download all exports")
    # The callback takes no inputs; its four return values are assigned
    # positionally to the components in the outputs list.
    run_btn.click(fn=run_analysis, inputs=None, outputs=[sales_chart, sentiment_chart, decision_table, exports])

# Launch the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    demo.launch()