Upload 5 files
Browse files- README.md +13 -2
- app.py +145 -57
- requirements.txt +1 -2
README.md
CHANGED
|
@@ -1,12 +1,23 @@
|
|
| 1 |
---
|
| 2 |
title: Book Analytics Dashboard
|
| 3 |
emoji: 📊
|
|
|
|
|
|
|
| 4 |
sdk: gradio
|
| 5 |
app_file: app.py
|
|
|
|
| 6 |
---
|
| 7 |
|
| 8 |
# Book Analytics Dashboard
|
| 9 |
|
| 10 |
-
|
| 11 |
|
| 12 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
---
|
| 2 |
title: Book Analytics Dashboard
|
| 3 |
emoji: 📊
|
| 4 |
+
colorFrom: blue
|
| 5 |
+
colorTo: green
|
| 6 |
sdk: gradio
|
| 7 |
app_file: app.py
|
| 8 |
+
pinned: false
|
| 9 |
---
|
| 10 |
|
| 11 |
# Book Analytics Dashboard
|
| 12 |
|
| 13 |
+
This Space runs the notebook workflow on two fixed datasets bundled in the repository.
|
| 14 |
|
| 15 |
+
## Files expected in the repo
|
| 16 |
+
- `synthetic_book_reviews.csv`
|
| 17 |
+
- `synthetic_sales_data.csv`
|
| 18 |
+
|
| 19 |
+
## What the app shows
|
| 20 |
+
- Sales trends chart
|
| 21 |
+
- Sentiment distribution chart
|
| 22 |
+
- Pricing decisions table
|
| 23 |
+
- Downloadable export ZIP
|
app.py
CHANGED
|
@@ -2,73 +2,161 @@
|
|
| 2 |
# Generated with The App Builder. ESCP coursework.
|
| 3 |
# Student must be able to explain all code when asked.
|
| 4 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5 |
import gradio as gr
|
| 6 |
-
import pandas as pd
|
| 7 |
import matplotlib.pyplot as plt
|
| 8 |
-
|
| 9 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10 |
|
| 11 |
def load_data():
|
| 12 |
-
|
| 13 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 14 |
return reviews, sales
|
| 15 |
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
fig, ax = plt.subplots()
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 34 |
plt.close(fig)
|
| 35 |
-
return
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 58 |
reviews, sales = load_data()
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 64 |
|
| 65 |
with gr.Blocks() as demo:
|
| 66 |
-
gr.Markdown("#
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
|
|
|
|
|
|
|
| 72 |
|
| 73 |
if __name__ == "__main__":
|
| 74 |
demo.launch()
|
|
|
|
| 2 |
# Generated with The App Builder. ESCP coursework.
|
| 3 |
# Student must be able to explain all code when asked.
|
| 4 |
|
| 5 |
+
"""Gradio Space that runs the fixed notebook workflow on bundled CSV files."""
|
| 6 |
+
|
| 7 |
+
from pathlib import Path
|
| 8 |
+
import shutil
|
| 9 |
+
import warnings
|
| 10 |
import gradio as gr
|
|
|
|
| 11 |
import matplotlib.pyplot as plt
|
| 12 |
+
import pandas as pd
|
| 13 |
+
import seaborn as sns
|
| 14 |
+
import statsmodels.api as sm
|
| 15 |
+
from itertools import product
|
| 16 |
+
from zipfile import ZipFile
|
| 17 |
+
|
| 18 |
+
DATA_REVIEWS = "synthetic_book_reviews.csv"
|
| 19 |
+
DATA_SALES = "synthetic_sales_data.csv"
|
| 20 |
+
ART_DIR = Path("artifacts")
|
| 21 |
+
FIG_DIR = ART_DIR / "figures"
|
| 22 |
+
TAB_DIR = ART_DIR / "tables"
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
def ensure_dirs():
    """Make sure the figure and table output folders exist."""
    # parents=True also creates any missing parent folder;
    # exist_ok=True makes repeated calls harmless.
    for folder in (FIG_DIR, TAB_DIR):
        folder.mkdir(parents=True, exist_ok=True)
|
| 29 |
+
|
| 30 |
|
| 31 |
def load_data():
    """Load and validate the two fixed datasets bundled with the Space.

    Returns:
        A ``(reviews, sales)`` tuple of DataFrames read from ``DATA_REVIEWS``
        and ``DATA_SALES``.

    Raises:
        ValueError: If either file lacks a column the pipeline reads.
    """
    reviews = pd.read_csv(DATA_REVIEWS)
    sales = pd.read_csv(DATA_SALES)

    # "sentiment_label" is grouped on by both the sentiment chart and the
    # pricing decision table, so it is validated here to fail early with a
    # clear message instead of a KeyError in the middle of the pipeline.
    required_reviews = {"title", "review_text", "rating", "popularity_score", "sentiment_label"}
    required_sales = {"title", "month", "units_sold"}

    missing_reviews = required_reviews - set(reviews.columns)
    if missing_reviews:
        raise ValueError(f"Missing review columns: {missing_reviews}")

    missing_sales = required_sales - set(sales.columns)
    if missing_sales:
        raise ValueError(f"Missing sales columns: {missing_sales}")

    return reviews, sales
|
| 42 |
|
| 43 |
+
|
| 44 |
+
def build_sample_titles(reviews):
    """Pick up to 5 distinct titles for each popularity score.

    Args:
        reviews: DataFrame with "popularity_score" and "title" columns.

    Returns:
        A list of titles ordered by ascending popularity score and, within a
        score, by first appearance in ``reviews``. Rows with a missing score
        or title are ignored, and no title is returned more than once.
    """
    sampled_titles = []
    seen = set()
    for score in sorted(reviews["popularity_score"].dropna().unique()):
        titles = reviews.loc[reviews["popularity_score"] == score, "title"].dropna().unique().tolist()
        # A title listed under several scores must be sampled only once,
        # otherwise it is plotted (and shown in the legend) twice.
        fresh = [title for title in titles if title not in seen][:5]
        seen.update(fresh)
        sampled_titles.extend(fresh)
    return sampled_titles
|
| 51 |
+
|
| 52 |
+
|
| 53 |
+
def save_sales_trend_chart(sampled_sales, sampled_books, sampled_titles):
    """Plot one units-sold line per sampled title and save the figure as a PNG.

    Args:
        sampled_sales: DataFrame with "title", "month" and "units_sold" columns.
        sampled_books: DataFrame with "title" and "popularity_score" columns;
            the first row found for a title decides its line color.
        sampled_titles: Titles to plot, in legend order.

    Returns:
        Path of the saved figure, as a string.
    """
    popularity_colors = {1: "darkred", 2: "orangered", 3: "gold", 4: "mediumseagreen", 5: "royalblue"}

    # Build the title -> score lookup once instead of re-filtering
    # sampled_books inside the loop. Keeping the first row per title matches
    # taking the first matching row.
    score_by_title = sampled_books.drop_duplicates("title").set_index("title")["popularity_score"]

    fig, ax = plt.subplots(figsize=(14, 6))
    for title in sampled_titles:
        # Sort by month so the line is drawn chronologically even when the
        # rows are not stored in month order.
        subset = sampled_sales[sampled_sales["title"] == title].sort_values("month")
        # Titles without a known score (or with an unmapped score) fall back
        # to gray rather than aborting the whole chart.
        color = popularity_colors.get(score_by_title.get(title), "gray")
        ax.plot(subset["month"], subset["units_sold"], label=title, color=color)

    ax.set_title("Sales Trends Over Time")
    ax.set_xlabel("Month")
    ax.set_ylabel("Units Sold")
    ax.tick_params(axis="x", rotation=45)
    ax.grid(True, alpha=0.3)
    # The legend sits outside the axes, to the right of the plot.
    ax.legend(loc="center left", bbox_to_anchor=(1, 0.5), fontsize="small")
    fig.tight_layout()

    out = FIG_DIR / "sales_trends_sampled_titles.png"
    fig.savefig(out, dpi=150, bbox_inches="tight")
    plt.close(fig)
    return str(out)
|
| 73 |
+
|
| 74 |
+
|
| 75 |
+
def save_sentiment_chart(sampled_reviews):
    """Save the stacked sentiment bar chart and its underlying counts table.

    Args:
        sampled_reviews: DataFrame with "rating", "title" and
            "sentiment_label" columns. It is copied, not modified.

    Returns:
        Path of the saved figure, as a string.
    """
    label_order = ["negative", "neutral", "positive"]
    palette = {"negative": "royalblue", "neutral": "lightgray", "positive": "crimson"}

    frame = sampled_reviews.copy()
    # Each bar is keyed by "<rating>★ | <title>".
    frame["grouped_title"] = frame["rating"].astype(str) + "★ | " + frame["title"]

    tally = frame.groupby(["grouped_title", "sentiment_label"]).size()
    # Force all three label columns to exist, in a fixed order.
    counts = tally.unstack(fill_value=0).reindex(columns=label_order, fill_value=0)
    counts.reset_index().to_csv(TAB_DIR / "sentiment_counts_sampled.csv", index=False)

    fig, ax = plt.subplots(figsize=(12, 12))
    counts.plot.barh(stacked=True, ax=ax, color=palette)
    ax.set_title("Sentiment Distribution in Reviews")
    ax.set_xlabel("Number of Reviews")
    ax.set_ylabel("Book Title")
    ax.grid(axis="x", linestyle="--", alpha=0.4)
    fig.tight_layout()

    out = FIG_DIR / "sentiment_distribution_sampled_titles.png"
    fig.savefig(out, dpi=150, bbox_inches="tight")
    plt.close(fig)
    return str(out)
|
| 93 |
+
|
| 94 |
+
|
| 95 |
+
def pricing_action(row, *, high_sales=120, min_positive_ratio=0.6,
                   low_sales=60, min_negative_ratio=0.4):
    """Return the pricing recommendation for one book.

    With the default thresholds this applies the notebook decision rules
    unchanged. The thresholds are keyword-only so the function still works
    as ``df.apply(pricing_action, axis=1)``.

    Args:
        row: Mapping-like row (e.g. a pandas Series) with "avg_units_sold"
            and, optionally, "positive_ratio" / "negative_ratio". A missing
            ratio is treated as 0.
        high_sales: Minimum average units sold for a price increase.
        min_positive_ratio: Minimum positive-review share for a price increase.
        low_sales: Maximum average units sold for a price decrease.
        min_negative_ratio: Minimum negative-review share for a price decrease.

    Returns:
        "increase price", "decrease price" or "keep price".
    """
    avg_units = row["avg_units_sold"]
    if avg_units >= high_sales and row.get("positive_ratio", 0) >= min_positive_ratio:
        return "increase price"
    if avg_units <= low_sales and row.get("negative_ratio", 0) >= min_negative_ratio:
        return "decrease price"
    return "keep price"
|
| 102 |
+
|
| 103 |
+
|
| 104 |
+
def save_decision_table(reviews, sales):
    """Build the per-title pricing decision table, save it as CSV and return it.

    Args:
        reviews: DataFrame with "title" and "sentiment_label" columns.
        sales: DataFrame with "title" and "units_sold" columns.

    Returns:
        DataFrame sorted by title with the columns "title", "avg_units_sold",
        "positive_ratio", "negative_ratio" and "pricing_action".
    """
    # Mean units sold per title.
    mean_units = sales.groupby("title", as_index=False)["units_sold"].mean()
    mean_units = mean_units.rename(columns={"units_sold": "avg_units_sold"})

    # Review counts per title and sentiment label, one column per label.
    label_counts = reviews.groupby(["title", "sentiment_label"]).size().unstack(fill_value=0)
    label_counts["total"] = label_counts.sum(axis=1)
    for label in ("positive", "negative"):
        # .get(..., 0) covers the case where a label never occurs at all.
        label_counts[f"{label}_ratio"] = label_counts.get(label, 0) / label_counts["total"]

    # Left merge keeps every title that has sales; titles without reviews
    # end up with ratios of 0 after fillna.
    decisions = mean_units.merge(label_counts, on="title", how="left").fillna(0)
    decisions["pricing_action"] = decisions.apply(pricing_action, axis=1)

    final_cols = ["title", "avg_units_sold", "positive_ratio", "negative_ratio", "pricing_action"]
    final_df = decisions[final_cols].sort_values("title").reset_index(drop=True)
    final_df.to_csv(TAB_DIR / "pricing_decisions.csv", index=False)
    return final_df
|
| 117 |
+
|
| 118 |
+
|
| 119 |
+
def save_dashboard_export(sales):
    """Write the total units sold per month to ``df_dashboard.csv``.

    Args:
        sales: DataFrame with "month" and "units_sold" columns.
    """
    monthly_totals = sales.groupby("month", as_index=False).agg(
        total_units_sold=("units_sold", "sum")
    )
    monthly_totals = monthly_totals.sort_values("month")
    monthly_totals.to_csv(TAB_DIR / "df_dashboard.csv", index=False)
|
| 123 |
+
|
| 124 |
+
|
| 125 |
+
def bundle_exports():
    """Zip every generated figure and table into one downloadable archive.

    Files are added figures first, then tables, each group in sorted name
    order, so the archive layout does not depend on filesystem listing
    order. Sub-directories are skipped and members are deflate-compressed.

    Returns:
        Path of the written ZIP file, as a string.
    """
    # Local import: only ZipFile is imported at module level.
    from zipfile import ZIP_DEFLATED

    zip_path = ART_DIR / "exports.zip"
    with ZipFile(zip_path, "w", compression=ZIP_DEFLATED) as zf:
        for folder in (FIG_DIR, TAB_DIR):
            for path in sorted(folder.glob("*")):
                if path.is_file():
                    # Store paths relative to the artifacts root, e.g.
                    # "figures/<name>.png".
                    zf.write(path, arcname=path.relative_to(ART_DIR))
    return str(zip_path)
|
| 132 |
+
|
| 133 |
+
|
| 134 |
+
def run_analysis():
    """Run the whole pipeline and return the four final outputs.

    Returns:
        A tuple ``(sales chart path, sentiment chart path, pricing decision
        DataFrame, export ZIP path)``.
    """
    ensure_dirs()
    reviews, sales = load_data()
    sales["month"] = pd.to_datetime(sales["month"])

    # Restrict the charts to the sampled titles; the decision table and the
    # dashboard export below use the full datasets.
    sampled_titles = build_sample_titles(reviews)
    review_mask = reviews["title"].isin(sampled_titles)
    sampled_reviews = reviews[review_mask].copy()
    sampled_books = reviews[review_mask].copy()
    sampled_sales = sales[sales["title"].isin(sampled_titles)].copy()

    sales_png = save_sales_trend_chart(sampled_sales, sampled_books, sampled_titles)
    sentiment_png = save_sentiment_chart(sampled_reviews)
    decisions = save_decision_table(reviews, sales)
    save_dashboard_export(sales)

    # Bundle last so the archive contains everything written above.
    archive = bundle_exports()
    return sales_png, sentiment_png, decisions, archive
|
| 149 |
+
|
| 150 |
|
| 151 |
with gr.Blocks() as demo:
    # Header text.
    gr.Markdown("# Book Analytics Dashboard")
    gr.Markdown("Runs the fixed notebook workflow on the bundled review and sales datasets.")

    # Single trigger: the pipeline takes no user input.
    run_btn = gr.Button("Run analysis")

    # Output widgets, listed in the same order as the tuple returned by
    # run_analysis.
    sales_chart = gr.Image(label="Sales trends")
    sentiment_chart = gr.Image(label="Sentiment distribution")
    decision_table = gr.Dataframe(label="Pricing decisions")
    exports = gr.File(label="Download all exports")

    run_btn.click(
        fn=run_analysis,
        inputs=None,
        outputs=[sales_chart, sentiment_chart, decision_table, exports],
    )


if __name__ == "__main__":
    demo.launch()
|
requirements.txt
CHANGED
|
@@ -1,5 +1,4 @@
|
|
| 1 |
-
gradio==4.36.1
|
| 2 |
pandas==2.2.2
|
| 3 |
matplotlib==3.8.4
|
| 4 |
-
|
| 5 |
statsmodels==0.14.2
|
|
|
|
|
|
|
| 1 |
pandas==2.2.2
|
| 2 |
matplotlib==3.8.4
|
| 3 |
+
seaborn==0.13.2
|
| 4 |
statsmodels==0.14.2
|