# AI-Assisted Code — Academic Integrity Notice
# Generated with The App Builder. ESCP coursework.
# Student must be able to explain all code when asked.
"""Gradio Space that runs the fixed notebook workflow on bundled CSV files."""
from pathlib import Path
import shutil
import warnings
import gradio as gr
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import statsmodels.api as sm
from itertools import product
from zipfile import ZipFile
# Relative paths of the two CSV inputs read by load_data().
DATA_REVIEWS = "synthetic_book_reviews.csv"
DATA_SALES = "synthetic_sales_data.csv"
# Output tree: everything the pipeline writes lives under ART_DIR.
ART_DIR = Path("artifacts")
FIG_DIR = ART_DIR / "figures"  # PNG charts
TAB_DIR = ART_DIR / "tables"  # CSV tables
def ensure_dirs():
    """Make sure the figure and table output folders exist.

    Missing parent folders are created as well, and folders that already
    exist are left untouched.
    """
    for folder in (FIG_DIR, TAB_DIR):
        folder.mkdir(parents=True, exist_ok=True)
def load_data():
    """Load and validate the two fixed datasets bundled with the Space.

    Returns:
        A ``(reviews, sales)`` tuple of DataFrames read from ``DATA_REVIEWS``
        and ``DATA_SALES``.

    Raises:
        ValueError: If either file lacks a column the pipeline relies on.
    """
    reviews = pd.read_csv(DATA_REVIEWS)
    sales = pd.read_csv(DATA_SALES)

    # "sentiment_label" is grouped on by both the sentiment chart and the
    # pricing decision table, so it is validated here to fail early with a
    # clear message instead of a KeyError deep inside a groupby.
    required_reviews = {
        "title",
        "review_text",
        "rating",
        "popularity_score",
        "sentiment_label",
    }
    required_sales = {"title", "month", "units_sold"}

    # Sorted so the error message is stable from run to run.
    missing_reviews = sorted(required_reviews.difference(reviews.columns))
    if missing_reviews:
        raise ValueError(f"Missing review columns: {missing_reviews}")

    missing_sales = sorted(required_sales.difference(sales.columns))
    if missing_sales:
        raise ValueError(f"Missing sales columns: {missing_sales}")

    return reviews, sales
def build_sample_titles(reviews):
    """Return up to five distinct titles for every popularity score.

    Scores are visited in ascending order with missing scores skipped.
    Within a score, titles keep their order of first appearance and missing
    titles are ignored.
    """
    scores = reviews["popularity_score"]
    levels = sorted(scores.dropna().unique())
    per_level = (
        reviews.loc[scores == level, "title"].dropna().unique().tolist()[:5]
        for level in levels
    )
    return [title for batch in per_level for title in batch]
def save_sales_trend_chart(sampled_sales, sampled_books, sampled_titles):
    """Plot units sold per month for each sampled title and save it as a PNG.

    Args:
        sampled_sales: DataFrame with "title", "month" and "units_sold".
        sampled_books: DataFrame with "title" and "popularity_score"; the
            first row found for a title decides its line colour.
        sampled_titles: Iterable of titles to draw, one line each.

    Returns:
        The path of the saved figure, as a string.
    """
    popularity_colors = {1: "darkred", 2: "orangered", 3: "gold", 4: "mediumseagreen", 5: "royalblue"}
    fig, ax = plt.subplots(figsize=(14, 6))
    try:
        for title in sampled_titles:
            scores = sampled_books.loc[sampled_books["title"] == title, "popularity_score"]
            # A title without a matching book row gets the gray fallback
            # colour rather than aborting the whole chart.
            score = scores.iloc[0] if not scores.empty else None
            # Order by month so the line never doubles back when the input
            # rows are not already chronological.
            subset = sampled_sales[sampled_sales["title"] == title].sort_values("month", kind="stable")
            ax.plot(subset["month"], subset["units_sold"], label=title,
                    color=popularity_colors.get(score, "gray"))
        ax.set_title("Sales Trends Over Time")
        ax.set_xlabel("Month")
        ax.set_ylabel("Units Sold")
        ax.tick_params(axis="x", rotation=45)
        ax.grid(True, alpha=0.3)
        # The legend sits outside the axes so it does not cover the lines.
        ax.legend(loc="center left", bbox_to_anchor=(1, 0.5), fontsize="small")
        fig.tight_layout()
        out = FIG_DIR / "sales_trends_sampled_titles.png"
        fig.savefig(out, dpi=150, bbox_inches="tight")
    finally:
        # Always release the figure, even if plotting or saving raised.
        plt.close(fig)
    return str(out)
def save_sentiment_chart(sampled_reviews):
    """Create the stacked sentiment distribution chart and its counts table.

    Args:
        sampled_reviews: DataFrame with "rating", "title" and
            "sentiment_label" columns. The caller's frame is not modified.

    Returns:
        The path of the saved figure, as a string. The counts are also
        written to "sentiment_counts_sampled.csv" in ``TAB_DIR``.
    """
    sampled_reviews = sampled_reviews.copy()
    # One bar per (rating, title) pair, labelled e.g. "4★ | Some Title".
    sampled_reviews["grouped_title"] = sampled_reviews["rating"].astype(str) + "★ | " + sampled_reviews["title"]
    counts = sampled_reviews.groupby(["grouped_title", "sentiment_label"]).size().unstack(fill_value=0)
    # Fix the column set and order so that absent labels show up as zeros.
    counts = counts.reindex(columns=["negative", "neutral", "positive"], fill_value=0)
    counts.reset_index().to_csv(TAB_DIR / "sentiment_counts_sampled.csv", index=False)

    fig, ax = plt.subplots(figsize=(12, 12))
    try:
        counts.plot.barh(stacked=True, ax=ax, color={"negative": "royalblue", "neutral": "lightgray", "positive": "crimson"})
        ax.set_title("Sentiment Distribution in Reviews")
        ax.set_xlabel("Number of Reviews")
        ax.set_ylabel("Book Title")
        ax.grid(axis="x", linestyle="--", alpha=0.4)
        fig.tight_layout()
        out = FIG_DIR / "sentiment_distribution_sampled_titles.png"
        fig.savefig(out, dpi=150, bbox_inches="tight")
    finally:
        # Always release the figure, even if plotting or saving raised.
        plt.close(fig)
    return str(out)
def pricing_action(row):
    """Map one title's sales and sentiment figures to a pricing decision.

    ``row`` must support ``row["avg_units_sold"]`` and ``row.get(...)``.
    A missing "positive_ratio" or "negative_ratio" counts as 0.

    Returns "increase price" for at least 120 average units with a positive
    ratio of 0.6 or more, "decrease price" for at most 60 average units with
    a negative ratio of 0.4 or more, and "keep price" otherwise.
    """
    units = row["avg_units_sold"]

    strong_seller = units >= 120
    if strong_seller and row.get("positive_ratio", 0) >= 0.6:
        return "increase price"

    weak_seller = units <= 60
    if weak_seller and row.get("negative_ratio", 0) >= 0.4:
        return "decrease price"

    return "keep price"
def save_decision_table(reviews, sales):
    """Build the per-title pricing decision table, save it and return it.

    Average units sold per title are joined with the share of positive and
    negative reviews, and ``pricing_action`` is applied to every row. Titles
    with no reviews end up with ratios of 0. The result is sorted by title
    and written to "pricing_decisions.csv" in ``TAB_DIR``.
    """
    mean_units = (
        sales.groupby("title", as_index=False)["units_sold"]
        .mean()
        .rename(columns={"units_sold": "avg_units_sold"})
    )

    # Rows are titles, columns are sentiment labels, cells are review counts.
    label_counts = reviews.groupby(["title", "sentiment_label"]).size().unstack(fill_value=0)
    label_counts["total"] = label_counts.sum(axis=1)
    for ratio_col, label in (("positive_ratio", "positive"), ("negative_ratio", "negative")):
        # .get falls back to 0 when a label never occurs in the data.
        label_counts[ratio_col] = label_counts.get(label, 0) / label_counts["total"]

    merged = mean_units.merge(label_counts, on="title", how="left").fillna(0)
    merged["pricing_action"] = merged.apply(pricing_action, axis=1)

    wanted = ["title", "avg_units_sold", "positive_ratio", "negative_ratio", "pricing_action"]
    result = merged[wanted].sort_values("title").reset_index(drop=True)
    result.to_csv(TAB_DIR / "pricing_decisions.csv", index=False)
    return result
def save_dashboard_export(sales):
    """Write total units sold per month to "df_dashboard.csv" in ``TAB_DIR``.

    The export has one row per month, ordered by month, with the sum in a
    "total_units_sold" column.
    """
    monthly_totals = sales.groupby("month", as_index=False).agg(
        total_units_sold=("units_sold", "sum")
    )
    monthly_totals = monthly_totals.sort_values("month")
    target = TAB_DIR / "df_dashboard.csv"
    monthly_totals.to_csv(target, index=False)
def bundle_exports():
    """Pack every generated figure and table into a single zip archive.

    Entries are stored under their path relative to ``ART_DIR``. Returns the
    archive path as a string.
    """
    archive = ART_DIR / "exports.zip"
    with ZipFile(archive, "w") as bundle:
        members = [
            entry
            for folder in (FIG_DIR, TAB_DIR)
            for entry in folder.glob("*")
        ]
        for entry in members:
            bundle.write(entry, arcname=entry.relative_to(ART_DIR))
    return str(archive)
def run_analysis():
    """Execute the whole workflow and hand back only the final outputs.

    Returns a 4-tuple: the sales chart path, the sentiment chart path, the
    pricing decision DataFrame and the path of the zip with all exports.
    """
    ensure_dirs()
    reviews, sales = load_data()
    sales["month"] = pd.to_datetime(sales["month"])

    sampled_titles = build_sample_titles(reviews)
    sales_in_sample = sales["title"].isin(sampled_titles)
    reviews_in_sample = reviews["title"].isin(sampled_titles)

    sampled_sales = sales.loc[sales_in_sample].copy()
    # The review chart and the book lookup each get their own copy of the
    # same filtered rows.
    sampled_reviews = reviews.loc[reviews_in_sample].copy()
    sampled_books = reviews.loc[reviews_in_sample].copy()

    sales_png = save_sales_trend_chart(sampled_sales, sampled_books, sampled_titles)
    sentiment_png = save_sentiment_chart(sampled_reviews)
    decisions = save_decision_table(reviews, sales)
    save_dashboard_export(sales)

    # Zip last so the archive contains every file written above.
    return sales_png, sentiment_png, decisions, bundle_exports()
# Gradio UI: a heading, a short description, one button and four output
# components (two images, a table and a file download).
with gr.Blocks() as demo:
    gr.Markdown("# Book Analytics Dashboard")
    gr.Markdown("Runs the fixed notebook workflow on the bundled review and sales datasets.")
    run_btn = gr.Button("Run analysis")
    sales_chart = gr.Image(label="Sales trends")
    sentiment_chart = gr.Image(label="Sentiment distribution")
    decision_table = gr.Dataframe(label="Pricing decisions")
    exports = gr.File(label="Download all exports")
    # run_analysis takes no inputs; the outputs list follows the order of its
    # return tuple (sales chart, sentiment chart, decision table, zip path).
    run_btn.click(fn=run_analysis, inputs=None, outputs=[sales_chart, sentiment_chart, decision_table, exports])

# Launch the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()
|