# AI-Assisted Code — Academic Integrity Notice
# Generated with The App Builder. ESCP coursework.
# Student must be able to explain all code when asked.
"""Gradio Space that runs the fixed notebook workflow on bundled CSV files.

The pipeline reads two bundled CSV files (reviews and sales), draws two charts
for a sample of titles, derives a pricing decision per title, and bundles every
generated figure/table into a zip archive offered for download.
"""

from itertools import product
from pathlib import Path
import shutil
import warnings
from zipfile import ZipFile

import matplotlib

# Figures are only ever written to PNG files, and the click handler may run in
# a server worker thread where GUI backends are unsafe, so select the
# non-interactive Agg backend before pyplot is imported.
matplotlib.use("Agg")

import gradio as gr
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import statsmodels.api as sm

DATA_REVIEWS = "synthetic_book_reviews.csv"
DATA_SALES = "synthetic_sales_data.csv"
ART_DIR = Path("artifacts")
FIG_DIR = ART_DIR / "figures"
TAB_DIR = ART_DIR / "tables"


def ensure_dirs() -> None:
    """Create the figure and table output folders if they do not exist."""
    FIG_DIR.mkdir(parents=True, exist_ok=True)
    TAB_DIR.mkdir(parents=True, exist_ok=True)


def load_data():
    """Load the two fixed datasets bundled with the Space.

    Returns:
        A ``(reviews, sales)`` tuple of DataFrames.

    Raises:
        ValueError: If either CSV lacks a column the pipeline reads later.
    """
    reviews = pd.read_csv(DATA_REVIEWS)
    sales = pd.read_csv(DATA_SALES)

    # "sentiment_label" is grouped on by both the sentiment chart and the
    # decision table, so check for it here instead of failing later with a
    # KeyError deep inside a groupby.
    required_reviews = {
        "title",
        "review_text",
        "rating",
        "popularity_score",
        "sentiment_label",
    }
    required_sales = {"title", "month", "units_sold"}

    if not required_reviews.issubset(reviews.columns):
        raise ValueError(f"Missing review columns: {required_reviews - set(reviews.columns)}")
    if not required_sales.issubset(sales.columns):
        raise ValueError(f"Missing sales columns: {required_sales - set(sales.columns)}")
    return reviews, sales


def build_sample_titles(reviews: pd.DataFrame) -> list:
    """Pick up to 5 titles from each popularity score, like in the notebook.

    Scores are visited in ascending order; within a score, titles keep their
    order of first appearance in ``reviews``.
    """
    sampled_titles = []
    for score in sorted(reviews["popularity_score"].dropna().unique()):
        titles = (
            reviews.loc[reviews["popularity_score"] == score, "title"]
            .dropna()
            .unique()
            .tolist()
        )
        sampled_titles.extend(titles[:5])
    return sampled_titles


def save_sales_trend_chart(
    sampled_sales: pd.DataFrame,
    sampled_books: pd.DataFrame,
    sampled_titles: list,
) -> str:
    """Create the sampled sales trend figure.

    Args:
        sampled_sales: Sales rows (``title``, ``month``, ``units_sold``) for the
            sampled titles.
        sampled_books: Review rows for the sampled titles; the first row per
            title supplies the ``popularity_score`` used to colour its line.
        sampled_titles: Titles to plot, one line each.

    Returns:
        Path of the saved PNG, as a string.
    """
    popularity_colors = {
        1: "darkred",
        2: "orangered",
        3: "gold",
        4: "mediumseagreen",
        5: "royalblue",
    }
    fig, ax = plt.subplots(figsize=(14, 6))
    try:
        for title in sampled_titles:
            row = sampled_books[sampled_books["title"] == title].iloc[0]
            # Sort chronologically so the line does not zig-zag when the CSV
            # rows are not already in month order.
            subset = sampled_sales[sampled_sales["title"] == title].sort_values("month")
            ax.plot(
                subset["month"],
                subset["units_sold"],
                label=title,
                color=popularity_colors.get(row["popularity_score"], "gray"),
            )
        ax.set_title("Sales Trends Over Time")
        ax.set_xlabel("Month")
        ax.set_ylabel("Units Sold")
        ax.tick_params(axis="x", rotation=45)
        ax.grid(True, alpha=0.3)
        ax.legend(loc="center left", bbox_to_anchor=(1, 0.5), fontsize="small")
        fig.tight_layout()
        out = FIG_DIR / "sales_trends_sampled_titles.png"
        fig.savefig(out, dpi=150, bbox_inches="tight")
    finally:
        # pyplot keeps every open figure alive; close even when plotting fails
        # so repeated runs do not accumulate figures.
        plt.close(fig)
    return str(out)


def save_sentiment_chart(sampled_reviews: pd.DataFrame) -> str:
    """Create the stacked sentiment distribution chart.

    Also writes the underlying counts to ``sentiment_counts_sampled.csv`` in
    the tables folder.

    Args:
        sampled_reviews: Review rows with ``rating``, ``title`` and
            ``sentiment_label`` columns. The frame is copied, not modified.

    Returns:
        Path of the saved PNG, as a string.
    """
    sampled_reviews = sampled_reviews.copy()
    sampled_reviews["grouped_title"] = (
        sampled_reviews["rating"].astype(str) + "★ | " + sampled_reviews["title"]
    )
    counts = (
        sampled_reviews.groupby(["grouped_title", "sentiment_label"])
        .size()
        .unstack(fill_value=0)
    )
    # Force a fixed column set/order so the stacking order and colours are
    # stable even when one sentiment class never occurs.
    counts = counts.reindex(columns=["negative", "neutral", "positive"], fill_value=0)
    counts.reset_index().to_csv(TAB_DIR / "sentiment_counts_sampled.csv", index=False)

    fig, ax = plt.subplots(figsize=(12, 12))
    try:
        counts.plot.barh(
            stacked=True,
            ax=ax,
            color={"negative": "royalblue", "neutral": "lightgray", "positive": "crimson"},
        )
        ax.set_title("Sentiment Distribution in Reviews")
        ax.set_xlabel("Number of Reviews")
        ax.set_ylabel("Book Title")
        ax.grid(axis="x", linestyle="--", alpha=0.4)
        fig.tight_layout()
        out = FIG_DIR / "sentiment_distribution_sampled_titles.png"
        fig.savefig(out, dpi=150, bbox_inches="tight")
    finally:
        plt.close(fig)
    return str(out)


def pricing_action(row) -> str:
    """Apply the exact notebook decision rules.

    Args:
        row: Mapping-like record (e.g. a DataFrame row) with
            ``avg_units_sold`` and, optionally, ``positive_ratio`` and
            ``negative_ratio``; a missing ratio is treated as 0.

    Returns:
        ``"increase price"``, ``"decrease price"`` or ``"keep price"``.
    """
    if row["avg_units_sold"] >= 120 and row.get("positive_ratio", 0) >= 0.6:
        return "increase price"
    if row["avg_units_sold"] <= 60 and row.get("negative_ratio", 0) >= 0.4:
        return "decrease price"
    return "keep price"


def save_decision_table(reviews: pd.DataFrame, sales: pd.DataFrame) -> pd.DataFrame:
    """Compute and save the final pricing decision table.

    Average units sold per title are joined with the share of positive and
    negative reviews, then ``pricing_action`` is applied row by row. The result
    is written to ``pricing_decisions.csv`` in the tables folder.

    Returns:
        DataFrame with columns ``title``, ``avg_units_sold``,
        ``positive_ratio``, ``negative_ratio`` and ``pricing_action``, sorted
        by title.
    """
    avg_sales = (
        sales.groupby("title", as_index=False)["units_sold"]
        .mean()
        .rename(columns={"units_sold": "avg_units_sold"})
    )

    sentiment = reviews.groupby(["title", "sentiment_label"]).size().unstack(fill_value=0)
    sentiment["total"] = sentiment.sum(axis=1)
    # .get(..., 0) keeps the ratio at 0 when a sentiment class is absent from
    # the whole dataset and therefore has no column.
    sentiment["positive_ratio"] = sentiment.get("positive", 0) / sentiment["total"]
    sentiment["negative_ratio"] = sentiment.get("negative", 0) / sentiment["total"]

    # Left join keeps titles that have sales but no reviews; their ratios are
    # filled with 0.
    decisions = avg_sales.merge(sentiment, on="title", how="left").fillna(0)
    decisions["pricing_action"] = decisions.apply(pricing_action, axis=1)

    final_cols = ["title", "avg_units_sold", "positive_ratio", "negative_ratio", "pricing_action"]
    final_df = decisions[final_cols].sort_values("title").reset_index(drop=True)
    final_df.to_csv(TAB_DIR / "pricing_decisions.csv", index=False)
    return final_df


def save_dashboard_export(sales: pd.DataFrame) -> None:
    """Save a monthly dashboard export like the notebook fallback.

    Writes total units sold per month to ``df_dashboard.csv`` in the tables
    folder.
    """
    dashboard = (
        sales.groupby("month", as_index=False)
        .agg(total_units_sold=("units_sold", "sum"))
        .sort_values("month")
    )
    dashboard.to_csv(TAB_DIR / "df_dashboard.csv", index=False)


def bundle_exports() -> str:
    """Zip all generated export files for easy download.

    Returns:
        Path of the created archive, as a string.
    """
    zip_path = ART_DIR / "exports.zip"
    # Sorted so the archive member order does not depend on directory listing
    # order; figures still come before tables.
    export_paths = sorted(FIG_DIR.glob("*")) + sorted(TAB_DIR.glob("*"))
    with ZipFile(zip_path, "w") as zf:
        for path in export_paths:
            zf.write(path, arcname=path.relative_to(ART_DIR))
    return str(zip_path)


def run_analysis():
    """Run the complete pipeline and return final outputs only.

    Returns:
        ``(sales_chart_path, sentiment_chart_path, decision_df, zip_path)`` in
        the order expected by the UI outputs.
    """
    ensure_dirs()
    reviews, sales = load_data()
    sales["month"] = pd.to_datetime(sales["month"])

    sampled_titles = build_sample_titles(reviews)
    sampled_sales = sales[sales["title"].isin(sampled_titles)].copy()
    # The same filtered review rows feed both charts: the trend chart reads the
    # popularity score from them, the sentiment chart reads the labels.
    sampled_reviews = reviews[reviews["title"].isin(sampled_titles)].copy()

    chart_1 = save_sales_trend_chart(sampled_sales, sampled_reviews, sampled_titles)
    chart_2 = save_sentiment_chart(sampled_reviews)
    decision_df = save_decision_table(reviews, sales)
    save_dashboard_export(sales)
    export_zip = bundle_exports()
    return chart_1, chart_2, decision_df, export_zip


with gr.Blocks() as demo:
    gr.Markdown("# Book Analytics Dashboard")
    gr.Markdown("Runs the fixed notebook workflow on the bundled review and sales datasets.")
    run_btn = gr.Button("Run analysis")
    sales_chart = gr.Image(label="Sales trends")
    sentiment_chart = gr.Image(label="Sentiment distribution")
    decision_table = gr.Dataframe(label="Pricing decisions")
    exports = gr.File(label="Download all exports")
    run_btn.click(
        fn=run_analysis,
        inputs=None,
        outputs=[sales_chart, sentiment_chart, decision_table, exports],
    )


if __name__ == "__main__":
    demo.launch()