# AI-Assisted Code — Academic Integrity Notice # Generated with The App Builder. ESCP coursework. # Student must be able to explain all code when asked. from pathlib import Path import json import traceback import random import warnings import gradio as gr import pandas as pd import plotly.graph_objects as go from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer import matplotlib matplotlib.use("Agg") import matplotlib.pyplot as plt from statsmodels.tsa.arima.model import ARIMA BASE_DIR = Path(__file__).resolve().parent ART_DIR = BASE_DIR / "artifacts" PY_FIG_DIR = ART_DIR / "py" / "figures" PY_TAB_DIR = ART_DIR / "py" / "tables" REVIEWS_CSV = BASE_DIR / "synthetic_book_reviews.csv" SALES_CSV = BASE_DIR / "synthetic_sales_data.csv" def ensure_dirs(): """Create artifact folders used by the dashboard.""" for path in [PY_FIG_DIR, PY_TAB_DIR]: path.mkdir(parents=True, exist_ok=True) def artifacts_index(): """Return the current list of generated files.""" def list_ext(folder: Path, exts: tuple[str, ...]): if not folder.exists(): return [] return sorted([p.name for p in folder.iterdir() if p.is_file() and p.suffix.lower() in exts]) return { "python": { "figures": list_ext(PY_FIG_DIR, (".png", ".jpg", ".jpeg")), "tables": list_ext(PY_TAB_DIR, (".csv", ".json")), } } def load_inputs(): """Load the uploaded input datasets.""" if not REVIEWS_CSV.exists(): raise FileNotFoundError("synthetic_book_reviews.csv not found.") if not SALES_CSV.exists(): raise FileNotFoundError("synthetic_sales_data.csv not found.") df_reviews = pd.read_csv(REVIEWS_CSV) df_sales = pd.read_csv(SALES_CSV) if "title" not in df_reviews.columns: raise ValueError("Reviews CSV must contain a 'title' column.") if "title" not in df_sales.columns or "month" not in df_sales.columns or "units_sold" not in df_sales.columns: raise ValueError("Sales CSV must contain 'title', 'month', and 'units_sold' columns.") return df_reviews, df_sales def add_review_text(df_reviews: pd.DataFrame) -> pd.DataFrame: """Recreate simple review text because the provided CSV has no text field.""" if "review_text" in df_reviews.columns: return df_reviews.copy() random.seed(42) templates = { "One": [ "Terrible quality and very disappointing.", "I would not recommend this book.", "Weak writing and poor value for money.", ], "Two": [ "Some good ideas, but overall underwhelming.", "Average read with a few frustrating parts.", "Not the worst, but I expected better.", ], "Three": [ "An okay book with mixed strengths and weaknesses.", "Decent overall and worth considering.", "Balanced experience, neither great nor bad.", ], "Four": [ "Enjoyable and well written.", "A strong book with a lot to like.", "Very good read and easy to recommend.", ], "Five": [ "Excellent book and highly recommended.", "Outstanding quality from start to finish.", "One of the best books in this sample.", ], } def build_text(row): rating = str(row.get("rating", "Three")).strip() choices = templates.get(rating, templates["Three"]) return random.choice(choices) out = df_reviews.copy() out["review_text"] = out.apply(build_text, axis=1) return out def classify_sentiment(df_reviews: pd.DataFrame) -> pd.DataFrame: """Score review text with VADER and map to labels.""" analyzer = SentimentIntensityAnalyzer() out = df_reviews.copy() def label_text(text: str): score = analyzer.polarity_scores(str(text))["compound"] if score >= 0.05: return "positive" if score <= -0.05: return "negative" return "neutral" out["sentiment_label"] = out["review_text"].apply(label_text) return out def build_dashboard_table(df_sales: pd.DataFrame, df_reviews: pd.DataFrame) -> pd.DataFrame: """Create the monthly dashboard table.""" sales = df_sales.copy() sales["month"] = pd.to_datetime(sales["month"], errors="coerce") revenue_df = sales.merge(df_reviews[["title", "price"]].drop_duplicates(), on="title", how="left") revenue_df["estimated_revenue"] = revenue_df["units_sold"] * revenue_df["price"] dashboard = ( revenue_df.groupby("month", as_index=False) .agg( total_units_sold=("units_sold", "sum"), avg_units_sold=("units_sold", "mean"), total_revenue=("estimated_revenue", "sum"), ) .sort_values("month") ) return dashboard def build_top_titles(df_sales: pd.DataFrame) -> pd.DataFrame: """Aggregate total units sold by title.""" return ( df_sales.groupby("title", as_index=False)["units_sold"] .sum() .sort_values("units_sold", ascending=False) .rename(columns={"units_sold": "total_units_sold"}) ) def build_pricing_decisions(df_sales: pd.DataFrame, df_reviews: pd.DataFrame) -> pd.DataFrame: """Create a simple pricing recommendation table.""" merged = df_sales.merge(df_reviews[["title", "price", "rating"]].drop_duplicates(), on="title", how="left") agg = ( merged.groupby(["title", "price", "rating"], as_index=False)["units_sold"] .sum() .rename(columns={"units_sold": "total_units_sold"}) .sort_values("total_units_sold", ascending=False) ) def recommend(row): rating = str(row["rating"]) units = float(row["total_units_sold"]) if rating in {"Four", "Five"} and units > agg["total_units_sold"].median(): return "Consider premium pricing" if rating in {"One", "Two"}: return "Consider discounting" return "Keep current price" agg["pricing_decision"] = agg.apply(recommend, axis=1) return agg def save_sales_figure(df_dashboard: pd.DataFrame): """Save the monthly overview figure.""" plt.figure(figsize=(10, 5)) plt.plot(df_dashboard["month"], df_dashboard["total_units_sold"], marker="o") plt.title("Monthly Units Sold") plt.xlabel("Month") plt.ylabel("Units Sold") plt.xticks(rotation=45) plt.tight_layout() plt.savefig(PY_FIG_DIR / "sales_trends_sampled_titles.png", dpi=150) plt.close() def save_sentiment_figure(df_sentiment: pd.DataFrame): """Save the sentiment distribution figure.""" counts = ( df_sentiment.groupby(["title", "sentiment_label"]) .size() .unstack(fill_value=0) .reset_index() ) for col in ["negative", "neutral", "positive"]: if col not in counts.columns: counts[col] = 0 top = counts.head(20).copy() top = top.sort_values("positive", ascending=True) plt.figure(figsize=(11, 8)) plt.barh(top["title"], top["negative"], label="negative") plt.barh(top["title"], top["neutral"], left=top["negative"], label="neutral") plt.barh(top["title"], top["positive"], left=top["negative"] + top["neutral"], label="positive") plt.title("Sentiment Distribution by Book") plt.xlabel("Number of Reviews") plt.ylabel("Book Title") plt.legend() plt.tight_layout() plt.savefig(PY_FIG_DIR / "sentiment_distribution_sampled_titles.png", dpi=150) plt.close() counts.rename(columns={"title": "grouped_title"}).to_csv( PY_TAB_DIR / "sentiment_counts_sampled.csv", index=False ) def save_arima_figure(df_sales: pd.DataFrame): """Fit a lightweight ARIMA forecast for the top title and save a figure.""" top_title = ( df_sales.groupby("title")["units_sold"].sum().sort_values(ascending=False).index[0] ) title_sales = ( df_sales[df_sales["title"] == top_title] .copy() .sort_values("month") ) title_sales["month"] = pd.to_datetime(title_sales["month"], errors="coerce") title_sales = title_sales.dropna(subset=["month"]).set_index("month") if len(title_sales) < 6: return with warnings.catch_warnings(): warnings.simplefilter("ignore") model = ARIMA(title_sales["units_sold"], order=(1, 1, 1)) fitted = model.fit() forecast = fitted.forecast(steps=6) forecast_index = pd.date_range( start=title_sales.index.max() + pd.offsets.MonthBegin(1), periods=6, freq="MS", ) plt.figure(figsize=(10, 5)) plt.plot(title_sales.index, title_sales["units_sold"], marker="o", label="Observed") plt.plot(forecast_index, forecast.values, linestyle="--", marker="o", label="Forecast") plt.title(f"ARIMA Forecast for Top Title: {top_title}") plt.xlabel("Month") plt.ylabel("Units Sold") plt.legend() plt.xticks(rotation=45) plt.tight_layout() plt.savefig(PY_FIG_DIR / "arima_forecast_sampled_titles.png", dpi=150) plt.close() def save_kpis(df_sales: pd.DataFrame, df_dashboard: pd.DataFrame): """Save KPI summary JSON.""" payload = { "n_titles": int(df_sales["title"].nunique()), "n_months": int(df_dashboard["month"].nunique()), "total_units_sold": float(df_sales["units_sold"].sum()), } if "total_revenue" in df_dashboard.columns: payload["total_revenue"] = float(df_dashboard["total_revenue"].sum()) with open(PY_FIG_DIR / "kpis.json", "w", encoding="utf-8") as file: json.dump(payload, file, indent=2) def run_pipeline(): """Run the notebook logic as normal Python functions.""" try: ensure_dirs() df_reviews_raw, df_sales = load_inputs() df_reviews = add_review_text(df_reviews_raw) df_sentiment = classify_sentiment(df_reviews) df_dashboard = build_dashboard_table(df_sales, df_reviews) df_top = build_top_titles(df_sales) df_pricing = build_pricing_decisions(df_sales, df_reviews) df_dashboard.to_csv(PY_TAB_DIR / "df_dashboard.csv", index=False) df_top.to_csv(PY_TAB_DIR / "top_titles_by_units_sold.csv", index=False) df_pricing.to_csv(PY_TAB_DIR / "pricing_decisions.csv", index=False) save_sales_figure(df_dashboard) save_sentiment_figure(df_sentiment) save_arima_figure(df_sales) save_kpis(df_sales, df_dashboard) return ( "Pipeline completed successfully.\n\n" f"Rows in reviews: {len(df_reviews):,}\n" f"Rows in sales: {len(df_sales):,}\n\n" f"Generated figures: {', '.join(artifacts_index()['python']['figures'])}\n" f"Generated tables: {', '.join(artifacts_index()['python']['tables'])}" ) except Exception as error: return f"Pipeline failed:\n{error}\n\n{traceback.format_exc()}" def load_kpis(): """Load KPI JSON if available.""" path = PY_FIG_DIR / "kpis.json" if path.exists(): return json.loads(path.read_text(encoding="utf-8")) return {} def render_kpi_cards(): """Render simple HTML KPI cards.""" kpis = load_kpis() if not kpis: return "
Run the pipeline to generate KPIs.
" html = "
" for key, value in kpis.items(): label = key.replace("_", " ").title() if isinstance(value, (int, float)): value = f"{value:,.0f}" html += ( "
" f"
{label}
" f"
{value}
" "
" ) html += "
" return html def _load_table(path: Path): """Load CSV or JSON into a DataFrame preview.""" if not path.exists(): return pd.DataFrame([{"error": f"Missing file: {path.name}"}]) if path.suffix == ".json": obj = json.loads(path.read_text(encoding="utf-8")) if isinstance(obj, dict): return pd.DataFrame([obj]) return pd.DataFrame(obj) return pd.read_csv(path) def build_sales_chart(): """Interactive plotly version of the monthly overview.""" path = PY_TAB_DIR / "df_dashboard.csv" if not path.exists(): return go.Figure().update_layout(title="Run the pipeline first") df = pd.read_csv(path) df["month"] = pd.to_datetime(df["month"], errors="coerce") fig = go.Figure() for col in [c for c in df.columns if c != "month"]: fig.add_trace(go.Scatter(x=df["month"], y=df[col], mode="lines+markers", name=col)) fig.update_layout(title="Monthly Overview", template="plotly_white", height=420) return fig def build_sentiment_chart(): """Interactive stacked chart for sentiment counts.""" path = PY_TAB_DIR / "sentiment_counts_sampled.csv" if not path.exists(): return go.Figure().update_layout(title="Run the pipeline first") df = pd.read_csv(path).head(20) fig = go.Figure() for col in ["negative", "neutral", "positive"]: if col in df.columns: fig.add_trace(go.Bar(y=df["grouped_title"], x=df[col], orientation="h", name=col)) fig.update_layout(title="Sentiment Distribution", barmode="stack", template="plotly_white", height=500) return fig def build_top_chart(): """Interactive top sellers chart.""" path = PY_TAB_DIR / "top_titles_by_units_sold.csv" if not path.exists(): return go.Figure().update_layout(title="Run the pipeline first") df = pd.read_csv(path).head(15) title_col = "title" if "title" in df.columns else df.columns[0] val_col = "total_units_sold" if "total_units_sold" in df.columns else df.columns[-1] fig = go.Figure(go.Bar(y=df[title_col], x=df[val_col], orientation="h")) fig.update_layout(title="Top Sellers", template="plotly_white", height=500) return fig def refresh_gallery(): """Refresh static figures and the first available table.""" figures = [] for path in sorted(PY_FIG_DIR.glob("*.png")): figures.append((str(path), path.stem.replace("_", " ").title())) table_choices = artifacts_index()["python"]["tables"] first_df = pd.DataFrame() first_choice = None if table_choices: first_choice = table_choices[0] first_df = _load_table(PY_TAB_DIR / first_choice) return figures, gr.update(choices=table_choices, value=first_choice), first_df def on_table_select(choice): """Load the selected table preview.""" if not choice: return pd.DataFrame([{"hint": "Select a table above."}]) return _load_table(PY_TAB_DIR / choice) def refresh_dashboard(): """Refresh KPI cards and all interactive charts.""" return render_kpi_cards(), build_sales_chart(), build_sentiment_chart(), build_top_chart() def ai_reply(question: str): """Simple keyword routing for the AI tab.""" msg = (question or "").lower() kpis = load_kpis() summary = "" if kpis: summary = ( f"We currently have {int(kpis.get('n_titles', 0))} titles and " f"{int(kpis.get('total_units_sold', 0)):,} total units sold." ) if any(word in msg for word in ["sentiment", "review", "positive", "negative"]): return f"Here is the sentiment view. {summary}", build_sentiment_chart(), None if any(word in msg for word in ["top", "seller", "best", "popular"]): return f"Here are the top sellers. {summary}", None, _load_table(PY_TAB_DIR / "top_titles_by_units_sold.csv") if any(word in msg for word in ["price", "pricing"]): return f"Here are the pricing decisions. {summary}", None, _load_table(PY_TAB_DIR / "pricing_decisions.csv") return f"Here is the monthly overview. {summary}", build_sales_chart(), None def ai_chat(user_msg, history): """Chat wrapper compatible with Gradio Chatbot.""" if not user_msg: return history, "", None, None reply, chart, table = ai_reply(user_msg) history = (history or []) + [{"role": "user", "content": user_msg}, {"role": "assistant", "content": reply}] return history, "", chart, table def load_css(): """Load optional CSS file.""" path = BASE_DIR / "style.css" return path.read_text(encoding="utf-8") if path.exists() else "" ensure_dirs() with gr.Blocks(title="AIBDM 2026 Workshop App") as demo: gr.Markdown( "# SE21 App Template\n" "*Notebook converted into a Hugging Face Space*", elem_id="escp_title", ) with gr.Tab("Pipeline Runner"): gr.Markdown("Run the full notebook logic with the uploaded CSV files.") run_btn = gr.Button("Run Full Pipeline", variant="primary") run_log = gr.Textbox(label="Execution Log", lines=18, max_lines=30, interactive=False) run_btn.click(run_pipeline, outputs=[run_log]) with gr.Tab("Dashboard"): kpi_html = gr.HTML(value=render_kpi_cards()) refresh_btn = gr.Button("Refresh Dashboard", variant="primary") gr.Markdown("#### Interactive Charts") chart_sales = gr.Plot(label="Monthly Overview") chart_sentiment = gr.Plot(label="Sentiment Distribution") chart_top = gr.Plot(label="Top Sellers") gr.Markdown("#### Static Figures") gallery = gr.Gallery(label="Generated Figures", columns=2, height=480, object_fit="contain") gr.Markdown("#### Data Tables") table_dropdown = gr.Dropdown(label="Select a table to view", choices=[], interactive=True) table_display = gr.Dataframe(label="Table Preview", interactive=False) def _on_refresh(): kpi, c1, c2, c3 = refresh_dashboard() figs, dd, df = refresh_gallery() return kpi, c1, c2, c3, figs, dd, df refresh_btn.click( _on_refresh, outputs=[kpi_html, chart_sales, chart_sentiment, chart_top, gallery, table_dropdown, table_display], ) table_dropdown.change(on_table_select, inputs=[table_dropdown], outputs=[table_display]) with gr.Tab('"AI" Dashboard'): gr.Markdown( "### Ask questions about your data\n" "This version uses built-in keyword routing, so it works without API keys." ) with gr.Row(equal_height=True): with gr.Column(scale=1): chatbot = gr.Chatbot(label="Conversation", type="messages", height=380) user_input = gr.Textbox( label="Ask about your data", placeholder="e.g. Show me sales trends / top sellers / sentiment", lines=1, ) with gr.Column(scale=1): ai_figure = gr.Plot(label="Interactive Chart") ai_table = gr.Dataframe(label="Data Table", interactive=False) user_input.submit( ai_chat, inputs=[user_input, chatbot], outputs=[chatbot, user_input, ai_figure, ai_table], ) demo.launch(css=load_css())