# AI-Assisted Code — Academic Integrity Notice # Generated with The App Builder. ESCP coursework. # Student must be able to explain all code when asked. import json import os import shutil import time import traceback from pathlib import Path from typing import Any, Dict, List, Tuple import gradio as gr import pandas as pd import papermill as pm import plotly.graph_objects as go BASE_DIR = Path(__file__).resolve().parent NOTEBOOK_NAME = os.environ.get("NB_ANALYSIS", "2a_Python_Analysis_Constance_Gonnelle.ipynb").strip() # Input files shipped with the Space SALES_SOURCE = BASE_DIR / "books_data.csv" REVIEWS_SOURCE = BASE_DIR / "synthetic_book_reviews.csv" # Filenames expected by the notebook SALES_ALIAS = BASE_DIR / "synthetic_sales_data.csv" REVIEWS_ALIAS = BASE_DIR / "synthetic_book_reviews.csv" RUNS_DIR = BASE_DIR / "runs" ART_DIR = BASE_DIR / "artifacts" PY_FIG_DIR = ART_DIR / "py" / "figures" PY_TAB_DIR = ART_DIR / "py" / "tables" PAPERMILL_TIMEOUT = int(os.environ.get("PAPERMILL_TIMEOUT", "1800")) MAX_PREVIEW_ROWS = int(os.environ.get("MAX_FILE_PREVIEW_ROWS", "100")) MAX_LOG_CHARS = int(os.environ.get("MAX_LOG_CHARS", "8000")) def ensure_dirs() -> None: """Create all folders used by the app.""" for folder in [RUNS_DIR, ART_DIR, PY_FIG_DIR, PY_TAB_DIR]: folder.mkdir(parents=True, exist_ok=True) def stamp() -> str: """Return a timestamp for run outputs.""" return time.strftime("%Y%m%d-%H%M%S") def tail(text: str, limit: int = MAX_LOG_CHARS) -> str: """Keep logs short enough for the UI.""" return (text or "")[-limit:] def prepare_input_files() -> List[str]: """ Make sure the notebook sees the exact filenames it expects. This is like giving a spreadsheet the same tab names every time. """ messages = [] if not SALES_SOURCE.exists(): raise FileNotFoundError("books_data.csv is missing.") if not REVIEWS_SOURCE.exists(): raise FileNotFoundError("synthetic_book_reviews.csv is missing.") if SALES_SOURCE.resolve() != SALES_ALIAS.resolve(): shutil.copyfile(SALES_SOURCE, SALES_ALIAS) messages.append("Copied books_data.csv -> synthetic_sales_data.csv") else: messages.append("synthetic_book_reviews.csv already uses the expected filename") messages.append("synthetic_book_reviews.csv ready") return messages def artifacts_index() -> Dict[str, Any]: """List the files created by the notebook.""" return { "figures": sorted( p.name for p in PY_FIG_DIR.glob("*") if p.suffix.lower() in {".png", ".jpg", ".jpeg"} ), "tables": sorted( p.name for p in PY_TAB_DIR.glob("*") if p.suffix.lower() in {".csv", ".json"} ), } def load_json(path: Path) -> Dict[str, Any]: """Read JSON safely.""" with path.open(encoding="utf-8") as file: return json.load(file) def load_table(path: Path) -> pd.DataFrame: """Load a CSV or JSON artifact for preview.""" try: if path.suffix.lower() == ".json": data = load_json(path) if isinstance(data, dict): return pd.DataFrame([data]) return pd.DataFrame(data) return pd.read_csv(path, nrows=MAX_PREVIEW_ROWS) except Exception as exc: return pd.DataFrame([{"error": str(exc)}]) def run_notebook() -> str: """Execute the student's analysis notebook with papermill.""" ensure_dirs() notebook_path = BASE_DIR / NOTEBOOK_NAME if not notebook_path.exists(): return f"ERROR: Notebook not found: {NOTEBOOK_NAME}" try: prep_messages = prepare_input_files() output_path = RUNS_DIR / f"run_{stamp()}_{NOTEBOOK_NAME}" pm.execute_notebook( input_path=str(notebook_path), output_path=str(output_path), cwd=str(BASE_DIR), log_output=True, progress_bar=False, request_save_on_cell_execute=True, execution_timeout=PAPERMILL_TIMEOUT, ) idx = artifacts_index() figure_lines = [f" - {name}" for name in idx["figures"]] or [" - (none)"] table_lines = [f" - {name}" for name in idx["tables"]] or [" - (none)"] lines = [ "Notebook executed successfully.", *prep_messages, "", f"Notebook: {NOTEBOOK_NAME}", f"Run file: {output_path.name}", "", "Generated figures:", *figure_lines, "", "Generated tables:", *table_lines, ] return "\n".join(lines) except Exception as exc: return ( f"FAILED: {exc}\n\n" f"{tail(traceback.format_exc(), 3000)}" ) def load_kpis() -> Dict[str, Any]: """Load the KPI summary exported by the notebook.""" for candidate in [PY_FIG_DIR / "kpis.json", PY_TAB_DIR / "kpis.json"]: if candidate.exists(): try: return load_json(candidate) except Exception: pass return {} def render_kpis() -> str: """Build lightweight KPI cards without background images.""" kpis = load_kpis() if not kpis: return """
📊
No outputs yet
Click “Run notebook” to generate figures, tables, and KPI cards.
""" cards = [] labels = { "n_titles": "Book Titles", "n_months": "Months", "total_units_sold": "Units Sold", "total_revenue": "Revenue", } for key, value in kpis.items(): nice_label = labels.get(key, key.replace("_", " ").title()) if isinstance(value, (int, float)) and abs(value) >= 1000: value = f"{value:,.0f}" cards.append( f"""
{nice_label}
{value}
""" ) return f'
{"".join(cards)}
' def empty_chart(title: str) -> go.Figure: """Return a placeholder chart.""" fig = go.Figure() fig.update_layout( title=title, template="plotly_white", height=420, annotations=[ dict( text="Run the notebook to populate this chart", x=0.5, y=0.5, xref="paper", yref="paper", showarrow=False, ) ], ) return fig def build_sales_chart() -> go.Figure: """Create an interactive monthly trend chart from df_dashboard.csv.""" path = PY_TAB_DIR / "df_dashboard.csv" if not path.exists(): return empty_chart("Monthly Overview") df = pd.read_csv(path) date_col = next((col for col in df.columns if "month" in col.lower() or "date" in col.lower()), None) metric_cols = [col for col in df.columns if col != date_col and pd.api.types.is_numeric_dtype(df[col])] if not date_col or not metric_cols: return empty_chart("Monthly Overview") df[date_col] = pd.to_datetime(df[date_col], errors="coerce") fig = go.Figure() for column in metric_cols: fig.add_trace( go.Scatter( x=df[date_col], y=df[column], mode="lines+markers", name=column.replace("_", " ").title(), hovertemplate=f"{column.replace('_', ' ').title()}
%{{x|%b %Y}}: %{{y:,.0f}}", ) ) fig.update_layout(template="plotly_white", height=450, title="Monthly Overview", hovermode="x unified") return fig def build_top_sellers_chart() -> go.Figure: """Create an interactive top-sellers bar chart.""" path = PY_TAB_DIR / "top_titles_by_units_sold.csv" if not path.exists(): return empty_chart("Top Selling Titles") df = pd.read_csv(path).head(15) title_col = next((col for col in df.columns if "title" in col.lower()), df.columns[0]) value_col = next((col for col in df.columns if "unit" in col.lower() or "sold" in col.lower()), df.columns[-1]) fig = go.Figure( go.Bar( x=df[value_col], y=df[title_col], orientation="h", hovertemplate="%{y}
Units: %{x:,.0f}", ) ) fig.update_layout(template="plotly_white", height=max(420, len(df) * 28), title="Top Selling Titles") fig.update_yaxes(autorange="reversed") return fig def build_sentiment_chart() -> go.Figure: """Create an interactive sentiment chart.""" path = PY_TAB_DIR / "sentiment_counts_sampled.csv" if not path.exists(): return empty_chart("Sentiment Distribution") df = pd.read_csv(path) title_col = df.columns[0] sentiment_cols = [col for col in ["negative", "neutral", "positive"] if col in df.columns] if not sentiment_cols: return empty_chart("Sentiment Distribution") fig = go.Figure() for col in sentiment_cols: fig.add_trace( go.Bar( x=df[col], y=df[title_col], name=col.title(), orientation="h", hovertemplate=f"{col.title()}: %{{x}}", ) ) fig.update_layout( template="plotly_white", barmode="stack", height=max(420, len(df) * 28), title="Sentiment Distribution by Book", ) fig.update_yaxes(autorange="reversed") return fig def refresh_gallery() -> Tuple[List[Tuple[str, str]], gr.Dropdown, pd.DataFrame]: """Refresh static figures and tables.""" figures = [(str(path), path.stem.replace("_", " ").title()) for path in sorted(PY_FIG_DIR.glob("*.png"))] idx = artifacts_index() table_choices = idx["tables"] first_table = load_table(PY_TAB_DIR / table_choices[0]) if table_choices else pd.DataFrame() return figures, gr.update(choices=table_choices, value=table_choices[0] if table_choices else None), first_table def on_table_select(choice: str) -> pd.DataFrame: """Load the selected table.""" if not choice: return pd.DataFrame([{"hint": "Select a table above."}]) path = PY_TAB_DIR / choice if not path.exists(): return pd.DataFrame([{"error": f"File not found: {choice}"}]) return load_table(path) def answer_question(user_msg: str, history: List[Dict[str, str]]): """Simple assistant that routes the user to the right chart or table.""" idx = artifacts_index() text = (user_msg or "").strip() if not text: return history, "", None, None msg = text.lower() reply = "Ask about sales trends, sentiment, top sellers, or dashboard overview." chart = None table = None if not idx["figures"] and not idx["tables"]: reply = "No artifacts exist yet. Run the notebook first in the first tab." elif any(word in msg for word in ["trend", "sales", "monthly", "forecast", "arima"]): reply = "Here is the monthly overview built from the notebook outputs." chart = build_sales_chart() elif any(word in msg for word in ["sentiment", "review", "positive", "negative"]): reply = "Here is the sentiment distribution across the sampled books." chart = build_sentiment_chart() elif any(word in msg for word in ["top", "best", "popular", "rank"]): reply = "Here is the top-sellers table exported by the notebook." path = PY_TAB_DIR / "top_titles_by_units_sold.csv" table = load_table(path) if path.exists() else pd.DataFrame([{"error": "top_titles_by_units_sold.csv not found"}]) elif any(word in msg for word in ["overview", "dashboard", "kpi", "summary"]): reply = "Here is the dashboard overview table." path = PY_TAB_DIR / "df_dashboard.csv" table = load_table(path) if path.exists() else pd.DataFrame([{"error": "df_dashboard.csv not found"}]) new_history = (history or []) + [ {"role": "user", "content": text}, {"role": "assistant", "content": reply}, ] return new_history, "", chart, table def refresh_dashboard(): """Refresh all dashboard widgets together.""" kpis = render_kpis() sales = build_sales_chart() sentiment = build_sentiment_chart() top = build_top_sellers_chart() figures, dropdown, table = refresh_gallery() return kpis, sales, sentiment, top, figures, dropdown, table ensure_dirs() with gr.Blocks(title="Book Analytics Space", css_paths=["style.css"]) as demo: gr.Markdown( "# Book Analytics Space\n" "This Gradio app runs your notebook on the two CSV datasets and then exposes the exported outputs." ) with gr.Tab("1. Run Notebook"): gr.Markdown( "Use this tab to execute the notebook on the packaged datasets. " "The app automatically maps `books_data.csv` to the filename expected by the notebook." ) run_btn = gr.Button("Run notebook", variant="primary") run_log = gr.Textbox(label="Execution log", lines=20, interactive=False) run_btn.click(run_notebook, outputs=run_log) with gr.Tab("2. Dashboard"): kpi_html = gr.HTML(value=render_kpis()) refresh_btn = gr.Button("Refresh dashboard", variant="secondary") sales_plot = gr.Plot(label="Monthly Overview") sentiment_plot = gr.Plot(label="Sentiment Distribution") top_plot = gr.Plot(label="Top Sellers") gallery = gr.Gallery(label="Static Figures", columns=2, height=420, object_fit="contain") table_dropdown = gr.Dropdown(label="Tables", choices=[], interactive=True) table_display = gr.Dataframe(label="Table Preview", interactive=False) refresh_btn.click( refresh_dashboard, outputs=[kpi_html, sales_plot, sentiment_plot, top_plot, gallery, table_dropdown, table_display], ) table_dropdown.change(on_table_select, inputs=table_dropdown, outputs=table_display) with gr.Tab("3. AI Dashboard"): gr.Markdown("Ask a simple question and the app will show the relevant chart or table.") chatbot = gr.Chatbot(type="messages", height=360) user_input = gr.Textbox( label="Ask about your data", placeholder="Example: Show me the sales trends", ) ai_chart = gr.Plot(label="Interactive Chart") ai_table = gr.Dataframe(label="Data Table", interactive=False) gr.Examples( examples=[ "Show me the sales trends", "What does the sentiment look like?", "Which titles sell the most?", "Give me a dashboard overview", ], inputs=user_input, ) user_input.submit( answer_question, inputs=[user_input, chatbot], outputs=[chatbot, user_input, ai_chart, ai_table], ) demo.launch()