Spaces:
Build error
Build error
| # AI-Assisted Code — Academic Integrity Notice | |
| # Generated with The App Builder. ESCP coursework. | |
| # Student must be able to explain all code when asked. | |
| import shutil | |
| import time | |
| import traceback | |
| from pathlib import Path | |
| import gradio as gr | |
| import pandas as pd | |
| import papermill as pm | |
| import plotly.graph_objects as go | |
# Filesystem layout: every path is anchored at the folder containing this file.
BASE_DIR = Path(__file__).resolve().parent
RUNS_DIR = BASE_DIR.joinpath("runs")
ART_DIR = BASE_DIR.joinpath("artifacts")
FIG_DIR = ART_DIR.joinpath("py", "figures")
TAB_DIR = ART_DIR.joinpath("py", "tables")

# Bundled inputs used when the corresponding upload field is left empty.
DEFAULT_NOTEBOOK = BASE_DIR.joinpath("analysis.ipynb")
DEFAULT_REVIEWS = BASE_DIR.joinpath("synthetic_book_reviews.csv")
DEFAULT_SALES = BASE_DIR.joinpath("synthetic_sales_data.csv")

# Handed to papermill as ``execution_timeout``.
PAPERMILL_TIMEOUT = 1_800
# Row cap used when previewing CSV tables.
MAX_PREVIEW_ROWS = 50
def ensure_dirs() -> None:
    """Make sure the run, figure and table output folders exist."""
    for folder in (RUNS_DIR, FIG_DIR, TAB_DIR):
        # parents=True also creates any missing intermediate folders;
        # exist_ok=True makes repeated calls harmless.
        folder.mkdir(exist_ok=True, parents=True)
def load_css() -> str:
    """Return the text of ``style.css`` next to this file, or ``""`` if it is absent."""
    stylesheet = BASE_DIR / "style.css"
    if not stylesheet.exists():
        return ""
    return stylesheet.read_text(encoding="utf-8")
def timestamp() -> str:
    """Return the current local time formatted as ``YYYYMMDD-HHMMSS``."""
    now = time.localtime()
    return time.strftime("%Y%m%d-%H%M%S", now)
| def copy_input(source_path: str | None, fallback: Path, target: Path) -> None: | |
| """Copy the uploaded file or reuse the bundled default file.""" | |
| source = Path(source_path) if source_path else fallback | |
| if not source.exists(): | |
| raise FileNotFoundError(f"Missing required file: {source.name}") | |
| shutil.copy2(source, target) | |
def prepare_inputs(notebook_path: str | None, reviews_path: str | None, sales_path: str | None) -> None:
    """Place each input under its fixed file name in ``BASE_DIR``.

    For every input the uploaded path is used when given; otherwise the
    bundled default is used. The notebook can then rely on constant names.
    """
    jobs = (
        (notebook_path, DEFAULT_NOTEBOOK, "analysis.ipynb"),
        (reviews_path, DEFAULT_REVIEWS, "synthetic_book_reviews.csv"),
        (sales_path, DEFAULT_SALES, "synthetic_sales_data.csv"),
    )
    for uploaded, bundled, fixed_name in jobs:
        copy_input(uploaded, bundled, BASE_DIR / fixed_name)
def run_pipeline(notebook_path: str | None, reviews_path: str | None, sales_path: str | None) -> str:
    """Run the notebook through papermill and describe the outcome as text.

    Returns a success summary listing the executed notebook and the files
    found in the figure and table folders, or a failure message followed by
    the last 5000 characters of the traceback. Exceptions are never raised to
    the caller; they are reported in the returned string.
    """
    ensure_dirs()
    try:
        prepare_inputs(notebook_path, reviews_path, sales_path)
        executed_nb = RUNS_DIR / f"run_{timestamp()}_analysis.ipynb"
        papermill_options = dict(
            input_path=str(BASE_DIR / "analysis.ipynb"),
            output_path=str(executed_nb),
            cwd=str(BASE_DIR),
            log_output=True,
            progress_bar=False,
            request_save_on_cell_execute=True,
            execution_timeout=PAPERMILL_TIMEOUT,
        )
        pm.execute_notebook(**papermill_options)

        # Report whatever files are present in the artifact folders.
        figure_names = sorted(entry.name for entry in FIG_DIR.glob("*") if entry.is_file())
        table_names = sorted(entry.name for entry in TAB_DIR.glob("*") if entry.is_file())
        summary = (
            "Pipeline completed successfully.\n\n"
            f"Notebook output: {executed_nb.name}\n"
            f"Figures: {', '.join(figure_names) or '(none)'}\n"
            f"Tables: {', '.join(table_names) or '(none)'}"
        )
        return summary
    except Exception as error:
        # UI boundary: turn any failure into a readable log entry.
        trace_tail = traceback.format_exc()[-5000:]
        return f"Pipeline failed: {error}\n\n{trace_tail}"
def read_json(path: Path) -> dict:
    """Decode the UTF-8 JSON file at ``path`` and return the parsed value."""
    import json

    text = path.read_text(encoding="utf-8")
    return json.loads(text)
def load_table(path: Path) -> pd.DataFrame:
    """Return a preview of a CSV or JSON artifact, capped at ``MAX_PREVIEW_ROWS`` rows.

    Args:
        path: Artifact to preview. A ``.json`` suffix (case-insensitive)
            selects the JSON branch; everything else is read as CSV.

    Returns:
        The preview table. If reading or parsing fails, a one-row table with
        a single ``error`` column holding the exception message.
    """
    try:
        if path.suffix.lower() == ".json":
            obj = read_json(path)
            # A single JSON object becomes a one-row table; any other payload
            # is handed to the DataFrame constructor unchanged.
            frame = pd.DataFrame([obj]) if isinstance(obj, dict) else pd.DataFrame(obj)
            # Apply the same row cap the CSV branch gets through ``nrows``.
            return frame.head(MAX_PREVIEW_ROWS)
        return pd.read_csv(path, nrows=MAX_PREVIEW_ROWS)
    except Exception as exc:
        # Best-effort preview: show the failure in the table instead of
        # raising into the UI callback.
        return pd.DataFrame([{"error": str(exc)}])
def list_tables() -> list[str]:
    """Return the sorted names of the ``.csv`` and ``.json`` entries in ``TAB_DIR``."""
    wanted_suffixes = {".csv", ".json"}
    names = [entry.name for entry in TAB_DIR.glob("*") if entry.suffix.lower() in wanted_suffixes]
    names.sort()
    return names
def gallery_items() -> list[tuple[str, str]]:
    """Return ``(path, caption)`` pairs for every PNG in ``FIG_DIR``, sorted by path.

    The caption is the file stem with underscores turned into spaces and
    title-cased.
    """
    items = []
    for image in sorted(FIG_DIR.glob("*.png")):
        caption = image.stem.replace("_", " ").title()
        items.append((str(image), caption))
    return items
def load_kpis() -> dict:
    """Load the KPI mapping from the first usable ``kpis.json``.

    The tables folder is checked before the figures folder. A candidate that
    is missing, unreadable, malformed, or does not hold a JSON object is
    skipped so that the next location still gets a chance.

    Returns:
        The decoded KPI dict, or ``{}`` when no usable file is found.
    """
    for candidate in (TAB_DIR / "kpis.json", FIG_DIR / "kpis.json"):
        if not candidate.exists():
            continue
        try:
            data = read_json(candidate)
        except Exception:
            # Deliberately best-effort: a broken file must not take the
            # dashboard down, so move on to the next location.
            continue
        # Callers index the result by key, so only a JSON object is usable.
        if isinstance(data, dict):
            return data
    return {}
def kpi_cards_html() -> str:
    """Render the KPI cards as an HTML snippet.

    Returns:
        A ``card-grid`` div with one card per known KPI present in the loaded
        data, or a single "No data yet" card when no KPIs are available.
        Numeric values with magnitude of at least 100 are shown with thousands
        separators and no decimals; all values are HTML-escaped.
    """
    from html import escape

    kpis = load_kpis()
    if not kpis:
        return '<div class="card-grid"><div class="card"><b>No data yet</b><br>Run the pipeline first.</div></div>'
    config = [
        ("n_titles", "Book Titles"),
        ("n_months", "Time Periods"),
        ("total_units_sold", "Units Sold"),
        ("total_revenue", "Revenue"),
    ]
    cards = []
    for key, label in config:
        if key not in kpis:
            continue
        value = kpis[key]
        if isinstance(value, (int, float)) and abs(value) >= 100:
            value = f"{value:,.0f}"
        # The KPI file is produced by the executed notebook, which may be a
        # user upload, so never place its values into markup unescaped.
        safe_value = escape(str(value))
        cards.append(f'<div class="card"><div class="label">{label}</div><div class="value">{safe_value}</div></div>')
    return '<div class="card-grid">' + "".join(cards) + "</div>"
def empty_chart(title: str) -> go.Figure:
    """Return a trace-less white figure titled ``title`` with a centred hint to run the pipeline."""
    # Paper coordinates (0.5, 0.5) place the hint in the middle of the figure.
    placeholder = dict(
        text="Run the pipeline first",
        x=0.5,
        y=0.5,
        xref="paper",
        yref="paper",
        showarrow=False,
    )
    layout_options = dict(
        title=title,
        template="plotly_white",
        height=420,
        paper_bgcolor="white",
        plot_bgcolor="white",
        annotations=[placeholder],
    )
    figure = go.Figure()
    figure.update_layout(**layout_options)
    return figure
def build_sales_chart() -> go.Figure:
    """Plot every numeric column of ``df_dashboard.csv`` against its date column.

    The date column is the first one whose name contains "month" or "date"
    (case-insensitive). A placeholder figure is returned when the file is
    missing, or when no date column or no numeric column can be found.
    """
    csv_path = TAB_DIR / "df_dashboard.csv"
    if not csv_path.exists():
        return empty_chart("Monthly Overview")
    data = pd.read_csv(csv_path)

    date_col = None
    for name in data.columns:
        lowered = name.lower()
        if "month" in lowered or "date" in lowered:
            date_col = name
            break
    numeric_cols = [
        name
        for name in data.columns
        if name != date_col and pd.api.types.is_numeric_dtype(data[name])
    ]
    if not (date_col and numeric_cols):
        return empty_chart("Monthly Overview")

    # Unparseable dates become NaT instead of raising.
    data[date_col] = pd.to_datetime(data[date_col], errors="coerce")
    figure = go.Figure()
    for name in numeric_cols:
        legend_label = name.replace("_", " ").title()
        series = go.Scatter(x=data[date_col], y=data[name], mode="lines+markers", name=legend_label)
        figure.add_trace(series)
    figure.update_layout(
        title="Monthly Overview",
        template="plotly_white",
        height=450,
        paper_bgcolor="white",
        plot_bgcolor="white",
    )
    return figure
def build_sentiment_chart() -> go.Figure:
    """Build a stacked horizontal bar chart from ``sentiment_counts_sampled.csv``.

    The first CSV column supplies the bar labels; the "negative", "neutral"
    and "positive" columns, where present, supply the stacked segments. A
    placeholder figure is returned when the file is missing.
    """
    csv_path = TAB_DIR / "sentiment_counts_sampled.csv"
    if not csv_path.exists():
        return empty_chart("Sentiment Distribution")
    data = pd.read_csv(csv_path)
    label_col = data.columns[0]

    present = [label for label in ("negative", "neutral", "positive") if label in data.columns]
    figure = go.Figure()
    for label in present:
        segment = go.Bar(y=data[label_col], x=data[label], orientation="h", name=label.title())
        figure.add_trace(segment)

    # Height grows with the number of rows but never drops below 420.
    chart_height = max(420, len(data) * 28)
    figure.update_layout(
        title="Sentiment Distribution",
        barmode="stack",
        template="plotly_white",
        height=chart_height,
        paper_bgcolor="white",
        plot_bgcolor="white",
    )
    figure.update_yaxes(autorange="reversed")
    return figure
def build_top_sellers_chart() -> go.Figure:
    """Build a horizontal bar chart of the first 15 rows of ``top_titles_by_units_sold.csv``.

    The label column is the first one whose name contains "title", falling
    back to the first column. The value column is the first one whose name
    contains "unit" or "sold", falling back to the last column. A placeholder
    figure is returned when the file is missing.
    """
    csv_path = TAB_DIR / "top_titles_by_units_sold.csv"
    if not csv_path.exists():
        return empty_chart("Top Sellers")
    data = pd.read_csv(csv_path).head(15)

    label_col = data.columns[0]
    for name in data.columns:
        if "title" in name.lower():
            label_col = name
            break

    amount_col = data.columns[-1]
    for name in data.columns:
        lowered = name.lower()
        if "unit" in lowered or "sold" in lowered:
            amount_col = name
            break

    bars = go.Bar(y=data[label_col], x=data[amount_col], orientation="h")
    figure = go.Figure(bars)
    figure.update_layout(
        title="Top Sellers",
        template="plotly_white",
        height=max(420, len(data) * 28),
        paper_bgcolor="white",
        plot_bgcolor="white",
    )
    figure.update_yaxes(autorange="reversed")
    return figure
| def refresh_table(choice: str | None) -> pd.DataFrame: | |
| if not choice: | |
| return pd.DataFrame([{"hint": "Choose a table first."}]) | |
| return load_table(TAB_DIR / choice) | |
def refresh_dashboard() -> tuple:
    """Recompute every dashboard output from the artifacts currently on disk.

    Returns a 7-tuple: KPI HTML, the three charts, the gallery items, a
    dropdown update carrying the available tables with the first one selected,
    and the preview of that table (an empty frame when there are no tables).
    """
    table_names = list_tables()
    first_table = table_names[0] if table_names else None
    if first_table:
        preview = refresh_table(first_table)
    else:
        preview = pd.DataFrame()
    return (
        kpi_cards_html(),
        build_sales_chart(),
        build_sentiment_chart(),
        build_top_sellers_chart(),
        gallery_items(),
        gr.update(choices=table_names, value=first_table),
        preview,
    )
# Create the output folders up front so the dashboard helpers below can read
# from them even before the first pipeline run.
ensure_dirs()

# Three-tab UI: run the notebook, inspect the results, list the packaged files.
with gr.Blocks(title="Notebook Runner Space", css=load_css()) as demo:
    gr.Markdown(
        "# ESCP Notebook Runner\n"
        "Run the bundled notebook on the two bundled CSV datasets, or replace them with your own files."
    )
    with gr.Tab("1. Run Notebook"):
        gr.Markdown(
            "Default project files already included in the Space:\n"
            "- `analysis.ipynb`\n"
            "- `synthetic_book_reviews.csv`\n"
            "- `synthetic_sales_data.csv`\n\n"
            "You can leave all upload fields empty to use the bundled files."
        )
        # All three uploads are optional; run_pipeline accepts None for each.
        notebook_file = gr.File(label="Optional notebook (.ipynb)", file_types=[".ipynb"], type="filepath")
        reviews_file = gr.File(label="Optional reviews CSV", file_types=[".csv"], type="filepath")
        sales_file = gr.File(label="Optional sales CSV", file_types=[".csv"], type="filepath")
        run_button = gr.Button("Run Full Pipeline", variant="primary")
        run_log = gr.Textbox(label="Execution Log", lines=18, interactive=False)
        # The returned text (success summary or failure trace) goes to the log box.
        run_button.click(run_pipeline, inputs=[notebook_file, reviews_file, sales_file], outputs=run_log)
    with gr.Tab("2. Dashboard"):
        # KPI cards are rendered once while the UI is built; the plots, gallery
        # and table are created without initial values and the dropdown with
        # no choices, until "Refresh Dashboard" is clicked.
        kpis = gr.HTML(value=kpi_cards_html())
        refresh_button = gr.Button("Refresh Dashboard", variant="primary")
        chart_sales = gr.Plot(label="Monthly Overview")
        chart_sentiment = gr.Plot(label="Sentiment Distribution")
        chart_top = gr.Plot(label="Top Sellers")
        gallery = gr.Gallery(label="Generated Figures", columns=2, height=420, object_fit="contain")
        table_name = gr.Dropdown(label="Generated Tables", choices=[], interactive=True)
        table_preview = gr.Dataframe(label="Table Preview", interactive=False)
        # The outputs list must stay in the same order as the tuple returned
        # by refresh_dashboard().
        refresh_button.click(refresh_dashboard, outputs=[kpis, chart_sales, chart_sentiment, chart_top, gallery, table_name, table_preview])
        # Picking another table reloads only the preview.
        table_name.change(refresh_table, inputs=table_name, outputs=table_preview)
    with gr.Tab("3. Project Files"):
        gr.Markdown(
            "The package includes the notebook, the two CSV datasets, `requirements.txt`, `style.css`, and the `artifacts/` folders."
        )

# NOTE(review): allowed_paths is presumably needed so the gallery can serve
# images stored under BASE_DIR — confirm against the Gradio docs.
# NOTE(review): launch() also runs when this module is imported; consider an
# `if __name__ == "__main__":` guard if the module is ever imported elsewhere.
demo.launch(allowed_paths=[str(BASE_DIR)])