Spaces:

Danielleeee
/

123

Build error

File size: 10,295 Bytes

4501e16

# AI-Assisted Code — Academic Integrity Notice
# Generated with The App Builder. ESCP coursework.
# Student must be able to explain all code when asked.

import shutil
import time
import traceback
from pathlib import Path

import gradio as gr
import pandas as pd
import papermill as pm
import plotly.graph_objects as go

# Filesystem layout. Every path is anchored at the directory containing this file.
BASE_DIR = Path(__file__).resolve().parent
# Executed notebook copies are written here by run_pipeline.
RUNS_DIR = BASE_DIR / "runs"
ART_DIR = BASE_DIR / "artifacts"
# Scanned for *.png by gallery_items and listed after a pipeline run.
FIG_DIR = ART_DIR / "py" / "figures"
# Scanned for .csv/.json by list_tables; the chart builders read fixed filenames from it.
TAB_DIR = ART_DIR / "py" / "tables"

# Bundled inputs used by prepare_inputs when the matching upload field is left empty.
DEFAULT_NOTEBOOK = BASE_DIR / "analysis.ipynb"
DEFAULT_REVIEWS = BASE_DIR / "synthetic_book_reviews.csv"
DEFAULT_SALES = BASE_DIR / "synthetic_sales_data.csv"

# Passed to papermill as execution_timeout; presumably seconds — confirm against papermill docs.
PAPERMILL_TIMEOUT = 1800
# Row cap applied when load_table previews a CSV file.
MAX_PREVIEW_ROWS = 50


def ensure_dirs() -> None:
    """Make sure the run, figure and table output folders exist.

    Missing parent folders are created as well; folders that already exist
    are left untouched.
    """
    required_folders = (RUNS_DIR, FIG_DIR, TAB_DIR)
    for folder in required_folders:
        folder.mkdir(parents=True, exist_ok=True)


def load_css() -> str:
    """Return the contents of ``style.css`` next to this file, or ``""`` if absent."""
    stylesheet = BASE_DIR / "style.css"
    if not stylesheet.exists():
        return ""
    return stylesheet.read_text(encoding="utf-8")


def timestamp() -> str:
    """Return the current local time formatted as ``YYYYMMDD-HHMMSS``."""
    now = time.localtime()
    return time.strftime("%Y%m%d-%H%M%S", now)


def copy_input(source_path: str | None, fallback: Path, target: Path) -> None:
    """Copy the uploaded file or reuse the bundled default file."""
    source = Path(source_path) if source_path else fallback
    if not source.exists():
        raise FileNotFoundError(f"Missing required file: {source.name}")
    shutil.copy2(source, target)


def prepare_inputs(notebook_path: str | None, reviews_path: str | None, sales_path: str | None) -> None:
    """Place the notebook and both CSV inputs at their fixed names in the app folder.

    Each argument is an uploaded file path, or ``None`` to use the bundled
    default for that slot. Files are copied in the order notebook, reviews,
    sales.
    """
    slots = (
        (notebook_path, DEFAULT_NOTEBOOK, "analysis.ipynb"),
        (reviews_path, DEFAULT_REVIEWS, "synthetic_book_reviews.csv"),
        (sales_path, DEFAULT_SALES, "synthetic_sales_data.csv"),
    )
    for uploaded, bundled, fixed_name in slots:
        copy_input(uploaded, bundled, BASE_DIR / fixed_name)


def run_pipeline(notebook_path: str | None, reviews_path: str | None, sales_path: str | None) -> str:
    """Run the analysis notebook through papermill and describe the outcome.

    Args:
        notebook_path: Uploaded notebook path, or ``None`` for the bundled one.
        reviews_path: Uploaded reviews CSV path, or ``None`` for the bundled one.
        sales_path: Uploaded sales CSV path, or ``None`` for the bundled one.

    Returns:
        A success summary naming the output notebook and the files found in
        the figure and table folders, or a failure message followed by the
        last 5000 characters of the traceback. Exceptions are reported in the
        returned text rather than raised.
    """
    ensure_dirs()
    try:
        prepare_inputs(notebook_path, reviews_path, sales_path)
        executed_nb = RUNS_DIR / f"run_{timestamp()}_analysis.ipynb"
        pm.execute_notebook(
            input_path=str(BASE_DIR / "analysis.ipynb"),
            output_path=str(executed_nb),
            cwd=str(BASE_DIR),
            log_output=True,
            progress_bar=False,
            request_save_on_cell_execute=True,
            execution_timeout=PAPERMILL_TIMEOUT,
        )
        # List whatever files are present in the artifact folders after the run.
        figure_names = sorted(entry.name for entry in FIG_DIR.glob("*") if entry.is_file())
        table_names = sorted(entry.name for entry in TAB_DIR.glob("*") if entry.is_file())
        summary = (
            "Pipeline completed successfully.\n\n"
            f"Notebook output: {executed_nb.name}\n"
            f"Figures: {', '.join(figure_names) or '(none)'}\n"
            f"Tables: {', '.join(table_names) or '(none)'}"
        )
        return summary
    except Exception as exc:
        # Keep only the tail of the traceback so the log box stays readable.
        trace_tail = traceback.format_exc()[-5000:]
        return f"Pipeline failed: {exc}\n\n{trace_tail}"


def read_json(path: Path) -> dict:
    """Parse the UTF-8 JSON file at ``path`` and return the decoded object."""
    import json

    raw_text = path.read_text(encoding="utf-8")
    return json.loads(raw_text)


def load_table(path: Path) -> pd.DataFrame:
    """Load a bounded preview of a CSV or JSON artifact.

    Args:
        path: File to preview. A ``.json`` suffix (case-insensitive) is read
            as JSON; anything else is read as CSV.

    Returns:
        A DataFrame with at most ``MAX_PREVIEW_ROWS`` rows. A JSON object
        becomes a single-row frame. If reading or parsing fails, a one-row
        frame with an ``error`` column holding the exception message is
        returned instead of raising.
    """
    try:
        if path.suffix.lower() == ".json":
            obj = read_json(path)
            frame = pd.DataFrame([obj]) if isinstance(obj, dict) else pd.DataFrame(obj)
            # Apply the same row cap as the CSV branch so a large JSON array
            # does not flood the preview widget.
            return frame.head(MAX_PREVIEW_ROWS)
        return pd.read_csv(path, nrows=MAX_PREVIEW_ROWS)
    except Exception as exc:
        # Best-effort preview: surface the problem in the table itself.
        return pd.DataFrame([{"error": str(exc)}])


def list_tables() -> list[str]:
    """Return the sorted names of ``.csv`` and ``.json`` entries in the tables folder."""
    previewable = {".csv", ".json"}
    names = [entry.name for entry in TAB_DIR.glob("*") if entry.suffix.lower() in previewable]
    names.sort()
    return names


def gallery_items() -> list[tuple[str, str]]:
    """Pair every PNG in the figures folder with a caption derived from its name.

    Files are visited in sorted path order. The caption is the file stem with
    underscores replaced by spaces, in title case.
    """
    items = []
    for png in sorted(FIG_DIR.glob("*.png")):
        caption = png.stem.replace("_", " ").title()
        items.append((str(png), caption))
    return items


def load_kpis() -> dict:
    """Load the KPI dictionary written by the pipeline, if one is available.

    Looks for ``kpis.json`` in the tables folder first, then in the figures
    folder. A candidate that cannot be read, is not valid JSON, or does not
    decode to a JSON object is skipped so the next candidate still gets a
    chance.

    Returns:
        The decoded KPI mapping, or an empty dict when no usable file exists.
    """
    for candidate in (TAB_DIR / "kpis.json", FIG_DIR / "kpis.json"):
        if not candidate.exists():
            continue
        try:
            payload = read_json(candidate)
        except (OSError, ValueError):
            # Unreadable or malformed JSON (JSONDecodeError and
            # UnicodeDecodeError are ValueError subclasses): try the next one.
            continue
        # Callers index the result by key, so only a JSON object is usable.
        if isinstance(payload, dict):
            return payload
    return {}


def kpi_cards_html() -> str:
    """Render the KPI cards as an HTML fragment.

    Returns:
        A ``card-grid`` div with one card per known KPI key present in the
        loaded KPI data, or a single "No data yet" card when no KPI data is
        available. Numeric values with magnitude of at least 100 are shown
        with thousands separators and no decimals. All values are
        HTML-escaped before insertion.
    """
    import html

    kpis = load_kpis()
    if not kpis:
        return '<div class="card-grid"><div class="card"><b>No data yet</b><br>Run the pipeline first.</div></div>'
    config = [
        ("n_titles", "Book Titles"),
        ("n_months", "Time Periods"),
        ("total_units_sold", "Units Sold"),
        ("total_revenue", "Revenue"),
    ]
    cards = []
    for key, label in config:
        if key not in kpis:
            continue
        value = kpis[key]
        if isinstance(value, (int, float)) and abs(value) >= 100:
            value = f"{value:,.0f}"
        # KPI values come from a file produced by the notebook, which may be
        # user-supplied; escape them so they cannot inject markup.
        safe_value = html.escape(str(value))
        cards.append(f'<div class="card"><div class="label">{label}</div><div class="value">{safe_value}</div></div>')
    return '<div class="card-grid">' + "".join(cards) + "</div>"


def empty_chart(title: str) -> go.Figure:
    """Return a blank white figure titled ``title`` with a centred "Run the pipeline first" note."""
    placeholder_note = dict(
        text="Run the pipeline first",
        x=0.5,
        y=0.5,
        xref="paper",
        yref="paper",
        showarrow=False,
    )
    layout_options = dict(
        title=title,
        template="plotly_white",
        height=420,
        paper_bgcolor="white",
        plot_bgcolor="white",
        annotations=[placeholder_note],
    )
    placeholder = go.Figure()
    placeholder.update_layout(**layout_options)
    return placeholder


def build_sales_chart() -> go.Figure:
    """Plot every numeric column of ``df_dashboard.csv`` against its date column.

    The date column is the first column whose name contains "month" or "date"
    (case-insensitive). Returns the placeholder chart when the file is
    missing, no such column exists, or no other numeric column exists.
    """
    csv_path = TAB_DIR / "df_dashboard.csv"
    if not csv_path.exists():
        return empty_chart("Monthly Overview")

    frame = pd.read_csv(csv_path)

    # Pick the first column that looks like a time axis.
    date_col = None
    for name in frame.columns:
        lowered = name.lower()
        if "month" in lowered or "date" in lowered:
            date_col = name
            break

    numeric_cols = []
    for name in frame.columns:
        if name != date_col and pd.api.types.is_numeric_dtype(frame[name]):
            numeric_cols.append(name)

    if date_col is None or not numeric_cols:
        return empty_chart("Monthly Overview")

    # Unparseable dates become NaT instead of raising.
    frame[date_col] = pd.to_datetime(frame[date_col], errors="coerce")

    chart = go.Figure()
    for name in numeric_cols:
        series_label = name.replace("_", " ").title()
        chart.add_trace(go.Scatter(x=frame[date_col], y=frame[name], mode="lines+markers", name=series_label))
    chart.update_layout(
        title="Monthly Overview",
        template="plotly_white",
        height=450,
        paper_bgcolor="white",
        plot_bgcolor="white",
    )
    return chart


def build_sentiment_chart() -> go.Figure:
    """Draw stacked horizontal bars from ``sentiment_counts_sampled.csv``.

    The first column supplies the bar labels. Whichever of the ``negative``,
    ``neutral`` and ``positive`` columns exist are stacked in that order.
    Returns the placeholder chart when the file is missing.
    """
    csv_path = TAB_DIR / "sentiment_counts_sampled.csv"
    if not csv_path.exists():
        return empty_chart("Sentiment Distribution")

    counts = pd.read_csv(csv_path)
    label_col = counts.columns[0]

    chart = go.Figure()
    for sentiment in ("negative", "neutral", "positive"):
        if sentiment not in counts.columns:
            continue
        chart.add_trace(go.Bar(y=counts[label_col], x=counts[sentiment], orientation="h", name=sentiment.title()))

    # Grow the figure with the number of rows so the bars stay legible.
    chart_height = max(420, len(counts) * 28)
    chart.update_layout(
        title="Sentiment Distribution",
        barmode="stack",
        template="plotly_white",
        height=chart_height,
        paper_bgcolor="white",
        plot_bgcolor="white",
    )
    # Reverse the y axis so the first CSV row is drawn at the top.
    chart.update_yaxes(autorange="reversed")
    return chart


def build_top_sellers_chart() -> go.Figure:
    """Draw horizontal bars for the first 15 rows of ``top_titles_by_units_sold.csv``.

    The label column is the first one whose name contains "title", else the
    first column. The value column is the first one whose name contains
    "unit" or "sold", else the last column. Returns the placeholder chart
    when the file is missing.
    """
    csv_path = TAB_DIR / "top_titles_by_units_sold.csv"
    if not csv_path.exists():
        return empty_chart("Top Sellers")

    top_rows = pd.read_csv(csv_path).head(15)

    label_col = top_rows.columns[0]
    for name in top_rows.columns:
        if "title" in name.lower():
            label_col = name
            break

    value_col = top_rows.columns[-1]
    for name in top_rows.columns:
        lowered = name.lower()
        if "unit" in lowered or "sold" in lowered:
            value_col = name
            break

    bars = go.Bar(y=top_rows[label_col], x=top_rows[value_col], orientation="h")
    chart = go.Figure(bars)
    chart.update_layout(
        title="Top Sellers",
        template="plotly_white",
        height=max(420, len(top_rows) * 28),
        paper_bgcolor="white",
        plot_bgcolor="white",
    )
    # Reverse the y axis so the first CSV row is drawn at the top.
    chart.update_yaxes(autorange="reversed")
    return chart


def refresh_table(choice: str | None) -> pd.DataFrame:
    if not choice:
        return pd.DataFrame([{"hint": "Choose a table first."}])
    return load_table(TAB_DIR / choice)


def refresh_dashboard() -> tuple:
    """Rebuild every dashboard output from the artifacts currently on disk.

    Returns:
        A 7-tuple in the order the UI wires its outputs: KPI HTML, sales
        chart, sentiment chart, top-sellers chart, gallery items, a dropdown
        update holding the available tables (first one selected), and the
        preview of that first table (an empty frame when there are none).
    """
    available = list_tables()
    if available:
        first_table = available[0]
        preview = refresh_table(first_table)
    else:
        first_table = None
        preview = pd.DataFrame()

    dropdown_update = gr.update(choices=available, value=first_table)
    kpi_html = kpi_cards_html()
    sales_fig = build_sales_chart()
    sentiment_fig = build_sentiment_chart()
    top_fig = build_top_sellers_chart()
    figures = gallery_items()
    return (kpi_html, sales_fig, sentiment_fig, top_fig, figures, dropdown_update, preview)


# Create the output folders at import time, before the UI is built.
ensure_dirs()

# Build the three-tab Gradio UI. Components are created in display order
# inside the nested context managers, so statement order here is significant.
with gr.Blocks(title="Notebook Runner Space", css=load_css()) as demo:
    gr.Markdown(
        "# ESCP Notebook Runner\n"
        "Run the bundled notebook on the two bundled CSV datasets, or replace them with your own files."
    )

    # Tab 1: optional uploads plus the button that executes the notebook.
    with gr.Tab("1. Run Notebook"):
        gr.Markdown(
            "Default project files already included in the Space:\n"
            "- `analysis.ipynb`\n"
            "- `synthetic_book_reviews.csv`\n"
            "- `synthetic_sales_data.csv`\n\n"
            "You can leave all upload fields empty to use the bundled files."
        )
        # type="filepath" presumably hands run_pipeline a path string (or None when empty) — confirm against Gradio docs.
        notebook_file = gr.File(label="Optional notebook (.ipynb)", file_types=[".ipynb"], type="filepath")
        reviews_file = gr.File(label="Optional reviews CSV", file_types=[".csv"], type="filepath")
        sales_file = gr.File(label="Optional sales CSV", file_types=[".csv"], type="filepath")
        run_button = gr.Button("Run Full Pipeline", variant="primary")
        run_log = gr.Textbox(label="Execution Log", lines=18, interactive=False)
        # The text returned by run_pipeline (success summary or failure trace) fills the log box.
        run_button.click(run_pipeline, inputs=[notebook_file, reviews_file, sales_file], outputs=run_log)

    # Tab 2: KPI cards, charts, figure gallery and table preview.
    with gr.Tab("2. Dashboard"):
        # The KPI HTML is computed once while the UI is built; the charts, gallery
        # and table start empty and are filled by the refresh button.
        kpis = gr.HTML(value=kpi_cards_html())
        refresh_button = gr.Button("Refresh Dashboard", variant="primary")
        chart_sales = gr.Plot(label="Monthly Overview")
        chart_sentiment = gr.Plot(label="Sentiment Distribution")
        chart_top = gr.Plot(label="Top Sellers")
        gallery = gr.Gallery(label="Generated Figures", columns=2, height=420, object_fit="contain")
        table_name = gr.Dropdown(label="Generated Tables", choices=[], interactive=True)
        table_preview = gr.Dataframe(label="Table Preview", interactive=False)
        # The outputs list must stay in the same order as the tuple returned by refresh_dashboard.
        refresh_button.click(refresh_dashboard, outputs=[kpis, chart_sales, chart_sentiment, chart_top, gallery, table_name, table_preview])
        # Selecting a different table reloads only the preview.
        table_name.change(refresh_table, inputs=table_name, outputs=table_preview)

    # Tab 3: static description of the packaged files.
    with gr.Tab("3. Project Files"):
        gr.Markdown(
            "The package includes the notebook, the two CSV datasets, `requirements.txt`, `style.css`, and the `artifacts/` folders."
        )

# NOTE(review): allowed_paths covers the whole app directory, presumably so the
# gallery can serve images under artifacts/ — confirm whether a narrower path
# would be enough. The launch runs at import time (no __main__ guard).
demo.launch(allowed_paths=[str(BASE_DIR)])