File size: 10,295 Bytes
4501e16
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
# AI-Assisted Code — Academic Integrity Notice
# Generated with The App Builder. ESCP coursework.
# Student must be able to explain all code when asked.

import shutil
import time
import traceback
from pathlib import Path

import gradio as gr
import pandas as pd
import papermill as pm
import plotly.graph_objects as go

# Every path below is anchored at the directory that contains this file.
BASE_DIR = Path(__file__).resolve().parent
# Executed notebook copies are written here by run_pipeline().
RUNS_DIR = BASE_DIR / "runs"
# Artifact folders scanned by the dashboard helpers (figures as *.png,
# tables as *.csv / *.json). Presumably populated by the notebook itself --
# TODO confirm against analysis.ipynb.
ART_DIR = BASE_DIR / "artifacts"
FIG_DIR = ART_DIR / "py" / "figures"
TAB_DIR = ART_DIR / "py" / "tables"

# Bundled inputs used whenever the corresponding upload field is left empty.
DEFAULT_NOTEBOOK = BASE_DIR / "analysis.ipynb"
DEFAULT_REVIEWS = BASE_DIR / "synthetic_book_reviews.csv"
DEFAULT_SALES = BASE_DIR / "synthetic_sales_data.csv"

# Passed to papermill as ``execution_timeout`` (presumably seconds -- confirm
# against the papermill documentation).
PAPERMILL_TIMEOUT = 1800
# Row cap applied when previewing CSV tables (``nrows`` of ``pd.read_csv``).
MAX_PREVIEW_ROWS = 50


def ensure_dirs() -> None:
    """Make sure the run and artifact folders exist (idempotent)."""
    required_folders = (RUNS_DIR, FIG_DIR, TAB_DIR)
    for folder in required_folders:
        # parents=True builds intermediate folders; exist_ok=True makes
        # repeated calls harmless.
        folder.mkdir(parents=True, exist_ok=True)


def load_css() -> str:
    """Return the contents of ``style.css`` next to this file, or ``""`` if absent."""
    stylesheet = BASE_DIR / "style.css"
    if not stylesheet.exists():
        return ""
    return stylesheet.read_text(encoding="utf-8")


def timestamp() -> str:
    """Return the current local time formatted as ``YYYYMMDD-HHMMSS``."""
    now = time.localtime()
    return time.strftime("%Y%m%d-%H%M%S", now)


def copy_input(source_path: str | None, fallback: Path, target: Path) -> None:
    """Copy the uploaded file or reuse the bundled default file."""
    source = Path(source_path) if source_path else fallback
    if not source.exists():
        raise FileNotFoundError(f"Missing required file: {source.name}")
    shutil.copy2(source, target)


def prepare_inputs(notebook_path: str | None, reviews_path: str | None, sales_path: str | None) -> None:
    """Stage the three inputs under the fixed filenames used by the pipeline.

    Each argument is an optional upload path; an empty value selects the
    bundled default for that slot.
    """
    staging_plan = (
        (notebook_path, DEFAULT_NOTEBOOK, "analysis.ipynb"),
        (reviews_path, DEFAULT_REVIEWS, "synthetic_book_reviews.csv"),
        (sales_path, DEFAULT_SALES, "synthetic_sales_data.csv"),
    )
    for uploaded, bundled, fixed_name in staging_plan:
        copy_input(uploaded, bundled, BASE_DIR / fixed_name)


def run_pipeline(notebook_path: str | None, reviews_path: str | None, sales_path: str | None) -> str:
    """Run the staged notebook through papermill and describe the outcome.

    Returns a human-readable log: a success summary listing the executed
    notebook, figures and tables, or a failure message followed by the tail
    of the traceback. Exceptions are reported in the log rather than raised.
    """
    ensure_dirs()
    try:
        prepare_inputs(notebook_path, reviews_path, sales_path)
        executed_nb = RUNS_DIR / f"run_{timestamp()}_analysis.ipynb"
        papermill_options = dict(
            input_path=str(BASE_DIR / "analysis.ipynb"),
            output_path=str(executed_nb),
            cwd=str(BASE_DIR),
            log_output=True,
            progress_bar=False,
            request_save_on_cell_execute=True,
            execution_timeout=PAPERMILL_TIMEOUT,
        )
        pm.execute_notebook(**papermill_options)

        # Summarise whatever files are currently in the artifact folders.
        figure_names = sorted(entry.name for entry in FIG_DIR.glob("*") if entry.is_file())
        table_names = sorted(entry.name for entry in TAB_DIR.glob("*") if entry.is_file())
        figure_list = ", ".join(figure_names) or "(none)"
        table_list = ", ".join(table_names) or "(none)"
        return (
            "Pipeline completed successfully.\n\n"
            f"Notebook output: {executed_nb.name}\n"
            f"Figures: {figure_list}\n"
            f"Tables: {table_list}"
        )
    except Exception as exc:
        # Keep only the last 5000 characters of the traceback for the log box.
        trace_tail = traceback.format_exc()[-5000:]
        return f"Pipeline failed: {exc}\n\n{trace_tail}"


def read_json(path: Path) -> dict:
    """Parse the UTF-8 encoded JSON file at ``path`` and return the result."""
    import json

    raw_text = path.read_text(encoding="utf-8")
    return json.loads(raw_text)


def load_table(path: Path) -> pd.DataFrame:
    """Safely preview a CSV or JSON artifact.

    Args:
        path: Artifact file; a ``.json`` suffix (case-insensitive) selects
            JSON parsing, anything else is read as CSV.

    Returns:
        A DataFrame with at most ``MAX_PREVIEW_ROWS`` rows. If reading or
        parsing fails, a one-row DataFrame whose ``error`` column holds the
        exception message is returned instead of raising.
    """
    try:
        if path.suffix.lower() == ".json":
            obj = read_json(path)
            # A single JSON object becomes one row; anything else is handed
            # to pandas as-is.
            frame = pd.DataFrame([obj]) if isinstance(obj, dict) else pd.DataFrame(obj)
            # Apply the same row cap the CSV branch uses so that large JSON
            # tables do not flood the preview widget.
            return frame.head(MAX_PREVIEW_ROWS)
        return pd.read_csv(path, nrows=MAX_PREVIEW_ROWS)
    except Exception as exc:
        return pd.DataFrame([{"error": str(exc)}])


def list_tables() -> list[str]:
    """Return the sorted names of the ``.csv`` / ``.json`` entries in the tables folder."""
    previewable_suffixes = {".csv", ".json"}
    names = [entry.name for entry in TAB_DIR.glob("*") if entry.suffix.lower() in previewable_suffixes]
    names.sort()
    return names


def gallery_items() -> list[tuple[str, str]]:
    """Return ``(image path, caption)`` pairs for every PNG in the figures folder.

    Captions are derived from the file stem: underscores become spaces and
    the result is title-cased. Pairs are ordered by sorted path.
    """
    items = []
    for image in sorted(FIG_DIR.glob("*.png")):
        caption = image.stem.replace("_", " ").title()
        items.append((str(image), caption))
    return items


def load_kpis() -> dict:
    """Load the KPI mapping from the first usable ``kpis.json`` artifact.

    The tables folder is checked before the figures folder. A candidate that
    is missing, unreadable, malformed, or whose top-level JSON value is not
    an object is skipped so that a later candidate can still be used.

    Returns:
        The parsed KPI dictionary, or ``{}`` when no usable file is found.
    """
    for candidate in (TAB_DIR / "kpis.json", FIG_DIR / "kpis.json"):
        if not candidate.exists():
            continue
        try:
            data = read_json(candidate)
        except Exception:
            # Best effort: a broken file must not hide a valid one elsewhere.
            continue
        # Callers index the result by key, so only a JSON object is usable.
        if isinstance(data, dict):
            return data
    return {}


def kpi_cards_html() -> str:
    """Render compact KPI cards without any background image.

    Returns:
        An HTML fragment: a ``card-grid`` div holding one card per known KPI
        present in the loaded KPI mapping, or a single "No data yet" card when
        no KPIs are available.
    """
    # Local import keeps this block self-contained.
    import html

    kpis = load_kpis()
    if not kpis:
        return '<div class="card-grid"><div class="card"><b>No data yet</b><br>Run the pipeline first.</div></div>'
    config = [
        ("n_titles", "Book Titles"),
        ("n_months", "Time Periods"),
        ("total_units_sold", "Units Sold"),
        ("total_revenue", "Revenue"),
    ]
    cards = []
    for key, label in config:
        if key not in kpis:
            continue
        value = kpis[key]
        if isinstance(value, (int, float)) and abs(value) >= 100:
            # Large numbers get thousands separators and no decimals.
            text = f"{value:,.0f}"
        else:
            # Values come from a JSON file written by a (possibly uploaded)
            # notebook, so escape them before embedding them in markup.
            text = html.escape(str(value))
        cards.append(f'<div class="card"><div class="label">{label}</div><div class="value">{text}</div></div>')
    return '<div class="card-grid">' + "".join(cards) + "</div>"


def empty_chart(title: str) -> go.Figure:
    """Build a blank white figure titled ``title`` with a centred "Run the pipeline first" note."""
    # Paper coordinates (0.5, 0.5) centre the note regardless of axis ranges.
    placeholder_note = dict(
        text="Run the pipeline first",
        x=0.5,
        y=0.5,
        xref="paper",
        yref="paper",
        showarrow=False,
    )
    layout_options = dict(
        title=title,
        template="plotly_white",
        height=420,
        paper_bgcolor="white",
        plot_bgcolor="white",
        annotations=[placeholder_note],
    )
    placeholder = go.Figure()
    placeholder.update_layout(**layout_options)
    return placeholder


def build_sales_chart() -> go.Figure:
    """Plot every numeric column of ``df_dashboard.csv`` against its date column.

    The date column is the first column whose name contains "month" or
    "date" (case-insensitive). A placeholder chart is returned when the file
    is missing, or when no date column or no numeric column is found.
    """
    chart_title = "Monthly Overview"
    csv_path = TAB_DIR / "df_dashboard.csv"
    if not csv_path.exists():
        return empty_chart(chart_title)

    frame = pd.read_csv(csv_path)

    date_col = None
    for name in frame.columns:
        lowered = name.lower()
        if "month" in lowered or "date" in lowered:
            date_col = name
            break
    numeric_cols = [
        name
        for name in frame.columns
        if name != date_col and pd.api.types.is_numeric_dtype(frame[name])
    ]
    if not (date_col and numeric_cols):
        return empty_chart(chart_title)

    # Unparseable dates become NaT instead of raising.
    frame[date_col] = pd.to_datetime(frame[date_col], errors="coerce")

    fig = go.Figure()
    for name in numeric_cols:
        series_label = name.replace("_", " ").title()
        line = go.Scatter(x=frame[date_col], y=frame[name], mode="lines+markers", name=series_label)
        fig.add_trace(line)
    fig.update_layout(
        title=chart_title,
        template="plotly_white",
        height=450,
        paper_bgcolor="white",
        plot_bgcolor="white",
    )
    return fig


def build_sentiment_chart() -> go.Figure:
    """Draw a stacked horizontal bar chart from ``sentiment_counts_sampled.csv``.

    The first CSV column supplies the bar labels; the ``negative``,
    ``neutral`` and ``positive`` columns (those that exist) are stacked in
    that order. A placeholder chart is returned when the file is missing.
    """
    chart_title = "Sentiment Distribution"
    csv_path = TAB_DIR / "sentiment_counts_sampled.csv"
    if not csv_path.exists():
        return empty_chart(chart_title)

    frame = pd.read_csv(csv_path)
    label_col = frame.columns[0]

    fig = go.Figure()
    for sentiment in ("negative", "neutral", "positive"):
        if sentiment not in frame.columns:
            continue
        bars = go.Bar(y=frame[label_col], x=frame[sentiment], orientation="h", name=sentiment.title())
        fig.add_trace(bars)

    # Grow the figure with the number of rows, never below 420 px.
    chart_height = max(420, len(frame) * 28)
    fig.update_layout(
        title=chart_title,
        barmode="stack",
        template="plotly_white",
        height=chart_height,
        paper_bgcolor="white",
        plot_bgcolor="white",
    )
    # Reversed y-axis keeps the first CSV row at the top.
    fig.update_yaxes(autorange="reversed")
    return fig


def build_top_sellers_chart() -> go.Figure:
    """Draw a horizontal bar chart of the first 15 rows of ``top_titles_by_units_sold.csv``.

    The label column is the first one whose name contains "title" (falling
    back to the first column); the value column is the first one whose name
    contains "unit" or "sold" (falling back to the last column). A
    placeholder chart is returned when the file is missing.
    """
    chart_title = "Top Sellers"
    csv_path = TAB_DIR / "top_titles_by_units_sold.csv"
    if not csv_path.exists():
        return empty_chart(chart_title)

    top_rows = pd.read_csv(csv_path).head(15)
    columns = top_rows.columns

    title_matches = (name for name in columns if "title" in name.lower())
    label_col = next(title_matches, columns[0])
    value_matches = (name for name in columns if "unit" in name.lower() or "sold" in name.lower())
    amount_col = next(value_matches, columns[-1])

    bars = go.Bar(y=top_rows[label_col], x=top_rows[amount_col], orientation="h")
    fig = go.Figure(bars)
    # Grow the figure with the number of rows, never below 420 px.
    chart_height = max(420, len(top_rows) * 28)
    fig.update_layout(
        title=chart_title,
        template="plotly_white",
        height=chart_height,
        paper_bgcolor="white",
        plot_bgcolor="white",
    )
    # Reversed y-axis keeps the first CSV row at the top.
    fig.update_yaxes(autorange="reversed")
    return fig


def refresh_table(choice: str | None) -> pd.DataFrame:
    """Return a preview of the chosen table, or a one-row hint when nothing is chosen."""
    if choice:
        return load_table(TAB_DIR / choice)
    return pd.DataFrame([{"hint": "Choose a table first."}])


def refresh_dashboard() -> tuple:
    """Recompute every dashboard output from the current artifact files.

    Returns a 7-tuple: KPI HTML, sales chart, sentiment chart, top-sellers
    chart, gallery items, a dropdown update (choices plus selected value),
    and the preview of the first available table (empty frame if none).
    """
    available = list_tables()
    if available:
        first_table = available[0]
        preview = refresh_table(first_table)
    else:
        first_table = None
        preview = pd.DataFrame()

    cards = kpi_cards_html()
    sales_fig = build_sales_chart()
    sentiment_fig = build_sentiment_chart()
    top_fig = build_top_sellers_chart()
    figures = gallery_items()
    dropdown_update = gr.update(choices=available, value=first_table)
    return (cards, sales_fig, sentiment_fig, top_fig, figures, dropdown_update, preview)


# Create the run/artifact folders at import time, before the UI is built.
ensure_dirs()

# UI definition. Components appear on the page in the order they are created,
# so the statement order inside each tab is significant.
with gr.Blocks(title="Notebook Runner Space", css=load_css()) as demo:
    gr.Markdown(
        "# ESCP Notebook Runner\n"
        "Run the bundled notebook on the two bundled CSV datasets, or replace them with your own files."
    )

    # Tab 1: optional uploads plus the button that executes the notebook.
    with gr.Tab("1. Run Notebook"):
        gr.Markdown(
            "Default project files already included in the Space:\n"
            "- `analysis.ipynb`\n"
            "- `synthetic_book_reviews.csv`\n"
            "- `synthetic_sales_data.csv`\n\n"
            "You can leave all upload fields empty to use the bundled files."
        )
        # type="filepath" hands run_pipeline a path string (or None when empty).
        notebook_file = gr.File(label="Optional notebook (.ipynb)", file_types=[".ipynb"], type="filepath")
        reviews_file = gr.File(label="Optional reviews CSV", file_types=[".csv"], type="filepath")
        sales_file = gr.File(label="Optional sales CSV", file_types=[".csv"], type="filepath")
        run_button = gr.Button("Run Full Pipeline", variant="primary")
        run_log = gr.Textbox(label="Execution Log", lines=18, interactive=False)
        # The string returned by run_pipeline is shown in the log textbox.
        run_button.click(run_pipeline, inputs=[notebook_file, reviews_file, sales_file], outputs=run_log)

    # Tab 2: KPI cards, charts, figure gallery and table preview.
    with gr.Tab("2. Dashboard"):
        # KPI cards are rendered once while the UI is built; the plots,
        # gallery and table start empty until "Refresh Dashboard" is clicked.
        kpis = gr.HTML(value=kpi_cards_html())
        refresh_button = gr.Button("Refresh Dashboard", variant="primary")
        chart_sales = gr.Plot(label="Monthly Overview")
        chart_sentiment = gr.Plot(label="Sentiment Distribution")
        chart_top = gr.Plot(label="Top Sellers")
        gallery = gr.Gallery(label="Generated Figures", columns=2, height=420, object_fit="contain")
        table_name = gr.Dropdown(label="Generated Tables", choices=[], interactive=True)
        table_preview = gr.Dataframe(label="Table Preview", interactive=False)
        # The outputs list must stay in the same order as the tuple returned
        # by refresh_dashboard.
        refresh_button.click(refresh_dashboard, outputs=[kpis, chart_sales, chart_sentiment, chart_top, gallery, table_name, table_preview])
        # Picking another table reloads only the preview.
        table_name.change(refresh_table, inputs=table_name, outputs=table_preview)

    # Tab 3: static description of the packaged files.
    with gr.Tab("3. Project Files"):
        gr.Markdown(
            "The package includes the notebook, the two CSV datasets, `requirements.txt`, `style.css`, and the `artifacts/` folders."
        )

# NOTE(review): allowed_paths presumably lets Gradio serve files located under
# BASE_DIR (e.g. the gallery images) -- confirm against the Gradio launch docs.
# The app is launched at import time; there is no __main__ guard.
demo.launch(allowed_paths=[str(BASE_DIR)])